From 53e6ac8132993605ec6b084b9dd7bda690aca7de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20R=C3=B6ger?= Date: Tue, 5 Nov 2024 20:20:46 +0100 Subject: [PATCH] feat(parser): add expression parser --- src/lisp/mod.rs | 6 ++ src/main.rs | 61 ++++----------- src/parser/mod.rs | 4 + src/parser/parser.rs | 163 ++++++++++++++++++++++++++++++++++++++++ src/parser/tokenizer.rs | 2 +- 5 files changed, 187 insertions(+), 49 deletions(-) create mode 100644 src/parser/parser.rs diff --git a/src/lisp/mod.rs b/src/lisp/mod.rs index 44d6dbe..6e01290 100644 --- a/src/lisp/mod.rs +++ b/src/lisp/mod.rs @@ -1,2 +1,8 @@ pub mod environment; pub mod expression; + +pub use environment::Environment; +pub use environment::EnvironmentLayer; +pub use expression::eval_prelude; +pub use expression::EvalError; +pub use expression::Expression; diff --git a/src/main.rs b/src/main.rs index dc61e2e..c8b0ea4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,56 +1,21 @@ mod lisp; mod parser; -use lisp::expression::{eval_prelude, Expression}; +use lisp::eval_prelude; +use parser::ExpressionStream; fn main() { - let mut test = "(add 10 (sub 1.1 200.5)) (concat-if true \"true\" 'nil (a . b))".chars(); + let program = "((lambda (x y) (add (if (lt x 10) (add x 10) x) y)) 2 20)"; - let mut tkns = parser::tokenizer::tokenize(&mut test); - - while let Some(tk) = tkns.next() { - println!("{:?}", tk); + for r in ExpressionStream::from_char_stream(program.chars()) { + match r { + Err(err) => println!("ParserError: {:?}", err), + Ok(expr) => println!( + "{:?} \n vvvvvvvvvvv \n {:?}\n", + expr.clone(), + eval_prelude(expr) + ), + } } - let expr: Expression = vec![ - vec![ - Expression::Symbol("lambda".to_string()), - vec![ - Expression::Symbol("x".to_string()), - Expression::Symbol("y".to_string()), - ] - .into(), - vec![ - Expression::Symbol("if".to_string()), - vec![ - Expression::Symbol("==".to_string()), - Expression::Symbol("x".to_string()), - Expression::Integer(5), - ] - .into(), - vec![ - Expression::Symbol("add".to_string()), - Expression::Symbol("x".to_string()), - Expression::Symbol("y".to_string()), - ] - .into(), - Expression::String("x is not 5".to_string()), - ] - .into(), - ] - .into(), - Expression::Integer(5), - vec![ - Expression::Symbol("let".to_string()), - vec![Expression::Cell( - Box::new(Expression::Symbol("y".to_string())), - Box::new(Expression::Integer(7)), - )] - .into(), - Expression::Symbol("y".to_string()), - ] - .into(), - ] - .into(); - - println!("{:?} evaluates to {:?}", expr.clone(), eval_prelude(expr)); + println!("Interpreter Done!"); } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 486ea3d..14539fc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,2 +1,6 @@ +pub mod parser; pub mod token; pub mod tokenizer; + +pub use parser::ExpressionStream; +pub use parser::ParserError; diff --git a/src/parser/parser.rs b/src/parser/parser.rs new file mode 100644 index 0000000..a5ed319 --- /dev/null +++ b/src/parser/parser.rs @@ -0,0 +1,163 @@ +use super::token::Token; +use super::tokenizer::tokenize; +use super::tokenizer::TokenStream; +use super::tokenizer::TokenizerError; +use crate::lisp::Expression; +use std::iter::Peekable; + +#[derive(Debug, Clone, PartialEq)] +pub enum ParserError { + UnexpectedToken(Token), + TokenizerError(TokenizerError), + UnexpectedEndOfInput, +} + +impl From for ParserError { + fn from(value: TokenizerError) -> Self { + ParserError::TokenizerError(value) + } +} + +fn parse_list(stream: &mut Peekable>) -> Result +where + I: Iterator, +{ + let mut list = Vec::new(); + + loop { + match stream.peek() { + // Return current list or nil + Some(Ok(Token::ParClose)) => { + stream.next(); + if list.len() == 0 { + return Ok(Expression::Nil); + } else { + return Ok(list.into()); + } + } + // Switch to cons-pair parsing + Some(Ok(Token::Dot)) => { + stream.next(); + if list.len() > 1 || list.len() == 0 { + return Err(ParserError::UnexpectedToken(Token::Dot)); + } else { + let second_expr = parse_expression(stream)?; + match stream.next() { + Some(Ok(Token::ParClose)) => { + return Ok(Expression::Cell( + Box::new(list[0].to_owned()), + Box::new(second_expr), + )); + } + Some(Ok(t)) => { + return Err(ParserError::UnexpectedToken(t)); + } + Some(Err(e)) => { + return Err(e.into()); + } + None => { + return Err(ParserError::UnexpectedEndOfInput); + } + } + } + } + _ => {} + } + list.push(parse_expression(stream)?); + } +} + +fn parse_expression(stream: &mut Peekable>) -> Result +where + I: Iterator, +{ + match stream.next() { + Some(Ok(Token::ParOpen)) => parse_list(stream), + Some(Ok(Token::Nil)) => Ok(Expression::Nil), + Some(Ok(Token::IntLiteral(n))) => Ok(Expression::Integer(n)), + Some(Ok(Token::FloatLiteral(f))) => Ok(Expression::Float(f)), + Some(Ok(Token::StringLiteral(s))) => Ok(Expression::String(s)), + Some(Ok(Token::True)) => Ok(Expression::True), + Some(Ok(Token::Symbol(s))) => Ok(Expression::Symbol(s)), + Some(Ok(Token::Quote)) => Ok(Expression::Quote(Box::new(parse_expression(stream)?))), + Some(Err(e)) => Err(ParserError::TokenizerError(e)), + Some(Ok(x)) => Err(ParserError::UnexpectedToken(x)), + None => Err(ParserError::UnexpectedEndOfInput), + } +} + +pub struct ExpressionStream> { + token_stream: Peekable>, +} + +impl> ExpressionStream { + pub fn from_token_stream(token_stream: TokenStream) -> Self { + ExpressionStream { + token_stream: token_stream.peekable(), + } + } + + pub fn from_char_stream(char_stream: I) -> Self { + ExpressionStream { + token_stream: tokenize(char_stream).peekable(), + } + } +} + +impl> Iterator for ExpressionStream { + type Item = Result; + + fn next(&mut self) -> Option { + if self.token_stream.peek() == None { + return None; + } + + Some(parse_expression(&mut self.token_stream)) + } +} + +#[test] +fn test_parser() { + let input = "(1 2 3) (4 5 6) (1 . 2) (1 . (2 . (3))) \"test\" '(a b c true nil)"; + let ts = tokenize(input.chars()); + let es = ExpressionStream::from_token_stream(ts); + let exprs = es.collect::, ParserError>>(); + assert_eq!( + exprs, + Ok(vec![ + vec![ + Expression::Integer(1), + Expression::Integer(2), + Expression::Integer(3), + ] + .into(), + vec![ + Expression::Integer(4), + Expression::Integer(5), + Expression::Integer(6), + ] + .into(), + Expression::Cell( + Box::new(Expression::Integer(1)), + Box::new(Expression::Integer(2)), + ), + vec![ + Expression::Integer(1), + Expression::Integer(2), + Expression::Integer(3), + ] + .into(), + Expression::String("test".to_string()), + Expression::Quote(Box::new( + vec![ + Expression::Symbol("a".to_string()), + Expression::Symbol("b".to_string()), + Expression::Symbol("c".to_string()), + Expression::True, + Expression::Nil, + ] + .into() + )), + ]) + ); +} diff --git a/src/parser/tokenizer.rs b/src/parser/tokenizer.rs index 808bdee..e129de8 100644 --- a/src/parser/tokenizer.rs +++ b/src/parser/tokenizer.rs @@ -1,6 +1,6 @@ use super::token::Token; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] /// Errors the tokenizer can yield. pub enum TokenizerError { /// The tokenizer could not read the associated sequence.