commit a40058a5b5365b5e05a32eb8ba8ef34d2d3b51ab from: Murilo Ijanc date: Fri Nov 14 18:00:00 2025 UTC Add full parser with statements and functions Parse complete programs: FnDecl, let/let mut, if/else/else if, while, for..in range(), return, print, assignments. Semicolons terminate statements, last expression without semicolon is implicit return. commit - ebe3a0a72c98c733977195ad2a30c57189ce8701 commit + a40058a5b5365b5e05a32eb8ba8ef34d2d3b51ab blob - 8aaab798979dedf14befb4da4574d361936120d1 blob + 0e3ae3e1108786e6afd353e596e2f0870e775006 --- src/main.rs +++ src/main.rs @@ -67,8 +67,8 @@ fn cmd_parse(source: &str) { }; let mut parser = parser::Parser::new(tokens); - match parser.parse_expression() { - Ok(expr) => println!("{:#?}", expr), + match parser.parse_program() { + Ok(program) => println!("{:#?}", program), Err(e) => { eprintln!("{e}"); process::exit(1); blob - 1445329a8518ba61dfc298e9e4c639e637afe502 blob + a88808288eaec57a056546e818e1e2810e707b67 --- src/parser/ast.rs +++ src/parser/ast.rs @@ -18,6 +18,82 @@ use crate::span::Span; #[derive(Debug, Clone)] +pub enum Type { + Int, + Float, + Str, + Bool, +} + +#[derive(Debug, Clone)] +pub struct Param { + pub name: String, + pub ty: Type, + pub span: Span, +} + +#[derive(Debug, Clone)] +pub struct FnDecl { + pub name: String, + pub params: Vec, + pub return_ty: Option, + pub body: Block, + pub span: Span, +} + +#[derive(Debug, Clone)] +pub struct Program { + pub functions: Vec, +} + +pub type Block = Vec; + +#[derive(Debug, Clone)] +pub enum Stmt { + Let { + name: String, + mutable: bool, + ty: Option, + value: Expr, + span: Span, + }, + Assign { + name: String, + value: Expr, + span: Span, + }, + If { + condition: Expr, + then_block: Block, + else_block: Option, + span: Span, + }, + For { + var: String, + iter: Expr, + body: Block, + span: Span, + }, + While { + condition: Expr, + body: Block, + span: Span, + }, + Return { + value: Option, + span: Span, + }, + Print { + args: Vec, + span: Span, + }, + ExprStmt { + expr: Expr, + span: Span, + }, +} + +#[derive(Debug, Clone)] pub enum BinOp { Add, Sub, blob - fefc755e3dac17e7b89ce7583e1752d34287c4b7 blob + 4d736731fc3365f00b0fa9ef325be1462bb09896 --- src/parser/mod.rs +++ src/parser/mod.rs @@ -20,7 +20,10 @@ pub mod ast; use crate::error::OlangError; use crate::lexer::token::{Token, TokenKind}; use crate::span::Span; -use ast::{BinOp, Expr, UnaryOp}; +use ast::{ + BinOp, Block, Expr, FnDecl, Param, Program, Stmt, + Type, UnaryOp, +}; pub struct Parser { tokens: Vec, @@ -66,6 +69,355 @@ impl Parser { } } + fn expect_ident( + &mut self, + ) -> Result<(String, Span), OlangError> { + let token = self.advance().clone(); + if let TokenKind::Ident(name) = token.kind { + Ok((name, token.span)) + } else { + Err(OlangError::new( + format!( + "expected identifier, found {:?}", + token.kind + ), + token.span, + )) + } + } + + // --- Program --- + + pub fn parse_program( + &mut self, + ) -> Result { + let mut functions = Vec::new(); + + while *self.peek() != TokenKind::Eof { + functions.push(self.parse_fn_decl()?); + } + + Ok(Program { functions }) + } + + fn parse_fn_decl( + &mut self, + ) -> Result { + let start = self.expect(&TokenKind::Fn)?; + let (name, _) = self.expect_ident()?; + + self.expect(&TokenKind::LParen)?; + let params = self.parse_params()?; + self.expect(&TokenKind::RParen)?; + + let return_ty = if *self.peek() == TokenKind::Arrow + { + self.advance(); + Some(self.parse_type()?) + } else { + None + }; + + let body = self.parse_block()?; + let end = self.tokens[self.pos - 1].span; + + Ok(FnDecl { + name, + params, + return_ty, + body, + span: Span::new(start.start, end.end), + }) + } + + fn parse_params( + &mut self, + ) -> Result, OlangError> { + let mut params = Vec::new(); + + if *self.peek() == TokenKind::RParen { + return Ok(params); + } + + params.push(self.parse_param()?); + while *self.peek() == TokenKind::Comma { + self.advance(); + params.push(self.parse_param()?); + } + + Ok(params) + } + + fn parse_param( + &mut self, + ) -> Result { + let (name, start) = self.expect_ident()?; + self.expect(&TokenKind::Colon)?; + let ty = self.parse_type()?; + let end = self.tokens[self.pos - 1].span; + + Ok(Param { + name, + ty, + span: Span::new(start.start, end.end), + }) + } + + fn parse_type( + &mut self, + ) -> Result { + let token = self.advance().clone(); + match token.kind { + TokenKind::IntType => Ok(Type::Int), + TokenKind::FloatType => Ok(Type::Float), + TokenKind::StrType => Ok(Type::Str), + TokenKind::BoolType => Ok(Type::Bool), + _ => Err(OlangError::new( + format!( + "expected type, found {:?}", + token.kind + ), + token.span, + )), + } + } + + // --- Block --- + + fn parse_block( + &mut self, + ) -> Result { + self.expect(&TokenKind::LBrace)?; + let mut stmts = Vec::new(); + + while *self.peek() != TokenKind::RBrace { + stmts.push(self.parse_statement()?); + } + + self.expect(&TokenKind::RBrace)?; + Ok(stmts) + } + + // --- Statements --- + + fn parse_statement( + &mut self, + ) -> Result { + match self.peek() { + TokenKind::Let => self.parse_let(), + TokenKind::If => self.parse_if(), + TokenKind::For => self.parse_for(), + TokenKind::While => self.parse_while(), + TokenKind::Return => self.parse_return(), + TokenKind::Print => self.parse_print(), + _ => self.parse_assign_or_expr(), + } + } + + fn parse_let( + &mut self, + ) -> Result { + let start = self.expect(&TokenKind::Let)?; + + let mutable = + if *self.peek() == TokenKind::Mut { + self.advance(); + true + } else { + false + }; + + let (name, _) = self.expect_ident()?; + + let ty = if *self.peek() == TokenKind::Colon { + self.advance(); + Some(self.parse_type()?) + } else { + None + }; + + self.expect(&TokenKind::Eq)?; + let value = self.parse_expression()?; + let end = self.expect(&TokenKind::Semicolon)?; + + Ok(Stmt::Let { + name, + mutable, + ty, + value, + span: Span::new(start.start, end.end), + }) + } + + fn parse_if( + &mut self, + ) -> Result { + let start = self.expect(&TokenKind::If)?; + let condition = self.parse_expression()?; + let then_block = self.parse_block()?; + + let else_block = + if *self.peek() == TokenKind::Else { + self.advance(); + if *self.peek() == TokenKind::If { + // else if -> wrap in a block with + // one If statement + let if_stmt = self.parse_if()?; + Some(vec![if_stmt]) + } else { + Some(self.parse_block()?) + } + } else { + None + }; + + let end = self.tokens[self.pos - 1].span; + + Ok(Stmt::If { + condition, + then_block, + else_block, + span: Span::new(start.start, end.end), + }) + } + + fn parse_for( + &mut self, + ) -> Result { + let start = self.expect(&TokenKind::For)?; + let (var, _) = self.expect_ident()?; + self.expect(&TokenKind::In)?; + + // Parse range(expr) + let range_start = self.peek_span(); + self.expect(&TokenKind::Range)?; + self.expect(&TokenKind::LParen)?; + let limit = self.parse_expression()?; + let range_end = + self.expect(&TokenKind::RParen)?; + + let iter = Expr::Call { + name: "range".to_string(), + args: vec![limit], + span: Span::new( + range_start.start, + range_end.end, + ), + }; + + let body = self.parse_block()?; + let end = self.tokens[self.pos - 1].span; + + Ok(Stmt::For { + var, + iter, + body, + span: Span::new(start.start, end.end), + }) + } + + fn parse_while( + &mut self, + ) -> Result { + let start = self.expect(&TokenKind::While)?; + let condition = self.parse_expression()?; + let body = self.parse_block()?; + let end = self.tokens[self.pos - 1].span; + + Ok(Stmt::While { + condition, + body, + span: Span::new(start.start, end.end), + }) + } + + fn parse_return( + &mut self, + ) -> Result { + let start = self.expect(&TokenKind::Return)?; + + let value = + if *self.peek() == TokenKind::Semicolon { + None + } else { + Some(self.parse_expression()?) + }; + + let end = self.expect(&TokenKind::Semicolon)?; + + Ok(Stmt::Return { + value, + span: Span::new(start.start, end.end), + }) + } + + fn parse_print( + &mut self, + ) -> Result { + let start = self.expect(&TokenKind::Print)?; + self.expect(&TokenKind::LParen)?; + + let mut args = Vec::new(); + if *self.peek() != TokenKind::RParen { + args.push(self.parse_expression()?); + while *self.peek() == TokenKind::Comma { + self.advance(); + args.push(self.parse_expression()?); + } + } + + self.expect(&TokenKind::RParen)?; + let end = self.expect(&TokenKind::Semicolon)?; + + Ok(Stmt::Print { + args, + span: Span::new(start.start, end.end), + }) + } + + fn parse_assign_or_expr( + &mut self, + ) -> Result { + let expr = self.parse_expression()?; + + // Check if it's an assignment: ident = expr; + if let Expr::Ident(ref name, _) = expr { + if *self.peek() == TokenKind::Eq { + let name = name.clone(); + let start = self.span_of(&expr); + self.advance(); + let value = self.parse_expression()?; + let end = + self.expect(&TokenKind::Semicolon)?; + return Ok(Stmt::Assign { + name, + value, + span: Span::new( + start.start, + end.end, + ), + }); + } + } + + let span = self.span_of(&expr); + + // Expression statement with ; + if *self.peek() == TokenKind::Semicolon { + let end = + self.expect(&TokenKind::Semicolon)?; + return Ok(Stmt::ExprStmt { + expr, + span: Span::new(span.start, end.end), + }); + } + + // Expression without ; (implicit return) + Ok(Stmt::ExprStmt { expr, span }) + } + + // --- Expressions --- + pub fn parse_expression( &mut self, ) -> Result { @@ -76,6 +428,7 @@ impl Parser { &mut self, ) -> Result { let mut left = self.parse_comparison()?; + loop { let op = match self.peek() { TokenKind::EqEq => BinOp::Eq, @@ -95,6 +448,7 @@ impl Parser { span, }; } + Ok(left) } @@ -102,14 +456,13 @@ impl Parser { &mut self, ) -> Result { let mut left = self.parse_addition()?; + loop { let op = match self.peek() { TokenKind::Less => BinOp::Less, TokenKind::LessEq => BinOp::LessEq, TokenKind::Greater => BinOp::Greater, - TokenKind::GreaterEq => { - BinOp::GreaterEq - } + TokenKind::GreaterEq => BinOp::GreaterEq, _ => break, }; self.advance(); @@ -125,6 +478,7 @@ impl Parser { span, }; } + Ok(left) } @@ -132,6 +486,7 @@ impl Parser { &mut self, ) -> Result { let mut left = self.parse_multiply()?; + loop { let op = match self.peek() { TokenKind::Plus => BinOp::Add, @@ -151,6 +506,7 @@ impl Parser { span, }; } + Ok(left) } @@ -158,6 +514,7 @@ impl Parser { &mut self, ) -> Result { let mut left = self.parse_unary()?; + loop { let op = match self.peek() { TokenKind::Star => BinOp::Mul, @@ -178,6 +535,7 @@ impl Parser { span, }; } + Ok(left) } @@ -185,6 +543,7 @@ impl Parser { &mut self, ) -> Result { let start = self.peek_span(); + if *self.peek() == TokenKind::Minus { self.advance(); let expr = self.parse_unary()?; @@ -198,6 +557,7 @@ impl Parser { span, }); } + self.parse_call() } @@ -205,18 +565,17 @@ impl Parser { &mut self, ) -> Result { let expr = self.parse_primary()?; + if let Expr::Ident(ref name, _) = expr { if *self.peek() == TokenKind::LParen { let name = name.clone(); let start = self.span_of(&expr); self.advance(); let mut args = Vec::new(); + if *self.peek() != TokenKind::RParen { - args.push( - self.parse_expression()?, - ); - while *self.peek() - == TokenKind::Comma + args.push(self.parse_expression()?); + while *self.peek() == TokenKind::Comma { self.advance(); args.push( @@ -224,6 +583,7 @@ impl Parser { ); } } + let end = self.expect(&TokenKind::RParen)?; let span = @@ -235,6 +595,7 @@ impl Parser { }); } } + Ok(expr) } @@ -242,6 +603,7 @@ impl Parser { &mut self, ) -> Result { let token = self.advance().clone(); + match token.kind { TokenKind::IntLit(v) => { Ok(Expr::IntLit(v, token.span))