commit e415d90b662b2024a6f38196ded11f3881cbf636
from: Murilo Ijanc
date: Mon Dec 8 22:30:00 2025 UTC

Add bytecode compiler and stack-based VM

Compile AST to bytecode with opcodes for constants, arithmetic, print,
and return. Stack-based VM with dispatch loop executes bytecode. First
programs running end-to-end: source -> lexer -> parser -> compiler ->
VM -> output.

commit - a40058a5b5365b5e05a32eb8ba8ef34d2d3b51ab
commit + e415d90b662b2024a6f38196ded11f3881cbf636
blob - /dev/null
blob + fb580a3dd5681dd53cdf648d3238b0d3ff185147 (mode 644)
--- /dev/null
+++ src/compiler/mod.rs
@@ -0,0 +1,127 @@
+// vim: set tw=79 cc=80 ts=4 sw=4 sts=4 et :
+//
+// Copyright (c) 2025-2026 Murilo Ijanc'
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+pub mod opcode;
+
+use crate::error::OlangError;
+use crate::parser::ast::{BinOp, Expr, Program, Stmt, UnaryOp};
+use crate::span::Span;
+use crate::vm::value::Value;
+use opcode::{Chunk, OpCode};
+
+/// Single-pass compiler that lowers the `main` function of a parsed
+/// [`Program`] into one bytecode [`Chunk`].
+///
+/// Every instruction is emitted with source line 0 and every error carries
+/// `Span::new(0, 0)`; no real position information is tracked here.
+pub struct Compiler {
+    chunk: Chunk,
+}
+
+impl Default for Compiler {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl Compiler {
+    /// Largest value that fits in the single operand byte that follows
+    /// `Constant` and `Print`.
+    const MAX_OPERAND: usize = u8::MAX as usize;
+
+    /// Creates a compiler with an empty output chunk.
+    pub fn new() -> Self {
+        Self { chunk: Chunk::new() }
+    }
+
+    /// Compiles the body of the function named `main` and returns the
+    /// resulting chunk, terminated by a `Return` instruction.
+    ///
+    /// # Errors
+    ///
+    /// Returns an [`OlangError`] when the program has no `main` function,
+    /// when `main` contains a statement, expression or operator this
+    /// compiler does not support, or when an operand would not fit in one
+    /// byte (more than 256 constants, more than 255 `print` arguments).
+    pub fn compile(
+        mut self, program: &Program,
+    ) -> Result<Chunk, OlangError> {
+        let main_fn = program.functions.iter()
+            .find(|f| f.name == "main")
+            .ok_or_else(|| OlangError::new(
+                "no main() function found", Span::new(0, 0),
+            ))?;
+
+        for stmt in &main_fn.body {
+            self.compile_stmt(stmt)?;
+        }
+
+        self.chunk.emit_op(OpCode::Return, 0);
+        Ok(self.chunk)
+    }
+
+    /// Adds `value` to the constant pool and emits the `Constant`
+    /// instruction that loads it.
+    ///
+    /// The pool index is encoded in one byte and `Chunk::add_constant`
+    /// narrows it with `as u8`, so a 257th constant would silently alias
+    /// constant 0. That case is rejected here instead.
+    fn emit_constant(&mut self, value: Value) -> Result<(), OlangError> {
+        if self.chunk.constants.len() > Self::MAX_OPERAND {
+            return Err(OlangError::new(
+                "too many constants in one chunk (max 256)",
+                Span::new(0, 0),
+            ));
+        }
+        self.chunk.emit_constant(value, 0);
+        Ok(())
+    }
+
+    /// Compiles one statement.
+    ///
+    /// `print` pushes its arguments left to right, then emits `Print`
+    /// followed by the argument count. An expression statement is compiled
+    /// and its result discarded with `Pop`.
+    fn compile_stmt(&mut self, stmt: &Stmt) -> Result<(), OlangError> {
+        match stmt {
+            Stmt::Print { args, .. } => {
+                // The count is a single operand byte: refuse to truncate.
+                if args.len() > Self::MAX_OPERAND {
+                    return Err(OlangError::new(
+                        format!(
+                            "too many arguments to print: {} (max {})",
+                            args.len(),
+                            Self::MAX_OPERAND,
+                        ),
+                        Span::new(0, 0),
+                    ));
+                }
+                for arg in args {
+                    self.compile_expr(arg)?;
+                }
+                self.chunk.emit_op(OpCode::Print, 0);
+                // Lossless: bounded by MAX_OPERAND above.
+                self.chunk.emit_byte(args.len() as u8, 0);
+            }
+            Stmt::ExprStmt { expr, .. } => {
+                self.compile_expr(expr)?;
+                self.chunk.emit_op(OpCode::Pop, 0);
+            }
+            _ => {
+                return Err(OlangError::new(
+                    format!("unsupported statement: {:?}", stmt),
+                    Span::new(0, 0),
+                ));
+            }
+        }
+        Ok(())
+    }
+
+    /// Compiles one expression so that executing the emitted code leaves
+    /// exactly one value on the VM stack.
+    ///
+    /// Binary operands are emitted left first, then right, then the
+    /// operator, which is the order the VM pops them in.
+    fn compile_expr(&mut self, expr: &Expr) -> Result<(), OlangError> {
+        match expr {
+            Expr::IntLit(v, _) => self.emit_constant(Value::Int(*v))?,
+            Expr::FloatLit(v, _) => self.emit_constant(Value::Float(*v))?,
+            Expr::StrLit(s, _) => self.emit_constant(Value::Str(s.clone()))?,
+            Expr::BoolLit(v, _) => self.emit_constant(Value::Bool(*v))?,
+            Expr::Unary { op, expr, .. } => {
+                self.compile_expr(expr)?;
+                match op {
+                    UnaryOp::Negate => self.chunk.emit_op(OpCode::Negate, 0),
+                    UnaryOp::Not => {
+                        return Err(OlangError::new(
+                            "not operator not yet supported", Span::new(0, 0),
+                        ));
+                    }
+                }
+            }
+            Expr::Binary { op, left, right, .. } => {
+                self.compile_expr(left)?;
+                self.compile_expr(right)?;
+                let opcode = match op {
+                    BinOp::Add => OpCode::Add,
+                    BinOp::Sub => OpCode::Sub,
+                    BinOp::Mul => OpCode::Mul,
+                    BinOp::Div => OpCode::Div,
+                    BinOp::Mod => OpCode::Mod,
+                    _ => {
+                        return Err(OlangError::new(
+                            format!("unsupported binary op: {:?}", op),
+                            Span::new(0, 0),
+                        ));
+                    }
+                };
+                self.chunk.emit_op(opcode, 0);
+            }
+            _ => {
+                return Err(OlangError::new(
+                    format!("unsupported expression: {:?}", expr),
+                    Span::new(0, 0),
+                ));
+            }
+        }
+        Ok(())
+    }
+}
blob - /dev/null
blob + 9c9cbdf349de944ccdf23437fd67f1d7b8e00860 (mode 644)
--- /dev/null
+++ src/compiler/opcode.rs
@@ -0,0 +1,121 @@
+// vim: set tw=79 cc=80 ts=4 sw=4 sts=4 et :
+//
+// Copyright (c) 2025-2026 Murilo Ijanc'
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+use crate::vm::value::Value;
+
+/// One-byte instruction set shared by the compiler and the VM.
+///
+/// `Constant` and `Print` are each followed by one operand byte (a
+/// constant-pool index and an argument count respectively); every other
+/// opcode stands alone.
+#[derive(Debug, Clone, Copy, PartialEq)]
+#[repr(u8)]
+pub enum OpCode {
+    Constant,
+    Pop,
+    Add,
+    Sub,
+    Mul,
+    Div,
+    Mod,
+    Negate,
+    Print,
+    Return,
+}
+
+impl OpCode {
+    /// Decodes a raw bytecode byte, returning `None` for bytes that do not
+    /// name an opcode.
+    ///
+    /// The numbers mirror the declaration order of the enum; keep both in
+    /// sync when adding a variant.
+    pub fn from_byte(byte: u8) -> Option<Self> {
+        match byte {
+            0 => Some(OpCode::Constant),
+            1 => Some(OpCode::Pop),
+            2 => Some(OpCode::Add),
+            3 => Some(OpCode::Sub),
+            4 => Some(OpCode::Mul),
+            5 => Some(OpCode::Div),
+            6 => Some(OpCode::Mod),
+            7 => Some(OpCode::Negate),
+            8 => Some(OpCode::Print),
+            9 => Some(OpCode::Return),
+            _ => None,
+        }
+    }
+}
+
+/// A unit of compiled bytecode.
+pub struct Chunk {
+    /// Opcodes and their operand bytes, in execution order.
+    pub code: Vec<u8>,
+    /// Constant pool addressed by the operand of `OpCode::Constant`.
+    pub constants: Vec<Value>,
+    /// Source line recorded for each byte of `code` (same length).
+    pub lines: Vec<usize>,
+}
+
+impl Default for Chunk {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl Chunk {
+    /// Creates an empty chunk.
+    pub fn new() -> Self {
+        Self {
+            code: Vec::new(),
+            constants: Vec::new(),
+            lines: Vec::new(),
+        }
+    }
+
+    /// Appends one raw byte and records the line it came from.
+    pub fn emit_byte(&mut self, byte: u8, line: usize) {
+        self.code.push(byte);
+        self.lines.push(line);
+    }
+
+    /// Appends an opcode.
+    pub fn emit_op(&mut self, op: OpCode, line: usize) {
+        self.emit_byte(op as u8, line);
+    }
+
+    /// Adds `value` to the constant pool and returns its index.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the pool already holds 256 constants. The index must fit
+    /// in one operand byte; wrapping it would make the emitted code load
+    /// the wrong constant without any diagnostic.
+    pub fn add_constant(&mut self, value: Value) -> u8 {
+        let index = self.constants.len();
+        assert!(
+            index <= usize::from(u8::MAX),
+            "constant pool overflow: a chunk holds at most 256 constants"
+        );
+        self.constants.push(value);
+        // Lossless: checked against u8::MAX just above.
+        index as u8
+    }
+
+    /// Adds `value` to the pool and emits `Constant <index>`.
+    pub fn emit_constant(&mut self, value: Value, line: usize) {
+        let index = self.add_constant(value);
+        self.emit_op(OpCode::Constant, line);
+        self.emit_byte(index, line);
+    }
+
+    /// Prints a listing of the chunk to stdout under the heading `name`.
+    pub fn disassemble(&self, name: &str) {
+        println!("== {name} ==");
+        let mut offset = 0;
+        while offset < self.code.len() {
+            offset = self.disassemble_instruction(offset);
+        }
+    }
+
+    /// Prints the instruction at `offset` and returns the offset of the
+    /// next one. `offset` must be inside `code`, which `disassemble`
+    /// guarantees.
+    fn disassemble_instruction(&self, offset: usize) -> usize {
+        print!("{:04} ", offset);
+        let byte = self.code[offset];
+        let Some(op) = OpCode::from_byte(byte) else {
+            println!("unknown opcode {byte}");
+            return offset + 1;
+        };
+        match op {
+            OpCode::Constant | OpCode::Print => {
+                // Both take one operand byte. `code` and `constants` are
+                // public, so a hand-built chunk may be truncated or carry
+                // a bad index; report that instead of panicking.
+                let Some(&operand) = self.code.get(offset + 1) else {
+                    println!("{:?} <missing operand>", op);
+                    return offset + 1;
+                };
+                let operand = usize::from(operand);
+                if op == OpCode::Print {
+                    println!("{:<12} {:4}", "Print", operand);
+                } else if let Some(value) = self.constants.get(operand) {
+                    println!(
+                        "{:<12} {:4} ({})", "Constant", operand, value
+                    );
+                } else {
+                    println!(
+                        "{:<12} {:4} (<invalid constant>)",
+                        "Constant", operand
+                    );
+                }
+                offset + 2
+            }
+            _ => {
+                println!("{:?}", op);
+                offset + 1
+            }
+        }
+    }
+}
blob - 0e3ae3e1108786e6afd353e596e2f0870e775006
blob + fa4dce2b2edbef541e42a742ef2aeeaff7999c20
--- src/main.rs
+++ src/main.rs
@@ -15,10 +15,12 @@
 // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 //
 
+mod compiler;
 mod error;
 mod lexer;
 mod parser;
 mod span;
+mod vm;
 
 use std::env;
 use std::fs;
@@ -29,7 +31,7 @@ fn main() {
 
     if args.len() < 3 {
         eprintln!("usage: ol <command> <file>");
-        eprintln!("commands: tokenize, parse");
+        eprintln!("commands: run, tokenize, parse, compile");
         process::exit(1);
     }
 
@@ -39,16 +41,16 @@ fn main() {
     let source = match fs::read_to_string(filename) {
         Ok(s) => s,
         Err(e) => {
-            eprintln!(
-                "error reading '{filename}': {e}"
-            );
+            eprintln!("error reading '{filename}': {e}");
             process::exit(1);
         }
     };
 
     match command.as_str() {
+        "run" => cmd_run(&source),
         "tokenize" => cmd_tokenize(&source),
         "parse" => cmd_parse(&source),
+        "compile" => cmd_compile(&source),
         _ => {
             eprintln!("unknown command: {command}");
             process::exit(1);
@@ -56,29 +58,62 @@ fn main() {
     }
 }
 
+fn cmd_run(source: &str) {
+    let mut lexer = lexer::Lexer::new(source);
+    let tokens = match lexer.tokenize() {
+        Ok(t) => t,
+        Err(e) => { eprintln!("{e}"); process::exit(1); }
+    };
+    let mut parser = parser::Parser::new(tokens);
+    let program = match parser.parse_program() {
+        Ok(p) => p,
+        Err(e) => { eprintln!("{e}"); process::exit(1); }
+    };
+    let comp = compiler::Compiler::new();
+    let chunk = match comp.compile(&program) {
+        Ok(c) => c,
+        Err(e) => { eprintln!("{e}"); process::exit(1); }
+    };
+    let mut machine = vm::VM::new(chunk);
+    if let Err(e) = machine.run() {
+        eprintln!("runtime error: {e}");
+        process::exit(1);
+    }
+}
+
+fn cmd_compile(source: &str) {
+    let mut lexer = lexer::Lexer::new(source);
+    let tokens = match lexer.tokenize() {
+        Ok(t) => t,
+        Err(e) => { eprintln!("{e}"); process::exit(1); }
+    };
+    let mut parser = parser::Parser::new(tokens);
+    let program = match parser.parse_program() {
+        Ok(p) => p,
+        Err(e) => { eprintln!("{e}"); process::exit(1); }
+    };
+    let comp = compiler::Compiler::new();
+    match comp.compile(&program) {
+        Ok(chunk) => chunk.disassemble("main"),
+        Err(e) => { eprintln!("{e}"); process::exit(1); }
+    }
+}
+
 fn cmd_parse(source: &str) {
     let mut lexer = lexer::Lexer::new(source);
     let tokens = match lexer.tokenize() {
         Ok(t) => t,
-        Err(e) => {
-            eprintln!("{e}");
-            process::exit(1);
-        }
+        Err(e) => { eprintln!("{e}"); process::exit(1); }
     };
-
     let mut parser = parser::Parser::new(tokens);
     match parser.parse_program() {
         Ok(program) => println!("{:#?}", program),
-        Err(e) => {
-            eprintln!("{e}");
-            process::exit(1);
-        }
+        Err(e) => { eprintln!("{e}"); process::exit(1); }
     }
 }
 
 fn cmd_tokenize(source: &str) {
     let mut lexer = lexer::Lexer::new(source);
-
     match lexer.tokenize() {
         Ok(tokens) => {
             let parts: Vec<String> = tokens
@@ -87,9 +122,6 @@ fn cmd_tokenize(source: &str) {
                 .collect();
             println!("{}", parts.join(" "));
         }
-        Err(e) => {
-            eprintln!("{e}");
-            process::exit(1);
-        }
+        Err(e) => { eprintln!("{e}"); process::exit(1); }
     }
 }
blob - /dev/null
blob + 6e43e1f2d1aa8021436c58f5f3813ae58beb6812 (mode 644)
--- /dev/null
+++ src/vm/mod.rs
@@ -0,0 +1,172 @@
+// vim: set tw=79 cc=80 ts=4 sw=4 sts=4 et :
+//
+// Copyright (c) 2025-2026 Murilo Ijanc'
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS.
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+pub mod value;
+
+use crate::compiler::opcode::{Chunk, OpCode};
+use value::Value;
+
+/// Stack-based interpreter for a single bytecode [`Chunk`].
+pub struct VM {
+    chunk: Chunk,
+    /// Offset in `chunk.code` of the next byte to read.
+    ip: usize,
+    stack: Vec<Value>,
+}
+
+/// Wraps the outcome of a checked `i64` operation, turning overflow into a
+/// runtime error that names the operation.
+fn checked_int(result: Option<i64>, op: &str) -> Result<Value, String> {
+    result
+        .map(Value::Int)
+        .ok_or_else(|| format!("integer overflow in {op}"))
+}
+
+impl VM {
+    /// Creates a VM positioned at the first byte of `chunk` with an empty
+    /// stack.
+    pub fn new(chunk: Chunk) -> Self {
+        Self {
+            chunk,
+            ip: 0,
+            stack: Vec::new(),
+        }
+    }
+
+    /// Reads the byte at `ip` and advances past it.
+    ///
+    /// Running off the end of the code (for example a chunk with no
+    /// `Return`, or a missing operand) is an error, not a panic.
+    fn read_byte(&mut self) -> Result<u8, String> {
+        let Some(&byte) = self.chunk.code.get(self.ip) else {
+            return Err(format!(
+                "unexpected end of bytecode at offset {}", self.ip
+            ));
+        };
+        self.ip += 1;
+        Ok(byte)
+    }
+
+    fn push(&mut self, value: Value) {
+        self.stack.push(value);
+    }
+
+    /// Pops the top of the stack, failing on an empty stack.
+    fn pop(&mut self) -> Result<Value, String> {
+        self.stack.pop().ok_or_else(|| "stack underflow".to_string())
+    }
+
+    /// Pops the two operands of a binary instruction and returns them as
+    /// `(left, right)`. The right operand was pushed last, so it comes off
+    /// first.
+    fn pop_operands(&mut self) -> Result<(Value, Value), String> {
+        let right = self.pop()?;
+        let left = self.pop()?;
+        Ok((left, right))
+    }
+
+    /// Executes the chunk until a `Return` instruction is reached.
+    ///
+    /// # Errors
+    ///
+    /// Returns a message describing the failure for: an unknown opcode,
+    /// truncated bytecode, an out-of-range constant index, stack
+    /// underflow, operands of the wrong type, integer division or modulo
+    /// by zero, and `i64` overflow.
+    pub fn run(&mut self) -> Result<(), String> {
+        loop {
+            let byte = self.read_byte()?;
+            let Some(op) = OpCode::from_byte(byte) else {
+                return Err(format!("unknown opcode: {byte}"));
+            };
+
+            match op {
+                OpCode::Constant => {
+                    let index = usize::from(self.read_byte()?);
+                    let Some(value) =
+                        self.chunk.constants.get(index).cloned()
+                    else {
+                        return Err(format!(
+                            "invalid constant index: {index}"
+                        ));
+                    };
+                    self.push(value);
+                }
+                OpCode::Pop => {
+                    self.pop()?;
+                }
+                OpCode::Add => {
+                    let (a, b) = self.pop_operands()?;
+                    let result = match (&a, &b) {
+                        (Value::Int(x), Value::Int(y)) =>
+                            checked_int(x.checked_add(*y), "addition")?,
+                        (Value::Float(x), Value::Float(y)) =>
+                            Value::Float(x + y),
+                        (Value::Str(x), Value::Str(y)) =>
+                            Value::Str(format!("{x}{y}")),
+                        _ => return Err(format!(
+                            "cannot add {:?} and {:?}", a, b
+                        )),
+                    };
+                    self.push(result);
+                }
+                OpCode::Sub => {
+                    let (a, b) = self.pop_operands()?;
+                    let result = match (&a, &b) {
+                        (Value::Int(x), Value::Int(y)) =>
+                            checked_int(x.checked_sub(*y), "subtraction")?,
+                        (Value::Float(x), Value::Float(y)) =>
+                            Value::Float(x - y),
+                        _ => return Err(format!(
+                            "cannot subtract {:?} and {:?}", a, b
+                        )),
+                    };
+                    self.push(result);
+                }
+                OpCode::Mul => {
+                    let (a, b) = self.pop_operands()?;
+                    let result = match (&a, &b) {
+                        (Value::Int(x), Value::Int(y)) =>
+                            checked_int(
+                                x.checked_mul(*y), "multiplication",
+                            )?,
+                        (Value::Float(x), Value::Float(y)) =>
+                            Value::Float(x * y),
+                        _ => return Err(format!(
+                            "cannot multiply {:?} and {:?}", a, b
+                        )),
+                    };
+                    self.push(result);
+                }
+                OpCode::Div => {
+                    let (a, b) = self.pop_operands()?;
+                    let result = match (&a, &b) {
+                        (Value::Int(x), Value::Int(y)) => {
+                            if *y == 0 {
+                                return Err("division by zero".to_string());
+                            }
+                            // With zero excluded, only i64::MIN / -1
+                            // can still fail.
+                            checked_int(x.checked_div(*y), "division")?
+                        }
+                        (Value::Float(x), Value::Float(y)) =>
+                            Value::Float(x / y),
+                        _ => return Err(format!(
+                            "cannot divide {:?} and {:?}", a, b
+                        )),
+                    };
+                    self.push(result);
+                }
+                OpCode::Mod => {
+                    let (a, b) = self.pop_operands()?;
+                    let result = match (&a, &b) {
+                        (Value::Int(x), Value::Int(y)) => {
+                            if *y == 0 {
+                                return Err("modulo by zero".to_string());
+                            }
+                            // i64::MIN % -1 overflows just like the
+                            // division does.
+                            checked_int(x.checked_rem(*y), "modulo")?
+                        }
+                        _ => return Err(format!(
+                            "cannot modulo {:?} and {:?}", a, b
+                        )),
+                    };
+                    self.push(result);
+                }
+                OpCode::Negate => {
+                    let v = self.pop()?;
+                    let result = match v {
+                        Value::Int(n) =>
+                            checked_int(n.checked_neg(), "negation")?,
+                        Value::Float(n) => Value::Float(-n),
+                        _ => return Err(format!("cannot negate {:?}", v)),
+                    };
+                    self.push(result);
+                }
+                OpCode::Print => {
+                    let count = usize::from(self.read_byte()?);
+                    // The operand may claim more values than the stack
+                    // holds; a plain subtraction would underflow.
+                    let Some(start) = self.stack.len().checked_sub(count)
+                    else {
+                        return Err("stack underflow".to_string());
+                    };
+                    let output: Vec<String> = self.stack
+                        .drain(start..)
+                        .map(|v| v.to_string())
+                        .collect();
+                    println!("{}", output.join(" "));
+                }
+                OpCode::Return => {
+                    return Ok(());
+                }
+            }
+        }
+    }
+}
blob - /dev/null
blob + b4a9c703ba469c408293df4caec68392e45be323 (mode 644)
--- /dev/null
+++ src/vm/value.rs
@@ -0,0 +1,37 @@
+// vim: set tw=79 cc=80 ts=4 sw=4 sts=4 et :
+//
+// Copyright (c) 2025-2026 Murilo Ijanc'
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+use std::fmt;
+
+/// A value stored in a chunk's constant pool or on the VM stack.
+#[derive(Debug, Clone)]
+pub enum Value {
+    /// Signed 64-bit integer.
+    Int(i64),
+    /// 64-bit floating-point number.
+    Float(f64),
+    /// Owned string.
+    Str(String),
+    /// Boolean.
+    Bool(bool),
+}
+
+/// Writes the payload with its own `Display` formatting and nothing around
+/// it, so strings appear without quotes.
+impl fmt::Display for Value {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Int(number) => f.write_fmt(format_args!("{number}")),
+            Self::Float(number) => f.write_fmt(format_args!("{number}")),
+            Self::Str(text) => f.write_fmt(format_args!("{text}")),
+            Self::Bool(flag) => f.write_fmt(format_args!("{flag}")),
+        }
+    }
+}