commit c916ec471c406a8caef7d57af437fa182632e120 from: Murilo Ijanc date: Tue Jul 15 13:30:00 2025 UTC Init project scaffolding Setup Cargo.toml, Makefile, LICENSE, README and initial main.rs for the olang programming language. commit - /dev/null commit + c916ec471c406a8caef7d57af437fa182632e120 blob - /dev/null blob + ea8c4bf7f35f6f77f75d92ad8ce8349f6e81ddba (mode 644) --- /dev/null +++ .gitignore @@ -0,0 +1 @@ +/target blob - /dev/null blob + df99c69198f5813df5fc3eaa007a2af0e60a7bbd (mode 644) --- /dev/null +++ .rustfmt.toml @@ -0,0 +1 @@ +max_width = 80 blob - /dev/null blob + 1cb4b3ac7df615aa870445ed07b328c60274cde8 (mode 644) --- /dev/null +++ Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "ol" +version = "0.1.0" blob - /dev/null blob + 5c2caa78aaad360f629e528e4e0b0e3ee0616d39 (mode 644) --- /dev/null +++ Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "ol" +version = "0.1.0" +authors = ["Murilo Ijanc' "] +edition = "2024" +homepage = "https://ijanc.org/ol/" +license = "ISC" +readme = "README.md" +repository = "https://git.ijanc.org/ol/" + +[dependencies] blob - /dev/null blob + ccb3f3d7f0d5c2380172474e614e4d98aa56aa1f (mode 644) --- /dev/null +++ LICENSE @@ -0,0 +1,13 @@ +Copyright (c) 2025-2026 Murilo Ijanc' + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. blob - /dev/null blob + e2621bd2f65f44cd30d6bb7edde1cf3543ac6dc5 (mode 644) --- /dev/null +++ Makefile @@ -0,0 +1,39 @@ +.PHONY: all check build build-release test fmt clippy lint doc clean install run + +TARGET = --target x86_64-unknown-linux-gnu + +all: run + +check: + cargo check $(TARGET) + +build: + cargo build $(TARGET) + +build-release: + cargo build $(TARGET) --release + +test: + cargo test $(TARGET) + +fmt: + cargo fmt + +clippy: + cargo clippy $(TARGET) + +lint: + cargo fmt --check + cargo clippy $(TARGET) --all-features -- -D warnings + +doc: + cargo doc $(TARGET) + +clean: + cargo clean + +install: + cargo install --path . + +run: + cargo run $(TARGET) blob - /dev/null blob + c777aa32b29c91e0b20c5e8fd262c1ee8b7a4e47 (mode 644) --- /dev/null +++ PLAN.md @@ -0,0 +1,505 @@ +# Olang — Plano de Desenvolvimento + +## Contexto + +Construir a linguagem de programacao **olang** do zero em Rust com foco em **aprendizado** e **simplicidade**. O projeto segue o pipeline classico: source -> tokens -> AST -> bytecode -> VM. Cada fase eh um post publicavel da serie, com explicacao + codigo funcional. No final da serie, temos a v0.1 rodando programas completos. + +--- + +## 0. 
Toolchain e Prerequisitos + +**Instalar:** +```bash +rustup toolchain install stable +``` + +**Criar `Cargo.toml`:** +- `name = "ol"`, `edition = "2024"` +- Sem dependencias externas — tudo escrito do zero para aprendizado + +**Convencoes:** +- Extensao de arquivos olang: `.ol` +- `;` como terminador de statements (similar ao Rust) +- Ultima expressao do bloco **sem** `;` = retorno implicito + +--- + +## Sintaxe da olang + +``` +fn main() { + let x = 42; + let mut name: str = "world"; + + if x > 10 { + print("hello " + name); + } + + for i in range(10) { + print(i); + } +} + +fn add(a: int, b: int) -> int { + a + b +} +``` + +- `let` / `let mut`, tipos `int`, `float`, `str`, `bool` +- Blocos com `{}`, `;` termina statements, sem `;` = retorno implicito, extensao `.ol` + +--- + +## Estrutura do Projeto + +``` +ol/ +├── Cargo.toml +├── Makefile +├── src/ +│ ├── main.rs # CLI: orquestra pipeline e subcomandos +│ ├── span.rs # Span para posicoes no codigo fonte +│ ├── error.rs # OlangError com linha, coluna, trecho +│ ├── lexer/ +│ │ ├── mod.rs # Lexer: source -> tokens +│ │ └── token.rs # Token e TokenKind enum +│ ├── parser/ +│ │ ├── mod.rs # Parser recursive-descent +│ │ └── ast.rs # AST nodes (Expr, Stmt, FnDecl, Program) +│ ├── compiler/ +│ │ ├── mod.rs # Compiler: AST -> bytecode +│ │ └── opcode.rs # OpCode enum + Chunk (code + constants) +│ └── vm/ +│ ├── mod.rs # VM stack-based: dispatch loop +│ └── value.rs # Value enum (Int, Float, Str, Bool) +└── tests/ # .ol files com stdout esperado +``` + +--- + +## Fase 1: Lexer Basico + +**Objetivo:** Source -> tokens para expressoes simples (numeros, operadores, parenteses). 
+ +### span.rs +- `Span { start: usize, end: usize }` para rastrear posicoes no fonte + +### error.rs +- `OlangError { message, span }` basico + +### lexer/token.rs +- `Token { kind: TokenKind, span: Span }` +- `TokenKind`: `IntLit(i64)`, `Plus`, `Minus`, `Star`, `Slash`, `LParen`, `RParen`, `Semicolon`, `Eof` + +### lexer/mod.rs +- Lexer struct com `source`, `pos`, `line`, `col` +- `tokenize() -> Result<Vec<Token>, OlangError>` +- Pular whitespace, reconhecer numeros e operadores + +### main.rs +- Subcomando `tokenize`: le `.ol` e imprime tokens + +### Arquivos desta fase +- `src/main.rs`, `src/span.rs`, `src/error.rs` +- `src/lexer/mod.rs`, `src/lexer/token.rs` + +### Verificacao +```bash +$ echo "42 + 10" > test.ol +$ cargo run -- tokenize test.ol +IntLit(42) Plus IntLit(10) Eof +``` + +--- + +## Fase 2: Lexer Completo + +**Objetivo:** Suportar strings, keywords, identificadores, comentarios e tokens multi-char. + +### Adicoes ao lexer +- **Strings** com escapes (`\"`, `\\`, `\n`) +- **Identificadores**: `[a-zA-Z_][a-zA-Z0-9_]*` +- **Keywords**: `let`, `mut`, `fn`, `if`, `else`, `for`, `in`, `while`, `return`, `print`, `range`, `true`, `false` +- **Tokens 2-char**: `->`, `==`, `!=`, `<=`, `>=` +- **Tokens extras**: `Colon`, `Comma`, `LBrace`, `RBrace`, `Semicolon` +- **Comentarios**: `#` ate fim da linha (ignorados) +- **Tipos**: `IntType`, `FloatType`, `StrType`, `BoolType` + +### Erros +- Mensagens com linha e coluna: `"unexpected char 'X' at 3:12"` + +### Verificacao +```bash +$ cat hello.ol +fn main() { + let x: int = 42; + print(x); +} +$ cargo run -- tokenize hello.ol +Fn Ident("main") LParen RParen LBrace +Let Ident("x") Colon IntType Eq IntLit(42) Semicolon +Print LParen Ident("x") RParen Semicolon +RBrace Eof +``` + +--- + +## Fase 3: Parser de Expressoes + +**Objetivo:** Tokens -> AST para expressoes, usando recursive-descent com precedencia. 
+ +### parser/ast.rs +- `Expr`: `IntLit`, `FloatLit`, `StrLit`, `BoolLit`, `Ident`, `Unary`, `Binary`, `Call` + +### parser/mod.rs +- Parser com precedencia (menor -> maior): + - `or` -> `and` -> `equality` -> `comparison` -> `addition` -> `multiply` -> `unary` -> `call` -> `primary` +- `parse_expression() -> Result<Expr, OlangError>` + +### Arquivos desta fase +- `src/parser/mod.rs`, `src/parser/ast.rs` + +### Verificacao +```bash +$ echo "1 + 2 * 3" > expr.ol +$ cargo run -- parse expr.ol +Binary(Add, IntLit(1), Binary(Mul, IntLit(2), IntLit(3))) +``` + +--- + +## Fase 4: Parser Completo + +**Objetivo:** Parsear programas completos: statements, funcoes, blocos, controle de fluxo. + +### Adicoes ao AST +- `Program { functions: Vec<FnDecl> }` +- `FnDecl { name, params: Vec<Param>, return_ty, body: Block }` +- `Param { name, ty: Type }` +- `Type`: `Int`, `Float`, `Str`, `Bool` +- `Stmt`: `Let`, `Assign`, `If`, `For`, `While`, `Return`, `Print`, `ExprStmt` +- `Block`: `Vec<Stmt>` + +### Parser de statements +- `let` / `let mut` com tipo opcional +- `if` / `else` (encadeado) +- `for ident in range(expr) { block }` +- `while expr { block }` +- `return expr` +- `print(args)` +- `;` como terminador de statements +- Ultima expressao sem `;` = retorno implicito + +### Verificacao +```bash +$ cargo run -- parse hello.ol +Program { + functions: [ + FnDecl { name: "main", params: [], return_ty: None, + body: [ + Let { name: "x", ty: Some(Int), value: IntLit(42) }; + Print { args: [Ident("x")] }; + ] + } + ] +} +``` + +--- + +## Fase 5: Compiler Basico (AST -> Bytecode) + +**Objetivo:** Traduzir AST em bytecode para uma stack machine. Compilar constantes, aritmetica, print. 
+ +### vm/value.rs +- `Value`: `Int(i64)`, `Float(f64)`, `Str(String)`, `Bool(bool)` + +### compiler/opcode.rs +- `OpCode`: `Constant`, `Pop`, `Add`, `Sub`, `Mul`, `Div`, `Mod`, `Negate`, `Print`, `Return` +- `Chunk { code: Vec<u8>, constants: Vec<Value>, lines: Vec<usize> }` +- `disassemble()` para debug + +### compiler/mod.rs +- `compile(program: &Program) -> Result<Chunk, OlangError>` +- Compilar expressoes (constantes, binarios, unarios) e `print` + +### Verificacao +```bash +$ cargo run -- compile hello.ol +== main == +0000 Constant 0 (42) +0002 Print 1 +0004 Return +``` + +--- + +## Fase 6: VM Basica + +**Objetivo:** Executar bytecode. Primeiro programa rodando de verdade! + +### vm/mod.rs +- `VM { stack: Vec<Value>, chunk: Chunk, ip: usize }` +- Dispatch loop: `fetch -> decode -> execute` +- Implementar opcodes: `Constant`, `Pop`, `Add`/`Sub`/`Mul`/`Div`/`Mod`/`Negate`, `Print`, `Return` +- Concatenacao de strings com `+` + +### Pipeline completo +- `source -> lexer -> parser -> compiler -> VM` +- Subcomando `run` no CLI + +### Verificacao +```bash +$ cat hello.ol +fn main() { + print(1 + 2); + print("hello " + "world"); +} +$ cargo run -- run hello.ol +3 +hello world +``` + +--- + +## Fase 7: Variaveis + +**Objetivo:** `let` / `let mut`, stack slots, inferencia basica de tipos. + +### Opcodes novos +- `GetLocal(index)`, `SetLocal(index)` + +### Compiler +- Resolver nomes de variaveis para indices de stack slots +- Verificar mutabilidade em compile-time +- Anotacao de tipo opcional (inferencia pelo valor) + +### Verificacao +```bash +$ cat vars.ol +fn main() { + let x: int = 10; + let mut y = 20; + y = y + x; + print(y); +} +$ cargo run -- run vars.ol +30 +``` + +--- + +## Fase 8: Controle de Fluxo (If/Else + While) + +**Objetivo:** Jumps em bytecode, comparacao, logicos, if/else, while. 
+ +### Opcodes novos +- Comparacao: `Equal`, `NotEqual`, `Less`, `LessEqual`, `Greater`, `GreaterEqual` +- Logicos: `Not`, `And`, `Or` +- Jumps: `Jump`, `JumpIfFalse`, `Loop` + +### Compiler +- `if`/`else` com jump patching (emitir jump placeholder, preencher offset depois) +- `while` com `Loop` (backward jump) + +### Verificacao +```bash +$ cat fizzbuzz.ol +fn main() { + let mut i = 1; + while i <= 20 { + if i % 15 == 0 { + print("FizzBuzz"); + } else if i % 3 == 0 { + print("Fizz"); + } else if i % 5 == 0 { + print("Buzz"); + } else { + print(i); + } + i = i + 1; + } +} +$ cargo run -- run fizzbuzz.ol +1 +2 +Fizz +... +``` + +--- + +## Fase 9: Funcoes + +**Objetivo:** Call stack, CallFrames, passagem de argumentos, retorno implicito e explicito. + +### Opcodes novos +- `Call(arg_count)`, `Return` + +### VM +- `CallFrame { ip, chunk, slot_offset }` stack +- Parametros como primeiros locals do frame + +### Compiler +- Cada `FnDecl` vira um `Chunk` separado +- `main()` como entry point +- Retorno implicito (ultima expressao) + `return` explicito + +### Verificacao +```bash +$ cat functions.ol +fn add(a: int, b: int) -> int { + a + b +} + +fn abs(x: int) -> int { + if x < 0 { + return -x + } + x +} + +fn main() { + print(add(3, 4)); + print(abs(-42)); +} +$ cargo run -- run functions.ol +7 +42 +``` + +--- + +## Fase 10: For + Range + +**Objetivo:** Desugaring de `for..in range()` em `while` no compiler. + +### Compiler +- `for i in range(n) { body }` -> `let mut i = 0; while i < n { body; i = i + 1; }` +- Nenhum opcode novo necessario + +### Verificacao +```bash +$ cat countdown.ol +fn main() { + for i in range(5) { + print(i); + } +} +$ cargo run -- run countdown.ol +0 +1 +2 +3 +4 +``` + +--- + +## Fase 11: Type Checking + Erros Bonitos + +**Objetivo:** Erros em compile-time com contexto, type checking basico. 
+ +### Compiler +- Rastrear tipo de cada local +- Erros: variavel indefinida, atribuicao a imutavel, tipo incompativel + +### VM +- Type check em runtime para operacoes + +### Erros com contexto +``` +error[E001]: type mismatch + --> bad.ol:2:19 + | +2 | let x: int = "hello" + | ^^^^^^^ expected int, found str +``` + +### Verificacao +```bash +$ cat bad.ol +fn main() { + let x: int = "hello"; +} +$ cargo run -- run bad.ol +error[E001]: type mismatch + --> bad.ol:2:19 + | +2 | let x: int = "hello"; + | ^^^^^^^ expected int, found str +``` + +--- + +## Fase 12: CLI + Testes + Publicacao (v0.1) + +**Objetivo:** Empacotar, testar e publicar a v0.1 da olang. + +### CLI +- Subcomandos: `run`, `tokenize`, `parse`, `compile` + +### Testes +- Testes de integracao: `.ol` files com stdout esperado +- `cargo test` com cobertura dos principais cenarios + +### Publicacao +- README com exemplos e instrucoes +- v0.1 publicada no GitHub + +### Verificacao +```bash +cargo build +cargo test +cargo run -- run fizzbuzz.ol # programa completo funciona +``` + +--- + +## Arquivos Criticos + +| Arquivo | Papel | +|---------|-------| +| `src/lexer/token.rs` | Contrato entre lexer e parser | +| `src/parser/ast.rs` | Contrato entre parser e compiler | +| `src/compiler/opcode.rs` | Contrato entre compiler e VM | +| `src/vm/mod.rs` | Onde a linguagem realmente roda | +| `src/main.rs` | Orquestra o pipeline e CLI | + +--- + +## Debugging + +- `cargo run -- tokenize file.ol` para inspecionar tokens +- `cargo run -- parse file.ol` para visualizar a AST +- `cargo run -- compile file.ol` para ver bytecode disassembled +- `RUST_BACKTRACE=1` para stack traces +- **Testes unitarios em cada modulo** — rodar `cargo test` frequentemente + +--- + +## Sequencia de Commits (por fase) + +**Fase 1:** estrutura do projeto -> span + error -> TokenKind basico -> lexer numeros/operadores -> CLI tokenize + +**Fase 2:** strings + escapes -> identificadores + keywords -> tokens 2-char -> newlines + comentarios -> 
erros com posicao + +**Fase 3:** AST de expressoes -> parser recursive-descent -> precedencia de operadores -> calls + +**Fase 4:** AST statements + FnDecl -> parser let/if/for/while/return -> parser funcoes -> CLI parse + +**Fase 5:** Value enum -> OpCode + Chunk -> compiler expressoes -> compiler print -> disassemble -> CLI compile + +**Fase 6:** VM struct + dispatch loop -> opcodes aritmeticos -> Print + Return -> pipeline completo -> CLI run + +**Fase 7:** GetLocal/SetLocal -> resolver variaveis -> mutabilidade -> inferencia de tipo + +**Fase 8:** opcodes comparacao/logicos -> Jump/JumpIfFalse/Loop -> compilar if/else -> compilar while + +**Fase 9:** CallFrame -> compilar FnDecl -> Call/Return -> retorno implicito -> main entry point + +**Fase 10:** desugaring for..in range -> testes + +**Fase 11:** type tracking -> erros compile-time -> erros com contexto (linha + trecho) + +**Fase 12:** CLI final -> testes integracao -> README -> v0.1 blob - /dev/null blob + 80ae3c30048c15a8684a1cbd50b7428ddaeeeef0 (mode 644) --- /dev/null +++ README.md @@ -0,0 +1,30 @@ +# olang + +Olivia Lang + +``` +fn main() { + let x = 42 + let mut name: str = "world" + + if x > 10 { + print("hello " + name) + } + + for i in range(10) { + print(i) + } + + let num = add(x, 1) + print(num) + +} + +fn add(a: int, b: int) -> int { + a + b +} +``` + +## License + +ISC — see [LICENSE](LICENSE). blob - /dev/null blob + cc1182a89a75f06c6d393998c54b36e0df043a9f (mode 644) --- /dev/null +++ src/main.rs @@ -0,0 +1,20 @@ +// vim: set tw=79 cc=80 ts=4 sw=4 sts=4 et : +// +// Copyright (c) 2025-2026 Murilo Ijanc' +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. 
+// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// + +fn main() { + println!("Hello, world!"); +}