Commit Diff


commit - 2f626af47071623ff393a568fdf765526342fb27
commit + 6004e88422764c3bf275c6a4f1be37220f847d97
blob - cc1182a89a75f06c6d393998c54b36e0df043a9f
blob + e91891d1c7e74a8aa6eb5a5fd993596ad05f0eb4
--- src/main.rs
+++ src/main.rs
@@ -15,6 +15,74 @@
 // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 //
 
+mod error;
+mod lexer;
+mod span;
+
+use std::env;
+use std::fs;
+use std::process;
+
+/// Entry point: `ol <command> <file>`.
+///
+/// The command is resolved before the file is read, so a mistyped
+/// command is reported as such instead of being hidden behind a
+/// file-read error. Every failure prints to stderr and exits with
+/// status 1.
 fn main() {
-    println!("Hello, world!");
+    let args: Vec<String> = env::args().collect();
+
+    if args.len() < 3 {
+        eprintln!("usage: ol <command> <file>");
+        eprintln!("commands: tokenize");
+        process::exit(1);
+    }
+
+    let command = args[1].as_str();
+    let filename = &args[2];
+
+    // Pick the handler first: reading the file is wasted work (and
+    // its error misleading) when the command itself is unknown.
+    let run: fn(&str) = match command {
+        "tokenize" => cmd_tokenize,
+        _ => {
+            eprintln!("unknown command: {command}");
+            process::exit(1);
+        }
+    };
+
+    let source = match fs::read_to_string(filename) {
+        Ok(s) => s,
+        Err(e) => {
+            eprintln!(
+                "error reading '{filename}': {e}"
+            );
+            process::exit(1);
+        }
+    };
+
+    run(&source);
 }
+
+/// Lexes `source` and prints the `Debug` form of each token kind on
+/// one line, separated by single spaces.
+///
+/// On a lexer error the error is printed to stderr and the process
+/// exits with status 1.
+fn cmd_tokenize(source: &str) {
+    let mut lexer = lexer::Lexer::new(source);
+
+    match lexer.tokenize() {
+        Ok(tokens) => {
+            let parts: Vec<String> = tokens
+                .iter()
+                .map(|t| format!("{:?}", t.kind))
+                .collect();
+            println!("{}", parts.join(" "));
+        }
+        Err(e) => {
+            eprintln!("{e}");
+            process::exit(1);
+        }
+    }
+}
blob - /dev/null
blob + 2f8635a833b62092ba29d4dda0a37d510dd77cb1 (mode 644)
--- /dev/null
+++ src/lexer/mod.rs
@@ -0,0 +1,162 @@
+// vim: set tw=79 cc=80 ts=4 sw=4 sts=4 et :
+//
+// Copyright (c) 2025-2026 Murilo Ijanc' <murilo@ijanc.org>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+pub mod token;
+
+use crate::error::OlangError;
+use crate::span::Span;
+use token::{Token, TokenKind};
+
+/// Lexer over a source string.
+///
+/// The input is held as a `Vec<char>`, so `pos` and the offsets
+/// passed to `Span::new` count `char`s, not bytes.
+pub struct Lexer {
+    source: Vec<char>,
+    pos: usize,
+}
+
+impl Lexer {
+    /// Creates a lexer positioned at the start of `source`.
+    pub fn new(source: &str) -> Self {
+        Self {
+            source: source.chars().collect(),
+            pos: 0,
+        }
+    }
+
+    /// Returns the next character without consuming it, or
+    /// `None` at end of input.
+    fn peek(&self) -> Option<char> {
+        self.source.get(self.pos).copied()
+    }
+
+    /// Consumes and returns the next character, or `None` at
+    /// end of input.
+    ///
+    /// `pos` only moves when a character was consumed, so it
+    /// never runs past `source.len()`.
+    fn advance(&mut self) -> Option<char> {
+        let ch = self.peek()?;
+        self.pos += 1;
+        Some(ch)
+    }
+
+    /// Skips a run of ASCII whitespace.
+    fn skip_whitespace(&mut self) {
+        while let Some(ch) = self.peek() {
+            if !ch.is_ascii_whitespace() {
+                break;
+            }
+            self.pos += 1;
+        }
+    }
+
+    /// Lexes a decimal integer literal whose first digit, at
+    /// index `start`, has already been consumed.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error spanning the whole literal when its
+    /// value does not fit in an `i64`.
+    fn read_number(
+        &mut self,
+        start: usize,
+    ) -> Result<Token, OlangError> {
+        while let Some(ch) = self.peek() {
+            if !ch.is_ascii_digit() {
+                break;
+            }
+            self.pos += 1;
+        }
+
+        let text: String =
+            self.source[start..self.pos].iter().collect();
+
+        // `text` is ASCII digits only, so `parse` can fail
+        // solely on overflow. That is bad user input, not a
+        // bug: report it instead of panicking.
+        match text.parse::<i64>() {
+            Ok(value) => Ok(Token::new(
+                TokenKind::IntLit(value),
+                Span::new(start, self.pos),
+            )),
+            Err(_) => Err(OlangError::new(
+                format!(
+                    "integer literal too large: {text}"
+                ),
+                Span::new(start, self.pos),
+            )),
+        }
+    }
+
+    /// Lexes the whole input.
+    ///
+    /// On success the returned vector ends with a
+    /// `TokenKind::Eof` token.
+    ///
+    /// # Errors
+    ///
+    /// Fails on the first character that starts no token, or
+    /// on an integer literal that does not fit in an `i64`.
+    pub fn tokenize(
+        &mut self,
+    ) -> Result<Vec<Token>, OlangError> {
+        let mut tokens = Vec::new();
+
+        loop {
+            self.skip_whitespace();
+            let start = self.pos;
+
+            let Some(ch) = self.advance() else {
+                tokens.push(Token::new(
+                    TokenKind::Eof,
+                    Span::new(start, start),
+                ));
+                return Ok(tokens);
+            };
+
+            if ch.is_ascii_digit() {
+                tokens.push(self.read_number(start)?);
+                continue;
+            }
+
+            let kind = match ch {
+                '+' => TokenKind::Plus,
+                '-' => TokenKind::Minus,
+                '*' => TokenKind::Star,
+                '/' => TokenKind::Slash,
+                '(' => TokenKind::LParen,
+                ')' => TokenKind::RParen,
+                ';' => TokenKind::Semicolon,
+                c => {
+                    return Err(OlangError::new(
+                        format!(
+                            "unexpected char '{c}'"
+                        ),
+                        Span::new(start, self.pos),
+                    ));
+                }
+            };
+
+            tokens.push(Token::new(
+                kind,
+                Span::new(start, self.pos),
+            ));
+        }
+    }
+}
blob - /dev/null
blob + 79d4169f9a10218c78f90f1669eb903f8e53a8cd (mode 644)
--- /dev/null
+++ src/lexer/token.rs
@@ -0,0 +1,57 @@
+// vim: set tw=79 cc=80 ts=4 sw=4 sts=4 et :
+//
+// Copyright (c) 2025-2026 Murilo Ijanc' <murilo@ijanc.org>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+use crate::span::Span;
+
+/// Kind of a lexical token, with its payload where it has one.
+#[derive(Debug, Clone, PartialEq)]
+pub enum TokenKind {
+    /// Integer literal, carrying its parsed value.
+    IntLit(i64),
+    /// `+`
+    Plus,
+    /// `-`
+    Minus,
+    /// `*`
+    Star,
+    /// `/`
+    Slash,
+    /// `(`
+    LParen,
+    /// `)`
+    RParen,
+    /// `;`
+    Semicolon,
+    /// End of input.
+    Eof,
+}
+
+/// A token: its kind together with its source span.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Token {
+    /// What kind of token this is.
+    pub kind: TokenKind,
+    /// Span this token was built with.
+    pub span: Span,
+}
+
+impl Token {
+    /// Builds a token from its kind and span.
+    pub fn new(kind: TokenKind, span: Span) -> Self {
+        Token { kind, span }
+    }
+}