commit - 2f626af47071623ff393a568fdf765526342fb27
commit + 6004e88422764c3bf275c6a4f1be37220f847d97
blob - cc1182a89a75f06c6d393998c54b36e0df043a9f
blob + e91891d1c7e74a8aa6eb5a5fd993596ad05f0eb4
--- src/main.rs
+++ src/main.rs
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
+mod error;
+mod lexer;
+mod span;
+
+use std::env;
+use std::fs;
+use std::process;
+
+/// Entry point for the `ol` command-line tool.
+///
+/// Expects `ol <command> <file>`: reads `<file>` into memory and runs
+/// `<command>` on its contents. Usage errors, unreadable files and unknown
+/// commands are reported on stderr and end the process with exit status 1.
fn main() {
-    println!("Hello, world!");
+    // `args_os` instead of `args`: the latter panics while iterating if any
+    // argument is not valid Unicode, which a file path need not be.
+    let args: Vec<_> = env::args_os().collect();
+
+    if args.len() < 3 {
+        eprintln!("usage: ol <command> <file>");
+        eprintln!("commands: tokenize");
+        process::exit(1);
+    }
+
+    // The lossy conversions are for matching and display only; the file
+    // itself is opened through the untouched OS string.
+    let command = args[1].to_string_lossy();
+    let filename = &args[2];
+
+    let source = match fs::read_to_string(filename) {
+        Ok(s) => s,
+        Err(e) => {
+            eprintln!(
+                "error reading '{}': {e}",
+                filename.to_string_lossy()
+            );
+            process::exit(1);
+        }
+    };
+
+    match &*command {
+        "tokenize" => cmd_tokenize(&source),
+        _ => {
+            eprintln!("unknown command: {command}");
+            process::exit(1);
+        }
+    }
}
+
+/// Runs the `tokenize` command on `source`.
+///
+/// On success, prints the `Debug` form of every token kind on a single
+/// stdout line, separated by spaces. On a lexing error, prints the error
+/// to stderr and exits the process with status 1.
+fn cmd_tokenize(source: &str) {
+    let tokens = match lexer::Lexer::new(source).tokenize() {
+        Ok(tokens) => tokens,
+        Err(err) => {
+            eprintln!("{err}");
+            process::exit(1);
+        }
+    };
+
+    let rendered = tokens
+        .iter()
+        .map(|tok| format!("{:?}", tok.kind))
+        .collect::<Vec<String>>()
+        .join(" ");
+    println!("{rendered}");
+}
blob - /dev/null
blob + 2f8635a833b62092ba29d4dda0a37d510dd77cb1 (mode 644)
--- /dev/null
+++ src/lexer/mod.rs
+// vim: set tw=79 cc=80 ts=4 sw=4 sts=4 et :
+//
+// Copyright (c) 2025-2026 Murilo Ijanc' <murilo@ijanc.org>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+pub mod token;
+
+use crate::error::OlangError;
+use crate::span::Span;
+use token::{Token, TokenKind};
+
+/// Lexer state: the input text plus a cursor into it.
+pub struct Lexer {
+ /// The input, stored as individual `char`s (collected from the `&str`
+ /// handed to `Lexer::new`).
+ source: Vec<char>,
+ /// Index into `source` of the next character to read.
+ pos: usize,
+}
+
+impl Lexer {
+    /// Creates a lexer positioned at the first character of `source`.
+    pub fn new(source: &str) -> Self {
+        Self {
+            source: source.chars().collect(),
+            pos: 0,
+        }
+    }
+
+    /// Returns the next unread character without consuming it, or `None`
+    /// once the cursor has reached the end of the input.
+    fn peek(&self) -> Option<char> {
+        self.source.get(self.pos).copied()
+    }
+
+    /// Consumes and returns the next character, or `None` at end of input.
+    ///
+    /// The cursor only moves when a character was actually read, so it
+    /// never runs past `source.len()`.
+    fn advance(&mut self) -> Option<char> {
+        let ch = self.peek()?;
+        self.pos += 1;
+        Some(ch)
+    }
+
+    /// Moves the cursor past any run of ASCII whitespace.
+    fn skip_whitespace(&mut self) {
+        while matches!(self.peek(), Some(ch) if ch.is_ascii_whitespace()) {
+            self.pos += 1;
+        }
+    }
+
+    /// Lexes an integer literal whose first digit sits at index `start`
+    /// and has already been consumed.
+    ///
+    /// Consumes the remaining ASCII digits and returns an `IntLit` token
+    /// spanning `start..self.pos`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an `OlangError` covering the literal when the digits do not
+    /// parse as an `i64` (for example, a value above `i64::MAX`).
+    fn read_number(&mut self, start: usize) -> Result<Token, OlangError> {
+        while matches!(self.peek(), Some(ch) if ch.is_ascii_digit()) {
+            self.pos += 1;
+        }
+
+        let text: String =
+            self.source[start..self.pos].iter().collect();
+        let span = Span::new(start, self.pos);
+
+        // The literal comes straight from user input, so a parse failure
+        // must surface as a diagnostic rather than a panic.
+        match text.parse::<i64>() {
+            Ok(value) => Ok(Token::new(TokenKind::IntLit(value), span)),
+            Err(e) => Err(OlangError::new(
+                format!("invalid integer literal '{text}': {e}"),
+                span,
+            )),
+        }
+    }
+
+    /// Lexes the input from the current cursor position to its end.
+    ///
+    /// Whitespace between tokens is skipped. On success the returned
+    /// vector always ends with an `Eof` token whose span is empty and
+    /// sits at the position where the input ran out.
+    ///
+    /// # Errors
+    ///
+    /// Returns an `OlangError` for the first character that does not start
+    /// a known token, or for an integer literal that does not fit in an
+    /// `i64`.
+    pub fn tokenize(&mut self) -> Result<Vec<Token>, OlangError> {
+        let mut tokens = Vec::new();
+
+        loop {
+            self.skip_whitespace();
+            let start = self.pos;
+
+            let Some(ch) = self.advance() else {
+                tokens.push(Token::new(
+                    TokenKind::Eof,
+                    Span::new(start, start),
+                ));
+                break;
+            };
+
+            let kind = match ch {
+                '+' => TokenKind::Plus,
+                '-' => TokenKind::Minus,
+                '*' => TokenKind::Star,
+                '/' => TokenKind::Slash,
+                '(' => TokenKind::LParen,
+                ')' => TokenKind::RParen,
+                ';' => TokenKind::Semicolon,
+                c if c.is_ascii_digit() => {
+                    // Multi-character token: it computes its own span.
+                    tokens.push(self.read_number(start)?);
+                    continue;
+                }
+                c => {
+                    return Err(OlangError::new(
+                        format!("unexpected char '{c}'"),
+                        Span::new(start, self.pos),
+                    ));
+                }
+            };
+
+            // Every remaining kind is exactly the one character consumed.
+            tokens.push(Token::new(kind, Span::new(start, self.pos)));
+        }
+
+        Ok(tokens)
+    }
+}
blob - /dev/null
blob + 79d4169f9a10218c78f90f1669eb903f8e53a8cd (mode 644)
--- /dev/null
+++ src/lexer/token.rs
+// vim: set tw=79 cc=80 ts=4 sw=4 sts=4 et :
+//
+// Copyright (c) 2025-2026 Murilo Ijanc' <murilo@ijanc.org>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+use crate::span::Span;
+
+/// The kinds of token the lexer produces.
+///
+/// The `Debug` rendering of these variants is what the `tokenize` command
+/// prints, so variant names are visible in program output.
+#[derive(Debug, Clone, PartialEq)]
+pub enum TokenKind {
+ /// Integer literal: a run of ASCII digits parsed as an `i64`.
+ IntLit(i64),
+ /// `+`
+ Plus,
+ /// `-`
+ Minus,
+ /// `*`
+ Star,
+ /// `/`
+ Slash,
+ /// `(`
+ LParen,
+ /// `)`
+ RParen,
+ /// `;`
+ Semicolon,
+ /// End of input; the lexer appends it as the final token.
+ Eof,
+}
+
+/// A single lexed token: what it is and where it was found.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Token {
+ /// What kind of token this is.
+ pub kind: TokenKind,
+ /// Source range of the token; the lexer fills it with start/end
+ /// indices into its character buffer.
+ pub span: Span,
+}
+
+impl Token {
+    /// Builds a token from its kind and the span it occupies.
+    pub fn new(kind: TokenKind, span: Span) -> Self {
+        Token { kind, span }
+    }
+}