commit - 6004e88422764c3bf275c6a4f1be37220f847d97
commit + 8b5123e6050f1d24f2f6f3a000fef4665b878a2b
blob - 2f8635a833b62092ba29d4dda0a37d510dd77cb1
blob + 2dd48afc7d320f5e104d79c933f6e2527a264729
--- src/lexer/mod.rs
+++ src/lexer/mod.rs
/// Lexer state: the source text as individual chars plus a read cursor.
pub struct Lexer {
source: Vec<char>, // source text, one element per `char`
pos: usize, // index into `source` of the next char to consume
+ line: usize, // line counter: starts at 1, incremented when a '\n' is consumed
+ col: usize, // column counter: starts at 1, reset to 1 after a '\n', otherwise incremented per consumed char
}
impl Lexer {
Self {
source: source.chars().collect(),
pos: 0,
+ line: 1,
+ col: 1,
}
}
// Returns the char at `pos` (or `None` when `pos` is outside `source`) and moves the cursor forward by one.
fn advance(&mut self) -> Option<char> {
let ch = self.source.get(self.pos).copied();
+ if let Some(c) = ch { // only a char that was actually read moves the line/col counters
+ if c == '\n' {
+ self.line += 1;
+ self.col = 1; // a newline restarts the column count at 1
+ } else {
+ self.col += 1;
+ }
+ }
self.pos += 1; // NOTE(review): runs even when `ch` is `None`, so `pos` can end up greater than `source.len()` — confirm that spans built from `pos` after end of input tolerate this
ch
}
+ /// Consumes the next char only when it equals `expected`.
+ ///
+ /// Returns `true` if a char was consumed. Returns `false`, leaving the
+ /// cursor untouched, when `peek()` yields anything else (including `None`).
+ fn match_next(&mut self, expected: char) -> bool {
+     let hit = self.peek() == Some(expected);
+     if hit {
+         self.advance();
+     }
+     hit
+ }
+
fn skip_whitespace(&mut self) {
while let Some(ch) = self.peek() {
if ch.is_ascii_whitespace() {
}
}
+ /// Discards chars up to, but not including, the next `'\n'`.
+ ///
+ /// Stops early if `peek()` returns `None`; the newline itself is left for
+ /// the caller to handle.
+ fn skip_comment(&mut self) {
+     while matches!(self.peek(), Some(c) if c != '\n') {
+         self.advance();
+     }
+ }
+
fn read_number(&mut self, start: usize) -> Token {
while let Some(ch) = self.peek() {
if ch.is_ascii_digit() {
}
}
+ if self.peek() == Some('.') {
+ self.advance();
+ while let Some(ch) = self.peek() {
+ if ch.is_ascii_digit() {
+ self.advance();
+ } else {
+ break;
+ }
+ }
+ let text: String =
+ self.source[start..self.pos].iter().collect();
+ let value: f64 = text.parse().unwrap();
+ return Token::new(
+ TokenKind::FloatLit(value),
+ Span::new(start, self.pos),
+ );
+ }
+
let text: String =
self.source[start..self.pos].iter().collect();
let value: i64 = text.parse().unwrap();
-
Token::new(
TokenKind::IntLit(value),
Span::new(start, self.pos),
)
}
+ /// Reads the remainder of a string literal and builds a `StrLit` token.
+ ///
+ /// `start` is used as the start of the token span and of the
+ /// "unterminated" error spans. Chars are consumed up to and including the
+ /// closing `"`. The escapes `\n`, `\t`, `\\` and `\"` are decoded into the
+ /// token's value; every other char is copied through unchanged.
+ ///
+ /// # Errors
+ ///
+ /// Returns an `OlangError` when `advance()` runs out of input before the
+ /// closing quote, when it runs out right after a backslash, or when a
+ /// backslash is followed by any char other than the four listed above.
+ fn read_string(
+     &mut self,
+     start: usize,
+ ) -> Result<Token, OlangError> {
+     let mut value = String::new();
+
+     loop {
+         let Some(ch) = self.advance() else {
+             return Err(OlangError::new(
+                 "unterminated string",
+                 Span::new(start, self.pos),
+             ));
+         };
+
+         if ch == '"' {
+             break;
+         }
+         if ch != '\\' {
+             value.push(ch);
+             continue;
+         }
+
+         // A backslash was consumed: the next char selects the escape.
+         let Some(escape) = self.advance() else {
+             return Err(OlangError::new(
+                 "unterminated escape",
+                 Span::new(start, self.pos),
+             ));
+         };
+         let decoded = match escape {
+             'n' => '\n',
+             't' => '\t',
+             '\\' | '"' => escape,
+             c => {
+                 // Span covers the two chars just consumed: the backslash
+                 // and the char after it.
+                 return Err(OlangError::new(
+                     format!("invalid escape '\\{c}'"),
+                     Span::new(self.pos - 2, self.pos),
+                 ));
+             }
+         };
+         value.push(decoded);
+     }
+
+     Ok(Token::new(
+         TokenKind::StrLit(value),
+         Span::new(start, self.pos),
+     ))
+ }
+
+ /// Reads the rest of a word and classifies it.
+ ///
+ /// Consumes chars while they are ASCII alphanumeric or `_`, then takes the
+ /// text `source[start..pos]`. A word found in the keyword table becomes
+ /// the matching keyword, boolean-literal or type token; anything else
+ /// becomes `TokenKind::Ident` holding the text. The token span is
+ /// `start..pos`.
+ fn read_identifier(&mut self, start: usize) -> Token {
+     /// Maps a reserved word to its token kind, or `None` for other words.
+     fn keyword(word: &str) -> Option<TokenKind> {
+         Some(match word {
+             "let" => TokenKind::Let,
+             "mut" => TokenKind::Mut,
+             "fn" => TokenKind::Fn,
+             "if" => TokenKind::If,
+             "else" => TokenKind::Else,
+             "for" => TokenKind::For,
+             "in" => TokenKind::In,
+             "while" => TokenKind::While,
+             "return" => TokenKind::Return,
+             "print" => TokenKind::Print,
+             "range" => TokenKind::Range,
+             "true" => TokenKind::BoolLit(true),
+             "false" => TokenKind::BoolLit(false),
+             "int" => TokenKind::IntType,
+             "float" => TokenKind::FloatType,
+             "str" => TokenKind::StrType,
+             "bool" => TokenKind::BoolType,
+             _ => return None,
+         })
+     }
+
+     while matches!(
+         self.peek(),
+         Some(c) if c.is_ascii_alphanumeric() || c == '_'
+     ) {
+         self.advance();
+     }
+
+     let text: String =
+         self.source[start..self.pos].iter().collect();
+     let kind =
+         keyword(&text).unwrap_or_else(|| TokenKind::Ident(text));
+
+     Token::new(kind, Span::new(start, self.pos))
+ }
+
pub fn tokenize(
&mut self,
) -> Result<Vec<Token>, OlangError> {
loop {
self.skip_whitespace();
+
let start = self.pos;
let Some(ch) = self.advance() else {
break;
};
+ if ch == '#' {
+ self.skip_comment();
+ continue;
+ }
+
let token = match ch {
'+' => Token::new(
TokenKind::Plus,
Span::new(start, self.pos),
),
- '-' => Token::new(
- TokenKind::Minus,
- Span::new(start, self.pos),
- ),
'*' => Token::new(
TokenKind::Star,
Span::new(start, self.pos),
TokenKind::Slash,
Span::new(start, self.pos),
),
+ '%' => Token::new(
+ TokenKind::Percent,
+ Span::new(start, self.pos),
+ ),
'(' => Token::new(
TokenKind::LParen,
Span::new(start, self.pos),
TokenKind::RParen,
Span::new(start, self.pos),
),
+ '{' => Token::new(
+ TokenKind::LBrace,
+ Span::new(start, self.pos),
+ ),
+ '}' => Token::new(
+ TokenKind::RBrace,
+ Span::new(start, self.pos),
+ ),
+ ',' => Token::new(
+ TokenKind::Comma,
+ Span::new(start, self.pos),
+ ),
+ ':' => Token::new(
+ TokenKind::Colon,
+ Span::new(start, self.pos),
+ ),
';' => Token::new(
TokenKind::Semicolon,
Span::new(start, self.pos),
),
+ '-' => {
+ if self.match_next('>') {
+ Token::new(
+ TokenKind::Arrow,
+ Span::new(start, self.pos),
+ )
+ } else {
+ Token::new(
+ TokenKind::Minus,
+ Span::new(start, self.pos),
+ )
+ }
+ }
+ '=' => {
+ if self.match_next('=') {
+ Token::new(
+ TokenKind::EqEq,
+ Span::new(start, self.pos),
+ )
+ } else {
+ Token::new(
+ TokenKind::Eq,
+ Span::new(start, self.pos),
+ )
+ }
+ }
+ '!' => {
+ if self.match_next('=') {
+ Token::new(
+ TokenKind::BangEq,
+ Span::new(start, self.pos),
+ )
+ } else {
+ return Err(OlangError::new(
+ format!(
+ "unexpected char '!' at {}:{}",
+ self.line, self.col
+ ),
+ Span::new(start, self.pos),
+ ));
+ }
+ }
+ '<' => {
+ if self.match_next('=') {
+ Token::new(
+ TokenKind::LessEq,
+ Span::new(start, self.pos),
+ )
+ } else {
+ Token::new(
+ TokenKind::Less,
+ Span::new(start, self.pos),
+ )
+ }
+ }
+ '>' => {
+ if self.match_next('=') {
+ Token::new(
+ TokenKind::GreaterEq,
+ Span::new(start, self.pos),
+ )
+ } else {
+ Token::new(
+ TokenKind::Greater,
+ Span::new(start, self.pos),
+ )
+ }
+ }
+ '"' => self.read_string(start)?,
c if c.is_ascii_digit() => {
self.read_number(start)
}
+ c if c.is_ascii_alphabetic() || c == '_' => {
+ self.read_identifier(start)
+ }
c => {
return Err(OlangError::new(
format!(
- "unexpected char '{c}'"
+ "unexpected char '{c}' at {}:{}",
+ self.line, self.col
),
Span::new(start, self.pos),
));
blob - 79d4169f9a10218c78f90f1669eb903f8e53a8cd
blob + 475f5863b5f3a624a4344329bd3dc0061f7112a5
--- src/lexer/token.rs
+++ src/lexer/token.rs
/// Token kinds used by the lexer; trailing comments give the source text each one is built from.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
+ // Literals
IntLit(i64), // run of ASCII digits, parsed to `i64`
+ FloatLit(f64), // digits followed by '.' and any further digits, parsed to `f64`
+ StrLit(String), // double-quoted string; holds the text with escapes already decoded
+ BoolLit(bool), // the words `true` / `false`
+ Ident(String), // any word that is not a keyword, boolean literal or type name
+
+ // Operators
Plus, // '+'
Minus, // '-' when not followed by '>'
Star, // '*'
Slash, // presumably '/' — the arm that builds it is not fully visible; confirm
+ Percent, // '%'
+ Eq, // '=' when not followed by '='
+ EqEq, // "=="
+ BangEq, // "!=" (a lone '!' is a lexer error)
+ Less, // '<' when not followed by '='
+ LessEq, // "<="
+ Greater, // '>' when not followed by '='
+ GreaterEq, // ">="
+ Arrow, // "->"
+
+ // Delimiters
LParen, // '('
RParen, // presumably ')' — the arm that builds it is not fully visible; confirm
+ LBrace, // '{'
+ RBrace, // '}'
+ Comma, // ','
+ Colon, // ':'
Semicolon, // ';'
+
+ // Keywords
+ Let, // `let`
+ Mut, // `mut`
+ Fn, // `fn`
+ If, // `if`
+ Else, // `else`
+ For, // `for`
+ In, // `in`
+ While, // `while`
+ Return, // `return`
+ Print, // `print`
+ Range, // `range`
+
+ // Types
+ IntType, // `int`
+ FloatType, // `float`
+ StrType, // `str`
+ BoolType, // `bool`
+
Eof, // presumably marks end of input — where it is emitted is not visible here; confirm
}