//! Tokenizer: converts source text into `Token`s, recording the 1-based
//! source line of every token in a parallel vector.
/// Lexical tokens produced by `tokenize`.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum Token {
    /// Numeric literal; all numbers are lexed as `f64`.
    Num(f64),
    /// Identifier: letters, digits and `_`, starting with a letter or `_`.
    Ident(String),
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `,`
    Comma,
    /// `^`
    Caret,
    /// `=`
    Eq,
    /// `;`
    Semi,
    /// `:`
    Colon,
    /// Line break; consecutive blank lines collapse into a single `Newline`.
    Newline,
    /// `.`
    Dot,
    /// `..`
    DotDot,
    /// `%`
    Percent,
}
|
|
|
|
pub(crate) fn tokenize(input: &str) -> Result<(Vec<Token>, Vec<usize>), String> {
|
|
let mut tokens = Vec::new();
|
|
let mut lines = Vec::new();
|
|
let mut line: usize = 1;
|
|
let mut chars = input.chars().peekable();
|
|
|
|
while let Some(&c) = chars.peek() {
|
|
match c {
|
|
' ' | '\t' | '\r' => { chars.next(); }
|
|
'\n' => {
|
|
chars.next();
|
|
if !matches!(tokens.last(), Some(Token::Newline)) {
|
|
tokens.push(Token::Newline);
|
|
lines.push(line);
|
|
}
|
|
line += 1;
|
|
}
|
|
'+' => { tokens.push(Token::Plus); lines.push(line); chars.next(); }
|
|
'-' => { tokens.push(Token::Minus); lines.push(line); chars.next(); }
|
|
'*' => { tokens.push(Token::Star); lines.push(line); chars.next(); }
|
|
'/' => {
|
|
chars.next();
|
|
match chars.peek() {
|
|
Some('/') | Some('=') => {
|
|
while let Some(&c) = chars.peek() {
|
|
chars.next();
|
|
if c == '\n' { line += 1; break; }
|
|
}
|
|
}
|
|
Some('*') => {
|
|
chars.next();
|
|
let mut depth = 1u32;
|
|
while depth > 0 {
|
|
match chars.next() {
|
|
Some('\n') => line += 1,
|
|
Some('*') if chars.peek() == Some(&'/') => {
|
|
chars.next();
|
|
depth -= 1;
|
|
}
|
|
Some('/') if chars.peek() == Some(&'*') => {
|
|
chars.next();
|
|
depth += 1;
|
|
}
|
|
None => break,
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
_ => { tokens.push(Token::Slash); lines.push(line); }
|
|
}
|
|
}
|
|
'(' => { tokens.push(Token::LParen); lines.push(line); chars.next(); }
|
|
')' => { tokens.push(Token::RParen); lines.push(line); chars.next(); }
|
|
'{' => { tokens.push(Token::LBrace); lines.push(line); chars.next(); }
|
|
'}' => { tokens.push(Token::RBrace); lines.push(line); chars.next(); }
|
|
',' => { tokens.push(Token::Comma); lines.push(line); chars.next(); }
|
|
'^' => { tokens.push(Token::Caret); lines.push(line); chars.next(); }
|
|
'%' => { tokens.push(Token::Percent); lines.push(line); chars.next(); }
|
|
'=' => { tokens.push(Token::Eq); lines.push(line); chars.next(); }
|
|
';' => { tokens.push(Token::Semi); lines.push(line); chars.next(); }
|
|
':' => { tokens.push(Token::Colon); lines.push(line); chars.next(); }
|
|
'.' => {
|
|
chars.next();
|
|
if chars.peek() == Some(&'.') {
|
|
chars.next();
|
|
tokens.push(Token::DotDot); lines.push(line);
|
|
} else if chars.peek().map_or(false, |c| c.is_ascii_digit()) {
|
|
let mut num_str = String::from("0.");
|
|
while let Some(&c) = chars.peek() {
|
|
if c.is_ascii_digit() || c == '.' {
|
|
num_str.push(c);
|
|
chars.next();
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
let val: f64 = num_str.parse()
|
|
.map_err(|_| format!("invalid number: {num_str}"))?;
|
|
tokens.push(Token::Num(val)); lines.push(line);
|
|
} else {
|
|
tokens.push(Token::Dot); lines.push(line);
|
|
}
|
|
}
|
|
'0'..='9' => {
|
|
let mut num_str = String::new();
|
|
let mut has_dot = false;
|
|
while let Some(&c) = chars.peek() {
|
|
if c.is_ascii_digit() {
|
|
num_str.push(c);
|
|
chars.next();
|
|
} else if c == '.' && !has_dot {
|
|
let mut lookahead = chars.clone();
|
|
lookahead.next();
|
|
if lookahead.peek().map_or(false, |c| c.is_ascii_digit()) {
|
|
has_dot = true;
|
|
num_str.push(c);
|
|
chars.next();
|
|
} else {
|
|
break;
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
let val: f64 = num_str.parse()
|
|
.map_err(|_| format!("invalid number: {num_str}"))?;
|
|
tokens.push(Token::Num(val)); lines.push(line);
|
|
}
|
|
'a'..='z' | 'A'..='Z' | '_' => {
|
|
let mut name = String::new();
|
|
while let Some(&c) = chars.peek() {
|
|
if c.is_alphanumeric() || c == '_' {
|
|
name.push(c);
|
|
chars.next();
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
tokens.push(Token::Ident(name)); lines.push(line);
|
|
}
|
|
_ => return Err(format!("unexpected character: '{c}'")),
|
|
}
|
|
}
|
|
|
|
Ok((tokens, lines))
|
|
}
|
|
|
|
pub(crate) fn merge_ngon_tokens_with_lines(tokens: &mut Vec<Token>, lines: &mut Vec<usize>) {
|
|
let mut i = 0;
|
|
while i + 2 < tokens.len() {
|
|
let merge = if let (Token::Num(n), Token::Minus, Token::Ident(s)) =
|
|
(&tokens[i], &tokens[i + 1], &tokens[i + 2])
|
|
{
|
|
if s == "gon" && *n >= 3.0 && *n == (*n as u32 as f64) {
|
|
Some(*n as u32)
|
|
} else {
|
|
None
|
|
}
|
|
} else {
|
|
None
|
|
};
|
|
|
|
if let Some(n) = merge {
|
|
tokens[i] = Token::Ident(format!("{n}gon"));
|
|
tokens.remove(i + 2); lines.remove(i + 2);
|
|
tokens.remove(i + 1); lines.remove(i + 1);
|
|
} else {
|
|
i += 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use crate::*;

    /// `a.cast()` lexes to ident / dot / ident / paren pair.
    #[test]
    fn dot_syntax_tokenizer() {
        let (tokens, _) = tokenize("a.cast()").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Ident("a".to_string()),
                Token::Dot,
                Token::Ident("cast".to_string()),
                Token::LParen,
                Token::RParen,
            ]
        );
    }
}
|