//! Tokenizer for the cord-expr crate (`crates/cord-expr/src/token.rs`).
/// Lexical tokens produced by `tokenize`.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum Token {
/// Numeric literal; all numbers are lexed as `f64`.
Num(f64),
/// Identifier: starts with an ASCII letter or `_`, continues with
/// alphanumerics or `_` (continuation uses `char::is_alphanumeric`,
/// which accepts non-ASCII letters/digits as well).
Ident(String),
/// `+`
Plus,
/// `-`
Minus,
/// `*`
Star,
/// `/`
Slash,
/// `(`
LParen,
/// `)`
RParen,
/// `{`
LBrace,
/// `}`
RBrace,
/// `,`
Comma,
/// `^`
Caret,
/// `=`
Eq,
/// `;`
Semi,
/// `:`
Colon,
/// One token per run of `\n`s: the tokenizer collapses consecutive
/// newlines, so blank lines never yield extra `Newline` tokens.
Newline,
/// `.` (member access / lone dot; not part of a numeric literal)
Dot,
/// `..`
DotDot,
/// `%`
Percent,
}
/// Tokenize `input` into a flat token stream.
///
/// Returns `(tokens, lines)`, where `lines[i]` is the 1-based source line on
/// which `tokens[i]` was produced, or an `Err` message on the first
/// unexpected character or unterminated block comment.
///
/// Lexical rules:
/// * `//` starts a line comment; `/* ... */` block comments nest.
/// * Runs of `\n` (including blank lines) collapse into one [`Token::Newline`].
/// * Leading-dot literals (`.5`) are read as `0.5`.
/// * A `.` after an integer is folded into the number only when a digit
///   follows it, so `1..3` lexes as `Num(1)`, `DotDot`, `Num(3)`.
pub(crate) fn tokenize(input: &str) -> Result<(Vec<Token>, Vec<usize>), String> {
    let mut tokens = Vec::new();
    let mut lines = Vec::new();
    let mut line: usize = 1;
    let mut chars = input.chars().peekable();
    while let Some(&c) = chars.peek() {
        match c {
            // Horizontal whitespace is insignificant; newlines are not.
            ' ' | '\t' | '\r' => { chars.next(); }
            '\n' => {
                chars.next();
                // Collapse consecutive newlines into a single token.
                if !matches!(tokens.last(), Some(Token::Newline)) {
                    tokens.push(Token::Newline);
                    lines.push(line);
                }
                line += 1;
            }
            '+' => { tokens.push(Token::Plus); lines.push(line); chars.next(); }
            '-' => { tokens.push(Token::Minus); lines.push(line); chars.next(); }
            '*' => { tokens.push(Token::Star); lines.push(line); chars.next(); }
            '/' => {
                chars.next();
                match chars.peek() {
                    // Line comment. NOTE(review): `/=` also begins a line
                    // comment here (carried over from the original lexer) —
                    // confirm that is intentional and not a typo for `//`.
                    Some('/') | Some('=') => {
                        // Skip to end of line but leave the '\n' itself for
                        // the main loop, so a trailing comment still yields
                        // the Newline token that terminates the statement.
                        // (Previously the '\n' was swallowed and no Newline
                        // token was emitted for commented lines.)
                        while let Some(&c) = chars.peek() {
                            if c == '\n' { break; }
                            chars.next();
                        }
                    }
                    Some('*') => {
                        // Nesting block comment. Newlines inside advance the
                        // line counter but emit no Newline token.
                        chars.next();
                        let mut depth = 1u32;
                        while depth > 0 {
                            match chars.next() {
                                Some('\n') => line += 1,
                                Some('*') if chars.peek() == Some(&'/') => {
                                    chars.next();
                                    depth -= 1;
                                }
                                Some('/') if chars.peek() == Some(&'*') => {
                                    chars.next();
                                    depth += 1;
                                }
                                // Previously EOF here was silently accepted;
                                // surface it so the mistake is not masked.
                                None => return Err("unterminated block comment".to_string()),
                                _ => {}
                            }
                        }
                    }
                    _ => { tokens.push(Token::Slash); lines.push(line); }
                }
            }
            '(' => { tokens.push(Token::LParen); lines.push(line); chars.next(); }
            ')' => { tokens.push(Token::RParen); lines.push(line); chars.next(); }
            '{' => { tokens.push(Token::LBrace); lines.push(line); chars.next(); }
            '}' => { tokens.push(Token::RBrace); lines.push(line); chars.next(); }
            ',' => { tokens.push(Token::Comma); lines.push(line); chars.next(); }
            '^' => { tokens.push(Token::Caret); lines.push(line); chars.next(); }
            '%' => { tokens.push(Token::Percent); lines.push(line); chars.next(); }
            '=' => { tokens.push(Token::Eq); lines.push(line); chars.next(); }
            ';' => { tokens.push(Token::Semi); lines.push(line); chars.next(); }
            ':' => { tokens.push(Token::Colon); lines.push(line); chars.next(); }
            '.' => {
                chars.next();
                if chars.peek() == Some(&'.') {
                    chars.next();
                    tokens.push(Token::DotDot); lines.push(line);
                } else if chars.peek().map_or(false, |c| c.is_ascii_digit()) {
                    // Leading-dot literal such as `.5` → 0.5. Only digits are
                    // consumed: the literal already contains its decimal
                    // point, so a second '.' is left to lex as Dot/DotDot.
                    // (Previously this branch also swallowed dots, so `.5.x`
                    // failed with `invalid number: 0.5.` while the integer
                    // branch below handled `1.5.x` fine.)
                    let mut num_str = String::from("0.");
                    while let Some(&c) = chars.peek() {
                        if c.is_ascii_digit() {
                            num_str.push(c);
                            chars.next();
                        } else {
                            break;
                        }
                    }
                    let val: f64 = num_str.parse()
                        .map_err(|_| format!("invalid number: {num_str}"))?;
                    tokens.push(Token::Num(val)); lines.push(line);
                } else {
                    tokens.push(Token::Dot); lines.push(line);
                }
            }
            '0'..='9' => {
                let mut num_str = String::new();
                let mut has_dot = false;
                while let Some(&c) = chars.peek() {
                    if c.is_ascii_digit() {
                        num_str.push(c);
                        chars.next();
                    } else if c == '.' && !has_dot {
                        // Fold the '.' into the number only if a digit
                        // follows; otherwise it belongs to Dot/DotDot
                        // (`1..3`, `1.method()`).
                        let mut lookahead = chars.clone();
                        lookahead.next();
                        if lookahead.peek().map_or(false, |c| c.is_ascii_digit()) {
                            has_dot = true;
                            num_str.push(c);
                            chars.next();
                        } else {
                            break;
                        }
                    } else {
                        break;
                    }
                }
                let val: f64 = num_str.parse()
                    .map_err(|_| format!("invalid number: {num_str}"))?;
                tokens.push(Token::Num(val)); lines.push(line);
            }
            'a'..='z' | 'A'..='Z' | '_' => {
                let mut name = String::new();
                while let Some(&c) = chars.peek() {
                    if c.is_alphanumeric() || c == '_' {
                        name.push(c);
                        chars.next();
                    } else {
                        break;
                    }
                }
                tokens.push(Token::Ident(name)); lines.push(line);
            }
            _ => return Err(format!("unexpected character: '{c}'")),
        }
    }
    Ok((tokens, lines))
}
/// Fuse `Num(n)`, `Minus`, `Ident("gon")` triples (the lexing of `5-gon`)
/// into a single `Ident("5gon")` token, keeping `lines` parallel to `tokens`.
///
/// A triple is merged only when `n` is an exact non-negative integer that
/// fits in `u32` and is at least 3; any other shape is left untouched.
pub(crate) fn merge_ngon_tokens_with_lines(tokens: &mut Vec<Token>, lines: &mut Vec<usize>) {
    let mut idx = 0;
    while idx + 2 < tokens.len() {
        // Inspect the window [idx, idx+2] and decide whether it is an n-gon.
        let sides = match (&tokens[idx], &tokens[idx + 1], &tokens[idx + 2]) {
            (Token::Num(n), Token::Minus, Token::Ident(name))
                // The round-trip cast check rejects fractional and
                // out-of-range values in one go.
                if name == "gon" && *n >= 3.0 && (*n as u32) as f64 == *n =>
            {
                Some(*n as u32)
            }
            _ => None,
        };
        match sides {
            Some(n) => {
                tokens[idx] = Token::Ident(format!("{n}gon"));
                // Drop the Minus and "gon" entries from both parallel vectors.
                tokens.drain(idx + 1..idx + 3);
                lines.drain(idx + 1..idx + 3);
                // Intentionally no `idx += 1`: re-examining the merged slot is
                // harmless (it now holds an Ident, which cannot match Num).
            }
            None => idx += 1,
        }
    }
}
#[cfg(test)]
mod tests {
    use crate::*;

    /// `a.cast()` must lex as ident, dot, ident, parens — the `.` must not be
    /// folded into a number or promoted to `DotDot`.
    #[test]
    fn dot_syntax_tokenizer() {
        let (tokens, _) = tokenize("a.cast()").unwrap();
        let expected = vec![
            Token::Ident("a".to_string()),
            Token::Dot,
            Token::Ident("cast".to_string()),
            Token::LParen,
            Token::RParen,
        ];
        assert_eq!(tokens, expected);
    }
}