/// A lexical token produced by [`tokenize`].
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum Token {
    /// Numeric literal such as `3`, `1.5` or `.25`.
    Num(f64),
    /// Identifier: starts with an ASCII letter or `_`, continues with
    /// alphanumerics or `_`.
    Ident(String),
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `,`
    Comma,
    /// `^`
    Caret,
    /// `=`
    Eq,
    /// `;`
    Semi,
    /// `:`
    Colon,
    /// Line break. Runs of line breaks with no token in between collapse
    /// into a single `Newline`.
    Newline,
    /// `.`
    Dot,
    /// `..`
    DotDot,
    /// `%`
    Percent,
}

/// Character stream shared by the lexer helpers.
type CharStream<'a> = std::iter::Peekable<std::str::Chars<'a>>;

/// Splits `input` into tokens.
///
/// Returns the tokens together with a parallel vector holding the 1-based
/// source line of each token (`lines[i]` belongs to `tokens[i]`).
///
/// Spaces, tabs and carriage returns are skipped. `//` starts a comment that
/// runs to the end of the line; `/* ... */` comments may nest.
///
/// # Errors
///
/// Returns `Err` with a message when an unexpected character is met or when
/// a numeric literal cannot be parsed as `f64`.
pub(crate) fn tokenize(input: &str) -> Result<(Vec<Token>, Vec<usize>), String> {
    let mut tokens: Vec<Token> = Vec::new();
    let mut lines: Vec<usize> = Vec::new();
    let mut line: usize = 1;
    let mut chars = input.chars().peekable();

    while let Some(&c) = chars.peek() {
        // Every arm either yields the token to record or `continue`s when
        // the consumed text produces no token.
        let token = match c {
            ' ' | '\t' | '\r' => {
                chars.next();
                continue;
            }
            '\n' => {
                chars.next();
                // Collapse consecutive line breaks into one token.
                if !matches!(tokens.last(), Some(Token::Newline)) {
                    tokens.push(Token::Newline);
                    lines.push(line);
                }
                line += 1;
                continue;
            }
            '/' => {
                chars.next();
                match chars.peek().copied() {
                    // NOTE(review): `/=` opens a line comment exactly like
                    // `//` does; kept as found, confirm this is intended.
                    Some('/') | Some('=') => {
                        skip_line_comment(&mut chars);
                        continue;
                    }
                    Some('*') => {
                        chars.next();
                        line += skip_block_comment(&mut chars);
                        continue;
                    }
                    _ => Token::Slash,
                }
            }
            '.' => {
                chars.next();
                match chars.peek().copied() {
                    Some('.') => {
                        chars.next();
                        Token::DotDot
                    }
                    Some(d) if d.is_ascii_digit() => Token::Num(lex_fraction(&mut chars)?),
                    _ => Token::Dot,
                }
            }
            '0'..='9' => Token::Num(lex_number(&mut chars)?),
            'a'..='z' | 'A'..='Z' | '_' => Token::Ident(lex_ident(&mut chars)),
            _ => match punctuation(c) {
                Some(token) => {
                    chars.next();
                    token
                }
                None => return Err(format!("unexpected character: '{c}'")),
            },
        };
        tokens.push(token);
        lines.push(line);
    }

    Ok((tokens, lines))
}

/// Maps a single-character operator or delimiter to its token.
fn punctuation(c: char) -> Option<Token> {
    let token = match c {
        '+' => Token::Plus,
        '-' => Token::Minus,
        '*' => Token::Star,
        '(' => Token::LParen,
        ')' => Token::RParen,
        '{' => Token::LBrace,
        '}' => Token::RBrace,
        ',' => Token::Comma,
        '^' => Token::Caret,
        '%' => Token::Percent,
        '=' => Token::Eq,
        ';' => Token::Semi,
        ':' => Token::Colon,
        _ => return None,
    };
    Some(token)
}

/// Skips the rest of the current line, leaving the terminating `\n` in the
/// stream so the main loop still emits `Token::Newline` and counts the line.
fn skip_line_comment(chars: &mut CharStream<'_>) {
    while let Some(&c) = chars.peek() {
        if c == '\n' {
            break;
        }
        chars.next();
    }
}

/// Skips a (possibly nested) block comment whose opening `/*` has already
/// been consumed and returns how many line breaks it contained.
///
/// An unterminated comment silently runs to the end of the input.
fn skip_block_comment(chars: &mut CharStream<'_>) -> usize {
    let mut newlines = 0;
    let mut depth = 1u32;
    while depth > 0 {
        match chars.next() {
            Some('\n') => newlines += 1,
            Some('*') if chars.peek() == Some(&'/') => {
                chars.next();
                depth -= 1;
            }
            Some('/') if chars.peek() == Some(&'*') => {
                chars.next();
                depth += 1;
            }
            None => break,
            _ => {}
        }
    }
    newlines
}

/// Lexes a literal that starts with a digit: digits, optionally followed by
/// one `.` and more digits.
///
/// A `.` is only taken when a digit follows it, so `1..5` and `1.foo` leave
/// the dot(s) in the stream.
fn lex_number(chars: &mut CharStream<'_>) -> Result<f64, String> {
    let mut num_str = String::new();
    let mut has_dot = false;
    while let Some(&c) = chars.peek() {
        if c.is_ascii_digit() {
            num_str.push(c);
            chars.next();
        } else if c == '.' && !has_dot {
            let mut lookahead = chars.clone();
            lookahead.next();
            if !matches!(lookahead.peek(), Some(d) if d.is_ascii_digit()) {
                break;
            }
            has_dot = true;
            num_str.push(c);
            chars.next();
        } else {
            break;
        }
    }
    num_str
        .parse()
        .map_err(|_| format!("invalid number: {num_str}"))
}

/// Lexes the digits of a literal written with a leading dot (`.5`); the dot
/// itself has already been consumed.
///
/// Only digits are taken, so `.5..2` yields `0.5`, `..`, `2`, matching how
/// digit-led literals treat a following dot.
fn lex_fraction(chars: &mut CharStream<'_>) -> Result<f64, String> {
    let mut num_str = String::from("0.");
    while let Some(&c) = chars.peek() {
        if !c.is_ascii_digit() {
            break;
        }
        num_str.push(c);
        chars.next();
    }
    num_str
        .parse()
        .map_err(|_| format!("invalid number: {num_str}"))
}

/// Lexes an identifier: a run of alphanumeric characters and underscores.
fn lex_ident(chars: &mut CharStream<'_>) -> String {
    let mut name = String::new();
    while let Some(&c) = chars.peek() {
        if !(c.is_alphanumeric() || c == '_') {
            break;
        }
        name.push(c);
        chars.next();
    }
    name
}

/// Rewrites every `Num(n) Minus Ident("gon")` triple into a single
/// `Ident("{n}gon")` when `n` is a whole number of at least 3.
///
/// `lines` is kept in step with `tokens`; the merged token keeps the line of
/// the number.
///
/// # Panics
///
/// Panics if `lines` is shorter than `tokens` at a merge position.
pub(crate) fn merge_ngon_tokens_with_lines(tokens: &mut Vec<Token>, lines: &mut Vec<usize>) {
    let mut i = 0;
    while i + 2 < tokens.len() {
        let sides = match &tokens[i..i + 3] {
            // The `u32` round trip rejects fractional values and values
            // outside the `u32` range.
            [Token::Num(n), Token::Minus, Token::Ident(s)]
                if s == "gon" && *n >= 3.0 && *n == (*n as u32 as f64) =>
            {
                Some(*n as u32)
            }
            _ => None,
        };
        match sides {
            Some(n) => {
                tokens[i] = Token::Ident(format!("{n}gon"));
                // Drop the `-` and `gon` tokens and their line entries.
                tokens.drain(i + 1..=i + 2);
                lines.drain(i + 1..=i + 2);
            }
            None => i += 1,
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::*;

    #[test]
    fn dot_syntax_tokenizer() {
        let (tokens, _) = tokenize("a.cast()").unwrap();
        assert_eq!(tokens.len(), 5);
        assert!(matches!(&tokens[0], Token::Ident(n) if n == "a"));
        assert!(matches!(&tokens[1], Token::Dot));
        assert!(matches!(&tokens[2], Token::Ident(n) if n == "cast"));
        assert!(matches!(&tokens[3], Token::LParen));
        assert!(matches!(&tokens[4], Token::RParen));
    }

    #[test]
    fn line_comment_preserves_newline_token() {
        let (tokens, lines) = tokenize("a // note\nb").unwrap();
        assert_eq!(tokens.len(), 3);
        assert!(matches!(&tokens[1], Token::Newline));
        assert_eq!(lines, vec![1, 1, 2]);
    }

    #[test]
    fn leading_dot_number_before_range() {
        let (tokens, _) = tokenize(".5..2").unwrap();
        assert_eq!(
            tokens,
            vec![Token::Num(0.5), Token::DotDot, Token::Num(2.0)]
        );
    }
}