fix block comment parsing with proper nesting support

This commit is contained in:
jess 2026-04-05 05:29:00 -07:00
parent 80017cf84f
commit 23594548ed
1 changed files with 42 additions and 20 deletions

View File

@ -88,36 +88,40 @@ fn is_ident(s: &str) -> bool {
/// Classifies `text` line by line, pushing `ClassifiedLine` values in line order.
///
/// Lines treated as part of a `/* ... */` block comment are tagged
/// `LineKind::Comment`; every other line is handed to `classify_line`.
///
// NOTE(review): this span is rendered from a diff and appears to interleave the
// removed `in_block_comment` flag logic with the added `depth` counter logic
// (the braces do not balance as shown). Confirm against the real file before
// relying on the exact statement order below.
pub fn classify_document(text: &str) -> Vec<ClassifiedLine> {
let mut result = Vec::new();
// Flag-based tracking: true while inside an unterminated block comment.
let mut in_block_comment = false;
// Counter-based tracking: block-comment nesting level carried across lines.
let mut depth: usize = 0;
for (i, line) in text.lines().enumerate() {
if in_block_comment {
let cl = ClassifiedLine { index: i, kind: LineKind::Comment, content: line.to_string() };
// Any `*/` on the line ends the comment, regardless of nesting.
if line.contains("*/") {
in_block_comment = false;
}
result.push(cl);
continue;
}
// Capture the state *before* scanning this line, so a line that closes
// the comment is still classified with the pre-scan depth.
let was_in_comment = depth > 0;
depth = scan_comment_depth(line, depth);
let trimmed = line.trim();
if trimmed.starts_with("/*") {
// The unwraps cannot fail here: both patterns were just confirmed present
// by `starts_with` / `contains`.
if trimmed.contains("*/") && trimmed.find("*/").unwrap() > trimmed.find("/*").unwrap() {
// single-line block comment
} else {
in_block_comment = true;
}
// A line counts as a comment if a comment was already open when the line
// began, or if the line itself begins (after trimming) with `/*`.
if was_in_comment || line.trim().starts_with("/*") {
result.push(ClassifiedLine { index: i, kind: LineKind::Comment, content: line.to_string() });
continue;
} else {
result.push(classify_line(i, line));
}
result.push(classify_line(i, line));
}
result
}
/// Returns the block-comment nesting depth after scanning `line`, starting
/// from the depth in effect at the beginning of the line.
///
/// Every `/*` raises the depth by one and every `*/` lowers it by one. An
/// unmatched `*/` at depth zero is ignored rather than underflowing. A matched
/// delimiter consumes both of its bytes, so `/*/` counts as a single opener
/// followed by a stray `/`.
fn scan_comment_depth(line: &str, mut depth: usize) -> usize {
    let raw = line.as_bytes();
    let mut pos = 0;
    // `get` yields `None` once fewer than two bytes remain, which ends the scan.
    while let Some(pair) = raw.get(pos..pos + 2) {
        match pair {
            [b'/', b'*'] => {
                depth += 1;
                pos += 2;
            }
            [b'*', b'/'] => {
                depth = depth.saturating_sub(1);
                pos += 2;
            }
            _ => pos += 1,
        }
    }
    depth
}
#[cfg(test)]
mod tests {
use super::*;
@ -199,4 +203,22 @@ mod tests {
assert_eq!(lines[0].kind, LineKind::Comment);
assert_eq!(lines[1].kind, LineKind::Eval);
}
#[test]
fn nested_block_comments() {
    // A nested comment that opens and closes on a single line must not leave
    // the following line marked as a comment.
    let doc = "/* outer /* inner */ still comment */\nlet x = 5";
    let classified = classify_document(doc);
    assert_eq!(classified[0].kind, LineKind::Comment);
    assert_eq!(classified[1].kind, LineKind::Cordial);
}
#[test]
fn nested_multiline_block_comments() {
    // The inner comment closes on the second line, but the outer one stays
    // open until the lone `*/` on the fourth line.
    let classified = classify_document("/* outer\n/* inner */\nstill in outer\n*/\nlet x = 5");
    for idx in 0..4 {
        assert_eq!(classified[idx].kind, LineKind::Comment);
    }
    assert_eq!(classified[4].kind, LineKind::Cordial);
}
}