use core_types::registry::types::SignedInteger; use core_types::table::{Table, TableRow}; use core_types::{ATTR_END, ATTR_NAME, ATTR_START, Ctx}; /// Checks whether the string contains a match for the given regular expression pattern. Optionally restricts the match to only the start and/or end of the string. #[node_macro::node(category("Text: Regex"))] fn regex_contains( _: impl Ctx, /// The string to search within. string: String, /// The regular expression pattern to search for. pattern: String, /// Match letters regardless of case. case_insensitive: bool, /// Make `^` and `$` match the start and end of each line, not just the whole string. multiline: bool, /// Only match if the pattern appears at the start of the string. at_start: bool, /// Only match if the pattern appears at the end of the string. at_end: bool, ) -> bool { let flags = match (case_insensitive, multiline) { (false, false) => "", (true, false) => "(?i)", (false, true) => "(?m)", (true, true) => "(?im)", }; let anchored_pattern = match (at_start, at_end) { (true, true) => format!("{flags}\\A(?:{pattern})\\z"), (true, false) => format!("{flags}\\A(?:{pattern})"), (false, true) => format!("{flags}(?:{pattern})\\z"), (false, false) => format!("{flags}{pattern}"), }; let Ok(regex) = fancy_regex::Regex::new(&anchored_pattern) else { log::error!("Invalid regex pattern: {pattern}"); return false; }; regex.is_match(&string).unwrap_or(false) } /// Replaces matches of a regular expression pattern in the string. The replacement string can reference captures: `$0` for the whole match and `$1`, `$2`, etc. for capture groups. #[node_macro::node(category("Text: Regex"))] fn regex_replace( _: impl Ctx, string: String, /// The regular expression pattern to search for. pattern: String, /// The replacement string. Use `$0` for the whole match and `$1`, `$2`, etc. for capture groups. replacement: String, /// Replace all matches. When disabled, only the first match is replaced. #[default(true)] replace_all: bool, /// Match letters regardless of case. case_insensitive: bool, /// Make `^` and `$` match the start and end of each line, not just the whole string. multiline: bool, ) -> String { let flags = match (case_insensitive, multiline) { (false, false) => "", (true, false) => "(?i)", (false, true) => "(?m)", (true, true) => "(?im)", }; let full_pattern = format!("{flags}{pattern}"); let Ok(regex) = fancy_regex::Regex::new(&full_pattern) else { log::warn!("Invalid regex pattern: {pattern}"); return string; }; if replace_all { regex.replace_all(&string, replacement.as_str()).into_owned() } else { regex.replace(&string, replacement.as_str()).into_owned() } } /// Finds a regex match in the string and returns its components. The result is a list where the first item is the whole match (`$0`) and subsequent items are the capture groups (`$1`, `$2`, etc., if any). /// /// The match index selects which non-overlapping occurrence to return (0 for the first match). Returns an empty list if no match is found at the given index. /// /// Each item carries `start` and `end` byte-offset attributes pointing into the original string, plus a `name` attribute holding /// the capture group's name (empty for unnamed groups, and for index 0 which is the whole match). #[node_macro::node(category(""))] fn regex_find( _: impl Ctx, /// The string to search within. string: String, /// The regular expression pattern to search for. pattern: String, /// Which non-overlapping occurrence of the pattern to return, starting from 0 for the first match. Negative indices count backwards from the last match. match_index: SignedInteger, /// Match letters regardless of case. case_insensitive: bool, /// Make `^` and `$` match the start and end of each line, not just the whole string. multiline: bool, ) -> Table { if pattern.is_empty() { return Table::new(); } let flags = match (case_insensitive, multiline) { (false, false) => "", (true, false) => "(?i)", (false, true) => "(?m)", (true, true) => "(?im)", }; let full_pattern = format!("{flags}{pattern}"); let Ok(regex) = fancy_regex::Regex::new(&full_pattern) else { log::error!("Invalid regex pattern: {pattern}"); return Table::new(); }; // Capture group names indexed positionally; index 0 (the whole match) is always None. let capture_names: Vec> = regex.capture_names().map(|name| name.map(str::to_string)).collect(); // Collect all matches since we need to support negative indexing let matches: Vec<_> = regex.captures_iter(&string).filter_map(|c| c.ok()).collect(); let match_index = match_index as i32; let resolved_index = if match_index < 0 { let from_end = (-match_index) as usize; if from_end > matches.len() { return Table::new(); } matches.len() - from_end } else { match_index as usize }; let Some(captures) = matches.get(resolved_index) else { return Table::new(); }; // Index 0 is the whole match, 1+ are capture groups (0..captures.len()) .map(|i| { let captured = captures.get(i); let text = captured.map_or(String::new(), |m| m.as_str().to_string()); let start = captured.map_or(0_u64, |m| m.start() as u64); let end = captured.map_or(0_u64, |m| m.end() as u64); let name = capture_names.get(i).cloned().flatten().unwrap_or_default(); TableRow::new_from_element(text) .with_attribute(ATTR_START, start) .with_attribute(ATTR_END, end) .with_attribute(ATTR_NAME, name) }) .collect() } /// Finds all non-overlapping matches of a regular expression pattern in the string, returning a list of the matched substrings. /// /// Each item carries `start` and `end` byte-offset attributes pointing into the original string. #[node_macro::node(category("Text: Regex"))] fn regex_find_all( _: impl Ctx, /// The string to search within. string: String, /// The regular expression pattern to search for. pattern: String, /// Match letters regardless of case. case_insensitive: bool, /// Make `^` and `$` match the start and end of each line, not just the whole string. multiline: bool, ) -> Table { if pattern.is_empty() { return Table::new(); } let flags = match (case_insensitive, multiline) { (false, false) => "", (true, false) => "(?i)", (false, true) => "(?m)", (true, true) => "(?im)", }; let full_pattern = format!("{flags}{pattern}"); let Ok(regex) = fancy_regex::Regex::new(&full_pattern) else { log::error!("Invalid regex pattern: {pattern}"); return Table::new(); }; regex .find_iter(&string) .filter_map(|m| m.ok()) .map(|m| { TableRow::new_from_element(m.as_str().to_string()) .with_attribute(ATTR_START, m.start() as u64) .with_attribute(ATTR_END, m.end() as u64) }) .collect() } /// Splits a string into a list of substrings pulled from between separator characters as matched by a regular expression. /// /// For example, splitting "Three, two, one... LIFTOFF" with pattern `\W+` (non-word characters) produces `["Three", "two", "one", "LIFTOFF"]`. #[node_macro::node(category("Text: Regex"))] fn regex_split( _: impl Ctx, /// The string to split into substrings. string: String, /// The regular expression pattern to split on. Matches are consumed and not included in the output. pattern: String, /// Match letters regardless of case. case_insensitive: bool, /// Make `^` and `$` match the start and end of each line, not just the whole string. multiline: bool, ) -> Table { if pattern.is_empty() { return Table::new_from_element(string); } let flags = match (case_insensitive, multiline) { (false, false) => "", (true, false) => "(?i)", (false, true) => "(?m)", (true, true) => "(?im)", }; let full_pattern = format!("{flags}{pattern}"); let Ok(regex) = fancy_regex::Regex::new(&full_pattern) else { log::error!("Invalid regex pattern: {pattern}"); return Table::new_from_element(string); }; regex.split(&string).filter_map(|s| s.ok()).map(|s| s.to_string()).map(TableRow::new_from_element).collect() }