New nodes: 'Regex Contains', 'Regex Replace', 'Regex Find', 'Regex Find All', and 'Regex Split' (#4045)

* Add a family of 5 regex nodes for string processing

* Fix unstaged change
This commit is contained in:
Keavon Chambers 2026-04-24 00:55:24 -07:00 committed by GitHub
parent c32c808d5b
commit c9c76df40c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 394 additions and 2 deletions

16
Cargo.lock generated
View File

@ -1399,6 +1399,17 @@ dependencies = [
"num-traits",
]
[[package]]
name = "fancy-regex"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1e1dacd0d2082dfcf1351c4bdd566bbe89a2b263235a2b50058f1e130a47277"
dependencies = [
"bit-set",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "fastnoise-lite"
version = "1.1.1"
@ -4452,9 +4463,9 @@ dependencies = [
[[package]]
name = "regex-automata"
version = "0.4.10"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6"
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
dependencies = [
"aho-corasick",
"memchr",
@ -5500,6 +5511,7 @@ dependencies = [
"convert_case 0.8.0",
"core-types",
"dyn-any",
"fancy-regex",
"glam",
"log",
"node-macro",

View File

@ -107,6 +107,7 @@ bitflags = { version = "2.4", features = ["serde"] }
ctor = "0.2"
convert_case = "0.8"
titlecase = "3.6"
fancy-regex = "0.18.0"
unicode-segmentation = "1.13.2"
indoc = "2.0.5"
derivative = "2.2"

View File

@ -1489,6 +1489,113 @@ fn document_node_definitions() -> HashMap<DefinitionIdentifier, DocumentNodeDefi
description: Cow::Borrowed("TODO"),
properties: None,
},
// "Regex Find" is defined as a small network rather than a single proto node so that it can offer two outputs:
// the `regex_find` proto node (node 0) feeds both `index_elements` (node 1) and `omit_element` (node 2),
// and the network exports the results of nodes 1 and 2 as "Match" and "Captures" respectively.
DocumentNodeDefinition {
identifier: "Regex Find",
category: "Text: Regex",
node_template: NodeTemplate {
document_node: DocumentNode {
implementation: DocumentNodeImplementation::Network(NodeNetwork {
exports: vec![
// Primary output: the whole match (String)
NodeInput::node(NodeId(1), 0),
// Secondary output: capture groups (Vec<String>)
NodeInput::node(NodeId(2), 0),
],
nodes: [
// Node 0: regex_find proto node — returns Vec<String> of [whole_match, ...capture_groups]
// Its five inputs are forwarded one-to-one from the network's imports 0 through 4.
DocumentNode {
inputs: vec![
NodeInput::import(concrete!(String), 0),
NodeInput::import(concrete!(String), 1),
NodeInput::import(concrete!(f64), 2),
NodeInput::import(concrete!(bool), 3),
NodeInput::import(concrete!(bool), 4),
],
implementation: DocumentNodeImplementation::ProtoNode(text_nodes::regex::regex_find::IDENTIFIER),
..Default::default()
},
// Node 1: index_elements at index 0 — extracts the whole match as a String
DocumentNode {
inputs: vec![NodeInput::node(NodeId(0), 0), NodeInput::value(TaggedValue::F64(0.), false)],
implementation: DocumentNodeImplementation::ProtoNode(graphic::index_elements::IDENTIFIER),
..Default::default()
},
// Node 2: omit_element at index 0 — returns capture groups as Vec<String>
DocumentNode {
inputs: vec![NodeInput::node(NodeId(0), 0), NodeInput::value(TaggedValue::F64(0.), false)],
implementation: DocumentNodeImplementation::ProtoNode(graphic::omit_element::IDENTIFIER),
..Default::default()
},
]
// The position of each node in the array above becomes its `NodeId` (0, 1, 2)
.into_iter()
.enumerate()
.map(|(id, node)| (NodeId(id as u64), node))
.collect(),
..Default::default()
}),
// Default values for the five network imports, in the same order as `input_metadata` below:
// String, Pattern, Match Index, Case Insensitive, Multiline
// NOTE(review): the trailing bool presumably marks whether the input is exposed as a graph connection — confirm against `NodeInput::value`
inputs: vec![
NodeInput::value(TaggedValue::String(String::new()), true),
NodeInput::value(TaggedValue::String(String::new()), false),
NodeInput::value(TaggedValue::F64(0.), false),
NodeInput::value(TaggedValue::Bool(false), false),
NodeInput::value(TaggedValue::Bool(false), false),
],
..Default::default()
},
persistent_node_metadata: DocumentNodePersistentMetadata {
// Name and description pairs for each of the five inputs
input_metadata: vec![
("String", "The string to search within.").into(),
("Pattern", "The regular expression pattern to search for.").into(),
(
"Match Index",
"Which non-overlapping occurrence of the pattern to return, starting from 0 for the first match. Negative indices count backwards from the last match.",
)
.into(),
("Case Insensitive", "Match letters regardless of case.").into(),
("Multiline", "Make `^` and `$` match the start and end of each line, not just the whole string.").into(),
],
// Names for the two exports, in the same order as `exports` above
output_names: vec!["Match".to_string(), "Captures".to_string()],
network_metadata: Some(NodeNetworkMetadata {
persistent_metadata: NodeNetworkPersistentMetadata {
// One metadata entry per internal node, each giving that node a grid position; keyed by the same ids (0, 1, 2) as the nodes
node_metadata: [
DocumentNodeMetadata {
persistent_metadata: DocumentNodePersistentMetadata {
node_type_metadata: NodeTypePersistentMetadata::node(IVec2::new(0, 0)),
..Default::default()
},
..Default::default()
},
DocumentNodeMetadata {
persistent_metadata: DocumentNodePersistentMetadata {
node_type_metadata: NodeTypePersistentMetadata::node(IVec2::new(8, 0)),
..Default::default()
},
..Default::default()
},
DocumentNodeMetadata {
persistent_metadata: DocumentNodePersistentMetadata {
node_type_metadata: NodeTypePersistentMetadata::node(IVec2::new(8, 2)),
..Default::default()
},
..Default::default()
},
]
.into_iter()
.enumerate()
.map(|(id, node)| (NodeId(id as u64), node))
.collect(),
..Default::default()
},
..Default::default()
}),
..Default::default()
},
},
description: Cow::Borrowed(
r#"Finds a portion of the string matching a regular expression pattern. With "Match Index" at its default 0, it selects the first non-overlapping occurrence, but others may be selected. Capture groups, if any, are produced as a list in the "Captures" output."#,
),
properties: None,
},
// Aims for interoperable compatibility with:
// https://www.adobe.com/devnet-apps/photoshop/fileformatashtml/#:~:text=levl%27%20%3D%20Levels-,%27curv%27%20%3D%20Curves,-%27expA%27%20%3D%20Exposure
// https://www.adobe.com/devnet-apps/photoshop/fileformatashtml/#:~:text=Max%20input%20range-,Curves,-Curves%20settings%20files

View File

@ -397,6 +397,41 @@ impl<T: Clone> AtIndex for Table<T> {
}
}
/// Produces a copy of a collection with a single element left out.
pub trait OmitIndex {
	/// Returns a copy of the collection without the element at `index` (counted from the front, starting at 0).
	/// An `index` that refers to no element yields an unchanged copy.
	fn omit_index(&self, index: usize) -> Self;

	/// Returns a copy of the collection without the element `index` places from the end (1 is the last element).
	/// An `index` of 0, or one larger than the collection's length, yields an unchanged copy.
	fn omit_index_from_end(&self, index: usize) -> Self;
}

impl<T: Clone> OmitIndex for Vec<T> {
	fn omit_index(&self, index: usize) -> Self {
		// Everything before the omitted position, followed by everything after it
		let leading = self.iter().take(index);
		let trailing = self.iter().skip(index.saturating_add(1));

		leading.chain(trailing).cloned().collect()
	}

	fn omit_index_from_end(&self, index: usize) -> Self {
		// `checked_sub` yields `None` when `index` reaches further back than the first element
		match self.len().checked_sub(index) {
			Some(position) if index != 0 => self.omit_index(position),
			_ => self.clone(),
		}
	}
}
impl<T: Clone> OmitIndex for Table<T> {
	/// Builds a new table from clones of every row except the one at `index`.
	fn omit_index(&self, index: usize) -> Self {
		let mut kept = Self::default();

		self.iter()
			.enumerate()
			.filter(|(position, _)| *position != index)
			.for_each(|(_, row)| kept.push(row.into_cloned()));

		kept
	}

	/// Omits the row `index` places from the end (1 is the last row).
	/// An `index` of 0, or one larger than the row count, yields an unchanged copy.
	fn omit_index_from_end(&self, index: usize) -> Self {
		// `checked_sub` yields `None` when `index` reaches further back than the first row
		match self.len().checked_sub(index) {
			Some(position) if index != 0 => self.omit_index(position),
			_ => self.clone(),
		}
	}
}
// TODO: Eventually remove this migration document upgrade code
pub fn migrate_graphic<'de, D: serde::Deserializer<'de>>(deserializer: D) -> Result<Table<Graphic>, D::Error> {
use serde::Deserialize;

View File

@ -46,6 +46,39 @@ where
.unwrap_or_default()
}
/// Returns the collection with the element at the specified index removed.
/// If no value exists at that index, the collection is returned unchanged.
#[node_macro::node(category("General"))]
pub fn omit_element<T: graphic_types::graphic::OmitIndex + Clone + Default>(
	_: impl Ctx,
	/// The collection of data, such as a list or table.
	#[implementations(
		Vec<f64>,
		Vec<u32>,
		Vec<u64>,
		Vec<DVec2>,
		Vec<String>,
		Table<Artboard>,
		Table<Graphic>,
		Table<Vector>,
		Table<Raster<CPU>>,
		Table<Raster<GPU>>,
		Table<Color>,
		Table<GradientStops>,
	)]
	collection: T,
	/// The index of the item to remove, starting from 0 for the first item. Negative indices count backwards from the end of the collection, starting from -1 for the last item.
	index: SignedInteger,
) -> T {
	let index = index as i32;

	if index < 0 {
		// Take the magnitude with `unsigned_abs` instead of negating: `-index` overflows when `index` is `i32::MIN`,
		// which panics in debug builds and produces a bogus offset in release builds.
		// An offset larger than the collection leaves it unchanged.
		collection.omit_index_from_end(index.unsigned_abs() as usize)
	} else {
		collection.omit_index(index as usize)
	}
}
#[node_macro::node(category("General"))]
async fn map<Item: AnyHash + Send + Sync + std::hash::Hash>(
ctx: impl Ctx + CloneVarArgs + ExtractAll,

View File

@ -26,6 +26,7 @@ log = { workspace = true }
serde_json = { workspace = true }
convert_case = { workspace = true }
titlecase = { workspace = true }
fancy-regex = { workspace = true }
unicode-segmentation = { workspace = true }
# Optional workspace dependencies

View File

@ -1,6 +1,7 @@
mod font_cache;
pub mod json;
mod path_builder;
pub mod regex;
mod text_context;
mod to_path;

View File

@ -0,0 +1,201 @@
use core_types::Ctx;
use core_types::registry::types::SignedInteger;
/// Checks whether the string contains a match for the given regular expression pattern. Optionally restricts the match to only the start and/or end of the string.
#[node_macro::node(category("Text: Regex"))]
fn regex_contains(
	_: impl Ctx,
	/// The string to search within.
	string: String,
	/// The regular expression pattern to search for.
	pattern: String,
	/// Match letters regardless of case.
	case_insensitive: bool,
	/// Make `^` and `$` match the start and end of each line, not just the whole string.
	multiline: bool,
	/// Only match if the pattern appears at the start of the string.
	at_start: bool,
	/// Only match if the pattern appears at the end of the string.
	at_end: bool,
) -> bool {
	// Inline flag group that is placed in front of the whole expression
	let flags = if case_insensitive && multiline {
		"(?im)"
	} else if case_insensitive {
		"(?i)"
	} else if multiline {
		"(?m)"
	} else {
		""
	};

	// When anchoring, the user's pattern is wrapped in a non-capturing group so the anchors apply to all of it
	// (including every branch of a top-level alternation); otherwise the pattern is used verbatim
	let body = if at_start || at_end { format!("(?:{pattern})") } else { pattern.clone() };
	let head = if at_start { "\\A" } else { "" };
	let tail = if at_end { "\\z" } else { "" };
	let anchored_pattern = [flags, head, body.as_str(), tail].concat();

	match fancy_regex::Regex::new(&anchored_pattern) {
		// A failure while matching is reported as "no match"
		Ok(regex) => regex.is_match(&string).unwrap_or(false),
		Err(_) => {
			log::error!("Invalid regex pattern: {pattern}");
			false
		}
	}
}
/// Replaces matches of a regular expression pattern in the string. The replacement string can reference captures: `$0` for the whole match and `$1`, `$2`, etc. for capture groups.
#[node_macro::node(category("Text: Regex"))]
fn regex_replace(
_: impl Ctx,
string: String,
/// The regular expression pattern to search for.
pattern: String,
/// The replacement string. Use `$0` for the whole match and `$1`, `$2`, etc. for capture groups.
replacement: String,
/// Replace all matches. When disabled, only the first match is replaced.
#[default(true)]
replace_all: bool,
/// Match letters regardless of case.
case_insensitive: bool,
/// Make `^` and `$` match the start and end of each line, not just the whole string.
multiline: bool,
) -> String {
let flags = match (case_insensitive, multiline) {
(false, false) => "",
(true, false) => "(?i)",
(false, true) => "(?m)",
(true, true) => "(?im)",
};
let full_pattern = format!("{flags}{pattern}");
let Ok(regex) = fancy_regex::Regex::new(&full_pattern) else {
log::warn!("Invalid regex pattern: {pattern}");
return string;
};
if replace_all {
regex.replace_all(&string, replacement.as_str()).into_owned()
} else {
regex.replace(&string, replacement.as_str()).into_owned()
}
}
/// Finds a regex match in the string and returns its components. The result is a list where the first element is the whole match (`$0`) and subsequent elements are the capture groups (`$1`, `$2`, etc., if any).
///
/// The match index selects which non-overlapping occurrence to return (0 for the first match). Returns an empty list if no match is found at the given index.
#[node_macro::node(category(""))]
fn regex_find(
	_: impl Ctx,
	/// The string to search within.
	string: String,
	/// The regular expression pattern to search for.
	pattern: String,
	/// Which non-overlapping occurrence of the pattern to return, starting from 0 for the first match. Negative indices count backwards from the last match.
	match_index: SignedInteger,
	/// Match letters regardless of case.
	case_insensitive: bool,
	/// Make `^` and `$` match the start and end of each line, not just the whole string.
	multiline: bool,
) -> Vec<String> {
	if pattern.is_empty() {
		return Vec::new();
	}

	let flags = match (case_insensitive, multiline) {
		(false, false) => "",
		(true, false) => "(?i)",
		(false, true) => "(?m)",
		(true, true) => "(?im)",
	};
	let full_pattern = format!("{flags}{pattern}");

	let Ok(regex) = fancy_regex::Regex::new(&full_pattern) else {
		log::error!("Invalid regex pattern: {pattern}");
		return Vec::new();
	};

	// Lazily yields each successful match; matches that fail with a runtime error are skipped
	let mut matches = regex.captures_iter(&string).filter_map(|c| c.ok());

	let match_index = match_index as i32;
	let selected = if match_index < 0 {
		// Take the magnitude with `unsigned_abs` instead of negating: `-match_index` overflows for `i32::MIN`
		let from_end = match_index.unsigned_abs() as usize;

		// Counting from the end requires knowing the total number of matches, so all of them are collected
		let all: Vec<_> = matches.collect();

		// `checked_sub` yields `None` when the index reaches further back than the first match
		all.len().checked_sub(from_end).and_then(|position| all.into_iter().nth(position))
	} else {
		// Counting from the start only needs to search as far as the requested match
		matches.nth(match_index as usize)
	};

	let Some(captures) = selected else {
		return Vec::new();
	};

	// Index 0 is the whole match, 1+ are capture groups (a group that did not participate becomes an empty string)
	(0..captures.len()).map(|i| captures.get(i).map_or(String::new(), |m| m.as_str().to_string())).collect()
}
/// Finds all non-overlapping matches of a regular expression pattern in the string, returning a list of the matched substrings.
#[node_macro::node(category("Text: Regex"))]
fn regex_find_all(
	_: impl Ctx,
	/// The string to search within.
	string: String,
	/// The regular expression pattern to search for.
	pattern: String,
	/// Match letters regardless of case.
	case_insensitive: bool,
	/// Make `^` and `$` match the start and end of each line, not just the whole string.
	multiline: bool,
) -> Vec<String> {
	// An empty pattern produces no results
	if pattern.is_empty() {
		return Vec::new();
	}

	// Inline flag group that is placed in front of the user's pattern
	let flags = if case_insensitive && multiline {
		"(?im)"
	} else if case_insensitive {
		"(?i)"
	} else if multiline {
		"(?m)"
	} else {
		""
	};
	let full_pattern = [flags, pattern.as_str()].concat();

	let regex = match fancy_regex::Regex::new(&full_pattern) {
		Ok(regex) => regex,
		Err(_) => {
			log::error!("Invalid regex pattern: {pattern}");
			return Vec::new();
		}
	};

	// Gather the text of every successful match; matches that fail with a runtime error are skipped
	let mut found = Vec::new();
	for hit in regex.find_iter(&string).flatten() {
		found.push(hit.as_str().to_string());
	}
	found
}
/// Splits a string into a list of substrings pulled from between separator characters as matched by a regular expression.
///
/// For example, splitting "Three, two, one... LIFTOFF" with pattern `\W+` (non-word characters) produces `["Three", "two", "one", "LIFTOFF"]`.
#[node_macro::node(category("Text: Regex"))]
fn regex_split(
	_: impl Ctx,
	/// The string to split into substrings.
	string: String,
	/// The regular expression pattern to split on. Matches are consumed and not included in the output.
	pattern: String,
	/// Match letters regardless of case.
	case_insensitive: bool,
	/// Make `^` and `$` match the start and end of each line, not just the whole string.
	multiline: bool,
) -> Vec<String> {
	// With nothing to split on, the whole string is the only piece
	if pattern.is_empty() {
		return vec![string];
	}

	// Inline flag group that is placed in front of the user's pattern
	let flags = if case_insensitive && multiline {
		"(?im)"
	} else if case_insensitive {
		"(?i)"
	} else if multiline {
		"(?m)"
	} else {
		""
	};
	let mut full_pattern = String::with_capacity(flags.len() + pattern.len());
	full_pattern.push_str(flags);
	full_pattern.push_str(&pattern);

	match fancy_regex::Regex::new(&full_pattern) {
		// Pieces that fail with a runtime error are skipped
		Ok(regex) => regex.split(&string).flatten().map(String::from).collect(),
		// An invalid pattern is treated like an absent one: the string is returned whole
		Err(_) => {
			log::error!("Invalid regex pattern: {pattern}");
			vec![string]
		}
	}
}

View File

@ -34,6 +34,7 @@ pub fn category_description(category: &str) -> &str {
"Raster: Pattern" => "Nodes in this category generate procedural raster patterns, fractals, textures, and noise.",
"Raster" => "Nodes in this category deal with fundamental raster image operations.",
"Text" => "Nodes in this category support the manipulation, formatting, and rendering of text strings.",
"Text: Regex" => "Nodes in this category perform string operations involving regular expressions, such as pattern matching and replacement.",
"Text: JSON" => "Nodes in this category perform string operations involving JSON data, such as parsing and stringifying.",
"Value" => "Nodes in this category supply data values of common types such as numbers, colors, booleans, and strings.",
"Vector: Measure" => "Nodes in this category perform measurements and analysis on vector graphics, such as length/area calculations, path traversal, and hit testing.",