New nodes: 'Regex Contains', 'Regex Replace', 'Regex Find', 'Regex Find All', and 'Regex Split' (#4045)

* Add a family of 5 regex nodes for string processing
* Fix unstaged change
2026-04-24 00:55:24 -07:00 · 2026-04-24 00:55:24 -07:00 · c9c76df40c
parent c32c808d5b
commit c9c76df40c
9 changed files with 394 additions and 2 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1399,6 +1399,17 @@ dependencies = [
 "num-traits",
 ]

+[[package]]
+name = "fancy-regex"
+version = "0.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1e1dacd0d2082dfcf1351c4bdd566bbe89a2b263235a2b50058f1e130a47277"
+dependencies = [
+ "bit-set",
+ "regex-automata",
+ "regex-syntax",
+]
+
 [[package]]
 name = "fastnoise-lite"
 version = "1.1.1"
@@ -4452,9 +4463,9 @@ dependencies = [

 [[package]]
 name = "regex-automata"
-version = "0.4.10"
+version = "0.4.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6"
+checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
 dependencies = [
 "aho-corasick",
 "memchr",
@@ -5500,6 +5511,7 @@ dependencies = [
 "convert_case 0.8.0",
 "core-types",
 "dyn-any",
+ "fancy-regex",
 "glam",
 "log",
 "node-macro",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -107,6 +107,7 @@ bitflags = { version = "2.4", features = ["serde"] }
 ctor = "0.2"
 convert_case = "0.8"
 titlecase = "3.6"
+fancy-regex = "0.18.0"
 unicode-segmentation = "1.13.2"
 indoc = "2.0.5"
 derivative = "2.2"
--- a/editor/src/messages/portfolio/document/node_graph/document_node_definitions.rs
+++ b/editor/src/messages/portfolio/document/node_graph/document_node_definitions.rs
@@ -1489,6 +1489,113 @@ fn document_node_definitions() -> HashMap<DefinitionIdentifier, DocumentNodeDefi
 			description: Cow::Borrowed("TODO"),
 			properties: None,
 		},
+		// "Regex Find": a small network that wraps the `regex_find` proto node and splits its single list output
+		// (`[whole_match, ...capture_groups]`) into two exports, named "Match" and "Captures" in `output_names` below
+		DocumentNodeDefinition {
+			identifier: "Regex Find",
+			category: "Text: Regex",
+			node_template: NodeTemplate {
+				document_node: DocumentNode {
+					implementation: DocumentNodeImplementation::Network(NodeNetwork {
+						exports: vec![
+							// Primary output: the whole match (String)
+							NodeInput::node(NodeId(1), 0),
+							// Secondary output: capture groups (Vec<String>)
+							NodeInput::node(NodeId(2), 0),
+						],
+						nodes: [
+							// Node 0: regex_find proto node — returns Vec<String> of [whole_match, ...capture_groups]
+							// Its five inputs forward network imports 0 through 4 unchanged
+							DocumentNode {
+								inputs: vec![
+									NodeInput::import(concrete!(String), 0),
+									NodeInput::import(concrete!(String), 1),
+									NodeInput::import(concrete!(f64), 2),
+									NodeInput::import(concrete!(bool), 3),
+									NodeInput::import(concrete!(bool), 4),
+								],
+								implementation: DocumentNodeImplementation::ProtoNode(text_nodes::regex::regex_find::IDENTIFIER),
+								..Default::default()
+							},
+							// Node 1: index_elements at index 0 — extracts the whole match as a String
+							DocumentNode {
+								inputs: vec![NodeInput::node(NodeId(0), 0), NodeInput::value(TaggedValue::F64(0.), false)],
+								implementation: DocumentNodeImplementation::ProtoNode(graphic::index_elements::IDENTIFIER),
+								..Default::default()
+							},
+							// Node 2: omit_element at index 0 — returns capture groups as Vec<String>
+							DocumentNode {
+								inputs: vec![NodeInput::node(NodeId(0), 0), NodeInput::value(TaggedValue::F64(0.), false)],
+								implementation: DocumentNodeImplementation::ProtoNode(graphic::omit_element::IDENTIFIER),
+								..Default::default()
+							},
+						]
+						// Key each node by its position in the array so the ids line up with the `NodeId(0)`, `NodeId(1)` and `NodeId(2)` references above
+						.into_iter()
+						.enumerate()
+						.map(|(id, node)| (NodeId(id as u64), node))
+						.collect(),
+						..Default::default()
+					}),
+					// Default values for the five imports, in the same order as `input_metadata` below:
+					// String, Pattern, Match Index, Case Insensitive, Multiline
+					// NOTE(review): the trailing bool differs only for the first input — presumably it marks that input as exposed; confirm against `NodeInput::value`
+					inputs: vec![
+						NodeInput::value(TaggedValue::String(String::new()), true),
+						NodeInput::value(TaggedValue::String(String::new()), false),
+						NodeInput::value(TaggedValue::F64(0.), false),
+						NodeInput::value(TaggedValue::Bool(false), false),
+						NodeInput::value(TaggedValue::Bool(false), false),
+					],
+					..Default::default()
+				},
+				persistent_node_metadata: DocumentNodePersistentMetadata {
+					// Name and description pairs for each input; the texts mirror the parameter docs on the `regex_find` proto node
+					input_metadata: vec![
+						("String", "The string to search within.").into(),
+						("Pattern", "The regular expression pattern to search for.").into(),
+						(
+							"Match Index",
+							"Which non-overlapping occurrence of the pattern to return, starting from 0 for the first match. Negative indices count backwards from the last match.",
+						)
+							.into(),
+						("Case Insensitive", "Match letters regardless of case.").into(),
+						("Multiline", "Make `^` and `$` match the start and end of each line, not just the whole string.").into(),
+					],
+					// One name per export, in the same order as `exports` above
+					output_names: vec!["Match".to_string(), "Captures".to_string()],
+					network_metadata: Some(NodeNetworkMetadata {
+						persistent_metadata: NodeNetworkPersistentMetadata {
+							// One metadata entry per internal node, in the same order (and so with the same ids) as `nodes` above
+							// NOTE(review): the `IVec2` values are presumably the nodes' positions within the inner graph — confirm
+							node_metadata: [
+								DocumentNodeMetadata {
+									persistent_metadata: DocumentNodePersistentMetadata {
+										node_type_metadata: NodeTypePersistentMetadata::node(IVec2::new(0, 0)),
+										..Default::default()
+									},
+									..Default::default()
+								},
+								DocumentNodeMetadata {
+									persistent_metadata: DocumentNodePersistentMetadata {
+										node_type_metadata: NodeTypePersistentMetadata::node(IVec2::new(8, 0)),
+										..Default::default()
+									},
+									..Default::default()
+								},
+								DocumentNodeMetadata {
+									persistent_metadata: DocumentNodePersistentMetadata {
+										node_type_metadata: NodeTypePersistentMetadata::node(IVec2::new(8, 2)),
+										..Default::default()
+									},
+									..Default::default()
+								},
+							]
+							.into_iter()
+							.enumerate()
+							.map(|(id, node)| (NodeId(id as u64), node))
+							.collect(),
+							..Default::default()
+						},
+						..Default::default()
+					}),
+					..Default::default()
+				},
+			},
+			description: Cow::Borrowed(
+				r#"Finds a portion of the string matching a regular expression pattern. With "Match Index" at its default 0, it selects the first non-overlapping occurrence, but others may be selected. Capture groups, if any, are produced as a list in the "Captures" output."#,
+			),
+			properties: None,
+		},
 		// Aims for interoperable compatibility with:
 		// https://www.adobe.com/devnet-apps/photoshop/fileformatashtml/#:~:text=levl%27%20%3D%20Levels-,%27curv%27%20%3D%20Curves,-%27expA%27%20%3D%20Exposure
 		// https://www.adobe.com/devnet-apps/photoshop/fileformatashtml/#:~:text=Max%20input%20range-,Curves,-Curves%20settings%20files
--- a/node-graph/libraries/graphic-types/src/graphic.rs
+++ b/node-graph/libraries/graphic-types/src/graphic.rs
@@ -397,6 +397,41 @@ impl<T: Clone> AtIndex for Table<T> {
 	}
 }

+/// Produces a copy of a collection with a single element left out, addressed either from the start or from the end.
+///
+/// In the `Vec<T>` and `Table<T>` implementations in this file, an index that does not address an existing element results in an unchanged copy.
+pub trait OmitIndex {
+	/// Returns a copy of `self` without the element at the zero-based `index`, counted from the start.
+	fn omit_index(&self, index: usize) -> Self;
+	/// Returns a copy of `self` without the element that is `index` positions from the end.
+	///
+	/// In the implementations in this file, `index` 1 addresses the last element and `index` 0 addresses nothing.
+	fn omit_index_from_end(&self, index: usize) -> Self;
+}
+impl<T: Clone> OmitIndex for Vec<T> {
+	/// Clones every element except the one at `index`. An out-of-range `index` yields a full copy.
+	fn omit_index(&self, index: usize) -> Self {
+		let mut kept = Self::with_capacity(self.len());
+		for (position, item) in self.iter().enumerate() {
+			if position != index {
+				kept.push(item.clone());
+			}
+		}
+		kept
+	}
+
+	/// Omits the element `index` positions from the end (1 is the last element). An `index` of 0 or one past the length yields a full copy.
+	fn omit_index_from_end(&self, index: usize) -> Self {
+		// `checked_sub` is `None` exactly when `index` exceeds the length
+		match self.len().checked_sub(index) {
+			Some(from_start) if index != 0 => self.omit_index(from_start),
+			_ => self.clone(),
+		}
+	}
+}
+impl<T: Clone> OmitIndex for Table<T> {
+	/// Builds a new table from clones of every row except the one at `index`. An out-of-range `index` yields a table with all rows.
+	fn omit_index(&self, index: usize) -> Self {
+		let mut kept = Self::default();
+		self.iter()
+			.enumerate()
+			.filter(|(position, _)| *position != index)
+			.for_each(|(_, row)| kept.push(row.into_cloned()));
+		kept
+	}
+
+	/// Omits the row `index` positions from the end (1 is the last row). An `index` of 0 or one past the length yields a clone of the table.
+	fn omit_index_from_end(&self, index: usize) -> Self {
+		// `checked_sub` is `None` exactly when `index` exceeds the length
+		match self.len().checked_sub(index) {
+			Some(from_start) if index != 0 => self.omit_index(from_start),
+			_ => self.clone(),
+		}
+	}
+}
+
 // TODO: Eventually remove this migration document upgrade code
 pub fn migrate_graphic<'de, D: serde::Deserializer<'de>>(deserializer: D) -> Result<Table<Graphic>, D::Error> {
 	use serde::Deserialize;
--- a/node-graph/nodes/graphic/src/graphic.rs
+++ b/node-graph/nodes/graphic/src/graphic.rs
@@ -46,6 +46,39 @@ where
 	.unwrap_or_default()
 }

+/// Returns the collection with the element at the specified index removed.
+/// If no value exists at that index, the collection is returned unchanged.
+#[node_macro::node(category("General"))]
+pub fn omit_element<T: graphic_types::graphic::OmitIndex + Clone + Default>(
+	_: impl Ctx,
+	/// The collection of data, such as a list or table.
+	#[implementations(
+		Vec<f64>,
+		Vec<u32>,
+		Vec<u64>,
+		Vec<DVec2>,
+		Vec<String>,
+		Table<Artboard>,
+		Table<Graphic>,
+		Table<Vector>,
+		Table<Raster<CPU>>,
+		Table<Raster<GPU>>,
+		Table<Color>,
+		Table<GradientStops>,
+	)]
+	collection: T,
+	/// The index of the item to remove, starting from 0 for the first item. Negative indices count backwards from the end of the collection, starting from -1 for the last item.
+	index: SignedInteger,
+) -> T {
+	let index = index as i32;
+
+	if index < 0 {
+		// Use `unsigned_abs` instead of `-index`: the cast above can produce `i32::MIN`, whose negation overflows (a panic in debug builds)
+		collection.omit_index_from_end(index.unsigned_abs() as usize)
+	} else {
+		collection.omit_index(index as usize)
+	}
+}
+
 #[node_macro::node(category("General"))]
 async fn map<Item: AnyHash + Send + Sync + std::hash::Hash>(
 	ctx: impl Ctx + CloneVarArgs + ExtractAll,
--- a/node-graph/nodes/text/Cargo.toml
+++ b/node-graph/nodes/text/Cargo.toml
@@ -26,6 +26,7 @@ log = { workspace = true }
 serde_json = { workspace = true }
 convert_case = { workspace = true }
 titlecase = { workspace = true }
+fancy-regex = { workspace = true }
 unicode-segmentation = { workspace = true }

 # Optional workspace dependencies
--- a/node-graph/nodes/text/src/lib.rs
+++ b/node-graph/nodes/text/src/lib.rs
@@ -1,6 +1,7 @@
 mod font_cache;
 pub mod json;
 mod path_builder;
+pub mod regex;
 mod text_context;
 mod to_path;

--- a/node-graph/nodes/text/src/regex.rs
+++ b/node-graph/nodes/text/src/regex.rs
@@ -0,0 +1,201 @@
+use core_types::Ctx;
+use core_types::registry::types::SignedInteger;
+
+/// Checks whether the string contains a match for the given regular expression pattern. Optionally restricts the match to only the start and/or end of the string.
+#[node_macro::node(category("Text: Regex"))]
+fn regex_contains(
+	_: impl Ctx,
+	/// The string to search within.
+	string: String,
+	/// The regular expression pattern to search for.
+	pattern: String,
+	/// Match letters regardless of case.
+	case_insensitive: bool,
+	/// Make `^` and `$` match the start and end of each line, not just the whole string.
+	multiline: bool,
+	/// Only match if the pattern appears at the start of the string.
+	at_start: bool,
+	/// Only match if the pattern appears at the end of the string.
+	at_end: bool,
+) -> bool {
+	// Start with the inline flag group (`(?i)`, `(?m)` or `(?im)`), or nothing when neither option is enabled
+	let mut expression = String::new();
+	if case_insensitive || multiline {
+		expression.push_str("(?");
+		if case_insensitive {
+			expression.push('i');
+		}
+		if multiline {
+			expression.push('m');
+		}
+		expression.push(')');
+	}
+
+	// When anchoring, wrap the user's pattern in a non-capturing group so a top-level alternation stays inside the `\A`/`\z` anchors
+	if at_start || at_end {
+		if at_start {
+			expression.push_str("\\A");
+		}
+		expression.push_str("(?:");
+		expression.push_str(&pattern);
+		expression.push(')');
+		if at_end {
+			expression.push_str("\\z");
+		}
+	} else {
+		expression.push_str(&pattern);
+	}
+
+	match fancy_regex::Regex::new(&expression) {
+		// A runtime matching error is treated the same as "no match"
+		Ok(regex) => regex.is_match(&string).unwrap_or(false),
+		Err(_) => {
+			log::error!("Invalid regex pattern: {pattern}");
+			false
+		}
+	}
+}
+
+/// Replaces matches of a regular expression pattern in the string. The replacement string can reference captures: `$0` for the whole match and `$1`, `$2`, etc. for capture groups.
+#[node_macro::node(category("Text: Regex"))]
+fn regex_replace(
+	_: impl Ctx,
+	/// The string to perform replacements in.
+	string: String,
+	/// The regular expression pattern to search for.
+	pattern: String,
+	/// The replacement string. Use `$0` for the whole match and `$1`, `$2`, etc. for capture groups.
+	replacement: String,
+	/// Replace all matches. When disabled, only the first match is replaced.
+	#[default(true)]
+	replace_all: bool,
+	/// Match letters regardless of case.
+	case_insensitive: bool,
+	/// Make `^` and `$` match the start and end of each line, not just the whole string.
+	multiline: bool,
+) -> String {
+	// Inline flag group prepended to the user's pattern
+	let flags = match (case_insensitive, multiline) {
+		(false, false) => "",
+		(true, false) => "(?i)",
+		(false, true) => "(?m)",
+		(true, true) => "(?im)",
+	};
+	let full_pattern = format!("{flags}{pattern}");
+
+	// An invalid pattern leaves the input untouched; logged at the same level as the other regex nodes
+	let Ok(regex) = fancy_regex::Regex::new(&full_pattern) else {
+		log::error!("Invalid regex pattern: {pattern}");
+		return string;
+	};
+
+	if replace_all {
+		regex.replace_all(&string, replacement.as_str()).into_owned()
+	} else {
+		regex.replace(&string, replacement.as_str()).into_owned()
+	}
+}
+
+/// Finds a regex match in the string and returns its components. The result is a list where the first element is the whole match (`$0`) and subsequent elements are the capture groups (`$1`, `$2`, etc., if any).
+///
+/// The match index selects which non-overlapping occurrence to return (0 for the first match). Returns an empty list if no match is found at the given index.
+#[node_macro::node(category(""))]
+fn regex_find(
+	_: impl Ctx,
+	/// The string to search within.
+	string: String,
+	/// The regular expression pattern to search for.
+	pattern: String,
+	/// Which non-overlapping occurrence of the pattern to return, starting from 0 for the first match. Negative indices count backwards from the last match.
+	match_index: SignedInteger,
+	/// Match letters regardless of case.
+	case_insensitive: bool,
+	/// Make `^` and `$` match the start and end of each line, not just the whole string.
+	multiline: bool,
+) -> Vec<String> {
+	if pattern.is_empty() {
+		return Vec::new();
+	}
+
+	// Inline flag group prepended to the user's pattern
+	let flags = match (case_insensitive, multiline) {
+		(false, false) => "",
+		(true, false) => "(?i)",
+		(false, true) => "(?m)",
+		(true, true) => "(?im)",
+	};
+	let full_pattern = format!("{flags}{pattern}");
+
+	let Ok(regex) = fancy_regex::Regex::new(&full_pattern) else {
+		log::error!("Invalid regex pattern: {pattern}");
+		return Vec::new();
+	};
+
+	let match_index = match_index as i32;
+	let selected = if match_index < 0 {
+		// Counting from the end requires knowing how many matches exist, so all of them are gathered first
+		let matches: Vec<_> = regex.captures_iter(&string).filter_map(|c| c.ok()).collect();
+
+		// Use `unsigned_abs` instead of `-match_index`: the cast above can produce `i32::MIN`, whose negation overflows (a panic in debug builds)
+		let from_end = match_index.unsigned_abs() as usize;
+		let Some(resolved_index) = matches.len().checked_sub(from_end) else {
+			return Vec::new();
+		};
+		matches.into_iter().nth(resolved_index)
+	} else {
+		// A forward index can stop scanning as soon as the requested match is reached
+		regex.captures_iter(&string).filter_map(|c| c.ok()).nth(match_index as usize)
+	};
+
+	let Some(captures) = selected else {
+		return Vec::new();
+	};
+
+	// Index 0 is the whole match, 1+ are capture groups; a group that did not participate becomes an empty string
+	(0..captures.len()).map(|i| captures.get(i).map(|m| m.as_str().to_string()).unwrap_or_default()).collect()
+}
+
+/// Finds all non-overlapping matches of a regular expression pattern in the string, returning a list of the matched substrings.
+#[node_macro::node(category("Text: Regex"))]
+fn regex_find_all(
+	_: impl Ctx,
+	/// The string to search within.
+	string: String,
+	/// The regular expression pattern to search for.
+	pattern: String,
+	/// Match letters regardless of case.
+	case_insensitive: bool,
+	/// Make `^` and `$` match the start and end of each line, not just the whole string.
+	multiline: bool,
+) -> Vec<String> {
+	// An empty pattern produces no results
+	if pattern.is_empty() {
+		return Vec::new();
+	}
+
+	// Pick the inline flag group matching the enabled options
+	let flags = if case_insensitive && multiline {
+		"(?im)"
+	} else if case_insensitive {
+		"(?i)"
+	} else if multiline {
+		"(?m)"
+	} else {
+		""
+	};
+	let full_pattern = [flags, pattern.as_str()].concat();
+
+	match fancy_regex::Regex::new(&full_pattern) {
+		Ok(regex) => {
+			// Matches that fail at runtime are skipped
+			let mut found = Vec::new();
+			for hit in regex.find_iter(&string).flatten() {
+				found.push(hit.as_str().to_string());
+			}
+			found
+		}
+		Err(_) => {
+			log::error!("Invalid regex pattern: {pattern}");
+			Vec::new()
+		}
+	}
+}
+
+/// Splits a string into a list of substrings pulled from between separator characters as matched by a regular expression.
+///
+/// For example, splitting "Three, two, one... LIFTOFF" with pattern `\W+` (non-word characters) produces `["Three", "two", "one", "LIFTOFF"]`.
+#[node_macro::node(category("Text: Regex"))]
+fn regex_split(
+	_: impl Ctx,
+	/// The string to split into substrings.
+	string: String,
+	/// The regular expression pattern to split on. Matches are consumed and not included in the output.
+	pattern: String,
+	/// Match letters regardless of case.
+	case_insensitive: bool,
+	/// Make `^` and `$` match the start and end of each line, not just the whole string.
+	multiline: bool,
+) -> Vec<String> {
+	// With nothing to split on, the whole string is the only piece
+	if pattern.is_empty() {
+		return vec![string];
+	}
+
+	// Assemble the inline flag group (if any) followed by the user's pattern
+	let mut full_pattern = String::new();
+	if case_insensitive || multiline {
+		full_pattern.push_str("(?");
+		if case_insensitive {
+			full_pattern.push('i');
+		}
+		if multiline {
+			full_pattern.push('m');
+		}
+		full_pattern.push(')');
+	}
+	full_pattern.push_str(&pattern);
+
+	let regex = match fancy_regex::Regex::new(&full_pattern) {
+		Ok(regex) => regex,
+		Err(_) => {
+			// An invalid pattern falls back to returning the input as a single piece
+			log::error!("Invalid regex pattern: {pattern}");
+			return vec![string];
+		}
+	};
+
+	// Pieces that fail at runtime are skipped
+	let mut pieces = Vec::new();
+	for piece in regex.split(&string) {
+		if let Ok(piece) = piece {
+			pieces.push(piece.to_string());
+		}
+	}
+	pieces
+}
--- a/tools/node-docs/src/utility.rs
+++ b/tools/node-docs/src/utility.rs
@@ -34,6 +34,7 @@ pub fn category_description(category: &str) -> &str {
 		"Raster: Pattern" => "Nodes in this category generate procedural raster patterns, fractals, textures, and noise.",
 		"Raster" => "Nodes in this category deal with fundamental raster image operations.",
 		"Text" => "Nodes in this category support the manipulation, formatting, and rendering of text strings.",
+		"Text: Regex" => "Nodes in this category perform string operations involving regular expressions, such as pattern matching and replacement.",
 		"Text: JSON" => "Nodes in this category perform string operations involving JSON data, such as parsing and stringifying.",
 		"Value" => "Nodes in this category supply data values of common types such as numbers, colors, booleans, and strings.",
 		"Vector: Measure" => "Nodes in this category perform measurements and analysis on vector graphics, such as length/area calculations, path traversal, and hit testing.",