New nodes: 'Regex Contains', 'Regex Replace', 'Regex Find', 'Regex Find All', and 'Regex Split' (#4045)
* Add a family of 5 regex nodes for string processing * Fix unstaged change
This commit is contained in:
parent
c32c808d5b
commit
c9c76df40c
|
|
@ -1399,6 +1399,17 @@ dependencies = [
|
|||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fancy-regex"
|
||||
version = "0.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e1e1dacd0d2082dfcf1351c4bdd566bbe89a2b263235a2b50058f1e130a47277"
|
||||
dependencies = [
|
||||
"bit-set",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastnoise-lite"
|
||||
version = "1.1.1"
|
||||
|
|
@ -4452,9 +4463,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.10"
|
||||
version = "0.4.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6"
|
||||
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
|
|
@ -5500,6 +5511,7 @@ dependencies = [
|
|||
"convert_case 0.8.0",
|
||||
"core-types",
|
||||
"dyn-any",
|
||||
"fancy-regex",
|
||||
"glam",
|
||||
"log",
|
||||
"node-macro",
|
||||
|
|
|
|||
|
|
@ -107,6 +107,7 @@ bitflags = { version = "2.4", features = ["serde"] }
|
|||
ctor = "0.2"
|
||||
convert_case = "0.8"
|
||||
titlecase = "3.6"
|
||||
fancy-regex = "0.18.0"
|
||||
unicode-segmentation = "1.13.2"
|
||||
indoc = "2.0.5"
|
||||
derivative = "2.2"
|
||||
|
|
|
|||
|
|
@ -1489,6 +1489,113 @@ fn document_node_definitions() -> HashMap<DefinitionIdentifier, DocumentNodeDefi
|
|||
description: Cow::Borrowed("TODO"),
|
||||
properties: None,
|
||||
},
|
||||
DocumentNodeDefinition {
|
||||
identifier: "Regex Find",
|
||||
category: "Text: Regex",
|
||||
node_template: NodeTemplate {
|
||||
document_node: DocumentNode {
|
||||
implementation: DocumentNodeImplementation::Network(NodeNetwork {
|
||||
exports: vec![
|
||||
// Primary output: the whole match (String)
|
||||
NodeInput::node(NodeId(1), 0),
|
||||
// Secondary output: capture groups (Vec<String>)
|
||||
NodeInput::node(NodeId(2), 0),
|
||||
],
|
||||
nodes: [
|
||||
// Node 0: regex_find proto node — returns Vec<String> of [whole_match, ...capture_groups]
|
||||
DocumentNode {
|
||||
inputs: vec![
|
||||
NodeInput::import(concrete!(String), 0),
|
||||
NodeInput::import(concrete!(String), 1),
|
||||
NodeInput::import(concrete!(f64), 2),
|
||||
NodeInput::import(concrete!(bool), 3),
|
||||
NodeInput::import(concrete!(bool), 4),
|
||||
],
|
||||
implementation: DocumentNodeImplementation::ProtoNode(text_nodes::regex::regex_find::IDENTIFIER),
|
||||
..Default::default()
|
||||
},
|
||||
// Node 1: index_elements at index 0 — extracts the whole match as a String
|
||||
DocumentNode {
|
||||
inputs: vec![NodeInput::node(NodeId(0), 0), NodeInput::value(TaggedValue::F64(0.), false)],
|
||||
implementation: DocumentNodeImplementation::ProtoNode(graphic::index_elements::IDENTIFIER),
|
||||
..Default::default()
|
||||
},
|
||||
// Node 2: omit_element at index 0 — returns capture groups as Vec<String>
|
||||
DocumentNode {
|
||||
inputs: vec![NodeInput::node(NodeId(0), 0), NodeInput::value(TaggedValue::F64(0.), false)],
|
||||
implementation: DocumentNodeImplementation::ProtoNode(graphic::omit_element::IDENTIFIER),
|
||||
..Default::default()
|
||||
},
|
||||
]
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(id, node)| (NodeId(id as u64), node))
|
||||
.collect(),
|
||||
..Default::default()
|
||||
}),
|
||||
inputs: vec![
|
||||
NodeInput::value(TaggedValue::String(String::new()), true),
|
||||
NodeInput::value(TaggedValue::String(String::new()), false),
|
||||
NodeInput::value(TaggedValue::F64(0.), false),
|
||||
NodeInput::value(TaggedValue::Bool(false), false),
|
||||
NodeInput::value(TaggedValue::Bool(false), false),
|
||||
],
|
||||
..Default::default()
|
||||
},
|
||||
persistent_node_metadata: DocumentNodePersistentMetadata {
|
||||
input_metadata: vec![
|
||||
("String", "The string to search within.").into(),
|
||||
("Pattern", "The regular expression pattern to search for.").into(),
|
||||
(
|
||||
"Match Index",
|
||||
"Which non-overlapping occurrence of the pattern to return, starting from 0 for the first match. Negative indices count backwards from the last match.",
|
||||
)
|
||||
.into(),
|
||||
("Case Insensitive", "Match letters regardless of case.").into(),
|
||||
("Multiline", "Make `^` and `$` match the start and end of each line, not just the whole string.").into(),
|
||||
],
|
||||
output_names: vec!["Match".to_string(), "Captures".to_string()],
|
||||
network_metadata: Some(NodeNetworkMetadata {
|
||||
persistent_metadata: NodeNetworkPersistentMetadata {
|
||||
node_metadata: [
|
||||
DocumentNodeMetadata {
|
||||
persistent_metadata: DocumentNodePersistentMetadata {
|
||||
node_type_metadata: NodeTypePersistentMetadata::node(IVec2::new(0, 0)),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
},
|
||||
DocumentNodeMetadata {
|
||||
persistent_metadata: DocumentNodePersistentMetadata {
|
||||
node_type_metadata: NodeTypePersistentMetadata::node(IVec2::new(8, 0)),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
},
|
||||
DocumentNodeMetadata {
|
||||
persistent_metadata: DocumentNodePersistentMetadata {
|
||||
node_type_metadata: NodeTypePersistentMetadata::node(IVec2::new(8, 2)),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
},
|
||||
]
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(id, node)| (NodeId(id as u64), node))
|
||||
.collect(),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
},
|
||||
},
|
||||
description: Cow::Borrowed(
|
||||
r#"Finds a portion of the string matching a regular expression pattern. With "Match Index" at its default 0, it selects the first non-overlapping occurrence, but others may be selected. Capture groups, if any, are produced as a list in the "Captures" output."#,
|
||||
),
|
||||
properties: None,
|
||||
},
|
||||
// Aims for interoperable compatibility with:
|
||||
// https://www.adobe.com/devnet-apps/photoshop/fileformatashtml/#:~:text=levl%27%20%3D%20Levels-,%27curv%27%20%3D%20Curves,-%27expA%27%20%3D%20Exposure
|
||||
// https://www.adobe.com/devnet-apps/photoshop/fileformatashtml/#:~:text=Max%20input%20range-,Curves,-Curves%20settings%20files
|
||||
|
|
|
|||
|
|
@ -397,6 +397,41 @@ impl<T: Clone> AtIndex for Table<T> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Produces a copy of a collection with a single element left out.
///
/// Both methods leave `self` untouched and return a new collection. When the requested position does not
/// exist, the result is an unmodified clone of the input.
pub trait OmitIndex {
	/// Returns a copy of the collection without the element at `index`, counted from the start (0 is the first element).
	fn omit_index(&self, index: usize) -> Self;

	/// Returns a copy of the collection without the element `index` places from the end (1 is the last element).
	/// An `index` of 0, or one larger than the collection's length, leaves the collection unchanged.
	fn omit_index_from_end(&self, index: usize) -> Self;
}

impl<T: Clone> OmitIndex for Vec<T> {
	fn omit_index(&self, index: usize) -> Self {
		// Nothing to drop if the index is past the end
		if index >= self.len() {
			return self.clone();
		}

		// Copy the two halves around the omitted element into an exactly-sized buffer, avoiding the
		// repeated regrowth that a filtered `collect()` incurs (filtering discards the iterator's size hint)
		let mut kept = Vec::with_capacity(self.len() - 1);
		kept.extend_from_slice(&self[..index]);
		kept.extend_from_slice(&self[index + 1..]);
		kept
	}

	fn omit_index_from_end(&self, index: usize) -> Self {
		// `checked_sub` yields `None` when `index` exceeds the length, and 0 does not address any element from the end
		match self.len().checked_sub(index) {
			Some(position) if index != 0 => self.omit_index(position),
			_ => self.clone(),
		}
	}
}
|
||||
impl<T: Clone> OmitIndex for Table<T> {
	/// Builds a new table holding a clone of every row except the one at `index` (0 is the first row).
	/// An out-of-range `index` skips nothing, so every row is copied.
	fn omit_index(&self, index: usize) -> Self {
		let mut kept = Self::default();

		self.iter()
			.enumerate()
			.filter(|(position, _)| *position != index)
			.for_each(|(_, row)| kept.push(row.into_cloned()));

		kept
	}

	/// Builds a new table without the row `index` places from the end (1 is the last row).
	/// An `index` of 0, or one larger than the row count, returns a plain clone.
	fn omit_index_from_end(&self, index: usize) -> Self {
		// `checked_sub` is `None` exactly when `index` exceeds the row count
		match self.len().checked_sub(index) {
			Some(position) if index != 0 => self.omit_index(position),
			_ => self.clone(),
		}
	}
}
|
||||
|
||||
// TODO: Eventually remove this migration document upgrade code
|
||||
pub fn migrate_graphic<'de, D: serde::Deserializer<'de>>(deserializer: D) -> Result<Table<Graphic>, D::Error> {
|
||||
use serde::Deserialize;
|
||||
|
|
|
|||
|
|
@ -46,6 +46,39 @@ where
|
|||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Returns the collection with the element at the specified index removed.
|
||||
/// If no value exists at that index, the collection is returned unchanged.
|
||||
#[node_macro::node(category("General"))]
|
||||
pub fn omit_element<T: graphic_types::graphic::OmitIndex + Clone + Default>(
|
||||
_: impl Ctx,
|
||||
/// The collection of data, such as a list or table.
|
||||
#[implementations(
|
||||
Vec<f64>,
|
||||
Vec<u32>,
|
||||
Vec<u64>,
|
||||
Vec<DVec2>,
|
||||
Vec<String>,
|
||||
Table<Artboard>,
|
||||
Table<Graphic>,
|
||||
Table<Vector>,
|
||||
Table<Raster<CPU>>,
|
||||
Table<Raster<GPU>>,
|
||||
Table<Color>,
|
||||
Table<GradientStops>,
|
||||
)]
|
||||
collection: T,
|
||||
/// The index of the item to remove, starting from 0 for the first item. Negative indices count backwards from the end of the collection, starting from -1 for the last item.
|
||||
index: SignedInteger,
|
||||
) -> T {
|
||||
let index = index as i32;
|
||||
|
||||
if index < 0 {
|
||||
collection.omit_index_from_end(-index as usize)
|
||||
} else {
|
||||
collection.omit_index(index as usize)
|
||||
}
|
||||
}
|
||||
|
||||
#[node_macro::node(category("General"))]
|
||||
async fn map<Item: AnyHash + Send + Sync + std::hash::Hash>(
|
||||
ctx: impl Ctx + CloneVarArgs + ExtractAll,
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ log = { workspace = true }
|
|||
serde_json = { workspace = true }
|
||||
convert_case = { workspace = true }
|
||||
titlecase = { workspace = true }
|
||||
fancy-regex = { workspace = true }
|
||||
unicode-segmentation = { workspace = true }
|
||||
|
||||
# Optional workspace dependencies
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
mod font_cache;
|
||||
pub mod json;
|
||||
mod path_builder;
|
||||
pub mod regex;
|
||||
mod text_context;
|
||||
mod to_path;
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,201 @@
|
|||
use core_types::Ctx;
|
||||
use core_types::registry::types::SignedInteger;
|
||||
|
||||
/// Checks whether the string contains a match for the given regular expression pattern. Optionally restricts the match to only the start and/or end of the string.
|
||||
#[node_macro::node(category("Text: Regex"))]
|
||||
fn regex_contains(
|
||||
_: impl Ctx,
|
||||
/// The string to search within.
|
||||
string: String,
|
||||
/// The regular expression pattern to search for.
|
||||
pattern: String,
|
||||
/// Match letters regardless of case.
|
||||
case_insensitive: bool,
|
||||
/// Make `^` and `$` match the start and end of each line, not just the whole string.
|
||||
multiline: bool,
|
||||
/// Only match if the pattern appears at the start of the string.
|
||||
at_start: bool,
|
||||
/// Only match if the pattern appears at the end of the string.
|
||||
at_end: bool,
|
||||
) -> bool {
|
||||
let flags = match (case_insensitive, multiline) {
|
||||
(false, false) => "",
|
||||
(true, false) => "(?i)",
|
||||
(false, true) => "(?m)",
|
||||
(true, true) => "(?im)",
|
||||
};
|
||||
let anchored_pattern = match (at_start, at_end) {
|
||||
(true, true) => format!("{flags}\\A(?:{pattern})\\z"),
|
||||
(true, false) => format!("{flags}\\A(?:{pattern})"),
|
||||
(false, true) => format!("{flags}(?:{pattern})\\z"),
|
||||
(false, false) => format!("{flags}{pattern}"),
|
||||
};
|
||||
|
||||
let Ok(regex) = fancy_regex::Regex::new(&anchored_pattern) else {
|
||||
log::error!("Invalid regex pattern: {pattern}");
|
||||
return false;
|
||||
};
|
||||
|
||||
regex.is_match(&string).unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Replaces matches of a regular expression pattern in the string. The replacement string can reference captures: `$0` for the whole match and `$1`, `$2`, etc. for capture groups.
|
||||
#[node_macro::node(category("Text: Regex"))]
|
||||
fn regex_replace(
|
||||
_: impl Ctx,
|
||||
string: String,
|
||||
/// The regular expression pattern to search for.
|
||||
pattern: String,
|
||||
/// The replacement string. Use `$0` for the whole match and `$1`, `$2`, etc. for capture groups.
|
||||
replacement: String,
|
||||
/// Replace all matches. When disabled, only the first match is replaced.
|
||||
#[default(true)]
|
||||
replace_all: bool,
|
||||
/// Match letters regardless of case.
|
||||
case_insensitive: bool,
|
||||
/// Make `^` and `$` match the start and end of each line, not just the whole string.
|
||||
multiline: bool,
|
||||
) -> String {
|
||||
let flags = match (case_insensitive, multiline) {
|
||||
(false, false) => "",
|
||||
(true, false) => "(?i)",
|
||||
(false, true) => "(?m)",
|
||||
(true, true) => "(?im)",
|
||||
};
|
||||
let full_pattern = format!("{flags}{pattern}");
|
||||
|
||||
let Ok(regex) = fancy_regex::Regex::new(&full_pattern) else {
|
||||
log::warn!("Invalid regex pattern: {pattern}");
|
||||
return string;
|
||||
};
|
||||
|
||||
if replace_all {
|
||||
regex.replace_all(&string, replacement.as_str()).into_owned()
|
||||
} else {
|
||||
regex.replace(&string, replacement.as_str()).into_owned()
|
||||
}
|
||||
}
|
||||
|
||||
/// Finds a regex match in the string and returns its components. The result is a list where the first element is the whole match (`$0`) and subsequent elements are the capture groups (`$1`, `$2`, etc., if any).
|
||||
///
|
||||
/// The match index selects which non-overlapping occurrence to return (0 for the first match). Returns an empty list if no match is found at the given index.
|
||||
#[node_macro::node(category(""))]
|
||||
fn regex_find(
|
||||
_: impl Ctx,
|
||||
/// The string to search within.
|
||||
string: String,
|
||||
/// The regular expression pattern to search for.
|
||||
pattern: String,
|
||||
/// Which non-overlapping occurrence of the pattern to return, starting from 0 for the first match. Negative indices count backwards from the last match.
|
||||
match_index: SignedInteger,
|
||||
/// Match letters regardless of case.
|
||||
case_insensitive: bool,
|
||||
/// Make `^` and `$` match the start and end of each line, not just the whole string.
|
||||
multiline: bool,
|
||||
) -> Vec<String> {
|
||||
if pattern.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let flags = match (case_insensitive, multiline) {
|
||||
(false, false) => "",
|
||||
(true, false) => "(?i)",
|
||||
(false, true) => "(?m)",
|
||||
(true, true) => "(?im)",
|
||||
};
|
||||
let full_pattern = format!("{flags}{pattern}");
|
||||
|
||||
let Ok(regex) = fancy_regex::Regex::new(&full_pattern) else {
|
||||
log::error!("Invalid regex pattern: {pattern}");
|
||||
return Vec::new();
|
||||
};
|
||||
|
||||
// Collect all matches since we need to support negative indexing
|
||||
let matches: Vec<_> = regex.captures_iter(&string).filter_map(|c| c.ok()).collect();
|
||||
|
||||
let match_index = match_index as i32;
|
||||
let resolved_index = if match_index < 0 {
|
||||
let from_end = (-match_index) as usize;
|
||||
if from_end > matches.len() {
|
||||
return Vec::new();
|
||||
}
|
||||
matches.len() - from_end
|
||||
} else {
|
||||
match_index as usize
|
||||
};
|
||||
|
||||
let Some(captures) = matches.get(resolved_index) else {
|
||||
return Vec::new();
|
||||
};
|
||||
|
||||
// Index 0 is the whole match, 1+ are capture groups
|
||||
(0..captures.len()).map(|i| captures.get(i).map_or(String::new(), |m| m.as_str().to_string())).collect()
|
||||
}
|
||||
|
||||
/// Finds all non-overlapping matches of a regular expression pattern in the string, returning a list of the matched substrings.
|
||||
#[node_macro::node(category("Text: Regex"))]
|
||||
fn regex_find_all(
|
||||
_: impl Ctx,
|
||||
/// The string to search within.
|
||||
string: String,
|
||||
/// The regular expression pattern to search for.
|
||||
pattern: String,
|
||||
/// Match letters regardless of case.
|
||||
case_insensitive: bool,
|
||||
/// Make `^` and `$` match the start and end of each line, not just the whole string.
|
||||
multiline: bool,
|
||||
) -> Vec<String> {
|
||||
if pattern.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let flags = match (case_insensitive, multiline) {
|
||||
(false, false) => "",
|
||||
(true, false) => "(?i)",
|
||||
(false, true) => "(?m)",
|
||||
(true, true) => "(?im)",
|
||||
};
|
||||
let full_pattern = format!("{flags}{pattern}");
|
||||
|
||||
let Ok(regex) = fancy_regex::Regex::new(&full_pattern) else {
|
||||
log::error!("Invalid regex pattern: {pattern}");
|
||||
return Vec::new();
|
||||
};
|
||||
|
||||
regex.find_iter(&string).filter_map(|m| m.ok()).map(|m| m.as_str().to_string()).collect()
|
||||
}
|
||||
|
||||
/// Splits a string into a list of substrings pulled from between separator characters as matched by a regular expression.
|
||||
///
|
||||
/// For example, splitting "Three, two, one... LIFTOFF" with pattern `\W+` (non-word characters) produces `["Three", "two", "one", "LIFTOFF"]`.
|
||||
#[node_macro::node(category("Text: Regex"))]
|
||||
fn regex_split(
|
||||
_: impl Ctx,
|
||||
/// The string to split into substrings.
|
||||
string: String,
|
||||
/// The regular expression pattern to split on. Matches are consumed and not included in the output.
|
||||
pattern: String,
|
||||
/// Match letters regardless of case.
|
||||
case_insensitive: bool,
|
||||
/// Make `^` and `$` match the start and end of each line, not just the whole string.
|
||||
multiline: bool,
|
||||
) -> Vec<String> {
|
||||
if pattern.is_empty() {
|
||||
return vec![string];
|
||||
}
|
||||
|
||||
let flags = match (case_insensitive, multiline) {
|
||||
(false, false) => "",
|
||||
(true, false) => "(?i)",
|
||||
(false, true) => "(?m)",
|
||||
(true, true) => "(?im)",
|
||||
};
|
||||
let full_pattern = format!("{flags}{pattern}");
|
||||
|
||||
let Ok(regex) = fancy_regex::Regex::new(&full_pattern) else {
|
||||
log::error!("Invalid regex pattern: {pattern}");
|
||||
return vec![string];
|
||||
};
|
||||
|
||||
regex.split(&string).filter_map(|s| s.ok()).map(|s| s.to_string()).collect()
|
||||
}
|
||||
|
|
@ -34,6 +34,7 @@ pub fn category_description(category: &str) -> &str {
|
|||
"Raster: Pattern" => "Nodes in this category generate procedural raster patterns, fractals, textures, and noise.",
|
||||
"Raster" => "Nodes in this category deal with fundamental raster image operations.",
|
||||
"Text" => "Nodes in this category support the manipulation, formatting, and rendering of text strings.",
|
||||
"Text: Regex" => "Nodes in this category perform string operations involving regular expressions, such as pattern matching and replacement.",
|
||||
"Text: JSON" => "Nodes in this category perform string operations involving JSON data, such as parsing and stringifying.",
|
||||
"Value" => "Nodes in this category supply data values of common types such as numbers, colors, booleans, and strings.",
|
||||
"Vector: Measure" => "Nodes in this category perform measurements and analysis on vector graphics, such as length/area calculations, path traversal, and hit testing.",
|
||||
|
|
|
|||
Loading…
Reference in New Issue