Add useful attributes to the JSON and Regex nodes (#4069)

* Add useful attributes to the JSON and Regex nodes

* Code review fix
This commit is contained in:
Keavon Chambers 2026-04-28 14:11:34 -07:00 committed by GitHub
parent b396d17211
commit 84fb901b5a
4 changed files with 82 additions and 14 deletions

View File

@ -1545,11 +1545,11 @@ fn document_node_definitions() -> HashMap<DefinitionIdentifier, DocumentNodeDefi
exports: vec![ exports: vec![
// Primary output: the whole match (String) // Primary output: the whole match (String)
NodeInput::node(NodeId(1), 0), NodeInput::node(NodeId(1), 0),
// Secondary output: capture groups (Vec<String>) // Secondary output: capture groups (Table<String>), each row carries `start`/`end`/`name` attributes from `regex_find`
NodeInput::node(NodeId(2), 0), NodeInput::node(NodeId(2), 0),
], ],
nodes: [ nodes: [
// Node 0: regex_find proto node — returns Vec<String> of [whole_match, ...capture_groups] // Node 0: regex_find proto node — returns Table<String> of [whole_match, ...capture_groups]
DocumentNode { DocumentNode {
inputs: vec![ inputs: vec![
NodeInput::import(concrete!(String), 0), NodeInput::import(concrete!(String), 0),
@ -1561,13 +1561,13 @@ fn document_node_definitions() -> HashMap<DefinitionIdentifier, DocumentNodeDefi
implementation: DocumentNodeImplementation::ProtoNode(text_nodes::regex::regex_find::IDENTIFIER), implementation: DocumentNodeImplementation::ProtoNode(text_nodes::regex::regex_find::IDENTIFIER),
..Default::default() ..Default::default()
}, },
// Node 1: index_elements at index 0 — extracts the whole match as a String // Node 1: extract_element at index 0, extracts the whole match as a bare String (drops the row's start/end/name attributes since the unwrapped String can't carry them)
DocumentNode { DocumentNode {
inputs: vec![NodeInput::node(NodeId(0), 0), NodeInput::value(TaggedValue::F64(0.), false)], inputs: vec![NodeInput::node(NodeId(0), 0), NodeInput::value(TaggedValue::F64(0.), false)],
implementation: DocumentNodeImplementation::ProtoNode(graphic::index_elements::IDENTIFIER), implementation: DocumentNodeImplementation::ProtoNode(graphic::extract_element::IDENTIFIER),
..Default::default() ..Default::default()
}, },
// Node 2: omit_element at index 0 — returns capture groups as Vec<String> // Node 2: omit_element at index 0, returns the capture group rows as a Table<String>, preserving each row's start/end/name attributes
DocumentNode { DocumentNode {
inputs: vec![NodeInput::node(NodeId(0), 0), NodeInput::value(TaggedValue::F64(0.), false)], inputs: vec![NodeInput::node(NodeId(0), 0), NodeInput::value(TaggedValue::F64(0.), false)],
implementation: DocumentNodeImplementation::ProtoNode(graphic::omit_element::IDENTIFIER), implementation: DocumentNodeImplementation::ProtoNode(graphic::omit_element::IDENTIFIER),

View File

@ -68,12 +68,49 @@ pub fn omit_element<T: graphic_types::graphic::OmitIndex + Clone + Default>(
let index = index as i32; let index = index as i32;
if index < 0 { if index < 0 {
collection.omit_index_from_end(-index as usize) collection.omit_index_from_end(index.unsigned_abs() as usize)
} else { } else {
collection.omit_index(index as usize) collection.omit_index(index as usize)
} }
} }
/// Returns the bare element (without its row attributes) at the specified index in a table.
/// Use this when downstream nodes want just the inner value rather than a single-row table.
/// If no value exists at that index, the element type's default is returned.
#[node_macro::node(category("General"))]
pub fn extract_element<T: Clone + Default + Send + Sync + 'static>(
    _: impl Ctx,
    /// The table of data to extract from.
    #[implementations(
        Table<String>,
        Table<f64>,
        Table<u8>,
        Table<NodeId>,
        Table<Color>,
        Table<GradientStops>,
        Table<Vector>,
        Table<Raster<CPU>>,
        Table<Graphic>,
        Table<Artboard>,
    )]
    table: Table<T>,
    /// The index of the item to retrieve, starting from 0 for the first item. Negative indices count backwards from the end of the collection, starting from -1 for the last item.
    index: SignedInteger,
) -> T {
    let index = index as i32;
    let row_count = table.len();

    // Translate the signed index into an absolute row position. A negative index is measured from the end;
    // `checked_sub` yields `None` when it reaches back past the first row, so the subtraction can never underflow.
    // `unsigned_abs` is used instead of negation so that `i32::MIN` does not overflow.
    let position = if index >= 0 {
        Some(index as usize)
    } else {
        row_count.checked_sub(index.unsigned_abs() as usize)
    };

    // Any position that does not resolve to an existing row falls back to the element type's default.
    match position {
        Some(position) => table.element(position).cloned().unwrap_or_default(),
        None => T::default(),
    }
}
#[node_macro::node(category("General"))] #[node_macro::node(category("General"))]
async fn map<Item: AnyHash + Send + Sync + core_types::CacheHash>( async fn map<Item: AnyHash + Send + Sync + core_types::CacheHash>(
ctx: impl Ctx + CloneVarArgs + ExtractAll, ctx: impl Ctx + CloneVarArgs + ExtractAll,

View File

@ -210,11 +210,13 @@ fn query_json(
let mut results = Vec::new(); let mut results = Vec::new();
resolve_all(&value, &segments, !unquote_strings, &mut results); resolve_all(&value, &segments, !unquote_strings, &mut results);
results.into_iter().next().unwrap_or_default() results.into_iter().next().map(|(text, _ty)| text).unwrap_or_default()
} }
/// Extracts every matched value from a JSON string using a path expression (see that parameter's description for its syntax). A list of zero or more resultant strings is produced. The `[]` path accessor is used to read more than one value. /// Extracts every matched value from a JSON string using a path expression (see that parameter's description for its syntax). A list of zero or more resultant strings is produced. The `[]` path accessor is used to read more than one value.
/// ///
/// Each row carries a `type` attribute holding the matched value's JSON type (`"string"`, `"number"`, `"bool"`, `"null"`, `"object"`, or `"array"`).
///
/// This is useful in conjunction with the nodes: /// This is useful in conjunction with the nodes:
/// • **Index Elements**: access the `N`th query result. /// • **Index Elements**: access the `N`th query result.
/// • **String to Number**: convert numeric query results to numbers. /// • **String to Number**: convert numeric query results to numbers.
@ -246,7 +248,7 @@ fn query_json_all(
let mut results = Vec::new(); let mut results = Vec::new();
resolve_all(&value, &segments, !unquote_strings, &mut results); resolve_all(&value, &segments, !unquote_strings, &mut results);
results.into_iter().map(TableRow::new_from_element).collect() results.into_iter().map(|(text, ty)| TableRow::new_from_element(text).with_attribute("type", ty.to_string())).collect()
} }
/// A parsed segment of a JSON access path. /// A parsed segment of a JSON access path.
@ -402,6 +404,18 @@ fn json_value_to_string(value: &serde_json::Value, quote_strings: bool) -> Strin
} }
} }
/// Maps a parsed JSON value to the short, static name of its JSON type.
///
/// The result is one of `"null"`, `"bool"`, `"number"`, `"string"`, `"array"`, or `"object"`.
fn json_value_type_name(value: &serde_json::Value) -> &'static str {
    // Every variant is listed explicitly (no catch-all arm) so the match stays exhaustive.
    match value {
        serde_json::Value::Null => "null",
        serde_json::Value::Bool(_) => "bool",
        serde_json::Value::Number(_) => "number",
        serde_json::Value::String(_) => "string",
        serde_json::Value::Array(_) => "array",
        serde_json::Value::Object(_) => "object",
    }
}
/// Navigates a JSON value by one path segment, returning the resulting value (or `None` if the path is invalid). /// Navigates a JSON value by one path segment, returning the resulting value (or `None` if the path is invalid).
fn json_navigate<'a>(value: &'a serde_json::Value, segment: &JsonPathSegment) -> Option<&'a serde_json::Value> { fn json_navigate<'a>(value: &'a serde_json::Value, segment: &JsonPathSegment) -> Option<&'a serde_json::Value> {
match segment { match segment {
@ -416,7 +430,7 @@ fn json_navigate<'a>(value: &'a serde_json::Value, segment: &JsonPathSegment) ->
} }
/// Recursively resolves a path against a JSON value, fanning out at each `[]` and collecting leaf results. /// Recursively resolves a path against a JSON value, fanning out at each `[]` and collecting leaf results.
fn resolve_all(value: &serde_json::Value, segments: &[JsonPathSegment], quote_strings: bool, results: &mut Vec<String>) { fn resolve_all(value: &serde_json::Value, segments: &[JsonPathSegment], quote_strings: bool, results: &mut Vec<(String, &'static str)>) {
// Find the next IterateAll in the remaining segments // Find the next IterateAll in the remaining segments
let Some(iterate_position) = segments.iter().position(|s| matches!(s, JsonPathSegment::IterateAll)) else { let Some(iterate_position) = segments.iter().position(|s| matches!(s, JsonPathSegment::IterateAll)) else {
// No more [] segments, navigate the rest linearly // No more [] segments, navigate the rest linearly
@ -425,7 +439,7 @@ fn resolve_all(value: &serde_json::Value, segments: &[JsonPathSegment], quote_st
let Some(next) = json_navigate(current, segment) else { return }; let Some(next) = json_navigate(current, segment) else { return };
current = next; current = next;
} }
results.push(json_value_to_string(current, quote_strings)); results.push((json_value_to_string(current, quote_strings), json_value_type_name(current)));
return; return;
}; };

View File

@ -80,6 +80,9 @@ fn regex_replace(
/// Finds a regex match in the string and returns its components. The result is a list where the first element is the whole match (`$0`) and subsequent elements are the capture groups (`$1`, `$2`, etc., if any). /// Finds a regex match in the string and returns its components. The result is a list where the first element is the whole match (`$0`) and subsequent elements are the capture groups (`$1`, `$2`, etc., if any).
/// ///
/// The match index selects which non-overlapping occurrence to return (0 for the first match). Returns an empty list if no match is found at the given index. /// The match index selects which non-overlapping occurrence to return (0 for the first match). Returns an empty list if no match is found at the given index.
///
/// Each row carries `start` and `end` byte-offset attributes pointing into the original string, plus a `name` attribute holding
/// the capture group's name (empty for unnamed groups, and for index 0 which is the whole match).
#[node_macro::node(category(""))] #[node_macro::node(category(""))]
fn regex_find( fn regex_find(
_: impl Ctx, _: impl Ctx,
@ -111,6 +114,9 @@ fn regex_find(
return Table::new(); return Table::new();
}; };
// Capture group names indexed positionally; index 0 (the whole match) is always None.
let capture_names: Vec<Option<String>> = regex.capture_names().map(|name| name.map(str::to_string)).collect();
// Collect all matches since we need to support negative indexing // Collect all matches since we need to support negative indexing
let matches: Vec<_> = regex.captures_iter(&string).filter_map(|c| c.ok()).collect(); let matches: Vec<_> = regex.captures_iter(&string).filter_map(|c| c.ok()).collect();
@ -131,12 +137,20 @@ fn regex_find(
// Index 0 is the whole match, 1+ are capture groups // Index 0 is the whole match, 1+ are capture groups
(0..captures.len()) (0..captures.len())
.map(|i| captures.get(i).map_or(String::new(), |m| m.as_str().to_string())) .map(|i| {
.map(TableRow::new_from_element) let captured = captures.get(i);
let text = captured.map_or(String::new(), |m| m.as_str().to_string());
let start = captured.map_or(0_u64, |m| m.start() as u64);
let end = captured.map_or(0_u64, |m| m.end() as u64);
let name = capture_names.get(i).cloned().flatten().unwrap_or_default();
TableRow::new_from_element(text).with_attribute("start", start).with_attribute("end", end).with_attribute("name", name)
})
.collect() .collect()
} }
/// Finds all non-overlapping matches of a regular expression pattern in the string, returning a list of the matched substrings. /// Finds all non-overlapping matches of a regular expression pattern in the string, returning a list of the matched substrings.
///
/// Each row carries `start` and `end` byte-offset attributes pointing into the original string.
#[node_macro::node(category("Text: Regex"))] #[node_macro::node(category("Text: Regex"))]
fn regex_find_all( fn regex_find_all(
_: impl Ctx, _: impl Ctx,
@ -169,8 +183,11 @@ fn regex_find_all(
regex regex
.find_iter(&string) .find_iter(&string)
.filter_map(|m| m.ok()) .filter_map(|m| m.ok())
.map(|m| m.as_str().to_string()) .map(|m| {
.map(TableRow::new_from_element) TableRow::new_from_element(m.as_str().to_string())
.with_attribute("start", m.start() as u64)
.with_attribute("end", m.end() as u64)
})
.collect() .collect()
} }