Add useful attributes to the JSON and Regex nodes (#4069)
* Add useful attributes to the JSON and Regex nodes * Code review fix
This commit is contained in:
parent
b396d17211
commit
84fb901b5a
|
|
@ -1545,11 +1545,11 @@ fn document_node_definitions() -> HashMap<DefinitionIdentifier, DocumentNodeDefi
|
||||||
exports: vec![
|
exports: vec![
|
||||||
// Primary output: the whole match (String)
|
// Primary output: the whole match (String)
|
||||||
NodeInput::node(NodeId(1), 0),
|
NodeInput::node(NodeId(1), 0),
|
||||||
// Secondary output: capture groups (Vec<String>)
|
// Secondary output: capture groups (Table<String>), each row carries `start`/`end`/`name` attributes from `regex_find`
|
||||||
NodeInput::node(NodeId(2), 0),
|
NodeInput::node(NodeId(2), 0),
|
||||||
],
|
],
|
||||||
nodes: [
|
nodes: [
|
||||||
// Node 0: regex_find proto node — returns Vec<String> of [whole_match, ...capture_groups]
|
// Node 0: regex_find proto node — returns Table<String> of [whole_match, ...capture_groups]
|
||||||
DocumentNode {
|
DocumentNode {
|
||||||
inputs: vec![
|
inputs: vec![
|
||||||
NodeInput::import(concrete!(String), 0),
|
NodeInput::import(concrete!(String), 0),
|
||||||
|
|
@ -1561,13 +1561,13 @@ fn document_node_definitions() -> HashMap<DefinitionIdentifier, DocumentNodeDefi
|
||||||
implementation: DocumentNodeImplementation::ProtoNode(text_nodes::regex::regex_find::IDENTIFIER),
|
implementation: DocumentNodeImplementation::ProtoNode(text_nodes::regex::regex_find::IDENTIFIER),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
},
|
},
|
||||||
// Node 1: index_elements at index 0 — extracts the whole match as a String
|
// Node 1: extract_element at index 0, extracts the whole match as a bare String (drops the row's start/end/name attributes since the unwrapped String can't carry them)
|
||||||
DocumentNode {
|
DocumentNode {
|
||||||
inputs: vec![NodeInput::node(NodeId(0), 0), NodeInput::value(TaggedValue::F64(0.), false)],
|
inputs: vec![NodeInput::node(NodeId(0), 0), NodeInput::value(TaggedValue::F64(0.), false)],
|
||||||
implementation: DocumentNodeImplementation::ProtoNode(graphic::index_elements::IDENTIFIER),
|
implementation: DocumentNodeImplementation::ProtoNode(graphic::extract_element::IDENTIFIER),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
},
|
},
|
||||||
// Node 2: omit_element at index 0 — returns capture groups as Vec<String>
|
// Node 2: omit_element at index 0, returns the capture group rows as a Table<String>, preserving each row's start/end/name attributes
|
||||||
DocumentNode {
|
DocumentNode {
|
||||||
inputs: vec![NodeInput::node(NodeId(0), 0), NodeInput::value(TaggedValue::F64(0.), false)],
|
inputs: vec![NodeInput::node(NodeId(0), 0), NodeInput::value(TaggedValue::F64(0.), false)],
|
||||||
implementation: DocumentNodeImplementation::ProtoNode(graphic::omit_element::IDENTIFIER),
|
implementation: DocumentNodeImplementation::ProtoNode(graphic::omit_element::IDENTIFIER),
|
||||||
|
|
|
||||||
|
|
@ -68,12 +68,49 @@ pub fn omit_element<T: graphic_types::graphic::OmitIndex + Clone + Default>(
|
||||||
let index = index as i32;
|
let index = index as i32;
|
||||||
|
|
||||||
if index < 0 {
|
if index < 0 {
|
||||||
collection.omit_index_from_end(-index as usize)
|
collection.omit_index_from_end(index.unsigned_abs() as usize)
|
||||||
} else {
|
} else {
|
||||||
collection.omit_index(index as usize)
|
collection.omit_index(index as usize)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the bare element (without its row attributes) at the specified index in a table.
|
||||||
|
/// Use this when downstream nodes want just the inner value rather than a single-row table.
|
||||||
|
/// If no value exists at that index, the element type's default is returned.
|
||||||
|
#[node_macro::node(category("General"))]
|
||||||
|
pub fn extract_element<T: Clone + Default + Send + Sync + 'static>(
|
||||||
|
_: impl Ctx,
|
||||||
|
/// The table of data to extract from.
|
||||||
|
#[implementations(
|
||||||
|
Table<String>,
|
||||||
|
Table<f64>,
|
||||||
|
Table<u8>,
|
||||||
|
Table<NodeId>,
|
||||||
|
Table<Color>,
|
||||||
|
Table<GradientStops>,
|
||||||
|
Table<Vector>,
|
||||||
|
Table<Raster<CPU>>,
|
||||||
|
Table<Graphic>,
|
||||||
|
Table<Artboard>,
|
||||||
|
)]
|
||||||
|
table: Table<T>,
|
||||||
|
/// The index of the item to retrieve, starting from 0 for the first item. Negative indices count backwards from the end of the collection, starting from -1 for the last item.
|
||||||
|
index: SignedInteger,
|
||||||
|
) -> T {
|
||||||
|
let len = table.len();
|
||||||
|
let index = index as i32;
|
||||||
|
let resolved = if index < 0 {
|
||||||
|
let from_end = index.unsigned_abs() as usize;
|
||||||
|
if from_end > len {
|
||||||
|
return T::default();
|
||||||
|
}
|
||||||
|
len - from_end
|
||||||
|
} else {
|
||||||
|
index as usize
|
||||||
|
};
|
||||||
|
table.element(resolved).cloned().unwrap_or_default()
|
||||||
|
}
|
||||||
|
|
||||||
#[node_macro::node(category("General"))]
|
#[node_macro::node(category("General"))]
|
||||||
async fn map<Item: AnyHash + Send + Sync + core_types::CacheHash>(
|
async fn map<Item: AnyHash + Send + Sync + core_types::CacheHash>(
|
||||||
ctx: impl Ctx + CloneVarArgs + ExtractAll,
|
ctx: impl Ctx + CloneVarArgs + ExtractAll,
|
||||||
|
|
|
||||||
|
|
@ -210,11 +210,13 @@ fn query_json(
|
||||||
let mut results = Vec::new();
|
let mut results = Vec::new();
|
||||||
resolve_all(&value, &segments, !unquote_strings, &mut results);
|
resolve_all(&value, &segments, !unquote_strings, &mut results);
|
||||||
|
|
||||||
results.into_iter().next().unwrap_or_default()
|
results.into_iter().next().map(|(text, _ty)| text).unwrap_or_default()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extracts every matched value from a JSON string using a path expression (see that parameter's description for its syntax). A list of zero or more resultant strings is produced. The `[]` path accessor is used to read more than one value.
|
/// Extracts every matched value from a JSON string using a path expression (see that parameter's description for its syntax). A list of zero or more resultant strings is produced. The `[]` path accessor is used to read more than one value.
|
||||||
///
|
///
|
||||||
|
/// Each row carries a `type` attribute holding the matched value's JSON type (`"string"`, `"number"`, `"bool"`, `"null"`, `"object"`, or `"array"`).
|
||||||
|
///
|
||||||
/// This is useful in conjunction with the nodes:
|
/// This is useful in conjunction with the nodes:
|
||||||
/// • **Index Elements**: access the `N`th query result.
|
/// • **Index Elements**: access the `N`th query result.
|
||||||
/// • **String to Number**: convert numeric query results to numbers.
|
/// • **String to Number**: convert numeric query results to numbers.
|
||||||
|
|
@ -246,7 +248,7 @@ fn query_json_all(
|
||||||
let mut results = Vec::new();
|
let mut results = Vec::new();
|
||||||
resolve_all(&value, &segments, !unquote_strings, &mut results);
|
resolve_all(&value, &segments, !unquote_strings, &mut results);
|
||||||
|
|
||||||
results.into_iter().map(TableRow::new_from_element).collect()
|
results.into_iter().map(|(text, ty)| TableRow::new_from_element(text).with_attribute("type", ty.to_string())).collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A parsed segment of a JSON access path.
|
/// A parsed segment of a JSON access path.
|
||||||
|
|
@ -402,6 +404,18 @@ fn json_value_to_string(value: &serde_json::Value, quote_strings: bool) -> Strin
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns a short JSON-type name (`"string"`, `"number"`, `"bool"`, `"null"`, `"object"`, `"array"`) for a parsed value.
|
||||||
|
fn json_value_type_name(value: &serde_json::Value) -> &'static str {
|
||||||
|
match value {
|
||||||
|
serde_json::Value::String(_) => "string",
|
||||||
|
serde_json::Value::Number(_) => "number",
|
||||||
|
serde_json::Value::Bool(_) => "bool",
|
||||||
|
serde_json::Value::Null => "null",
|
||||||
|
serde_json::Value::Object(_) => "object",
|
||||||
|
serde_json::Value::Array(_) => "array",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Navigates a JSON value by one path segment, returning the resulting value (or `None` if the path is invalid).
|
/// Navigates a JSON value by one path segment, returning the resulting value (or `None` if the path is invalid).
|
||||||
fn json_navigate<'a>(value: &'a serde_json::Value, segment: &JsonPathSegment) -> Option<&'a serde_json::Value> {
|
fn json_navigate<'a>(value: &'a serde_json::Value, segment: &JsonPathSegment) -> Option<&'a serde_json::Value> {
|
||||||
match segment {
|
match segment {
|
||||||
|
|
@ -416,7 +430,7 @@ fn json_navigate<'a>(value: &'a serde_json::Value, segment: &JsonPathSegment) ->
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Recursively resolves a path against a JSON value, fanning out at each `[]` and collecting leaf results.
|
/// Recursively resolves a path against a JSON value, fanning out at each `[]` and collecting leaf results.
|
||||||
fn resolve_all(value: &serde_json::Value, segments: &[JsonPathSegment], quote_strings: bool, results: &mut Vec<String>) {
|
fn resolve_all(value: &serde_json::Value, segments: &[JsonPathSegment], quote_strings: bool, results: &mut Vec<(String, &'static str)>) {
|
||||||
// Find the next IterateAll in the remaining segments
|
// Find the next IterateAll in the remaining segments
|
||||||
let Some(iterate_position) = segments.iter().position(|s| matches!(s, JsonPathSegment::IterateAll)) else {
|
let Some(iterate_position) = segments.iter().position(|s| matches!(s, JsonPathSegment::IterateAll)) else {
|
||||||
// No more [] segments, navigate the rest linearly
|
// No more [] segments, navigate the rest linearly
|
||||||
|
|
@ -425,7 +439,7 @@ fn resolve_all(value: &serde_json::Value, segments: &[JsonPathSegment], quote_st
|
||||||
let Some(next) = json_navigate(current, segment) else { return };
|
let Some(next) = json_navigate(current, segment) else { return };
|
||||||
current = next;
|
current = next;
|
||||||
}
|
}
|
||||||
results.push(json_value_to_string(current, quote_strings));
|
results.push((json_value_to_string(current, quote_strings), json_value_type_name(current)));
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -80,6 +80,9 @@ fn regex_replace(
|
||||||
/// Finds a regex match in the string and returns its components. The result is a list where the first element is the whole match (`$0`) and subsequent elements are the capture groups (`$1`, `$2`, etc., if any).
|
/// Finds a regex match in the string and returns its components. The result is a list where the first element is the whole match (`$0`) and subsequent elements are the capture groups (`$1`, `$2`, etc., if any).
|
||||||
///
|
///
|
||||||
/// The match index selects which non-overlapping occurrence to return (0 for the first match). Returns an empty list if no match is found at the given index.
|
/// The match index selects which non-overlapping occurrence to return (0 for the first match). Returns an empty list if no match is found at the given index.
|
||||||
|
///
|
||||||
|
/// Each row carries `start` and `end` byte-offset attributes pointing into the original string, plus a `name` attribute holding
|
||||||
|
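/// the capture group's name (empty for unnamed groups, and for index 0 which is the whole match).
/// A capture group that did not participate in the match yields an empty string with `start` and `end` both set to 0.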
|
||||||
#[node_macro::node(category(""))]
|
#[node_macro::node(category(""))]
|
||||||
fn regex_find(
|
fn regex_find(
|
||||||
_: impl Ctx,
|
_: impl Ctx,
|
||||||
|
|
@ -111,6 +114,9 @@ fn regex_find(
|
||||||
return Table::new();
|
return Table::new();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Capture group names indexed positionally; index 0 (the whole match) is always None.
|
||||||
|
let capture_names: Vec<Option<String>> = regex.capture_names().map(|name| name.map(str::to_string)).collect();
|
||||||
|
|
||||||
// Collect all matches since we need to support negative indexing
|
// Collect all matches since we need to support negative indexing
|
||||||
let matches: Vec<_> = regex.captures_iter(&string).filter_map(|c| c.ok()).collect();
|
let matches: Vec<_> = regex.captures_iter(&string).filter_map(|c| c.ok()).collect();
|
||||||
|
|
||||||
|
|
@ -131,12 +137,20 @@ fn regex_find(
|
||||||
|
|
||||||
// Index 0 is the whole match, 1+ are capture groups
|
// Index 0 is the whole match, 1+ are capture groups
|
||||||
(0..captures.len())
|
(0..captures.len())
|
||||||
.map(|i| captures.get(i).map_or(String::new(), |m| m.as_str().to_string()))
|
.map(|i| {
|
||||||
.map(TableRow::new_from_element)
|
let captured = captures.get(i);
|
||||||
|
let text = captured.map_or(String::new(), |m| m.as_str().to_string());
|
||||||
|
let start = captured.map_or(0_u64, |m| m.start() as u64);
|
||||||
|
let end = captured.map_or(0_u64, |m| m.end() as u64);
|
||||||
|
let name = capture_names.get(i).cloned().flatten().unwrap_or_default();
|
||||||
|
TableRow::new_from_element(text).with_attribute("start", start).with_attribute("end", end).with_attribute("name", name)
|
||||||
|
})
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Finds all non-overlapping matches of a regular expression pattern in the string, returning a list of the matched substrings.
|
/// Finds all non-overlapping matches of a regular expression pattern in the string, returning a list of the matched substrings.
|
||||||
|
///
|
||||||
|
/// Each row carries `start` and `end` byte-offset attributes pointing into the original string.
|
||||||
#[node_macro::node(category("Text: Regex"))]
|
#[node_macro::node(category("Text: Regex"))]
|
||||||
fn regex_find_all(
|
fn regex_find_all(
|
||||||
_: impl Ctx,
|
_: impl Ctx,
|
||||||
|
|
@ -169,8 +183,11 @@ fn regex_find_all(
|
||||||
regex
|
regex
|
||||||
.find_iter(&string)
|
.find_iter(&string)
|
||||||
.filter_map(|m| m.ok())
|
.filter_map(|m| m.ok())
|
||||||
.map(|m| m.as_str().to_string())
|
.map(|m| {
|
||||||
.map(TableRow::new_from_element)
|
TableRow::new_from_element(m.as_str().to_string())
|
||||||
|
.with_attribute("start", m.start() as u64)
|
||||||
|
.with_attribute("end", m.end() as u64)
|
||||||
|
})
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue