Implement Convert trait to convert between CPU and GPU images (#3194)

* Add upload texture trait
* Make convert trait use explicit converter
* Add GPU texture download implementation
* Add footprint to convert trait
* Clean up texture upload / download
* Download wgpu textures aligned
* Abstract texture download into converter helper
* Rename module: it is not only doing uploads anymore, and "conversion" looks like an OK name
* Remove into_iter call and intermediate vector allocation

---------

Co-authored-by: Timon Schelling <me@timon.zip>
2025-09-25 20:54:51 +02:00 · 2025-09-25 20:54:51 +02:00 · ffc74273cc
parent 4bb1d05fc3
commit ffc74273cc
9 changed files with 309 additions and 72 deletions
--- a/editor/src/messages/portfolio/document/node_graph/document_node_definitions.rs
+++ b/editor/src/messages/portfolio/document/node_graph/document_node_definitions.rs
@ -980,7 +980,7 @@ fn static_nodes() -> Vec<DocumentNodeDefinition> {
 							DocumentNode {
 								inputs: vec![NodeInput::import(concrete!(Table<Raster<CPU>>), 0), NodeInput::node(NodeId(0), 0)],
 								call_argument: generic!(T),
-								implementation: DocumentNodeImplementation::ProtoNode(wgpu_executor::texture_upload::upload_texture::IDENTIFIER),
+								implementation: DocumentNodeImplementation::ProtoNode(wgpu_executor::texture_conversion::upload_texture::IDENTIFIER),
 								..Default::default()
 							},
 							DocumentNode {
--- a/node-graph/gcore/src/ops.rs
+++ b/node-graph/gcore/src/ops.rs
@ -1,5 +1,6 @@
 use crate::Node;
 use graphene_core_shaders::Ctx;
 use crate::{ExtractFootprint, Node, transform::Footprint};
 use std::marker::PhantomData;
 // TODO: Rename to "Passthrough"
@ -49,16 +50,16 @@ fn into<'i, T: 'i + Send + Into<O>, O: 'i + Send>(_: impl Ctx, value: T, _out_ty
 /// The [`Convert`] trait allows for conversion between Rust primitive numeric types.
 /// Because number casting is lossy, we cannot use the normal [`Into`] trait like we do for other types.
-pub trait Convert<T>: Sized {
+pub trait Convert<T, C>: Sized {
 	/// Converts this type into the (usually inferred) output type.
 	#[must_use]
-	fn convert(self) -> T;
+	fn convert(self, footprint: Footprint, converter: C) -> impl Future<Output = T> + Send;
 }
-impl<T: ToString> Convert<String> for T {
+impl<T: ToString + Send> Convert<String, ()> for T {
 	/// Converts this type into a `String` using its `ToString` implementation.
 	#[inline]
-	fn convert(self) -> String {
+	async fn convert(self, _: Footprint, _converter: ()) -> String {
 		self.to_string()
 	}
 }
@ -66,8 +67,8 @@ impl<T: ToString> Convert<String> for T {
 /// Implements the [`Convert`] trait for conversion between the cartesian product of Rust's primitive numeric types.
 macro_rules! impl_convert {
 	($from:ty, $to:ty) => {
-		impl Convert<$to> for $from {
+		impl Convert<$to, ()> for $from {
-			fn convert(self) -> $to {
+			async fn convert(self, _: Footprint, _: ()) -> $to {
 				self as $to
 			}
 		}
@ -105,8 +106,8 @@ impl_convert!(isize);
 impl_convert!(usize);
 #[node_macro::node(skip_impl)]
-fn convert<'i, T: 'i + Send + Convert<O>, O: 'i + Send>(_: impl Ctx, value: T, _out_ty: PhantomData<O>) -> O {
+async fn convert<'i, T: 'i + Send + Convert<O, C>, O: 'i + Send, C: 'i + Send>(ctx: impl Ctx + ExtractFootprint, value: T, converter: C, _out_ty: PhantomData<O>) -> O {
-	value.convert()
+	value.convert(*ctx.try_footprint().unwrap_or(&Footprint::DEFAULT), converter).await
 }
 #[cfg(test)]
--- a/node-graph/gcore/src/registry.rs
+++ b/node-graph/gcore/src/registry.rs
@ -9,7 +9,7 @@ use std::sync::{LazyLock, Mutex};
 pub use graphene_core_shaders::registry::types;
 // Translation struct between macro and definition
-#[derive(Clone)]
+#[derive(Clone, Debug)]
 pub struct NodeMetadata {
 	pub display_name: &'static str,
 	pub category: Option<&'static str>,
--- a/node-graph/interpreted-executor/src/node_registry.rs
+++ b/node-graph/interpreted-executor/src/node_registry.rs
@ -61,6 +61,14 @@ fn node_registry() -> HashMap<ProtoNodeIdentifier, HashMap<NodeIOTypes, NodeCons
 		convert_node!(from: DVec2, to: String),
 		convert_node!(from: IVec2, to: String),
 		convert_node!(from: DAffine2, to: String),
 		#[cfg(feature = "gpu")]
 		convert_node!(from: Table<Raster<CPU>>, to: Table<Raster<CPU>>, converter: &WgpuExecutor),
 		#[cfg(feature = "gpu")]
 		convert_node!(from: Table<Raster<CPU>>, to: Table<Raster<GPU>>, converter: &WgpuExecutor),
 		#[cfg(feature = "gpu")]
 		convert_node!(from: Table<Raster<GPU>>, to: Table<Raster<GPU>>, converter: &WgpuExecutor),
 		#[cfg(feature = "gpu")]
 		convert_node!(from: Table<Raster<GPU>>, to: Table<Raster<CPU>>, converter: &WgpuExecutor),
 		// =============
 		// MONITOR NODES
 		// =============
@ -394,21 +402,30 @@ mod node_registry_macros {
 			x
 		}};
 		(from: $from:ty, to: $to:ty) => {
 			convert_node!(from: $from, to: $to, converter: ())
 		};
 		(from: $from:ty, to: $to:ty, converter: $convert:ty) => {
 			(
 				ProtoNodeIdentifier::new(concat!["graphene_core::ops::ConvertNode<", stringify!($to), ">"]),
 				|mut args| {
 					Box::pin(async move {
-						let node = graphene_core::ops::ConvertNode::new(graphene_std::any::downcast_node::<Context, $from>(args.pop().unwrap()),
+						let mut args = args.drain(..);
-graphene_std::any::FutureWrapperNode::new(graphene_std::value::ClonedNode::new(std::marker::PhantomData::<$to>))						);
+						let node = graphene_core::ops::ConvertNode::new(
 							graphene_std::any::downcast_node::<Context, $from>(args.next().expect("Convert node did not get first argument")),
 							graphene_std::any::downcast_node::<Context, $convert>(args.next().expect("Convert node did not get converter argument")),
 							graphene_std::any::FutureWrapperNode::new(graphene_std::value::ClonedNode::new(std::marker::PhantomData::<$to>))
 						);
 						let any: DynAnyNode<Context, $to, _> = graphene_std::any::DynAnyNode::new(node);
 						Box::new(any) as TypeErasedBox
 					})
 				},
 				{
-					let node = graphene_core::ops::ConvertNode::new(graphene_std::any::PanicNode::<Context, core::pin::Pin<Box<dyn core::future::Future<Output = $from> + Send>>>::new(),
+					let node = graphene_core::ops::ConvertNode::new(
-
+						graphene_std::any::PanicNode::<Context, core::pin::Pin<Box<dyn core::future::Future<Output = $from> + Send>>>::new(),
-graphene_std::any::FutureWrapperNode::new(graphene_std::value::ClonedNode::new(std::marker::PhantomData::<$to>))					);
+						graphene_std::any::PanicNode::<Context, core::pin::Pin<Box<dyn core::future::Future<Output = $convert> + Send>>>::new(),
-					let params = vec![fn_type_fut!(Context, $from)];
+						graphene_std::any::FutureWrapperNode::new(graphene_std::value::ClonedNode::new(std::marker::PhantomData::<$to>))
 					);
 					let params = vec![fn_type_fut!(Context, $from), fn_type_fut!(Context, $convert)];
 					let node_io = NodeIO::<'_, Context>::to_async_node_io(&node, params);
 					node_io
 				},
--- a/node-graph/node-macro/src/shader_nodes/per_pixel_adjust.rs
+++ b/node-graph/node-macro/src/shader_nodes/per_pixel_adjust.rs
@ -128,7 +128,7 @@ impl PerPixelAdjustCodegen<'_> {
 				#(pub #uniform_members),*
 			}
 		};
-		let uniform_struct_shader_struct_derive = crate::buffer_struct::derive_buffer_struct_struct(&self.crate_ident, &uniform_struct)?;
+		let uniform_struct_shader_struct_derive = crate::buffer_struct::derive_buffer_struct_struct(self.crate_ident, &uniform_struct)?;
 		let image_params = self
 			.params
--- a/node-graph/preprocessor/src/lib.rs
+++ b/node-graph/preprocessor/src/lib.rs
@ -67,6 +67,7 @@ pub fn generate_node_substitutions() -> HashMap<ProtoNodeIdentifier, DocumentNod
 						1 => {
 							let input = inputs.iter().next().unwrap();
 							let input_ty = input.nested_type();
 							let mut inputs = vec![NodeInput::import(input.clone(), i)];
 							let into_node_identifier = ProtoNodeIdentifier {
 								name: format!("graphene_core::ops::IntoNode<{}>", input_ty.clone()).into(),
@ -80,13 +81,14 @@ pub fn generate_node_substitutions() -> HashMap<ProtoNodeIdentifier, DocumentNod
 								into_node_identifier
 							} else if into_node_registry.keys().any(|ident| ident.name.as_ref() == convert_node_identifier.name.as_ref()) {
 								generated_nodes += 1;
 								inputs.push(NodeInput::value(TaggedValue::None, false));
 								convert_node_identifier
 							} else {
 								identity_node.clone()
 							};
 							DocumentNode {
-								inputs: vec![NodeInput::import(input.clone(), i)],
+								inputs,
 								implementation: DocumentNodeImplementation::ProtoNode(proto_node),
 								visible: true,
 								..Default::default()
--- a/node-graph/wgpu-executor/src/lib.rs
+++ b/node-graph/wgpu-executor/src/lib.rs
@ -1,6 +1,6 @@
 mod context;
 pub mod shader_runtime;
-pub mod texture_upload;
+pub mod texture_conversion;
 use crate::shader_runtime::ShaderRuntime;
 use anyhow::Result;
--- a/node-graph/wgpu-executor/src/texture_conversion.rs
+++ b/node-graph/wgpu-executor/src/texture_conversion.rs
@ -0,0 +1,269 @@
 use crate::WgpuExecutor;
 use graphene_core::Color;
 use graphene_core::Ctx;
 use graphene_core::color::SRGBA8;
 use graphene_core::ops::Convert;
 use graphene_core::raster::Image;
 use graphene_core::raster_types::{CPU, GPU, Raster};
 use graphene_core::table::{Table, TableRow};
 use graphene_core::transform::Footprint;
 use wgpu::util::{DeviceExt, TextureDataOrder};
 use wgpu::{Extent3d, TextureDescriptor, TextureDimension, TextureFormat, TextureUsages};
 /// Creates a GPU texture initialized with the pixels of a CPU raster.
 ///
 /// Each pixel of `image` is converted to [`SRGBA8`] and the resulting bytes are passed to `create_texture_with_data`
 /// as the initial contents of a single-layer, single-mip 2D `Rgba8UnormSrgb` texture with the image's dimensions.
 /// The texture is created with the `TEXTURE_BINDING`, `COPY_DST` and `COPY_SRC` usages.
 fn upload_to_texture(device: &std::sync::Arc<wgpu::Device>, queue: &std::sync::Arc<wgpu::Queue>, image: &Raster<CPU>) -> wgpu::Texture {
 	let size = Extent3d {
 		width: image.width,
 		height: image.height,
 		depth_or_array_layers: 1,
 	};
 	let descriptor = TextureDescriptor {
 		label: Some("upload_texture node texture"),
 		size,
 		mip_level_count: 1,
 		sample_count: 1,
 		dimension: TextureDimension::D2,
 		format: TextureFormat::Rgba8UnormSrgb,
 		usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST | TextureUsages::COPY_SRC,
 		view_formats: &[],
 	};
 	// The texture format is 8 bits per channel, so the pixels are converted before being reinterpreted as raw bytes
 	let pixels: Vec<SRGBA8> = image.data.iter().map(|color| (*color).into()).collect();
 	device.create_texture_with_data(queue, &descriptor, TextureDataOrder::LayerMajor, bytemuck::cast_slice(pixels.as_slice()))
 }
 /// Converts a Raster<GPU> texture to Raster<CPU> by downloading the underlying texture data.
 ///
 /// Assumptions:
 /// - 2D texture, mip level 0
 /// - 4 bytes-per-pixel RGBA8
 /// - Texture has COPY_SRC usage
 ///
 /// The download happens in two phases: `new` records the texture-to-buffer copy into a command encoder,
 /// and `convert` maps the buffer and decodes its contents into a CPU image.
 struct RasterGpuToRasterCpuConverter {
 	/// Buffer that receives the texture contents; created with `COPY_DST | MAP_READ` usage.
 	buffer: wgpu::Buffer,
 	/// Width of the source texture in pixels.
 	width: u32,
 	/// Height of the source texture in pixels.
 	height: u32,
 	/// Number of bytes of actual pixel data per row (`width * 4`).
 	unpadded_bytes_per_row: u32,
 	/// Row pitch used in the buffer: `unpadded_bytes_per_row` rounded up to `wgpu::COPY_BYTES_PER_ROW_ALIGNMENT`.
 	padded_bytes_per_row: u32,
 }
 impl RasterGpuToRasterCpuConverter {
 	/// Allocates a mappable buffer and records a copy of the whole texture (mip level 0) into it.
 	///
 	/// This only records the copy command on `encoder`; it does not submit anything. The encoder must be finished
 	/// and submitted to the queue by the caller before [`Self::convert`] can observe the texture data.
 	///
 	/// Rows in the buffer are padded to `wgpu::COPY_BYTES_PER_ROW_ALIGNMENT`, as required for the `bytes_per_row`
 	/// of a texture-to-buffer copy.
 	fn new(device: &std::sync::Arc<wgpu::Device>, encoder: &mut wgpu::CommandEncoder, data_gpu: Raster<GPU>) -> Self {
 		let texture = data_gpu.data();
 		let width = texture.width();
 		let height = texture.height();
 		let bytes_per_pixel = 4; // RGBA8
 		let unpadded_bytes_per_row = width * bytes_per_pixel;
 		let align = wgpu::COPY_BYTES_PER_ROW_ALIGNMENT;
 		let padded_bytes_per_row = unpadded_bytes_per_row.div_ceil(align) * align;
 		// Widen before multiplying so the total size cannot overflow `u32`
 		let buffer_size = padded_bytes_per_row as u64 * height as u64;
 		let buffer = device.create_buffer(&wgpu::BufferDescriptor {
 			label: Some("texture_download_buffer"),
 			size: buffer_size,
 			usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
 			mapped_at_creation: false,
 		});
 		encoder.copy_texture_to_buffer(
 			wgpu::TexelCopyTextureInfo {
 				texture,
 				mip_level: 0,
 				origin: wgpu::Origin3d::ZERO,
 				aspect: wgpu::TextureAspect::All,
 			},
 			wgpu::TexelCopyBufferInfo {
 				buffer: &buffer,
 				layout: wgpu::TexelCopyBufferLayout {
 					offset: 0,
 					bytes_per_row: Some(padded_bytes_per_row),
 					rows_per_image: Some(height),
 				},
 			},
 			Extent3d {
 				width,
 				height,
 				depth_or_array_layers: 1,
 			},
 		);
 		Self {
 			buffer,
 			width,
 			height,
 			unpadded_bytes_per_row,
 			padded_bytes_per_row,
 		}
 	}
 	/// Maps the buffer for reading and decodes its contents into a CPU raster.
 	///
 	/// Each group of 4 bytes is interpreted as one sRGB RGBA8 pixel; the alignment padding at the end of every row is skipped.
 	///
 	/// # Errors
 	/// Returns the [`wgpu::BufferAsyncError`] reported by the mapping callback if mapping the buffer fails.
 	///
 	/// # Panics
 	/// Panics if the mapping callback is dropped without ever being invoked.
 	async fn convert(self) -> Result<Raster<CPU>, wgpu::BufferAsyncError> {
 		let buffer_slice = self.buffer.slice(..);
 		// `map_async` reports completion through a callback, so a oneshot channel bridges it to this future
 		let (sender, receiver) = futures::channel::oneshot::channel();
 		buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
 			// Sending only fails if the receiving future has already been dropped, so the result is no longer needed
 			let _ = sender.send(result);
 		});
 		receiver.await.expect("Failed to receive map result")?;
 		let view = buffer_slice.get_mapped_range();
 		let row_stride = self.padded_bytes_per_row as usize;
 		let row_bytes = self.unpadded_bytes_per_row as usize;
 		// Widen each dimension before multiplying: the product of two `u32` dimensions can exceed `u32::MAX`
 		let pixel_count = self.width as usize * self.height as usize;
 		let mut cpu_data: Vec<Color> = Vec::with_capacity(pixel_count);
 		for row in 0..self.height as usize {
 			let start = row * row_stride;
 			// Only the first `row_bytes` of each padded row contain pixel data
 			let row_slice = &view[start..start + row_bytes];
 			cpu_data.extend(row_slice.chunks_exact(4).map(|px| Color::from_rgba8_srgb(px[0], px[1], px[2], px[3])));
 		}
 		// The mapped view must be released before the buffer can be unmapped
 		drop(view);
 		self.buffer.unmap();
 		let cpu_image = Image {
 			data: cpu_data,
 			width: self.width,
 			height: self.height,
 			base64_string: None,
 		};
 		Ok(Raster::new_cpu(cpu_image))
 	}
 }
 /// Identity conversion: a GPU raster table is already in the requested representation and is returned unchanged.
 impl<'i> Convert<Table<Raster<GPU>>, &'i WgpuExecutor> for Table<Raster<GPU>> {
 	async fn convert(self, _: Footprint, _: &'i WgpuExecutor) -> Table<Raster<GPU>> {
 		self
 	}
 }
 /// Uploads every image of a CPU raster table to its own GPU texture.
 ///
 /// The transform, alpha blending and source node ID of each row are copied to the corresponding output row.
 impl<'i> Convert<Table<Raster<GPU>>, &'i WgpuExecutor> for Table<Raster<CPU>> {
 	async fn convert(self, _: Footprint, executor: &'i WgpuExecutor) -> Table<Raster<GPU>> {
 		let context = &executor.context;
 		let uploaded = self
 			.iter()
 			.map(|row| TableRow {
 				element: Raster::new_gpu(upload_to_texture(&context.device, &context.queue, row.element)),
 				transform: *row.transform,
 				alpha_blending: *row.alpha_blending,
 				source_node_id: *row.source_node_id,
 			})
 			.collect();
 		context.queue.submit([]);
 		uploaded
 	}
 }
 /// Uploads one CPU raster to a newly created GPU texture.
 impl<'i> Convert<Raster<GPU>, &'i WgpuExecutor> for Raster<CPU> {
 	async fn convert(self, _: Footprint, executor: &'i WgpuExecutor) -> Raster<GPU> {
 		let context = &executor.context;
 		let uploaded = upload_to_texture(&context.device, &context.queue, &self);
 		context.queue.submit([]);
 		Raster::new_gpu(uploaded)
 	}
 }
 /// Identity conversion: a CPU raster table is already in the requested representation and is returned unchanged.
 impl<'i> Convert<Table<Raster<CPU>>, &'i WgpuExecutor> for Table<Raster<CPU>> {
 	async fn convert(self, _: Footprint, _: &'i WgpuExecutor) -> Table<Raster<CPU>> {
 		self
 	}
 }
 /// Converts a GPU raster table to CPU by downloading all of its textures in one batch.
 ///
 /// The texture-to-buffer copies for every row are recorded into a single command encoder and submitted together.
 /// All download buffers are then mapped concurrently and decoded into CPU images. The transform, alpha blending
 /// and source node ID of each row are carried over to the corresponding output row.
 ///
 /// # Panics
 /// Panics if any of the texture downloads fails.
 impl<'i> Convert<Table<Raster<CPU>>, &'i WgpuExecutor> for Table<Raster<GPU>> {
 	async fn convert(self, _: Footprint, executor: &'i WgpuExecutor) -> Table<Raster<CPU>> {
 		let device = &executor.context.device;
 		let queue = &executor.context.queue;
 		let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
 			label: Some("batch_texture_download_encoder"),
 		});
 		let mut converters = Vec::new();
 		let mut rows_meta = Vec::new();
 		for row in self {
 			converters.push(RasterGpuToRasterCpuConverter::new(device, &mut encoder, row.element));
 			// Keep the per-row metadata aside until the pixel data has been downloaded
 			rows_meta.push(TableRow {
 				element: (),
 				transform: row.transform,
 				alpha_blending: row.alpha_blending,
 				source_node_id: row.source_node_id,
 			});
 		}
 		// A single submission covers the copies of all rows
 		queue.submit([encoder.finish()]);
 		// Keep the underlying mapping error in the panic message instead of replacing it with a fixed string
 		let rasters = futures::future::try_join_all(converters.into_iter().map(|converter| converter.convert()))
 			.await
 			.expect("Failed to download texture data");
 		// `try_join_all` preserves input order, so results line up with the stored metadata
 		rasters
 			.into_iter()
 			.zip(rows_meta)
 			.map(|(element, row)| TableRow {
 				element,
 				transform: row.transform,
 				alpha_blending: row.alpha_blending,
 				source_node_id: row.source_node_id,
 			})
 			.collect()
 	}
 }
 /// Downloads the texture of one GPU raster into a CPU raster.
 ///
 /// Panics if the download fails.
 impl<'i> Convert<Raster<CPU>, &'i WgpuExecutor> for Raster<GPU> {
 	async fn convert(self, _: Footprint, executor: &'i WgpuExecutor) -> Raster<CPU> {
 		let device = &executor.context.device;
 		let queue = &executor.context.queue;
 		let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
 			label: Some("single_texture_download_encoder"),
 		});
 		// Record the copy, then submit it so the buffer is filled before it gets mapped
 		let download = RasterGpuToRasterCpuConverter::new(device, &mut encoder, self);
 		let commands = encoder.finish();
 		queue.submit([commands]);
 		let downloaded = download.convert().await;
 		downloaded.expect("Failed to download texture data")
 	}
 }
 /// Node for uploading textures from CPU to GPU. This is now deprecated and
 /// we should use the Convert node in the future.
 ///
 /// Accepts any input implementing `Convert` to a GPU raster table (the listed implementations are CPU and GPU
 /// raster tables) and converts it using the given `WgpuExecutor`. The conversion is invoked with `Footprint::DEFAULT`.
 #[node_macro::node(category(""))]
 pub async fn upload_texture<'a: 'n, T: Convert<Table<Raster<GPU>>, &'a WgpuExecutor>>(
 	_: impl Ctx,
 	#[implementations(Table<Raster<CPU>>, Table<Raster<GPU>>)] input: T,
 	executor: &'a WgpuExecutor,
 ) -> Table<Raster<GPU>> {
 	input.convert(Footprint::DEFAULT, executor).await
 }
--- a/node-graph/wgpu-executor/src/texture_upload.rs
+++ b/node-graph/wgpu-executor/src/texture_upload.rs
@ -1,52 +0,0 @@
 use crate::WgpuExecutor;
 use graphene_core::Ctx;
 use graphene_core::color::SRGBA8;
 use graphene_core::raster_types::{CPU, GPU, Raster};
 use graphene_core::table::{Table, TableRow};
 use wgpu::util::{DeviceExt, TextureDataOrder};
 use wgpu::{Extent3d, TextureDescriptor, TextureDimension, TextureFormat, TextureUsages};
 #[node_macro::node(category(""))]
 pub async fn upload_texture<'a: 'n>(_: impl Ctx, input: Table<Raster<CPU>>, executor: &'a WgpuExecutor) -> Table<Raster<GPU>> {
 	let device = &executor.context.device;
 	let queue = &executor.context.queue;
 	let table = input
 		.iter()
 		.map(|row| {
 			let image = row.element;
 			let rgba8_data: Vec<SRGBA8> = image.data.iter().map(|x| (*x).into()).collect();
 			let texture = device.create_texture_with_data(
 				queue,
 				&TextureDescriptor {
 					label: Some("upload_texture node texture"),
 					size: Extent3d {
 						width: image.width,
 						height: image.height,
 						depth_or_array_layers: 1,
 					},
 					mip_level_count: 1,
 					sample_count: 1,
 					dimension: TextureDimension::D2,
 					format: TextureFormat::Rgba8UnormSrgb,
 					// I don't know what usages are actually necessary
 					usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST | TextureUsages::COPY_SRC,
 					view_formats: &[],
 				},
 				TextureDataOrder::LayerMajor,
 				bytemuck::cast_slice(rgba8_data.as_slice()),
 			);
 			TableRow {
 				element: Raster::new_gpu(texture),
 				transform: *row.transform,
 				alpha_blending: *row.alpha_blending,
 				source_node_id: *row.source_node_id,
 			}
 		})
 		.collect();
 	queue.submit([]);
 	table
 }