Implement Convert trait to convert between CPU and GPU images (#3194)

* Add upload texture trait

* Make convert trait use explicit converter

* Add gpu texture download implementation

* Add footprint to convert trait

* Cleanup texture upload / download

* Download wgpu textures aligned

* abstract texture download into converter helper

* rename module

The module is not only doing uploads anymore;
"conversion" looks like an OK name.

* Remove into_iter call and intermediate vector allocation

---------

Co-authored-by: Timon Schelling <me@timon.zip>
This commit is contained in:
Dennis Kobert 2025-09-25 20:54:51 +02:00
parent 4bb1d05fc3
commit ffc74273cc
No known key found for this signature in database
GPG Key ID: 5A4358CB9530F933
9 changed files with 309 additions and 72 deletions

View File

@ -980,7 +980,7 @@ fn static_nodes() -> Vec<DocumentNodeDefinition> {
DocumentNode {
inputs: vec![NodeInput::import(concrete!(Table<Raster<CPU>>), 0), NodeInput::node(NodeId(0), 0)],
call_argument: generic!(T),
implementation: DocumentNodeImplementation::ProtoNode(wgpu_executor::texture_upload::upload_texture::IDENTIFIER),
implementation: DocumentNodeImplementation::ProtoNode(wgpu_executor::texture_conversion::upload_texture::IDENTIFIER),
..Default::default()
},
DocumentNode {

View File

@ -1,5 +1,6 @@
use crate::Node;
use graphene_core_shaders::Ctx;
use crate::{ExtractFootprint, Node, transform::Footprint};
use std::marker::PhantomData;
// TODO: Rename to "Passthrough"
@ -49,16 +50,16 @@ fn into<'i, T: 'i + Send + Into<O>, O: 'i + Send>(_: impl Ctx, value: T, _out_ty
/// The [`Convert`] trait allows for conversions that cannot go through the normal [`Into`] trait.
/// This covers lossy casts between Rust primitive numeric types as well as conversions that need a [`Footprint`] and an explicit converter value of type `C`.
pub trait Convert<T>: Sized {
pub trait Convert<T, C>: Sized {
/// Converts this type into the (usually inferred) output type.
#[must_use]
fn convert(self) -> T;
fn convert(self, footprint: Footprint, converter: C) -> impl Future<Output = T> + Send;
}
impl<T: ToString> Convert<String> for T {
impl<T: ToString + Send> Convert<String, ()> for T {
/// Converts this type into a `String` using its `ToString` implementation.
#[inline]
fn convert(self) -> String {
async fn convert(self, _: Footprint, _converter: ()) -> String {
self.to_string()
}
}
@ -66,8 +67,8 @@ impl<T: ToString> Convert<String> for T {
/// Implements the [`Convert`] trait for conversion between the cartesian product of Rust's primitive numeric types.
macro_rules! impl_convert {
($from:ty, $to:ty) => {
impl Convert<$to> for $from {
fn convert(self) -> $to {
impl Convert<$to, ()> for $from {
async fn convert(self, _: Footprint, _: ()) -> $to {
self as $to
}
}
@ -105,8 +106,8 @@ impl_convert!(isize);
impl_convert!(usize);
#[node_macro::node(skip_impl)]
fn convert<'i, T: 'i + Send + Convert<O>, O: 'i + Send>(_: impl Ctx, value: T, _out_ty: PhantomData<O>) -> O {
value.convert()
async fn convert<'i, T: 'i + Send + Convert<O, C>, O: 'i + Send, C: 'i + Send>(ctx: impl Ctx + ExtractFootprint, value: T, converter: C, _out_ty: PhantomData<O>) -> O {
value.convert(*ctx.try_footprint().unwrap_or(&Footprint::DEFAULT), converter).await
}
#[cfg(test)]

View File

@ -9,7 +9,7 @@ use std::sync::{LazyLock, Mutex};
pub use graphene_core_shaders::registry::types;
// Translation struct between macro and definition
#[derive(Clone)]
#[derive(Clone, Debug)]
pub struct NodeMetadata {
pub display_name: &'static str,
pub category: Option<&'static str>,

View File

@ -61,6 +61,14 @@ fn node_registry() -> HashMap<ProtoNodeIdentifier, HashMap<NodeIOTypes, NodeCons
convert_node!(from: DVec2, to: String),
convert_node!(from: IVec2, to: String),
convert_node!(from: DAffine2, to: String),
#[cfg(feature = "gpu")]
convert_node!(from: Table<Raster<CPU>>, to: Table<Raster<CPU>>, converter: &WgpuExecutor),
#[cfg(feature = "gpu")]
convert_node!(from: Table<Raster<CPU>>, to: Table<Raster<GPU>>, converter: &WgpuExecutor),
#[cfg(feature = "gpu")]
convert_node!(from: Table<Raster<GPU>>, to: Table<Raster<GPU>>, converter: &WgpuExecutor),
#[cfg(feature = "gpu")]
convert_node!(from: Table<Raster<GPU>>, to: Table<Raster<CPU>>, converter: &WgpuExecutor),
// =============
// MONITOR NODES
// =============
@ -394,21 +402,30 @@ mod node_registry_macros {
x
}};
(from: $from:ty, to: $to:ty) => {
convert_node!(from: $from, to: $to, converter: ())
};
(from: $from:ty, to: $to:ty, converter: $convert:ty) => {
(
ProtoNodeIdentifier::new(concat!["graphene_core::ops::ConvertNode<", stringify!($to), ">"]),
|mut args| {
Box::pin(async move {
let node = graphene_core::ops::ConvertNode::new(graphene_std::any::downcast_node::<Context, $from>(args.pop().unwrap()),
graphene_std::any::FutureWrapperNode::new(graphene_std::value::ClonedNode::new(std::marker::PhantomData::<$to>)) );
let mut args = args.drain(..);
let node = graphene_core::ops::ConvertNode::new(
graphene_std::any::downcast_node::<Context, $from>(args.next().expect("Convert node did not get first argument")),
graphene_std::any::downcast_node::<Context, $convert>(args.next().expect("Convert node did not get converter argument")),
graphene_std::any::FutureWrapperNode::new(graphene_std::value::ClonedNode::new(std::marker::PhantomData::<$to>))
);
let any: DynAnyNode<Context, $to, _> = graphene_std::any::DynAnyNode::new(node);
Box::new(any) as TypeErasedBox
})
},
{
let node = graphene_core::ops::ConvertNode::new(graphene_std::any::PanicNode::<Context, core::pin::Pin<Box<dyn core::future::Future<Output = $from> + Send>>>::new(),
graphene_std::any::FutureWrapperNode::new(graphene_std::value::ClonedNode::new(std::marker::PhantomData::<$to>)) );
let params = vec![fn_type_fut!(Context, $from)];
let node = graphene_core::ops::ConvertNode::new(
graphene_std::any::PanicNode::<Context, core::pin::Pin<Box<dyn core::future::Future<Output = $from> + Send>>>::new(),
graphene_std::any::PanicNode::<Context, core::pin::Pin<Box<dyn core::future::Future<Output = $convert> + Send>>>::new(),
graphene_std::any::FutureWrapperNode::new(graphene_std::value::ClonedNode::new(std::marker::PhantomData::<$to>))
);
let params = vec![fn_type_fut!(Context, $from), fn_type_fut!(Context, $convert)];
let node_io = NodeIO::<'_, Context>::to_async_node_io(&node, params);
node_io
},

View File

@ -128,7 +128,7 @@ impl PerPixelAdjustCodegen<'_> {
#(pub #uniform_members),*
}
};
let uniform_struct_shader_struct_derive = crate::buffer_struct::derive_buffer_struct_struct(&self.crate_ident, &uniform_struct)?;
let uniform_struct_shader_struct_derive = crate::buffer_struct::derive_buffer_struct_struct(self.crate_ident, &uniform_struct)?;
let image_params = self
.params

View File

@ -67,6 +67,7 @@ pub fn generate_node_substitutions() -> HashMap<ProtoNodeIdentifier, DocumentNod
1 => {
let input = inputs.iter().next().unwrap();
let input_ty = input.nested_type();
let mut inputs = vec![NodeInput::import(input.clone(), i)];
let into_node_identifier = ProtoNodeIdentifier {
name: format!("graphene_core::ops::IntoNode<{}>", input_ty.clone()).into(),
@ -80,13 +81,14 @@ pub fn generate_node_substitutions() -> HashMap<ProtoNodeIdentifier, DocumentNod
into_node_identifier
} else if into_node_registry.keys().any(|ident| ident.name.as_ref() == convert_node_identifier.name.as_ref()) {
generated_nodes += 1;
inputs.push(NodeInput::value(TaggedValue::None, false));
convert_node_identifier
} else {
identity_node.clone()
};
DocumentNode {
inputs: vec![NodeInput::import(input.clone(), i)],
inputs,
implementation: DocumentNodeImplementation::ProtoNode(proto_node),
visible: true,
..Default::default()

View File

@ -1,6 +1,6 @@
mod context;
pub mod shader_runtime;
pub mod texture_upload;
pub mod texture_conversion;
use crate::shader_runtime::ShaderRuntime;
use anyhow::Result;

View File

@ -0,0 +1,269 @@
use crate::WgpuExecutor;
use graphene_core::Color;
use graphene_core::Ctx;
use graphene_core::color::SRGBA8;
use graphene_core::ops::Convert;
use graphene_core::raster::Image;
use graphene_core::raster_types::{CPU, GPU, Raster};
use graphene_core::table::{Table, TableRow};
use graphene_core::transform::Footprint;
use wgpu::util::{DeviceExt, TextureDataOrder};
use wgpu::{Extent3d, TextureDescriptor, TextureDimension, TextureFormat, TextureUsages};
/// Creates a GPU texture holding the pixels of a CPU raster.
///
/// Every pixel is converted to [`SRGBA8`] and the resulting bytes are written into a new
/// 2D `Rgba8UnormSrgb` texture with a single mip level and a single sample. The texture is
/// created with the `TEXTURE_BINDING`, `COPY_DST` and `COPY_SRC` usages.
fn upload_to_texture(device: &std::sync::Arc<wgpu::Device>, queue: &std::sync::Arc<wgpu::Queue>, image: &Raster<CPU>) -> wgpu::Texture {
    let size = Extent3d {
        width: image.width,
        height: image.height,
        depth_or_array_layers: 1,
    };
    let descriptor = TextureDescriptor {
        label: Some("upload_texture node texture"),
        size,
        mip_level_count: 1,
        sample_count: 1,
        dimension: TextureDimension::D2,
        format: TextureFormat::Rgba8UnormSrgb,
        usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST | TextureUsages::COPY_SRC,
        view_formats: &[],
    };

    // The texture format stores 8-bit channels, so the pixels are converted before upload.
    let pixels: Vec<SRGBA8> = image.data.iter().copied().map(Into::into).collect();

    device.create_texture_with_data(queue, &descriptor, TextureDataOrder::LayerMajor, bytemuck::cast_slice(pixels.as_slice()))
}
/// Converts a `Raster<GPU>` texture to `Raster<CPU>` by downloading the underlying texture data.
///
/// Construction records a texture-to-buffer copy into a command encoder; the pixel data is
/// read back from the buffer once that encoder has been submitted.
///
/// Assumptions:
/// - 2D texture, mip level 0
/// - 4 bytes-per-pixel RGBA8
/// - Texture has COPY_SRC usage
struct RasterGpuToRasterCpuConverter {
/// Buffer (created with `COPY_DST | MAP_READ` usage) that receives the texture contents.
buffer: wgpu::Buffer,
/// Width of the source texture in pixels.
width: u32,
/// Height of the source texture in pixels.
height: u32,
/// Number of bytes of actual pixel data per row (`width * 4`).
unpadded_bytes_per_row: u32,
/// Row stride in the buffer: `unpadded_bytes_per_row` rounded up to `wgpu::COPY_BYTES_PER_ROW_ALIGNMENT`.
padded_bytes_per_row: u32,
}
impl RasterGpuToRasterCpuConverter {
    /// Records a copy of the texture behind `data_gpu` into a newly created download buffer.
    ///
    /// This only records the copy command into `encoder`. The caller has to finish and submit
    /// the encoder before the data can be read back with [`Self::convert`].
    fn new(device: &std::sync::Arc<wgpu::Device>, encoder: &mut wgpu::CommandEncoder, data_gpu: Raster<GPU>) -> Self {
        let texture = data_gpu.data();
        let width = texture.width();
        let height = texture.height();
        let bytes_per_pixel = 4; // RGBA8
        let unpadded_bytes_per_row = width * bytes_per_pixel;

        // Texture-to-buffer copies require each row to start at a multiple of this alignment,
        // so the row stride in the buffer is rounded up accordingly.
        let align = wgpu::COPY_BYTES_PER_ROW_ALIGNMENT;
        let padded_bytes_per_row = unpadded_bytes_per_row.div_ceil(align) * align;
        let buffer_size = padded_bytes_per_row as u64 * height as u64;

        let buffer = device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("texture_download_buffer"),
            size: buffer_size,
            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
            mapped_at_creation: false,
        });

        encoder.copy_texture_to_buffer(
            wgpu::TexelCopyTextureInfo {
                texture,
                mip_level: 0,
                origin: wgpu::Origin3d::ZERO,
                aspect: wgpu::TextureAspect::All,
            },
            wgpu::TexelCopyBufferInfo {
                buffer: &buffer,
                layout: wgpu::TexelCopyBufferLayout {
                    offset: 0,
                    bytes_per_row: Some(padded_bytes_per_row),
                    rows_per_image: Some(height),
                },
            },
            Extent3d {
                width,
                height,
                depth_or_array_layers: 1,
            },
        );

        Self {
            buffer,
            width,
            height,
            unpadded_bytes_per_row,
            padded_bytes_per_row,
        }
    }

    /// Maps the download buffer and decodes its contents into a CPU raster.
    ///
    /// Each row is read from the padded buffer layout, the padding bytes are skipped, and every
    /// 4-byte pixel is decoded with `Color::from_rgba8_srgb`.
    ///
    /// # Errors
    ///
    /// Returns the `wgpu::BufferAsyncError` reported by the map callback if mapping fails.
    ///
    /// # Panics
    ///
    /// Panics if the map callback is dropped without ever reporting a result.
    async fn convert(self) -> Result<Raster<CPU>, wgpu::BufferAsyncError> {
        let buffer_slice = self.buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        // NOTE(review): the callback only fires once the device processes the mapping request;
        // presumably the device is polled elsewhere on native targets — TODO confirm.
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            let _ = sender.send(result);
        });
        receiver.await.expect("Failed to receive map result")?;

        let view = buffer_slice.get_mapped_range();
        let row_stride = self.padded_bytes_per_row as usize;
        let row_bytes = self.unpadded_bytes_per_row as usize;

        // Widen before multiplying so the pixel count cannot overflow `u32`.
        let pixel_count = self.width as usize * self.height as usize;
        let mut cpu_data: Vec<Color> = Vec::with_capacity(pixel_count);
        for row in 0..self.height as usize {
            let start = row * row_stride;
            let row_slice = &view[start..start + row_bytes];
            cpu_data.extend(row_slice.chunks_exact(4).map(|px| Color::from_rgba8_srgb(px[0], px[1], px[2], px[3])));
        }

        // The mapped view has to be released before the buffer may be unmapped.
        drop(view);
        self.buffer.unmap();

        let cpu_image = Image {
            data: cpu_data,
            width: self.width,
            height: self.height,
            base64_string: None,
        };
        Ok(Raster::new_cpu(cpu_image))
    }
}
/// Identity conversion for GPU raster tables.
///
/// The table is already in the requested representation, so it is returned unchanged;
/// neither the footprint nor the executor is used.
impl<'i> Convert<Table<Raster<GPU>>, &'i WgpuExecutor> for Table<Raster<GPU>> {
    async fn convert(self, _footprint: Footprint, _executor: &'i WgpuExecutor) -> Table<Raster<GPU>> {
        self
    }
}
/// Uploads every image of a CPU raster table to its own GPU texture.
///
/// The transform, alpha blending and source node id of each row are copied to the
/// corresponding output row. The footprint is ignored.
impl<'i> Convert<Table<Raster<GPU>>, &'i WgpuExecutor> for Table<Raster<CPU>> {
    async fn convert(self, _: Footprint, executor: &'i WgpuExecutor) -> Table<Raster<GPU>> {
        let (device, queue) = (&executor.context.device, &executor.context.queue);

        let gpu_table: Table<Raster<GPU>> = self
            .iter()
            .map(|cpu_row| TableRow {
                element: Raster::new_gpu(upload_to_texture(device, queue, cpu_row.element)),
                transform: *cpu_row.transform,
                alpha_blending: *cpu_row.alpha_blending,
                source_node_id: *cpu_row.source_node_id,
            })
            .collect();

        // Submit with no command buffers after all uploads have been issued.
        queue.submit([]);
        gpu_table
    }
}
/// Uploads a single CPU raster to a new GPU texture. The footprint is ignored.
impl<'i> Convert<Raster<GPU>, &'i WgpuExecutor> for Raster<CPU> {
    async fn convert(self, _: Footprint, executor: &'i WgpuExecutor) -> Raster<GPU> {
        let context = &executor.context;
        let texture = upload_to_texture(&context.device, &context.queue, &self);
        // Submit with no command buffers after the upload has been issued.
        context.queue.submit([]);
        Raster::new_gpu(texture)
    }
}
/// Identity conversion for CPU raster tables.
///
/// The table is already in the requested representation, so it is returned unchanged;
/// neither the footprint nor the executor is used.
impl<'i> Convert<Table<Raster<CPU>>, &'i WgpuExecutor> for Table<Raster<CPU>> {
    async fn convert(self, _footprint: Footprint, _executor: &'i WgpuExecutor) -> Table<Raster<CPU>> {
        self
    }
}
/// Converts a GPU raster table to a CPU raster table by downloading every texture.
///
/// The texture-to-buffer copies for all rows are recorded into a single command encoder and
/// submitted in one batch. The download buffers are then mapped concurrently and decoded into
/// CPU images. The transform, alpha blending and source node id of each row are carried over
/// unchanged. The footprint is ignored.
///
/// # Panics
///
/// Panics if mapping any of the download buffers fails.
impl<'i> Convert<Table<Raster<CPU>>, &'i WgpuExecutor> for Table<Raster<GPU>> {
    async fn convert(self, _: Footprint, executor: &'i WgpuExecutor) -> Table<Raster<CPU>> {
        let device = &executor.context.device;
        let queue = &executor.context.queue;

        let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
            label: Some("batch_texture_download_encoder"),
        });

        // Record one copy per row, keeping the row metadata aside until the pixels are available.
        let mut converters = Vec::new();
        let mut rows_meta = Vec::new();
        for row in self {
            converters.push(RasterGpuToRasterCpuConverter::new(device, &mut encoder, row.element));
            rows_meta.push((row.transform, row.alpha_blending, row.source_node_id));
        }

        queue.submit([encoder.finish()]);

        // Keep the underlying `BufferAsyncError` in the panic message instead of replacing it.
        let rasters = futures::future::try_join_all(converters.into_iter().map(|converter| converter.convert()))
            .await
            .expect("Failed to download texture data");

        // `try_join_all` preserves input order, so results line up with the stored metadata.
        rasters
            .into_iter()
            .zip(rows_meta)
            .map(|(element, (transform, alpha_blending, source_node_id))| TableRow {
                element,
                transform,
                alpha_blending,
                source_node_id,
            })
            .collect()
    }
}
/// Downloads the texture of a single GPU raster into a CPU raster. The footprint is ignored.
///
/// Panics if the texture data cannot be read back.
impl<'i> Convert<Raster<CPU>, &'i WgpuExecutor> for Raster<GPU> {
    async fn convert(self, _: Footprint, executor: &'i WgpuExecutor) -> Raster<CPU> {
        let context = &executor.context;

        let mut encoder = context.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
            label: Some("single_texture_download_encoder"),
        });
        let download = RasterGpuToRasterCpuConverter::new(&context.device, &mut encoder, self);

        // The copy has to be submitted before the buffer can be mapped and read.
        context.queue.submit([encoder.finish()]);

        let downloaded = download.convert().await;
        downloaded.expect("Failed to download texture data")
    }
}
/// Node for uploading textures from CPU to GPU. This is now deprecated and
/// we should use the Convert node in the future.
///
/// Accepts a table of CPU rasters or a table of GPU rasters and converts it to a table of
/// GPU rasters through the `Convert` trait, passing the `WgpuExecutor` as the converter and
/// `Footprint::DEFAULT` as the footprint.
#[node_macro::node(category(""))]
pub async fn upload_texture<'a: 'n, T: Convert<Table<Raster<GPU>>, &'a WgpuExecutor>>(
_: impl Ctx,
#[implementations(Table<Raster<CPU>>, Table<Raster<GPU>>)] input: T,
executor: &'a WgpuExecutor,
) -> Table<Raster<GPU>> {
input.convert(Footprint::DEFAULT, executor).await
}

View File

@ -1,52 +0,0 @@
use crate::WgpuExecutor;
use graphene_core::Ctx;
use graphene_core::color::SRGBA8;
use graphene_core::raster_types::{CPU, GPU, Raster};
use graphene_core::table::{Table, TableRow};
use wgpu::util::{DeviceExt, TextureDataOrder};
use wgpu::{Extent3d, TextureDescriptor, TextureDimension, TextureFormat, TextureUsages};
/// Uploads every image of a CPU raster table to its own `Rgba8UnormSrgb` GPU texture.
///
/// The transform, alpha blending and source node id of each row are copied to the
/// corresponding output row.
#[node_macro::node(category(""))]
pub async fn upload_texture<'a: 'n>(_: impl Ctx, input: Table<Raster<CPU>>, executor: &'a WgpuExecutor) -> Table<Raster<GPU>> {
let device = &executor.context.device;
let queue = &executor.context.queue;
let table = input
.iter()
.map(|row| {
let image = row.element;
// Convert each pixel to `SRGBA8` so the data matches the texture format below.
let rgba8_data: Vec<SRGBA8> = image.data.iter().map(|x| (*x).into()).collect();
let texture = device.create_texture_with_data(
queue,
&TextureDescriptor {
label: Some("upload_texture node texture"),
size: Extent3d {
width: image.width,
height: image.height,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: TextureDimension::D2,
format: TextureFormat::Rgba8UnormSrgb,
// I don't know what usages are actually necessary
usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST | TextureUsages::COPY_SRC,
view_formats: &[],
},
TextureDataOrder::LayerMajor,
bytemuck::cast_slice(rgba8_data.as_slice()),
);
// Carry the row metadata over to the GPU row unchanged.
TableRow {
element: Raster::new_gpu(texture),
transform: *row.transform,
alpha_blending: *row.alpha_blending,
source_node_id: *row.source_node_id,
}
})
.collect();
// Submit with no command buffers after all uploads have been issued.
queue.submit([]);
table
}