From bb93d243a01fb1f413e2ef146dae9100c58aaf80 Mon Sep 17 00:00:00 2001
From: Orson Peters <orsonpeters@gmail.com>
Date: Thu, 25 May 2023 10:51:40 +0200
Subject: [PATCH] Optimize colorspace conversion (#1228)

* Enabled cross-crate inlining for release builds.

* Sprinkled inline-enabling directives in color.rs.

* Reduced abstraction in color channel handling for better inlining.

* Alpha should not use gamma.

* Oops, didn't compile.

* Added discrete sRGB <-> linear float conversion.

* Use new float <-> sRGB conversion.

* Improved comments.

* Don't convert fully transparent pixels.
---
 Cargo.toml                                   |   3 +
 node-graph/gcore/src/raster.rs               |  82 ++++----
 node-graph/gcore/src/raster/color.rs         |  58 ++++++
 node-graph/gcore/src/raster/discrete_srgb.rs | 185 +++++++++++++++++++
 node-graph/gcore/src/raster/image.rs         |  39 ++--
 node-graph/gstd/src/brush.rs                 |   2 +-
 node-graph/gstd/src/main.rs                  |   4 +-
 node-graph/gstd/src/raster.rs                |   4 +-
 8 files changed, 322 insertions(+), 55 deletions(-)
 create mode 100644 node-graph/gcore/src/raster/discrete_srgb.rs
diff --git a/Cargo.toml b/Cargo.toml
index ee4de000..510bf932 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -58,3 +58,6 @@ opt-level = 3
 
 [profile.dev]
 opt-level = 1
+
+[profile.release]
+lto = "thin"
diff --git a/node-graph/gcore/src/raster.rs b/node-graph/gcore/src/raster.rs
index 0f291357..aa2b58cd 100644
--- a/node-graph/gcore/src/raster.rs
+++ b/node-graph/gcore/src/raster.rs
@@ -4,10 +4,6 @@ use crate::Node;
 
 use bytemuck::{Pod, Zeroable};
 use glam::DVec2;
-#[cfg(not(target_arch = "spirv"))]
-use num_traits::{cast::cast as num_cast, Num, NumCast};
-#[cfg(target_arch = "spirv")]
-use spirv_std::num_traits::{cast::cast as num_cast, float::Float, FromPrimitive, Num, NumCast, ToPrimitive};
 
 pub use self::color::{Color, Luma};
 
@@ -15,44 +11,57 @@ pub mod adjustments;
 #[cfg(not(target_arch = "spirv"))]
 pub mod brightness_contrast;
 pub mod color;
+pub mod discrete_srgb;
 pub use adjustments::*;
 
-pub trait Channel: Copy + Debug + Num + NumCast {
-	fn to_linear<Out: Linear>(self) -> Out;
-	fn from_linear<In: Linear>(linear: In) -> Self;
-	fn to_f32(self) -> f32 {
-		num_cast(self).expect("Failed to convert channel to f32")
-	}
-	fn from_f32(value: f32) -> Self {
-		num_cast(value).expect("Failed to convert f32 to channel")
-	}
-	fn to_f64(self) -> f64 {
-		num_cast(self).expect("Failed to convert channel to f64")
-	}
-	fn from_f64(value: f64) -> Self {
-		num_cast(value).expect("Failed to convert f64 to channel")
-	}
-	fn to_channel<Out: Channel>(self) -> Out {
-		num_cast(self).expect("Failed to convert channel to channel")
-	}
+pub trait Linear {
+	fn from_f32(x: f32) -> Self;
+	fn to_f32(self) -> f32;
+	fn from_f64(x: f64) -> Self;
+	fn to_f64(self) -> f64;
 }
 
-pub trait Linear: NumCast + Num {}
-impl Linear for f32 {}
-impl Linear for f64 {}
+#[rustfmt::skip]
+impl Linear for f32 {
+	#[inline(always)] fn from_f32(x: f32) -> Self { x }
+	#[inline(always)] fn to_f32(self) -> f32 { self }
+	#[inline(always)] fn from_f64(x: f64) -> Self { x as f32 }
+	#[inline(always)] fn to_f64(self) -> f64 { self as f64 }
+}
+
+#[rustfmt::skip]
+impl Linear for f64 {
+	#[inline(always)] fn from_f32(x: f32) -> Self { x as f64 }
+	#[inline(always)] fn to_f32(self) -> f32 { self as f32 }
+	#[inline(always)] fn from_f64(x: f64) -> Self { x }
+	#[inline(always)] fn to_f64(self) -> f64 { self }
+}
+
+pub trait Channel: Copy + Debug {
+	fn to_linear<Out: Linear>(self) -> Out;
+	fn from_linear<In: Linear>(linear: In) -> Self;
+}
+
+pub trait LinearChannel: Channel {
+	fn cast_linear_channel<Out: LinearChannel>(self) -> Out {
+		Out::from_linear(self.to_linear::<f64>())
+	}
+}
 
 impl<T: Linear + Debug + Copy> Channel for T {
 	#[inline(always)]
 	fn to_linear<Out: Linear>(self) -> Out {
-		num_cast(self).expect("Failed to convert channel to linear")
+		Out::from_f64(self.to_f64())
 	}
 
 	#[inline(always)]
 	fn from_linear<In: Linear>(linear: In) -> Self {
-		num_cast(linear).expect("Failed to convert linear to channel")
+		Self::from_f64(linear.to_f64())
 	}
 }
 
+impl<T: Linear + Debug + Copy> LinearChannel for T {}
+
 use num_derive::*;
 #[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Num, NumCast, NumOps, One, Zero, ToPrimitive, FromPrimitive)]
 struct SRGBGammaFloat(f32);
@@ -60,16 +69,18 @@ struct SRGBGammaFloat(f32);
 impl Channel for SRGBGammaFloat {
 	#[inline(always)]
 	fn to_linear<Out: Linear>(self) -> Out {
-		let channel = num_cast::<_, f32>(self).expect("Failed to convert srgb to linear");
-		let out = if channel <= 0.04045 { channel / 12.92 } else { ((channel + 0.055) / 1.055).powf(2.4) };
-		num_cast(out).expect("Failed to convert srgb to linear")
+		let x = self.0;
+		Out::from_f32(if x <= 0.04045 { x / 12.92 } else { ((x + 0.055) / 1.055).powf(2.4) })
 	}
 
 	#[inline(always)]
 	fn from_linear<In: Linear>(linear: In) -> Self {
-		let linear = num_cast::<_, f32>(linear).expect("Failed to convert linear to srgb");
-		let out = if linear <= 0.0031308 { linear * 12.92 } else { 1.055 * linear.powf(1. / 2.4) - 0.055 };
-		num_cast(out).expect("Failed to convert linear to srgb")
+		let x = linear.to_f32();
+		if x <= 0.0031308 {
+			Self(x * 12.92)
+		} else {
+			Self(1.055 * x.powf(1. / 2.4) - 0.055)
+		}
 	}
 }
 pub trait RGBPrimaries {
@@ -115,6 +126,7 @@ pub trait Pixel: Clone + Pod + Zeroable {
 }
 pub trait RGB: Pixel {
 	type ColorChannel: Channel;
+
 	fn red(&self) -> Self::ColorChannel;
 	fn r(&self) -> Self::ColorChannel {
 		self.red()
@@ -138,7 +150,7 @@ pub trait UnassociatedAlpha: RGB + Alpha {
 }
 
 pub trait Alpha {
-	type AlphaChannel: Channel;
+	type AlphaChannel: LinearChannel;
 	const TRANSPARENT: Self;
 	fn alpha(&self) -> Self::AlphaChannel;
 	fn a(&self) -> Self::AlphaChannel {
@@ -161,7 +173,7 @@ pub trait ExtraChannels<const NUM: usize> {
 }
 
 pub trait Luminance {
-	type LuminanceChannel: Channel;
+	type LuminanceChannel: LinearChannel;
 	fn luminance(&self) -> Self::LuminanceChannel;
 	fn l(&self) -> Self::LuminanceChannel {
 		self.luminance()
diff --git a/node-graph/gcore/src/raster/color.rs b/node-graph/gcore/src/raster/color.rs
index f52644df..14365431 100644
--- a/node-graph/gcore/src/raster/color.rs
+++ b/node-graph/gcore/src/raster/color.rs
@@ -22,6 +22,7 @@ pub struct Luma(pub f32);
 
 impl Luminance for Luma {
 	type LuminanceChannel = f32;
+	#[inline(always)]
 	fn luminance(&self) -> f32 {
 		self.0
 	}
@@ -29,12 +30,15 @@ impl Luminance for Luma {
 
 impl RGB for Luma {
 	type ColorChannel = f32;
+	#[inline(always)]
 	fn red(&self) -> f32 {
 		self.0
 	}
+	#[inline(always)]
 	fn green(&self) -> f32 {
 		self.0
 	}
+	#[inline(always)]
 	fn blue(&self) -> f32 {
 		self.0
 	}
@@ -69,12 +73,15 @@ impl Hash for Color {
 
 impl RGB for Color {
 	type ColorChannel = f32;
+	#[inline(always)]
 	fn red(&self) -> f32 {
 		self.red
 	}
+	#[inline(always)]
 	fn green(&self) -> f32 {
 		self.green
 	}
+	#[inline(always)]
 	fn blue(&self) -> f32 {
 		self.blue
 	}
@@ -98,9 +105,11 @@ impl Alpha for Color {
 	type AlphaChannel = f32;
 	const TRANSPARENT: Self = Self::TRANSPARENT;
 
+	#[inline(always)]
 	fn alpha(&self) -> f32 {
 		self.alpha
 	}
+	#[inline(always)]
 	fn multiplied_alpha(&self, alpha: Self::AlphaChannel) -> Self {
 		Self {
 			red: self.red * alpha,
@@ -119,6 +128,7 @@ impl AssociatedAlpha for Color {
 
 impl Luminance for Color {
 	type LuminanceChannel = f32;
+	#[inline(always)]
 	fn luminance(&self) -> f32 {
 		0.2126 * self.red + 0.7152 * self.green + 0.0722 * self.blue
 	}
@@ -152,6 +162,7 @@ impl Color {
 	/// let color = Color::from_rgbaf32(1.0, 1.0, 1.0, f32::NAN);
 	/// assert!(color == None);
 	/// ```
+	#[inline(always)]
 	pub fn from_rgbaf32(red: f32, green: f32, blue: f32, alpha: f32) -> Option<Color> {
 		if alpha > 1. || [red, green, blue, alpha].iter().any(|c| c.is_sign_negative() || !c.is_finite()) {
 			return None;
@@ -161,16 +172,19 @@ impl Color {
 	}
 
 	/// Return an opaque `Color` from given `f32` RGB channels.
+	#[inline(always)]
 	pub const fn from_rgbf32_unchecked(red: f32, green: f32, blue: f32) -> Color {
 		Color { red, green, blue, alpha: 1. }
 	}
 
 	/// Return an opaque `Color` from given `f32` RGB channels.
+	#[inline(always)]
 	pub const fn from_rgbaf32_unchecked(red: f32, green: f32, blue: f32, alpha: f32) -> Color {
 		Color { red, green, blue, alpha }
 	}
 
 	/// Return an opaque `Color` from given `f32` RGB channels.
+	#[inline(always)]
 	pub fn from_unassociated_alpha(red: f32, green: f32, blue: f32, alpha: f32) -> Color {
 		Color::from_rgbaf32_unchecked(red * alpha, green * alpha, blue * alpha, alpha)
 	}
@@ -184,6 +198,7 @@ impl Color {
 	/// let color2 = Color::from_rgba8_srgb(0x72, 0x67, 0x62, 0xFF);
 	/// assert_eq!(color, color2)
 	/// ```
+	#[inline(always)]
 	pub fn from_rgb8_srgb(red: u8, green: u8, blue: u8) -> Color {
 		Color::from_rgba8_srgb(red, green, blue, 255)
 	}
@@ -195,6 +210,7 @@ impl Color {
 	/// use graphene_core::raster::color::Color;
 	/// let color = Color::from_rgba8_srgb(0x72, 0x67, 0x62, 0x61);
 	/// ```
+	#[inline(always)]
 	pub fn from_rgba8_srgb(red: u8, green: u8, blue: u8, alpha: u8) -> Color {
 		let map_range = |int_color| int_color as f32 / 255.0;
 		Color {
@@ -256,6 +272,7 @@ impl Color {
 	/// let color = Color::from_rgbaf32(0.114, 0.103, 0.98, 0.97).unwrap();
 	/// assert!(color.r() == 0.114);
 	/// ```
+	#[inline(always)]
 	pub fn r(&self) -> f32 {
 		self.red
 	}
@@ -268,6 +285,7 @@ impl Color {
 	/// let color = Color::from_rgbaf32(0.114, 0.103, 0.98, 0.97).unwrap();
 	/// assert!(color.g() == 0.103);
 	/// ```
+	#[inline(always)]
 	pub fn g(&self) -> f32 {
 		self.green
 	}
@@ -280,6 +298,7 @@ impl Color {
 	/// let color = Color::from_rgbaf32(0.114, 0.103, 0.98, 0.97).unwrap();
 	/// assert!(color.b() == 0.98);
 	/// ```
+	#[inline(always)]
 	pub fn b(&self) -> f32 {
 		self.blue
 	}
@@ -292,38 +311,46 @@ impl Color {
 	/// let color = Color::from_rgbaf32(0.114, 0.103, 0.98, 0.97).unwrap();
 	/// assert!(color.a() == 0.97);
 	/// ```
+	#[inline(always)]
 	pub fn a(&self) -> f32 {
 		self.alpha
 	}
 
+	#[inline(always)]
 	pub fn average_rgb_channels(&self) -> f32 {
 		(self.red + self.green + self.blue) / 3.
 	}
 
+	#[inline(always)]
 	pub fn minimum_rgb_channels(&self) -> f32 {
 		self.red.min(self.green).min(self.blue)
 	}
 
+	#[inline(always)]
 	pub fn maximum_rgb_channels(&self) -> f32 {
 		self.red.max(self.green).max(self.blue)
 	}
 
 	// From https://stackoverflow.com/a/56678483/775283
+	#[inline(always)]
 	pub fn luminance_srgb(&self) -> f32 {
 		0.2126 * self.red + 0.7152 * self.green + 0.0722 * self.blue
 	}
 
 	// From https://en.wikipedia.org/wiki/Luma_(video)#Rec._601_luma_versus_Rec._709_luma_coefficients
+	#[inline(always)]
 	pub fn luminance_rec_601(&self) -> f32 {
 		0.299 * self.red + 0.587 * self.green + 0.114 * self.blue
 	}
 
 	// From https://en.wikipedia.org/wiki/Luma_(video)#Rec._601_luma_versus_Rec._709_luma_coefficients
+	#[inline(always)]
 	pub fn luminance_rec_601_rounded(&self) -> f32 {
 		0.3 * self.red + 0.59 * self.green + 0.11 * self.blue
 	}
 
 	// From https://stackoverflow.com/a/56678483/775283
+	#[inline(always)]
 	pub fn luminance_perceptual(&self) -> f32 {
 		let luminance = self.luminance_srgb();
 
@@ -334,6 +361,7 @@ impl Color {
 		}
 	}
 
+	#[inline(always)]
 	pub fn from_luminance(luminance: f32) -> Color {
 		Color {
 			red: luminance,
@@ -343,11 +371,13 @@ impl Color {
 		}
 	}
 
+	#[inline(always)]
 	pub fn with_luminance(&self, luminance: f32) -> Color {
 		let delta = luminance - self.luminance_rec_601_rounded();
 		self.map_rgb(|c| (c + delta).clamp(0., 1.))
 	}
 
+	#[inline(always)]
 	pub fn saturation(&self) -> f32 {
 		let max = (self.red).max(self.green).max(self.blue);
 		let min = (self.red).min(self.green).min(self.blue);
@@ -355,23 +385,28 @@ impl Color {
 		max - min
 	}
 
+	#[inline(always)]
 	pub fn with_saturation(&self, saturation: f32) -> Color {
 		let [hue, _, lightness, alpha] = self.to_hsla();
 		Color::from_hsla(hue, saturation, lightness, alpha)
 	}
 
+	#[inline(always)]
 	pub fn blend_normal(_c_b: f32, c_s: f32) -> f32 {
 		c_s
 	}
 
+	#[inline(always)]
 	pub fn blend_multiply(c_b: f32, c_s: f32) -> f32 {
 		c_s * c_b
 	}
 
+	#[inline(always)]
 	pub fn blend_darken(c_b: f32, c_s: f32) -> f32 {
 		c_s.min(c_b)
 	}
 
+	#[inline(always)]
 	pub fn blend_color_burn(c_b: f32, c_s: f32) -> f32 {
 		if c_b == 1. {
 			1.
@@ -382,10 +417,12 @@ impl Color {
 		}
 	}
 
+	#[inline(always)]
 	pub fn blend_linear_burn(c_b: f32, c_s: f32) -> f32 {
 		c_b + c_s - 1.
 	}
 
+	#[inline(always)]
 	pub fn blend_darker_color(&self, other: Color) -> Color {
 		if self.average_rgb_channels() <= other.average_rgb_channels() {
 			*self
@@ -394,14 +431,17 @@ impl Color {
 		}
 	}
 
+	#[inline(always)]
 	pub fn blend_screen(c_b: f32, c_s: f32) -> f32 {
 		1. - (1. - c_s) * (1. - c_b)
 	}
 
+	#[inline(always)]
 	pub fn blend_lighten(c_b: f32, c_s: f32) -> f32 {
 		c_s.max(c_b)
 	}
 
+	#[inline(always)]
 	pub fn blend_color_dodge(c_b: f32, c_s: f32) -> f32 {
 		if c_s == 1. {
 			1.
@@ -410,10 +450,12 @@ impl Color {
 		}
 	}
 
+	#[inline(always)]
 	pub fn blend_linear_dodge(c_b: f32, c_s: f32) -> f32 {
 		c_b + c_s
 	}
 
+	#[inline(always)]
 	pub fn blend_lighter_color(&self, other: Color) -> Color {
 		if self.average_rgb_channels() >= other.average_rgb_channels() {
 			*self
@@ -524,6 +566,7 @@ impl Color {
 	/// let color = Color::from_rgbaf32(0.114, 0.103, 0.98, 0.97).unwrap();
 	/// assert_eq!(color.components(),  (0.114, 0.103, 0.98, 0.97));
 	/// ```
+	#[inline(always)]
 	pub fn components(&self) -> (f32, f32, f32, f32) {
 		(self.red, self.green, self.blue, self.alpha)
 	}
@@ -566,6 +609,7 @@ impl Color {
 	/// let color = Color::from_rgbaf32(0.114, 0.103, 0.98, 0.97).unwrap();
 	/// //TODO: Add test
 	/// ```
+	#[inline(always)]
 	pub fn to_rgba8_srgb(&self) -> [u8; 4] {
 		let gamma = self.to_gamma_srgb();
 		[(gamma.red * 255.) as u8, (gamma.green * 255.) as u8, (gamma.blue * 255.) as u8, (gamma.alpha * 255.) as u8]
@@ -646,6 +690,7 @@ impl Color {
 	/// Linearly interpolates between two colors based on t.
 	///
 	/// T must be between 0 and 1.
+	#[inline(always)]
 	pub fn lerp(self, other: Color, t: f32) -> Self {
 		assert!((0. ..=1.).contains(&t));
 		Color::from_rgbaf32_unchecked(
@@ -656,12 +701,14 @@ impl Color {
 		)
 	}
 
+	#[inline(always)]
 	pub fn gamma(&self, gamma: f32) -> Color {
 		// From https://www.dfstudios.co.uk/articles/programming/image-programming-algorithms/image-processing-algorithms-part-6-gamma-correction/
 		let inverse_gamma = 1. / gamma;
 		self.map_rgb(|c: f32| c.powf(inverse_gamma))
 	}
 
+	#[inline(always)]
 	pub fn to_linear_srgb(&self) -> Self {
 		Self {
 			red: Self::srgb_to_linear(self.red),
@@ -671,6 +718,7 @@ impl Color {
 		}
 	}
 
+	#[inline(always)]
 	pub fn to_gamma_srgb(&self) -> Self {
 		Self {
 			red: Self::linear_to_srgb(self.red),
@@ -680,6 +728,7 @@ impl Color {
 		}
 	}
 
+	#[inline(always)]
 	pub fn srgb_to_linear(channel: f32) -> f32 {
 		if channel <= 0.04045 {
 			channel / 12.92
@@ -688,6 +737,7 @@ impl Color {
 		}
 	}
 
+	#[inline(always)]
 	pub fn linear_to_srgb(channel: f32) -> f32 {
 		if channel <= 0.0031308 {
 			channel * 12.92
@@ -696,17 +746,22 @@ impl Color {
 		}
 	}
 
+	#[inline(always)]
 	pub fn map_rgba<F: Fn(f32) -> f32>(&self, f: F) -> Self {
 		Self::from_rgbaf32_unchecked(f(self.r()), f(self.g()), f(self.b()), f(self.a()))
 	}
+
+	#[inline(always)]
 	pub fn map_rgb<F: Fn(f32) -> f32>(&self, f: F) -> Self {
 		Self::from_rgbaf32_unchecked(f(self.r()), f(self.g()), f(self.b()), self.a())
 	}
 
+	#[inline(always)]
 	pub fn apply_opacity(&self, opacity: f32) -> Self {
 		Self::from_rgbaf32_unchecked(self.r() * opacity, self.g() * opacity, self.b() * opacity, self.a() * opacity)
 	}
 
+	#[inline(always)]
 	pub fn to_associated_alpha(&self, alpha: f32) -> Self {
 		Self {
 			red: self.red * alpha,
@@ -716,6 +771,7 @@ impl Color {
 		}
 	}
 
+	#[inline(always)]
 	pub fn to_unassociated_alpha(&self) -> Self {
 		if self.alpha == 0. {
 			return *self;
@@ -729,6 +785,7 @@ impl Color {
 		}
 	}
 
+	#[inline(always)]
 	pub fn blend_rgb<F: Fn(f32, f32) -> f32>(&self, other: Color, f: F) -> Self {
 		let background = self.to_unassociated_alpha();
 		Color {
@@ -739,6 +796,7 @@ impl Color {
 		}
 	}
 
+	#[inline(always)]
 	pub fn alpha_blend(&self, other: Color) -> Self {
 		let inv_alpha = 1. - other.alpha;
 		Self {
diff --git a/node-graph/gcore/src/raster/discrete_srgb.rs b/node-graph/gcore/src/raster/discrete_srgb.rs
new file mode 100644
index 00000000..b14d8299
--- /dev/null
+++ b/node-graph/gcore/src/raster/discrete_srgb.rs
@@ -0,0 +1,185 @@
+//! Fast conversions between u8 sRGB and linear float.
+
+// Inspired by https://gist.github.com/rygorous/2203834, but with a slightly
+// modified method, custom derived constants and error correction for perfect
+// accuracy in accordance with the D3D11 spec:
+// https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#FLOATtoSRGB.
+
+/// CRITICAL_POINTS[i] is the largest float value that still maps to i after
+/// conversion to integer sRGB. So if x > CRITICAL_POINTS[i] you know you need
+/// to increment i.
+#[rustfmt::skip]
+const CRITICAL_POINTS: [f32; 256] = [
+	0.00015176347, 0.00045529046, 0.0007588174, 0.0010623443, 0.0013658714, 0.0016693983, 0.0019729252, 0.0022764523,
+	0.0025799791, 0.0028835062, 0.0031883009, 0.003509259, 0.003848315, 0.004205748, 0.0045818323, 0.0049768374,
+	0.005391024, 0.00582465, 0.0062779686, 0.0067512267, 0.0072446675, 0.0077585294, 0.008293047, 0.008848451,
+	0.0094249705, 0.010022825, 0.010642236, 0.01128342, 0.011946591, 0.012631957, 0.013339729, 0.014070111,
+	0.0148233045, 0.015599505, 0.01639891, 0.017221717, 0.018068114, 0.018938294, 0.019832445, 0.020750746,
+	0.021693384, 0.022660539, 0.02365239, 0.024669115, 0.025710886, 0.026777886, 0.027870273, 0.028988222,
+	0.030131903, 0.03130148, 0.032497127, 0.033718992, 0.034967244, 0.03624204, 0.03754355, 0.03887192,
+	0.040227327, 0.041609894, 0.04301979, 0.044457167, 0.04592218, 0.04741497, 0.04893569, 0.050484486,
+	0.05206151, 0.053666897, 0.055300802, 0.056963358, 0.058654714, 0.060375024, 0.062124394, 0.06390298,
+	0.065710925, 0.06754836, 0.06941542, 0.07131224, 0.07323896, 0.07519571, 0.07718261, 0.07919981,
+	0.08124744, 0.08332562, 0.08543448, 0.08757417, 0.08974478, 0.091946445, 0.09417931, 0.09644348,
+	0.098739095, 0.10106628, 0.10342514, 0.105815805, 0.1082384, 0.110693045, 0.11317986, 0.11569896,
+	0.118250474, 0.12083454, 0.12345121, 0.12610064, 0.12878296, 0.13149826, 0.13424668, 0.1370283,
+	0.13984327, 0.14269169, 0.14557366, 0.1484893, 0.15143873, 0.15442204, 0.15743938, 0.16049084,
+	0.1635765, 0.16669647, 0.16985092, 0.1730399, 0.17626354, 0.17952198, 0.18281525, 0.1861435,
+	0.18950681, 0.19290532, 0.19633913, 0.19980833, 0.20331302, 0.20685332, 0.21042931, 0.21404111,
+	0.21768881, 0.22137253, 0.22509235, 0.22884844, 0.23264077, 0.23646952, 0.24033478, 0.24423665,
+	0.24817522, 0.25215057, 0.25616285, 0.26021212, 0.26429847, 0.26842204, 0.27258286, 0.27678108,
+	0.2810168, 0.28529006, 0.289601, 0.2939497, 0.29833627, 0.30276078, 0.30722332, 0.311724,
+	0.31626293, 0.32084015, 0.32545578, 0.33010995, 0.3348027, 0.3395341, 0.34430432, 0.34911346,
+	0.3539615, 0.35884857, 0.3637748, 0.36874023, 0.373745, 0.37878913, 0.38387278, 0.388996,
+	0.39415887, 0.39936152, 0.404604, 0.4098864, 0.41520882, 0.42057133, 0.425974, 0.431417,
+	0.43690032, 0.4424241, 0.44798836, 0.45359328, 0.45923886, 0.46492523, 0.47065246, 0.47642064,
+	0.48222986, 0.48808017, 0.4939718, 0.49990457, 0.5058787, 0.5118943, 0.5179514, 0.5240501,
+	0.5301905, 0.5363727, 0.5425967, 0.54886264, 0.5551706, 0.56152064, 0.5679129, 0.5743473,
+	0.5808241, 0.5873433, 0.593905, 0.60050917, 0.60715604, 0.61384565, 0.62057805, 0.6273533,
+	0.63417155, 0.6410328, 0.6479372, 0.65488476, 0.66187555, 0.6689097, 0.6759874, 0.68310845,
+	0.6902731, 0.6974814, 0.7047334, 0.71202916, 0.7193688, 0.7267524, 0.73418003, 0.7416518,
+	0.7491677, 0.7567278, 0.76433223, 0.7719811, 0.7796744, 0.7874122, 0.7951947, 0.80302185,
+	0.8108938, 0.81881046, 0.82677215, 0.8347787, 0.8428304, 0.8509272, 0.85906917, 0.8672564,
+	0.875489, 0.8837671, 0.89209044, 0.9004596, 0.9088741, 0.91733456, 0.9258405, 0.9343926,
+	0.94299024, 0.95163417, 0.96032387, 0.96906, 0.977842, 0.9866705, 0.9955452, 1.,
+];
+
+#[rustfmt::skip]
+const FLOAT_SRGB_LERP: [u32; 27] = [
+	0x66f, 0x66f063b, 0xcaa0515, 0x11c00773, 0x193305dc, 0x1f1004f3, 0x24030481, 0x28850773,
+	0x2ff9065e, 0x365805a1, 0x3bfa0547, 0x414108f7, 0x4a3907d8, 0x52110709, 0x591b06aa, 0x5fc50b70,
+	0x6b350a18, 0x754e091c, 0x7e6b08aa, 0x87160ef1, 0x96070d3e, 0xa3460bfc, 0xaf430b6c, 0xbaaf13bd,
+	0xce6d1187, 0xdff40fe3, 0xefd70f28,
+];
+
+#[inline]
+pub fn float_to_srgb_u8(mut f: f32) -> u8 {
+	// Clamp f to [0, 1], with a negated condition to handle NaNs as 0.
+	if !(f >= 0.0) {
+		f = 0.0;
+	} else if f > 1.0 {
+		f = 1.0;
+	}
+
+	// Shift away slightly from 0.0 to reduce exponent range.
+	const C: f32 = 0.009842521f32;
+	let u = (f + C).to_bits() - C.to_bits();
+	if u > (1.0 + C).to_bits() - C.to_bits() {
+		// SAFETY: f was clamped to [0, 1] above, and the integer representations
+		// of the positive finite non-NaN floats are monotonic, so u cannot exceed
+		// this bound. Stating it makes the later LUT lookup panicless.
+		unsafe { core::hint::unreachable_unchecked() }
+	}
+
+	// Compute a piecewise linear interpolation that is always
+	// the correct answer, or one less than it.
+	let u16mask = (1 << 16) - 1;
+	let lut_idx = u >> 21;
+	let lerp_idx = (u >> 5) & u16mask;
+	let bias_mult = FLOAT_SRGB_LERP[lut_idx as usize];
+	let bias = (bias_mult >> 16) << 16;
+	let mult = bias_mult & u16mask;
+	// I don't believe this wraps, but since we test in release mode,
+	// better make sure debug mode behaves the same.
+	let lerp = bias.wrapping_add(mult * lerp_idx) >> 24;
+
+	// Adjust linear interpolation to the correct value.
+	if f > CRITICAL_POINTS[lerp as usize] {
+		lerp as u8 + 1
+	} else {
+		lerp as u8
+	}
+}
+
+#[rustfmt::skip]
+const FROM_SRGB_U8: [f32; 256] = [
+	0., 0.000303527, 0.000607054, 0.00091058103, 0.001214108, 0.001517635, 0.0018211621, 0.002124689,
+	0.002428216, 0.002731743, 0.00303527, 0.0033465356, 0.003676507, 0.004024717, 0.004391442,
+	0.0047769533, 0.005181517, 0.0056053917, 0.0060488326, 0.006512091, 0.00699541, 0.0074990317,
+	0.008023192, 0.008568125, 0.009134057, 0.009721218, 0.010329823, 0.010960094, 0.011612245,
+	0.012286487, 0.012983031, 0.013702081, 0.014443844, 0.015208514, 0.015996292, 0.016807375,
+	0.017641952, 0.018500218, 0.019382361, 0.020288562, 0.02121901, 0.022173883, 0.023153365,
+	0.02415763, 0.025186857, 0.026241222, 0.027320892, 0.028426038, 0.029556843, 0.03071345, 0.03189604,
+	0.033104774, 0.03433981, 0.035601325, 0.036889452, 0.038204376, 0.039546248, 0.04091521, 0.042311423,
+	0.043735042, 0.045186214, 0.046665095, 0.048171833, 0.049706575, 0.051269468, 0.052860655, 0.05448028,
+	0.056128494, 0.057805434, 0.05951124, 0.06124607, 0.06301003, 0.06480328, 0.06662595, 0.06847818,
+	0.07036011, 0.07227186, 0.07421358, 0.07618539, 0.07818743, 0.08021983, 0.082282715, 0.084376216,
+	0.086500466, 0.088655606, 0.09084173, 0.09305898, 0.095307484, 0.09758736, 0.09989874, 0.10224175,
+	0.10461649, 0.10702311, 0.10946172, 0.111932434, 0.11443538, 0.116970696, 0.11953845, 0.12213881,
+	0.12477186, 0.12743773, 0.13013652, 0.13286836, 0.13563336, 0.13843165, 0.14126332, 0.1441285,
+	0.1470273, 0.14995982, 0.15292618, 0.1559265, 0.15896086, 0.16202943, 0.16513224, 0.16826946,
+	0.17144115, 0.17464745, 0.17788847, 0.1811643, 0.18447503, 0.1878208, 0.19120172, 0.19461787,
+	0.19806935, 0.2015563, 0.20507877, 0.2086369, 0.21223079, 0.21586053, 0.21952623, 0.22322798,
+	0.22696589, 0.23074007, 0.23455065, 0.23839766, 0.2422812, 0.2462014, 0.25015837, 0.25415218,
+	0.2581829, 0.26225072, 0.26635566, 0.27049786, 0.27467737, 0.27889434, 0.2831488, 0.2874409,
+	0.2917707, 0.29613832, 0.30054384, 0.30498737, 0.30946895, 0.31398875, 0.31854683, 0.32314324,
+	0.32777813, 0.33245158, 0.33716366, 0.34191445, 0.3467041, 0.3515327, 0.35640025, 0.36130688,
+	0.3662527, 0.37123778, 0.37626222, 0.3813261, 0.38642952, 0.39157256, 0.3967553, 0.40197787,
+	0.4072403, 0.4125427, 0.41788515, 0.42326775, 0.42869055, 0.4341537, 0.43965724, 0.44520125,
+	0.45078585, 0.45641106, 0.46207705, 0.46778384, 0.47353154, 0.47932023, 0.48514998, 0.4910209,
+	0.49693304, 0.5028866, 0.50888145, 0.5149178, 0.5209957, 0.52711535, 0.5332766, 0.5394797,
+	0.5457247, 0.5520116, 0.5583406, 0.5647117, 0.57112503, 0.57758063, 0.5840786, 0.590619, 0.597202,
+	0.60382754, 0.61049575, 0.61720675, 0.62396055, 0.63075733, 0.637597, 0.6444799, 0.6514058,
+	0.65837497, 0.66538745, 0.67244333, 0.6795426, 0.68668544, 0.69387203, 0.70110214, 0.70837605,
+	0.7156938, 0.72305536, 0.730461, 0.7379107, 0.7454045, 0.75294244, 0.76052475, 0.7681514, 0.77582246,
+	0.78353804, 0.79129815, 0.79910296, 0.8069525, 0.8148468, 0.822786, 0.8307701, 0.83879924, 0.84687346,
+	0.8549928, 0.8631574, 0.87136734, 0.8796226, 0.8879232, 0.89626956, 0.90466136, 0.913099, 0.92158204,
+	0.93011117, 0.9386859, 0.9473069, 0.9559735, 0.9646866, 0.9734455, 0.98225087, 0.9911022, 1.,
+];
+
+#[inline]
+pub fn srgb_u8_to_float(c: u8) -> f32 {
+	FROM_SRGB_U8[c as usize]
+}
+
+#[cfg(test)]
+mod tests {
+	use super::*;
+
+	// Reference linear float -> sRGB float encoding: https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#FLOATtoSRGB
+	fn float_to_srgb_ref(f: f32) -> f32 {
+		if !(f > 0_f32) {
+			0_f32
+		} else if f <= 0.0031308f32 {
+			12.92_f32 * f
+		} else if f < 1_f32 {
+			1.055f32 * f.powf(1.0_f32 / 2.4_f32) - 0.055f32
+		} else {
+			1_f32
+		}
+	}
+
+	fn float_to_srgb_u8_ref(f: f32) -> u8 {
+		(float_to_srgb_ref(f) * 255_f32 + 0.5_f32) as u8
+	}
+
+	// Reference sRGB float -> linear float decoding: https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#SRGBtoFLOAT
+	fn srgb_to_float_ref(f: f32) -> f32 {
+		if f <= 0.04045f32 {
+			f / 12.92f32
+		} else {
+			((f + 0.055f32) / 1.055f32).powf(2.4_f32)
+		}
+	}
+
+	fn srgb_u8_to_float_ref(c: u8) -> f32 {
+		srgb_to_float_ref(c as f32 * (1_f32 / 255.0_f32))
+	}
+
+	#[test]
+	fn test_srgb_u8_to_float() {
+		for u in 0..=u8::MAX {
+			assert_eq!(srgb_u8_to_float(u), srgb_u8_to_float_ref(u), "sRGB input {}", u);
+		}
+	}
+
+	#[ignore = "expensive, test in release mode"]
+	#[test]
+	fn test_float_to_srgb_u8() {
+		// Simply... check all float values (every u32 bit pattern, including NaNs and infinities).
+		for u in 0..=u32::MAX {
+			let f = f32::from_bits(u);
+			assert_eq!(float_to_srgb_u8(f), float_to_srgb_u8_ref(f), "input bits {:#010x}", u);
+		}
+	}
+}
diff --git a/node-graph/gcore/src/raster/image.rs b/node-graph/gcore/src/raster/image.rs
index 0698d006..ca87b91b 100644
--- a/node-graph/gcore/src/raster/image.rs
+++ b/node-graph/gcore/src/raster/image.rs
@@ -1,3 +1,4 @@
+use super::discrete_srgb::float_to_srgb_u8;
 use super::{Color, ImageSlice};
 use crate::Node;
 use alloc::vec::Vec;
@@ -135,27 +136,35 @@ use super::*;
 impl<P: Alpha + RGB + AssociatedAlpha> Image<P>
 where
 	P::ColorChannel: Linear,
+	<P as Alpha>::AlphaChannel: Linear,
 {
 	/// Flattens each channel cast to a u8
 	pub fn into_flat_u8(self) -> (Vec<u8>, u32, u32) {
 		let Image { width, height, data } = self;
+		assert!(data.len() == width as usize * height as usize);
 
-		let to_gamma = SRGBGammaFloat::from_linear;
-		let to_u8 = |x| (num_cast::<_, f32>(x).unwrap() * 255.) as u8;
+		let mut result = Vec::with_capacity(data.len() * 4);
+		for color in data {
+			let a = color.a().to_f32();
+			if a < 0.5 / 255.0 {
+				// This would map to fully transparent anyway, avoid expensive encoding.
+				result.push(0);
+				result.push(0);
+				result.push(0);
+				result.push(0);
+			} else {
+				let undo_premultiply = 1.0 / a;
+				let r = float_to_srgb_u8(color.r().to_f32() * undo_premultiply);
+				let g = float_to_srgb_u8(color.g().to_f32() * undo_premultiply);
+				let b = float_to_srgb_u8(color.b().to_f32() * undo_premultiply);
+				result.push(r);
+				result.push(g);
+				result.push(b);
+				result.push((a * 255.0 + 0.5) as u8);
+			}
+		}
 
-		let result_bytes = data
-			.into_iter()
-			.flat_map(|color| {
-				[
-					to_u8(to_gamma(color.r() / color.a().to_channel())),
-					to_u8(to_gamma(color.g() / color.a().to_channel())),
-					to_u8(to_gamma(color.b() / color.a().to_channel())),
-					(num_cast::<_, f32>(color.a()).unwrap() * 255.) as u8,
-				]
-			})
-			.collect();
-
-		(result_bytes, width, height)
+		(result, width, height)
 	}
 }
 
diff --git a/node-graph/gstd/src/brush.rs b/node-graph/gstd/src/brush.rs
index 7364f5a9..ef3ec00a 100644
--- a/node-graph/gstd/src/brush.rs
+++ b/node-graph/gstd/src/brush.rs
@@ -81,7 +81,7 @@ impl<P: Pixel + Alpha> Sample for BrushStampGenerator<P> {
 		};
 
 		use graphene_core::raster::Channel;
-		Some(self.color.multiplied_alpha(P::AlphaChannel::from_f32(result)))
+		Some(self.color.multiplied_alpha(P::AlphaChannel::from_linear(result)))
 	}
 }
 
diff --git a/node-graph/gstd/src/main.rs b/node-graph/gstd/src/main.rs
index fa416508..67609113 100644
--- a/node-graph/gstd/src/main.rs
+++ b/node-graph/gstd/src/main.rs
@@ -112,7 +112,7 @@ fn main() {
 		//let mut mul = mul::MulNode::new();
 		let mut stack: borrow_stack::FixedSizeStack<Box<dyn Node<'_, Output = &dyn DynAny>>> =
 			borrow_stack::FixedSizeStack::new(42);
-		unsafe { stack.push(Box::new(AnyValueNode::new(1f32))) };
+		unsafe { stack.push(Box::new(AnyValueNode::new(1_f32))) };
 		//let node = unsafe { stack.get(0) };
 		//let boxed = Box::new(StorageNode::new(node));
 		//unsafe { stack.push(boxed) };
@@ -123,7 +123,7 @@ fn main() {
 				.push(Box::new(AnyRefNode::new(stack.get(0).as_ref()))
 					as Box<dyn Node<(), Output = &dyn DynAny>>)
 		};*/
-		let f = (3.2f32, 3.1f32);
+		let f = (3.2_f32, 3.1_f32);
 		let a = ValueNode::new(1.);
 		let id = std::any::TypeId::of::<&f32>();
 		let any_a = AnyRefNode::new(&a);
diff --git a/node-graph/gstd/src/raster.rs b/node-graph/gstd/src/raster.rs
index fcbd0b19..14767699 100644
--- a/node-graph/gstd/src/raster.rs
+++ b/node-graph/gstd/src/raster.rs
@@ -1,6 +1,6 @@
 use dyn_any::{DynAny, StaticType};
 use glam::{DAffine2, DVec2};
-use graphene_core::raster::{Alpha, BlendMode, BlendNode, Channel, Image, ImageFrame, Luminance, Pixel, RasterMut, Sample};
+use graphene_core::raster::{Alpha, BlendMode, BlendNode, Image, ImageFrame, LinearChannel, Luminance, Pixel, RasterMut, Sample};
 use graphene_core::transform::Transform;
 
 use graphene_core::value::CopiedNode;
@@ -217,7 +217,7 @@ fn mask_image<
 
 			let image_pixel = image.get_pixel_mut(x, y).unwrap();
 			if let Some(mask_pixel) = stencil.sample(mask_point, area) {
-				*image_pixel = image_pixel.multiplied_alpha(mask_pixel.l().to_channel());
+				*image_pixel = image_pixel.multiplied_alpha(mask_pixel.l().cast_linear_channel());
 			}
 		}
 	}