From bb93d243a01fb1f413e2ef146dae9100c58aaf80 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Thu, 25 May 2023 10:51:40 +0200 Subject: [PATCH] Optimize colorspace conversion (#1228) * Enabled cross-crate inlining for release builds. * Sprinkled inline-enabling directives in color.rs. * Reduced abstraction in color channel handling for better inlining. * Alpha should not use gamma. * Oops, didn't compile. * Added discrete sRGB <-> linear float conversion. * Use new float <-> sRGB conversion. * Improved comments. * Don't convert fully transparent pixels. --- Cargo.toml | 3 + node-graph/gcore/src/raster.rs | 82 ++++---- node-graph/gcore/src/raster/color.rs | 58 ++++++ node-graph/gcore/src/raster/discrete_srgb.rs | 185 +++++++++++++++++++ node-graph/gcore/src/raster/image.rs | 39 ++-- node-graph/gstd/src/brush.rs | 2 +- node-graph/gstd/src/main.rs | 4 +- node-graph/gstd/src/raster.rs | 4 +- 8 files changed, 322 insertions(+), 55 deletions(-) create mode 100644 node-graph/gcore/src/raster/discrete_srgb.rs diff --git a/Cargo.toml b/Cargo.toml index ee4de000..510bf932 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,3 +58,6 @@ opt-level = 3 [profile.dev] opt-level = 1 + +[profile.release] +lto = "thin" diff --git a/node-graph/gcore/src/raster.rs b/node-graph/gcore/src/raster.rs index 0f291357..aa2b58cd 100644 --- a/node-graph/gcore/src/raster.rs +++ b/node-graph/gcore/src/raster.rs @@ -4,10 +4,6 @@ use crate::Node; use bytemuck::{Pod, Zeroable}; use glam::DVec2; -#[cfg(not(target_arch = "spirv"))] -use num_traits::{cast::cast as num_cast, Num, NumCast}; -#[cfg(target_arch = "spirv")] -use spirv_std::num_traits::{cast::cast as num_cast, float::Float, FromPrimitive, Num, NumCast, ToPrimitive}; pub use self::color::{Color, Luma}; @@ -15,44 +11,57 @@ pub mod adjustments; #[cfg(not(target_arch = "spirv"))] pub mod brightness_contrast; pub mod color; +pub mod discrete_srgb; pub use adjustments::*; -pub trait Channel: Copy + Debug + Num + NumCast { - fn to_linear(self) -> Out; 
- fn from_linear(linear: In) -> Self; - fn to_f32(self) -> f32 { - num_cast(self).expect("Failed to convert channel to f32") - } - fn from_f32(value: f32) -> Self { - num_cast(value).expect("Failed to convert f32 to channel") - } - fn to_f64(self) -> f64 { - num_cast(self).expect("Failed to convert channel to f64") - } - fn from_f64(value: f64) -> Self { - num_cast(value).expect("Failed to convert f64 to channel") - } - fn to_channel(self) -> Out { - num_cast(self).expect("Failed to convert channel to channel") - } +pub trait Linear { + fn from_f32(x: f32) -> Self; + fn to_f32(self) -> f32; + fn from_f64(x: f64) -> Self; + fn to_f64(self) -> f64; } -pub trait Linear: NumCast + Num {} -impl Linear for f32 {} -impl Linear for f64 {} +#[rustfmt::skip] +impl Linear for f32 { + #[inline(always)] fn from_f32(x: f32) -> Self { x } + #[inline(always)] fn to_f32(self) -> f32 { self } + #[inline(always)] fn from_f64(x: f64) -> Self { x as f32 } + #[inline(always)] fn to_f64(self) -> f64 { self as f64 } +} + +#[rustfmt::skip] +impl Linear for f64 { + #[inline(always)] fn from_f32(x: f32) -> Self { x as f64 } + #[inline(always)] fn to_f32(self) -> f32 { self as f32 } + #[inline(always)] fn from_f64(x: f64) -> Self { x } + #[inline(always)] fn to_f64(self) -> f64 { self } +} + +pub trait Channel: Copy + Debug { + fn to_linear(self) -> Out; + fn from_linear(linear: In) -> Self; +} + +pub trait LinearChannel: Channel { + fn cast_linear_channel(self) -> Out { + Out::from_linear(self.to_linear::()) + } +} impl Channel for T { #[inline(always)] fn to_linear(self) -> Out { - num_cast(self).expect("Failed to convert channel to linear") + Out::from_f64(self.to_f64()) } #[inline(always)] fn from_linear(linear: In) -> Self { - num_cast(linear).expect("Failed to convert linear to channel") + Self::from_f64(linear.to_f64()) } } +impl LinearChannel for T {} + use num_derive::*; #[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Num, NumCast, NumOps, One, Zero, ToPrimitive, FromPrimitive)] 
struct SRGBGammaFloat(f32); @@ -60,16 +69,18 @@ struct SRGBGammaFloat(f32); impl Channel for SRGBGammaFloat { #[inline(always)] fn to_linear(self) -> Out { - let channel = num_cast::<_, f32>(self).expect("Failed to convert srgb to linear"); - let out = if channel <= 0.04045 { channel / 12.92 } else { ((channel + 0.055) / 1.055).powf(2.4) }; - num_cast(out).expect("Failed to convert srgb to linear") + let x = self.0; + Out::from_f32(if x <= 0.04045 { x / 12.92 } else { ((x + 0.055) / 1.055).powf(2.4) }) } #[inline(always)] fn from_linear(linear: In) -> Self { - let linear = num_cast::<_, f32>(linear).expect("Failed to convert linear to srgb"); - let out = if linear <= 0.0031308 { linear * 12.92 } else { 1.055 * linear.powf(1. / 2.4) - 0.055 }; - num_cast(out).expect("Failed to convert linear to srgb") + let x = linear.to_f32(); + if x <= 0.0031308 { + Self(x * 12.92) + } else { + Self(1.055 * x.powf(1. / 2.4) - 0.055) + } } } pub trait RGBPrimaries { @@ -115,6 +126,7 @@ pub trait Pixel: Clone + Pod + Zeroable { } pub trait RGB: Pixel { type ColorChannel: Channel; + fn red(&self) -> Self::ColorChannel; fn r(&self) -> Self::ColorChannel { self.red() @@ -138,7 +150,7 @@ pub trait UnassociatedAlpha: RGB + Alpha { } pub trait Alpha { - type AlphaChannel: Channel; + type AlphaChannel: LinearChannel; const TRANSPARENT: Self; fn alpha(&self) -> Self::AlphaChannel; fn a(&self) -> Self::AlphaChannel { @@ -161,7 +173,7 @@ pub trait ExtraChannels { } pub trait Luminance { - type LuminanceChannel: Channel; + type LuminanceChannel: LinearChannel; fn luminance(&self) -> Self::LuminanceChannel; fn l(&self) -> Self::LuminanceChannel { self.luminance() diff --git a/node-graph/gcore/src/raster/color.rs b/node-graph/gcore/src/raster/color.rs index f52644df..14365431 100644 --- a/node-graph/gcore/src/raster/color.rs +++ b/node-graph/gcore/src/raster/color.rs @@ -22,6 +22,7 @@ pub struct Luma(pub f32); impl Luminance for Luma { type LuminanceChannel = f32; + #[inline(always)] fn 
luminance(&self) -> f32 { self.0 } @@ -29,12 +30,15 @@ impl Luminance for Luma { impl RGB for Luma { type ColorChannel = f32; + #[inline(always)] fn red(&self) -> f32 { self.0 } + #[inline(always)] fn green(&self) -> f32 { self.0 } + #[inline(always)] fn blue(&self) -> f32 { self.0 } @@ -69,12 +73,15 @@ impl Hash for Color { impl RGB for Color { type ColorChannel = f32; + #[inline(always)] fn red(&self) -> f32 { self.red } + #[inline(always)] fn green(&self) -> f32 { self.green } + #[inline(always)] fn blue(&self) -> f32 { self.blue } @@ -98,9 +105,11 @@ impl Alpha for Color { type AlphaChannel = f32; const TRANSPARENT: Self = Self::TRANSPARENT; + #[inline(always)] fn alpha(&self) -> f32 { self.alpha } + #[inline(always)] fn multiplied_alpha(&self, alpha: Self::AlphaChannel) -> Self { Self { red: self.red * alpha, @@ -119,6 +128,7 @@ impl AssociatedAlpha for Color { impl Luminance for Color { type LuminanceChannel = f32; + #[inline(always)] fn luminance(&self) -> f32 { 0.2126 * self.red + 0.7152 * self.green + 0.0722 * self.blue } @@ -152,6 +162,7 @@ impl Color { /// let color = Color::from_rgbaf32(1.0, 1.0, 1.0, f32::NAN); /// assert!(color == None); /// ``` + #[inline(always)] pub fn from_rgbaf32(red: f32, green: f32, blue: f32, alpha: f32) -> Option { if alpha > 1. || [red, green, blue, alpha].iter().any(|c| c.is_sign_negative() || !c.is_finite()) { return None; @@ -161,16 +172,19 @@ impl Color { } /// Return an opaque `Color` from given `f32` RGB channels. + #[inline(always)] pub const fn from_rgbf32_unchecked(red: f32, green: f32, blue: f32) -> Color { Color { red, green, blue, alpha: 1. } } /// Return an opaque `Color` from given `f32` RGB channels. + #[inline(always)] pub const fn from_rgbaf32_unchecked(red: f32, green: f32, blue: f32, alpha: f32) -> Color { Color { red, green, blue, alpha } } /// Return an opaque `Color` from given `f32` RGB channels. 
+ #[inline(always)] pub fn from_unassociated_alpha(red: f32, green: f32, blue: f32, alpha: f32) -> Color { Color::from_rgbaf32_unchecked(red * alpha, green * alpha, blue * alpha, alpha) } @@ -184,6 +198,7 @@ impl Color { /// let color2 = Color::from_rgba8_srgb(0x72, 0x67, 0x62, 0xFF); /// assert_eq!(color, color2) /// ``` + #[inline(always)] pub fn from_rgb8_srgb(red: u8, green: u8, blue: u8) -> Color { Color::from_rgba8_srgb(red, green, blue, 255) } @@ -195,6 +210,7 @@ impl Color { /// use graphene_core::raster::color::Color; /// let color = Color::from_rgba8_srgb(0x72, 0x67, 0x62, 0x61); /// ``` + #[inline(always)] pub fn from_rgba8_srgb(red: u8, green: u8, blue: u8, alpha: u8) -> Color { let map_range = |int_color| int_color as f32 / 255.0; Color { @@ -256,6 +272,7 @@ impl Color { /// let color = Color::from_rgbaf32(0.114, 0.103, 0.98, 0.97).unwrap(); /// assert!(color.r() == 0.114); /// ``` + #[inline(always)] pub fn r(&self) -> f32 { self.red } @@ -268,6 +285,7 @@ impl Color { /// let color = Color::from_rgbaf32(0.114, 0.103, 0.98, 0.97).unwrap(); /// assert!(color.g() == 0.103); /// ``` + #[inline(always)] pub fn g(&self) -> f32 { self.green } @@ -280,6 +298,7 @@ impl Color { /// let color = Color::from_rgbaf32(0.114, 0.103, 0.98, 0.97).unwrap(); /// assert!(color.b() == 0.98); /// ``` + #[inline(always)] pub fn b(&self) -> f32 { self.blue } @@ -292,38 +311,46 @@ impl Color { /// let color = Color::from_rgbaf32(0.114, 0.103, 0.98, 0.97).unwrap(); /// assert!(color.a() == 0.97); /// ``` + #[inline(always)] pub fn a(&self) -> f32 { self.alpha } + #[inline(always)] pub fn average_rgb_channels(&self) -> f32 { (self.red + self.green + self.blue) / 3. 
} + #[inline(always)] pub fn minimum_rgb_channels(&self) -> f32 { self.red.min(self.green).min(self.blue) } + #[inline(always)] pub fn maximum_rgb_channels(&self) -> f32 { self.red.max(self.green).max(self.blue) } // From https://stackoverflow.com/a/56678483/775283 + #[inline(always)] pub fn luminance_srgb(&self) -> f32 { 0.2126 * self.red + 0.7152 * self.green + 0.0722 * self.blue } // From https://en.wikipedia.org/wiki/Luma_(video)#Rec._601_luma_versus_Rec._709_luma_coefficients + #[inline(always)] pub fn luminance_rec_601(&self) -> f32 { 0.299 * self.red + 0.587 * self.green + 0.114 * self.blue } // From https://en.wikipedia.org/wiki/Luma_(video)#Rec._601_luma_versus_Rec._709_luma_coefficients + #[inline(always)] pub fn luminance_rec_601_rounded(&self) -> f32 { 0.3 * self.red + 0.59 * self.green + 0.11 * self.blue } // From https://stackoverflow.com/a/56678483/775283 + #[inline(always)] pub fn luminance_perceptual(&self) -> f32 { let luminance = self.luminance_srgb(); @@ -334,6 +361,7 @@ impl Color { } } + #[inline(always)] pub fn from_luminance(luminance: f32) -> Color { Color { red: luminance, @@ -343,11 +371,13 @@ impl Color { } } + #[inline(always)] pub fn with_luminance(&self, luminance: f32) -> Color { let delta = luminance - self.luminance_rec_601_rounded(); self.map_rgb(|c| (c + delta).clamp(0., 1.)) } + #[inline(always)] pub fn saturation(&self) -> f32 { let max = (self.red).max(self.green).max(self.blue); let min = (self.red).min(self.green).min(self.blue); @@ -355,23 +385,28 @@ impl Color { max - min } + #[inline(always)] pub fn with_saturation(&self, saturation: f32) -> Color { let [hue, _, lightness, alpha] = self.to_hsla(); Color::from_hsla(hue, saturation, lightness, alpha) } + #[inline(always)] pub fn blend_normal(_c_b: f32, c_s: f32) -> f32 { c_s } + #[inline(always)] pub fn blend_multiply(c_b: f32, c_s: f32) -> f32 { c_s * c_b } + #[inline(always)] pub fn blend_darken(c_b: f32, c_s: f32) -> f32 { c_s.min(c_b) } + #[inline(always)] pub fn 
blend_color_burn(c_b: f32, c_s: f32) -> f32 { if c_b == 1. { 1. @@ -382,10 +417,12 @@ impl Color { } } + #[inline(always)] pub fn blend_linear_burn(c_b: f32, c_s: f32) -> f32 { c_b + c_s - 1. } + #[inline(always)] pub fn blend_darker_color(&self, other: Color) -> Color { if self.average_rgb_channels() <= other.average_rgb_channels() { *self @@ -394,14 +431,17 @@ impl Color { } } + #[inline(always)] pub fn blend_screen(c_b: f32, c_s: f32) -> f32 { 1. - (1. - c_s) * (1. - c_b) } + #[inline(always)] pub fn blend_lighten(c_b: f32, c_s: f32) -> f32 { c_s.max(c_b) } + #[inline(always)] pub fn blend_color_dodge(c_b: f32, c_s: f32) -> f32 { if c_s == 1. { 1. @@ -410,10 +450,12 @@ impl Color { } } + #[inline(always)] pub fn blend_linear_dodge(c_b: f32, c_s: f32) -> f32 { c_b + c_s } + #[inline(always)] pub fn blend_lighter_color(&self, other: Color) -> Color { if self.average_rgb_channels() >= other.average_rgb_channels() { *self @@ -524,6 +566,7 @@ impl Color { /// let color = Color::from_rgbaf32(0.114, 0.103, 0.98, 0.97).unwrap(); /// assert_eq!(color.components(), (0.114, 0.103, 0.98, 0.97)); /// ``` + #[inline(always)] pub fn components(&self) -> (f32, f32, f32, f32) { (self.red, self.green, self.blue, self.alpha) } @@ -566,6 +609,7 @@ impl Color { /// let color = Color::from_rgbaf32(0.114, 0.103, 0.98, 0.97).unwrap(); /// //TODO: Add test /// ``` + #[inline(always)] pub fn to_rgba8_srgb(&self) -> [u8; 4] { let gamma = self.to_gamma_srgb(); [(gamma.red * 255.) as u8, (gamma.green * 255.) as u8, (gamma.blue * 255.) as u8, (gamma.alpha * 255.) as u8] @@ -646,6 +690,7 @@ impl Color { /// Linearly interpolates between two colors based on t. /// /// T must be between 0 and 1. + #[inline(always)] pub fn lerp(self, other: Color, t: f32) -> Self { assert!((0. 
..=1.).contains(&t)); Color::from_rgbaf32_unchecked( @@ -656,12 +701,14 @@ impl Color { ) } + #[inline(always)] pub fn gamma(&self, gamma: f32) -> Color { // From https://www.dfstudios.co.uk/articles/programming/image-programming-algorithms/image-processing-algorithms-part-6-gamma-correction/ let inverse_gamma = 1. / gamma; self.map_rgb(|c: f32| c.powf(inverse_gamma)) } + #[inline(always)] pub fn to_linear_srgb(&self) -> Self { Self { red: Self::srgb_to_linear(self.red), @@ -671,6 +718,7 @@ impl Color { } } + #[inline(always)] pub fn to_gamma_srgb(&self) -> Self { Self { red: Self::linear_to_srgb(self.red), @@ -680,6 +728,7 @@ impl Color { } } + #[inline(always)] pub fn srgb_to_linear(channel: f32) -> f32 { if channel <= 0.04045 { channel / 12.92 @@ -688,6 +737,7 @@ impl Color { } } + #[inline(always)] pub fn linear_to_srgb(channel: f32) -> f32 { if channel <= 0.0031308 { channel * 12.92 @@ -696,17 +746,22 @@ impl Color { } } + #[inline(always)] pub fn map_rgba f32>(&self, f: F) -> Self { Self::from_rgbaf32_unchecked(f(self.r()), f(self.g()), f(self.b()), f(self.a())) } + + #[inline(always)] pub fn map_rgb f32>(&self, f: F) -> Self { Self::from_rgbaf32_unchecked(f(self.r()), f(self.g()), f(self.b()), self.a()) } + #[inline(always)] pub fn apply_opacity(&self, opacity: f32) -> Self { Self::from_rgbaf32_unchecked(self.r() * opacity, self.g() * opacity, self.b() * opacity, self.a() * opacity) } + #[inline(always)] pub fn to_associated_alpha(&self, alpha: f32) -> Self { Self { red: self.red * alpha, @@ -716,6 +771,7 @@ impl Color { } } + #[inline(always)] pub fn to_unassociated_alpha(&self) -> Self { if self.alpha == 0. { return *self; @@ -729,6 +785,7 @@ impl Color { } } + #[inline(always)] pub fn blend_rgb f32>(&self, other: Color, f: F) -> Self { let background = self.to_unassociated_alpha(); Color { @@ -739,6 +796,7 @@ impl Color { } } + #[inline(always)] pub fn alpha_blend(&self, other: Color) -> Self { let inv_alpha = 1. 
- other.alpha; Self { diff --git a/node-graph/gcore/src/raster/discrete_srgb.rs b/node-graph/gcore/src/raster/discrete_srgb.rs new file mode 100644 index 00000000..b14d8299 --- /dev/null +++ b/node-graph/gcore/src/raster/discrete_srgb.rs @@ -0,0 +1,185 @@ +//! Fast conversions between u8 sRGB and linear float. + +// Inspired by https://gist.github.com/rygorous/2203834, but with a slightly +// modified method, custom derived constants and error correction for perfect +// accuracy in accordance with the D3D11 spec: +// https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#FLOATtoSRGB. + +/// CRITICAL_POINTS[i] is the last float value such that it maps to i after +/// conversion to integer sRGB. So if x > CRITICAL_POINTS[i] you know you need +/// to increment i. +#[rustfmt::skip] +const CRITICAL_POINTS: [f32; 256] = [ + 0.00015176347, 0.00045529046, 0.0007588174, 0.0010623443, 0.0013658714, 0.0016693983, 0.0019729252, 0.0022764523, + 0.0025799791, 0.0028835062, 0.0031883009, 0.003509259, 0.003848315, 0.004205748, 0.0045818323, 0.0049768374, + 0.005391024, 0.00582465, 0.0062779686, 0.0067512267, 0.0072446675, 0.0077585294, 0.008293047, 0.008848451, + 0.0094249705, 0.010022825, 0.010642236, 0.01128342, 0.011946591, 0.012631957, 0.013339729, 0.014070111, + 0.0148233045, 0.015599505, 0.01639891, 0.017221717, 0.018068114, 0.018938294, 0.019832445, 0.020750746, + 0.021693384, 0.022660539, 0.02365239, 0.024669115, 0.025710886, 0.026777886, 0.027870273, 0.028988222, + 0.030131903, 0.03130148, 0.032497127, 0.033718992, 0.034967244, 0.03624204, 0.03754355, 0.03887192, + 0.040227327, 0.041609894, 0.04301979, 0.044457167, 0.04592218, 0.04741497, 0.04893569, 0.050484486, + 0.05206151, 0.053666897, 0.055300802, 0.056963358, 0.058654714, 0.060375024, 0.062124394, 0.06390298, + 0.065710925, 0.06754836, 0.06941542, 0.07131224, 0.07323896, 0.07519571, 0.07718261, 0.07919981, + 0.08124744, 0.08332562, 0.08543448, 0.08757417, 0.08974478, 0.091946445, 
0.09417931, 0.09644348, + 0.098739095, 0.10106628, 0.10342514, 0.105815805, 0.1082384, 0.110693045, 0.11317986, 0.11569896, + 0.118250474, 0.12083454, 0.12345121, 0.12610064, 0.12878296, 0.13149826, 0.13424668, 0.1370283, + 0.13984327, 0.14269169, 0.14557366, 0.1484893, 0.15143873, 0.15442204, 0.15743938, 0.16049084, + 0.1635765, 0.16669647, 0.16985092, 0.1730399, 0.17626354, 0.17952198, 0.18281525, 0.1861435, + 0.18950681, 0.19290532, 0.19633913, 0.19980833, 0.20331302, 0.20685332, 0.21042931, 0.21404111, + 0.21768881, 0.22137253, 0.22509235, 0.22884844, 0.23264077, 0.23646952, 0.24033478, 0.24423665, + 0.24817522, 0.25215057, 0.25616285, 0.26021212, 0.26429847, 0.26842204, 0.27258286, 0.27678108, + 0.2810168, 0.28529006, 0.289601, 0.2939497, 0.29833627, 0.30276078, 0.30722332, 0.311724, + 0.31626293, 0.32084015, 0.32545578, 0.33010995, 0.3348027, 0.3395341, 0.34430432, 0.34911346, + 0.3539615, 0.35884857, 0.3637748, 0.36874023, 0.373745, 0.37878913, 0.38387278, 0.388996, + 0.39415887, 0.39936152, 0.404604, 0.4098864, 0.41520882, 0.42057133, 0.425974, 0.431417, + 0.43690032, 0.4424241, 0.44798836, 0.45359328, 0.45923886, 0.46492523, 0.47065246, 0.47642064, + 0.48222986, 0.48808017, 0.4939718, 0.49990457, 0.5058787, 0.5118943, 0.5179514, 0.5240501, + 0.5301905, 0.5363727, 0.5425967, 0.54886264, 0.5551706, 0.56152064, 0.5679129, 0.5743473, + 0.5808241, 0.5873433, 0.593905, 0.60050917, 0.60715604, 0.61384565, 0.62057805, 0.6273533, + 0.63417155, 0.6410328, 0.6479372, 0.65488476, 0.66187555, 0.6689097, 0.6759874, 0.68310845, + 0.6902731, 0.6974814, 0.7047334, 0.71202916, 0.7193688, 0.7267524, 0.73418003, 0.7416518, + 0.7491677, 0.7567278, 0.76433223, 0.7719811, 0.7796744, 0.7874122, 0.7951947, 0.80302185, + 0.8108938, 0.81881046, 0.82677215, 0.8347787, 0.8428304, 0.8509272, 0.85906917, 0.8672564, + 0.875489, 0.8837671, 0.89209044, 0.9004596, 0.9088741, 0.91733456, 0.9258405, 0.9343926, + 0.94299024, 0.95163417, 0.96032387, 0.96906, 0.977842, 0.9866705, 0.9955452, 1., 
+]; + +#[rustfmt::skip] +const FLOAT_SRGB_LERP: [u32; 27] = [ + 0x66f, 0x66f063b, 0xcaa0515, 0x11c00773, 0x193305dc, 0x1f1004f3, 0x24030481, 0x28850773, + 0x2ff9065e, 0x365805a1, 0x3bfa0547, 0x414108f7, 0x4a3907d8, 0x52110709, 0x591b06aa, 0x5fc50b70, + 0x6b350a18, 0x754e091c, 0x7e6b08aa, 0x87160ef1, 0x96070d3e, 0xa3460bfc, 0xaf430b6c, 0xbaaf13bd, + 0xce6d1187, 0xdff40fe3, 0xefd70f28, +]; + +#[inline] +pub fn float_to_srgb_u8(mut f: f32) -> u8 { + // Clamp f to [0, 1], with a negated condition to handle NaNs as 0. + if !(f >= 0.0) { + f = 0.0; + } else if f > 1.0 { + f = 1.0; + } + + // Shift away slightly from 0.0 to reduce exponent range. + const C: f32 = 0.009842521f32; + let u = (f + C).to_bits() - C.to_bits(); + if u > (1.0 + C).to_bits() - C.to_bits() { + // SAFETY: we clamped f to [0, 1], and the integer representations + // of the positive finite non-NaN floats are monotonic. + // This makes the later LUT lookup panicless. + unsafe { core::hint::unreachable_unchecked() } + } + + // Compute a piecewise linear interpolation that is always + // the correct answer, or one less than it. + let u16mask = (1 << 16) - 1; + let lut_idx = u >> 21; + let lerp_idx = (u >> 5) & u16mask; + let bias_mult = FLOAT_SRGB_LERP[lut_idx as usize]; + let bias = (bias_mult >> 16) << 16; + let mult = bias_mult & u16mask; + // I don't believe this wraps, but since we test in release mode, + // better make sure debug mode behaves the same. + let lerp = bias.wrapping_add(mult * lerp_idx) >> 24; + + // Adjust linear interpolation to the correct value.
+ if f > CRITICAL_POINTS[lerp as usize] { + lerp as u8 + 1 + } else { + lerp as u8 + } +} + +#[rustfmt::skip] +const FROM_SRGB_U8: [f32; 256] = [ + 0., 0.000303527, 0.000607054, 0.00091058103, 0.001214108, 0.001517635, 0.0018211621, 0.002124689, + 0.002428216, 0.002731743, 0.00303527, 0.0033465356, 0.003676507, 0.004024717, 0.004391442, + 0.0047769533, 0.005181517, 0.0056053917, 0.0060488326, 0.006512091, 0.00699541, 0.0074990317, + 0.008023192, 0.008568125, 0.009134057, 0.009721218, 0.010329823, 0.010960094, 0.011612245, + 0.012286487, 0.012983031, 0.013702081, 0.014443844, 0.015208514, 0.015996292, 0.016807375, + 0.017641952, 0.018500218, 0.019382361, 0.020288562, 0.02121901, 0.022173883, 0.023153365, + 0.02415763, 0.025186857, 0.026241222, 0.027320892, 0.028426038, 0.029556843, 0.03071345, 0.03189604, + 0.033104774, 0.03433981, 0.035601325, 0.036889452, 0.038204376, 0.039546248, 0.04091521, 0.042311423, + 0.043735042, 0.045186214, 0.046665095, 0.048171833, 0.049706575, 0.051269468, 0.052860655, 0.05448028, + 0.056128494, 0.057805434, 0.05951124, 0.06124607, 0.06301003, 0.06480328, 0.06662595, 0.06847818, + 0.07036011, 0.07227186, 0.07421358, 0.07618539, 0.07818743, 0.08021983, 0.082282715, 0.084376216, + 0.086500466, 0.088655606, 0.09084173, 0.09305898, 0.095307484, 0.09758736, 0.09989874, 0.10224175, + 0.10461649, 0.10702311, 0.10946172, 0.111932434, 0.11443538, 0.116970696, 0.11953845, 0.12213881, + 0.12477186, 0.12743773, 0.13013652, 0.13286836, 0.13563336, 0.13843165, 0.14126332, 0.1441285, + 0.1470273, 0.14995982, 0.15292618, 0.1559265, 0.15896086, 0.16202943, 0.16513224, 0.16826946, + 0.17144115, 0.17464745, 0.17788847, 0.1811643, 0.18447503, 0.1878208, 0.19120172, 0.19461787, + 0.19806935, 0.2015563, 0.20507877, 0.2086369, 0.21223079, 0.21586053, 0.21952623, 0.22322798, + 0.22696589, 0.23074007, 0.23455065, 0.23839766, 0.2422812, 0.2462014, 0.25015837, 0.25415218, + 0.2581829, 0.26225072, 0.26635566, 0.27049786, 0.27467737, 0.27889434, 0.2831488, 
0.2874409, + 0.2917707, 0.29613832, 0.30054384, 0.30498737, 0.30946895, 0.31398875, 0.31854683, 0.32314324, + 0.32777813, 0.33245158, 0.33716366, 0.34191445, 0.3467041, 0.3515327, 0.35640025, 0.36130688, + 0.3662527, 0.37123778, 0.37626222, 0.3813261, 0.38642952, 0.39157256, 0.3967553, 0.40197787, + 0.4072403, 0.4125427, 0.41788515, 0.42326775, 0.42869055, 0.4341537, 0.43965724, 0.44520125, + 0.45078585, 0.45641106, 0.46207705, 0.46778384, 0.47353154, 0.47932023, 0.48514998, 0.4910209, + 0.49693304, 0.5028866, 0.50888145, 0.5149178, 0.5209957, 0.52711535, 0.5332766, 0.5394797, + 0.5457247, 0.5520116, 0.5583406, 0.5647117, 0.57112503, 0.57758063, 0.5840786, 0.590619, 0.597202, + 0.60382754, 0.61049575, 0.61720675, 0.62396055, 0.63075733, 0.637597, 0.6444799, 0.6514058, + 0.65837497, 0.66538745, 0.67244333, 0.6795426, 0.68668544, 0.69387203, 0.70110214, 0.70837605, + 0.7156938, 0.72305536, 0.730461, 0.7379107, 0.7454045, 0.75294244, 0.76052475, 0.7681514, 0.77582246, + 0.78353804, 0.79129815, 0.79910296, 0.8069525, 0.8148468, 0.822786, 0.8307701, 0.83879924, 0.84687346, + 0.8549928, 0.8631574, 0.87136734, 0.8796226, 0.8879232, 0.89626956, 0.90466136, 0.913099, 0.92158204, + 0.93011117, 0.9386859, 0.9473069, 0.9559735, 0.9646866, 0.9734455, 0.98225087, 0.9911022, 1., +]; + +#[inline] +pub fn srgb_u8_to_float(c: u8) -> f32 { + FROM_SRGB_U8[c as usize] +} + +#[cfg(test)] +mod tests { + use super::*; + + // https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#FLOATtoSRGB + fn float_to_srgb_ref(f: f32) -> f32 { + if !(f > 0_f32) { + 0_f32 + } else if f <= 0.0031308f32 { + 12.92_f32 * f + } else if f < 1_f32 { + 1.055f32 * f.powf(1.0_f32 / 2.4_f32) - 0.055f32 + } else { + 1_f32 + } + } + + fn float_to_srgb_u8_ref(f: f32) -> u8 { + (float_to_srgb_ref(f) * 255_f32 + 0.5_f32) as u8 + } + + // https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#SRGBtoFLOAT + fn srgb_to_float_ref(f: f32) -> f32 { + if f <= 0.04045f32 { +
f / 12.92f32 + } else { + ((f + 0.055f32) / 1.055f32).powf(2.4_f32) + } + } + + fn srgb_u8_to_float_ref(c: u8) -> f32 { + srgb_to_float_ref(c as f32 * (1_f32 / 255.0_f32)) + } + + #[test] + fn test_srgb_u8_to_float() { + for u in 0..=u8::MAX { + assert!(srgb_u8_to_float(u) == srgb_u8_to_float_ref(u)); + } + } + + #[ignore = "expensive, test in release mode"] + #[test] + fn test_float_to_srgb_u8() { + // Simply... check all float values. + for u in 0..=u32::MAX { + let f = f32::from_bits(u); + assert!(float_to_srgb_u8(f) == float_to_srgb_u8_ref(f)); + } + } +} diff --git a/node-graph/gcore/src/raster/image.rs b/node-graph/gcore/src/raster/image.rs index 0698d006..ca87b91b 100644 --- a/node-graph/gcore/src/raster/image.rs +++ b/node-graph/gcore/src/raster/image.rs @@ -1,3 +1,4 @@ +use super::discrete_srgb::float_to_srgb_u8; use super::{Color, ImageSlice}; use crate::Node; use alloc::vec::Vec; @@ -135,27 +136,35 @@ use super::*; impl Image

where P::ColorChannel: Linear, +

::AlphaChannel: Linear, { /// Flattens each channel cast to a u8 pub fn into_flat_u8(self) -> (Vec, u32, u32) { let Image { width, height, data } = self; + assert!(data.len() == width as usize * height as usize); - let to_gamma = SRGBGammaFloat::from_linear; - let to_u8 = |x| (num_cast::<_, f32>(x).unwrap() * 255.) as u8; + let mut result = Vec::with_capacity(data.len() * 4); + for color in data { + let a = color.a().to_f32(); + if a < 0.5 / 255.0 { + // This would map to fully transparent anyway, avoid expensive encoding. + result.push(0); + result.push(0); + result.push(0); + result.push(0); + } else { + let undo_premultiply = 1.0 / a; + let r = float_to_srgb_u8(color.r().to_f32() * undo_premultiply); + let g = float_to_srgb_u8(color.g().to_f32() * undo_premultiply); + let b = float_to_srgb_u8(color.b().to_f32() * undo_premultiply); + result.push(r); + result.push(g); + result.push(b); + result.push((a * 255.0 + 0.5) as u8); + } + } - let result_bytes = data - .into_iter() - .flat_map(|color| { - [ - to_u8(to_gamma(color.r() / color.a().to_channel())), - to_u8(to_gamma(color.g() / color.a().to_channel())), - to_u8(to_gamma(color.b() / color.a().to_channel())), - (num_cast::<_, f32>(color.a()).unwrap() * 255.) as u8, - ] - }) - .collect(); - - (result_bytes, width, height) + (result, width, height) } } diff --git a/node-graph/gstd/src/brush.rs b/node-graph/gstd/src/brush.rs index 7364f5a9..ef3ec00a 100644 --- a/node-graph/gstd/src/brush.rs +++ b/node-graph/gstd/src/brush.rs @@ -81,7 +81,7 @@ impl Sample for BrushStampGenerator

{ }; use graphene_core::raster::Channel; - Some(self.color.multiplied_alpha(P::AlphaChannel::from_f32(result))) + Some(self.color.multiplied_alpha(P::AlphaChannel::from_linear(result))) } } diff --git a/node-graph/gstd/src/main.rs b/node-graph/gstd/src/main.rs index fa416508..67609113 100644 --- a/node-graph/gstd/src/main.rs +++ b/node-graph/gstd/src/main.rs @@ -112,7 +112,7 @@ fn main() { //let mut mul = mul::MulNode::new(); let mut stack: borrow_stack::FixedSizeStack>> = borrow_stack::FixedSizeStack::new(42); - unsafe { stack.push(Box::new(AnyValueNode::new(1f32))) }; + unsafe { stack.push(Box::new(AnyValueNode::new(1_f32))) }; //let node = unsafe { stack.get(0) }; //let boxed = Box::new(StorageNode::new(node)); //unsafe { stack.push(boxed) }; @@ -123,7 +123,7 @@ fn main() { .push(Box::new(AnyRefNode::new(stack.get(0).as_ref())) as Box>) };*/ - let f = (3.2f32, 3.1f32); + let f = (3.2_f32, 3.1_f32); let a = ValueNode::new(1.); let id = std::any::TypeId::of::<&f32>(); let any_a = AnyRefNode::new(&a); diff --git a/node-graph/gstd/src/raster.rs b/node-graph/gstd/src/raster.rs index fcbd0b19..14767699 100644 --- a/node-graph/gstd/src/raster.rs +++ b/node-graph/gstd/src/raster.rs @@ -1,6 +1,6 @@ use dyn_any::{DynAny, StaticType}; use glam::{DAffine2, DVec2}; -use graphene_core::raster::{Alpha, BlendMode, BlendNode, Channel, Image, ImageFrame, Luminance, Pixel, RasterMut, Sample}; +use graphene_core::raster::{Alpha, BlendMode, BlendNode, Image, ImageFrame, LinearChannel, Luminance, Pixel, RasterMut, Sample}; use graphene_core::transform::Transform; use graphene_core::value::CopiedNode; @@ -217,7 +217,7 @@ fn mask_image< let image_pixel = image.get_pixel_mut(x, y).unwrap(); if let Some(mask_pixel) = stencil.sample(mask_point, area) { - *image_pixel = image_pixel.multiplied_alpha(mask_pixel.l().to_channel()); + *image_pixel = image_pixel.multiplied_alpha(mask_pixel.l().cast_linear_channel()); } } }