audio-oxide/au-o2-gui/src/engine/atmos.rs

319 lines
9.0 KiB
Rust

//! Object-based spatial audio renderer.
//!
//! Provides binaural rendering via simplified HRTF (ITD + ILD + head shadow)
//! and 7.1.4 speaker bed rendering via VBAP.

/// 7.1.4 speaker layout as `(azimuth, elevation)` pairs in degrees.
///
/// Negative azimuth is to the listener's left and positive elevation is above
/// the listener. The entry order is also the channel order of the 12-channel
/// buffers produced and consumed by the functions in this file.
const SPEAKERS_714: [(f32, f32); 12] = [
(-30.0, 0.0), // L
(30.0, 0.0), // R
(0.0, 0.0), // C
(0.0, -30.0), // LFE (below, but gain-only)
(-110.0, 0.0), // Ls
(110.0, 0.0), // Rs
(-150.0, 0.0), // Lrs
(150.0, 0.0), // Rrs
(-45.0, 45.0), // Ltf
(45.0, 45.0), // Rtf
(-135.0, 45.0), // Ltr
(135.0, 45.0), // Rtr
];
/// Radius of the spherical head model used for the interaural time difference, in metres.
const HEAD_RADIUS_M: f32 = 0.0875;
/// Speed of sound used for the interaural time difference; metres per second, given that
/// `HEAD_RADIUS_M / SPEED_OF_SOUND` is treated as seconds.
const SPEED_OF_SOUND: f32 = 343.0;
/// Largest interaural delay, in samples, that the binaural renderer will apply.
///
/// The binaural delay lines hold this many samples plus one, and the computed delay is
/// clamped to this value whatever sample rate is passed in.
/// NOTE(review): the name suggests the bound was chosen for 48 kHz; at higher rates the
/// clamp shortens the effective delay in seconds — confirm this is acceptable.
const MAX_ITD_SAMPLES_48K: usize = 30;
/// Position and spread of an audio object in normalized room coordinates.
#[derive(Debug, Clone, Copy)]
pub struct ObjectPosition {
    /// Left/right position, -1..1 (negative is left).
    pub x: f32,
    /// Front/back position, -1..1 (negative is in front of the listener).
    pub y: f32,
    /// Bottom/top position, -1..1 (positive is above the listener).
    pub z: f32,
    /// Object spread, 0..1.
    pub size: f32,
}

impl ObjectPosition {
    /// Azimuth of the object in radians, in `[-π, π]`.
    ///
    /// Zero is straight ahead, negative values are to the left. An object with
    /// `x == 0` and `y == 0` (centred, or directly above/below) reports zero.
    fn azimuth_rad(&self) -> f32 {
        // `-self.y` is -0.0 when y is 0.0, and atan2(±0.0, -0.0) evaluates to ±π,
        // which would place a centred object directly behind the listener.
        if self.x == 0.0 && self.y == 0.0 {
            return 0.0;
        }
        self.x.atan2(-self.y)
    }

    /// Elevation of the object in radians, in `[-π/2, π/2]`; positive is above.
    fn elevation_rad(&self) -> f32 {
        let horiz = (self.x * self.x + self.y * self.y).sqrt();
        self.z.atan2(horiz)
    }
}
/// Binaural renderer state for a single object.
///
/// Holds everything that must persist between successive `render` calls:
/// the delay lines used for the interaural time difference, the per-ear
/// filter memory, and the angles cached for the last seen position.
pub struct BinauralState {
// Circular buffers of recent input samples, one per ear; both share `write_pos`.
delay_line_l: Vec<f32>,
delay_line_r: Vec<f32>,
// Index in the delay lines where the next input sample is written.
write_pos: usize,
// One-pole low-pass state for head shadow
shadow_state_l: f32,
shadow_state_r: f32,
// Azimuth and elevation (radians) computed for the cached position below.
cached_az: f32,
cached_el: f32,
// Position for which `cached_az` / `cached_el` were last computed.
cached_x: f32,
cached_y: f32,
cached_z: f32,
}
impl BinauralState {
    /// Creates a state with silent delay lines, cleared filter memory and the
    /// angle cache primed for an object at the origin.
    pub fn new() -> Self {
        Self {
            delay_line_l: vec![0.0; MAX_ITD_SAMPLES_48K + 1],
            delay_line_r: vec![0.0; MAX_ITD_SAMPLES_48K + 1],
            write_pos: 0,
            shadow_state_l: 0.0,
            shadow_state_r: 0.0,
            cached_az: 0.0,
            cached_el: 0.0,
            cached_x: 0.0,
            cached_y: 0.0,
            cached_z: 0.0,
        }
    }

    /// Returns `(azimuth, elevation)` in radians for `pos`, recomputing them
    /// only when the position differs from the cached one.
    fn angles_for(&mut self, pos: &ObjectPosition) -> (f32, f32) {
        let moved =
            pos.x != self.cached_x || pos.y != self.cached_y || pos.z != self.cached_z;
        if moved {
            self.cached_az = pos.azimuth_rad();
            self.cached_el = pos.elevation_rad();
            self.cached_x = pos.x;
            self.cached_y = pos.y;
            self.cached_z = pos.z;
        }
        (self.cached_az, self.cached_el)
    }

    /// Render mono source to binaural stereo using simplified HRTF.
    ///
    /// The result is *added* to `out_l` / `out_r`, so several objects can be
    /// mixed into the same buffers. Only as many frames as fit in all three
    /// slices are processed.
    ///
    /// * `mono` - input samples for this object.
    /// * `out_l`, `out_r` - accumulation buffers for the left and right ear.
    /// * `pos` - object position; only its direction is used.
    /// * `sample_rate` - sample rate in Hz, used to convert the interaural
    ///   delay to samples. The delay is capped at `MAX_ITD_SAMPLES_48K`
    ///   samples, so it is shortened at rates well above 48 kHz.
    pub fn render(
        &mut self,
        mono: &[f32],
        out_l: &mut [f32],
        out_r: &mut [f32],
        pos: &ObjectPosition,
        sample_rate: u32,
    ) {
        debug_assert!(out_l.len() >= mono.len() && out_r.len() >= mono.len());

        let (az, el) = self.angles_for(pos);
        let source_left = az < 0.0; // negative azimuth = left side

        // ITD: Woodworth formula. It is defined for lateral angles up to 90
        // degrees, so rear azimuths are mirrored onto the front hemisphere;
        // otherwise a source directly behind would get the largest delay
        // instead of none.
        let lateral = {
            let a = az.abs();
            if a > std::f32::consts::FRAC_PI_2 {
                (std::f32::consts::PI - a).max(0.0)
            } else {
                a
            }
        };
        let itd_sec = HEAD_RADIUS_M / SPEED_OF_SOUND * (lateral.sin() + lateral);
        let itd_samples = ((itd_sec * sample_rate as f32) as usize).min(MAX_ITD_SAMPLES_48K);

        // ILD: frequency-independent approximation, ~6 dB max, applied to the far ear.
        let far_gain = 10.0_f32.powf(-(6.0 * az.sin()).abs() / 20.0);
        let (gain_l, gain_r) = if source_left {
            (1.0, far_gain)
        } else {
            (far_gain, 1.0)
        };

        // Elevation: attenuate slightly when source is above/below.
        let el_atten = 1.0 - 0.15 * el.abs();

        // Head shadow: one-pole low-pass on the far ear. The smoothing factor
        // drops from 1.0 (transparent) as the source moves to the side, which
        // lowers the cutoff. The near ear always uses 1.0, so its filter
        // memory keeps tracking the signal and no stale state is left behind
        // when the source crosses to the other side.
        let shadow_amount = az.sin().abs() * 0.4;
        let (alpha_l, alpha_r) = if source_left {
            (1.0, 1.0 - shadow_amount)
        } else {
            (1.0 - shadow_amount, 1.0)
        };

        let dl_len = self.delay_line_l.len();
        let frames = mono.iter().zip(out_l.iter_mut()).zip(out_r.iter_mut());
        for ((&input, dst_l), dst_r) in frames {
            let s = input * el_atten;
            self.delay_line_l[self.write_pos] = s;
            self.delay_line_r[self.write_pos] = s;

            // The near ear reads the newest sample, the far ear an older one.
            let delayed = (self.write_pos + dl_len - itd_samples) % dl_len;
            let (read_l, read_r) = if source_left {
                (self.write_pos, delayed)
            } else {
                (delayed, self.write_pos)
            };
            let raw_l = self.delay_line_l[read_l] * gain_l;
            let raw_r = self.delay_line_r[read_r] * gain_r;

            // Written as a weighted sum so a factor of exactly 1.0 passes the
            // input through bit-for-bit.
            self.shadow_state_l = alpha_l * raw_l + (1.0 - alpha_l) * self.shadow_state_l;
            self.shadow_state_r = alpha_r * raw_r + (1.0 - alpha_r) * self.shadow_state_r;

            *dst_l += self.shadow_state_l;
            *dst_r += self.shadow_state_r;

            self.write_pos = (self.write_pos + 1) % dl_len;
        }
    }
}

impl Default for BinauralState {
    fn default() -> Self {
        Self::new()
    }
}
/// Compute 7.1.4 panning gains for a given object position.
///
/// Returns one gain per speaker, in `SPEAKERS_714` order.
///
/// Each directional speaker is weighted by the inverse of its angular
/// distance to the object (in degrees, floored at one degree), and the
/// weights are normalized so the squared directional gains sum to one.
/// `pos.size` (clamped to 0..1) blends the directional gains towards an even
/// distribution over the directional speakers; the result is normalized
/// again so the total power does not dip at intermediate sizes.
///
/// The LFE channel takes no part in panning or spreading and always gets a
/// fixed send.
pub fn vbap_714(pos: &ObjectPosition) -> [f32; 12] {
    const LFE: usize = 3;
    const LFE_SEND: f32 = 0.25;

    /// Scales `gains` so that the squared gains sum to one.
    fn normalize_power(gains: &mut [f32; 12]) {
        let power: f32 = gains.iter().map(|g| g * g).sum();
        if power > 0.0 {
            let norm = power.sqrt().recip();
            for g in gains.iter_mut() {
                *g *= norm;
            }
        }
    }

    let az = pos.azimuth_rad().to_degrees();
    let el = pos.elevation_rad().to_degrees();

    // The LFE slot stays at zero until the very end so it never influences
    // the power normalization of the directional speakers.
    let mut gains = [0.0_f32; 12];
    for (i, (gain, &(spk_az, spk_el))) in gains.iter_mut().zip(&SPEAKERS_714).enumerate() {
        if i == LFE {
            continue;
        }
        let daz = angle_diff(az, spk_az);
        let del = el - spk_el;
        // Flooring the distance keeps the gain finite when the object sits
        // exactly on a speaker.
        let dist = (daz * daz + del * del).sqrt().max(1.0);
        *gain = 1.0 / dist;
    }
    normalize_power(&mut gains);

    // Object size: spread energy across more speakers.
    if pos.size > 0.0 {
        let spread = pos.size.clamp(0.0, 1.0);
        let directional = (gains.len() - 1) as f32;
        let uniform = 1.0 / directional.sqrt();
        for (i, g) in gains.iter_mut().enumerate() {
            if i != LFE {
                *g = *g * (1.0 - spread) + uniform * spread;
            }
        }
        normalize_power(&mut gains);
    }

    // LFE: low-frequency content gets a fixed send.
    gains[LFE] = LFE_SEND;
    gains
}
/// Render mono source to 7.1.4 speaker bed (12 channels interleaved).
///
/// Every input sample is scaled by the per-speaker gains from [`vbap_714`]
/// and *added* to the matching frame of `output`, so several objects can be
/// mixed into the same bed.
///
/// `output` is expected to hold at least `mono.len() * 12` samples; this is
/// checked in debug builds. In release builds a shorter buffer is filled up
/// to its last complete frame and the remaining input is dropped rather than
/// panicking.
pub fn render_714(
    mono: &[f32],
    output: &mut [f32],
    pos: &ObjectPosition,
) {
    let gains = vbap_714(pos);
    debug_assert!(output.len() >= mono.len() * gains.len());
    for (frame, &sample) in output.chunks_exact_mut(gains.len()).zip(mono) {
        for (out, &gain) in frame.iter_mut().zip(&gains) {
            *out += sample * gain;
        }
    }
}
/// Downmix 7.1.4 (12ch) to stereo using ITU-R BS.775 derived coefficients.
///
/// `input` is read as interleaved 12-channel frames in the order
/// L, R, C, LFE, Ls, Rs, Lrs, Rrs, Ltf, Rtf, Ltr, Rtr. Each frame is *added*
/// to the matching sample of `out_l` / `out_r`; nothing is cleared first and
/// no output normalization is applied.
///
/// Processing stops at the shortest of: the number of complete frames in
/// `input`, `out_l.len()`, and `out_r.len()`. A trailing partial frame is
/// ignored.
pub fn downmix_714_to_stereo(input: &[f32], out_l: &mut [f32], out_r: &mut [f32]) {
    let inv_sqrt2 = std::f32::consts::FRAC_1_SQRT_2;
    let outputs = out_l.iter_mut().zip(out_r.iter_mut());
    for (frame, (dst_l, dst_r)) in input.chunks_exact(12).zip(outputs) {
        // `chunks_exact(12)` only yields 12-sample slices, so this always matches.
        if let &[l, r, c, lfe, ls, rs, lrs, rrs, ltf, rtf, ltr, rtr] = frame {
            *dst_l += l + inv_sqrt2 * c + inv_sqrt2 * lfe
                + inv_sqrt2 * ls + 0.5 * lrs
                + inv_sqrt2 * ltf + 0.5 * ltr;
            *dst_r += r + inv_sqrt2 * c + inv_sqrt2 * lfe
                + inv_sqrt2 * rs + 0.5 * rrs
                + inv_sqrt2 * rtf + 0.5 * rtr;
        }
    }
}
/// Signed difference `a - b` between two angles in degrees, wrapped into
/// `[-180, 180]`.
///
/// Runs in constant time for any input. A non-finite difference yields NaN.
fn angle_diff(a: f32, b: f32) -> f32 {
    // The remainder keeps the sign of the dividend, so `d` lies in (-360, 360)
    // and a single fold is enough. Repeated subtraction would never finish for
    // very large or infinite values, where subtracting 360 is lost to rounding.
    let d = (a - b) % 360.0;
    if d > 180.0 {
        d - 360.0
    } else if d < -180.0 {
        d + 360.0
    } else {
        d
    }
}
/// Selectable spatial rendering modes.
///
/// The default is [`SpatialRenderMode::Stereo`]. `Display` yields a short
/// label for each mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SpatialRenderMode {
    /// Displayed as "Mono".
    Mono,
    /// Displayed as "Stereo"; the default mode.
    #[default]
    Stereo,
    /// Displayed as "Binaural".
    Binaural,
    /// Displayed as "7.1.4".
    Surround714,
}

impl SpatialRenderMode {
    /// Every mode, in declaration order.
    pub const ALL: [SpatialRenderMode; 4] = [
        Self::Mono,
        Self::Stereo,
        Self::Binaural,
        Self::Surround714,
    ];
}

impl std::fmt::Display for SpatialRenderMode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Mono => "Mono",
            Self::Stereo => "Stereo",
            Self::Binaural => "Binaural",
            Self::Surround714 => "7.1.4",
        };
        f.write_str(label)
    }
}
/// Selectable mono lanes: a mix of both channels, or one channel alone.
///
/// The default is [`MonoLane::Mix`]. `Display` yields the short labels
/// "L+R", "L" and "R".
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum MonoLane {
    /// Displayed as "L+R"; the default lane.
    #[default]
    Mix,
    /// Displayed as "L".
    Left,
    /// Displayed as "R".
    Right,
}

impl MonoLane {
    /// Every lane, in declaration order.
    pub const ALL: [MonoLane; 3] = [Self::Mix, Self::Left, Self::Right];
}

impl std::fmt::Display for MonoLane {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            MonoLane::Mix => "L+R",
            MonoLane::Left => "L",
            MonoLane::Right => "R",
        };
        f.write_str(label)
    }
}