//! Object-based spatial audio renderer.
//!
//! Provides binaural rendering via simplified HRTF (ITD + ILD + head shadow)
//! and 7.1.4 speaker bed rendering via VBAP.
|
|
|
|
/// 7.1.4 speaker layout as `(azimuth, elevation)` pairs in degrees.
///
/// Negative azimuth is to the listener's left; positive elevation is above
/// ear level. The array index is the channel index used throughout this file.
const SPEAKERS_714: [(f32, f32); 12] = [
    (-30.0, 0.0),   // L
    (30.0, 0.0),    // R
    (0.0, 0.0),     // C
    (0.0, -30.0),   // LFE (below, but gain-only)
    (-110.0, 0.0),  // Ls
    (110.0, 0.0),   // Rs
    (-150.0, 0.0),  // Lrs
    (150.0, 0.0),   // Rrs
    (-45.0, 45.0),  // Ltf
    (45.0, 45.0),   // Rtf
    (-135.0, 45.0), // Ltr
    (135.0, 45.0),  // Rtr
];

/// Head radius in metres used by the interaural time difference model.
const HEAD_RADIUS_M: f32 = 0.0875;

/// Speed of sound in metres per second.
const SPEED_OF_SOUND: f32 = 343.0;

/// Largest interaural delay, in samples at 48 kHz, that the delay lines hold.
///
/// The Woodworth model peaks at `r / c * (1 + pi/2)`, which is about 31.5
/// samples at 48 kHz, so the bound must be at least 31; 32 leaves one sample
/// of headroom.
const MAX_ITD_SAMPLES_48K: usize = 32;
|
|
|
|
/// Position and spread of an audio object in normalised coordinates.
#[derive(Debug, Clone, Copy)]
pub struct ObjectPosition {
    /// Left/right position, -1..1 (negative is left).
    pub x: f32,
    /// Front/back position, -1..1 (negative is front).
    pub y: f32,
    /// Bottom/top position, -1..1.
    pub z: f32,
    /// Object spread, 0..1.
    pub size: f32,
}

impl ObjectPosition {
    /// Azimuth in radians: 0 is straight ahead, negative is to the left,
    /// and +/- pi is directly behind.
    fn azimuth_rad(&self) -> f32 {
        // `0.0 - y` instead of `-y`: negating `+0.0` produces `-0.0`, and
        // `atan2(0.0, -0.0)` is pi, which would place a centred or overhead
        // source (x = 0, y = 0) behind the listener instead of in front.
        self.x.atan2(0.0 - self.y)
    }

    /// Elevation in radians above (positive) or below (negative) the
    /// horizontal plane.
    fn elevation_rad(&self) -> f32 {
        let horiz = (self.x * self.x + self.y * self.y).sqrt();
        self.z.atan2(horiz)
    }
}
|
|
|
|
/// Binaural renderer state for a single object
|
|
pub struct BinauralState {
|
|
delay_line_l: Vec<f32>,
|
|
delay_line_r: Vec<f32>,
|
|
write_pos: usize,
|
|
// One-pole low-pass state for head shadow
|
|
shadow_state_l: f32,
|
|
shadow_state_r: f32,
|
|
cached_az: f32,
|
|
cached_el: f32,
|
|
cached_x: f32,
|
|
cached_y: f32,
|
|
cached_z: f32,
|
|
}
|
|
|
|
impl BinauralState {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
delay_line_l: vec![0.0; MAX_ITD_SAMPLES_48K + 1],
|
|
delay_line_r: vec![0.0; MAX_ITD_SAMPLES_48K + 1],
|
|
write_pos: 0,
|
|
shadow_state_l: 0.0,
|
|
shadow_state_r: 0.0,
|
|
cached_az: 0.0,
|
|
cached_el: 0.0,
|
|
cached_x: 0.0,
|
|
cached_y: 0.0,
|
|
cached_z: 0.0,
|
|
}
|
|
}
|
|
|
|
/// Render mono source to binaural stereo using simplified HRTF.
|
|
pub fn render(
|
|
&mut self,
|
|
mono: &[f32],
|
|
out_l: &mut [f32],
|
|
out_r: &mut [f32],
|
|
pos: &ObjectPosition,
|
|
sample_rate: u32,
|
|
) {
|
|
let (az, el) = if pos.x != self.cached_x || pos.y != self.cached_y || pos.z != self.cached_z {
|
|
let a = pos.azimuth_rad();
|
|
let e = pos.elevation_rad();
|
|
self.cached_az = a;
|
|
self.cached_el = e;
|
|
self.cached_x = pos.x;
|
|
self.cached_y = pos.y;
|
|
self.cached_z = pos.z;
|
|
(a, e)
|
|
} else {
|
|
(self.cached_az, self.cached_el)
|
|
};
|
|
|
|
// ITD: Woodworth formula
|
|
let itd_sec = HEAD_RADIUS_M / SPEED_OF_SOUND * (az.sin() + az);
|
|
let itd_samples = (itd_sec.abs() * sample_rate as f32) as usize;
|
|
let itd_samples = itd_samples.min(MAX_ITD_SAMPLES_48K);
|
|
let source_left = az < 0.0; // negative azimuth = left side
|
|
|
|
// ILD: frequency-independent approximation, ~6dB max
|
|
let ild_db = 6.0 * az.sin();
|
|
let gain_l;
|
|
let gain_r;
|
|
if source_left {
|
|
gain_l = 1.0;
|
|
gain_r = 10.0_f32.powf(-ild_db.abs() / 20.0);
|
|
} else {
|
|
gain_l = 10.0_f32.powf(-ild_db.abs() / 20.0);
|
|
gain_r = 1.0;
|
|
}
|
|
|
|
// Elevation: attenuate slightly when source is above/below
|
|
let el_atten = 1.0 - 0.15 * el.abs();
|
|
|
|
// Head shadow: one-pole LPF coefficient for contralateral ear
|
|
// More shadow (lower cutoff) for sources further to the side
|
|
let shadow_amount = az.sin().abs() * 0.4;
|
|
let shadow_coeff_l = if source_left { 0.0 } else { shadow_amount };
|
|
let shadow_coeff_r = if source_left { shadow_amount } else { 0.0 };
|
|
|
|
let dl_len = self.delay_line_l.len();
|
|
|
|
for i in 0..mono.len() {
|
|
let s = mono[i] * el_atten;
|
|
|
|
self.delay_line_l[self.write_pos] = s;
|
|
self.delay_line_r[self.write_pos] = s;
|
|
|
|
// Read with ITD offset
|
|
let read_l = if source_left {
|
|
self.write_pos
|
|
} else {
|
|
(self.write_pos + dl_len - itd_samples) % dl_len
|
|
};
|
|
let read_r = if source_left {
|
|
(self.write_pos + dl_len - itd_samples) % dl_len
|
|
} else {
|
|
self.write_pos
|
|
};
|
|
|
|
let raw_l = self.delay_line_l[read_l] * gain_l;
|
|
let raw_r = self.delay_line_r[read_r] * gain_r;
|
|
|
|
// Head shadow filter
|
|
self.shadow_state_l += shadow_coeff_l * (raw_l - self.shadow_state_l);
|
|
self.shadow_state_r += shadow_coeff_r * (raw_r - self.shadow_state_r);
|
|
|
|
let filtered_l = raw_l - shadow_coeff_l * self.shadow_state_l;
|
|
let filtered_r = raw_r - shadow_coeff_r * self.shadow_state_r;
|
|
|
|
out_l[i] += filtered_l;
|
|
out_r[i] += filtered_r;
|
|
|
|
self.write_pos = (self.write_pos + 1) % dl_len;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Compute 7.1.4 VBAP gains for a given object position.
|
|
/// Returns gains for each of the 12 speakers.
|
|
pub fn vbap_714(pos: &ObjectPosition) -> [f32; 12] {
|
|
let az = pos.azimuth_rad().to_degrees();
|
|
let el = pos.elevation_rad().to_degrees();
|
|
|
|
let mut gains = [0.0_f32; 12];
|
|
let mut total = 0.0_f32;
|
|
|
|
for (i, &(spk_az, spk_el)) in SPEAKERS_714.iter().enumerate() {
|
|
if i == 3 { continue; } // Skip LFE for directional panning
|
|
|
|
let daz = angle_diff(az, spk_az);
|
|
let del = el - spk_el;
|
|
let dist = (daz * daz + del * del).sqrt().max(1.0);
|
|
let g = (1.0 / dist).max(0.0);
|
|
gains[i] = g;
|
|
total += g * g;
|
|
}
|
|
|
|
// Normalize to constant power
|
|
if total > 0.0 {
|
|
let norm = total.sqrt().recip();
|
|
for g in &mut gains {
|
|
*g *= norm;
|
|
}
|
|
}
|
|
|
|
// LFE: low-frequency content gets a fixed send
|
|
gains[3] = 0.25;
|
|
|
|
// Object size: spread energy across more speakers
|
|
if pos.size > 0.0 {
|
|
let base = gains;
|
|
let spread = pos.size.clamp(0.0, 1.0);
|
|
let uniform = 1.0 / 12.0_f32.sqrt();
|
|
for (i, g) in gains.iter_mut().enumerate() {
|
|
*g = base[i] * (1.0 - spread) + uniform * spread;
|
|
}
|
|
}
|
|
|
|
gains
|
|
}
|
|
|
|
/// Render mono source to 7.1.4 speaker bed (12 channels interleaved).
|
|
pub fn render_714(
|
|
mono: &[f32],
|
|
output: &mut [f32],
|
|
pos: &ObjectPosition,
|
|
) {
|
|
let gains = vbap_714(pos);
|
|
let frames = mono.len();
|
|
debug_assert!(output.len() >= frames * 12);
|
|
|
|
for i in 0..frames {
|
|
let s = mono[i];
|
|
for ch in 0..12 {
|
|
output[i * 12 + ch] += s * gains[ch];
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Downmix 7.1.4 (12ch) to stereo using ITU-R BS.775 derived coefficients.
///
/// `input` is interleaved in the order L, R, C, LFE, Ls, Rs, Lrs, Rrs, Ltf,
/// Rtf, Ltr, Rtr. The downmix is *added* to `out_l` / `out_r`.
///
/// The number of frames processed is the smallest of: complete 12-sample
/// frames in `input`, `out_l.len()` and `out_r.len()`. Mismatched buffer
/// lengths therefore truncate instead of panicking.
pub fn downmix_714_to_stereo(input: &[f32], out_l: &mut [f32], out_r: &mut [f32]) {
    let inv_sqrt2 = std::f32::consts::FRAC_1_SQRT_2;

    let frames = input
        .chunks_exact(12)
        .zip(out_l.iter_mut())
        .zip(out_r.iter_mut());

    for ((frame, dst_l), dst_r) in frames {
        // `chunks_exact(12)` only yields 12-element slices, so this always matches.
        if let &[l, r, c, lfe, ls, rs, lrs, rrs, ltf, rtf, ltr, rtr] = frame {
            // Centre, LFE, side and top-front channels fold in at -3 dB;
            // rear and top-rear channels at -6 dB.
            *dst_l += l + inv_sqrt2 * c + inv_sqrt2 * lfe
                + inv_sqrt2 * ls + 0.5 * lrs
                + inv_sqrt2 * ltf + 0.5 * ltr;

            *dst_r += r + inv_sqrt2 * c + inv_sqrt2 * lfe
                + inv_sqrt2 * rs + 0.5 * rrs
                + inv_sqrt2 * rtf + 0.5 * rtr;
        }
    }
}
|
|
|
|
/// Signed difference `a - b` between two angles in degrees, wrapped into
/// `[-180, 180]`.
///
/// Differences already inside that range are returned unchanged. Larger
/// positive differences wrap into `(-180, 180]` and larger negative ones
/// into `[-180, 180)`. A non-finite difference yields NaN.
///
/// The wrap uses `rem_euclid` rather than repeated subtraction, so it
/// terminates for any input, including infinities and magnitudes so large
/// that subtracting 360 no longer changes the value.
fn angle_diff(a: f32, b: f32) -> f32 {
    let d = a - b;
    if d > 180.0 {
        let r = d.rem_euclid(360.0);
        if r > 180.0 { r - 360.0 } else { r }
    } else if d < -180.0 {
        let r = d.rem_euclid(360.0);
        if r >= 180.0 { r - 360.0 } else { r }
    } else {
        d
    }
}
|
|
|
|
/// Selectable spatial render modes. The default is [`SpatialRenderMode::Stereo`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SpatialRenderMode {
    /// Displayed as `Mono`.
    Mono,
    /// Displayed as `Stereo`.
    #[default]
    Stereo,
    /// Displayed as `Binaural`.
    Binaural,
    /// Displayed as `7.1.4`.
    Surround714,
}

impl SpatialRenderMode {
    /// Every mode, in declaration order.
    pub const ALL: [SpatialRenderMode; 4] = [
        SpatialRenderMode::Mono,
        SpatialRenderMode::Stereo,
        SpatialRenderMode::Binaural,
        SpatialRenderMode::Surround714,
    ];
}

impl std::fmt::Display for SpatialRenderMode {
    /// Writes the short human-readable label of the mode.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Mono => "Mono",
            Self::Stereo => "Stereo",
            Self::Binaural => "Binaural",
            Self::Surround714 => "7.1.4",
        };
        // `pad` honours width/alignment flags such as `{:>10}`, which
        // `write!` with a literal silently ignores.
        f.pad(label)
    }
}
|
|
|
|
/// Selectable mono lanes. The default is [`MonoLane::Mix`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum MonoLane {
    /// Displayed as `L+R`.
    #[default]
    Mix,
    /// Displayed as `L`.
    Left,
    /// Displayed as `R`.
    Right,
}

impl MonoLane {
    /// Every lane, in declaration order.
    pub const ALL: [MonoLane; 3] = [MonoLane::Mix, MonoLane::Left, MonoLane::Right];
}

impl std::fmt::Display for MonoLane {
    /// Writes the short human-readable label of the lane.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Mix => "L+R",
            Self::Left => "L",
            Self::Right => "R",
        };
        // `pad` honours width/alignment flags such as `{:^5}`, which
        // `write!` with a literal silently ignores.
        f.pad(label)
    }
}
|