// YrXtals/src/analyzer.rs
// (file-listing header: 511 lines, 18 KiB, Rust)

use std::collections::VecDeque;
use std::sync::Arc;
use num_complex::Complex64;
use crate::hilbert_stream::RealtimeHilbert;
use crate::processor::Processor;
use crate::track::TrackData;
/// per-channel analyzer output combining the compressed multi-band db, the unmodified main-band db, and an optional cepstrum.
///
/// the analyzer publishes one of these per stereo channel each time it computes a frame.
#[derive(Debug, Clone, Default)]
pub struct FrameData {
/// bin frequencies, moved over unchanged from the main-band spectrum.
pub freqs: Vec<f32>,
/// main-band db after the per-bin max against the transient and deep bands and the compressor above the threshold.
/// when the three bands disagree on bin count the merge is skipped and this equals `primary_db`.
pub db: Vec<f32>,
/// copy of the main-band db taken before any merging or compression.
pub primary_db: Vec<f32>,
/// main-band cepstrum; only filled for the first (index 0) channel, empty for the second.
pub cepstrum: Vec<f32>,
}
/// stereo three-band processor pool driven by a streaming hilbert source and surfacing one frame per call to step.
///
/// each band holds two processors: index 0 receives the left channel, index 1 the right.
pub struct Analyzer {
/// primary band, running at the base frame size.
main: [Processor; 2],
/// short-window band, running at a quarter of the base frame size (never below 64).
transient: [Processor; 2],
/// long-window band, running at a multiple of the base frame size.
deep: [Processor; 2],
/// base frame size last configured through new or set_dsp_params.
frame_size: usize,
/// hop size last configured through new or set_dsp_params.
hop_size: usize,
/// streaming analytic-signal source fed one hop of left/right samples at a time.
hilbert: RealtimeHilbert,
/// size handed to the hilbert source on reinit; also sets how many warm-up hops are replayed.
hilbert_fft_size: usize,
/// number of stereo frames consumed per step.
hilbert_hop_size: usize,
/// when set, the next step reinitialises the hilbert source and replays warm-up audio first.
hilbert_needs_reset: bool,
/// frame index targeted by the most recent track-mode step, used to detect playhead jumps.
last_hilbert_sample: usize,
/// currently loaded track, if any.
track: Option<Arc<TrackData>>,
/// frames published by the most recent compute; empty until the first successful step.
last_frames: Vec<FrameData>,
/// most recently observed live-PCM sample rate.
live_sample_rate: u32,
/// interleaved-stereo PCM accumulated by push_live_pcm and drained one hop at a time by step_live.
live_buffer: VecDeque<f32>,
/// soft cap on the live buffer length.
live_buffer_max: usize,
/// smoothed AGC gain applied to incoming live PCM before it enters the analyzer buffer.
live_gain: f64,
/// dB threshold below which incoming live PCM chunks are replaced with silence.
noise_gate_db: f32,
}
impl Analyzer {
/// builds the main, transient, and deep processor pairs with band-appropriate expanders, hpf, and smoothing.
pub fn new(device: wgpu::Device, queue: wgpu::Queue) -> Self {
let frame_size = 4096_usize;
let hop_size = 1024_usize;
let mut main = [
Processor::new(frame_size, 48_000, device.clone(), queue.clone()),
Processor::new(frame_size, 48_000, device.clone(), queue.clone()),
];
for p in &mut main {
p.set_expander(1.5, -50.0);
p.set_hpf(80.0);
p.set_smoothing(3);
}
let trans_size = (frame_size / 4).max(64);
let mut transient = [
Processor::new(trans_size, 48_000, device.clone(), queue.clone()),
Processor::new(trans_size, 48_000, device.clone(), queue.clone()),
];
for p in &mut transient {
p.set_expander(2.5, -40.0);
p.set_hpf(100.0);
p.set_smoothing(2);
}
let mut deep = [
Processor::new(frame_size * 2, 48_000, device.clone(), queue.clone()),
Processor::new(frame_size * 2, 48_000, device.clone(), queue.clone()),
];
for p in &mut deep {
p.set_expander(1.2, -60.0);
p.set_hpf(0.0);
p.set_smoothing(5);
}
Self {
main,
transient,
deep,
frame_size,
hop_size,
hilbert: RealtimeHilbert::new(),
hilbert_fft_size: 8192,
hilbert_hop_size: hop_size,
hilbert_needs_reset: true,
last_hilbert_sample: 0,
track: None,
last_frames: Vec::new(),
live_sample_rate: 0,
live_buffer: VecDeque::with_capacity(192_000),
live_buffer_max: 192_000,
live_gain: 1.0,
noise_gate_db: -100.0,
}
}
/// stores the noise-gate threshold in dB applied to live-mode PCM chunks.
///
/// push_live_pcm compares each chunk's RMS level (in dB) against this value and substitutes silence when the chunk is quieter.
pub fn set_noise_gate(&mut self, db: f32) {
self.noise_gate_db = db;
}
/// sample rate of the loaded track, or 48 khz while no track is loaded.
pub fn sample_rate(&self) -> u32 {
    self.track.as_ref().map_or(48_000, |track| track.sample_rate)
}
/// stereo-frame count of the loaded track, or zero while no track is loaded.
pub fn total_samples(&self) -> usize {
    match &self.track {
        Some(track) => track.total_samples(),
        None => 0,
    }
}
/// installs a new track, retunes every processor to its sample rate, and flags the hilbert source for reset.
pub fn set_track_data(&mut self, data: Arc<TrackData>) {
    let rate = data.sample_rate;
    for band in [&mut self.main, &mut self.transient, &mut self.deep] {
        for processor in band.iter_mut() {
            processor.set_sample_rate(rate);
        }
    }
    self.hilbert_needs_reset = true;
    self.track = Some(data);
}
/// reconfigures all three bands around a base frame size and points the hilbert source at the same frame and hop.
///
/// main runs at `frame_size`, transient at a quarter of it (at least 64), and deep at 4x below 2048 or 2x otherwise.
/// state is always discarded — processor state, the live buffer, and (via the reset flag) the hilbert history — so the outcome for a given (fft, hop) pair is independent of the previous configuration.
pub fn set_dsp_params(&mut self, frame_size: usize, hop_size: usize) {
    let deep_factor = if frame_size < 2048 { 4 } else { 2 };
    let band_sizes = [
        frame_size,
        (frame_size / 4).max(64),
        frame_size * deep_factor,
    ];

    self.frame_size = frame_size;
    self.hop_size = hop_size;
    self.hilbert_fft_size = frame_size;
    self.hilbert_hop_size = hop_size;
    self.hilbert_needs_reset = true;
    self.live_buffer.clear();

    let bands = [&mut self.main, &mut self.transient, &mut self.deep];
    for (band, size) in bands.into_iter().zip(band_sizes) {
        for processor in band.iter_mut() {
            processor.set_frame_size(size);
            processor.clear_state();
        }
    }
}
/// applies the same output bin count to every processor in every band.
pub fn set_num_bins(&mut self, n: usize) {
    for band in [&mut self.main, &mut self.transient, &mut self.deep] {
        for processor in band.iter_mut() {
            processor.set_num_bins(n);
        }
    }
}
/// forwards the cepstral parameters to all bands, scaling strength per band: main unchanged, transient at 0.3x, deep at 1.2x.
pub fn set_smoothing_params(&mut self, granularity: i32, detail: i32, strength: f32) {
    let transient_strength = strength * 0.3;
    let deep_strength = strength * 1.2;

    self.main
        .iter_mut()
        .for_each(|proc| proc.set_cepstral_params(granularity, detail, strength));
    self.transient
        .iter_mut()
        .for_each(|proc| proc.set_cepstral_params(granularity, detail, transient_strength));
    self.deep
        .iter_mut()
        .for_each(|proc| proc.set_cepstral_params(granularity, detail, deep_strength));
}
/// applies the same cpu/gpu fft blend factor to every processor in every band.
pub fn set_gpu_blend(&mut self, blend: f32) {
    for band in [&mut self.main, &mut self.transient, &mut self.deep] {
        for processor in band.iter_mut() {
            processor.set_gpu_blend(blend);
        }
    }
}
/// advances one hop of analytic-signal data at the requested normalised playhead and publishes a stereo frame pair.
pub fn step(&mut self, position: f64) -> Option<&[FrameData]> {
let total = self.total_samples();
if total == 0 {
return None;
}
let target = (position.clamp(0.0, 1.0) * total as f64) as usize;
let track = self.track.clone()?;
if !track.is_valid() {
return None;
}
let total_samples = track.total_samples();
let hop = self.hilbert_hop_size;
let fft_size = self.hilbert_fft_size;
if hop == 0 || fft_size == 0 || target + hop >= total_samples {
return None;
}
if !self.hilbert_needs_reset {
let delta = target.abs_diff(self.last_hilbert_sample);
if delta > 2 * hop {
self.hilbert_needs_reset = true;
}
}
if self.hilbert_needs_reset {
self.hilbert.reinit(fft_size);
self.hilbert_needs_reset = false;
// pre-fills the sliding history with the prior fft_size of audio.
let warmup_blocks = fft_size / hop;
let warmup_start = target.saturating_sub(warmup_blocks * hop);
for w in 0..warmup_blocks {
let block_start = warmup_start + w * hop;
if block_start + hop > total_samples {
break;
}
let (left, right) = stereo_block(&track.pcm, block_start, hop);
let _ = self.hilbert.process(&left, &right);
}
}
self.last_hilbert_sample = target;
let (left, right) = stereo_block(&track.pcm, target, hop);
let (cl, cr) = self.hilbert.process(&left, &right);
self.push_to_processors(&cl, &cr);
let produced_new = true;
if produced_new {
self.compute_and_publish();
Some(&self.last_frames)
} else {
None
}
}
/// appends interleaved PCM into the live-mode buffer as interleaved stereo, applying the noise gate and AGC.
///
/// * `samples` — interleaved PCM with `channels` values per frame; a trailing incomplete frame is ignored.
/// * `sample_rate` — rate of this chunk; a change is propagated to every processor and flags the hilbert source for reset.
/// * `channels` — mono is duplicated to both sides; with two or more channels the first two of each frame are used.
///
/// empty input or a zero channel count is ignored. chunks whose RMS falls below the noise gate are replaced by the same number of silent frames so the hop clock keeps running. the buffer is capped and then trimmed to the real-time window on every call.
pub fn push_live_pcm(&mut self, samples: &[f32], sample_rate: u32, channels: u32) {
    // chunk-RMS AGC: keep mic levels near a fixed target so a quiet phone-mic still drives the visualizer.
    const TARGET_RMS: f64 = 0.15;
    const MIN_RMS: f64 = 0.001;
    const MAX_GAIN: f64 = 50.0;
    const MIN_GAIN: f64 = 0.01;
    const ATTACK: f64 = 0.30;
    const RELEASE: f64 = 0.02;

    if samples.is_empty() || channels == 0 {
        return;
    }
    if sample_rate != self.live_sample_rate {
        self.live_sample_rate = sample_rate;
        for band in [&mut self.main, &mut self.transient, &mut self.deep] {
            for processor in band.iter_mut() {
                processor.set_sample_rate(sample_rate);
            }
        }
        self.hilbert_needs_reset = true;
    }

    let sum_sq: f64 = samples
        .iter()
        .map(|&s| f64::from(s) * f64::from(s))
        .sum();
    let rms = (sum_sq / samples.len() as f64).sqrt();
    let rms_db = 20.0_f32 * (rms.max(1e-12).log10() as f32);
    let ch = channels as usize;

    if rms_db < self.noise_gate_db {
        // chunk is below the gate — write silence at the same frame count to keep the hop clock alive.
        let frames = samples.len() / ch;
        self.live_buffer
            .extend(std::iter::repeat(0.0_f32).take(frames * 2));
    } else {
        // below MIN_RMS the level estimate is unreliable, so hold the current gain instead of chasing it.
        let target_gain = if rms > MIN_RMS {
            (TARGET_RMS / rms).clamp(MIN_GAIN, MAX_GAIN)
        } else {
            self.live_gain.clamp(MIN_GAIN, MAX_GAIN)
        };
        // pull the gain down quickly on loud input, raise it slowly on quiet input.
        let alpha = if target_gain < self.live_gain { ATTACK } else { RELEASE };
        self.live_gain += alpha * (target_gain - self.live_gain);
        let gain = self.live_gain as f32;

        if ch == 1 {
            self.live_buffer.extend(samples.iter().flat_map(|&s| {
                let scaled = s * gain;
                [scaled, scaled]
            }));
        } else {
            // whole frames only: pushing a dangling sample would flip left/right for all later audio.
            self.live_buffer.extend(
                samples
                    .chunks_exact(ch)
                    .flat_map(|frame| [frame[0] * gain, frame[1] * gain]),
            );
        }
    }

    // hard cap first; this still applies when trim_live_buffer bails out on a zero hop or fft size.
    while self.live_buffer.len() > self.live_buffer_max {
        self.live_buffer.pop_front();
        self.live_buffer.pop_front();
    }
    // real-time anchor every PCM push, not only when step_live happens to fire. without this, audio piles up between step_lives whenever the worker is busy and the visualizer slides further behind reality until the buffer hits its 2-second cap.
    self.trim_live_buffer();
}
/// discards the oldest live-buffer entries beyond what the current (fft, hop) needs plus a fixed real-time slack, so analysis always runs on recent audio.
///
/// does nothing while the hop or fft size is zero. entries are dropped in left/right pairs so channel alignment is preserved.
/// NOTE(review): the kept amount is clamped to `live_buffer_max`; if a pending warm-up needs more than that, step_live can never collect it — confirm configured sizes stay below the cap.
fn trim_live_buffer(&mut self) {
    const SLACK_MS: usize = 30;

    let (hop, fft) = (self.hilbert_hop_size, self.hilbert_fft_size);
    if hop == 0 || fft == 0 {
        return;
    }

    // one hop of interleaved stereo, plus the warm-up replay while a reset is pending.
    let one_hop = hop * 2;
    let needed = if self.hilbert_needs_reset {
        (fft / hop) * hop * 2 + one_hop
    } else {
        one_hop
    };

    // fixed ~30ms slack across all configs — keeps lag bounded regardless of hop size.
    let slack_entries = match self.live_sample_rate as usize {
        0 => hop * 4,
        rate => (rate * SLACK_MS / 1000) * 2,
    };

    let max_keep = (needed + slack_entries).min(self.live_buffer_max);
    let surplus = self.live_buffer.len().saturating_sub(max_keep);
    let drop_entries = (surplus / 2) * 2;
    drop(self.live_buffer.drain(..drop_entries));
}
/// consumes one hop's worth of interleaved stereo from the live buffer and publishes a frame pair.
pub fn step_live(&mut self) -> Option<&[FrameData]> {
let hop = self.hilbert_hop_size;
let fft = self.hilbert_fft_size;
if hop == 0 || fft == 0 {
return None;
}
// defense-in-depth trim in case PCM stopped arriving briefly (no push_live_pcm calls but old buffered audio still around).
self.trim_live_buffer();
if self.hilbert_needs_reset {
let warmup_blocks = fft / hop;
let warmup_stereo = warmup_blocks * hop * 2;
if self.live_buffer.len() < warmup_stereo + hop * 2 {
return None;
}
self.hilbert.reinit(fft);
for _ in 0..warmup_blocks {
let mut left = Vec::with_capacity(hop);
let mut right = Vec::with_capacity(hop);
for _ in 0..hop {
left.push(self.live_buffer.pop_front().unwrap() as f64);
right.push(self.live_buffer.pop_front().unwrap() as f64);
}
let _ = self.hilbert.process(&left, &right);
}
self.hilbert_needs_reset = false;
}
if self.live_buffer.len() < hop * 2 {
return None;
}
let mut left = Vec::with_capacity(hop);
let mut right = Vec::with_capacity(hop);
for _ in 0..hop {
left.push(self.live_buffer.pop_front().unwrap() as f64);
right.push(self.live_buffer.pop_front().unwrap() as f64);
}
let (cl, cr) = self.hilbert.process(&left, &right);
self.push_to_processors(&cl, &cr);
self.compute_and_publish();
Some(&self.last_frames)
}
/// feeds one analytic-signal hop to every band: the left block to each band's first processor, the right block to its second.
fn push_to_processors(&mut self, complex_l: &[Complex64], complex_r: &[Complex64]) {
    for band in [&mut self.main, &mut self.transient, &mut self.deep] {
        let [left_proc, right_proc] = band;
        left_proc.push_data(complex_l);
        right_proc.push_data(complex_r);
    }
}
/// returns the frames published by the last compute without doing any new work; empty until a step has succeeded.
pub fn latest(&self) -> &[FrameData] {
    self.last_frames.as_slice()
}
/// computes all six band/channel spectra on the rayon pool, then merges each channel's bands into one published frame.
///
/// per bin the loudest of the main, transient, and deep values wins, and anything above -15 db is compressed 4:1.
/// if the three bands disagree on bin count the merge (and compression) is skipped and the main-band db is published as-is.
/// only the first channel carries the main-band cepstrum.
fn compute_and_publish(&mut self) {
    const COMP_THRESHOLD: f32 = -15.0;
    const COMP_RATIO: f32 = 4.0;

    let [main_l, main_r] = &mut self.main;
    let [trans_l, trans_r] = &mut self.transient;
    let [deep_l, deep_r] = &mut self.deep;

    // one slot per task; each is filled exactly once before the scope returns.
    let (mut sml, mut smr, mut stl, mut str_, mut sdl, mut sdr) =
        (None, None, None, None, None, None);
    rayon::scope(|s| {
        s.spawn(|_| sml = Some(main_l.get_spectrum()));
        s.spawn(|_| smr = Some(main_r.get_spectrum()));
        s.spawn(|_| stl = Some(trans_l.get_spectrum()));
        s.spawn(|_| str_ = Some(trans_r.get_spectrum()));
        s.spawn(|_| sdl = Some(deep_l.get_spectrum()));
        s.spawn(|_| sdr = Some(deep_r.get_spectrum()));
    });

    let per_channel = [
        (sml.unwrap(), stl.unwrap(), sdl.unwrap()),
        (smr.unwrap(), str_.unwrap(), sdr.unwrap()),
    ];

    self.last_frames = per_channel
        .into_iter()
        .enumerate()
        .map(|(channel, (mut spec_main, spec_trans, spec_deep))| {
            // snapshot the untouched main band before it is overwritten by the merge.
            let primary_db = spec_main.db.clone();
            let bins = spec_main.db.len();
            if bins == spec_trans.db.len() && bins == spec_deep.db.len() {
                let others = spec_trans.db.iter().zip(spec_deep.db.iter());
                for (merged, (&t, &d)) in spec_main.db.iter_mut().zip(others) {
                    let peak = (*merged).max(t).max(d);
                    *merged = if peak > COMP_THRESHOLD {
                        COMP_THRESHOLD + (peak - COMP_THRESHOLD) / COMP_RATIO
                    } else {
                        peak
                    };
                }
            }
            let cepstrum = if channel == 0 {
                spec_main.cepstrum
            } else {
                Vec::new()
            };
            FrameData {
                freqs: spec_main.freqs,
                db: spec_main.db,
                primary_db,
                cepstrum,
            }
        })
        .collect();
}
}
/// splits `hop` stereo frames of interleaved pcm, beginning at frame index `start`, into left and right f64 blocks.
///
/// a zero `hop` yields two empty blocks without touching `pcm`.
///
/// # Panics
/// panics when the requested frames run past the end of `pcm`.
fn stereo_block(pcm: &[f32], start: usize, hop: usize) -> (Vec<f64>, Vec<f64>) {
    if hop == 0 {
        return (Vec::new(), Vec::new());
    }
    // slicing up front performs the bounds check once for the whole window.
    let window = &pcm[start * 2..(start + hop) * 2];
    window
        .chunks_exact(2)
        .map(|frame| (f64::from(frame[0]), f64::from(frame[1])))
        .unzip()
}