511 lines
18 KiB
Rust
511 lines
18 KiB
Rust
|
|
|
|
use std::collections::VecDeque;
|
|
use std::sync::Arc;
|
|
|
|
use num_complex::Complex64;
|
|
|
|
use crate::hilbert_stream::RealtimeHilbert;
|
|
use crate::processor::Processor;
|
|
use crate::track::TrackData;
|
|
|
|
/// one channel's published analyzer output: the merged multi-band spectrum, the untouched main-band spectrum, and an optional cepstrum.
#[derive(Clone, Debug, Default)]
pub struct FrameData {
    /// frequency axis copied from the main-band spectrum.
    pub freqs: Vec<f32>,
    /// per-bin maximum across the main, transient, and deep bands after compression (left as the main-band values when the band lengths differ).
    pub db: Vec<f32>,
    /// main-band dB values captured before any cross-band merging or compression.
    pub primary_db: Vec<f32>,

    /// main-band cepstrum; only populated on the first (index 0) frame of a published pair, empty otherwise.
    pub cepstrum: Vec<f32>,
}
|
|
|
|
/// stereo three-band processor pool driven by a streaming hilbert source and surfacing one frame per call to step.
|
|
pub struct Analyzer {
|
|
main: [Processor; 2],
|
|
transient: [Processor; 2],
|
|
deep: [Processor; 2],
|
|
|
|
frame_size: usize,
|
|
hop_size: usize,
|
|
|
|
hilbert: RealtimeHilbert,
|
|
hilbert_fft_size: usize,
|
|
hilbert_hop_size: usize,
|
|
hilbert_needs_reset: bool,
|
|
last_hilbert_sample: usize,
|
|
|
|
track: Option<Arc<TrackData>>,
|
|
last_frames: Vec<FrameData>,
|
|
|
|
/// most recently observed live-PCM sample rate.
|
|
live_sample_rate: u32,
|
|
|
|
/// interleaved-stereo PCM accumulated by push_live_pcm and drained one hop at a time by step_live.
|
|
live_buffer: VecDeque<f32>,
|
|
|
|
/// soft cap on the live buffer length.
|
|
live_buffer_max: usize,
|
|
|
|
/// smoothed AGC gain applied to incoming live PCM before it enters the analyzer buffer.
|
|
live_gain: f64,
|
|
|
|
/// dB threshold below which incoming live PCM chunks are replaced with silence.
|
|
noise_gate_db: f32,
|
|
}
|
|
|
|
impl Analyzer {
|
|
/// builds the main, transient, and deep processor pairs with band-appropriate expanders, hpf, and smoothing.
|
|
pub fn new(device: wgpu::Device, queue: wgpu::Queue) -> Self {
|
|
let frame_size = 4096_usize;
|
|
let hop_size = 1024_usize;
|
|
|
|
let mut main = [
|
|
Processor::new(frame_size, 48_000, device.clone(), queue.clone()),
|
|
Processor::new(frame_size, 48_000, device.clone(), queue.clone()),
|
|
];
|
|
for p in &mut main {
|
|
p.set_expander(1.5, -50.0);
|
|
p.set_hpf(80.0);
|
|
p.set_smoothing(3);
|
|
}
|
|
|
|
let trans_size = (frame_size / 4).max(64);
|
|
let mut transient = [
|
|
Processor::new(trans_size, 48_000, device.clone(), queue.clone()),
|
|
Processor::new(trans_size, 48_000, device.clone(), queue.clone()),
|
|
];
|
|
for p in &mut transient {
|
|
p.set_expander(2.5, -40.0);
|
|
p.set_hpf(100.0);
|
|
p.set_smoothing(2);
|
|
}
|
|
|
|
let mut deep = [
|
|
Processor::new(frame_size * 2, 48_000, device.clone(), queue.clone()),
|
|
Processor::new(frame_size * 2, 48_000, device.clone(), queue.clone()),
|
|
];
|
|
for p in &mut deep {
|
|
p.set_expander(1.2, -60.0);
|
|
p.set_hpf(0.0);
|
|
p.set_smoothing(5);
|
|
}
|
|
|
|
Self {
|
|
main,
|
|
transient,
|
|
deep,
|
|
frame_size,
|
|
hop_size,
|
|
hilbert: RealtimeHilbert::new(),
|
|
hilbert_fft_size: 8192,
|
|
hilbert_hop_size: hop_size,
|
|
hilbert_needs_reset: true,
|
|
last_hilbert_sample: 0,
|
|
track: None,
|
|
last_frames: Vec::new(),
|
|
live_sample_rate: 0,
|
|
live_buffer: VecDeque::with_capacity(192_000),
|
|
live_buffer_max: 192_000,
|
|
live_gain: 1.0,
|
|
noise_gate_db: -100.0,
|
|
}
|
|
}
|
|
|
|
/// stores the noise-gate threshold in dB applied to live-mode PCM chunks.
|
|
pub fn set_noise_gate(&mut self, db: f32) {
|
|
self.noise_gate_db = db;
|
|
}
|
|
|
|
/// returns the loaded track's sample rate, falling back to 48 khz before any track loads.
|
|
pub fn sample_rate(&self) -> u32 {
|
|
self.track.as_ref().map(|t| t.sample_rate).unwrap_or(48_000)
|
|
}
|
|
|
|
/// returns the loaded track's stereo-frame count.
|
|
pub fn total_samples(&self) -> usize {
|
|
self.track.as_ref().map(|t| t.total_samples()).unwrap_or(0)
|
|
}
|
|
|
|
/// swaps in incoming pcm data and marks the streaming hilbert dirty.
|
|
pub fn set_track_data(&mut self, data: Arc<TrackData>) {
|
|
let rate = data.sample_rate;
|
|
for p in self.main.iter_mut().chain(self.transient.iter_mut()).chain(self.deep.iter_mut()) {
|
|
p.set_sample_rate(rate);
|
|
}
|
|
self.track = Some(data);
|
|
self.hilbert_needs_reset = true;
|
|
}
|
|
|
|
/// retunes the three bands' transform lengths around a base frame size and routes the hilbert hop to match.
|
|
/// always force-clears processor buffers, smoothing history, hilbert state, and the live buffer so the result of a given (fft, hop) pair doesn't depend on the path taken to get there.
|
|
pub fn set_dsp_params(&mut self, frame_size: usize, hop_size: usize) {
|
|
let trans_size = (frame_size / 4).max(64);
|
|
let deep_size = if frame_size < 2048 { frame_size * 4 } else { frame_size * 2 };
|
|
|
|
self.hilbert_fft_size = frame_size;
|
|
self.hilbert_hop_size = hop_size;
|
|
self.hilbert_needs_reset = true;
|
|
self.live_buffer.clear();
|
|
self.frame_size = frame_size;
|
|
self.hop_size = hop_size;
|
|
|
|
for p in &mut self.main {
|
|
p.set_frame_size(frame_size);
|
|
p.clear_state();
|
|
}
|
|
for p in &mut self.transient {
|
|
p.set_frame_size(trans_size);
|
|
p.clear_state();
|
|
}
|
|
for p in &mut self.deep {
|
|
p.set_frame_size(deep_size);
|
|
p.clear_state();
|
|
}
|
|
}
|
|
|
|
/// propagates the bin count to every band and channel.
|
|
pub fn set_num_bins(&mut self, n: usize) {
|
|
for p in self.main.iter_mut().chain(self.transient.iter_mut()).chain(self.deep.iter_mut()) {
|
|
p.set_num_bins(n);
|
|
}
|
|
}
|
|
|
|
/// distributes the cepstral idealisation parameters across the three bands with band-specific strength weights.
|
|
pub fn set_smoothing_params(&mut self, granularity: i32, detail: i32, strength: f32) {
|
|
for p in &mut self.main {
|
|
p.set_cepstral_params(granularity, detail, strength);
|
|
}
|
|
for p in &mut self.transient {
|
|
p.set_cepstral_params(granularity, detail, strength * 0.3);
|
|
}
|
|
for p in &mut self.deep {
|
|
p.set_cepstral_params(granularity, detail, strength * 1.2);
|
|
}
|
|
}
|
|
|
|
/// propagates the cpu/gpu fft crossfade to every band and channel.
|
|
pub fn set_gpu_blend(&mut self, blend: f32) {
|
|
for p in self
|
|
.main
|
|
.iter_mut()
|
|
.chain(self.transient.iter_mut())
|
|
.chain(self.deep.iter_mut())
|
|
{
|
|
p.set_gpu_blend(blend);
|
|
}
|
|
}
|
|
|
|
/// advances one hop of analytic-signal data at the requested normalised playhead and publishes a stereo frame pair.
|
|
pub fn step(&mut self, position: f64) -> Option<&[FrameData]> {
|
|
let total = self.total_samples();
|
|
if total == 0 {
|
|
return None;
|
|
}
|
|
let target = (position.clamp(0.0, 1.0) * total as f64) as usize;
|
|
let track = self.track.clone()?;
|
|
if !track.is_valid() {
|
|
return None;
|
|
}
|
|
let total_samples = track.total_samples();
|
|
let hop = self.hilbert_hop_size;
|
|
let fft_size = self.hilbert_fft_size;
|
|
if hop == 0 || fft_size == 0 || target + hop >= total_samples {
|
|
return None;
|
|
}
|
|
|
|
if !self.hilbert_needs_reset {
|
|
let delta = target.abs_diff(self.last_hilbert_sample);
|
|
if delta > 2 * hop {
|
|
self.hilbert_needs_reset = true;
|
|
}
|
|
}
|
|
|
|
if self.hilbert_needs_reset {
|
|
self.hilbert.reinit(fft_size);
|
|
self.hilbert_needs_reset = false;
|
|
// pre-fills the sliding history with the prior fft_size of audio.
|
|
let warmup_blocks = fft_size / hop;
|
|
let warmup_start = target.saturating_sub(warmup_blocks * hop);
|
|
for w in 0..warmup_blocks {
|
|
let block_start = warmup_start + w * hop;
|
|
if block_start + hop > total_samples {
|
|
break;
|
|
}
|
|
let (left, right) = stereo_block(&track.pcm, block_start, hop);
|
|
let _ = self.hilbert.process(&left, &right);
|
|
}
|
|
}
|
|
|
|
self.last_hilbert_sample = target;
|
|
|
|
let (left, right) = stereo_block(&track.pcm, target, hop);
|
|
let (cl, cr) = self.hilbert.process(&left, &right);
|
|
self.push_to_processors(&cl, &cr);
|
|
|
|
let produced_new = true;
|
|
if produced_new {
|
|
self.compute_and_publish();
|
|
Some(&self.last_frames)
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
/// appends interleaved PCM into the live-mode buffer with stereo conversion. propagates sample-rate changes and caps the buffer length.
|
|
pub fn push_live_pcm(&mut self, samples: &[f32], sample_rate: u32, channels: u32) {
|
|
if samples.is_empty() || channels == 0 {
|
|
return;
|
|
}
|
|
if sample_rate != self.live_sample_rate {
|
|
self.live_sample_rate = sample_rate;
|
|
for p in self
|
|
.main
|
|
.iter_mut()
|
|
.chain(self.transient.iter_mut())
|
|
.chain(self.deep.iter_mut())
|
|
{
|
|
p.set_sample_rate(sample_rate);
|
|
}
|
|
self.hilbert_needs_reset = true;
|
|
}
|
|
|
|
// chunk-RMS AGC: keep mic levels near a fixed target so a quiet phone-mic still drives the visualizer.
|
|
const TARGET_RMS: f64 = 0.15;
|
|
const MIN_RMS: f64 = 0.001;
|
|
const MAX_GAIN: f64 = 50.0;
|
|
const MIN_GAIN: f64 = 0.01;
|
|
const ATTACK: f64 = 0.30;
|
|
const RELEASE: f64 = 0.02;
|
|
|
|
let mut sum_sq = 0.0_f64;
|
|
for &s in samples {
|
|
sum_sq += (s as f64) * (s as f64);
|
|
}
|
|
let rms = (sum_sq / samples.len() as f64).sqrt();
|
|
let rms_db = 20.0 * (rms.max(1e-12)).log10() as f32;
|
|
|
|
let ch = channels as usize;
|
|
if rms_db < self.noise_gate_db {
|
|
// chunk is below the gate — write silence at the same frame count to keep the hop clock alive.
|
|
let frames = if ch > 0 { samples.len() / ch } else { 0 };
|
|
for _ in 0..frames {
|
|
self.live_buffer.push_back(0.0);
|
|
self.live_buffer.push_back(0.0);
|
|
}
|
|
while self.live_buffer.len() > self.live_buffer_max {
|
|
self.live_buffer.pop_front();
|
|
self.live_buffer.pop_front();
|
|
}
|
|
self.trim_live_buffer();
|
|
return;
|
|
}
|
|
|
|
let target_gain = if rms > MIN_RMS {
|
|
(TARGET_RMS / rms).clamp(MIN_GAIN, MAX_GAIN)
|
|
} else {
|
|
self.live_gain.clamp(MIN_GAIN, MAX_GAIN)
|
|
};
|
|
let alpha = if target_gain < self.live_gain { ATTACK } else { RELEASE };
|
|
self.live_gain += alpha * (target_gain - self.live_gain);
|
|
let gain = self.live_gain as f32;
|
|
|
|
match ch {
|
|
2 => {
|
|
for &s in samples {
|
|
self.live_buffer.push_back(s * gain);
|
|
}
|
|
}
|
|
1 => {
|
|
for &s in samples {
|
|
let scaled = s * gain;
|
|
self.live_buffer.push_back(scaled);
|
|
self.live_buffer.push_back(scaled);
|
|
}
|
|
}
|
|
_ => {
|
|
let mut i = 0;
|
|
while i + ch <= samples.len() {
|
|
self.live_buffer.push_back(samples[i] * gain);
|
|
self.live_buffer.push_back(samples[i + 1] * gain);
|
|
i += ch;
|
|
}
|
|
}
|
|
}
|
|
while self.live_buffer.len() > self.live_buffer_max {
|
|
self.live_buffer.pop_front();
|
|
self.live_buffer.pop_front();
|
|
}
|
|
|
|
// real-time anchor every PCM push, not only when step_live happens to fire. without this, audio piles up between step_lives whenever the worker is busy and the visualizer slides further behind reality until the buffer hits its 2-second cap.
|
|
self.trim_live_buffer();
|
|
}
|
|
|
|
/// drops oldest live-buffer entries down to what the current (fft, hop) needs plus a small fixed real-time slack, so the visualizer always works against recent audio.
|
|
fn trim_live_buffer(&mut self) {
|
|
let hop = self.hilbert_hop_size;
|
|
let fft = self.hilbert_fft_size;
|
|
if hop == 0 || fft == 0 {
|
|
return;
|
|
}
|
|
let warmup_blocks = fft / hop;
|
|
let needed = if self.hilbert_needs_reset {
|
|
warmup_blocks * hop * 2 + hop * 2
|
|
} else {
|
|
hop * 2
|
|
};
|
|
// fixed ~30ms slack across all configs — keeps lag bounded regardless of hop size.
|
|
let slack_ms = 30usize;
|
|
let slack_entries = if self.live_sample_rate == 0 {
|
|
hop * 4
|
|
} else {
|
|
(self.live_sample_rate as usize * slack_ms / 1000) * 2
|
|
};
|
|
let max_keep = (needed + slack_entries).min(self.live_buffer_max);
|
|
if self.live_buffer.len() > max_keep {
|
|
let drop_pairs = (self.live_buffer.len() - max_keep) / 2;
|
|
for _ in 0..drop_pairs {
|
|
self.live_buffer.pop_front();
|
|
self.live_buffer.pop_front();
|
|
}
|
|
}
|
|
}
|
|
|
|
/// consumes one hop's worth of interleaved stereo from the live buffer and publishes a frame pair.
|
|
pub fn step_live(&mut self) -> Option<&[FrameData]> {
|
|
let hop = self.hilbert_hop_size;
|
|
let fft = self.hilbert_fft_size;
|
|
if hop == 0 || fft == 0 {
|
|
return None;
|
|
}
|
|
|
|
// defense-in-depth trim in case PCM stopped arriving briefly (no push_live_pcm calls but old buffered audio still around).
|
|
self.trim_live_buffer();
|
|
|
|
if self.hilbert_needs_reset {
|
|
let warmup_blocks = fft / hop;
|
|
let warmup_stereo = warmup_blocks * hop * 2;
|
|
if self.live_buffer.len() < warmup_stereo + hop * 2 {
|
|
return None;
|
|
}
|
|
self.hilbert.reinit(fft);
|
|
for _ in 0..warmup_blocks {
|
|
let mut left = Vec::with_capacity(hop);
|
|
let mut right = Vec::with_capacity(hop);
|
|
for _ in 0..hop {
|
|
left.push(self.live_buffer.pop_front().unwrap() as f64);
|
|
right.push(self.live_buffer.pop_front().unwrap() as f64);
|
|
}
|
|
let _ = self.hilbert.process(&left, &right);
|
|
}
|
|
self.hilbert_needs_reset = false;
|
|
}
|
|
if self.live_buffer.len() < hop * 2 {
|
|
return None;
|
|
}
|
|
let mut left = Vec::with_capacity(hop);
|
|
let mut right = Vec::with_capacity(hop);
|
|
for _ in 0..hop {
|
|
left.push(self.live_buffer.pop_front().unwrap() as f64);
|
|
right.push(self.live_buffer.pop_front().unwrap() as f64);
|
|
}
|
|
let (cl, cr) = self.hilbert.process(&left, &right);
|
|
self.push_to_processors(&cl, &cr);
|
|
self.compute_and_publish();
|
|
Some(&self.last_frames)
|
|
}
|
|
|
|
/// distributes one analytic-signal hop into all three band processors per channel.
|
|
fn push_to_processors(&mut self, complex_l: &[Complex64], complex_r: &[Complex64]) {
|
|
self.main[0].push_data(complex_l);
|
|
self.main[1].push_data(complex_r);
|
|
self.transient[0].push_data(complex_l);
|
|
self.transient[1].push_data(complex_r);
|
|
self.deep[0].push_data(complex_l);
|
|
self.deep[1].push_data(complex_r);
|
|
}
|
|
|
|
/// borrows the most recently computed stereo frame pair without recomputing.
|
|
pub fn latest(&self) -> &[FrameData] {
|
|
&self.last_frames
|
|
}
|
|
|
|
/// runs the six band/channel spectra in parallel and per-bin max-merges them through a soft-knee compressor.
|
|
fn compute_and_publish(&mut self) {
|
|
let comp_threshold = -15.0_f32;
|
|
let comp_ratio = 4.0_f32;
|
|
|
|
let (m0, m1) = self.main.split_at_mut(1);
|
|
let (t0, t1) = self.transient.split_at_mut(1);
|
|
let (d0, d1) = self.deep.split_at_mut(1);
|
|
let main_l = &mut m0[0];
|
|
let main_r = &mut m1[0];
|
|
let trans_l = &mut t0[0];
|
|
let trans_r = &mut t1[0];
|
|
let deep_l = &mut d0[0];
|
|
let deep_r = &mut d1[0];
|
|
|
|
let mut sml = None;
|
|
let mut smr = None;
|
|
let mut stl = None;
|
|
let mut str_ = None;
|
|
let mut sdl = None;
|
|
let mut sdr = None;
|
|
rayon::scope(|s| {
|
|
s.spawn(|_| sml = Some(main_l.get_spectrum()));
|
|
s.spawn(|_| smr = Some(main_r.get_spectrum()));
|
|
s.spawn(|_| stl = Some(trans_l.get_spectrum()));
|
|
s.spawn(|_| str_ = Some(trans_r.get_spectrum()));
|
|
s.spawn(|_| sdl = Some(deep_l.get_spectrum()));
|
|
s.spawn(|_| sdr = Some(deep_r.get_spectrum()));
|
|
});
|
|
let pairs = [
|
|
(sml.unwrap(), stl.unwrap(), sdl.unwrap()),
|
|
(smr.unwrap(), str_.unwrap(), sdr.unwrap()),
|
|
];
|
|
|
|
let mut results = Vec::with_capacity(2);
|
|
for (i, (mut spec_main, spec_trans, spec_deep)) in pairs.into_iter().enumerate() {
|
|
let primary_db = spec_main.db.clone();
|
|
|
|
let same_size = spec_main.db.len() == spec_trans.db.len()
|
|
&& spec_main.db.len() == spec_deep.db.len();
|
|
if same_size {
|
|
for b in 0..spec_main.db.len() {
|
|
let mut val = spec_main.db[b].max(spec_trans.db[b]).max(spec_deep.db[b]);
|
|
if val > comp_threshold {
|
|
val = comp_threshold + (val - comp_threshold) / comp_ratio;
|
|
}
|
|
spec_main.db[b] = val;
|
|
}
|
|
}
|
|
|
|
let cepstrum = if i == 0 {
|
|
std::mem::take(&mut spec_main.cepstrum)
|
|
} else {
|
|
Vec::new()
|
|
};
|
|
|
|
results.push(FrameData {
|
|
freqs: spec_main.freqs,
|
|
db: spec_main.db,
|
|
primary_db,
|
|
cepstrum,
|
|
});
|
|
}
|
|
|
|
self.last_frames = results;
|
|
}
|
|
}
|
|
|
|
/// deinterleaves `hop` stereo frames of `pcm`, starting at frame `start`, into separate left and right f64 buffers.
///
/// `pcm` is read as interleaved left/right pairs. both returned buffers always hold exactly `hop` values:
/// frames that lie past the end of `pcm` (including a dangling half frame when `pcm` has odd length) are
/// filled with 0.0 instead of panicking on an out-of-range index.
fn stereo_block(pcm: &[f32], start: usize, hop: usize) -> (Vec<f64>, Vec<f64>) {
    let mut left = Vec::with_capacity(hop);
    let mut right = Vec::with_capacity(hop);
    // chunks_exact yields only complete left/right pairs; skip/take select the requested window.
    for frame in pcm.chunks_exact(2).skip(start).take(hop) {
        left.push(f64::from(frame[0]));
        right.push(f64::from(frame[1]));
    }
    // pad whatever the source could not supply with silence.
    left.resize(hop, 0.0);
    right.resize(hop, 0.0);
    (left, right)
}
|