diff --git a/crates/cord-cordic/src/compiler.rs b/crates/cord-cordic/src/compiler.rs index c4f502f..c11a54b 100644 --- a/crates/cord-cordic/src/compiler.rs +++ b/crates/cord-cordic/src/compiler.rs @@ -1,4 +1,5 @@ use cord_trig::{TrigGraph, TrigOp}; +use crate::lut::CordicTable; use crate::ops::*; /// A compiled CORDIC program ready for binary serialization or execution. @@ -77,12 +78,14 @@ impl CORDICProgram { } }).collect(); + let table = CordicTable::for_word_bits(config.word_bits); + CORDICProgram { word_bits: config.word_bits, instructions, output: graph.output, - atan_table: atan_table(config.word_bits), - gain: cordic_gain(config.word_bits, frac_bits), + atan_table: table.atan.to_vec(), + gain: table.gain, } } diff --git a/crates/cord-cordic/src/eval.rs b/crates/cord-cordic/src/eval.rs index 33d5ebd..033e6f6 100644 --- a/crates/cord-cordic/src/eval.rs +++ b/crates/cord-cordic/src/eval.rs @@ -1,38 +1,28 @@ +use std::sync::Arc; use cord_trig::{TrigGraph, TrigOp}; +use crate::lut::CordicTable; /// CORDIC evaluator: evaluates a TrigGraph using only integer /// shifts, adds, and comparisons. No floating point trig. /// /// Proof that the entire pipeline compiles down to -/// binary arithmetic — shift, add, compare, repeat. +/// binary arithmetic -- shift, add, compare, repeat. 
pub struct CORDICEvaluator { word_bits: u8, frac_bits: u8, - atan_table: Vec<i64>, + atan_table: Arc<[i64]>, gain: i64, } impl CORDICEvaluator { pub fn new(word_bits: u8) -> Self { - let frac_bits = word_bits - 1; - let iterations = word_bits; - - // Precompute atan(2^-i) as fixed-point - let atan_table: Vec<i64> = (0..iterations) - .map(|i| { - let angle = (2.0f64).powi(-(i as i32)).atan(); - (angle * (1i64 << frac_bits) as f64).round() as i64 - }) - .collect(); - - // CORDIC gain K = product of 1/sqrt(1 + 2^{-2i}) - let mut k = 1.0f64; - for i in 0..iterations { - k *= 1.0 / (1.0 + (2.0f64).powi(-2 * i as i32)).sqrt(); + let table = CordicTable::for_word_bits(word_bits); + CORDICEvaluator { + word_bits, + frac_bits: table.frac_bits, + atan_table: table.atan, + gain: table.gain, } - let gain = (k * (1i64 << frac_bits) as f64).round() as i64; - - CORDICEvaluator { word_bits, frac_bits, atan_table, gain } } /// Convert f64 to fixed-point. diff --git a/crates/cord-cordic/src/lib.rs b/crates/cord-cordic/src/lib.rs index b269a49..2128f94 100644 --- a/crates/cord-cordic/src/lib.rs +++ b/crates/cord-cordic/src/lib.rs @@ -10,6 +10,8 @@ pub mod compiler; pub mod ops; pub mod eval; +pub mod lut; pub use compiler::CORDICProgram; pub use eval::CORDICEvaluator; +pub use lut::CordicTable; diff --git a/crates/cord-cordic/src/lut.rs b/crates/cord-cordic/src/lut.rs new file mode 100644 index 0000000..5d1101c --- /dev/null +++ b/crates/cord-cordic/src/lut.rs @@ -0,0 +1,114 @@ +use std::collections::HashMap; +use std::sync::{Arc, Mutex, OnceLock}; + +/// Pre-computed CORDIC table: atan values + gain for a given iteration count. +#[derive(Debug, Clone)] +pub struct CordicTable { + pub atan: Arc<[i64]>, + pub gain: i64, + pub iterations: u8, + pub frac_bits: u8, +} + +/// Global table registry. Keyed by (iterations, frac_bits) so any +/// combination of word width and iteration count shares a single +/// allocation. 
Thread-safe via interior Mutex on the map only; +/// once an Arc is cloned out, no lock is held during evaluation. +struct LutRegistry { + tables: Mutex<HashMap<(u8, u8), CordicTable>>, +} + +static REGISTRY: OnceLock<LutRegistry> = OnceLock::new(); + +fn registry() -> &'static LutRegistry { + REGISTRY.get_or_init(|| LutRegistry { + tables: Mutex::new(HashMap::new()), + }) +} + +impl CordicTable { + /// Retrieve or compute the CORDIC table for the given parameters. + /// Repeated calls with the same (iterations, frac_bits) return + /// a clone of the same Arc'd data -- no recomputation. + pub fn get(iterations: u8, frac_bits: u8) -> Self { + let reg = registry(); + let key = (iterations, frac_bits); + + let mut map = reg.tables.lock().unwrap(); + if let Some(cached) = map.get(&key) { + return cached.clone(); + } + + let table = Self::compute(iterations, frac_bits); + map.insert(key, table.clone()); + table + } + + /// Convenience: derive iterations and frac_bits from word_bits + /// using the standard CORDIC convention (iterations = word_bits, + /// frac_bits = word_bits - 1). 
+ pub fn for_word_bits(word_bits: u8) -> Self { + Self::get(word_bits, word_bits - 1) + } + + fn compute(iterations: u8, frac_bits: u8) -> Self { + let scale = (1i64 << frac_bits) as f64; + + let atan: Arc<[i64]> = (0..iterations) + .map(|i| { + let angle = (2.0f64).powi(-(i as i32)).atan(); + (angle * scale).round() as i64 + }) + .collect::<Vec<_>>() + .into(); + + let mut k = 1.0f64; + for i in 0..iterations { + k *= 1.0 / (1.0 + (2.0f64).powi(-2 * i as i32)).sqrt(); + } + let gain = (k * scale).round() as i64; + + CordicTable { atan, gain, iterations, frac_bits } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ops; + + #[test] + fn cached_matches_raw() { + for bits in [8u8, 16, 32, 48, 64] { + let table = CordicTable::for_word_bits(bits); + let raw_atan = ops::atan_table(bits); + let raw_gain = ops::cordic_gain(bits, bits - 1); + + assert_eq!(table.atan.len(), raw_atan.len(), "len mismatch at {bits}"); + assert_eq!(&*table.atan, &raw_atan[..], "atan mismatch at {bits}"); + assert_eq!(table.gain, raw_gain, "gain mismatch at {bits}"); + } + } + + #[test] + fn deduplication() { + let a = CordicTable::for_word_bits(32); + let b = CordicTable::for_word_bits(32); + assert!(Arc::ptr_eq(&a.atan, &b.atan)); + } + + #[test] + fn different_widths_distinct() { + let a = CordicTable::for_word_bits(16); + let b = CordicTable::for_word_bits(32); + assert!(!Arc::ptr_eq(&a.atan, &b.atan)); + assert_ne!(a.atan.len(), b.atan.len()); + } + + #[test] + fn custom_iteration_count() { + let t = CordicTable::get(16, 31); + assert_eq!(t.atan.len(), 16); + assert_eq!(t.frac_bits, 31); + } +}