use crate::proc::{Procer, UpDequer, DequerUtils, Update};
use crate::notes::{Note, NoteValue, ProcerNote};
use core::ops::Range;
//use std::collections::VecDeque;
use std::sync::Arc;

use realfft::{RealFftPlanner, FftNum, RealToComplex};
use rustfft::num_complex::Complex;
//use rustfft::num_traits::Zero;
use rustfft::num_traits::One;
use rustfft::num_traits::float::Float;

use derive_more::Display;
use std::fmt::Debug;

trait DebugableRTC<T>: RealToComplex<T> + Debug {}

// The Arc here really increases compile times, but that might just be because it has to link
// in realfft/rustfft where before it didn't get linked in.
#[derive(Display)]
#[display(fmt="RFftProcer(size={}, outsize={}, freq={}, deque=(size={}, cap={}), in_buf_size={}, scratch=(size={}, cap={}), out_data=(size={}, cap={}))",
    size, outsize, frequency,
    "in_deque.len()", "in_deque.capacity()",
    "in_buf.len()",
    "scratch.len()", "scratch.capacity()",
    "out_data.len()", "out_data.capacity()")]
pub struct RFftProcer<I: FftNum + Float + Default, const BS: usize, const US: usize> {
    pub size: usize,
    pub outsize: usize,
    pub frequency: usize,
    pub in_deque: UpDequer<I, US>,
    pub in_buf: [I; BS],
    pub rfft: Arc<dyn RealToComplex<I>>,
    // TODO: could make this a fixed array instead of a vec too maybe
    pub scratch: Vec<Complex<I>>,
    pub out_data: Vec<Complex<I>>,
    pub scale: I,
    //pub in_deque: VecDeque<Update<I, US>>,
}

impl<I: FftNum + Float + Default, const BS: usize, const US: usize> RFftProcer<I, BS, US> {
    pub fn new(frequency: usize) -> Self {
        let retsize = BS / 2 + 1;
        // TODO: could do some cleverness with using double the capacity, then you'd only need to
        // pop elements off after you're double-full, but I think just having a buffer of 1 or 2
        // extra elements is easier here, popping before each add in process_data if we're full
        let dq_capacity = (BS / US) + 2;
        assert!((dq_capacity * US) > BS);

        let mut planner = RealFftPlanner::<I>::new();
        let rfft = planner.plan_fft_forward(BS);
        let scratch = rfft.make_scratch_vec();
        let out_data = rfft.make_output_vec();

        return Self {
            size: BS,
            outsize: retsize,
            frequency: frequency,
            in_deque: UpDequer::with_capacity(dq_capacity),
            in_buf: [I::default(); BS],
            rfft: rfft,
            //out_data: Vec::with_capacity(retsize),
            scratch: scratch,
            out_data: out_data,
            // The docs say to scale by 1/sqrt(len), but it seems like 1/len (where len is the real
            // input size) is the correct scaling to get sqrt(norm_sqr) results between 0 and 1.
            // I'm still not entirely sure about this though; l1_norm also seems to max out at len
            // without scaling, so 1/len I think is right for it too, but it also gives lower
            // results when there's an imaginary part.
            //scale: /*I::from_usize(10).unwrap()*/I::one() / I::from_usize(retsize/*-1*/).unwrap().sqrt(),
            scale: I::one() / I::from_usize(BS).unwrap(),
        }
    }
}
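// The scaling comment in new() is easy to sanity-check in isolation: for a real-input
// FFT of length N, a unit-amplitude sinusoid sitting exactly on bin k comes out with
// |X[k]| == N/2, so scaling by 1/N puts that bin at 0.5 and keeps a full-scale sine
// below 1. The test below is a minimal sketch of that property using realfft directly;
// the length 1024, bin index 8 and tolerance are arbitrary values picked for the test,
// not anything the rest of this crate depends on.
#[cfg(test)]
mod scale_sanity {
    use realfft::RealFftPlanner;

    #[test]
    fn unit_sine_on_a_bin_scales_to_half() {
        const N: usize = 1024;
        const BIN: usize = 8;
        let mut planner = RealFftPlanner::<f64>::new();
        let r2c = planner.plan_fft_forward(N);
        let mut input = r2c.make_input_vec();
        let mut output = r2c.make_output_vec();
        for (n, x) in input.iter_mut().enumerate() {
            *x = (2.0 * std::f64::consts::PI * (BIN as f64) * (n as f64) / (N as f64)).sin();
        }
        r2c.process(&mut input, &mut output).unwrap();
        // with the 1/len scaling used by RFftProcer, the bin the sine lands on reads 0.5
        let scaled = output[BIN].norm() / (N as f64);
        assert!((scaled - 0.5).abs() < 1e-6);
    }
}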
impl<I: FftNum + Float + Default, const BS: usize, const US: usize> Procer<I, US> for RFftProcer<I, BS, US> {
    fn get_size(&self) -> usize {
        self.size
    }

    fn get_frequency(&self) -> usize {
        self.frequency
    }

    fn make_pnotes<'nl>(&self, notes: &'nl [Note]) -> Vec<ProcerNote<'nl, I>> {
        // TODO: +1 or nah? (might have to handle it differently for even or odd, hopefully not
        // though)
        //let retsize = self.size/2+1;
        let retsize = self.outsize;
        let noteslen = notes.len();
        let mut ret: Vec<ProcerNote<'nl, I>> = Vec::with_capacity(noteslen);

        let mut cur_note_idx = 0;
        let mut cutoff = notes[0].high_freq;
        let scale: f64 = (self.frequency as f64) / (self.size as f64);
        // TODO: since we do have .low_freq here as well, we could start the low side of the range
        // close to .low_freq (how close?), but for now we'll start it at 1
        let mut cur_range: Range<usize> = 1..1;
        for i in 1..retsize {
            let freq: f64 = (i as f64) * scale;
            if freq > cutoff {
                while freq > cutoff {
                    let pnote = ProcerNote::<'nl, I>(&notes[cur_note_idx], cur_range.clone(), I::default(), I::default());
                    ret.push(pnote);
                    cur_range.start = cur_range.end;
                    // worth noting this is different from the python code:
                    // here frequencies above the highest note aren't added in,
                    // whereas the python code adds them all to the highest
                    // note; you change that by changing the return below
                    // here to a `cutoff = i`, I think
                    cur_note_idx += 1;
                    if cur_note_idx >= noteslen {
                        return ret;
                    }
                    cutoff = notes[cur_note_idx].high_freq;
                }
            }
            //else {
            cur_range.end += 1;
            //}
        }
        // add the final pnote if we ran out of frequencies before running
        // out of notes
        ret.push(ProcerNote::<'nl, I>(&notes[cur_note_idx], cur_range, I::default(), I::default()));

        return ret;
    }
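    // For a rough feel of the bin -> frequency mapping make_pnotes uses above
    // (freq = bin_index * sample_rate / fft_size): with, say, a 44100 Hz sample rate
    // and BS = 4096, each bin covers 44100 / 4096 ≈ 10.77 Hz, so bin 41 sits near
    // 441 Hz and would fall into whichever Note's low_freq..high_freq range contains
    // A4. Those numbers are only an illustration; the real values depend on the
    // frequency and BS the caller picks.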
    fn process_data(&mut self, input: &Update<I, US>, notes: &mut [ProcerNote<'_, I>]) -> bool {
        let dq_len = self.in_deque.len();
        let dq_cap = self.in_deque.capacity();
        // we should probably make sure buf_size can be made with the length of dq as well though,
        // so capacity can grow if new() messed up with the capacity
        if dq_len == dq_cap {
            self.in_deque.pop_front();
        }
        let scale = self.scale;

        // XXX: mm, this copies twice per update (once to add it to the deque and once to the buffer),
        // which I kinda wanted to avoid (using an Arc would avoid it, but it'd be extra heap
        // allocations); at least it doesn't realloc though, so hopefully that's ok.
        // Depending on how this works with Alsa I might want to switch to passing in a slice and
        // storing references in the UpDeque, but that makes lifetime soup, or something else maybe.
        // OHH, though, since I'm probably getting i16 from alsa and want to convert to f32 anyway,
        // it makes sense to copy (question is: will that add a third copy or will the compiler be
        // smart enough to convert in place? I think ideally the conversion would happen here in
        // this push_back actually, maybe the input Update should be AI and this push_back should
        // be `as I` or Into or whatever).
        self.in_deque.push_back(input.clone());

        let updated = self.in_deque.update_buffer_array(&mut self.in_buf);
        //println!("updated = {}", updated);
        if updated {
            //assert_eq!(self.in_buf, [I::default(); BS]);
            let post_scale = I::from_u8(2).unwrap().sqrt();
            self.rfft.process_with_scratch(&mut self.in_buf, &mut self.out_data, &mut self.scratch).unwrap();
            for pnote in notes {
                // TODO: figure out the best calculation for this
                // TODO: map/sum or product might be quicker than fold here, since fold can't
                // really use simd; also look at what the perftest code did to use simd
                // XXX: python's abs uses the equivalent of sqrt(norm_sqr)
                // (https://docs.rs/num-complex/0.4.3/num_complex/struct.Complex.html#method.norm_sqr)
                // ie. sqrt(|real|**2 + |imag|**2); gonna just use sqrt(norm_sqr) here for now.
                // l1_norm might also be a good option (just |real| + |imag|), and gets rid of the
                // pow too, but I dunno what's best here.
                // TODO: the python test2.py code actually currently uses max instead of folding here too
                //let complexes = self.out_data[pnote.1.clone()].iter().fold((I::default(), Complex::<I>::default()), |(sum, mx), c| {let c_sqrt = c.scale(scale).norm_sqr().sqrt(); (sum + c_sqrt, std::cmp::max_by(mx, *c, |a: &Complex<I>, b: &Complex<I>| { a.norm_sqr().partial_cmp(&b.norm_sqr()).unwrap() }))});
                let complexes = self.out_data[pnote.1.clone()].iter().fold(
                    (I::default(), Complex::<I>::default(), I::default()),
                    |(sum, mx_c, mx_sqrt), c| {
                        let c_sqr = c.scale(scale).norm_sqr();
                        let c_sqrt = c_sqr.sqrt();
                        if c_sqrt > mx_sqrt {
                            (sum + c_sqr, *c, c_sqrt)
                        } else {
                            (sum + c_sqr, mx_c, mx_sqrt)
                        }
                    });
                pnote.2 = complexes.0.sqrt() * post_scale; // / I::from_usize(pnote.1.len()).unwrap();
                //pnote.3 = complexes.1.scale(scale).norm_sqr() * I::from_f32(2.0).unwrap();
                //pnote.2 = complexes.0.scale(scale).norm_sqr().sqrt();
                //pnote.3 = complexes.1.scale(scale).norm_sqr().sqrt();
                pnote.3 = complexes.2 * post_scale;
                //pnote.2 = self.out_data[pnote.1.clone()].iter().fold(I::default(), |acc, c| acc + (c.norm_sqr().sqrt()*scale))/*.sqrt()*/;
                //pnote.2 = self.out_data[pnote.1.clone()].iter().map(|c| c.norm_sqr()).max_by(|a, b| { a.partial_cmp(b).unwrap() });
            }
        }

        return updated;
    }
}
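// The fold in process_data reduces each note's bin range to two numbers: the square
// root of the summed, scaled power across the range, and the magnitude of the single
// strongest bin. The test below redoes that reduction on a tiny hand-built slice of
// complex values as a minimal sketch (leaving out the post_scale factor and not
// keeping the max Complex itself); the input values and the 0.5 scale are arbitrary
// and only chosen to keep the arithmetic easy to follow.
#[cfg(test)]
mod fold_sanity {
    use rustfft::num_complex::Complex;

    #[test]
    fn sum_and_max_over_a_bin_range() {
        let bins = [
            Complex::new(0.6f64, 0.0),
            Complex::new(0.0, 0.8),
            Complex::new(0.3, 0.4),
        ];
        let scale = 0.5f64;
        let (power_sum, max_mag) = bins.iter().fold((0.0f64, 0.0f64), |(sum, mx), c| {
            let p = c.scale(scale).norm_sqr();
            (sum + p, mx.max(p.sqrt()))
        });
        // scaled magnitudes are 0.3, 0.4 and 0.25, so the strongest bin reads 0.4
        assert!((max_mag - 0.4).abs() < 1e-12);
        // summed scaled power is 0.09 + 0.16 + 0.0625 = 0.3125
        assert!((power_sum.sqrt() - 0.3125f64.sqrt()).abs() < 1e-12);
    }
}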