author | Wavy Harp <wavyharp@gmail.com> | 2023-05-07 23:04:53 -0600
---|---|---
committer | Kyle McFarland <tfkyle@gmail.com> | 2023-05-07 23:04:53 -0600
commit | 991849b32acf83dd14a5096540bb053d2572502a (patch)
tree | 279b59d75d4ad6081f5242cf77d843ae6b37fc3d /src/rfft.rs
Currently everything is very tied to ALSA and my system;
for the moment you'll need to manually change the device names,
and maybe channels/period_size, in src/main.rs, src/bin/smolguitar.rs
and src/bin/loopbacker.rs. I'll fix that and add runtime period
size/updater allocation soon (and probably make a cpal backend as well
so it can work on other platforms), but I'm doing this initial commit to play
around with stereo for now~
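For reference, the hard-coded settings the message refers to usually boil down to a handful of ALSA capture parameters like the ones below. This is a hypothetical sketch using the alsa crate, not the repository's actual src/main.rs; the device string "hw:1,0", the channel count, the sample rate, and the period size are placeholders you would swap for your own system's values. A cpal backend, as mentioned above, would replace this kind of per-system setup with cross-platform device selection.

```rust
// Hypothetical sketch only -- not the project's src/main.rs.
// Shows the kind of hard-coded ALSA capture settings the commit message mentions.
use alsa::pcm::{Access, Format, HwParams, PCM};
use alsa::{Direction, ValueOr};

fn open_capture() -> Result<PCM, alsa::Error> {
    // Device name you would edit for your system (e.g. "hw:1,0", "default", ...).
    let pcm = PCM::new("hw:1,0", Direction::Capture, false)?;
    {
        let hwp = HwParams::any(&pcm)?;
        hwp.set_channels(2)?;                        // channels you may need to change
        hwp.set_rate(48_000, ValueOr::Nearest)?;     // placeholder sample rate
        hwp.set_format(Format::s16())?;              // i16 samples, as the code comments assume
        hwp.set_access(Access::RWInterleaved)?;
        hwp.set_period_size(256, ValueOr::Nearest)?; // period_size you may need to change
        pcm.hw_params(&hwp)?;
    }
    Ok(pcm)
}
```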
Diffstat (limited to 'src/rfft.rs')
-rw-r--r-- | src/rfft.rs | 179
1 file changed, 179 insertions, 0 deletions
```diff
diff --git a/src/rfft.rs b/src/rfft.rs
new file mode 100644
index 0000000..8e5fc0f
--- /dev/null
+++ b/src/rfft.rs
@@ -0,0 +1,179 @@
+use crate::proc::{Procer, UpDequer, DequerUtils, Update};
+use crate::notes::{Note, NoteValue, ProcerNote};
+use core::ops::Range;
+//use std::collections::VecDeque;
+use std::sync::Arc;
+
+use realfft::{RealFftPlanner, FftNum, RealToComplex};
+use rustfft::num_complex::Complex;
+//use rustfft::num_traits::Zero;
+use rustfft::num_traits::One;
+use rustfft::num_traits::float::Float;
+use derive_more::Display;
+use std::fmt::Debug;
+
+
+trait DebugableRTC<I>: RealToComplex<I> + Debug {}
+
+
+// the Arc<dyn> here really increases compile times, but that might just be because it has to link
+// in realfft/rustfft where before it didn't get linked in
+
+#[derive(Display)]
+#[display(fmt="RfftProcer(size={}, outsize={}, freq={}, deque=(size={}, cap={}), in_buf_size={}, scratch=(size={}, cap={}) out_data=(size={}, cap={}))", size, outsize, frequency, "in_deque.len()", "in_deque.capacity()", "in_buf.len()", "scratch.len()", "scratch.capacity()", "out_data.len()", "out_data.capacity()")]
+pub struct RFftProcer<I: Clone + FftNum, const US: usize, const BS: usize> {
+    pub size: usize,
+    pub outsize: usize,
+    pub frequency: usize,
+    pub in_deque: UpDequer<I, US>,
+    pub in_buf: [I; BS],
+    pub rfft: Arc<dyn RealToComplex<I>>,
+    // TODO: could make this a fixed array instead of a vec too maybe
+    pub scratch: Vec<Complex<I>>,
+    pub out_data: Vec<Complex<I>>,
+    pub scale: I,
+    //pub in_deque: VecDeque<Update<A>>,
+}
+
+impl<I: Default + Copy + FftNum + Float, const US: usize, const BS: usize> RFftProcer<I, US, BS> {
+    pub fn new(frequency: usize) -> Self {
+        let retsize = BS/2+1;
+        // TODO: could do some cleverness with using double then you'd only need to pop elements
+        // off after you're double-full, but i think just having a buffer of 1 or 2 elements is
+        // easier here and popping before each add in process_data if we're full
+        let dq_capacity = (BS/US)+2;
+        assert!((dq_capacity * US) > BS);
+        let mut planner = RealFftPlanner::new();
+        let rfft = planner.plan_fft_forward(BS);
+        let scratch = rfft.make_scratch_vec();
+        let out_data = rfft.make_output_vec();
+        return Self {
+            size: BS,
+            outsize: retsize,
+            frequency: frequency,
+            in_deque: UpDequer::with_capacity(dq_capacity),
+            in_buf: [I::default(); BS],
+            rfft: rfft,
+            //out_data: Vec::with_capacity(retsize),
+            scratch: scratch,
+            out_data: out_data,
+            // the docs say to scale by 1/sqrt(len), but it seems like 1/len (where len is the real
+            // input size) is the correct scaling to get results with norm_sqrt results between 0-1
+            // I'm still not entirely sure about this though, l1_norm also seems to max out at len
+            // without scaling so 1/len i think is right for it too, but it also gives lower
+            // results when there's an imaginary part
+            //scale: /*I::from_usize(10).unwrap()*/I::one() / I::from_usize(retsize/*-1*/).unwrap().sqrt(),
+            scale: I::one() / I::from_usize(BS).unwrap(),
+        }
+    }
+}
+
+impl<I: Default + Clone + FftNum + Float + NoteValue, const US: usize, const BS: usize> Procer<I, US> for RFftProcer<I, US, BS> {
+    fn get_size(&self) -> usize {
+        self.size
+    }
+    fn get_frequency(&self) -> usize {
+        self.frequency
+    }
+
+    fn make_pnotes<'nl>(&self, notes: &'nl [Note]) -> Vec<ProcerNote<'nl, I>> {
+        // TODO: +1 or nah? (might have to handle it differently for even or odd, hopefully not
+        // though)
+        //let retsize = self.size/2+1;
+        let retsize = self.outsize;
+        let noteslen = notes.len();
+        let mut ret: Vec<ProcerNote<'nl, I>> = Vec::with_capacity(noteslen);
+        let mut cur_note_idx = 0;
+        let mut cutoff = notes[0].high_freq;
+        let scale: f64 = (self.frequency as f64)/(self.size as f64);
+        // TODO: since we do have .low_freq here as well you can start the low size of the range
+        // close to .low_freq (how close?) but for now we'll start it at 1
+        let mut cur_range: Range<usize> = 1..1;
+        for i in 1..retsize {
+            let freq: f64 = (i as f64) * scale;
+            if freq > cutoff {
+                while freq > cutoff {
+                    let pnote = ProcerNote::<'nl, I>(&notes[cur_note_idx], cur_range.clone(), I::default(), I::default());
+                    ret.push(pnote);
+                    cur_range.start = cur_range.end;
+                    // worth noting this is different than the python code,
+                    // here frequencies above the high note aren't added in
+                    // where the python code adds them all to the highest
+                    // note, you change that by changing the return below
+                    // here to a cutoff = <however you express infinite> i
+                    // think
+                    cur_note_idx += 1;
+                    if cur_note_idx >= noteslen {
+                        return ret;
+                    }
+                    cutoff = notes[cur_note_idx].high_freq;
+                }
+            }
+            //else {
+            cur_range.end += 1;
+            //}
+        }
+        // add the final pnote if we ran out of frequencies before running
+        // out of notes
+        ret.push(ProcerNote::<'nl, I>(&notes[cur_note_idx], cur_range, I::default(), I::default()));
+        return ret;
+    }
+
+    fn process_data(&mut self, input: &Update<I, US>, notes: &mut [ProcerNote<I>]) -> bool {
+        let dq_len = self.in_deque.len();
+        let dq_cap = self.in_deque.capacity();
+        // we should probably make sure buf_size can be made with the length of dq as well though
+        // so capacity can grow if new() messed up with the capacity
+        if dq_len == dq_cap {
+            self.in_deque.pop_front();
+        }
+        let scale = self.scale;
+        // XXX: mm this copies twice per update (once to add it to the deque and once to the buffer)
+        // which i kinda wanted to avoid (using an Arc would avoid it but it'd be extra heap
+        // allocations) atleast it doesn't realloc though so hopefully that's ok
+        // depending on how this works with Alsa i might wanna switch to passing in a slice and
+        // storing references in the UpDeque but that makes lifetime soup
+        // or something else maybes
+        // OHH though since i'm probably getting i16 from alsa and want to convert to f32 anyway it
+        // makes sense to copy (question is will that add a third copy or will the compiler be
+        // smart enough to convert inplace, i think ideally the conversion would happen here in
+        // this push_back actually, maybe the input Update should be AI and this push_back should
+        // be as I or Into or whatever)
+        self.in_deque.push_back(input.clone());
+        let updated = self.in_deque.update_buffer_array(&mut self.in_buf);
+        //println!("updated = {}", updated);
+        if updated {
+            //assert_eq!(self.in_buf, [I::default(); BS]);
+            let post_scale = I::from_u8(2).unwrap().sqrt();
+            self.rfft.process_with_scratch(&mut self.in_buf, &mut self.out_data, &mut self.scratch).unwrap();
+            for pnote in notes {
+                // TODO: figure out the best calculation for this
+                // TODO: map/sum or product might be quicker then fold here, since fold can't
+                // really use simd, also look at what the perftest code did to use simd
+                // XXX: python's abs uses the equiv of sqrt(norm_sqr) (https://docs.rs/num-complex/0.4.3/num_complex/struct.Complex.html#method.norm_sqr)
+                // ie. sqrt(|real|**2 + |imag|**2), gonna just use norm_sqrt here for now
+                // l1_norm might also be a good option (just |real| + |imag|), and gets rid of the
+                // pow too, but i dunno what's best here
+                // TODO: the python test2.py code actually currently uses max instead of folding here too
+                //let complexes = self.out_data[pnote.1.clone()].iter().fold((I::default(), Complex::<I>::default()), |(sum, mx), c| {let c_sqrt = c.scale(scale).norm_sqr().sqrt(); (sum + c_sqrt, std::cmp::max_by(mx, *c, |a: &Complex<I>, b: &Complex<I>| { a.norm_sqr().partial_cmp(&b.norm_sqr()).unwrap() }))});
+                let complexes = self.out_data[pnote.1.clone()].iter().fold((I::default(), Complex::<I>::default(), I::default()), |(sum, mx_c, mx_sqrt), c| {
+                    let c_sqr = c.scale(scale).norm_sqr();
+                    let c_sqrt = c_sqr.sqrt();
+                    if c_sqrt > mx_sqrt {
+                        (sum + c_sqr, *c, c_sqrt)
+                    } else {
+                        (sum + c_sqr, mx_c, mx_sqrt)
+                    }
+                });
+                pnote.2 = complexes.0.sqrt() * post_scale; // / I::from_usize(pnote.1.len()).unwrap();
+                //pnote.3 = complexes.1.scale(scale).norm_sqr() * I::from_f32(2.0).unwrap();
+                //pnote.2 = complexes.0.scale(scale).norm_sqr().sqrt();
+                //pnote.3 = complexes.1.scale(scale).norm_sqr().sqrt();
+                pnote.3 = complexes.2 * post_scale;
+                //pnote.2 = self.out_data[pnote.1.clone()].iter().fold(I::default(), |acc, c| acc + (c.norm_sqr().sqrt()*scale))/*.sqrt()*/;
+                //pnote.2 = self.out_data[pnote.1.clone()].iter().map(|c| c.norm_sqr()).max_by(|a, b| { a.partial_cmp(b).unwrap() } );
            }
+        }
+        return updated;
+    }
+}
```
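A quick aside on the numbers make_pnotes works with: a real FFT over BS input samples at a sample rate of `frequency` Hz yields BS/2 + 1 complex bins, and bin i sits at i * frequency / BS Hz, which is exactly the `scale` used above to assign bin ranges to notes. A minimal standalone sketch, with example values that are not taken from the repo:

```rust
// Standalone illustration of the bin -> frequency mapping make_pnotes relies on;
// the sample rate and FFT size here are example values, not the project's.
fn main() {
    let rate = 48_000usize; // `frequency` passed to RFftProcer::new
    let bs = 4096usize;     // the BS const generic (FFT/input buffer size)
    let outsize = bs / 2 + 1;                // number of complex output bins
    let bin_width = rate as f64 / bs as f64; // Hz per bin, the `scale` in make_pnotes
    println!("{} bins, {:.2} Hz apart", outsize, bin_width);
    // e.g. the bin whose centre frequency is closest to A4 (440 Hz):
    let bin = (440.0 / bin_width).round() as usize;
    println!("440 Hz ~ bin {} ({:.1} Hz)", bin, bin as f64 * bin_width);
}
```

On the scaling question in new(): with 1/len scaling, a unit-amplitude sine that lands exactly on a bin comes out with magnitude 0.5, and the sqrt(2) post_scale in process_data lifts the per-note value to about 0.707, the sine's RMS, which seems to be why 1/len keeps results in the 0..1 range the comment describes.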