summaryrefslogtreecommitdiff
path: root/src/rfft.rs
diff options
context:
space:
mode:
authorWavy Harp <wavyharp@gmail.com>2023-05-07 23:04:53 -0600
committerKyle McFarland <tfkyle@gmail.com>2023-05-07 23:04:53 -0600
commit991849b32acf83dd14a5096540bb053d2572502a (patch)
tree279b59d75d4ad6081f5242cf77d843ae6b37fc3d /src/rfft.rs
downloadrustynotes-991849b32acf83dd14a5096540bb053d2572502a.zip
rustynotes-991849b32acf83dd14a5096540bb053d2572502a.tar.gz
rustynotes-991849b32acf83dd14a5096540bb053d2572502a.tar.bz2
initial importHEADmaster
Currently everything is very tied to ALSA and my system; for the moment you'll need to manually change the device names and maybe channels/period_size in src/main.rs, src/bin/smolguitar.rs and src/bin/loopbacker.rs. I'll fix that and add runtime period size/updater allocation soon (and probably make a cpal backend as well so it can work on other platforms), but doing this initial commit to play around with stereo for now~
Diffstat (limited to 'src/rfft.rs')
-rw-r--r--src/rfft.rs179
1 file changed, 179 insertions, 0 deletions
diff --git a/src/rfft.rs b/src/rfft.rs
new file mode 100644
index 0000000..8e5fc0f
--- /dev/null
+++ b/src/rfft.rs
@@ -0,0 +1,179 @@
+use crate::proc::{Procer, UpDequer, DequerUtils, Update};
+use crate::notes::{Note, NoteValue, ProcerNote};
+use core::ops::Range;
+//use std::collections::VecDeque;
+use std::sync::Arc;
+
+use realfft::{RealFftPlanner, FftNum, RealToComplex};
+use rustfft::num_complex::Complex;
+//use rustfft::num_traits::Zero;
+use rustfft::num_traits::One;
+use rustfft::num_traits::float::Float;
+use derive_more::Display;
+use std::fmt::Debug;
+
+
// Marker trait combining `RealToComplex` with `Debug` so an FFT plan could be
// stored behind a debug-printable trait object.
// NOTE(review): currently unused here — `RFftProcer.rfft` is a plain
// `Arc<dyn RealToComplex<I>>`; presumably kept for a future `Debug` derive.
trait DebugableRTC<I>: RealToComplex<I> + Debug {}
+
+
+// the Arc<dyn> here really increases compile times, but that might just be because it has to link
+// in realfft/rustfft where before it didn't get linked in
+
/// Real-FFT based note processor: accumulates fixed-size input updates in a
/// deque, fills a `BS`-sample time-domain window from them, runs a forward
/// real-to-complex FFT, and bins the scaled output magnitudes into per-note
/// values (see `Procer::process_data`).
///
/// Type parameters: `I` is the sample type, `US` the number of samples per
/// `Update`, `BS` the FFT (window) size.
#[derive(Display)]
#[display(fmt="RfftProcer(size={}, outsize={}, freq={}, deque=(size={}, cap={}), in_buf_size={}, scratch=(size={}, cap={}) out_data=(size={}, cap={}))", size, outsize, frequency, "in_deque.len()", "in_deque.capacity()", "in_buf.len()", "scratch.len()", "scratch.capacity()", "out_data.len()", "out_data.capacity()")]
pub struct RFftProcer<I: Clone + FftNum, const US: usize, const BS: usize> {
    /// FFT input size in samples (set to `BS` in `new`).
    pub size: usize,
    /// Number of complex output bins (`BS/2 + 1`, the real-FFT output length).
    pub outsize: usize,
    /// Sample rate in Hz; used to map FFT bins to frequencies in `make_pnotes`.
    pub frequency: usize,
    /// Incoming updates waiting to be folded into `in_buf`.
    pub in_deque: UpDequer<I, US>,
    /// Time-domain window handed to the FFT.
    pub in_buf: [I; BS],
    /// Planned forward real-to-complex FFT.
    pub rfft: Arc<dyn RealToComplex<I>>,
    // TODO: could make this a fixed array instead of a vec too maybe
    /// Scratch space required by `process_with_scratch`.
    pub scratch: Vec<Complex<I>>,
    /// Frequency-domain output of the FFT.
    pub out_data: Vec<Complex<I>>,
    /// Amplitude normalisation factor applied per bin (`1 / BS` — see `new`).
    pub scale: I,
    //pub in_deque: VecDeque<Update<A>>,
}
+
+impl<I: Default + Copy + FftNum + Float, const US: usize, const BS: usize> RFftProcer<I, US, BS> {
+ pub fn new(frequency: usize) -> Self {
+ let retsize = BS/2+1;
+ // TODO: could do some cleverness with using double then you'd only need to pop elements
+ // off after you're double-full, but i think just having a buffer of 1 or 2 elements is
+ // easier here and popping before each add in process_data if we're full
+ let dq_capacity = (BS/US)+2;
+ assert!((dq_capacity * US) > BS);
+ let mut planner = RealFftPlanner::new();
+ let rfft = planner.plan_fft_forward(BS);
+ let scratch = rfft.make_scratch_vec();
+ let out_data = rfft.make_output_vec();
+ return Self {
+ size: BS,
+ outsize: retsize,
+ frequency: frequency,
+ in_deque: UpDequer::with_capacity(dq_capacity),
+ in_buf: [I::default(); BS],
+ rfft: rfft,
+ //out_data: Vec::with_capacity(retsize),
+ scratch: scratch,
+ out_data: out_data,
+ // the docs say to scale by 1/sqrt(len), but it seems like 1/len (where len is the real
+ // input size) is the correct scaling to get results with norm_sqrt results between 0-1
+ // I'm still not entirely sure about this though, l1_norm also seems to max out at len
+ // without scaling so 1/len i think is right for it too, but it also gives lower
+ // results when there's an imaginary part
+ //scale: /*I::from_usize(10).unwrap()*/I::one() / I::from_usize(retsize/*-1*/).unwrap().sqrt(),
+ scale: I::one() / I::from_usize(BS).unwrap(),
+ }
+ }
+}
+
impl<I: Default + Clone + FftNum + Float + NoteValue, const US: usize, const BS: usize> Procer<I, US> for RFftProcer<I, US, BS> {
    /// Returns the FFT window size in samples (`BS`).
    fn get_size(&self) -> usize {
        self.size
    }
    /// Returns the sample rate in Hz.
    fn get_frequency(&self) -> usize {
        self.frequency
    }

    /// Maps each `Note` to the contiguous range of FFT output bins whose
    /// frequencies fall at or below the note's `high_freq`, returning one
    /// `ProcerNote` per note with zeroed value fields (ranges may be empty).
    ///
    /// Bin 0 (DC) is deliberately skipped: the first range starts at 1 and
    /// the bin loop below also starts at 1.
    /// NOTE(review): panics on an empty `notes` slice (`notes[0]` below) —
    /// callers presumably always pass at least one note.
    fn make_pnotes<'nl>(&self, notes: &'nl [Note]) -> Vec<ProcerNote<'nl, I>> {
        // TODO: +1 or nah? (might have to handle it differently for even or odd, hopefully not
        // though)
        //let retsize = self.size/2+1;
        let retsize = self.outsize;
        let noteslen = notes.len();
        let mut ret: Vec<ProcerNote<'nl, I>> = Vec::with_capacity(noteslen);
        let mut cur_note_idx = 0;
        let mut cutoff = notes[0].high_freq;
        // Hz per FFT bin: bin i corresponds to frequency i * (rate / window size).
        let scale: f64 = (self.frequency as f64)/(self.size as f64);
        // TODO: since we do have .low_freq here as well you can start the low size of the range
        // close to .low_freq (how close?) but for now we'll start it at 1
        let mut cur_range: Range<usize> = 1..1;
        for i in 1..retsize {
            let freq: f64 = (i as f64) * scale;
            if freq > cutoff {
                // This bin is above the current note's band: close out the
                // current note, plus any following notes whose whole band
                // lies below this bin (those get empty ranges).
                while freq > cutoff {
                    let pnote = ProcerNote::<'nl, I>(&notes[cur_note_idx], cur_range.clone(), I::default(), I::default());
                    ret.push(pnote);
                    cur_range.start = cur_range.end;
                    // worth noting this is different than the python code,
                    // here frequencies above the high note aren't added in
                    // where the python code adds them all to the highest
                    // note, you change that by changing the return below
                    // here to a cutoff = <however you express infinite> i
                    // think
                    cur_note_idx += 1;
                    if cur_note_idx >= noteslen {
                        return ret;
                    }
                    cutoff = notes[cur_note_idx].high_freq;
                }
            }
            //else {
            cur_range.end += 1;
            //}
        }
        // add the final pnote if we ran out of frequencies before running
        // out of notes
        ret.push(ProcerNote::<'nl, I>(&notes[cur_note_idx], cur_range, I::default(), I::default()));
        return ret;
    }

    /// Pushes one update into the input deque (evicting the oldest entry when
    /// the deque is at capacity) and refreshes the FFT window from it. When a
    /// full window was available, runs the real FFT and folds the scaled bin
    /// magnitudes of each `ProcerNote`'s bin range into its value fields:
    /// `.2` = sqrt(sum of scaled `norm_sqr`) * sqrt(2), and
    /// `.3` = the largest scaled bin magnitude in the range * sqrt(2).
    /// Returns whether the window (and thus the notes) was updated.
    fn process_data(&mut self, input: &Update<I, US>, notes: &mut [ProcerNote<I>]) -> bool {
        let dq_len = self.in_deque.len();
        let dq_cap = self.in_deque.capacity();
        // we should probably make sure buf_size can be made with the length of dq as well though
        // so capacity can grow if new() messed up with the capacity
        if dq_len == dq_cap {
            self.in_deque.pop_front();
        }
        let scale = self.scale;
        // XXX: mm this copies twice per update (once to add it to the deque and once to the buffer)
        // which i kinda wanted to avoid (using an Arc would avoid it but it'd be extra heap
        // allocations) atleast it doesn't realloc though so hopefully that's ok
        // depending on how this works with Alsa i might wanna switch to passing in a slice and
        // storing references in the UpDeque but that makes lifetime soup
        // or something else maybes
        // OHH though since i'm probably getting i16 from alsa and want to convert to f32 anyway it
        // makes sense to copy (question is will that add a third copy or will the compiler be
        // smart enough to convert inplace, i think ideally the conversion would happen here in
        // this push_back actually, maybe the input Update should be AI and this push_back should
        // be as I or Into or whatever)
        self.in_deque.push_back(input.clone());
        let updated = self.in_deque.update_buffer_array(&mut self.in_buf);
        //println!("updated = {}", updated);
        if updated {
            //assert_eq!(self.in_buf, [I::default(); BS]);
            // sqrt(2) post-scale applied to both note values below.
            let post_scale = I::from_u8(2).unwrap().sqrt();
            self.rfft.process_with_scratch(&mut self.in_buf, &mut self.out_data, &mut self.scratch).unwrap();
            for pnote in notes {
                // TODO: figure out the best calculation for this
                // TODO: map/sum or product might be quicker then fold here, since fold can't
                // really use simd, also look at what the perftest code did to use simd
                // XXX: python's abs uses the equiv of sqrt(norm_sqr) (https://docs.rs/num-complex/0.4.3/num_complex/struct.Complex.html#method.norm_sqr)
                // ie. sqrt(|real|**2 + |imag|**2), gonna just use norm_sqrt here for now
                // l1_norm might also be a good option (just |real| + |imag|), and gets rid of the
                // pow too, but i dunno what's best here
                // TODO: the python test2.py code actually currently uses max instead of folding here too
                //let complexes = self.out_data[pnote.1.clone()].iter().fold((I::default(), Complex::<I>::default()), |(sum, mx), c| {let c_sqrt = c.scale(scale).norm_sqr().sqrt(); (sum + c_sqrt, std::cmp::max_by(mx, *c, |a: &Complex<I>, b: &Complex<I>| { a.norm_sqr().partial_cmp(&b.norm_sqr()).unwrap() }))});
                // Fold accumulates (sum of scaled norm_sqr, max bin as a
                // Complex, max scaled magnitude) over the note's bin range.
                let complexes = self.out_data[pnote.1.clone()].iter().fold((I::default(), Complex::<I>::default(), I::default()), |(sum, mx_c, mx_sqrt), c| {
                    let c_sqr = c.scale(scale).norm_sqr();
                    let c_sqrt = c_sqr.sqrt();
                    if c_sqrt > mx_sqrt {
                        (sum + c_sqr, *c, c_sqrt)
                    } else {
                        (sum + c_sqr, mx_c, mx_sqrt)
                    }
                });
                pnote.2 = complexes.0.sqrt() * post_scale; // / I::from_usize(pnote.1.len()).unwrap();
                //pnote.3 = complexes.1.scale(scale).norm_sqr() * I::from_f32(2.0).unwrap();
                //pnote.2 = complexes.0.scale(scale).norm_sqr().sqrt();
                //pnote.3 = complexes.1.scale(scale).norm_sqr().sqrt();
                pnote.3 = complexes.2 * post_scale;
                //pnote.2 = self.out_data[pnote.1.clone()].iter().fold(I::default(), |acc, c| acc + (c.norm_sqr().sqrt()*scale))/*.sqrt()*/;
                //pnote.2 = self.out_data[pnote.1.clone()].iter().map(|c| c.norm_sqr()).max_by(|a, b| { a.partial_cmp(b).unwrap() } );
            }
        }
        return updated;
    }
}