// src/rfft.rs
use crate::proc::{Procer, UpDequer, DequerUtils, Update};
use crate::notes::{Note, NoteValue, ProcerNote};
use core::ops::Range;
//use std::collections::VecDeque;
use std::sync::Arc;

use realfft::{RealFftPlanner, FftNum, RealToComplex};
use rustfft::num_complex::Complex;
//use rustfft::num_traits::Zero;
use rustfft::num_traits::One;
use rustfft::num_traits::float::Float;
use derive_more::Display;
use std::fmt::Debug;


trait DebugableRTC<I>: RealToComplex<I> + Debug {}


// the Arc<dyn> here really increases compile times, but that might just be because realfft/rustfft
// now have to get linked in where before they didn't

#[derive(Display)]
#[display(fmt="RfftProcer(size={}, outsize={}, freq={}, deque=(size={}, cap={}), in_buf_size={}, scratch=(size={}, cap={}) out_data=(size={}, cap={}))", size, outsize, frequency, "in_deque.len()", "in_deque.capacity()", "in_buf.len()", "scratch.len()", "scratch.capacity()", "out_data.len()", "out_data.capacity()")]
pub struct RFftProcer<I: Clone + FftNum, const US: usize, const BS: usize> {
    pub size: usize,
    pub outsize: usize,
    pub frequency: usize,
    pub in_deque: UpDequer<I, US>,
    pub in_buf: [I; BS],
    pub rfft: Arc<dyn RealToComplex<I>>,
    // TODO: could make this a fixed array instead of a vec too maybe
    pub scratch: Vec<Complex<I>>,
    pub out_data: Vec<Complex<I>>,
    pub scale: I,
    //pub in_deque: VecDeque<Update<A>>,
}
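
// Hedged construction sketch (not from this crate's own call sites): `US` looks like the
// per-update sample count, `BS` the FFT window size, and the `frequency` passed to `new`
// appears to be the sample rate, since `make_pnotes` uses it to map bins to Hz. The
// concrete numbers and the `example_procer` name are illustrative assumptions only.
#[allow(dead_code)]
fn example_procer() -> RFftProcer<f32, 64, 1024> {
    RFftProcer::<f32, 64, 1024>::new(48000)
}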

impl<I: Default + Copy + FftNum + Float, const US: usize, const BS: usize> RFftProcer<I, US, BS> {
    pub fn new(frequency: usize) -> Self {
        let retsize = BS/2+1;
        // TODO: could be clever and use double the capacity so elements only need to be
        // popped once we're double-full, but i think just keeping a slack of 1 or 2 elements
        // here and popping before each add in process_data when we're full is easier
        let dq_capacity = (BS/US)+2;
        assert!((dq_capacity * US) > BS);
        let mut planner = RealFftPlanner::new();
        let rfft = planner.plan_fft_forward(BS);
        let scratch = rfft.make_scratch_vec();
        let out_data = rfft.make_output_vec();
        return Self {
            size: BS,
            outsize: retsize,
            frequency: frequency,
            in_deque: UpDequer::with_capacity(dq_capacity),
            in_buf: [I::default(); BS],
            rfft: rfft,
            //out_data: Vec::with_capacity(retsize),
            scratch: scratch,
            out_data: out_data,
            // the realfft docs say to scale by 1/sqrt(len), but 1/len (where len is the real
            // input size) seems to be the correct scaling to keep the norm_sqr().sqrt()
            // results between 0 and 1 (see the sanity-check sketch after this impl block).
            // I'm still not entirely sure about this though; l1_norm also seems to max out at
            // len without scaling, so 1/len i think is right for it too, but it gives lower
            // results when there's an imaginary part
            //scale: /*I::from_usize(10).unwrap()*/I::one() / I::from_usize(retsize/*-1*/).unwrap().sqrt(),
            scale: I::one() / I::from_usize(BS).unwrap(),
        }
    }
}
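
// Hedged sanity-check sketch for the 1/len scaling chosen in `new` above: a unit-amplitude
// sine aligned to bin k puts roughly BS/2 of magnitude into that bin of the unnormalized
// real FFT, so after the 1/BS scale its norm sits near 0.5 (and near 1/sqrt(2) once
// `process_data` applies its sqrt(2) post-scale). Module, test name, and sizes are
// illustrative assumptions, not part of the crate's existing tests.
#[cfg(test)]
mod scaling_sketch {
    use realfft::RealFftPlanner;

    #[test]
    fn unit_sine_scales_to_half() {
        const N: usize = 1024;
        let mut planner = RealFftPlanner::<f32>::new();
        let rfft = planner.plan_fft_forward(N);
        // full-scale sine sitting exactly on bin 8
        let mut input: Vec<f32> = (0..N)
            .map(|n| (2.0 * std::f32::consts::PI * 8.0 * n as f32 / N as f32).sin())
            .collect();
        let mut out = rfft.make_output_vec();
        rfft.process(&mut input, &mut out).unwrap();
        // bin 8 carries ~N/2 of magnitude; after the 1/N scale its norm is ~0.5
        let scaled = out[8].scale(1.0 / N as f32).norm();
        assert!((scaled - 0.5).abs() < 1e-3);
    }
}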

impl<I: Default + Clone + FftNum + Float + NoteValue, const US: usize, const BS: usize> Procer<I, US> for RFftProcer<I, US, BS> {
    fn get_size(&self) -> usize {
        self.size
    }
    fn get_frequency(&self) -> usize {
        self.frequency
    }

    fn make_pnotes<'nl>(&self, notes: &'nl [Note]) -> Vec<ProcerNote<'nl, I>> {
        // TODO: +1 or nah? (might have to handle it differently for even or odd, hopefully not
        // though)
        //let retsize = self.size/2+1;
        let retsize = self.outsize;
        let noteslen = notes.len();
        let mut ret: Vec<ProcerNote<'nl, I>> = Vec::with_capacity(noteslen);
        let mut cur_note_idx = 0;
        let mut cutoff = notes[0].high_freq;
        let scale: f64 = (self.frequency as f64)/(self.size as f64);
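        // Hedged worked example of this mapping (numbers are illustrative, not from the
        // crate): with frequency = 48_000 and size = 4096, scale is 48000/4096 ≈ 11.72 Hz
        // per bin, so bin i sits near i * 11.72 Hz (bin 10 ≈ 117 Hz) and each note's range
        // simply collects the consecutive bins whose centre falls below its high_freq.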
        // TODO: since we do have .low_freq here as well we could start the low end of the
        // range close to .low_freq (how close?), but for now we'll start it at 1
        let mut cur_range: Range<usize> = 1..1;
        for i in 1..retsize {
            let freq: f64 = (i as f64) * scale;
            if freq > cutoff {
                while freq > cutoff {
                    let pnote = ProcerNote::<'nl, I>(&notes[cur_note_idx], cur_range.clone(), I::default(), I::default());
                    ret.push(pnote);
                    cur_range.start = cur_range.end;
                    // worth noting this is different from the python code:
                    // here frequencies above the highest note aren't added in,
                    // whereas the python code adds them all to the highest
                    // note; you could change that by replacing the return
                    // below with a `cutoff = <however you express infinity>`
                    // i think
                    cur_note_idx += 1;
                    if cur_note_idx >= noteslen {
                        return ret;
                    }
                    cutoff = notes[cur_note_idx].high_freq;
                }
            }
            //else {
            cur_range.end += 1;
            //}
        }
        // add the final pnote if we ran out of frequencies before running
        // out of notes
        ret.push(ProcerNote::<'nl, I>(&notes[cur_note_idx], cur_range, I::default(), I::default()));
        return ret;
    }

    fn process_data(&mut self, input: &Update<I, US>, notes: &mut [ProcerNote<I>]) -> bool {
        let dq_len = self.in_deque.len();
        let dq_cap = self.in_deque.capacity();
        // we should probably also make sure the buffer can be filled from the deque's length,
        // so the capacity can grow if new() got it wrong
        if dq_len == dq_cap {
            self.in_deque.pop_front();
        }
        let scale = self.scale;
        // XXX: mm this copies twice per update (once to add it to the deque and once into the
        // buffer), which i kinda wanted to avoid (an Arc would avoid it but adds extra heap
        // allocations); at least it doesn't realloc though, so hopefully that's ok
        // depending on how this works with Alsa i might want to switch to passing in a slice
        // and storing references in the UpDequer, but that makes lifetime soup,
        // or something else maybe
        // OHH though, since i'm probably getting i16 from alsa and want to convert to f32 anyway,
        // it makes sense to copy (the question is whether that adds a third copy or whether the
        // compiler is smart enough to convert in place; ideally the conversion would happen here
        // in this push_back, maybe the input Update should be generic over AI and this push_back
        // should convert with `as I` or Into or whatever)
        self.in_deque.push_back(input.clone());
        let updated = self.in_deque.update_buffer_array(&mut self.in_buf);
        //println!("updated = {}", updated);
        if updated {
            //assert_eq!(self.in_buf, [I::default(); BS]);
            let post_scale = I::from_u8(2).unwrap().sqrt();
            self.rfft.process_with_scratch(&mut self.in_buf, &mut self.out_data, &mut self.scratch).unwrap();
            for pnote in notes {
                // TODO: figure out the best calculation for this
                // TODO: map/sum or product might be quicker than fold here, since fold can't
                // really use simd; also look at what the perftest code did to use simd
                // XXX: python's abs() is the equivalent of sqrt(norm_sqr) (https://docs.rs/num-complex/0.4.3/num_complex/struct.Complex.html#method.norm_sqr),
                // ie. sqrt(|real|**2 + |imag|**2), so just using norm_sqr().sqrt() here for now;
                // l1_norm might also be a good option (just |real| + |imag|) and it gets rid of
                // the pow too, but i dunno what's best here
                // TODO: the python test2.py code actually currently uses max instead of folding here too
                //let complexes = self.out_data[pnote.1.clone()].iter().fold((I::default(), Complex::<I>::default()), |(sum, mx), c| {let c_sqrt = c.scale(scale).norm_sqr().sqrt(); (sum + c_sqrt, std::cmp::max_by(mx, *c, |a: &Complex<I>, b: &Complex<I>| { a.norm_sqr().partial_cmp(&b.norm_sqr()).unwrap() }))});
                let complexes = self.out_data[pnote.1.clone()].iter().fold((I::default(), Complex::<I>::default(), I::default()), |(sum, mx_c, mx_sqrt), c| {
                    let c_sqr = c.scale(scale).norm_sqr();
                    let c_sqrt = c_sqr.sqrt();
                    if c_sqrt > mx_sqrt {
                        (sum + c_sqr, *c, c_sqrt)
                    } else {
                        (sum + c_sqr, mx_c, mx_sqrt)
                    }
                });
                pnote.2 = complexes.0.sqrt() * post_scale; // / I::from_usize(pnote.1.len()).unwrap();
                //pnote.3 = complexes.1.scale(scale).norm_sqr() * I::from_f32(2.0).unwrap();
                //pnote.2 = complexes.0.scale(scale).norm_sqr().sqrt();
                //pnote.3 = complexes.1.scale(scale).norm_sqr().sqrt();
                pnote.3 = complexes.2 * post_scale;
                //pnote.2 = self.out_data[pnote.1.clone()].iter().fold(I::default(), |acc, c| acc + (c.norm_sqr().sqrt()*scale))/*.sqrt()*/;
                //pnote.2 = self.out_data[pnote.1.clone()].iter().map(|c| c.norm_sqr()).max_by(|a, b| { a.partial_cmp(b).unwrap() } );
            }
        }
        return updated;
    }
}
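
// Hedged, free-standing sketch of the per-note reduction done in `process_data` above:
// accumulate norm_sqr for an RMS-style total and track the largest single bin, both on
// values pre-scaled by 1/BS and post-scaled by sqrt(2). The `reduce_bins` name and its
// slice-based signature are illustrative; the real code folds over
// `self.out_data[pnote.1.clone()]` and writes into the ProcerNote fields directly.
#[allow(dead_code)]
fn reduce_bins(bins: &[Complex<f32>], scale: f32) -> (f32, f32) {
    let post_scale = 2.0f32.sqrt();
    let (sum, mx) = bins.iter().fold((0.0f32, 0.0f32), |(sum, mx), c| {
        let sqr = c.scale(scale).norm_sqr();
        // keep both the running energy sum and the loudest bin's magnitude
        (sum + sqr, mx.max(sqr.sqrt()))
    });
    (sum.sqrt() * post_scale, mx * post_scale)
}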