ardupilot/libraries/AP_HAL_ChibiOS/DSP.cpp
Andy Piper 33c1523905 AP_HAL_ChibiOS: add harmonics to DSP
add vector_mean_float() to DSP
allow fft_start() to use ObjectBuffer<float> for lock-free access
2020-05-24 07:43:34 +10:00

304 lines
11 KiB
C++

/*
* This file is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This file is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Code by Andy Piper and the betaflight team
*/
#include "AP_HAL_ChibiOS.h"
#if HAL_WITH_DSP
#include <AP_HAL/AP_HAL.h>
#include <AP_Math/AP_Math.h>
#include <GCS_MAVLink/GCS.h>
#include "DSP.h"
#include <cmath>
using namespace ChibiOS;
// Profiling helpers for the individual FFT steps, only active when DEBUG_FFT is set.
// TIMER_START(timer) disables interrupts, keeping the saved state in a local named `istate`, and
// records the current time in microseconds in a local named `<timer>now`. Because it declares
// locals it can be used at most once per scope.
// TIMER_END(timer) hands the recorded start time to timer.time() and then restores the interrupt state.
#if DEBUG_FFT
#define TIMER_START(timer) \
void *istate = hal.scheduler->disable_interrupts_save(); \
uint32_t timer##now = AP_HAL::micros()
#define TIMER_END(timer) timer.time(timer##now); \
hal.scheduler->restore_interrupts(istate)
#else
// with profiling disabled both macros expand to nothing
#define TIMER_START(timer)
#define TIMER_END(timer)
#endif
// number of measurements StepTimer::time() accumulates before it publishes a new average
#define TICK_CYCLE 10
extern const AP_HAL::HAL& hal;
// The algorithms originally came from betaflight but are now substantially modified based on theory and experiment.
// https://holometer.fnal.gov/GH_FFT.pdf "Spectrum and spectral density estimation by the Discrete Fourier transform (DFT),
// including a comprehensive list of window functions and some new flat-top windows." - Heinzel et al. is a great reference
// for understanding the underlying theory although we do not use spectral density here since time resolution is equally
// important as frequency resolution. Referred to as [Heinz] throughout the code.
// Create the FFT state machine for one analysis window.
// window_size, sample_rate and harmonics are forwarded unchanged to the FFTWindowStateARM constructor.
// Returns the new state, or nullptr if the state object or any of its working buffers
// (_hanning_window, _rfft_data, _freq_bins, _derivative_freq_bins) could not be allocated.
AP_HAL::DSP::FFTWindowState* DSP::fft_init(uint16_t window_size, uint16_t sample_rate, uint8_t harmonics)
{
    DSP::FFTWindowStateARM* const window = new DSP::FFTWindowStateARM(window_size, sample_rate, harmonics);

    const bool usable = window != nullptr
        && window->_hanning_window != nullptr
        && window->_rfft_data != nullptr
        && window->_freq_bins != nullptr
        && window->_derivative_freq_bins != nullptr;
    if (usable) {
        return window;
    }

    // both failure modes (no object at all, or an object missing a buffer) share this path
    delete window;
    return nullptr;
}
// Begin an FFT analysis by running step 1 (Hanning windowing) on the supplied samples.
// state is treated as an FFTWindowStateARM; samples and advance are handed on to step_hanning().
void DSP::fft_start(FFTWindowState* state, FloatBuffer& samples, uint16_t advance)
{
    FFTWindowStateARM* const window = (FFTWindowStateARM*)state;
    step_hanning(window, samples, advance);
}
// Run the remaining FFT steps (2 to 6) on a state previously prepared by fft_start().
// start_bin, end_bin and noise_att_cutoff are passed through to the magnitude and
// frequency calculation steps. Returns whatever step_calc_frequencies_f32() reports.
uint16_t DSP::fft_analyse(AP_HAL::DSP::FFTWindowState* state, uint16_t start_bin, uint16_t end_bin, float noise_att_cutoff)
{
    FFTWindowStateARM* const window = (FFTWindowStateARM*)state;

    step_arm_cfft_f32(window);      // step 2
    step_bitreversal(window);       // step 3
    step_stage_rfft_f32(window);    // step 4
    step_arm_cmplx_mag_f32(window, start_bin, end_bin, noise_att_cutoff);   // step 5

    const uint16_t peak_bin = step_calc_frequencies_f32(window, start_bin, end_bin);   // step 6
    return peak_bin;
}
// Construct the ARM flavour of the FFT state machine.
// All three arguments go to the base class constructor. If any working buffer is missing
// afterwards a warning is sent to the GCS and the ARM FFT instance is left uninitialized.
DSP::FFTWindowStateARM::FFTWindowStateARM(uint16_t window_size, uint16_t sample_rate, uint8_t harmonics)
    : AP_HAL::DSP::FFTWindowState::FFTWindowState(window_size, sample_rate, harmonics)
{
    const bool buffers_missing = _freq_bins == nullptr
        || _hanning_window == nullptr
        || _rfft_data == nullptr
        || _derivative_freq_bins == nullptr;
    if (buffers_missing) {
        GCS_SEND_TEXT(MAV_SEVERITY_WARNING, "Failed to allocate %u bytes for window %u for DSP",
                      unsigned(sizeof(float) * (window_size * 3 + 2)), unsigned(window_size));
        return;
    }

    // Set up the ARM FFT instance for exactly this window size.
    // arm_rfft_fast_init_f32() is deliberately avoided: it links every twiddle table, whereas
    // calling the size-specific initializers saves 70k of text space.
    switch (window_size) {
    case 32:  arm_rfft_32_fast_init_f32(&_fft_instance);  break;
    case 64:  arm_rfft_64_fast_init_f32(&_fft_instance);  break;
    case 128: arm_rfft_128_fast_init_f32(&_fft_instance); break;
    case 256: arm_rfft_256_fast_init_f32(&_fft_instance); break;
#if defined(STM32H7)
    // the larger FFT tables are only pulled in where we have to
    case 512:  arm_rfft_512_fast_init_f32(&_fft_instance);  break;
    case 1024: arm_rfft_1024_fast_init_f32(&_fft_instance); break;
#endif
    default:
        // any other window size: no initializer is called
        break;
    }
}
// nothing to release at this level
DSP::FFTWindowStateARM::~FFTWindowStateARM() = default;
// Declarations, with C linkage, of the FFT building blocks that steps 2-4 below call directly.
// NOTE(review): these look like CMSIS-DSP routines that are not exposed through its public headers -
// confirm the signatures against the library version in use.
extern "C" {
// real FFT post-processing stage, used by step_stage_rfft_f32()
void stage_rfft_f32(arm_rfft_fast_instance_f32 *S, float32_t *p, float32_t *pOut);
// complex FFT kernels, selected by bin count in step_arm_cfft_f32()
void arm_cfft_radix8by2_f32(arm_cfft_instance_f32 *S, float32_t *p1);
void arm_cfft_radix8by4_f32(arm_cfft_instance_f32 *S, float32_t *p1);
void arm_radix8_butterfly_f32(float32_t *pSrc, uint16_t fftLen, const float32_t *pCoef, uint16_t twidCoefModifier);
// bit reversal of the FFT output, used by step_bitreversal()
void arm_bitreversal_32(uint32_t *pSrc, const uint16_t bitRevLen, const uint16_t *pBitRevTable);
}
// step 1: copy a window of samples into _freq_bins and shape it with the Hanning window
// Peeks _window_size samples from the buffer, advances the buffer by `advance` and then
// multiplies the copied samples element-wise by _hanning_window, in place.
void DSP::step_hanning(FFTWindowStateARM* fft, FloatBuffer& samples, uint16_t advance)
{
    TIMER_START(_hanning_timer);
    // 5us
    float* const bins = &fft->_freq_bins[0];

    // the caller ensures we get a full buffer of samples
    samples.peek(bins, fft->_window_size);
    samples.advance(advance);

    // apply the hanning window to the gyro samples, leaving the result in _freq_bins.
    // hanning starts and ends with 0, so those could be skipped for a minor speed improvement
    arm_mult_f32(bins, &fft->_hanning_window[0], bins, fft->_window_size);
    TIMER_END(_hanning_timer);
}
// step 2: guts of complex fft processing
// Sets the complex FFT length to half the real FFT length and then runs the kernel that
// matches the bin count, in place on _freq_bins. Other bin counts run no kernel at all.
void DSP::step_arm_cfft_f32(FFTWindowStateARM* fft)
{
    arm_cfft_instance_f32* const cfft = &(fft->_fft_instance.Sint);
    cfft->fftLen = fft->_fft_instance.fftLenRFFT / 2;

    TIMER_START(_arm_cfft_f32_timer);
    const uint16_t bins = fft->_bin_count;
    if (bins == 16 || bins == 128) {
        // window 32:  16us (BF), 5us F7, 7us F4, 8us H7
        // window 256: 37us F7, 81us F4, 17us H7
        arm_cfft_radix8by2_f32(cfft, fft->_freq_bins);
    } else if (bins == 32 || bins == 256) {
        // window 64:  35us (BF), 10us F7, 24us F4
        // window 512: 66us F7, 174us F4, 37us H7
        arm_cfft_radix8by4_f32(cfft, fft->_freq_bins);
    } else if (bins == 64 || bins == 512) {
        // window 128:  70us BF, 21us F7, 34us F4
        // window 1024: 152us F7, 73us H7
        arm_radix8_butterfly_f32(fft->_freq_bins, bins, cfft->pTwiddle, 1);
    }
    TIMER_END(_arm_cfft_f32_timer);
}
// step 3: reverse the bits of the output
// Runs arm_bitreversal_32() over _freq_bins (viewed as 32-bit words) using the bit reversal
// table and length held in the complex FFT instance.
void DSP::step_bitreversal(FFTWindowStateARM* fft)
{
    TIMER_START(_bitreversal_timer);
    // timings by window size (6us on BF):
    //   32   - 2us F7, 3us F4, 1us H7
    //   64   - 3us F7, 6us F4
    //   128  - 4us F7, 9us F4
    //   256  - 10us F7, 20us F4, 5us H7
    //   512  - 22us F7, 54us F4, 15us H7
    //   1024 - 42us F7, 15us H7
    uint32_t* const words = (uint32_t *)fft->_freq_bins;
    const arm_cfft_instance_f32& cfft = fft->_fft_instance.Sint;
    arm_bitreversal_32(words, cfft.bitRevLength, cfft.pBitRevTable);
    TIMER_END(_bitreversal_timer);
}
// step 4: convert from complex to real data
// Reads the complex FFT result from _freq_bins and writes the real FFT result to _rfft_data.
void DSP::step_stage_rfft_f32(FFTWindowStateARM* fft)
{
    TIMER_START(_stage_rfft_f32_timer);
    // timings by window size (14us on BF):
    //   32   - 2us F7, 5us F4, 2us H7
    //   64   - 5us F7, 16us F4
    //   128  - 17us F7, 26us F4
    //   256  - 21us F7, 70us F4, 9us H7
    //   512  - 35us F7, 71us F4, 17us H7
    //   1024 - 76us F7, 33us H7
    // the stage cannot run in place, which is why both _freq_bins AND _rfft_data are needed
    float32_t* const complex_in = fft->_freq_bins;
    float32_t* const real_out = fft->_rfft_data;
    stage_rfft_f32(&fft->_fft_instance, complex_in, real_out);
    TIMER_END(_stage_rfft_f32_timer);
}
// step 5: find the magnitudes of the complex data
// Fills _freq_bins[0.._bin_count] with squared magnitudes taken from _rfft_data, appends the
// Nyquist term to _rfft_data for the interpolator and then calls step_cmplx_mag() with
// start_bin, end_bin and noise_att_cutoff.
void DSP::step_arm_cmplx_mag_f32(FFTWindowStateARM* fft, uint16_t start_bin, uint16_t end_bin, float noise_att_cutoff)
{
    TIMER_START(_arm_cmplx_mag_f32_timer);
    // timings by window size (8us on BF):
    //   32   - 4us F7, 5us F4, 5us H7
    //   64   - 7us F7, 13us F4
    //   128  - 14us F7, 17us F4
    //   256  - 29us F7, 28us F4, 7us H7
    //   512  - 55us F7, 93us F4, 13us H7
    //   1024 - 131us F7, 25us H7
    float* const rfft = fft->_rfft_data;
    float* const mags = fft->_freq_bins;

    // General case for the magnitudes - see https://stackoverflow.com/questions/42299932/dsp-libraries-rfft-strange-results
    // The frequency of each of those frequency components is given by k*fs/N
    arm_cmplx_mag_squared_f32(&rfft[2], &mags[1], fft->_bin_count - 1);

    // the first two entries of the rfft output are handled separately
    mags[0] = sq(rfft[0]);                  // DC
    mags[fft->_bin_count] = sq(rfft[1]);    // Nyquist

    // append the Nyquist term as a (value, 0) pair for the interpolator
    rfft[fft->_window_size] = rfft[1];
    rfft[fft->_window_size + 1] = 0;

    step_cmplx_mag(fft, start_bin, end_bin, noise_att_cutoff);
    TIMER_END(_arm_cmplx_mag_f32_timer);
}
// step 6: find the bin with the highest energy and interpolate the required frequency
// Delegates the work to step_calc_frequencies() and returns the bin stored in the CENTER
// entry of _peak_data. With DEBUG_FFT set, the averaged step timings are also sent to the
// GCS on every 400th call.
uint16_t DSP::step_calc_frequencies_f32(FFTWindowStateARM* fft, uint16_t start_bin, uint16_t end_bin)
{
    TIMER_START(_step_calc_frequencies);
    // 4us H7
    step_calc_frequencies(fft, start_bin, end_bin);
    TIMER_END(_step_calc_frequencies);

#if DEBUG_FFT
    ++_output_count;
    // outputs at approx 1hz
    if ((_output_count % 400) == 0) {
        GCS_SEND_TEXT(MAV_SEVERITY_WARNING, "FFT(us): t1:%lu,t2:%lu,t3:%lu,t4:%lu,t5:%lu,t6:%lu",
                      _hanning_timer._timer_avg,
                      _arm_cfft_f32_timer._timer_avg,
                      _bitreversal_timer._timer_avg,
                      _stage_rfft_f32_timer._timer_avg,
                      _arm_cmplx_mag_f32_timer._timer_avg,
                      _step_calc_frequencies._timer_avg);
    }
#endif

    const uint16_t peak_bin = fft->_peak_data[CENTER]._bin;
    return peak_bin;
}
// Bias correction factor tan(pi/N) / (pi/N) used by Candan's estimator.
// NOTE(review): N is fixed at 32 here regardless of the window size actually in use - confirm this
// approximation is intended for the larger windows.
static const float PI_N = M_PI / 32.0f;
static const float CANDAN_FACTOR = tanf(PI_N) / PI_N;
// Interpolate center frequency using http://users.metu.edu.tr/ccandan//pub_dir/FineDopplerEst_IEEE_SPL_June2011.pdf
// This is slightly less accurate than Quinn, but much cheaper to calculate
//
// fft   - state whose _rfft_data holds interleaved (real, imaginary) pairs, bin k at [2k] and [2k + 1]
// k_max - index of the peak bin
// Returns the offset of the true peak from bin k_max as a fraction of the bin spacing, constrained
// to [-0.5, 0.5]. Returns 0 when the peak has no usable neighbour on both sides or when the
// estimator's denominator is zero.
float DSP::calculate_candans_estimator(const FFTWindowStateARM* fft, uint16_t k_max) const
{
    // The estimator needs a complex value on each side of the peak:
    //  - for k_max == 1 the lower neighbour would be slots [0] and [1], which step 5 treats as the DC
    //    and Nyquist terms rather than a (real, imaginary) pair, and k_max == 0 has no lower neighbour
    //  - for k_max >= _bin_count the upper neighbour lies beyond the Nyquist pair that step 5 appends
    //    at [_window_size] and [_window_size + 1], so reading it would run off the populated data
    if (k_max <= 1 || k_max >= fft->_bin_count) {
        return 0.0f;
    }

    // offsets of the real parts of the lower neighbour, upper neighbour and peak
    const uint16_t k_m1 = (k_max - 1) * 2;
    const uint16_t k_p1 = (k_max + 1) * 2;
    const uint16_t k = k_max * 2;

    // numerator: X[k-1] - X[k+1], real and imaginary parts
    const float npr = fft->_rfft_data[k_m1] - fft->_rfft_data[k_p1];
    const float npc = fft->_rfft_data[k_m1 + 1] - fft->_rfft_data[k_p1 + 1];
    // denominator: 2 * X[k] - X[k-1] - X[k+1], real and imaginary parts
    const float dpr = 2.0f * fft->_rfft_data[k] - fft->_rfft_data[k_m1] - fft->_rfft_data[k_p1];
    const float dpc = 2.0f * fft->_rfft_data[k + 1] - fft->_rfft_data[k_m1 + 1] - fft->_rfft_data[k_p1 + 1];

    // real part of numerator / denominator, written as Re(n * conj(d)) / |d|^2
    const float realn = npr * dpr + npc * dpc;
    const float reald = dpr * dpr + dpc * dpc;

    // sanity check
    if (is_zero(reald)) {
        return 0.0f;
    }

    float d = CANDAN_FACTOR * (realn / reald);

    // -0.5 < d < 0.5 which is the fraction of the sample spacing about the center element
    return constrain_float(d, -0.5f, 0.5f);
}
#if DEBUG_FFT
// Record one measurement that began at `start` (microseconds, as returned by AP_HAL::micros()).
// Measurements are summed, and every TICK_CYCLE calls the mean is published in _timer_avg
// and the running total is reset.
void DSP::StepTimer::time(uint32_t start)
{
    const uint32_t elapsed = AP_HAL::micros() - start;
    _timer_total += elapsed;

    _time_ticks = (_time_ticks + 1) % TICK_CYCLE;
    if (_time_ticks != 0) {
        // still accumulating the current cycle
        return;
    }

    _timer_avg = _timer_total / TICK_CYCLE;
    _timer_total = 0;
}
#endif
#endif