diff --git a/libraries/AP_GyroFFT/AP_GyroFFT.cpp b/libraries/AP_GyroFFT/AP_GyroFFT.cpp new file mode 100644 index 0000000000..f0adecfab7 --- /dev/null +++ b/libraries/AP_GyroFFT/AP_GyroFFT.cpp @@ -0,0 +1,756 @@ +/* + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Code by Andy Piper with help from betaflight + */ + +#include "AP_GyroFFT.h" + +#if HAL_GYROFFT_ENABLED + +#include +#include +#include + +extern const AP_HAL::HAL& hal; + +#define FFT_DEFAULT_WINDOW_SIZE 32 +#define FFT_DEFAULT_WINDOW_OVERLAP 0.5f +#define FFT_THR_REF_DEFAULT 0.35f // the estimated throttle reference, 0 ~ 1 +#define FFT_SNR_DEFAULT 25.0f // a higher SNR is safer and this works quite well on a Pixracer +#define FFT_STACK_SIZE 1024 +#define FFT_REQUIRED_HARMONIC_FIT 10.0f + +// table of user settable parameters +const AP_Param::GroupInfo AP_GyroFFT::var_info[] = { + + // @Param: ENABLE + // @DisplayName: Enable + // @Description: Enable Gyro FFT analyser + // @Values: 0:Disabled,1:Enabled + // @User: Advanced + // @RebootRequired: True + AP_GROUPINFO_FLAGS("ENABLE", 1, AP_GyroFFT, _enable, 0, AP_PARAM_FLAG_ENABLE), + + // @Param: MINHZ + // @DisplayName: Minimum Frequency + // @Description: Lower bound of FFT frequency detection in Hz. + // @Range: 10 400 + // @Units: Hz + // @User: Advanced + AP_GROUPINFO("MINHZ", 2, AP_GyroFFT, _fft_min_hz, 80), + + // @Param: MAXHZ + // @DisplayName: Maximum Frequency + // @Description: Upper bound of FFT frequency detection in Hz. + // @Range: 10 400 + // @Units: Hz + // @User: Advanced + AP_GROUPINFO("MAXHZ", 3, AP_GyroFFT, _fft_max_hz, 200), + + // @Param: SAMPLE_MODE + // @DisplayName: Sample Mode + // @Description: Sampling mode (and therefore rate). 0: Gyro rate sampling, 1: Fast loop rate sampling, 2: Fast loop rate / 2 sampling, 3: Fast loop rate / 3 sampling. Takes effect on reboot. + // @Range: 0 4 + // @User: Advanced + // @RebootRequired: True + AP_GROUPINFO("SAMPLE_MODE", 4, AP_GyroFFT, _sample_mode, 0), + + // @Param: WINDOW_SIZE + // @DisplayName: FFT window size + // @Description: Size of window to be used in FFT calculations. Takes effect on reboot. Must be a power of 2 and between 32 and 1024. Larger windows give greater frequency resolution but poorer time resolution, consume more CPU time and may not be appropriate for all vehicles. Time and frequency resolution are given by the sample-rate / window-size. Windows of 256 are only really recommended for F7 class boards, windows of 512 or more H7 class. + // @Range: 32 1024 + // @User: Advanced + // @RebootRequired: True + AP_GROUPINFO("WINDOW_SIZE", 5, AP_GyroFFT, _window_size, FFT_DEFAULT_WINDOW_SIZE), + + // @Param: WINDOW_OLAP + // @DisplayName: FFT window overlap + // @Description: Percentage of window to be overlapped before another frame is process. Takes effect on reboot. A good default is 50% overlap. Higher overlap results in more processed frames but not necessarily more temporal resolution. Lower overlap results in lost information at the frame edges. + // @Range: 0 0.9 + // @User: Advanced + // @RebootRequired: True + AP_GROUPINFO("WINDOW_OLAP", 6, AP_GyroFFT, _window_overlap, FFT_DEFAULT_WINDOW_OVERLAP), + + // @Param: FREQ_HOVER + // @DisplayName: FFT learned hover frequency + // @Description: The learned hover noise frequency + // @Range: 0 250 + // @User: Advanced + AP_GROUPINFO("FREQ_HOVER", 7, AP_GyroFFT, _freq_hover_hz, 80.0f), + + // @Param: THR_REF + // @DisplayName: FFT learned thrust reference + // @Description: FFT learned thrust reference for the hover frequency and FFT minimum frequency. + // @Range: 0.01 0.9 + // @User: Advanced + AP_GROUPINFO("THR_REF", 8, AP_GyroFFT, _throttle_ref, FFT_THR_REF_DEFAULT), + + // @Param: SNR_REF + // @DisplayName: FFT SNR reference threshold + // @Description: FFT SNR reference threshold in dB at which a signal is determined to be present. + // @Range: 0.0 100.0 + // @User: Advanced + AP_GROUPINFO("SNR_REF", 9, AP_GyroFFT, _snr_threshold_db, FFT_SNR_DEFAULT), + + // @Param: ATT_REF + // @DisplayName: FFT attenuation for bandwidth calculation + // @Description: FFT attenuation level in dB for bandwidth calculation and peak detection. The bandwidth is calculated by comparing peak power output with the attenuated version. The default of 15 has shown to be a good compromise in both simulations and real flight. + // @Range: 0 100 + // @User: Advanced + AP_GROUPINFO("ATT_REF", 10, AP_GyroFFT, _attenuation_power_db, 15), + + // @Param: BW_HOVER + // @DisplayName: FFT learned bandwidth at hover + // @Description: FFT learned bandwidth at hover for the attenuation frequencies. + // @Range: 0 200 + // @User: Advanced + AP_GROUPINFO("BW_HOVER", 11, AP_GyroFFT, _bandwidth_hover_hz, 20), + + AP_GROUPEND +}; + +// The FFT splits the frequency domain into an number of bins +// A sampling frequency of 1000 and max frequency (Nyquist) of 500 at a window size of 32 gives 16 frequency bins each 31.25Hz wide +// The first bin is used to store the DC and Nyquist values combined. +// Eg [DC/Nyquist], [16,47), [47,78), [78,109) etc +// For a loop rate of 800Hz, 16 bins each 25Hz wide +// Eg X[0]=[DC/Nyquist], X[1]=[12,37), X[2]=[37,62), X[3]=[62,87), X[4]=[87,112) +// So middle frequency is X[n] * 25 and the range is X[n] * 25 - 12 < f < X[n] * 25 + 12 + +// Maximum tolerated number of cycles with missing signal +#define FFT_MAX_MISSED_UPDATES 3 + +const extern AP_HAL::HAL& hal; + +AP_GyroFFT::AP_GyroFFT() +{ + _thread_state._noise_needs_calibration = 0x07; // all axes need calibration + AP_Param::setup_object_defaults(this, var_info); + + if (_singleton != nullptr) { + AP_HAL::panic("AP_GyroFFT must be singleton"); + } + _singleton = this; +} + +// initialize the FFT parameters and engine +void AP_GyroFFT::init(uint32_t target_looptime_us) +{ + // if FFT analysis is not enabled we don't want to allocate any of the associated resources + if (!_enable) { + return; + } + + _ins = &AP::ins(); + + // sanity check + if (_ins->get_raw_gyro_rate_hz() == 0) { + AP_HAL::panic("AP_GyroFFT must be initialized after AP_InertialSensor"); + } + + // check that we support the window size requested and it is a power of 2 + _window_size = 1 << lrintf(log2f(_window_size.get())); +#if defined(STM32H7) || CONFIG_HAL_BOARD == HAL_BOARD_LINUX || CONFIG_HAL_BOARD == HAL_BOARD_SITL + _window_size = constrain_int16(_window_size, 32, 1024); +#else + _window_size = constrain_int16(_window_size, 32, 256); +#endif + + // check that we have enough memory for the window size requested + // INS: XYZ_AXIS_COUNT * INS_MAX_INSTANCES * _window_size, DSP: 3 * _window_size, FFT: XYZ_AXIS_COUNT + 3 * _window_size + const uint32_t allocation_count = (XYZ_AXIS_COUNT * INS_MAX_INSTANCES + 3 + XYZ_AXIS_COUNT + 3) * sizeof(float); + if (allocation_count * FFT_DEFAULT_WINDOW_SIZE > hal.util->available_memory() / 2) { + gcs().send_text(MAV_SEVERITY_WARNING, "AP_GyroFFT: disabled, required %u bytes", (unsigned int)allocation_count * FFT_DEFAULT_WINDOW_SIZE); + return; + } else if (allocation_count * _window_size > hal.util->available_memory() / 2) { + gcs().send_text(MAV_SEVERITY_WARNING, "AP_GyroFFT: req alloc %u bytes (free=%u)", (unsigned int)allocation_count * _window_size, (unsigned int)hal.util->available_memory()); + _window_size = FFT_DEFAULT_WINDOW_SIZE; + } + // save any changes that were made + _window_size.save(); + + // determine the FFT sample rate based on the gyro rate, loop rate and configuration + if (_sample_mode == 0) { + _fft_sampling_rate_hz = _ins->get_raw_gyro_rate_hz(); + } else { + const uint16_t loop_rate_hz = 1000*1000UL / target_looptime_us; + _fft_sampling_rate_hz = loop_rate_hz / _sample_mode; + for (uint8_t axis = 0; axis < XYZ_AXIS_COUNT; axis++) { + _downsampled_gyro_data[axis] = (float*)hal.util->malloc_type(sizeof(float) * _window_size, DSP_MEM_REGION); + if (_downsampled_gyro_data[axis] == nullptr) { + gcs().send_text(MAV_SEVERITY_WARNING, "Failed to allocate window for AP_GyroFFT"); + return; + } + } + } + _current_sample_mode = _sample_mode; + + _ref_energy = new Vector3f[_window_size]; + if (_ref_energy == nullptr) { + gcs().send_text(MAV_SEVERITY_WARNING, "Failed to allocate window for AP_GyroFFT"); + return; + } + + // make the gyro window match the window size plus the maximum that can be in play from the backend + if (!_ins->set_gyro_window_size(_window_size + INS_MAX_GYRO_WINDOW_SAMPLES)) { + return; + } + // current read marker is the beginning of the window + if (_sample_mode == 0) { + _circular_buffer_idx = _ins->get_raw_gyro_window_index(); + } else { + _circular_buffer_idx = 0; + } + + // initialise the HAL DSP subsystem + _state = hal.dsp->fft_init(_window_size, _fft_sampling_rate_hz); + if (_state == nullptr) { + gcs().send_text(MAV_SEVERITY_WARNING, "Failed to initialize DSP engine"); + return; + } + + // number of samples needed before a new frame can be processed + _window_overlap = constrain_float(_window_overlap, 0.0f, 0.9f); + _window_overlap.save(); + _samples_per_frame = (1.0f - _window_overlap) * _window_size; + _samples_per_frame = 1 << lrintf(log2f(_samples_per_frame)); + + // The update rate for the output + const float output_rate = _fft_sampling_rate_hz / _samples_per_frame; + // establish suitable defaults for the detected values + for (uint8_t axis = 0; axis < XYZ_AXIS_COUNT; axis++) { + _thread_state._center_freq_hz[axis] = _fft_min_hz; + _thread_state._center_freq_hz_filtered[axis] = _fft_min_hz; + // calculate low-pass filter characteristics based on overlap size + _center_freq_filter[axis].set_cutoff_frequency(output_rate, output_rate * 0.48f); + // smooth the bandwidth output more aggressively + _center_bandwidth_filter[axis].set_cutoff_frequency(output_rate, output_rate * 0.25f); + // number of cycles to average over, two complete windows to be sure + _noise_calibration_cycles[axis] = (_window_size / _samples_per_frame) * 2; + } + + // the number of cycles required to have a proper noise reference + _noise_cycles = (_window_size / _samples_per_frame) * XYZ_AXIS_COUNT; + // calculate harmonic multiplier + uint8_t first_harmonic = 0; + _harmonics = 1; // always search for 1 + for (uint8_t i = 0; i < HNF_MAX_HARMONICS; i++) { + if (_ins->get_gyro_harmonic_notch_harmonics() & (1<get_raw_gyro_window_index() - _circular_buffer_idx + get_buffer_size()) % get_buffer_size()); + + if (start_analysis()) { + hal.dsp->fft_start(_state, _ins->get_raw_gyro_window(_update_axis), _circular_buffer_idx, get_buffer_size()); + // we have pushed a frame into the FFT loop, move the index to the beginning of the next frame + _circular_buffer_idx = (_circular_buffer_idx + _samples_per_frame) % get_buffer_size(); + _sample_count -= _samples_per_frame; + } + } + // for loop rate sampling accumulate and average gyro samples + else { + _oversampled_gyro_accum += _ins->get_raw_gyro(); + _oversampled_gyro_count++; + + if ((_oversampled_gyro_count % _current_sample_mode) == 0) { + // calculate mean value of accumulated samples + Vector3f sample = _oversampled_gyro_accum / _current_sample_mode; + // fast sampling means that the raw gyro values have already been averaged over 8 samples + _downsampled_gyro_data[0][_downsampled_gyro_idx] = sample.x; + _downsampled_gyro_data[1][_downsampled_gyro_idx] = sample.y; + _downsampled_gyro_data[2][_downsampled_gyro_idx] = sample.z; + + _oversampled_gyro_accum.zero(); + _oversampled_gyro_count = 0; + _downsampled_gyro_idx = (_downsampled_gyro_idx + 1) % _state->_window_size; + _sample_count++; + + if (start_analysis()) { + hal.dsp->fft_start(_state, _downsampled_gyro_data[_update_axis], _circular_buffer_idx, _state->_window_size); + _circular_buffer_idx = (_circular_buffer_idx + _samples_per_frame) % _state->_window_size; + _sample_count -= _samples_per_frame; + } + } + } + + _global_state = _thread_state; +} + +// analyse gyro data using FFT, returns number of samples still held +// called from FFT thread +uint16_t AP_GyroFFT::update() +{ + if (!analysis_enabled()) { + return 0; + } + + if (!_sem.take(HAL_SEMAPHORE_BLOCK_FOREVER)) { + return 0; + } + + // only proceeed if a full frame has been pushed into the dsp + if (!_thread_state._analysis_started) { + uint16_t new_sample_count = _sample_count; + _sem.give(); + return new_sample_count; + } + + uint16_t start_bin = _config._fft_start_bin; + uint16_t end_bin = _config._fft_end_bin; + + _sem.give(); + + // calculate FFT and update filters outside the semaphore + uint16_t bin_max = hal.dsp->fft_analyse(_state, start_bin, end_bin, _harmonics, _config._attenuation_cutoff); + + // in order to access _config state we need the semaphore again + WITH_SEMAPHORE(_sem); + + // something has been detected, update the peak frequency and associated metrics + update_ref_energy(bin_max); + calculate_noise(bin_max); + + // ready to receive another frame + _thread_state._analysis_started = false; + + return _sample_count; +} + + // whether analysis can be run again or not +bool AP_GyroFFT::start_analysis() { + if (_thread_state._analysis_started) { + return false; + } + if (_sample_count >= _state->_window_size) { + _thread_state._analysis_started = true; + return true; + } + return false; +} + +// update calculated values of dynamic parameters - runs at 1Hz +void AP_GyroFFT::update_parameters() +{ + if (!_initialized) { + return; + } + + WITH_SEMAPHORE(_sem); + + // don't allow MAXHZ to go to Nyquist + _fft_max_hz = MIN(_fft_max_hz, _fft_sampling_rate_hz * 0.48); + _config._snr_threshold_db = _snr_threshold_db; + _config._fft_min_hz = _fft_min_hz; + _config._fft_max_hz = _fft_max_hz; + // determine the start FFT bin for all frequency detection + _config._fft_start_bin = MAX(floorf(_fft_min_hz.get() / _state->_bin_resolution), 1); + // determine the endt FFT bin for all frequency detection + _config._fft_end_bin = MIN(ceilf(_fft_max_hz.get() / _state->_bin_resolution), _state->_bin_count); + // actual attenuation from the db value + _config._attenuation_cutoff = powf(10.0f, -_attenuation_power_db / 10.0f); + _config._analysis_enabled = _analysis_enabled; +} + +// thread for processing gyro data via FFT +void AP_GyroFFT::update_thread(void) +{ + while (true) { + uint16_t remaining_samples = update(); + // wait approximately until we are likely to have another frame ready + uint32_t delay = constrain_int32((int32_t(_window_size) - remaining_samples) * 1e6 / _fft_sampling_rate_hz, 0, 100000); + if (delay > 0) { + hal.scheduler->delay_microseconds(delay); + } + } +} + +// start the update thread +void AP_GyroFFT::start_update_thread(void) +{ + WITH_SEMAPHORE(_sem); + + if (_thread_created) { + return; + } + + if (!hal.scheduler->thread_create(FUNCTOR_BIND_MEMBER(&AP_GyroFFT::update_thread, void), "apm_fft", FFT_STACK_SIZE, AP_HAL::Scheduler::PRIORITY_IO, 1)) { + AP_HAL::panic("Failed to start AP_GyroFFT update thread"); + } + + _thread_created = true; +} + +// self-test the FFT analyser - can only be done while samples are not being taken +// called from main thread +bool AP_GyroFFT::calibration_check() +{ + if (!analysis_enabled()) { + return true; + } + + // analysis is started in the main thread, don't trample on in-flight analysis + if (_global_state._analysis_started) { + return true; + } + + // still calibrating noise so not ready + if (_global_state._noise_needs_calibration) { + return false; + } + + // make sure the frequency maxium is below Nyquist + if (_fft_max_hz > _fft_sampling_rate_hz * 0.5f) { + gcs().send_text(MAV_SEVERITY_CRITICAL, "FFT: config MAXHZ %dHz > %dHz", _fft_max_hz.get(), _fft_sampling_rate_hz / 2); + return false; + } + + // larger windows make the the self-test run too long, triggering the watchdog + if (AP_Logger::get_singleton()->log_while_disarmed() + || _window_size > FFT_DEFAULT_WINDOW_SIZE * 2) { + return true; + } + float max_divergence = self_test_bin_frequencies(); + + if (max_divergence > _state->_bin_resolution * 0.5f) { + gcs().send_text(MAV_SEVERITY_WARNING, "FFT: self-test FAILED, max error %fHz", max_divergence); + } + return max_divergence <= _state->_bin_resolution * 0.5f; +} + +// update the hover frequency input filter. should be called at 100hz when in a stable hover +// called from main thread +void AP_GyroFFT::update_freq_hover(float dt, float throttle_out) +{ + if (!analysis_enabled()) { + return; + } + // we have chosen to constrain the hover frequency to be within the range reachable by the third order expo polynomial. + _freq_hover_hz = constrain_float(_freq_hover_hz + (dt / (10.0f + dt)) * (get_weighted_noise_center_freq_hz() - _freq_hover_hz), _fft_min_hz, _fft_max_hz); + _bandwidth_hover_hz = constrain_float(_bandwidth_hover_hz + (dt / (10.0f + dt)) * (get_weighted_noise_center_bandwidth_hz() - _bandwidth_hover_hz), 0, _fft_max_hz * 0.5f); + _throttle_ref = constrain_float(_throttle_ref + (dt / (10.0f + dt)) * (throttle_out * sq((float)_fft_min_hz.get() / _freq_hover_hz.get()) - _throttle_ref), 0.01f, 0.9f); +} + +// save parameters as part of disarming +// called from main thread +void AP_GyroFFT::save_params_on_disarm() +{ + if (!analysis_enabled()) { + return; + } + + _freq_hover_hz.save(); + _bandwidth_hover_hz.save(); + _throttle_ref.save(); +} + +// return an average center frequency weighted by bin energy +// called from main thread +float AP_GyroFFT::get_weighted_noise_center_freq_hz() +{ + if (!analysis_enabled()) { + return _fft_min_hz; + } + + // there is generally a lot of high-energy, slightly lower frequency noise on yaw, however this + // appears to be a second-order effect as only targetting pitch and roll (x & y) produces much cleaner output all round + if (!_global_state._center_freq_energy.is_nan() + && !is_zero(_global_state._center_freq_energy.x) + && !is_zero(_global_state._center_freq_energy.y)) { + return (_global_state._center_freq_hz_filtered.x * _global_state._center_freq_energy.x + + _global_state._center_freq_hz_filtered.y * _global_state._center_freq_energy.y) + / (_global_state._center_freq_energy.x + _global_state._center_freq_energy.y); + } + else { + return (_global_state._center_freq_hz_filtered.x + _global_state._center_freq_hz_filtered.y) * 0.5f; + } +} + +static const char* LOG_FTN1_NAME = "FTN1"; +static const char* LOG_FTN1_LABELS = "TimeUS,PkAvg,PkX,PkY,PkZ,BwAvg,BwX,BwY,BwZ,DnF"; +static const char* LOG_FTN1_UNITS = "szzzzzzzzz"; +static const char* LOG_FTN1_SCALE = "F---------"; +static const char* LOG_FTN1_FORMAT = "Qfffffffff"; + +static const char* LOG_FTN2_NAME = "FTN2"; +static const char* LOG_FTN2_LABELS = "TimeUS,FtX,FtY,FtZ,EnX,EnY,EnZ,SnX,SnY,SnZ,Bin"; +static const char* LOG_FTN2_UNITS = "s%%%-------"; +static const char* LOG_FTN2_SCALE = "F----------"; +static const char* LOG_FTN2_FORMAT = "QfffffffffB"; + +// log gyro fft messages +void AP_GyroFFT::write_log_messages() +{ + if (!analysis_enabled()) { + return; + } + + AP::logger().Write(LOG_FTN1_NAME, LOG_FTN1_LABELS, LOG_FTN1_UNITS, LOG_FTN1_SCALE, LOG_FTN1_FORMAT, + AP_HAL::micros64(), + get_weighted_noise_center_freq_hz(), + get_noise_center_freq_hz().x, + get_noise_center_freq_hz().y, + get_noise_center_freq_hz().z, + get_weighted_noise_center_bandwidth_hz(), + get_noise_center_bandwidth_hz().x, + get_noise_center_bandwidth_hz().y, + get_noise_center_bandwidth_hz().z, + _ins->get_gyro_dynamic_notch_center_freq_hz()); + + AP::logger().Write(LOG_FTN2_NAME, LOG_FTN2_LABELS, LOG_FTN2_UNITS, LOG_FTN2_SCALE, LOG_FTN2_FORMAT, + AP_HAL::micros64(), + get_raw_noise_harmonic_fit().x, + get_raw_noise_harmonic_fit().y, + get_raw_noise_harmonic_fit().z, + get_center_freq_energy().x, + get_center_freq_energy().y, + get_center_freq_energy().z, + get_noise_signal_to_noise_db().x, + get_noise_signal_to_noise_db().y, + get_noise_signal_to_noise_db().z, + get_center_freq_bin().z); + +#if DEBUG_FFT + const uint32_t now = AP_HAL::millis(); + // output at 1hz + if ((now - _last_output_ms) > 1000) { + // doing this from the update thread overflows the stack + WITH_SEMAPHORE(_sem); + gcs().send_text(MAV_SEVERITY_WARNING, "FFT: f:%.1f, fr:%.1f, b:%u, fd:%.1f", + _debug_state._center_freq_hz_filtered[_update_axis], _debug_state._center_freq_hz[_update_axis], _debug_max_bin, _debug_max_bin_freq); + gcs().send_text(MAV_SEVERITY_WARNING, "FFT: bw:%.1f, e:%.1f, r:%.1f, snr:%.1f", + _debug_state._center_bandwidth_hz[_update_axis], _debug_max_freq_bin, _ref_energy[_update_axis][_debug_max_bin], _debug_snr); + _last_output_ms = now; + } +#endif +} + +// return an average noise bandwidth weighted by bin energy +// called from main thread +float AP_GyroFFT::get_weighted_noise_center_bandwidth_hz() +{ + if (!analysis_enabled()) { + return 0.0f; + } + + if (!_global_state._center_freq_energy.is_nan() + && !is_zero(_global_state._center_freq_energy.x) + && !is_zero(_global_state._center_freq_energy.y)) { + return (_global_state._center_bandwidth_hz.x * _global_state._center_freq_energy.x + + _global_state._center_bandwidth_hz.y * _global_state._center_freq_energy.y) + / (_global_state._center_freq_energy.x + _global_state._center_freq_energy.y); + } + else { + return (_global_state._center_bandwidth_hz.x + _global_state._center_bandwidth_hz.y) * 0.5f; + } +} + +// calculate noise frequencies from FFT data provided by the HAL subsystem +// called from FFT thread +void AP_GyroFFT::calculate_noise(uint16_t max_bin) +{ + _thread_state._center_freq_bin[_update_axis] = max_bin; + + float weighted_center_freq_hz = 0; + + // cacluate the SNR and center frequency energy + const float max_energy = MAX(1.0f, _state->_freq_bins[max_bin]); + const float ref_energy = MAX(1.0f, _ref_energy[_update_axis][max_bin]); + float snr = 10.f * log10f(max_energy) - log10f(ref_energy); + // if the bin energy is above the noise threshold then we have a signal + if (!_thread_state._noise_needs_calibration && !isnan(_state->_freq_bins[max_bin]) && snr > _config._snr_threshold_db) { + // if targetting more than one harmonic then make sure we get the fundamental + // on larger copters the second harmonic often has more energy + const float peak_freq_hz = constrain_float(_state->_max_bin_freq, (float)_config._fft_min_hz, (float)_config._fft_max_hz); + const float second_peak_freq_hz = constrain_float(_state->_second_bin_freq, (float)_config._fft_min_hz, (float)_config._fft_max_hz); + const float harmonic_fit = 100.0f * fabsf(peak_freq_hz - second_peak_freq_hz * _harmonic_multiplier) / peak_freq_hz; + + // required fit of 10% is fairly conservative when testing in SITL + if (_harmonics > 1 && peak_freq_hz > second_peak_freq_hz && harmonic_fit < FFT_REQUIRED_HARMONIC_FIT) { + weighted_center_freq_hz = second_peak_freq_hz; + _thread_state._center_freq_energy[_update_axis] = _state->_freq_bins[_state->_second_energy_bin]; + _thread_state._center_bandwidth_hz[_update_axis] = _center_bandwidth_filter[_update_axis].apply(_state->_second_noise_width_hz); + } else { + weighted_center_freq_hz = peak_freq_hz; + _thread_state._center_freq_energy[_update_axis] = _state->_freq_bins[max_bin]; + _thread_state._center_bandwidth_hz[_update_axis] = _center_bandwidth_filter[_update_axis].apply(_state->_max_noise_width_hz); + } + // record how good the fit was + if (peak_freq_hz > second_peak_freq_hz) { + _thread_state._harmonic_fit[_update_axis] = harmonic_fit; + } else { + _thread_state._harmonic_fit[_update_axis] = 0.0f; + } + _thread_state._center_freq_hz[_update_axis] = weighted_center_freq_hz; + _thread_state._center_snr[_update_axis] = snr; + _missed_cycles = 0; + } + // if we failed to find a signal, carry on using the previous reading + else if (_missed_cycles++ < FFT_MAX_MISSED_UPDATES) { + weighted_center_freq_hz = _thread_state._center_freq_hz[_update_axis]; + // Keep the previous center frequency and energy + } + // we failed to find a signal for more than FFT_MAX_MISSED_UPDATES cycles + else { + weighted_center_freq_hz = _config._fft_min_hz; + _thread_state._center_freq_hz[_update_axis] = _config._fft_min_hz; + _thread_state._center_freq_energy[_update_axis] = 0.0f; + _thread_state._center_snr[_update_axis] = 0.0f; + _thread_state._center_bandwidth_hz[_update_axis] = _center_bandwidth_filter[_update_axis].apply(_bandwidth_hover_hz); + } + + _thread_state._center_freq_hz_filtered[_update_axis] = _center_freq_filter[_update_axis].apply(weighted_center_freq_hz); + +#if DEBUG_FFT + WITH_SEMAPHORE(_sem); + _debug_state = _thread_state; + _debug_max_freq_bin = _state->_freq_bins[max_bin]; + _debug_max_bin_freq = _state->_max_bin_freq; + _debug_snr = snr; + _debug_max_bin = max_bin; +#endif + _update_axis = (_update_axis + 1) % XYZ_AXIS_COUNT; +} + +// calculate noise baseline from FFT data provided by the HAL subsystem +// called from FFT thread +void AP_GyroFFT::update_ref_energy(uint16_t max_bin) +{ + if (!_thread_state._noise_needs_calibration) { + return; + } + // according to https://www.tcd.ie/Physics/research/groups/magnetism/files/lectures/py5021/MagneticSensors3.pdf sensor noise is not necessarily gaussian + // determine a PS noise reference at each of the possble center frequencies + if (_noise_cycles == 0 && _noise_calibration_cycles[_update_axis] > 0) { + for (uint16_t i = 1; i < _state->_bin_count; i++) { + _ref_energy[_update_axis][i] += _state->_freq_bins[i]; + } + if (--_noise_calibration_cycles[_update_axis] == 0) { + for (uint16_t i = 1; i < _state->_bin_count; i++) { + const float cycles = (_window_size / _samples_per_frame) * 2; + // overall random noise is reduced by sqrt(N) when averaging periodigrams so adjust for that + _ref_energy[_update_axis][i] = (_ref_energy[_update_axis][i] / cycles) * sqrtf(cycles); + } + _thread_state._noise_needs_calibration &= ~(1 << _update_axis); + } + } + else if (_noise_cycles > 0) { + _noise_cycles--; + } +} + +// perform FFT analysis on the range of frequencies supported by the analyser +// called from main thread +float AP_GyroFFT::self_test_bin_frequencies() +{ + if (_state->_window_size * sizeof(float) > hal.util->available_memory() / 2) { + gcs().send_text(MAV_SEVERITY_WARNING, "AP_GyroFFT: unable to run self-test, required %u bytes", (unsigned int)(_state->_window_size * sizeof(float))); + return 0.0f; + } + + GyroWindow test_window = (float*)hal.util->malloc_type(sizeof(float) * _state->_window_size, DSP_MEM_REGION); + // in the unlikely event we can't allocate a test window, skip the checks + if (test_window == nullptr) { + return 0.0f; + } + + float max_divergence = 0; + + for (uint16_t bin = _config._fft_start_bin; bin <= _config._fft_end_bin; bin++) { + // the algorithm will only ever return values in this range + float frequency = constrain_float(bin * _state->_bin_resolution, _fft_min_hz, _fft_max_hz); + max_divergence = MAX(max_divergence, self_test(frequency, test_window)); // test bin centers + frequency = constrain_float(bin * _state->_bin_resolution - _state->_bin_resolution / 4, _fft_min_hz, _fft_max_hz); + max_divergence = MAX(max_divergence, self_test(frequency, test_window)); // test bin off-centers + } + + hal.util->free_type(test_window, sizeof(float) * _window_size, DSP_MEM_REGION); + return max_divergence; +} + +// perform FFT analysis of a single sine wave at the selected frequency +// called from main thread +float AP_GyroFFT::self_test(float frequency, GyroWindow test_window) +{ + for(uint16_t i = 0; i < _state->_window_size; i++) { + test_window[i]= sinf(2.0f * M_PI * frequency * i / _fft_sampling_rate_hz) * ToRad(20) * 2000; + } + + _update_axis = 0; + + hal.dsp->fft_start(_state, test_window, 0, _state->_window_size); + uint16_t max_bin = hal.dsp->fft_analyse(_state, _config._fft_start_bin, _config._fft_end_bin, _harmonics, _config._attenuation_cutoff); + + if (max_bin <= 0) { + gcs().send_text(MAV_SEVERITY_WARNING, "FFT: self-test failed, failed to find frequency %f", frequency); + } + + calculate_noise(max_bin); + + float max_divergence = 0; + // make sure the selected frequencies are in the right bin + max_divergence = MAX(max_divergence, fabsf(frequency - _thread_state._center_freq_hz[0])); + if (_thread_state._center_freq_hz[0] < (frequency - _state->_bin_resolution * 0.5f) || _thread_state._center_freq_hz[0] > (frequency + _state->_bin_resolution * 0.5f)) { + gcs().send_text(MAV_SEVERITY_WARNING, "FFT: self-test failed: wanted %f, had %f", frequency, _thread_state._center_freq_hz[0]); + } +#if DEBUG_FFT + else { + gcs().send_text(MAV_SEVERITY_INFO, "FFT: self-test succeeded: wanted %f, had %f", frequency, _thread_state._center_freq_hz[0]); + } +#endif + + return max_divergence; +} + +// singleton instance +AP_GyroFFT *AP_GyroFFT::_singleton; + +namespace AP { + +AP_GyroFFT *fft() +{ + return AP_GyroFFT::get_singleton(); +} + +} + +#endif // HAL_GYROFFT_ENABLED diff --git a/libraries/AP_GyroFFT/AP_GyroFFT.h b/libraries/AP_GyroFFT/AP_GyroFFT.h new file mode 100644 index 0000000000..d8429deb73 --- /dev/null +++ b/libraries/AP_GyroFFT/AP_GyroFFT.h @@ -0,0 +1,246 @@ +/* + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Code by Andy Piper + */ +#pragma once + +#include +#include + +#ifndef HAL_GYROFFT_ENABLED +#define HAL_GYROFFT_ENABLED HAL_WITH_DSP +#endif + +#if HAL_GYROFFT_ENABLED + +#include +#include +#include +#include +#include +#include + +#define DEBUG_FFT 0 + +// a library that leverages the HAL DSP support to perform FFT analysis on gyro samples +class AP_GyroFFT +{ +public: + AP_GyroFFT(); + + // Do not allow copies + AP_GyroFFT(const AP_GyroFFT &other) = delete; + AP_GyroFFT &operator=(const AP_GyroFFT&) = delete; + + void init(uint32_t target_looptime); + + // cycle through the FFT steps - runs at 400Hz + uint16_t update(); + // capture gyro values at the appropriate update rate - runs at fast loop rate + void sample_gyros(); + // update calculated values of dynamic parameters - runs at 1Hz + void update_parameters(); + // thread for processing gyro data via FFT + void update_thread(); + // start the update thread + void start_update_thread(); + + // check at startup that standard frequencies can be detected + bool calibration_check(); + // called when hovering to determine the average peak frequency and reference value + void update_freq_hover(float dt, float throttle_out); + // called to save the average peak frequency and reference value + void save_params_on_disarm(); + // dynamically enable or disable the analysis through the aux switch + void set_analysis_enabled(bool enabled) { _analysis_enabled = enabled; }; + + // detected peak frequency filtered at 1/3 the update rate + Vector3f get_noise_center_freq_hz() const { return _global_state._center_freq_hz_filtered; } + // energy of the background noise at the detected center frequency + Vector3f get_noise_signal_to_noise_db() const { return _global_state._center_snr; } + // detected peak frequency weighted by energy + float get_weighted_noise_center_freq_hz(); + // detected peak frequency + Vector3f get_raw_noise_center_freq_hz() const { return _global_state._center_freq_hz; } + // match between first and second harmonics + Vector3f get_raw_noise_harmonic_fit() const { return _global_state._harmonic_fit; } + // energy of the detected peak frequency + Vector3f get_center_freq_energy() const { return _global_state._center_freq_energy; } + // index of the FFT bin containing the detected peak frequency + Vector3 get_center_freq_bin() const { return _global_state._center_freq_bin; } + // detected peak bandwidth + Vector3f get_noise_center_bandwidth_hz() const { return _global_state._center_bandwidth_hz; }; + // weighted detected peak bandwidth + float get_weighted_noise_center_bandwidth_hz(); + // log gyro fft messages + void write_log_messages(); + + static const struct AP_Param::GroupInfo var_info[]; + static AP_GyroFFT *get_singleton() { return _singleton; } + +private: + // calculate the peak noise frequency + void calculate_noise(uint16_t max_bin); + // update the estimation of the background noise energy + void update_ref_energy(uint16_t max_bin); + // test frequency detection for all of the allowable bins + float self_test_bin_frequencies(); + // detect the provided frequency + float self_test(float frequency, GyroWindow test_window); + // whether to run analysis or not + bool analysis_enabled() const { return _initialized && _analysis_enabled && _thread_created; }; + // whether analysis can be run again or not + bool start_analysis(); + // the size of the ring buffer in both the IMU backend + uint16_t get_buffer_size() const { return _state->_window_size + INS_MAX_GYRO_WINDOW_SAMPLES; } + // semaphore for access to shared FFT data + HAL_Semaphore _sem; + + // data accessible from the main thread protected by the semaphore + struct EngineState { + // energy of the detected peak frequency + Vector3f _center_freq_energy; + // energy of the detected peak frequency in dB + Vector3f _center_freq_energy_db; + // detected peak frequency + Vector3f _center_freq_hz; + // fit between first and second harmonics + Vector3f _harmonic_fit; + // bin of detected poeak frequency + Vector3 _center_freq_bin; + // filtered version of the peak frequency + Vector3f _center_freq_hz_filtered; + // signal to noise ratio of PSD at the detected centre frequency + Vector3f _center_snr; + // detected peak width + Vector3f _center_bandwidth_hz; + // axes that still require noise calibration + uint8_t _noise_needs_calibration : 3; + // whether the analyzer is mid-cycle + bool _analysis_started = false; + }; + + // Shared FFT engine state local to the FFT thread + EngineState _thread_state; + // Shared FFT engine state accessible by the main thread + EngineState _global_state; + + // configuration data local to the FFT thread but set from the main thread + struct EngineConfig { + // whether the analyzer should be run + bool _analysis_enabled; + // minimum frequency of the detection window + uint16_t _fft_min_hz; + // maximum frequency of the detection window + uint16_t _fft_max_hz; + // configured start bin based on min hz + uint16_t _fft_start_bin; + // configured end bin based on max hz + uint16_t _fft_end_bin; + // attenuation cutoff for calculation of hover bandwidth + float _attenuation_cutoff; + // SNR Threshold + float _snr_threshold_db; + } _config; + + // number of sampeles needed before a new frame can be processed + uint16_t _samples_per_frame; + // downsampled gyro data circular buffer index frequency analysis + uint16_t _circular_buffer_idx; + // number of collected unprocessed gyro samples + uint16_t _sample_count; + + // downsampled gyro data circular buffer for frequency analysis + float* _downsampled_gyro_data[XYZ_AXIS_COUNT]; + // accumulator for sampled gyro data + Vector3f _oversampled_gyro_accum; + // count of oversamples + uint16_t _oversampled_gyro_count; + // current write index in the buffer + uint16_t _downsampled_gyro_idx; + + // state of the FFT engine + AP_HAL::DSP::FFTWindowState* _state; + // update state machine step information + uint8_t _update_axis; + // noise base of the gyros + Vector3f* _ref_energy; + // the number of cycles required to have a proper noise reference + uint16_t _noise_cycles; + // number of cycles over which to generate noise ensemble averages + uint16_t _noise_calibration_cycles[XYZ_AXIS_COUNT]; + // current _sample_mode + uint8_t _current_sample_mode : 3; + // harmonic multiplier for two highest peaks + float _harmonic_multiplier; + // searched harmonics - inferred from harmonic notch harmoncis + uint8_t _harmonics; + + // smoothing filter on the output + LowPassFilterFloat _center_freq_filter[XYZ_AXIS_COUNT]; + // smoothing filter on the bandwidth + LowPassFilterFloat _center_bandwidth_filter[XYZ_AXIS_COUNT]; + + // configured sampling rate + uint16_t _fft_sampling_rate_hz; + // number of cycles without a detected signal + uint8_t _missed_cycles; + // whether the analyzer initialized correctly + bool _initialized; + + // whether the analyzer should be run + bool _analysis_enabled = true; + // whether the update thread was created + bool _thread_created = false; + // minimum frequency of the detection window + AP_Int16 _fft_min_hz; + // maximum frequency of the detection window + AP_Int16 _fft_max_hz; + // size of the FFT window + AP_Int16 _window_size; + // percentage overlap of FFT windows + AP_Float _window_overlap; + // overall enablement of the feature + AP_Int8 _enable; + // gyro rate sampling or cycle divider + AP_Int8 _sample_mode; + // learned throttle reference for the hover frequency + AP_Float _throttle_ref; + // learned hover filter frequency + AP_Float _freq_hover_hz; + // SNR Threshold + AP_Float _snr_threshold_db; + // attenuation to use for calculating the peak bandwidth at hover + AP_Float _attenuation_power_db; + // learned peak bandwidth at configured attenuation at hover + AP_Float _bandwidth_hover_hz; + AP_InertialSensor* _ins; +#if DEBUG_FFT + uint32_t _last_output_ms; + EngineState _debug_state; + float _debug_max_bin_freq; + float _debug_max_freq_bin; + uint16_t _debug_max_bin; + float _debug_snr; +#endif + + static AP_GyroFFT *_singleton; +}; + +namespace AP { + AP_GyroFFT *fft(); +}; + +#endif // HAL_GYROFFT_ENABLED \ No newline at end of file diff --git a/libraries/AP_GyroFFT/CMSIS_5/LICENSE.txt b/libraries/AP_GyroFFT/CMSIS_5/LICENSE.txt new file mode 100644 index 0000000000..8dada3edaf --- /dev/null +++ b/libraries/AP_GyroFFT/CMSIS_5/LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/libraries/AP_GyroFFT/CMSIS_5/README.md b/libraries/AP_GyroFFT/CMSIS_5/README.md new file mode 100644 index 0000000000..84c9427c81 --- /dev/null +++ b/libraries/AP_GyroFFT/CMSIS_5/README.md @@ -0,0 +1,145 @@ +# CMSIS Version 5 - ArduPilot + +This directory contains the pre-built CMSIS v5.6.0 (master branch) DSP libraries and headers for Arm Cortex chips taken from github. +The CMSIS 5 repository is very large (400Mb) so we only copy the pieces that we need for building ArduPilot. + +The CMSIS 5 README follows. + +# CMSIS Version 5 + +The branch *master* of this GitHub repository contains the CMSIS Version 5.6.0. The [documentation](http://arm-software.github.io/CMSIS_5/General/html/index.html) is available under http://arm-software.github.io/CMSIS_5/General/html/index.html + +Use [Issues](https://github.com/ARM-software/CMSIS_5#issues-and-labels) to provide feedback and report problems for CMSIS Version 5. + +**Note:** The branch *develop* of this GitHub repository reflects our current state of development and is constantly updated. It gives our users and partners contiguous access to the CMSIS development. It allows you to review the work and provide feedback or create pull requests for contributions. + +A [pre-built documentation](http://www.keil.com/pack/doc/CMSIS_Dev/index.html) is updated from time to time, but may be also generated using the instructions under [Generate CMSIS Pack for Release](https://github.com/ARM-software/CMSIS_5#generate-cmsis-pack-for-release). + +## Overview of CMSIS Components + +The following is an list of all CMSIS components that are available. + +| CMSIS-... | Target Processors | Description | +|:----------|:--------------------|:-------------| +|[Core(M)](http://arm-software.github.io/CMSIS_5/Core/html/index.html) | All Cortex-M, SecurCore | Standardized API for the Cortex-M processor core and peripherals. Includes intrinsic functions for Cortex-M4/M7/M33/M35P SIMD instructions.| +|[Core(A)](http://arm-software.github.io/CMSIS_5/Core_A/html/index.html)| Cortex-A5/A7/A9 | API and basic run-time system for the Cortex-A5/A7/A9 processor core and peripherals.| +|[Driver](http://arm-software.github.io/CMSIS_5/Driver/html/index.html) | All Cortex-M, SecurCore | Generic peripheral driver interfaces for middleware. Connects microcontroller peripherals with middleware that implements for example communication stacks, file systems, or graphic user interfaces.| +|[DSP](http://arm-software.github.io/CMSIS_5/DSP/html/index.html) | All Cortex-M | DSP library collection with over 60 Functions for various data types: fixed-point (fractional q7, q15, q31) and single precision floating-point (32-bit). Implementations optimized for the SIMD instruction set are available for Cortex-M4/M7/M33/M35P.| +|[NN](http://arm-software.github.io/CMSIS_5/NN/html/index.html) | All Cortex-M | Collection of efficient neural network kernels developed to maximize the performance and minimize the memory footprint on Cortex-M processor cores.| +|[RTOS v1](http://arm-software.github.io/CMSIS_5/RTOS/html/index.html) | Cortex-M0/M0+/M3/M4/M7 | Common API for real-time operating systems along with a reference implementation based on RTX. It enables software components that can work across multiple RTOS systems.| +|[RTOS v2](http://arm-software.github.io/CMSIS_5/RTOS2/html/index.html)| All Cortex-M, Cortex-A5/A7/A9 | Extends CMSIS-RTOS v1 with Armv8-M support, dynamic object creation, provisions for multi-core systems, binary compatible interface. | +|[Pack](http://arm-software.github.io/CMSIS_5/Pack/html/index.html) | All Cortex-M, SecurCore, Cortex-A5/A7/A9 | Describes a delivery mechanism for software components, device parameters, and evaluation board support. It simplifies software re-use and product life-cycle management (PLM). | +|[SVD](http://arm-software.github.io/CMSIS_5/SVD/html/index.html) | All Cortex-M, SecurCore | Peripheral description of a device that can be used to create peripheral awareness in debuggers or CMSIS-Core header files.| +|[DAP](http://arm-software.github.io/CMSIS_5/DAP/html/index.html) | All Cortex | Firmware for a debug unit that interfaces to the CoreSight Debug Access Port. | +|[Zone](http://arm-software.github.io/CMSIS_5/Zone/html/index.html) | All Cortex | Defines methods to describe system resources and to partition these resources into multiple projects and execution areas. | + +## Implemented Enhancements + - CMSIS-Core-A, RTX5: implementation for Cortex-A5/A7/A9 + - Support for Armv8-M Architecture (Mainline and Baseline) as well as devices Cortex-M23 and Cortex-M33 + - CMSIS-RTOS2: RTX 5 is now available for IAR, GCC, Arm Compiler 5, Arm Compiler 6 + - CMSIS-RTOS2: FreeRTOS adoption (release) is available https://github.com/ARM-software/CMSIS-FreeRTOS + - CMSIS-NN: Bare metal Neural Network function library. + - CMSIS-DAP v2: with WinUSB for faster communication and separate pipe for SWO support + - Config Wizard extension: access enum’s for configuration information + +## Further Planned Enhancements + - CMSIS-Zone: management of complex system + - CMSIS-Pack: + - System Description SDF Format: describe more complex debug topologies than with a Debug Description in a tool agnostic way + - Github based workflow: allows to develop software packs using github infra-structure + - Flash algorithm via debugger: Some TurstZone enable devices cannot execute RAM. Commands that allow flash programming will be added to Debug Description. + - CPDSC project file format: allows project templates that are agnostic of an IDE + - Minimize need for IDE specific settings: CMSIS-Pack supports IDE specific parameters. Analyze and minimize + +For further details see also the [Slides of the Embedded World CMSIS Partner Meeting](https://github.com/ARM-software/CMSIS_5/blob/develop/CMSIS_EW2019.pdf). + +## Other related GitHub repositories + +| Repository | Description | +|:--------------------------- |:--------------------------------------------------------- | +| [cmsis-pack-eclipse](https://github.com/ARM-software/cmsis-pack-eclipse) | CMSIS-Pack Management for Eclipse reference implementation Pack support | +| [CMSIS-FreeRTOS](https://github.com/arm-software/CMSIS-FreeRTOS) | CMSIS-RTOS adoption of FreeRTOS | +| [CMSIS-Driver](https://github.com/arm-software/CMSIS-Driver) | Generic MCU driver implementations and templates for Ethernet MAC/PHY and Flash. | +| [CMSIS-Driver_Validation](https://github.com/ARM-software/CMSIS-Driver_Validation) | CMSIS-Driver Validation can be used to verify CMSIS-Driver in a user system | +| [CMSIS-Zone](https://github.com/ARM-software/CMSIS-Zone) | CMSIS-Zone Utility along with example projects and FreeMarker templates | +| [NXP_LPC](https://github.com/ARM-software/NXP_LPC) | CMSIS Driver Implementations for the NXP LPC Microcontroller Series | +| [mdk-packs](https://github.com/mdk-packs) | IoT cloud connectors as trail implementations for MDK (help us to make it generic)| +| [trustedfirmware.org](https://www.trustedfirmware.org/) | Arm Trusted Firmware provides a reference implementation of secure world software for Armv8-A and Armv8-M.| + + +## Directory Structure + +| Directory | Content | +|:-------------------- |:--------------------------------------------------------- | +| CMSIS/Core | CMSIS-Core(M) related files (for release) | +| CMSIS/Core_A | CMSIS-Core(A) related files (for release) | +| CMSIS/CoreValidation | Validation for Core(M) and Core(A) (NOT part of release) | +| CMSIS/DAP | CMSIS-DAP related files and examples | +| CMSIS/Driver | CMSIS-Driver API headers and template files | +| CMSIS/DSP | CMSIS-DSP related files | +| CMSIS/NN | CMSIS-NN related files | +| CMSIS/RTOS | RTOS v1 related files (for Cortex-M) | +| CMSIS/RTOS2 | RTOS v2 related files (for Cortex-M & Armv8-M) | +| CMSIS/Pack | CMSIS-Pack examples and tutorials | +| CMSIS/DoxyGen | Source of the documentation | +| CMSIS/Utilities | Utility programs | + +## Generate CMSIS Pack for Release + +This GitHub development repository contains already pre-built libraries (stored in Git-LFS) of various software components (DSP, RTOS, RTOS2). +These libraries are validated for release. Git-LFS needs to be installed to retrieve the actual binary files, please see https://git-lfs.github.com/. + +To build a complete CMSIS pack for installation the following additional tools are required: + - **doxygen.exe** Version: 1.8.6 (Documentation Generator) + - **mscgen.exe** Version: 0.20 (Message Sequence Chart Converter) + - **7z.exe (7-Zip)** Version: 16.02 (File Archiver) + +Using these tools, you can generate on a Windows PC: + - **CMSIS Software Pack** using the batch file **gen_pack.bat** (located in ./CMSIS/Utilities). This batch file also generates the documentation. + + - **CMSIS Documentation** using the batch file **genDoc.bat** (located in ./CMSIS/Doxygen). + +The file ./CMSIS/DoxyGen/How2Doc.txt describes the rules for creating API documentation. + +## License + +Arm CMSIS is licensed under Apache-2.0. + +## Contributions and Pull Requests + +Contributions are accepted under Apache-2.0. Only submit contributions where you have authored all of the code. + +### Issues and Labels + +Please feel free to raise an [issue on GitHub](https://github.com/ARM-software/CMSIS_5/issues) +to report misbehavior (i.e. bugs) or start discussions about enhancements. This +is your best way to interact directly with the maintenance team and the community. +We encourage you to append implementation suggestions as this helps to decrease the +workload of the very limited maintenance team. + +We will be monitoring and responding to issues as best we can. +Please attempt to avoid filing duplicates of open or closed items when possible. +In the spirit of openness we will be tagging issues with the following: + +- **bug** – We consider this issue to be a bug that will be investigated. + +- **wontfix** - We appreciate this issue but decided not to change the current behavior. + +- **enhancement** – Denotes something that will be implemented soon. + +- **future** - Denotes something not yet schedule for implementation. + +- **out-of-scope** - We consider this issue loosely related to CMSIS. It might by implemented outside of CMSIS. Let us know about your work. + +- **question** – We have further questions to this issue. Please review and provide feedback. + +- **documentation** - This issue is a documentation flaw that will be improved in future. + +- **review** - This issue is under review. Please be patient. + +- **DONE** - We consider this issue as resolved - please review and close it. In case of no further activity this issues will be closed after a week. + +- **duplicate** - This issue is already addressed elsewhere, see comment with provided references. + +- **Important Information** - We provide essential informations regarding planned or resolved major enhancements. + diff --git a/libraries/AP_GyroFFT/CMSIS_5/include/arm_common_tables.h b/libraries/AP_GyroFFT/CMSIS_5/include/arm_common_tables.h new file mode 100644 index 0000000000..6a4337f7e8 --- /dev/null +++ b/libraries/AP_GyroFFT/CMSIS_5/include/arm_common_tables.h @@ -0,0 +1,378 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_common_tables.h + * Description: Extern declaration for common tables + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _ARM_COMMON_TABLES_H +#define _ARM_COMMON_TABLES_H + +#include "arm_math.h" + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024) + extern const uint16_t armBitRevTable[1024]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_16) + extern const float32_t twiddleCoef_16[32]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_32) + extern const float32_t twiddleCoef_32[64]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_64) + extern const float32_t twiddleCoef_64[128]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_128) + extern const float32_t twiddleCoef_128[256]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_256) + extern const float32_t twiddleCoef_256[512]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_512) + extern const float32_t twiddleCoef_512[1024]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_1024) + extern const float32_t twiddleCoef_1024[2048]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_2048) + extern const float32_t twiddleCoef_2048[4096]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_4096) + extern const float32_t twiddleCoef_4096[8192]; + #define twiddleCoef twiddleCoef_4096 + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_16) + extern const q31_t twiddleCoef_16_q31[24]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_32) + extern const q31_t twiddleCoef_32_q31[48]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_64) + extern const q31_t twiddleCoef_64_q31[96]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_128) + extern const q31_t twiddleCoef_128_q31[192]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_256) + extern const q31_t twiddleCoef_256_q31[384]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_512) + extern const q31_t twiddleCoef_512_q31[768]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_1024) + extern const q31_t twiddleCoef_1024_q31[1536]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_2048) + extern const q31_t twiddleCoef_2048_q31[3072]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) + extern const q31_t twiddleCoef_4096_q31[6144]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_16) + extern const q15_t twiddleCoef_16_q15[24]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_32) + extern const q15_t twiddleCoef_32_q15[48]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_64) + extern const q15_t twiddleCoef_64_q15[96]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_128) + extern const q15_t twiddleCoef_128_q15[192]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_256) + extern const q15_t twiddleCoef_256_q15[384]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_512) + extern const q15_t twiddleCoef_512_q15[768]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) + extern const q15_t twiddleCoef_1024_q15[1536]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_2048) + extern const q15_t twiddleCoef_2048_q15[3072]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) + extern const q15_t twiddleCoef_4096_q15[6144]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_32) + extern const float32_t twiddleCoef_rfft_32[32]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_64) + extern const float32_t twiddleCoef_rfft_64[64]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_128) + extern const float32_t twiddleCoef_rfft_128[128]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_256) + extern const float32_t twiddleCoef_rfft_256[256]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_512) + extern const float32_t twiddleCoef_rfft_512[512]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_1024) + extern const float32_t twiddleCoef_rfft_1024[1024]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_2048) + extern const float32_t twiddleCoef_rfft_2048[2048]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_4096) + extern const float32_t twiddleCoef_rfft_4096[4096]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + /* floating-point bit reversal tables */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_16) + #define ARMBITREVINDEXTABLE_16_TABLE_LENGTH ((uint16_t)20) + extern const uint16_t armBitRevIndexTable16[ARMBITREVINDEXTABLE_16_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_32) + #define ARMBITREVINDEXTABLE_32_TABLE_LENGTH ((uint16_t)48) + extern const uint16_t armBitRevIndexTable32[ARMBITREVINDEXTABLE_32_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_64) + #define ARMBITREVINDEXTABLE_64_TABLE_LENGTH ((uint16_t)56) + extern const uint16_t armBitRevIndexTable64[ARMBITREVINDEXTABLE_64_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_128) + #define ARMBITREVINDEXTABLE_128_TABLE_LENGTH ((uint16_t)208) + extern const uint16_t armBitRevIndexTable128[ARMBITREVINDEXTABLE_128_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_256) + #define ARMBITREVINDEXTABLE_256_TABLE_LENGTH ((uint16_t)440) + extern const uint16_t armBitRevIndexTable256[ARMBITREVINDEXTABLE_256_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_512) + #define ARMBITREVINDEXTABLE_512_TABLE_LENGTH ((uint16_t)448) + extern const uint16_t armBitRevIndexTable512[ARMBITREVINDEXTABLE_512_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_1024) + #define ARMBITREVINDEXTABLE_1024_TABLE_LENGTH ((uint16_t)1800) + extern const uint16_t armBitRevIndexTable1024[ARMBITREVINDEXTABLE_1024_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_2048) + #define ARMBITREVINDEXTABLE_2048_TABLE_LENGTH ((uint16_t)3808) + extern const uint16_t armBitRevIndexTable2048[ARMBITREVINDEXTABLE_2048_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_4096) + #define ARMBITREVINDEXTABLE_4096_TABLE_LENGTH ((uint16_t)4032) + extern const uint16_t armBitRevIndexTable4096[ARMBITREVINDEXTABLE_4096_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + + /* fixed-point bit reversal tables */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_16) + #define ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH ((uint16_t)12) + extern const uint16_t armBitRevIndexTable_fixed_16[ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_32) + #define ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH ((uint16_t)24) + extern const uint16_t armBitRevIndexTable_fixed_32[ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_64) + #define ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH ((uint16_t)56) + extern const uint16_t armBitRevIndexTable_fixed_64[ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_128) + #define ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH ((uint16_t)112) + extern const uint16_t armBitRevIndexTable_fixed_128[ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_256) + #define ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH ((uint16_t)240) + extern const uint16_t armBitRevIndexTable_fixed_256[ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_512) + #define ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH ((uint16_t)480) + extern const uint16_t armBitRevIndexTable_fixed_512[ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_1024) + #define ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH ((uint16_t)992) + extern const uint16_t armBitRevIndexTable_fixed_1024[ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_2048) + #define ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH ((uint16_t)1984) + extern const uint16_t armBitRevIndexTable_fixed_2048[ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096) + #define ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH ((uint16_t)4032) + extern const uint16_t armBitRevIndexTable_fixed_4096[ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_REALCOEF_F32) + extern const float32_t realCoefA[8192]; + extern const float32_t realCoefB[8192]; + #endif + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_REALCOEF_Q31) + extern const q31_t realCoefAQ31[8192]; + extern const q31_t realCoefBQ31[8192]; + #endif + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_REALCOEF_Q15) + extern const q15_t realCoefAQ15[8192]; + extern const q15_t realCoefBQ15[8192]; + #endif + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_128) + extern const float32_t Weights_128[256]; + extern const float32_t cos_factors_128[128]; + #endif + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_512) + extern const float32_t Weights_512[1024]; + extern const float32_t cos_factors_512[512]; + #endif + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_2048) + extern const float32_t Weights_2048[4096]; + extern const float32_t cos_factors_2048[2048]; + #endif + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_8192) + extern const float32_t Weights_8192[16384]; + extern const float32_t cos_factors_8192[8192]; + #endif + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_128) + extern const q15_t WeightsQ15_128[256]; + extern const q15_t cos_factorsQ15_128[128]; + #endif + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_512) + extern const q15_t WeightsQ15_512[1024]; + extern const q15_t cos_factorsQ15_512[512]; + #endif + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_2048) + extern const q15_t WeightsQ15_2048[4096]; + extern const q15_t cos_factorsQ15_2048[2048]; + #endif + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_8192) + extern const q15_t WeightsQ15_8192[16384]; + extern const q15_t cos_factorsQ15_8192[8192]; + #endif + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_128) + extern const q31_t WeightsQ31_128[256]; + extern const q31_t cos_factorsQ31_128[128]; + #endif + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_512) + extern const q31_t WeightsQ31_512[1024]; + extern const q31_t cos_factorsQ31_512[512]; + #endif + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_2048) + extern const q31_t WeightsQ31_2048[4096]; + extern const q31_t cos_factorsQ31_2048[2048]; + #endif + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_8192) + extern const q31_t WeightsQ31_8192[16384]; + extern const q31_t cos_factorsQ31_8192[8192]; + #endif + +#endif /* if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_TABLES) */ + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FAST_ALLOW_TABLES) + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_RECIP_Q15) + extern const q15_t armRecipTableQ15[64]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_RECIP_Q31) + extern const q31_t armRecipTableQ31[64]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */ + + /* Tables for Fast Math Sine and Cosine */ + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_F32) + extern const float32_t sinTable_f32[FAST_MATH_TABLE_SIZE + 1]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q31) + extern const q31_t sinTable_q31[FAST_MATH_TABLE_SIZE + 1]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */ + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q15) + extern const q15_t sinTable_q15[FAST_MATH_TABLE_SIZE + 1]; + #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */ + +#endif /* if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FAST_TABLES) */ + +#endif /* ARM_COMMON_TABLES_H */ diff --git a/libraries/AP_GyroFFT/CMSIS_5/include/arm_const_structs.h b/libraries/AP_GyroFFT/CMSIS_5/include/arm_const_structs.h new file mode 100644 index 0000000000..80a3e8bbe7 --- /dev/null +++ b/libraries/AP_GyroFFT/CMSIS_5/include/arm_const_structs.h @@ -0,0 +1,66 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_const_structs.h + * Description: Constant structs that are initialized for user convenience. + * For example, some can be given as arguments to the arm_cfft_f32() function. + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _ARM_CONST_STRUCTS_H +#define _ARM_CONST_STRUCTS_H + +#include "arm_math.h" +#include "arm_common_tables.h" + + extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len16; + extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len32; + extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len64; + extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len128; + extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len256; + extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len512; + extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len1024; + extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len2048; + extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len4096; + + extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len16; + extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len32; + extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len64; + extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len128; + extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len256; + extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len512; + extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len1024; + extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len2048; + extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len4096; + + extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len16; + extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len32; + extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len64; + extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len128; + extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len256; + extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len512; + extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len1024; + extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len2048; + extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len4096; + +#endif diff --git a/libraries/AP_GyroFFT/CMSIS_5/include/arm_math.h b/libraries/AP_GyroFFT/CMSIS_5/include/arm_math.h new file mode 100644 index 0000000000..eb37f8223c --- /dev/null +++ b/libraries/AP_GyroFFT/CMSIS_5/include/arm_math.h @@ -0,0 +1,7361 @@ +/****************************************************************************** + * @file arm_math.h + * @brief Public header file for CMSIS DSP Library + * @version V1.6.0 + * @date 18. March 2019 + ******************************************************************************/ +/* + * Copyright (c) 2010-2019 Arm Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + \mainpage CMSIS DSP Software Library + * + * Introduction + * ------------ + * + * This user manual describes the CMSIS DSP software library, + * a suite of common signal processing functions for use on Cortex-M processor based devices. + * + * The library is divided into a number of functions each covering a specific category: + * - Basic math functions + * - Fast math functions + * - Complex math functions + * - Filters + * - Matrix functions + * - Transform functions + * - Motor control functions + * - Statistical functions + * - Support functions + * - Interpolation functions + * + * The library has separate functions for operating on 8-bit integers, 16-bit integers, + * 32-bit integer and 32-bit floating-point values. + * + * Using the Library + * ------------ + * + * The library installer contains prebuilt versions of the libraries in the Lib folder. + * - arm_cortexM7lfdp_math.lib (Cortex-M7, Little endian, Double Precision Floating Point Unit) + * - arm_cortexM7bfdp_math.lib (Cortex-M7, Big endian, Double Precision Floating Point Unit) + * - arm_cortexM7lfsp_math.lib (Cortex-M7, Little endian, Single Precision Floating Point Unit) + * - arm_cortexM7bfsp_math.lib (Cortex-M7, Big endian and Single Precision Floating Point Unit on) + * - arm_cortexM7l_math.lib (Cortex-M7, Little endian) + * - arm_cortexM7b_math.lib (Cortex-M7, Big endian) + * - arm_cortexM4lf_math.lib (Cortex-M4, Little endian, Floating Point Unit) + * - arm_cortexM4bf_math.lib (Cortex-M4, Big endian, Floating Point Unit) + * - arm_cortexM4l_math.lib (Cortex-M4, Little endian) + * - arm_cortexM4b_math.lib (Cortex-M4, Big endian) + * - arm_cortexM3l_math.lib (Cortex-M3, Little endian) + * - arm_cortexM3b_math.lib (Cortex-M3, Big endian) + * - arm_cortexM0l_math.lib (Cortex-M0 / Cortex-M0+, Little endian) + * - arm_cortexM0b_math.lib (Cortex-M0 / Cortex-M0+, Big endian) + * - arm_ARMv8MBLl_math.lib (Armv8-M Baseline, Little endian) + * - arm_ARMv8MMLl_math.lib (Armv8-M Mainline, Little endian) + * - arm_ARMv8MMLlfsp_math.lib (Armv8-M Mainline, Little endian, Single Precision Floating Point Unit) + * - arm_ARMv8MMLld_math.lib (Armv8-M Mainline, Little endian, DSP instructions) + * - arm_ARMv8MMLldfsp_math.lib (Armv8-M Mainline, Little endian, DSP instructions, Single Precision Floating Point Unit) + * + * The library functions are declared in the public file arm_math.h which is placed in the Include folder. + * Simply include this file and link the appropriate library in the application and begin calling the library functions. The Library supports single + * public header file arm_math.h for Cortex-M cores with little endian and big endian. Same header file will be used for floating point unit(FPU) variants. + * + * + * Examples + * -------- + * + * The library ships with a number of examples which demonstrate how to use the library functions. + * + * Toolchain Support + * ------------ + * + * The library has been developed and tested with MDK version 5.14.0.0 + * The library is being tested in GCC and IAR toolchains and updates on this activity will be made available shortly. + * + * Building the Library + * ------------ + * + * The library installer contains a project file to rebuild libraries on MDK toolchain in the CMSIS\\DSP\\Projects\\ARM folder. + * - arm_cortexM_math.uvprojx + * + * + * The libraries can be built by opening the arm_cortexM_math.uvprojx project in MDK-ARM, selecting a specific target, and defining the optional preprocessor macros detailed above. + * + * Preprocessor Macros + * ------------ + * + * Each library project have different preprocessor macros. + * + * - ARM_MATH_BIG_ENDIAN: + * + * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets. + * + * - ARM_MATH_MATRIX_CHECK: + * + * Define macro ARM_MATH_MATRIX_CHECK for checking on the input and output sizes of matrices + * + * - ARM_MATH_ROUNDING: + * + * Define macro ARM_MATH_ROUNDING for rounding on support functions + * + * - ARM_MATH_LOOPUNROLL: + * + * Define macro ARM_MATH_LOOPUNROLL to enable manual loop unrolling in DSP functions + * + * - ARM_MATH_NEON: + * + * Define macro ARM_MATH_NEON to enable Neon versions of the DSP functions. + * It is not enabled by default when Neon is available because performances are + * dependent on the compiler and target architecture. + * + * - ARM_MATH_NEON_EXPERIMENTAL: + * + * Define macro ARM_MATH_NEON_EXPERIMENTAL to enable experimental Neon versions of + * of some DSP functions. Experimental Neon versions currently do not have better + * performances than the scalar versions. + * + *
+ * CMSIS-DSP in ARM::CMSIS Pack + * ----------------------------- + * + * The following files relevant to CMSIS-DSP are present in the ARM::CMSIS Pack directories: + * |File/Folder |Content | + * |---------------------------------|------------------------------------------------------------------------| + * |\b CMSIS\\Documentation\\DSP | This documentation | + * |\b CMSIS\\DSP\\DSP_Lib_TestSuite | DSP_Lib test suite | + * |\b CMSIS\\DSP\\Examples | Example projects demonstrating the usage of the library functions | + * |\b CMSIS\\DSP\\Include | DSP_Lib include files | + * |\b CMSIS\\DSP\\Lib | DSP_Lib binaries | + * |\b CMSIS\\DSP\\Projects | Projects to rebuild DSP_Lib binaries | + * |\b CMSIS\\DSP\\Source | DSP_Lib source files | + * + *
+ * Revision History of CMSIS-DSP + * ------------ + * Please refer to \ref ChangeLog_pg. + */ + + +/** + * @defgroup groupMath Basic Math Functions + */ + +/** + * @defgroup groupFastMath Fast Math Functions + * This set of functions provides a fast approximation to sine, cosine, and square root. + * As compared to most of the other functions in the CMSIS math library, the fast math functions + * operate on individual values and not arrays. + * There are separate functions for Q15, Q31, and floating-point data. + * + */ + +/** + * @defgroup groupCmplxMath Complex Math Functions + * This set of functions operates on complex data vectors. + * The data in the complex arrays is stored in an interleaved fashion + * (real, imag, real, imag, ...). + * In the API functions, the number of samples in a complex array refers + * to the number of complex values; the array contains twice this number of + * real values. + */ + +/** + * @defgroup groupFilters Filtering Functions + */ + +/** + * @defgroup groupMatrix Matrix Functions + * + * This set of functions provides basic matrix math operations. + * The functions operate on matrix data structures. For example, + * the type + * definition for the floating-point matrix structure is shown + * below: + *
+ *     typedef struct
+ *     {
+ *       uint16_t numRows;     // number of rows of the matrix.
+ *       uint16_t numCols;     // number of columns of the matrix.
+ *       float32_t *pData;     // points to the data of the matrix.
+ *     } arm_matrix_instance_f32;
+ * 
+ * There are similar definitions for Q15 and Q31 data types. + * + * The structure specifies the size of the matrix and then points to + * an array of data. The array is of size numRows X numCols + * and the values are arranged in row order. That is, the + * matrix element (i, j) is stored at: + *
+ *     pData[i*numCols + j]
+ * 
+ * + * \par Init Functions + * There is an associated initialization function for each type of matrix + * data structure. + * The initialization function sets the values of the internal structure fields. + * Refer to \ref arm_mat_init_f32(), \ref arm_mat_init_q31() and \ref arm_mat_init_q15() + * for floating-point, Q31 and Q15 types, respectively. + * + * \par + * Use of the initialization function is optional. However, if initialization function is used + * then the instance structure cannot be placed into a const data section. + * To place the instance structure in a const data + * section, manually initialize the data structure. For example: + *
+ * arm_matrix_instance_f32 S = {nRows, nColumns, pData};
+ * arm_matrix_instance_q31 S = {nRows, nColumns, pData};
+ * arm_matrix_instance_q15 S = {nRows, nColumns, pData};
+ * 
+ * where nRows specifies the number of rows, nColumns + * specifies the number of columns, and pData points to the + * data array. + * + * \par Size Checking + * By default all of the matrix functions perform size checking on the input and + * output matrices. For example, the matrix addition function verifies that the + * two input matrices and the output matrix all have the same number of rows and + * columns. If the size check fails the functions return: + *
+ *     ARM_MATH_SIZE_MISMATCH
+ * 
+ * Otherwise the functions return + *
+ *     ARM_MATH_SUCCESS
+ * 
+ * There is some overhead associated with this matrix size checking. + * The matrix size checking is enabled via the \#define + *
+ *     ARM_MATH_MATRIX_CHECK
+ * 
+ * within the library project settings. By default this macro is defined + * and size checking is enabled. By changing the project settings and + * undefining this macro size checking is eliminated and the functions + * run a bit faster. With size checking disabled the functions always + * return ARM_MATH_SUCCESS. + */ + +/** + * @defgroup groupTransforms Transform Functions + */ + +/** + * @defgroup groupController Controller Functions + */ + +/** + * @defgroup groupStats Statistics Functions + */ + +/** + * @defgroup groupSupport Support Functions + */ + +/** + * @defgroup groupInterpolation Interpolation Functions + * These functions perform 1- and 2-dimensional interpolation of data. + * Linear interpolation is used for 1-dimensional data and + * bilinear interpolation is used for 2-dimensional data. + */ + +/** + * @defgroup groupExamples Examples + */ + + +#ifndef _ARM_MATH_H +#define _ARM_MATH_H + +/* Compiler specific diagnostic adjustment */ +#if defined ( __CC_ARM ) + +#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) + +#elif defined ( __GNUC__ ) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wsign-conversion" + #pragma GCC diagnostic ignored "-Wconversion" + #pragma GCC diagnostic ignored "-Wunused-parameter" + +#elif defined ( __ICCARM__ ) + +#elif defined ( __TI_ARM__ ) + +#elif defined ( __CSMC__ ) + +#elif defined ( __TASKING__ ) + +#elif defined ( _MSC_VER ) + +#else + #error Unknown compiler +#endif + + +/* Included for instrinsics definitions */ +#if !defined ( _MSC_VER ) +#include "cmsis_compiler.h" +#else +#include +#define __STATIC_FORCEINLINE static __forceinline +#define __ALIGNED(x) __declspec(align(x)) +#define LOW_OPTIMIZATION_ENTER +#define LOW_OPTIMIZATION_EXIT +#define IAR_ONLY_LOW_OPTIMIZATION_ENTER +#define IAR_ONLY_LOW_OPTIMIZATION_EXIT +#endif + +#include "string.h" +#include "math.h" +#include "float.h" + +/* evaluate ARM DSP feature */ +#if (defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1)) + #define ARM_MATH_DSP 1 +#endif + +#if defined(__ARM_NEON) +#include +#endif + + +#ifdef __cplusplus +extern "C" +{ +#endif + + + /** + * @brief Macros required for reciprocal calculation in Normalized LMS + */ + +#define DELTA_Q31 (0x100) +#define DELTA_Q15 0x5 +#define INDEX_MASK 0x0000003F +#ifndef PI + #define PI 3.14159265358979f +#endif + + /** + * @brief Macros required for SINE and COSINE Fast math approximations + */ + +#define FAST_MATH_TABLE_SIZE 512 +#define FAST_MATH_Q31_SHIFT (32 - 10) +#define FAST_MATH_Q15_SHIFT (16 - 10) +#define CONTROLLER_Q31_SHIFT (32 - 9) +#define TABLE_SPACING_Q31 0x400000 +#define TABLE_SPACING_Q15 0x80 + + /** + * @brief Macros required for SINE and COSINE Controller functions + */ + /* 1.31(q31) Fixed value of 2/360 */ + /* -1 to +1 is divided into 360 values so total spacing is (2/360) */ +#define INPUT_SPACING 0xB60B61 + + + /** + * @brief Error status returned by some functions in the library. + */ + + typedef enum + { + ARM_MATH_SUCCESS = 0, /**< No error */ + ARM_MATH_ARGUMENT_ERROR = -1, /**< One or more arguments are incorrect */ + ARM_MATH_LENGTH_ERROR = -2, /**< Length of data buffer is incorrect */ + ARM_MATH_SIZE_MISMATCH = -3, /**< Size of matrices is not compatible with the operation */ + ARM_MATH_NANINF = -4, /**< Not-a-number (NaN) or infinity is generated */ + ARM_MATH_SINGULAR = -5, /**< Input matrix is singular and cannot be inverted */ + ARM_MATH_TEST_FAILURE = -6 /**< Test Failed */ + } arm_status; + + /** + * @brief 8-bit fractional data type in 1.7 format. + */ + typedef int8_t q7_t; + + /** + * @brief 16-bit fractional data type in 1.15 format. + */ + typedef int16_t q15_t; + + /** + * @brief 32-bit fractional data type in 1.31 format. + */ + typedef int32_t q31_t; + + /** + * @brief 64-bit fractional data type in 1.63 format. + */ + typedef int64_t q63_t; + + /** + * @brief 32-bit floating-point type definition. + */ + typedef float float32_t; + + /** + * @brief 64-bit floating-point type definition. + */ + typedef double float64_t; + + +/** + @brief definition to read/write two 16 bit values. + @deprecated + */ +#if defined ( __CC_ARM ) + #define __SIMD32_TYPE int32_t __packed +#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) + #define __SIMD32_TYPE int32_t +#elif defined ( __GNUC__ ) + #define __SIMD32_TYPE int32_t +#elif defined ( __ICCARM__ ) + #define __SIMD32_TYPE int32_t __packed +#elif defined ( __TI_ARM__ ) + #define __SIMD32_TYPE int32_t +#elif defined ( __CSMC__ ) + #define __SIMD32_TYPE int32_t +#elif defined ( __TASKING__ ) + #define __SIMD32_TYPE __un(aligned) int32_t +#elif defined(_MSC_VER ) + #define __SIMD32_TYPE int32_t +#else + #error Unknown compiler +#endif + +#define __SIMD32(addr) (*(__SIMD32_TYPE **) & (addr)) +#define __SIMD32_CONST(addr) ( (__SIMD32_TYPE * ) (addr)) +#define _SIMD32_OFFSET(addr) (*(__SIMD32_TYPE * ) (addr)) +#define __SIMD64(addr) (*( int64_t **) & (addr)) + +/* SIMD replacement */ + + +/** + @brief Read 2 Q15 from Q15 pointer. + @param[in] pQ15 points to input value + @return Q31 value + */ +__STATIC_FORCEINLINE q31_t read_q15x2 ( + q15_t * pQ15) +{ + q31_t val; + + memcpy (&val, pQ15, 4); + + return (val); +} + +/** + @brief Read 2 Q15 from Q15 pointer and increment pointer afterwards. + @param[in] pQ15 points to input value + @return Q31 value + */ +__STATIC_FORCEINLINE q31_t read_q15x2_ia ( + q15_t ** pQ15) +{ + q31_t val; + + memcpy (&val, *pQ15, 4); + *pQ15 += 2; + + return (val); +} + +/** + @brief Read 2 Q15 from Q15 pointer and decrement pointer afterwards. + @param[in] pQ15 points to input value + @return Q31 value + */ +__STATIC_FORCEINLINE q31_t read_q15x2_da ( + q15_t ** pQ15) +{ + q31_t val; + + memcpy (&val, *pQ15, 4); + *pQ15 -= 2; + + return (val); +} + +/** + @brief Write 2 Q15 to Q15 pointer and increment pointer afterwards. + @param[in] pQ15 points to input value + @param[in] value Q31 value + @return none + */ +__STATIC_FORCEINLINE void write_q15x2_ia ( + q15_t ** pQ15, + q31_t value) +{ + q31_t val = value; + + memcpy (*pQ15, &val, 4); + *pQ15 += 2; +} + +/** + @brief Write 2 Q15 to Q15 pointer. + @param[in] pQ15 points to input value + @param[in] value Q31 value + @return none + */ +__STATIC_FORCEINLINE void write_q15x2 ( + q15_t * pQ15, + q31_t value) +{ + q31_t val = value; + + memcpy (pQ15, &val, 4); +} + + +/** + @brief Read 4 Q7 from Q7 pointer and increment pointer afterwards. + @param[in] pQ7 points to input value + @return Q31 value + */ +__STATIC_FORCEINLINE q31_t read_q7x4_ia ( + q7_t ** pQ7) +{ + q31_t val; + + memcpy (&val, *pQ7, 4); + *pQ7 += 4; + + return (val); +} + +/** + @brief Read 4 Q7 from Q7 pointer and decrement pointer afterwards. + @param[in] pQ7 points to input value + @return Q31 value + */ +__STATIC_FORCEINLINE q31_t read_q7x4_da ( + q7_t ** pQ7) +{ + q31_t val; + + memcpy (&val, *pQ7, 4); + *pQ7 -= 4; + + return (val); +} + +/** + @brief Write 4 Q7 to Q7 pointer and increment pointer afterwards. + @param[in] pQ7 points to input value + @param[in] value Q31 value + @return none + */ +__STATIC_FORCEINLINE void write_q7x4_ia ( + q7_t ** pQ7, + q31_t value) +{ + q31_t val = value; + + memcpy (*pQ7, &val, 4); + *pQ7 += 4; +} + +/* + +Normally those kind of definitions are in a compiler file +in Core or Core_A. + +But for MSVC compiler it is a bit special. The goal is very specific +to CMSIS-DSP and only to allow the use of this library from other +systems like Python or Matlab. + +MSVC is not going to be used to cross-compile to ARM. So, having a MSVC +compiler file in Core or Core_A would not make sense. + +*/ +#if defined ( _MSC_VER ) + __STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data) + { + if (data == 0U) { return 32U; } + + uint32_t count = 0U; + uint32_t mask = 0x80000000U; + + while ((data & mask) == 0U) + { + count += 1U; + mask = mask >> 1U; + } + return count; + } + + __STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat) + { + if ((sat >= 1U) && (sat <= 32U)) + { + const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U); + const int32_t min = -1 - max ; + if (val > max) + { + return max; + } + else if (val < min) + { + return min; + } + } + return val; + } + + __STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat) + { + if (sat <= 31U) + { + const uint32_t max = ((1U << sat) - 1U); + if (val > (int32_t)max) + { + return max; + } + else if (val < 0) + { + return 0U; + } + } + return (uint32_t)val; + } +#endif + +#ifndef ARM_MATH_DSP + /** + * @brief definition to pack two 16 bit values. + */ + #define __PKHBT(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0x0000FFFF) | \ + (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000) ) + #define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0xFFFF0000) | \ + (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF) ) +#endif + + /** + * @brief definition to pack four 8 bit values. + */ +#ifndef ARM_MATH_BIG_ENDIAN + #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) << 0) & (int32_t)0x000000FF) | \ + (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) | \ + (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \ + (((int32_t)(v3) << 24) & (int32_t)0xFF000000) ) +#else + #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) << 0) & (int32_t)0x000000FF) | \ + (((int32_t)(v2) << 8) & (int32_t)0x0000FF00) | \ + (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) | \ + (((int32_t)(v0) << 24) & (int32_t)0xFF000000) ) +#endif + + + /** + * @brief Clips Q63 to Q31 values. + */ + __STATIC_FORCEINLINE q31_t clip_q63_to_q31( + q63_t x) + { + return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ? + ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x; + } + + /** + * @brief Clips Q63 to Q15 values. + */ + __STATIC_FORCEINLINE q15_t clip_q63_to_q15( + q63_t x) + { + return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ? + ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15); + } + + /** + * @brief Clips Q31 to Q7 values. + */ + __STATIC_FORCEINLINE q7_t clip_q31_to_q7( + q31_t x) + { + return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ? + ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x; + } + + /** + * @brief Clips Q31 to Q15 values. + */ + __STATIC_FORCEINLINE q15_t clip_q31_to_q15( + q31_t x) + { + return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ? + ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x; + } + + /** + * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format. + */ + __STATIC_FORCEINLINE q63_t mult32x64( + q63_t x, + q31_t y) + { + return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) + + (((q63_t) (x >> 32) * y) ) ); + } + + /** + * @brief Function to Calculates 1/in (reciprocal) value of Q31 Data type. + */ + __STATIC_FORCEINLINE uint32_t arm_recip_q31( + q31_t in, + q31_t * dst, + const q31_t * pRecipTable) + { + q31_t out; + uint32_t tempVal; + uint32_t index, i; + uint32_t signBits; + + if (in > 0) + { + signBits = ((uint32_t) (__CLZ( in) - 1)); + } + else + { + signBits = ((uint32_t) (__CLZ(-in) - 1)); + } + + /* Convert input sample to 1.31 format */ + in = (in << signBits); + + /* calculation of index for initial approximated Val */ + index = (uint32_t)(in >> 24); + index = (index & INDEX_MASK); + + /* 1.31 with exp 1 */ + out = pRecipTable[index]; + + /* calculation of reciprocal value */ + /* running approximation for two iterations */ + for (i = 0U; i < 2U; i++) + { + tempVal = (uint32_t) (((q63_t) in * out) >> 31); + tempVal = 0x7FFFFFFFu - tempVal; + /* 1.31 with exp 1 */ + /* out = (q31_t) (((q63_t) out * tempVal) >> 30); */ + out = clip_q63_to_q31(((q63_t) out * tempVal) >> 30); + } + + /* write output */ + *dst = out; + + /* return num of signbits of out = 1/in value */ + return (signBits + 1U); + } + + + /** + * @brief Function to Calculates 1/in (reciprocal) value of Q15 Data type. + */ + __STATIC_FORCEINLINE uint32_t arm_recip_q15( + q15_t in, + q15_t * dst, + const q15_t * pRecipTable) + { + q15_t out = 0; + uint32_t tempVal = 0; + uint32_t index = 0, i = 0; + uint32_t signBits = 0; + + if (in > 0) + { + signBits = ((uint32_t)(__CLZ( in) - 17)); + } + else + { + signBits = ((uint32_t)(__CLZ(-in) - 17)); + } + + /* Convert input sample to 1.15 format */ + in = (in << signBits); + + /* calculation of index for initial approximated Val */ + index = (uint32_t)(in >> 8); + index = (index & INDEX_MASK); + + /* 1.15 with exp 1 */ + out = pRecipTable[index]; + + /* calculation of reciprocal value */ + /* running approximation for two iterations */ + for (i = 0U; i < 2U; i++) + { + tempVal = (uint32_t) (((q31_t) in * out) >> 15); + tempVal = 0x7FFFu - tempVal; + /* 1.15 with exp 1 */ + out = (q15_t) (((q31_t) out * tempVal) >> 14); + /* out = clip_q31_to_q15(((q31_t) out * tempVal) >> 14); */ + } + + /* write output */ + *dst = out; + + /* return num of signbits of out = 1/in value */ + return (signBits + 1); + } + +#if defined(ARM_MATH_NEON) + +static inline float32x4_t __arm_vec_sqrt_f32_neon(float32x4_t x) +{ + float32x4_t x1 = vmaxq_f32(x, vdupq_n_f32(FLT_MIN)); + float32x4_t e = vrsqrteq_f32(x1); + e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e); + e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e); + return vmulq_f32(x, e); +} + +static inline int16x8_t __arm_vec_sqrt_q15_neon(int16x8_t vec) +{ + float32x4_t tempF; + int32x4_t tempHI,tempLO; + + tempLO = vmovl_s16(vget_low_s16(vec)); + tempF = vcvtq_n_f32_s32(tempLO,15); + tempF = __arm_vec_sqrt_f32_neon(tempF); + tempLO = vcvtq_n_s32_f32(tempF,15); + + tempHI = vmovl_s16(vget_high_s16(vec)); + tempF = vcvtq_n_f32_s32(tempHI,15); + tempF = __arm_vec_sqrt_f32_neon(tempF); + tempHI = vcvtq_n_s32_f32(tempF,15); + + return(vcombine_s16(vqmovn_s32(tempLO),vqmovn_s32(tempHI))); +} + +static inline int32x4_t __arm_vec_sqrt_q31_neon(int32x4_t vec) +{ + float32x4_t temp; + + temp = vcvtq_n_f32_s32(vec,31); + temp = __arm_vec_sqrt_f32_neon(temp); + return(vcvtq_n_s32_f32(temp,31)); +} + +#endif + +/* + * @brief C custom defined intrinsic functions + */ +#if !defined (ARM_MATH_DSP) + + /* + * @brief C custom defined QADD8 + */ + __STATIC_FORCEINLINE uint32_t __QADD8( + uint32_t x, + uint32_t y) + { + q31_t r, s, t, u; + + r = __SSAT(((((q31_t)x << 24) >> 24) + (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF; + s = __SSAT(((((q31_t)x << 16) >> 24) + (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF; + t = __SSAT(((((q31_t)x << 8) >> 24) + (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF; + u = __SSAT(((((q31_t)x ) >> 24) + (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF; + + return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r ))); + } + + + /* + * @brief C custom defined QSUB8 + */ + __STATIC_FORCEINLINE uint32_t __QSUB8( + uint32_t x, + uint32_t y) + { + q31_t r, s, t, u; + + r = __SSAT(((((q31_t)x << 24) >> 24) - (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF; + s = __SSAT(((((q31_t)x << 16) >> 24) - (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF; + t = __SSAT(((((q31_t)x << 8) >> 24) - (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF; + u = __SSAT(((((q31_t)x ) >> 24) - (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF; + + return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r ))); + } + + + /* + * @brief C custom defined QADD16 + */ + __STATIC_FORCEINLINE uint32_t __QADD16( + uint32_t x, + uint32_t y) + { +/* q31_t r, s; without initialisation 'arm_offset_q15 test' fails but 'intrinsic' tests pass! for armCC */ + q31_t r = 0, s = 0; + + r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; + s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r ))); + } + + + /* + * @brief C custom defined SHADD16 + */ + __STATIC_FORCEINLINE uint32_t __SHADD16( + uint32_t x, + uint32_t y) + { + q31_t r, s; + + r = (((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; + s = (((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r ))); + } + + + /* + * @brief C custom defined QSUB16 + */ + __STATIC_FORCEINLINE uint32_t __QSUB16( + uint32_t x, + uint32_t y) + { + q31_t r, s; + + r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; + s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r ))); + } + + + /* + * @brief C custom defined SHSUB16 + */ + __STATIC_FORCEINLINE uint32_t __SHSUB16( + uint32_t x, + uint32_t y) + { + q31_t r, s; + + r = (((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; + s = (((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r ))); + } + + + /* + * @brief C custom defined QASX + */ + __STATIC_FORCEINLINE uint32_t __QASX( + uint32_t x, + uint32_t y) + { + q31_t r, s; + + r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; + s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r ))); + } + + + /* + * @brief C custom defined SHASX + */ + __STATIC_FORCEINLINE uint32_t __SHASX( + uint32_t x, + uint32_t y) + { + q31_t r, s; + + r = (((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; + s = (((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r ))); + } + + + /* + * @brief C custom defined QSAX + */ + __STATIC_FORCEINLINE uint32_t __QSAX( + uint32_t x, + uint32_t y) + { + q31_t r, s; + + r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; + s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r ))); + } + + + /* + * @brief C custom defined SHSAX + */ + __STATIC_FORCEINLINE uint32_t __SHSAX( + uint32_t x, + uint32_t y) + { + q31_t r, s; + + r = (((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; + s = (((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r ))); + } + + + /* + * @brief C custom defined SMUSDX + */ + __STATIC_FORCEINLINE uint32_t __SMUSDX( + uint32_t x, + uint32_t y) + { + return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) - + ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) )); + } + + /* + * @brief C custom defined SMUADX + */ + __STATIC_FORCEINLINE uint32_t __SMUADX( + uint32_t x, + uint32_t y) + { + return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) + + ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) )); + } + + + /* + * @brief C custom defined QADD + */ + __STATIC_FORCEINLINE int32_t __QADD( + int32_t x, + int32_t y) + { + return ((int32_t)(clip_q63_to_q31((q63_t)x + (q31_t)y))); + } + + + /* + * @brief C custom defined QSUB + */ + __STATIC_FORCEINLINE int32_t __QSUB( + int32_t x, + int32_t y) + { + return ((int32_t)(clip_q63_to_q31((q63_t)x - (q31_t)y))); + } + + + /* + * @brief C custom defined SMLAD + */ + __STATIC_FORCEINLINE uint32_t __SMLAD( + uint32_t x, + uint32_t y, + uint32_t sum) + { + return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) + + ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) + + ( ((q31_t)sum ) ) )); + } + + + /* + * @brief C custom defined SMLADX + */ + __STATIC_FORCEINLINE uint32_t __SMLADX( + uint32_t x, + uint32_t y, + uint32_t sum) + { + return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) + + ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) + + ( ((q31_t)sum ) ) )); + } + + + /* + * @brief C custom defined SMLSDX + */ + __STATIC_FORCEINLINE uint32_t __SMLSDX( + uint32_t x, + uint32_t y, + uint32_t sum) + { + return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) - + ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) + + ( ((q31_t)sum ) ) )); + } + + + /* + * @brief C custom defined SMLALD + */ + __STATIC_FORCEINLINE uint64_t __SMLALD( + uint32_t x, + uint32_t y, + uint64_t sum) + { +/* return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) + ((q15_t) x * (q15_t) y)); */ + return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) + + ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) + + ( ((q63_t)sum ) ) )); + } + + + /* + * @brief C custom defined SMLALDX + */ + __STATIC_FORCEINLINE uint64_t __SMLALDX( + uint32_t x, + uint32_t y, + uint64_t sum) + { +/* return (sum + ((q15_t) (x >> 16) * (q15_t) y)) + ((q15_t) x * (q15_t) (y >> 16)); */ + return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) + + ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) + + ( ((q63_t)sum ) ) )); + } + + + /* + * @brief C custom defined SMUAD + */ + __STATIC_FORCEINLINE uint32_t __SMUAD( + uint32_t x, + uint32_t y) + { + return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) + + ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) )); + } + + + /* + * @brief C custom defined SMUSD + */ + __STATIC_FORCEINLINE uint32_t __SMUSD( + uint32_t x, + uint32_t y) + { + return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) - + ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) )); + } + + + /* + * @brief C custom defined SXTB16 + */ + __STATIC_FORCEINLINE uint32_t __SXTB16( + uint32_t x) + { + return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) | + ((((q31_t)x << 8) >> 8) & (q31_t)0xFFFF0000) )); + } + + /* + * @brief C custom defined SMMLA + */ + __STATIC_FORCEINLINE int32_t __SMMLA( + int32_t x, + int32_t y, + int32_t sum) + { + return (sum + (int32_t) (((int64_t) x * y) >> 32)); + } + +#endif /* !defined (ARM_MATH_DSP) */ + + + /** + * @brief Instance structure for the Q7 FIR filter. + */ + typedef struct + { + uint16_t numTaps; /**< number of filter coefficients in the filter. */ + q7_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ + const q7_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ + } arm_fir_instance_q7; + + /** + * @brief Instance structure for the Q15 FIR filter. + */ + typedef struct + { + uint16_t numTaps; /**< number of filter coefficients in the filter. */ + q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ + const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ + } arm_fir_instance_q15; + + /** + * @brief Instance structure for the Q31 FIR filter. + */ + typedef struct + { + uint16_t numTaps; /**< number of filter coefficients in the filter. */ + q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ + const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ + } arm_fir_instance_q31; + + /** + * @brief Instance structure for the floating-point FIR filter. + */ + typedef struct + { + uint16_t numTaps; /**< number of filter coefficients in the filter. */ + float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ + const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ + } arm_fir_instance_f32; + + /** + * @brief Processing function for the Q7 FIR filter. + * @param[in] S points to an instance of the Q7 FIR filter structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of samples to process. + */ + void arm_fir_q7( + const arm_fir_instance_q7 * S, + const q7_t * pSrc, + q7_t * pDst, + uint32_t blockSize); + + /** + * @brief Initialization function for the Q7 FIR filter. + * @param[in,out] S points to an instance of the Q7 FIR structure. + * @param[in] numTaps Number of filter coefficients in the filter. + * @param[in] pCoeffs points to the filter coefficients. + * @param[in] pState points to the state buffer. + * @param[in] blockSize number of samples that are processed. + */ + void arm_fir_init_q7( + arm_fir_instance_q7 * S, + uint16_t numTaps, + const q7_t * pCoeffs, + q7_t * pState, + uint32_t blockSize); + + /** + * @brief Processing function for the Q15 FIR filter. + * @param[in] S points to an instance of the Q15 FIR structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of samples to process. + */ + void arm_fir_q15( + const arm_fir_instance_q15 * S, + const q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize); + + /** + * @brief Processing function for the fast Q15 FIR filter (fast version). + * @param[in] S points to an instance of the Q15 FIR filter structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of samples to process. + */ + void arm_fir_fast_q15( + const arm_fir_instance_q15 * S, + const q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize); + + /** + * @brief Initialization function for the Q15 FIR filter. + * @param[in,out] S points to an instance of the Q15 FIR filter structure. + * @param[in] numTaps Number of filter coefficients in the filter. Must be even and greater than or equal to 4. + * @param[in] pCoeffs points to the filter coefficients. + * @param[in] pState points to the state buffer. + * @param[in] blockSize number of samples that are processed at a time. + * @return The function returns either + * ARM_MATH_SUCCESS if initialization was successful or + * ARM_MATH_ARGUMENT_ERROR if numTaps is not a supported value. + */ + arm_status arm_fir_init_q15( + arm_fir_instance_q15 * S, + uint16_t numTaps, + const q15_t * pCoeffs, + q15_t * pState, + uint32_t blockSize); + + /** + * @brief Processing function for the Q31 FIR filter. + * @param[in] S points to an instance of the Q31 FIR filter structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of samples to process. + */ + void arm_fir_q31( + const arm_fir_instance_q31 * S, + const q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize); + + /** + * @brief Processing function for the fast Q31 FIR filter (fast version). + * @param[in] S points to an instance of the Q31 FIR filter structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of samples to process. + */ + void arm_fir_fast_q31( + const arm_fir_instance_q31 * S, + const q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize); + + /** + * @brief Initialization function for the Q31 FIR filter. + * @param[in,out] S points to an instance of the Q31 FIR structure. + * @param[in] numTaps Number of filter coefficients in the filter. + * @param[in] pCoeffs points to the filter coefficients. + * @param[in] pState points to the state buffer. + * @param[in] blockSize number of samples that are processed at a time. + */ + void arm_fir_init_q31( + arm_fir_instance_q31 * S, + uint16_t numTaps, + const q31_t * pCoeffs, + q31_t * pState, + uint32_t blockSize); + + /** + * @brief Processing function for the floating-point FIR filter. + * @param[in] S points to an instance of the floating-point FIR structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of samples to process. + */ + void arm_fir_f32( + const arm_fir_instance_f32 * S, + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize); + + /** + * @brief Initialization function for the floating-point FIR filter. + * @param[in,out] S points to an instance of the floating-point FIR filter structure. + * @param[in] numTaps Number of filter coefficients in the filter. + * @param[in] pCoeffs points to the filter coefficients. + * @param[in] pState points to the state buffer. + * @param[in] blockSize number of samples that are processed at a time. + */ + void arm_fir_init_f32( + arm_fir_instance_f32 * S, + uint16_t numTaps, + const float32_t * pCoeffs, + float32_t * pState, + uint32_t blockSize); + + /** + * @brief Instance structure for the Q15 Biquad cascade filter. + */ + typedef struct + { + int8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ + q15_t *pState; /**< Points to the array of state coefficients. The array is of length 4*numStages. */ + const q15_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */ + int8_t postShift; /**< Additional shift, in bits, applied to each output sample. */ + } arm_biquad_casd_df1_inst_q15; + + /** + * @brief Instance structure for the Q31 Biquad cascade filter. + */ + typedef struct + { + uint32_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ + q31_t *pState; /**< Points to the array of state coefficients. The array is of length 4*numStages. */ + const q31_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */ + uint8_t postShift; /**< Additional shift, in bits, applied to each output sample. */ + } arm_biquad_casd_df1_inst_q31; + + /** + * @brief Instance structure for the floating-point Biquad cascade filter. + */ + typedef struct + { + uint32_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ + float32_t *pState; /**< Points to the array of state coefficients. The array is of length 4*numStages. */ + const float32_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */ + } arm_biquad_casd_df1_inst_f32; + + /** + * @brief Processing function for the Q15 Biquad cascade filter. + * @param[in] S points to an instance of the Q15 Biquad cascade structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of samples to process. + */ + void arm_biquad_cascade_df1_q15( + const arm_biquad_casd_df1_inst_q15 * S, + const q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize); + + /** + * @brief Initialization function for the Q15 Biquad cascade filter. + * @param[in,out] S points to an instance of the Q15 Biquad cascade structure. + * @param[in] numStages number of 2nd order stages in the filter. + * @param[in] pCoeffs points to the filter coefficients. + * @param[in] pState points to the state buffer. + * @param[in] postShift Shift to be applied to the output. Varies according to the coefficients format + */ + void arm_biquad_cascade_df1_init_q15( + arm_biquad_casd_df1_inst_q15 * S, + uint8_t numStages, + const q15_t * pCoeffs, + q15_t * pState, + int8_t postShift); + + /** + * @brief Fast but less precise processing function for the Q15 Biquad cascade filter for Cortex-M3 and Cortex-M4. + * @param[in] S points to an instance of the Q15 Biquad cascade structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of samples to process. + */ + void arm_biquad_cascade_df1_fast_q15( + const arm_biquad_casd_df1_inst_q15 * S, + const q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize); + + /** + * @brief Processing function for the Q31 Biquad cascade filter + * @param[in] S points to an instance of the Q31 Biquad cascade structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of samples to process. + */ + void arm_biquad_cascade_df1_q31( + const arm_biquad_casd_df1_inst_q31 * S, + const q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize); + + /** + * @brief Fast but less precise processing function for the Q31 Biquad cascade filter for Cortex-M3 and Cortex-M4. + * @param[in] S points to an instance of the Q31 Biquad cascade structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of samples to process. + */ + void arm_biquad_cascade_df1_fast_q31( + const arm_biquad_casd_df1_inst_q31 * S, + const q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize); + + /** + * @brief Initialization function for the Q31 Biquad cascade filter. + * @param[in,out] S points to an instance of the Q31 Biquad cascade structure. + * @param[in] numStages number of 2nd order stages in the filter. + * @param[in] pCoeffs points to the filter coefficients. + * @param[in] pState points to the state buffer. + * @param[in] postShift Shift to be applied to the output. Varies according to the coefficients format + */ + void arm_biquad_cascade_df1_init_q31( + arm_biquad_casd_df1_inst_q31 * S, + uint8_t numStages, + const q31_t * pCoeffs, + q31_t * pState, + int8_t postShift); + + /** + * @brief Processing function for the floating-point Biquad cascade filter. + * @param[in] S points to an instance of the floating-point Biquad cascade structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of samples to process. + */ + void arm_biquad_cascade_df1_f32( + const arm_biquad_casd_df1_inst_f32 * S, + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize); + + /** + * @brief Initialization function for the floating-point Biquad cascade filter. + * @param[in,out] S points to an instance of the floating-point Biquad cascade structure. + * @param[in] numStages number of 2nd order stages in the filter. + * @param[in] pCoeffs points to the filter coefficients. + * @param[in] pState points to the state buffer. + */ + void arm_biquad_cascade_df1_init_f32( + arm_biquad_casd_df1_inst_f32 * S, + uint8_t numStages, + const float32_t * pCoeffs, + float32_t * pState); + + /** + * @brief Instance structure for the floating-point matrix structure. + */ + typedef struct + { + uint16_t numRows; /**< number of rows of the matrix. */ + uint16_t numCols; /**< number of columns of the matrix. */ + float32_t *pData; /**< points to the data of the matrix. */ + } arm_matrix_instance_f32; + + + /** + * @brief Instance structure for the floating-point matrix structure. + */ + typedef struct + { + uint16_t numRows; /**< number of rows of the matrix. */ + uint16_t numCols; /**< number of columns of the matrix. */ + float64_t *pData; /**< points to the data of the matrix. */ + } arm_matrix_instance_f64; + + /** + * @brief Instance structure for the Q15 matrix structure. + */ + typedef struct + { + uint16_t numRows; /**< number of rows of the matrix. */ + uint16_t numCols; /**< number of columns of the matrix. */ + q15_t *pData; /**< points to the data of the matrix. */ + } arm_matrix_instance_q15; + + /** + * @brief Instance structure for the Q31 matrix structure. + */ + typedef struct + { + uint16_t numRows; /**< number of rows of the matrix. */ + uint16_t numCols; /**< number of columns of the matrix. */ + q31_t *pData; /**< points to the data of the matrix. */ + } arm_matrix_instance_q31; + + /** + * @brief Floating-point matrix addition. + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_add_f32( + const arm_matrix_instance_f32 * pSrcA, + const arm_matrix_instance_f32 * pSrcB, + arm_matrix_instance_f32 * pDst); + + /** + * @brief Q15 matrix addition. + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_add_q15( + const arm_matrix_instance_q15 * pSrcA, + const arm_matrix_instance_q15 * pSrcB, + arm_matrix_instance_q15 * pDst); + + /** + * @brief Q31 matrix addition. + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_add_q31( + const arm_matrix_instance_q31 * pSrcA, + const arm_matrix_instance_q31 * pSrcB, + arm_matrix_instance_q31 * pDst); + + /** + * @brief Floating-point, complex, matrix multiplication. + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_cmplx_mult_f32( + const arm_matrix_instance_f32 * pSrcA, + const arm_matrix_instance_f32 * pSrcB, + arm_matrix_instance_f32 * pDst); + + /** + * @brief Q15, complex, matrix multiplication. + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_cmplx_mult_q15( + const arm_matrix_instance_q15 * pSrcA, + const arm_matrix_instance_q15 * pSrcB, + arm_matrix_instance_q15 * pDst, + q15_t * pScratch); + + /** + * @brief Q31, complex, matrix multiplication. + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_cmplx_mult_q31( + const arm_matrix_instance_q31 * pSrcA, + const arm_matrix_instance_q31 * pSrcB, + arm_matrix_instance_q31 * pDst); + + /** + * @brief Floating-point matrix transpose. + * @param[in] pSrc points to the input matrix + * @param[out] pDst points to the output matrix + * @return The function returns either ARM_MATH_SIZE_MISMATCH + * or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_trans_f32( + const arm_matrix_instance_f32 * pSrc, + arm_matrix_instance_f32 * pDst); + + /** + * @brief Q15 matrix transpose. + * @param[in] pSrc points to the input matrix + * @param[out] pDst points to the output matrix + * @return The function returns either ARM_MATH_SIZE_MISMATCH + * or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_trans_q15( + const arm_matrix_instance_q15 * pSrc, + arm_matrix_instance_q15 * pDst); + + /** + * @brief Q31 matrix transpose. + * @param[in] pSrc points to the input matrix + * @param[out] pDst points to the output matrix + * @return The function returns either ARM_MATH_SIZE_MISMATCH + * or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_trans_q31( + const arm_matrix_instance_q31 * pSrc, + arm_matrix_instance_q31 * pDst); + + /** + * @brief Floating-point matrix multiplication + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_mult_f32( + const arm_matrix_instance_f32 * pSrcA, + const arm_matrix_instance_f32 * pSrcB, + arm_matrix_instance_f32 * pDst); + + /** + * @brief Q15 matrix multiplication + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @param[in] pState points to the array for storing intermediate results + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_mult_q15( + const arm_matrix_instance_q15 * pSrcA, + const arm_matrix_instance_q15 * pSrcB, + arm_matrix_instance_q15 * pDst, + q15_t * pState); + + /** + * @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4 + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @param[in] pState points to the array for storing intermediate results + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_mult_fast_q15( + const arm_matrix_instance_q15 * pSrcA, + const arm_matrix_instance_q15 * pSrcB, + arm_matrix_instance_q15 * pDst, + q15_t * pState); + + /** + * @brief Q31 matrix multiplication + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_mult_q31( + const arm_matrix_instance_q31 * pSrcA, + const arm_matrix_instance_q31 * pSrcB, + arm_matrix_instance_q31 * pDst); + + /** + * @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4 + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_mult_fast_q31( + const arm_matrix_instance_q31 * pSrcA, + const arm_matrix_instance_q31 * pSrcB, + arm_matrix_instance_q31 * pDst); + + /** + * @brief Floating-point matrix subtraction + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_sub_f32( + const arm_matrix_instance_f32 * pSrcA, + const arm_matrix_instance_f32 * pSrcB, + arm_matrix_instance_f32 * pDst); + + /** + * @brief Q15 matrix subtraction + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_sub_q15( + const arm_matrix_instance_q15 * pSrcA, + const arm_matrix_instance_q15 * pSrcB, + arm_matrix_instance_q15 * pDst); + + /** + * @brief Q31 matrix subtraction + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_sub_q31( + const arm_matrix_instance_q31 * pSrcA, + const arm_matrix_instance_q31 * pSrcB, + arm_matrix_instance_q31 * pDst); + + /** + * @brief Floating-point matrix scaling. + * @param[in] pSrc points to the input matrix + * @param[in] scale scale factor + * @param[out] pDst points to the output matrix + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_scale_f32( + const arm_matrix_instance_f32 * pSrc, + float32_t scale, + arm_matrix_instance_f32 * pDst); + + /** + * @brief Q15 matrix scaling. + * @param[in] pSrc points to input matrix + * @param[in] scaleFract fractional portion of the scale factor + * @param[in] shift number of bits to shift the result by + * @param[out] pDst points to output matrix + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_scale_q15( + const arm_matrix_instance_q15 * pSrc, + q15_t scaleFract, + int32_t shift, + arm_matrix_instance_q15 * pDst); + + /** + * @brief Q31 matrix scaling. + * @param[in] pSrc points to input matrix + * @param[in] scaleFract fractional portion of the scale factor + * @param[in] shift number of bits to shift the result by + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_scale_q31( + const arm_matrix_instance_q31 * pSrc, + q31_t scaleFract, + int32_t shift, + arm_matrix_instance_q31 * pDst); + + /** + * @brief Q31 matrix initialization. + * @param[in,out] S points to an instance of the floating-point matrix structure. + * @param[in] nRows number of rows in the matrix. + * @param[in] nColumns number of columns in the matrix. + * @param[in] pData points to the matrix data array. + */ +void arm_mat_init_q31( + arm_matrix_instance_q31 * S, + uint16_t nRows, + uint16_t nColumns, + q31_t * pData); + + /** + * @brief Q15 matrix initialization. + * @param[in,out] S points to an instance of the floating-point matrix structure. + * @param[in] nRows number of rows in the matrix. + * @param[in] nColumns number of columns in the matrix. + * @param[in] pData points to the matrix data array. + */ +void arm_mat_init_q15( + arm_matrix_instance_q15 * S, + uint16_t nRows, + uint16_t nColumns, + q15_t * pData); + + /** + * @brief Floating-point matrix initialization. + * @param[in,out] S points to an instance of the floating-point matrix structure. + * @param[in] nRows number of rows in the matrix. + * @param[in] nColumns number of columns in the matrix. + * @param[in] pData points to the matrix data array. + */ +void arm_mat_init_f32( + arm_matrix_instance_f32 * S, + uint16_t nRows, + uint16_t nColumns, + float32_t * pData); + + + /** + * @brief Instance structure for the Q15 PID Control. + */ + typedef struct + { + q15_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */ +#if !defined (ARM_MATH_DSP) + q15_t A1; + q15_t A2; +#else + q31_t A1; /**< The derived gain A1 = -Kp - 2Kd | Kd.*/ +#endif + q15_t state[3]; /**< The state array of length 3. */ + q15_t Kp; /**< The proportional gain. */ + q15_t Ki; /**< The integral gain. */ + q15_t Kd; /**< The derivative gain. */ + } arm_pid_instance_q15; + + /** + * @brief Instance structure for the Q31 PID Control. + */ + typedef struct + { + q31_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */ + q31_t A1; /**< The derived gain, A1 = -Kp - 2Kd. */ + q31_t A2; /**< The derived gain, A2 = Kd . */ + q31_t state[3]; /**< The state array of length 3. */ + q31_t Kp; /**< The proportional gain. */ + q31_t Ki; /**< The integral gain. */ + q31_t Kd; /**< The derivative gain. */ + } arm_pid_instance_q31; + + /** + * @brief Instance structure for the floating-point PID Control. + */ + typedef struct + { + float32_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */ + float32_t A1; /**< The derived gain, A1 = -Kp - 2Kd. */ + float32_t A2; /**< The derived gain, A2 = Kd . */ + float32_t state[3]; /**< The state array of length 3. */ + float32_t Kp; /**< The proportional gain. */ + float32_t Ki; /**< The integral gain. */ + float32_t Kd; /**< The derivative gain. */ + } arm_pid_instance_f32; + + + + /** + * @brief Initialization function for the floating-point PID Control. + * @param[in,out] S points to an instance of the PID structure. + * @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state. + */ + void arm_pid_init_f32( + arm_pid_instance_f32 * S, + int32_t resetStateFlag); + + + /** + * @brief Reset function for the floating-point PID Control. + * @param[in,out] S is an instance of the floating-point PID Control structure + */ + void arm_pid_reset_f32( + arm_pid_instance_f32 * S); + + + /** + * @brief Initialization function for the Q31 PID Control. + * @param[in,out] S points to an instance of the Q15 PID structure. + * @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state. + */ + void arm_pid_init_q31( + arm_pid_instance_q31 * S, + int32_t resetStateFlag); + + + /** + * @brief Reset function for the Q31 PID Control. + * @param[in,out] S points to an instance of the Q31 PID Control structure + */ + + void arm_pid_reset_q31( + arm_pid_instance_q31 * S); + + + /** + * @brief Initialization function for the Q15 PID Control. + * @param[in,out] S points to an instance of the Q15 PID structure. + * @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state. + */ + void arm_pid_init_q15( + arm_pid_instance_q15 * S, + int32_t resetStateFlag); + + + /** + * @brief Reset function for the Q15 PID Control. + * @param[in,out] S points to an instance of the q15 PID Control structure + */ + void arm_pid_reset_q15( + arm_pid_instance_q15 * S); + + + /** + * @brief Instance structure for the floating-point Linear Interpolate function. + */ + typedef struct + { + uint32_t nValues; /**< nValues */ + float32_t x1; /**< x1 */ + float32_t xSpacing; /**< xSpacing */ + float32_t *pYData; /**< pointer to the table of Y values */ + } arm_linear_interp_instance_f32; + + /** + * @brief Instance structure for the floating-point bilinear interpolation function. + */ + typedef struct + { + uint16_t numRows; /**< number of rows in the data table. */ + uint16_t numCols; /**< number of columns in the data table. */ + float32_t *pData; /**< points to the data table. */ + } arm_bilinear_interp_instance_f32; + + /** + * @brief Instance structure for the Q31 bilinear interpolation function. + */ + typedef struct + { + uint16_t numRows; /**< number of rows in the data table. */ + uint16_t numCols; /**< number of columns in the data table. */ + q31_t *pData; /**< points to the data table. */ + } arm_bilinear_interp_instance_q31; + + /** + * @brief Instance structure for the Q15 bilinear interpolation function. + */ + typedef struct + { + uint16_t numRows; /**< number of rows in the data table. */ + uint16_t numCols; /**< number of columns in the data table. */ + q15_t *pData; /**< points to the data table. */ + } arm_bilinear_interp_instance_q15; + + /** + * @brief Instance structure for the Q15 bilinear interpolation function. + */ + typedef struct + { + uint16_t numRows; /**< number of rows in the data table. */ + uint16_t numCols; /**< number of columns in the data table. */ + q7_t *pData; /**< points to the data table. */ + } arm_bilinear_interp_instance_q7; + + + /** + * @brief Q7 vector multiplication. + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in each vector + */ + void arm_mult_q7( + const q7_t * pSrcA, + const q7_t * pSrcB, + q7_t * pDst, + uint32_t blockSize); + + + /** + * @brief Q15 vector multiplication. + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in each vector + */ + void arm_mult_q15( + const q15_t * pSrcA, + const q15_t * pSrcB, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @brief Q31 vector multiplication. + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in each vector + */ + void arm_mult_q31( + const q31_t * pSrcA, + const q31_t * pSrcB, + q31_t * pDst, + uint32_t blockSize); + + + /** + * @brief Floating-point vector multiplication. + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in each vector + */ + void arm_mult_f32( + const float32_t * pSrcA, + const float32_t * pSrcB, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Instance structure for the Q15 CFFT/CIFFT function. + */ + typedef struct + { + uint16_t fftLen; /**< length of the FFT. */ + uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ + uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ + const q15_t *pTwiddle; /**< points to the Sin twiddle factor table. */ + const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ + uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ + uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ + } arm_cfft_radix2_instance_q15; + +/* Deprecated */ + arm_status arm_cfft_radix2_init_q15( + arm_cfft_radix2_instance_q15 * S, + uint16_t fftLen, + uint8_t ifftFlag, + uint8_t bitReverseFlag); + +/* Deprecated */ + void arm_cfft_radix2_q15( + const arm_cfft_radix2_instance_q15 * S, + q15_t * pSrc); + + + /** + * @brief Instance structure for the Q15 CFFT/CIFFT function. + */ + typedef struct + { + uint16_t fftLen; /**< length of the FFT. */ + uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ + uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ + const q15_t *pTwiddle; /**< points to the twiddle factor table. */ + const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ + uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ + uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ + } arm_cfft_radix4_instance_q15; + +/* Deprecated */ + arm_status arm_cfft_radix4_init_q15( + arm_cfft_radix4_instance_q15 * S, + uint16_t fftLen, + uint8_t ifftFlag, + uint8_t bitReverseFlag); + +/* Deprecated */ + void arm_cfft_radix4_q15( + const arm_cfft_radix4_instance_q15 * S, + q15_t * pSrc); + + /** + * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function. + */ + typedef struct + { + uint16_t fftLen; /**< length of the FFT. */ + uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ + uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ + const q31_t *pTwiddle; /**< points to the Twiddle factor table. */ + const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ + uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ + uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ + } arm_cfft_radix2_instance_q31; + +/* Deprecated */ + arm_status arm_cfft_radix2_init_q31( + arm_cfft_radix2_instance_q31 * S, + uint16_t fftLen, + uint8_t ifftFlag, + uint8_t bitReverseFlag); + +/* Deprecated */ + void arm_cfft_radix2_q31( + const arm_cfft_radix2_instance_q31 * S, + q31_t * pSrc); + + /** + * @brief Instance structure for the Q31 CFFT/CIFFT function. + */ + typedef struct + { + uint16_t fftLen; /**< length of the FFT. */ + uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ + uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ + const q31_t *pTwiddle; /**< points to the twiddle factor table. */ + const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ + uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ + uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ + } arm_cfft_radix4_instance_q31; + +/* Deprecated */ + void arm_cfft_radix4_q31( + const arm_cfft_radix4_instance_q31 * S, + q31_t * pSrc); + +/* Deprecated */ + arm_status arm_cfft_radix4_init_q31( + arm_cfft_radix4_instance_q31 * S, + uint16_t fftLen, + uint8_t ifftFlag, + uint8_t bitReverseFlag); + + /** + * @brief Instance structure for the floating-point CFFT/CIFFT function. + */ + typedef struct + { + uint16_t fftLen; /**< length of the FFT. */ + uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ + uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ + const float32_t *pTwiddle; /**< points to the Twiddle factor table. */ + const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ + uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ + uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ + float32_t onebyfftLen; /**< value of 1/fftLen. */ + } arm_cfft_radix2_instance_f32; + +/* Deprecated */ + arm_status arm_cfft_radix2_init_f32( + arm_cfft_radix2_instance_f32 * S, + uint16_t fftLen, + uint8_t ifftFlag, + uint8_t bitReverseFlag); + +/* Deprecated */ + void arm_cfft_radix2_f32( + const arm_cfft_radix2_instance_f32 * S, + float32_t * pSrc); + + /** + * @brief Instance structure for the floating-point CFFT/CIFFT function. + */ + typedef struct + { + uint16_t fftLen; /**< length of the FFT. */ + uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ + uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ + const float32_t *pTwiddle; /**< points to the Twiddle factor table. */ + const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ + uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ + uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ + float32_t onebyfftLen; /**< value of 1/fftLen. */ + } arm_cfft_radix4_instance_f32; + +/* Deprecated */ + arm_status arm_cfft_radix4_init_f32( + arm_cfft_radix4_instance_f32 * S, + uint16_t fftLen, + uint8_t ifftFlag, + uint8_t bitReverseFlag); + +/* Deprecated */ + void arm_cfft_radix4_f32( + const arm_cfft_radix4_instance_f32 * S, + float32_t * pSrc); + + /** + * @brief Instance structure for the fixed-point CFFT/CIFFT function. + */ + typedef struct + { + uint16_t fftLen; /**< length of the FFT. */ + const q15_t *pTwiddle; /**< points to the Twiddle factor table. */ + const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ + uint16_t bitRevLength; /**< bit reversal table length. */ + } arm_cfft_instance_q15; + +void arm_cfft_q15( + const arm_cfft_instance_q15 * S, + q15_t * p1, + uint8_t ifftFlag, + uint8_t bitReverseFlag); + + /** + * @brief Instance structure for the fixed-point CFFT/CIFFT function. + */ + typedef struct + { + uint16_t fftLen; /**< length of the FFT. */ + const q31_t *pTwiddle; /**< points to the Twiddle factor table. */ + const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ + uint16_t bitRevLength; /**< bit reversal table length. */ + } arm_cfft_instance_q31; + +void arm_cfft_q31( + const arm_cfft_instance_q31 * S, + q31_t * p1, + uint8_t ifftFlag, + uint8_t bitReverseFlag); + + /** + * @brief Instance structure for the floating-point CFFT/CIFFT function. + */ + typedef struct + { + uint16_t fftLen; /**< length of the FFT. */ + const float32_t *pTwiddle; /**< points to the Twiddle factor table. */ + const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ + uint16_t bitRevLength; /**< bit reversal table length. */ + } arm_cfft_instance_f32; + + void arm_cfft_f32( + const arm_cfft_instance_f32 * S, + float32_t * p1, + uint8_t ifftFlag, + uint8_t bitReverseFlag); + + /** + * @brief Instance structure for the Q15 RFFT/RIFFT function. + */ + typedef struct + { + uint32_t fftLenReal; /**< length of the real FFT. */ + uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ + uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ + uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ + const q15_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ + const q15_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */ + const arm_cfft_instance_q15 *pCfft; /**< points to the complex FFT instance. */ + } arm_rfft_instance_q15; + + arm_status arm_rfft_init_q15( + arm_rfft_instance_q15 * S, + uint32_t fftLenReal, + uint32_t ifftFlagR, + uint32_t bitReverseFlag); + + void arm_rfft_q15( + const arm_rfft_instance_q15 * S, + q15_t * pSrc, + q15_t * pDst); + + /** + * @brief Instance structure for the Q31 RFFT/RIFFT function. + */ + typedef struct + { + uint32_t fftLenReal; /**< length of the real FFT. */ + uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ + uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ + uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ + const q31_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ + const q31_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */ + const arm_cfft_instance_q31 *pCfft; /**< points to the complex FFT instance. */ + } arm_rfft_instance_q31; + + arm_status arm_rfft_init_q31( + arm_rfft_instance_q31 * S, + uint32_t fftLenReal, + uint32_t ifftFlagR, + uint32_t bitReverseFlag); + + void arm_rfft_q31( + const arm_rfft_instance_q31 * S, + q31_t * pSrc, + q31_t * pDst); + + /** + * @brief Instance structure for the floating-point RFFT/RIFFT function. + */ + typedef struct + { + uint32_t fftLenReal; /**< length of the real FFT. */ + uint16_t fftLenBy2; /**< length of the complex FFT. */ + uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ + uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ + uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ + const float32_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ + const float32_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */ + arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */ + } arm_rfft_instance_f32; + + arm_status arm_rfft_init_f32( + arm_rfft_instance_f32 * S, + arm_cfft_radix4_instance_f32 * S_CFFT, + uint32_t fftLenReal, + uint32_t ifftFlagR, + uint32_t bitReverseFlag); + + void arm_rfft_f32( + const arm_rfft_instance_f32 * S, + float32_t * pSrc, + float32_t * pDst); + + /** + * @brief Instance structure for the floating-point RFFT/RIFFT function. + */ +typedef struct + { + arm_cfft_instance_f32 Sint; /**< Internal CFFT structure. */ + uint16_t fftLenRFFT; /**< length of the real sequence */ + const float32_t * pTwiddleRFFT; /**< Twiddle factors real stage */ + } arm_rfft_fast_instance_f32 ; + +arm_status arm_rfft_fast_init_f32 ( + arm_rfft_fast_instance_f32 * S, + uint16_t fftLen); + +arm_status arm_rfft_32_fast_init_f32 ( arm_rfft_fast_instance_f32 * S ); + +arm_status arm_rfft_64_fast_init_f32 ( arm_rfft_fast_instance_f32 * S ); + +arm_status arm_rfft_128_fast_init_f32 ( arm_rfft_fast_instance_f32 * S ); + +arm_status arm_rfft_256_fast_init_f32 ( arm_rfft_fast_instance_f32 * S ); + +arm_status arm_rfft_512_fast_init_f32 ( arm_rfft_fast_instance_f32 * S ); + +arm_status arm_rfft_1024_fast_init_f32 ( arm_rfft_fast_instance_f32 * S ); + +arm_status arm_rfft_2048_fast_init_f32 ( arm_rfft_fast_instance_f32 * S ); + +arm_status arm_rfft_4096_fast_init_f32 ( arm_rfft_fast_instance_f32 * S ); + + + void arm_rfft_fast_f32( + arm_rfft_fast_instance_f32 * S, + float32_t * p, float32_t * pOut, + uint8_t ifftFlag); + + /** + * @brief Instance structure for the floating-point DCT4/IDCT4 function. + */ + typedef struct + { + uint16_t N; /**< length of the DCT4. */ + uint16_t Nby2; /**< half of the length of the DCT4. */ + float32_t normalize; /**< normalizing factor. */ + const float32_t *pTwiddle; /**< points to the twiddle factor table. */ + const float32_t *pCosFactor; /**< points to the cosFactor table. */ + arm_rfft_instance_f32 *pRfft; /**< points to the real FFT instance. */ + arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */ + } arm_dct4_instance_f32; + + + /** + * @brief Initialization function for the floating-point DCT4/IDCT4. + * @param[in,out] S points to an instance of floating-point DCT4/IDCT4 structure. + * @param[in] S_RFFT points to an instance of floating-point RFFT/RIFFT structure. + * @param[in] S_CFFT points to an instance of floating-point CFFT/CIFFT structure. + * @param[in] N length of the DCT4. + * @param[in] Nby2 half of the length of the DCT4. + * @param[in] normalize normalizing factor. + * @return arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if fftLenReal is not a supported transform length. + */ + arm_status arm_dct4_init_f32( + arm_dct4_instance_f32 * S, + arm_rfft_instance_f32 * S_RFFT, + arm_cfft_radix4_instance_f32 * S_CFFT, + uint16_t N, + uint16_t Nby2, + float32_t normalize); + + + /** + * @brief Processing function for the floating-point DCT4/IDCT4. + * @param[in] S points to an instance of the floating-point DCT4/IDCT4 structure. + * @param[in] pState points to state buffer. + * @param[in,out] pInlineBuffer points to the in-place input and output buffer. + */ + void arm_dct4_f32( + const arm_dct4_instance_f32 * S, + float32_t * pState, + float32_t * pInlineBuffer); + + + /** + * @brief Instance structure for the Q31 DCT4/IDCT4 function. + */ + typedef struct + { + uint16_t N; /**< length of the DCT4. */ + uint16_t Nby2; /**< half of the length of the DCT4. */ + q31_t normalize; /**< normalizing factor. */ + const q31_t *pTwiddle; /**< points to the twiddle factor table. */ + const q31_t *pCosFactor; /**< points to the cosFactor table. */ + arm_rfft_instance_q31 *pRfft; /**< points to the real FFT instance. */ + arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */ + } arm_dct4_instance_q31; + + + /** + * @brief Initialization function for the Q31 DCT4/IDCT4. + * @param[in,out] S points to an instance of Q31 DCT4/IDCT4 structure. + * @param[in] S_RFFT points to an instance of Q31 RFFT/RIFFT structure + * @param[in] S_CFFT points to an instance of Q31 CFFT/CIFFT structure + * @param[in] N length of the DCT4. + * @param[in] Nby2 half of the length of the DCT4. + * @param[in] normalize normalizing factor. + * @return arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if N is not a supported transform length. + */ + arm_status arm_dct4_init_q31( + arm_dct4_instance_q31 * S, + arm_rfft_instance_q31 * S_RFFT, + arm_cfft_radix4_instance_q31 * S_CFFT, + uint16_t N, + uint16_t Nby2, + q31_t normalize); + + + /** + * @brief Processing function for the Q31 DCT4/IDCT4. + * @param[in] S points to an instance of the Q31 DCT4 structure. + * @param[in] pState points to state buffer. + * @param[in,out] pInlineBuffer points to the in-place input and output buffer. + */ + void arm_dct4_q31( + const arm_dct4_instance_q31 * S, + q31_t * pState, + q31_t * pInlineBuffer); + + + /** + * @brief Instance structure for the Q15 DCT4/IDCT4 function. + */ + typedef struct + { + uint16_t N; /**< length of the DCT4. */ + uint16_t Nby2; /**< half of the length of the DCT4. */ + q15_t normalize; /**< normalizing factor. */ + const q15_t *pTwiddle; /**< points to the twiddle factor table. */ + const q15_t *pCosFactor; /**< points to the cosFactor table. */ + arm_rfft_instance_q15 *pRfft; /**< points to the real FFT instance. */ + arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */ + } arm_dct4_instance_q15; + + + /** + * @brief Initialization function for the Q15 DCT4/IDCT4. + * @param[in,out] S points to an instance of Q15 DCT4/IDCT4 structure. + * @param[in] S_RFFT points to an instance of Q15 RFFT/RIFFT structure. + * @param[in] S_CFFT points to an instance of Q15 CFFT/CIFFT structure. + * @param[in] N length of the DCT4. + * @param[in] Nby2 half of the length of the DCT4. + * @param[in] normalize normalizing factor. + * @return arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if N is not a supported transform length. + */ + arm_status arm_dct4_init_q15( + arm_dct4_instance_q15 * S, + arm_rfft_instance_q15 * S_RFFT, + arm_cfft_radix4_instance_q15 * S_CFFT, + uint16_t N, + uint16_t Nby2, + q15_t normalize); + + + /** + * @brief Processing function for the Q15 DCT4/IDCT4. + * @param[in] S points to an instance of the Q15 DCT4 structure. + * @param[in] pState points to state buffer. + * @param[in,out] pInlineBuffer points to the in-place input and output buffer. + */ + void arm_dct4_q15( + const arm_dct4_instance_q15 * S, + q15_t * pState, + q15_t * pInlineBuffer); + + + /** + * @brief Floating-point vector addition. + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in each vector + */ + void arm_add_f32( + const float32_t * pSrcA, + const float32_t * pSrcB, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Q7 vector addition. + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in each vector + */ + void arm_add_q7( + const q7_t * pSrcA, + const q7_t * pSrcB, + q7_t * pDst, + uint32_t blockSize); + + + /** + * @brief Q15 vector addition. + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in each vector + */ + void arm_add_q15( + const q15_t * pSrcA, + const q15_t * pSrcB, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @brief Q31 vector addition. + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in each vector + */ + void arm_add_q31( + const q31_t * pSrcA, + const q31_t * pSrcB, + q31_t * pDst, + uint32_t blockSize); + + + /** + * @brief Floating-point vector subtraction. + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in each vector + */ + void arm_sub_f32( + const float32_t * pSrcA, + const float32_t * pSrcB, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Q7 vector subtraction. + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in each vector + */ + void arm_sub_q7( + const q7_t * pSrcA, + const q7_t * pSrcB, + q7_t * pDst, + uint32_t blockSize); + + + /** + * @brief Q15 vector subtraction. + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in each vector + */ + void arm_sub_q15( + const q15_t * pSrcA, + const q15_t * pSrcB, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @brief Q31 vector subtraction. + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in each vector + */ + void arm_sub_q31( + const q31_t * pSrcA, + const q31_t * pSrcB, + q31_t * pDst, + uint32_t blockSize); + + + /** + * @brief Multiplies a floating-point vector by a scalar. + * @param[in] pSrc points to the input vector + * @param[in] scale scale factor to be applied + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in the vector + */ + void arm_scale_f32( + const float32_t * pSrc, + float32_t scale, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Multiplies a Q7 vector by a scalar. + * @param[in] pSrc points to the input vector + * @param[in] scaleFract fractional portion of the scale value + * @param[in] shift number of bits to shift the result by + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in the vector + */ + void arm_scale_q7( + const q7_t * pSrc, + q7_t scaleFract, + int8_t shift, + q7_t * pDst, + uint32_t blockSize); + + + /** + * @brief Multiplies a Q15 vector by a scalar. + * @param[in] pSrc points to the input vector + * @param[in] scaleFract fractional portion of the scale value + * @param[in] shift number of bits to shift the result by + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in the vector + */ + void arm_scale_q15( + const q15_t * pSrc, + q15_t scaleFract, + int8_t shift, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @brief Multiplies a Q31 vector by a scalar. + * @param[in] pSrc points to the input vector + * @param[in] scaleFract fractional portion of the scale value + * @param[in] shift number of bits to shift the result by + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in the vector + */ + void arm_scale_q31( + const q31_t * pSrc, + q31_t scaleFract, + int8_t shift, + q31_t * pDst, + uint32_t blockSize); + + + /** + * @brief Q7 vector absolute value. + * @param[in] pSrc points to the input buffer + * @param[out] pDst points to the output buffer + * @param[in] blockSize number of samples in each vector + */ + void arm_abs_q7( + const q7_t * pSrc, + q7_t * pDst, + uint32_t blockSize); + + + /** + * @brief Floating-point vector absolute value. + * @param[in] pSrc points to the input buffer + * @param[out] pDst points to the output buffer + * @param[in] blockSize number of samples in each vector + */ + void arm_abs_f32( + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Q15 vector absolute value. + * @param[in] pSrc points to the input buffer + * @param[out] pDst points to the output buffer + * @param[in] blockSize number of samples in each vector + */ + void arm_abs_q15( + const q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @brief Q31 vector absolute value. + * @param[in] pSrc points to the input buffer + * @param[out] pDst points to the output buffer + * @param[in] blockSize number of samples in each vector + */ + void arm_abs_q31( + const q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize); + + + /** + * @brief Dot product of floating-point vectors. + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[in] blockSize number of samples in each vector + * @param[out] result output result returned here + */ + void arm_dot_prod_f32( + const float32_t * pSrcA, + const float32_t * pSrcB, + uint32_t blockSize, + float32_t * result); + + + /** + * @brief Dot product of Q7 vectors. + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[in] blockSize number of samples in each vector + * @param[out] result output result returned here + */ + void arm_dot_prod_q7( + const q7_t * pSrcA, + const q7_t * pSrcB, + uint32_t blockSize, + q31_t * result); + + + /** + * @brief Dot product of Q15 vectors. + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[in] blockSize number of samples in each vector + * @param[out] result output result returned here + */ + void arm_dot_prod_q15( + const q15_t * pSrcA, + const q15_t * pSrcB, + uint32_t blockSize, + q63_t * result); + + + /** + * @brief Dot product of Q31 vectors. + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[in] blockSize number of samples in each vector + * @param[out] result output result returned here + */ + void arm_dot_prod_q31( + const q31_t * pSrcA, + const q31_t * pSrcB, + uint32_t blockSize, + q63_t * result); + + + /** + * @brief Shifts the elements of a Q7 vector a specified number of bits. + * @param[in] pSrc points to the input vector + * @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right. + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in the vector + */ + void arm_shift_q7( + const q7_t * pSrc, + int8_t shiftBits, + q7_t * pDst, + uint32_t blockSize); + + + /** + * @brief Shifts the elements of a Q15 vector a specified number of bits. + * @param[in] pSrc points to the input vector + * @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right. + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in the vector + */ + void arm_shift_q15( + const q15_t * pSrc, + int8_t shiftBits, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @brief Shifts the elements of a Q31 vector a specified number of bits. + * @param[in] pSrc points to the input vector + * @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right. + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in the vector + */ + void arm_shift_q31( + const q31_t * pSrc, + int8_t shiftBits, + q31_t * pDst, + uint32_t blockSize); + + + /** + * @brief Adds a constant offset to a floating-point vector. + * @param[in] pSrc points to the input vector + * @param[in] offset is the offset to be added + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in the vector + */ + void arm_offset_f32( + const float32_t * pSrc, + float32_t offset, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Adds a constant offset to a Q7 vector. + * @param[in] pSrc points to the input vector + * @param[in] offset is the offset to be added + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in the vector + */ + void arm_offset_q7( + const q7_t * pSrc, + q7_t offset, + q7_t * pDst, + uint32_t blockSize); + + + /** + * @brief Adds a constant offset to a Q15 vector. + * @param[in] pSrc points to the input vector + * @param[in] offset is the offset to be added + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in the vector + */ + void arm_offset_q15( + const q15_t * pSrc, + q15_t offset, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @brief Adds a constant offset to a Q31 vector. + * @param[in] pSrc points to the input vector + * @param[in] offset is the offset to be added + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in the vector + */ + void arm_offset_q31( + const q31_t * pSrc, + q31_t offset, + q31_t * pDst, + uint32_t blockSize); + + + /** + * @brief Negates the elements of a floating-point vector. + * @param[in] pSrc points to the input vector + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in the vector + */ + void arm_negate_f32( + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Negates the elements of a Q7 vector. + * @param[in] pSrc points to the input vector + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in the vector + */ + void arm_negate_q7( + const q7_t * pSrc, + q7_t * pDst, + uint32_t blockSize); + + + /** + * @brief Negates the elements of a Q15 vector. + * @param[in] pSrc points to the input vector + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in the vector + */ + void arm_negate_q15( + const q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @brief Negates the elements of a Q31 vector. + * @param[in] pSrc points to the input vector + * @param[out] pDst points to the output vector + * @param[in] blockSize number of samples in the vector + */ + void arm_negate_q31( + const q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize); + + + /** + * @brief Copies the elements of a floating-point vector. + * @param[in] pSrc input pointer + * @param[out] pDst output pointer + * @param[in] blockSize number of samples to process + */ + void arm_copy_f32( + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Copies the elements of a Q7 vector. + * @param[in] pSrc input pointer + * @param[out] pDst output pointer + * @param[in] blockSize number of samples to process + */ + void arm_copy_q7( + const q7_t * pSrc, + q7_t * pDst, + uint32_t blockSize); + + + /** + * @brief Copies the elements of a Q15 vector. + * @param[in] pSrc input pointer + * @param[out] pDst output pointer + * @param[in] blockSize number of samples to process + */ + void arm_copy_q15( + const q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @brief Copies the elements of a Q31 vector. + * @param[in] pSrc input pointer + * @param[out] pDst output pointer + * @param[in] blockSize number of samples to process + */ + void arm_copy_q31( + const q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize); + + + /** + * @brief Fills a constant value into a floating-point vector. + * @param[in] value input value to be filled + * @param[out] pDst output pointer + * @param[in] blockSize number of samples to process + */ + void arm_fill_f32( + float32_t value, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Fills a constant value into a Q7 vector. + * @param[in] value input value to be filled + * @param[out] pDst output pointer + * @param[in] blockSize number of samples to process + */ + void arm_fill_q7( + q7_t value, + q7_t * pDst, + uint32_t blockSize); + + + /** + * @brief Fills a constant value into a Q15 vector. + * @param[in] value input value to be filled + * @param[out] pDst output pointer + * @param[in] blockSize number of samples to process + */ + void arm_fill_q15( + q15_t value, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @brief Fills a constant value into a Q31 vector. + * @param[in] value input value to be filled + * @param[out] pDst output pointer + * @param[in] blockSize number of samples to process + */ + void arm_fill_q31( + q31_t value, + q31_t * pDst, + uint32_t blockSize); + + +/** + * @brief Convolution of floating-point sequences. + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the location where the output result is written. Length srcALen+srcBLen-1. + */ + void arm_conv_f32( + const float32_t * pSrcA, + uint32_t srcALen, + const float32_t * pSrcB, + uint32_t srcBLen, + float32_t * pDst); + + + /** + * @brief Convolution of Q15 sequences. + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. + * @param[in] pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. + * @param[in] pScratch2 points to scratch buffer of size min(srcALen, srcBLen). + */ + void arm_conv_opt_q15( + const q15_t * pSrcA, + uint32_t srcALen, + const q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst, + q15_t * pScratch1, + q15_t * pScratch2); + + +/** + * @brief Convolution of Q15 sequences. + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the location where the output result is written. Length srcALen+srcBLen-1. + */ + void arm_conv_q15( + const q15_t * pSrcA, + uint32_t srcALen, + const q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst); + + + /** + * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4 + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. + */ + void arm_conv_fast_q15( + const q15_t * pSrcA, + uint32_t srcALen, + const q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst); + + + /** + * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4 + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. + * @param[in] pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. + * @param[in] pScratch2 points to scratch buffer of size min(srcALen, srcBLen). + */ + void arm_conv_fast_opt_q15( + const q15_t * pSrcA, + uint32_t srcALen, + const q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst, + q15_t * pScratch1, + q15_t * pScratch2); + + + /** + * @brief Convolution of Q31 sequences. + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. + */ + void arm_conv_q31( + const q31_t * pSrcA, + uint32_t srcALen, + const q31_t * pSrcB, + uint32_t srcBLen, + q31_t * pDst); + + + /** + * @brief Convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4 + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. + */ + void arm_conv_fast_q31( + const q31_t * pSrcA, + uint32_t srcALen, + const q31_t * pSrcB, + uint32_t srcBLen, + q31_t * pDst); + + + /** + * @brief Convolution of Q7 sequences. + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. + * @param[in] pScratch1 points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. + * @param[in] pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen). + */ + void arm_conv_opt_q7( + const q7_t * pSrcA, + uint32_t srcALen, + const q7_t * pSrcB, + uint32_t srcBLen, + q7_t * pDst, + q15_t * pScratch1, + q15_t * pScratch2); + + + /** + * @brief Convolution of Q7 sequences. + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. + */ + void arm_conv_q7( + const q7_t * pSrcA, + uint32_t srcALen, + const q7_t * pSrcB, + uint32_t srcBLen, + q7_t * pDst); + + + /** + * @brief Partial convolution of floating-point sequences. + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data + * @param[in] firstIndex is the first output sample to start with. + * @param[in] numPoints is the number of output points to be computed. + * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. + */ + arm_status arm_conv_partial_f32( + const float32_t * pSrcA, + uint32_t srcALen, + const float32_t * pSrcB, + uint32_t srcBLen, + float32_t * pDst, + uint32_t firstIndex, + uint32_t numPoints); + + + /** + * @brief Partial convolution of Q15 sequences. + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data + * @param[in] firstIndex is the first output sample to start with. + * @param[in] numPoints is the number of output points to be computed. + * @param[in] pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. + * @param[in] pScratch2 points to scratch buffer of size min(srcALen, srcBLen). + * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. + */ + arm_status arm_conv_partial_opt_q15( + const q15_t * pSrcA, + uint32_t srcALen, + const q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst, + uint32_t firstIndex, + uint32_t numPoints, + q15_t * pScratch1, + q15_t * pScratch2); + + + /** + * @brief Partial convolution of Q15 sequences. + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data + * @param[in] firstIndex is the first output sample to start with. + * @param[in] numPoints is the number of output points to be computed. + * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. + */ + arm_status arm_conv_partial_q15( + const q15_t * pSrcA, + uint32_t srcALen, + const q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst, + uint32_t firstIndex, + uint32_t numPoints); + + + /** + * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4 + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data + * @param[in] firstIndex is the first output sample to start with. + * @param[in] numPoints is the number of output points to be computed. + * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. + */ + arm_status arm_conv_partial_fast_q15( + const q15_t * pSrcA, + uint32_t srcALen, + const q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst, + uint32_t firstIndex, + uint32_t numPoints); + + + /** + * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4 + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data + * @param[in] firstIndex is the first output sample to start with. + * @param[in] numPoints is the number of output points to be computed. + * @param[in] pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. + * @param[in] pScratch2 points to scratch buffer of size min(srcALen, srcBLen). + * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. + */ + arm_status arm_conv_partial_fast_opt_q15( + const q15_t * pSrcA, + uint32_t srcALen, + const q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst, + uint32_t firstIndex, + uint32_t numPoints, + q15_t * pScratch1, + q15_t * pScratch2); + + + /** + * @brief Partial convolution of Q31 sequences. + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data + * @param[in] firstIndex is the first output sample to start with. + * @param[in] numPoints is the number of output points to be computed. + * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. + */ + arm_status arm_conv_partial_q31( + const q31_t * pSrcA, + uint32_t srcALen, + const q31_t * pSrcB, + uint32_t srcBLen, + q31_t * pDst, + uint32_t firstIndex, + uint32_t numPoints); + + + /** + * @brief Partial convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4 + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data + * @param[in] firstIndex is the first output sample to start with. + * @param[in] numPoints is the number of output points to be computed. + * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. + */ + arm_status arm_conv_partial_fast_q31( + const q31_t * pSrcA, + uint32_t srcALen, + const q31_t * pSrcB, + uint32_t srcBLen, + q31_t * pDst, + uint32_t firstIndex, + uint32_t numPoints); + + + /** + * @brief Partial convolution of Q7 sequences + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data + * @param[in] firstIndex is the first output sample to start with. + * @param[in] numPoints is the number of output points to be computed. + * @param[in] pScratch1 points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. + * @param[in] pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen). + * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. + */ + arm_status arm_conv_partial_opt_q7( + const q7_t * pSrcA, + uint32_t srcALen, + const q7_t * pSrcB, + uint32_t srcBLen, + q7_t * pDst, + uint32_t firstIndex, + uint32_t numPoints, + q15_t * pScratch1, + q15_t * pScratch2); + + +/** + * @brief Partial convolution of Q7 sequences. + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data + * @param[in] firstIndex is the first output sample to start with. + * @param[in] numPoints is the number of output points to be computed. + * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. + */ + arm_status arm_conv_partial_q7( + const q7_t * pSrcA, + uint32_t srcALen, + const q7_t * pSrcB, + uint32_t srcBLen, + q7_t * pDst, + uint32_t firstIndex, + uint32_t numPoints); + + + /** + * @brief Instance structure for the Q15 FIR decimator. + */ + typedef struct + { + uint8_t M; /**< decimation factor. */ + uint16_t numTaps; /**< number of coefficients in the filter. */ + const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ + q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ + } arm_fir_decimate_instance_q15; + + /** + * @brief Instance structure for the Q31 FIR decimator. + */ + typedef struct + { + uint8_t M; /**< decimation factor. */ + uint16_t numTaps; /**< number of coefficients in the filter. */ + const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ + q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ + } arm_fir_decimate_instance_q31; + +/** + @brief Instance structure for floating-point FIR decimator. + */ +typedef struct + { + uint8_t M; /**< decimation factor. */ + uint16_t numTaps; /**< number of coefficients in the filter. */ + const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ + float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ + } arm_fir_decimate_instance_f32; + + +/** + @brief Processing function for floating-point FIR decimator. + @param[in] S points to an instance of the floating-point FIR decimator structure + @param[in] pSrc points to the block of input data + @param[out] pDst points to the block of output data + @param[in] blockSize number of samples to process + */ +void arm_fir_decimate_f32( + const arm_fir_decimate_instance_f32 * S, + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize); + + +/** + @brief Initialization function for the floating-point FIR decimator. + @param[in,out] S points to an instance of the floating-point FIR decimator structure + @param[in] numTaps number of coefficients in the filter + @param[in] M decimation factor + @param[in] pCoeffs points to the filter coefficients + @param[in] pState points to the state buffer + @param[in] blockSize number of input samples to process per call + @return execution status + - \ref ARM_MATH_SUCCESS : Operation successful + - \ref ARM_MATH_LENGTH_ERROR : blockSize is not a multiple of M + */ +arm_status arm_fir_decimate_init_f32( + arm_fir_decimate_instance_f32 * S, + uint16_t numTaps, + uint8_t M, + const float32_t * pCoeffs, + float32_t * pState, + uint32_t blockSize); + + + /** + * @brief Processing function for the Q15 FIR decimator. + * @param[in] S points to an instance of the Q15 FIR decimator structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data + * @param[in] blockSize number of input samples to process per call. + */ + void arm_fir_decimate_q15( + const arm_fir_decimate_instance_q15 * S, + const q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @brief Processing function for the Q15 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4. + * @param[in] S points to an instance of the Q15 FIR decimator structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data + * @param[in] blockSize number of input samples to process per call. + */ + void arm_fir_decimate_fast_q15( + const arm_fir_decimate_instance_q15 * S, + const q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @brief Initialization function for the Q15 FIR decimator. + * @param[in,out] S points to an instance of the Q15 FIR decimator structure. + * @param[in] numTaps number of coefficients in the filter. + * @param[in] M decimation factor. + * @param[in] pCoeffs points to the filter coefficients. + * @param[in] pState points to the state buffer. + * @param[in] blockSize number of input samples to process per call. + * @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if + * blockSize is not a multiple of M. + */ + arm_status arm_fir_decimate_init_q15( + arm_fir_decimate_instance_q15 * S, + uint16_t numTaps, + uint8_t M, + const q15_t * pCoeffs, + q15_t * pState, + uint32_t blockSize); + + + /** + * @brief Processing function for the Q31 FIR decimator. + * @param[in] S points to an instance of the Q31 FIR decimator structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data + * @param[in] blockSize number of input samples to process per call. + */ + void arm_fir_decimate_q31( + const arm_fir_decimate_instance_q31 * S, + const q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize); + + /** + * @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4. + * @param[in] S points to an instance of the Q31 FIR decimator structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data + * @param[in] blockSize number of input samples to process per call. + */ + void arm_fir_decimate_fast_q31( + const arm_fir_decimate_instance_q31 * S, + const q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize); + + + /** + * @brief Initialization function for the Q31 FIR decimator. + * @param[in,out] S points to an instance of the Q31 FIR decimator structure. + * @param[in] numTaps number of coefficients in the filter. + * @param[in] M decimation factor. + * @param[in] pCoeffs points to the filter coefficients. + * @param[in] pState points to the state buffer. + * @param[in] blockSize number of input samples to process per call. + * @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if + * blockSize is not a multiple of M. + */ + arm_status arm_fir_decimate_init_q31( + arm_fir_decimate_instance_q31 * S, + uint16_t numTaps, + uint8_t M, + const q31_t * pCoeffs, + q31_t * pState, + uint32_t blockSize); + + + /** + * @brief Instance structure for the Q15 FIR interpolator. + */ + typedef struct + { + uint8_t L; /**< upsample factor. */ + uint16_t phaseLength; /**< length of each polyphase filter component. */ + const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */ + q15_t *pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */ + } arm_fir_interpolate_instance_q15; + + /** + * @brief Instance structure for the Q31 FIR interpolator. + */ + typedef struct + { + uint8_t L; /**< upsample factor. */ + uint16_t phaseLength; /**< length of each polyphase filter component. */ + const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */ + q31_t *pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */ + } arm_fir_interpolate_instance_q31; + + /** + * @brief Instance structure for the floating-point FIR interpolator. + */ + typedef struct + { + uint8_t L; /**< upsample factor. */ + uint16_t phaseLength; /**< length of each polyphase filter component. */ + const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */ + float32_t *pState; /**< points to the state variable array. The array is of length phaseLength+numTaps-1. */ + } arm_fir_interpolate_instance_f32; + + + /** + * @brief Processing function for the Q15 FIR interpolator. + * @param[in] S points to an instance of the Q15 FIR interpolator structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of input samples to process per call. + */ + void arm_fir_interpolate_q15( + const arm_fir_interpolate_instance_q15 * S, + const q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @brief Initialization function for the Q15 FIR interpolator. + * @param[in,out] S points to an instance of the Q15 FIR interpolator structure. + * @param[in] L upsample factor. + * @param[in] numTaps number of filter coefficients in the filter. + * @param[in] pCoeffs points to the filter coefficient buffer. + * @param[in] pState points to the state buffer. + * @param[in] blockSize number of input samples to process per call. + * @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if + * the filter length numTaps is not a multiple of the interpolation factor L. + */ + arm_status arm_fir_interpolate_init_q15( + arm_fir_interpolate_instance_q15 * S, + uint8_t L, + uint16_t numTaps, + const q15_t * pCoeffs, + q15_t * pState, + uint32_t blockSize); + + + /** + * @brief Processing function for the Q31 FIR interpolator. + * @param[in] S points to an instance of the Q15 FIR interpolator structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of input samples to process per call. + */ + void arm_fir_interpolate_q31( + const arm_fir_interpolate_instance_q31 * S, + const q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize); + + + /** + * @brief Initialization function for the Q31 FIR interpolator. + * @param[in,out] S points to an instance of the Q31 FIR interpolator structure. + * @param[in] L upsample factor. + * @param[in] numTaps number of filter coefficients in the filter. + * @param[in] pCoeffs points to the filter coefficient buffer. + * @param[in] pState points to the state buffer. + * @param[in] blockSize number of input samples to process per call. + * @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if + * the filter length numTaps is not a multiple of the interpolation factor L. + */ + arm_status arm_fir_interpolate_init_q31( + arm_fir_interpolate_instance_q31 * S, + uint8_t L, + uint16_t numTaps, + const q31_t * pCoeffs, + q31_t * pState, + uint32_t blockSize); + + + /** + * @brief Processing function for the floating-point FIR interpolator. + * @param[in] S points to an instance of the floating-point FIR interpolator structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of input samples to process per call. + */ + void arm_fir_interpolate_f32( + const arm_fir_interpolate_instance_f32 * S, + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Initialization function for the floating-point FIR interpolator. + * @param[in,out] S points to an instance of the floating-point FIR interpolator structure. + * @param[in] L upsample factor. + * @param[in] numTaps number of filter coefficients in the filter. + * @param[in] pCoeffs points to the filter coefficient buffer. + * @param[in] pState points to the state buffer. + * @param[in] blockSize number of input samples to process per call. + * @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if + * the filter length numTaps is not a multiple of the interpolation factor L. + */ + arm_status arm_fir_interpolate_init_f32( + arm_fir_interpolate_instance_f32 * S, + uint8_t L, + uint16_t numTaps, + const float32_t * pCoeffs, + float32_t * pState, + uint32_t blockSize); + + + /** + * @brief Instance structure for the high precision Q31 Biquad cascade filter. + */ + typedef struct + { + uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ + q63_t *pState; /**< points to the array of state coefficients. The array is of length 4*numStages. */ + const q31_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ + uint8_t postShift; /**< additional shift, in bits, applied to each output sample. */ + } arm_biquad_cas_df1_32x64_ins_q31; + + + /** + * @param[in] S points to an instance of the high precision Q31 Biquad cascade filter structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data + * @param[in] blockSize number of samples to process. + */ + void arm_biquad_cas_df1_32x64_q31( + const arm_biquad_cas_df1_32x64_ins_q31 * S, + q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize); + + + /** + * @param[in,out] S points to an instance of the high precision Q31 Biquad cascade filter structure. + * @param[in] numStages number of 2nd order stages in the filter. + * @param[in] pCoeffs points to the filter coefficients. + * @param[in] pState points to the state buffer. + * @param[in] postShift shift to be applied to the output. Varies according to the coefficients format + */ + void arm_biquad_cas_df1_32x64_init_q31( + arm_biquad_cas_df1_32x64_ins_q31 * S, + uint8_t numStages, + const q31_t * pCoeffs, + q63_t * pState, + uint8_t postShift); + + + /** + * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter. + */ + typedef struct + { + uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ + float32_t *pState; /**< points to the array of state coefficients. The array is of length 2*numStages. */ + const float32_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ + } arm_biquad_cascade_df2T_instance_f32; + + /** + * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter. + */ + typedef struct + { + uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ + float32_t *pState; /**< points to the array of state coefficients. The array is of length 4*numStages. */ + const float32_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ + } arm_biquad_cascade_stereo_df2T_instance_f32; + + /** + * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter. + */ + typedef struct + { + uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ + float64_t *pState; /**< points to the array of state coefficients. The array is of length 2*numStages. */ + float64_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ + } arm_biquad_cascade_df2T_instance_f64; + + + /** + * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter. + * @param[in] S points to an instance of the filter data structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data + * @param[in] blockSize number of samples to process. + */ + void arm_biquad_cascade_df2T_f32( + const arm_biquad_cascade_df2T_instance_f32 * S, + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter. 2 channels + * @param[in] S points to an instance of the filter data structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data + * @param[in] blockSize number of samples to process. + */ + void arm_biquad_cascade_stereo_df2T_f32( + const arm_biquad_cascade_stereo_df2T_instance_f32 * S, + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter. + * @param[in] S points to an instance of the filter data structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data + * @param[in] blockSize number of samples to process. + */ + void arm_biquad_cascade_df2T_f64( + const arm_biquad_cascade_df2T_instance_f64 * S, + float64_t * pSrc, + float64_t * pDst, + uint32_t blockSize); + + +#if defined(ARM_MATH_NEON) +void arm_biquad_cascade_df2T_compute_coefs_f32( + arm_biquad_cascade_df2T_instance_f32 * S, + uint8_t numStages, + float32_t * pCoeffs); +#endif + /** + * @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter. + * @param[in,out] S points to an instance of the filter data structure. + * @param[in] numStages number of 2nd order stages in the filter. + * @param[in] pCoeffs points to the filter coefficients. + * @param[in] pState points to the state buffer. + */ + void arm_biquad_cascade_df2T_init_f32( + arm_biquad_cascade_df2T_instance_f32 * S, + uint8_t numStages, + const float32_t * pCoeffs, + float32_t * pState); + + + /** + * @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter. + * @param[in,out] S points to an instance of the filter data structure. + * @param[in] numStages number of 2nd order stages in the filter. + * @param[in] pCoeffs points to the filter coefficients. + * @param[in] pState points to the state buffer. + */ + void arm_biquad_cascade_stereo_df2T_init_f32( + arm_biquad_cascade_stereo_df2T_instance_f32 * S, + uint8_t numStages, + const float32_t * pCoeffs, + float32_t * pState); + + + /** + * @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter. + * @param[in,out] S points to an instance of the filter data structure. + * @param[in] numStages number of 2nd order stages in the filter. + * @param[in] pCoeffs points to the filter coefficients. + * @param[in] pState points to the state buffer. + */ + void arm_biquad_cascade_df2T_init_f64( + arm_biquad_cascade_df2T_instance_f64 * S, + uint8_t numStages, + float64_t * pCoeffs, + float64_t * pState); + + + /** + * @brief Instance structure for the Q15 FIR lattice filter. + */ + typedef struct + { + uint16_t numStages; /**< number of filter stages. */ + q15_t *pState; /**< points to the state variable array. The array is of length numStages. */ + const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */ + } arm_fir_lattice_instance_q15; + + /** + * @brief Instance structure for the Q31 FIR lattice filter. + */ + typedef struct + { + uint16_t numStages; /**< number of filter stages. */ + q31_t *pState; /**< points to the state variable array. The array is of length numStages. */ + const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */ + } arm_fir_lattice_instance_q31; + + /** + * @brief Instance structure for the floating-point FIR lattice filter. + */ + typedef struct + { + uint16_t numStages; /**< number of filter stages. */ + float32_t *pState; /**< points to the state variable array. The array is of length numStages. */ + const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */ + } arm_fir_lattice_instance_f32; + + + /** + * @brief Initialization function for the Q15 FIR lattice filter. + * @param[in] S points to an instance of the Q15 FIR lattice structure. + * @param[in] numStages number of filter stages. + * @param[in] pCoeffs points to the coefficient buffer. The array is of length numStages. + * @param[in] pState points to the state buffer. The array is of length numStages. + */ + void arm_fir_lattice_init_q15( + arm_fir_lattice_instance_q15 * S, + uint16_t numStages, + const q15_t * pCoeffs, + q15_t * pState); + + + /** + * @brief Processing function for the Q15 FIR lattice filter. + * @param[in] S points to an instance of the Q15 FIR lattice structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of samples to process. + */ + void arm_fir_lattice_q15( + const arm_fir_lattice_instance_q15 * S, + const q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @brief Initialization function for the Q31 FIR lattice filter. + * @param[in] S points to an instance of the Q31 FIR lattice structure. + * @param[in] numStages number of filter stages. + * @param[in] pCoeffs points to the coefficient buffer. The array is of length numStages. + * @param[in] pState points to the state buffer. The array is of length numStages. + */ + void arm_fir_lattice_init_q31( + arm_fir_lattice_instance_q31 * S, + uint16_t numStages, + const q31_t * pCoeffs, + q31_t * pState); + + + /** + * @brief Processing function for the Q31 FIR lattice filter. + * @param[in] S points to an instance of the Q31 FIR lattice structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data + * @param[in] blockSize number of samples to process. + */ + void arm_fir_lattice_q31( + const arm_fir_lattice_instance_q31 * S, + const q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize); + + +/** + * @brief Initialization function for the floating-point FIR lattice filter. + * @param[in] S points to an instance of the floating-point FIR lattice structure. + * @param[in] numStages number of filter stages. + * @param[in] pCoeffs points to the coefficient buffer. The array is of length numStages. + * @param[in] pState points to the state buffer. The array is of length numStages. + */ + void arm_fir_lattice_init_f32( + arm_fir_lattice_instance_f32 * S, + uint16_t numStages, + const float32_t * pCoeffs, + float32_t * pState); + + + /** + * @brief Processing function for the floating-point FIR lattice filter. + * @param[in] S points to an instance of the floating-point FIR lattice structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data + * @param[in] blockSize number of samples to process. + */ + void arm_fir_lattice_f32( + const arm_fir_lattice_instance_f32 * S, + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Instance structure for the Q15 IIR lattice filter. + */ + typedef struct + { + uint16_t numStages; /**< number of stages in the filter. */ + q15_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */ + q15_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */ + q15_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */ + } arm_iir_lattice_instance_q15; + + /** + * @brief Instance structure for the Q31 IIR lattice filter. + */ + typedef struct + { + uint16_t numStages; /**< number of stages in the filter. */ + q31_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */ + q31_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */ + q31_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */ + } arm_iir_lattice_instance_q31; + + /** + * @brief Instance structure for the floating-point IIR lattice filter. + */ + typedef struct + { + uint16_t numStages; /**< number of stages in the filter. */ + float32_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */ + float32_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */ + float32_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */ + } arm_iir_lattice_instance_f32; + + + /** + * @brief Processing function for the floating-point IIR lattice filter. + * @param[in] S points to an instance of the floating-point IIR lattice structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of samples to process. + */ + void arm_iir_lattice_f32( + const arm_iir_lattice_instance_f32 * S, + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Initialization function for the floating-point IIR lattice filter. + * @param[in] S points to an instance of the floating-point IIR lattice structure. + * @param[in] numStages number of stages in the filter. + * @param[in] pkCoeffs points to the reflection coefficient buffer. The array is of length numStages. + * @param[in] pvCoeffs points to the ladder coefficient buffer. The array is of length numStages+1. + * @param[in] pState points to the state buffer. The array is of length numStages+blockSize-1. + * @param[in] blockSize number of samples to process. + */ + void arm_iir_lattice_init_f32( + arm_iir_lattice_instance_f32 * S, + uint16_t numStages, + float32_t * pkCoeffs, + float32_t * pvCoeffs, + float32_t * pState, + uint32_t blockSize); + + + /** + * @brief Processing function for the Q31 IIR lattice filter. + * @param[in] S points to an instance of the Q31 IIR lattice structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of samples to process. + */ + void arm_iir_lattice_q31( + const arm_iir_lattice_instance_q31 * S, + const q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize); + + + /** + * @brief Initialization function for the Q31 IIR lattice filter. + * @param[in] S points to an instance of the Q31 IIR lattice structure. + * @param[in] numStages number of stages in the filter. + * @param[in] pkCoeffs points to the reflection coefficient buffer. The array is of length numStages. + * @param[in] pvCoeffs points to the ladder coefficient buffer. The array is of length numStages+1. + * @param[in] pState points to the state buffer. The array is of length numStages+blockSize. + * @param[in] blockSize number of samples to process. + */ + void arm_iir_lattice_init_q31( + arm_iir_lattice_instance_q31 * S, + uint16_t numStages, + q31_t * pkCoeffs, + q31_t * pvCoeffs, + q31_t * pState, + uint32_t blockSize); + + + /** + * @brief Processing function for the Q15 IIR lattice filter. + * @param[in] S points to an instance of the Q15 IIR lattice structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data. + * @param[in] blockSize number of samples to process. + */ + void arm_iir_lattice_q15( + const arm_iir_lattice_instance_q15 * S, + const q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize); + + +/** + * @brief Initialization function for the Q15 IIR lattice filter. + * @param[in] S points to an instance of the fixed-point Q15 IIR lattice structure. + * @param[in] numStages number of stages in the filter. + * @param[in] pkCoeffs points to reflection coefficient buffer. The array is of length numStages. + * @param[in] pvCoeffs points to ladder coefficient buffer. The array is of length numStages+1. + * @param[in] pState points to state buffer. The array is of length numStages+blockSize. + * @param[in] blockSize number of samples to process per call. + */ + void arm_iir_lattice_init_q15( + arm_iir_lattice_instance_q15 * S, + uint16_t numStages, + q15_t * pkCoeffs, + q15_t * pvCoeffs, + q15_t * pState, + uint32_t blockSize); + + + /** + * @brief Instance structure for the floating-point LMS filter. + */ + typedef struct + { + uint16_t numTaps; /**< number of coefficients in the filter. */ + float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ + float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ + float32_t mu; /**< step size that controls filter coefficient updates. */ + } arm_lms_instance_f32; + + + /** + * @brief Processing function for floating-point LMS filter. + * @param[in] S points to an instance of the floating-point LMS filter structure. + * @param[in] pSrc points to the block of input data. + * @param[in] pRef points to the block of reference data. + * @param[out] pOut points to the block of output data. + * @param[out] pErr points to the block of error data. + * @param[in] blockSize number of samples to process. + */ + void arm_lms_f32( + const arm_lms_instance_f32 * S, + const float32_t * pSrc, + float32_t * pRef, + float32_t * pOut, + float32_t * pErr, + uint32_t blockSize); + + + /** + * @brief Initialization function for floating-point LMS filter. + * @param[in] S points to an instance of the floating-point LMS filter structure. + * @param[in] numTaps number of filter coefficients. + * @param[in] pCoeffs points to the coefficient buffer. + * @param[in] pState points to state buffer. + * @param[in] mu step size that controls filter coefficient updates. + * @param[in] blockSize number of samples to process. + */ + void arm_lms_init_f32( + arm_lms_instance_f32 * S, + uint16_t numTaps, + float32_t * pCoeffs, + float32_t * pState, + float32_t mu, + uint32_t blockSize); + + + /** + * @brief Instance structure for the Q15 LMS filter. + */ + typedef struct + { + uint16_t numTaps; /**< number of coefficients in the filter. */ + q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ + q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ + q15_t mu; /**< step size that controls filter coefficient updates. */ + uint32_t postShift; /**< bit shift applied to coefficients. */ + } arm_lms_instance_q15; + + + /** + * @brief Initialization function for the Q15 LMS filter. + * @param[in] S points to an instance of the Q15 LMS filter structure. + * @param[in] numTaps number of filter coefficients. + * @param[in] pCoeffs points to the coefficient buffer. + * @param[in] pState points to the state buffer. + * @param[in] mu step size that controls filter coefficient updates. + * @param[in] blockSize number of samples to process. + * @param[in] postShift bit shift applied to coefficients. + */ + void arm_lms_init_q15( + arm_lms_instance_q15 * S, + uint16_t numTaps, + q15_t * pCoeffs, + q15_t * pState, + q15_t mu, + uint32_t blockSize, + uint32_t postShift); + + + /** + * @brief Processing function for Q15 LMS filter. + * @param[in] S points to an instance of the Q15 LMS filter structure. + * @param[in] pSrc points to the block of input data. + * @param[in] pRef points to the block of reference data. + * @param[out] pOut points to the block of output data. + * @param[out] pErr points to the block of error data. + * @param[in] blockSize number of samples to process. + */ + void arm_lms_q15( + const arm_lms_instance_q15 * S, + const q15_t * pSrc, + q15_t * pRef, + q15_t * pOut, + q15_t * pErr, + uint32_t blockSize); + + + /** + * @brief Instance structure for the Q31 LMS filter. + */ + typedef struct + { + uint16_t numTaps; /**< number of coefficients in the filter. */ + q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ + q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ + q31_t mu; /**< step size that controls filter coefficient updates. */ + uint32_t postShift; /**< bit shift applied to coefficients. */ + } arm_lms_instance_q31; + + + /** + * @brief Processing function for Q31 LMS filter. + * @param[in] S points to an instance of the Q15 LMS filter structure. + * @param[in] pSrc points to the block of input data. + * @param[in] pRef points to the block of reference data. + * @param[out] pOut points to the block of output data. + * @param[out] pErr points to the block of error data. + * @param[in] blockSize number of samples to process. + */ + void arm_lms_q31( + const arm_lms_instance_q31 * S, + const q31_t * pSrc, + q31_t * pRef, + q31_t * pOut, + q31_t * pErr, + uint32_t blockSize); + + + /** + * @brief Initialization function for Q31 LMS filter. + * @param[in] S points to an instance of the Q31 LMS filter structure. + * @param[in] numTaps number of filter coefficients. + * @param[in] pCoeffs points to coefficient buffer. + * @param[in] pState points to state buffer. + * @param[in] mu step size that controls filter coefficient updates. + * @param[in] blockSize number of samples to process. + * @param[in] postShift bit shift applied to coefficients. + */ + void arm_lms_init_q31( + arm_lms_instance_q31 * S, + uint16_t numTaps, + q31_t * pCoeffs, + q31_t * pState, + q31_t mu, + uint32_t blockSize, + uint32_t postShift); + + + /** + * @brief Instance structure for the floating-point normalized LMS filter. + */ + typedef struct + { + uint16_t numTaps; /**< number of coefficients in the filter. */ + float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ + float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ + float32_t mu; /**< step size that control filter coefficient updates. */ + float32_t energy; /**< saves previous frame energy. */ + float32_t x0; /**< saves previous input sample. */ + } arm_lms_norm_instance_f32; + + + /** + * @brief Processing function for floating-point normalized LMS filter. + * @param[in] S points to an instance of the floating-point normalized LMS filter structure. + * @param[in] pSrc points to the block of input data. + * @param[in] pRef points to the block of reference data. + * @param[out] pOut points to the block of output data. + * @param[out] pErr points to the block of error data. + * @param[in] blockSize number of samples to process. + */ + void arm_lms_norm_f32( + arm_lms_norm_instance_f32 * S, + const float32_t * pSrc, + float32_t * pRef, + float32_t * pOut, + float32_t * pErr, + uint32_t blockSize); + + + /** + * @brief Initialization function for floating-point normalized LMS filter. + * @param[in] S points to an instance of the floating-point LMS filter structure. + * @param[in] numTaps number of filter coefficients. + * @param[in] pCoeffs points to coefficient buffer. + * @param[in] pState points to state buffer. + * @param[in] mu step size that controls filter coefficient updates. + * @param[in] blockSize number of samples to process. + */ + void arm_lms_norm_init_f32( + arm_lms_norm_instance_f32 * S, + uint16_t numTaps, + float32_t * pCoeffs, + float32_t * pState, + float32_t mu, + uint32_t blockSize); + + + /** + * @brief Instance structure for the Q31 normalized LMS filter. + */ + typedef struct + { + uint16_t numTaps; /**< number of coefficients in the filter. */ + q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ + q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ + q31_t mu; /**< step size that controls filter coefficient updates. */ + uint8_t postShift; /**< bit shift applied to coefficients. */ + const q31_t *recipTable; /**< points to the reciprocal initial value table. */ + q31_t energy; /**< saves previous frame energy. */ + q31_t x0; /**< saves previous input sample. */ + } arm_lms_norm_instance_q31; + + + /** + * @brief Processing function for Q31 normalized LMS filter. + * @param[in] S points to an instance of the Q31 normalized LMS filter structure. + * @param[in] pSrc points to the block of input data. + * @param[in] pRef points to the block of reference data. + * @param[out] pOut points to the block of output data. + * @param[out] pErr points to the block of error data. + * @param[in] blockSize number of samples to process. + */ + void arm_lms_norm_q31( + arm_lms_norm_instance_q31 * S, + const q31_t * pSrc, + q31_t * pRef, + q31_t * pOut, + q31_t * pErr, + uint32_t blockSize); + + + /** + * @brief Initialization function for Q31 normalized LMS filter. + * @param[in] S points to an instance of the Q31 normalized LMS filter structure. + * @param[in] numTaps number of filter coefficients. + * @param[in] pCoeffs points to coefficient buffer. + * @param[in] pState points to state buffer. + * @param[in] mu step size that controls filter coefficient updates. + * @param[in] blockSize number of samples to process. + * @param[in] postShift bit shift applied to coefficients. + */ + void arm_lms_norm_init_q31( + arm_lms_norm_instance_q31 * S, + uint16_t numTaps, + q31_t * pCoeffs, + q31_t * pState, + q31_t mu, + uint32_t blockSize, + uint8_t postShift); + + + /** + * @brief Instance structure for the Q15 normalized LMS filter. + */ + typedef struct + { + uint16_t numTaps; /**< Number of coefficients in the filter. */ + q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ + q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ + q15_t mu; /**< step size that controls filter coefficient updates. */ + uint8_t postShift; /**< bit shift applied to coefficients. */ + const q15_t *recipTable; /**< Points to the reciprocal initial value table. */ + q15_t energy; /**< saves previous frame energy. */ + q15_t x0; /**< saves previous input sample. */ + } arm_lms_norm_instance_q15; + + + /** + * @brief Processing function for Q15 normalized LMS filter. + * @param[in] S points to an instance of the Q15 normalized LMS filter structure. + * @param[in] pSrc points to the block of input data. + * @param[in] pRef points to the block of reference data. + * @param[out] pOut points to the block of output data. + * @param[out] pErr points to the block of error data. + * @param[in] blockSize number of samples to process. + */ + void arm_lms_norm_q15( + arm_lms_norm_instance_q15 * S, + const q15_t * pSrc, + q15_t * pRef, + q15_t * pOut, + q15_t * pErr, + uint32_t blockSize); + + + /** + * @brief Initialization function for Q15 normalized LMS filter. + * @param[in] S points to an instance of the Q15 normalized LMS filter structure. + * @param[in] numTaps number of filter coefficients. + * @param[in] pCoeffs points to coefficient buffer. + * @param[in] pState points to state buffer. + * @param[in] mu step size that controls filter coefficient updates. + * @param[in] blockSize number of samples to process. + * @param[in] postShift bit shift applied to coefficients. + */ + void arm_lms_norm_init_q15( + arm_lms_norm_instance_q15 * S, + uint16_t numTaps, + q15_t * pCoeffs, + q15_t * pState, + q15_t mu, + uint32_t blockSize, + uint8_t postShift); + + + /** + * @brief Correlation of floating-point sequences. + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. + */ + void arm_correlate_f32( + const float32_t * pSrcA, + uint32_t srcALen, + const float32_t * pSrcB, + uint32_t srcBLen, + float32_t * pDst); + + +/** + @brief Correlation of Q15 sequences + @param[in] pSrcA points to the first input sequence + @param[in] srcALen length of the first input sequence + @param[in] pSrcB points to the second input sequence + @param[in] srcBLen length of the second input sequence + @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. + @param[in] pScratch points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. +*/ +void arm_correlate_opt_q15( + const q15_t * pSrcA, + uint32_t srcALen, + const q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst, + q15_t * pScratch); + + +/** + @brief Correlation of Q15 sequences. + @param[in] pSrcA points to the first input sequence + @param[in] srcALen length of the first input sequence + @param[in] pSrcB points to the second input sequence + @param[in] srcBLen length of the second input sequence + @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. + */ + void arm_correlate_q15( + const q15_t * pSrcA, + uint32_t srcALen, + const q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst); + + +/** + @brief Correlation of Q15 sequences (fast version). + @param[in] pSrcA points to the first input sequence + @param[in] srcALen length of the first input sequence + @param[in] pSrcB points to the second input sequence + @param[in] srcBLen length of the second input sequence + @param[out] pDst points to the location where the output result is written. Length 2 * max(srcALen, srcBLen) - 1. + @return none + */ +void arm_correlate_fast_q15( + const q15_t * pSrcA, + uint32_t srcALen, + const q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst); + + +/** + @brief Correlation of Q15 sequences (fast version). + @param[in] pSrcA points to the first input sequence. + @param[in] srcALen length of the first input sequence. + @param[in] pSrcB points to the second input sequence. + @param[in] srcBLen length of the second input sequence. + @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. + @param[in] pScratch points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. + */ +void arm_correlate_fast_opt_q15( + const q15_t * pSrcA, + uint32_t srcALen, + const q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst, + q15_t * pScratch); + + + /** + * @brief Correlation of Q31 sequences. + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. + */ + void arm_correlate_q31( + const q31_t * pSrcA, + uint32_t srcALen, + const q31_t * pSrcB, + uint32_t srcBLen, + q31_t * pDst); + + +/** + @brief Correlation of Q31 sequences (fast version). + @param[in] pSrcA points to the first input sequence + @param[in] srcALen length of the first input sequence + @param[in] pSrcB points to the second input sequence + @param[in] srcBLen length of the second input sequence + @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. + */ +void arm_correlate_fast_q31( + const q31_t * pSrcA, + uint32_t srcALen, + const q31_t * pSrcB, + uint32_t srcBLen, + q31_t * pDst); + + + /** + * @brief Correlation of Q7 sequences. + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. + * @param[in] pScratch1 points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. + * @param[in] pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen). + */ + void arm_correlate_opt_q7( + const q7_t * pSrcA, + uint32_t srcALen, + const q7_t * pSrcB, + uint32_t srcBLen, + q7_t * pDst, + q15_t * pScratch1, + q15_t * pScratch2); + + + /** + * @brief Correlation of Q7 sequences. + * @param[in] pSrcA points to the first input sequence. + * @param[in] srcALen length of the first input sequence. + * @param[in] pSrcB points to the second input sequence. + * @param[in] srcBLen length of the second input sequence. + * @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. + */ + void arm_correlate_q7( + const q7_t * pSrcA, + uint32_t srcALen, + const q7_t * pSrcB, + uint32_t srcBLen, + q7_t * pDst); + + + /** + * @brief Instance structure for the floating-point sparse FIR filter. + */ + typedef struct + { + uint16_t numTaps; /**< number of coefficients in the filter. */ + uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ + float32_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ + const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ + uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ + int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ + } arm_fir_sparse_instance_f32; + + /** + * @brief Instance structure for the Q31 sparse FIR filter. + */ + typedef struct + { + uint16_t numTaps; /**< number of coefficients in the filter. */ + uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ + q31_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ + const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ + uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ + int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ + } arm_fir_sparse_instance_q31; + + /** + * @brief Instance structure for the Q15 sparse FIR filter. + */ + typedef struct + { + uint16_t numTaps; /**< number of coefficients in the filter. */ + uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ + q15_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ + const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ + uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ + int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ + } arm_fir_sparse_instance_q15; + + /** + * @brief Instance structure for the Q7 sparse FIR filter. + */ + typedef struct + { + uint16_t numTaps; /**< number of coefficients in the filter. */ + uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ + q7_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ + const q7_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ + uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ + int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ + } arm_fir_sparse_instance_q7; + + + /** + * @brief Processing function for the floating-point sparse FIR filter. + * @param[in] S points to an instance of the floating-point sparse FIR structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data + * @param[in] pScratchIn points to a temporary buffer of size blockSize. + * @param[in] blockSize number of input samples to process per call. + */ + void arm_fir_sparse_f32( + arm_fir_sparse_instance_f32 * S, + const float32_t * pSrc, + float32_t * pDst, + float32_t * pScratchIn, + uint32_t blockSize); + + + /** + * @brief Initialization function for the floating-point sparse FIR filter. + * @param[in,out] S points to an instance of the floating-point sparse FIR structure. + * @param[in] numTaps number of nonzero coefficients in the filter. + * @param[in] pCoeffs points to the array of filter coefficients. + * @param[in] pState points to the state buffer. + * @param[in] pTapDelay points to the array of offset times. + * @param[in] maxDelay maximum offset time supported. + * @param[in] blockSize number of samples that will be processed per block. + */ + void arm_fir_sparse_init_f32( + arm_fir_sparse_instance_f32 * S, + uint16_t numTaps, + const float32_t * pCoeffs, + float32_t * pState, + int32_t * pTapDelay, + uint16_t maxDelay, + uint32_t blockSize); + + + /** + * @brief Processing function for the Q31 sparse FIR filter. + * @param[in] S points to an instance of the Q31 sparse FIR structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data + * @param[in] pScratchIn points to a temporary buffer of size blockSize. + * @param[in] blockSize number of input samples to process per call. + */ + void arm_fir_sparse_q31( + arm_fir_sparse_instance_q31 * S, + const q31_t * pSrc, + q31_t * pDst, + q31_t * pScratchIn, + uint32_t blockSize); + + + /** + * @brief Initialization function for the Q31 sparse FIR filter. + * @param[in,out] S points to an instance of the Q31 sparse FIR structure. + * @param[in] numTaps number of nonzero coefficients in the filter. + * @param[in] pCoeffs points to the array of filter coefficients. + * @param[in] pState points to the state buffer. + * @param[in] pTapDelay points to the array of offset times. + * @param[in] maxDelay maximum offset time supported. + * @param[in] blockSize number of samples that will be processed per block. + */ + void arm_fir_sparse_init_q31( + arm_fir_sparse_instance_q31 * S, + uint16_t numTaps, + const q31_t * pCoeffs, + q31_t * pState, + int32_t * pTapDelay, + uint16_t maxDelay, + uint32_t blockSize); + + + /** + * @brief Processing function for the Q15 sparse FIR filter. + * @param[in] S points to an instance of the Q15 sparse FIR structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data + * @param[in] pScratchIn points to a temporary buffer of size blockSize. + * @param[in] pScratchOut points to a temporary buffer of size blockSize. + * @param[in] blockSize number of input samples to process per call. + */ + void arm_fir_sparse_q15( + arm_fir_sparse_instance_q15 * S, + const q15_t * pSrc, + q15_t * pDst, + q15_t * pScratchIn, + q31_t * pScratchOut, + uint32_t blockSize); + + + /** + * @brief Initialization function for the Q15 sparse FIR filter. + * @param[in,out] S points to an instance of the Q15 sparse FIR structure. + * @param[in] numTaps number of nonzero coefficients in the filter. + * @param[in] pCoeffs points to the array of filter coefficients. + * @param[in] pState points to the state buffer. + * @param[in] pTapDelay points to the array of offset times. + * @param[in] maxDelay maximum offset time supported. + * @param[in] blockSize number of samples that will be processed per block. + */ + void arm_fir_sparse_init_q15( + arm_fir_sparse_instance_q15 * S, + uint16_t numTaps, + const q15_t * pCoeffs, + q15_t * pState, + int32_t * pTapDelay, + uint16_t maxDelay, + uint32_t blockSize); + + + /** + * @brief Processing function for the Q7 sparse FIR filter. + * @param[in] S points to an instance of the Q7 sparse FIR structure. + * @param[in] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data + * @param[in] pScratchIn points to a temporary buffer of size blockSize. + * @param[in] pScratchOut points to a temporary buffer of size blockSize. + * @param[in] blockSize number of input samples to process per call. + */ + void arm_fir_sparse_q7( + arm_fir_sparse_instance_q7 * S, + const q7_t * pSrc, + q7_t * pDst, + q7_t * pScratchIn, + q31_t * pScratchOut, + uint32_t blockSize); + + + /** + * @brief Initialization function for the Q7 sparse FIR filter. + * @param[in,out] S points to an instance of the Q7 sparse FIR structure. + * @param[in] numTaps number of nonzero coefficients in the filter. + * @param[in] pCoeffs points to the array of filter coefficients. + * @param[in] pState points to the state buffer. + * @param[in] pTapDelay points to the array of offset times. + * @param[in] maxDelay maximum offset time supported. + * @param[in] blockSize number of samples that will be processed per block. + */ + void arm_fir_sparse_init_q7( + arm_fir_sparse_instance_q7 * S, + uint16_t numTaps, + const q7_t * pCoeffs, + q7_t * pState, + int32_t * pTapDelay, + uint16_t maxDelay, + uint32_t blockSize); + + + /** + * @brief Floating-point sin_cos function. + * @param[in] theta input value in degrees + * @param[out] pSinVal points to the processed sine output. + * @param[out] pCosVal points to the processed cos output. + */ + void arm_sin_cos_f32( + float32_t theta, + float32_t * pSinVal, + float32_t * pCosVal); + + + /** + * @brief Q31 sin_cos function. + * @param[in] theta scaled input value in degrees + * @param[out] pSinVal points to the processed sine output. + * @param[out] pCosVal points to the processed cosine output. + */ + void arm_sin_cos_q31( + q31_t theta, + q31_t * pSinVal, + q31_t * pCosVal); + + + /** + * @brief Floating-point complex conjugate. + * @param[in] pSrc points to the input vector + * @param[out] pDst points to the output vector + * @param[in] numSamples number of complex samples in each vector + */ + void arm_cmplx_conj_f32( + const float32_t * pSrc, + float32_t * pDst, + uint32_t numSamples); + + /** + * @brief Q31 complex conjugate. + * @param[in] pSrc points to the input vector + * @param[out] pDst points to the output vector + * @param[in] numSamples number of complex samples in each vector + */ + void arm_cmplx_conj_q31( + const q31_t * pSrc, + q31_t * pDst, + uint32_t numSamples); + + + /** + * @brief Q15 complex conjugate. + * @param[in] pSrc points to the input vector + * @param[out] pDst points to the output vector + * @param[in] numSamples number of complex samples in each vector + */ + void arm_cmplx_conj_q15( + const q15_t * pSrc, + q15_t * pDst, + uint32_t numSamples); + + + /** + * @brief Floating-point complex magnitude squared + * @param[in] pSrc points to the complex input vector + * @param[out] pDst points to the real output vector + * @param[in] numSamples number of complex samples in the input vector + */ + void arm_cmplx_mag_squared_f32( + const float32_t * pSrc, + float32_t * pDst, + uint32_t numSamples); + + + /** + * @brief Q31 complex magnitude squared + * @param[in] pSrc points to the complex input vector + * @param[out] pDst points to the real output vector + * @param[in] numSamples number of complex samples in the input vector + */ + void arm_cmplx_mag_squared_q31( + const q31_t * pSrc, + q31_t * pDst, + uint32_t numSamples); + + + /** + * @brief Q15 complex magnitude squared + * @param[in] pSrc points to the complex input vector + * @param[out] pDst points to the real output vector + * @param[in] numSamples number of complex samples in the input vector + */ + void arm_cmplx_mag_squared_q15( + const q15_t * pSrc, + q15_t * pDst, + uint32_t numSamples); + + + /** + * @ingroup groupController + */ + + /** + * @defgroup PID PID Motor Control + * + * A Proportional Integral Derivative (PID) controller is a generic feedback control + * loop mechanism widely used in industrial control systems. + * A PID controller is the most commonly used type of feedback controller. + * + * This set of functions implements (PID) controllers + * for Q15, Q31, and floating-point data types. The functions operate on a single sample + * of data and each call to the function returns a single processed value. + * S points to an instance of the PID control data structure. in + * is the input sample value. The functions return the output value. + * + * \par Algorithm: + *
+   *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
+   *    A0 = Kp + Ki + Kd
+   *    A1 = (-Kp ) - (2 * Kd )
+   *    A2 = Kd
+   * 
+ * + * \par + * where \c Kp is proportional constant, \c Ki is Integral constant and \c Kd is Derivative constant + * + * \par + * \image html PID.gif "Proportional Integral Derivative Controller" + * + * \par + * The PID controller calculates an "error" value as the difference between + * the measured output and the reference input. + * The controller attempts to minimize the error by adjusting the process control inputs. + * The proportional value determines the reaction to the current error, + * the integral value determines the reaction based on the sum of recent errors, + * and the derivative value determines the reaction based on the rate at which the error has been changing. + * + * \par Instance Structure + * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure. + * A separate instance structure must be defined for each PID Controller. + * There are separate instance structure declarations for each of the 3 supported data types. + * + * \par Reset Functions + * There is also an associated reset function for each data type which clears the state array. + * + * \par Initialization Functions + * There is also an associated initialization function for each data type. + * The initialization function performs the following operations: + * - Initializes the Gains A0, A1, A2 from Kp,Ki, Kd gains. + * - Zeros out the values in the state buffer. + * + * \par + * Instance structure cannot be placed into a const data section and it is recommended to use the initialization function. + * + * \par Fixed-Point Behavior + * Care must be taken when using the fixed-point versions of the PID Controller functions. + * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered. + * Refer to the function specific documentation below for usage guidelines. + */ + + /** + * @addtogroup PID + * @{ + */ + + /** + * @brief Process function for the floating-point PID Control. + * @param[in,out] S is an instance of the floating-point PID Control structure + * @param[in] in input sample to process + * @return processed output sample. + */ + __STATIC_FORCEINLINE float32_t arm_pid_f32( + arm_pid_instance_f32 * S, + float32_t in) + { + float32_t out; + + /* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2] */ + out = (S->A0 * in) + + (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]); + + /* Update state */ + S->state[1] = S->state[0]; + S->state[0] = in; + S->state[2] = out; + + /* return to application */ + return (out); + + } + +/** + @brief Process function for the Q31 PID Control. + @param[in,out] S points to an instance of the Q31 PID Control structure + @param[in] in input sample to process + @return processed output sample. + + \par Scaling and Overflow Behavior + The function is implemented using an internal 64-bit accumulator. + The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit. + Thus, if the accumulator result overflows it wraps around rather than clip. + In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions. + After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format. + */ +__STATIC_FORCEINLINE q31_t arm_pid_q31( + arm_pid_instance_q31 * S, + q31_t in) + { + q63_t acc; + q31_t out; + + /* acc = A0 * x[n] */ + acc = (q63_t) S->A0 * in; + + /* acc += A1 * x[n-1] */ + acc += (q63_t) S->A1 * S->state[0]; + + /* acc += A2 * x[n-2] */ + acc += (q63_t) S->A2 * S->state[1]; + + /* convert output to 1.31 format to add y[n-1] */ + out = (q31_t) (acc >> 31U); + + /* out += y[n-1] */ + out += S->state[2]; + + /* Update state */ + S->state[1] = S->state[0]; + S->state[0] = in; + S->state[2] = out; + + /* return to application */ + return (out); + } + + +/** + @brief Process function for the Q15 PID Control. + @param[in,out] S points to an instance of the Q15 PID Control structure + @param[in] in input sample to process + @return processed output sample. + + \par Scaling and Overflow Behavior + The function is implemented using a 64-bit internal accumulator. + Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result. + The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format. + There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. + After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits. + Lastly, the accumulator is saturated to yield a result in 1.15 format. + */ +__STATIC_FORCEINLINE q15_t arm_pid_q15( + arm_pid_instance_q15 * S, + q15_t in) + { + q63_t acc; + q15_t out; + +#if defined (ARM_MATH_DSP) + /* Implementation of PID controller */ + + /* acc = A0 * x[n] */ + acc = (q31_t) __SMUAD((uint32_t)S->A0, (uint32_t)in); + + /* acc += A1 * x[n-1] + A2 * x[n-2] */ + acc = (q63_t)__SMLALD((uint32_t)S->A1, (uint32_t)read_q15x2 (S->state), (uint64_t)acc); +#else + /* acc = A0 * x[n] */ + acc = ((q31_t) S->A0) * in; + + /* acc += A1 * x[n-1] + A2 * x[n-2] */ + acc += (q31_t) S->A1 * S->state[0]; + acc += (q31_t) S->A2 * S->state[1]; +#endif + + /* acc += y[n-1] */ + acc += (q31_t) S->state[2] << 15; + + /* saturate the output */ + out = (q15_t) (__SSAT((acc >> 15), 16)); + + /* Update state */ + S->state[1] = S->state[0]; + S->state[0] = in; + S->state[2] = out; + + /* return to application */ + return (out); + } + + /** + * @} end of PID group + */ + + + /** + * @brief Floating-point matrix inverse. + * @param[in] src points to the instance of the input floating-point matrix structure. + * @param[out] dst points to the instance of the output floating-point matrix structure. + * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match. + * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR. + */ + arm_status arm_mat_inverse_f32( + const arm_matrix_instance_f32 * src, + arm_matrix_instance_f32 * dst); + + + /** + * @brief Floating-point matrix inverse. + * @param[in] src points to the instance of the input floating-point matrix structure. + * @param[out] dst points to the instance of the output floating-point matrix structure. + * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match. + * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR. + */ + arm_status arm_mat_inverse_f64( + const arm_matrix_instance_f64 * src, + arm_matrix_instance_f64 * dst); + + + + /** + * @ingroup groupController + */ + + /** + * @defgroup clarke Vector Clarke Transform + * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector. + * Generally the Clarke transform uses three-phase currents Ia, Ib and Ic to calculate currents + * in the two-phase orthogonal stator axis Ialpha and Ibeta. + * When Ialpha is superposed with Ia as shown in the figure below + * \image html clarke.gif Stator current space vector and its components in (a,b). + * and Ia + Ib + Ic = 0, in this condition Ialpha and Ibeta + * can be calculated using only Ia and Ib. + * + * The function operates on a single sample of data and each call to the function returns the processed output. + * The library provides separate functions for Q31 and floating-point data types. + * \par Algorithm + * \image html clarkeFormula.gif + * where Ia and Ib are the instantaneous stator phases and + * pIalpha and pIbeta are the two coordinates of time invariant vector. + * \par Fixed-Point Behavior + * Care must be taken when using the Q31 version of the Clarke transform. + * In particular, the overflow and saturation behavior of the accumulator used must be considered. + * Refer to the function specific documentation below for usage guidelines. + */ + + /** + * @addtogroup clarke + * @{ + */ + + /** + * + * @brief Floating-point Clarke transform + * @param[in] Ia input three-phase coordinate a + * @param[in] Ib input three-phase coordinate b + * @param[out] pIalpha points to output two-phase orthogonal vector axis alpha + * @param[out] pIbeta points to output two-phase orthogonal vector axis beta + * @return none + */ + __STATIC_FORCEINLINE void arm_clarke_f32( + float32_t Ia, + float32_t Ib, + float32_t * pIalpha, + float32_t * pIbeta) + { + /* Calculate pIalpha using the equation, pIalpha = Ia */ + *pIalpha = Ia; + + /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */ + *pIbeta = ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib); + } + + +/** + @brief Clarke transform for Q31 version + @param[in] Ia input three-phase coordinate a + @param[in] Ib input three-phase coordinate b + @param[out] pIalpha points to output two-phase orthogonal vector axis alpha + @param[out] pIbeta points to output two-phase orthogonal vector axis beta + @return none + + \par Scaling and Overflow Behavior + The function is implemented using an internal 32-bit accumulator. + The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format. + There is saturation on the addition, hence there is no risk of overflow. + */ +__STATIC_FORCEINLINE void arm_clarke_q31( + q31_t Ia, + q31_t Ib, + q31_t * pIalpha, + q31_t * pIbeta) + { + q31_t product1, product2; /* Temporary variables used to store intermediate results */ + + /* Calculating pIalpha from Ia by equation pIalpha = Ia */ + *pIalpha = Ia; + + /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */ + product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30); + + /* Intermediate product is calculated by (2/sqrt(3) * Ib) */ + product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30); + + /* pIbeta is calculated by adding the intermediate products */ + *pIbeta = __QADD(product1, product2); + } + + /** + * @} end of clarke group + */ + + + /** + * @ingroup groupController + */ + + /** + * @defgroup inv_clarke Vector Inverse Clarke Transform + * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases. + * + * The function operates on a single sample of data and each call to the function returns the processed output. + * The library provides separate functions for Q31 and floating-point data types. + * \par Algorithm + * \image html clarkeInvFormula.gif + * where pIa and pIb are the instantaneous stator phases and + * Ialpha and Ibeta are the two coordinates of time invariant vector. + * \par Fixed-Point Behavior + * Care must be taken when using the Q31 version of the Clarke transform. + * In particular, the overflow and saturation behavior of the accumulator used must be considered. + * Refer to the function specific documentation below for usage guidelines. + */ + + /** + * @addtogroup inv_clarke + * @{ + */ + + /** + * @brief Floating-point Inverse Clarke transform + * @param[in] Ialpha input two-phase orthogonal vector axis alpha + * @param[in] Ibeta input two-phase orthogonal vector axis beta + * @param[out] pIa points to output three-phase coordinate a + * @param[out] pIb points to output three-phase coordinate b + * @return none + */ + __STATIC_FORCEINLINE void arm_inv_clarke_f32( + float32_t Ialpha, + float32_t Ibeta, + float32_t * pIa, + float32_t * pIb) + { + /* Calculating pIa from Ialpha by equation pIa = Ialpha */ + *pIa = Ialpha; + + /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */ + *pIb = -0.5f * Ialpha + 0.8660254039f * Ibeta; + } + + +/** + @brief Inverse Clarke transform for Q31 version + @param[in] Ialpha input two-phase orthogonal vector axis alpha + @param[in] Ibeta input two-phase orthogonal vector axis beta + @param[out] pIa points to output three-phase coordinate a + @param[out] pIb points to output three-phase coordinate b + @return none + + \par Scaling and Overflow Behavior + The function is implemented using an internal 32-bit accumulator. + The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format. + There is saturation on the subtraction, hence there is no risk of overflow. + */ +__STATIC_FORCEINLINE void arm_inv_clarke_q31( + q31_t Ialpha, + q31_t Ibeta, + q31_t * pIa, + q31_t * pIb) + { + q31_t product1, product2; /* Temporary variables used to store intermediate results */ + + /* Calculating pIa from Ialpha by equation pIa = Ialpha */ + *pIa = Ialpha; + + /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */ + product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31); + + /* Intermediate product is calculated by (1/sqrt(3) * pIb) */ + product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31); + + /* pIb is calculated by subtracting the products */ + *pIb = __QSUB(product2, product1); + } + + /** + * @} end of inv_clarke group + */ + + + + /** + * @ingroup groupController + */ + + /** + * @defgroup park Vector Park Transform + * + * Forward Park transform converts the input two-coordinate vector to flux and torque components. + * The Park transform can be used to realize the transformation of the Ialpha and the Ibeta currents + * from the stationary to the moving reference frame and control the spatial relationship between + * the stator vector current and rotor flux vector. + * If we consider the d axis aligned with the rotor flux, the diagram below shows the + * current vector and the relationship from the two reference frames: + * \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame" + * + * The function operates on a single sample of data and each call to the function returns the processed output. + * The library provides separate functions for Q31 and floating-point data types. + * \par Algorithm + * \image html parkFormula.gif + * where Ialpha and Ibeta are the stator vector components, + * pId and pIq are rotor vector components and cosVal and sinVal are the + * cosine and sine values of theta (rotor flux position). + * \par Fixed-Point Behavior + * Care must be taken when using the Q31 version of the Park transform. + * In particular, the overflow and saturation behavior of the accumulator used must be considered. + * Refer to the function specific documentation below for usage guidelines. + */ + + /** + * @addtogroup park + * @{ + */ + + /** + * @brief Floating-point Park transform + * @param[in] Ialpha input two-phase vector coordinate alpha + * @param[in] Ibeta input two-phase vector coordinate beta + * @param[out] pId points to output rotor reference frame d + * @param[out] pIq points to output rotor reference frame q + * @param[in] sinVal sine value of rotation angle theta + * @param[in] cosVal cosine value of rotation angle theta + * @return none + * + * The function implements the forward Park transform. + * + */ + __STATIC_FORCEINLINE void arm_park_f32( + float32_t Ialpha, + float32_t Ibeta, + float32_t * pId, + float32_t * pIq, + float32_t sinVal, + float32_t cosVal) + { + /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */ + *pId = Ialpha * cosVal + Ibeta * sinVal; + + /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */ + *pIq = -Ialpha * sinVal + Ibeta * cosVal; + } + + +/** + @brief Park transform for Q31 version + @param[in] Ialpha input two-phase vector coordinate alpha + @param[in] Ibeta input two-phase vector coordinate beta + @param[out] pId points to output rotor reference frame d + @param[out] pIq points to output rotor reference frame q + @param[in] sinVal sine value of rotation angle theta + @param[in] cosVal cosine value of rotation angle theta + @return none + + \par Scaling and Overflow Behavior + The function is implemented using an internal 32-bit accumulator. + The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format. + There is saturation on the addition and subtraction, hence there is no risk of overflow. + */ +__STATIC_FORCEINLINE void arm_park_q31( + q31_t Ialpha, + q31_t Ibeta, + q31_t * pId, + q31_t * pIq, + q31_t sinVal, + q31_t cosVal) + { + q31_t product1, product2; /* Temporary variables used to store intermediate results */ + q31_t product3, product4; /* Temporary variables used to store intermediate results */ + + /* Intermediate product is calculated by (Ialpha * cosVal) */ + product1 = (q31_t) (((q63_t) (Ialpha) * (cosVal)) >> 31); + + /* Intermediate product is calculated by (Ibeta * sinVal) */ + product2 = (q31_t) (((q63_t) (Ibeta) * (sinVal)) >> 31); + + + /* Intermediate product is calculated by (Ialpha * sinVal) */ + product3 = (q31_t) (((q63_t) (Ialpha) * (sinVal)) >> 31); + + /* Intermediate product is calculated by (Ibeta * cosVal) */ + product4 = (q31_t) (((q63_t) (Ibeta) * (cosVal)) >> 31); + + /* Calculate pId by adding the two intermediate products 1 and 2 */ + *pId = __QADD(product1, product2); + + /* Calculate pIq by subtracting the two intermediate products 3 from 4 */ + *pIq = __QSUB(product4, product3); + } + + /** + * @} end of park group + */ + + + /** + * @ingroup groupController + */ + + /** + * @defgroup inv_park Vector Inverse Park transform + * Inverse Park transform converts the input flux and torque components to two-coordinate vector. + * + * The function operates on a single sample of data and each call to the function returns the processed output. + * The library provides separate functions for Q31 and floating-point data types. + * \par Algorithm + * \image html parkInvFormula.gif + * where pIalpha and pIbeta are the stator vector components, + * Id and Iq are rotor vector components and cosVal and sinVal are the + * cosine and sine values of theta (rotor flux position). + * \par Fixed-Point Behavior + * Care must be taken when using the Q31 version of the Park transform. + * In particular, the overflow and saturation behavior of the accumulator used must be considered. + * Refer to the function specific documentation below for usage guidelines. + */ + + /** + * @addtogroup inv_park + * @{ + */ + + /** + * @brief Floating-point Inverse Park transform + * @param[in] Id input coordinate of rotor reference frame d + * @param[in] Iq input coordinate of rotor reference frame q + * @param[out] pIalpha points to output two-phase orthogonal vector axis alpha + * @param[out] pIbeta points to output two-phase orthogonal vector axis beta + * @param[in] sinVal sine value of rotation angle theta + * @param[in] cosVal cosine value of rotation angle theta + * @return none + */ + __STATIC_FORCEINLINE void arm_inv_park_f32( + float32_t Id, + float32_t Iq, + float32_t * pIalpha, + float32_t * pIbeta, + float32_t sinVal, + float32_t cosVal) + { + /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */ + *pIalpha = Id * cosVal - Iq * sinVal; + + /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */ + *pIbeta = Id * sinVal + Iq * cosVal; + } + + +/** + @brief Inverse Park transform for Q31 version + @param[in] Id input coordinate of rotor reference frame d + @param[in] Iq input coordinate of rotor reference frame q + @param[out] pIalpha points to output two-phase orthogonal vector axis alpha + @param[out] pIbeta points to output two-phase orthogonal vector axis beta + @param[in] sinVal sine value of rotation angle theta + @param[in] cosVal cosine value of rotation angle theta + @return none + + @par Scaling and Overflow Behavior + The function is implemented using an internal 32-bit accumulator. + The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format. + There is saturation on the addition, hence there is no risk of overflow. + */ +__STATIC_FORCEINLINE void arm_inv_park_q31( + q31_t Id, + q31_t Iq, + q31_t * pIalpha, + q31_t * pIbeta, + q31_t sinVal, + q31_t cosVal) + { + q31_t product1, product2; /* Temporary variables used to store intermediate results */ + q31_t product3, product4; /* Temporary variables used to store intermediate results */ + + /* Intermediate product is calculated by (Id * cosVal) */ + product1 = (q31_t) (((q63_t) (Id) * (cosVal)) >> 31); + + /* Intermediate product is calculated by (Iq * sinVal) */ + product2 = (q31_t) (((q63_t) (Iq) * (sinVal)) >> 31); + + + /* Intermediate product is calculated by (Id * sinVal) */ + product3 = (q31_t) (((q63_t) (Id) * (sinVal)) >> 31); + + /* Intermediate product is calculated by (Iq * cosVal) */ + product4 = (q31_t) (((q63_t) (Iq) * (cosVal)) >> 31); + + /* Calculate pIalpha by using the two intermediate products 1 and 2 */ + *pIalpha = __QSUB(product1, product2); + + /* Calculate pIbeta by using the two intermediate products 3 and 4 */ + *pIbeta = __QADD(product4, product3); + } + + /** + * @} end of Inverse park group + */ + + + /** + * @ingroup groupInterpolation + */ + + /** + * @defgroup LinearInterpolate Linear Interpolation + * + * Linear interpolation is a method of curve fitting using linear polynomials. + * Linear interpolation works by effectively drawing a straight line between two neighboring samples and returning the appropriate point along that line + * + * \par + * \image html LinearInterp.gif "Linear interpolation" + * + * \par + * A Linear Interpolate function calculates an output value(y), for the input(x) + * using linear interpolation of the input values x0, x1( nearest input values) and the output values y0 and y1(nearest output values) + * + * \par Algorithm: + *
+   *       y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
+   *       where x0, x1 are nearest values of input x
+   *             y0, y1 are nearest values to output y
+   * 
+ * + * \par + * This set of functions implements Linear interpolation process + * for Q7, Q15, Q31, and floating-point data types. The functions operate on a single + * sample of data and each call to the function returns a single processed value. + * S points to an instance of the Linear Interpolate function data structure. + * x is the input sample value. The functions returns the output value. + * + * \par + * if x is outside of the table boundary, Linear interpolation returns first value of the table + * if x is below input range and returns last value of table if x is above range. + */ + + /** + * @addtogroup LinearInterpolate + * @{ + */ + + /** + * @brief Process function for the floating-point Linear Interpolation Function. + * @param[in,out] S is an instance of the floating-point Linear Interpolation structure + * @param[in] x input sample to process + * @return y processed output sample. + * + */ + __STATIC_FORCEINLINE float32_t arm_linear_interp_f32( + arm_linear_interp_instance_f32 * S, + float32_t x) + { + float32_t y; + float32_t x0, x1; /* Nearest input values */ + float32_t y0, y1; /* Nearest output values */ + float32_t xSpacing = S->xSpacing; /* spacing between input values */ + int32_t i; /* Index variable */ + float32_t *pYData = S->pYData; /* pointer to output table */ + + /* Calculation of index */ + i = (int32_t) ((x - S->x1) / xSpacing); + + if (i < 0) + { + /* Iniatilize output for below specified range as least output value of table */ + y = pYData[0]; + } + else if ((uint32_t)i >= S->nValues) + { + /* Iniatilize output for above specified range as last output value of table */ + y = pYData[S->nValues - 1]; + } + else + { + /* Calculation of nearest input values */ + x0 = S->x1 + i * xSpacing; + x1 = S->x1 + (i + 1) * xSpacing; + + /* Read of nearest output values */ + y0 = pYData[i]; + y1 = pYData[i + 1]; + + /* Calculation of output */ + y = y0 + (x - x0) * ((y1 - y0) / (x1 - x0)); + + } + + /* returns output value */ + return (y); + } + + + /** + * + * @brief Process function for the Q31 Linear Interpolation Function. + * @param[in] pYData pointer to Q31 Linear Interpolation table + * @param[in] x input sample to process + * @param[in] nValues number of table values + * @return y processed output sample. + * + * \par + * Input sample x is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part. + * This function can support maximum of table size 2^12. + * + */ + __STATIC_FORCEINLINE q31_t arm_linear_interp_q31( + q31_t * pYData, + q31_t x, + uint32_t nValues) + { + q31_t y; /* output */ + q31_t y0, y1; /* Nearest output values */ + q31_t fract; /* fractional part */ + int32_t index; /* Index to read nearest output values */ + + /* Input is in 12.20 format */ + /* 12 bits for the table index */ + /* Index value calculation */ + index = ((x & (q31_t)0xFFF00000) >> 20); + + if (index >= (int32_t)(nValues - 1)) + { + return (pYData[nValues - 1]); + } + else if (index < 0) + { + return (pYData[0]); + } + else + { + /* 20 bits for the fractional part */ + /* shift left by 11 to keep fract in 1.31 format */ + fract = (x & 0x000FFFFF) << 11; + + /* Read two nearest output values from the index in 1.31(q31) format */ + y0 = pYData[index]; + y1 = pYData[index + 1]; + + /* Calculation of y0 * (1-fract) and y is in 2.30 format */ + y = ((q31_t) ((q63_t) y0 * (0x7FFFFFFF - fract) >> 32)); + + /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */ + y += ((q31_t) (((q63_t) y1 * fract) >> 32)); + + /* Convert y to 1.31 format */ + return (y << 1U); + } + } + + + /** + * + * @brief Process function for the Q15 Linear Interpolation Function. + * @param[in] pYData pointer to Q15 Linear Interpolation table + * @param[in] x input sample to process + * @param[in] nValues number of table values + * @return y processed output sample. + * + * \par + * Input sample x is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part. + * This function can support maximum of table size 2^12. + * + */ + __STATIC_FORCEINLINE q15_t arm_linear_interp_q15( + q15_t * pYData, + q31_t x, + uint32_t nValues) + { + q63_t y; /* output */ + q15_t y0, y1; /* Nearest output values */ + q31_t fract; /* fractional part */ + int32_t index; /* Index to read nearest output values */ + + /* Input is in 12.20 format */ + /* 12 bits for the table index */ + /* Index value calculation */ + index = ((x & (int32_t)0xFFF00000) >> 20); + + if (index >= (int32_t)(nValues - 1)) + { + return (pYData[nValues - 1]); + } + else if (index < 0) + { + return (pYData[0]); + } + else + { + /* 20 bits for the fractional part */ + /* fract is in 12.20 format */ + fract = (x & 0x000FFFFF); + + /* Read two nearest output values from the index */ + y0 = pYData[index]; + y1 = pYData[index + 1]; + + /* Calculation of y0 * (1-fract) and y is in 13.35 format */ + y = ((q63_t) y0 * (0xFFFFF - fract)); + + /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */ + y += ((q63_t) y1 * (fract)); + + /* convert y to 1.15 format */ + return (q15_t) (y >> 20); + } + } + + + /** + * + * @brief Process function for the Q7 Linear Interpolation Function. + * @param[in] pYData pointer to Q7 Linear Interpolation table + * @param[in] x input sample to process + * @param[in] nValues number of table values + * @return y processed output sample. + * + * \par + * Input sample x is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part. + * This function can support maximum of table size 2^12. + */ + __STATIC_FORCEINLINE q7_t arm_linear_interp_q7( + q7_t * pYData, + q31_t x, + uint32_t nValues) + { + q31_t y; /* output */ + q7_t y0, y1; /* Nearest output values */ + q31_t fract; /* fractional part */ + uint32_t index; /* Index to read nearest output values */ + + /* Input is in 12.20 format */ + /* 12 bits for the table index */ + /* Index value calculation */ + if (x < 0) + { + return (pYData[0]); + } + index = (x >> 20) & 0xfff; + + if (index >= (nValues - 1)) + { + return (pYData[nValues - 1]); + } + else + { + /* 20 bits for the fractional part */ + /* fract is in 12.20 format */ + fract = (x & 0x000FFFFF); + + /* Read two nearest output values from the index and are in 1.7(q7) format */ + y0 = pYData[index]; + y1 = pYData[index + 1]; + + /* Calculation of y0 * (1-fract ) and y is in 13.27(q27) format */ + y = ((y0 * (0xFFFFF - fract))); + + /* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */ + y += (y1 * fract); + + /* convert y to 1.7(q7) format */ + return (q7_t) (y >> 20); + } + } + + /** + * @} end of LinearInterpolate group + */ + + /** + * @brief Fast approximation to the trigonometric sine function for floating-point data. + * @param[in] x input value in radians. + * @return sin(x). + */ + float32_t arm_sin_f32( + float32_t x); + + + /** + * @brief Fast approximation to the trigonometric sine function for Q31 data. + * @param[in] x Scaled input value in radians. + * @return sin(x). + */ + q31_t arm_sin_q31( + q31_t x); + + + /** + * @brief Fast approximation to the trigonometric sine function for Q15 data. + * @param[in] x Scaled input value in radians. + * @return sin(x). + */ + q15_t arm_sin_q15( + q15_t x); + + + /** + * @brief Fast approximation to the trigonometric cosine function for floating-point data. + * @param[in] x input value in radians. + * @return cos(x). + */ + float32_t arm_cos_f32( + float32_t x); + + + /** + * @brief Fast approximation to the trigonometric cosine function for Q31 data. + * @param[in] x Scaled input value in radians. + * @return cos(x). + */ + q31_t arm_cos_q31( + q31_t x); + + + /** + * @brief Fast approximation to the trigonometric cosine function for Q15 data. + * @param[in] x Scaled input value in radians. + * @return cos(x). + */ + q15_t arm_cos_q15( + q15_t x); + + + /** + * @ingroup groupFastMath + */ + + + /** + * @defgroup SQRT Square Root + * + * Computes the square root of a number. + * There are separate functions for Q15, Q31, and floating-point data types. + * The square root function is computed using the Newton-Raphson algorithm. + * This is an iterative algorithm of the form: + *
+   *      x1 = x0 - f(x0)/f'(x0)
+   * 
+ * where x1 is the current estimate, + * x0 is the previous estimate, and + * f'(x0) is the derivative of f() evaluated at x0. + * For the square root function, the algorithm reduces to: + *
+   *     x0 = in/2                         [initial guess]
+   *     x1 = 1/2 * ( x0 + in / x0)        [each iteration]
+   * 
+ */ + + + /** + * @addtogroup SQRT + * @{ + */ + +/** + @brief Floating-point square root function. + @param[in] in input value + @param[out] pOut square root of input value + @return execution status + - \ref ARM_MATH_SUCCESS : input value is positive + - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0 + */ +__STATIC_FORCEINLINE arm_status arm_sqrt_f32( + float32_t in, + float32_t * pOut) + { + if (in >= 0.0f) + { +#if defined ( __CC_ARM ) + #if defined __TARGET_FPU_VFP + *pOut = __sqrtf(in); + #else + *pOut = sqrtf(in); + #endif + +#elif defined ( __ICCARM__ ) + #if defined __ARMVFP__ + __ASM("VSQRT.F32 %0,%1" : "=t"(*pOut) : "t"(in)); + #else + *pOut = sqrtf(in); + #endif + +#else + *pOut = sqrtf(in); +#endif + + return (ARM_MATH_SUCCESS); + } + else + { + *pOut = 0.0f; + return (ARM_MATH_ARGUMENT_ERROR); + } + } + + +/** + @brief Q31 square root function. + @param[in] in input value. The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF + @param[out] pOut points to square root of input value + @return execution status + - \ref ARM_MATH_SUCCESS : input value is positive + - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0 + */ +arm_status arm_sqrt_q31( + q31_t in, + q31_t * pOut); + + +/** + @brief Q15 square root function. + @param[in] in input value. The range of the input value is [0 +1) or 0x0000 to 0x7FFF + @param[out] pOut points to square root of input value + @return execution status + - \ref ARM_MATH_SUCCESS : input value is positive + - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0 + */ +arm_status arm_sqrt_q15( + q15_t in, + q15_t * pOut); + + /** + * @brief Vector Floating-point square root function. + * @param[in] pIn input vector. + * @param[out] pOut vector of square roots of input elements. + * @param[in] len length of input vector. + * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if + * in is negative value and returns zero output for negative values. + */ + void arm_vsqrt_f32( + float32_t * pIn, + float32_t * pOut, + uint16_t len); + + void arm_vsqrt_q31( + q31_t * pIn, + q31_t * pOut, + uint16_t len); + + void arm_vsqrt_q15( + q15_t * pIn, + q15_t * pOut, + uint16_t len); + + /** + * @} end of SQRT group + */ + + + /** + * @brief floating-point Circular write function. + */ + __STATIC_FORCEINLINE void arm_circularWrite_f32( + int32_t * circBuffer, + int32_t L, + uint16_t * writeOffset, + int32_t bufferInc, + const int32_t * src, + int32_t srcInc, + uint32_t blockSize) + { + uint32_t i = 0U; + int32_t wOffset; + + /* Copy the value of Index pointer that points + * to the current location where the input samples to be copied */ + wOffset = *writeOffset; + + /* Loop over the blockSize */ + i = blockSize; + + while (i > 0U) + { + /* copy the input sample to the circular buffer */ + circBuffer[wOffset] = *src; + + /* Update the input pointer */ + src += srcInc; + + /* Circularly update wOffset. Watch out for positive and negative value */ + wOffset += bufferInc; + if (wOffset >= L) + wOffset -= L; + + /* Decrement the loop counter */ + i--; + } + + /* Update the index pointer */ + *writeOffset = (uint16_t)wOffset; + } + + + + /** + * @brief floating-point Circular Read function. + */ + __STATIC_FORCEINLINE void arm_circularRead_f32( + int32_t * circBuffer, + int32_t L, + int32_t * readOffset, + int32_t bufferInc, + int32_t * dst, + int32_t * dst_base, + int32_t dst_length, + int32_t dstInc, + uint32_t blockSize) + { + uint32_t i = 0U; + int32_t rOffset; + int32_t* dst_end; + + /* Copy the value of Index pointer that points + * to the current location from where the input samples to be read */ + rOffset = *readOffset; + dst_end = dst_base + dst_length; + + /* Loop over the blockSize */ + i = blockSize; + + while (i > 0U) + { + /* copy the sample from the circular buffer to the destination buffer */ + *dst = circBuffer[rOffset]; + + /* Update the input pointer */ + dst += dstInc; + + if (dst == dst_end) + { + dst = dst_base; + } + + /* Circularly update rOffset. Watch out for positive and negative value */ + rOffset += bufferInc; + + if (rOffset >= L) + { + rOffset -= L; + } + + /* Decrement the loop counter */ + i--; + } + + /* Update the index pointer */ + *readOffset = rOffset; + } + + + /** + * @brief Q15 Circular write function. + */ + __STATIC_FORCEINLINE void arm_circularWrite_q15( + q15_t * circBuffer, + int32_t L, + uint16_t * writeOffset, + int32_t bufferInc, + const q15_t * src, + int32_t srcInc, + uint32_t blockSize) + { + uint32_t i = 0U; + int32_t wOffset; + + /* Copy the value of Index pointer that points + * to the current location where the input samples to be copied */ + wOffset = *writeOffset; + + /* Loop over the blockSize */ + i = blockSize; + + while (i > 0U) + { + /* copy the input sample to the circular buffer */ + circBuffer[wOffset] = *src; + + /* Update the input pointer */ + src += srcInc; + + /* Circularly update wOffset. Watch out for positive and negative value */ + wOffset += bufferInc; + if (wOffset >= L) + wOffset -= L; + + /* Decrement the loop counter */ + i--; + } + + /* Update the index pointer */ + *writeOffset = (uint16_t)wOffset; + } + + + /** + * @brief Q15 Circular Read function. + */ + __STATIC_FORCEINLINE void arm_circularRead_q15( + q15_t * circBuffer, + int32_t L, + int32_t * readOffset, + int32_t bufferInc, + q15_t * dst, + q15_t * dst_base, + int32_t dst_length, + int32_t dstInc, + uint32_t blockSize) + { + uint32_t i = 0; + int32_t rOffset; + q15_t* dst_end; + + /* Copy the value of Index pointer that points + * to the current location from where the input samples to be read */ + rOffset = *readOffset; + + dst_end = dst_base + dst_length; + + /* Loop over the blockSize */ + i = blockSize; + + while (i > 0U) + { + /* copy the sample from the circular buffer to the destination buffer */ + *dst = circBuffer[rOffset]; + + /* Update the input pointer */ + dst += dstInc; + + if (dst == dst_end) + { + dst = dst_base; + } + + /* Circularly update wOffset. Watch out for positive and negative value */ + rOffset += bufferInc; + + if (rOffset >= L) + { + rOffset -= L; + } + + /* Decrement the loop counter */ + i--; + } + + /* Update the index pointer */ + *readOffset = rOffset; + } + + + /** + * @brief Q7 Circular write function. + */ + __STATIC_FORCEINLINE void arm_circularWrite_q7( + q7_t * circBuffer, + int32_t L, + uint16_t * writeOffset, + int32_t bufferInc, + const q7_t * src, + int32_t srcInc, + uint32_t blockSize) + { + uint32_t i = 0U; + int32_t wOffset; + + /* Copy the value of Index pointer that points + * to the current location where the input samples to be copied */ + wOffset = *writeOffset; + + /* Loop over the blockSize */ + i = blockSize; + + while (i > 0U) + { + /* copy the input sample to the circular buffer */ + circBuffer[wOffset] = *src; + + /* Update the input pointer */ + src += srcInc; + + /* Circularly update wOffset. Watch out for positive and negative value */ + wOffset += bufferInc; + if (wOffset >= L) + wOffset -= L; + + /* Decrement the loop counter */ + i--; + } + + /* Update the index pointer */ + *writeOffset = (uint16_t)wOffset; + } + + + /** + * @brief Q7 Circular Read function. + */ + __STATIC_FORCEINLINE void arm_circularRead_q7( + q7_t * circBuffer, + int32_t L, + int32_t * readOffset, + int32_t bufferInc, + q7_t * dst, + q7_t * dst_base, + int32_t dst_length, + int32_t dstInc, + uint32_t blockSize) + { + uint32_t i = 0; + int32_t rOffset; + q7_t* dst_end; + + /* Copy the value of Index pointer that points + * to the current location from where the input samples to be read */ + rOffset = *readOffset; + + dst_end = dst_base + dst_length; + + /* Loop over the blockSize */ + i = blockSize; + + while (i > 0U) + { + /* copy the sample from the circular buffer to the destination buffer */ + *dst = circBuffer[rOffset]; + + /* Update the input pointer */ + dst += dstInc; + + if (dst == dst_end) + { + dst = dst_base; + } + + /* Circularly update rOffset. Watch out for positive and negative value */ + rOffset += bufferInc; + + if (rOffset >= L) + { + rOffset -= L; + } + + /* Decrement the loop counter */ + i--; + } + + /* Update the index pointer */ + *readOffset = rOffset; + } + + + /** + * @brief Sum of the squares of the elements of a Q31 vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_power_q31( + const q31_t * pSrc, + uint32_t blockSize, + q63_t * pResult); + + + /** + * @brief Sum of the squares of the elements of a floating-point vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_power_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult); + + + /** + * @brief Sum of the squares of the elements of a Q15 vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_power_q15( + const q15_t * pSrc, + uint32_t blockSize, + q63_t * pResult); + + + /** + * @brief Sum of the squares of the elements of a Q7 vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_power_q7( + const q7_t * pSrc, + uint32_t blockSize, + q31_t * pResult); + + + /** + * @brief Mean value of a Q7 vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_mean_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult); + + + /** + * @brief Mean value of a Q15 vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_mean_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult); + + + /** + * @brief Mean value of a Q31 vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_mean_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult); + + + /** + * @brief Mean value of a floating-point vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_mean_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult); + + + /** + * @brief Variance of the elements of a floating-point vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_var_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult); + + + /** + * @brief Variance of the elements of a Q31 vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_var_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult); + + + /** + * @brief Variance of the elements of a Q15 vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_var_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult); + + + /** + * @brief Root Mean Square of the elements of a floating-point vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_rms_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult); + + + /** + * @brief Root Mean Square of the elements of a Q31 vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_rms_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult); + + + /** + * @brief Root Mean Square of the elements of a Q15 vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_rms_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult); + + + /** + * @brief Standard deviation of the elements of a floating-point vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_std_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult); + + + /** + * @brief Standard deviation of the elements of a Q31 vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_std_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult); + + + /** + * @brief Standard deviation of the elements of a Q15 vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output value. + */ + void arm_std_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult); + + + /** + * @brief Floating-point complex magnitude + * @param[in] pSrc points to the complex input vector + * @param[out] pDst points to the real output vector + * @param[in] numSamples number of complex samples in the input vector + */ + void arm_cmplx_mag_f32( + const float32_t * pSrc, + float32_t * pDst, + uint32_t numSamples); + + + /** + * @brief Q31 complex magnitude + * @param[in] pSrc points to the complex input vector + * @param[out] pDst points to the real output vector + * @param[in] numSamples number of complex samples in the input vector + */ + void arm_cmplx_mag_q31( + const q31_t * pSrc, + q31_t * pDst, + uint32_t numSamples); + + + /** + * @brief Q15 complex magnitude + * @param[in] pSrc points to the complex input vector + * @param[out] pDst points to the real output vector + * @param[in] numSamples number of complex samples in the input vector + */ + void arm_cmplx_mag_q15( + const q15_t * pSrc, + q15_t * pDst, + uint32_t numSamples); + + + /** + * @brief Q15 complex dot product + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[in] numSamples number of complex samples in each vector + * @param[out] realResult real part of the result returned here + * @param[out] imagResult imaginary part of the result returned here + */ + void arm_cmplx_dot_prod_q15( + const q15_t * pSrcA, + const q15_t * pSrcB, + uint32_t numSamples, + q31_t * realResult, + q31_t * imagResult); + + + /** + * @brief Q31 complex dot product + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[in] numSamples number of complex samples in each vector + * @param[out] realResult real part of the result returned here + * @param[out] imagResult imaginary part of the result returned here + */ + void arm_cmplx_dot_prod_q31( + const q31_t * pSrcA, + const q31_t * pSrcB, + uint32_t numSamples, + q63_t * realResult, + q63_t * imagResult); + + + /** + * @brief Floating-point complex dot product + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[in] numSamples number of complex samples in each vector + * @param[out] realResult real part of the result returned here + * @param[out] imagResult imaginary part of the result returned here + */ + void arm_cmplx_dot_prod_f32( + const float32_t * pSrcA, + const float32_t * pSrcB, + uint32_t numSamples, + float32_t * realResult, + float32_t * imagResult); + + + /** + * @brief Q15 complex-by-real multiplication + * @param[in] pSrcCmplx points to the complex input vector + * @param[in] pSrcReal points to the real input vector + * @param[out] pCmplxDst points to the complex output vector + * @param[in] numSamples number of samples in each vector + */ + void arm_cmplx_mult_real_q15( + const q15_t * pSrcCmplx, + const q15_t * pSrcReal, + q15_t * pCmplxDst, + uint32_t numSamples); + + + /** + * @brief Q31 complex-by-real multiplication + * @param[in] pSrcCmplx points to the complex input vector + * @param[in] pSrcReal points to the real input vector + * @param[out] pCmplxDst points to the complex output vector + * @param[in] numSamples number of samples in each vector + */ + void arm_cmplx_mult_real_q31( + const q31_t * pSrcCmplx, + const q31_t * pSrcReal, + q31_t * pCmplxDst, + uint32_t numSamples); + + + /** + * @brief Floating-point complex-by-real multiplication + * @param[in] pSrcCmplx points to the complex input vector + * @param[in] pSrcReal points to the real input vector + * @param[out] pCmplxDst points to the complex output vector + * @param[in] numSamples number of samples in each vector + */ + void arm_cmplx_mult_real_f32( + const float32_t * pSrcCmplx, + const float32_t * pSrcReal, + float32_t * pCmplxDst, + uint32_t numSamples); + + + /** + * @brief Minimum value of a Q7 vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] result is output pointer + * @param[in] index is the array index of the minimum value in the input buffer. + */ + void arm_min_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * result, + uint32_t * index); + + + /** + * @brief Minimum value of a Q15 vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output pointer + * @param[in] pIndex is the array index of the minimum value in the input buffer. + */ + void arm_min_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex); + + + /** + * @brief Minimum value of a Q31 vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output pointer + * @param[out] pIndex is the array index of the minimum value in the input buffer. + */ + void arm_min_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex); + + + /** + * @brief Minimum value of a floating-point vector. + * @param[in] pSrc is input pointer + * @param[in] blockSize is the number of samples to process + * @param[out] pResult is output pointer + * @param[out] pIndex is the array index of the minimum value in the input buffer. + */ + void arm_min_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex); + + +/** + * @brief Maximum value of a Q7 vector. + * @param[in] pSrc points to the input buffer + * @param[in] blockSize length of the input vector + * @param[out] pResult maximum value returned here + * @param[out] pIndex index of maximum value returned here + */ + void arm_max_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex); + + +/** + * @brief Maximum value of a Q15 vector. + * @param[in] pSrc points to the input buffer + * @param[in] blockSize length of the input vector + * @param[out] pResult maximum value returned here + * @param[out] pIndex index of maximum value returned here + */ + void arm_max_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex); + + +/** + * @brief Maximum value of a Q31 vector. + * @param[in] pSrc points to the input buffer + * @param[in] blockSize length of the input vector + * @param[out] pResult maximum value returned here + * @param[out] pIndex index of maximum value returned here + */ + void arm_max_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex); + + +/** + * @brief Maximum value of a floating-point vector. + * @param[in] pSrc points to the input buffer + * @param[in] blockSize length of the input vector + * @param[out] pResult maximum value returned here + * @param[out] pIndex index of maximum value returned here + */ + void arm_max_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex); + + + /** + * @brief Q15 complex-by-complex multiplication + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[out] pDst points to the output vector + * @param[in] numSamples number of complex samples in each vector + */ + void arm_cmplx_mult_cmplx_q15( + const q15_t * pSrcA, + const q15_t * pSrcB, + q15_t * pDst, + uint32_t numSamples); + + + /** + * @brief Q31 complex-by-complex multiplication + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[out] pDst points to the output vector + * @param[in] numSamples number of complex samples in each vector + */ + void arm_cmplx_mult_cmplx_q31( + const q31_t * pSrcA, + const q31_t * pSrcB, + q31_t * pDst, + uint32_t numSamples); + + + /** + * @brief Floating-point complex-by-complex multiplication + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[out] pDst points to the output vector + * @param[in] numSamples number of complex samples in each vector + */ + void arm_cmplx_mult_cmplx_f32( + const float32_t * pSrcA, + const float32_t * pSrcB, + float32_t * pDst, + uint32_t numSamples); + + + /** + * @brief Converts the elements of the floating-point vector to Q31 vector. + * @param[in] pSrc points to the floating-point input vector + * @param[out] pDst points to the Q31 output vector + * @param[in] blockSize length of the input vector + */ + void arm_float_to_q31( + const float32_t * pSrc, + q31_t * pDst, + uint32_t blockSize); + + + /** + * @brief Converts the elements of the floating-point vector to Q15 vector. + * @param[in] pSrc points to the floating-point input vector + * @param[out] pDst points to the Q15 output vector + * @param[in] blockSize length of the input vector + */ + void arm_float_to_q15( + const float32_t * pSrc, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @brief Converts the elements of the floating-point vector to Q7 vector. + * @param[in] pSrc points to the floating-point input vector + * @param[out] pDst points to the Q7 output vector + * @param[in] blockSize length of the input vector + */ + void arm_float_to_q7( + const float32_t * pSrc, + q7_t * pDst, + uint32_t blockSize); + + + /** + * @brief Converts the elements of the Q31 vector to floating-point vector. + * @param[in] pSrc is input pointer + * @param[out] pDst is output pointer + * @param[in] blockSize is the number of samples to process + */ + void arm_q31_to_float( + const q31_t * pSrc, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Converts the elements of the Q31 vector to Q15 vector. + * @param[in] pSrc is input pointer + * @param[out] pDst is output pointer + * @param[in] blockSize is the number of samples to process + */ + void arm_q31_to_q15( + const q31_t * pSrc, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @brief Converts the elements of the Q31 vector to Q7 vector. + * @param[in] pSrc is input pointer + * @param[out] pDst is output pointer + * @param[in] blockSize is the number of samples to process + */ + void arm_q31_to_q7( + const q31_t * pSrc, + q7_t * pDst, + uint32_t blockSize); + + + /** + * @brief Converts the elements of the Q15 vector to floating-point vector. + * @param[in] pSrc is input pointer + * @param[out] pDst is output pointer + * @param[in] blockSize is the number of samples to process + */ + void arm_q15_to_float( + const q15_t * pSrc, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Converts the elements of the Q15 vector to Q31 vector. + * @param[in] pSrc is input pointer + * @param[out] pDst is output pointer + * @param[in] blockSize is the number of samples to process + */ + void arm_q15_to_q31( + const q15_t * pSrc, + q31_t * pDst, + uint32_t blockSize); + + + /** + * @brief Converts the elements of the Q15 vector to Q7 vector. + * @param[in] pSrc is input pointer + * @param[out] pDst is output pointer + * @param[in] blockSize is the number of samples to process + */ + void arm_q15_to_q7( + const q15_t * pSrc, + q7_t * pDst, + uint32_t blockSize); + + + /** + * @brief Converts the elements of the Q7 vector to floating-point vector. + * @param[in] pSrc is input pointer + * @param[out] pDst is output pointer + * @param[in] blockSize is the number of samples to process + */ + void arm_q7_to_float( + const q7_t * pSrc, + float32_t * pDst, + uint32_t blockSize); + + + /** + * @brief Converts the elements of the Q7 vector to Q31 vector. + * @param[in] pSrc input pointer + * @param[out] pDst output pointer + * @param[in] blockSize number of samples to process + */ + void arm_q7_to_q31( + const q7_t * pSrc, + q31_t * pDst, + uint32_t blockSize); + + + /** + * @brief Converts the elements of the Q7 vector to Q15 vector. + * @param[in] pSrc input pointer + * @param[out] pDst output pointer + * @param[in] blockSize number of samples to process + */ + void arm_q7_to_q15( + const q7_t * pSrc, + q15_t * pDst, + uint32_t blockSize); + + + /** + * @ingroup groupInterpolation + */ + + /** + * @defgroup BilinearInterpolate Bilinear Interpolation + * + * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid. + * The underlying function f(x, y) is sampled on a regular grid and the interpolation process + * determines values between the grid points. + * Bilinear interpolation is equivalent to two step linear interpolation, first in the x-dimension and then in the y-dimension. + * Bilinear interpolation is often used in image processing to rescale images. + * The CMSIS DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types. + * + * Algorithm + * \par + * The instance structure used by the bilinear interpolation functions describes a two dimensional data table. + * For floating-point, the instance structure is defined as: + *
+   *   typedef struct
+   *   {
+   *     uint16_t numRows;
+   *     uint16_t numCols;
+   *     float32_t *pData;
+   * } arm_bilinear_interp_instance_f32;
+   * 
+ * + * \par + * where numRows specifies the number of rows in the table; + * numCols specifies the number of columns in the table; + * and pData points to an array of size numRows*numCols values. + * The data table pTable is organized in row order and the supplied data values fall on integer indexes. + * That is, table element (x,y) is located at pTable[x + y*numCols] where x and y are integers. + * + * \par + * Let (x, y) specify the desired interpolation point. Then define: + *
+   *     XF = floor(x)
+   *     YF = floor(y)
+   * 
+ * \par + * The interpolated output point is computed as: + *
+   *  f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
+   *           + f(XF+1, YF) * (x-XF)*(1-(y-YF))
+   *           + f(XF, YF+1) * (1-(x-XF))*(y-YF)
+   *           + f(XF+1, YF+1) * (x-XF)*(y-YF)
+   * 
+ * Note that the coordinates (x, y) contain integer and fractional components. + * The integer components specify which portion of the table to use while the + * fractional components control the interpolation processor. + * + * \par + * if (x,y) are outside of the table boundary, Bilinear interpolation returns zero output. + */ + + + /** + * @addtogroup BilinearInterpolate + * @{ + */ + + /** + * @brief Floating-point bilinear interpolation. + * @param[in,out] S points to an instance of the interpolation structure. + * @param[in] X interpolation coordinate. + * @param[in] Y interpolation coordinate. + * @return out interpolated value. + */ + __STATIC_FORCEINLINE float32_t arm_bilinear_interp_f32( + const arm_bilinear_interp_instance_f32 * S, + float32_t X, + float32_t Y) + { + float32_t out; + float32_t f00, f01, f10, f11; + float32_t *pData = S->pData; + int32_t xIndex, yIndex, index; + float32_t xdiff, ydiff; + float32_t b1, b2, b3, b4; + + xIndex = (int32_t) X; + yIndex = (int32_t) Y; + + /* Care taken for table outside boundary */ + /* Returns zero output when values are outside table boundary */ + if (xIndex < 0 || xIndex > (S->numRows - 1) || yIndex < 0 || yIndex > (S->numCols - 1)) + { + return (0); + } + + /* Calculation of index for two nearest points in X-direction */ + index = (xIndex - 1) + (yIndex - 1) * S->numCols; + + + /* Read two nearest points in X-direction */ + f00 = pData[index]; + f01 = pData[index + 1]; + + /* Calculation of index for two nearest points in Y-direction */ + index = (xIndex - 1) + (yIndex) * S->numCols; + + + /* Read two nearest points in Y-direction */ + f10 = pData[index]; + f11 = pData[index + 1]; + + /* Calculation of intermediate values */ + b1 = f00; + b2 = f01 - f00; + b3 = f10 - f00; + b4 = f00 - f01 - f10 + f11; + + /* Calculation of fractional part in X */ + xdiff = X - xIndex; + + /* Calculation of fractional part in Y */ + ydiff = Y - yIndex; + + /* Calculation of bi-linear interpolated output */ + out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff; + + /* return to application */ + return (out); + } + + + /** + * @brief Q31 bilinear interpolation. + * @param[in,out] S points to an instance of the interpolation structure. + * @param[in] X interpolation coordinate in 12.20 format. + * @param[in] Y interpolation coordinate in 12.20 format. + * @return out interpolated value. + */ + __STATIC_FORCEINLINE q31_t arm_bilinear_interp_q31( + arm_bilinear_interp_instance_q31 * S, + q31_t X, + q31_t Y) + { + q31_t out; /* Temporary output */ + q31_t acc = 0; /* output */ + q31_t xfract, yfract; /* X, Y fractional parts */ + q31_t x1, x2, y1, y2; /* Nearest output values */ + int32_t rI, cI; /* Row and column indices */ + q31_t *pYData = S->pData; /* pointer to output table values */ + uint32_t nCols = S->numCols; /* num of rows */ + + /* Input is in 12.20 format */ + /* 12 bits for the table index */ + /* Index value calculation */ + rI = ((X & (q31_t)0xFFF00000) >> 20); + + /* Input is in 12.20 format */ + /* 12 bits for the table index */ + /* Index value calculation */ + cI = ((Y & (q31_t)0xFFF00000) >> 20); + + /* Care taken for table outside boundary */ + /* Returns zero output when values are outside table boundary */ + if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1)) + { + return (0); + } + + /* 20 bits for the fractional part */ + /* shift left xfract by 11 to keep 1.31 format */ + xfract = (X & 0x000FFFFF) << 11U; + + /* Read two nearest output values from the index */ + x1 = pYData[(rI) + (int32_t)nCols * (cI) ]; + x2 = pYData[(rI) + (int32_t)nCols * (cI) + 1]; + + /* 20 bits for the fractional part */ + /* shift left yfract by 11 to keep 1.31 format */ + yfract = (Y & 0x000FFFFF) << 11U; + + /* Read two nearest output values from the index */ + y1 = pYData[(rI) + (int32_t)nCols * (cI + 1) ]; + y2 = pYData[(rI) + (int32_t)nCols * (cI + 1) + 1]; + + /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */ + out = ((q31_t) (((q63_t) x1 * (0x7FFFFFFF - xfract)) >> 32)); + acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32)); + + /* x2 * (xfract) * (1-yfract) in 3.29(q29) and adding to acc */ + out = ((q31_t) ((q63_t) x2 * (0x7FFFFFFF - yfract) >> 32)); + acc += ((q31_t) ((q63_t) out * (xfract) >> 32)); + + /* y1 * (1 - xfract) * (yfract) in 3.29(q29) and adding to acc */ + out = ((q31_t) ((q63_t) y1 * (0x7FFFFFFF - xfract) >> 32)); + acc += ((q31_t) ((q63_t) out * (yfract) >> 32)); + + /* y2 * (xfract) * (yfract) in 3.29(q29) and adding to acc */ + out = ((q31_t) ((q63_t) y2 * (xfract) >> 32)); + acc += ((q31_t) ((q63_t) out * (yfract) >> 32)); + + /* Convert acc to 1.31(q31) format */ + return ((q31_t)(acc << 2)); + } + + + /** + * @brief Q15 bilinear interpolation. + * @param[in,out] S points to an instance of the interpolation structure. + * @param[in] X interpolation coordinate in 12.20 format. + * @param[in] Y interpolation coordinate in 12.20 format. + * @return out interpolated value. + */ + __STATIC_FORCEINLINE q15_t arm_bilinear_interp_q15( + arm_bilinear_interp_instance_q15 * S, + q31_t X, + q31_t Y) + { + q63_t acc = 0; /* output */ + q31_t out; /* Temporary output */ + q15_t x1, x2, y1, y2; /* Nearest output values */ + q31_t xfract, yfract; /* X, Y fractional parts */ + int32_t rI, cI; /* Row and column indices */ + q15_t *pYData = S->pData; /* pointer to output table values */ + uint32_t nCols = S->numCols; /* num of rows */ + + /* Input is in 12.20 format */ + /* 12 bits for the table index */ + /* Index value calculation */ + rI = ((X & (q31_t)0xFFF00000) >> 20); + + /* Input is in 12.20 format */ + /* 12 bits for the table index */ + /* Index value calculation */ + cI = ((Y & (q31_t)0xFFF00000) >> 20); + + /* Care taken for table outside boundary */ + /* Returns zero output when values are outside table boundary */ + if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1)) + { + return (0); + } + + /* 20 bits for the fractional part */ + /* xfract should be in 12.20 format */ + xfract = (X & 0x000FFFFF); + + /* Read two nearest output values from the index */ + x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) ]; + x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1]; + + /* 20 bits for the fractional part */ + /* yfract should be in 12.20 format */ + yfract = (Y & 0x000FFFFF); + + /* Read two nearest output values from the index */ + y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) ]; + y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1]; + + /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 13.51 format */ + + /* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */ + /* convert 13.35 to 13.31 by right shifting and out is in 1.31 */ + out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4U); + acc = ((q63_t) out * (0xFFFFF - yfract)); + + /* x2 * (xfract) * (1-yfract) in 1.51 and adding to acc */ + out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4U); + acc += ((q63_t) out * (xfract)); + + /* y1 * (1 - xfract) * (yfract) in 1.51 and adding to acc */ + out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4U); + acc += ((q63_t) out * (yfract)); + + /* y2 * (xfract) * (yfract) in 1.51 and adding to acc */ + out = (q31_t) (((q63_t) y2 * (xfract)) >> 4U); + acc += ((q63_t) out * (yfract)); + + /* acc is in 13.51 format and down shift acc by 36 times */ + /* Convert out to 1.15 format */ + return ((q15_t)(acc >> 36)); + } + + + /** + * @brief Q7 bilinear interpolation. + * @param[in,out] S points to an instance of the interpolation structure. + * @param[in] X interpolation coordinate in 12.20 format. + * @param[in] Y interpolation coordinate in 12.20 format. + * @return out interpolated value. + */ + __STATIC_FORCEINLINE q7_t arm_bilinear_interp_q7( + arm_bilinear_interp_instance_q7 * S, + q31_t X, + q31_t Y) + { + q63_t acc = 0; /* output */ + q31_t out; /* Temporary output */ + q31_t xfract, yfract; /* X, Y fractional parts */ + q7_t x1, x2, y1, y2; /* Nearest output values */ + int32_t rI, cI; /* Row and column indices */ + q7_t *pYData = S->pData; /* pointer to output table values */ + uint32_t nCols = S->numCols; /* num of rows */ + + /* Input is in 12.20 format */ + /* 12 bits for the table index */ + /* Index value calculation */ + rI = ((X & (q31_t)0xFFF00000) >> 20); + + /* Input is in 12.20 format */ + /* 12 bits for the table index */ + /* Index value calculation */ + cI = ((Y & (q31_t)0xFFF00000) >> 20); + + /* Care taken for table outside boundary */ + /* Returns zero output when values are outside table boundary */ + if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1)) + { + return (0); + } + + /* 20 bits for the fractional part */ + /* xfract should be in 12.20 format */ + xfract = (X & (q31_t)0x000FFFFF); + + /* Read two nearest output values from the index */ + x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) ]; + x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1]; + + /* 20 bits for the fractional part */ + /* yfract should be in 12.20 format */ + yfract = (Y & (q31_t)0x000FFFFF); + + /* Read two nearest output values from the index */ + y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) ]; + y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1]; + + /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 16.47 format */ + out = ((x1 * (0xFFFFF - xfract))); + acc = (((q63_t) out * (0xFFFFF - yfract))); + + /* x2 * (xfract) * (1-yfract) in 2.22 and adding to acc */ + out = ((x2 * (0xFFFFF - yfract))); + acc += (((q63_t) out * (xfract))); + + /* y1 * (1 - xfract) * (yfract) in 2.22 and adding to acc */ + out = ((y1 * (0xFFFFF - xfract))); + acc += (((q63_t) out * (yfract))); + + /* y2 * (xfract) * (yfract) in 2.22 and adding to acc */ + out = ((y2 * (yfract))); + acc += (((q63_t) out * (xfract))); + + /* acc in 16.47 format and down shift by 40 to convert to 1.7 format */ + return ((q7_t)(acc >> 40)); + } + + /** + * @} end of BilinearInterpolate group + */ + + +/* SMMLAR */ +#define multAcc_32x32_keep32_R(a, x, y) \ + a = (q31_t) (((((q63_t) a) << 32) + ((q63_t) x * y) + 0x80000000LL ) >> 32) + +/* SMMLSR */ +#define multSub_32x32_keep32_R(a, x, y) \ + a = (q31_t) (((((q63_t) a) << 32) - ((q63_t) x * y) + 0x80000000LL ) >> 32) + +/* SMMULR */ +#define mult_32x32_keep32_R(a, x, y) \ + a = (q31_t) (((q63_t) x * y + 0x80000000LL ) >> 32) + +/* SMMLA */ +#define multAcc_32x32_keep32(a, x, y) \ + a += (q31_t) (((q63_t) x * y) >> 32) + +/* SMMLS */ +#define multSub_32x32_keep32(a, x, y) \ + a -= (q31_t) (((q63_t) x * y) >> 32) + +/* SMMUL */ +#define mult_32x32_keep32(a, x, y) \ + a = (q31_t) (((q63_t) x * y ) >> 32) + + +#if defined ( __CC_ARM ) + /* Enter low optimization region - place directly above function definition */ + #if defined( __ARM_ARCH_7EM__ ) + #define LOW_OPTIMIZATION_ENTER \ + _Pragma ("push") \ + _Pragma ("O1") + #else + #define LOW_OPTIMIZATION_ENTER + #endif + + /* Exit low optimization region - place directly after end of function definition */ + #if defined ( __ARM_ARCH_7EM__ ) + #define LOW_OPTIMIZATION_EXIT \ + _Pragma ("pop") + #else + #define LOW_OPTIMIZATION_EXIT + #endif + + /* Enter low optimization region - place directly above function definition */ + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + + /* Exit low optimization region - place directly after end of function definition */ + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined (__ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __GNUC__ ) + #define LOW_OPTIMIZATION_ENTER \ + __attribute__(( optimize("-O1") )) + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __ICCARM__ ) + /* Enter low optimization region - place directly above function definition */ + #if defined ( __ARM_ARCH_7EM__ ) + #define LOW_OPTIMIZATION_ENTER \ + _Pragma ("optimize=low") + #else + #define LOW_OPTIMIZATION_ENTER + #endif + + /* Exit low optimization region - place directly after end of function definition */ + #define LOW_OPTIMIZATION_EXIT + + /* Enter low optimization region - place directly above function definition */ + #if defined ( __ARM_ARCH_7EM__ ) + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER \ + _Pragma ("optimize=low") + #else + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #endif + + /* Exit low optimization region - place directly after end of function definition */ + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __TI_ARM__ ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __CSMC__ ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __TASKING__ ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#endif + + +#ifdef __cplusplus +} +#endif + +/* Compiler specific diagnostic adjustment */ +#if defined ( __CC_ARM ) + +#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) + +#elif defined ( __GNUC__ ) +#pragma GCC diagnostic pop + +#elif defined ( __ICCARM__ ) + +#elif defined ( __TI_ARM__ ) + +#elif defined ( __CSMC__ ) + +#elif defined ( __TASKING__ ) + +#elif defined ( _MSC_VER ) + +#else + #error Unknown compiler +#endif + +#endif /* _ARM_MATH_H */ + +/** + * + * End of file. + */ diff --git a/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM3l_math.a b/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM3l_math.a new file mode 100644 index 0000000000..f4d896e774 Binary files /dev/null and b/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM3l_math.a differ diff --git a/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM4l_math.a b/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM4l_math.a new file mode 100644 index 0000000000..3084c44e9a Binary files /dev/null and b/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM4l_math.a differ diff --git a/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM4lf_math.a b/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM4lf_math.a new file mode 100644 index 0000000000..66efc87f62 Binary files /dev/null and b/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM4lf_math.a differ diff --git a/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM7l_math.a b/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM7l_math.a new file mode 100644 index 0000000000..d03b4188cf Binary files /dev/null and b/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM7l_math.a differ diff --git a/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM7lfdp_math.a b/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM7lfdp_math.a new file mode 100644 index 0000000000..ab7cf24260 Binary files /dev/null and b/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM7lfdp_math.a differ diff --git a/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM7lfsp_math.a b/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM7lfsp_math.a new file mode 100644 index 0000000000..5d0f9002db Binary files /dev/null and b/libraries/AP_GyroFFT/CMSIS_5/lib/libarm_cortexM7lfsp_math.a differ