AP_NavEKF3: moved intermediate variables to common memory

this moves intermediate variables from being per-core to being common
between cores. This saves memory on systems with more than one core by
avoiding allocating this memory on every core.

This is an alternative to #11717 which moves memory onto the stack. It
doesn't save as much memory as #11717, but avoids creating large stack
frames
This commit is contained in:
Andrew Tridgell 2019-09-21 10:57:40 +10:00
parent 0041874826
commit 96c6544997
5 changed files with 71 additions and 25 deletions

View File

@ -662,7 +662,7 @@ bool NavEKF2::InitialiseFilter(void)
} }
// allocate common intermediate variable space // allocate common intermediate variable space
core_common = (struct CoreCommon *)hal.util->malloc_type(NavEKF2_core::get_core_common_size(), AP_HAL::Util::MEM_FAST); core_common = (void *)hal.util->malloc_type(NavEKF2_core::get_core_common_size(), AP_HAL::Util::MEM_FAST);
if (core_common == nullptr) { if (core_common == nullptr) {
_enable.set(0); _enable.set(0);
hal.util->free_type(core, sizeof(NavEKF2_core)*num_cores, AP_HAL::Util::MEM_FAST); hal.util->free_type(core, sizeof(NavEKF2_core)*num_cores, AP_HAL::Util::MEM_FAST);

View File

@ -682,9 +682,20 @@ bool NavEKF3::InitialiseFilter(void)
gcs().send_text(MAV_SEVERITY_CRITICAL, "NavEKF3: allocation failed"); gcs().send_text(MAV_SEVERITY_CRITICAL, "NavEKF3: allocation failed");
return false; return false;
} }
// allocate common intermediate variable space
core_common = (void *)hal.util->malloc_type(NavEKF3_core::get_core_common_size(), AP_HAL::Util::MEM_FAST);
if (core_common == nullptr) {
_enable.set(0);
hal.util->free_type(core, sizeof(NavEKF3_core)*num_cores, AP_HAL::Util::MEM_FAST);
core = nullptr;
gcs().send_text(MAV_SEVERITY_CRITICAL, "NavEKF3: common allocation failed");
return false;
}
// Call constructors on all cores
for (uint8_t i = 0; i < num_cores; i++) { for (uint8_t i = 0; i < num_cores; i++) {
//Call Constructors new (&core[i]) NavEKF3_core(this);
new (&core[i]) NavEKF3_core();
} }
} }
@ -694,7 +705,7 @@ bool NavEKF3::InitialiseFilter(void)
bool core_setup_success = true; bool core_setup_success = true;
for (uint8_t core_index=0; core_index<num_cores; core_index++) { for (uint8_t core_index=0; core_index<num_cores; core_index++) {
if (coreSetupRequired[core_index]) { if (coreSetupRequired[core_index]) {
coreSetupRequired[core_index] = !core[core_index].setup_core(this, coreImuIndex[core_index], core_index); coreSetupRequired[core_index] = !core[core_index].setup_core(coreImuIndex[core_index], core_index);
if (coreSetupRequired[core_index]) { if (coreSetupRequired[core_index]) {
core_setup_success = false; core_setup_success = false;
} }

View File

@ -495,7 +495,12 @@ private:
// time of last lane switch // time of last lane switch
uint32_t lastLaneSwitch_ms; uint32_t lastLaneSwitch_ms;
/*
common intermediate variables used by all cores
*/
void *core_common;
struct { struct {
uint32_t last_function_call; // last time getLastYawYawResetAngle was called uint32_t last_function_call; // last time getLastYawYawResetAngle was called
bool core_changed; // true when a core change happened and hasn't been consumed, false otherwise bool core_changed; // true when a core change happened and hasn't been consumed, false otherwise

View File

@ -10,7 +10,7 @@
extern const AP_HAL::HAL& hal; extern const AP_HAL::HAL& hal;
// constructor // constructor
NavEKF3_core::NavEKF3_core(void) : NavEKF3_core::NavEKF3_core(NavEKF3 *_frontend) :
_perf_UpdateFilter(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_UpdateFilter")), _perf_UpdateFilter(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_UpdateFilter")),
_perf_CovariancePrediction(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_CovariancePrediction")), _perf_CovariancePrediction(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_CovariancePrediction")),
_perf_FuseVelPosNED(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_FuseVelPosNED")), _perf_FuseVelPosNED(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_FuseVelPosNED")),
@ -19,7 +19,14 @@ NavEKF3_core::NavEKF3_core(void) :
_perf_FuseSideslip(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_FuseSideslip")), _perf_FuseSideslip(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_FuseSideslip")),
_perf_TerrainOffset(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_TerrainOffset")), _perf_TerrainOffset(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_TerrainOffset")),
_perf_FuseOptFlow(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_FuseOptFlow")), _perf_FuseOptFlow(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_FuseOptFlow")),
_perf_FuseBodyOdom(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_FuseBodyOdom")) _perf_FuseBodyOdom(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_FuseBodyOdom")),
frontend(_frontend),
// setup the intermediate variables shared by all cores (to save memory)
common((struct core_common *)_frontend->core_common),
Kfusion(common->Kfusion),
KH(common->KH),
KHP(common->KHP),
nextP(common->nextP)
{ {
_perf_test[0] = hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_Test0"); _perf_test[0] = hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_Test0");
_perf_test[1] = hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_Test1"); _perf_test[1] = hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_Test1");
@ -36,9 +43,8 @@ NavEKF3_core::NavEKF3_core(void) :
} }
// setup this core backend // setup this core backend
bool NavEKF3_core::setup_core(NavEKF3 *_frontend, uint8_t _imu_index, uint8_t _core_index) bool NavEKF3_core::setup_core(uint8_t _imu_index, uint8_t _core_index)
{ {
frontend = _frontend;
imu_index = _imu_index; imu_index = _imu_index;
gyro_index_active = imu_index; gyro_index_active = imu_index;
accel_index_active = imu_index; accel_index_active = imu_index;
@ -59,15 +65,15 @@ bool NavEKF3_core::setup_core(NavEKF3 *_frontend, uint8_t _imu_index, uint8_t _c
} }
// find the maximum time delay for all potential sensors // find the maximum time delay for all potential sensors
uint16_t maxTimeDelay_ms = MAX(_frontend->_hgtDelay_ms , uint16_t maxTimeDelay_ms = MAX(frontend->_hgtDelay_ms ,
MAX(_frontend->_flowDelay_ms , MAX(frontend->_flowDelay_ms ,
MAX(_frontend->_rngBcnDelay_ms , MAX(frontend->_rngBcnDelay_ms ,
MAX(_frontend->magDelay_ms , MAX(frontend->magDelay_ms ,
(uint16_t)(EKF_TARGET_DT_MS) (uint16_t)(EKF_TARGET_DT_MS)
)))); ))));
// GPS sensing can have large delays and should not be included if disabled // GPS sensing can have large delays and should not be included if disabled
if (_frontend->_fusionModeGPS != 3) { if (frontend->_fusionModeGPS != 3) {
// Wait for the configuration of all GPS units to be confirmed. Until this has occurred the GPS driver cannot provide a correct time delay // Wait for the configuration of all GPS units to be confirmed. Until this has occurred the GPS driver cannot provide a correct time delay
float gps_delay_sec = 0; float gps_delay_sec = 0;
if (!AP::gps().get_lag(gps_delay_sec)) { if (!AP::gps().get_lag(gps_delay_sec)) {
@ -90,7 +96,7 @@ bool NavEKF3_core::setup_core(NavEKF3 *_frontend, uint8_t _imu_index, uint8_t _c
// airspeed sensing can have large delays and should not be included if disabled // airspeed sensing can have large delays and should not be included if disabled
if (_ahrs->airspeed_sensor_enabled()) { if (_ahrs->airspeed_sensor_enabled()) {
maxTimeDelay_ms = MAX(maxTimeDelay_ms , _frontend->tasDelay_ms); maxTimeDelay_ms = MAX(maxTimeDelay_ms , frontend->tasDelay_ms);
} }
// calculate the IMU buffer length required to accommodate the maximum delay with some allowance for jitter // calculate the IMU buffer length required to accommodate the maximum delay with some allowance for jitter
@ -100,13 +106,13 @@ bool NavEKF3_core::setup_core(NavEKF3 *_frontend, uint8_t _imu_index, uint8_t _c
// with the worst case delay from current time to ekf fusion time // with the worst case delay from current time to ekf fusion time
// allow for worst case 50% extension of the ekf fusion time horizon delay due to timing jitter // allow for worst case 50% extension of the ekf fusion time horizon delay due to timing jitter
uint16_t ekf_delay_ms = maxTimeDelay_ms + (int)(ceilf((float)maxTimeDelay_ms * 0.5f)); uint16_t ekf_delay_ms = maxTimeDelay_ms + (int)(ceilf((float)maxTimeDelay_ms * 0.5f));
obs_buffer_length = (ekf_delay_ms / _frontend->sensorIntervalMin_ms) + 1; obs_buffer_length = (ekf_delay_ms / frontend->sensorIntervalMin_ms) + 1;
// limit to be no longer than the IMU buffer (we can't process data faster than the EKF prediction rate) // limit to be no longer than the IMU buffer (we can't process data faster than the EKF prediction rate)
obs_buffer_length = MIN(obs_buffer_length,imu_buffer_length); obs_buffer_length = MIN(obs_buffer_length,imu_buffer_length);
// calculate buffer size for optical flow data // calculate buffer size for optical flow data
const uint8_t flow_buffer_length = MIN((ekf_delay_ms / _frontend->flowIntervalMin_ms) + 1, imu_buffer_length); const uint8_t flow_buffer_length = MIN((ekf_delay_ms / frontend->flowIntervalMin_ms) + 1, imu_buffer_length);
if(!storedGPS.init(obs_buffer_length)) { if(!storedGPS.init(obs_buffer_length)) {
return false; return false;
@ -211,7 +217,7 @@ void NavEKF3_core::InitialiseVariables()
lastKnownPositionNE.zero(); lastKnownPositionNE.zero();
prevTnb.zero(); prevTnb.zero();
memset(&P[0][0], 0, sizeof(P)); memset(&P[0][0], 0, sizeof(P));
memset(&nextP[0][0], 0, sizeof(nextP)); memset(common, 0, sizeof(*common));
flowDataValid = false; flowDataValid = false;
rangeDataToFuse = false; rangeDataToFuse = false;
Popt = 0.0f; Popt = 0.0f;
@ -567,6 +573,12 @@ void NavEKF3_core::CovarianceInit()
// Update Filter States - this should be called whenever new IMU data is available // Update Filter States - this should be called whenever new IMU data is available
void NavEKF3_core::UpdateFilter(bool predict) void NavEKF3_core::UpdateFilter(bool predict)
{ {
#if CONFIG_HAL_BOARD == HAL_BOARD_SITL
// fill the common variables with NaN, so we catch any cases in
// SITL where they are used without initialisation
fill_nanf((float *)common, sizeof(*common)/sizeof(float));
#endif
// Set the flag to indicate to the filter that the front-end has given permission for a new state prediction cycle to be started // Set the flag to indicate to the filter that the front-end has given permission for a new state prediction cycle to be started
startPredictEnabled = predict; startPredictEnabled = predict;

View File

@ -67,10 +67,10 @@ class NavEKF3_core
{ {
public: public:
// Constructor // Constructor
NavEKF3_core(void); NavEKF3_core(NavEKF3 *_frontend);
// setup this core backend // setup this core backend
bool setup_core(NavEKF3 *_frontend, uint8_t _imu_index, uint8_t _core_index); bool setup_core(uint8_t _imu_index, uint8_t _core_index);
// Initialise the states from accelerometer and magnetometer data (if present) // Initialise the states from accelerometer and magnetometer data (if present)
// This method can only be used when the vehicle is static // This method can only be used when the vehicle is static
@ -365,7 +365,10 @@ public:
// get timing statistics structure // get timing statistics structure
void getTimingStatistics(struct ekf_timing &timing); void getTimingStatistics(struct ekf_timing &timing);
// return size of core_common for use in frontend allocation
static uint32_t get_core_common_size(void) { return sizeof(core_common); }
private: private:
// Reference to the global EKF frontend for parameters // Reference to the global EKF frontend for parameters
NavEKF3 *frontend; NavEKF3 *frontend;
@ -533,6 +536,21 @@ private:
uint8_t type; // type specifiying Euler rotation order used, 1 = 312 (ZXY), 2 = 321 (ZYX) uint8_t type; // type specifiying Euler rotation order used, 1 = 312 (ZXY), 2 = 321 (ZYX)
}; };
/*
common intermediate variables used by all cores. These save a
lot memory by avoiding allocating these arrays on every core
Having these as stack variables would save even more memory, but
would give us very high stack usage in some functions, which
poses a risk of stack overflow until we have infrastructure in
place to calculate maximum stack usage using static analysis.
*/
struct core_common {
Vector28 Kfusion;
Matrix24 KH;
Matrix24 KHP;
Matrix24 nextP;
} *common;
// bias estimates for the IMUs that are enabled but not being used // bias estimates for the IMUs that are enabled but not being used
// by this core. // by this core.
struct { struct {
@ -855,9 +873,9 @@ private:
bool badIMUdata; // boolean true if the bad IMU data is detected bool badIMUdata; // boolean true if the bad IMU data is detected
float gpsNoiseScaler; // Used to scale the GPS measurement noise and consistency gates to compensate for operation with small satellite counts float gpsNoiseScaler; // Used to scale the GPS measurement noise and consistency gates to compensate for operation with small satellite counts
Vector28 Kfusion; // Kalman gain vector Vector28 &Kfusion; // Kalman gain vector
Matrix24 KH; // intermediate result used for covariance updates Matrix24 &KH; // intermediate result used for covariance updates
Matrix24 KHP; // intermediate result used for covariance updates Matrix24 &KHP; // intermediate result used for covariance updates
Matrix24 P; // covariance matrix Matrix24 P; // covariance matrix
imu_ring_buffer_t<imu_elements> storedIMU; // IMU data buffer imu_ring_buffer_t<imu_elements> storedIMU; // IMU data buffer
obs_ring_buffer_t<gps_elements> storedGPS; // GPS data buffer obs_ring_buffer_t<gps_elements> storedGPS; // GPS data buffer
@ -911,7 +929,7 @@ private:
bool allMagSensorsFailed; // true if all magnetometer sensors have timed out on this flight and we are no longer using magnetometer data bool allMagSensorsFailed; // true if all magnetometer sensors have timed out on this flight and we are no longer using magnetometer data
uint32_t lastSynthYawTime_ms; // time stamp when synthetic yaw measurement was last fused to maintain covariance health (msec) uint32_t lastSynthYawTime_ms; // time stamp when synthetic yaw measurement was last fused to maintain covariance health (msec)
uint32_t ekfStartTime_ms; // time the EKF was started (msec) uint32_t ekfStartTime_ms; // time the EKF was started (msec)
Matrix24 nextP; // Predicted covariance matrix before addition of process noise to diagonals Matrix24 &nextP; // Predicted covariance matrix before addition of process noise to diagonals
Vector2f lastKnownPositionNE; // last known position Vector2f lastKnownPositionNE; // last known position
uint32_t lastDecayTime_ms; // time of last decay of GPS position offset uint32_t lastDecayTime_ms; // time of last decay of GPS position offset
float velTestRatio; // sum of squares of GPS velocity innovation divided by fail threshold float velTestRatio; // sum of squares of GPS velocity innovation divided by fail threshold