AP_ESC_Telem: fix RPM timeout race

The RPM telemetry data structure can get updated by another thread at
any time. This can cause (now - last_update_us) to be negative, which
looks like the data is nearly UINT32_MAX microseconds stale. To fix
this, we copy the last_update_us value before we get the current time
so it's guaranteed to be positive.

We also minimize the number of places we explicitly check the timestamp
and rely on the `data_valid` where possible to minimize the performance
impact of this change.
This commit is contained in:
Bob Long 2025-01-03 23:29:56 +11:00 committed by Andrew Tridgell
parent 31fe3d31f3
commit 0b7a56e2fd
2 changed files with 20 additions and 21 deletions

View File

@ -105,14 +105,12 @@ uint8_t AP_ESC_Telem::get_motor_frequencies_hz(uint8_t nfreqs, float* freqs) con
uint32_t AP_ESC_Telem::get_active_esc_mask() const { uint32_t AP_ESC_Telem::get_active_esc_mask() const {
uint32_t ret = 0; uint32_t ret = 0;
const uint32_t now = AP_HAL::millis(); const uint32_t now = AP_HAL::millis();
uint32_t now_us = AP_HAL::micros();
for (uint8_t i = 0; i < ESC_TELEM_MAX_ESCS; i++) { for (uint8_t i = 0; i < ESC_TELEM_MAX_ESCS; i++) {
if (_telem_data[i].last_update_ms == 0 && !was_rpm_data_ever_reported(_rpm_data[i])) { if (_telem_data[i].last_update_ms == 0 && !was_rpm_data_ever_reported(_rpm_data[i])) {
// have never seen telem from this ESC // have never seen telem from this ESC
continue; continue;
} }
if (_telem_data[i].stale(now) if (_telem_data[i].stale(now) && !_rpm_data[i].data_valid) {
&& !rpm_data_within_timeout(_rpm_data[i], now_us, ESC_RPM_DATA_TIMEOUT_US)) {
continue; continue;
} }
ret |= (1U << i); ret |= (1U << i);
@ -126,14 +124,12 @@ uint8_t AP_ESC_Telem::get_max_rpm_esc() const
uint32_t ret = 0; uint32_t ret = 0;
float max_rpm = 0; float max_rpm = 0;
const uint32_t now = AP_HAL::millis(); const uint32_t now = AP_HAL::millis();
const uint32_t now_us = AP_HAL::micros();
for (uint8_t i = 0; i < ESC_TELEM_MAX_ESCS; i++) { for (uint8_t i = 0; i < ESC_TELEM_MAX_ESCS; i++) {
if (_telem_data[i].last_update_ms == 0 && !was_rpm_data_ever_reported(_rpm_data[i])) { if (_telem_data[i].last_update_ms == 0 && !was_rpm_data_ever_reported(_rpm_data[i])) {
// have never seen telem from this ESC // have never seen telem from this ESC
continue; continue;
} }
if (_telem_data[i].stale(now) if (_telem_data[i].stale(now) && !_rpm_data[i].data_valid) {
&& !rpm_data_within_timeout(_rpm_data[i], now_us, ESC_RPM_DATA_TIMEOUT_US)) {
continue; continue;
} }
if (_rpm_data[i].rpm > max_rpm) { if (_rpm_data[i].rpm > max_rpm) {
@ -153,13 +149,12 @@ uint8_t AP_ESC_Telem::get_num_active_escs() const {
// return the whether all the motors in servo_channel_mask are running // return the whether all the motors in servo_channel_mask are running
bool AP_ESC_Telem::are_motors_running(uint32_t servo_channel_mask, float min_rpm, float max_rpm) const bool AP_ESC_Telem::are_motors_running(uint32_t servo_channel_mask, float min_rpm, float max_rpm) const
{ {
const uint32_t now = AP_HAL::micros();
for (uint8_t i = 0; i < ESC_TELEM_MAX_ESCS; i++) { for (uint8_t i = 0; i < ESC_TELEM_MAX_ESCS; i++) {
if (BIT_IS_SET(servo_channel_mask, i)) { if (BIT_IS_SET(servo_channel_mask, i)) {
const volatile AP_ESC_Telem_Backend::RpmData& rpmdata = _rpm_data[i]; const volatile AP_ESC_Telem_Backend::RpmData& rpmdata = _rpm_data[i];
// we choose a relatively strict measure of health so that failsafe actions can rely on the results // we choose a relatively strict measure of health so that failsafe actions can rely on the results
if (!rpm_data_within_timeout(rpmdata, now, ESC_RPM_CHECK_TIMEOUT_US)) { if (!rpm_data_within_timeout(rpmdata, ESC_RPM_CHECK_TIMEOUT_US)) {
return false; return false;
} }
if (rpmdata.rpm < min_rpm) { if (rpmdata.rpm < min_rpm) {
@ -201,7 +196,7 @@ bool AP_ESC_Telem::get_rpm(uint8_t esc_index, float& rpm) const
} }
const uint32_t now = AP_HAL::micros(); const uint32_t now = AP_HAL::micros();
if (rpm_data_within_timeout(rpmdata, now, ESC_RPM_DATA_TIMEOUT_US)) { if (rpmdata.data_valid) {
const float slew = MIN(1.0f, (now - rpmdata.last_update_us) * rpmdata.update_rate_hz * (1.0f / 1e6f)); const float slew = MIN(1.0f, (now - rpmdata.last_update_us) * rpmdata.update_rate_hz * (1.0f / 1e6f));
rpm = (rpmdata.prev_rpm + (rpmdata.rpm - rpmdata.prev_rpm) * slew); rpm = (rpmdata.prev_rpm + (rpmdata.rpm - rpmdata.prev_rpm) * slew);
@ -225,9 +220,7 @@ bool AP_ESC_Telem::get_raw_rpm(uint8_t esc_index, float& rpm) const
const volatile AP_ESC_Telem_Backend::RpmData& rpmdata = _rpm_data[esc_index]; const volatile AP_ESC_Telem_Backend::RpmData& rpmdata = _rpm_data[esc_index];
const uint32_t now = AP_HAL::micros(); if (!rpmdata.data_valid) {
if (!rpm_data_within_timeout(rpmdata, now, ESC_RPM_DATA_TIMEOUT_US)) {
return false; return false;
} }
@ -413,7 +406,6 @@ void AP_ESC_Telem::send_esc_telemetry_mavlink(uint8_t mav_chan)
} }
const uint32_t now = AP_HAL::millis(); const uint32_t now = AP_HAL::millis();
const uint32_t now_us = AP_HAL::micros();
// loop through groups of 4 ESCs // loop through groups of 4 ESCs
const uint8_t esc_offset = constrain_int16(mavlink_offset, 0, ESC_TELEM_MAX_ESCS-1); const uint8_t esc_offset = constrain_int16(mavlink_offset, 0, ESC_TELEM_MAX_ESCS-1);
@ -435,8 +427,7 @@ void AP_ESC_Telem::send_esc_telemetry_mavlink(uint8_t mav_chan)
for (uint8_t j=0; j<4; j++) { for (uint8_t j=0; j<4; j++) {
const uint8_t esc_id = (i * 4 + j) + esc_offset; const uint8_t esc_id = (i * 4 + j) + esc_offset;
if (esc_id < ESC_TELEM_MAX_ESCS && if (esc_id < ESC_TELEM_MAX_ESCS &&
(!_telem_data[esc_id].stale(now) || (!_telem_data[esc_id].stale(now) || _rpm_data[esc_id].data_valid)) {
rpm_data_within_timeout(_rpm_data[esc_id], now_us, ESC_RPM_DATA_TIMEOUT_US))) {
all_stale = false; all_stale = false;
break; break;
} }
@ -808,23 +799,31 @@ void AP_ESC_Telem::update()
} }
#endif // HAL_LOGGING_ENABLED #endif // HAL_LOGGING_ENABLED
const uint32_t now_us = AP_HAL::micros();
for (uint8_t i = 0; i < ESC_TELEM_MAX_ESCS; i++) { for (uint8_t i = 0; i < ESC_TELEM_MAX_ESCS; i++) {
// copy the last_updated_us timestamp to avoid any race issues
const uint32_t last_updated_us = _rpm_data[i].last_update_us;
const uint32_t now_us = AP_HAL::micros();
// Invalidate RPM data if not received for too long // Invalidate RPM data if not received for too long
if ((now_us - _rpm_data[i].last_update_us) > ESC_RPM_DATA_TIMEOUT_US) { if (AP_HAL::timeout_expired(last_updated_us, now_us, ESC_RPM_DATA_TIMEOUT_US)) {
_rpm_data[i].data_valid = false; _rpm_data[i].data_valid = false;
} }
} }
} }
bool AP_ESC_Telem::rpm_data_within_timeout(const volatile AP_ESC_Telem_Backend::RpmData &instance, const uint32_t now_us, const uint32_t timeout_us) // NOTE: This function should only be used to check timeouts other than
// ESC_RPM_DATA_TIMEOUT_US. Timeouts equal to ESC_RPM_DATA_TIMEOUT_US should
// use RpmData::data_valid, which is cheaper and achieves the same result.
bool AP_ESC_Telem::rpm_data_within_timeout(const volatile AP_ESC_Telem_Backend::RpmData &instance, const uint32_t timeout_us)
{ {
// copy the last_update_us timestamp to avoid any race issues
const uint32_t last_update_us = instance.last_update_us;
const uint32_t now_us = AP_HAL::micros();
// easy case, has the time window been crossed so it's invalid // easy case, has the time window been crossed so it's invalid
if ((now_us - instance.last_update_us) > timeout_us) { if (AP_HAL::timeout_expired(last_update_us, now_us, timeout_us)) {
return false; return false;
} }
// we never got a valid data, to it's invalid // we never got a valid data, to it's invalid
if (instance.last_update_us == 0) { if (last_update_us == 0) {
return false; return false;
} }
// check if things generally expired on us, this is done to handle time wrapping // check if things generally expired on us, this is done to handle time wrapping

View File

@ -134,7 +134,7 @@ public:
private: private:
// helper that validates RPM data // helper that validates RPM data
static bool rpm_data_within_timeout (const volatile AP_ESC_Telem_Backend::RpmData &instance, const uint32_t now_us, const uint32_t timeout_us); static bool rpm_data_within_timeout (const volatile AP_ESC_Telem_Backend::RpmData &instance, const uint32_t timeout_us);
static bool was_rpm_data_ever_reported (const volatile AP_ESC_Telem_Backend::RpmData &instance); static bool was_rpm_data_ever_reported (const volatile AP_ESC_Telem_Backend::RpmData &instance);
#if AP_EXTENDED_DSHOT_TELEM_V2_ENABLED #if AP_EXTENDED_DSHOT_TELEM_V2_ENABLED