HAL_ChibiOS: support recovery from a mutex deadlock

if we have a bug which causes a lock order deadlock locking up the
main thread then try a force release of the mutex to allow flight to
continue without a watchdog
This commit is contained in:
Andrew Tridgell 2023-06-16 09:54:23 +10:00
parent f7243c0ff5
commit 81e0685d33
2 changed files with 49 additions and 1 deletions

View File

@ -50,6 +50,7 @@
#include <AP_Filesystem/AP_Filesystem.h>
#include "shared_dma.h"
#include <AP_Common/ExpandingString.h>
#include <GCS_MAVLink/GCS.h>
#if HAL_WITH_IO_MCU
#include <AP_IOMCU/AP_IOMCU.h>
@ -440,7 +441,14 @@ void Scheduler::_monitor_thread(void *arg)
}
if (loop_delay >= 500 && !sched->in_expected_delay()) {
// at 500ms we declare an internal error
INTERNAL_ERROR(AP_InternalError::error_t::main_loop_stuck);
AP::internalerror().error(AP_InternalError::error_t::main_loop_stuck, hal.util->persistent_data.semaphore_line);
/*
if we are armed and get this condition then it is likely
a lock ordering deadlock. If the main thread is waiting
on a mutex then we try to force release the mutex from
the thread that is holding it.
*/
try_force_mutex();
}
#if AP_CRASHDUMP_ENABLED
@ -827,4 +835,42 @@ void Scheduler::check_stack_free(void)
#endif // CH_CFG_USE_DYNAMIC
/*
try to avoid watchdog during a locking deadlock by force releasing a
mutex that is blocking the main thread
*/
void Scheduler::try_force_mutex(void)
{
#if HAL_LOGGING_ENABLED
chSysLock();
thread_t *main_thread = get_main_thread();
if (main_thread == nullptr || main_thread->state != CH_STATE_WTMTX) {
chSysUnlock();
return;
}
mutex_t *wtmtx = main_thread->u.wtmtxp;
if (wtmtx == nullptr || wtmtx->owner == nullptr) {
chSysUnlock();
return;
}
char thdname[17] {};
uint16_t sem_line = hal.util->persistent_data.semaphore_line;
strncpy(thdname, wtmtx->owner->name, sizeof(thdname)-1);
// we will force release the lock
chMtxForceReleaseS(wtmtx);
chSysUnlock();
// log a DLCK message with information on the deadlock we have avoided
AP::logger().WriteCritical("DLCK", "TimeUS,SemLine,ThdName,MtxP", "QHNI",
AP_HAL::micros64(),
sem_line,
thdname,
unsigned(wtmtx));
GCS_SEND_TEXT(MAV_SEVERITY_CRITICAL, "CRITICAL Deadlock %u %s %p", sem_line, thdname, wtmtx);
#endif
}
#endif // HAL_SCHEDULER_ENABLED

View File

@ -199,5 +199,7 @@ private:
void ext_watchdog_pat(uint32_t now_ms);
uint32_t last_ext_watchdog_ms;
#endif
static void try_force_mutex(void);
};
#endif