mirror of
https://github.com/ArduPilot/ardupilot
synced 2025-02-01 13:38:38 -04:00
HAL_ChibiOS: support recovery from a mutex deadlock
if we have a bug which causes a lock order deadlock locking up the main thread then try a force release of the mutex to allow flight to continue without a watchdog
This commit is contained in:
parent
f7243c0ff5
commit
81e0685d33
@ -50,6 +50,7 @@
|
||||
#include <AP_Filesystem/AP_Filesystem.h>
|
||||
#include "shared_dma.h"
|
||||
#include <AP_Common/ExpandingString.h>
|
||||
#include <GCS_MAVLink/GCS.h>
|
||||
|
||||
#if HAL_WITH_IO_MCU
|
||||
#include <AP_IOMCU/AP_IOMCU.h>
|
||||
@ -440,7 +441,14 @@ void Scheduler::_monitor_thread(void *arg)
|
||||
}
|
||||
if (loop_delay >= 500 && !sched->in_expected_delay()) {
|
||||
// at 500ms we declare an internal error
|
||||
INTERNAL_ERROR(AP_InternalError::error_t::main_loop_stuck);
|
||||
AP::internalerror().error(AP_InternalError::error_t::main_loop_stuck, hal.util->persistent_data.semaphore_line);
|
||||
/*
|
||||
if we are armed and get this condition then it is likely
|
||||
a lock ordering deadlock. If the main thread is waiting
|
||||
on a mutex then we try to force release the mutex from
|
||||
the thread that is holding it.
|
||||
*/
|
||||
try_force_mutex();
|
||||
}
|
||||
|
||||
#if AP_CRASHDUMP_ENABLED
|
||||
@ -827,4 +835,42 @@ void Scheduler::check_stack_free(void)
|
||||
|
||||
#endif // CH_CFG_USE_DYNAMIC
|
||||
|
||||
/*
|
||||
try to avoid watchdog during a locking deadlock by force releasing a
|
||||
mutex that is blocking the main thread
|
||||
*/
|
||||
void Scheduler::try_force_mutex(void)
|
||||
{
|
||||
#if HAL_LOGGING_ENABLED
|
||||
chSysLock();
|
||||
thread_t *main_thread = get_main_thread();
|
||||
|
||||
if (main_thread == nullptr || main_thread->state != CH_STATE_WTMTX) {
|
||||
chSysUnlock();
|
||||
return;
|
||||
}
|
||||
|
||||
mutex_t *wtmtx = main_thread->u.wtmtxp;
|
||||
if (wtmtx == nullptr || wtmtx->owner == nullptr) {
|
||||
chSysUnlock();
|
||||
return;
|
||||
}
|
||||
char thdname[17] {};
|
||||
uint16_t sem_line = hal.util->persistent_data.semaphore_line;
|
||||
strncpy(thdname, wtmtx->owner->name, sizeof(thdname)-1);
|
||||
|
||||
// we will force release the lock
|
||||
chMtxForceReleaseS(wtmtx);
|
||||
chSysUnlock();
|
||||
|
||||
// log a DLCK message with information on the deadlock we have avoided
|
||||
AP::logger().WriteCritical("DLCK", "TimeUS,SemLine,ThdName,MtxP", "QHNI",
|
||||
AP_HAL::micros64(),
|
||||
sem_line,
|
||||
thdname,
|
||||
unsigned(wtmtx));
|
||||
GCS_SEND_TEXT(MAV_SEVERITY_CRITICAL, "CRITICAL Deadlock %u %s %p", sem_line, thdname, wtmtx);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // HAL_SCHEDULER_ENABLED
|
||||
|
@ -199,5 +199,7 @@ private:
|
||||
void ext_watchdog_pat(uint32_t now_ms);
|
||||
uint32_t last_ext_watchdog_ms;
|
||||
#endif
|
||||
|
||||
static void try_force_mutex(void);
|
||||
};
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user