From ebb528905d1b7c57326269c3cead513f87516bef Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Mon, 11 May 2020 15:56:07 +1000 Subject: [PATCH] AP_InternalError: resync for 4.0 update --- .../AP_InternalError/AP_InternalError.cpp | 76 ++++++++++++++++++- libraries/AP_InternalError/AP_InternalError.h | 32 +++++++- 2 files changed, 105 insertions(+), 3 deletions(-) diff --git a/libraries/AP_InternalError/AP_InternalError.cpp b/libraries/AP_InternalError/AP_InternalError.cpp index dd9c104c25..28d11f248c 100644 --- a/libraries/AP_InternalError/AP_InternalError.cpp +++ b/libraries/AP_InternalError/AP_InternalError.cpp @@ -7,7 +7,7 @@ extern const AP_HAL::HAL &hal; // actually create the instance: static AP_InternalError instance; -void AP_InternalError::error(const AP_InternalError::error_t e) { +void AP_InternalError::error(const AP_InternalError::error_t e, uint16_t line) { #if CONFIG_HAL_BOARD == HAL_BOARD_SITL switch (e) { case AP_InternalError::error_t::watchdog_reset: @@ -20,11 +20,68 @@ void AP_InternalError::error(const AP_InternalError::error_t e) { #endif internal_errors |= uint32_t(e); total_error_count++; + last_line = line; hal.util->persistent_data.internal_errors = internal_errors; hal.util->persistent_data.internal_error_count = total_error_count; + hal.util->persistent_data.internal_error_last_line = line; } +void AP_InternalError::errors_as_string(uint8_t *buffer, const uint16_t len) const +{ + static const char * const error_bit_descriptions[] { + "mapfailure", // logger_mapfailure + "miss_struct", // logger_missing_logstructure + "write_mssfmt", // logger_logwrite_missingfmt + "many_deletes", // logger_too_many_deletions + "bad_getfile", // logger_bad_getfilename + "unused1", + "flush_no_sem", // logger_flushing_without_sem + "bad_curr_blk", // logger_bad_current_block + "blkcnt_bad", // logger_blockcount_mismatch + "dq_failure", // logger_dequeue_failure + "cnstring_nan", // constraining_nan + "watchdog_rst", // watchdog_reset + "iomcu_reset", + "iomcu_fail", + "spi_fail", + "main_loop_stk", // main_loop_stuck + "gcs_bad_link", // gcs_bad_missionprotocol_link + "bitmask_range", + "gcs_offset", + "i2c_isr", + "flow_of_ctrl", // flow_of_control + "sfs_recursion", // switch_full_sector_recursion + "bad_rotation", + "stack_ovrflw", // stack_overflow + }; + + static_assert((1U<<(ARRAY_SIZE(error_bit_descriptions))) == uint32_t(AP_InternalError::error_t::__LAST__), "too few descriptions for bits"); + + buffer[0] = 0; + uint32_t buffer_used = 0; + for (uint8_t i=0; i= len) { + break; + } + if (internal_errors & (1U<snprintf((char*)&buffer[buffer_used], + len-buffer_used, + format, + error_bit_descriptions[i]); + if (written <= 0) { + break; + } + buffer_used += written; + } + } +} namespace AP { @@ -34,3 +91,20 @@ AP_InternalError &internalerror() } }; + +// stack overflow hook for low level RTOS code, C binding +void AP_stack_overflow(const char *thread_name) +{ + static bool done_stack_overflow; + INTERNAL_ERROR(AP_InternalError::error_t::stack_overflow); + if (!done_stack_overflow) { + // we don't want to record the thread name more than once, as + // first overflow can trigger a 2nd + strncpy(hal.util->persistent_data.thread_name4, thread_name, 4); + done_stack_overflow = true; + } + hal.util->persistent_data.fault_type = 42; // magic value + if (!hal.util->get_soft_armed()) { + AP_HAL::panic("stack overflow %s\n", thread_name); + } +} diff --git a/libraries/AP_InternalError/AP_InternalError.h b/libraries/AP_InternalError/AP_InternalError.h index eac69ab1e3..42743f3d9b 100644 --- a/libraries/AP_InternalError/AP_InternalError.h +++ b/libraries/AP_InternalError/AP_InternalError.h @@ -32,6 +32,10 @@ public: // of thing. Examples of what NOT to put in here - sd card // filling up, bad input received from GCS, GPS unit was working // and now is not. + + // note that this map is an internal ArduPilot fixture and is + // prone to change at regular intervals. The meanings of these + // bits can change day-to-day. enum class error_t { // Hex Decimal logger_mapfailure = (1U << 0), // 0x00001 1 logger_missing_logstructure = (1U << 1), // 0x00002 2 @@ -53,10 +57,26 @@ public: bitmask_range = (1U << 17), // 0x20000 131072 gcs_offset = (1U << 18), // 0x40000 262144 i2c_isr = (1U << 19), // 0x80000 524288 - flow_of_control = (1U << 20), // for generic we-should-never-get-here situations + flow_of_control = (1U << 20), //0x100000 1048576 for generic we-should-never-get-here situations + switch_full_sector_recursion= (1U << 21), //0x200000 2097152 + bad_rotation = (1U << 22), //0x400000 4194304 + stack_overflow = (1U << 23), //0x800000 8388608 + __LAST__ = (1U << 24), // used only for sanity check }; - void error(const AP_InternalError::error_t error); + // if you've changed __LAST__ to be 32, then you will want to + // rejig the way we do sanity checks as we don't want to move to a + // 64-bit type for error_t: + static_assert(sizeof(error_t) == 4, "error_t should be 32-bit type"); + + uint16_t last_error_line() const { return last_line; } + + void error(const AP_InternalError::error_t error, uint16_t line=0); + + // fill buffer with a description of the exceptions present in + // internal errors. buffer will always be null-terminated. + void errors_as_string(uint8_t *buffer, uint16_t len) const; + uint32_t count() const { return total_error_count; } // internal_errors - return mask of internal errors seen @@ -70,8 +90,16 @@ private: uint32_t internal_errors; uint32_t total_error_count; + uint16_t last_line; }; namespace AP { AP_InternalError &internalerror(); }; + +extern "C" { + void AP_stack_overflow(const char *thread_name); +} + +#define INTERNAL_ERROR(error_number) \ + AP::internalerror().error(error_number, __LINE__);