From f119cca3b317e011a522fa6ced72276330e741b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beat=20K=C3=BCng?= Date: Wed, 29 Mar 2023 08:19:43 +0200 Subject: [PATCH] logger: restart on file write error This can also happen if the maximum file size is reached. --- src/modules/logger/log_writer.cpp | 15 +++++- src/modules/logger/log_writer.h | 4 +- src/modules/logger/log_writer_file.cpp | 9 +++- src/modules/logger/log_writer_file.h | 5 +- src/modules/logger/logger.cpp | 75 ++++++++++++++++---------- src/modules/logger/logger.h | 4 +- 6 files changed, 76 insertions(+), 36 deletions(-) diff --git a/src/modules/logger/log_writer.cpp b/src/modules/logger/log_writer.cpp index 8511f9e92a..93a5eaa847 100644 --- a/src/modules/logger/log_writer.cpp +++ b/src/modules/logger/log_writer.cpp @@ -123,11 +123,13 @@ bool LogWriter::is_started(LogType type, Backend query_backend) const return false; } -void LogWriter::start_log_file(LogType type, const char *filename) +bool LogWriter::start_log_file(LogType type, const char *filename) { if (_log_writer_file) { - _log_writer_file->start_log(type, filename); + return _log_writer_file->start_log(type, filename); } + + return false; } void LogWriter::stop_log_file(LogType type) @@ -195,5 +197,14 @@ void LogWriter::select_write_backend(Backend sel_backend) } } +bool LogWriter::had_file_write_error() const +{ + if (_log_writer_file) { + return _log_writer_file->had_write_error(); + } + + return false; +} + } } diff --git a/src/modules/logger/log_writer.h b/src/modules/logger/log_writer.h index 9635d22dad..32fdea46df 100644 --- a/src/modules/logger/log_writer.h +++ b/src/modules/logger/log_writer.h @@ -65,10 +65,12 @@ public: /** stop all running threads and wait for them to exit */ void thread_stop(); - void start_log_file(LogType type, const char *filename); + bool start_log_file(LogType type, const char *filename); void stop_log_file(LogType type); + bool had_file_write_error() const; + void start_log_mavlink(); void stop_log_mavlink(); diff --git a/src/modules/logger/log_writer_file.cpp b/src/modules/logger/log_writer_file.cpp index 873da36d3e..50d5cf7f45 100644 --- a/src/modules/logger/log_writer_file.cpp +++ b/src/modules/logger/log_writer_file.cpp @@ -211,7 +211,7 @@ bool LogWriterFile::init_logfile_encryption(const char *filename) #endif // PX4_CRYPTO -void LogWriterFile::start_log(LogType type, const char *filename) +bool LogWriterFile::start_log(LogType type, const char *filename) { // At this point we don't expect the file to be open, but it can happen for very fast consecutive stop & start // calls. In that case we wait for the thread to close the file first. @@ -243,7 +243,7 @@ void LogWriterFile::start_log(LogType type, const char *filename) if (!enc_init) { PX4_ERR("Failed to start encrypted logging"); _crypto.close(); - return; + return false; } #endif @@ -251,7 +251,10 @@ void LogWriterFile::start_log(LogType type, const char *filename) if (_buffers[(int)type].start_log(filename)) { PX4_INFO("Opened %s log file: %s", log_type_str(type), filename); notify(); + return true; } + + return false; } int LogWriterFile::hardfault_store_filename(const char *log_file) @@ -452,6 +455,7 @@ void LogWriterFile::run() } else { PX4_ERR("write failed (%i)", errno); + buffer._had_write_error.store(true); buffer._should_run = false; pthread_mutex_unlock(&_mtx); buffer.close_file(); @@ -648,6 +652,7 @@ size_t LogWriterFile::LogFileBuffer::get_read_ptr(void **ptr, bool *is_part) bool LogWriterFile::LogFileBuffer::start_log(const char *filename) { _fd = ::open(filename, O_CREAT | O_WRONLY, PX4_O_MODE_666); + _had_write_error.store(false); if (_fd < 0) { PX4_ERR("Can't open log file %s, errno: %d", filename, errno); diff --git a/src/modules/logger/log_writer_file.h b/src/modules/logger/log_writer_file.h index 218f127258..757f69de25 100644 --- a/src/modules/logger/log_writer_file.h +++ b/src/modules/logger/log_writer_file.h @@ -79,7 +79,7 @@ public: void thread_stop(); - void start_log(LogType type, const char *filename); + bool start_log(LogType type, const char *filename); void stop_log(LogType type); @@ -132,6 +132,8 @@ public: return _need_reliable_transfer; } + bool had_write_error() const { return _buffers[(int)LogType::Full]._had_write_error.load(); } + pthread_t thread_id() const { return _thread; } #if defined(PX4_CRYPTO) @@ -199,6 +201,7 @@ private: size_t count() const { return _count; } bool _should_run = false; + px4::atomic_bool _had_write_error{false}; private: const size_t _buffer_size; int _fd = -1; diff --git a/src/modules/logger/logger.cpp b/src/modules/logger/logger.cpp index 6d0ee623c5..1068d511bd 100644 --- a/src/modules/logger/logger.cpp +++ b/src/modules/logger/logger.cpp @@ -887,6 +887,8 @@ void Logger::run() was_started = false; } + handle_file_write_error(); + update_params(); // wait for next loop iteration... @@ -1117,8 +1119,8 @@ bool Logger::start_stop_logging() if (_vehicle_status_sub.update(&vehicle_status)) { - desired_state = (vehicle_status.arming_state == vehicle_status_s::ARMING_STATE_ARMED) || (_prev_state - && _log_mode == LogMode::arm_until_shutdown); + desired_state = (vehicle_status.arming_state == vehicle_status_s::ARMING_STATE_ARMED) || + (_prev_file_log_start_state && _log_mode == LogMode::arm_until_shutdown); updated = true; } } @@ -1126,8 +1128,8 @@ bool Logger::start_stop_logging() desired_state = desired_state || _manually_logging_override; // only start/stop if this is a state transition - if (updated && _prev_state != desired_state) { - _prev_state = desired_state; + if (updated && _prev_file_log_start_state != desired_state) { + _prev_file_log_start_state = desired_state; if (desired_state) { if (_should_stop_file_log) { // happens on quick stop/start toggling @@ -1394,6 +1396,7 @@ void Logger::start_log_file(LogType type) } PX4_INFO("Start file log (type: %s)", log_type_str(type)); + _statistics[(int) type].start_time_file = 0; char file_name[LOG_DIR_LEN] = ""; @@ -1409,35 +1412,36 @@ void Logger::start_log_file(LogType type) _param_sdlog_crypto_exchange_key.get()); #endif - _writer.start_log_file(type, file_name); - _writer.select_write_backend(LogWriter::BackendFile); - _writer.set_need_reliable_transfer(true); + if (_writer.start_log_file(type, file_name)) { + _writer.select_write_backend(LogWriter::BackendFile); + _writer.set_need_reliable_transfer(true); - write_header(type); - write_version(type); - write_formats(type); + write_header(type); + write_version(type); + write_formats(type); - if (type == LogType::Full) { - write_parameters(type); - write_parameter_defaults(type); - write_perf_data(PrintLoadReason::Preflight); - write_console_output(); - write_events_file(LogType::Full); - write_excluded_optional_topics(type); + if (type == LogType::Full) { + write_parameters(type); + write_parameter_defaults(type); + write_perf_data(PrintLoadReason::Preflight); + write_console_output(); + write_events_file(LogType::Full); + write_excluded_optional_topics(type); + } + + write_all_add_logged_msg(type); + _writer.set_need_reliable_transfer(false); + _writer.unselect_write_backend(); + _writer.notify(); + + if (type == LogType::Full) { + /* reset performance counters to get in-flight min and max values in post flight log */ + perf_reset_all(); + } + + _statistics[(int) type].start_time_file = hrt_absolute_time(); } - write_all_add_logged_msg(type); - _writer.set_need_reliable_transfer(false); - _writer.unselect_write_backend(); - _writer.notify(); - - if (type == LogType::Full) { - /* reset performance counters to get in-flight min and max values in post flight log */ - perf_reset_all(); - } - - _statistics[(int)type].start_time_file = hrt_absolute_time(); - } void Logger::stop_log_file(LogType type) @@ -1513,6 +1517,19 @@ struct perf_callback_data_t { char *buffer; }; +void Logger::handle_file_write_error() +{ + // Check for write errors, but do not immediately retry + if (_writer.had_file_write_error() && !_writer.is_started(LogType::Full, LogWriter::BackendFile) + && _prev_file_log_start_state) { + if (_statistics[(int)LogType::Full].start_time_file != 0 + && hrt_absolute_time() > _statistics[(int)LogType::Full].start_time_file + 10_s) { + PX4_DEBUG("Restarting due to write failure"); + start_log_file(LogType::Full); + } + } +} + void Logger::perf_iterate_callback(perf_counter_t handle, void *user) { perf_callback_data_t *callback_data = (perf_callback_data_t *)user; diff --git a/src/modules/logger/logger.h b/src/modules/logger/logger.h index 65a9f1d179..57b324d0c0 100644 --- a/src/modules/logger/logger.h +++ b/src/modules/logger/logger.h @@ -315,6 +315,8 @@ private: void handle_vehicle_command_update(); void ack_vehicle_command(vehicle_command_s *cmd, uint32_t result); + void handle_file_write_error(); + /** * initialize the output for the process load, so that ~1 second later it will be written to the log */ @@ -346,7 +348,7 @@ private: LogFileName _file_name[(int)LogType::Count]; - bool _prev_state{false}; ///< previous state depending on logging mode (arming or aux1 state) + bool _prev_file_log_start_state{false}; ///< previous state depending on logging mode (arming or aux1 state) bool _manually_logging_override{false}; Statistics _statistics[(int)LogType::Count];