AP_Scheduler: cope with high CPU load by stretching loop times

this adds an extra budget of time per loop when we are not achieving
scheduled tasks at a rate of at least 1/8 of the desired rate. This
fixes an issue where a vehicle can become uncontrollable if the user
asks for a SCHED_LOOP_RATE which is not achievable. As these events
happen we add extra loop budget until we are able to run all tasks. We
drop the extra time when the CPU pressure eases.
This commit is contained in:
Andrew Tridgell 2019-09-17 18:13:28 +10:00
parent 3d9f393f4b
commit 79acaeaef1
3 changed files with 76 additions and 9 deletions

View File

@ -26,7 +26,9 @@
#include <AP_Logger/AP_Logger.h>
#include <AP_InertialSensor/AP_InertialSensor.h>
#include <AP_InternalError/AP_InternalError.h>
#if CONFIG_HAL_BOARD == HAL_BOARD_SITL
#include <SITL/SITL.h>
#endif
#include <stdio.h>
#if APM_BUILD_TYPE(APM_BUILD_ArduCopter) || APM_BUILD_TYPE(APM_BUILD_ArduSub)
@ -131,8 +133,8 @@ void AP_Scheduler::run(uint32_t time_available)
}
for (uint8_t i=0; i<_num_tasks; i++) {
uint16_t dt = _tick_counter - _last_run[i];
uint16_t interval_ticks = _loop_rate_hz / _tasks[i].rate_hz;
uint32_t dt = _tick_counter - _last_run[i];
uint32_t interval_ticks = _loop_rate_hz / _tasks[i].rate_hz;
if (interval_ticks < 1) {
interval_ticks = 1;
}
@ -153,6 +155,12 @@ void AP_Scheduler::run(uint32_t time_available)
(unsigned)_task_time_allowed);
}
if (dt >= interval_ticks*max_task_slowdown) {
// we are going beyond the maximum slowdown factor for a
// task. This will trigger increasing the time budget
task_not_achieved++;
}
if (_task_time_allowed > time_available) {
// not enough time to run this task. Continue loop -
// maybe another task will fit into time remaining
@ -253,6 +261,17 @@ void AP_Scheduler::loop()
hal.util->persistent_data.scheduler_task = -1;
}
#if CONFIG_HAL_BOARD == HAL_BOARD_SITL
{
/*
for testing low CPU conditions we can add an optional delay in SITL
*/
auto *sitl = AP::sitl();
uint32_t loop_delay_us = sitl->loop_delay.get();
hal.scheduler->delay_microseconds(loop_delay_us);
}
#endif
// tell the scheduler one tick has passed
tick();
@ -262,14 +281,41 @@ void AP_Scheduler::loop()
// the first call to the scheduler they won't run on a later
// call until scheduler.tick() is called again
const uint32_t loop_us = get_loop_period_us();
const uint32_t time_available = (sample_time_us + loop_us) - AP_HAL::micros();
run(time_available > loop_us ? 0u : time_available);
uint32_t now = AP_HAL::micros();
uint32_t time_available = 0;
if (now - sample_time_us < loop_us) {
// get remaining time available for this loop
time_available = loop_us - (now - sample_time_us);
}
// add in extra loop time determined by not achieving scheduler tasks
time_available += extra_loop_us;
// run the tasks
run(time_available);
#if CONFIG_HAL_BOARD == HAL_BOARD_SITL
// move result of AP_HAL::micros() forward:
hal.scheduler->delay_microseconds(1);
#endif
if (task_not_achieved > 0) {
// add some extra time to the budget
extra_loop_us = MIN(extra_loop_us+100U, 5000U);
task_not_achieved = 0;
task_all_achieved = 0;
} else if (extra_loop_us > 0) {
task_all_achieved++;
if (task_all_achieved > 50) {
// we have gone through 50 loops without a task taking too
// long. CPU pressure has eased, so drop the extra time we're
// giving each loop
task_all_achieved = 0;
// we are achieving all tasks, slowly lower the extra loop time
extra_loop_us = MAX(0U, extra_loop_us-50U);
}
}
// check loop time
perf_info.check_loop_time(sample_time_us - _loop_timer_start_us);
@ -306,6 +352,7 @@ void AP_Scheduler::Log_Write_Performance()
spi_count : pd.spi_count,
i2c_count : pd.i2c_count,
i2c_isr_count : pd.i2c_isr_count,
extra_loop_us : extra_loop_us,
};
AP::logger().WriteCriticalBlock(&pkt, sizeof(pkt));
}

View File

@ -139,7 +139,12 @@ public:
float get_last_loop_time_s(void) const {
return _last_loop_time_s;
}
// get the amount of extra time being added on each loop
uint32_t get_extra_loop_us(void) const {
return extra_loop_us;
}
static const struct AP_Param::GroupInfo var_info[];
// loop performance monitoring:
@ -200,6 +205,19 @@ private:
// bitmask bit which indicates if we should log PERF message
uint32_t _log_performance_bit;
// maximum task slowdown compared to desired task rate before we
// start giving extra time per loop
const uint8_t max_task_slowdown = 4;
// counters to handle dynamically adjusting extra loop time to
// cope with low CPU conditions
uint32_t task_not_achieved;
uint32_t task_all_achieved;
// extra time available for each loop - used to dynamically adjust
// the loop rate in case we are well over budget
uint32_t extra_loop_us;
};
namespace AP {

View File

@ -2,6 +2,7 @@
#include <AP_Logger/AP_Logger.h>
#include <GCS_MAVLink/GCS.h>
#include "AP_Scheduler.h"
extern const AP_HAL::HAL& hal;
@ -114,13 +115,14 @@ float AP::PerfInfo::get_filtered_time() const
void AP::PerfInfo::update_logging()
{
gcs().send_text(MAV_SEVERITY_WARNING,
"PERF: %u/%u max=%lu min=%lu F=%u sd=%lu",
"PERF: %u/%u [%lu:%lu] F=%uHz sd=%lu Ex=%lu",
(unsigned)get_num_long_running(),
(unsigned)get_num_loops(),
(unsigned long)get_max_time(),
(unsigned long)get_min_time(),
(unsigned)(get_filtered_time()*1.0e6),
(unsigned long)get_stddev_time());
(unsigned)(0.5+(1.0f/get_filtered_time())),
(unsigned long)get_stddev_time(),
(unsigned long)AP::scheduler().get_extra_loop_us());
}
void AP::PerfInfo::set_loop_rate(uint16_t rate_hz)