HAL_ChibiOS: speed up millis(), micros64() etc

these use faster primitives and an assembly division by 1000 to get
between 2x and 3x speedup on these critical calls
This commit is contained in:
Andrew Tridgell 2023-12-18 12:15:35 +11:00
parent e33f0269d3
commit 6dbc3b6a70
3 changed files with 101 additions and 24 deletions

View File

@ -37,7 +37,7 @@
boot in microseconds, and which wraps at 0xFFFFFFFF.
On top of this base function we build get_systime_us32() which has
the same property, but which also maintains timer_base_us64 to allow
the same property, but which allows for a startup offset
for micros64()
*/
@ -68,48 +68,84 @@ static uint32_t system_time_u32_us(void)
#error "unsupported timer resolution"
#endif
// offset for micros64()
static uint64_t timer_base_us64;
static uint32_t get_systime_us32(void)
{
static uint32_t last_us32;
uint32_t now = system_time_u32_us();
#ifdef AP_BOARD_START_TIME
now += AP_BOARD_START_TIME;
#endif
if (now < last_us32) {
const uint64_t dt_us = 0x100000000ULL;
timer_base_us64 += dt_us;
}
last_us32 = now;
return now;
}
/*
for the exposed functions we use chSysGetStatusAndLockX() to prevent
an interrupt changing the globals while allowing this call from any
context
for the exposed functions we use chVTGetTimeStampI which handles
wrap and directly gives a uint64_t (aka systimestamp_t)
*/
uint64_t hrt_micros64I()
{
#ifdef AP_BOARD_START_TIME
return chVTGetTimeStampI() + AP_BOARD_START_TIME;
#endif
return chVTGetTimeStampI();
}
static inline bool is_locked(void) {
return !port_irq_enabled(port_get_irq_status());
}
uint64_t hrt_micros64()
{
syssts_t sts = chSysGetStatusAndLockX();
uint32_t now = get_systime_us32();
uint64_t ret = timer_base_us64 + now;
chSysRestoreStatusX(sts);
if (is_locked()) {
return chVTGetTimeStampI();
} else if (port_is_isr_context()) {
uint64_t ret;
chSysLockFromISR();
ret = chVTGetTimeStampI();
chSysUnlockFromISR();
return ret;
} else {
uint64_t ret;
chSysLock();
ret = chVTGetTimeStampI();
chSysUnlock();
return ret;
}
}
uint32_t hrt_micros32()
{
syssts_t sts = chSysGetStatusAndLockX();
uint32_t ret = get_systime_us32();
chSysRestoreStatusX(sts);
#if CH_CFG_ST_RESOLUTION == 16
// boards with 16 bit timers need to call get_systime_us32() in a
// lock state because on those boards we have local static
// variables that need protection
if (is_locked()) {
return get_systime_us32();
} else if (port_is_isr_context()) {
uint32_t ret;
chSysLockFromISR();
ret = get_systime_us32();
chSysUnlockFromISR();
return ret;
} else {
uint32_t ret;
chSysLock();
ret = get_systime_us32();
chSysUnlock();
return ret;
}
#else
return get_systime_us32();
#endif
}
uint32_t hrt_millis64()
{
return _hrt_div1000(hrt_micros64());
}
uint32_t hrt_millis32()
{
return (uint32_t)(hrt_micros64() / 1000U);
return (uint32_t)(hrt_millis64());
}

View File

@ -4,10 +4,48 @@
#ifdef __cplusplus
extern "C" {
#endif
void hrt_init(void);
uint64_t hrt_micros64(void);
uint64_t hrt_micros64I(void); // from locked context
uint64_t hrt_micros64_from_ISR(void); // from an ISR
uint32_t hrt_micros32(void);
uint32_t hrt_millis32(void);
uint32_t hrt_millis32I(void); // from locked context
uint32_t hrt_millis32_from_ISR(void); // from an ISR
uint32_t hrt_millis64(void);
/*
thanks to:
https://0x414b.com/2021/04/16/arm-division.html
*/
static inline uint64_t _hrt_umul64x64hi(uint32_t b, uint32_t a, uint32_t d, uint32_t c)
{
__asm__ ("\n\
umull r4, r5, %[b], %[d] \n\
umull %[d], r4, %[a], %[d] \n\
adds r5, %[d] \n\
umull %[d], %[a], %[a], %[c] \n\
adcs r4, %[d] \n\
adc %[a], #0 \n\
umull %[c], %[b], %[b], %[c] \n\
adds r5, %[c] \n\
adcs %[b], r4 \n\
adc %[a], #0 \n\
" : [a] "+r" (a), [b] "+r" (b), [c] "+r" (c), [d] "+r" (d) : : "r4", "r5");
return (uint64_t) a << 32 | b;
}
/*
return x / 1000
faster than the gcc implementation using _hrt_umul64x64hi() by about 3x
*/
static inline uint64_t _hrt_div1000(uint64_t x)
{
x >>= 3U;
return _hrt_umul64x64hi((uint32_t)x, x >> 32U, 0xe353f7cfU, 0x20c49ba5U) >> 4U;
}
#ifdef __cplusplus
}
#endif

View File

@ -30,6 +30,9 @@
#include "hal.h"
#include <hrt.h>
// we rely on systimestamp_t for 64 bit timestamps
static_assert(sizeof(uint64_t) == sizeof(systimestamp_t), "unexpected systimestamp_t size");
#if CH_CFG_ST_RESOLUTION == 16
static_assert(sizeof(systime_t) == 2, "expected 16 bit systime_t");
#elif CH_CFG_ST_RESOLUTION == 32
@ -385,7 +388,7 @@ __FASTRAMFUNC__ uint64_t micros64()
__FASTRAMFUNC__ uint64_t millis64()
{
return hrt_micros64() / 1000U;
return hrt_millis64();
}