mirror of https://github.com/ArduPilot/ardupilot
27 lines
882 B
C
27 lines
882 B
C
|
/*
  return 64 bit x / 1000
  faster than the normal gcc implementation by about 3x
  With thanks to https://0x414b.com/2021/04/16/arm-division.html
  and https://stackoverflow.com/questions/74765410/multiply-two-uint64-ts-and-store-result-to-uint64-t-doesnt-seem-to-work

  Method: x / 1000 == (x >> 3) / 125, and the division by 125 is done as
  a multiplication by the reciprocal constant
  0x20c49ba5e353f7cf == ceil(2^68 / 125), keeping only the top 64 bits of
  the 128-bit product and shifting those right by a further 4 bits
  (a total shift of 68). Because x >> 3 is below 2^61 the rounding error
  of the constant never reaches the integer part, so the result equals
  the truncating division x / 1000 for every 64-bit input.

  The 128-bit product is assembled from four 32x32->64 bit partial
  products so that no 128-bit integer type is needed.
 */
static inline uint64_t uint64_div1000(uint64_t x)
{
    // 1000 == 8 * 125: strip the factor of 8 first
    x >>= 3U;

    // split the dividend and the reciprocal constant into 32-bit halves,
    // each held in a 64-bit variable so the products below are 64 bits wide
    uint64_t a_lo = (uint32_t)x;
    uint64_t a_hi = x >> 32;
    const uint64_t b_lo = 0xe353f7cfU;
    const uint64_t b_hi = 0x20c49ba5U;

    // partial products; only the upper half of the lowest one can
    // influence the top 64 bits of the full product
    uint64_t a_x_b_hi = a_hi * b_hi;
    uint64_t a_x_b_mid = a_hi * b_lo;
    uint64_t b_x_a_mid = b_hi * a_lo;
    uint32_t a_x_b_lo = (a_lo * b_lo) >> 32;

    // 64-bit product + two 32-bit values: the largest possible sum is
    // (2^32-1)^2 + 2*(2^32-1) == 2^64-1, so this cannot overflow
    uint64_t middle = a_x_b_mid + a_x_b_lo + (uint32_t)b_x_a_mid;

    // 64-bit product + two 32-bit values: this is the top 64 bits of the
    // 128-bit product, with the carries out of the middle column added in
    uint64_t r = a_x_b_hi + (middle >> 32) + (b_x_a_mid >> 32);

    // remaining 4 bits of the 68-bit shift
    return r >> 4U;
}
|