bpo-31773: _PyTime_GetPerfCounter() uses _PyTime_t (GH-3983)
* Rewrite win_perf_counter() to only use integers internally. * Add _PyTime_MulDiv() which compute "ticks * mul / div" in two parts (int part and remaining) to prevent integer overflow. * Clock frequency is checked at initialization for integer overflow. * Enhance also pymonotonic() to reduce the precision loss on macOS (mach_absolute_time() clock).
This commit is contained in:
parent
0df19055c9
commit
bdaeb7d237
|
@ -197,7 +197,7 @@ PyAPI_FUNC(int) _PyTime_gmtime(time_t t, struct tm *tm);
|
||||||
|
|
||||||
The function cannot fail. _PyTime_Init() ensures that the system clock
|
The function cannot fail. _PyTime_Init() ensures that the system clock
|
||||||
works. */
|
works. */
|
||||||
PyAPI_FUNC(double) _PyTime_GetPerfCounterDouble(void);
|
PyAPI_FUNC(_PyTime_t) _PyTime_GetPerfCounter(void);
|
||||||
|
|
||||||
/* Get the performance counter: clock with the highest available resolution to
|
/* Get the performance counter: clock with the highest available resolution to
|
||||||
measure a short duration.
|
measure a short duration.
|
||||||
|
@ -205,8 +205,8 @@ PyAPI_FUNC(double) _PyTime_GetPerfCounterDouble(void);
|
||||||
Fill info (if set) with information of the function used to get the time.
|
Fill info (if set) with information of the function used to get the time.
|
||||||
|
|
||||||
Return 0 on success, raise an exception and return -1 on error. */
|
Return 0 on success, raise an exception and return -1 on error. */
|
||||||
PyAPI_FUNC(int) _PyTime_GetPerfCounterDoubleWithInfo(
|
PyAPI_FUNC(int) _PyTime_GetPerfCounterWithInfo(
|
||||||
double *t,
|
_PyTime_t *t,
|
||||||
_Py_clock_info_t *info);
|
_Py_clock_info_t *info);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
@ -91,11 +91,12 @@ floatclock(_Py_clock_info_t *info)
|
||||||
static PyObject*
|
static PyObject*
|
||||||
perf_counter(_Py_clock_info_t *info)
|
perf_counter(_Py_clock_info_t *info)
|
||||||
{
|
{
|
||||||
double t;
|
_PyTime_t t;
|
||||||
if (_PyTime_GetPerfCounterDoubleWithInfo(&t, info) < 0) {
|
if (_PyTime_GetPerfCounterWithInfo(&t, info) < 0) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return PyFloat_FromDouble(t);
|
double d = _PyTime_AsSecondsDouble(t);
|
||||||
|
return PyFloat_FromDouble(d);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(MS_WINDOWS) || defined(HAVE_CLOCK)
|
#if defined(MS_WINDOWS) || defined(HAVE_CLOCK)
|
||||||
|
|
|
@ -1669,10 +1669,10 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *globals,
|
||||||
else {
|
else {
|
||||||
static int ximporttime = 0;
|
static int ximporttime = 0;
|
||||||
static int import_level;
|
static int import_level;
|
||||||
static double accumulated;
|
static _PyTime_t accumulated;
|
||||||
_Py_IDENTIFIER(importtime);
|
_Py_IDENTIFIER(importtime);
|
||||||
|
|
||||||
double t1 = 0, accumulated_copy = accumulated;
|
_PyTime_t t1 = 0, accumulated_copy = accumulated;
|
||||||
|
|
||||||
Py_XDECREF(mod);
|
Py_XDECREF(mod);
|
||||||
|
|
||||||
|
@ -1695,7 +1695,7 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *globals,
|
||||||
|
|
||||||
if (ximporttime) {
|
if (ximporttime) {
|
||||||
import_level++;
|
import_level++;
|
||||||
t1 = _PyTime_GetPerfCounterDouble();
|
t1 = _PyTime_GetPerfCounter();
|
||||||
accumulated = 0;
|
accumulated = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1711,12 +1711,12 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *globals,
|
||||||
mod != NULL);
|
mod != NULL);
|
||||||
|
|
||||||
if (ximporttime) {
|
if (ximporttime) {
|
||||||
double cum = _PyTime_GetPerfCounterDouble() - t1;
|
_PyTime_t cum = _PyTime_GetPerfCounter() - t1;
|
||||||
|
|
||||||
import_level--;
|
import_level--;
|
||||||
fprintf(stderr, "import time: %9ld | %10ld | %*s%s\n",
|
fprintf(stderr, "import time: %9ld | %10ld | %*s%s\n",
|
||||||
(long)ceil((cum - accumulated) * 1e6),
|
(long)_PyTime_AsMicroseconds(cum - accumulated, _PyTime_ROUND_CEILING),
|
||||||
(long)ceil(cum * 1e6),
|
(long)_PyTime_AsMicroseconds(cum, _PyTime_ROUND_CEILING),
|
||||||
import_level*2, "", PyUnicode_AsUTF8(abs_name));
|
import_level*2, "", PyUnicode_AsUTF8(abs_name));
|
||||||
|
|
||||||
accumulated = accumulated_copy + cum;
|
accumulated = accumulated_copy + cum;
|
||||||
|
|
162
Python/pytime.c
162
Python/pytime.c
|
@ -42,6 +42,27 @@ _PyTime_overflow(void)
|
||||||
"timestamp too large to convert to C _PyTime_t");
|
"timestamp too large to convert to C _PyTime_t");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(MS_WINDOWS) || defined(__APPLE__)
|
||||||
|
Py_LOCAL_INLINE(_PyTime_t)
|
||||||
|
_PyTime_MulDiv(_PyTime_t ticks, _PyTime_t mul, _PyTime_t div)
|
||||||
|
{
|
||||||
|
_PyTime_t intpart, remaining;
|
||||||
|
/* Compute (ticks * mul / div) in two parts to prevent integer overflow:
|
||||||
|
compute integer part, and then the remaining part.
|
||||||
|
|
||||||
|
(ticks * mul) / div == (ticks / div) * mul + (ticks % div) * mul / div
|
||||||
|
|
||||||
|
The caller must ensure that "(div - 1) * mul" cannot overflow. */
|
||||||
|
intpart = ticks / div;
|
||||||
|
ticks %= div;
|
||||||
|
remaining = ticks * mul;
|
||||||
|
remaining /= div;
|
||||||
|
return intpart * mul + remaining;
|
||||||
|
}
|
||||||
|
#endif /* defined(MS_WINDOWS) || defined(__APPLE__) */
|
||||||
|
|
||||||
|
|
||||||
time_t
|
time_t
|
||||||
_PyLong_AsTime_t(PyObject *obj)
|
_PyLong_AsTime_t(PyObject *obj)
|
||||||
{
|
{
|
||||||
|
@ -700,29 +721,62 @@ pymonotonic(_PyTime_t *tp, _Py_clock_info_t *info, int raise)
|
||||||
|
|
||||||
#elif defined(__APPLE__)
|
#elif defined(__APPLE__)
|
||||||
static mach_timebase_info_data_t timebase;
|
static mach_timebase_info_data_t timebase;
|
||||||
uint64_t time;
|
static uint64_t t0 = 0;
|
||||||
|
uint64_t ticks;
|
||||||
|
|
||||||
if (timebase.denom == 0) {
|
if (timebase.denom == 0) {
|
||||||
/* According to the Technical Q&A QA1398, mach_timebase_info() cannot
|
/* According to the Technical Q&A QA1398, mach_timebase_info() cannot
|
||||||
fail: https://developer.apple.com/library/mac/#qa/qa1398/ */
|
fail: https://developer.apple.com/library/mac/#qa/qa1398/ */
|
||||||
(void)mach_timebase_info(&timebase);
|
(void)mach_timebase_info(&timebase);
|
||||||
|
|
||||||
|
/* Sanity check: should never occur in practice */
|
||||||
|
if (timebase.numer < 1 || timebase.denom < 1) {
|
||||||
|
PyErr_SetString(PyExc_RuntimeError,
|
||||||
|
"invalid mach_timebase_info");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check that timebase.numer and timebase.denom can be casted to
|
||||||
|
_PyTime_t. In pratice, timebase uses uint32_t, so casting cannot
|
||||||
|
overflow. At the end, only make sure that the type is uint32_t
|
||||||
|
(_PyTime_t is 64-bit long). */
|
||||||
|
assert(sizeof(timebase.numer) < sizeof(_PyTime_t));
|
||||||
|
assert(sizeof(timebase.denom) < sizeof(_PyTime_t));
|
||||||
|
|
||||||
|
/* Make sure that (ticks * timebase.numer) cannot overflow in
|
||||||
|
_PyTime_MulDiv(), with ticks < timebase.denom.
|
||||||
|
|
||||||
|
Known time bases:
|
||||||
|
|
||||||
|
* always (1, 1) on Intel
|
||||||
|
* (1000000000, 33333335) or (1000000000, 25000000) on PowerPC
|
||||||
|
|
||||||
|
None of these time bases can overflow with 64-bit _PyTime_t, but
|
||||||
|
check for overflow, just in case. */
|
||||||
|
if ((_PyTime_t)timebase.numer > _PyTime_MAX / (_PyTime_t)timebase.denom) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"mach_timebase_info is too large");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
t0 = mach_absolute_time();
|
||||||
}
|
}
|
||||||
|
|
||||||
time = mach_absolute_time();
|
|
||||||
|
|
||||||
/* apply timebase factor */
|
|
||||||
time *= timebase.numer;
|
|
||||||
time /= timebase.denom;
|
|
||||||
|
|
||||||
*tp = time;
|
|
||||||
|
|
||||||
if (info) {
|
if (info) {
|
||||||
info->implementation = "mach_absolute_time()";
|
info->implementation = "mach_absolute_time()";
|
||||||
info->resolution = (double)timebase.numer / timebase.denom * 1e-9;
|
info->resolution = (double)timebase.numer / (double)timebase.denom * 1e-9;
|
||||||
info->monotonic = 1;
|
info->monotonic = 1;
|
||||||
info->adjustable = 0;
|
info->adjustable = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ticks = mach_absolute_time();
|
||||||
|
/* Use a "time zero" to reduce precision loss when converting time
|
||||||
|
to floatting point number, as in time.monotonic(). */
|
||||||
|
ticks -= t0;
|
||||||
|
*tp = _PyTime_MulDiv(ticks,
|
||||||
|
(_PyTime_t)timebase.numer,
|
||||||
|
(_PyTime_t)timebase.denom);
|
||||||
|
|
||||||
#elif defined(__hpux)
|
#elif defined(__hpux)
|
||||||
hrtime_t time;
|
hrtime_t time;
|
||||||
|
|
||||||
|
@ -802,60 +856,93 @@ _PyTime_GetMonotonicClockWithInfo(_PyTime_t *tp, _Py_clock_info_t *info)
|
||||||
|
|
||||||
#ifdef MS_WINDOWS
|
#ifdef MS_WINDOWS
|
||||||
static int
|
static int
|
||||||
win_perf_counter(double *tp, _Py_clock_info_t *info)
|
win_perf_counter(_PyTime_t *tp, _Py_clock_info_t *info)
|
||||||
{
|
{
|
||||||
static LONGLONG cpu_frequency = 0;
|
static LONGLONG frequency = 0;
|
||||||
static LONGLONG ctrStart;
|
static LONGLONG t0 = 0;
|
||||||
LARGE_INTEGER now;
|
LARGE_INTEGER now;
|
||||||
double diff;
|
LONGLONG ticksll;
|
||||||
|
_PyTime_t ticks;
|
||||||
|
|
||||||
if (cpu_frequency == 0) {
|
if (frequency == 0) {
|
||||||
LARGE_INTEGER freq;
|
LARGE_INTEGER freq;
|
||||||
QueryPerformanceCounter(&now);
|
if (!QueryPerformanceFrequency(&freq)) {
|
||||||
ctrStart = now.QuadPart;
|
|
||||||
if (!QueryPerformanceFrequency(&freq) || freq.QuadPart == 0) {
|
|
||||||
PyErr_SetFromWindowsErr(0);
|
PyErr_SetFromWindowsErr(0);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
cpu_frequency = freq.QuadPart;
|
frequency = freq.QuadPart;
|
||||||
|
|
||||||
|
/* Sanity check: should never occur in practice */
|
||||||
|
if (frequency < 1) {
|
||||||
|
PyErr_SetString(PyExc_RuntimeError,
|
||||||
|
"invalid QueryPerformanceFrequency");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check that frequency can be casted to _PyTime_t.
|
||||||
|
|
||||||
|
Make also sure that (ticks * SEC_TO_NS) cannot overflow in
|
||||||
|
_PyTime_MulDiv(), with ticks < frequency.
|
||||||
|
|
||||||
|
Known QueryPerformanceFrequency() values:
|
||||||
|
|
||||||
|
* 10,000,000 (10 MHz): 100 ns resolution
|
||||||
|
* 3,579,545 Hz (3.6 MHz): 279 ns resolution
|
||||||
|
|
||||||
|
None of these frequencies can overflow with 64-bit _PyTime_t, but
|
||||||
|
check for overflow, just in case. */
|
||||||
|
if (frequency > _PyTime_MAX
|
||||||
|
|| frequency > (LONGLONG)_PyTime_MAX / (LONGLONG)SEC_TO_NS) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"QueryPerformanceFrequency is too large");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
QueryPerformanceCounter(&now);
|
||||||
|
t0 = now.QuadPart;
|
||||||
}
|
}
|
||||||
QueryPerformanceCounter(&now);
|
|
||||||
diff = (double)(now.QuadPart - ctrStart);
|
|
||||||
if (info) {
|
if (info) {
|
||||||
info->implementation = "QueryPerformanceCounter()";
|
info->implementation = "QueryPerformanceCounter()";
|
||||||
info->resolution = 1.0 / (double)cpu_frequency;
|
info->resolution = 1.0 / (double)frequency;
|
||||||
info->monotonic = 1;
|
info->monotonic = 1;
|
||||||
info->adjustable = 0;
|
info->adjustable = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
diff = diff / (double)cpu_frequency;
|
QueryPerformanceCounter(&now);
|
||||||
*tp = diff;
|
ticksll = now.QuadPart;
|
||||||
|
|
||||||
|
/* Use a "time zero" to reduce precision loss when converting time
|
||||||
|
to floatting point number, as in time.perf_counter(). */
|
||||||
|
ticksll -= t0;
|
||||||
|
|
||||||
|
/* Make sure that casting LONGLONG to _PyTime_t cannot overflow,
|
||||||
|
both types are signed */
|
||||||
|
Py_BUILD_ASSERT(sizeof(ticksll) <= sizeof(ticks));
|
||||||
|
ticks = (_PyTime_t)ticksll;
|
||||||
|
|
||||||
|
*tp = _PyTime_MulDiv(ticks, SEC_TO_NS, (_PyTime_t)frequency);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
_PyTime_GetPerfCounterDoubleWithInfo(double *d, _Py_clock_info_t *info)
|
_PyTime_GetPerfCounterWithInfo(_PyTime_t *t, _Py_clock_info_t *info)
|
||||||
{
|
{
|
||||||
#ifdef MS_WINDOWS
|
#ifdef MS_WINDOWS
|
||||||
return win_perf_counter(d, info);
|
return win_perf_counter(t, info);
|
||||||
#else
|
#else
|
||||||
_PyTime_t t;
|
return _PyTime_GetMonotonicClockWithInfo(t, info);
|
||||||
if (_PyTime_GetMonotonicClockWithInfo(&t, info) < 0) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
*d = _PyTime_AsSecondsDouble(t);
|
|
||||||
return 0;
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
double
|
_PyTime_t
|
||||||
_PyTime_GetPerfCounterDouble(void)
|
_PyTime_GetPerfCounter(void)
|
||||||
{
|
{
|
||||||
double t;
|
_PyTime_t t;
|
||||||
if (_PyTime_GetPerfCounterDoubleWithInfo(&t, NULL)) {
|
if (_PyTime_GetPerfCounterWithInfo(&t, NULL)) {
|
||||||
Py_UNREACHABLE();
|
Py_UNREACHABLE();
|
||||||
}
|
}
|
||||||
return t;
|
return t;
|
||||||
|
@ -869,14 +956,13 @@ _PyTime_Init(void)
|
||||||
are working properly to not have to check for exceptions at runtime. If
|
are working properly to not have to check for exceptions at runtime. If
|
||||||
a clock works once, it cannot fail in next calls. */
|
a clock works once, it cannot fail in next calls. */
|
||||||
_PyTime_t t;
|
_PyTime_t t;
|
||||||
double d;
|
|
||||||
if (_PyTime_GetSystemClockWithInfo(&t, NULL) < 0) {
|
if (_PyTime_GetSystemClockWithInfo(&t, NULL) < 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (_PyTime_GetMonotonicClockWithInfo(&t, NULL) < 0) {
|
if (_PyTime_GetMonotonicClockWithInfo(&t, NULL) < 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (_PyTime_GetPerfCounterDoubleWithInfo(&d, NULL) < 0) {
|
if (_PyTime_GetPerfCounterWithInfo(&t, NULL) < 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
|
Loading…
Reference in New Issue