gh-111962: Make dtoa thread-safe in `--disable-gil` builds. (#112049)

This updates `dtoa.c` to avoid using the Bigint free-list in `--disable-gil` builds and
to pre-compute the needed powers of 5 during interpreter initialization.

* gh-111962: Make dtoa thread-safe in `--disable-gil` builds.

This avoids using the Bigint free-list in `--disable-gil` builds
and pre-computes the needed powers of 5 during interpreter initialization.

* Fix size of cached powers of 5 array.

We need the powers of 5 up to 5**512 because we only jump straight to
underflow when the exponent is less than -512 (or larger than 308).

* Rename Py_NOGIL to Py_GIL_DISABLED

* Changes from review

* Fix assertion placement
This commit is contained in:
Sam Gross 2023-12-07 08:47:55 -05:00 committed by GitHub
parent 9f67042f28
commit 2d76be251d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 70 additions and 30 deletions

View File

@ -35,6 +35,9 @@ struct _dtoa_state {
/* The size of the Bigint freelist */
#define Bigint_Kmax 7
/* The size of the cached powers of 5 array */
#define Bigint_Pow5size 8
#ifndef PRIVATE_MEM
#define PRIVATE_MEM 2304
#endif
@ -42,9 +45,10 @@ struct _dtoa_state {
((PRIVATE_MEM+sizeof(double)-1)/sizeof(double))
struct _dtoa_state {
/* p5s is a linked list of powers of 5 of the form 5**(2**i), i >= 2 */
// p5s is an array of powers of 5 of the form:
// 5**(2**(i+2)) for 0 <= i < Bigint_Pow5size
struct Bigint *p5s[Bigint_Pow5size];
// XXX This should be freed during runtime fini.
struct Bigint *p5s;
struct Bigint *freelist[Bigint_Kmax+1];
double preallocated[Bigint_PREALLOC_SIZE];
double *preallocated_next;
@ -57,9 +61,6 @@ struct _dtoa_state {
#endif // !Py_USING_MEMORY_DEBUGGER
/* These functions are used by modules compiled as C extension like math:
they must be exported. */
extern double _Py_dg_strtod(const char *str, char **ptr);
extern char* _Py_dg_dtoa(double d, int mode, int ndigits,
int *decpt, int *sign, char **rve);
@ -67,6 +68,11 @@ extern void _Py_dg_freedtoa(char *s);
#endif // _PY_SHORT_FLOAT_REPR == 1
extern PyStatus _PyDtoa_Init(PyInterpreterState *interp);
extern void _PyDtoa_Fini(PyInterpreterState *interp);
#ifdef __cplusplus
}
#endif

View File

@ -309,7 +309,7 @@ BCinfo {
// struct Bigint is defined in pycore_dtoa.h.
typedef struct Bigint Bigint;
#ifndef Py_USING_MEMORY_DEBUGGER
#if !defined(Py_GIL_DISABLED) && !defined(Py_USING_MEMORY_DEBUGGER)
/* Memory management: memory is allocated from, and returned to, Kmax+1 pools
of memory, where pool k (0 <= k <= Kmax) is for Bigints b with b->maxwds ==
@ -428,7 +428,7 @@ Bfree(Bigint *v)
}
}
#endif /* Py_USING_MEMORY_DEBUGGER */
#endif /* !defined(Py_GIL_DISABLED) && !defined(Py_USING_MEMORY_DEBUGGER) */
#define Bcopy(x,y) memcpy((char *)&x->sign, (char *)&y->sign, \
y->wds*sizeof(Long) + 2*sizeof(int))
@ -673,10 +673,17 @@ mult(Bigint *a, Bigint *b)
static Bigint *
pow5mult(Bigint *b, int k)
{
Bigint *b1, *p5, *p51;
Bigint *b1, *p5, **p5s;
int i;
static const int p05[3] = { 5, 25, 125 };
// For double-to-string conversion, the maximum value of k is limited by
// DBL_MAX_10_EXP (308), the maximum decimal base-10 exponent for binary64.
// For string-to-double conversion, the extreme case is constrained by our
// hardcoded exponent limit of -512 (below which we underflow), adjusted by
// STRTOD_DIGLIM-DBL_DIG-1, giving a maximum of k=535.
assert(0 <= k && k < 1024);
if ((i = k & 3)) {
b = multadd(b, p05[i-1], 0);
if (b == NULL)
@ -686,18 +693,11 @@ pow5mult(Bigint *b, int k)
if (!(k >>= 2))
return b;
PyInterpreterState *interp = _PyInterpreterState_GET();
p5 = interp->dtoa.p5s;
if (!p5) {
/* first time */
p5 = i2b(625);
if (p5 == NULL) {
Bfree(b);
return NULL;
}
interp->dtoa.p5s = p5;
p5->next = 0;
}
p5s = interp->dtoa.p5s;
for(;;) {
assert(p5s != interp->dtoa.p5s + Bigint_Pow5size);
p5 = *p5s;
p5s++;
if (k & 1) {
b1 = mult(b, p5);
Bfree(b);
@ -707,17 +707,6 @@ pow5mult(Bigint *b, int k)
}
if (!(k >>= 1))
break;
p51 = p5->next;
if (!p51) {
p51 = mult(p5,p5);
if (p51 == NULL) {
Bfree(b);
return NULL;
}
p51->next = 0;
p5->next = p51;
}
p5 = p51;
}
return b;
}
@ -2811,3 +2800,42 @@ _Py_dg_dtoa(double dd, int mode, int ndigits,
}
#endif // _PY_SHORT_FLOAT_REPR == 1
/* Populate the per-interpreter cache of powers of 5 used by pow5mult().
 *
 * After a successful call, interp->dtoa.p5s[i] holds 5**(2**(i+2)) for
 * 0 <= i < Bigint_Pow5size, i.e. 5**4, 5**8, ..., 5**512.
 *
 * Returns PyStatus_Ok() on success, or PyStatus_NoMemory() as soon as
 * i2b() or mult() returns NULL.  Entries stored before a failure are left
 * in place.  When short float repr is unavailable or a memory debugger is
 * in use, this does nothing and reports success.
 */
PyStatus
_PyDtoa_Init(PyInterpreterState *interp)
{
#if _PY_SHORT_FLOAT_REPR == 1 && !defined(Py_USING_MEMORY_DEBUGGER)
    Bigint **table = interp->dtoa.p5s;
    Bigint *previous = NULL;

    for (Py_ssize_t slot = 0; slot < Bigint_Pow5size; slot++) {
        // Slot 0 is seeded with 5**4 = 625; every later slot is the square
        // of the one before it: 5**8, 5**16, 5**32, ..., 5**512.
        Bigint *power = (slot == 0) ? i2b(625) : mult(previous, previous);
        if (power == NULL) {
            return PyStatus_NoMemory();
        }
        table[slot] = power;
        previous = power;
    }
#endif
    return PyStatus_Ok();
}
/* Release the per-interpreter cache of powers of 5.
 *
 * Every slot of interp->dtoa.p5s is reset to NULL and its previous value
 * is handed to Bfree().  When short float repr is unavailable or a memory
 * debugger is in use, this does nothing.
 */
void
_PyDtoa_Fini(PyInterpreterState *interp)
{
#if _PY_SHORT_FLOAT_REPR == 1 && !defined(Py_USING_MEMORY_DEBUGGER)
    Bigint **cursor = interp->dtoa.p5s;
    Bigint **const stop = cursor + Bigint_Pow5size;

    while (cursor != stop) {
        // Clear the slot before releasing the Bigint it pointed to.
        Bigint *cached = *cursor;
        *cursor = NULL;
        cursor++;
        Bfree(cached);
    }
#endif
}

View File

@ -820,6 +820,11 @@ pycore_interp_init(PyThreadState *tstate)
return status;
}
status = _PyDtoa_Init(interp);
if (_PyStatus_EXCEPTION(status)) {
return status;
}
// The GC must be initialized before the first GC collection.
status = _PyGC_Init(interp);
if (_PyStatus_EXCEPTION(status)) {
@ -1776,6 +1781,7 @@ finalize_interp_clear(PyThreadState *tstate)
_PyXI_Fini(tstate->interp);
_PyExc_ClearExceptionGroupType(tstate->interp);
_Py_clear_generic_types(tstate->interp);
_PyDtoa_Fini(tstate->interp);
/* Clear interpreter state and all thread states */
_PyInterpreterState_Clear(tstate);