gh-111972: Make Unicode name C API capsule initialization thread-safe (#112249)

This commit is contained in:
Kirill Podoprigora 2023-11-30 13:12:49 +03:00 committed by GitHub
parent 81261fa67f
commit 0785c68559
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 26 additions and 20 deletions

View File

@ -28,6 +28,8 @@ typedef struct {
} _PyUnicode_Name_CAPI;
extern _PyUnicode_Name_CAPI* _PyUnicode_GetNameCAPI(void);
#ifdef __cplusplus
}
#endif

View File

@ -5869,6 +5869,23 @@ PyUnicode_AsUTF16String(PyObject *unicode)
return _PyUnicode_EncodeUTF16(unicode, NULL, 0);
}
/* Return the Unicode name C API cached on the current interpreter.

   On the first call (or whenever the cached pointer is still NULL) the
   capsule named by PyUnicodeData_CAPSULE_NAME is imported and the result
   is published to the interpreter state with an atomic store.

   Returns NULL when the capsule import yields NULL; in that case NULL is
   what gets stored, so a later call attempts the import again. */
_PyUnicode_Name_CAPI *
_PyUnicode_GetNameCAPI(void)
{
    PyInterpreterState *interp = _PyInterpreterState_GET();
    _PyUnicode_Name_CAPI *capi =
        _Py_atomic_load_ptr(&interp->unicode.ucnhash_capi);

    /* Fast path: the capsule has already been imported and cached. */
    if (capi != NULL) {
        return capi;
    }

    capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
            PyUnicodeData_CAPSULE_NAME, 1);
    // It's fine if we overwrite the value here. It's always the same value.
    _Py_atomic_store_ptr(&interp->unicode.ucnhash_capi, capi);
    return capi;
}
/* --- Unicode Escape Codec ----------------------------------------------- */
PyObject *
@ -5884,7 +5901,6 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
_PyUnicode_Name_CAPI *ucnhash_capi;
PyInterpreterState *interp = _PyInterpreterState_GET();
// so we can remember if we've seen an invalid escape char or not
*first_invalid_escape = NULL;
@ -6032,11 +6048,7 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
/* \N{name} */
case 'N':
ucnhash_capi = interp->unicode.ucnhash_capi;
if (ucnhash_capi == NULL) {
/* load the unicode data module */
ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
PyUnicodeData_CAPSULE_NAME, 1);
ucnhash_capi = _PyUnicode_GetNameCAPI();
if (ucnhash_capi == NULL) {
PyErr_SetString(
PyExc_UnicodeError,
@ -6044,8 +6056,6 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
);
goto onError;
}
interp->unicode.ucnhash_capi = ucnhash_capi;
}
message = "malformed \\N character escape";
if (s >= end) {

View File

@ -931,8 +931,6 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
return Py_BuildValue("(Nn)", res, end);
}
static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;
PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
{
if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
@ -953,14 +951,10 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
if (!ucnhash_capi) {
/* load the unicode data module */
ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
PyUnicodeData_CAPSULE_NAME, 1);
if (!ucnhash_capi) {
_PyUnicode_Name_CAPI *ucnhash_capi = _PyUnicode_GetNameCAPI();
if (ucnhash_capi == NULL) {
return NULL;
}
}
for (i = start, ressize = 0; i < end; ++i) {
/* object is guaranteed to be "ready" */
c = PyUnicode_READ_CHAR(object, i);