bpo-1635741: _PyUnicode_Name_CAPI moves to internal C API (GH-22713)

The private _PyUnicode_Name_CAPI structure of the PyCapsule API
unicodedata.ucnhash_CAPI moves to the internal C API. Moreover, the
structure gets a new state member which must be passed to the
getcode() and getname() functions.

* Move Include/ucnhash.h to Include/internal/pycore_ucnhash.h
* unicodedata module is now built with Py_BUILD_CORE_MODULE.
* unicodedata: move the static hashAPI variable into unicodedata_module_state
  as its new capi member, which is now filled in by PyInit_unicodedata().
This commit is contained in:
Victor Stinner 2020-10-26 16:43:47 +01:00 committed by GitHub
parent b510e101f8
commit 47e1afd2a1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 74 additions and 49 deletions

View File

@ -407,6 +407,12 @@ Porting to Python 3.10
Unicode object without initial data. Unicode object without initial data.
(Contributed by Inada Naoki in :issue:`36346`.) (Contributed by Inada Naoki in :issue:`36346`.)
* The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover,
the structure gets a new ``state`` member which must be passed to the
``getcode()`` and ``getname()`` functions.
(Contributed by Victor Stinner in :issue:`1635741`.)
Deprecated Deprecated
---------- ----------

View File

@ -1,11 +1,14 @@
/* Unicode name database interface */ /* Unicode name database interface */
#ifndef Py_LIMITED_API #ifndef Py_INTERNAL_UCNHASH_H
#ifndef Py_UCNHASH_H #define Py_INTERNAL_UCNHASH_H
#define Py_UCNHASH_H
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif
/* revised ucnhash CAPI interface (exported through a "wrapper") */ /* revised ucnhash CAPI interface (exported through a "wrapper") */
#define PyUnicodeData_CAPSULE_NAME "unicodedata.ucnhash_CAPI" #define PyUnicodeData_CAPSULE_NAME "unicodedata.ucnhash_CAPI"
@ -15,16 +18,22 @@ typedef struct {
/* Size of this struct */ /* Size of this struct */
int size; int size;
// state which must be passed as the first parameter to getname()
// and getcode()
void *state;
/* Get name for a given character code. Returns non-zero if /* Get name for a given character code. Returns non-zero if
success, zero if not. Does not set Python exceptions. success, zero if not. Does not set Python exceptions.
If self is NULL, data come from the default version of the database. If self is NULL, data come from the default version of the database.
If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */ If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */
int (*getname)(PyObject *self, Py_UCS4 code, char* buffer, int buflen, int (*getname)(void *state, PyObject *self, Py_UCS4 code,
char* buffer, int buflen,
int with_alias_and_seq); int with_alias_and_seq);
/* Get character code for a given name. Same error handling /* Get character code for a given name. Same error handling
as for getname. */ as for getname. */
int (*getcode)(PyObject *self, const char* name, int namelen, Py_UCS4* code, int (*getcode)(void *state, PyObject *self,
const char* name, int namelen, Py_UCS4* code,
int with_named_seq); int with_named_seq);
} _PyUnicode_Name_CAPI; } _PyUnicode_Name_CAPI;
@ -32,5 +41,4 @@ typedef struct {
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /* !Py_UCNHASH_H */ #endif /* !Py_INTERNAL_UCNHASH_H */
#endif /* !Py_LIMITED_API */

View File

@ -1065,7 +1065,6 @@ PYTHON_HEADERS= \
$(srcdir)/Include/traceback.h \ $(srcdir)/Include/traceback.h \
$(srcdir)/Include/tracemalloc.h \ $(srcdir)/Include/tracemalloc.h \
$(srcdir)/Include/tupleobject.h \ $(srcdir)/Include/tupleobject.h \
$(srcdir)/Include/ucnhash.h \
$(srcdir)/Include/unicodeobject.h \ $(srcdir)/Include/unicodeobject.h \
$(srcdir)/Include/warnings.h \ $(srcdir)/Include/warnings.h \
$(srcdir)/Include/weakrefobject.h \ $(srcdir)/Include/weakrefobject.h \
@ -1129,6 +1128,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_sysmodule.h \ $(srcdir)/Include/internal/pycore_sysmodule.h \
$(srcdir)/Include/internal/pycore_traceback.h \ $(srcdir)/Include/internal/pycore_traceback.h \
$(srcdir)/Include/internal/pycore_tuple.h \ $(srcdir)/Include/internal/pycore_tuple.h \
$(srcdir)/Include/internal/pycore_ucnhash.h \
$(srcdir)/Include/internal/pycore_unionobject.h \ $(srcdir)/Include/internal/pycore_unionobject.h \
$(srcdir)/Include/internal/pycore_warnings.h \ $(srcdir)/Include/internal/pycore_warnings.h \
$(DTRACE_HEADERS) $(DTRACE_HEADERS)

View File

@ -0,0 +1,4 @@
The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover, the
structure gets a new ``state`` member which must be passed to the
``getcode()`` and ``getname()`` functions. Patch by Victor Stinner.

View File

@ -185,7 +185,7 @@ _symtable symtablemodule.c
#_json -I$(srcdir)/Include/internal -DPy_BUILD_CORE_BUILTIN _json.c # _json speedups #_json -I$(srcdir)/Include/internal -DPy_BUILD_CORE_BUILTIN _json.c # _json speedups
#_statistics _statisticsmodule.c # statistics accelerator #_statistics _statisticsmodule.c # statistics accelerator
#unicodedata unicodedata.c # static Unicode character database #unicodedata unicodedata.c -DPy_BUILD_CORE_BUILTIN # static Unicode character database
# Modules with some UNIX dependencies -- on by default: # Modules with some UNIX dependencies -- on by default:

View File

@ -16,7 +16,7 @@
#define PY_SSIZE_T_CLEAN #define PY_SSIZE_T_CLEAN
#include "Python.h" #include "Python.h"
#include "ucnhash.h" #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
#include "structmember.h" // PyMemberDef #include "structmember.h" // PyMemberDef
#include <stdbool.h> #include <stdbool.h>
@ -97,6 +97,8 @@ typedef struct {
// Borrowed reference to &UCD_Type. It is used to prepare the code // Borrowed reference to &UCD_Type. It is used to prepare the code
// to convert the UCD_Type static type to a heap type. // to convert the UCD_Type static type to a heap type.
PyTypeObject *ucd_type; PyTypeObject *ucd_type;
_PyUnicode_Name_CAPI capi;
} unicodedata_module_state; } unicodedata_module_state;
// bpo-1635741: Temporary global state until the unicodedata module // bpo-1635741: Temporary global state until the unicodedata module
@ -1180,10 +1182,11 @@ _getucname(unicodedata_module_state *state, PyObject *self,
} }
static int static int
capi_getucname(PyObject *self, Py_UCS4 code, char* buffer, int buflen, capi_getucname(void *state_raw, PyObject *self, Py_UCS4 code,
char* buffer, int buflen,
int with_alias_and_seq) int with_alias_and_seq)
{ {
unicodedata_module_state *state = &global_module_state; unicodedata_module_state *state = (unicodedata_module_state *)state_raw;
return _getucname(state, self, code, buffer, buflen, with_alias_and_seq); return _getucname(state, self, code, buffer, buflen, with_alias_and_seq);
} }
@ -1323,21 +1326,15 @@ _getcode(unicodedata_module_state *state, PyObject* self,
} }
static int static int
capi_getcode(PyObject* self, const char* name, int namelen, Py_UCS4* code, capi_getcode(void *state_raw, PyObject* self,
const char* name, int namelen, Py_UCS4* code,
int with_named_seq) int with_named_seq)
{ {
unicodedata_module_state *state = &global_module_state; unicodedata_module_state *state = (unicodedata_module_state *)state_raw;
return _getcode(state, self, name, namelen, code, with_named_seq); return _getcode(state, self, name, namelen, code, with_named_seq);
} }
static const _PyUnicode_Name_CAPI hashAPI =
{
sizeof(_PyUnicode_Name_CAPI),
capi_getucname,
capi_getcode
};
/* -------------------------------------------------------------------- */ /* -------------------------------------------------------------------- */
/* Python bindings */ /* Python bindings */
@ -1510,6 +1507,11 @@ PyInit_unicodedata(void)
PyObject *m, *v; PyObject *m, *v;
unicodedata_module_state *state = &global_module_state; unicodedata_module_state *state = &global_module_state;
state->capi.size = sizeof(_PyUnicode_Name_CAPI);
state->capi.state = state;
state->capi.getname = capi_getucname;
state->capi.getcode = capi_getcode;
Py_SET_TYPE(&UCD_Type, &PyType_Type); Py_SET_TYPE(&UCD_Type, &PyType_Type);
state->ucd_type = &UCD_Type; state->ucd_type = &UCD_Type;
@ -1528,7 +1530,7 @@ PyInit_unicodedata(void)
PyModule_AddObject(m, "ucd_3_2_0", v); PyModule_AddObject(m, "ucd_3_2_0", v);
/* Export C API */ /* Export C API */
v = PyCapsule_New((void *)&hashAPI, PyUnicodeData_CAPSULE_NAME, NULL); v = PyCapsule_New((void *)&state->capi, PyUnicodeData_CAPSULE_NAME, NULL);
if (v != NULL) if (v != NULL)
PyModule_AddObject(m, "ucnhash_CAPI", v); PyModule_AddObject(m, "ucnhash_CAPI", v);
return m; return m;

View File

@ -40,16 +40,16 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#define PY_SSIZE_T_CLEAN #define PY_SSIZE_T_CLEAN
#include "Python.h" #include "Python.h"
#include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_abstract.h" // _PyIndex_Check()
#include "pycore_bytes_methods.h" // _Py_bytes_lower() #include "pycore_bytes_methods.h" // _Py_bytes_lower()
#include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_interp.h" // PyInterpreterState.fs_codec #include "pycore_interp.h" // PyInterpreterState.fs_codec
#include "pycore_object.h" // _PyObject_GC_TRACK() #include "pycore_object.h" // _PyObject_GC_TRACK()
#include "pycore_pathconfig.h" // _Py_DumpPathConfig() #include "pycore_pathconfig.h" // _Py_DumpPathConfig()
#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding() #include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding()
#include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "ucnhash.h" // _PyUnicode_Name_CAPI #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
#include "stringlib/eq.h" // unicode_eq() #include "stringlib/eq.h" // unicode_eq()
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
#include <windows.h> #include <windows.h>
@ -6344,7 +6344,7 @@ PyUnicode_AsUTF16String(PyObject *unicode)
/* --- Unicode Escape Codec ----------------------------------------------- */ /* --- Unicode Escape Codec ----------------------------------------------- */
static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;
PyObject * PyObject *
_PyUnicode_DecodeUnicodeEscape(const char *s, _PyUnicode_DecodeUnicodeEscape(const char *s,
@ -6497,11 +6497,11 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
/* \N{name} */ /* \N{name} */
case 'N': case 'N':
if (ucnhash_CAPI == NULL) { if (ucnhash_capi == NULL) {
/* load the unicode data module */ /* load the unicode data module */
ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import( ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
PyUnicodeData_CAPSULE_NAME, 1); PyUnicodeData_CAPSULE_NAME, 1);
if (ucnhash_CAPI == NULL) { if (ucnhash_capi == NULL) {
PyErr_SetString( PyErr_SetString(
PyExc_UnicodeError, PyExc_UnicodeError,
"\\N escapes not supported (can't load unicodedata module)" "\\N escapes not supported (can't load unicodedata module)"
@ -6523,7 +6523,8 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
s++; s++;
ch = 0xffffffff; /* in case 'getcode' messes up */ ch = 0xffffffff; /* in case 'getcode' messes up */
if (namelen <= INT_MAX && if (namelen <= INT_MAX &&
ucnhash_CAPI->getcode(NULL, start, (int)namelen, ucnhash_capi->getcode(ucnhash_capi->state, NULL,
start, (int)namelen,
&ch, 0)) { &ch, 0)) {
assert(ch <= MAX_UNICODE); assert(ch <= MAX_UNICODE);
WRITE_CHAR(ch); WRITE_CHAR(ch);

View File

@ -196,6 +196,7 @@
<ClInclude Include="..\Include\internal\pycore_sysmodule.h" /> <ClInclude Include="..\Include\internal\pycore_sysmodule.h" />
<ClInclude Include="..\Include\internal\pycore_traceback.h" /> <ClInclude Include="..\Include\internal\pycore_traceback.h" />
<ClInclude Include="..\Include\internal\pycore_tuple.h" /> <ClInclude Include="..\Include\internal\pycore_tuple.h" />
<ClInclude Include="..\Include\internal\pycore_ucnhash.h" />
<ClInclude Include="..\Include\internal\pycore_unionobject.h" /> <ClInclude Include="..\Include\internal\pycore_unionobject.h" />
<ClInclude Include="..\Include\internal\pycore_warnings.h" /> <ClInclude Include="..\Include\internal\pycore_warnings.h" />
<ClInclude Include="..\Include\interpreteridobject.h" /> <ClInclude Include="..\Include\interpreteridobject.h" />
@ -252,7 +253,6 @@
<ClInclude Include="..\Include\traceback.h" /> <ClInclude Include="..\Include\traceback.h" />
<ClInclude Include="..\Include\tracemalloc.h" /> <ClInclude Include="..\Include\tracemalloc.h" />
<ClInclude Include="..\Include\tupleobject.h" /> <ClInclude Include="..\Include\tupleobject.h" />
<ClInclude Include="..\Include\ucnhash.h" />
<ClInclude Include="..\Include\unicodeobject.h" /> <ClInclude Include="..\Include\unicodeobject.h" />
<ClInclude Include="..\Include\weakrefobject.h" /> <ClInclude Include="..\Include\weakrefobject.h" />
<ClInclude Include="..\Modules\_math.h" /> <ClInclude Include="..\Modules\_math.h" />

View File

@ -273,9 +273,6 @@
<ClInclude Include="..\Include\tupleobject.h"> <ClInclude Include="..\Include\tupleobject.h">
<Filter>Include</Filter> <Filter>Include</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\Include\ucnhash.h">
<Filter>Include</Filter>
</ClInclude>
<ClInclude Include="..\Include\unicodeobject.h"> <ClInclude Include="..\Include\unicodeobject.h">
<Filter>Include</Filter> <Filter>Include</Filter>
</ClInclude> </ClInclude>
@ -573,6 +570,9 @@
<ClInclude Include="..\Include\internal\pycore_tuple.h"> <ClInclude Include="..\Include\internal\pycore_tuple.h">
<Filter>Include\internal</Filter> <Filter>Include\internal</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\Include\internal\pycore_ucnhash.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_unionobject.h"> <ClInclude Include="..\Include\internal\pycore_unionobject.h">
<Filter>Include\internal</Filter> <Filter>Include\internal</Filter>
</ClInclude> </ClInclude>

View File

@ -11,7 +11,7 @@ Copyright (c) Corporation for National Research Initiatives.
#include "Python.h" #include "Python.h"
#include "pycore_interp.h" // PyInterpreterState.codec_search_path #include "pycore_interp.h" // PyInterpreterState.codec_search_path
#include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "ucnhash.h" #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
#include <ctype.h> #include <ctype.h>
const char *Py_hexdigits = "0123456789abcdef"; const char *Py_hexdigits = "0123456789abcdef";
@ -954,7 +954,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
return Py_BuildValue("(Nn)", res, end); return Py_BuildValue("(Nn)", res, end);
} }
static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;
PyObject *PyCodec_NameReplaceErrors(PyObject *exc) PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
{ {
@ -976,17 +976,19 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
return NULL; return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc))) if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL; return NULL;
if (!ucnhash_CAPI) { if (!ucnhash_capi) {
/* load the unicode data module */ /* load the unicode data module */
ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import( ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
PyUnicodeData_CAPSULE_NAME, 1); PyUnicodeData_CAPSULE_NAME, 1);
if (!ucnhash_CAPI) if (!ucnhash_capi) {
return NULL; return NULL;
}
} }
for (i = start, ressize = 0; i < end; ++i) { for (i = start, ressize = 0; i < end; ++i) {
/* object is guaranteed to be "ready" */ /* object is guaranteed to be "ready" */
c = PyUnicode_READ_CHAR(object, i); c = PyUnicode_READ_CHAR(object, i);
if (ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) { if (ucnhash_capi->getname(ucnhash_capi->state, NULL,
c, buffer, sizeof(buffer), 1)) {
replsize = 1+1+1+(int)strlen(buffer)+1; replsize = 1+1+1+(int)strlen(buffer)+1;
} }
else if (c >= 0x10000) { else if (c >= 0x10000) {
@ -1009,7 +1011,8 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
i < end; ++i) { i < end; ++i) {
c = PyUnicode_READ_CHAR(object, i); c = PyUnicode_READ_CHAR(object, i);
*outp++ = '\\'; *outp++ = '\\';
if (ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) { if (ucnhash_capi->getname(ucnhash_capi->state, NULL,
c, buffer, sizeof(buffer), 1)) {
*outp++ = 'N'; *outp++ = 'N';
*outp++ = '{'; *outp++ = '{';
strcpy((char *)outp, buffer); strcpy((char *)outp, buffer);

View File

@ -878,7 +878,8 @@ class PyBuildExt(build_ext):
self.add(Extension('_lsprof', ['_lsprof.c', 'rotatingtree.c'])) self.add(Extension('_lsprof', ['_lsprof.c', 'rotatingtree.c']))
# static Unicode character database # static Unicode character database
self.add(Extension('unicodedata', ['unicodedata.c'], self.add(Extension('unicodedata', ['unicodedata.c'],
depends=['unicodedata_db.h', 'unicodename_db.h'])) depends=['unicodedata_db.h', 'unicodename_db.h'],
extra_compile_args=['-DPy_BUILD_CORE_MODULE']))
# _opcode module # _opcode module
self.add(Extension('_opcode', ['_opcode.c'])) self.add(Extension('_opcode', ['_opcode.c']))
# asyncio speedups # asyncio speedups