bpo-1635741: _PyUnicode_Name_CAPI moves to internal C API (GH-22713)
The private _PyUnicode_Name_CAPI structure of the PyCapsule API unicodedata.ucnhash_CAPI moves to the internal C API. Moreover, the structure gets a new state member which must be passed to the getcode() and getname() functions.

* Move Include/ucnhash.h to Include/internal/pycore_ucnhash.h
* unicodedata module is now built with Py_BUILD_CORE_MODULE.
* unicodedata: move hashAPI variable into unicodedata_module_state.
This commit is contained in:
parent
b510e101f8
commit
47e1afd2a1
|
@ -407,6 +407,12 @@ Porting to Python 3.10
|
|||
Unicode object without initial data.
|
||||
(Contributed by Inada Naoki in :issue:`36346`.)
|
||||
|
||||
* The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
|
||||
``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover,
|
||||
the structure gets a new ``state`` member which must be passed to the
|
||||
``getcode()`` and ``getname()`` functions.
|
||||
(Contributed by Victor Stinner in :issue:`1635741`.)
|
||||
|
||||
Deprecated
|
||||
----------
|
||||
|
||||
|
|
|
@ -1,11 +1,14 @@
|
|||
/* Unicode name database interface */
|
||||
#ifndef Py_LIMITED_API
|
||||
#ifndef Py_UCNHASH_H
|
||||
#define Py_UCNHASH_H
|
||||
#ifndef Py_INTERNAL_UCNHASH_H
|
||||
#define Py_INTERNAL_UCNHASH_H
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef Py_BUILD_CORE
|
||||
# error "this header requires Py_BUILD_CORE define"
|
||||
#endif
|
||||
|
||||
/* revised ucnhash CAPI interface (exported through a "wrapper") */
|
||||
|
||||
#define PyUnicodeData_CAPSULE_NAME "unicodedata.ucnhash_CAPI"
|
||||
|
@ -15,16 +18,22 @@ typedef struct {
|
|||
/* Size of this struct */
|
||||
int size;
|
||||
|
||||
// state which must be passed as the first parameter to getname()
|
||||
// and getcode()
|
||||
void *state;
|
||||
|
||||
/* Get name for a given character code. Returns non-zero if
|
||||
success, zero if not. Does not set Python exceptions.
|
||||
If self is NULL, data come from the default version of the database.
|
||||
If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */
|
||||
int (*getname)(PyObject *self, Py_UCS4 code, char* buffer, int buflen,
|
||||
int (*getname)(void *state, PyObject *self, Py_UCS4 code,
|
||||
char* buffer, int buflen,
|
||||
int with_alias_and_seq);
|
||||
|
||||
/* Get character code for a given name. Same error handling
|
||||
as for getname. */
|
||||
int (*getcode)(PyObject *self, const char* name, int namelen, Py_UCS4* code,
|
||||
int (*getcode)(void *state, PyObject *self,
|
||||
const char* name, int namelen, Py_UCS4* code,
|
||||
int with_named_seq);
|
||||
|
||||
} _PyUnicode_Name_CAPI;
|
||||
|
@ -32,5 +41,4 @@ typedef struct {
|
|||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* !Py_UCNHASH_H */
|
||||
#endif /* !Py_LIMITED_API */
|
||||
#endif /* !Py_INTERNAL_UCNHASH_H */
|
|
@ -1065,7 +1065,6 @@ PYTHON_HEADERS= \
|
|||
$(srcdir)/Include/traceback.h \
|
||||
$(srcdir)/Include/tracemalloc.h \
|
||||
$(srcdir)/Include/tupleobject.h \
|
||||
$(srcdir)/Include/ucnhash.h \
|
||||
$(srcdir)/Include/unicodeobject.h \
|
||||
$(srcdir)/Include/warnings.h \
|
||||
$(srcdir)/Include/weakrefobject.h \
|
||||
|
@ -1129,6 +1128,7 @@ PYTHON_HEADERS= \
|
|||
$(srcdir)/Include/internal/pycore_sysmodule.h \
|
||||
$(srcdir)/Include/internal/pycore_traceback.h \
|
||||
$(srcdir)/Include/internal/pycore_tuple.h \
|
||||
$(srcdir)/Include/internal/pycore_ucnhash.h \
|
||||
$(srcdir)/Include/internal/pycore_unionobject.h \
|
||||
$(srcdir)/Include/internal/pycore_warnings.h \
|
||||
$(DTRACE_HEADERS)
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
|
||||
``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover, the
|
||||
structure gets a new ``state`` member which must be passed to the
|
||||
``getcode()`` and ``getname()`` functions. Patch by Victor Stinner.
|
|
@ -185,7 +185,7 @@ _symtable symtablemodule.c
|
|||
#_json -I$(srcdir)/Include/internal -DPy_BUILD_CORE_BUILTIN _json.c # _json speedups
|
||||
#_statistics _statisticsmodule.c # statistics accelerator
|
||||
|
||||
#unicodedata unicodedata.c # static Unicode character database
|
||||
#unicodedata unicodedata.c -DPy_BUILD_CORE_BUILTIN # static Unicode character database
|
||||
|
||||
|
||||
# Modules with some UNIX dependencies -- on by default:
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#define PY_SSIZE_T_CLEAN
|
||||
|
||||
#include "Python.h"
|
||||
#include "ucnhash.h"
|
||||
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
|
||||
#include "structmember.h" // PyMemberDef
|
||||
|
||||
#include <stdbool.h>
|
||||
|
@ -97,6 +97,8 @@ typedef struct {
|
|||
// Borrowed reference to &UCD_Type. It is used to prepare the code
|
||||
// to convert the UCD_Type static type to a heap type.
|
||||
PyTypeObject *ucd_type;
|
||||
|
||||
_PyUnicode_Name_CAPI capi;
|
||||
} unicodedata_module_state;
|
||||
|
||||
// bpo-1635741: Temporary global state until the unicodedata module
|
||||
|
@ -1180,10 +1182,11 @@ _getucname(unicodedata_module_state *state, PyObject *self,
|
|||
}
|
||||
|
||||
static int
|
||||
capi_getucname(PyObject *self, Py_UCS4 code, char* buffer, int buflen,
|
||||
capi_getucname(void *state_raw, PyObject *self, Py_UCS4 code,
|
||||
char* buffer, int buflen,
|
||||
int with_alias_and_seq)
|
||||
{
|
||||
unicodedata_module_state *state = &global_module_state;
|
||||
unicodedata_module_state *state = (unicodedata_module_state *)state_raw;
|
||||
return _getucname(state, self, code, buffer, buflen, with_alias_and_seq);
|
||||
|
||||
}
|
||||
|
@ -1323,21 +1326,15 @@ _getcode(unicodedata_module_state *state, PyObject* self,
|
|||
}
|
||||
|
||||
static int
|
||||
capi_getcode(PyObject* self, const char* name, int namelen, Py_UCS4* code,
|
||||
capi_getcode(void *state_raw, PyObject* self,
|
||||
const char* name, int namelen, Py_UCS4* code,
|
||||
int with_named_seq)
|
||||
{
|
||||
unicodedata_module_state *state = &global_module_state;
|
||||
unicodedata_module_state *state = (unicodedata_module_state *)state_raw;
|
||||
return _getcode(state, self, name, namelen, code, with_named_seq);
|
||||
|
||||
}
|
||||
|
||||
static const _PyUnicode_Name_CAPI hashAPI =
|
||||
{
|
||||
sizeof(_PyUnicode_Name_CAPI),
|
||||
capi_getucname,
|
||||
capi_getcode
|
||||
};
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/* Python bindings */
|
||||
|
||||
|
@ -1510,6 +1507,11 @@ PyInit_unicodedata(void)
|
|||
PyObject *m, *v;
|
||||
unicodedata_module_state *state = &global_module_state;
|
||||
|
||||
state->capi.size = sizeof(_PyUnicode_Name_CAPI);
|
||||
state->capi.state = state;
|
||||
state->capi.getname = capi_getucname;
|
||||
state->capi.getcode = capi_getcode;
|
||||
|
||||
Py_SET_TYPE(&UCD_Type, &PyType_Type);
|
||||
state->ucd_type = &UCD_Type;
|
||||
|
||||
|
@ -1528,7 +1530,7 @@ PyInit_unicodedata(void)
|
|||
PyModule_AddObject(m, "ucd_3_2_0", v);
|
||||
|
||||
/* Export C API */
|
||||
v = PyCapsule_New((void *)&hashAPI, PyUnicodeData_CAPSULE_NAME, NULL);
|
||||
v = PyCapsule_New((void *)&state->capi, PyUnicodeData_CAPSULE_NAME, NULL);
|
||||
if (v != NULL)
|
||||
PyModule_AddObject(m, "ucnhash_CAPI", v);
|
||||
return m;
|
||||
|
|
|
@ -40,16 +40,16 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|||
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include "Python.h"
|
||||
#include "pycore_abstract.h" // _PyIndex_Check()
|
||||
#include "pycore_bytes_methods.h" // _Py_bytes_lower()
|
||||
#include "pycore_initconfig.h" // _PyStatus_OK()
|
||||
#include "pycore_interp.h" // PyInterpreterState.fs_codec
|
||||
#include "pycore_object.h" // _PyObject_GC_TRACK()
|
||||
#include "pycore_pathconfig.h" // _Py_DumpPathConfig()
|
||||
#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding()
|
||||
#include "pycore_pystate.h" // _PyInterpreterState_GET()
|
||||
#include "ucnhash.h" // _PyUnicode_Name_CAPI
|
||||
#include "stringlib/eq.h" // unicode_eq()
|
||||
#include "pycore_abstract.h" // _PyIndex_Check()
|
||||
#include "pycore_bytes_methods.h" // _Py_bytes_lower()
|
||||
#include "pycore_initconfig.h" // _PyStatus_OK()
|
||||
#include "pycore_interp.h" // PyInterpreterState.fs_codec
|
||||
#include "pycore_object.h" // _PyObject_GC_TRACK()
|
||||
#include "pycore_pathconfig.h" // _Py_DumpPathConfig()
|
||||
#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding()
|
||||
#include "pycore_pystate.h" // _PyInterpreterState_GET()
|
||||
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
|
||||
#include "stringlib/eq.h" // unicode_eq()
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
#include <windows.h>
|
||||
|
@ -6344,7 +6344,7 @@ PyUnicode_AsUTF16String(PyObject *unicode)
|
|||
|
||||
/* --- Unicode Escape Codec ----------------------------------------------- */
|
||||
|
||||
static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
|
||||
static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;
|
||||
|
||||
PyObject *
|
||||
_PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||
|
@ -6497,11 +6497,11 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
|
|||
|
||||
/* \N{name} */
|
||||
case 'N':
|
||||
if (ucnhash_CAPI == NULL) {
|
||||
if (ucnhash_capi == NULL) {
|
||||
/* load the unicode data module */
|
||||
ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
|
||||
ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
|
||||
PyUnicodeData_CAPSULE_NAME, 1);
|
||||
if (ucnhash_CAPI == NULL) {
|
||||
if (ucnhash_capi == NULL) {
|
||||
PyErr_SetString(
|
||||
PyExc_UnicodeError,
|
||||
"\\N escapes not supported (can't load unicodedata module)"
|
||||
|
@ -6523,7 +6523,8 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
|
|||
s++;
|
||||
ch = 0xffffffff; /* in case 'getcode' messes up */
|
||||
if (namelen <= INT_MAX &&
|
||||
ucnhash_CAPI->getcode(NULL, start, (int)namelen,
|
||||
ucnhash_capi->getcode(ucnhash_capi->state, NULL,
|
||||
start, (int)namelen,
|
||||
&ch, 0)) {
|
||||
assert(ch <= MAX_UNICODE);
|
||||
WRITE_CHAR(ch);
|
||||
|
|
|
@ -196,6 +196,7 @@
|
|||
<ClInclude Include="..\Include\internal\pycore_sysmodule.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_traceback.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_tuple.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_ucnhash.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_unionobject.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_warnings.h" />
|
||||
<ClInclude Include="..\Include\interpreteridobject.h" />
|
||||
|
@ -252,7 +253,6 @@
|
|||
<ClInclude Include="..\Include\traceback.h" />
|
||||
<ClInclude Include="..\Include\tracemalloc.h" />
|
||||
<ClInclude Include="..\Include\tupleobject.h" />
|
||||
<ClInclude Include="..\Include\ucnhash.h" />
|
||||
<ClInclude Include="..\Include\unicodeobject.h" />
|
||||
<ClInclude Include="..\Include\weakrefobject.h" />
|
||||
<ClInclude Include="..\Modules\_math.h" />
|
||||
|
|
|
@ -273,9 +273,6 @@
|
|||
<ClInclude Include="..\Include\tupleobject.h">
|
||||
<Filter>Include</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\ucnhash.h">
|
||||
<Filter>Include</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\unicodeobject.h">
|
||||
<Filter>Include</Filter>
|
||||
</ClInclude>
|
||||
|
@ -573,6 +570,9 @@
|
|||
<ClInclude Include="..\Include\internal\pycore_tuple.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\internal\pycore_ucnhash.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\internal\pycore_unionobject.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
|
|
|
@ -11,7 +11,7 @@ Copyright (c) Corporation for National Research Initiatives.
|
|||
#include "Python.h"
|
||||
#include "pycore_interp.h" // PyInterpreterState.codec_search_path
|
||||
#include "pycore_pystate.h" // _PyInterpreterState_GET()
|
||||
#include "ucnhash.h"
|
||||
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
|
||||
#include <ctype.h>
|
||||
|
||||
const char *Py_hexdigits = "0123456789abcdef";
|
||||
|
@ -954,7 +954,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
|
|||
return Py_BuildValue("(Nn)", res, end);
|
||||
}
|
||||
|
||||
static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
|
||||
static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;
|
||||
|
||||
PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
|
||||
{
|
||||
|
@ -976,17 +976,19 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
|
|||
return NULL;
|
||||
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
|
||||
return NULL;
|
||||
if (!ucnhash_CAPI) {
|
||||
if (!ucnhash_capi) {
|
||||
/* load the unicode data module */
|
||||
ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
|
||||
ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
|
||||
PyUnicodeData_CAPSULE_NAME, 1);
|
||||
if (!ucnhash_CAPI)
|
||||
if (!ucnhash_capi) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
for (i = start, ressize = 0; i < end; ++i) {
|
||||
/* object is guaranteed to be "ready" */
|
||||
c = PyUnicode_READ_CHAR(object, i);
|
||||
if (ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) {
|
||||
if (ucnhash_capi->getname(ucnhash_capi->state, NULL,
|
||||
c, buffer, sizeof(buffer), 1)) {
|
||||
replsize = 1+1+1+(int)strlen(buffer)+1;
|
||||
}
|
||||
else if (c >= 0x10000) {
|
||||
|
@ -1009,7 +1011,8 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
|
|||
i < end; ++i) {
|
||||
c = PyUnicode_READ_CHAR(object, i);
|
||||
*outp++ = '\\';
|
||||
if (ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) {
|
||||
if (ucnhash_capi->getname(ucnhash_capi->state, NULL,
|
||||
c, buffer, sizeof(buffer), 1)) {
|
||||
*outp++ = 'N';
|
||||
*outp++ = '{';
|
||||
strcpy((char *)outp, buffer);
|
||||
|
|
3
setup.py
3
setup.py
|
@ -878,7 +878,8 @@ class PyBuildExt(build_ext):
|
|||
self.add(Extension('_lsprof', ['_lsprof.c', 'rotatingtree.c']))
|
||||
# static Unicode character database
|
||||
self.add(Extension('unicodedata', ['unicodedata.c'],
|
||||
depends=['unicodedata_db.h', 'unicodename_db.h']))
|
||||
depends=['unicodedata_db.h', 'unicodename_db.h'],
|
||||
extra_compile_args=['-DPy_BUILD_CORE_MODULE']))
|
||||
# _opcode module
|
||||
self.add(Extension('_opcode', ['_opcode.c']))
|
||||
# asyncio speedups
|
||||
|
|
Loading…
Reference in New Issue