bpo-42157: unicodedata avoids references to UCD_Type (GH-22990)

* UCD_Check() uses PyModule_Check()
* Simplify the internal _PyUnicode_Name_CAPI structure:

  * Remove size and state members
  * Remove state and self parameters of getcode() and getname()
    functions

* Remove global_module_state
This commit is contained in:
Victor Stinner 2020-10-26 19:19:36 +01:00 committed by GitHub
parent 8374d2ee15
commit 920cb647ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 129 additions and 139 deletions

View File

@ -408,10 +408,8 @@ Porting to Python 3.10
(Contributed by Inada Naoki in :issue:`36346`.) (Contributed by Inada Naoki in :issue:`36346`.)
* The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API * The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover, ``unicodedata.ucnhash_CAPI`` moves to the internal C API.
the structure gets a new ``state`` member which must be passed to the (Contributed by Victor Stinner in :issue:`42157`.)
``getcode()`` and ``getname()`` functions.
(Contributed by Victor Stinner in :issue:`1635741`.)
Deprecated Deprecated
---------- ----------

View File

@ -15,25 +15,15 @@ extern "C" {
typedef struct { typedef struct {
/* Size of this struct */ /* Get name for a given character code.
int size; Returns non-zero if success, zero if not.
Does not set Python exceptions. */
// state which must be passed as the first parameter to getname() int (*getname)(Py_UCS4 code, char* buffer, int buflen,
// and getcode()
void *state;
/* Get name for a given character code. Returns non-zero if
success, zero if not. Does not set Python exceptions.
If self is NULL, data come from the default version of the database.
If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */
int (*getname)(void *state, PyObject *self, Py_UCS4 code,
char* buffer, int buflen,
int with_alias_and_seq); int with_alias_and_seq);
/* Get character code for a given name. Same error handling /* Get character code for a given name.
as for getname. */ Same error handling as for getname(). */
int (*getcode)(void *state, PyObject *self, int (*getcode)(const char* name, int namelen, Py_UCS4* code,
const char* name, int namelen, Py_UCS4* code,
int with_named_seq); int with_named_seq);
} _PyUnicode_Name_CAPI; } _PyUnicode_Name_CAPI;

View File

@ -1,4 +0,0 @@
The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover, the
structure gets a new ``state`` member which must be passed to the
``getcode()`` and ``getname()`` functions. Patch by Victor Stinner.

View File

@ -0,0 +1,3 @@
The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
``unicodedata.ucnhash_CAPI`` moves to the internal C API.
Patch by Victor Stinner.

View File

@ -93,29 +93,19 @@ static PyMemberDef DB_members[] = {
/* forward declaration */ /* forward declaration */
static PyTypeObject UCD_Type; static PyTypeObject UCD_Type;
typedef struct { // Check if self is a unicodedata.UCD instance.
// Borrowed reference to &UCD_Type. It is used to prepare the code // If self is NULL (when the PyCapsule C API is used), return 0.
// to convert the UCD_Type static type to a heap type. // PyModule_Check() is used to avoid having to retrieve the ucd_type.
PyTypeObject *ucd_type; // See unicodedata_functions comment for the rationale of this macro.
#define UCD_Check(self) (self != NULL && !PyModule_Check(self))
_PyUnicode_Name_CAPI capi;
} unicodedata_module_state;
// bpo-1635741: Temporary global state until the unicodedata module
// gets a real module state.
static unicodedata_module_state global_module_state;
// Check if self is an instance of ucd_type.
// Return 0 if self is NULL (when the PyCapsule C API is used).
#define UCD_Check(self, ucd_type) (self != NULL && Py_IS_TYPE(self, ucd_type))
static PyObject* static PyObject*
new_previous_version(unicodedata_module_state *state, new_previous_version(PyTypeObject *ucd_type,
const char*name, const change_record* (*getrecord)(Py_UCS4), const char*name, const change_record* (*getrecord)(Py_UCS4),
Py_UCS4 (*normalization)(Py_UCS4)) Py_UCS4 (*normalization)(Py_UCS4))
{ {
PreviousDBVersion *self; PreviousDBVersion *self;
self = PyObject_New(PreviousDBVersion, state->ucd_type); self = PyObject_New(PreviousDBVersion, ucd_type);
if (self == NULL) if (self == NULL)
return NULL; return NULL;
self->name = name; self->name = name;
@ -147,12 +137,11 @@ unicodedata_UCD_decimal_impl(PyObject *self, int chr,
PyObject *default_value) PyObject *default_value)
/*[clinic end generated code: output=be23376e1a185231 input=933f8107993f23d0]*/ /*[clinic end generated code: output=be23376e1a185231 input=933f8107993f23d0]*/
{ {
unicodedata_module_state *state = &global_module_state;
int have_old = 0; int have_old = 0;
long rc; long rc;
Py_UCS4 c = (Py_UCS4)chr; Py_UCS4 c = (Py_UCS4)chr;
if (UCD_Check(self, state->ucd_type)) { if (UCD_Check(self)) {
const change_record *old = get_old_record(self, c); const change_record *old = get_old_record(self, c);
if (old->category_changed == 0) { if (old->category_changed == 0) {
/* unassigned */ /* unassigned */
@ -236,12 +225,11 @@ unicodedata_UCD_numeric_impl(PyObject *self, int chr,
PyObject *default_value) PyObject *default_value)
/*[clinic end generated code: output=53ce281fe85b10c4 input=fdf5871a5542893c]*/ /*[clinic end generated code: output=53ce281fe85b10c4 input=fdf5871a5542893c]*/
{ {
unicodedata_module_state *state = &global_module_state;
int have_old = 0; int have_old = 0;
double rc; double rc;
Py_UCS4 c = (Py_UCS4)chr; Py_UCS4 c = (Py_UCS4)chr;
if (UCD_Check(self, state->ucd_type)) { if (UCD_Check(self)) {
const change_record *old = get_old_record(self, c); const change_record *old = get_old_record(self, c);
if (old->category_changed == 0) { if (old->category_changed == 0) {
/* unassigned */ /* unassigned */
@ -283,11 +271,10 @@ static PyObject *
unicodedata_UCD_category_impl(PyObject *self, int chr) unicodedata_UCD_category_impl(PyObject *self, int chr)
/*[clinic end generated code: output=8571539ee2e6783a input=27d6f3d85050bc06]*/ /*[clinic end generated code: output=8571539ee2e6783a input=27d6f3d85050bc06]*/
{ {
unicodedata_module_state *state = &global_module_state;
int index; int index;
Py_UCS4 c = (Py_UCS4)chr; Py_UCS4 c = (Py_UCS4)chr;
index = (int) _getrecord_ex(c)->category; index = (int) _getrecord_ex(c)->category;
if (UCD_Check(self, state->ucd_type)) { if (UCD_Check(self)) {
const change_record *old = get_old_record(self, c); const change_record *old = get_old_record(self, c);
if (old->category_changed != 0xFF) if (old->category_changed != 0xFF)
index = old->category_changed; index = old->category_changed;
@ -311,11 +298,10 @@ static PyObject *
unicodedata_UCD_bidirectional_impl(PyObject *self, int chr) unicodedata_UCD_bidirectional_impl(PyObject *self, int chr)
/*[clinic end generated code: output=d36310ce2039bb92 input=b3d8f42cebfcf475]*/ /*[clinic end generated code: output=d36310ce2039bb92 input=b3d8f42cebfcf475]*/
{ {
unicodedata_module_state *state = &global_module_state;
int index; int index;
Py_UCS4 c = (Py_UCS4)chr; Py_UCS4 c = (Py_UCS4)chr;
index = (int) _getrecord_ex(c)->bidirectional; index = (int) _getrecord_ex(c)->bidirectional;
if (UCD_Check(self, state->ucd_type)) { if (UCD_Check(self)) {
const change_record *old = get_old_record(self, c); const change_record *old = get_old_record(self, c);
if (old->category_changed == 0) if (old->category_changed == 0)
index = 0; /* unassigned */ index = 0; /* unassigned */
@ -341,11 +327,10 @@ static int
unicodedata_UCD_combining_impl(PyObject *self, int chr) unicodedata_UCD_combining_impl(PyObject *self, int chr)
/*[clinic end generated code: output=cad056d0cb6a5920 input=9f2d6b2a95d0a22a]*/ /*[clinic end generated code: output=cad056d0cb6a5920 input=9f2d6b2a95d0a22a]*/
{ {
unicodedata_module_state *state = &global_module_state;
int index; int index;
Py_UCS4 c = (Py_UCS4)chr; Py_UCS4 c = (Py_UCS4)chr;
index = (int) _getrecord_ex(c)->combining; index = (int) _getrecord_ex(c)->combining;
if (UCD_Check(self, state->ucd_type)) { if (UCD_Check(self)) {
const change_record *old = get_old_record(self, c); const change_record *old = get_old_record(self, c);
if (old->category_changed == 0) if (old->category_changed == 0)
index = 0; /* unassigned */ index = 0; /* unassigned */
@ -370,11 +355,10 @@ static int
unicodedata_UCD_mirrored_impl(PyObject *self, int chr) unicodedata_UCD_mirrored_impl(PyObject *self, int chr)
/*[clinic end generated code: output=2532dbf8121b50e6 input=5dd400d351ae6f3b]*/ /*[clinic end generated code: output=2532dbf8121b50e6 input=5dd400d351ae6f3b]*/
{ {
unicodedata_module_state *state = &global_module_state;
int index; int index;
Py_UCS4 c = (Py_UCS4)chr; Py_UCS4 c = (Py_UCS4)chr;
index = (int) _getrecord_ex(c)->mirrored; index = (int) _getrecord_ex(c)->mirrored;
if (UCD_Check(self, state->ucd_type)) { if (UCD_Check(self)) {
const change_record *old = get_old_record(self, c); const change_record *old = get_old_record(self, c);
if (old->category_changed == 0) if (old->category_changed == 0)
index = 0; /* unassigned */ index = 0; /* unassigned */
@ -398,11 +382,10 @@ static PyObject *
unicodedata_UCD_east_asian_width_impl(PyObject *self, int chr) unicodedata_UCD_east_asian_width_impl(PyObject *self, int chr)
/*[clinic end generated code: output=484e8537d9ee8197 input=c4854798aab026e0]*/ /*[clinic end generated code: output=484e8537d9ee8197 input=c4854798aab026e0]*/
{ {
unicodedata_module_state *state = &global_module_state;
int index; int index;
Py_UCS4 c = (Py_UCS4)chr; Py_UCS4 c = (Py_UCS4)chr;
index = (int) _getrecord_ex(c)->east_asian_width; index = (int) _getrecord_ex(c)->east_asian_width;
if (UCD_Check(self, state->ucd_type)) { if (UCD_Check(self)) {
const change_record *old = get_old_record(self, c); const change_record *old = get_old_record(self, c);
if (old->category_changed == 0) if (old->category_changed == 0)
index = 0; /* unassigned */ index = 0; /* unassigned */
@ -428,7 +411,6 @@ static PyObject *
unicodedata_UCD_decomposition_impl(PyObject *self, int chr) unicodedata_UCD_decomposition_impl(PyObject *self, int chr)
/*[clinic end generated code: output=7d699f3ec7565d27 input=e4c12459ad68507b]*/ /*[clinic end generated code: output=7d699f3ec7565d27 input=e4c12459ad68507b]*/
{ {
unicodedata_module_state *state = &global_module_state;
char decomp[256]; char decomp[256];
int code, index, count; int code, index, count;
size_t i; size_t i;
@ -437,7 +419,7 @@ unicodedata_UCD_decomposition_impl(PyObject *self, int chr)
code = (int)c; code = (int)c;
if (UCD_Check(self, state->ucd_type)) { if (UCD_Check(self)) {
const change_record *old = get_old_record(self, c); const change_record *old = get_old_record(self, c);
if (old->category_changed == 0) if (old->category_changed == 0)
return PyUnicode_FromString(""); /* unassigned */ return PyUnicode_FromString(""); /* unassigned */
@ -480,13 +462,14 @@ unicodedata_UCD_decomposition_impl(PyObject *self, int chr)
} }
static void static void
get_decomp_record(unicodedata_module_state *state, PyObject *self, get_decomp_record(PyObject *self, Py_UCS4 code,
Py_UCS4 code, int *index, int *prefix, int *count) int *index, int *prefix, int *count)
{ {
if (code >= 0x110000) { if (code >= 0x110000) {
*index = 0; *index = 0;
} else if (UCD_Check(self, state->ucd_type) && }
get_old_record(self, code)->category_changed==0) { else if (UCD_Check(self)
&& get_old_record(self, code)->category_changed==0) {
/* unassigned in old version */ /* unassigned in old version */
*index = 0; *index = 0;
} }
@ -515,8 +498,7 @@ get_decomp_record(unicodedata_module_state *state, PyObject *self,
#define SCount (LCount*NCount) #define SCount (LCount*NCount)
static PyObject* static PyObject*
nfd_nfkd(unicodedata_module_state *state, PyObject *self, nfd_nfkd(PyObject *self, PyObject *input, int k)
PyObject *input, int k)
{ {
PyObject *result; PyObject *result;
Py_UCS4 *output; Py_UCS4 *output;
@ -584,7 +566,7 @@ nfd_nfkd(unicodedata_module_state *state, PyObject *self,
continue; continue;
} }
/* normalization changes */ /* normalization changes */
if (UCD_Check(self, state->ucd_type)) { if (UCD_Check(self)) {
Py_UCS4 value = ((PreviousDBVersion*)self)->normalization(code); Py_UCS4 value = ((PreviousDBVersion*)self)->normalization(code);
if (value != 0) { if (value != 0) {
stack[stackptr++] = value; stack[stackptr++] = value;
@ -593,7 +575,7 @@ nfd_nfkd(unicodedata_module_state *state, PyObject *self,
} }
/* Other decompositions. */ /* Other decompositions. */
get_decomp_record(state, self, code, &index, &prefix, &count); get_decomp_record(self, code, &index, &prefix, &count);
/* Copy character if it is not decomposable, or has a /* Copy character if it is not decomposable, or has a
compatibility decomposition, but we do NFD. */ compatibility decomposition, but we do NFD. */
@ -665,7 +647,7 @@ find_nfc_index(const struct reindex* nfc, Py_UCS4 code)
} }
static PyObject* static PyObject*
nfc_nfkc(unicodedata_module_state *state, PyObject *self, PyObject *input, int k) nfc_nfkc(PyObject *self, PyObject *input, int k)
{ {
PyObject *result; PyObject *result;
int kind; int kind;
@ -677,7 +659,7 @@ nfc_nfkc(unicodedata_module_state *state, PyObject *self, PyObject *input, int k
Py_ssize_t skipped[20]; Py_ssize_t skipped[20];
int cskipped = 0; int cskipped = 0;
result = nfd_nfkd(state, self, input, k); result = nfd_nfkd(self, input, k);
if (!result) if (!result)
return NULL; return NULL;
/* result will be "ready". */ /* result will be "ready". */
@ -820,13 +802,13 @@ typedef enum {YES = 0, MAYBE = 1, NO = 2} QuickcheckResult;
* https://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms * https://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
*/ */
static QuickcheckResult static QuickcheckResult
is_normalized_quickcheck(unicodedata_module_state *state, PyObject *self, is_normalized_quickcheck(PyObject *self, PyObject *input, bool nfc, bool k,
PyObject *input, bool nfc, bool k, bool yes_only) bool yes_only)
{ {
/* An older version of the database is requested, quickchecks must be /* UCD 3.2.0 is requested, quickchecks must be disabled. */
disabled. */ if (UCD_Check(self)) {
if (UCD_Check(self, state->ucd_type))
return NO; return NO;
}
Py_ssize_t i, len; Py_ssize_t i, len;
int kind; int kind;
@ -885,7 +867,6 @@ unicodedata_UCD_is_normalized_impl(PyObject *self, PyObject *form,
PyObject *input) PyObject *input)
/*[clinic end generated code: output=11e5a3694e723ca5 input=a544f14cea79e508]*/ /*[clinic end generated code: output=11e5a3694e723ca5 input=a544f14cea79e508]*/
{ {
unicodedata_module_state *state = &global_module_state;
if (PyUnicode_READY(input) == -1) { if (PyUnicode_READY(input) == -1) {
return NULL; return NULL;
} }
@ -921,10 +902,10 @@ unicodedata_UCD_is_normalized_impl(PyObject *self, PyObject *form,
return NULL; return NULL;
} }
m = is_normalized_quickcheck(state, self, input, nfc, k, false); m = is_normalized_quickcheck(self, input, nfc, k, false);
if (m == MAYBE) { if (m == MAYBE) {
cmp = (nfc ? nfc_nfkc : nfd_nfkd)(state, self, input, k); cmp = (nfc ? nfc_nfkc : nfd_nfkd)(self, input, k);
if (cmp == NULL) { if (cmp == NULL) {
return NULL; return NULL;
} }
@ -959,7 +940,6 @@ unicodedata_UCD_normalize_impl(PyObject *self, PyObject *form,
PyObject *input) PyObject *input)
/*[clinic end generated code: output=05ca4385a2ad6983 input=3a5206c0ad2833fb]*/ /*[clinic end generated code: output=05ca4385a2ad6983 input=3a5206c0ad2833fb]*/
{ {
unicodedata_module_state *state = &global_module_state;
if (PyUnicode_GET_LENGTH(input) == 0) { if (PyUnicode_GET_LENGTH(input) == 0) {
/* Special case empty input strings, since resizing /* Special case empty input strings, since resizing
them later would cause internal errors. */ them later would cause internal errors. */
@ -968,36 +948,36 @@ unicodedata_UCD_normalize_impl(PyObject *self, PyObject *form,
} }
if (_PyUnicode_EqualToASCIIId(form, &PyId_NFC)) { if (_PyUnicode_EqualToASCIIId(form, &PyId_NFC)) {
if (is_normalized_quickcheck(state, self, input, if (is_normalized_quickcheck(self, input,
true, false, true) == YES) { true, false, true) == YES) {
Py_INCREF(input); Py_INCREF(input);
return input; return input;
} }
return nfc_nfkc(state, self, input, 0); return nfc_nfkc(self, input, 0);
} }
if (_PyUnicode_EqualToASCIIId(form, &PyId_NFKC)) { if (_PyUnicode_EqualToASCIIId(form, &PyId_NFKC)) {
if (is_normalized_quickcheck(state, self, input, if (is_normalized_quickcheck(self, input,
true, true, true) == YES) { true, true, true) == YES) {
Py_INCREF(input); Py_INCREF(input);
return input; return input;
} }
return nfc_nfkc(state, self, input, 1); return nfc_nfkc(self, input, 1);
} }
if (_PyUnicode_EqualToASCIIId(form, &PyId_NFD)) { if (_PyUnicode_EqualToASCIIId(form, &PyId_NFD)) {
if (is_normalized_quickcheck(state, self, input, if (is_normalized_quickcheck(self, input,
false, false, true) == YES) { false, false, true) == YES) {
Py_INCREF(input); Py_INCREF(input);
return input; return input;
} }
return nfd_nfkd(state, self, input, 0); return nfd_nfkd(self, input, 0);
} }
if (_PyUnicode_EqualToASCIIId(form, &PyId_NFKD)) { if (_PyUnicode_EqualToASCIIId(form, &PyId_NFKD)) {
if (is_normalized_quickcheck(state, self, input, if (is_normalized_quickcheck(self, input,
false, true, true) == YES) { false, true, true) == YES) {
Py_INCREF(input); Py_INCREF(input);
return input; return input;
} }
return nfd_nfkd(state, self, input, 1); return nfd_nfkd(self, input, 1);
} }
PyErr_SetString(PyExc_ValueError, "invalid normalization form"); PyErr_SetString(PyExc_ValueError, "invalid normalization form");
return NULL; return NULL;
@ -1080,7 +1060,7 @@ is_unified_ideograph(Py_UCS4 code)
(cp < named_sequences_end)) (cp < named_sequences_end))
static int static int
_getucname(unicodedata_module_state *state, PyObject *self, _getucname(PyObject *self,
Py_UCS4 code, char* buffer, int buflen, int with_alias_and_seq) Py_UCS4 code, char* buffer, int buflen, int with_alias_and_seq)
{ {
/* Find the name associated with the given code point. /* Find the name associated with the given code point.
@ -1098,7 +1078,7 @@ _getucname(unicodedata_module_state *state, PyObject *self,
if (!with_alias_and_seq && (IS_ALIAS(code) || IS_NAMED_SEQ(code))) if (!with_alias_and_seq && (IS_ALIAS(code) || IS_NAMED_SEQ(code)))
return 0; return 0;
if (UCD_Check(self, state->ucd_type)) { if (UCD_Check(self)) {
/* in 3.2.0 there are no aliases and named sequences */ /* in 3.2.0 there are no aliases and named sequences */
const change_record *old; const change_record *old;
if (IS_ALIAS(code) || IS_NAMED_SEQ(code)) if (IS_ALIAS(code) || IS_NAMED_SEQ(code))
@ -1182,23 +1162,21 @@ _getucname(unicodedata_module_state *state, PyObject *self,
} }
static int static int
capi_getucname(void *state_raw, PyObject *self, Py_UCS4 code, capi_getucname(Py_UCS4 code,
char* buffer, int buflen, char* buffer, int buflen,
int with_alias_and_seq) int with_alias_and_seq)
{ {
unicodedata_module_state *state = (unicodedata_module_state *)state_raw; return _getucname(NULL, code, buffer, buflen, with_alias_and_seq);
return _getucname(state, self, code, buffer, buflen, with_alias_and_seq);
} }
static int static int
_cmpname(unicodedata_module_state *state, PyObject *self, _cmpname(PyObject *self, int code, const char* name, int namelen)
int code, const char* name, int namelen)
{ {
/* check if code corresponds to the given name */ /* check if code corresponds to the given name */
int i; int i;
char buffer[NAME_MAXLEN+1]; char buffer[NAME_MAXLEN+1];
if (!_getucname(state, self, code, buffer, NAME_MAXLEN, 1)) if (!_getucname(self, code, buffer, NAME_MAXLEN, 1))
return 0; return 0;
for (i = 0; i < namelen; i++) { for (i = 0; i < namelen; i++) {
if (Py_TOUPPER(name[i]) != buffer[i]) if (Py_TOUPPER(name[i]) != buffer[i])
@ -1243,7 +1221,7 @@ _check_alias_and_seq(unsigned int cp, Py_UCS4* code, int with_named_seq)
} }
static int static int
_getcode(unicodedata_module_state *state, PyObject* self, _getcode(PyObject* self,
const char* name, int namelen, Py_UCS4* code, int with_named_seq) const char* name, int namelen, Py_UCS4* code, int with_named_seq)
{ {
/* Return the code point associated with the given name. /* Return the code point associated with the given name.
@ -1305,7 +1283,7 @@ _getcode(unicodedata_module_state *state, PyObject* self,
v = code_hash[i]; v = code_hash[i];
if (!v) if (!v)
return 0; return 0;
if (_cmpname(state, self, v, name, namelen)) { if (_cmpname(self, v, name, namelen)) {
return _check_alias_and_seq(v, code, with_named_seq); return _check_alias_and_seq(v, code, with_named_seq);
} }
incr = (h ^ (h >> 3)) & mask; incr = (h ^ (h >> 3)) & mask;
@ -1316,7 +1294,7 @@ _getcode(unicodedata_module_state *state, PyObject* self,
v = code_hash[i]; v = code_hash[i];
if (!v) if (!v)
return 0; return 0;
if (_cmpname(state, self, v, name, namelen)) { if (_cmpname(self, v, name, namelen)) {
return _check_alias_and_seq(v, code, with_named_seq); return _check_alias_and_seq(v, code, with_named_seq);
} }
incr = incr << 1; incr = incr << 1;
@ -1326,15 +1304,20 @@ _getcode(unicodedata_module_state *state, PyObject* self,
} }
static int static int
capi_getcode(void *state_raw, PyObject* self, capi_getcode(const char* name, int namelen, Py_UCS4* code,
const char* name, int namelen, Py_UCS4* code,
int with_named_seq) int with_named_seq)
{ {
unicodedata_module_state *state = (unicodedata_module_state *)state_raw; return _getcode(NULL, name, namelen, code, with_named_seq);
return _getcode(state, self, name, namelen, code, with_named_seq);
} }
static const _PyUnicode_Name_CAPI unicodedata_capi =
{
.getname = capi_getucname,
.getcode = capi_getcode,
};
/* -------------------------------------------------------------------- */ /* -------------------------------------------------------------------- */
/* Python bindings */ /* Python bindings */
@ -1356,11 +1339,10 @@ static PyObject *
unicodedata_UCD_name_impl(PyObject *self, int chr, PyObject *default_value) unicodedata_UCD_name_impl(PyObject *self, int chr, PyObject *default_value)
/*[clinic end generated code: output=6bbb37a326407707 input=3e0367f534de56d9]*/ /*[clinic end generated code: output=6bbb37a326407707 input=3e0367f534de56d9]*/
{ {
unicodedata_module_state *state = &global_module_state;
char name[NAME_MAXLEN+1]; char name[NAME_MAXLEN+1];
Py_UCS4 c = (Py_UCS4)chr; Py_UCS4 c = (Py_UCS4)chr;
if (!_getucname(state, self, c, name, NAME_MAXLEN, 0)) { if (!_getucname(self, c, name, NAME_MAXLEN, 0)) {
if (default_value == NULL) { if (default_value == NULL) {
PyErr_SetString(PyExc_ValueError, "no such name"); PyErr_SetString(PyExc_ValueError, "no such name");
return NULL; return NULL;
@ -1392,7 +1374,6 @@ unicodedata_UCD_lookup_impl(PyObject *self, const char *name,
Py_ssize_clean_t name_length) Py_ssize_clean_t name_length)
/*[clinic end generated code: output=765cb8186788e6be input=a557be0f8607a0d6]*/ /*[clinic end generated code: output=765cb8186788e6be input=a557be0f8607a0d6]*/
{ {
unicodedata_module_state *state = &global_module_state;
Py_UCS4 code; Py_UCS4 code;
unsigned int index; unsigned int index;
if (name_length > NAME_MAXLEN) { if (name_length > NAME_MAXLEN) {
@ -1400,7 +1381,7 @@ unicodedata_UCD_lookup_impl(PyObject *self, const char *name,
return NULL; return NULL;
} }
if (!_getcode(state, self, name, (int)name_length, &code, 1)) { if (!_getcode(self, name, (int)name_length, &code, 1)) {
PyErr_Format(PyExc_KeyError, "undefined character name '%s'", name); PyErr_Format(PyExc_KeyError, "undefined character name '%s'", name);
return NULL; return NULL;
} }
@ -1415,8 +1396,10 @@ unicodedata_UCD_lookup_impl(PyObject *self, const char *name,
return PyUnicode_FromOrdinal(code); return PyUnicode_FromOrdinal(code);
} }
/* XXX Add doc strings. */ // List of functions used to define module functions *AND* unicodedata.UCD
// methods. For module functions, self is the module. For UCD methods, self
// is a UCD instance. The UCD_Check() macro is used to check if self is
// a UCD instance.
static PyMethodDef unicodedata_functions[] = { static PyMethodDef unicodedata_functions[] = {
UNICODEDATA_UCD_DECIMAL_METHODDEF UNICODEDATA_UCD_DECIMAL_METHODDEF
UNICODEDATA_UCD_DIGIT_METHODDEF UNICODEDATA_UCD_DIGIT_METHODDEF
@ -1501,41 +1484,64 @@ static struct PyModuleDef unicodedatamodule = {
NULL NULL
}; };
static int
unicodedata_exec(PyObject *module)
{
Py_SET_TYPE(&UCD_Type, &PyType_Type);
PyTypeObject *ucd_type = &UCD_Type;
if (PyModule_AddStringConstant(module, "unidata_version", UNIDATA_VERSION) < 0) {
return -1;
}
if (PyModule_AddType(module, ucd_type) < 0) {
return -1;
}
/* Previous versions */
PyObject *v;
v = new_previous_version(ucd_type, "3.2.0",
get_change_3_2_0, normalization_3_2_0);
if (v == NULL) {
return -1;
}
if (PyModule_AddObject(module, "ucd_3_2_0", v) < 0) {
Py_DECREF(v);
return -1;
}
/* Export C API */
v = PyCapsule_New((void *)&unicodedata_capi, PyUnicodeData_CAPSULE_NAME,
NULL);
if (v == NULL) {
return -1;
}
if (PyModule_AddObject(module, "ucnhash_CAPI", v) < 0) {
Py_DECREF(v);
return -1;
}
return 0;
}
PyMODINIT_FUNC PyMODINIT_FUNC
PyInit_unicodedata(void) PyInit_unicodedata(void)
{ {
PyObject *m, *v; PyObject *module = PyModule_Create(&unicodedatamodule);
unicodedata_module_state *state = &global_module_state; if (!module) {
state->capi.size = sizeof(_PyUnicode_Name_CAPI);
state->capi.state = state;
state->capi.getname = capi_getucname;
state->capi.getcode = capi_getcode;
Py_SET_TYPE(&UCD_Type, &PyType_Type);
state->ucd_type = &UCD_Type;
m = PyModule_Create(&unicodedatamodule);
if (!m)
return NULL; return NULL;
}
PyModule_AddStringConstant(m, "unidata_version", UNIDATA_VERSION); if (unicodedata_exec(module) < 0) {
Py_INCREF(state->ucd_type); Py_DECREF(module);
PyModule_AddObject(m, "UCD", (PyObject*)state->ucd_type); return NULL;
}
/* Previous versions */ return module;
v = new_previous_version(state, "3.2.0",
get_change_3_2_0, normalization_3_2_0);
if (v != NULL)
PyModule_AddObject(m, "ucd_3_2_0", v);
/* Export C API */
v = PyCapsule_New((void *)&state->capi, PyUnicodeData_CAPSULE_NAME, NULL);
if (v != NULL)
PyModule_AddObject(m, "ucnhash_CAPI", v);
return m;
} }
/* /*
Local variables: Local variables:
c-basic-offset: 4 c-basic-offset: 4

View File

@ -6523,8 +6523,7 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
s++; s++;
ch = 0xffffffff; /* in case 'getcode' messes up */ ch = 0xffffffff; /* in case 'getcode' messes up */
if (namelen <= INT_MAX && if (namelen <= INT_MAX &&
ucnhash_capi->getcode(ucnhash_capi->state, NULL, ucnhash_capi->getcode(start, (int)namelen,
start, (int)namelen,
&ch, 0)) { &ch, 0)) {
assert(ch <= MAX_UNICODE); assert(ch <= MAX_UNICODE);
WRITE_CHAR(ch); WRITE_CHAR(ch);

View File

@ -987,8 +987,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
for (i = start, ressize = 0; i < end; ++i) { for (i = start, ressize = 0; i < end; ++i) {
/* object is guaranteed to be "ready" */ /* object is guaranteed to be "ready" */
c = PyUnicode_READ_CHAR(object, i); c = PyUnicode_READ_CHAR(object, i);
if (ucnhash_capi->getname(ucnhash_capi->state, NULL, if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) {
c, buffer, sizeof(buffer), 1)) {
replsize = 1+1+1+(int)strlen(buffer)+1; replsize = 1+1+1+(int)strlen(buffer)+1;
} }
else if (c >= 0x10000) { else if (c >= 0x10000) {
@ -1011,8 +1010,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
i < end; ++i) { i < end; ++i) {
c = PyUnicode_READ_CHAR(object, i); c = PyUnicode_READ_CHAR(object, i);
*outp++ = '\\'; *outp++ = '\\';
if (ucnhash_capi->getname(ucnhash_capi->state, NULL, if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) {
c, buffer, sizeof(buffer), 1)) {
*outp++ = 'N'; *outp++ = 'N';
*outp++ = '{'; *outp++ = '{';
strcpy((char *)outp, buffer); strcpy((char *)outp, buffer);