mirror of https://github.com/python/cpython
Issue #10156: In the interpreter's initialization phase, unicode globals
are now initialized dynamically as needed.
This commit is contained in:
parent
5bb893c44b
commit
059972535f
|
@ -10,6 +10,9 @@ What's New in Python 3.2.4
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #10156: In the interpreter's initialization phase, unicode globals
|
||||
are now initialized dynamically as needed.
|
||||
|
||||
- Issue #16975: Fix error handling bug in the escape-decode bytes decoder.
|
||||
|
||||
- Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping"
|
||||
|
|
|
@ -80,8 +80,9 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|||
|
||||
/* --- Globals ------------------------------------------------------------
|
||||
|
||||
The globals are initialized by the _PyUnicode_Init() API and should
|
||||
not be used before calling that API.
|
||||
NOTE: In the interpreter's initialization phase, some globals are currently
|
||||
initialized dynamically as needed. In the process Unicode objects may
|
||||
be created before the Unicode type is ready.
|
||||
|
||||
*/
|
||||
|
||||
|
@ -98,18 +99,30 @@ extern "C" {
|
|||
Another way to look at this is that to say that the actual reference
|
||||
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
|
||||
*/
|
||||
static PyObject *interned;
|
||||
static PyObject *interned = NULL;
|
||||
|
||||
/* Free list for Unicode objects */
|
||||
static PyUnicodeObject *free_list;
|
||||
static int numfree;
|
||||
static PyUnicodeObject *free_list = NULL;
|
||||
static int numfree = 0;
|
||||
|
||||
/* The empty Unicode object is shared to improve performance. */
|
||||
static PyUnicodeObject *unicode_empty;
|
||||
static PyUnicodeObject *unicode_empty = NULL;
|
||||
|
||||
#define _Py_RETURN_UNICODE_EMPTY() \
|
||||
do { \
|
||||
if (unicode_empty != NULL) \
|
||||
Py_INCREF(unicode_empty); \
|
||||
else { \
|
||||
unicode_empty = _PyUnicode_New(0); \
|
||||
if (unicode_empty != NULL) \
|
||||
Py_INCREF(unicode_empty); \
|
||||
} \
|
||||
return (PyObject *)unicode_empty; \
|
||||
} while (0)
|
||||
|
||||
/* Single character Unicode strings in the Latin-1 range are being
|
||||
shared as well. */
|
||||
static PyUnicodeObject *unicode_latin1[256];
|
||||
static PyUnicodeObject *unicode_latin1[256] = {NULL};
|
||||
|
||||
/* Fast detection of the most frequent whitespace characters */
|
||||
const unsigned char _Py_ascii_whitespace[] = {
|
||||
|
@ -214,7 +227,7 @@ PyUnicode_GetMax(void)
|
|||
|
||||
#define BLOOM_MASK unsigned long
|
||||
|
||||
static BLOOM_MASK bloom_linebreak;
|
||||
static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
|
||||
|
||||
#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
|
||||
#define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
|
||||
|
@ -479,10 +492,8 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
|
|||
if (u != NULL) {
|
||||
|
||||
/* Optimization for empty strings */
|
||||
if (size == 0 && unicode_empty != NULL) {
|
||||
Py_INCREF(unicode_empty);
|
||||
return (PyObject *)unicode_empty;
|
||||
}
|
||||
if (size == 0)
|
||||
_Py_RETURN_UNICODE_EMPTY();
|
||||
|
||||
/* Single character Unicode objects in the Latin-1 range are
|
||||
shared when using this constructor */
|
||||
|
@ -528,10 +539,8 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
|
|||
if (u != NULL) {
|
||||
|
||||
/* Optimization for empty strings */
|
||||
if (size == 0 && unicode_empty != NULL) {
|
||||
Py_INCREF(unicode_empty);
|
||||
return (PyObject *)unicode_empty;
|
||||
}
|
||||
if (size == 0)
|
||||
_Py_RETURN_UNICODE_EMPTY();
|
||||
|
||||
/* Single characters are shared when using this constructor.
|
||||
Restrict to ASCII, since the input must be UTF-8. */
|
||||
|
@ -1393,15 +1402,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
|
|||
|
||||
/* Decoding bytes objects is the most common case and should be fast */
|
||||
if (PyBytes_Check(obj)) {
|
||||
if (PyBytes_GET_SIZE(obj) == 0) {
|
||||
Py_INCREF(unicode_empty);
|
||||
v = (PyObject *) unicode_empty;
|
||||
}
|
||||
else {
|
||||
if (PyBytes_GET_SIZE(obj) == 0)
|
||||
_Py_RETURN_UNICODE_EMPTY();
|
||||
v = PyUnicode_Decode(
|
||||
PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
|
||||
encoding, errors);
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
|
@ -1421,12 +1426,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
|
|||
}
|
||||
|
||||
if (buffer.len == 0) {
|
||||
Py_INCREF(unicode_empty);
|
||||
v = (PyObject *) unicode_empty;
|
||||
PyBuffer_Release(&buffer);
|
||||
_Py_RETURN_UNICODE_EMPTY();
|
||||
}
|
||||
else
|
||||
v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
|
||||
|
||||
v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
|
||||
PyBuffer_Release(&buffer);
|
||||
return v;
|
||||
}
|
||||
|
@ -8323,10 +8327,8 @@ unicode_repeat(PyUnicodeObject *str, Py_ssize_t len)
|
|||
Py_ssize_t nchars;
|
||||
size_t nbytes;
|
||||
|
||||
if (len < 1) {
|
||||
Py_INCREF(unicode_empty);
|
||||
return (PyObject *)unicode_empty;
|
||||
}
|
||||
if (len < 1)
|
||||
_Py_RETURN_UNICODE_EMPTY();
|
||||
|
||||
if (len == 1 && PyUnicode_CheckExact(str)) {
|
||||
/* no repeat, return original string */
|
||||
|
@ -10056,8 +10058,6 @@ PyTypeObject PyUnicode_Type = {
|
|||
|
||||
void _PyUnicode_Init(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* XXX - move this array to unicodectype.c ? */
|
||||
Py_UNICODE linebreak[] = {
|
||||
0x000A, /* LINE FEED */
|
||||
|
@ -10071,14 +10071,12 @@ void _PyUnicode_Init(void)
|
|||
};
|
||||
|
||||
/* Init the implementation */
|
||||
free_list = NULL;
|
||||
numfree = 0;
|
||||
if (!unicode_empty) {
|
||||
unicode_empty = _PyUnicode_New(0);
|
||||
if (!unicode_empty)
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
unicode_latin1[i] = NULL;
|
||||
if (PyType_Ready(&PyUnicode_Type) < 0)
|
||||
Py_FatalError("Can't initialize 'unicode'");
|
||||
|
||||
|
@ -10123,15 +10121,11 @@ _PyUnicode_Fini(void)
|
|||
{
|
||||
int i;
|
||||
|
||||
Py_XDECREF(unicode_empty);
|
||||
unicode_empty = NULL;
|
||||
Py_CLEAR(unicode_empty);
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
Py_CLEAR(unicode_latin1[i]);
|
||||
|
||||
for (i = 0; i < 256; i++) {
|
||||
if (unicode_latin1[i]) {
|
||||
Py_DECREF(unicode_latin1[i]);
|
||||
unicode_latin1[i] = NULL;
|
||||
}
|
||||
}
|
||||
(void)PyUnicode_ClearFreeList();
|
||||
}
|
||||
|
||||
|
@ -10250,8 +10244,7 @@ void _Py_ReleaseInternedUnicodeStrings(void)
|
|||
"mortal/immortal\n", mortal_size, immortal_size);
|
||||
Py_DECREF(keys);
|
||||
PyDict_Clear(interned);
|
||||
Py_DECREF(interned);
|
||||
interned = NULL;
|
||||
Py_CLEAR(interned);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue