Issue #10156: In the interpreter's initialization phase, unicode globals

are now initialized dynamically as needed.
This commit is contained in:
Serhiy Storchaka 2013-01-26 12:13:40 +02:00
parent 6fef14d7f3
commit c59c85c1ac
2 changed files with 41 additions and 41 deletions

View File

@ -9,6 +9,9 @@ What's New in Python 2.7.4
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #10156: In the interpreter's initialization phase, unicode globals
are now initialized dynamically as needed.
- Issue #16975: Fix error handling bug in the escape-decode decoder. - Issue #16975: Fix error handling bug in the escape-decode decoder.
- Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping" - Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping"

View File

@ -82,8 +82,9 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
/* --- Globals ------------------------------------------------------------ /* --- Globals ------------------------------------------------------------
The globals are initialized by the _PyUnicode_Init() API and should NOTE: In the interpreter's initialization phase, some globals are currently
not be used before calling that API. initialized dynamically as needed. In the process Unicode objects may
be created before the Unicode type is ready.
*/ */
@ -93,15 +94,27 @@ extern "C" {
#endif #endif
/* Free list for Unicode objects */ /* Free list for Unicode objects */
static PyUnicodeObject *free_list; static PyUnicodeObject *free_list = NULL;
static int numfree; static int numfree = 0;
/* The empty Unicode object is shared to improve performance. */ /* The empty Unicode object is shared to improve performance. */
static PyUnicodeObject *unicode_empty; static PyUnicodeObject *unicode_empty = NULL;
#define _Py_RETURN_UNICODE_EMPTY() \
do { \
if (unicode_empty != NULL) \
Py_INCREF(unicode_empty); \
else { \
unicode_empty = _PyUnicode_New(0); \
if (unicode_empty != NULL) \
Py_INCREF(unicode_empty); \
} \
return (PyObject *)unicode_empty; \
} while (0)
/* Single character Unicode strings in the Latin-1 range are being /* Single character Unicode strings in the Latin-1 range are being
shared as well. */ shared as well. */
static PyUnicodeObject *unicode_latin1[256]; static PyUnicodeObject *unicode_latin1[256] = {NULL};
/* Default encoding to use and assume when NULL is passed as encoding /* Default encoding to use and assume when NULL is passed as encoding
parameter; it is initialized by _PyUnicode_Init(). parameter; it is initialized by _PyUnicode_Init().
@ -110,7 +123,7 @@ static PyUnicodeObject *unicode_latin1[256];
PyUnicode_GetDefaultEncoding() APIs to access this global. PyUnicode_GetDefaultEncoding() APIs to access this global.
*/ */
static char unicode_default_encoding[100]; static char unicode_default_encoding[100 + 1] = "ascii";
/* Fast detection of the most frequent whitespace characters */ /* Fast detection of the most frequent whitespace characters */
const unsigned char _Py_ascii_whitespace[] = { const unsigned char _Py_ascii_whitespace[] = {
@ -204,7 +217,7 @@ PyUnicode_GetMax(void)
#define BLOOM_MASK unsigned long #define BLOOM_MASK unsigned long
static BLOOM_MASK bloom_linebreak; static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1))))) #define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
#define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1))))) #define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
@ -448,10 +461,8 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
if (u != NULL) { if (u != NULL) {
/* Optimization for empty strings */ /* Optimization for empty strings */
if (size == 0 && unicode_empty != NULL) { if (size == 0)
Py_INCREF(unicode_empty); _Py_RETURN_UNICODE_EMPTY();
return (PyObject *)unicode_empty;
}
/* Single character Unicode objects in the Latin-1 range are /* Single character Unicode objects in the Latin-1 range are
shared when using this constructor */ shared when using this constructor */
@ -497,10 +508,8 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
if (u != NULL) { if (u != NULL) {
/* Optimization for empty strings */ /* Optimization for empty strings */
if (size == 0 && unicode_empty != NULL) { if (size == 0)
Py_INCREF(unicode_empty); _Py_RETURN_UNICODE_EMPTY();
return (PyObject *)unicode_empty;
}
/* Single characters are shared when using this constructor. /* Single characters are shared when using this constructor.
Restrict to ASCII, since the input must be UTF-8. */ Restrict to ASCII, since the input must be UTF-8. */
@ -1162,13 +1171,10 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
} }
/* Convert to Unicode */ /* Convert to Unicode */
if (len == 0) { if (len == 0)
Py_INCREF(unicode_empty); _Py_RETURN_UNICODE_EMPTY();
v = (PyObject *)unicode_empty;
}
else
v = PyUnicode_Decode(s, len, encoding, errors);
v = PyUnicode_Decode(s, len, encoding, errors);
return v; return v;
onError: onError:
@ -1381,7 +1387,7 @@ int PyUnicode_SetDefaultEncoding(const char *encoding)
Py_DECREF(v); Py_DECREF(v);
strncpy(unicode_default_encoding, strncpy(unicode_default_encoding,
encoding, encoding,
sizeof(unicode_default_encoding)); sizeof(unicode_default_encoding) - 1);
return 0; return 0;
onError: onError:
@ -8850,8 +8856,6 @@ PyTypeObject PyUnicode_Type = {
void _PyUnicode_Init(void) void _PyUnicode_Init(void)
{ {
int i;
/* XXX - move this array to unicodectype.c ? */ /* XXX - move this array to unicodectype.c ? */
Py_UNICODE linebreak[] = { Py_UNICODE linebreak[] = {
0x000A, /* LINE FEED */ 0x000A, /* LINE FEED */
@ -8865,15 +8869,12 @@ void _PyUnicode_Init(void)
}; };
/* Init the implementation */ /* Init the implementation */
free_list = NULL; if (!unicode_empty) {
numfree = 0; unicode_empty = _PyUnicode_New(0);
unicode_empty = _PyUnicode_New(0); if (!unicode_empty)
if (!unicode_empty) return;
return; }
strcpy(unicode_default_encoding, "ascii");
for (i = 0; i < 256; i++)
unicode_latin1[i] = NULL;
if (PyType_Ready(&PyUnicode_Type) < 0) if (PyType_Ready(&PyUnicode_Type) < 0)
Py_FatalError("Can't initialize 'unicode'"); Py_FatalError("Can't initialize 'unicode'");
@ -8918,15 +8919,11 @@ _PyUnicode_Fini(void)
{ {
int i; int i;
Py_XDECREF(unicode_empty); Py_CLEAR(unicode_empty);
unicode_empty = NULL;
for (i = 0; i < 256; i++)
Py_CLEAR(unicode_latin1[i]);
for (i = 0; i < 256; i++) {
if (unicode_latin1[i]) {
Py_DECREF(unicode_latin1[i]);
unicode_latin1[i] = NULL;
}
}
(void)PyUnicode_ClearFreeList(); (void)PyUnicode_ClearFreeList();
} }