This commit is contained in:
Charles-François Natali 2011-09-29 19:51:46 +02:00
commit 734e159b12
2 changed files with 32 additions and 40 deletions

View File

@ -377,16 +377,6 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
_PyUnicode_NONCOMPACT_DATA(op))
/* Pointer to the UTF-8 buffer of op.  For a compact ASCII string this is the
   address immediately after the PyASCIIObject structure; otherwise it is the
   utf8 member of the PyCompactUnicodeObject.  op appears more than once in
   the expansion, so do not pass an expression with side effects. */
#define _PyUnicode_UTF8(op) \
(PyUnicode_IS_COMPACT_ASCII(op) ? \
((char*)((PyASCIIObject*)(op) + 1)) : \
((PyCompactUnicodeObject*)(op))->utf8)
/* Length associated with the UTF-8 buffer of op.  For a compact ASCII string
   this is the length member of the PyASCIIObject; otherwise it is the
   utf8_length member of the PyCompactUnicodeObject.  op appears more than
   once in the expansion, so do not pass an expression with side effects. */
#define _PyUnicode_UTF8_LENGTH(op) \
(PyUnicode_IS_COMPACT_ASCII(op) ? \
((PyASCIIObject*)(op))->length : \
((PyCompactUnicodeObject*)(op))->utf8_length)
/* Compute (index * char_size) where char_size is 2 ** (kind - 1).
The index is a character index, the result is a size in bytes. */
@ -466,7 +456,7 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
#define PyUnicode_READY(op) \
(assert(PyUnicode_Check(op)), \
(PyUnicode_IS_READY(op) ? \
0 : _PyUnicode_Ready((PyUnicodeObject *)(op))))
0 : _PyUnicode_Ready((PyObject *)(op))))
/* Return a maximum character value which is suitable for creating another
string based on op. This is always an approximation but more efficient
@ -507,14 +497,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_New(
);
#endif
/* Initializes the canonical string representation from the deprecated
wstr/Py_UNICODE representation. This function is used to convert
unicode objects which were created using the old API to the new flexible
format introduced with PEP 393. The PyUnicode_READY() macro can be
more efficient if the string is already ready. */
/* Initializes the canonical string representation from the deprecated
wstr/Py_UNICODE representation. This function is used to convert Unicode
objects which were created using the old API to the new flexible format
introduced with PEP 393.
Don't call this function directly, use the public PyUnicode_READY() macro
instead. */
#ifndef Py_LIMITED_API
PyAPI_FUNC(int) _PyUnicode_Ready(
PyUnicodeObject *unicode /* Unicode object */
PyObject *unicode /* Unicode object */
);
#endif

View File

@ -105,6 +105,14 @@ extern "C" {
} \
} while (0)
/* Pointer to the UTF-8 buffer of op.  For a compact ASCII string this is the
   address immediately after the PyASCIIObject structure; otherwise it is the
   utf8 member of the PyCompactUnicodeObject.  op appears more than once in
   the expansion, so do not pass an expression with side effects. */
#define _PyUnicode_UTF8(op) \
(PyUnicode_IS_COMPACT_ASCII(op) ? \
((char*)((PyASCIIObject*)(op) + 1)) : \
((PyCompactUnicodeObject*)(op))->utf8)
/* Length associated with the UTF-8 buffer of op.  For a compact ASCII string
   this is the length member of the PyASCIIObject; otherwise it is the
   utf8_length member of the PyCompactUnicodeObject.  op appears more than
   once in the expansion, so do not pass an expression with side effects. */
#define _PyUnicode_UTF8_LENGTH(op) \
(PyUnicode_IS_COMPACT_ASCII(op) ? \
((PyASCIIObject*)(op))->length : \
((PyCompactUnicodeObject*)(op))->utf8_length)
/* Raw field accessors.  Each macro casts op to the named structure type and
   expands to the member itself, so the result can be both read and assigned.
   No type or state checking is performed on op. */
/* wstr member, accessed through PyASCIIObject. */
#define _PyUnicode_WSTR(op) (((PyASCIIObject*)(op))->wstr)
/* wstr_length member, accessed through PyCompactUnicodeObject. */
#define _PyUnicode_WSTR_LENGTH(op) (((PyCompactUnicodeObject*)(op))->wstr_length)
/* length member, accessed through PyASCIIObject. */
#define _PyUnicode_LENGTH(op) (((PyASCIIObject *)(op))->length)
@ -773,8 +781,9 @@ int unicode_ready_calls = 0;
#endif
int
_PyUnicode_Ready(PyUnicodeObject *unicode)
_PyUnicode_Ready(PyObject *obj)
{
PyUnicodeObject *unicode = (PyUnicodeObject *)obj;
wchar_t *end;
Py_UCS4 maxchar = 0;
Py_ssize_t num_surrogates;
@ -782,25 +791,19 @@ _PyUnicode_Ready(PyUnicodeObject *unicode)
Py_ssize_t length_wo_surrogates;
#endif
assert(PyUnicode_Check(unicode));
if (unicode->data.any != NULL) {
assert(PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND);
return 0;
}
/* _PyUnicode_Ready() is only intended for old-style API usage where
* strings were created using _PyObject_New() and where no canonical
* representation (the str field) has been set yet aka strings
* which are not yet ready.
*/
strings were created using _PyObject_New() and where no canonical
representation (the str field) has been set yet aka strings
which are not yet ready. */
assert(PyUnicode_Check(obj));
assert(!PyUnicode_IS_READY(obj));
assert(!PyUnicode_IS_COMPACT(obj));
assert(_PyUnicode_KIND(obj) == PyUnicode_WCHAR_KIND);
assert(_PyUnicode_WSTR(unicode) != NULL);
assert(_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND);
assert(!PyUnicode_IS_COMPACT(unicode));
assert(!PyUnicode_IS_READY(unicode));
/* Actually, it should neither be interned nor be anything else: */
assert(_PyUnicode_STATE(unicode).interned == 0);
assert(unicode->data.any == NULL);
assert(unicode->_base.utf8 == NULL);
/* Actually, it should neither be interned nor be anything else: */
assert(_PyUnicode_STATE(unicode).interned == SSTATE_NOT_INTERNED);
#ifdef Py_DEBUG
++unicode_ready_calls;
@ -808,11 +811,8 @@ _PyUnicode_Ready(PyUnicodeObject *unicode)
end = _PyUnicode_WSTR(unicode) + _PyUnicode_WSTR_LENGTH(unicode);
if (find_maxchar_surrogates(_PyUnicode_WSTR(unicode), end,
&maxchar,
&num_surrogates) == -1) {
assert(0 && "PyUnicode_FindMaxCharAndNumSurrogatePairs failed");
&maxchar, &num_surrogates) == -1)
return -1;
}
if (maxchar < 256) {
unicode->data.any = PyObject_MALLOC(_PyUnicode_WSTR_LENGTH(unicode) + 1);
@ -1038,8 +1038,8 @@ PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
/* If not empty and not single character, copy the Unicode data
into the new object */
if (find_maxchar_surrogates(u, u + size, &maxchar,
&num_surrogates) == -1)
if (find_maxchar_surrogates(u, u + size,
&maxchar, &num_surrogates) == -1)
return NULL;
unicode = (PyUnicodeObject *) PyUnicode_New(size - num_surrogates,