Issue #4474: On platforms with sizeof(wchar_t) == 4 and
sizeof(Py_UNICODE) == 2, PyUnicode_FromWideChar now converts each character outside the BMP to the appropriate surrogate pair. Thanks to Victor Stinner for the patch. (Backport of r70452 from py3k to trunk.)
This commit is contained in:
parent
eb15863a97
commit
6b265f1bf8
|
@ -12,6 +12,10 @@ What's New in Python 2.7 alpha 1
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #4474: PyUnicode_FromWideChar now converts characters outside
|
||||
the BMP to surrogate pairs, on systems with sizeof(wchar_t) == 4
|
||||
and sizeof(Py_UNICODE) == 2.
|
||||
|
||||
- Issue #5237: Allow auto-numbered fields in str.format(). For
|
||||
example: '{} {}'.format(1, 2) == '1 2'.
|
||||
|
||||
|
|
|
@ -620,6 +620,48 @@ test_u_code(PyObject *self)
|
|||
return Py_None;
|
||||
}
|
||||
|
||||
/* Check that PyUnicode_FromWideChar() handles a character outside the BMP.

   The code point U+10ABCD is built from a wchar_t buffer (a single 32-bit
   unit when SIZEOF_WCHAR_T == 4, otherwise the UTF-16 surrogate pair
   0xDBEA 0xDFCD) and compared against the same code point built from its
   UTF-8 encoding "\xf4\x8a\xaf\x8d".  Both objects must have the same
   length and compare equal.

   Returns None on success.  On failure returns NULL, or whatever
   raiseTestError() returns (presumably NULL with an exception set --
   see its definition elsewhere in this file). */
static PyObject *
test_widechar(PyObject *self)
{
#if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
    /* One UTF-32 unit; the unused trailing element is zero-initialised. */
    const wchar_t wtext[2] = {(wchar_t)0x10ABCDu};
    Py_ssize_t wtextlen = 1;
#else
    /* The same code point spelled as a UTF-16 surrogate pair. */
    const wchar_t wtext[3] = {(wchar_t)0xDBEAu, (wchar_t)0xDFCDu};
    Py_ssize_t wtextlen = 2;
#endif
    PyObject *wide, *utf8;

    wide = PyUnicode_FromWideChar(wtext, wtextlen);
    if (wide == NULL)
        return NULL;

    utf8 = PyUnicode_FromString("\xf4\x8a\xaf\x8d");
    if (utf8 == NULL) {
        Py_DECREF(wide);
        return NULL;
    }

    if (PyUnicode_GET_SIZE(wide) != PyUnicode_GET_SIZE(utf8)) {
        Py_DECREF(wide);
        Py_DECREF(utf8);
        return raiseTestError("test_widechar",
                              "wide string and utf8 string have different length");
    }

    if (PyUnicode_Compare(wide, utf8)) {
        Py_DECREF(wide);
        Py_DECREF(utf8);
        /* A non-zero result can also mean the comparison itself failed;
           in that case propagate the pending exception unchanged. */
        if (PyErr_Occurred())
            return NULL;
        return raiseTestError("test_widechar",
                              "wide string and utf8 string are different");
    }

    Py_DECREF(wide);
    Py_DECREF(utf8);
    Py_RETURN_NONE;
}
|
||||
|
||||
static PyObject *
|
||||
test_empty_argparse(PyObject *self)
|
||||
{
|
||||
|
@ -975,6 +1017,7 @@ static PyMethodDef TestMethods[] = {
|
|||
#endif
|
||||
#ifdef Py_USING_UNICODE
|
||||
{"test_u_code", (PyCFunction)test_u_code, METH_NOARGS},
|
||||
{"test_widechar", (PyCFunction)test_widechar, METH_NOARGS},
|
||||
#endif
|
||||
#ifdef WITH_THREAD
|
||||
{"_test_thread_state", test_thread_state, METH_VARARGS},
|
||||
|
|
|
@ -529,6 +529,60 @@ PyObject *PyUnicode_FromString(const char *u)
|
|||
|
||||
#ifdef HAVE_WCHAR_H
|
||||
|
||||
#if (Py_UNICODE_SIZE == 2) && defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
|
||||
# define CONVERT_WCHAR_TO_SURROGATES
|
||||
#endif
|
||||
|
||||
#ifdef CONVERT_WCHAR_TO_SURROGATES
|
||||
|
||||
/* Here sizeof(wchar_t) is 4 but Py_UNICODE_SIZE == 2, so we need
|
||||
to convert from UTF32 to UTF16. */
|
||||
|
||||
PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
|
||||
Py_ssize_t size)
|
||||
{
|
||||
PyUnicodeObject *unicode;
|
||||
register Py_ssize_t i;
|
||||
Py_ssize_t alloc;
|
||||
const wchar_t *orig_w;
|
||||
|
||||
if (w == NULL) {
|
||||
PyErr_BadInternalCall();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
alloc = size;
|
||||
orig_w = w;
|
||||
for (i = size; i > 0; i--) {
|
||||
if (*w > 0xFFFF)
|
||||
alloc++;
|
||||
w++;
|
||||
}
|
||||
w = orig_w;
|
||||
unicode = _PyUnicode_New(alloc);
|
||||
if (!unicode)
|
||||
return NULL;
|
||||
|
||||
/* Copy the wchar_t data into the new object */
|
||||
{
|
||||
register Py_UNICODE *u;
|
||||
u = PyUnicode_AS_UNICODE(unicode);
|
||||
for (i = size; i > 0; i--) {
|
||||
if (*w > 0xFFFF) {
|
||||
wchar_t ordinal = *w++;
|
||||
ordinal -= 0x10000;
|
||||
*u++ = 0xD800 | (ordinal >> 10);
|
||||
*u++ = 0xDC00 | (ordinal & 0x3FF);
|
||||
}
|
||||
else
|
||||
*u++ = *w++;
|
||||
}
|
||||
}
|
||||
return (PyObject *)unicode;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
|
||||
Py_ssize_t size)
|
||||
{
|
||||
|
@ -559,6 +613,10 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
|
|||
return (PyObject *)unicode;
|
||||
}
|
||||
|
||||
#endif /* CONVERT_WCHAR_TO_SURROGATES */
|
||||
|
||||
#undef CONVERT_WCHAR_TO_SURROGATES
|
||||
|
||||
static void
|
||||
makefmt(char *fmt, int longflag, int size_tflag, int zeropad, int width, int precision, char c)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue