Issue #3280: like chr(), the "%c" format now accepts the full Unicode range
even on "narrow Unicode" builds, where a code point above 0xFFFF is returned as a UTF-16 surrogate pair.
This commit is contained in:
parent
142957ce95
commit
a4db68622c
|
@ -717,7 +717,10 @@ class UnicodeTest(
|
||||||
self.assertEqual("%(x)s, %(\xfc)s" % {'x':"abc", '\xfc':"def"}, 'abc, def')
|
self.assertEqual("%(x)s, %(\xfc)s" % {'x':"abc", '\xfc':"def"}, 'abc, def')
|
||||||
|
|
||||||
self.assertEqual('%c' % 0x1234, '\u1234')
|
self.assertEqual('%c' % 0x1234, '\u1234')
|
||||||
self.assertRaises(OverflowError, "%c".__mod__, (sys.maxunicode+1,))
|
self.assertEqual('%c' % 0x21483, '\U00021483')
|
||||||
|
self.assertRaises(OverflowError, "%c".__mod__, (0x110000,))
|
||||||
|
self.assertEqual('%c' % '\U00021483', '\U00021483')
|
||||||
|
self.assertRaises(TypeError, "%c".__mod__, "aa")
|
||||||
|
|
||||||
# formatting jobs delegated from the string implementation:
|
# formatting jobs delegated from the string implementation:
|
||||||
self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
|
self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
|
||||||
|
|
|
@ -12,6 +12,11 @@ What's new in Python 3.0b2?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #3280: like chr(), the "%c" format now accepts Unicode code points
|
||||||
|
beyond the Basic Multilingual Plane (above 0xffff) on all configurations. On
|
||||||
|
"narrow Unicode" builds, the result is a string of 2 code units, forming a
|
||||||
|
UTF-16 surrogate pair.
|
||||||
|
|
||||||
- Issue #3282: str.isprintable() should return False for undefined
|
- Issue #3282: str.isprintable() should return False for undefined
|
||||||
Unicode characters.
|
Unicode characters.
|
||||||
|
|
||||||
|
|
|
@ -8730,11 +8730,28 @@ formatchar(Py_UNICODE *buf,
|
||||||
size_t buflen,
|
size_t buflen,
|
||||||
PyObject *v)
|
PyObject *v)
|
||||||
{
|
{
|
||||||
/* presume that the buffer is at least 2 characters long */
|
/* presume that the buffer is at least 3 characters long */
|
||||||
if (PyUnicode_Check(v)) {
|
if (PyUnicode_Check(v)) {
|
||||||
if (PyUnicode_GET_SIZE(v) != 1)
|
if (PyUnicode_GET_SIZE(v) == 1) {
|
||||||
goto onError;
|
buf[0] = PyUnicode_AS_UNICODE(v)[0];
|
||||||
buf[0] = PyUnicode_AS_UNICODE(v)[0];
|
buf[1] = '\0';
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
#ifndef Py_UNICODE_WIDE
|
||||||
|
if (PyUnicode_GET_SIZE(v) == 2) {
|
||||||
|
/* Decode a valid surrogate pair */
|
||||||
|
int c0 = PyUnicode_AS_UNICODE(v)[0];
|
||||||
|
int c1 = PyUnicode_AS_UNICODE(v)[1];
|
||||||
|
if (0xD800 <= c0 && c0 <= 0xDBFF &&
|
||||||
|
0xDC00 <= c1 && c1 <= 0xDFFF) {
|
||||||
|
buf[0] = c0;
|
||||||
|
buf[1] = c1;
|
||||||
|
buf[2] = '\0';
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
goto onError;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* Integer input truncated to a character */
|
/* Integer input truncated to a character */
|
||||||
|
@ -8742,25 +8759,25 @@ formatchar(Py_UNICODE *buf,
|
||||||
x = PyLong_AsLong(v);
|
x = PyLong_AsLong(v);
|
||||||
if (x == -1 && PyErr_Occurred())
|
if (x == -1 && PyErr_Occurred())
|
||||||
goto onError;
|
goto onError;
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
if (x < 0 || x > 0x10ffff) {
|
if (x < 0 || x > 0x10ffff) {
|
||||||
PyErr_SetString(PyExc_OverflowError,
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
"%c arg not in range(0x110000) "
|
"%c arg not in range(0x110000)");
|
||||||
"(wide Python build)");
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
if (x < 0 || x > 0xffff) {
|
#ifndef Py_UNICODE_WIDE
|
||||||
PyErr_SetString(PyExc_OverflowError,
|
if (x > 0xffff) {
|
||||||
"%c arg not in range(0x10000) "
|
x -= 0x10000;
|
||||||
"(narrow Python build)");
|
buf[0] = (Py_UNICODE)(0xD800 | (x >> 10));
|
||||||
return -1;
|
buf[1] = (Py_UNICODE)(0xDC00 | (x & 0x3FF));
|
||||||
|
return 2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
buf[0] = (Py_UNICODE) x;
|
buf[0] = (Py_UNICODE) x;
|
||||||
|
buf[1] = '\0';
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
buf[1] = '\0';
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
onError:
|
onError:
|
||||||
PyErr_SetString(PyExc_TypeError,
|
PyErr_SetString(PyExc_TypeError,
|
||||||
|
|
|
@ -294,21 +294,12 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
|
||||||
case 'C':
|
case 'C':
|
||||||
{
|
{
|
||||||
int i = va_arg(*p_va, int);
|
int i = va_arg(*p_va, int);
|
||||||
Py_UNICODE c;
|
|
||||||
if (i < 0 || i > PyUnicode_GetMax()) {
|
if (i < 0 || i > PyUnicode_GetMax()) {
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
PyErr_SetString(PyExc_OverflowError,
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
"%c arg not in range(0x110000) "
|
"%c arg not in range(0x110000)");
|
||||||
"(wide Python build)");
|
|
||||||
#else
|
|
||||||
PyErr_SetString(PyExc_OverflowError,
|
|
||||||
"%c arg not in range(0x10000) "
|
|
||||||
"(narrow Python build)");
|
|
||||||
#endif
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
c = i;
|
return PyUnicode_FromOrdinal(i);
|
||||||
return PyUnicode_FromUnicode(&c, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
case 's':
|
case 's':
|
||||||
|
|
Loading…
Reference in New Issue