Issue #3280: like chr() already does, the "%c" format now accepts the full unicode range even on "narrow Unicode" builds; the result is a pair of UTF-16 surrogates.
This commit is contained in:
Amaury Forgeot d'Arc 2008-07-04 21:26:43 +00:00
parent 142957ce95
commit a4db68622c
4 changed files with 43 additions and 27 deletions

View File

@ -717,7 +717,10 @@ class UnicodeTest(
self.assertEqual("%(x)s, %(\xfc)s" % {'x':"abc", '\xfc':"def"}, 'abc, def') self.assertEqual("%(x)s, %(\xfc)s" % {'x':"abc", '\xfc':"def"}, 'abc, def')
self.assertEqual('%c' % 0x1234, '\u1234') self.assertEqual('%c' % 0x1234, '\u1234')
self.assertRaises(OverflowError, "%c".__mod__, (sys.maxunicode+1,)) self.assertEqual('%c' % 0x21483, '\U00021483')
self.assertRaises(OverflowError, "%c".__mod__, (0x110000,))
self.assertEqual('%c' % '\U00021483', '\U00021483')
self.assertRaises(TypeError, "%c".__mod__, "aa")
# formatting jobs delegated from the string implementation: # formatting jobs delegated from the string implementation:
self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...') self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')

View File

@ -12,6 +12,11 @@ What's new in Python 3.0b2?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #3280: like chr(), the "%c" format now accepts unicode code points
beyond the Basic Multilingual Plane (above 0xffff) on all configurations. On
"narrow Unicode" builds, the result is a string of 2 code units, forming a
UTF-16 surrogate pair.
- Issue #3282: str.isprintable() should return False for undefined - Issue #3282: str.isprintable() should return False for undefined
Unicode characters. Unicode characters.

View File

@ -8730,11 +8730,28 @@ formatchar(Py_UNICODE *buf,
size_t buflen, size_t buflen,
PyObject *v) PyObject *v)
{ {
/* presume that the buffer is at least 2 characters long */ /* presume that the buffer is at least 3 characters long */
if (PyUnicode_Check(v)) { if (PyUnicode_Check(v)) {
if (PyUnicode_GET_SIZE(v) != 1) if (PyUnicode_GET_SIZE(v) == 1) {
goto onError; buf[0] = PyUnicode_AS_UNICODE(v)[0];
buf[0] = PyUnicode_AS_UNICODE(v)[0]; buf[1] = '\0';
return 1;
}
#ifndef Py_UNICODE_WIDE
if (PyUnicode_GET_SIZE(v) == 2) {
/* Decode a valid surrogate pair */
int c0 = PyUnicode_AS_UNICODE(v)[0];
int c1 = PyUnicode_AS_UNICODE(v)[1];
if (0xD800 <= c0 && c0 <= 0xDBFF &&
0xDC00 <= c1 && c1 <= 0xDFFF) {
buf[0] = c0;
buf[1] = c1;
buf[2] = '\0';
return 2;
}
}
#endif
goto onError;
} }
else { else {
/* Integer input truncated to a character */ /* Integer input truncated to a character */
@ -8742,25 +8759,25 @@ formatchar(Py_UNICODE *buf,
x = PyLong_AsLong(v); x = PyLong_AsLong(v);
if (x == -1 && PyErr_Occurred()) if (x == -1 && PyErr_Occurred())
goto onError; goto onError;
#ifdef Py_UNICODE_WIDE
if (x < 0 || x > 0x10ffff) { if (x < 0 || x > 0x10ffff) {
PyErr_SetString(PyExc_OverflowError, PyErr_SetString(PyExc_OverflowError,
"%c arg not in range(0x110000) " "%c arg not in range(0x110000)");
"(wide Python build)");
return -1; return -1;
} }
#else
if (x < 0 || x > 0xffff) { #ifndef Py_UNICODE_WIDE
PyErr_SetString(PyExc_OverflowError, if (x > 0xffff) {
"%c arg not in range(0x10000) " x -= 0x10000;
"(narrow Python build)"); buf[0] = (Py_UNICODE)(0xD800 | (x >> 10));
return -1; buf[1] = (Py_UNICODE)(0xDC00 | (x & 0x3FF));
return 2;
} }
#endif #endif
buf[0] = (Py_UNICODE) x; buf[0] = (Py_UNICODE) x;
buf[1] = '\0';
return 1;
} }
buf[1] = '\0';
return 1;
onError: onError:
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,

View File

@ -294,21 +294,12 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
case 'C': case 'C':
{ {
int i = va_arg(*p_va, int); int i = va_arg(*p_va, int);
Py_UNICODE c;
if (i < 0 || i > PyUnicode_GetMax()) { if (i < 0 || i > PyUnicode_GetMax()) {
#ifdef Py_UNICODE_WIDE
PyErr_SetString(PyExc_OverflowError, PyErr_SetString(PyExc_OverflowError,
"%c arg not in range(0x110000) " "%c arg not in range(0x110000)";
"(wide Python build)");
#else
PyErr_SetString(PyExc_OverflowError,
"%c arg not in range(0x10000) "
"(narrow Python build)");
#endif
return NULL; return NULL;
} }
c = i; return PyUnicode_FromOrdinal(i);
return PyUnicode_FromUnicode(&c, 1);
} }
case 's': case 's':