#7649: "u'%c' % char" now behaves like "u'%s' % char" and raises a UnicodeDecodeError if 'char' is a byte string that can't be decoded using the default encoding.
This commit is contained in:
parent
271b7e1abb
commit
321251567e
|
@ -16,8 +16,9 @@ Core and Builtins
|
||||||
UnicodeEncodeError, UnicodeDecodeError, and UnicodeTranslateError to
|
UnicodeEncodeError, UnicodeDecodeError, and UnicodeTranslateError to
|
||||||
strings.
|
strings.
|
||||||
|
|
||||||
- Issue #7649: Fix u'%c' % char for character in range 0x80..0xFF, raise an
|
- Issue #7649: "u'%c' % char" now behaves like "u'%s' % char" and raises a
|
||||||
UnicodeDecodeError.
|
UnicodeDecodeError if 'char' is a byte string that can't be decoded using
|
||||||
|
the default encoding.
|
||||||
|
|
||||||
- Issue #6902: Fix problem with built-in types formatting incorrectly with
|
- Issue #6902: Fix problem with built-in types formatting incorrectly with
|
||||||
0 padding.
|
0 padding.
|
||||||
|
|
|
@ -8170,7 +8170,8 @@ formatchar(Py_UNICODE *buf,
|
||||||
size_t buflen,
|
size_t buflen,
|
||||||
PyObject *v)
|
PyObject *v)
|
||||||
{
|
{
|
||||||
PyObject *s;
|
PyObject *unistr;
|
||||||
|
char *str;
|
||||||
/* presume that the buffer is at least 2 characters long */
|
/* presume that the buffer is at least 2 characters long */
|
||||||
if (PyUnicode_Check(v)) {
|
if (PyUnicode_Check(v)) {
|
||||||
if (PyUnicode_GET_SIZE(v) != 1)
|
if (PyUnicode_GET_SIZE(v) != 1)
|
||||||
|
@ -8181,14 +8182,22 @@ formatchar(Py_UNICODE *buf,
|
||||||
else if (PyString_Check(v)) {
|
else if (PyString_Check(v)) {
|
||||||
if (PyString_GET_SIZE(v) != 1)
|
if (PyString_GET_SIZE(v) != 1)
|
||||||
goto onError;
|
goto onError;
|
||||||
/* #7649: if the char is a non-ascii (i.e. in range(0x80,0x100)) byte
|
/* #7649: "u'%c' % char" should behave like "u'%s' % char" and fail
|
||||||
string, "u'%c' % char" should fail with a UnicodeDecodeError */
|
with a UnicodeDecodeError if 'char' is not decodable with the
|
||||||
s = PyUnicode_FromStringAndSize(PyString_AS_STRING(v), 1);
|
default encoding (usually ASCII, but it might be something else) */
|
||||||
/* if the char is not decodable return -1 */
|
str = PyString_AS_STRING(v);
|
||||||
if (s == NULL)
|
if ((unsigned char)str[0] > 0x7F) {
|
||||||
|
/* the char is not ASCII; try to decode the string using the
|
||||||
|
default encoding and return -1 to let the UnicodeDecodeError
|
||||||
|
be raised if the string can't be decoded */
|
||||||
|
unistr = PyUnicode_Decode(str, 1, NULL, "strict");
|
||||||
|
if (unistr == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
buf[0] = PyUnicode_AS_UNICODE(s)[0];
|
buf[0] = PyUnicode_AS_UNICODE(unistr)[0];
|
||||||
Py_DECREF(s);
|
Py_DECREF(unistr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
buf[0] = (Py_UNICODE)str[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
else {
|
else {
|
||||||
|
|
Loading…
Reference in New Issue