diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index debcddcf95c..5eb331a73cb 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -395,6 +395,19 @@ class UnicodeTest(
         self.assertEqual(u'%c' % 0x1234, u'\u1234')
         self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
 
+        for num in range(0x00,0x80):
+            char = chr(num)
+            self.assertEqual(u"%c" % char, char)
+            self.assertEqual(u"%c" % num, char)
+        # Issue 7649
+        for num in range(0x80,0x100):
+            uchar = unichr(num)
+            self.assertEqual(uchar, u"%c" % num)   # works only with ints
+            self.assertEqual(uchar, u"%c" % uchar) # and unicode chars
+            # the implicit decoding should fail for non-ascii chars
+            self.assertRaises(UnicodeDecodeError, u"%c".__mod__, chr(num))
+            self.assertRaises(UnicodeDecodeError, u"%s".__mod__, chr(num))
+
         # formatting jobs delegated from the string implementation:
         self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
         self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
diff --git a/Misc/NEWS b/Misc/NEWS
index dd2346be0b9..5dc3a7b610b 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 4?
 Core and Builtins
 -----------------
 
+- Issue #7649: u'%c' % char now raises a UnicodeDecodeError if char is a
+  non-ASCII byte string (range 0x80..0xFF).
+
 - Issue #6902: Fix problem with built-in types format incorrectly with 0
   padding.
 
@@ -249,7 +252,7 @@ Tests
 
 Documentation
 -------------
-
+
 - Updating `Using Python` documentation to include description of CPython's
   -J, -U and -X options.
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 130ca48463c..d80ff714ae2 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8170,6 +8170,7 @@ formatchar(Py_UNICODE *buf,
            size_t buflen,
            PyObject *v)
 {
+    PyObject *s;
     /* presume that the buffer is at least 2 characters long */
     if (PyUnicode_Check(v)) {
         if (PyUnicode_GET_SIZE(v) != 1)
@@ -8180,7 +8181,14 @@
     else if (PyString_Check(v)) {
         if (PyString_GET_SIZE(v) != 1)
             goto onError;
-        buf[0] = (Py_UNICODE)PyString_AS_STRING(v)[0];
+        /* #7649: if the char is a non-ascii (i.e. in range(0x80,0x100)) byte
+           string, "u'%c' % char" should fail with a UnicodeDecodeError */
+        s = PyUnicode_FromStringAndSize(PyString_AS_STRING(v), 1);
+        /* if the char is not decodable, return -1 */
+        if (s == NULL)
+            return -1;
+        buf[0] = PyUnicode_AS_UNICODE(s)[0];
+        Py_DECREF(s);
     }
 
     else {
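
Note, not part of the patch: a minimal Python 2.7 sketch of the behaviour the
change enforces, mirroring the new assertions in Lib/test/test_unicode.py. It
assumes the usual default encoding of 'ascii'. Before the fix, the non-ASCII
byte was simply cast to Py_UNICODE, so u'%c' % '\xe9' silently produced
u'\xe9'.

    # ASCII byte strings and ints interpolate as before:
    assert u"%c" % 65 == u"A"
    assert u"%c" % "A" == u"A"

    # Non-ASCII values still work with ints and with unicode characters:
    assert u"%c" % 0xE9 == u"\xe9"
    assert u"%c" % u"\xe9" == u"\xe9"

    # ...but a non-ASCII byte string no longer decodes implicitly:
    try:
        u"%c" % "\xe9"
    except UnicodeDecodeError:
        pass
    else:
        raise AssertionError("expected UnicodeDecodeError")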