Issue #19279: UTF-7 decoder no longer produces illegal strings.
This commit is contained in:
commit
55e092f545
|
@ -820,6 +820,36 @@ class UTF7Test(ReadTest, unittest.TestCase):
|
|||
]
|
||||
)
|
||||
|
||||
def test_errors(self):
|
||||
tests = [
|
||||
(b'a\xffb', 'a\ufffdb'),
|
||||
(b'a+IK', 'a\ufffd'),
|
||||
(b'a+IK-b', 'a\ufffdb'),
|
||||
(b'a+IK,b', 'a\ufffdb'),
|
||||
(b'a+IKx', 'a\u20ac\ufffd'),
|
||||
(b'a+IKx-b', 'a\u20ac\ufffdb'),
|
||||
(b'a+IKwgr', 'a\u20ac\ufffd'),
|
||||
(b'a+IKwgr-b', 'a\u20ac\ufffdb'),
|
||||
(b'a+IKwgr,', 'a\u20ac\ufffd'),
|
||||
(b'a+IKwgr,-b', 'a\u20ac\ufffd-b'),
|
||||
(b'a+IKwgrB', 'a\u20ac\u20ac\ufffd'),
|
||||
(b'a+IKwgrB-b', 'a\u20ac\u20ac\ufffdb'),
|
||||
(b'a+/,+IKw-b', 'a\ufffd\u20acb'),
|
||||
(b'a+//,+IKw-b', 'a\ufffd\u20acb'),
|
||||
(b'a+///,+IKw-b', 'a\uffff\ufffd\u20acb'),
|
||||
(b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'),
|
||||
]
|
||||
for raw, expected in tests:
|
||||
with self.subTest(raw=raw):
|
||||
self.assertRaises(UnicodeDecodeError, codecs.utf_7_decode,
|
||||
raw, 'strict', True)
|
||||
self.assertEqual(raw.decode('utf-7', 'replace'), expected)
|
||||
|
||||
def test_nonbmp(self):
|
||||
self.assertEqual('\U000104A0'.encode(self.encoding), b'+2AHcoA-')
|
||||
self.assertEqual('\ud801\udca0'.encode(self.encoding), b'+2AHcoA-')
|
||||
self.assertEqual(b'+2AHcoA-'.decode(self.encoding), '\U000104A0')
|
||||
|
||||
class UTF16ExTest(unittest.TestCase):
|
||||
|
||||
def test_errors(self):
|
||||
|
|
|
@ -10,6 +10,8 @@ Projected release date: 2013-10-20
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #19279: UTF-7 decoder no longer produces illegal strings.
|
||||
|
||||
- Issue #16612: Add "Argument Clinic", a compile-time preprocessor for
|
||||
C files to generate argument parsing code. (See PEP 436.)
|
||||
|
||||
|
|
|
@ -4341,6 +4341,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
|
|||
Py_UCS4 outCh = (Py_UCS4)(base64buffer >> (base64bits-16));
|
||||
base64bits -= 16;
|
||||
base64buffer &= (1 << base64bits) - 1; /* clear high bits */
|
||||
assert(outCh <= 0xffff);
|
||||
if (surrogate) {
|
||||
/* expecting a second surrogate */
|
||||
if (Py_UNICODE_IS_LOW_SURROGATE(outCh)) {
|
||||
|
@ -4408,6 +4409,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
|
|||
inShift = 1;
|
||||
shiftOutStart = writer.pos;
|
||||
base64bits = 0;
|
||||
base64buffer = 0;
|
||||
}
|
||||
}
|
||||
else if (DECODE_DIRECT(ch)) { /* character decodes as itself */
|
||||
|
|
Loading…
Reference in New Issue