bpo-22602: Raise an exception in the UTF-7 decoder for ill-formed sequences starting with "+". (GH-8741)
The UTF-7 decoder now raises UnicodeDecodeError for ill-formed sequences starting with "+" (as specified in RFC 2152).
This commit is contained in:
parent d3d3171da8
commit e349bf2358
|
@ -1020,6 +1020,7 @@ class UTF7Test(ReadTest, unittest.TestCase):
|
|||
(b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'),
|
||||
(b'a+IKw-b\xff', 'a\u20acb\ufffd'),
|
||||
(b'a+IKw\xffb', 'a\u20ac\ufffdb'),
|
||||
(b'a+@b', 'a\ufffdb'),
|
||||
]
|
||||
for raw, expected in tests:
|
||||
with self.subTest(raw=raw):
|
||||
|
|
|
@ -1630,6 +1630,10 @@ class UnicodeTest(string_tests.CommonTest,
|
|||
for c in set_o:
|
||||
self.assertEqual(c.encode('ascii').decode('utf7'), c)
|
||||
|
||||
with self.assertRaisesRegex(UnicodeDecodeError,
|
||||
'ill-formed sequence'):
|
||||
b'+@'.decode('utf-7')
|
||||
|
||||
def test_codecs_utf8(self):
|
||||
self.assertEqual(''.encode('utf-8'), b'')
|
||||
self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
The UTF-7 decoder now raises :exc:`UnicodeDecodeError` for ill-formed
sequences starting with "+" (as specified in RFC 2152). Patch by Zackery
Spytz.
|
|
@ -4479,6 +4479,11 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
|
|||
if (_PyUnicodeWriter_WriteCharInline(&writer, '+') < 0)
|
||||
goto onError;
|
||||
}
|
||||
else if (s < e && !IS_BASE64(*s)) {
|
||||
s++;
|
||||
errmsg = "ill-formed sequence";
|
||||
goto utf7Error;
|
||||
}
|
||||
else { /* begin base64-encoded section */
|
||||
inShift = 1;
|
||||
surrogate = 0;
|
||||
|
|
Loading…
Reference in New Issue