bpo-36312: Fix decoders for some code pages. (GH-12369)
(cherry picked from commit c1e2c288f4)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent
65b9849f0f
commit
74829b7323
|
@ -3159,6 +3159,15 @@ class CodePageTest(unittest.TestCase):
|
|||
('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'),
|
||||
))
|
||||
|
||||
def test_code_page_decode_flags(self):
|
||||
# Issue #36312: For some code pages (e.g. UTF-7) flags for
|
||||
# MultiByteToWideChar() must be set to 0.
|
||||
for cp in (50220, 50221, 50222, 50225, 50227, 50229,
|
||||
*range(57002, 57011+1), 65000):
|
||||
self.assertEqual(codecs.code_page_decode(cp, b'abc'), ('abc', 3))
|
||||
self.assertEqual(codecs.code_page_decode(42, b'abc'),
|
||||
('\uf061\uf062\uf063', 3))
|
||||
|
||||
def test_incremental(self):
|
||||
decoded = codecs.code_page_decode(932, b'\x82', 'strict', False)
|
||||
self.assertEqual(decoded, ('', 0))
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Fixed decoders for the following code pages: 50220, 50221, 50222, 50225,
|
||||
50227, 50229, 57002 through 57011, 65000 and 42.
|
|
@ -7123,15 +7123,21 @@ decode_code_page_strict(UINT code_page,
|
|||
const char *in,
|
||||
int insize)
|
||||
{
|
||||
const DWORD flags = decode_code_page_flags(code_page);
|
||||
DWORD flags = MB_ERR_INVALID_CHARS;
|
||||
wchar_t *out;
|
||||
DWORD outsize;
|
||||
|
||||
/* First get the size of the result */
|
||||
assert(insize > 0);
|
||||
outsize = MultiByteToWideChar(code_page, flags, in, insize, NULL, 0);
|
||||
if (outsize <= 0)
|
||||
while ((outsize = MultiByteToWideChar(code_page, flags,
|
||||
in, insize, NULL, 0)) <= 0)
|
||||
{
|
||||
if (!flags || GetLastError() != ERROR_INVALID_FLAGS) {
|
||||
goto error;
|
||||
}
|
||||
/* For some code pages (e.g. UTF-7) flags must be set to 0. */
|
||||
flags = 0;
|
||||
}
|
||||
|
||||
if (*v == NULL) {
|
||||
/* Create unicode object */
|
||||
|
@ -7177,7 +7183,7 @@ decode_code_page_errors(UINT code_page,
|
|||
{
|
||||
const char *startin = in;
|
||||
const char *endin = in + size;
|
||||
const DWORD flags = decode_code_page_flags(code_page);
|
||||
DWORD flags = MB_ERR_INVALID_CHARS;
|
||||
/* Ideally, we should get reason from FormatMessage. This is the Windows
|
||||
2000 English version of the message. */
|
||||
const char *reason = "No mapping for the Unicode character exists "
|
||||
|
@ -7248,6 +7254,11 @@ decode_code_page_errors(UINT code_page,
|
|||
if (outsize > 0)
|
||||
break;
|
||||
err = GetLastError();
|
||||
if (err == ERROR_INVALID_FLAGS && flags) {
|
||||
/* For some code pages (e.g. UTF-7) flags must be set to 0. */
|
||||
flags = 0;
|
||||
continue;
|
||||
}
|
||||
if (err != ERROR_NO_UNICODE_TRANSLATION
|
||||
&& err != ERROR_INSUFFICIENT_BUFFER)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue