bpo-36312: Fix decoders for some code pages. (GH-12369)

(cherry picked from commit c1e2c288f4)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Miss Islington (bot) 2019-03-20 21:31:57 -07:00 committed by GitHub
parent 65b9849f0f
commit 74829b7323
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 5 deletions

View File

@@ -3159,6 +3159,15 @@ class CodePageTest(unittest.TestCase):
('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'), ('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'),
)) ))
def test_code_page_decode_flags(self):
# Issue #36312: For some code pages (e.g. UTF-7) the flags argument of
# MultiByteToWideChar() must be set to 0.
# Every code page listed below must decode the ASCII bytes b'abc' to
# 'abc' and return 3 as the second tuple element (the number of input
# bytes consumed).  "57011+1" makes the range include 57011 itself.
for cp in (50220, 50221, 50222, 50225, 50227, 50229,
*range(57002, 57011+1), 65000):
self.assertEqual(codecs.code_page_decode(cp, b'abc'), ('abc', 3))
# Code page 42 is checked separately because its expected output differs:
# the same three bytes must decode to U+F061, U+F062, U+F063 instead of
# 'abc'.
self.assertEqual(codecs.code_page_decode(42, b'abc'),
('\uf061\uf062\uf063', 3))
def test_incremental(self): def test_incremental(self):
decoded = codecs.code_page_decode(932, b'\x82', 'strict', False) decoded = codecs.code_page_decode(932, b'\x82', 'strict', False)
self.assertEqual(decoded, ('', 0)) self.assertEqual(decoded, ('', 0))

View File

@@ -0,0 +1,2 @@
Fixed decoders for the following code pages: 50220, 50221, 50222, 50225,
50227, 50229, 57002 through 57011, 65000 and 42.

View File

@@ -7123,15 +7123,21 @@ decode_code_page_strict(UINT code_page,
const char *in, const char *in,
int insize) int insize)
{ {
const DWORD flags = decode_code_page_flags(code_page); DWORD flags = MB_ERR_INVALID_CHARS;
wchar_t *out; wchar_t *out;
DWORD outsize; DWORD outsize;
/* First get the size of the result */ /* First get the size of the result */
assert(insize > 0); assert(insize > 0);
outsize = MultiByteToWideChar(code_page, flags, in, insize, NULL, 0); while ((outsize = MultiByteToWideChar(code_page, flags,
if (outsize <= 0) in, insize, NULL, 0)) <= 0)
goto error; {
if (!flags || GetLastError() != ERROR_INVALID_FLAGS) {
goto error;
}
/* For some code pages (e.g. UTF-7) flags must be set to 0. */
flags = 0;
}
if (*v == NULL) { if (*v == NULL) {
/* Create unicode object */ /* Create unicode object */
@@ -7177,7 +7183,7 @@ decode_code_page_errors(UINT code_page,
{ {
const char *startin = in; const char *startin = in;
const char *endin = in + size; const char *endin = in + size;
const DWORD flags = decode_code_page_flags(code_page); DWORD flags = MB_ERR_INVALID_CHARS;
/* Ideally, we should get reason from FormatMessage. This is the Windows /* Ideally, we should get reason from FormatMessage. This is the Windows
2000 English version of the message. */ 2000 English version of the message. */
const char *reason = "No mapping for the Unicode character exists " const char *reason = "No mapping for the Unicode character exists "
@@ -7248,6 +7254,11 @@ decode_code_page_errors(UINT code_page,
if (outsize > 0) if (outsize > 0)
break; break;
err = GetLastError(); err = GetLastError();
if (err == ERROR_INVALID_FLAGS && flags) {
/* For some code pages (e.g. UTF-7) flags must be set to 0. */
flags = 0;
continue;
}
if (err != ERROR_NO_UNICODE_TRANSLATION if (err != ERROR_NO_UNICODE_TRANSLATION
&& err != ERROR_INSUFFICIENT_BUFFER) && err != ERROR_INSUFFICIENT_BUFFER)
{ {