bpo-36312: Fix decoders for some code pages. (GH-12369)
(cherry picked from commit c1e2c288f4)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent
65b9849f0f
commit
74829b7323
|
@ -3159,6 +3159,15 @@ class CodePageTest(unittest.TestCase):
|
||||||
('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'),
|
('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'),
|
||||||
))
|
))
|
||||||
|
|
||||||
|
def test_code_page_decode_flags(self):
|
||||||
|
# Issue #36312: For some code pages (e.g. UTF-7) flags for
|
||||||
|
# MultiByteToWideChar() must be set to 0.
|
||||||
|
for cp in (50220, 50221, 50222, 50225, 50227, 50229,
|
||||||
|
*range(57002, 57011+1), 65000):
|
||||||
|
self.assertEqual(codecs.code_page_decode(cp, b'abc'), ('abc', 3))
|
||||||
|
self.assertEqual(codecs.code_page_decode(42, b'abc'),
|
||||||
|
('\uf061\uf062\uf063', 3))
|
||||||
|
|
||||||
def test_incremental(self):
|
def test_incremental(self):
|
||||||
decoded = codecs.code_page_decode(932, b'\x82', 'strict', False)
|
decoded = codecs.code_page_decode(932, b'\x82', 'strict', False)
|
||||||
self.assertEqual(decoded, ('', 0))
|
self.assertEqual(decoded, ('', 0))
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Fixed decoders for the following code pages: 50220, 50221, 50222, 50225,
|
||||||
|
50227, 50229, 57002 through 57011, 65000 and 42.
|
|
@ -7123,15 +7123,21 @@ decode_code_page_strict(UINT code_page,
|
||||||
const char *in,
|
const char *in,
|
||||||
int insize)
|
int insize)
|
||||||
{
|
{
|
||||||
const DWORD flags = decode_code_page_flags(code_page);
|
DWORD flags = MB_ERR_INVALID_CHARS;
|
||||||
wchar_t *out;
|
wchar_t *out;
|
||||||
DWORD outsize;
|
DWORD outsize;
|
||||||
|
|
||||||
/* First get the size of the result */
|
/* First get the size of the result */
|
||||||
assert(insize > 0);
|
assert(insize > 0);
|
||||||
outsize = MultiByteToWideChar(code_page, flags, in, insize, NULL, 0);
|
while ((outsize = MultiByteToWideChar(code_page, flags,
|
||||||
if (outsize <= 0)
|
in, insize, NULL, 0)) <= 0)
|
||||||
goto error;
|
{
|
||||||
|
if (!flags || GetLastError() != ERROR_INVALID_FLAGS) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
/* For some code pages (e.g. UTF-7) flags must be set to 0. */
|
||||||
|
flags = 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (*v == NULL) {
|
if (*v == NULL) {
|
||||||
/* Create unicode object */
|
/* Create unicode object */
|
||||||
|
@ -7177,7 +7183,7 @@ decode_code_page_errors(UINT code_page,
|
||||||
{
|
{
|
||||||
const char *startin = in;
|
const char *startin = in;
|
||||||
const char *endin = in + size;
|
const char *endin = in + size;
|
||||||
const DWORD flags = decode_code_page_flags(code_page);
|
DWORD flags = MB_ERR_INVALID_CHARS;
|
||||||
/* Ideally, we should get reason from FormatMessage. This is the Windows
|
/* Ideally, we should get reason from FormatMessage. This is the Windows
|
||||||
2000 English version of the message. */
|
2000 English version of the message. */
|
||||||
const char *reason = "No mapping for the Unicode character exists "
|
const char *reason = "No mapping for the Unicode character exists "
|
||||||
|
@ -7248,6 +7254,11 @@ decode_code_page_errors(UINT code_page,
|
||||||
if (outsize > 0)
|
if (outsize > 0)
|
||||||
break;
|
break;
|
||||||
err = GetLastError();
|
err = GetLastError();
|
||||||
|
if (err == ERROR_INVALID_FLAGS && flags) {
|
||||||
|
/* For some code pages (e.g. UTF-7) flags must be set to 0. */
|
||||||
|
flags = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (err != ERROR_NO_UNICODE_TRANSLATION
|
if (err != ERROR_NO_UNICODE_TRANSLATION
|
||||||
&& err != ERROR_INSUFFICIENT_BUFFER)
|
&& err != ERROR_INSUFFICIENT_BUFFER)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue