diff --git a/Lib/test/test_codecencodings_cn.py b/Lib/test/test_codecencodings_cn.py index c8a410c2e03..2a450718d5a 100644 --- a/Lib/test/test_codecencodings_cn.py +++ b/Lib/test/test_codecencodings_cn.py @@ -86,6 +86,10 @@ class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase): (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'), (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'), (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"), + # issue 30003 + ('ab~cd', 'strict', b'ab~~cd'), # escape ~ + (b'~{Dc~~:C~}', 'strict', None), # ~~ only in ASCII mode + (b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode ) if __name__ == "__main__": diff --git a/Misc/NEWS b/Misc/NEWS index 74f7922934d..5a7e377d6b2 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -334,6 +334,9 @@ Extension Modules Library ------- +- bpo-30003: Fix handling escape characters in HZ codec. Based on patch + by Ma Lin. + - bpo-30149: inspect.signature() now supports callables with variable-argument parameters wrapped with partialmethod. Patch by Dong-hee Na. diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c index bda175c55d1..1fcc220b8db 100644 --- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -350,15 +350,17 @@ ENCODER(hz) DBCHAR code; if (c < 0x80) { - if (state->i == 0) { - WRITEBYTE1((unsigned char)c); - NEXT(1, 1); - } - else { - WRITEBYTE3('~', '}', (unsigned char)c); - NEXT(1, 3); + if (state->i) { + WRITEBYTE2('~', '}'); + NEXT_OUT(2); state->i = 0; } + WRITEBYTE1((unsigned char)c); + NEXT(1, 1); + if (c == '~') { + WRITEBYTE1('~'); + NEXT_OUT(1); + } continue; } @@ -409,17 +411,14 @@ DECODER(hz) unsigned char c2 = INBYTE2; REQUIRE_INBUF(2); - if (c2 == '~') { + if (c2 == '~' && state->i == 0) OUTCHAR('~'); - NEXT_IN(2); - continue; - } else if (c2 == '{' && state->i == 0) state->i = 1; /* set GB */ + else if (c2 == '\n' && state->i == 0) + ; /* line-continuation */ else if (c2 == '}' && state->i == 1) state->i = 0; /* set ASCII */ - else if (c2 == '\n') - ; /* line-continuation */ else return 1; NEXT_IN(2);