bpo-30003: Fix handling of escape characters in the HZ codec (#1556)

This commit is contained in:
Xiang Zhang 2017-05-22 22:42:05 +08:00 committed by GitHub
parent 15033d145b
commit 89a5e03244
3 changed files with 19 additions and 13 deletions

View File

@ -86,6 +86,10 @@ class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
(b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
(b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
(b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
# issue 30003
('ab~cd', 'strict', b'ab~~cd'), # escape ~
(b'~{Dc~~:C~}', 'strict', None), # ~~ only in ASCII mode
(b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode
)
if __name__ == "__main__":

View File

@ -334,6 +334,9 @@ Extension Modules
Library
-------
- bpo-30003: Fix handling of escape characters in the HZ codec. Based on a patch
by Ma Lin.
- bpo-30149: inspect.signature() now supports callables with
variable-argument parameters wrapped with partialmethod.
Patch by Dong-hee Na.

View File

@ -350,14 +350,16 @@ ENCODER(hz)
DBCHAR code;
if (c < 0x80) {
if (state->i == 0) {
if (state->i) {
WRITEBYTE2('~', '}');
NEXT_OUT(2);
state->i = 0;
}
WRITEBYTE1((unsigned char)c);
NEXT(1, 1);
}
else {
WRITEBYTE3('~', '}', (unsigned char)c);
NEXT(1, 3);
state->i = 0;
if (c == '~') {
WRITEBYTE1('~');
NEXT_OUT(1);
}
continue;
}
@ -409,17 +411,14 @@ DECODER(hz)
unsigned char c2 = INBYTE2;
REQUIRE_INBUF(2);
if (c2 == '~') {
if (c2 == '~' && state->i == 0)
OUTCHAR('~');
NEXT_IN(2);
continue;
}
else if (c2 == '{' && state->i == 0)
state->i = 1; /* set GB */
else if (c2 == '\n' && state->i == 0)
; /* line-continuation */
else if (c2 == '}' && state->i == 1)
state->i = 0; /* set ASCII */
else if (c2 == '\n')
; /* line-continuation */
else
return 1;
NEXT_IN(2);