bpo-30003: Fix handling escape characters in HZ codec (#1720) (#1556)

This commit is contained in:
Xiang Zhang 2017-05-23 01:04:27 +08:00 committed by GitHub
parent 2b67c7aae7
commit 6e1b832a6c
3 changed files with 21 additions and 13 deletions

View File

@ -82,6 +82,10 @@ class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
(b'ab~cd', 'replace', u'ab\uFFFDd'), (b'ab~cd', 'replace', u'ab\uFFFDd'),
(b'ab\xffcd', 'replace', u'ab\uFFFDcd'), (b'ab\xffcd', 'replace', u'ab\uFFFDcd'),
(b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'), (b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'),
# issue 30003
(u'ab~cd', 'strict', b'ab~~cd'), # escape ~
(b'~{Dc~~:C~}', 'strict', None), # ~~ only in ASCII mode
(b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode
) )
def test_main(): def test_main():

View File

@ -49,6 +49,9 @@ Extension Modules
Library Library
------- -------
- bpo-30003: Fix handling of escape characters in the HZ codec. Based on a patch
by Ma Lin.
- bpo-30375: Warnings emitted when compiling a regular expression now always - bpo-30375: Warnings emitted when compiling a regular expression now always
point to the line in the user code. Previously they could point into the internals point to the line in the user code. Previously they could point into the internals
of the re module if emitted from inside of groups or conditionals. of the re module if emitted from inside of groups or conditionals.

View File

@ -335,15 +335,17 @@ ENCODER(hz)
DBCHAR code; DBCHAR code;
if (c < 0x80) { if (c < 0x80) {
if (state->i == 0) { if (state->i) {
WRITE1((unsigned char)c) WRITE2('~', '}')
NEXT(1, 1) NEXT_OUT(2)
}
else {
WRITE3('~', '}', (unsigned char)c)
NEXT(1, 3)
state->i = 0; state->i = 0;
} }
WRITE1((unsigned char)c)
NEXT(1, 1)
if (c == '~') {
WRITE1('~')
NEXT_OUT(1)
}
continue; continue;
} }
@ -390,20 +392,19 @@ DECODER(hz)
unsigned char c2 = IN2; unsigned char c2 = IN2;
REQUIRE_INBUF(2) REQUIRE_INBUF(2)
if (c2 == '~') { if (c2 == '~' && state->i == 0) {
WRITE1('~') WRITE1('~')
NEXT(2, 1) NEXT_OUT(1)
continue;
} }
else if (c2 == '{' && state->i == 0) else if (c2 == '{' && state->i == 0)
state->i = 1; /* set GB */ state->i = 1; /* set GB */
else if (c2 == '\n' && state->i == 0)
; /* line-continuation */
else if (c2 == '}' && state->i == 1) else if (c2 == '}' && state->i == 1)
state->i = 0; /* set ASCII */ state->i = 0; /* set ASCII */
else if (c2 == '\n')
; /* line-continuation */
else else
return 2; return 2;
NEXT(2, 0); NEXT_IN(2)
continue; continue;
} }