Issue #5433: Excessive newline detection optimization in IncrementalNewlineDecoder
This commit is contained in:
parent
2db74c2412
commit
66913e2213
|
@ -1915,6 +1915,19 @@ class IncrementalNewlineDecoderTest(unittest.TestCase):
|
|||
decoder = self.IncrementalNewlineDecoder(decoder, translate=True)
|
||||
self.check_newline_decoding_utf8(decoder)
|
||||
|
||||
def test_newline_bytes(self):
    """Check that non-newline characters are never reported as newlines.

    Regression test for issue 5433 (excessive optimization in
    IncrementalNewlineDecoder).  U+0D00 and U+0A00 are ordinary characters,
    but their code point values contain the byte values 0x0D ('\\r') and
    0x0A ('\\n').  A byte-wise scan of the character buffer can therefore
    mistake them for line endings.  Decoding them must return the text
    unchanged and leave ``newlines`` at ``None``.
    """
    # Issue 5433: Excessive optimization in IncrementalNewlineDecoder
    def _check(dec):
        # Nothing decoded yet, so no newline kind has been recorded.
        self.assertEqual(dec.newlines, None)
        # U+0D00 holds a 0x0D byte but is not a carriage return.
        self.assertEqual(dec.decode("\u0D00"), "\u0D00")
        self.assertEqual(dec.newlines, None)
        # U+0A00 holds a 0x0A byte but is not a line feed.
        self.assertEqual(dec.decode("\u0A00"), "\u0A00")
        self.assertEqual(dec.newlines, None)

    # Exercise both modes, non-translating first; each run needs a fresh
    # decoder because _check() asserts on the initial state.
    for translate in (False, True):
        dec = self.IncrementalNewlineDecoder(None, translate=translate)
        _check(dec)
|
||||
|
||||
# Empty subclass: inherits every test from IncrementalNewlineDecoderTest unchanged.
# NOTE(review): presumably self.IncrementalNewlineDecoder is bound to the C
# implementation elsewhere in the test module -- confirm against the full file.
class CIncrementalNewlineDecoderTest(IncrementalNewlineDecoderTest):
|
||||
pass
|
||||
|
||||
|
|
|
@ -305,22 +305,40 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
|
|||
for the \r *byte* with the libc's optimized memchr.
|
||||
*/
|
||||
if (seennl == SEEN_LF || seennl == 0) {
|
||||
int has_cr, has_lf;
|
||||
has_lf = (seennl == SEEN_LF) ||
|
||||
(memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL);
|
||||
has_cr = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) != NULL);
|
||||
if (has_lf && !has_cr) {
|
||||
only_lf = 1;
|
||||
seennl = SEEN_LF;
|
||||
}
|
||||
only_lf = !(memchr(in_str, '\r', len * sizeof(Py_UNICODE)) != NULL);
|
||||
}
|
||||
|
||||
if (!self->translate) {
|
||||
if (only_lf) {
|
||||
/* If not already seen, quick scan for a possible "\n" character.
|
||||
(there's nothing else to be done, even when in translation mode)
|
||||
*/
|
||||
if (seennl == 0 &&
|
||||
memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
|
||||
Py_UNICODE *s, *end;
|
||||
s = in_str;
|
||||
end = in_str + len;
|
||||
for (;;) {
|
||||
Py_UNICODE c;
|
||||
/* Fast loop for non-control characters */
|
||||
while (*s > '\n')
|
||||
s++;
|
||||
c = *s++;
|
||||
if (c == '\n') {
|
||||
seennl |= SEEN_LF;
|
||||
break;
|
||||
}
|
||||
if (s > end)
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* Finished: we have scanned for newlines, and none of them
|
||||
need translating */
|
||||
}
|
||||
else if (!self->translate) {
|
||||
Py_UNICODE *s, *end;
|
||||
/* We have already seen all newline types, no need to scan again */
|
||||
if (seennl == SEEN_ALL)
|
||||
goto endscan;
|
||||
if (only_lf)
|
||||
goto endscan;
|
||||
s = in_str;
|
||||
end = in_str + len;
|
||||
for (;;) {
|
||||
|
@ -347,7 +365,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
|
|||
endscan:
|
||||
;
|
||||
}
|
||||
else if (!only_lf) {
|
||||
else {
|
||||
PyObject *translated = NULL;
|
||||
Py_UNICODE *out_str;
|
||||
Py_UNICODE *in, *out, *end;
|
||||
|
|
Loading…
Reference in New Issue