mirror of https://github.com/python/cpython
bpo-46054: Fix parsing error when parsing non-utf8 characters in source files (GH-30068)
This commit is contained in:
parent
59435eea08
commit
4325a766f5
|
@ -2387,6 +2387,18 @@ class SyntaxErrorTests(unittest.TestCase):
|
|||
finally:
|
||||
unlink(TESTFN)
|
||||
|
||||
def test_non_utf8(self):
|
||||
# Check non utf-8 characters
|
||||
try:
|
||||
with open(TESTFN, 'bw') as testfile:
|
||||
testfile.write(b'\x7fELF\x02\x01\x01\x00\x00\x00')
|
||||
rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
|
||||
err = err.decode('utf-8').splitlines()
|
||||
|
||||
self.assertEqual(err[-1], "SyntaxError: invalid non-printable character U+007F")
|
||||
finally:
|
||||
unlink(TESTFN)
|
||||
|
||||
def test_attributes_new_constructor(self):
|
||||
args = ("bad.py", 1, 2, "abcdefg", 1, 100)
|
||||
the_exception = SyntaxError("bad bad", args)
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Fix parser error when parsing non-UTF-8 characters in source files. Patch by
|
||||
Pablo Galindo.
|
|
@ -819,10 +819,10 @@ tok_readline_raw(struct tok_state *tok)
|
|||
tok_concatenate_interactive_new_line(tok, line) == -1) {
|
||||
return 0;
|
||||
}
|
||||
if (*tok->inp == '\0') {
|
||||
tok->inp = strchr(tok->inp, '\0');
|
||||
if (tok->inp == tok->buf) {
|
||||
return 0;
|
||||
}
|
||||
tok->inp = strchr(tok->inp, '\0');
|
||||
} while (tok->inp[-1] != '\n');
|
||||
return 1;
|
||||
}
|
||||
|
@ -984,13 +984,10 @@ tok_underflow_file(struct tok_state *tok) {
|
|||
}
|
||||
/* The default encoding is UTF-8, so make sure we don't have any
|
||||
non-UTF-8 sequences in it. */
|
||||
if (!tok->encoding
|
||||
&& (tok->decoding_state != STATE_NORMAL || tok->lineno >= 2)) {
|
||||
if (!ensure_utf8(tok->cur, tok)) {
|
||||
if (!tok->encoding && !ensure_utf8(tok->cur, tok)) {
|
||||
error_ret(tok);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
assert(tok->done == E_OK);
|
||||
return tok->done == E_OK;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue