Issue #14629: Raise SyntaxError in tokenize.detect_encoding
if the first two lines have non-UTF-8 characters without an encoding declaration.
This commit is contained in:
parent
8e6e0fdb7f
commit
63674f4b52
|
@ -825,6 +825,16 @@ class TestDetectEncoding(TestCase):
|
||||||
found, consumed_lines = detect_encoding(rl)
|
found, consumed_lines = detect_encoding(rl)
|
||||||
self.assertEqual(found, "iso-8859-1")
|
self.assertEqual(found, "iso-8859-1")
|
||||||
|
|
||||||
|
def test_syntaxerror_latin1(self):
|
||||||
|
# Issue 14629: need to raise SyntaxError if the first
|
||||||
|
# line(s) have non-UTF-8 characters
|
||||||
|
lines = (
|
||||||
|
b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
|
||||||
|
)
|
||||||
|
readline = self.get_readline(lines)
|
||||||
|
self.assertRaises(SyntaxError, detect_encoding, readline)
|
||||||
|
|
||||||
|
|
||||||
def test_utf8_normalization(self):
|
def test_utf8_normalization(self):
|
||||||
# See get_normal_name() in tokenizer.c.
|
# See get_normal_name() in tokenizer.c.
|
||||||
encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
|
encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
|
||||||
|
|
|
@ -292,9 +292,12 @@ def detect_encoding(readline):
|
||||||
|
|
||||||
def find_cookie(line):
|
def find_cookie(line):
|
||||||
try:
|
try:
|
||||||
line_string = line.decode('ascii')
|
# Decode as UTF-8. Either the line is an encoding declaration,
|
||||||
|
# in which case it should be pure ASCII, or it must be UTF-8
|
||||||
|
# per default encoding.
|
||||||
|
line_string = line.decode('utf-8')
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
return None
|
raise SyntaxError("invalid or missing encoding declaration")
|
||||||
|
|
||||||
matches = cookie_re.findall(line_string)
|
matches = cookie_re.findall(line_string)
|
||||||
if not matches:
|
if not matches:
|
||||||
|
|
|
@ -47,6 +47,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #14629: Raise SyntaxError in tokenize.detect_encoding if the
|
||||||
|
first two lines have non-UTF-8 characters without an encoding declaration.
|
||||||
|
|
||||||
- Issue #14308: Fix an exception when a "dummy" thread is in the threading
|
- Issue #14308: Fix an exception when a "dummy" thread is in the threading
|
||||||
module's active list after a fork().
|
module's active list after a fork().
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue