[3.13] gh-124188: Fix PyErr_ProgramTextObject() (GH-124189) (GH-124423)

* Detect the source file encoding.
* Use the "replace" error handler even for the default UTF-8 encoding.
* Remove the BOM.
* Fix detection of too-long lines when they contain NUL.
* Return the head rather than the tail for truncated long lines.

(cherry picked from commit e2f710792b)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in: parent 167d8d2f07, commit 03ae82d0d3
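In user-visible terms, the changes listed above mean that a SyntaxError raised for bytes source now honours the declared source encoding and ignores a UTF-8 BOM when reporting positions. The following is a minimal Python sketch mirroring the cases added in the new test_string_source test further down; it is an illustration of the fixed behaviour, not part of the commit, and the printed values are the ones asserted by the new tests.

    from codecs import BOM_UTF8

    def show(source):
        try:
            compile(source, '<string>', 'exec')
        except SyntaxError as exc:
            print(str(exc), exc.offset, exc.end_offset)

    show('return "ä"'.encode())                             # line 1, offset 1, end_offset 12
    show(BOM_UTF8 + 'return "ä"'.encode())                  # BOM ignored: same as above
    show('# coding: latin1\nreturn "ä"'.encode('latin1'))   # reported on line 2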
@@ -232,9 +232,13 @@ def make_script(script_dir, script_basename, source, omit_suffix=False):
     if not omit_suffix:
         script_filename += os.extsep + 'py'
     script_name = os.path.join(script_dir, script_filename)
-    # The script should be encoded to UTF-8, the default string encoding
-    with open(script_name, 'w', encoding='utf-8') as script_file:
-        script_file.write(source)
+    if isinstance(source, str):
+        # The script should be encoded to UTF-8, the default string encoding
+        with open(script_name, 'w', encoding='utf-8') as script_file:
+            script_file.write(source)
+    else:
+        with open(script_name, 'wb') as script_file:
+            script_file.write(source)
     importlib.invalidate_caches()
     return script_name

@@ -152,5 +152,8 @@ class IsolatedCodeGenTests(CodegenTestCase):

     def test_syntax_error__return_not_in_function(self):
         snippet = "return 42"
-        with self.assertRaisesRegex(SyntaxError, "'return' outside function"):
+        with self.assertRaisesRegex(SyntaxError, "'return' outside function") as cm:
             self.codegen_test(snippet, None)
+        self.assertIsNone(cm.exception.text)
+        self.assertEqual(cm.exception.offset, 1)
+        self.assertEqual(cm.exception.end_offset, 10)
@@ -1,6 +1,7 @@
 """test script for a few new invalid token catches"""

 import sys
+from codecs import BOM_UTF8
 from test import support
 from test.support import os_helper
 from test.support import script_helper
@@ -11,67 +12,158 @@ class EOFTestCase(unittest.TestCase):
     def test_EOF_single_quote(self):
         expect = "unterminated string literal (detected at line 1) (<string>, line 1)"
         for quote in ("'", "\""):
-            try:
+            with self.assertRaises(SyntaxError) as cm:
                 eval(f"""{quote}this is a test\
 """)
-            except SyntaxError as msg:
-                self.assertEqual(str(msg), expect)
-                self.assertEqual(msg.offset, 1)
-            else:
-                raise support.TestFailed
+            self.assertEqual(str(cm.exception), expect)
+            self.assertEqual(cm.exception.offset, 1)

     def test_EOFS(self):
-        expect = ("unterminated triple-quoted string literal (detected at line 1) (<string>, line 1)")
-        try:
-            eval("""'''this is a test""")
-        except SyntaxError as msg:
-            self.assertEqual(str(msg), expect)
-            self.assertEqual(msg.offset, 1)
-        else:
-            raise support.TestFailed
+        expect = ("unterminated triple-quoted string literal (detected at line 3) (<string>, line 1)")
+        with self.assertRaises(SyntaxError) as cm:
+            eval("""ä = '''thîs is \na \ntest""")
+        self.assertEqual(str(cm.exception), expect)
+        self.assertEqual(cm.exception.text, "ä = '''thîs is ")
+        self.assertEqual(cm.exception.offset, 5)
+
+        with self.assertRaises(SyntaxError) as cm:
+            eval("""ä = '''thîs is \na \ntest""".encode())
+        self.assertEqual(str(cm.exception), expect)
+        self.assertEqual(cm.exception.text, "ä = '''thîs is ")
+        self.assertEqual(cm.exception.offset, 5)
+
+        with self.assertRaises(SyntaxError) as cm:
+            eval(BOM_UTF8 + """ä = '''thîs is \na \ntest""".encode())
+        self.assertEqual(str(cm.exception), expect)
+        self.assertEqual(cm.exception.text, "ä = '''thîs is ")
+        self.assertEqual(cm.exception.offset, 5)
+
+        with self.assertRaises(SyntaxError) as cm:
+            eval("""# coding: latin1\nä = '''thîs is \na \ntest""".encode('latin1'))
+        self.assertEqual(str(cm.exception), "unterminated triple-quoted string literal (detected at line 4) (<string>, line 2)")
+        self.assertEqual(cm.exception.text, "ä = '''thîs is ")
+        self.assertEqual(cm.exception.offset, 5)

     def test_EOFS_with_file(self):
         expect = ("(<string>, line 1)")
         with os_helper.temp_dir() as temp_dir:
-            file_name = script_helper.make_script(temp_dir, 'foo', """'''this is \na \ntest""")
-            rc, out, err = script_helper.assert_python_failure(file_name)
-            self.assertIn(b'unterminated triple-quoted string literal (detected at line 3)', err)
+            file_name = script_helper.make_script(temp_dir, 'foo',
+                                                  """ä = '''thîs is \na \ntest""")
+            rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
+            err = err.decode().splitlines()
+            self.assertEqual(err[-3:], [
+                " ä = '''thîs is ",
+                ' ^',
+                'SyntaxError: unterminated triple-quoted string literal (detected at line 3)'])
+
+            file_name = script_helper.make_script(temp_dir, 'foo',
+                                                  """ä = '''thîs is \na \ntest""".encode())
+            rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
+            err = err.decode().splitlines()
+            self.assertEqual(err[-3:], [
+                " ä = '''thîs is ",
+                ' ^',
+                'SyntaxError: unterminated triple-quoted string literal (detected at line 3)'])
+
+            file_name = script_helper.make_script(temp_dir, 'foo',
+                                                  BOM_UTF8 + """ä = '''thîs is \na \ntest""".encode())
+            rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
+            err = err.decode().splitlines()
+            self.assertEqual(err[-3:], [
+                " ä = '''thîs is ",
+                ' ^',
+                'SyntaxError: unterminated triple-quoted string literal (detected at line 3)'])
+
+            file_name = script_helper.make_script(temp_dir, 'foo',
+                                                  """# coding: latin1\nä = '''thîs is \na \ntest""".encode('latin1'))
+            rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
+            err = err.decode().splitlines()
+            self.assertEqual(err[-3:], [
+                " ä = '''thîs is ",
+                ' ^',
+                'SyntaxError: unterminated triple-quoted string literal (detected at line 4)'])

     @warnings_helper.ignore_warnings(category=SyntaxWarning)
     def test_eof_with_line_continuation(self):
         expect = "unexpected EOF while parsing (<string>, line 1)"
-        try:
+        with self.assertRaises(SyntaxError) as cm:
             compile('"\\Xhh" \\', '<string>', 'exec')
-        except SyntaxError as msg:
-            self.assertEqual(str(msg), expect)
-        else:
-            raise support.TestFailed
+        self.assertEqual(str(cm.exception), expect)

     def test_line_continuation_EOF(self):
         """A continuation at the end of input must be an error; bpo2180."""
         expect = 'unexpected EOF while parsing (<string>, line 1)'
-        with self.assertRaises(SyntaxError) as excinfo:
-            exec('x = 5\\')
-        self.assertEqual(str(excinfo.exception), expect)
-        with self.assertRaises(SyntaxError) as excinfo:
+        with self.assertRaises(SyntaxError) as cm:
+            exec('ä = 5\\')
+        self.assertEqual(str(cm.exception), expect)
+        self.assertEqual(cm.exception.text, 'ä = 5\\\n')
+        self.assertEqual(cm.exception.offset, 7)
+
+        with self.assertRaises(SyntaxError) as cm:
+            exec('ä = 5\\'.encode())
+        self.assertEqual(str(cm.exception), expect)
+        self.assertEqual(cm.exception.text, 'ä = 5\\\n')
+        self.assertEqual(cm.exception.offset, 7)
+
+        with self.assertRaises(SyntaxError) as cm:
+            exec('# coding:latin1\nä = 5\\'.encode('latin1'))
+        self.assertEqual(str(cm.exception),
+                         'unexpected EOF while parsing (<string>, line 2)')
+        self.assertEqual(cm.exception.text, 'ä = 5\\\n')
+        self.assertEqual(cm.exception.offset, 7)
+
+        with self.assertRaises(SyntaxError) as cm:
+            exec(BOM_UTF8 + 'ä = 5\\'.encode())
+        self.assertEqual(str(cm.exception), expect)
+        self.assertEqual(cm.exception.text, 'ä = 5\\\n')
+        self.assertEqual(cm.exception.offset, 7)
+
+        with self.assertRaises(SyntaxError) as cm:
             exec('\\')
-        self.assertEqual(str(excinfo.exception), expect)
+        self.assertEqual(str(cm.exception), expect)

     @unittest.skipIf(not sys.executable, "sys.executable required")
     def test_line_continuation_EOF_from_file_bpo2180(self):
         """Ensure tok_nextc() does not add too many ending newlines."""
         with os_helper.temp_dir() as temp_dir:
             file_name = script_helper.make_script(temp_dir, 'foo', '\\')
-            rc, out, err = script_helper.assert_python_failure(file_name)
-            self.assertIn(b'unexpected EOF while parsing', err)
-            self.assertIn(b'line 1', err)
-            self.assertIn(b'\\', err)
+            rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
+            err = err.decode().splitlines()
+            self.assertEqual(err[-2:], [
+                ' \\',
+                'SyntaxError: unexpected EOF while parsing'])
+            self.assertEqual(err[-3][-8:], ', line 1', err)
+
+            file_name = script_helper.make_script(temp_dir, 'foo', 'ä = 6\\')
+            rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
+            err = err.decode().splitlines()
+            self.assertEqual(err[-3:], [
+                ' ä = 6\\',
+                ' ^',
+                'SyntaxError: unexpected EOF while parsing'])
+            self.assertEqual(err[-4][-8:], ', line 1', err)
+
+            file_name = script_helper.make_script(temp_dir, 'foo',
+                                                  '# coding:latin1\n'
+                                                  'ä = 7\\'.encode('latin1'))
+            rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
+            err = err.decode().splitlines()
+            self.assertEqual(err[-3:], [
+                ' ä = 7\\',
+                ' ^',
+                'SyntaxError: unexpected EOF while parsing'])
+            self.assertEqual(err[-4][-8:], ', line 2', err)
+
+            file_name = script_helper.make_script(temp_dir, 'foo',
+                                                  BOM_UTF8 + 'ä = 8\\'.encode())
+            rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
+            err = err.decode().splitlines()
+            self.assertEqual(err[-3:], [
+                ' ä = 8\\',
+                ' ^',
+                'SyntaxError: unexpected EOF while parsing'])
+            self.assertEqual(err[-4][-8:], ', line 1', err)

-            file_name = script_helper.make_script(temp_dir, 'foo', 'y = 6\\')
-            rc, out, err = script_helper.assert_python_failure(file_name)
-            self.assertIn(b'unexpected EOF while parsing', err)
-            self.assertIn(b'line 1', err)
-            self.assertIn(b'y = 6\\', err)

 if __name__ == "__main__":
     unittest.main()
@@ -7,6 +7,7 @@ import unittest
 import pickle
 import weakref
 import errno
+from codecs import BOM_UTF8
 from textwrap import dedent

 from test.support import (captured_stderr, check_impl_detail,
@@ -1988,16 +1989,20 @@ class ImportErrorTests(unittest.TestCase):
         self.assertEqual(exc.path, orig.path)


+def run_script(source):
+    if isinstance(source, str):
+        with open(TESTFN, 'w', encoding='utf-8') as testfile:
+            testfile.write(dedent(source))
+    else:
+        with open(TESTFN, 'wb') as testfile:
+            testfile.write(source)
+    _rc, _out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
+    return err.decode('utf-8').splitlines()
+
+
 class AssertionErrorTests(unittest.TestCase):
     def tearDown(self):
         unlink(TESTFN)

-    def write_source(self, source):
-        with open(TESTFN, 'w') as testfile:
-            testfile.write(dedent(source))
-        _rc, _out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
-        return err.decode('utf-8').splitlines()
-
     @force_not_colorized
     def test_assertion_error_location(self):
         cases = [
@@ -2029,11 +2034,32 @@ class AssertionErrorTests(unittest.TestCase):
                 'AssertionError',
                 ],
             ),
-            ('assert 1 > 2, "message"',
+            ('assert 1 > 2, "messäge"',
                 [
-                    ' assert 1 > 2, "message"',
+                    ' assert 1 > 2, "messäge"',
                     ' ^^^^^',
-                    'AssertionError: message',
+                    'AssertionError: messäge',
+                ],
+            ),
+            ('assert 1 > 2, "messäge"'.encode(),
+                [
+                    ' assert 1 > 2, "messäge"',
+                    ' ^^^^^',
+                    'AssertionError: messäge',
+                ],
+            ),
+            ('# coding: latin1\nassert 1 > 2, "messäge"'.encode('latin1'),
+                [
+                    ' assert 1 > 2, "messäge"',
+                    ' ^^^^^',
+                    'AssertionError: messäge',
+                ],
+            ),
+            (BOM_UTF8 + 'assert 1 > 2, "messäge"'.encode(),
+                [
+                    ' assert 1 > 2, "messäge"',
+                    ' ^^^^^',
+                    'AssertionError: messäge',
                 ],
             ),

@@ -2071,8 +2097,8 @@ class AssertionErrorTests(unittest.TestCase):
             ),
         ]
         for source, expected in cases:
-            with self.subTest(source):
-                result = self.write_source(source)
+            with self.subTest(source=source):
+                result = run_script(source)
                 self.assertEqual(result[-3:], expected)

     @force_not_colorized
@@ -2102,12 +2128,14 @@ class AssertionErrorTests(unittest.TestCase):
             ),
         ]
         for source, expected in cases:
-            with self.subTest(source):
-                result = self.write_source(source)
+            with self.subTest(source=source):
+                result = run_script(source)
                 self.assertEqual(result[-len(expected):], expected)


 class SyntaxErrorTests(unittest.TestCase):
+    maxDiff = None
+
     @force_not_colorized
     def test_range_of_offsets(self):
         cases = [
@@ -2200,45 +2228,106 @@ class SyntaxErrorTests(unittest.TestCase):
             the_exception = exc

     def test_encodings(self):
+        self.addCleanup(unlink, TESTFN)
         source = (
             '# -*- coding: cp437 -*-\n'
             '"¢¢¢¢¢¢" + f(4, x for x in range(1))\n'
         )
-        try:
-            with open(TESTFN, 'w', encoding='cp437') as testfile:
-                testfile.write(source)
-            rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
-            err = err.decode('utf-8').splitlines()
-
-            self.assertEqual(err[-3], ' "¢¢¢¢¢¢" + f(4, x for x in range(1))')
-            self.assertEqual(err[-2], ' ^^^^^^^^^^^^^^^^^^^')
-        finally:
-            unlink(TESTFN)
+        err = run_script(source.encode('cp437'))
+        self.assertEqual(err[-3], ' "¢¢¢¢¢¢" + f(4, x for x in range(1))')
+        self.assertEqual(err[-2], ' ^^^^^^^^^^^^^^^^^^^')

         # Check backwards tokenizer errors
         source = '# -*- coding: ascii -*-\n\n(\n'
-        try:
-            with open(TESTFN, 'w', encoding='ascii') as testfile:
-                testfile.write(source)
-            rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
-            err = err.decode('utf-8').splitlines()
-
-            self.assertEqual(err[-3], ' (')
-            self.assertEqual(err[-2], ' ^')
-        finally:
-            unlink(TESTFN)
+        err = run_script(source)
+        self.assertEqual(err[-3], ' (')
+        self.assertEqual(err[-2], ' ^')

     def test_non_utf8(self):
         # Check non utf-8 characters
-        try:
-            with open(TESTFN, 'bw') as testfile:
-                testfile.write(b"\x89")
-            rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
-            err = err.decode('utf-8').splitlines()
-
-            self.assertIn("SyntaxError: Non-UTF-8 code starting with '\\x89' in file", err[-1])
-        finally:
-            unlink(TESTFN)
+        self.addCleanup(unlink, TESTFN)
+        err = run_script(b"\x89")
+        self.assertIn("SyntaxError: Non-UTF-8 code starting with '\\x89' in file", err[-1])
+
+    def test_string_source(self):
+        def try_compile(source):
+            with self.assertRaises(SyntaxError) as cm:
+                compile(source, '<string>', 'exec')
+            return cm.exception
+
+        exc = try_compile('return "ä"')
+        self.assertEqual(str(exc), "'return' outside function (<string>, line 1)")
+        self.assertIsNone(exc.text)
+        self.assertEqual(exc.offset, 1)
+        self.assertEqual(exc.end_offset, 12)
+
+        exc = try_compile('return "ä"'.encode())
+        self.assertEqual(str(exc), "'return' outside function (<string>, line 1)")
+        self.assertIsNone(exc.text)
+        self.assertEqual(exc.offset, 1)
+        self.assertEqual(exc.end_offset, 12)
+
+        exc = try_compile(BOM_UTF8 + 'return "ä"'.encode())
+        self.assertEqual(str(exc), "'return' outside function (<string>, line 1)")
+        self.assertIsNone(exc.text)
+        self.assertEqual(exc.offset, 1)
+        self.assertEqual(exc.end_offset, 12)
+
+        exc = try_compile('# coding: latin1\nreturn "ä"'.encode('latin1'))
+        self.assertEqual(str(exc), "'return' outside function (<string>, line 2)")
+        self.assertIsNone(exc.text)
+        self.assertEqual(exc.offset, 1)
+        self.assertEqual(exc.end_offset, 12)
+
+        exc = try_compile('return "ä" #' + 'ä'*1000)
+        self.assertEqual(str(exc), "'return' outside function (<string>, line 1)")
+        self.assertIsNone(exc.text)
+        self.assertEqual(exc.offset, 1)
+        self.assertEqual(exc.end_offset, 12)
+
+        exc = try_compile('return "ä" # ' + 'ä'*1000)
+        self.assertEqual(str(exc), "'return' outside function (<string>, line 1)")
+        self.assertIsNone(exc.text)
+        self.assertEqual(exc.offset, 1)
+        self.assertEqual(exc.end_offset, 12)
+
+    def test_file_source(self):
+        self.addCleanup(unlink, TESTFN)
+        err = run_script('return "ä"')
+        self.assertEqual(err[-3:], [
+            ' return "ä"',
+            ' ^^^^^^^^^^',
+            "SyntaxError: 'return' outside function"])
+
+        err = run_script('return "ä"'.encode())
+        self.assertEqual(err[-3:], [
+            ' return "ä"',
+            ' ^^^^^^^^^^',
+            "SyntaxError: 'return' outside function"])
+
+        err = run_script(BOM_UTF8 + 'return "ä"'.encode())
+        self.assertEqual(err[-3:], [
+            ' return "ä"',
+            ' ^^^^^^^^^^',
+            "SyntaxError: 'return' outside function"])
+
+        err = run_script('# coding: latin1\nreturn "ä"'.encode('latin1'))
+        self.assertEqual(err[-3:], [
+            ' return "ä"',
+            ' ^^^^^^^^^^',
+            "SyntaxError: 'return' outside function"])
+
+        err = run_script('return "ä" #' + 'ä'*1000)
+        self.assertEqual(err[-2:], [
+            ' ^^^^^^^^^^^',
+            "SyntaxError: 'return' outside function"])
+        self.assertEqual(err[-3][:100], ' return "ä" #' + 'ä'*84)
+
+        err = run_script('return "ä" # ' + 'ä'*1000)
+        self.assertEqual(err[-2:], [
+            ' ^^^^^^^^^^^',
+            "SyntaxError: 'return' outside function"])
+        self.assertEqual(err[-3][:100], ' return "ä" # ' + 'ä'*83)

     def test_attributes_new_constructor(self):
         args = ("bad.py", 1, 2, "abcdefg", 1, 100)
@@ -0,0 +1,2 @@
+Fix reading and decoding a line from the source file with non-UTF-8 encoding
+for syntax errors raised in the compiler.
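To see the NEWS entry above in action (an illustration only, not part of the commit), run a Latin-1 encoded file with a syntax error: the traceback now ends with the correctly decoded source line, a caret marker, and the SyntaxError message, as the new test_file_source cases assert. The temporary-file handling below is just one way to set this up.

    import os
    import subprocess
    import sys
    import tempfile

    source = '# coding: latin1\nreturn "ä"'.encode('latin1')
    fd, path = tempfile.mkstemp(suffix='.py')
    try:
        os.write(fd, source)
        os.close(fd)
        # '-X utf8' mirrors the new tests and keeps the captured stderr UTF-8.
        proc = subprocess.run([sys.executable, '-X', 'utf8', path],
                              capture_output=True, encoding='utf-8')
        # stderr now shows the decoded line:  return "ä"  followed by carets
        # and "SyntaxError: 'return' outside function".
        print(proc.stderr)
    finally:
        os.unlink(path)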
@@ -1856,44 +1856,44 @@ PyErr_SyntaxLocationEx(const char *filename, int lineno, int col_offset)
    functionality in tb_displayline() in traceback.c. */

 static PyObject *
-err_programtext(PyThreadState *tstate, FILE *fp, int lineno, const char* encoding)
+err_programtext(FILE *fp, int lineno, const char* encoding)
 {
-    int i;
     char linebuf[1000];
-    if (fp == NULL) {
-        return NULL;
-    }
+    size_t line_size = 0;

-    for (i = 0; i < lineno; i++) {
-        char *pLastChar = &linebuf[sizeof(linebuf) - 2];
-        do {
-            *pLastChar = '\0';
-            if (Py_UniversalNewlineFgets(linebuf, sizeof linebuf,
-                                         fp, NULL) == NULL) {
-                goto after_loop;
-            }
-            /* fgets read *something*; if it didn't get as
-               far as pLastChar, it must have found a newline
-               or hit the end of the file; if pLastChar is \n,
-               it obviously found a newline; else we haven't
-               yet seen a newline, so must continue */
-        } while (*pLastChar != '\0' && *pLastChar != '\n');
-    }
-
-after_loop:
-    fclose(fp);
-    if (i == lineno) {
-        PyObject *res;
-        if (encoding != NULL) {
-            res = PyUnicode_Decode(linebuf, strlen(linebuf), encoding, "replace");
-        } else {
-            res = PyUnicode_FromString(linebuf);
+    for (int i = 0; i < lineno; ) {
+        line_size = 0;
+        if (_Py_UniversalNewlineFgetsWithSize(linebuf, sizeof(linebuf),
+                                              fp, NULL, &line_size) == NULL)
+        {
+            /* Error or EOF. */
+            return NULL;
         }
-        if (res == NULL)
-            _PyErr_Clear(tstate);
-        return res;
+        /* fgets read *something*; if it didn't fill the
+           whole buffer, it must have found a newline
+           or hit the end of the file; if the last character is \n,
+           it obviously found a newline; else we haven't
+           yet seen a newline, so must continue */
+        if (i + 1 < lineno
+            && line_size == sizeof(linebuf) - 1
+            && linebuf[sizeof(linebuf) - 2] != '\n')
+        {
+            continue;
+        }
+        i++;
     }
-    return NULL;
+
+    const char *line = linebuf;
+    /* Skip BOM. */
+    if (lineno == 1 && line_size >= 3 && memcmp(line, "\xef\xbb\xbf", 3) == 0) {
+        line += 3;
+        line_size -= 3;
+    }
+    PyObject *res = PyUnicode_Decode(line, line_size, encoding, "replace");
+    if (res == NULL) {
+        PyErr_Clear();
+    }
+    return res;
 }

 PyObject *
@@ -1913,20 +1913,41 @@ PyErr_ProgramText(const char *filename, int lineno)
     return res;
 }

+/* Function from Parser/tokenizer/file_tokenizer.c */
+extern char* _PyTokenizer_FindEncodingFilename(int, PyObject *);
+
 PyObject *
 _PyErr_ProgramDecodedTextObject(PyObject *filename, int lineno, const char* encoding)
 {
+    char *found_encoding = NULL;
     if (filename == NULL || lineno <= 0) {
         return NULL;
     }

-    PyThreadState *tstate = _PyThreadState_GET();
     FILE *fp = _Py_fopen_obj(filename, "r" PY_STDIOTEXTMODE);
     if (fp == NULL) {
-        _PyErr_Clear(tstate);
+        PyErr_Clear();
         return NULL;
     }
-    return err_programtext(tstate, fp, lineno, encoding);
+    if (encoding == NULL) {
+        int fd = fileno(fp);
+        found_encoding = _PyTokenizer_FindEncodingFilename(fd, filename);
+        encoding = found_encoding;
+        if (encoding == NULL) {
+            PyErr_Clear();
+            encoding = "utf-8";
+        }
+        /* Reset position */
+        if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
+            fclose(fp);
+            PyMem_Free(found_encoding);
+            return NULL;
+        }
+    }
+    PyObject *res = err_programtext(fp, lineno, encoding);
+    fclose(fp);
+    PyMem_Free(found_encoding);
+    return res;
 }

 PyObject *
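For readers more comfortable with Python than C, here is a rough Python-level sketch of the flow that err_programtext() and _PyErr_ProgramDecodedTextObject() follow after this change. The helper name read_program_text and the use of tokenize.detect_encoding() in place of _PyTokenizer_FindEncodingFilename() are illustrative assumptions, not part of the commit.

    import tokenize

    def read_program_text(filename, lineno, encoding=None):
        """Return line `lineno` of `filename` decoded with the source encoding."""
        if encoding is None:
            # Detect the declared source encoding (coding cookie / BOM);
            # fall back to UTF-8 if detection fails.
            with open(filename, 'rb') as fp:
                try:
                    encoding, _ = tokenize.detect_encoding(fp.readline)
                except SyntaxError:
                    encoding = 'utf-8'
        with open(filename, 'rb') as fp:
            for i, line in enumerate(fp, 1):
                if i == lineno:
                    # Drop a UTF-8 BOM on the first line, then decode permissively
                    # so a mismatched encoding still yields usable text.
                    if lineno == 1 and line.startswith(b'\xef\xbb\xbf'):
                        line = line[3:]
                    # (The C code additionally truncates very long lines to its
                    # fixed buffer and keeps the head of the line.)
                    return line.decode(encoding, errors='replace')
        return None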