bpo-40334: Improve column offsets for thrown syntax errors by Pegen (GH-19782)
This commit is contained in:
parent
719e14d283
commit
76c1b4d5c5
|
@ -609,7 +609,7 @@ invalid_assignment:
|
||||||
| expression ':' expression ['=' annotated_rhs] {
|
| expression ':' expression ['=' annotated_rhs] {
|
||||||
RAISE_SYNTAX_ERROR("illegal target for annotation") }
|
RAISE_SYNTAX_ERROR("illegal target for annotation") }
|
||||||
| a=expression ('=' | augassign) (yield_expr | star_expressions) {
|
| a=expression ('=' | augassign) (yield_expr | star_expressions) {
|
||||||
RAISE_SYNTAX_ERROR("cannot assign to %s", _PyPegen_get_expr_name(a)) }
|
RAISE_SYNTAX_ERROR_NO_COL_OFFSET("cannot assign to %s", _PyPegen_get_expr_name(a)) }
|
||||||
invalid_block:
|
invalid_block:
|
||||||
| NEWLINE !INDENT { RAISE_INDENTATION_ERROR("expected an indented block") }
|
| NEWLINE !INDENT { RAISE_INDENTATION_ERROR("expected an indented block") }
|
||||||
invalid_comprehension:
|
invalid_comprehension:
|
||||||
|
|
|
@ -599,7 +599,7 @@ class CmdLineTest(unittest.TestCase):
|
||||||
exitcode, stdout, stderr = assert_python_failure(script_name)
|
exitcode, stdout, stderr = assert_python_failure(script_name)
|
||||||
text = io.TextIOWrapper(io.BytesIO(stderr), 'ascii').read()
|
text = io.TextIOWrapper(io.BytesIO(stderr), 'ascii').read()
|
||||||
# Confirm that the caret is located under the first 1 character
|
# Confirm that the caret is located under the first 1 character
|
||||||
self.assertIn("\n 1 + 1 = 2\n ^", text)
|
self.assertIn("\n 1 + 1 = 2\n ^", text)
|
||||||
|
|
||||||
def test_syntaxerror_indented_caret_position(self):
|
def test_syntaxerror_indented_caret_position(self):
|
||||||
script = textwrap.dedent("""\
|
script = textwrap.dedent("""\
|
||||||
|
@ -611,7 +611,7 @@ class CmdLineTest(unittest.TestCase):
|
||||||
exitcode, stdout, stderr = assert_python_failure(script_name)
|
exitcode, stdout, stderr = assert_python_failure(script_name)
|
||||||
text = io.TextIOWrapper(io.BytesIO(stderr), 'ascii').read()
|
text = io.TextIOWrapper(io.BytesIO(stderr), 'ascii').read()
|
||||||
# Confirm that the caret is located under the first 1 character
|
# Confirm that the caret is located under the first 1 character
|
||||||
self.assertIn("\n 1 + 1 = 2\n ^", text)
|
self.assertIn("\n 1 + 1 = 2\n ^", text)
|
||||||
|
|
||||||
# Try the same with a form feed at the start of the indented line
|
# Try the same with a form feed at the start of the indented line
|
||||||
script = (
|
script = (
|
||||||
|
@ -622,7 +622,7 @@ class CmdLineTest(unittest.TestCase):
|
||||||
exitcode, stdout, stderr = assert_python_failure(script_name)
|
exitcode, stdout, stderr = assert_python_failure(script_name)
|
||||||
text = io.TextIOWrapper(io.BytesIO(stderr), "ascii").read()
|
text = io.TextIOWrapper(io.BytesIO(stderr), "ascii").read()
|
||||||
self.assertNotIn("\f", text)
|
self.assertNotIn("\f", text)
|
||||||
self.assertIn("\n 1 + 1 = 2\n ^", text)
|
self.assertIn("\n 1 + 1 = 2\n ^", text)
|
||||||
|
|
||||||
def test_syntaxerror_multi_line_fstring(self):
|
def test_syntaxerror_multi_line_fstring(self):
|
||||||
script = 'foo = f"""{}\nfoo"""\n'
|
script = 'foo = f"""{}\nfoo"""\n'
|
||||||
|
|
|
@ -178,19 +178,19 @@ class ExceptionTests(unittest.TestCase):
|
||||||
s = '''if True:\n print()\n\texec "mixed tabs and spaces"'''
|
s = '''if True:\n print()\n\texec "mixed tabs and spaces"'''
|
||||||
ckmsg(s, "inconsistent use of tabs and spaces in indentation", TabError)
|
ckmsg(s, "inconsistent use of tabs and spaces in indentation", TabError)
|
||||||
|
|
||||||
@support.skip_if_new_parser("Pegen column offsets might be different")
|
def check(self, src, lineno, offset, encoding='utf-8'):
|
||||||
def testSyntaxErrorOffset(self):
|
with self.assertRaises(SyntaxError) as cm:
|
||||||
def check(src, lineno, offset, encoding='utf-8'):
|
compile(src, '<fragment>', 'exec')
|
||||||
with self.assertRaises(SyntaxError) as cm:
|
self.assertEqual(cm.exception.lineno, lineno)
|
||||||
compile(src, '<fragment>', 'exec')
|
self.assertEqual(cm.exception.offset, offset)
|
||||||
self.assertEqual(cm.exception.lineno, lineno)
|
if cm.exception.text is not None:
|
||||||
self.assertEqual(cm.exception.offset, offset)
|
if not isinstance(src, str):
|
||||||
if cm.exception.text is not None:
|
src = src.decode(encoding, 'replace')
|
||||||
if not isinstance(src, str):
|
line = src.split('\n')[lineno-1]
|
||||||
src = src.decode(encoding, 'replace')
|
self.assertIn(line, cm.exception.text)
|
||||||
line = src.split('\n')[lineno-1]
|
|
||||||
self.assertIn(line, cm.exception.text)
|
|
||||||
|
|
||||||
|
def testSyntaxErrorOffset(self):
|
||||||
|
check = self.check
|
||||||
check('def fact(x):\n\treturn x!\n', 2, 10)
|
check('def fact(x):\n\treturn x!\n', 2, 10)
|
||||||
check('1 +\n', 1, 4)
|
check('1 +\n', 1, 4)
|
||||||
check('def spam():\n print(1)\n print(2)', 3, 10)
|
check('def spam():\n print(1)\n print(2)', 3, 10)
|
||||||
|
@ -238,20 +238,20 @@ class ExceptionTests(unittest.TestCase):
|
||||||
check('nonlocal x', 1, 1)
|
check('nonlocal x', 1, 1)
|
||||||
check('def f():\n global x\n nonlocal x', 2, 3)
|
check('def f():\n global x\n nonlocal x', 2, 3)
|
||||||
|
|
||||||
# Errors thrown by ast.c
|
|
||||||
check('for 1 in []: pass', 1, 5)
|
|
||||||
check('def f(*):\n pass', 1, 7)
|
|
||||||
check('[*x for x in xs]', 1, 2)
|
|
||||||
check('def f():\n x, y: int', 2, 3)
|
|
||||||
check('(yield i) = 2', 1, 1)
|
|
||||||
check('foo(x for x in range(10), 100)', 1, 5)
|
|
||||||
check('foo(1=2)', 1, 5)
|
|
||||||
|
|
||||||
# Errors thrown by future.c
|
# Errors thrown by future.c
|
||||||
check('from __future__ import doesnt_exist', 1, 1)
|
check('from __future__ import doesnt_exist', 1, 1)
|
||||||
check('from __future__ import braces', 1, 1)
|
check('from __future__ import braces', 1, 1)
|
||||||
check('x=1\nfrom __future__ import division', 2, 1)
|
check('x=1\nfrom __future__ import division', 2, 1)
|
||||||
|
|
||||||
|
@support.skip_if_new_parser("Pegen column offsets might be different")
|
||||||
|
def testSyntaxErrorOffsetCustom(self):
|
||||||
|
self.check('for 1 in []: pass', 1, 5)
|
||||||
|
self.check('def f(*):\n pass', 1, 7)
|
||||||
|
self.check('[*x for x in xs]', 1, 2)
|
||||||
|
self.check('def f():\n x, y: int', 2, 3)
|
||||||
|
self.check('(yield i) = 2', 1, 1)
|
||||||
|
self.check('foo(x for x in range(10), 100)', 1, 5)
|
||||||
|
self.check('foo(1=2)', 1, 5)
|
||||||
|
|
||||||
@cpython_only
|
@cpython_only
|
||||||
def testSettingException(self):
|
def testSettingException(self):
|
||||||
|
|
|
@ -10515,7 +10515,7 @@ invalid_assignment_rule(Parser *p)
|
||||||
(_tmp_132_var = _tmp_132_rule(p))
|
(_tmp_132_var = _tmp_132_rule(p))
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
res = RAISE_SYNTAX_ERROR ( "cannot assign to %s" , _PyPegen_get_expr_name ( a ) );
|
res = RAISE_SYNTAX_ERROR_NO_COL_OFFSET ( "cannot assign to %s" , _PyPegen_get_expr_name ( a ) );
|
||||||
if (res == NULL && PyErr_Occurred()) {
|
if (res == NULL && PyErr_Occurred()) {
|
||||||
p->error_indicator = 1;
|
p->error_indicator = 1;
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
|
@ -145,11 +145,15 @@ byte_offset_to_character_offset(PyObject *line, int col_offset)
|
||||||
if (!str) {
|
if (!str) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, NULL);
|
PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
|
||||||
if (!text) {
|
if (!text) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
Py_ssize_t size = PyUnicode_GET_LENGTH(text);
|
Py_ssize_t size = PyUnicode_GET_LENGTH(text);
|
||||||
|
str = PyUnicode_AsUTF8(text);
|
||||||
|
if (str != NULL && (int)strlen(str) == col_offset) {
|
||||||
|
size = strlen(str);
|
||||||
|
}
|
||||||
Py_DECREF(text);
|
Py_DECREF(text);
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
@ -297,68 +301,23 @@ error:
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline PyObject *
|
static inline PyObject *
|
||||||
get_error_line(char *buffer)
|
get_error_line(char *buffer, int is_file)
|
||||||
{
|
{
|
||||||
char *newline = strchr(buffer, '\n');
|
const char *newline;
|
||||||
|
if (is_file) {
|
||||||
|
newline = strrchr(buffer, '\n');
|
||||||
|
} else {
|
||||||
|
newline = strchr(buffer, '\n');
|
||||||
|
}
|
||||||
|
|
||||||
if (newline) {
|
if (newline) {
|
||||||
return PyUnicode_FromStringAndSize(buffer, newline - buffer);
|
return PyUnicode_DecodeUTF8(buffer, newline - buffer, "replace");
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return PyUnicode_FromString(buffer);
|
return PyUnicode_DecodeUTF8(buffer, strlen(buffer), "replace");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
tokenizer_error_with_col_offset(Parser *p, PyObject *errtype, const char *errmsg)
|
|
||||||
{
|
|
||||||
PyObject *errstr = NULL;
|
|
||||||
PyObject *value = NULL;
|
|
||||||
size_t col_number = -1;
|
|
||||||
|
|
||||||
errstr = PyUnicode_FromString(errmsg);
|
|
||||||
if (!errstr) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
PyObject *loc = NULL;
|
|
||||||
if (p->start_rule == Py_file_input) {
|
|
||||||
loc = PyErr_ProgramTextObject(p->tok->filename, p->tok->lineno);
|
|
||||||
}
|
|
||||||
if (!loc) {
|
|
||||||
loc = get_error_line(p->tok->buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (loc) {
|
|
||||||
col_number = p->tok->cur - p->tok->buf;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
Py_INCREF(Py_None);
|
|
||||||
loc = Py_None;
|
|
||||||
}
|
|
||||||
|
|
||||||
PyObject *tmp = Py_BuildValue("(OiiN)", p->tok->filename, p->tok->lineno,
|
|
||||||
col_number, loc);
|
|
||||||
if (!tmp) {
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
|
|
||||||
value = PyTuple_Pack(2, errstr, tmp);
|
|
||||||
Py_DECREF(tmp);
|
|
||||||
if (!value) {
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
PyErr_SetObject(errtype, value);
|
|
||||||
|
|
||||||
Py_XDECREF(value);
|
|
||||||
Py_XDECREF(errstr);
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
error:
|
|
||||||
Py_XDECREF(errstr);
|
|
||||||
Py_XDECREF(loc);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
tokenizer_error(Parser *p)
|
tokenizer_error(Parser *p)
|
||||||
{
|
{
|
||||||
|
@ -376,20 +335,20 @@ tokenizer_error(Parser *p)
|
||||||
msg = "invalid character in identifier";
|
msg = "invalid character in identifier";
|
||||||
break;
|
break;
|
||||||
case E_BADPREFIX:
|
case E_BADPREFIX:
|
||||||
return tokenizer_error_with_col_offset(p,
|
RAISE_SYNTAX_ERROR("invalid string prefix");
|
||||||
errtype, "invalid string prefix");
|
return -1;
|
||||||
case E_EOFS:
|
case E_EOFS:
|
||||||
return tokenizer_error_with_col_offset(p,
|
RAISE_SYNTAX_ERROR("EOF while scanning triple-quoted string literal");
|
||||||
errtype, "EOF while scanning triple-quoted string literal");
|
return -1;
|
||||||
case E_EOLS:
|
case E_EOLS:
|
||||||
return tokenizer_error_with_col_offset(p,
|
RAISE_SYNTAX_ERROR("EOL while scanning string literal");
|
||||||
errtype, "EOL while scanning string literal");
|
return -1;
|
||||||
case E_EOF:
|
case E_EOF:
|
||||||
return tokenizer_error_with_col_offset(p,
|
RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
|
||||||
errtype, "unexpected EOF while parsing");
|
return -1;
|
||||||
case E_DEDENT:
|
case E_DEDENT:
|
||||||
return tokenizer_error_with_col_offset(p,
|
RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
|
||||||
PyExc_IndentationError, "unindent does not match any outer indentation level");
|
return -1;
|
||||||
case E_INTR:
|
case E_INTR:
|
||||||
if (!PyErr_Occurred()) {
|
if (!PyErr_Occurred()) {
|
||||||
PyErr_SetNone(PyExc_KeyboardInterrupt);
|
PyErr_SetNone(PyExc_KeyboardInterrupt);
|
||||||
|
@ -421,14 +380,14 @@ tokenizer_error(Parser *p)
|
||||||
}
|
}
|
||||||
|
|
||||||
void *
|
void *
|
||||||
_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
|
_PyPegen_raise_error(Parser *p, PyObject *errtype, int with_col_number, const char *errmsg, ...)
|
||||||
{
|
{
|
||||||
PyObject *value = NULL;
|
PyObject *value = NULL;
|
||||||
PyObject *errstr = NULL;
|
PyObject *errstr = NULL;
|
||||||
PyObject *loc = NULL;
|
PyObject *loc = NULL;
|
||||||
PyObject *tmp = NULL;
|
PyObject *tmp = NULL;
|
||||||
Token *t = p->tokens[p->fill - 1];
|
Token *t = p->tokens[p->fill - 1];
|
||||||
Py_ssize_t col_number = 0;
|
Py_ssize_t col_number = !with_col_number;
|
||||||
va_list va;
|
va_list va;
|
||||||
|
|
||||||
va_start(va, errmsg);
|
va_start(va, errmsg);
|
||||||
|
@ -443,14 +402,20 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!loc) {
|
if (!loc) {
|
||||||
loc = get_error_line(p->tok->buf);
|
loc = get_error_line(p->tok->buf, p->start_rule == Py_file_input);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (loc) {
|
if (loc && with_col_number) {
|
||||||
int col_offset = t->col_offset == -1 ? 0 : t->col_offset;
|
int col_offset;
|
||||||
col_number = byte_offset_to_character_offset(loc, col_offset) + 1;
|
if (t->col_offset == -1) {
|
||||||
|
col_offset = Py_SAFE_DOWNCAST(p->tok->cur - p->tok->buf,
|
||||||
|
intptr_t, int);
|
||||||
|
} else {
|
||||||
|
col_offset = t->col_offset + 1;
|
||||||
|
}
|
||||||
|
col_number = byte_offset_to_character_offset(loc, col_offset);
|
||||||
}
|
}
|
||||||
else {
|
else if (!loc) {
|
||||||
Py_INCREF(Py_None);
|
Py_INCREF(Py_None);
|
||||||
loc = Py_None;
|
loc = Py_None;
|
||||||
}
|
}
|
||||||
|
@ -632,14 +597,6 @@ _PyPegen_fill_token(Parser *p)
|
||||||
type = PyTokenizer_Get(p->tok, &start, &end);
|
type = PyTokenizer_Get(p->tok, &start, &end);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type == ERRORTOKEN) {
|
|
||||||
if (p->tok->done == E_DECODE) {
|
|
||||||
return raise_decode_error(p);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return tokenizer_error(p);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (type == ENDMARKER && p->start_rule == Py_single_input && p->parsing_started) {
|
if (type == ENDMARKER && p->start_rule == Py_single_input && p->parsing_started) {
|
||||||
type = NEWLINE; /* Add an extra newline */
|
type = NEWLINE; /* Add an extra newline */
|
||||||
p->parsing_started = 0;
|
p->parsing_started = 0;
|
||||||
|
@ -700,6 +657,16 @@ _PyPegen_fill_token(Parser *p)
|
||||||
t->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
|
t->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
|
||||||
|
|
||||||
p->fill += 1;
|
p->fill += 1;
|
||||||
|
|
||||||
|
if (type == ERRORTOKEN) {
|
||||||
|
if (p->tok->done == E_DECODE) {
|
||||||
|
return raise_decode_error(p);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return tokenizer_error(p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -126,14 +126,15 @@ expr_ty _PyPegen_name_token(Parser *p);
|
||||||
expr_ty _PyPegen_number_token(Parser *p);
|
expr_ty _PyPegen_number_token(Parser *p);
|
||||||
void *_PyPegen_string_token(Parser *p);
|
void *_PyPegen_string_token(Parser *p);
|
||||||
const char *_PyPegen_get_expr_name(expr_ty);
|
const char *_PyPegen_get_expr_name(expr_ty);
|
||||||
void *_PyPegen_raise_error(Parser *p, PyObject *, const char *errmsg, ...);
|
void *_PyPegen_raise_error(Parser *p, PyObject *errtype, int with_col_number, const char *errmsg, ...);
|
||||||
void *_PyPegen_dummy_name(Parser *p, ...);
|
void *_PyPegen_dummy_name(Parser *p, ...);
|
||||||
|
|
||||||
#define UNUSED(expr) do { (void)(expr); } while (0)
|
#define UNUSED(expr) do { (void)(expr); } while (0)
|
||||||
#define EXTRA_EXPR(head, tail) head->lineno, head->col_offset, tail->end_lineno, tail->end_col_offset, p->arena
|
#define EXTRA_EXPR(head, tail) head->lineno, head->col_offset, tail->end_lineno, tail->end_col_offset, p->arena
|
||||||
#define EXTRA start_lineno, start_col_offset, end_lineno, end_col_offset, p->arena
|
#define EXTRA start_lineno, start_col_offset, end_lineno, end_col_offset, p->arena
|
||||||
#define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, msg, ##__VA_ARGS__)
|
#define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, 1, msg, ##__VA_ARGS__)
|
||||||
#define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, msg, ##__VA_ARGS__)
|
#define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, 1, msg, ##__VA_ARGS__)
|
||||||
|
#define RAISE_SYNTAX_ERROR_NO_COL_OFFSET(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, 0, msg, ##__VA_ARGS__)
|
||||||
|
|
||||||
Py_LOCAL_INLINE(void *)
|
Py_LOCAL_INLINE(void *)
|
||||||
CHECK_CALL(Parser *p, void *result)
|
CHECK_CALL(Parser *p, void *result)
|
||||||
|
@ -190,8 +191,8 @@ INVALID_VERSION_CHECK(Parser *p, int version, char *msg, void *node)
|
||||||
}
|
}
|
||||||
if (p->feature_version < version) {
|
if (p->feature_version < version) {
|
||||||
p->error_indicator = 1;
|
p->error_indicator = 1;
|
||||||
return _PyPegen_raise_error(p, PyExc_SyntaxError, "%s only supported in Python 3.%i and greater",
|
return RAISE_SYNTAX_ERROR("%s only supported in Python 3.%i and greater",
|
||||||
msg, version);
|
msg, version);
|
||||||
}
|
}
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue