From 025eb98dc0c1dc27404df6c544fc2944e0fa9f3a Mon Sep 17 00:00:00 2001 From: Ammar Askar Date: Mon, 24 Sep 2018 17:12:49 -0400 Subject: [PATCH] bpo-34683: Make SyntaxError column offsets consistently 1-indexed (gh-9338) Also point to start of tokens in parsing errors. Fixes bpo-34683 --- Lib/test/test_exceptions.py | 39 +++++++++++++++++++ Lib/test/test_future.py | 6 +-- Lib/test/test_global.py | 6 +-- Lib/test/test_support.py | 2 +- Lib/test/test_symtable.py | 2 +- .../2018-09-15-19-32-34.bpo-34683.msCiQE.rst | 1 + Parser/parsetok.c | 8 +++- Python/ast.c | 2 +- Python/compile.c | 2 +- Python/future.c | 4 +- Python/symtable.c | 14 +++---- 11 files changed, 65 insertions(+), 21 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-09-15-19-32-34.bpo-34683.msCiQE.rst diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 2a9ec706467..53532488008 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -187,6 +187,45 @@ class ExceptionTests(unittest.TestCase): check('def spam():\n print(1)\n print(2)', 3, 10) check('Python = "Python" +', 1, 20) check('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', 1, 20) + check('x = "a', 1, 7) + check('lambda x: x = 2', 1, 1) + + # Errors thrown by compile.c + check('class foo:return 1', 1, 11) + check('def f():\n continue', 2, 3) + check('def f():\n break', 2, 3) + check('try:\n pass\nexcept:\n pass\nexcept ValueError:\n pass', 2, 3) + + # Errors thrown by tokenizer.c + check('(0x+1)', 1, 3) + check('x = 0xI', 1, 6) + check('0010 + 2', 1, 4) + check('x = 32e-+4', 1, 8) + check('x = 0o9', 1, 6) + + # Errors thrown by symtable.c + check('x = [(yield i) for i in range(3)]', 1, 6) + check('def f():\n from _ import *', 1, 1) + check('def f(x, x):\n pass', 1, 1) + check('def f(x):\n nonlocal x', 2, 3) + check('def f(x):\n x = 1\n global x', 3, 3) + check('nonlocal x', 1, 1) + check('def f():\n global x\n nonlocal x', 2, 3) + + # Errors thrown by ast.c + check('for 1 in []: pass', 1, 5) + check('def f(*):\n pass', 1, 7) + check('[*x for x in xs]', 1, 2) + check('def f():\n x, y: int', 2, 3) + check('(yield i) = 2', 1, 1) + check('foo(x for x in range(10), 100)', 1, 5) + check('foo(1=2)', 1, 5) + + # Errors thrown by future.c + check('from __future__ import doesnt_exist', 1, 1) + check('from __future__ import braces', 1, 1) + check('x=1\nfrom __future__ import division', 2, 1) + @cpython_only def testSettingException(self): diff --git a/Lib/test/test_future.py b/Lib/test/test_future.py index 61cd63479d8..660701b4b3d 100644 --- a/Lib/test/test_future.py +++ b/Lib/test/test_future.py @@ -15,7 +15,7 @@ def get_error_location(msg): class FutureTest(unittest.TestCase): - def check_syntax_error(self, err, basename, lineno, offset=0): + def check_syntax_error(self, err, basename, lineno, offset=1): self.assertIn('%s.py, line %d' % (basename, lineno), str(err)) self.assertEqual(os.path.basename(err.filename), basename + '.py') self.assertEqual(err.lineno, lineno) @@ -68,12 +68,12 @@ class FutureTest(unittest.TestCase): def test_badfuture9(self): with self.assertRaises(SyntaxError) as cm: from test import badsyntax_future9 - self.check_syntax_error(cm.exception, "badsyntax_future9", 3, 0) + self.check_syntax_error(cm.exception, "badsyntax_future9", 3) def test_badfuture10(self): with self.assertRaises(SyntaxError) as cm: from test import badsyntax_future10 - self.check_syntax_error(cm.exception, "badsyntax_future10", 3, 0) + self.check_syntax_error(cm.exception, "badsyntax_future10", 3) def test_parserhack(self): # test that the parser.c::future_hack function works as expected diff --git a/Lib/test/test_global.py b/Lib/test/test_global.py index 9eeccf12506..8159602be98 100644 --- a/Lib/test/test_global.py +++ b/Lib/test/test_global.py @@ -24,7 +24,7 @@ def wrong1(): global a global b """ - check_syntax_error(self, prog_text_1, lineno=4, offset=4) + check_syntax_error(self, prog_text_1, lineno=4, offset=5) def test2(self): prog_text_2 = """\ @@ -32,7 +32,7 @@ def wrong2(): print(x) global x """ - check_syntax_error(self, prog_text_2, lineno=3, offset=4) + check_syntax_error(self, prog_text_2, lineno=3, offset=5) def test3(self): prog_text_3 = """\ @@ -41,7 +41,7 @@ def wrong3(): x = 2 global x """ - check_syntax_error(self, prog_text_3, lineno=4, offset=4) + check_syntax_error(self, prog_text_3, lineno=4, offset=5) def test4(self): prog_text_4 = """\ diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py index 7870e940a46..171e28aaa80 100644 --- a/Lib/test/test_support.py +++ b/Lib/test/test_support.py @@ -286,7 +286,7 @@ class TestSupport(unittest.TestCase): self.assertEqual(cm.exception.errno, errno.EBADF) def test_check_syntax_error(self): - support.check_syntax_error(self, "def class", lineno=1, offset=9) + support.check_syntax_error(self, "def class", lineno=1, offset=5) with self.assertRaises(AssertionError): support.check_syntax_error(self, "x=1") diff --git a/Lib/test/test_symtable.py b/Lib/test/test_symtable.py index dfaee173ef7..2cd735bdc50 100644 --- a/Lib/test/test_symtable.py +++ b/Lib/test/test_symtable.py @@ -169,7 +169,7 @@ class SymtableTest(unittest.TestCase): else: self.fail("no SyntaxError for %r" % (brokencode,)) checkfilename("def f(x): foo)(", 14) # parse-time - checkfilename("def f(x): global x", 10) # symtable-build-time + checkfilename("def f(x): global x", 11) # symtable-build-time symtable.symtable("pass", b"spam", "exec") with self.assertWarns(DeprecationWarning), \ self.assertRaises(TypeError): diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-09-15-19-32-34.bpo-34683.msCiQE.rst b/Misc/NEWS.d/next/Core and Builtins/2018-09-15-19-32-34.bpo-34683.msCiQE.rst new file mode 100644 index 00000000000..43bf61c5b86 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-09-15-19-32-34.bpo-34683.msCiQE.rst @@ -0,0 +1 @@ +Fixed a bug where some SyntaxError error pointed to locations that were off-by-one. diff --git a/Parser/parsetok.c b/Parser/parsetok.c index a1580e6751e..fc878d89d56 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -187,6 +187,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, parser_state *ps; node *n; int started = 0; + int col_offset; if ((ps = PyParser_New(g, start)) == NULL) { err_ret->error = E_NOMEM; @@ -203,7 +204,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, int type; size_t len; char *str; - int col_offset; + col_offset = -1; type = PyTokenizer_Get(tok, &a, &b); if (type == ERRORTOKEN) { @@ -321,7 +322,10 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, if (tok->buf != NULL) { size_t len; assert(tok->cur - tok->buf < INT_MAX); - err_ret->offset = (int)(tok->cur - tok->buf); + /* if we've managed to parse a token, point the offset to its start, + * else use the current reading position of the tokenizer + */ + err_ret->offset = col_offset != -1 ? col_offset + 1 : ((int)(tok->cur - tok->buf)); len = tok->inp - tok->buf; err_ret->text = (char *) PyObject_MALLOC(len + 1); if (err_ret->text != NULL) { diff --git a/Python/ast.c b/Python/ast.c index b93eb88dae7..b2fcb219752 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -683,7 +683,7 @@ ast_error(struct compiling *c, const node *n, const char *errmsg) Py_INCREF(Py_None); loc = Py_None; } - tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset, loc); + tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset + 1, loc); if (!tmp) return 0; errstr = PyUnicode_FromString(errmsg); diff --git a/Python/compile.c b/Python/compile.c index 707da79ab66..3a45804580e 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -4830,7 +4830,7 @@ compiler_error(struct compiler *c, const char *errstr) loc = Py_None; } u = Py_BuildValue("(OiiO)", c->c_filename, c->u->u_lineno, - c->u->u_col_offset, loc); + c->u->u_col_offset + 1, loc); if (!u) goto exit; v = Py_BuildValue("(zO)", errstr, u); diff --git a/Python/future.c b/Python/future.c index 4ea6827723b..1663a38a6fd 100644 --- a/Python/future.c +++ b/Python/future.c @@ -48,12 +48,12 @@ future_check_features(PyFutureFeatures *ff, stmt_ty s, PyObject *filename) } else if (strcmp(feature, "braces") == 0) { PyErr_SetString(PyExc_SyntaxError, "not a chance"); - PyErr_SyntaxLocationObject(filename, s->lineno, s->col_offset); + PyErr_SyntaxLocationObject(filename, s->lineno, s->col_offset + 1); return 0; } else { PyErr_Format(PyExc_SyntaxError, UNDEFINED_FUTURE_FEATURE, feature); - PyErr_SyntaxLocationObject(filename, s->lineno, s->col_offset); + PyErr_SyntaxLocationObject(filename, s->lineno, s->col_offset + 1); return 0; } } diff --git a/Python/symtable.c b/Python/symtable.c index 3e8c6f5dae3..e7216147a8a 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -390,7 +390,7 @@ error_at_directive(PySTEntryObject *ste, PyObject *name) if (PyUnicode_Compare(PyTuple_GET_ITEM(data, 0), name) == 0) { PyErr_SyntaxLocationObject(ste->ste_table->st_filename, PyLong_AsLong(PyTuple_GET_ITEM(data, 1)), - PyLong_AsLong(PyTuple_GET_ITEM(data, 2))); + PyLong_AsLong(PyTuple_GET_ITEM(data, 2)) + 1); return 0; } @@ -990,7 +990,7 @@ symtable_add_def(struct symtable *st, PyObject *name, int flag) PyErr_Format(PyExc_SyntaxError, DUPLICATE_ARGUMENT, name); PyErr_SyntaxLocationObject(st->st_filename, st->st_cur->ste_lineno, - st->st_cur->ste_col_offset); + st->st_cur->ste_col_offset + 1); goto error; } val |= flag; @@ -1179,7 +1179,7 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s) e_name->v.Name.id); PyErr_SyntaxLocationObject(st->st_filename, s->lineno, - s->col_offset); + s->col_offset + 1); VISIT_QUIT(st, 0); } if (s->v.AnnAssign.simple && @@ -1274,7 +1274,7 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s) msg, name); PyErr_SyntaxLocationObject(st->st_filename, s->lineno, - s->col_offset); + s->col_offset + 1); VISIT_QUIT(st, 0); } if (!symtable_add_def(st, name, DEF_GLOBAL)) @@ -1306,7 +1306,7 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s) PyErr_Format(PyExc_SyntaxError, msg, name); PyErr_SyntaxLocationObject(st->st_filename, s->lineno, - s->col_offset); + s->col_offset + 1); VISIT_QUIT(st, 0); } if (!symtable_add_def(st, name, DEF_NONLOCAL)) @@ -1645,7 +1645,7 @@ symtable_visit_alias(struct symtable *st, alias_ty a) int lineno = st->st_cur->ste_lineno; int col_offset = st->st_cur->ste_col_offset; PyErr_SetString(PyExc_SyntaxError, IMPORT_STAR_WARNING); - PyErr_SyntaxLocationObject(st->st_filename, lineno, col_offset); + PyErr_SyntaxLocationObject(st->st_filename, lineno, col_offset + 1); Py_DECREF(store_name); return 0; } @@ -1736,7 +1736,7 @@ symtable_handle_comprehension(struct symtable *st, expr_ty e, "'yield' inside generator expression"); PyErr_SyntaxLocationObject(st->st_filename, st->st_cur->ste_lineno, - st->st_cur->ste_col_offset); + st->st_cur->ste_col_offset + 1); symtable_exit_block(st, (void *)e); return 0; }