bpo-39219: Fix SyntaxError attributes in the tokenizer. (GH-17828)
* Always set the text attribute.
* Correct the offset attribute for non-ASCII sources.
This commit is contained in:
parent
f4f445b693
commit
0cc6b5e559
|
@ -179,17 +179,25 @@ class ExceptionTests(unittest.TestCase):
|
||||||
ckmsg(s, "inconsistent use of tabs and spaces in indentation", TabError)
|
ckmsg(s, "inconsistent use of tabs and spaces in indentation", TabError)
|
||||||
|
|
||||||
def testSyntaxErrorOffset(self):
|
def testSyntaxErrorOffset(self):
|
||||||
def check(src, lineno, offset):
|
def check(src, lineno, offset, encoding='utf-8'):
|
||||||
with self.assertRaises(SyntaxError) as cm:
|
with self.assertRaises(SyntaxError) as cm:
|
||||||
compile(src, '<fragment>', 'exec')
|
compile(src, '<fragment>', 'exec')
|
||||||
self.assertEqual(cm.exception.lineno, lineno)
|
self.assertEqual(cm.exception.lineno, lineno)
|
||||||
self.assertEqual(cm.exception.offset, offset)
|
self.assertEqual(cm.exception.offset, offset)
|
||||||
|
if cm.exception.text is not None:
|
||||||
|
if not isinstance(src, str):
|
||||||
|
src = src.decode(encoding, 'replace')
|
||||||
|
line = src.split('\n')[lineno-1]
|
||||||
|
self.assertEqual(cm.exception.text.rstrip('\n'), line)
|
||||||
|
|
||||||
check('def fact(x):\n\treturn x!\n', 2, 10)
|
check('def fact(x):\n\treturn x!\n', 2, 10)
|
||||||
check('1 +\n', 1, 4)
|
check('1 +\n', 1, 4)
|
||||||
check('def spam():\n print(1)\n print(2)', 3, 10)
|
check('def spam():\n print(1)\n print(2)', 3, 10)
|
||||||
check('Python = "Python" +', 1, 20)
|
check('Python = "Python" +', 1, 20)
|
||||||
check('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', 1, 20)
|
check('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', 1, 20)
|
||||||
|
check(b'# -*- coding: cp1251 -*-\nPython = "\xcf\xb3\xf2\xee\xed" +',
|
||||||
|
2, 19, encoding='cp1251')
|
||||||
|
check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 18)
|
||||||
check('x = "a', 1, 7)
|
check('x = "a', 1, 7)
|
||||||
check('lambda x: x = 2', 1, 1)
|
check('lambda x: x = 2', 1, 1)
|
||||||
|
|
||||||
|
@ -205,6 +213,10 @@ class ExceptionTests(unittest.TestCase):
|
||||||
check('0010 + 2', 1, 4)
|
check('0010 + 2', 1, 4)
|
||||||
check('x = 32e-+4', 1, 8)
|
check('x = 32e-+4', 1, 8)
|
||||||
check('x = 0o9', 1, 6)
|
check('x = 0o9', 1, 6)
|
||||||
|
check('\u03b1 = 0xI', 1, 6)
|
||||||
|
check(b'\xce\xb1 = 0xI', 1, 6)
|
||||||
|
check(b'# -*- coding: iso8859-7 -*-\n\xe1 = 0xI', 2, 6,
|
||||||
|
encoding='iso8859-7')
|
||||||
|
|
||||||
# Errors thrown by symtable.c
|
# Errors thrown by symtable.c
|
||||||
check('x = [(yield i) for i in range(3)]', 1, 5)
|
check('x = [(yield i) for i in range(3)]', 1, 5)
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Syntax errors raised in the tokenizer now always set correct "text" and
|
||||||
|
"offset" attributes.
|
|
@ -1,6 +1,7 @@
|
||||||
|
|
||||||
/* Tokenizer implementation */
|
/* Tokenizer implementation */
|
||||||
|
|
||||||
|
#define PY_SSIZE_T_CLEAN
|
||||||
#include "Python.h"
|
#include "Python.h"
|
||||||
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
@ -1034,17 +1035,44 @@ tok_backup(struct tok_state *tok, int c)
|
||||||
static int
|
static int
|
||||||
syntaxerror(struct tok_state *tok, const char *format, ...)
|
syntaxerror(struct tok_state *tok, const char *format, ...)
|
||||||
{
|
{
|
||||||
|
PyObject *errmsg, *errtext, *args;
|
||||||
va_list vargs;
|
va_list vargs;
|
||||||
#ifdef HAVE_STDARG_PROTOTYPES
|
#ifdef HAVE_STDARG_PROTOTYPES
|
||||||
va_start(vargs, format);
|
va_start(vargs, format);
|
||||||
#else
|
#else
|
||||||
va_start(vargs);
|
va_start(vargs);
|
||||||
#endif
|
#endif
|
||||||
PyErr_FormatV(PyExc_SyntaxError, format, vargs);
|
errmsg = PyUnicode_FromFormatV(format, vargs);
|
||||||
va_end(vargs);
|
va_end(vargs);
|
||||||
PyErr_SyntaxLocationObject(tok->filename,
|
if (!errmsg) {
|
||||||
tok->lineno,
|
goto error;
|
||||||
(int)(tok->cur - tok->line_start));
|
}
|
||||||
|
|
||||||
|
errtext = PyUnicode_DecodeUTF8(tok->line_start, tok->cur - tok->line_start,
|
||||||
|
"replace");
|
||||||
|
if (!errtext) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
int offset = (int)PyUnicode_GET_LENGTH(errtext);
|
||||||
|
Py_ssize_t line_len = strcspn(tok->line_start, "\n");
|
||||||
|
if (line_len != tok->cur - tok->line_start) {
|
||||||
|
Py_DECREF(errtext);
|
||||||
|
errtext = PyUnicode_DecodeUTF8(tok->line_start, line_len,
|
||||||
|
"replace");
|
||||||
|
}
|
||||||
|
if (!errtext) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
args = Py_BuildValue("(O(OiiN))", errmsg,
|
||||||
|
tok->filename, tok->lineno, offset, errtext);
|
||||||
|
if (args) {
|
||||||
|
PyErr_SetObject(PyExc_SyntaxError, args);
|
||||||
|
Py_DECREF(args);
|
||||||
|
}
|
||||||
|
|
||||||
|
error:
|
||||||
|
Py_XDECREF(errmsg);
|
||||||
tok->done = E_ERROR;
|
tok->done = E_ERROR;
|
||||||
return ERRORTOKEN;
|
return ERRORTOKEN;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue