mirror of https://github.com/python/cpython
bpo-44317: Improve tokenizer errors with more informative locations (GH-26555)
This commit is contained in:
parent
7b21108445
commit
f24777c2b3
|
@ -226,9 +226,9 @@ class ExceptionTests(unittest.TestCase):
|
|||
# Errors thrown by tokenizer.c
|
||||
check('(0x+1)', 1, 3)
|
||||
check('x = 0xI', 1, 6)
|
||||
check('0010 + 2', 1, 4)
|
||||
check('0010 + 2', 1, 1)
|
||||
check('x = 32e-+4', 1, 8)
|
||||
check('x = 0o9', 1, 6)
|
||||
check('x = 0o9', 1, 7)
|
||||
check('\u03b1 = 0xI', 1, 6)
|
||||
check(b'\xce\xb1 = 0xI', 1, 6)
|
||||
check(b'# -*- coding: iso8859-7 -*-\n\xe1 = 0xI', 2, 6,
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Improve tokenizer errors with more informative locations. Patch by Pablo Galindo.
|
|
@ -1071,19 +1071,13 @@ tok_backup(struct tok_state *tok, int c)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
syntaxerror(struct tok_state *tok, const char *format, ...)
|
||||
_syntaxerror_range(struct tok_state *tok, const char *format,
|
||||
int col_offset, int end_col_offset,
|
||||
va_list vargs)
|
||||
{
|
||||
PyObject *errmsg, *errtext, *args;
|
||||
va_list vargs;
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
va_start(vargs, format);
|
||||
#else
|
||||
va_start(vargs);
|
||||
#endif
|
||||
errmsg = PyUnicode_FromFormatV(format, vargs);
|
||||
va_end(vargs);
|
||||
if (!errmsg) {
|
||||
goto error;
|
||||
}
|
||||
|
@ -1093,7 +1087,14 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
|
|||
if (!errtext) {
|
||||
goto error;
|
||||
}
|
||||
int offset = (int)PyUnicode_GET_LENGTH(errtext);
|
||||
|
||||
if (col_offset == -1) {
|
||||
col_offset = (int)PyUnicode_GET_LENGTH(errtext);
|
||||
}
|
||||
if (end_col_offset == -1) {
|
||||
end_col_offset = col_offset;
|
||||
}
|
||||
|
||||
Py_ssize_t line_len = strcspn(tok->line_start, "\n");
|
||||
if (line_len != tok->cur - tok->line_start) {
|
||||
Py_DECREF(errtext);
|
||||
|
@ -1104,8 +1105,8 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
|
|||
goto error;
|
||||
}
|
||||
|
||||
args = Py_BuildValue("(O(OiiN))", errmsg,
|
||||
tok->filename, tok->lineno, offset, errtext);
|
||||
args = Py_BuildValue("(O(OiiNii))", errmsg, tok->filename, tok->lineno,
|
||||
col_offset, errtext, tok->lineno, end_col_offset);
|
||||
if (args) {
|
||||
PyErr_SetObject(PyExc_SyntaxError, args);
|
||||
Py_DECREF(args);
|
||||
|
@ -1117,6 +1118,38 @@ error:
|
|||
return ERRORTOKEN;
|
||||
}
|
||||
|
||||
static int
|
||||
syntaxerror(struct tok_state *tok, const char *format, ...)
|
||||
{
|
||||
va_list vargs;
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
va_start(vargs, format);
|
||||
#else
|
||||
va_start(vargs);
|
||||
#endif
|
||||
int ret = _syntaxerror_range(tok, format, -1, -1, vargs);
|
||||
va_end(vargs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
syntaxerror_known_range(struct tok_state *tok,
|
||||
int col_offset, int end_col_offset,
|
||||
const char *format, ...)
|
||||
{
|
||||
va_list vargs;
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
va_start(vargs, format);
|
||||
#else
|
||||
va_start(vargs);
|
||||
#endif
|
||||
int ret = _syntaxerror_range(tok, format, col_offset, end_col_offset, vargs);
|
||||
va_end(vargs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int
|
||||
indenterror(struct tok_state *tok)
|
||||
{
|
||||
|
@ -1692,12 +1725,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
|||
c = tok_nextc(tok);
|
||||
}
|
||||
if (c < '0' || c >= '8') {
|
||||
tok_backup(tok, c);
|
||||
if (isdigit(c)) {
|
||||
return syntaxerror(tok,
|
||||
"invalid digit '%c' in octal literal", c);
|
||||
}
|
||||
else {
|
||||
tok_backup(tok, c);
|
||||
return syntaxerror(tok, "invalid octal literal");
|
||||
}
|
||||
}
|
||||
|
@ -1721,12 +1754,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
|||
c = tok_nextc(tok);
|
||||
}
|
||||
if (c != '0' && c != '1') {
|
||||
tok_backup(tok, c);
|
||||
if (isdigit(c)) {
|
||||
return syntaxerror(tok,
|
||||
"invalid digit '%c' in binary literal", c);
|
||||
}
|
||||
else {
|
||||
tok_backup(tok, c);
|
||||
return syntaxerror(tok, "invalid binary literal");
|
||||
}
|
||||
}
|
||||
|
@ -1759,6 +1792,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
|||
}
|
||||
c = tok_nextc(tok);
|
||||
}
|
||||
char* zeros_end = tok->cur;
|
||||
if (isdigit(c)) {
|
||||
nonzero = 1;
|
||||
c = tok_decimal_tail(tok);
|
||||
|
@ -1779,10 +1813,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
|||
else if (nonzero) {
|
||||
/* Old-style octal: now disallowed. */
|
||||
tok_backup(tok, c);
|
||||
return syntaxerror(tok,
|
||||
"leading zeros in decimal integer "
|
||||
"literals are not permitted; "
|
||||
"use an 0o prefix for octal integers");
|
||||
return syntaxerror_known_range(
|
||||
tok, (int)(tok->start + 1 - tok->line_start),
|
||||
(int)(zeros_end - tok->line_start),
|
||||
"leading zeros in decimal integer "
|
||||
"literals are not permitted; "
|
||||
"use an 0o prefix for octal integers");
|
||||
}
|
||||
if (!verify_end_of_number(tok, c, "decimal")) {
|
||||
return ERRORTOKEN;
|
||||
|
|
Loading…
Reference in New Issue