bpo-45848: Allow the parser to get error lines from encoded files (GH-29646)

This commit is contained in:
Pablo Galindo Salgado 2021-11-20 14:36:07 +00:00 committed by GitHub
parent 6d430ef5ab
commit fdcc46d955
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 42 additions and 11 deletions

View File

@ -149,6 +149,11 @@ PyAPI_FUNC(PyObject *) PyErr_ProgramTextObject(
PyObject *filename, PyObject *filename,
int lineno); int lineno);
PyAPI_FUNC(PyObject *) _PyErr_ProgramDecodedTextObject(
PyObject *filename,
int lineno,
const char* encoding);
PyAPI_FUNC(PyObject *) _PyUnicodeTranslateError_Create( PyAPI_FUNC(PyObject *) _PyUnicodeTranslateError_Create(
PyObject *object, PyObject *object,
Py_ssize_t start, Py_ssize_t start,

View File

@ -2353,6 +2353,19 @@ class SyntaxErrorTests(unittest.TestCase):
finally: finally:
unlink(TESTFN) unlink(TESTFN)
# Check backwards tokenizer errors
source = '# -*- coding: ascii -*-\n\n(\n'
try:
with open(TESTFN, 'w', encoding='ascii') as testfile:
testfile.write(source)
rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
err = err.decode('utf-8').splitlines()
self.assertEqual(err[-3], ' (')
self.assertEqual(err[-2], ' ^')
finally:
unlink(TESTFN)
def test_attributes_new_constructor(self): def test_attributes_new_constructor(self):
args = ("bad.py", 1, 2, "abcdefg", 1, 100) args = ("bad.py", 1, 2, "abcdefg", 1, 100)
the_exception = SyntaxError("bad bad", args) the_exception = SyntaxError("bad bad", args)

View File

@ -0,0 +1,2 @@
Allow the parser to obtain error lines directly from encoded files. Patch by
Pablo Galindo.

View File

@ -482,14 +482,12 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
goto error; goto error;
} }
// PyErr_ProgramTextObject assumes that the text is utf-8 so we cannot call it with a file
// with an arbitrary encoding or otherwise we could get some badly decoded text.
int uses_utf8_codec = (!p->tok->encoding || strcmp(p->tok->encoding, "utf-8") == 0);
if (p->tok->fp_interactive) { if (p->tok->fp_interactive) {
error_line = get_error_line(p, lineno); error_line = get_error_line(p, lineno);
} }
else if (uses_utf8_codec && p->start_rule == Py_file_input) { else if (p->start_rule == Py_file_input) {
error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno); error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
(int) lineno, p->tok->encoding);
} }
if (!error_line) { if (!error_line) {
@ -500,15 +498,18 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
we're actually parsing from a file, which has an E_EOF SyntaxError and in that case we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
`PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
does not physically exist */ does not physically exist */
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec); assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) { if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
Py_ssize_t size = p->tok->inp - p->tok->buf; Py_ssize_t size = p->tok->inp - p->tok->buf;
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace"); error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
} }
else { else if (p->tok->fp == NULL || p->tok->fp == stdin) {
error_line = get_error_line(p, lineno); error_line = get_error_line(p, lineno);
} }
else {
error_line = PyUnicode_FromStringAndSize("", 0);
}
if (!error_line) { if (!error_line) {
goto error; goto error;
} }

View File

@ -1692,7 +1692,7 @@ PyErr_SyntaxLocationEx(const char *filename, int lineno, int col_offset)
functionality in tb_displayline() in traceback.c. */ functionality in tb_displayline() in traceback.c. */
static PyObject * static PyObject *
err_programtext(PyThreadState *tstate, FILE *fp, int lineno) err_programtext(PyThreadState *tstate, FILE *fp, int lineno, const char* encoding)
{ {
int i; int i;
char linebuf[1000]; char linebuf[1000];
@ -1720,7 +1720,11 @@ after_loop:
fclose(fp); fclose(fp);
if (i == lineno) { if (i == lineno) {
PyObject *res; PyObject *res;
res = PyUnicode_FromString(linebuf); if (encoding != NULL) {
res = PyUnicode_Decode(linebuf, strlen(linebuf), encoding, "replace");
} else {
res = PyUnicode_FromString(linebuf);
}
if (res == NULL) if (res == NULL)
_PyErr_Clear(tstate); _PyErr_Clear(tstate);
return res; return res;
@ -1746,7 +1750,7 @@ PyErr_ProgramText(const char *filename, int lineno)
} }
PyObject * PyObject *
PyErr_ProgramTextObject(PyObject *filename, int lineno) _PyErr_ProgramDecodedTextObject(PyObject *filename, int lineno, const char* encoding)
{ {
if (filename == NULL || lineno <= 0) { if (filename == NULL || lineno <= 0) {
return NULL; return NULL;
@ -1758,7 +1762,13 @@ PyErr_ProgramTextObject(PyObject *filename, int lineno)
_PyErr_Clear(tstate); _PyErr_Clear(tstate);
return NULL; return NULL;
} }
return err_programtext(tstate, fp, lineno); return err_programtext(tstate, fp, lineno, encoding);
}
PyObject *
PyErr_ProgramTextObject(PyObject *filename, int lineno)
{
return _PyErr_ProgramDecodedTextObject(filename, lineno, NULL);
} }
#ifdef __cplusplus #ifdef __cplusplus