gh-115823: Correctly calculate error locations when dealing with implicit encodings (#115824)

This commit is contained in:
Pablo Galindo Salgado 2024-02-26 13:57:09 +01:00 committed by GitHub
parent b7383b8b71
commit 015b97d19a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 13 additions and 11 deletions

View File

@ -301,6 +301,7 @@ class ExceptionTests(unittest.TestCase):
{ {
6 6
0="""''', 5, 13) 0="""''', 5, 13)
check('b"fooжжж"'.encode(), 1, 1, 1, 10)
# Errors thrown by symtable.c # Errors thrown by symtable.c
check('x = [(yield i) for i in range(3)]', 1, 7) check('x = [(yield i) for i in range(3)]', 1, 7)

View File

@ -0,0 +1,3 @@
Properly calculate error ranges in the parser when raising
:exc:`SyntaxError` exceptions caused by invalid byte sequences. Patch by
Pablo Galindo.

View File

@ -369,20 +369,18 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
Py_ssize_t col_number = col_offset; Py_ssize_t col_number = col_offset;
Py_ssize_t end_col_number = end_col_offset; Py_ssize_t end_col_number = end_col_offset;
if (p->tok->encoding != NULL) {
col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset); col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset);
if (col_number < 0) { if (col_number < 0) {
goto error; goto error;
} }
if (end_col_number > 0) {
Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number); if (end_col_offset > 0) {
if (end_col_offset < 0) { end_col_number = _PyPegen_byte_offset_to_character_offset(error_line, end_col_offset);
if (end_col_number < 0) {
goto error; goto error;
} else {
end_col_number = end_col_offset;
}
} }
} }
tmp = Py_BuildValue("(OnnNnn)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number); tmp = Py_BuildValue("(OnnNnn)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);
if (!tmp) { if (!tmp) {
goto error; goto error;