cpython/Parser/pegen_errors.c

#include <Python.h>
#include <errcode.h>

#include "tokenizer.h"
#include "pegen.h"

// TOKENIZER ERRORS

/* Convert the pending tokenizer-initialisation exception into a SyntaxError.
 *
 * Only acts when the pending exception matches LookupError, SyntaxError,
 * ValueError or UnicodeDecodeError; any other pending exception is left
 * untouched. The new SyntaxError carries str(original value) as its message
 * and (filename, 0, -1, None) as its location tuple.
 *
 * If building the replacement fails, the original exception is dropped and
 * whatever error the failing call set (if any) is left pending.
 */
void
_PyPegen_raise_tokenizer_init_error(PyObject *filename)
{
    if (!(PyErr_ExceptionMatches(PyExc_LookupError)
          || PyErr_ExceptionMatches(PyExc_SyntaxError)
          || PyErr_ExceptionMatches(PyExc_ValueError)
          || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
        return;
    }
    PyObject *errstr = NULL;
    PyObject *tuple = NULL;
    PyObject *type;
    PyObject *value;
    PyObject *tback;
    // Take ownership of the pending exception; this also clears it.
    PyErr_Fetch(&type, &value, &tback);
    errstr = PyObject_Str(value);
    if (!errstr) {
        goto error;
    }

    PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
    if (!tmp) {
        goto error;
    }

    tuple = PyTuple_Pack(2, errstr, tmp);
    Py_DECREF(tmp);
    // Check the object we just built (not the fetched exception value):
    // a failed pack must not reach PyErr_SetObject with a NULL argument.
    if (!tuple) {
        goto error;
    }
    PyErr_SetObject(PyExc_SyntaxError, tuple);

error:
    Py_XDECREF(type);
    Py_XDECREF(value);
    Py_XDECREF(tback);
    Py_XDECREF(errstr);
    Py_XDECREF(tuple);
}

/* Raise a SyntaxError reporting that the innermost still-open bracket was
 * never closed. The bracket character and the line/column where it was
 * opened are read from the top entry (index level - 1) of the tokenizer's
 * paren stacks, so the caller must ensure p->tok->level is non-zero. */
static inline void
raise_unclosed_parentheses_error(Parser *p) {
    const int top = p->tok->level - 1;
    const int opened_on_line = p->tok->parenlinenostack[top];
    const int opened_at_col = p->tok->parencolstack[top];

    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
                               opened_on_line, opened_at_col,
                               opened_on_line, -1,
                               "'%c' was never closed",
                               p->tok->parenstack[top]);
}

/* Translate the tokenizer's failure code (p->tok->done) into a Python
 * exception. Does nothing if an exception is already pending.
 *
 * Always returns -1. */
int
_Pypegen_tokenizer_error(Parser *p)
{
    if (PyErr_Occurred()) {
        return -1;
    }

    PyObject *exc_class = PyExc_SyntaxError;
    const char *text = NULL;
    Py_ssize_t column = 0;

    switch (p->tok->done) {
        /* Codes that raise their own exception and return right away. */
        case E_EOF:
            if (p->tok->level) {
                raise_unclosed_parentheses_error(p);
            } else {
                RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
            }
            return -1;
        case E_DEDENT:
            RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
            return -1;
        case E_INTR:
            if (!PyErr_Occurred()) {
                PyErr_SetNone(PyExc_KeyboardInterrupt);
            }
            return -1;
        case E_NOMEM:
            PyErr_NoMemory();
            return -1;

        /* Codes that only choose a class/message; raised below at the
         * tokenizer's current line. */
        case E_TOKEN:
            text = "invalid token";
            break;
        case E_TABSPACE:
            exc_class = PyExc_TabError;
            text = "inconsistent use of tabs and spaces in indentation";
            break;
        case E_TOODEEP:
            exc_class = PyExc_IndentationError;
            text = "too many levels of indentation";
            break;
        case E_LINECONT:
            column = p->tok->cur - p->tok->buf - 1;
            if (column < 0) {
                // A negative offset is reported as column 0.
                column = 0;
            }
            text = "unexpected character after line continuation character";
            break;
        default:
            text = "unknown parsing error";
            break;
    }

    RAISE_ERROR_KNOWN_LOCATION(p, exc_class, p->tok->lineno,
                               column,
                               p->tok->lineno, -1, text);
    return -1;
}

/* Re-raise a pending UnicodeError or ValueError as a SyntaxError whose
 * message is "(<kind>) <str(original)>". UnicodeError is tested first; any
 * other pending exception is left as it is.
 *
 * An exception must already be set on entry. Always returns -1. */
int
_Pypegen_raise_decode_error(Parser *p)
{
    assert(PyErr_Occurred());

    const char *kind;
    if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
        kind = "unicode error";
    }
    else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
        kind = "value error";
    }
    else {
        // Not a decode-related exception: propagate it unchanged.
        return -1;
    }

    PyObject *exc_type;
    PyObject *exc_value;
    PyObject *exc_tb;
    PyErr_Fetch(&exc_type, &exc_value, &exc_tb);

    PyObject *text = PyObject_Str(exc_value);
    if (text == NULL) {
        // str() itself failed; discard that failure and use a generic message.
        PyErr_Clear();
        RAISE_SYNTAX_ERROR("(%s) unknown error", kind);
    }
    else {
        RAISE_SYNTAX_ERROR("(%s) %U", kind, text);
        Py_DECREF(text);
    }

    Py_XDECREF(exc_type);
    Py_XDECREF(exc_value);
    Py_XDECREF(exc_tb);
    return -1;
}

/* Keep pulling tokens until ENDMARKER or the first ERRORTOKEN, looking for a
 * tokenizer-level problem (such as an unclosed bracket) that should take
 * priority over the error that is already pending.
 *
 * An unclosed-bracket error replaces the pending one only when the bracket
 * was opened on a line strictly before the line of the current error token.
 * Nothing is scanned when the tokenizer has a prompt set (interactive input).
 *
 * Returns -1 if the unclosed-bracket error was raised here, 0 otherwise. On
 * exit, if any exception is pending the previously saved one is discarded;
 * if none is pending the saved one is restored. */
static int
_PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) {
    // We don't want to tokenize to the end for interactive input
    if (p->tok->prompt != NULL) {
        return 0;
    }

    // Park the pending exception while we drive the tokenizer.
    PyObject *saved_type, *saved_value, *saved_tb;
    PyErr_Fetch(&saved_type, &saved_value, &saved_tb);

    Token *err_token = p->known_err_token;
    if (err_token == NULL) {
        err_token = p->tokens[p->fill - 1];
    }
    Py_ssize_t known_err_line = err_token->lineno;

    int status = 0;

    for (;;) {
        const char *tok_start;
        const char *tok_end;
        int tok_type = _PyTokenizer_Get(p->tok, &tok_start, &tok_end);

        if (tok_type == ENDMARKER) {
            break;
        }
        if (tok_type != ERRORTOKEN) {
            // Ordinary token: keep scanning.
            continue;
        }

        // ERRORTOKEN: stop here, reporting an unclosed bracket if one is
        // open and was opened before the line of the known error.
        if (p->tok->level != 0) {
            int opened_on_line = p->tok->parenlinenostack[p->tok->level-1];
            if (known_err_line > opened_on_line) {
                raise_unclosed_parentheses_error(p);
                status = -1;
            }
        }
        break;
    }

    if (PyErr_Occurred()) {
        // A new exception is pending; drop the one we parked.
        Py_XDECREF(saved_value);
        Py_XDECREF(saved_type);
        Py_XDECREF(saved_tb);
    } else {
        PyErr_Restore(saved_type, saved_value, saved_tb);
    }
    return status;
}

// PARSER ERRORS

/* Raise `errtype` with a printf-style message, locating the error at the
 * parser's known error token or, if there is none, at the last token read.
 *
 * When no token has been read yet (p->fill == 0) the location passed on is
 * line 0, column 0, end line 0, end column -1. Otherwise the token's column
 * offsets are passed on incremented by one; an end offset of -1 is kept
 * as -1, and a start offset of -1 is replaced by a column computed from the
 * tokenizer's current position.
 *
 * Always returns NULL. */
void *
_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
{
    Py_ssize_t lineno = 0;
    Py_ssize_t col_offset = 0;
    Py_ssize_t end_lineno = 0;
    Py_ssize_t end_col_offset = -1;

    if (p->fill != 0) {
        Token *t = p->known_err_token;
        if (t == NULL) {
            t = p->tokens[p->fill - 1];
        }

        lineno = t->lineno;
        end_lineno = t->end_lineno;

        if (t->col_offset != -1) {
            col_offset = t->col_offset + 1;
        }
        else if (p->tok->cur != p->tok->buf) {
            // The token has no column: measure from the tokenizer position.
            const char *start = p->tok->buf ? p->tok->line_start : p->tok->buf;
            col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int);
        }
        // else: tokenizer is at the very start of its buffer, column stays 0.

        if (t->end_col_offset != -1) {
            end_col_offset = t->end_col_offset + 1;
        }
    }

    va_list va;
    va_start(va, errmsg);
    _PyPegen_raise_error_known_location(p, errtype, lineno, col_offset,
                                        end_lineno, end_col_offset, errmsg, va);
    va_end(va);

    return NULL;
}

/* Return source line number `lineno` (1-based) as a new str object, taken
 * from the tokenizer's in-memory copy of the source and decoded as UTF-8 with
 * the "replace" error handler. The trailing newline is not included.
 *
 * If the buffer holds fewer than `lineno` lines, the last available line is
 * returned instead of walking off the end of the buffer.
 *
 * Returns NULL with an exception set if decoding fails. */
static PyObject *
get_error_line_from_tokenizer_buffers(Parser *p, Py_ssize_t lineno)
{
    /* If the file descriptor is interactive, the source lines of the current
     * (multi-line) statement are stored in p->tok->interactive_src_start.
     * If not, we're parsing from a string, which means that the whole source
     * is stored in p->tok->str. */
    assert((p->tok->fp == NULL && p->tok->str != NULL) || p->tok->fp == stdin);

    char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;
    assert(cur_line != NULL);

    // Skip the first (lineno - 1) lines.
    for (Py_ssize_t i = 0; i < lineno - 1; i++) {
        char *newline = strchr(cur_line, '\n');
        if (newline == NULL) {
            // Fewer lines than requested: stop on the last one rather than
            // computing a pointer from NULL. The reported line may then be
            // the wrong one, but we do not crash.
            break;
        }
        cur_line = newline + 1;
    }

    char *line_end = strchr(cur_line, '\n');
    if (line_end == NULL) { // This is the last line
        line_end = cur_line + strlen(cur_line);
    }
    return PyUnicode_DecodeUTF8(cur_line, line_end - cur_line, "replace");
}

/* Raise `errtype` with a formatted message and an explicit source location.
 *
 * The exception value is the pair
 *   (message, (filename, lineno, col, source_line, end_lineno, end_col)).
 *
 * p               parser; its error_indicator is set to 1 unconditionally.
 * errtype         exception class to raise.
 * lineno,
 * col_offset      start of the error. The offsets are byte offsets and are
 *                 converted to character offsets when the tokenizer has an
 *                 encoding set.
 * end_lineno,
 * end_col_offset  end of the error; CURRENT_POS for either means "take it
 *                 from the tokenizer's current position".
 * errmsg, va      PyUnicode_FromFormatV-style format string and arguments.
 *                 When parsing an f-string the message is prefixed with
 *                 "f-string: ".
 *
 * Always returns NULL. If building the exception fails part-way, whatever
 * error the failing call set is left pending instead.
 */
void *
_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
                                    Py_ssize_t lineno, Py_ssize_t col_offset,
                                    Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
                                    const char *errmsg, va_list va)
{
    PyObject *value = NULL;
    PyObject *errstr = NULL;
    PyObject *error_line = NULL;
    PyObject *tmp = NULL;
    p->error_indicator = 1;

    if (end_lineno == CURRENT_POS) {
        end_lineno = p->tok->lineno;
    }
    if (end_col_offset == CURRENT_POS) {
        end_col_offset = p->tok->cur - p->tok->line_start;
    }

    if (p->start_rule == Py_fstring_input) {
        const char *fstring_msg = "f-string: ";
        Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);

        char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character
        if (!new_errmsg) {
            return (void *) PyErr_NoMemory();
        }

        // Copy both strings into new buffer
        memcpy(new_errmsg, fstring_msg, strlen(fstring_msg));
        memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg));
        new_errmsg[len] = 0;
        // From here on errmsg is heap-allocated and must be freed on every
        // exit path (guarded by the same start_rule test).
        errmsg = new_errmsg;
    }
    errstr = PyUnicode_FromFormatV(errmsg, va);
    if (!errstr) {
        goto error;
    }

    if (p->tok->fp_interactive) {
        error_line = get_error_line_from_tokenizer_buffers(p, lineno);
    }
    else if (p->start_rule == Py_file_input) {
        error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
                                                     (int) lineno, p->tok->encoding);
    }

    if (!error_line) {
        /* PyErr_ProgramTextObject was not called or returned NULL. If it was not called,
           then we need to find the error line from some other source, because
           p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly
           failed or we're parsing from a string or the REPL. There's a third edge case where
           we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
           `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
           does not physically exist */
        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);

        if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
            Py_ssize_t size = p->tok->inp - p->tok->buf;
            error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
        }
        else if (p->tok->fp == NULL || p->tok->fp == stdin) {
            error_line = get_error_line_from_tokenizer_buffers(p, lineno);
        }
        else {
            error_line = PyUnicode_FromStringAndSize("", 0);
        }
        if (!error_line) {
            goto error;
        }
    }

    if (p->start_rule == Py_fstring_input) {
        col_offset -= p->starting_col_offset;
        end_col_offset -= p->starting_col_offset;
    }

    Py_ssize_t col_number = col_offset;
    Py_ssize_t end_col_number = end_col_offset;

    if (p->tok->encoding != NULL) {
        col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset);
        if (col_number < 0) {
            goto error;
        }
        if (end_col_number > 0) {
            // Distinct name: do not shadow the end_col_offset parameter.
            Py_ssize_t end_char_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number);
            if (end_char_offset < 0) {
                goto error;
            }
            end_col_number = end_char_offset;
        }
    }

    // "n" is the format unit for Py_ssize_t; "i" would read the variadic
    // arguments back as int, which does not match the types passed here.
    tmp = Py_BuildValue("(OnnNnn)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);
    // "N" hands our reference to error_line over to Py_BuildValue (CPython
    // releases "N" arguments even when building fails), so we must not
    // release it again on the error path below.
    error_line = NULL;
    if (!tmp) {
        goto error;
    }
    value = PyTuple_Pack(2, errstr, tmp);
    Py_DECREF(tmp);
    if (!value) {
        goto error;
    }
    PyErr_SetObject(errtype, value);

    Py_DECREF(errstr);
    Py_DECREF(value);
    if (p->start_rule == Py_fstring_input) {
        PyMem_Free((void *)errmsg);
    }
    return NULL;

error:
    Py_XDECREF(errstr);
    Py_XDECREF(error_line);
    if (p->start_rule == Py_fstring_input) {
        PyMem_Free((void *)errmsg);
    }
    return NULL;
}

/* Make sure an appropriate SyntaxError (or subclass) is pending after a
 * failed parse.
 *
 * p           parser whose parse just failed.
 * last_token  last token seen on the first parser pass; used to locate the
 *             generic "invalid syntax" error.
 *
 * Cases, in order:
 *   - an exception is already pending: keep it, but if it is a SyntaxError
 *     and the tokenizer finished cleanly, scan the rest of the input so
 *     that an earlier tokenizer-level error can replace it;
 *   - no token was ever read: initialization error;
 *   - the tokenizer hit EOF: unclosed bracket or unexpected EOF;
 *   - the last token is INDENT/DEDENT: IndentationError;
 *   - otherwise: generic "invalid syntax" at last_token, again followed by
 *     the rest-of-input scan.
 */
void
_Pypegen_set_syntax_error(Parser* p, Token* last_token) {
    // Existing syntax error
    if (PyErr_Occurred()) {
        // Prioritize tokenizer errors to custom syntax errors raised
        // on the second phase only if the errors come from the parser.
        if (p->tok->done == E_DONE && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
            _PyPegen_tokenize_full_source_to_check_for_errors(p);
        }
        // Propagate the existing syntax error.
        return;
    }
    // Initialization error
    if (p->fill == 0) {
        RAISE_SYNTAX_ERROR("error at start before reading any input");
        // Stop here: with no tokens read, the checks below would inspect
        // last_token and the full-source scan would read
        // p->tokens[p->fill - 1], neither of which refers to a real token.
        return;
    }
    // Parser encountered EOF (End of File) unexpectedly
    if (last_token->type == ERRORTOKEN && p->tok->done == E_EOF) {
        if (p->tok->level) {
            raise_unclosed_parentheses_error(p);
        } else {
            RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
        }
        return;
    }
    // Indentation error in the tokenizer
    if (last_token->type == INDENT || last_token->type == DEDENT) {
        RAISE_INDENTATION_ERROR(last_token->type == INDENT ? "unexpected indent" : "unexpected unindent");
        return;
    }
    // Unknown error (generic case)

    // Use the last token we found on the first pass to avoid reporting
    // incorrect locations for generic syntax errors just because we reached
    // further away when trying to find specific syntax errors in the second
    // pass.
    RAISE_SYNTAX_ERROR_KNOWN_LOCATION(last_token, "invalid syntax");
    // _PyPegen_tokenize_full_source_to_check_for_errors will override the existing
    // generic SyntaxError we just raised if errors are found.
    _PyPegen_tokenize_full_source_to_check_for_errors(p);
}
Refactor parser compilation units into specific components (GH-29676) 2021-11-20 21:08:50 -04:00			`#include <Python.h>`
			`#include <errcode.h>`

			`#include "tokenizer.h"`
			`#include "pegen.h"`

			`// TOKENIZER ERRORS`

			`void`
			`_PyPegen_raise_tokenizer_init_error(PyObject *filename)`
			`{`
			`if (!(PyErr_ExceptionMatches(PyExc_LookupError)`
			`\|\| PyErr_ExceptionMatches(PyExc_SyntaxError)`
			`\|\| PyErr_ExceptionMatches(PyExc_ValueError)`
			`\|\| PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {`
			`return;`
			`}`
			`PyObject *errstr = NULL;`
			`PyObject *tuple = NULL;`
			`PyObject *type;`
			`PyObject *value;`
			`PyObject *tback;`
			`PyErr_Fetch(&type, &value, &tback);`
			`errstr = PyObject_Str(value);`
			`if (!errstr) {`
			`goto error;`
			`}`

			`PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);`
			`if (!tmp) {`
			`goto error;`
			`}`

			`tuple = PyTuple_Pack(2, errstr, tmp);`
			`Py_DECREF(tmp);`
			`if (!value) {`
			`goto error;`
			`}`
			`PyErr_SetObject(PyExc_SyntaxError, tuple);`

			`error:`
			`Py_XDECREF(type);`
			`Py_XDECREF(value);`
			`Py_XDECREF(tback);`
			`Py_XDECREF(errstr);`
			`Py_XDECREF(tuple);`
			`}`

			`static inline void`
			`raise_unclosed_parentheses_error(Parser *p) {`
			`int error_lineno = p->tok->parenlinenostack[p->tok->level-1];`
			`int error_col = p->tok->parencolstack[p->tok->level-1];`
			`RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,`
			`error_lineno, error_col, error_lineno, -1,`
			`"'%c' was never closed",`
			`p->tok->parenstack[p->tok->level-1]);`
			`}`

			`int`
			`_Pypegen_tokenizer_error(Parser *p)`
			`{`
			`if (PyErr_Occurred()) {`
			`return -1;`
			`}`

			`const char *msg = NULL;`
			`PyObject* errtype = PyExc_SyntaxError;`
			`Py_ssize_t col_offset = -1;`
			`switch (p->tok->done) {`
			`case E_TOKEN:`
			`msg = "invalid token";`
			`break;`
			`case E_EOF:`
			`if (p->tok->level) {`
			`raise_unclosed_parentheses_error(p);`
			`} else {`
			`RAISE_SYNTAX_ERROR("unexpected EOF while parsing");`
			`}`
			`return -1;`
			`case E_DEDENT:`
			`RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");`
			`return -1;`
			`case E_INTR:`
			`if (!PyErr_Occurred()) {`
			`PyErr_SetNone(PyExc_KeyboardInterrupt);`
			`}`
			`return -1;`
			`case E_NOMEM:`
			`PyErr_NoMemory();`
			`return -1;`
			`case E_TABSPACE:`
			`errtype = PyExc_TabError;`
			`msg = "inconsistent use of tabs and spaces in indentation";`
			`break;`
			`case E_TOODEEP:`
			`errtype = PyExc_IndentationError;`
			`msg = "too many levels of indentation";`
			`break;`
			`case E_LINECONT: {`
			`col_offset = p->tok->cur - p->tok->buf - 1;`
			`msg = "unexpected character after line continuation character";`
			`break;`
			`}`
			`default:`
			`msg = "unknown parsing error";`
			`}`

			`RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno,`
			`col_offset >= 0 ? col_offset : 0,`
			`p->tok->lineno, -1, msg);`
			`return -1;`
			`}`

			`int`
			`_Pypegen_raise_decode_error(Parser *p)`
			`{`
			`assert(PyErr_Occurred());`
			`const char *errtype = NULL;`
			`if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {`
			`errtype = "unicode error";`
			`}`
			`else if (PyErr_ExceptionMatches(PyExc_ValueError)) {`
			`errtype = "value error";`
			`}`
			`if (errtype) {`
			`PyObject *type;`
			`PyObject *value;`
			`PyObject *tback;`
			`PyObject *errstr;`
			`PyErr_Fetch(&type, &value, &tback);`
			`errstr = PyObject_Str(value);`
			`if (errstr) {`
			`RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr);`
			`Py_DECREF(errstr);`
			`}`
			`else {`
			`PyErr_Clear();`
			`RAISE_SYNTAX_ERROR("(%s) unknown error", errtype);`
			`}`
			`Py_XDECREF(type);`
			`Py_XDECREF(value);`
			`Py_XDECREF(tback);`
			`}`

			`return -1;`
			`}`

			`static int`
			`_PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) {`
			`// Tokenize the whole input to see if there are any tokenization`
			`// errors such as mistmatching parentheses. These will get priority`
			`// over generic syntax errors only if the line number of the error is`
			`// before the one that we had for the generic error.`

			`// We don't want to tokenize to the end for interactive input`
			`if (p->tok->prompt != NULL) {`
			`return 0;`
			`}`

			`PyObject type, value, *traceback;`
			`PyErr_Fetch(&type, &value, &traceback);`

			`Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];`
			`Py_ssize_t current_err_line = current_token->lineno;`

			`int ret = 0;`

			`for (;;) {`
			`const char *start;`
			`const char *end;`
			`switch (_PyTokenizer_Get(p->tok, &start, &end)) {`
			`case ERRORTOKEN:`
			`if (p->tok->level != 0) {`
			`int error_lineno = p->tok->parenlinenostack[p->tok->level-1];`
			`if (current_err_line > error_lineno) {`
			`raise_unclosed_parentheses_error(p);`
			`ret = -1;`
			`goto exit;`
			`}`
			`}`
			`break;`
			`case ENDMARKER:`
			`break;`
			`default:`
			`continue;`
			`}`
			`break;`
			`}`


			`exit:`
			`if (PyErr_Occurred()) {`
			`Py_XDECREF(value);`
			`Py_XDECREF(type);`
			`Py_XDECREF(traceback);`
			`} else {`
			`PyErr_Restore(type, value, traceback);`
			`}`
			`return ret;`
			`}`

			`// PARSER ERRORS`

			`void *`
			`_PyPegen_raise_error(Parser p, PyObject errtype, const char *errmsg, ...)`
			`{`
			`if (p->fill == 0) {`
			`va_list va;`
			`va_start(va, errmsg);`
			`_PyPegen_raise_error_known_location(p, errtype, 0, 0, 0, -1, errmsg, va);`
			`va_end(va);`
			`return NULL;`
			`}`

			`Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];`
			`Py_ssize_t col_offset;`
			`Py_ssize_t end_col_offset = -1;`
			`if (t->col_offset == -1) {`
			`if (p->tok->cur == p->tok->buf) {`
			`col_offset = 0;`
			`} else {`
			`const char* start = p->tok->buf ? p->tok->line_start : p->tok->buf;`
			`col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int);`
			`}`
			`} else {`
			`col_offset = t->col_offset + 1;`
			`}`

			`if (t->end_col_offset != -1) {`
			`end_col_offset = t->end_col_offset + 1;`
			`}`

			`va_list va;`
			`va_start(va, errmsg);`
			`_PyPegen_raise_error_known_location(p, errtype, t->lineno, col_offset, t->end_lineno, end_col_offset, errmsg, va);`
			`va_end(va);`

			`return NULL;`
			`}`

			`static PyObject *`
			`get_error_line_from_tokenizer_buffers(Parser *p, Py_ssize_t lineno)`
			`{`
			`/* If the file descriptor is interactive, the source lines of the current`
			`* (multi-line) statement are stored in p->tok->interactive_src_start.`
			`* If not, we're parsing from a string, which means that the whole source`
			`* is stored in p->tok->str. */`
Ensure the str member of the tokenizer is always initialised (GH-29681) 2021-11-20 22:06:39 -04:00			`assert((p->tok->fp == NULL && p->tok->str != NULL) \|\| p->tok->fp == stdin);`
Refactor parser compilation units into specific components (GH-29676) 2021-11-20 21:08:50 -04:00
			`char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;`
			`assert(cur_line != NULL);`

			`for (int i = 0; i < lineno - 1; i++) {`
			`cur_line = strchr(cur_line, '\n') + 1;`
			`}`

			`char *next_newline;`
			`if ((next_newline = strchr(cur_line, '\n')) == NULL) { // This is the last line`
			`next_newline = cur_line + strlen(cur_line);`
			`}`
			`return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace");`
			`}`

			`void *`
			`_PyPegen_raise_error_known_location(Parser p, PyObject errtype,`
			`Py_ssize_t lineno, Py_ssize_t col_offset,`
			`Py_ssize_t end_lineno, Py_ssize_t end_col_offset,`
			`const char *errmsg, va_list va)`
			`{`
			`PyObject *value = NULL;`
			`PyObject *errstr = NULL;`
			`PyObject *error_line = NULL;`
			`PyObject *tmp = NULL;`
			`p->error_indicator = 1;`

			`if (end_lineno == CURRENT_POS) {`
			`end_lineno = p->tok->lineno;`
			`}`
			`if (end_col_offset == CURRENT_POS) {`
			`end_col_offset = p->tok->cur - p->tok->line_start;`
			`}`

			`if (p->start_rule == Py_fstring_input) {`
			`const char *fstring_msg = "f-string: ";`
			`Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);`

			`char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character`
			`if (!new_errmsg) {`
			`return (void *) PyErr_NoMemory();`
			`}`

			`// Copy both strings into new buffer`
			`memcpy(new_errmsg, fstring_msg, strlen(fstring_msg));`
			`memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg));`
			`new_errmsg[len] = 0;`
			`errmsg = new_errmsg;`
			`}`
			`errstr = PyUnicode_FromFormatV(errmsg, va);`
			`if (!errstr) {`
			`goto error;`
			`}`

			`if (p->tok->fp_interactive) {`
			`error_line = get_error_line_from_tokenizer_buffers(p, lineno);`
			`}`
			`else if (p->start_rule == Py_file_input) {`
			`error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,`
			`(int) lineno, p->tok->encoding);`
			`}`

			`if (!error_line) {`
			`/* PyErr_ProgramTextObject was not called or returned NULL. If it was not called,`
			`then we need to find the error line from some other source, because`
			`p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly`
			`failed or we're parsing from a string or the REPL. There's a third edge case where`
			`we're actually parsing from a file, which has an E_EOF SyntaxError and in that case`
			`PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
			`does not physically exist */`
			`assert(p->tok->fp == NULL \|\| p->tok->fp == stdin \|\| p->tok->done == E_EOF);`

			`if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {`
			`Py_ssize_t size = p->tok->inp - p->tok->buf;`
			`error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");`
			`}`
			`else if (p->tok->fp == NULL \|\| p->tok->fp == stdin) {`
			`error_line = get_error_line_from_tokenizer_buffers(p, lineno);`
			`}`
			`else {`
			`error_line = PyUnicode_FromStringAndSize("", 0);`
			`}`
			`if (!error_line) {`
			`goto error;`
			`}`
			`}`

			`if (p->start_rule == Py_fstring_input) {`
			`col_offset -= p->starting_col_offset;`
			`end_col_offset -= p->starting_col_offset;`
			`}`

			`Py_ssize_t col_number = col_offset;`
			`Py_ssize_t end_col_number = end_col_offset;`

			`if (p->tok->encoding != NULL) {`
			`col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset);`
			`if (col_number < 0) {`
			`goto error;`
			`}`
			`if (end_col_number > 0) {`
			`Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number);`
			`if (end_col_offset < 0) {`
			`goto error;`
			`} else {`
			`end_col_number = end_col_offset;`
			`}`
			`}`
			`}`
			`tmp = Py_BuildValue("(OiiNii)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);`
			`if (!tmp) {`
			`goto error;`
			`}`
			`value = PyTuple_Pack(2, errstr, tmp);`
			`Py_DECREF(tmp);`
			`if (!value) {`
			`goto error;`
			`}`
			`PyErr_SetObject(errtype, value);`

			`Py_DECREF(errstr);`
			`Py_DECREF(value);`
			`if (p->start_rule == Py_fstring_input) {`
			`PyMem_Free((void *)errmsg);`
			`}`
			`return NULL;`

			`error:`
			`Py_XDECREF(errstr);`
			`Py_XDECREF(error_line);`
			`if (p->start_rule == Py_fstring_input) {`
			`PyMem_Free((void *)errmsg);`
			`}`
			`return NULL;`
			`}`

			`void`
			`_Pypegen_set_syntax_error(Parser* p, Token* last_token) {`
			`// Existing sintax error`
			`if (PyErr_Occurred()) {`
			`// Prioritize tokenizer errors to custom syntax errors raised`
			`// on the second phase only if the errors come from the parser.`
			`if (p->tok->done == E_DONE && PyErr_ExceptionMatches(PyExc_SyntaxError)) {`
			`_PyPegen_tokenize_full_source_to_check_for_errors(p);`
			`}`
			`// Propagate the existing syntax error.`
			`return;`
			`}`
			`// Initialization error`
			`if (p->fill == 0) {`
			`RAISE_SYNTAX_ERROR("error at start before reading any input");`
			`}`
			`// Parser encountered EOF (End of File) unexpectedtly`
bpo-45727: Only trigger the 'did you forgot a comma' error suggestion if inside parentheses (GH-29757) 2021-11-24 18:21:23 -04:00			`if (last_token->type == ERRORTOKEN && p->tok->done == E_EOF) {`
Refactor parser compilation units into specific components (GH-29676) 2021-11-20 21:08:50 -04:00			`if (p->tok->level) {`
			`raise_unclosed_parentheses_error(p);`
			`} else {`
			`RAISE_SYNTAX_ERROR("unexpected EOF while parsing");`
			`}`
			`return;`
			`}`
			`// Indentation error in the tokenizer`
			`if (last_token->type == INDENT \|\| last_token->type == DEDENT) {`
			`RAISE_INDENTATION_ERROR(last_token->type == INDENT ? "unexpected indent" : "unexpected unindent");`
			`return;`
			`}`
			`// Unknown error (generic case)`

			`// Use the last token we found on the first pass to avoid reporting`
			`// incorrect locations for generic syntax errors just because we reached`
			`// further away when trying to find specific syntax errors in the second`
			`// pass.`
			`RAISE_SYNTAX_ERROR_KNOWN_LOCATION(last_token, "invalid syntax");`
			`// _PyPegen_tokenize_full_source_to_check_for_errors will override the existing`
			`// generic SyntaxError we just raised if errors are found.`
			`_PyPegen_tokenize_full_source_to_check_for_errors(p);`
bpo-45727: Only trigger the 'did you forgot a comma' error suggestion if inside parentheses (GH-29757) 2021-11-24 18:21:23 -04:00			`}`