mirror of https://github.com/python/cpython
bpo-43410: Fix crash in the parser when producing syntax errors when reading from stdin (GH-24763)
This commit is contained in:
parent
9923df9641
commit
cd8dcbc851
|
@ -816,9 +816,16 @@ class IgnoreEnvironmentTest(unittest.TestCase):
|
|||
PYTHONVERBOSE="1",
|
||||
)
|
||||
|
||||
class SyntaxErrorTests(unittest.TestCase):
|
||||
def test_tokenizer_error_with_stdin(self):
|
||||
proc = subprocess.run([sys.executable, "-"], input = b"(1+2+3",
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
self.assertNotEqual(proc.returncode, 0)
|
||||
self.assertNotEqual(proc.stderr, None)
|
||||
self.assertIn(b"\nSyntaxError", proc.stderr)
|
||||
|
||||
def test_main():
|
||||
support.run_unittest(CmdLineTest, IgnoreEnvironmentTest)
|
||||
support.run_unittest(CmdLineTest, IgnoreEnvironmentTest, SyntaxErrorTests)
|
||||
support.reap_children()
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Fix a bug that was causing the parser to crash when emitting syntax errors
|
||||
when reading input from stdin. Patch by Pablo Galindo.
|
|
@ -397,7 +397,8 @@ get_error_line(Parser *p, Py_ssize_t lineno)
|
|||
are stored in p->tok->stdin_content */
|
||||
assert(p->tok->fp == NULL || p->tok->fp == stdin);
|
||||
|
||||
char *cur_line = p->tok->fp == NULL ? p->tok->str : p->tok->stdin_content;
|
||||
char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;
|
||||
|
||||
for (int i = 0; i < lineno - 1; i++) {
|
||||
cur_line = strchr(cur_line, '\n') + 1;
|
||||
}
|
||||
|
@ -440,7 +441,10 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
|
|||
goto error;
|
||||
}
|
||||
|
||||
if (p->start_rule == Py_file_input) {
|
||||
if (p->tok->fp_interactive) {
|
||||
error_line = get_error_line(p, lineno);
|
||||
}
|
||||
else if (p->start_rule == Py_file_input) {
|
||||
error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
|
||||
}
|
||||
|
||||
|
@ -1232,7 +1236,7 @@ _PyPegen_run_parser(Parser *p)
|
|||
if (p->fill == 0) {
|
||||
RAISE_SYNTAX_ERROR("error at start before reading any input");
|
||||
}
|
||||
else if (p->tok->done == E_EOF) {
|
||||
else if (p->tok->done == E_EOF) {
|
||||
if (p->tok->level) {
|
||||
raise_unclosed_parentheses_error(p);
|
||||
} else {
|
||||
|
@ -1287,6 +1291,10 @@ _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filena
|
|||
}
|
||||
return NULL;
|
||||
}
|
||||
if (!tok->fp || ps1 != NULL || ps2 != NULL ||
|
||||
PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>") == 0) {
|
||||
tok->fp_interactive = 1;
|
||||
}
|
||||
// This transfers the ownership to the tokenizer
|
||||
tok->filename = filename_ob;
|
||||
Py_INCREF(filename_ob);
|
||||
|
|
|
@ -56,6 +56,9 @@ tok_new(void)
|
|||
if (tok == NULL)
|
||||
return NULL;
|
||||
tok->buf = tok->cur = tok->inp = NULL;
|
||||
tok->fp_interactive = 0;
|
||||
tok->interactive_src_start = NULL;
|
||||
tok->interactive_src_end = NULL;
|
||||
tok->start = NULL;
|
||||
tok->end = NULL;
|
||||
tok->done = E_OK;
|
||||
|
@ -80,8 +83,6 @@ tok_new(void)
|
|||
tok->decoding_readline = NULL;
|
||||
tok->decoding_buffer = NULL;
|
||||
tok->type_comments = 0;
|
||||
tok->stdin_content = NULL;
|
||||
|
||||
tok->async_hacks = 0;
|
||||
tok->async_def = 0;
|
||||
tok->async_def_indent = 0;
|
||||
|
@ -323,6 +324,35 @@ check_bom(int get_char(struct tok_state *),
|
|||
return 1;
|
||||
}
|
||||
|
||||
static int tok_concatenate_interactive_new_line(struct tok_state* tok, char* line) {
|
||||
assert(tok->fp_interactive);
|
||||
|
||||
if (!line) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
Py_ssize_t current_size = tok->interactive_src_end - tok->interactive_src_start;
|
||||
Py_ssize_t line_size = strlen(line);
|
||||
char* new_str = tok->interactive_src_start;
|
||||
|
||||
new_str = PyMem_Realloc(new_str, current_size + line_size + 1);
|
||||
if (!new_str) {
|
||||
if (tok->interactive_src_start) {
|
||||
PyMem_Free(tok->interactive_src_start);
|
||||
}
|
||||
tok->interactive_src_start = NULL;
|
||||
tok->interactive_src_end = NULL;
|
||||
tok->done = E_NOMEM;
|
||||
return -1;
|
||||
}
|
||||
strcpy(new_str + current_size, line);
|
||||
|
||||
tok->interactive_src_start = new_str;
|
||||
tok->interactive_src_end = new_str + current_size + line_size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Read a line of text from TOK into S, using the stream in TOK.
|
||||
Return NULL on failure, else S.
|
||||
|
||||
|
@ -552,6 +582,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
|
|||
badchar, tok->filename, tok->lineno + 1);
|
||||
return error_ret(tok);
|
||||
}
|
||||
|
||||
if (tok->fp_interactive &&
|
||||
tok_concatenate_interactive_new_line(tok, line) == -1) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return line;
|
||||
}
|
||||
|
||||
|
@ -807,17 +843,21 @@ PyTokenizer_FromFile(FILE *fp, const char* enc,
|
|||
void
|
||||
PyTokenizer_Free(struct tok_state *tok)
|
||||
{
|
||||
if (tok->encoding != NULL)
|
||||
if (tok->encoding != NULL) {
|
||||
PyMem_Free(tok->encoding);
|
||||
}
|
||||
Py_XDECREF(tok->decoding_readline);
|
||||
Py_XDECREF(tok->decoding_buffer);
|
||||
Py_XDECREF(tok->filename);
|
||||
if (tok->fp != NULL && tok->buf != NULL)
|
||||
if (tok->fp != NULL && tok->buf != NULL) {
|
||||
PyMem_Free(tok->buf);
|
||||
if (tok->input)
|
||||
}
|
||||
if (tok->input) {
|
||||
PyMem_Free(tok->input);
|
||||
if (tok->stdin_content)
|
||||
PyMem_Free(tok->stdin_content);
|
||||
}
|
||||
if (tok->interactive_src_start != NULL) {
|
||||
PyMem_Free(tok->interactive_src_start);
|
||||
}
|
||||
PyMem_Free(tok);
|
||||
}
|
||||
|
||||
|
@ -858,24 +898,6 @@ tok_nextc(struct tok_state *tok)
|
|||
if (translated == NULL)
|
||||
return EOF;
|
||||
newtok = translated;
|
||||
if (tok->stdin_content == NULL) {
|
||||
tok->stdin_content = PyMem_Malloc(strlen(translated) + 1);
|
||||
if (tok->stdin_content == NULL) {
|
||||
tok->done = E_NOMEM;
|
||||
return EOF;
|
||||
}
|
||||
sprintf(tok->stdin_content, "%s", translated);
|
||||
}
|
||||
else {
|
||||
char *new_str = PyMem_Malloc(strlen(tok->stdin_content) + strlen(translated) + 1);
|
||||
if (new_str == NULL) {
|
||||
tok->done = E_NOMEM;
|
||||
return EOF;
|
||||
}
|
||||
sprintf(new_str, "%s%s", tok->stdin_content, translated);
|
||||
PyMem_Free(tok->stdin_content);
|
||||
tok->stdin_content = new_str;
|
||||
}
|
||||
}
|
||||
if (tok->encoding && newtok && *newtok) {
|
||||
/* Recode to UTF-8 */
|
||||
|
@ -898,6 +920,10 @@ tok_nextc(struct tok_state *tok)
|
|||
strcpy(newtok, buf);
|
||||
Py_DECREF(u);
|
||||
}
|
||||
if (tok->fp_interactive &&
|
||||
tok_concatenate_interactive_new_line(tok, newtok) == -1) {
|
||||
return EOF;
|
||||
}
|
||||
if (tok->nextprompt != NULL)
|
||||
tok->prompt = tok->nextprompt;
|
||||
if (newtok == NULL)
|
||||
|
@ -958,7 +984,7 @@ tok_nextc(struct tok_state *tok)
|
|||
}
|
||||
if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
|
||||
tok) == NULL) {
|
||||
if (!tok->decoding_erred)
|
||||
if (!tok->decoding_erred && !(tok->done == E_NOMEM))
|
||||
tok->done = E_EOF;
|
||||
done = 1;
|
||||
}
|
||||
|
|
|
@ -26,6 +26,9 @@ struct tok_state {
|
|||
char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */
|
||||
char *cur; /* Next character in buffer */
|
||||
char *inp; /* End of data in buffer */
|
||||
int fp_interactive; /* If the file descriptor is interactive */
|
||||
char *interactive_src_start; /* The start of the source parsed so far in interactive mode */
|
||||
char *interactive_src_end; /* The end of the source parsed so far in interactive mode */
|
||||
const char *end; /* End of input buffer if buf != NULL */
|
||||
const char *start; /* Start of current token if not NULL */
|
||||
int done; /* E_OK normally, E_EOF at EOF, otherwise error code */
|
||||
|
@ -37,7 +40,6 @@ struct tok_state {
|
|||
int atbol; /* Nonzero if at begin of new line */
|
||||
int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
|
||||
const char *prompt, *nextprompt; /* For interactive prompting */
|
||||
char *stdin_content;
|
||||
int lineno; /* Current line number */
|
||||
int first_lineno; /* First line of a single line or multi line string
|
||||
expression (cf. issue 16806) */
|
||||
|
|
Loading…
Reference in New Issue