mirror of https://github.com/python/cpython
bpo-46521: Fix codeop to use a new partial-input mode of the parser (GH-31010)
This commit is contained in:
parent
25db2b361b
commit
69e10976b2
|
@ -18,8 +18,10 @@
|
||||||
#define PyCF_IGNORE_COOKIE 0x0800
|
#define PyCF_IGNORE_COOKIE 0x0800
|
||||||
#define PyCF_TYPE_COMMENTS 0x1000
|
#define PyCF_TYPE_COMMENTS 0x1000
|
||||||
#define PyCF_ALLOW_TOP_LEVEL_AWAIT 0x2000
|
#define PyCF_ALLOW_TOP_LEVEL_AWAIT 0x2000
|
||||||
|
#define PyCF_ALLOW_INCOMPLETE_INPUT 0x4000
|
||||||
#define PyCF_COMPILE_MASK (PyCF_ONLY_AST | PyCF_ALLOW_TOP_LEVEL_AWAIT | \
|
#define PyCF_COMPILE_MASK (PyCF_ONLY_AST | PyCF_ALLOW_TOP_LEVEL_AWAIT | \
|
||||||
PyCF_TYPE_COMMENTS | PyCF_DONT_IMPLY_DEDENT)
|
PyCF_TYPE_COMMENTS | PyCF_DONT_IMPLY_DEDENT | \
|
||||||
|
PyCF_ALLOW_INCOMPLETE_INPUT)
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int cf_flags; /* bitmask of CO_xxx flags relevant to future */
|
int cf_flags; /* bitmask of CO_xxx flags relevant to future */
|
||||||
|
|
|
@ -26,6 +26,8 @@ extern "C" {
|
||||||
#define E_TOODEEP 20 /* Too many indentation levels */
|
#define E_TOODEEP 20 /* Too many indentation levels */
|
||||||
#define E_DEDENT 21 /* No matching outer block for dedent */
|
#define E_DEDENT 21 /* No matching outer block for dedent */
|
||||||
#define E_DECODE 22 /* Error in decoding into Unicode */
|
#define E_DECODE 22 /* Error in decoding into Unicode */
|
||||||
|
#define E_EOFS 23 /* EOF in triple-quoted string */
|
||||||
|
#define E_EOLS 24 /* EOL in single-quoted string */
|
||||||
#define E_LINECONT 25 /* Unexpected characters after a line continuation */
|
#define E_LINECONT 25 /* Unexpected characters after a line continuation */
|
||||||
#define E_BADSINGLE 27 /* Ill-formed single statement input */
|
#define E_BADSINGLE 27 /* Ill-formed single statement input */
|
||||||
#define E_INTERACT_STOP 28 /* Interactive mode stopped tokenization */
|
#define E_INTERACT_STOP 28 /* Interactive mode stopped tokenization */
|
||||||
|
|
|
@ -10,30 +10,6 @@ and:
|
||||||
syntax error (OverflowError and ValueError can be produced by
|
syntax error (OverflowError and ValueError can be produced by
|
||||||
malformed literals).
|
malformed literals).
|
||||||
|
|
||||||
Approach:
|
|
||||||
|
|
||||||
First, check if the source consists entirely of blank lines and
|
|
||||||
comments; if so, replace it with 'pass', because the built-in
|
|
||||||
parser doesn't always do the right thing for these.
|
|
||||||
|
|
||||||
Compile three times: as is, with \n, and with \n\n appended. If it
|
|
||||||
compiles as is, it's complete. If it compiles with one \n appended,
|
|
||||||
we expect more. If it doesn't compile either way, we compare the
|
|
||||||
error we get when compiling with \n or \n\n appended. If the errors
|
|
||||||
are the same, the code is broken. But if the errors are different, we
|
|
||||||
expect more. Not intuitive; not even guaranteed to hold in future
|
|
||||||
releases; but this matches the compiler's behavior from Python 1.4
|
|
||||||
through 2.2, at least.
|
|
||||||
|
|
||||||
Caveat:
|
|
||||||
|
|
||||||
It is possible (but not likely) that the parser stops parsing with a
|
|
||||||
successful outcome before reaching the end of the source; in this
|
|
||||||
case, trailing symbols may be ignored instead of causing an error.
|
|
||||||
For example, a backslash followed by two newlines may be followed by
|
|
||||||
arbitrary garbage. This will be fixed once the API for the parser is
|
|
||||||
better.
|
|
||||||
|
|
||||||
The two interfaces are:
|
The two interfaces are:
|
||||||
|
|
||||||
compile_command(source, filename, symbol):
|
compile_command(source, filename, symbol):
|
||||||
|
@ -64,7 +40,11 @@ _features = [getattr(__future__, fname)
|
||||||
|
|
||||||
__all__ = ["compile_command", "Compile", "CommandCompiler"]
|
__all__ = ["compile_command", "Compile", "CommandCompiler"]
|
||||||
|
|
||||||
PyCF_DONT_IMPLY_DEDENT = 0x200 # Matches pythonrun.h.
|
# The following flags match the values from Include/cpython/compile.h
|
||||||
|
# Caveat emptor: These flags are undocumented on purpose and depending
|
||||||
|
# on their effect outside the standard library is **unsupported**.
|
||||||
|
PyCF_DONT_IMPLY_DEDENT = 0x200
|
||||||
|
PyCF_ALLOW_INCOMPLETE_INPUT = 0x4000
|
||||||
|
|
||||||
def _maybe_compile(compiler, source, filename, symbol):
|
def _maybe_compile(compiler, source, filename, symbol):
|
||||||
# Check for source consisting of only blank lines and comments.
|
# Check for source consisting of only blank lines and comments.
|
||||||
|
@ -86,24 +66,12 @@ def _maybe_compile(compiler, source, filename, symbol):
|
||||||
with warnings.catch_warnings():
|
with warnings.catch_warnings():
|
||||||
warnings.simplefilter("error")
|
warnings.simplefilter("error")
|
||||||
|
|
||||||
code1 = err1 = err2 = None
|
|
||||||
try:
|
try:
|
||||||
code1 = compiler(source + "\n", filename, symbol)
|
compiler(source + "\n", filename, symbol)
|
||||||
except SyntaxError as e:
|
except SyntaxError as e:
|
||||||
err1 = e
|
if "incomplete input" in str(e):
|
||||||
|
|
||||||
try:
|
|
||||||
code2 = compiler(source + "\n\n", filename, symbol)
|
|
||||||
except SyntaxError as e:
|
|
||||||
err2 = e
|
|
||||||
|
|
||||||
try:
|
|
||||||
if not code1 and _is_syntax_error(err1, err2):
|
|
||||||
raise err1
|
|
||||||
else:
|
|
||||||
return None
|
return None
|
||||||
finally:
|
raise
|
||||||
err1 = err2 = None
|
|
||||||
|
|
||||||
def _is_syntax_error(err1, err2):
|
def _is_syntax_error(err1, err2):
|
||||||
rep1 = repr(err1)
|
rep1 = repr(err1)
|
||||||
|
@ -115,7 +83,7 @@ def _is_syntax_error(err1, err2):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _compile(source, filename, symbol):
|
def _compile(source, filename, symbol):
|
||||||
return compile(source, filename, symbol, PyCF_DONT_IMPLY_DEDENT)
|
return compile(source, filename, symbol, PyCF_DONT_IMPLY_DEDENT | PyCF_ALLOW_INCOMPLETE_INPUT)
|
||||||
|
|
||||||
def compile_command(source, filename="<input>", symbol="single"):
|
def compile_command(source, filename="<input>", symbol="single"):
|
||||||
r"""Compile a command and determine whether it is incomplete.
|
r"""Compile a command and determine whether it is incomplete.
|
||||||
|
@ -144,7 +112,7 @@ class Compile:
|
||||||
statement, it "remembers" and compiles all subsequent program texts
|
statement, it "remembers" and compiles all subsequent program texts
|
||||||
with the statement in force."""
|
with the statement in force."""
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.flags = PyCF_DONT_IMPLY_DEDENT
|
self.flags = PyCF_DONT_IMPLY_DEDENT | PyCF_ALLOW_INCOMPLETE_INPUT
|
||||||
|
|
||||||
def __call__(self, source, filename, symbol):
|
def __call__(self, source, filename, symbol):
|
||||||
codeob = compile(source, filename, symbol, self.flags, True)
|
codeob = compile(source, filename, symbol, self.flags, True)
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Fix a bug in the :mod:`codeop` module that was incorrectly identifying
|
||||||
|
invalid code involving string quotes as valid code.
|
|
@ -726,6 +726,9 @@ compute_parser_flags(PyCompilerFlags *flags)
|
||||||
if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 7) {
|
if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 7) {
|
||||||
parser_flags |= PyPARSE_ASYNC_HACKS;
|
parser_flags |= PyPARSE_ASYNC_HACKS;
|
||||||
}
|
}
|
||||||
|
if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) {
|
||||||
|
parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT;
|
||||||
|
}
|
||||||
return parser_flags;
|
return parser_flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -811,12 +814,22 @@ reset_parser_state_for_error_pass(Parser *p)
|
||||||
p->tok->interactive_underflow = IUNDERFLOW_STOP;
|
p->tok->interactive_underflow = IUNDERFLOW_STOP;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int
|
||||||
|
_is_end_of_source(Parser *p) {
|
||||||
|
int err = p->tok->done;
|
||||||
|
return err == E_EOF || err == E_EOFS || err == E_EOLS;
|
||||||
|
}
|
||||||
|
|
||||||
void *
|
void *
|
||||||
_PyPegen_run_parser(Parser *p)
|
_PyPegen_run_parser(Parser *p)
|
||||||
{
|
{
|
||||||
void *res = _PyPegen_parse(p);
|
void *res = _PyPegen_parse(p);
|
||||||
assert(p->level == 0);
|
assert(p->level == 0);
|
||||||
if (res == NULL) {
|
if (res == NULL) {
|
||||||
|
if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) && _is_end_of_source(p)) {
|
||||||
|
PyErr_Clear();
|
||||||
|
return RAISE_SYNTAX_ERROR("incomplete input");
|
||||||
|
}
|
||||||
if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
|
if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
#define PyPARSE_BARRY_AS_BDFL 0x0020
|
#define PyPARSE_BARRY_AS_BDFL 0x0020
|
||||||
#define PyPARSE_TYPE_COMMENTS 0x0040
|
#define PyPARSE_TYPE_COMMENTS 0x0040
|
||||||
#define PyPARSE_ASYNC_HACKS 0x0080
|
#define PyPARSE_ASYNC_HACKS 0x0080
|
||||||
|
#define PyPARSE_ALLOW_INCOMPLETE_INPUT 0x0100
|
||||||
|
|
||||||
#define CURRENT_POS (-5)
|
#define CURRENT_POS (-5)
|
||||||
|
|
||||||
|
|
|
@ -40,7 +40,7 @@
|
||||||
static struct tok_state *tok_new(void);
|
static struct tok_state *tok_new(void);
|
||||||
static int tok_nextc(struct tok_state *tok);
|
static int tok_nextc(struct tok_state *tok);
|
||||||
static void tok_backup(struct tok_state *tok, int c);
|
static void tok_backup(struct tok_state *tok, int c);
|
||||||
|
static int syntaxerror(struct tok_state *tok, const char *format, ...);
|
||||||
|
|
||||||
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
|
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
|
||||||
tokenizing. */
|
tokenizing. */
|
||||||
|
@ -1031,8 +1031,9 @@ tok_nextc(struct tok_state *tok)
|
||||||
if (tok->cur != tok->inp) {
|
if (tok->cur != tok->inp) {
|
||||||
return Py_CHARMASK(*tok->cur++); /* Fast path */
|
return Py_CHARMASK(*tok->cur++); /* Fast path */
|
||||||
}
|
}
|
||||||
if (tok->done != E_OK)
|
if (tok->done != E_OK) {
|
||||||
return EOF;
|
return EOF;
|
||||||
|
}
|
||||||
if (tok->fp == NULL) {
|
if (tok->fp == NULL) {
|
||||||
rc = tok_underflow_string(tok);
|
rc = tok_underflow_string(tok);
|
||||||
}
|
}
|
||||||
|
@ -1964,16 +1965,21 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
||||||
tok->line_start = tok->multi_line_start;
|
tok->line_start = tok->multi_line_start;
|
||||||
int start = tok->lineno;
|
int start = tok->lineno;
|
||||||
tok->lineno = tok->first_lineno;
|
tok->lineno = tok->first_lineno;
|
||||||
|
|
||||||
if (quote_size == 3) {
|
if (quote_size == 3) {
|
||||||
return syntaxerror(tok,
|
syntaxerror(tok, "unterminated triple-quoted string literal"
|
||||||
"unterminated triple-quoted string literal"
|
|
||||||
" (detected at line %d)", start);
|
" (detected at line %d)", start);
|
||||||
|
if (c != '\n') {
|
||||||
|
tok->done = E_EOFS;
|
||||||
|
}
|
||||||
|
return ERRORTOKEN;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return syntaxerror(tok,
|
syntaxerror(tok, "unterminated string literal (detected at"
|
||||||
"unterminated string literal (detected at"
|
|
||||||
" line %d)", start);
|
" line %d)", start);
|
||||||
|
if (c != '\n') {
|
||||||
|
tok->done = E_EOLS;
|
||||||
|
}
|
||||||
|
return ERRORTOKEN;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (c == quote) {
|
if (c == quote) {
|
||||||
|
|
Loading…
Reference in New Issue