bpo-40334: Add support for feature_version in new PEG parser (GH-19827)
`ast.parse` and `compile` support a `feature_version` parameter that tells the parser to parse the input string as if it were written for an older Python version. The `feature_version` is propagated to the tokenizer, which uses it to handle the three different stages of support for `async` and `await`. Additionally, the following are disallowed at parser level:
- The '@' operator in < 3.5
- Async functions in < 3.5
- Async comprehensions in < 3.6
- Underscores in numeric literals in < 3.6
- Await expressions in < 3.5
- Variable annotations in < 3.6
- Async for-loops in < 3.5
- Async with-statements in < 3.5
- F-strings in < 3.6

Closes we-like-parsers/cpython#124.
This commit is contained in:
parent
eb0d359b4b
commit
3e0a6f37df
|
@ -80,10 +80,14 @@ compound_stmt[stmt_ty]:
|
|||
# NOTE: annotated_rhs may start with 'yield'; yield_expr must start with 'yield'
|
||||
assignment:
|
||||
| a=NAME ':' b=expression c=['=' d=annotated_rhs { d }] {
|
||||
_Py_AnnAssign(CHECK(_PyPegen_set_expr_context(p, a, Store)), b, c, 1, EXTRA) }
|
||||
CHECK_VERSION(
|
||||
6,
|
||||
"Variable annotation syntax is",
|
||||
_Py_AnnAssign(CHECK(_PyPegen_set_expr_context(p, a, Store)), b, c, 1, EXTRA)
|
||||
) }
|
||||
| a=('(' b=inside_paren_ann_assign_target ')' { b }
|
||||
| ann_assign_subscript_attribute_target) ':' b=expression c=['=' d=annotated_rhs { d }] {
|
||||
_Py_AnnAssign(a, b, c, 0, EXTRA)}
|
||||
CHECK_VERSION(6, "Variable annotation syntax is", _Py_AnnAssign(a, b, c, 0, EXTRA)) }
|
||||
| a=(z=star_targets '=' { z })+ b=(yield_expr | star_expressions) tc=[TYPE_COMMENT] {
|
||||
_Py_Assign(a, b, NEW_TYPE_COMMENT(p, tc), EXTRA) }
|
||||
| a=target b=augassign c=(yield_expr | star_expressions) {
|
||||
|
@ -94,7 +98,7 @@ augassign[AugOperator*]:
|
|||
| '+=' { _PyPegen_augoperator(p, Add) }
|
||||
| '-=' { _PyPegen_augoperator(p, Sub) }
|
||||
| '*=' { _PyPegen_augoperator(p, Mult) }
|
||||
| '@=' {_PyPegen_augoperator(p, MatMult)}
|
||||
| '@=' { CHECK_VERSION(5, "The '@' operator is", _PyPegen_augoperator(p, MatMult)) }
|
||||
| '/=' { _PyPegen_augoperator(p, Div) }
|
||||
| '%=' { _PyPegen_augoperator(p, Mod) }
|
||||
| '&=' { _PyPegen_augoperator(p, BitAnd) }
|
||||
|
@ -156,14 +160,20 @@ while_stmt[stmt_ty]:
|
|||
| 'while' a=named_expression ':' b=block c=[else_block] { _Py_While(a, b, c, EXTRA) }
|
||||
|
||||
for_stmt[stmt_ty]:
|
||||
| is_async=[ASYNC] 'for' t=star_targets 'in' ex=star_expressions ':' tc=[TYPE_COMMENT] b=block el=[else_block] {
|
||||
(is_async ? _Py_AsyncFor : _Py_For)(t, ex, b, el, NEW_TYPE_COMMENT(p, tc), EXTRA) }
|
||||
| 'for' t=star_targets 'in' ex=star_expressions ':' tc=[TYPE_COMMENT] b=block el=[else_block] {
|
||||
_Py_For(t, ex, b, el, NEW_TYPE_COMMENT(p, tc), EXTRA) }
|
||||
| ASYNC 'for' t=star_targets 'in' ex=star_expressions ':' tc=[TYPE_COMMENT] b=block el=[else_block] {
|
||||
CHECK_VERSION(5, "Async for loops are", _Py_AsyncFor(t, ex, b, el, NEW_TYPE_COMMENT(p, tc), EXTRA)) }
|
||||
|
||||
with_stmt[stmt_ty]:
|
||||
| is_async=[ASYNC] 'with' '(' a=','.with_item+ ')' ':' b=block {
|
||||
(is_async ? _Py_AsyncWith : _Py_With)(a, b, NULL, EXTRA) }
|
||||
| is_async=[ASYNC] 'with' a=','.with_item+ ':' tc=[TYPE_COMMENT] b=block {
|
||||
(is_async ? _Py_AsyncWith : _Py_With)(a, b, NEW_TYPE_COMMENT(p, tc), EXTRA) }
|
||||
| 'with' '(' a=','.with_item+ ')' ':' b=block {
|
||||
_Py_With(a, b, NULL, EXTRA) }
|
||||
| 'with' a=','.with_item+ ':' tc=[TYPE_COMMENT] b=block {
|
||||
_Py_With(a, b, NEW_TYPE_COMMENT(p, tc), EXTRA) }
|
||||
| ASYNC 'with' '(' a=','.with_item+ ')' ':' b=block {
|
||||
CHECK_VERSION(5, "Async with statements are", _Py_AsyncWith(a, b, NULL, EXTRA)) }
|
||||
| ASYNC 'with' a=','.with_item+ ':' tc=[TYPE_COMMENT] b=block {
|
||||
CHECK_VERSION(5, "Async with statements are", _Py_AsyncWith(a, b, NEW_TYPE_COMMENT(p, tc), EXTRA)) }
|
||||
with_item[withitem_ty]:
|
||||
| e=expression o=['as' t=target { t }] { _Py_withitem(e, o, p->arena) }
|
||||
|
||||
|
@ -188,10 +198,18 @@ function_def[stmt_ty]:
|
|||
| function_def_raw
|
||||
|
||||
function_def_raw[stmt_ty]:
|
||||
| is_async=[ASYNC] 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] ':' tc=[func_type_comment] b=block {
|
||||
(is_async ? _Py_AsyncFunctionDef : _Py_FunctionDef)(n->v.Name.id,
|
||||
| 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] ':' tc=[func_type_comment] b=block {
|
||||
_Py_FunctionDef(n->v.Name.id,
|
||||
(params) ? params : CHECK(_PyPegen_empty_arguments(p)),
|
||||
b, NULL, a, NEW_TYPE_COMMENT(p, tc), EXTRA) }
|
||||
| ASYNC 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] ':' tc=[func_type_comment] b=block {
|
||||
CHECK_VERSION(
|
||||
5,
|
||||
"Async functions are",
|
||||
_Py_AsyncFunctionDef(n->v.Name.id,
|
||||
(params) ? params : CHECK(_PyPegen_empty_arguments(p)),
|
||||
b, NULL, a, NEW_TYPE_COMMENT(p, tc), EXTRA)
|
||||
) }
|
||||
func_type_comment[PyObject*]:
|
||||
| NEWLINE t=TYPE_COMMENT &(NEWLINE INDENT) { t } # Must be followed by indented block
|
||||
| invalid_double_type_comments
|
||||
|
@ -399,7 +417,7 @@ term[expr_ty]:
|
|||
| a=term '/' b=factor { _Py_BinOp(a, Div, b, EXTRA) }
|
||||
| a=term '//' b=factor { _Py_BinOp(a, FloorDiv, b, EXTRA) }
|
||||
| a=term '%' b=factor { _Py_BinOp(a, Mod, b, EXTRA) }
|
||||
| a=term '@' b=factor { _Py_BinOp(a, MatMult, b, EXTRA) }
|
||||
| a=term '@' b=factor { CHECK_VERSION(5, "The '@' operator is", _Py_BinOp(a, MatMult, b, EXTRA)) }
|
||||
| factor
|
||||
factor[expr_ty] (memo):
|
||||
| '+' a=factor { _Py_UnaryOp(UAdd, a, EXTRA) }
|
||||
|
@ -410,7 +428,7 @@ power[expr_ty]:
|
|||
| a=await_primary '**' b=factor { _Py_BinOp(a, Pow, b, EXTRA) }
|
||||
| await_primary
|
||||
await_primary[expr_ty] (memo):
|
||||
| AWAIT a=primary { _Py_Await(a, EXTRA) }
|
||||
| AWAIT a=primary { CHECK_VERSION(5, "Await expressions are", _Py_Await(a, EXTRA)) }
|
||||
| primary
|
||||
primary[expr_ty]:
|
||||
| a=primary '.' b=NAME { _Py_Attribute(a, b->v.Name.id, Load, EXTRA) }
|
||||
|
@ -469,8 +487,12 @@ kvpair[KeyValuePair*]:
|
|||
| '**' a=bitwise_or { _PyPegen_key_value_pair(p, NULL, a) }
|
||||
| a=expression ':' b=expression { _PyPegen_key_value_pair(p, a, b) }
|
||||
for_if_clauses[asdl_seq*]:
|
||||
| a=(y=[ASYNC] 'for' a=star_targets 'in' b=disjunction c=('if' z=disjunction { z })*
|
||||
{ _Py_comprehension(a, b, c, y != NULL, p->arena) })+ { a }
|
||||
| for_if_clause+
|
||||
for_if_clause[comprehension_ty]:
|
||||
| ASYNC 'for' a=star_targets 'in' b=disjunction c=('if' z=disjunction { z })* {
|
||||
CHECK_VERSION(6, "Async comprehensions are", _Py_comprehension(a, b, c, 1, p->arena)) }
|
||||
| 'for' a=star_targets 'in' b=disjunction c=('if' z=disjunction { z })* {
|
||||
_Py_comprehension(a, b, c, 0, p->arena) }
|
||||
|
||||
yield_expr[expr_ty]:
|
||||
| 'yield' 'from' a=expression { _Py_YieldFrom(a, EXTRA) }
|
||||
|
|
|
@ -252,7 +252,6 @@ class TypeCommentTests(unittest.TestCase):
|
|||
self.assertEqual(tree.body[0].type_comment, None)
|
||||
self.assertEqual(tree.body[1].type_comment, None)
|
||||
|
||||
@support.skip_if_new_parser("Pegen does not support feature_version yet")
|
||||
def test_asyncdef(self):
|
||||
for tree in self.parse_all(asyncdef, minver=5):
|
||||
self.assertEqual(tree.body[0].type_comment, "() -> int")
|
||||
|
@ -261,27 +260,22 @@ class TypeCommentTests(unittest.TestCase):
|
|||
self.assertEqual(tree.body[0].type_comment, None)
|
||||
self.assertEqual(tree.body[1].type_comment, None)
|
||||
|
||||
@support.skip_if_new_parser("Pegen does not support feature_version yet")
|
||||
def test_asyncvar(self):
|
||||
for tree in self.parse_all(asyncvar, maxver=6):
|
||||
pass
|
||||
|
||||
@support.skip_if_new_parser("Pegen does not support feature_version yet")
|
||||
def test_asynccomp(self):
|
||||
for tree in self.parse_all(asynccomp, minver=6):
|
||||
pass
|
||||
|
||||
@support.skip_if_new_parser("Pegen does not support feature_version yet")
|
||||
def test_matmul(self):
|
||||
for tree in self.parse_all(matmul, minver=5):
|
||||
pass
|
||||
|
||||
@support.skip_if_new_parser("Pegen does not support feature_version yet")
|
||||
def test_fstring(self):
|
||||
for tree in self.parse_all(fstring, minver=6):
|
||||
pass
|
||||
|
||||
@support.skip_if_new_parser("Pegen does not support feature_version yet")
|
||||
def test_underscorednumber(self):
|
||||
for tree in self.parse_all(underscorednumber, minver=6):
|
||||
pass
|
||||
|
|
3258
Parser/pegen/parse.c
3258
Parser/pegen/parse.c
File diff suppressed because it is too large
Load Diff
|
@ -179,6 +179,13 @@ _PyPegen_parsestr(Parser *p, const char *s, int *bytesmode, int *rawmode, PyObje
|
|||
}
|
||||
}
|
||||
|
||||
/* fstrings are only allowed in Python 3.6 and greater */
|
||||
if (fmode && p->feature_version < 6) {
|
||||
p->error_indicator = 1;
|
||||
RAISE_SYNTAX_ERROR("Format strings are only supported in Python 3.6 and greater");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (fmode && *bytesmode) {
|
||||
PyErr_BadInternalCall();
|
||||
return -1;
|
||||
|
@ -595,7 +602,8 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, NULL, p->arena);
|
||||
Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version,
|
||||
NULL, p->arena);
|
||||
p2->starting_lineno = p->starting_lineno + p->tok->first_lineno - 1;
|
||||
p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno
|
||||
? p->starting_col_offset + t->col_offset : 0;
|
||||
|
|
|
@ -933,11 +933,16 @@ _PyPegen_number_token(Parser *p)
|
|||
}
|
||||
|
||||
char *num_raw = PyBytes_AsString(t->bytes);
|
||||
|
||||
if (num_raw == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (p->feature_version < 6 && strchr(num_raw, '_') != NULL) {
|
||||
p->error_indicator = 1;
|
||||
return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported "
|
||||
"in Python 3.6 and greater");
|
||||
}
|
||||
|
||||
PyObject *c = parsenumber(num_raw);
|
||||
|
||||
if (c == NULL) {
|
||||
|
@ -1030,12 +1035,15 @@ compute_parser_flags(PyCompilerFlags *flags)
|
|||
if (flags->cf_flags & PyCF_TYPE_COMMENTS) {
|
||||
parser_flags |= PyPARSE_TYPE_COMMENTS;
|
||||
}
|
||||
if (flags->cf_feature_version < 7) {
|
||||
parser_flags |= PyPARSE_ASYNC_HACKS;
|
||||
}
|
||||
return parser_flags;
|
||||
}
|
||||
|
||||
Parser *
|
||||
_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
|
||||
int *errcode, PyArena *arena)
|
||||
int feature_version, int *errcode, PyArena *arena)
|
||||
{
|
||||
Parser *p = PyMem_Malloc(sizeof(Parser));
|
||||
if (p == NULL) {
|
||||
|
@ -1077,6 +1085,7 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
|
|||
p->starting_lineno = 0;
|
||||
p->starting_col_offset = 0;
|
||||
p->flags = flags;
|
||||
p->feature_version = feature_version;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
@ -1138,7 +1147,8 @@ _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filena
|
|||
mod_ty result = NULL;
|
||||
|
||||
int parser_flags = compute_parser_flags(flags);
|
||||
Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, errcode, arena);
|
||||
Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, PY_MINOR_VERSION,
|
||||
errcode, arena);
|
||||
if (p == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
@ -1194,9 +1204,12 @@ _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filen
|
|||
mod_ty result = NULL;
|
||||
|
||||
int parser_flags = compute_parser_flags(flags);
|
||||
int feature_version = flags ? flags->cf_feature_version : PY_MINOR_VERSION;
|
||||
tok->type_comments = (parser_flags & PyPARSE_TYPE_COMMENTS) > 0;
|
||||
tok->async_hacks = (parser_flags & PyPARSE_ASYNC_HACKS) > 0;
|
||||
|
||||
Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, NULL, arena);
|
||||
Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, feature_version,
|
||||
NULL, arena);
|
||||
if (p == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
|
|
@ -69,6 +69,7 @@ typedef struct {
|
|||
int starting_col_offset;
|
||||
int error_indicator;
|
||||
int flags;
|
||||
int feature_version;
|
||||
growable_comment_array type_ignore_comments;
|
||||
} Parser;
|
||||
|
||||
|
@ -180,9 +181,26 @@ NEW_TYPE_COMMENT(Parser *p, Token *tc)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
Py_LOCAL_INLINE(void *)
|
||||
INVALID_VERSION_CHECK(Parser *p, int version, char *msg, void *node)
|
||||
{
|
||||
if (node == NULL) {
|
||||
p->error_indicator = 1; // Inline CHECK_CALL
|
||||
return NULL;
|
||||
}
|
||||
if (p->feature_version < version) {
|
||||
p->error_indicator = 1;
|
||||
return _PyPegen_raise_error(p, PyExc_SyntaxError, "%s only supported in Python 3.%i and greater",
|
||||
msg, version);
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
// Shorthand for INVALID_VERSION_CHECK that passes the variable named `p` found at the expansion site as the parser argument.
#define CHECK_VERSION(version, msg, node) INVALID_VERSION_CHECK(p, version, msg, node)
|
||||
|
||||
arg_ty _PyPegen_add_type_comment_to_arg(Parser *, arg_ty, Token *);
|
||||
PyObject *_PyPegen_new_identifier(Parser *, char *);
|
||||
Parser *_PyPegen_Parser_New(struct tok_state *, int, int, int *, PyArena *);
|
||||
Parser *_PyPegen_Parser_New(struct tok_state *, int, int, int, int *, PyArena *);
|
||||
void _PyPegen_Parser_Free(Parser *);
|
||||
mod_ty _PyPegen_run_parser_from_file_pointer(FILE *, int, PyObject *, const char *,
|
||||
const char *, const char *, PyCompilerFlags *, int *, PyArena *);
|
||||
|
|
Loading…
Reference in New Issue