bpo-40334: Add support for feature_version in new PEG parser (GH-19827)

`ast.parse` and `compile` support a `feature_version` parameter that
tells the parser to parse the input string as if it were written for
an older Python version.
The `feature_version` is propagated to the tokenizer, which uses it
to handle the three different stages of support for `async` and
`await`. Additionally, the parser itself now rejects the following
constructs when `feature_version` is too low:
- The '@' operator in < 3.5
- Async functions in < 3.5
- Async comprehensions in < 3.6
- Underscores in numeric literals in < 3.6
- Await expressions in < 3.5
- Variable annotations in < 3.6
- Async for-loops in < 3.5
- Async with-statements in < 3.5
- F-strings in < 3.6

Closes we-like-parsers/cpython#124.
This commit is contained in:
Lysandros Nikolaou 2020-05-01 06:27:52 +03:00 committed by GitHub
parent eb0d359b4b
commit 3e0a6f37df
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 1970 additions and 1425 deletions

View File

@ -80,10 +80,14 @@ compound_stmt[stmt_ty]:
# NOTE: annotated_rhs may start with 'yield'; yield_expr must start with 'yield' # NOTE: annotated_rhs may start with 'yield'; yield_expr must start with 'yield'
assignment: assignment:
| a=NAME ':' b=expression c=['=' d=annotated_rhs { d }] { | a=NAME ':' b=expression c=['=' d=annotated_rhs { d }] {
_Py_AnnAssign(CHECK(_PyPegen_set_expr_context(p, a, Store)), b, c, 1, EXTRA) } CHECK_VERSION(
6,
"Variable annotation syntax is",
_Py_AnnAssign(CHECK(_PyPegen_set_expr_context(p, a, Store)), b, c, 1, EXTRA)
) }
| a=('(' b=inside_paren_ann_assign_target ')' { b } | a=('(' b=inside_paren_ann_assign_target ')' { b }
| ann_assign_subscript_attribute_target) ':' b=expression c=['=' d=annotated_rhs { d }] { | ann_assign_subscript_attribute_target) ':' b=expression c=['=' d=annotated_rhs { d }] {
_Py_AnnAssign(a, b, c, 0, EXTRA)} CHECK_VERSION(6, "Variable annotations syntax is", _Py_AnnAssign(a, b, c, 0, EXTRA)) }
| a=(z=star_targets '=' { z })+ b=(yield_expr | star_expressions) tc=[TYPE_COMMENT] { | a=(z=star_targets '=' { z })+ b=(yield_expr | star_expressions) tc=[TYPE_COMMENT] {
_Py_Assign(a, b, NEW_TYPE_COMMENT(p, tc), EXTRA) } _Py_Assign(a, b, NEW_TYPE_COMMENT(p, tc), EXTRA) }
| a=target b=augassign c=(yield_expr | star_expressions) { | a=target b=augassign c=(yield_expr | star_expressions) {
@ -91,19 +95,19 @@ assignment:
| invalid_assignment | invalid_assignment
augassign[AugOperator*]: augassign[AugOperator*]:
| '+=' {_PyPegen_augoperator(p, Add)} | '+=' { _PyPegen_augoperator(p, Add) }
| '-=' {_PyPegen_augoperator(p, Sub)} | '-=' { _PyPegen_augoperator(p, Sub) }
| '*=' {_PyPegen_augoperator(p, Mult)} | '*=' { _PyPegen_augoperator(p, Mult) }
| '@=' {_PyPegen_augoperator(p, MatMult)} | '@=' { CHECK_VERSION(5, "The '@' operator is", _PyPegen_augoperator(p, MatMult)) }
| '/=' {_PyPegen_augoperator(p, Div)} | '/=' { _PyPegen_augoperator(p, Div) }
| '%=' {_PyPegen_augoperator(p, Mod)} | '%=' { _PyPegen_augoperator(p, Mod) }
| '&=' {_PyPegen_augoperator(p, BitAnd)} | '&=' { _PyPegen_augoperator(p, BitAnd) }
| '|=' {_PyPegen_augoperator(p, BitOr)} | '|=' { _PyPegen_augoperator(p, BitOr) }
| '^=' {_PyPegen_augoperator(p, BitXor)} | '^=' { _PyPegen_augoperator(p, BitXor) }
| '<<=' {_PyPegen_augoperator(p, LShift)} | '<<=' { _PyPegen_augoperator(p, LShift) }
| '>>=' {_PyPegen_augoperator(p, RShift)} | '>>=' { _PyPegen_augoperator(p, RShift) }
| '**=' {_PyPegen_augoperator(p, Pow)} | '**=' { _PyPegen_augoperator(p, Pow) }
| '//=' {_PyPegen_augoperator(p, FloorDiv)} | '//=' { _PyPegen_augoperator(p, FloorDiv) }
global_stmt[stmt_ty]: 'global' a=','.NAME+ { global_stmt[stmt_ty]: 'global' a=','.NAME+ {
_Py_Global(CHECK(_PyPegen_map_names_to_ids(p, a)), EXTRA) } _Py_Global(CHECK(_PyPegen_map_names_to_ids(p, a)), EXTRA) }
@ -156,14 +160,20 @@ while_stmt[stmt_ty]:
| 'while' a=named_expression ':' b=block c=[else_block] { _Py_While(a, b, c, EXTRA) } | 'while' a=named_expression ':' b=block c=[else_block] { _Py_While(a, b, c, EXTRA) }
for_stmt[stmt_ty]: for_stmt[stmt_ty]:
| is_async=[ASYNC] 'for' t=star_targets 'in' ex=star_expressions ':' tc=[TYPE_COMMENT] b=block el=[else_block] { | 'for' t=star_targets 'in' ex=star_expressions ':' tc=[TYPE_COMMENT] b=block el=[else_block] {
(is_async ? _Py_AsyncFor : _Py_For)(t, ex, b, el, NEW_TYPE_COMMENT(p, tc), EXTRA) } _Py_For(t, ex, b, el, NEW_TYPE_COMMENT(p, tc), EXTRA) }
| ASYNC 'for' t=star_targets 'in' ex=star_expressions ':' tc=[TYPE_COMMENT] b=block el=[else_block] {
CHECK_VERSION(5, "Async for loops are", _Py_AsyncFor(t, ex, b, el, NEW_TYPE_COMMENT(p, tc), EXTRA)) }
with_stmt[stmt_ty]: with_stmt[stmt_ty]:
| is_async=[ASYNC] 'with' '(' a=','.with_item+ ')' ':' b=block { | 'with' '(' a=','.with_item+ ')' ':' b=block {
(is_async ? _Py_AsyncWith : _Py_With)(a, b, NULL, EXTRA) } _Py_With(a, b, NULL, EXTRA) }
| is_async=[ASYNC] 'with' a=','.with_item+ ':' tc=[TYPE_COMMENT] b=block { | 'with' a=','.with_item+ ':' tc=[TYPE_COMMENT] b=block {
(is_async ? _Py_AsyncWith : _Py_With)(a, b, NEW_TYPE_COMMENT(p, tc), EXTRA) } _Py_With(a, b, NEW_TYPE_COMMENT(p, tc), EXTRA) }
| ASYNC 'with' '(' a=','.with_item+ ')' ':' b=block {
CHECK_VERSION(5, "Async with statements are", _Py_AsyncWith(a, b, NULL, EXTRA)) }
| ASYNC 'with' a=','.with_item+ ':' tc=[TYPE_COMMENT] b=block {
CHECK_VERSION(5, "Async with statements are", _Py_AsyncWith(a, b, NEW_TYPE_COMMENT(p, tc), EXTRA)) }
with_item[withitem_ty]: with_item[withitem_ty]:
| e=expression o=['as' t=target { t }] { _Py_withitem(e, o, p->arena) } | e=expression o=['as' t=target { t }] { _Py_withitem(e, o, p->arena) }
@ -188,10 +198,18 @@ function_def[stmt_ty]:
| function_def_raw | function_def_raw
function_def_raw[stmt_ty]: function_def_raw[stmt_ty]:
| is_async=[ASYNC] 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] ':' tc=[func_type_comment] b=block { | 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] ':' tc=[func_type_comment] b=block {
(is_async ? _Py_AsyncFunctionDef : _Py_FunctionDef)(n->v.Name.id, _Py_FunctionDef(n->v.Name.id,
(params) ? params : CHECK(_PyPegen_empty_arguments(p)), (params) ? params : CHECK(_PyPegen_empty_arguments(p)),
b, NULL, a, NEW_TYPE_COMMENT(p, tc), EXTRA) } b, NULL, a, NEW_TYPE_COMMENT(p, tc), EXTRA) }
| ASYNC 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] ':' tc=[func_type_comment] b=block {
CHECK_VERSION(
5,
"Async functions are",
_Py_AsyncFunctionDef(n->v.Name.id,
(params) ? params : CHECK(_PyPegen_empty_arguments(p)),
b, NULL, a, NEW_TYPE_COMMENT(p, tc), EXTRA)
) }
func_type_comment[PyObject*]: func_type_comment[PyObject*]:
| NEWLINE t=TYPE_COMMENT &(NEWLINE INDENT) { t } # Must be followed by indented block | NEWLINE t=TYPE_COMMENT &(NEWLINE INDENT) { t } # Must be followed by indented block
| invalid_double_type_comments | invalid_double_type_comments
@ -399,7 +417,7 @@ term[expr_ty]:
| a=term '/' b=factor { _Py_BinOp(a, Div, b, EXTRA) } | a=term '/' b=factor { _Py_BinOp(a, Div, b, EXTRA) }
| a=term '//' b=factor { _Py_BinOp(a, FloorDiv, b, EXTRA) } | a=term '//' b=factor { _Py_BinOp(a, FloorDiv, b, EXTRA) }
| a=term '%' b=factor { _Py_BinOp(a, Mod, b, EXTRA) } | a=term '%' b=factor { _Py_BinOp(a, Mod, b, EXTRA) }
| a=term '@' b=factor { _Py_BinOp(a, MatMult, b, EXTRA) } | a=term '@' b=factor { CHECK_VERSION(5, "The '@' operator is", _Py_BinOp(a, MatMult, b, EXTRA)) }
| factor | factor
factor[expr_ty] (memo): factor[expr_ty] (memo):
| '+' a=factor { _Py_UnaryOp(UAdd, a, EXTRA) } | '+' a=factor { _Py_UnaryOp(UAdd, a, EXTRA) }
@ -410,7 +428,7 @@ power[expr_ty]:
| a=await_primary '**' b=factor { _Py_BinOp(a, Pow, b, EXTRA) } | a=await_primary '**' b=factor { _Py_BinOp(a, Pow, b, EXTRA) }
| await_primary | await_primary
await_primary[expr_ty] (memo): await_primary[expr_ty] (memo):
| AWAIT a=primary { _Py_Await(a, EXTRA) } | AWAIT a=primary { CHECK_VERSION(5, "Await expressions are", _Py_Await(a, EXTRA)) }
| primary | primary
primary[expr_ty]: primary[expr_ty]:
| a=primary '.' b=NAME { _Py_Attribute(a, b->v.Name.id, Load, EXTRA) } | a=primary '.' b=NAME { _Py_Attribute(a, b->v.Name.id, Load, EXTRA) }
@ -469,8 +487,12 @@ kvpair[KeyValuePair*]:
| '**' a=bitwise_or { _PyPegen_key_value_pair(p, NULL, a) } | '**' a=bitwise_or { _PyPegen_key_value_pair(p, NULL, a) }
| a=expression ':' b=expression { _PyPegen_key_value_pair(p, a, b) } | a=expression ':' b=expression { _PyPegen_key_value_pair(p, a, b) }
for_if_clauses[asdl_seq*]: for_if_clauses[asdl_seq*]:
| a=(y=[ASYNC] 'for' a=star_targets 'in' b=disjunction c=('if' z=disjunction { z })* | for_if_clause+
{ _Py_comprehension(a, b, c, y != NULL, p->arena) })+ { a } for_if_clause[comprehension_ty]:
| ASYNC 'for' a=star_targets 'in' b=disjunction c=('if' z=disjunction { z })* {
CHECK_VERSION(6, "Async comprehensions are", _Py_comprehension(a, b, c, 1, p->arena)) }
| 'for' a=star_targets 'in' b=disjunction c=('if' z=disjunction { z })* {
_Py_comprehension(a, b, c, 0, p->arena) }
yield_expr[expr_ty]: yield_expr[expr_ty]:
| 'yield' 'from' a=expression { _Py_YieldFrom(a, EXTRA) } | 'yield' 'from' a=expression { _Py_YieldFrom(a, EXTRA) }

View File

@ -252,7 +252,6 @@ class TypeCommentTests(unittest.TestCase):
self.assertEqual(tree.body[0].type_comment, None) self.assertEqual(tree.body[0].type_comment, None)
self.assertEqual(tree.body[1].type_comment, None) self.assertEqual(tree.body[1].type_comment, None)
@support.skip_if_new_parser("Pegen does not support feature_version yet")
def test_asyncdef(self): def test_asyncdef(self):
for tree in self.parse_all(asyncdef, minver=5): for tree in self.parse_all(asyncdef, minver=5):
self.assertEqual(tree.body[0].type_comment, "() -> int") self.assertEqual(tree.body[0].type_comment, "() -> int")
@ -261,27 +260,22 @@ class TypeCommentTests(unittest.TestCase):
self.assertEqual(tree.body[0].type_comment, None) self.assertEqual(tree.body[0].type_comment, None)
self.assertEqual(tree.body[1].type_comment, None) self.assertEqual(tree.body[1].type_comment, None)
@support.skip_if_new_parser("Pegen does not support feature_version yet")
def test_asyncvar(self): def test_asyncvar(self):
for tree in self.parse_all(asyncvar, maxver=6): for tree in self.parse_all(asyncvar, maxver=6):
pass pass
@support.skip_if_new_parser("Pegen does not support feature_version yet")
def test_asynccomp(self): def test_asynccomp(self):
for tree in self.parse_all(asynccomp, minver=6): for tree in self.parse_all(asynccomp, minver=6):
pass pass
@support.skip_if_new_parser("Pegen does not support feature_version yet")
def test_matmul(self): def test_matmul(self):
for tree in self.parse_all(matmul, minver=5): for tree in self.parse_all(matmul, minver=5):
pass pass
@support.skip_if_new_parser("Pegen does not support feature_version yet")
def test_fstring(self): def test_fstring(self):
for tree in self.parse_all(fstring, minver=6): for tree in self.parse_all(fstring, minver=6):
pass pass
@support.skip_if_new_parser("Pegen does not support feature_version yet")
def test_underscorednumber(self): def test_underscorednumber(self):
for tree in self.parse_all(underscorednumber, minver=6): for tree in self.parse_all(underscorednumber, minver=6):
pass pass

File diff suppressed because it is too large Load Diff

View File

@ -179,6 +179,13 @@ _PyPegen_parsestr(Parser *p, const char *s, int *bytesmode, int *rawmode, PyObje
} }
} }
/* fstrings are only allowed in Python 3.6 and greater */
if (fmode && p->feature_version < 6) {
p->error_indicator = 1;
RAISE_SYNTAX_ERROR("Format strings are only supported in Python 3.6 and greater");
return -1;
}
if (fmode && *bytesmode) { if (fmode && *bytesmode) {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
return -1; return -1;
@ -595,7 +602,8 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
return NULL; return NULL;
} }
Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, NULL, p->arena); Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version,
NULL, p->arena);
p2->starting_lineno = p->starting_lineno + p->tok->first_lineno - 1; p2->starting_lineno = p->starting_lineno + p->tok->first_lineno - 1;
p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno
? p->starting_col_offset + t->col_offset : 0; ? p->starting_col_offset + t->col_offset : 0;

View File

@ -933,11 +933,16 @@ _PyPegen_number_token(Parser *p)
} }
char *num_raw = PyBytes_AsString(t->bytes); char *num_raw = PyBytes_AsString(t->bytes);
if (num_raw == NULL) { if (num_raw == NULL) {
return NULL; return NULL;
} }
/* Underscores in numeric literals are rejected when the requested
 * feature_version is below 6 (i.e. Python 3.6). */
if (p->feature_version < 6 && strchr(num_raw, '_') != NULL) {
    p->error_indicator = 1;
    /* The two literals are concatenated by the compiler, so the first one
     * must end with a space; otherwise the message reads "supportedin". */
    return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported "
                              "in Python 3.6 and greater");
}
PyObject *c = parsenumber(num_raw); PyObject *c = parsenumber(num_raw);
if (c == NULL) { if (c == NULL) {
@ -1030,12 +1035,15 @@ compute_parser_flags(PyCompilerFlags *flags)
if (flags->cf_flags & PyCF_TYPE_COMMENTS) { if (flags->cf_flags & PyCF_TYPE_COMMENTS) {
parser_flags |= PyPARSE_TYPE_COMMENTS; parser_flags |= PyPARSE_TYPE_COMMENTS;
} }
if (flags->cf_feature_version < 7) {
parser_flags |= PyPARSE_ASYNC_HACKS;
}
return parser_flags; return parser_flags;
} }
Parser * Parser *
_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags, _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
int *errcode, PyArena *arena) int feature_version, int *errcode, PyArena *arena)
{ {
Parser *p = PyMem_Malloc(sizeof(Parser)); Parser *p = PyMem_Malloc(sizeof(Parser));
if (p == NULL) { if (p == NULL) {
@ -1077,6 +1085,7 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
p->starting_lineno = 0; p->starting_lineno = 0;
p->starting_col_offset = 0; p->starting_col_offset = 0;
p->flags = flags; p->flags = flags;
p->feature_version = feature_version;
return p; return p;
} }
@ -1138,7 +1147,8 @@ _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filena
mod_ty result = NULL; mod_ty result = NULL;
int parser_flags = compute_parser_flags(flags); int parser_flags = compute_parser_flags(flags);
Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, errcode, arena); Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, PY_MINOR_VERSION,
errcode, arena);
if (p == NULL) { if (p == NULL) {
goto error; goto error;
} }
@ -1194,9 +1204,12 @@ _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filen
mod_ty result = NULL; mod_ty result = NULL;
int parser_flags = compute_parser_flags(flags); int parser_flags = compute_parser_flags(flags);
int feature_version = flags ? flags->cf_feature_version : PY_MINOR_VERSION;
tok->type_comments = (parser_flags & PyPARSE_TYPE_COMMENTS) > 0; tok->type_comments = (parser_flags & PyPARSE_TYPE_COMMENTS) > 0;
tok->async_hacks = (parser_flags & PyPARSE_ASYNC_HACKS) > 0;
Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, NULL, arena); Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, feature_version,
NULL, arena);
if (p == NULL) { if (p == NULL) {
goto error; goto error;
} }

View File

@ -69,6 +69,7 @@ typedef struct {
int starting_col_offset; int starting_col_offset;
int error_indicator; int error_indicator;
int flags; int flags;
int feature_version;
growable_comment_array type_ignore_comments; growable_comment_array type_ignore_comments;
} Parser; } Parser;
@ -180,9 +181,26 @@ NEW_TYPE_COMMENT(Parser *p, Token *tc)
return NULL; return NULL;
} }
/*
 * Gate an already-built node on the parser's feature_version.
 *
 *   p        parser whose feature_version and error_indicator are used
 *   version  minimum minor version (the N in "3.N") required by the construct
 *   msg      subject phrase including its verb, e.g. "Async functions are";
 *            it is substituted for %s in
 *            "%s only supported in Python 3.%i and greater"
 *   node     result of the node-constructing expression (may be NULL)
 *
 * Returns `node` unchanged when it is non-NULL and p->feature_version is at
 * least `version`.  If `node` is NULL, sets p->error_indicator and returns
 * NULL without raising a new error.  If the feature version is too low, sets
 * p->error_indicator and returns whatever _PyPegen_raise_error() returns
 * (presumably NULL after setting a SyntaxError -- confirm against its
 * definition).
 *
 * `msg` is const-qualified because callers pass string literals and the
 * function never writes through it.
 */
Py_LOCAL_INLINE(void *)
INVALID_VERSION_CHECK(Parser *p, int version, const char *msg, void *node)
{
    if (node == NULL) {
        p->error_indicator = 1;  // Inline CHECK_CALL
        return NULL;
    }
    if (p->feature_version < version) {
        p->error_indicator = 1;
        return _PyPegen_raise_error(p, PyExc_SyntaxError,
                                    "%s only supported in Python 3.%i and greater",
                                    msg, version);
    }
    return node;
}

/* Shorthand for INVALID_VERSION_CHECK; expects a `Parser *p` to be in scope
 * wherever the macro is expanded. */
#define CHECK_VERSION(version, msg, node) INVALID_VERSION_CHECK(p, version, msg, node)
arg_ty _PyPegen_add_type_comment_to_arg(Parser *, arg_ty, Token *); arg_ty _PyPegen_add_type_comment_to_arg(Parser *, arg_ty, Token *);
PyObject *_PyPegen_new_identifier(Parser *, char *); PyObject *_PyPegen_new_identifier(Parser *, char *);
Parser *_PyPegen_Parser_New(struct tok_state *, int, int, int *, PyArena *); Parser *_PyPegen_Parser_New(struct tok_state *, int, int, int, int *, PyArena *);
void _PyPegen_Parser_Free(Parser *); void _PyPegen_Parser_Free(Parser *);
mod_ty _PyPegen_run_parser_from_file_pointer(FILE *, int, PyObject *, const char *, mod_ty _PyPegen_run_parser_from_file_pointer(FILE *, int, PyObject *, const char *,
const char *, const char *, PyCompilerFlags *, int *, PyArena *); const char *, const char *, PyCompilerFlags *, int *, PyArena *);