bpo-41690: Use a loop to collect args in the parser instead of recursion (GH-22053)

This program can segfault the parser by stack overflow:

```
import ast

code = "f(" + ",".join(['a' for _ in range(100000)]) + ")"
print("Ready!")
ast.parse(code)
```

The reason is that the rule for arguments uses simple recursion when collecting args:

args[expr_ty]:
    [...]
    | a=named_expression b=[',' c=args { c }] {
        [...] }
This commit is contained in:
Pablo Galindo 2020-09-02 17:44:19 +01:00 committed by GitHub
parent 3940333637
commit 4a97b1517a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 916 additions and 803 deletions

View File

@ -535,22 +535,11 @@ arguments[expr_ty] (memo):
| a=args [','] &')' { a }
| incorrect_arguments
# Rule that collects the argument list of a call into a dummy Call node.
# NOTE(review): this is a diff hunk rendered without +/- markers, so old and new
# alternatives appear interleaved. Going by the hunk header (22 old lines, 11 new),
# the two alternatives that recurse through `c=args` look like the removed ones and
# the `_PyPegen_collect_call_seqs` alternative looks like the added one -- confirm
# against the raw diff.
args[expr_ty]:
# Presumably removed: right-recursive collection of starred expressions.
| a=starred_expression b=[',' c=args { c }] {
_Py_Call(_PyPegen_dummy_name(p),
(b) ? CHECK(_PyPegen_seq_insert_in_front(p, a, ((expr_ty) b)->v.Call.args))
: CHECK(_PyPegen_singleton_seq(p, a)),
(b) ? ((expr_ty) b)->v.Call.keywords : NULL,
EXTRA) }
# Presumably added: gather all positional/starred arguments with a `+` loop and
# hand them, plus the optional trailing kwargs, to a single helper call.
| a=','.(starred_expression | named_expression !'=')+ b=[',' k=kwargs {k}] { _PyPegen_collect_call_seqs(p, a, b) }
# Keyword-only form: split the kwargs sequence into starred expressions and keywords.
| a=kwargs { _Py_Call(_PyPegen_dummy_name(p),
CHECK_NULL_ALLOWED(_PyPegen_seq_extract_starred_exprs(p, a)),
CHECK_NULL_ALLOWED(_PyPegen_seq_delete_starred_exprs(p, a)),
EXTRA) }
# Presumably removed: right-recursive collection of named expressions.
| a=named_expression b=[',' c=args { c }] {
_Py_Call(_PyPegen_dummy_name(p),
(b) ? CHECK(_PyPegen_seq_insert_in_front(p, a, ((expr_ty) b)->v.Call.args))
: CHECK(_PyPegen_singleton_seq(p, a)),
(b) ? ((expr_ty) b)->v.Call.keywords : NULL,
EXTRA) }
kwargs[asdl_seq*]:
| a=','.kwarg_or_starred+ ',' b=','.kwarg_or_double_starred+ { _PyPegen_join_sequences(p, a, b) }
| ','.kwarg_or_starred+

View File

@ -0,0 +1,2 @@
Fix a possible stack overflow in the parser when parsing functions and
classes with a huge amount of arguments. Patch by Pablo Galindo.

File diff suppressed because it is too large Load Diff

View File

@ -2217,3 +2217,38 @@ _PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args)
"Generator expression must be parenthesized"
);
}
/* Build the dummy Call node for a call's argument list.
 *
 *   p  parser state; its arena owns every allocation made here.
 *   a  sequence of positional arguments (read as expr_ty elements).
 *   b  optional trailing sequence that is split by the starred
 *      extract/delete helpers; NULL when the call has no such tail.
 *
 * Returns the Call node whose args are `a` followed by any starred
 * expressions extracted from `b`, and whose keywords are what remains of
 * `b`.  Returns NULL if the combined argument sequence cannot be allocated.
 */
expr_ty _PyPegen_collect_call_seqs(Parser *p, asdl_seq *a, asdl_seq *b) {
    Py_ssize_t args_len = asdl_seq_LEN(a);
    Py_ssize_t total_len = args_len;

    /* Positional arguments only: reuse `a` directly, no copy needed. */
    if (b == NULL) {
        expr_ty first = asdl_seq_GET(a, 0);
        expr_ty last = asdl_seq_GET(a, args_len - 1);
        return _Py_Call(_PyPegen_dummy_name(p), a, NULL, EXTRA_EXPR(first, last));
    }

    asdl_seq *starreds = _PyPegen_seq_extract_starred_exprs(p, b);
    asdl_seq *keywords = _PyPegen_seq_delete_starred_exprs(p, b);

    if (starreds) {
        total_len += asdl_seq_LEN(starreds);
    }

    asdl_seq *args = _Py_asdl_seq_new(total_len, p->arena);
    if (args == NULL) {
        /* Allocation failed: bail out before writing through the pointer. */
        return NULL;
    }

    /* Positional arguments first, then the starred expressions taken from `b`.
     * The second loop only runs when `starreds` contributed to total_len. */
    Py_ssize_t i = 0;
    for (i = 0; i < args_len; i++) {
        asdl_seq_SET(args, i, asdl_seq_GET(a, i));
    }
    for (; i < total_len; i++) {
        asdl_seq_SET(args, i, asdl_seq_GET(starreds, i - args_len));
    }

    expr_ty first = asdl_seq_GET(args, 0);
    /* NOTE(review): `b` is the sequence handed to the starred extract/delete
     * helpers, so its elements are presumably KeywordOrStarred wrappers rather
     * than expr_ty nodes.  Reading the end position through an expr_ty here
     * looks suspect -- confirm, and consider passing the location in from the
     * grammar action instead. */
    expr_ty last = asdl_seq_GET(b, asdl_seq_LEN(b)-1);

    return _Py_Call(_PyPegen_dummy_name(p), args, keywords, EXTRA_EXPR(first, last));
}

View File

@ -257,6 +257,7 @@ stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_seq *, stmt_ty);
KeywordOrStarred *_PyPegen_keyword_or_starred(Parser *, void *, int);
asdl_seq *_PyPegen_seq_extract_starred_exprs(Parser *, asdl_seq *);
asdl_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *);
expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_seq *, asdl_seq *);
expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *);
asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
int _PyPegen_check_barry_as_flufl(Parser *);