bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503)

Co-authored-by: Guido van Rossum <guido@python.org>
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>

parent a81849b031
commit c5fc156852
@@ -13,6 +13,7 @@ on:
    - '**/*.rst'
  pull_request:
    branches:
    - pegen
    - master
    - 3.8
    - 3.7
@@ -50,6 +51,22 @@ jobs:
  build_macos:
    name: 'macOS'
    runs-on: macos-latest
    env:
      PYTHONOLDPARSER: old
    steps:
    - uses: actions/checkout@v1
    - name: Configure CPython
      run: ./configure --with-pydebug --with-openssl=/usr/local/opt/openssl --prefix=/opt/python-dev
    - name: Build CPython
      run: make -j4
    - name: Display build info
      run: make pythoninfo
    - name: Tests
      run: make buildbottest TESTOPTS="-j4 -uall,-cpu"

  build_macos_pegen:
    name: 'macOS - Pegen'
    runs-on: macos-latest
    steps:
    - uses: actions/checkout@v1
    - name: Configure CPython
@@ -64,6 +81,34 @@ jobs:
  build_ubuntu:
    name: 'Ubuntu'
    runs-on: ubuntu-latest
    env:
      OPENSSL_VER: 1.1.1f
      PYTHONOLDPARSER: old
    steps:
    - uses: actions/checkout@v1
    - name: Install Dependencies
      run: sudo ./.github/workflows/posix-deps-apt.sh
    - name: 'Restore OpenSSL build'
      id: cache-openssl
      uses: actions/cache@v1
      with:
        path: ./multissl/openssl/${{ env.OPENSSL_VER }}
        key: ${{ runner.os }}-multissl-openssl-${{ env.OPENSSL_VER }}
    - name: Install OpenSSL
      if: steps.cache-openssl.outputs.cache-hit != 'true'
      run: python3 Tools/ssl/multissltests.py --steps=library --base-directory $PWD/multissl --openssl $OPENSSL_VER --system Linux
    - name: Configure CPython
      run: ./configure --with-pydebug --with-openssl=$PWD/multissl/openssl/$OPENSSL_VER
    - name: Build CPython
      run: make -j4
    - name: Display build info
      run: make pythoninfo
    - name: Tests
      run: xvfb-run make buildbottest TESTOPTS="-j4 -uall,-cpu"

  build_ubuntu_pegen:
    name: 'Ubuntu - Pegen'
    runs-on: ubuntu-latest
    env:
      OPENSSL_VER: 1.1.1f
    steps:
@@ -1,5 +1,5 @@
language: c
dist: xenial
dist: bionic

# To cache doc-building dependencies and C compiler output.
cache:

@@ -22,6 +22,7 @@ env:
branches:
  only:
    - master
    - pegen
    - /^\d\.\d+$/
    - buildbot-custom

@@ -157,7 +158,9 @@ install:
before_script:
  # -Og is much faster than -O0
  - CFLAGS="${CFLAGS} -Og" ./configure --with-pydebug
  - make -j4 regen-all
  - eval "$(pyenv init -)"
  - pyenv global 3.8
  - PYTHON_FOR_REGEN=python3.8 make -j4 regen-all
  - changes=`git status --porcelain`
  - |
      # Check for changes in regenerated files
@@ -426,6 +426,8 @@ Miscellaneous options
   defines the following possible values:

   * ``-X faulthandler`` to enable :mod:`faulthandler`;
   * ``-X oldparser``: enable the traditional LL(1) parser. See also
     :envvar:`PYTHONOLDPARSER`.
   * ``-X showrefcount`` to output the total reference count and number of used
     memory blocks when the program finishes or after each statement in the
     interactive interpreter. This only works on debug builds.

@@ -574,6 +576,12 @@ conflict.
   :option:`-d` multiple times.


.. envvar:: PYTHONOLDPARSER

   If this is set it is equivalent to specifying the :option:`-X`
   ``oldparser`` option.


.. envvar:: PYTHONINSPECT

   If this is set to a non-empty string it is equivalent to specifying the
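The two additions above are equivalent ways of opting back into the LL(1) parser. A minimal sketch (not part of the patch) of how that looks from the command line and from inside the interpreter; the script name is made up:

    # Equivalent ways to select the traditional parser:
    #   PYTHONOLDPARSER=old python script.py
    #   python -X oldparser script.py
    # Inside the interpreter, -X options are visible via sys._xoptions:
    import sys
    print(sys._xoptions.get("oldparser"))   # True when -X oldparser was passed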
@ -0,0 +1,555 @@
|
|||
# Simplified grammar for Python
|
||||
|
||||
@bytecode True
|
||||
@trailer '''
|
||||
void *
|
||||
_PyPegen_parse(Parser *p)
|
||||
{
|
||||
// Initialize keywords
|
||||
p->keywords = reserved_keywords;
|
||||
p->n_keyword_lists = n_keyword_lists;
|
||||
|
||||
// Run parser
|
||||
void *result = NULL;
|
||||
if (p->start_rule == Py_file_input) {
|
||||
result = file_rule(p);
|
||||
} else if (p->start_rule == Py_single_input) {
|
||||
result = interactive_rule(p);
|
||||
} else if (p->start_rule == Py_eval_input) {
|
||||
result = eval_rule(p);
|
||||
} else if (p->start_rule == Py_fstring_input) {
|
||||
result = fstring_rule(p);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// The end
|
||||
'''
|
||||
file[mod_ty]: a=[statements] ENDMARKER { Module(a, NULL, p->arena) }
|
||||
interactive[mod_ty]: a=statement_newline { Interactive(a, p->arena) }
|
||||
eval[mod_ty]: a=expressions NEWLINE* ENDMARKER { Expression(a, p->arena) }
|
||||
fstring[expr_ty]: star_expressions
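The trailer above dispatches on p->start_rule, and these four start rules line up with the interpreter's input modes. A hedged Python-level illustration of the three modes reachable through compile() (the fstring rule is only used internally while parsing f-string expressions):

    # Sketch: the start rules correspond to compile() modes.
    compile("x = 1\n", "<demo>", "exec")     # file rule        (Py_file_input)
    compile("x + 1", "<demo>", "eval")       # eval rule        (Py_eval_input)
    compile("x = 1\n", "<demo>", "single")   # interactive rule (Py_single_input)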
|
||||
|
||||
statements[asdl_seq*]: a=statement+ { _PyPegen_seq_flatten(p, a) }
|
||||
statement[asdl_seq*]: a=compound_stmt { _PyPegen_singleton_seq(p, a) } | simple_stmt
|
||||
statement_newline[asdl_seq*]:
|
||||
| a=compound_stmt NEWLINE { _PyPegen_singleton_seq(p, a) }
|
||||
| simple_stmt
|
||||
| NEWLINE { _PyPegen_singleton_seq(p, CHECK(_Py_Pass(EXTRA))) }
|
||||
| ENDMARKER { _PyPegen_interactive_exit(p) }
|
||||
simple_stmt[asdl_seq*]:
|
||||
| a=small_stmt !';' NEWLINE { _PyPegen_singleton_seq(p, a) } # Not needed, there for speedup
|
||||
| a=';'.small_stmt+ [';'] NEWLINE { a }
|
||||
# NOTE: assignment MUST precede expression, else parsing a simple assignment
|
||||
# will throw a SyntaxError.
|
||||
small_stmt[stmt_ty] (memo):
|
||||
| assignment
|
||||
| e=star_expressions { _Py_Expr(e, EXTRA) }
|
||||
| &'return' return_stmt
|
||||
| &('import' | 'from') import_stmt
|
||||
| &'raise' raise_stmt
|
||||
| 'pass' { _Py_Pass(EXTRA) }
|
||||
| &'del' del_stmt
|
||||
| &'yield' yield_stmt
|
||||
| &'assert' assert_stmt
|
||||
| 'break' { _Py_Break(EXTRA) }
|
||||
| 'continue' { _Py_Continue(EXTRA) }
|
||||
| &'global' global_stmt
|
||||
| &'nonlocal' nonlocal_stmt
|
||||
compound_stmt[stmt_ty]:
|
||||
| &('def' | '@' | ASYNC) function_def
|
||||
| &'if' if_stmt
|
||||
| &('class' | '@') class_def
|
||||
| &('with' | ASYNC) with_stmt
|
||||
| &('for' | ASYNC) for_stmt
|
||||
| &'try' try_stmt
|
||||
| &'while' while_stmt
|
||||
|
||||
# NOTE: annotated_rhs may start with 'yield'; yield_expr must start with 'yield'
|
||||
assignment:
|
||||
| a=NAME ':' b=expression c=['=' d=annotated_rhs { d }] {
|
||||
_Py_AnnAssign(CHECK(_PyPegen_set_expr_context(p, a, Store)), b, c, 1, EXTRA) }
|
||||
| a=('(' b=inside_paren_ann_assign_target ')' { b }
|
||||
| ann_assign_subscript_attribute_target) ':' b=expression c=['=' d=annotated_rhs { d }] {
|
||||
_Py_AnnAssign(a, b, c, 0, EXTRA)}
|
||||
| a=(z=star_targets '=' { z })+ b=(yield_expr | star_expressions) {
|
||||
_Py_Assign(a, b, NULL, EXTRA) }
|
||||
| a=target b=augassign c=(yield_expr | star_expressions) {
|
||||
_Py_AugAssign(a, b->kind, c, EXTRA) }
|
||||
| invalid_assignment
|
||||
|
||||
augassign[AugOperator*]:
|
||||
| '+=' {_PyPegen_augoperator(p, Add)}
|
||||
| '-=' {_PyPegen_augoperator(p, Sub)}
|
||||
| '*=' {_PyPegen_augoperator(p, Mult)}
|
||||
| '@=' {_PyPegen_augoperator(p, MatMult)}
|
||||
| '/=' {_PyPegen_augoperator(p, Div)}
|
||||
| '%=' {_PyPegen_augoperator(p, Mod)}
|
||||
| '&=' {_PyPegen_augoperator(p, BitAnd)}
|
||||
| '|=' {_PyPegen_augoperator(p, BitOr)}
|
||||
| '^=' {_PyPegen_augoperator(p, BitXor)}
|
||||
| '<<=' {_PyPegen_augoperator(p, LShift)}
|
||||
| '>>=' {_PyPegen_augoperator(p, RShift)}
|
||||
| '**=' {_PyPegen_augoperator(p, Pow)}
|
||||
| '//=' {_PyPegen_augoperator(p, FloorDiv)}
|
||||
|
||||
global_stmt[stmt_ty]: 'global' a=','.NAME+ {
|
||||
_Py_Global(CHECK(_PyPegen_map_names_to_ids(p, a)), EXTRA) }
|
||||
nonlocal_stmt[stmt_ty]: 'nonlocal' a=','.NAME+ {
|
||||
_Py_Nonlocal(CHECK(_PyPegen_map_names_to_ids(p, a)), EXTRA) }
|
||||
|
||||
yield_stmt[stmt_ty]: y=yield_expr { _Py_Expr(y, EXTRA) }
|
||||
|
||||
assert_stmt[stmt_ty]: 'assert' a=expression b=[',' z=expression { z }] { _Py_Assert(a, b, EXTRA) }
|
||||
|
||||
del_stmt[stmt_ty]: 'del' a=del_targets { _Py_Delete(a, EXTRA) }
|
||||
|
||||
import_stmt[stmt_ty]: import_name | import_from
|
||||
import_name[stmt_ty]: 'import' a=dotted_as_names { _Py_Import(a, EXTRA) }
|
||||
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
|
||||
import_from[stmt_ty]:
|
||||
| 'from' a=('.' | '...')* b=dotted_name 'import' c=import_from_targets {
|
||||
_Py_ImportFrom(b->v.Name.id, c, _PyPegen_seq_count_dots(a), EXTRA) }
|
||||
| 'from' a=('.' | '...')+ 'import' b=import_from_targets {
|
||||
_Py_ImportFrom(NULL, b, _PyPegen_seq_count_dots(a), EXTRA) }
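To make the note above concrete: the tokenizer emits '...' as a single ELLIPSIS token, so a relative import with three leading dots never reaches the parser as three separate '.' tokens, which is why both '.' and '...' must be accepted when counting dots. A small sketch using the stdlib tokenizer:

    import io, tokenize
    src = "from ...pkg import x\n"
    print([t.string for t in tokenize.generate_tokens(io.StringIO(src).readline)])
    # ['from', '...', 'pkg', 'import', 'x', '\n', '']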
|
||||
import_from_targets[asdl_seq*]:
|
||||
| '(' a=import_from_as_names [','] ')' { a }
|
||||
| import_from_as_names
|
||||
| '*' { _PyPegen_singleton_seq(p, CHECK(_PyPegen_alias_for_star(p))) }
|
||||
import_from_as_names[asdl_seq*]:
|
||||
| a=','.import_from_as_name+ { a }
|
||||
import_from_as_name[alias_ty]:
|
||||
| a=NAME b=['as' z=NAME { z }] { _Py_alias(a->v.Name.id,
|
||||
(b) ? ((expr_ty) b)->v.Name.id : NULL,
|
||||
p->arena) }
|
||||
dotted_as_names[asdl_seq*]:
|
||||
| a=','.dotted_as_name+ { a }
|
||||
dotted_as_name[alias_ty]:
|
||||
| a=dotted_name b=['as' z=NAME { z }] { _Py_alias(a->v.Name.id,
|
||||
(b) ? ((expr_ty) b)->v.Name.id : NULL,
|
||||
p->arena) }
|
||||
dotted_name[expr_ty]:
|
||||
| a=dotted_name '.' b=NAME { _PyPegen_join_names_with_dot(p, a, b) }
|
||||
| NAME
|
||||
|
||||
if_stmt[stmt_ty]:
|
||||
| 'if' a=named_expression ':' b=block c=elif_stmt { _Py_If(a, b, CHECK(_PyPegen_singleton_seq(p, c)), EXTRA) }
|
||||
| 'if' a=named_expression ':' b=block c=[else_block] { _Py_If(a, b, c, EXTRA) }
|
||||
elif_stmt[stmt_ty]:
|
||||
| 'elif' a=named_expression ':' b=block c=elif_stmt { _Py_If(a, b, CHECK(_PyPegen_singleton_seq(p, c)), EXTRA) }
|
||||
| 'elif' a=named_expression ':' b=block c=[else_block] { _Py_If(a, b, c, EXTRA) }
|
||||
else_block[asdl_seq*]: 'else' ':' b=block { b }
|
||||
|
||||
while_stmt[stmt_ty]:
|
||||
| 'while' a=named_expression ':' b=block c=[else_block] { _Py_While(a, b, c, EXTRA) }
|
||||
|
||||
for_stmt[stmt_ty]:
|
||||
| is_async=[ASYNC] 'for' t=star_targets 'in' ex=star_expressions ':' b=block el=[else_block] {
|
||||
(is_async ? _Py_AsyncFor : _Py_For)(t, ex, b, el, NULL, EXTRA) }
|
||||
|
||||
with_stmt[stmt_ty]:
|
||||
| is_async=[ASYNC] 'with' '(' a=','.with_item+ ')' ':' b=block {
|
||||
(is_async ? _Py_AsyncWith : _Py_With)(a, b, NULL, EXTRA) }
|
||||
| is_async=[ASYNC] 'with' a=','.with_item+ ':' b=block {
|
||||
(is_async ? _Py_AsyncWith : _Py_With)(a, b, NULL, EXTRA) }
|
||||
with_item[withitem_ty]:
|
||||
| e=expression o=['as' t=target { t }] { _Py_withitem(e, o, p->arena) }
|
||||
|
||||
try_stmt[stmt_ty]:
|
||||
| 'try' ':' b=block f=finally_block { _Py_Try(b, NULL, NULL, f, EXTRA) }
|
||||
| 'try' ':' b=block ex=except_block+ el=[else_block] f=[finally_block] { _Py_Try(b, ex, el, f, EXTRA) }
|
||||
except_block[excepthandler_ty]:
|
||||
| 'except' e=expression t=['as' z=target { z }] ':' b=block {
|
||||
_Py_ExceptHandler(e, (t) ? ((expr_ty) t)->v.Name.id : NULL, b, EXTRA) }
|
||||
| 'except' ':' b=block { _Py_ExceptHandler(NULL, NULL, b, EXTRA) }
|
||||
finally_block[asdl_seq*]: 'finally' ':' a=block { a }
|
||||
|
||||
return_stmt[stmt_ty]:
|
||||
| 'return' a=[star_expressions] { _Py_Return(a, EXTRA) }
|
||||
|
||||
raise_stmt[stmt_ty]:
|
||||
| 'raise' a=expression b=['from' z=expression { z }] { _Py_Raise(a, b, EXTRA) }
|
||||
| 'raise' { _Py_Raise(NULL, NULL, EXTRA) }
|
||||
|
||||
function_def[stmt_ty]:
|
||||
| d=decorators f=function_def_raw { _PyPegen_function_def_decorators(p, d, f) }
|
||||
| function_def_raw
|
||||
|
||||
function_def_raw[stmt_ty]:
|
||||
| is_async=[ASYNC] 'def' n=NAME '(' params=[params] ')' a=['->' z=annotation { z }] ':' b=block {
|
||||
(is_async ? _Py_AsyncFunctionDef : _Py_FunctionDef)(n->v.Name.id,
|
||||
(params) ? params : CHECK(_PyPegen_empty_arguments(p)),
|
||||
b, NULL, a, NULL, EXTRA) }
|
||||
|
||||
params[arguments_ty]:
|
||||
| invalid_parameters
|
||||
| parameters
|
||||
parameters[arguments_ty]:
|
||||
| a=slash_without_default b=[',' x=plain_names { x }] c=[',' y=names_with_default { y }] d=[',' z=[star_etc] { z }] {
|
||||
_PyPegen_make_arguments(p, a, NULL, b, c, d) }
|
||||
| a=slash_with_default b=[',' y=names_with_default { y }] c=[',' z=[star_etc] { z }] {
|
||||
_PyPegen_make_arguments(p, NULL, a, NULL, b, c) }
|
||||
| a=plain_names b=[',' y=names_with_default { y }] c=[',' z=[star_etc] { z }] {
|
||||
_PyPegen_make_arguments(p, NULL, NULL, a, b, c) }
|
||||
| a=names_with_default b=[',' z=[star_etc] { z }] { _PyPegen_make_arguments(p, NULL, NULL, NULL, a, b)}
|
||||
| a=star_etc { _PyPegen_make_arguments(p, NULL, NULL, NULL, NULL, a) }
|
||||
slash_without_default[asdl_seq*]: a=plain_names ',' '/' { a }
|
||||
slash_with_default[SlashWithDefault*]: a=[n=plain_names ',' { n }] b=names_with_default ',' '/' {
|
||||
_PyPegen_slash_with_default(p, a, b) }
|
||||
star_etc[StarEtc*]:
|
||||
| '*' a=plain_name b=name_with_optional_default* c=[',' d=kwds { d }] [','] {
|
||||
_PyPegen_star_etc(p, a, b, c) }
|
||||
| '*' b=name_with_optional_default+ c=[',' d=kwds { d }] [','] {
|
||||
_PyPegen_star_etc(p, NULL, b, c) }
|
||||
| a=kwds [','] { _PyPegen_star_etc(p, NULL, NULL, a) }
|
||||
name_with_optional_default[NameDefaultPair*]:
|
||||
| ',' a=plain_name b=['=' e=expression { e }] { _PyPegen_name_default_pair(p, a, b) }
|
||||
names_with_default[asdl_seq*]: a=','.name_with_default+ { a }
|
||||
name_with_default[NameDefaultPair*]:
|
||||
| n=plain_name '=' e=expression { _PyPegen_name_default_pair(p, n, e) }
|
||||
plain_names[asdl_seq*] (memo): a=','.(plain_name !'=')+ { a }
|
||||
plain_name[arg_ty]:
|
||||
| a=NAME b=[':' z=annotation { z }] { _Py_arg(a->v.Name.id, b, NULL, EXTRA) }
|
||||
kwds[arg_ty]:
|
||||
| '**' a=plain_name { a }
|
||||
annotation[expr_ty]: expression
|
||||
|
||||
decorators[asdl_seq*]: a=('@' f=named_expression NEWLINE { f })+ { a }
|
||||
|
||||
class_def[stmt_ty]:
|
||||
| a=decorators b=class_def_raw { _PyPegen_class_def_decorators(p, a, b) }
|
||||
| class_def_raw
|
||||
class_def_raw[stmt_ty]:
|
||||
| 'class' a=NAME b=['(' z=[arguments] ')' { z }] ':' c=block {
|
||||
_Py_ClassDef(a->v.Name.id,
|
||||
(b) ? ((expr_ty) b)->v.Call.args : NULL,
|
||||
(b) ? ((expr_ty) b)->v.Call.keywords : NULL,
|
||||
c, NULL, EXTRA) }
|
||||
|
||||
block[asdl_seq*] (memo):
|
||||
| NEWLINE INDENT a=statements DEDENT { a }
|
||||
| simple_stmt
|
||||
| invalid_block
|
||||
|
||||
expressions_list[asdl_seq*]: a=','.star_expression+ [','] { a }
|
||||
star_expressions[expr_ty]:
|
||||
| a=star_expression b=(',' c=star_expression { c })+ [','] {
|
||||
_Py_Tuple(CHECK(_PyPegen_seq_insert_in_front(p, a, b)), Load, EXTRA) }
|
||||
| a=star_expression ',' { _Py_Tuple(CHECK(_PyPegen_singleton_seq(p, a)), Load, EXTRA) }
|
||||
| star_expression
|
||||
star_expression[expr_ty] (memo):
|
||||
| '*' a=bitwise_or { _Py_Starred(a, Load, EXTRA) }
|
||||
| expression
|
||||
|
||||
star_named_expressions[asdl_seq*]: a=','.star_named_expression+ [','] { a }
|
||||
star_named_expression[expr_ty]:
|
||||
| '*' a=bitwise_or { _Py_Starred(a, Load, EXTRA) }
|
||||
| named_expression
|
||||
named_expression[expr_ty]:
|
||||
| a=NAME ':=' b=expression { _Py_NamedExpr(CHECK(_PyPegen_set_expr_context(p, a, Store)), b, EXTRA) }
|
||||
| expression !':='
|
||||
| invalid_named_expression
|
||||
|
||||
annotated_rhs[expr_ty]: yield_expr | star_expressions
|
||||
|
||||
expressions[expr_ty]:
|
||||
| a=expression b=(',' c=expression { c })+ [','] {
|
||||
_Py_Tuple(CHECK(_PyPegen_seq_insert_in_front(p, a, b)), Load, EXTRA) }
|
||||
| a=expression ',' { _Py_Tuple(CHECK(_PyPegen_singleton_seq(p, a)), Load, EXTRA) }
|
||||
| expression
|
||||
expression[expr_ty] (memo):
|
||||
| a=disjunction 'if' b=disjunction 'else' c=expression { _Py_IfExp(b, a, c, EXTRA) }
|
||||
| disjunction
|
||||
| lambdef
|
||||
|
||||
lambdef[expr_ty]:
|
||||
| 'lambda' a=[lambda_parameters] ':' b=expression { _Py_Lambda((a) ? a : CHECK(_PyPegen_empty_arguments(p)), b, EXTRA) }
|
||||
lambda_parameters[arguments_ty]:
|
||||
| a=lambda_slash_without_default b=[',' x=lambda_plain_names { x }] c=[',' y=lambda_names_with_default { y }] d=[',' z=[lambda_star_etc] { z }] {
|
||||
_PyPegen_make_arguments(p, a, NULL, b, c, d) }
|
||||
| a=lambda_slash_with_default b=[',' y=lambda_names_with_default { y }] c=[',' z=[lambda_star_etc] { z }] {
|
||||
_PyPegen_make_arguments(p, NULL, a, NULL, b, c) }
|
||||
| a=lambda_plain_names b=[',' y=lambda_names_with_default { y }] c=[',' z=[lambda_star_etc] { z }] {
|
||||
_PyPegen_make_arguments(p, NULL, NULL, a, b, c) }
|
||||
| a=lambda_names_with_default b=[',' z=[lambda_star_etc] { z }] { _PyPegen_make_arguments(p, NULL, NULL, NULL, a, b)}
|
||||
| a=lambda_star_etc { _PyPegen_make_arguments(p, NULL, NULL, NULL, NULL, a) }
|
||||
lambda_slash_without_default[asdl_seq*]: a=lambda_plain_names ',' '/' { a }
|
||||
lambda_slash_with_default[SlashWithDefault*]: a=[n=lambda_plain_names ',' { n }] b=lambda_names_with_default ',' '/' {
|
||||
_PyPegen_slash_with_default(p, a, b) }
|
||||
lambda_star_etc[StarEtc*]:
|
||||
| '*' a=lambda_plain_name b=lambda_name_with_optional_default* c=[',' d=lambda_kwds { d }] [','] {
|
||||
_PyPegen_star_etc(p, a, b, c) }
|
||||
| '*' b=lambda_name_with_optional_default+ c=[',' d=lambda_kwds { d }] [','] {
|
||||
_PyPegen_star_etc(p, NULL, b, c) }
|
||||
| a=lambda_kwds [','] { _PyPegen_star_etc(p, NULL, NULL, a) }
|
||||
lambda_name_with_optional_default[NameDefaultPair*]:
|
||||
| ',' a=lambda_plain_name b=['=' e=expression { e }] { _PyPegen_name_default_pair(p, a, b) }
|
||||
lambda_names_with_default[asdl_seq*]: a=','.lambda_name_with_default+ { a }
|
||||
lambda_name_with_default[NameDefaultPair*]:
|
||||
| n=lambda_plain_name '=' e=expression { _PyPegen_name_default_pair(p, n, e) }
|
||||
lambda_plain_names[asdl_seq*]: a=','.(lambda_plain_name !'=')+ { a }
|
||||
lambda_plain_name[arg_ty]: a=NAME { _Py_arg(a->v.Name.id, NULL, NULL, EXTRA) }
|
||||
lambda_kwds[arg_ty]: '**' a=lambda_plain_name { a }
|
||||
|
||||
disjunction[expr_ty] (memo):
|
||||
| a=conjunction b=('or' c=conjunction { c })+ { _Py_BoolOp(
|
||||
Or,
|
||||
CHECK(_PyPegen_seq_insert_in_front(p, a, b)),
|
||||
EXTRA) }
|
||||
| conjunction
|
||||
conjunction[expr_ty] (memo):
|
||||
| a=inversion b=('and' c=inversion { c })+ { _Py_BoolOp(
|
||||
And,
|
||||
CHECK(_PyPegen_seq_insert_in_front(p, a, b)),
|
||||
EXTRA) }
|
||||
| inversion
|
||||
inversion[expr_ty] (memo):
|
||||
| 'not' a=inversion { _Py_UnaryOp(Not, a, EXTRA) }
|
||||
| comparison
|
||||
comparison[expr_ty]:
|
||||
| a=bitwise_or b=compare_op_bitwise_or_pair+ {
|
||||
_Py_Compare(a, CHECK(_PyPegen_get_cmpops(p, b)), CHECK(_PyPegen_get_exprs(p, b)), EXTRA) }
|
||||
| bitwise_or
|
||||
compare_op_bitwise_or_pair[CmpopExprPair*]:
|
||||
| eq_bitwise_or
|
||||
| noteq_bitwise_or
|
||||
| lte_bitwise_or
|
||||
| lt_bitwise_or
|
||||
| gte_bitwise_or
|
||||
| gt_bitwise_or
|
||||
| notin_bitwise_or
|
||||
| in_bitwise_or
|
||||
| isnot_bitwise_or
|
||||
| is_bitwise_or
|
||||
eq_bitwise_or[CmpopExprPair*]: '==' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Eq, a) }
|
||||
noteq_bitwise_or[CmpopExprPair*]: '!=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, NotEq, a) }
|
||||
lte_bitwise_or[CmpopExprPair*]: '<=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, LtE, a) }
|
||||
lt_bitwise_or[CmpopExprPair*]: '<' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Lt, a) }
|
||||
gte_bitwise_or[CmpopExprPair*]: '>=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, GtE, a) }
|
||||
gt_bitwise_or[CmpopExprPair*]: '>' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Gt, a) }
|
||||
notin_bitwise_or[CmpopExprPair*]: 'not' 'in' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, NotIn, a) }
|
||||
in_bitwise_or[CmpopExprPair*]: 'in' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, In, a) }
|
||||
isnot_bitwise_or[CmpopExprPair*]: 'is' 'not' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, IsNot, a) }
|
||||
is_bitwise_or[CmpopExprPair*]: 'is' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Is, a) }
|
||||
|
||||
bitwise_or[expr_ty]:
|
||||
| a=bitwise_or '|' b=bitwise_xor { _Py_BinOp(a, BitOr, b, EXTRA) }
|
||||
| bitwise_xor
|
||||
bitwise_xor[expr_ty]:
|
||||
| a=bitwise_xor '^' b=bitwise_and { _Py_BinOp(a, BitXor, b, EXTRA) }
|
||||
| bitwise_and
|
||||
bitwise_and[expr_ty]:
|
||||
| a=bitwise_and '&' b=shift_expr { _Py_BinOp(a, BitAnd, b, EXTRA) }
|
||||
| shift_expr
|
||||
shift_expr[expr_ty]:
|
||||
| a=shift_expr '<<' b=sum { _Py_BinOp(a, LShift, b, EXTRA) }
|
||||
| a=shift_expr '>>' b=sum { _Py_BinOp(a, RShift, b, EXTRA) }
|
||||
| sum
|
||||
|
||||
sum[expr_ty]:
|
||||
| a=sum '+' b=term { _Py_BinOp(a, Add, b, EXTRA) }
|
||||
| a=sum '-' b=term { _Py_BinOp(a, Sub, b, EXTRA) }
|
||||
| term
|
||||
term[expr_ty]:
|
||||
| a=term '*' b=factor { _Py_BinOp(a, Mult, b, EXTRA) }
|
||||
| a=term '/' b=factor { _Py_BinOp(a, Div, b, EXTRA) }
|
||||
| a=term '//' b=factor { _Py_BinOp(a, FloorDiv, b, EXTRA) }
|
||||
| a=term '%' b=factor { _Py_BinOp(a, Mod, b, EXTRA) }
|
||||
| a=term '@' b=factor { _Py_BinOp(a, MatMult, b, EXTRA) }
|
||||
| factor
|
||||
factor[expr_ty] (memo):
|
||||
| '+' a=factor { _Py_UnaryOp(UAdd, a, EXTRA) }
|
||||
| '-' a=factor { _Py_UnaryOp(USub, a, EXTRA) }
|
||||
| '~' a=factor { _Py_UnaryOp(Invert, a, EXTRA) }
|
||||
| power
|
||||
power[expr_ty]:
|
||||
| a=await_primary '**' b=factor { _Py_BinOp(a, Pow, b, EXTRA) }
|
||||
| await_primary
|
||||
await_primary[expr_ty] (memo):
|
||||
| AWAIT a=primary { _Py_Await(a, EXTRA) }
|
||||
| primary
|
||||
primary[expr_ty]:
|
||||
| a=primary '.' b=NAME { _Py_Attribute(a, b->v.Name.id, Load, EXTRA) }
|
||||
| a=primary b=genexp { _Py_Call(a, CHECK(_PyPegen_singleton_seq(p, b)), NULL, EXTRA) }
|
||||
| a=primary '(' b=[arguments] ')' {
|
||||
_Py_Call(a,
|
||||
(b) ? ((expr_ty) b)->v.Call.args : NULL,
|
||||
(b) ? ((expr_ty) b)->v.Call.keywords : NULL,
|
||||
EXTRA) }
|
||||
| a=primary '[' b=slices ']' { _Py_Subscript(a, b, Load, EXTRA) }
|
||||
| atom
|
||||
|
||||
slices[expr_ty]:
|
||||
| a=slice !',' { a }
|
||||
| a=','.slice+ [','] { _Py_Tuple(a, Load, EXTRA) }
|
||||
slice[expr_ty]:
|
||||
| a=[expression] ':' b=[expression] c=[':' d=[expression] { d }] { _Py_Slice(a, b, c, EXTRA) }
|
||||
| a=expression { a }
|
||||
atom[expr_ty]:
|
||||
| NAME
|
||||
| 'True' { _Py_Constant(Py_True, NULL, EXTRA) }
|
||||
| 'False' { _Py_Constant(Py_False, NULL, EXTRA) }
|
||||
| 'None' { _Py_Constant(Py_None, NULL, EXTRA) }
|
||||
| '__new_parser__' { RAISE_SYNTAX_ERROR("You found it!") }
|
||||
| &STRING strings
|
||||
| NUMBER
|
||||
| &'(' (tuple | group | genexp)
|
||||
| &'[' (list | listcomp)
|
||||
| &'{' (dict | set | dictcomp | setcomp)
|
||||
| '...' { _Py_Constant(Py_Ellipsis, NULL, EXTRA) }
|
||||
|
||||
strings[expr_ty] (memo): a=STRING+ { _PyPegen_concatenate_strings(p, a) }
|
||||
list[expr_ty]:
|
||||
| '[' a=[star_named_expressions] ']' { _Py_List(a, Load, EXTRA) }
|
||||
listcomp[expr_ty]:
|
||||
| '[' a=named_expression b=for_if_clauses ']' { _Py_ListComp(a, b, EXTRA) }
|
||||
| invalid_comprehension
|
||||
tuple[expr_ty]:
|
||||
| '(' a=[y=star_named_expression ',' z=[star_named_expressions] { _PyPegen_seq_insert_in_front(p, y, z) } ] ')' {
|
||||
_Py_Tuple(a, Load, EXTRA) }
|
||||
group[expr_ty]: '(' a=(yield_expr | named_expression) ')' { a }
|
||||
genexp[expr_ty]:
|
||||
| '(' a=expression b=for_if_clauses ')' { _Py_GeneratorExp(a, b, EXTRA) }
|
||||
| invalid_comprehension
|
||||
set[expr_ty]: '{' a=expressions_list '}' { _Py_Set(a, EXTRA) }
|
||||
setcomp[expr_ty]:
|
||||
| '{' a=expression b=for_if_clauses '}' { _Py_SetComp(a, b, EXTRA) }
|
||||
| invalid_comprehension
|
||||
dict[expr_ty]:
|
||||
| '{' a=[kvpairs] '}' { _Py_Dict(CHECK(_PyPegen_get_keys(p, a)),
|
||||
CHECK(_PyPegen_get_values(p, a)), EXTRA) }
|
||||
dictcomp[expr_ty]:
|
||||
| '{' a=kvpair b=for_if_clauses '}' { _Py_DictComp(a->key, a->value, b, EXTRA) }
|
||||
kvpairs[asdl_seq*]: a=','.kvpair+ [','] { a }
|
||||
kvpair[KeyValuePair*]:
|
||||
| '**' a=bitwise_or { _PyPegen_key_value_pair(p, NULL, a) }
|
||||
| a=expression ':' b=expression { _PyPegen_key_value_pair(p, a, b) }
|
||||
for_if_clauses[asdl_seq*]:
|
||||
| a=(y=[ASYNC] 'for' a=star_targets 'in' b=disjunction c=('if' z=disjunction { z })*
|
||||
{ _Py_comprehension(a, b, c, y != NULL, p->arena) })+ { a }
|
||||
|
||||
yield_expr[expr_ty]:
|
||||
| 'yield' 'from' a=expression { _Py_YieldFrom(a, EXTRA) }
|
||||
| 'yield' a=[star_expressions] { _Py_Yield(a, EXTRA) }
|
||||
|
||||
arguments[expr_ty] (memo):
|
||||
| a=args [','] &')' { a }
|
||||
| incorrect_arguments
|
||||
args[expr_ty]:
|
||||
| a=starred_expression b=[',' c=args { c }] {
|
||||
_Py_Call(_PyPegen_dummy_name(p),
|
||||
(b) ? CHECK(_PyPegen_seq_insert_in_front(p, a, ((expr_ty) b)->v.Call.args))
|
||||
: CHECK(_PyPegen_singleton_seq(p, a)),
|
||||
(b) ? ((expr_ty) b)->v.Call.keywords : NULL,
|
||||
EXTRA) }
|
||||
| a=kwargs { _Py_Call(_PyPegen_dummy_name(p),
|
||||
CHECK_NULL_ALLOWED(_PyPegen_seq_extract_starred_exprs(p, a)),
|
||||
CHECK_NULL_ALLOWED(_PyPegen_seq_delete_starred_exprs(p, a)),
|
||||
EXTRA) }
|
||||
| a=named_expression b=[',' c=args { c }] {
|
||||
_Py_Call(_PyPegen_dummy_name(p),
|
||||
(b) ? CHECK(_PyPegen_seq_insert_in_front(p, a, ((expr_ty) b)->v.Call.args))
|
||||
: CHECK(_PyPegen_singleton_seq(p, a)),
|
||||
(b) ? ((expr_ty) b)->v.Call.keywords : NULL,
|
||||
EXTRA) }
|
||||
kwargs[asdl_seq*]:
|
||||
| a=','.kwarg_or_starred+ ',' b=','.kwarg_or_double_starred+ { _PyPegen_join_sequences(p, a, b) }
|
||||
| ','.kwarg_or_starred+
|
||||
| ','.kwarg_or_double_starred+
|
||||
starred_expression[expr_ty]:
|
||||
| '*' a=expression { _Py_Starred(a, Load, EXTRA) }
|
||||
kwarg_or_starred[KeywordOrStarred*]:
|
||||
| a=NAME '=' b=expression {
|
||||
_PyPegen_keyword_or_starred(p, CHECK(_Py_keyword(a->v.Name.id, b, EXTRA)), 1) }
|
||||
| a=starred_expression { _PyPegen_keyword_or_starred(p, a, 0) }
|
||||
kwarg_or_double_starred[KeywordOrStarred*]:
|
||||
| a=NAME '=' b=expression {
|
||||
_PyPegen_keyword_or_starred(p, CHECK(_Py_keyword(a->v.Name.id, b, EXTRA)), 1) }
|
||||
| '**' a=expression { _PyPegen_keyword_or_starred(p, CHECK(_Py_keyword(NULL, a, EXTRA)), 1) }
|
||||
|
||||
# NOTE: star_targets may contain *bitwise_or, targets may not.
|
||||
star_targets[expr_ty]:
|
||||
| a=star_target !',' { a }
|
||||
| a=star_target b=(',' c=star_target { c })* [','] {
|
||||
_Py_Tuple(CHECK(_PyPegen_seq_insert_in_front(p, a, b)), Store, EXTRA) }
|
||||
star_targets_seq[asdl_seq*]: a=','.star_target+ [','] { a }
|
||||
star_target[expr_ty] (memo):
|
||||
| '*' a=(!'*' star_target) {
|
||||
_Py_Starred(CHECK(_PyPegen_set_expr_context(p, a, Store)), Store, EXTRA) }
|
||||
| a=t_primary '.' b=NAME !t_lookahead { _Py_Attribute(a, b->v.Name.id, Store, EXTRA) }
|
||||
| a=t_primary '[' b=slices ']' !t_lookahead { _Py_Subscript(a, b, Store, EXTRA) }
|
||||
| star_atom
|
||||
star_atom[expr_ty]:
|
||||
| a=NAME { _PyPegen_set_expr_context(p, a, Store) }
|
||||
| '(' a=star_target ')' { _PyPegen_set_expr_context(p, a, Store) }
|
||||
| '(' a=[star_targets_seq] ')' { _Py_Tuple(a, Store, EXTRA) }
|
||||
| '[' a=[star_targets_seq] ']' { _Py_List(a, Store, EXTRA) }
|
||||
|
||||
inside_paren_ann_assign_target[expr_ty]:
|
||||
| ann_assign_subscript_attribute_target
|
||||
| a=NAME { _PyPegen_set_expr_context(p, a, Store) }
|
||||
| '(' a=inside_paren_ann_assign_target ')' { a }
|
||||
|
||||
ann_assign_subscript_attribute_target[expr_ty]:
|
||||
| a=t_primary '.' b=NAME !t_lookahead { _Py_Attribute(a, b->v.Name.id, Store, EXTRA) }
|
||||
| a=t_primary '[' b=slices ']' !t_lookahead { _Py_Subscript(a, b, Store, EXTRA) }
|
||||
|
||||
del_targets[asdl_seq*]: a=','.del_target+ [','] { a }
|
||||
del_target[expr_ty] (memo):
|
||||
| a=t_primary '.' b=NAME !t_lookahead { _Py_Attribute(a, b->v.Name.id, Del, EXTRA) }
|
||||
| a=t_primary '[' b=slices ']' !t_lookahead { _Py_Subscript(a, b, Del, EXTRA) }
|
||||
| del_t_atom
|
||||
del_t_atom[expr_ty]:
|
||||
| a=NAME { _PyPegen_set_expr_context(p, a, Del) }
|
||||
| '(' a=del_target ')' { _PyPegen_set_expr_context(p, a, Del) }
|
||||
| '(' a=[del_targets] ')' { _Py_Tuple(a, Del, EXTRA) }
|
||||
| '[' a=[del_targets] ']' { _Py_List(a, Del, EXTRA) }
|
||||
|
||||
targets[asdl_seq*]: a=','.target+ [','] { a }
|
||||
target[expr_ty] (memo):
|
||||
| a=t_primary '.' b=NAME !t_lookahead { _Py_Attribute(a, b->v.Name.id, Store, EXTRA) }
|
||||
| a=t_primary '[' b=slices ']' !t_lookahead { _Py_Subscript(a, b, Store, EXTRA) }
|
||||
| t_atom
|
||||
t_primary[expr_ty]:
|
||||
| a=t_primary '.' b=NAME &t_lookahead { _Py_Attribute(a, b->v.Name.id, Load, EXTRA) }
|
||||
| a=t_primary '[' b=slices ']' &t_lookahead { _Py_Subscript(a, b, Load, EXTRA) }
|
||||
| a=t_primary b=genexp &t_lookahead { _Py_Call(a, CHECK(_PyPegen_singleton_seq(p, b)), NULL, EXTRA) }
|
||||
| a=t_primary '(' b=[arguments] ')' &t_lookahead {
|
||||
_Py_Call(a,
|
||||
(b) ? ((expr_ty) b)->v.Call.args : NULL,
|
||||
(b) ? ((expr_ty) b)->v.Call.keywords : NULL,
|
||||
EXTRA) }
|
||||
| a=atom &t_lookahead { a }
|
||||
t_lookahead: '(' | '[' | '.'
|
||||
t_atom[expr_ty]:
|
||||
| a=NAME { _PyPegen_set_expr_context(p, a, Store) }
|
||||
| '(' a=target ')' { _PyPegen_set_expr_context(p, a, Store) }
|
||||
| '(' b=[targets] ')' { _Py_Tuple(b, Store, EXTRA) }
|
||||
| '[' b=[targets] ']' { _Py_List(b, Store, EXTRA) }
|
||||
|
||||
|
||||
# From here on, there are rules for invalid syntax with specialised error messages
|
||||
incorrect_arguments:
|
||||
| args ',' '*' { RAISE_SYNTAX_ERROR("iterable argument unpacking follows keyword argument unpacking") }
|
||||
| expression for_if_clauses ',' [args | expression for_if_clauses] {
|
||||
RAISE_SYNTAX_ERROR("Generator expression must be parenthesized") }
|
||||
| a=args ',' args { _PyPegen_arguments_parsing_error(p, a) }
|
||||
invalid_named_expression:
|
||||
| a=expression ':=' expression {
|
||||
RAISE_SYNTAX_ERROR("cannot use assignment expressions with %s", _PyPegen_get_expr_name(a)) }
|
||||
invalid_assignment:
|
||||
| list ':' { RAISE_SYNTAX_ERROR("only single target (not list) can be annotated") }
|
||||
| tuple ':' { RAISE_SYNTAX_ERROR("only single target (not tuple) can be annotated") }
|
||||
| expression ':' expression ['=' annotated_rhs] {
|
||||
RAISE_SYNTAX_ERROR("illegal target for annotation") }
|
||||
| a=expression ('=' | augassign) (yield_expr | star_expressions) {
|
||||
RAISE_SYNTAX_ERROR("cannot assign to %s", _PyPegen_get_expr_name(a)) }
|
||||
invalid_block:
|
||||
| NEWLINE !INDENT { RAISE_INDENTATION_ERROR("expected an indented block") }
|
||||
invalid_comprehension:
|
||||
| ('[' | '(' | '{') '*' expression for_if_clauses {
|
||||
RAISE_SYNTAX_ERROR("iterable unpacking cannot be used in comprehension") }
|
||||
invalid_parameters:
|
||||
| [plain_names ','] (slash_with_default | names_with_default) ',' plain_names {
|
||||
RAISE_SYNTAX_ERROR("non-default argument follows default argument") }
|
|
@@ -108,4 +108,7 @@ PyAPI_FUNC(int) _PyAST_Optimize(struct _mod *, PyArena *arena, _PyASTOptimizeSta
#define Py_eval_input 258
#define Py_func_type_input 345

/* This doesn't need to match anything */
#define Py_fstring_input 800

#endif /* !Py_COMPILE_H */
@@ -147,6 +147,10 @@ typedef struct {
       Set to 1 by -X faulthandler and PYTHONFAULTHANDLER. -1 means unset. */
    int faulthandler;

    /* Enable PEG parser?
       1 by default, set to 0 by -X oldparser and PYTHONOLDPARSER */
    int use_peg;

    /* Enable tracemalloc?
       Set by -X tracemalloc=N and PYTHONTRACEMALLOC. -1 means unset */
    int tracemalloc;
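The new use_peg field surfaces in Python as a sys.flags attribute, which is what the test-suite changes later in this commit key their skips on. A minimal check (a sketch, not part of the patch):

    import sys
    print(sys.flags.use_peg)   # 1 by default; 0 under -X oldparser / PYTHONOLDPARSER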
@@ -0,0 +1,32 @@
#ifndef Py_LIMITED_API
#ifndef Py_PEGENINTERFACE
#define Py_PEGENINTERFACE
#ifdef __cplusplus
extern "C" {
#endif

#include "Python.h"
#include "Python-ast.h"

PyAPI_FUNC(mod_ty) PyPegen_ASTFromFile(const char *filename, int mode, PyArena *arena);
PyAPI_FUNC(mod_ty) PyPegen_ASTFromString(const char *str, int mode, PyCompilerFlags *flags,
                                         PyArena *arena);
PyAPI_FUNC(mod_ty) PyPegen_ASTFromStringObject(const char *str, PyObject* filename, int mode,
                                               PyCompilerFlags *flags, PyArena *arena);
PyAPI_FUNC(mod_ty) PyPegen_ASTFromFileObject(FILE *fp, PyObject *filename_ob,
                                             int mode, const char *enc, const char *ps1,
                                             const char *ps2, int *errcode, PyArena *arena);
PyAPI_FUNC(PyCodeObject *) PyPegen_CodeObjectFromFile(const char *filename, int mode);
PyAPI_FUNC(PyCodeObject *) PyPegen_CodeObjectFromString(const char *str, int mode,
                                                        PyCompilerFlags *flags);
PyAPI_FUNC(PyCodeObject *) PyPegen_CodeObjectFromFileObject(FILE *, PyObject *filename_ob,
                                                            int mode, const char *enc,
                                                            const char *ps1,
                                                            const char *ps2,
                                                            int *errcode);

#ifdef __cplusplus
}
#endif
#endif /* !Py_PEGENINTERFACE*/
#endif /* !Py_LIMITED_API */
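These are the C entry points the interpreter calls into when the PEG parser is enabled. From pure Python the same machinery is reached indirectly; a hedged sketch of the equivalent high-level path:

    import ast
    tree = ast.parse("x = 1 + 2")            # parsed with the default (PEG) parser
    code = compile(tree, "<demo>", "exec")   # AST -> code object
    exec(code)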
@@ -599,7 +599,7 @@ class CmdLineTest(unittest.TestCase):
        exitcode, stdout, stderr = assert_python_failure(script_name)
        text = io.TextIOWrapper(io.BytesIO(stderr), 'ascii').read()
        # Confirm that the caret is located under the first 1 character
        self.assertIn("\n 1 + 1 = 2\n ^", text)
        self.assertIn("\n 1 + 1 = 2\n ^", text)

    def test_syntaxerror_indented_caret_position(self):
        script = textwrap.dedent("""\

@@ -611,7 +611,7 @@ class CmdLineTest(unittest.TestCase):
        exitcode, stdout, stderr = assert_python_failure(script_name)
        text = io.TextIOWrapper(io.BytesIO(stderr), 'ascii').read()
        # Confirm that the caret is located under the first 1 character
        self.assertIn("\n 1 + 1 = 2\n ^", text)
        self.assertIn("\n 1 + 1 = 2\n ^", text)

        # Try the same with a form feed at the start of the indented line
        script = (

@@ -622,7 +622,7 @@ class CmdLineTest(unittest.TestCase):
        exitcode, stdout, stderr = assert_python_failure(script_name)
        text = io.TextIOWrapper(io.BytesIO(stderr), "ascii").read()
        self.assertNotIn("\f", text)
        self.assertIn("\n 1 + 1 = 2\n ^", text)
        self.assertIn("\n 1 + 1 = 2\n ^", text)

    def test_syntaxerror_multi_line_fstring(self):
        script = 'foo = f"""{}\nfoo"""\n'

@@ -632,14 +632,14 @@ class CmdLineTest(unittest.TestCase):
        self.assertEqual(
            stderr.splitlines()[-3:],
            [
                b' foo = f"""{}',
                b' ^',
                b' foo"""',
                b' ^',
                b'SyntaxError: f-string: empty expression not allowed',
            ],
        )

    def test_syntaxerror_invalid_escape_sequence_multi_line(self):
        script = 'foo = """\\q\n"""\n'
        script = 'foo = """\\q"""\n'
        with support.temp_dir() as script_dir:
            script_name = _make_test_script(script_dir, 'script', script)
            exitcode, stdout, stderr = assert_python_failure(

@@ -647,10 +647,9 @@ class CmdLineTest(unittest.TestCase):
        )
        self.assertEqual(
            stderr.splitlines()[-3:],
            [
                b' foo = """\\q',
                b' ^',
                b'SyntaxError: invalid escape sequence \\q',
            [ b' foo = """\\q"""',
                b' ^',
                b'SyntaxError: invalid escape sequence \\q'
            ],
        )
@@ -2,6 +2,7 @@
   Test cases for codeop.py
   Nick Mathewson
"""
import sys
import unittest
from test.support import is_jython

@@ -9,7 +10,6 @@ from codeop import compile_command, PyCF_DONT_IMPLY_DEDENT
import io

if is_jython:
    import sys

def unify_callables(d):
    for n,v in d.items():

@@ -122,6 +122,7 @@ class CodeopTests(unittest.TestCase):
        av("def f():\n pass\n#foo\n")
        av("@a.b.c\ndef f():\n pass\n")

    @unittest.skipIf(sys.flags.use_peg, "Pegen does not support PyCF_DONT_IMPLY_DEDENT yet")
    def test_incomplete(self):
        ai = self.assertIncomplete
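For context, the skipped test exercises codeop's incomplete-input detection, which is built on PyCF_DONT_IMPLY_DEDENT. The behaviour it relies on, as a short sketch:

    from codeop import compile_command
    print(compile_command("def f():"))              # None -> statement is incomplete
    print(compile_command("def f():\n    pass\n"))  # a code object -> statement complete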
@@ -501,6 +501,7 @@ if 1:
        self.compile_single("if x:\n f(x)\nelse:\n g(x)")
        self.compile_single("class T:\n pass")

    @unittest.skipIf(sys.flags.use_peg, 'Pegen does not disallow multiline single stmts')
    def test_bad_single_statement(self):
        self.assertInvalidSingle('1\n2')
        self.assertInvalidSingle('def f(): pass')
@@ -347,6 +347,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
        'isolated': 0,
        'use_environment': 1,
        'dev_mode': 0,
        'use_peg': 1,

        'install_signal_handlers': 1,
        'use_hash_seed': 0,

@@ -728,6 +729,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
        'import_time': 1,
        'show_ref_count': 1,
        'malloc_stats': 1,
        'use_peg': 0,

        'stdio_encoding': 'iso8859-1',
        'stdio_errors': 'replace',
@@ -26,6 +26,7 @@ class EOFTestCase(unittest.TestCase):
        else:
            raise support.TestFailed

    @unittest.skipIf(sys.flags.use_peg, "TODO for PEG -- fails with new parser")
    def test_line_continuation_EOF(self):
        """A continuation at the end of input must be an error; bpo2180."""
        expect = 'unexpected EOF while parsing (<string>, line 1)'

@@ -36,6 +37,7 @@ class EOFTestCase(unittest.TestCase):
            exec('\\')
        self.assertEqual(str(excinfo.exception), expect)

    @unittest.skip("TODO for PEG -- fails even with old parser now")
    @unittest.skipIf(not sys.executable, "sys.executable required")
    def test_line_continuation_EOF_from_file_bpo2180(self):
        """Ensure tok_nextc() does not add too many ending newlines."""
@@ -178,6 +178,7 @@ class ExceptionTests(unittest.TestCase):
        s = '''if True:\n print()\n\texec "mixed tabs and spaces"'''
        ckmsg(s, "inconsistent use of tabs and spaces in indentation", TabError)

    @unittest.skipIf(sys.flags.use_peg, "Pegen column offsets might be different")
    def testSyntaxErrorOffset(self):
        def check(src, lineno, offset, encoding='utf-8'):
            with self.assertRaises(SyntaxError) as cm:
@@ -1,6 +1,9 @@
import __future__
import unittest
import sys


@unittest.skipIf(sys.flags.use_peg, "Not supported by pegen yet")
class FLUFLTests(unittest.TestCase):

    def test_barry_as_bdfl(self):
@@ -10,6 +10,7 @@
import ast
import types
import decimal
import sys
import unittest

a_global = 'global variable'

@@ -205,7 +206,8 @@ f'{a * f"-{x()}-"}'"""
        call = binop.right.values[1].value
        self.assertEqual(type(call), ast.Call)
        self.assertEqual(call.lineno, 3)
        self.assertEqual(call.col_offset, 11)
        if not sys.flags.use_peg:
            self.assertEqual(call.col_offset, 11)

    def test_ast_line_numbers_duplicate_expression(self):
        """Duplicate expression
@@ -1856,10 +1856,11 @@ Traceback (most recent call last):
  ...
SyntaxError: 'yield' outside function

>>> def f(): x = yield = y
Traceback (most recent call last):
  ...
SyntaxError: assignment to yield expression not possible
# Pegen does not produce this error message yet
# >>> def f(): x = yield = y
# Traceback (most recent call last):
#   ...
# SyntaxError: assignment to yield expression not possible

>>> def f(): (yield bar) = y
Traceback (most recent call last):
@@ -8,6 +8,7 @@ import pickle
import unittest
import operator
import struct
import sys
from test import support
from test.support.script_helper import assert_python_failure
from test.support.script_helper import assert_python_ok

@@ -899,9 +900,10 @@ class ParserStackLimitTestCase(unittest.TestCase):
        st = parser.expr(e)
        st.compile()

    @unittest.skipIf(sys.flags.use_peg, "Pegen does not trigger memory error with this many parentheses")
    def test_trigger_memory_error(self):
        e = self._nested_expression(100)
        rc, out, err = assert_python_failure('-c', e)
        rc, out, err = assert_python_failure('-Xoldparser', '-c', e)
        # parsing the expression will result in an error message
        # followed by a MemoryError (see #11963)
        self.assertIn(b's_push: parser stack overflow', err)
@@ -0,0 +1,7 @@
import os

from test.support import load_package_tests

# Load all tests in package
def load_tests(*args):
    return load_package_tests(os.path.dirname(__file__), *args)

@@ -0,0 +1,4 @@
import unittest
from . import load_tests

unittest.main()
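With the package __init__ and __main__ wired up as above, the new tests are picked up by the standard test machinery. A usage sketch (the ./python path assumes an in-tree build; neither command is part of the patch):

    import subprocess
    # Either entry point runs the new package:
    subprocess.run(["./python", "-m", "test", "test_peg_generator"], check=True)
    subprocess.run(["./python", "-m", "test.test_peg_generator"], check=True)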
@@ -0,0 +1,62 @@
"""
Copy-parse of ast.dump, removing the `isinstance` checks. This is needed,
because testing pegen requires generating a C extension module, which contains
a copy of the symbols defined in Python-ast.c. Thus, the isinstance check would
always fail. We rely on string comparison of the base classes instead.
TODO: Remove the above-described hack.
"""

def ast_dump(node, annotate_fields=True, include_attributes=False, *, indent=None):
    def _format(node, level=0):
        if indent is not None:
            level += 1
            prefix = '\n' + indent * level
            sep = ',\n' + indent * level
        else:
            prefix = ''
            sep = ', '
        if any(cls.__name__ == 'AST' for cls in node.__class__.__mro__):
            cls = type(node)
            args = []
            allsimple = True
            keywords = annotate_fields
            for name in node._fields:
                try:
                    value = getattr(node, name)
                except AttributeError:
                    keywords = True
                    continue
                if value is None and getattr(cls, name, ...) is None:
                    keywords = True
                    continue
                value, simple = _format(value, level)
                allsimple = allsimple and simple
                if keywords:
                    args.append('%s=%s' % (name, value))
                else:
                    args.append(value)
            if include_attributes and node._attributes:
                for name in node._attributes:
                    try:
                        value = getattr(node, name)
                    except AttributeError:
                        continue
                    if value is None and getattr(cls, name, ...) is None:
                        continue
                    value, simple = _format(value, level)
                    allsimple = allsimple and simple
                    args.append('%s=%s' % (name, value))
            if allsimple and len(args) <= 3:
                return '%s(%s)' % (node.__class__.__name__, ', '.join(args)), not args
            return '%s(%s%s)' % (node.__class__.__name__, prefix, sep.join(args)), False
        elif isinstance(node, list):
            if not node:
                return '[]', True
            return '[%s%s]' % (prefix, sep.join(_format(x, level)[0] for x in node)), False
        return repr(node), True

    if all(cls.__name__ != 'AST' for cls in node.__class__.__mro__):
        raise TypeError('expected AST, got %r' % node.__class__.__name__)
    if indent is not None and not isinstance(indent, str):
        indent = ' ' * indent
    return _format(node)[0]
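How the helper is used by the tests that follow, as a sketch: both the stdlib AST and the extension-module AST are rendered to strings with ast_dump and the strings are compared, so the extension's nodes never have to pass an isinstance() check against the stdlib's ast.AST. For stdlib trees the output should match ast.dump:

    import ast
    from test.test_peg_generator.ast_dump import ast_dump

    tree = ast.parse("x = 1")
    print(ast_dump(tree))   # same text ast.dump() produces for this tree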
@ -0,0 +1,333 @@
|
|||
import ast
|
||||
import contextlib
|
||||
import traceback
|
||||
import tempfile
|
||||
import shutil
|
||||
import unittest
|
||||
import sys
|
||||
|
||||
from test import test_tools
|
||||
from test.test_peg_generator.ast_dump import ast_dump
|
||||
from pathlib import PurePath, Path
|
||||
from typing import Sequence
|
||||
|
||||
test_tools.skip_if_missing('peg_generator')
|
||||
with test_tools.imports_under_tool('peg_generator'):
|
||||
from pegen.grammar_parser import GeneratedParser as GrammarParser
|
||||
from pegen.testutil import (
|
||||
parse_string,
|
||||
generate_parser_c_extension,
|
||||
generate_c_parser_source,
|
||||
)
|
||||
|
||||
|
||||
class TestCParser(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.tmp_path = tempfile.mkdtemp()
|
||||
|
||||
def tearDown(self):
|
||||
with contextlib.suppress(PermissionError):
|
||||
shutil.rmtree(self.tmp_path)
|
||||
|
||||
def check_input_strings_for_grammar(
|
||||
self,
|
||||
source: str,
|
||||
tmp_path: PurePath,
|
||||
valid_cases: Sequence[str] = (),
|
||||
invalid_cases: Sequence[str] = (),
|
||||
) -> None:
|
||||
grammar = parse_string(source, GrammarParser)
|
||||
extension = generate_parser_c_extension(grammar, Path(tmp_path))
|
||||
|
||||
if valid_cases:
|
||||
for case in valid_cases:
|
||||
extension.parse_string(case, mode=0)
|
||||
|
||||
if invalid_cases:
|
||||
for case in invalid_cases:
|
||||
with self.assertRaises(SyntaxError):
|
||||
extension.parse_string(case, mode=0)
|
||||
|
||||
def verify_ast_generation(self, source: str, stmt: str, tmp_path: PurePath) -> None:
|
||||
grammar = parse_string(source, GrammarParser)
|
||||
extension = generate_parser_c_extension(grammar, Path(tmp_path))
|
||||
|
||||
expected_ast = ast.parse(stmt)
|
||||
actual_ast = extension.parse_string(stmt, mode=1)
|
||||
self.assertEqual(ast_dump(expected_ast), ast_dump(actual_ast))
|
||||
|
||||
def test_c_parser(self) -> None:
|
||||
grammar_source = """
|
||||
start[mod_ty]: a=stmt* $ { Module(a, NULL, p->arena) }
|
||||
stmt[stmt_ty]: a=expr_stmt { a }
|
||||
expr_stmt[stmt_ty]: a=expression NEWLINE { _Py_Expr(a, EXTRA) }
|
||||
expression[expr_ty]: ( l=expression '+' r=term { _Py_BinOp(l, Add, r, EXTRA) }
|
||||
| l=expression '-' r=term { _Py_BinOp(l, Sub, r, EXTRA) }
|
||||
| t=term { t }
|
||||
)
|
||||
term[expr_ty]: ( l=term '*' r=factor { _Py_BinOp(l, Mult, r, EXTRA) }
|
||||
| l=term '/' r=factor { _Py_BinOp(l, Div, r, EXTRA) }
|
||||
| f=factor { f }
|
||||
)
|
||||
factor[expr_ty]: ('(' e=expression ')' { e }
|
||||
| a=atom { a }
|
||||
)
|
||||
atom[expr_ty]: ( n=NAME { n }
|
||||
| n=NUMBER { n }
|
||||
| s=STRING { s }
|
||||
)
|
||||
"""
|
||||
grammar = parse_string(grammar_source, GrammarParser)
|
||||
extension = generate_parser_c_extension(grammar, Path(self.tmp_path))
|
||||
|
||||
expressions = [
|
||||
"4+5",
|
||||
"4-5",
|
||||
"4*5",
|
||||
"1+4*5",
|
||||
"1+4/5",
|
||||
"(1+1) + (1+1)",
|
||||
"(1+1) - (1+1)",
|
||||
"(1+1) * (1+1)",
|
||||
"(1+1) / (1+1)",
|
||||
]
|
||||
|
||||
for expr in expressions:
|
||||
the_ast = extension.parse_string(expr, mode=1)
|
||||
expected_ast = ast.parse(expr)
|
||||
self.assertEqual(ast_dump(the_ast), ast_dump(expected_ast))
|
||||
|
||||
def test_lookahead(self) -> None:
|
||||
grammar = """
|
||||
start: NAME &NAME expr NEWLINE? ENDMARKER
|
||||
expr: NAME | NUMBER
|
||||
"""
|
||||
valid_cases = ["foo bar"]
|
||||
invalid_cases = ["foo 34"]
|
||||
self.check_input_strings_for_grammar(grammar, self.tmp_path, valid_cases, invalid_cases)
|
||||
|
||||
def test_negative_lookahead(self) -> None:
|
||||
grammar = """
|
||||
start: NAME !NAME expr NEWLINE? ENDMARKER
|
||||
expr: NAME | NUMBER
|
||||
"""
|
||||
valid_cases = ["foo 34"]
|
||||
invalid_cases = ["foo bar"]
|
||||
self.check_input_strings_for_grammar(grammar, self.tmp_path, valid_cases, invalid_cases)
|
||||
|
||||
def test_cut(self) -> None:
|
||||
grammar = """
|
||||
start: X ~ Y Z | X Q S
|
||||
X: 'x'
|
||||
Y: 'y'
|
||||
Z: 'z'
|
||||
Q: 'q'
|
||||
S: 's'
|
||||
"""
|
||||
valid_cases = ["x y z"]
|
||||
invalid_cases = ["x q s"]
|
||||
self.check_input_strings_for_grammar(grammar, self.tmp_path, valid_cases, invalid_cases)
|
||||
|
||||
def test_gather(self) -> None:
|
||||
grammar = """
|
||||
start: ';'.pass_stmt+ NEWLINE
|
||||
pass_stmt: 'pass'
|
||||
"""
|
||||
valid_cases = ["pass", "pass; pass"]
|
||||
invalid_cases = ["pass;", "pass; pass;"]
|
||||
self.check_input_strings_for_grammar(grammar, self.tmp_path, valid_cases, invalid_cases)
|
||||
|
||||
def test_left_recursion(self) -> None:
|
||||
grammar = """
|
||||
start: expr NEWLINE
|
||||
expr: ('-' term | expr '+' term | term)
|
||||
term: NUMBER
|
||||
"""
|
||||
valid_cases = ["-34", "34", "34 + 12", "1 + 1 + 2 + 3"]
|
||||
self.check_input_strings_for_grammar(grammar, self.tmp_path, valid_cases)
|
||||
|
||||
def test_advanced_left_recursive(self) -> None:
|
||||
grammar = """
|
||||
start: NUMBER | sign start
|
||||
sign: ['-']
|
||||
"""
|
||||
valid_cases = ["23", "-34"]
|
||||
self.check_input_strings_for_grammar(grammar, self.tmp_path, valid_cases)
|
||||
|
||||
def test_mutually_left_recursive(self) -> None:
|
||||
grammar = """
|
||||
start: foo 'E'
|
||||
foo: bar 'A' | 'B'
|
||||
bar: foo 'C' | 'D'
|
||||
"""
|
||||
valid_cases = ["B E", "D A C A E"]
|
||||
self.check_input_strings_for_grammar(grammar, self.tmp_path, valid_cases)
|
||||
|
||||
def test_nasty_mutually_left_recursive(self) -> None:
|
||||
grammar = """
|
||||
start: target '='
|
||||
target: maybe '+' | NAME
|
||||
maybe: maybe '-' | target
|
||||
"""
|
||||
valid_cases = ["x ="]
|
||||
invalid_cases = ["x - + ="]
|
||||
self.check_input_strings_for_grammar(grammar, self.tmp_path, valid_cases, invalid_cases)
|
||||
|
||||
def test_return_stmt_noexpr_action(self) -> None:
|
||||
grammar = """
|
||||
start[mod_ty]: a=[statements] ENDMARKER { Module(a, NULL, p->arena) }
|
||||
statements[asdl_seq*]: a=statement+ { a }
|
||||
statement[stmt_ty]: simple_stmt
|
||||
simple_stmt[stmt_ty]: small_stmt
|
||||
small_stmt[stmt_ty]: return_stmt
|
||||
return_stmt[stmt_ty]: a='return' NEWLINE { _Py_Return(NULL, EXTRA) }
|
||||
"""
|
||||
stmt = "return"
|
||||
self.verify_ast_generation(grammar, stmt, self.tmp_path)
|
||||
|
||||
def test_gather_action_ast(self) -> None:
|
||||
grammar = """
|
||||
start[mod_ty]: a=';'.pass_stmt+ NEWLINE ENDMARKER { Module(a, NULL, p->arena) }
|
||||
pass_stmt[stmt_ty]: a='pass' { _Py_Pass(EXTRA)}
|
||||
"""
|
||||
stmt = "pass; pass"
|
||||
self.verify_ast_generation(grammar, stmt, self.tmp_path)
|
||||
|
||||
def test_pass_stmt_action(self) -> None:
|
||||
grammar = """
|
||||
start[mod_ty]: a=[statements] ENDMARKER { Module(a, NULL, p->arena) }
|
||||
statements[asdl_seq*]: a=statement+ { a }
|
||||
statement[stmt_ty]: simple_stmt
|
||||
simple_stmt[stmt_ty]: small_stmt
|
||||
small_stmt[stmt_ty]: pass_stmt
|
||||
pass_stmt[stmt_ty]: a='pass' NEWLINE { _Py_Pass(EXTRA) }
|
||||
"""
|
||||
stmt = "pass"
|
||||
self.verify_ast_generation(grammar, stmt, self.tmp_path)
|
||||
|
||||
def test_if_stmt_action(self) -> None:
|
||||
grammar = """
|
||||
start[mod_ty]: a=[statements] ENDMARKER { Module(a, NULL, p->arena) }
|
||||
statements[asdl_seq*]: a=statement+ { _PyPegen_seq_flatten(p, a) }
|
||||
statement[asdl_seq*]: a=compound_stmt { _PyPegen_singleton_seq(p, a) } | simple_stmt
|
||||
|
||||
simple_stmt[asdl_seq*]: a=small_stmt b=further_small_stmt* [';'] NEWLINE { _PyPegen_seq_insert_in_front(p, a, b) }
|
||||
further_small_stmt[stmt_ty]: ';' a=small_stmt { a }
|
||||
|
||||
block: simple_stmt | NEWLINE INDENT a=statements DEDENT { a }
|
||||
|
||||
compound_stmt: if_stmt
|
||||
|
||||
if_stmt: 'if' a=full_expression ':' b=block { _Py_If(a, b, NULL, EXTRA) }
|
||||
|
||||
small_stmt[stmt_ty]: pass_stmt
|
||||
|
||||
pass_stmt[stmt_ty]: a='pass' { _Py_Pass(EXTRA) }
|
||||
|
||||
full_expression: NAME
|
||||
"""
|
||||
stmt = "pass"
|
||||
self.verify_ast_generation(grammar, stmt, self.tmp_path)
|
||||
|
||||
def test_same_name_different_types(self) -> None:
|
||||
source = """
|
||||
start[mod_ty]: a=import_from+ NEWLINE ENDMARKER { Module(a, NULL, p->arena)}
|
||||
import_from[stmt_ty]: ( a='from' !'import' c=simple_name 'import' d=import_as_names_from {
|
||||
_Py_ImportFrom(c->v.Name.id, d, 0, EXTRA) }
|
||||
| a='from' '.' 'import' c=import_as_names_from {
|
||||
_Py_ImportFrom(NULL, c, 1, EXTRA) }
|
||||
)
|
||||
simple_name[expr_ty]: NAME
|
||||
import_as_names_from[asdl_seq*]: a=','.import_as_name_from+ { a }
|
||||
import_as_name_from[alias_ty]: a=NAME 'as' b=NAME { _Py_alias(((expr_ty) a)->v.Name.id, ((expr_ty) b)->v.Name.id, p->arena) }
|
||||
"""
|
||||
grammar = parse_string(source, GrammarParser)
|
||||
extension = generate_parser_c_extension(grammar, Path(self.tmp_path))
|
||||
|
||||
for stmt in ("from a import b as c", "from . import a as b"):
|
||||
expected_ast = ast.parse(stmt)
|
||||
actual_ast = extension.parse_string(stmt, mode=1)
|
||||
self.assertEqual(ast_dump(expected_ast), ast_dump(actual_ast))
|
||||
|
||||
def test_with_stmt_with_paren(self) -> None:
|
||||
grammar_source = """
|
||||
start[mod_ty]: a=[statements] ENDMARKER { Module(a, NULL, p->arena) }
|
||||
statements[asdl_seq*]: a=statement+ { _PyPegen_seq_flatten(p, a) }
|
||||
statement[asdl_seq*]: a=compound_stmt { _PyPegen_singleton_seq(p, a) }
|
||||
compound_stmt[stmt_ty]: with_stmt
|
||||
with_stmt[stmt_ty]: (
|
||||
a='with' '(' b=','.with_item+ ')' ':' c=block {
|
||||
_Py_With(b, _PyPegen_singleton_seq(p, c), NULL, EXTRA) }
|
||||
)
|
||||
with_item[withitem_ty]: (
|
||||
e=NAME o=['as' t=NAME { t }] { _Py_withitem(e, _PyPegen_set_expr_context(p, o, Store), p->arena) }
|
||||
)
|
||||
block[stmt_ty]: a=pass_stmt NEWLINE { a } | NEWLINE INDENT a=pass_stmt DEDENT { a }
|
||||
pass_stmt[stmt_ty]: a='pass' { _Py_Pass(EXTRA) }
|
||||
"""
|
||||
stmt = "with (\n a as b,\n c as d\n): pass"
|
||||
grammar = parse_string(grammar_source, GrammarParser)
|
||||
extension = generate_parser_c_extension(grammar, Path(self.tmp_path))
|
||||
the_ast = extension.parse_string(stmt, mode=1)
|
||||
self.assertTrue(ast_dump(the_ast).startswith(
|
||||
"Module(body=[With(items=[withitem(context_expr=Name(id='a', ctx=Load()), optional_vars=Name(id='b', ctx=Store())), "
|
||||
"withitem(context_expr=Name(id='c', ctx=Load()), optional_vars=Name(id='d', ctx=Store()))]"
|
||||
))
|
||||
|
||||
def test_ternary_operator(self) -> None:
|
||||
grammar_source = """
|
||||
start[mod_ty]: a=expr ENDMARKER { Module(a, NULL, p->arena) }
|
||||
expr[asdl_seq*]: a=listcomp NEWLINE { _PyPegen_singleton_seq(p, _Py_Expr(a, EXTRA)) }
|
||||
listcomp[expr_ty]: (
|
||||
a='[' b=NAME c=for_if_clauses d=']' { _Py_ListComp(b, c, EXTRA) }
|
||||
)
|
||||
for_if_clauses[asdl_seq*]: (
|
||||
a=(y=[ASYNC] 'for' a=NAME 'in' b=NAME c=('if' z=NAME { z })*
|
||||
{ _Py_comprehension(_Py_Name(((expr_ty) a)->v.Name.id, Store, EXTRA), b, c, (y == NULL) ? 0 : 1, p->arena) })+ { a }
|
||||
)
|
||||
"""
|
||||
stmt = "[i for i in a if b]"
|
||||
self.verify_ast_generation(grammar_source, stmt, self.tmp_path)
|
||||
|
||||
def test_syntax_error_for_string(self) -> None:
|
||||
grammar_source = """
|
||||
start: expr+ NEWLINE? ENDMARKER
|
||||
expr: NAME
|
||||
"""
|
||||
grammar = parse_string(grammar_source, GrammarParser)
|
||||
print(list(Path(self.tmp_path).iterdir()))
|
||||
extension = generate_parser_c_extension(grammar, Path(self.tmp_path))
|
||||
for text in ("a b 42 b a", "名 名 42 名 名"):
|
||||
try:
|
||||
extension.parse_string(text, mode=0)
|
||||
except SyntaxError as e:
|
||||
tb = traceback.format_exc()
|
||||
self.assertTrue('File "<string>", line 1' in tb)
|
||||
self.assertTrue(f"SyntaxError: invalid syntax" in tb)
|
||||
|
||||
def test_headers_and_trailer(self) -> None:
|
||||
grammar_source = """
|
||||
@header 'SOME HEADER'
|
||||
@subheader 'SOME SUBHEADER'
|
||||
@trailer 'SOME TRAILER'
|
||||
start: expr+ NEWLINE? ENDMARKER
|
||||
expr: x=NAME
|
||||
"""
|
||||
grammar = parse_string(grammar_source, GrammarParser)
|
||||
parser_source = generate_c_parser_source(grammar)
|
||||
|
||||
self.assertTrue("SOME HEADER" in parser_source)
|
||||
self.assertTrue("SOME SUBHEADER" in parser_source)
|
||||
self.assertTrue("SOME TRAILER" in parser_source)
|
||||
|
||||
|
||||
def test_error_in_rules(self) -> None:
|
||||
grammar_source = """
|
||||
start: expr+ NEWLINE? ENDMARKER
|
||||
expr: NAME {PyTuple_New(-1)}
|
||||
"""
|
||||
grammar = parse_string(grammar_source, GrammarParser)
|
||||
extension = generate_parser_c_extension(grammar, Path(self.tmp_path))
|
||||
# PyTuple_New raises SystemError if an invalid argument was passed.
|
||||
with self.assertRaises(SystemError):
|
||||
extension.parse_string("a", mode=0)
|
|
@ -0,0 +1,225 @@
|
|||
import unittest
|
||||
|
||||
from test import test_tools
|
||||
from typing import Dict, Set
|
||||
|
||||
test_tools.skip_if_missing('peg_generator')
|
||||
with test_tools.imports_under_tool('peg_generator'):
|
||||
from pegen.grammar_parser import GeneratedParser as GrammarParser
|
||||
from pegen.testutil import parse_string
|
||||
from pegen.first_sets import FirstSetCalculator
|
||||
from pegen.grammar import Grammar
|
||||
|
||||
|
||||
class TestFirstSets(unittest.TestCase):
|
||||
def calculate_first_sets(self, grammar_source: str) -> Dict[str, Set[str]]:
|
||||
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
||||
return FirstSetCalculator(grammar.rules).calculate()
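# FIRST(rule) is, roughly, the set of terminals that can start an input matched
# by the rule; nullable rules (which can match nothing) also contribute "".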
|
||||
|
||||
def test_alternatives(self) -> None:
|
||||
grammar = """
|
||||
start: expr NEWLINE? ENDMARKER
|
||||
expr: A | B
|
||||
A: 'a' | '-'
|
||||
B: 'b' | '+'
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"A": {"'a'", "'-'"},
|
||||
"B": {"'+'", "'b'"},
|
||||
"expr": {"'+'", "'a'", "'b'", "'-'"},
|
||||
"start": {"'+'", "'a'", "'b'", "'-'"},
|
||||
})
|
||||
|
||||
def test_optionals(self) -> None:
|
||||
grammar = """
|
||||
start: expr NEWLINE
|
||||
expr: ['a'] ['b'] 'c'
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"expr": {"'c'", "'a'", "'b'"},
|
||||
"start": {"'c'", "'a'", "'b'"},
|
||||
})
|
||||
|
||||
def test_repeat_with_separator(self) -> None:
|
||||
grammar = """
|
||||
start: ','.thing+ NEWLINE
|
||||
thing: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
|
||||
|
||||
def test_optional_operator(self) -> None:
|
||||
grammar = """
|
||||
start: sum NEWLINE
|
||||
sum: (term)? 'b'
|
||||
term: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"term": {"NUMBER"},
|
||||
"sum": {"NUMBER", "'b'"},
|
||||
"start": {"'b'", "NUMBER"},
|
||||
})
|
||||
|
||||
def test_optional_literal(self) -> None:
|
||||
grammar = """
|
||||
start: sum NEWLINE
|
||||
sum: '+' ? term
|
||||
term: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"term": {"NUMBER"},
|
||||
"sum": {"'+'", "NUMBER"},
|
||||
"start": {"'+'", "NUMBER"},
|
||||
})
|
||||
|
||||
def test_optional_after(self) -> None:
|
||||
grammar = """
|
||||
start: term NEWLINE
|
||||
term: NUMBER ['+']
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"NUMBER"}})
|
||||
|
||||
def test_optional_before(self) -> None:
|
||||
grammar = """
|
||||
start: term NEWLINE
|
||||
term: ['+'] NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}})
|
||||
|
||||
def test_repeat_0(self) -> None:
|
||||
grammar = """
|
||||
start: thing* "+" NEWLINE
|
||||
thing: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}})
|
||||
|
||||
def test_repeat_0_with_group(self) -> None:
|
||||
grammar = """
|
||||
start: ('+' '-')* term NEWLINE
|
||||
term: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}})
|
||||
|
||||
def test_repeat_1(self) -> None:
|
||||
grammar = """
|
||||
start: thing+ '-' NEWLINE
|
||||
thing: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
|
||||
|
||||
def test_repeat_1_with_group(self) -> None:
|
||||
grammar = """
|
||||
start: ('+' term)+ term NEWLINE
|
||||
term: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}})
|
||||
|
||||
def test_gather(self) -> None:
|
||||
grammar = """
|
||||
start: ','.thing+ NEWLINE
|
||||
thing: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
|
||||
|
||||
def test_positive_lookahead(self) -> None:
|
||||
grammar = """
|
||||
start: expr NEWLINE
|
||||
expr: &'a' opt
|
||||
opt: 'a' | 'b' | 'c'
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"expr": {"'a'"},
|
||||
"start": {"'a'"},
|
||||
"opt": {"'b'", "'c'", "'a'"},
|
||||
})
|
||||
|
||||
def test_negative_lookahead(self) -> None:
|
||||
grammar = """
|
||||
start: expr NEWLINE
|
||||
expr: !'a' opt
|
||||
opt: 'a' | 'b' | 'c'
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"opt": {"'b'", "'a'", "'c'"},
|
||||
"expr": {"'b'", "'c'"},
|
||||
"start": {"'b'", "'c'"},
|
||||
})
|
||||
|
||||
def test_left_recursion(self) -> None:
|
||||
grammar = """
|
||||
start: expr NEWLINE
|
||||
expr: ('-' term | expr '+' term | term)
|
||||
term: NUMBER
|
||||
foo: 'foo'
|
||||
bar: 'bar'
|
||||
baz: 'baz'
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"expr": {"NUMBER", "'-'"},
|
||||
"term": {"NUMBER"},
|
||||
"start": {"NUMBER", "'-'"},
|
||||
"foo": {"'foo'"},
|
||||
"bar": {"'bar'"},
|
||||
"baz": {"'baz'"},
|
||||
})
|
||||
|
||||
def test_advance_left_recursion(self) -> None:
|
||||
grammar = """
|
||||
start: NUMBER | sign start
|
||||
sign: ['-']
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}})
|
||||
|
||||
def test_mutual_left_recursion(self) -> None:
|
||||
grammar = """
|
||||
start: foo 'E'
|
||||
foo: bar 'A' | 'B'
|
||||
bar: foo 'C' | 'D'
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"foo": {"'D'", "'B'"},
|
||||
"bar": {"'D'"},
|
||||
"start": {"'D'", "'B'"},
|
||||
})
|
||||
|
||||
def test_nasty_left_recursion(self) -> None:
|
||||
# TODO: Validate this
|
||||
grammar = """
|
||||
start: target '='
|
||||
target: maybe '+' | NAME
|
||||
maybe: maybe '-' | target
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}})
|
||||
|
||||
def test_nullable_rule(self) -> None:
|
||||
grammar = """
|
||||
start: sign thing $
|
||||
sign: ['-']
|
||||
thing: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"sign": {"", "'-'"},
|
||||
"thing": {"NUMBER"},
|
||||
"start": {"NUMBER", "'-'"},
|
||||
})
|
||||
|
||||
def test_epsilon_production_in_start_rule(self) -> None:
|
||||
grammar = """
|
||||
start: ['-'] $
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}})
|
||||
|
||||
def test_multiple_nullable_rules(self) -> None:
|
||||
grammar = """
|
||||
start: sign thing other another $
|
||||
sign: ['-']
|
||||
thing: ['+']
|
||||
other: '*'
|
||||
another: '/'
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"sign": {"", "'-'"},
|
||||
"thing": {"'+'", ""},
|
||||
"start": {"'+'", "'-'", "'*'"},
|
||||
"other": {"'*'"},
|
||||
"another": {"'/'"},
|
||||
})
|
|
@ -0,0 +1,728 @@
|
|||
import io
|
||||
import textwrap
|
||||
import unittest
|
||||
|
||||
from test import test_tools
|
||||
from typing import Any, Dict, List, Type
|
||||
from tokenize import TokenInfo, NAME, NEWLINE, NUMBER, OP
|
||||
|
||||
test_tools.skip_if_missing('peg_generator')
|
||||
with test_tools.imports_under_tool('peg_generator'):
|
||||
from pegen.grammar_parser import GeneratedParser as GrammarParser
|
||||
from pegen.testutil import (
|
||||
parse_string,
|
||||
generate_parser,
|
||||
make_parser
|
||||
)
|
||||
from pegen.grammar import GrammarVisitor, GrammarError, Grammar
|
||||
from pegen.grammar_visualizer import ASTGrammarPrinter
|
||||
from pegen.parser import Parser
|
||||
from pegen.python_generator import PythonParserGenerator
|
||||
|
||||
|
||||
class TestPegen(unittest.TestCase):
|
||||
def test_parse_grammar(self) -> None:
|
||||
grammar_source = """
|
||||
start: sum NEWLINE
|
||||
sum: t1=term '+' t2=term { action } | term
|
||||
term: NUMBER
|
||||
"""
|
||||
expected = """
|
||||
start: sum NEWLINE
|
||||
sum: term '+' term | term
|
||||
term: NUMBER
|
||||
"""
|
||||
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
||||
rules = grammar.rules
|
||||
self.assertEqual(str(grammar), textwrap.dedent(expected).strip())
|
||||
# Check the str() and repr() of a few rules; AST nodes don't support ==.
|
||||
self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
|
||||
self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
|
||||
expected_repr = "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
|
||||
self.assertEqual(repr(rules["term"]), expected_repr)
|
||||
|
||||
def test_long_rule_str(self) -> None:
|
||||
grammar_source = """
|
||||
start: zero | one | one zero | one one | one zero zero | one zero one | one one zero | one one one
|
||||
"""
|
||||
expected = """
|
||||
start:
|
||||
| zero
|
||||
| one
|
||||
| one zero
|
||||
| one one
|
||||
| one zero zero
|
||||
| one zero one
|
||||
| one one zero
|
||||
| one one one
|
||||
"""
|
||||
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
||||
self.assertEqual(str(grammar.rules["start"]), textwrap.dedent(expected).strip())
|
||||
|
||||
def test_typed_rules(self) -> None:
|
||||
grammar = """
|
||||
start[int]: sum NEWLINE
|
||||
sum[int]: t1=term '+' t2=term { action } | term
|
||||
term[int]: NUMBER
|
||||
"""
|
||||
rules = parse_string(grammar, GrammarParser).rules
|
||||
# Check the str() and repr() of a few rules; AST nodes don't support ==.
|
||||
self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
|
||||
self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
|
||||
self.assertEqual(
|
||||
repr(rules["term"]),
|
||||
"Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
|
||||
)
|
||||
|
||||
def test_repeat_with_separator_rules(self) -> None:
|
||||
grammar = """
|
||||
start: ','.thing+ NEWLINE
|
||||
thing: NUMBER
|
||||
"""
|
||||
rules = parse_string(grammar, GrammarParser).rules
|
||||
self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE")
|
||||
print(repr(rules["start"]))
|
||||
self.assertTrue(repr(rules["start"]).startswith(
|
||||
"Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
|
||||
))
|
||||
self.assertEqual(str(rules["thing"]), "thing: NUMBER")
|
||||
|
||||
def test_expr_grammar(self) -> None:
|
||||
grammar = """
|
||||
start: sum NEWLINE
|
||||
sum: term '+' term | term
|
||||
term: NUMBER
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("42\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
|
||||
])
|
||||
|
||||
def test_optional_operator(self) -> None:
|
||||
grammar = """
|
||||
start: sum NEWLINE
|
||||
sum: term ('+' term)?
|
||||
term: NUMBER
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("1+2\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+2\n")],
|
||||
[
|
||||
TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+2\n"),
|
||||
[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1+2\n")],
|
||||
],
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 3), end=(1, 4), line="1+2\n"),
|
||||
])
|
||||
node = parse_string("1\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
|
||||
])
|
||||
|
||||
def test_optional_literal(self) -> None:
|
||||
grammar = """
|
||||
start: sum NEWLINE
|
||||
sum: term '+' ?
|
||||
term: NUMBER
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("1+\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n")],
|
||||
TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
|
||||
])
|
||||
node = parse_string("1\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
|
||||
])
|
||||
|
||||
def test_alt_optional_operator(self) -> None:
|
||||
grammar = """
|
||||
start: sum NEWLINE
|
||||
sum: term ['+' term]
|
||||
term: NUMBER
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("1 + 2\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n")],
|
||||
[
|
||||
TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"),
|
||||
[TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n")],
|
||||
],
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"),
|
||||
])
|
||||
node = parse_string("1\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
|
||||
])
|
||||
|
||||
def test_repeat_0_simple(self) -> None:
|
||||
grammar = """
|
||||
start: thing thing* NEWLINE
|
||||
thing: NUMBER
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("1 2 3\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")],
|
||||
[
|
||||
[[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]],
|
||||
[[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]],
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
|
||||
])
|
||||
node = parse_string("1\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")],
|
||||
[],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
|
||||
])
|
||||
|
||||
def test_repeat_0_complex(self) -> None:
|
||||
grammar = """
|
||||
start: term ('+' term)* NEWLINE
|
||||
term: NUMBER
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("1 + 2 + 3\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")],
|
||||
[
|
||||
[
|
||||
[
|
||||
TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
|
||||
[TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
|
||||
]
|
||||
],
|
||||
[
|
||||
[
|
||||
TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
|
||||
[TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
|
||||
]
|
||||
],
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
|
||||
])
|
||||
|
||||
def test_repeat_1_simple(self) -> None:
|
||||
grammar = """
|
||||
start: thing thing+ NEWLINE
|
||||
thing: NUMBER
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("1 2 3\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")],
|
||||
[
|
||||
[[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]],
|
||||
[[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]],
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
|
||||
])
|
||||
with self.assertRaises(SyntaxError):
|
||||
parse_string("1\n", parser_class)
|
||||
|
||||
def test_repeat_1_complex(self) -> None:
|
||||
grammar = """
|
||||
start: term ('+' term)+ NEWLINE
|
||||
term: NUMBER
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("1 + 2 + 3\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")],
|
||||
[
|
||||
[
|
||||
[
|
||||
TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
|
||||
[TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
|
||||
]
|
||||
],
|
||||
[
|
||||
[
|
||||
TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
|
||||
[TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
|
||||
]
|
||||
],
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
|
||||
])
|
||||
with self.assertRaises(SyntaxError):
|
||||
parse_string("1\n", parser_class)
|
||||
|
||||
def test_repeat_with_sep_simple(self) -> None:
|
||||
grammar = """
|
||||
start: ','.thing+ NEWLINE
|
||||
thing: NUMBER
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("1, 2, 3\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n")],
|
||||
[TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n")],
|
||||
[TokenInfo(NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n")],
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"),
|
||||
])
|
||||
|
||||
def test_left_recursive(self) -> None:
|
||||
grammar_source = """
|
||||
start: expr NEWLINE
|
||||
expr: ('-' term | expr '+' term | term)
|
||||
term: NUMBER
|
||||
foo: NAME+
|
||||
bar: NAME*
|
||||
baz: NAME?
|
||||
"""
|
||||
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
||||
parser_class = generate_parser(grammar)
|
||||
rules = grammar.rules
|
||||
self.assertFalse(rules["start"].left_recursive)
|
||||
self.assertTrue(rules["expr"].left_recursive)
|
||||
self.assertFalse(rules["term"].left_recursive)
|
||||
self.assertFalse(rules["foo"].left_recursive)
|
||||
self.assertFalse(rules["bar"].left_recursive)
|
||||
self.assertFalse(rules["baz"].left_recursive)
|
||||
node = parse_string("1 + 2 + 3\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[
|
||||
[
|
||||
[[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")]],
|
||||
TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
|
||||
[TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
|
||||
],
|
||||
TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
|
||||
[TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
|
||||
])
|
||||
|
||||
def test_python_expr(self) -> None:
|
||||
grammar = """
|
||||
start: expr NEWLINE? $ { ast.Expression(expr, lineno=1, col_offset=0) }
|
||||
expr: ( expr '+' term { ast.BinOp(expr, ast.Add(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) }
|
||||
| expr '-' term { ast.BinOp(expr, ast.Sub(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) }
|
||||
| term { term }
|
||||
)
|
||||
term: ( l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, lineno=l.lineno, col_offset=l.col_offset, end_lineno=r.end_lineno, end_col_offset=r.end_col_offset) }
|
||||
| l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, lineno=l.lineno, col_offset=l.col_offset, end_lineno=r.end_lineno, end_col_offset=r.end_col_offset) }
|
||||
| factor { factor }
|
||||
)
|
||||
factor: ( '(' expr ')' { expr }
|
||||
| atom { atom }
|
||||
)
|
||||
atom: ( n=NAME { ast.Name(id=n.string, ctx=ast.Load(), lineno=n.start[0], col_offset=n.start[1], end_lineno=n.end[0], end_col_offset=n.end[1]) }
|
||||
| n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), lineno=n.start[0], col_offset=n.start[1], end_lineno=n.end[0], end_col_offset=n.end[1]) }
|
||||
)
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("(1 + 2*3 + 5)/(6 - 2)\n", parser_class)
|
||||
code = compile(node, "", "eval")
|
||||
val = eval(code)
|
||||
self.assertEqual(val, 3.0)
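# Sanity check of the arithmetic: (1 + 2*3 + 5)/(6 - 2) == 12/4 == 3.0,
# so a correctly built AST survives compile() and eval().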
|
||||
|
||||
def test_nullable(self) -> None:
|
||||
grammar_source = """
|
||||
start: sign NUMBER
|
||||
sign: ['-' | '+']
|
||||
"""
|
||||
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
||||
out = io.StringIO()
|
||||
genr = PythonParserGenerator(grammar, out)
|
||||
rules = grammar.rules
|
||||
self.assertFalse(rules["start"].nullable) # Not None!
|
||||
self.assertTrue(rules["sign"].nullable)
|
||||
|
||||
def test_advanced_left_recursive(self) -> None:
|
||||
grammar_source = """
|
||||
start: NUMBER | sign start
|
||||
sign: ['-']
|
||||
"""
|
||||
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
||||
out = io.StringIO()
|
||||
genr = PythonParserGenerator(grammar, out)
|
||||
rules = grammar.rules
|
||||
self.assertFalse(rules["start"].nullable) # Not None!
|
||||
self.assertTrue(rules["sign"].nullable)
|
||||
self.assertTrue(rules["start"].left_recursive)
|
||||
self.assertFalse(rules["sign"].left_recursive)
|
||||
|
||||
def test_mutually_left_recursive(self) -> None:
|
||||
grammar_source = """
|
||||
start: foo 'E'
|
||||
foo: bar 'A' | 'B'
|
||||
bar: foo 'C' | 'D'
|
||||
"""
|
||||
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
||||
out = io.StringIO()
|
||||
genr = PythonParserGenerator(grammar, out)
|
||||
rules = grammar.rules
|
||||
self.assertFalse(rules["start"].left_recursive)
|
||||
self.assertTrue(rules["foo"].left_recursive)
|
||||
self.assertTrue(rules["bar"].left_recursive)
|
||||
genr.generate("<string>")
|
||||
ns: Dict[str, Any] = {}
|
||||
exec(out.getvalue(), ns)
|
||||
parser_class: Type[Parser] = ns["GeneratedParser"]
|
||||
node = parse_string("D A C A E", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[
|
||||
[
|
||||
[
|
||||
[TokenInfo(type=NAME, string="D", start=(1, 0), end=(1, 1), line="D A C A E")],
|
||||
TokenInfo(type=NAME, string="A", start=(1, 2), end=(1, 3), line="D A C A E"),
|
||||
],
|
||||
TokenInfo(type=NAME, string="C", start=(1, 4), end=(1, 5), line="D A C A E"),
|
||||
],
|
||||
TokenInfo(type=NAME, string="A", start=(1, 6), end=(1, 7), line="D A C A E"),
|
||||
],
|
||||
TokenInfo(type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"),
|
||||
])
|
||||
node = parse_string("B C A E", parser_class)
|
||||
self.assertIsNotNone(node)
|
||||
self.assertEqual(node, [
|
||||
[
|
||||
[
|
||||
[TokenInfo(type=NAME, string="B", start=(1, 0), end=(1, 1), line="B C A E")],
|
||||
TokenInfo(type=NAME, string="C", start=(1, 2), end=(1, 3), line="B C A E"),
|
||||
],
|
||||
TokenInfo(type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"),
|
||||
],
|
||||
TokenInfo(type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"),
|
||||
])
|
||||
|
||||
def test_nasty_mutually_left_recursive(self) -> None:
|
||||
# This grammar does not recognize 'x - + =', much to my chagrin.
|
||||
# But that's the way PEG works.
|
||||
# [Breathlessly]
|
||||
# The problem is that the toplevel target call
|
||||
# recurses into maybe, which recognizes 'x - +',
|
||||
# and then the toplevel target looks for another '+',
|
||||
# which fails, so it retreats to NAME,
|
||||
# which succeeds, so we end up just recognizing 'x',
|
||||
# and then start fails because there's no '=' after that.
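# In short: PEG's ordered choice commits to the first alternative that
# succeeds at each level, so there is no global backtracking to rescue
# this input.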
|
||||
grammar_source = """
|
||||
start: target '='
|
||||
target: maybe '+' | NAME
|
||||
maybe: maybe '-' | target
|
||||
"""
|
||||
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
||||
out = io.StringIO()
|
||||
genr = PythonParserGenerator(grammar, out)
|
||||
genr.generate("<string>")
|
||||
ns: Dict[str, Any] = {}
|
||||
exec(out.getvalue(), ns)
|
||||
parser_class = ns["GeneratedParser"]
|
||||
with self.assertRaises(SyntaxError):
|
||||
parse_string("x - + =", parser_class)
|
||||
|
||||
def test_lookahead(self) -> None:
|
||||
grammar = """
|
||||
start: (expr_stmt | assign_stmt) &'.'
|
||||
expr_stmt: !(target '=') expr
|
||||
assign_stmt: target '=' expr
|
||||
expr: term ('+' term)*
|
||||
target: NAME
|
||||
term: NUMBER
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("foo = 12 + 12 .", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[
|
||||
[
|
||||
[TokenInfo(NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 .")],
|
||||
TokenInfo(OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."),
|
||||
[
|
||||
[
|
||||
TokenInfo(
|
||||
NUMBER, string="12", start=(1, 6), end=(1, 8), line="foo = 12 + 12 ."
|
||||
)
|
||||
],
|
||||
[
|
||||
[
|
||||
[
|
||||
TokenInfo(
|
||||
OP,
|
||||
string="+",
|
||||
start=(1, 9),
|
||||
end=(1, 10),
|
||||
line="foo = 12 + 12 .",
|
||||
),
|
||||
[
|
||||
TokenInfo(
|
||||
NUMBER,
|
||||
string="12",
|
||||
start=(1, 11),
|
||||
end=(1, 13),
|
||||
line="foo = 12 + 12 .",
|
||||
)
|
||||
],
|
||||
]
|
||||
]
|
||||
],
|
||||
],
|
||||
]
|
||||
]
|
||||
])
|
||||
|
||||
def test_named_lookahead_error(self) -> None:
|
||||
grammar = """
|
||||
start: foo=!'x' NAME
|
||||
"""
|
||||
with self.assertRaises(SyntaxError):
|
||||
make_parser(grammar)
|
||||
|
||||
def test_start_leader(self) -> None:
|
||||
grammar = """
|
||||
start: attr | NAME
|
||||
attr: start '.' NAME
|
||||
"""
|
||||
# Would assert False without a special case in compute_left_recursives().
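# (The cycle here is start -> attr -> start, so presumably the start rule
# has to be allowed to act as its own leader.)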
|
||||
make_parser(grammar)
|
||||
|
||||
def test_left_recursion_too_complex(self) -> None:
|
||||
grammar = """
|
||||
start: foo
|
||||
foo: bar '+' | baz '+' | '+'
|
||||
bar: baz '-' | foo '-' | '-'
|
||||
baz: foo '*' | bar '*' | '*'
|
||||
"""
|
||||
with self.assertRaises(ValueError) as errinfo:
|
||||
make_parser(grammar)
|
||||
self.assertTrue("no leader" in str(errinfo.exception.value))
|
||||
|
||||
def test_cut(self) -> None:
|
||||
grammar = """
|
||||
start: '(' ~ expr ')'
|
||||
expr: NUMBER
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("(1)", parser_class, verbose=True)
|
||||
self.assertEqual(node, [
|
||||
TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)")],
|
||||
TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
|
||||
])
|
||||
|
||||
def test_dangling_reference(self) -> None:
|
||||
grammar = """
|
||||
start: foo ENDMARKER
|
||||
foo: bar NAME
|
||||
"""
|
||||
with self.assertRaises(GrammarError):
|
||||
parser_class = make_parser(grammar)
|
||||
|
||||
def test_bad_token_reference(self) -> None:
|
||||
grammar = """
|
||||
start: foo
|
||||
foo: NAMEE
|
||||
"""
|
||||
with self.assertRaises(GrammarError):
|
||||
parser_class = make_parser(grammar)
|
||||
|
||||
def test_missing_start(self) -> None:
|
||||
grammar = """
|
||||
foo: NAME
|
||||
"""
|
||||
with self.assertRaises(GrammarError):
|
||||
parser_class = make_parser(grammar)
|
||||
|
||||
|
||||
class TestGrammarVisitor(unittest.TestCase):
|
||||
class Visitor(GrammarVisitor):
|
||||
def __init__(self) -> None:
|
||||
self.n_nodes = 0
|
||||
|
||||
def visit(self, node: Any, *args: Any, **kwargs: Any) -> None:
|
||||
self.n_nodes += 1
|
||||
super().visit(node, *args, **kwargs)
|
||||
|
||||
def test_parse_trivial_grammar(self) -> None:
|
||||
grammar = """
|
||||
start: 'a'
|
||||
"""
|
||||
rules = parse_string(grammar, GrammarParser)
|
||||
visitor = self.Visitor()
|
||||
|
||||
visitor.visit(rules)
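# Grammar/Rule/Rhs/Alt/NamedItem/StringLeaf -> 6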
|
||||
|
||||
self.assertEqual(visitor.n_nodes, 6)
|
||||
|
||||
def test_parse_or_grammar(self) -> None:
|
||||
grammar = """
|
||||
start: rule
|
||||
rule: 'a' | 'b'
|
||||
"""
|
||||
rules = parse_string(grammar, GrammarParser)
|
||||
visitor = self.Visitor()
|
||||
|
||||
visitor.visit(rules)
|
||||
|
||||
# Grammar/Rule/Rhs/Alt/NamedItem/NameLeaf -> 6
|
||||
# Rule/Rhs/ -> 2
|
||||
# Alt/NamedItem/StringLeaf -> 3
|
||||
# Alt/NamedItem/StringLeaf -> 3
|
||||
|
||||
self.assertEqual(visitor.n_nodes, 14)
|
||||
|
||||
def test_parse_repeat1_grammar(self) -> None:
|
||||
grammar = """
|
||||
start: 'a'+
|
||||
"""
|
||||
rules = parse_string(grammar, GrammarParser)
|
||||
visitor = self.Visitor()
|
||||
|
||||
visitor.visit(rules)
|
||||
|
||||
# Grammar/Rule/Rhs/Alt/NamedItem/Repeat1/StringLeaf -> 7
|
||||
self.assertEqual(visitor.n_nodes, 7)
|
||||
|
||||
def test_parse_repeat0_grammar(self) -> None:
|
||||
grammar = """
|
||||
start: 'a'*
|
||||
"""
|
||||
rules = parse_string(grammar, GrammarParser)
|
||||
visitor = self.Visitor()
|
||||
|
||||
visitor.visit(rules)
|
||||
|
||||
# Grammar/Rule/Rhs/Alt/NamedItem/Repeat0/StringLeaf -> 7
|
||||
|
||||
self.assertEqual(visitor.n_nodes, 7)
|
||||
|
||||
def test_parse_optional_grammar(self) -> None:
|
||||
grammar = """
|
||||
start: 'a' ['b']
|
||||
"""
|
||||
rules = parse_string(grammar, GrammarParser)
|
||||
visitor = self.Visitor()
|
||||
|
||||
visitor.visit(rules)
|
||||
|
||||
# Grammar/Rule/Rhs/Alt/NamedItem/StringLeaf -> 6
|
||||
# NamedItem/Opt/Rhs/Alt/NamedItem/StringLeaf -> 6
|
||||
|
||||
self.assertEqual(visitor.n_nodes, 12)
|
||||
|
||||
|
||||
class TestGrammarVisualizer(unittest.TestCase):
|
||||
def test_simple_rule(self) -> None:
|
||||
grammar = """
|
||||
start: 'a' 'b'
|
||||
"""
|
||||
rules = parse_string(grammar, GrammarParser)
|
||||
|
||||
printer = ASTGrammarPrinter()
|
||||
lines: List[str] = []
|
||||
printer.print_grammar_ast(rules, printer=lines.append)
|
||||
|
||||
output = "\n".join(lines)
|
||||
expected_output = textwrap.dedent(
|
||||
"""\
|
||||
└──Rule
|
||||
└──Rhs
|
||||
└──Alt
|
||||
├──NamedItem
|
||||
│ └──StringLeaf("'a'")
|
||||
└──NamedItem
|
||||
└──StringLeaf("'b'")
|
||||
"""
|
||||
)
|
||||
|
||||
self.assertEqual(output, expected_output)
|
||||
|
||||
def test_multiple_rules(self) -> None:
|
||||
grammar = """
|
||||
start: a b
|
||||
a: 'a'
|
||||
b: 'b'
|
||||
"""
|
||||
rules = parse_string(grammar, GrammarParser)
|
||||
|
||||
printer = ASTGrammarPrinter()
|
||||
lines: List[str] = []
|
||||
printer.print_grammar_ast(rules, printer=lines.append)
|
||||
|
||||
output = "\n".join(lines)
|
||||
expected_output = textwrap.dedent(
|
||||
"""\
|
||||
└──Rule
|
||||
└──Rhs
|
||||
└──Alt
|
||||
├──NamedItem
|
||||
│ └──NameLeaf('a')
|
||||
└──NamedItem
|
||||
└──NameLeaf('b')
|
||||
|
||||
└──Rule
|
||||
└──Rhs
|
||||
└──Alt
|
||||
└──NamedItem
|
||||
└──StringLeaf("'a'")
|
||||
|
||||
└──Rule
|
||||
└──Rhs
|
||||
└──Alt
|
||||
└──NamedItem
|
||||
└──StringLeaf("'b'")
|
||||
"""
|
||||
)
|
||||
|
||||
self.assertEqual(output, expected_output)
|
||||
|
||||
def test_deep_nested_rule(self) -> None:
|
||||
grammar = """
|
||||
start: 'a' ['b'['c'['d']]]
|
||||
"""
|
||||
rules = parse_string(grammar, GrammarParser)
|
||||
|
||||
printer = ASTGrammarPrinter()
|
||||
lines: List[str] = []
|
||||
printer.print_grammar_ast(rules, printer=lines.append)
|
||||
|
||||
output = "\n".join(lines)
|
||||
print()
|
||||
print(output)
|
||||
expected_output = textwrap.dedent(
|
||||
"""\
|
||||
└──Rule
|
||||
└──Rhs
|
||||
└──Alt
|
||||
├──NamedItem
|
||||
│ └──StringLeaf("'a'")
|
||||
└──NamedItem
|
||||
└──Opt
|
||||
└──Rhs
|
||||
└──Alt
|
||||
├──NamedItem
|
||||
│ └──StringLeaf("'b'")
|
||||
└──NamedItem
|
||||
└──Opt
|
||||
└──Rhs
|
||||
└──Alt
|
||||
├──NamedItem
|
||||
│ └──StringLeaf("'c'")
|
||||
└──NamedItem
|
||||
└──Opt
|
||||
└──Rhs
|
||||
└──Alt
|
||||
└──NamedItem
|
||||
└──StringLeaf("'d'")
|
||||
"""
|
||||
)
|
||||
|
||||
self.assertEqual(output, expected_output)
|
|
@ -0,0 +1,764 @@
|
|||
import ast
|
||||
import os
|
||||
import sys
|
||||
import _peg_parser as peg_parser
|
||||
import unittest
|
||||
from pathlib import PurePath
|
||||
from typing import Any, Union, Iterable, Tuple
|
||||
from textwrap import dedent
|
||||
|
||||
|
||||
TEST_CASES = [
|
||||
('annotated_assignment', 'x: int = 42'),
|
||||
('annotated_assignment_with_tuple', 'x: tuple = 1, 2'),
|
||||
('annotated_assignment_with_parens', '(paren): int = 3+2'),
|
||||
('annotated_assignment_with_yield', 'x: int = yield 42'),
|
||||
('annotated_no_assignment', 'x: int'),
|
||||
('annotation_with_multiple_parens', '((parens)): int'),
|
||||
('annotation_with_parens', '(parens): int'),
|
||||
('annotated_assignment_with_attr', 'a.b: int'),
|
||||
('annotated_assignment_with_subscript', 'a[b]: int'),
|
||||
('annotated_assignment_with_attr_and_parens', '(a.b): int'),
|
||||
('annotated_assignment_with_subscript_and_parens', '(a[b]): int'),
|
||||
('assert', 'assert a'),
|
||||
('assert_message', 'assert a, b'),
|
||||
('assignment_false', 'a = False'),
|
||||
('assignment_none', 'a = None'),
|
||||
('assignment_true', 'a = True'),
|
||||
('assignment_paren', '(a) = 42'),
|
||||
('assignment_paren_multiple', '(a, b) = (0, 1)'),
|
||||
('asyncfor',
|
||||
'''
|
||||
async for i in a:
|
||||
pass
|
||||
'''),
|
||||
('attribute_call', 'a.b()'),
|
||||
('attribute_multiple_names', 'abcd.efg.hij'),
|
||||
('attribute_simple', 'a.b'),
|
||||
('attributes_subscript', 'a.b[0]'),
|
||||
('augmented_assignment', 'x += 42'),
|
||||
('binop_add', '1 + 1'),
|
||||
('binop_add_multiple', '1 + 1 + 1 + 1'),
|
||||
('binop_all', '1 + 2 * 5 + 3 ** 2 - -3'),
|
||||
('binop_boolop_comp', '1 + 1 == 2 or 1 + 1 == 3 and not b'),
|
||||
('boolop_or', 'a or b'),
|
||||
('boolop_or_multiple', 'a or b or c'),
|
||||
('class_def_bases',
|
||||
'''
|
||||
class C(A, B):
|
||||
pass
|
||||
'''),
|
||||
('class_def_decorators',
|
||||
'''
|
||||
@a
|
||||
class C:
|
||||
pass
|
||||
'''),
|
||||
('class_def_decorator_with_expression',
|
||||
'''
|
||||
@lambda x: 42
|
||||
class C:
|
||||
pass
|
||||
'''),
|
||||
('class_def_decorator_with_expression_and_walrus',
|
||||
'''
|
||||
@x:=lambda x: 42
|
||||
class C:
|
||||
pass
|
||||
'''),
|
||||
|
||||
('class_def_keywords',
|
||||
'''
|
||||
class C(keyword=a+b, **c):
|
||||
pass
|
||||
'''),
|
||||
('class_def_mixed',
|
||||
'''
|
||||
class C(A, B, keyword=0, **a):
|
||||
pass
|
||||
'''),
|
||||
('class_def_simple',
|
||||
'''
|
||||
class C:
|
||||
pass
|
||||
'''),
|
||||
('class_def_starred_and_kwarg',
|
||||
'''
|
||||
class C(A, B, *x, **y):
|
||||
pass
|
||||
'''),
|
||||
('class_def_starred_in_kwargs',
|
||||
'''
|
||||
class C(A, x=2, *[B, C], y=3):
|
||||
pass
|
||||
'''),
|
||||
('call_attribute', 'f().b'),
|
||||
('call_genexp', 'f(i for i in a)'),
|
||||
('call_mixed_args', 'f(a, b, *c, **d)'),
|
||||
('call_mixed_args_named', 'f(a, b, *c, d=4, **v)'),
|
||||
('call_one_arg', 'f(a)'),
|
||||
('call_posarg_genexp', 'f(a, (i for i in a))'),
|
||||
('call_simple', 'f()'),
|
||||
('call_subscript', 'f()[0]'),
|
||||
('comp', 'a == b'),
|
||||
('comp_multiple', 'a == b == c'),
|
||||
('comp_paren_end', 'a == (b-1)'),
|
||||
('comp_paren_start', '(a-1) == b'),
|
||||
('decorator',
|
||||
'''
|
||||
@a
|
||||
def f():
|
||||
pass
|
||||
'''),
|
||||
('decorator_async',
|
||||
'''
|
||||
@a
|
||||
async def d():
|
||||
pass
|
||||
'''),
|
||||
('decorator_with_expression',
|
||||
'''
|
||||
@lambda x: 42
|
||||
def f():
|
||||
pass
|
||||
'''),
|
||||
('decorator_with_expression_and_walrus',
|
||||
'''
|
||||
@x:=lambda x: 42
|
||||
def f():
|
||||
pass
|
||||
'''),
|
||||
('del_attribute', 'del a.b'),
|
||||
('del_call_attribute', 'del a().c'),
|
||||
('del_call_genexp_attribute', 'del a(i for i in b).c'),
|
||||
('del_empty', 'del()'),
|
||||
('del_list', 'del a, [b, c]'),
|
||||
('del_mixed', 'del a[0].b().c'),
|
||||
('del_multiple', 'del a, b'),
|
||||
('del_multiple_calls_attribute', 'del a()().b'),
|
||||
('del_paren', 'del(a,b)'),
|
||||
('del_paren_single_target', 'del(a)'),
|
||||
('del_subscript_attribute', 'del a[0].b'),
|
||||
('del_tuple', 'del a, (b, c)'),
|
||||
('delete', 'del a'),
|
||||
('dict',
|
||||
'''
|
||||
{
|
||||
a: 1,
|
||||
b: 2,
|
||||
c: 3
|
||||
}
|
||||
'''),
|
||||
('dict_comp', '{x:1 for x in a}'),
|
||||
('dict_comp_if', '{x:1+2 for x in a if b}'),
|
||||
('dict_empty', '{}'),
|
||||
('for',
|
||||
'''
|
||||
for i in a:
|
||||
pass
|
||||
'''),
|
||||
('for_else',
|
||||
'''
|
||||
for i in a:
|
||||
pass
|
||||
else:
|
||||
pass
|
||||
'''),
|
||||
('for_star_target_in_paren', 'for (a) in b: pass'),
|
||||
('for_star_targets_attribute', 'for a.b in c: pass'),
|
||||
('for_star_targets_call_attribute', 'for a().c in b: pass'),
|
||||
('for_star_targets_empty', 'for () in a: pass'),
|
||||
('for_star_targets_mixed', 'for a[0].b().c in d: pass'),
|
||||
('for_star_targets_mixed_starred',
|
||||
'''
|
||||
for a, *b, (c, d) in e:
|
||||
pass
|
||||
'''),
|
||||
('for_star_targets_multiple', 'for a, b in c: pass'),
|
||||
('for_star_targets_nested_starred', 'for *[*a] in b: pass'),
|
||||
('for_star_targets_starred', 'for *a in b: pass'),
|
||||
('for_star_targets_subscript_attribute', 'for a[0].b in c: pass'),
|
||||
('for_star_targets_trailing_comma',
|
||||
'''
|
||||
for a, (b, c), in d:
|
||||
pass
|
||||
'''),
|
||||
('for_star_targets_tuple', 'for a, (b, c) in d: pass'),
|
||||
('for_underscore',
|
||||
'''
|
||||
for _ in a:
|
||||
pass
|
||||
'''),
|
||||
('function_return_type',
|
||||
'''
|
||||
def f() -> Any:
|
||||
pass
|
||||
'''),
|
||||
('f-string_slice', "f'{x[2]}'"),
|
||||
('f-string_slice_upper', "f'{x[2:3]}'"),
|
||||
('f-string_slice_step', "f'{x[2:3:-2]}'"),
|
||||
('f-string_constant', "f'{42}'"),
|
||||
('f-string_boolop', "f'{x and y}'"),
|
||||
('f-string_named_expr', "f'{(x:=42)}'"),
|
||||
('f-string_binop', "f'{x+y}'"),
|
||||
('f-string_unaryop', "f'{not x}'"),
|
||||
('f-string_lambda', "f'{(lambda x, /, y, y2=42 , *z, k1, k2=34, **k3: 42)}'"),
|
||||
('f-string_lambda_call', "f'{(lambda: 2)(2)}'"),
|
||||
('f-string_ifexpr', "f'{x if y else z}'"),
|
||||
('f-string_dict', "f'{ {2:34, 3:34} }'"),
|
||||
('f-string_set', "f'{ {2,-45} }'"),
|
||||
('f-string_list', "f'{ [2,-45] }'"),
|
||||
('f-string_tuple', "f'{ (2,-45) }'"),
|
||||
('f-string_listcomp', "f'{[x for x in y if z]}'"),
|
||||
('f-string_setcomp', "f'{ {x for x in y if z} }'"),
|
||||
('f-string_dictcomp', "f'{ {x:x for x in y if z} }'"),
|
||||
('f-string_genexpr', "f'{ (x for x in y if z) }'"),
|
||||
('f-string_yield', "f'{ (yield x) }'"),
|
||||
('f-string_yieldfrom', "f'{ (yield from x) }'"),
|
||||
('f-string_await', "f'{ await x }'"),
|
||||
('f-string_compare', "f'{ x == y }'"),
|
||||
('f-string_call', "f'{ f(x,y,z) }'"),
|
||||
('f-string_attribute', "f'{ f.x.y.z }'"),
|
||||
('f-string_starred', "f'{ *x, }'"),
|
||||
('f-string_doublestarred', "f'{ {**x} }'"),
|
||||
('f-string_escape_brace', "f'{{Escape'"),
|
||||
('f-string_escape_closing_brace', "f'Escape}}'"),
|
||||
('f-string_repr', "f'{a!r}'"),
|
||||
('f-string_str', "f'{a!s}'"),
|
||||
('f-string_ascii', "f'{a!a}'"),
|
||||
('f-string_debug', "f'{a=}'"),
|
||||
('f-string_padding', "f'{a:03d}'"),
|
||||
('f-string_multiline',
|
||||
"""
|
||||
f'''
|
||||
{hello}
|
||||
'''
|
||||
"""),
|
||||
('f-string_multiline_in_expr',
|
||||
"""
|
||||
f'''
|
||||
{
|
||||
hello
|
||||
}
|
||||
'''
|
||||
"""),
|
||||
('f-string_multiline_in_call',
|
||||
"""
|
||||
f'''
|
||||
{f(
|
||||
a, b, c
|
||||
)}
|
||||
'''
|
||||
"""),
|
||||
('global', 'global a, b'),
|
||||
('group', '(yield a)'),
|
||||
('if_elif',
|
||||
'''
|
||||
if a:
|
||||
pass
|
||||
elif b:
|
||||
pass
|
||||
'''),
|
||||
('if_elif_elif',
|
||||
'''
|
||||
if a:
|
||||
pass
|
||||
elif b:
|
||||
pass
|
||||
elif c:
|
||||
pass
|
||||
'''),
|
||||
('if_elif_else',
|
||||
'''
|
||||
if a:
|
||||
pass
|
||||
elif b:
|
||||
pass
|
||||
else:
|
||||
pass
|
||||
'''),
|
||||
('if_else',
|
||||
'''
|
||||
if a:
|
||||
pass
|
||||
else:
|
||||
pass
|
||||
'''),
|
||||
('if_simple', 'if a: pass'),
|
||||
('import', 'import a'),
|
||||
('import_alias', 'import a as b'),
|
||||
('import_dotted', 'import a.b'),
|
||||
('import_dotted_alias', 'import a.b as c'),
|
||||
('import_dotted_multichar', 'import ab.cd'),
|
||||
('import_from', 'from a import b'),
|
||||
('import_from_alias', 'from a import b as c'),
|
||||
('import_from_dotted', 'from a.b import c'),
|
||||
('import_from_dotted_alias', 'from a.b import c as d'),
|
||||
('import_from_multiple_aliases', 'from a import b as c, d as e'),
|
||||
('import_from_one_dot', 'from .a import b'),
|
||||
('import_from_one_dot_alias', 'from .a import b as c'),
|
||||
('import_from_star', 'from a import *'),
|
||||
('import_from_three_dots', 'from ...a import b'),
|
||||
('import_from_trailing_comma', 'from a import (b,)'),
|
||||
('kwarg',
|
||||
'''
|
||||
def f(**a):
|
||||
pass
|
||||
'''),
|
||||
('kwonly_args',
|
||||
'''
|
||||
def f(*, a, b):
|
||||
pass
|
||||
'''),
|
||||
('kwonly_args_with_default',
|
||||
'''
|
||||
def f(*, a=2, b):
|
||||
pass
|
||||
'''),
|
||||
('lambda_kwarg', 'lambda **a: 42'),
|
||||
('lambda_kwonly_args', 'lambda *, a, b: 42'),
|
||||
('lambda_kwonly_args_with_default', 'lambda *, a=2, b: 42'),
|
||||
('lambda_mixed_args', 'lambda a, /, b, *, c: 42'),
|
||||
('lambda_mixed_args_with_default', 'lambda a, b=2, /, c=3, *e, f, **g: 42'),
|
||||
('lambda_no_args', 'lambda: 42'),
|
||||
('lambda_pos_args', 'lambda a,b: 42'),
|
||||
('lambda_pos_args_with_default', 'lambda a, b=2: 42'),
|
||||
('lambda_pos_only_args', 'lambda a, /: 42'),
|
||||
('lambda_pos_only_args_with_default', 'lambda a=0, /: 42'),
|
||||
('lambda_pos_posonly_args', 'lambda a, b, /, c, d: 42'),
|
||||
('lambda_pos_posonly_args_with_default', 'lambda a, b=0, /, c=2: 42'),
|
||||
('lambda_vararg', 'lambda *a: 42'),
|
||||
('lambda_vararg_kwonly_args', 'lambda *a, b: 42'),
|
||||
('list', '[1, 2, a]'),
|
||||
('list_comp', '[i for i in a]'),
|
||||
('list_comp_if', '[i for i in a if b]'),
|
||||
('list_trailing_comma', '[1+2, a, 3+4,]'),
|
||||
('mixed_args',
|
||||
'''
|
||||
def f(a, /, b, *, c):
|
||||
pass
|
||||
'''),
|
||||
('mixed_args_with_default',
|
||||
'''
|
||||
def f(a, b=2, /, c=3, *e, f, **g):
|
||||
pass
|
||||
'''),
|
||||
('multipart_string_bytes', 'b"Hola" b"Hello" b"Bye"'),
|
||||
('multipart_string_triple', '"""Something here""" "and now"'),
|
||||
('multipart_string_different_prefixes', 'u"Something" "Other thing" r"last thing"'),
|
||||
('multiple_assignments', 'x = y = z = 42'),
|
||||
('multiple_assignments_with_yield', 'x = y = z = yield 42'),
|
||||
('multiple_pass',
|
||||
'''
|
||||
pass; pass
|
||||
pass
|
||||
'''),
|
||||
('namedexpr', '(x := [1, 2, 3])'),
|
||||
('namedexpr_false', '(x := False)'),
|
||||
('namedexpr_none', '(x := None)'),
|
||||
('namedexpr_true', '(x := True)'),
|
||||
('nonlocal', 'nonlocal a, b'),
|
||||
('number_complex', '-2.234+1j'),
|
||||
('number_float', '-34.2333'),
|
||||
('number_imaginary_literal', '1.1234j'),
|
||||
('number_integer', '-234'),
|
||||
('number_underscores', '1_234_567'),
|
||||
('pass', 'pass'),
|
||||
('pos_args',
|
||||
'''
|
||||
def f(a, b):
|
||||
pass
|
||||
'''),
|
||||
('pos_args_with_default',
|
||||
'''
|
||||
def f(a, b=2):
|
||||
pass
|
||||
'''),
|
||||
('pos_only_args',
|
||||
'''
|
||||
def f(a, /):
|
||||
pass
|
||||
'''),
|
||||
('pos_only_args_with_default',
|
||||
'''
|
||||
def f(a=0, /):
|
||||
pass
|
||||
'''),
|
||||
('pos_posonly_args',
|
||||
'''
|
||||
def f(a, b, /, c, d):
|
||||
pass
|
||||
'''),
|
||||
('pos_posonly_args_with_default',
|
||||
'''
|
||||
def f(a, b=0, /, c=2):
|
||||
pass
|
||||
'''),
|
||||
('primary_mixed', 'a.b.c().d[0]'),
|
||||
('raise', 'raise'),
|
||||
('raise_ellipsis', 'raise ...'),
|
||||
('raise_expr', 'raise a'),
|
||||
('raise_from', 'raise a from b'),
|
||||
('return', 'return'),
|
||||
('return_expr', 'return a'),
|
||||
('set', '{1, 2+4, 3+5}'),
|
||||
('set_comp', '{i for i in a}'),
|
||||
('set_trailing_comma', '{1, 2, 3,}'),
|
||||
('simple_assignment', 'x = 42'),
|
||||
('simple_assignment_with_yield', 'x = yield 42'),
|
||||
('string_bytes', 'b"hello"'),
|
||||
('string_concatenation_bytes', 'b"hello" b"world"'),
|
||||
('string_concatenation_simple', '"abcd" "efgh"'),
|
||||
('string_format_simple', 'f"hello"'),
|
||||
('string_format_with_formatted_value', 'f"hello {world}"'),
|
||||
('string_simple', '"hello"'),
|
||||
('string_unicode', 'u"hello"'),
|
||||
('subscript_attribute', 'a[0].b'),
|
||||
('subscript_call', 'a[b]()'),
|
||||
('subscript_multiple_slices', 'a[0:a:2, 1]'),
|
||||
('subscript_simple', 'a[0]'),
|
||||
('subscript_single_element_tuple', 'a[0,]'),
|
||||
('subscript_trailing_comma', 'a[0, 1, 2,]'),
|
||||
('subscript_tuple', 'a[0, 1, 2]'),
|
||||
('subscript_whole_slice', 'a[0+1:b:c]'),
|
||||
('try_except',
|
||||
'''
|
||||
try:
|
||||
pass
|
||||
except:
|
||||
pass
|
||||
'''),
|
||||
('try_except_else',
|
||||
'''
|
||||
try:
|
||||
pass
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
pass
|
||||
'''),
|
||||
('try_except_else_finally',
|
||||
'''
|
||||
try:
|
||||
pass
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
pass
|
||||
finally:
|
||||
pass
|
||||
'''),
|
||||
('try_except_expr',
|
||||
'''
|
||||
try:
|
||||
pass
|
||||
except a:
|
||||
pass
|
||||
'''),
|
||||
('try_except_expr_target',
|
||||
'''
|
||||
try:
|
||||
pass
|
||||
except a as b:
|
||||
pass
|
||||
'''),
|
||||
('try_except_finally',
|
||||
'''
|
||||
try:
|
||||
pass
|
||||
except:
|
||||
pass
|
||||
finally:
|
||||
pass
|
||||
'''),
|
||||
('try_finally',
|
||||
'''
|
||||
try:
|
||||
pass
|
||||
finally:
|
||||
pass
|
||||
'''),
|
||||
('unpacking_binop', '[*([1, 2, 3] + [3, 4, 5])]'),
|
||||
('unpacking_call', '[*b()]'),
|
||||
('unpacking_compare', '[*(x < y)]'),
|
||||
('unpacking_constant', '[*3]'),
|
||||
('unpacking_dict', '[*{1: 2, 3: 4}]'),
|
||||
('unpacking_dict_comprehension', '[*{x:y for x,y in z}]'),
|
||||
('unpacking_ifexpr', '[*([1, 2, 3] if x else y)]'),
|
||||
('unpacking_list', '[*[1,2,3]]'),
|
||||
('unpacking_list_comprehension', '[*[x for x in y]]'),
|
||||
('unpacking_namedexpr', '[*(x:=[1, 2, 3])]'),
|
||||
('unpacking_set', '[*{1,2,3}]'),
|
||||
('unpacking_set_comprehension', '[*{x for x in y}]'),
|
||||
('unpacking_string', '[*"myvalue"]'),
|
||||
('unpacking_tuple', '[*(1,2,3)]'),
|
||||
('unpacking_unaryop', '[*(not [1, 2, 3])]'),
|
||||
('unpacking_yield', '[*(yield 42)]'),
|
||||
('unpacking_yieldfrom', '[*(yield from x)]'),
|
||||
('tuple', '(1, 2, 3)'),
|
||||
('vararg',
|
||||
'''
|
||||
def f(*a):
|
||||
pass
|
||||
'''),
|
||||
('vararg_kwonly_args',
|
||||
'''
|
||||
def f(*a, b):
|
||||
pass
|
||||
'''),
|
||||
('while',
|
||||
'''
|
||||
while a:
|
||||
pass
|
||||
'''),
|
||||
('while_else',
|
||||
'''
|
||||
while a:
|
||||
pass
|
||||
else:
|
||||
pass
|
||||
'''),
|
||||
('with',
|
||||
'''
|
||||
with a:
|
||||
pass
|
||||
'''),
|
||||
('with_as',
|
||||
'''
|
||||
with a as b:
|
||||
pass
|
||||
'''),
|
||||
('with_as_paren',
|
||||
'''
|
||||
with a as (b):
|
||||
pass
|
||||
'''),
|
||||
('with_as_empty', 'with a as (): pass'),
|
||||
('with_list_recursive',
|
||||
'''
|
||||
with a as [x, [y, z]]:
|
||||
pass
|
||||
'''),
|
||||
('with_tuple_recursive',
|
||||
'''
|
||||
with a as ((x, y), z):
|
||||
pass
|
||||
'''),
|
||||
('with_tuple_target',
|
||||
'''
|
||||
with a as (x, y):
|
||||
pass
|
||||
'''),
|
||||
('yield', 'yield'),
|
||||
('yield_expr', 'yield a'),
|
||||
('yield_from', 'yield from a'),
|
||||
]
|
||||
|
||||
FAIL_TEST_CASES = [
|
||||
("annotation_multiple_targets", "(a, b): int = 42"),
|
||||
("annotation_nested_tuple", "((a, b)): int"),
|
||||
("annotation_list", "[a]: int"),
|
||||
("annotation_lambda", "lambda: int = 42"),
|
||||
("annotation_tuple", "(a,): int"),
|
||||
("annotation_tuple_without_paren", "a,: int"),
|
||||
("assignment_keyword", "a = if"),
|
||||
("comprehension_lambda", "(a for a in lambda: b)"),
|
||||
("comprehension_else", "(a for a in b if c else d"),
|
||||
("del_call", "del a()"),
|
||||
("del_call_genexp", "del a(i for i in b)"),
|
||||
("del_subscript_call", "del a[b]()"),
|
||||
("del_attribute_call", "del a.b()"),
|
||||
("del_mixed_call", "del a[0].b().c.d()"),
|
||||
("for_star_targets_call", "for a() in b: pass"),
|
||||
("for_star_targets_subscript_call", "for a[b]() in c: pass"),
|
||||
("for_star_targets_attribute_call", "for a.b() in c: pass"),
|
||||
("for_star_targets_mixed_call", "for a[0].b().c.d() in e: pass"),
|
||||
("for_star_targets_in", "for a, in in b: pass"),
|
||||
("f-string_assignment", "f'{x = 42}'"),
|
||||
("f-string_empty", "f'{}'"),
|
||||
("f-string_function_def", "f'{def f(): pass}'"),
|
||||
("f-string_lambda", "f'{lambda x: 42}'"),
|
||||
("f-string_singe_brace", "f'{'"),
|
||||
("f-string_single_closing_brace", "f'}'"),
|
||||
("from_import_invalid", "from import import a"),
|
||||
("from_import_trailing_comma", "from a import b,"),
|
||||
# This test case checks error paths involving tokens with uninitialized
|
||||
# values of col_offset and end_col_offset.
|
||||
("invalid indentation",
|
||||
"""
|
||||
def f():
|
||||
a
|
||||
a
|
||||
"""),
|
||||
("not_terminated_string", "a = 'example"),
|
||||
]
|
||||
|
||||
FAIL_SPECIALIZED_MESSAGE_CASES = [
|
||||
("f(x, y, z=1, **b, *a", "iterable argument unpacking follows keyword argument unpacking"),
|
||||
("f(x, y=1, *z, **a, b", "positional argument follows keyword argument unpacking"),
|
||||
("f(x, y, z=1, a=2, b", "positional argument follows keyword argument"),
|
||||
("True = 1", "cannot assign to True"),
|
||||
("a() = 1", "cannot assign to function call"),
|
||||
("(a, b): int", "only single target (not tuple) can be annotated"),
|
||||
("[a, b]: int", "only single target (not list) can be annotated"),
|
||||
("a(): int", "illegal target for annotation"),
|
||||
("1 += 1", "cannot assign to literal"),
|
||||
("pass\n pass", "unexpected indent"),
|
||||
("def f():\npass", "expected an indented block"),
|
||||
]
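# Each pair above is (source, expected substring of the error message); sources
# whose message mentions indentation are expected to raise IndentationError,
# the rest SyntaxError.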
|
||||
|
||||
GOOD_BUT_FAIL_TEST_CASES = [
|
||||
('string_concatenation_format', 'f"{hello} world" f"again {and_again}"'),
|
||||
('string_concatenation_multiple',
|
||||
'''
|
||||
f"hello" f"{world} again" f"and_again"
|
||||
'''),
|
||||
('f-string_multiline_comp',
|
||||
"""
|
||||
f'''
|
||||
{(i for i in a
|
||||
if b)}
|
||||
'''
|
||||
"""),
|
||||
]
|
||||
|
||||
FSTRINGS_TRACEBACKS = {
|
||||
'multiline_fstrings_same_line_with_brace': (
|
||||
"""
|
||||
f'''
|
||||
{a$b}
|
||||
'''
|
||||
""",
|
||||
'(a$b)',
|
||||
),
|
||||
'multiline_fstring_brace_on_next_line': (
|
||||
"""
|
||||
f'''
|
||||
{a$b
|
||||
}'''
|
||||
""",
|
||||
'(a$b',
|
||||
),
|
||||
'multiline_fstring_brace_on_previous_line': (
|
||||
"""
|
||||
f'''
|
||||
{
|
||||
a$b}'''
|
||||
""",
|
||||
'a$b)',
|
||||
),
|
||||
}
|
||||
|
||||
EXPRESSIONS_TEST_CASES = [
|
||||
("expression_add", "1+1"),
|
||||
("expression_add_2", "a+b"),
|
||||
("expression_call", "f(a, b=2, **kw)"),
|
||||
("expression_tuple", "1, 2, 3"),
|
||||
("expression_tuple_one_value", "1,")
|
||||
]
|
||||
|
||||
|
||||
def cleanup_source(source: Any) -> str:
|
||||
if isinstance(source, str):
|
||||
result = dedent(source)
|
||||
elif isinstance(source, (list, tuple)):
|
||||
result = "\n".join(source)
|
||||
else:
|
||||
raise TypeError(f"Invalid type for test source: {source}")
|
||||
return result
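# In practice every test source above is a plain string, so only the dedent()
# branch is exercised; the list/tuple branch is there for iterable sources.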
|
||||
|
||||
|
||||
def prepare_test_cases(
|
||||
test_cases: Iterable[Tuple[str, Union[str, Iterable[str]]]]
|
||||
) -> Tuple[Iterable[str], Iterable[str]]:
|
||||
|
||||
test_ids, _test_sources = zip(*test_cases)
|
||||
test_sources = list(_test_sources)
|
||||
for index, source in enumerate(test_sources):
|
||||
result = cleanup_source(source)
|
||||
test_sources[index] = result
|
||||
return test_ids, test_sources
|
||||
|
||||
|
||||
TEST_IDS, TEST_SOURCES = prepare_test_cases(TEST_CASES)
|
||||
|
||||
GOOD_BUT_FAIL_TEST_IDS, GOOD_BUT_FAIL_SOURCES = prepare_test_cases(
|
||||
GOOD_BUT_FAIL_TEST_CASES
|
||||
)
|
||||
|
||||
FAIL_TEST_IDS, FAIL_SOURCES = prepare_test_cases(FAIL_TEST_CASES)
|
||||
|
||||
EXPRESSIONS_TEST_IDS, EXPRESSIONS_TEST_SOURCES = prepare_test_cases(
|
||||
EXPRESSIONS_TEST_CASES
|
||||
)
|
||||
|
||||
|
||||
class ASTGenerationTest(unittest.TestCase):
|
||||
def test_correct_ast_generation_on_source_files(self) -> None:
|
||||
self.maxDiff = None
|
||||
for source in TEST_SOURCES:
|
||||
actual_ast = peg_parser.parse_string(source)
|
||||
expected_ast = ast.parse(source)
|
||||
self.assertEqual(
|
||||
ast.dump(actual_ast, include_attributes=True),
|
||||
ast.dump(expected_ast, include_attributes=True),
|
||||
f"Wrong AST generation for source: {source}",
|
||||
)
|
||||
|
||||
def test_incorrect_ast_generation_on_source_files(self) -> None:
|
||||
for source in FAIL_SOURCES:
|
||||
with self.assertRaises(SyntaxError, msg=f"Parsing {source} did not raise an exception"):
|
||||
peg_parser.parse_string(source)
|
||||
|
||||
def test_incorrect_ast_generation_with_specialized_errors(self) -> None:
|
||||
for source, error_text in FAIL_SPECIALIZED_MESSAGE_CASES:
|
||||
exc = IndentationError if "indent" in error_text else SyntaxError
|
||||
with self.assertRaises(exc) as se:
|
||||
peg_parser.parse_string(source)
|
||||
self.assertTrue(
|
||||
error_text in se.exception.msg,
|
||||
f"Actual error message does not match expexted for {source}"
|
||||
)
|
||||
|
||||
@unittest.skipIf(sys.flags.use_peg, "This tests nothing for now, since compile uses pegen as well")
|
||||
@unittest.expectedFailure
|
||||
def test_correct_but_known_to_fail_ast_generation_on_source_files(self) -> None:
|
||||
for source in GOOD_BUT_FAIL_SOURCES:
|
||||
actual_ast = peg_parser.parse_string(source)
|
||||
expected_ast = ast.parse(source)
|
||||
self.assertEqual(
|
||||
ast.dump(actual_ast, include_attributes=True),
|
||||
ast.dump(expected_ast, include_attributes=True),
|
||||
f"Wrong AST generation for source: {source}",
|
||||
)
|
||||
|
||||
def test_correct_ast_generation_without_pos_info(self) -> None:
|
||||
for source in GOOD_BUT_FAIL_SOURCES:
|
||||
actual_ast = peg_parser.parse_string(source)
|
||||
expected_ast = ast.parse(source)
|
||||
self.assertEqual(
|
||||
ast.dump(actual_ast),
|
||||
ast.dump(expected_ast),
|
||||
f"Wrong AST generation for source: {source}",
|
||||
)
|
||||
|
||||
def test_fstring_parse_error_tracebacks(self) -> None:
|
||||
for source, error_text in FSTRINGS_TRACEBACKS.values():
|
||||
with self.assertRaises(SyntaxError) as se:
|
||||
peg_parser.parse_string(dedent(source))
|
||||
self.assertEqual(error_text, se.exception.text)
|
||||
|
||||
def test_correct_ast_generation_eval(self) -> None:
|
||||
for source in EXPRESSIONS_TEST_SOURCES:
|
||||
actual_ast = peg_parser.parse_string(source, mode='eval')
|
||||
expected_ast = ast.parse(source, mode='eval')
|
||||
self.assertEqual(
|
||||
ast.dump(actual_ast, include_attributes=True),
|
||||
ast.dump(expected_ast, include_attributes=True),
|
||||
f"Wrong AST generation for source: {source}",
|
||||
)
|
||||
|
||||
def test_tokenizer_errors_are_propagated(self) -> None:
|
||||
n = 201
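# 201 is presumably just past the tokenizer's nesting limit for parentheses,
# so the error comes from the tokenizer rather than the parser itself.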
|
||||
with self.assertRaisesRegex(SyntaxError, "too many nested parentheses"):
|
||||
peg_parser.parse_string(n*'(' + ')'*n)
|
|
@ -3,6 +3,7 @@
|
|||
import dis
|
||||
import pickle
|
||||
import unittest
|
||||
import sys
|
||||
|
||||
from test.support import check_syntax_error
|
||||
|
||||
|
@ -23,10 +24,12 @@ class PositionalOnlyTestCase(unittest.TestCase):
|
|||
compile(codestr + "\n", "<test>", "single")
|
||||
|
||||
def test_invalid_syntax_errors(self):
|
||||
check_syntax_error(self, "def f(a, b = 5, /, c): pass", "non-default argument follows default argument")
|
||||
check_syntax_error(self, "def f(a = 5, b, /, c): pass", "non-default argument follows default argument")
|
||||
check_syntax_error(self, "def f(a = 5, b=1, /, c, *, d=2): pass", "non-default argument follows default argument")
|
||||
check_syntax_error(self, "def f(a = 5, b, /): pass", "non-default argument follows default argument")
|
||||
if not sys.flags.use_peg:
|
||||
check_syntax_error(self, "def f(a, b = 5, /, c): pass", "non-default argument follows default argument")
|
||||
check_syntax_error(self, "def f(a = 5, b, /, c): pass", "non-default argument follows default argument")
|
||||
check_syntax_error(self, "def f(a = 5, b=1, /, c, *, d=2): pass", "non-default argument follows default argument")
|
||||
check_syntax_error(self, "def f(a = 5, b, /): pass", "non-default argument follows default argument")
|
||||
|
||||
check_syntax_error(self, "def f(*args, /): pass")
|
||||
check_syntax_error(self, "def f(*args, a, /): pass")
|
||||
check_syntax_error(self, "def f(**kwargs, /): pass")
|
||||
|
@ -44,10 +47,12 @@ class PositionalOnlyTestCase(unittest.TestCase):
|
|||
check_syntax_error(self, "def f(a, *, c, /, d, e): pass")
|
||||
|
||||
def test_invalid_syntax_errors_async(self):
|
||||
check_syntax_error(self, "async def f(a, b = 5, /, c): pass", "non-default argument follows default argument")
|
||||
check_syntax_error(self, "async def f(a = 5, b, /, c): pass", "non-default argument follows default argument")
|
||||
check_syntax_error(self, "async def f(a = 5, b=1, /, c, d=2): pass", "non-default argument follows default argument")
|
||||
check_syntax_error(self, "async def f(a = 5, b, /): pass", "non-default argument follows default argument")
|
||||
if not sys.flags.use_peg:
|
||||
check_syntax_error(self, "async def f(a, b = 5, /, c): pass", "non-default argument follows default argument")
|
||||
check_syntax_error(self, "async def f(a = 5, b, /, c): pass", "non-default argument follows default argument")
|
||||
check_syntax_error(self, "async def f(a = 5, b=1, /, c, d=2): pass", "non-default argument follows default argument")
|
||||
check_syntax_error(self, "async def f(a = 5, b, /): pass", "non-default argument follows default argument")
|
||||
|
||||
check_syntax_error(self, "async def f(*args, /): pass")
|
||||
check_syntax_error(self, "async def f(*args, a, /): pass")
|
||||
check_syntax_error(self, "async def f(**kwargs, /): pass")
|
||||
|
@ -231,9 +236,11 @@ class PositionalOnlyTestCase(unittest.TestCase):
|
|||
self.assertEqual(x(1, 2), 3)
|
||||
|
||||
def test_invalid_syntax_lambda(self):
|
||||
check_syntax_error(self, "lambda a, b = 5, /, c: None", "non-default argument follows default argument")
|
||||
check_syntax_error(self, "lambda a = 5, b, /, c: None", "non-default argument follows default argument")
|
||||
check_syntax_error(self, "lambda a = 5, b, /: None", "non-default argument follows default argument")
|
||||
if not sys.flags.use_peg:
|
||||
check_syntax_error(self, "lambda a, b = 5, /, c: None", "non-default argument follows default argument")
|
||||
check_syntax_error(self, "lambda a = 5, b, /, c: None", "non-default argument follows default argument")
|
||||
check_syntax_error(self, "lambda a = 5, b, /: None", "non-default argument follows default argument")
|
||||
|
||||
check_syntax_error(self, "lambda *args, /: None")
|
||||
check_syntax_error(self, "lambda *args, a, /: None")
|
||||
check_syntax_error(self, "lambda **kwargs, /: None")
|
||||
|
|
|
@ -119,7 +119,8 @@ class TestLiterals(unittest.TestCase):
|
|||
eval("'''\n\\z'''")
|
||||
self.assertEqual(len(w), 1)
|
||||
self.assertEqual(w[0].filename, '<string>')
|
||||
self.assertEqual(w[0].lineno, 1)
|
||||
if not sys.flags.use_peg:
|
||||
self.assertEqual(w[0].lineno, 1)
|
||||
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
warnings.simplefilter('error', category=DeprecationWarning)
|
||||
|
@ -128,7 +129,8 @@ class TestLiterals(unittest.TestCase):
|
|||
exc = cm.exception
|
||||
self.assertEqual(w, [])
|
||||
self.assertEqual(exc.filename, '<string>')
|
||||
self.assertEqual(exc.lineno, 1)
|
||||
if not sys.flags.use_peg:
|
||||
self.assertEqual(exc.lineno, 1)
|
||||
|
||||
def test_eval_str_raw(self):
|
||||
self.assertEqual(eval(""" r'x' """), 'x')
|
||||
|
@ -168,7 +170,8 @@ class TestLiterals(unittest.TestCase):
|
|||
eval("b'''\n\\z'''")
|
||||
self.assertEqual(len(w), 1)
|
||||
self.assertEqual(w[0].filename, '<string>')
|
||||
self.assertEqual(w[0].lineno, 1)
|
||||
if not sys.flags.use_peg:
|
||||
self.assertEqual(w[0].lineno, 1)
|
||||
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
warnings.simplefilter('error', category=DeprecationWarning)
|
||||
|
@ -177,7 +180,8 @@ class TestLiterals(unittest.TestCase):
|
|||
exc = cm.exception
|
||||
self.assertEqual(w, [])
|
||||
self.assertEqual(exc.filename, '<string>')
|
||||
self.assertEqual(exc.lineno, 1)
|
||||
if not sys.flags.use_peg:
|
||||
self.assertEqual(exc.lineno, 1)
|
||||
|
||||
def test_eval_bytes_raw(self):
|
||||
self.assertEqual(eval(""" br'x' """), b'x')
|
||||
|
|
|
@ -63,9 +63,10 @@ SyntaxError: cannot assign to __debug__
|
|||
Traceback (most recent call last):
|
||||
SyntaxError: cannot assign to function call
|
||||
|
||||
>>> del f()
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: cannot delete function call
|
||||
# Pegen does not support this yet
|
||||
# >>> del f()
|
||||
# Traceback (most recent call last):
|
||||
# SyntaxError: cannot delete function call
|
||||
|
||||
>>> a + 1 = 2
|
||||
Traceback (most recent call last):
|
||||
|
@ -100,29 +101,30 @@ expression inside that contain should still cause a syntax error.
|
|||
This test just checks a couple of cases rather than enumerating all of
|
||||
them.
|
||||
|
||||
>>> (a, "b", c) = (1, 2, 3)
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: cannot assign to literal
|
||||
# All of the following also produce different error messages with pegen
|
||||
# >>> (a, "b", c) = (1, 2, 3)
|
||||
# Traceback (most recent call last):
|
||||
# SyntaxError: cannot assign to literal
|
||||
|
||||
>>> (a, True, c) = (1, 2, 3)
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: cannot assign to True
|
||||
# >>> (a, True, c) = (1, 2, 3)
|
||||
# Traceback (most recent call last):
|
||||
# SyntaxError: cannot assign to True
|
||||
|
||||
>>> (a, __debug__, c) = (1, 2, 3)
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: cannot assign to __debug__
|
||||
|
||||
>>> (a, *True, c) = (1, 2, 3)
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: cannot assign to True
|
||||
# >>> (a, *True, c) = (1, 2, 3)
|
||||
# Traceback (most recent call last):
|
||||
# SyntaxError: cannot assign to True
|
||||
|
||||
>>> (a, *__debug__, c) = (1, 2, 3)
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: cannot assign to __debug__
|
||||
|
||||
>>> [a, b, c + 1] = [1, 2, 3]
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: cannot assign to operator
|
||||
# >>> [a, b, c + 1] = [1, 2, 3]
|
||||
# Traceback (most recent call last):
|
||||
# SyntaxError: cannot assign to operator
|
||||
|
||||
>>> a if 1 else b = 1
|
||||
Traceback (most recent call last):
|
||||
|
@ -186,9 +188,11 @@ SyntaxError: Generator expression must be parenthesized
|
|||
>>> f(x for x in L, **{})
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: Generator expression must be parenthesized
|
||||
>>> f(L, x for x in L)
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: Generator expression must be parenthesized
|
||||
|
||||
# >>> f(L, x for x in L)
|
||||
# Traceback (most recent call last):
|
||||
# SyntaxError: Generator expression must be parenthesized
|
||||
|
||||
>>> f(x for x in L, y for y in L)
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: Generator expression must be parenthesized
|
||||
|
@ -297,31 +301,34 @@ SyntaxError: invalid syntax
|
|||
... 290, 291, 292, 293, 294, 295, 296, 297, 298, 299) # doctest: +ELLIPSIS
|
||||
(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ..., 297, 298, 299)
|
||||
|
||||
>>> f(lambda x: x[0] = 3)
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expression cannot contain assignment, perhaps you meant "=="?
|
||||
# >>> f(lambda x: x[0] = 3)
|
||||
# Traceback (most recent call last):
|
||||
# SyntaxError: expression cannot contain assignment, perhaps you meant "=="?
|
||||
|
||||
The grammar accepts any test (basically, any expression) in the
|
||||
keyword slot of a call site. Test a few different options.
|
||||
|
||||
>>> f(x()=2)
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expression cannot contain assignment, perhaps you meant "=="?
|
||||
>>> f(a or b=1)
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expression cannot contain assignment, perhaps you meant "=="?
|
||||
>>> f(x.y=1)
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expression cannot contain assignment, perhaps you meant "=="?
|
||||
>>> f((x)=2)
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expression cannot contain assignment, perhaps you meant "=="?
|
||||
>>> f(True=2)
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: cannot assign to True
|
||||
# >>> f(x()=2)
|
||||
# Traceback (most recent call last):
|
||||
# SyntaxError: expression cannot contain assignment, perhaps you meant "=="?
|
||||
# >>> f(a or b=1)
|
||||
# Traceback (most recent call last):
|
||||
# SyntaxError: expression cannot contain assignment, perhaps you meant "=="?
|
||||
# >>> f(x.y=1)
|
||||
# Traceback (most recent call last):
|
||||
# SyntaxError: expression cannot contain assignment, perhaps you meant "=="?
|
||||
# >>> f((x)=2)
|
||||
# Traceback (most recent call last):
|
||||
# SyntaxError: expression cannot contain assignment, perhaps you meant "=="?
|
||||
# >>> f(True=2)
|
||||
# Traceback (most recent call last):
|
||||
# SyntaxError: cannot assign to True
|
||||
>>> f(__debug__=1)
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: cannot assign to __debug__
|
||||
>>> __debug__: int
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: cannot assign to __debug__
|
||||
|
||||
|
||||
More set_context():
|
||||
|
@ -620,9 +627,9 @@ Corner-cases that used to fail to raise the correct error:
|
|||
Traceback (most recent call last):
|
||||
SyntaxError: cannot assign to __debug__
|
||||
|
||||
>>> with (lambda *:0): pass
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: named arguments must follow bare *
|
||||
# >>> with (lambda *:0): pass
|
||||
# Traceback (most recent call last):
|
||||
# SyntaxError: named arguments must follow bare *
|
||||
|
||||
Corner-cases that used to crash:
|
||||
|
||||
|
@ -637,6 +644,7 @@ Corner-cases that used to crash:
|
|||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
from test import support
|
||||
|
@ -670,6 +678,8 @@ class SyntaxTestCase(unittest.TestCase):
|
|||
def test_assign_call(self):
|
||||
self._check_error("f() = 1", "assign")
|
||||
|
||||
@unittest.skipIf(sys.flags.use_peg, "Pegen does not produce a specialized error "
|
||||
"message yet")
|
||||
def test_assign_del(self):
|
||||
self._check_error("del f()", "delete")
|
||||
|
||||
|
|
|
@ -545,10 +545,10 @@ class SysModuleTest(unittest.TestCase):
|
|||
def test_sys_flags(self):
|
||||
self.assertTrue(sys.flags)
|
||||
attrs = ("debug",
|
||||
"inspect", "interactive", "optimize", "dont_write_bytecode",
|
||||
"no_user_site", "no_site", "ignore_environment", "verbose",
|
||||
"bytes_warning", "quiet", "hash_randomization", "isolated",
|
||||
"dev_mode", "utf8_mode")
|
||||
"inspect", "interactive", "optimize", "use_peg",
|
||||
"dont_write_bytecode", "no_user_site", "no_site",
|
||||
"ignore_environment", "verbose", "bytes_warning", "quiet",
|
||||
"hash_randomization", "isolated", "dev_mode", "utf8_mode")
|
||||
for attr in attrs:
|
||||
self.assertTrue(hasattr(sys.flags, attr), attr)
|
||||
attr_type = bool if attr == "dev_mode" else int
|
||||
|
|
|
@ -656,6 +656,8 @@ class BaseExceptionReportingTests:
|
|||
self.assertIn('inner_raise() # Marker', blocks[2])
|
||||
self.check_zero_div(blocks[2])
|
||||
|
||||
@unittest.skipIf(sys.flags.use_peg,
|
||||
"Pegen is arguably better here, so no need to fix this")
|
||||
def test_syntax_error_offset_at_eol(self):
|
||||
# See #10186.
|
||||
def e():
|
||||
|
|
|
@ -218,6 +218,7 @@ def favk(
|
|||
"""
|
||||
|
||||
|
||||
@unittest.skipIf(sys.flags.use_peg, "Pegen does not support type comments yet")
|
||||
class TypeCommentTests(unittest.TestCase):
|
||||
|
||||
lowest = 4 # Lowest minor version supported
|
||||
|
|
|
@ -158,14 +158,15 @@ List comprehension element unpacking
|
|||
...
|
||||
SyntaxError: iterable unpacking cannot be used in comprehension
|
||||
|
||||
Generator expression in function arguments
|
||||
# Pegen is better here.
|
||||
# Generator expression in function arguments
|
||||
|
||||
>>> list(*x for x in (range(5) for i in range(3)))
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
list(*x for x in (range(5) for i in range(3)))
|
||||
^
|
||||
SyntaxError: invalid syntax
|
||||
# >>> list(*x for x in (range(5) for i in range(3)))
|
||||
# Traceback (most recent call last):
|
||||
# ...
|
||||
# list(*x for x in (range(5) for i in range(3)))
|
||||
# ^
|
||||
# SyntaxError: invalid syntax
|
||||
|
||||
>>> dict(**x for x in [{1:2}])
|
||||
Traceback (most recent call last):
|
||||
|
|
|
@ -6,6 +6,7 @@ import pathlib
|
|||
import random
|
||||
import tokenize
|
||||
import ast
|
||||
import sys
|
||||
|
||||
|
||||
def read_pyfile(filename):
|
||||
|
@ -327,6 +328,7 @@ class UnparseTestCase(ASTTestCase):
|
|||
ast.Constant(value=(1, 2, 3), kind=None), "(1, 2, 3)"
|
||||
)
|
||||
|
||||
@unittest.skipIf(sys.flags.use_peg, "Pegen does not support type annotation yet")
|
||||
def test_function_type(self):
|
||||
for function_type in (
|
||||
"() -> int",
|
||||
|
|
|
@ -244,7 +244,7 @@ LIBOBJS= @LIBOBJS@
|
|||
PYTHON= python$(EXE)
|
||||
BUILDPYTHON= python$(BUILDEXE)
|
||||
|
||||
PYTHON_FOR_REGEN=@PYTHON_FOR_REGEN@
|
||||
PYTHON_FOR_REGEN?=@PYTHON_FOR_REGEN@
|
||||
UPDATE_FILE=@PYTHON_FOR_REGEN@ $(srcdir)/Tools/scripts/update_file.py
|
||||
PYTHON_FOR_BUILD=@PYTHON_FOR_BUILD@
|
||||
_PYTHON_HOST_PLATFORM=@_PYTHON_HOST_PLATFORM@
|
||||
|
@ -295,6 +295,19 @@ LIBFFI_INCLUDEDIR= @LIBFFI_INCLUDEDIR@
|
|||
|
||||
##########################################################################
|
||||
# Parser
|
||||
|
||||
PEGEN_OBJS= \
|
||||
Parser/pegen/pegen.o \
|
||||
Parser/pegen/parse.o \
|
||||
Parser/pegen/parse_string.o \
|
||||
Parser/pegen/peg_api.o
|
||||
|
||||
|
||||
PEGEN_HEADERS= \
|
||||
$(srcdir)/Include/pegen_interface.h \
|
||||
$(srcdir)/Parser/pegen/pegen.h \
|
||||
$(srcdir)/Parser/pegen/parse_string.h
|
||||
|
||||
POBJS= \
|
||||
Parser/acceler.o \
|
||||
Parser/grammar1.o \
|
||||
|
@ -303,9 +316,10 @@ POBJS= \
|
|||
Parser/parser.o \
|
||||
Parser/token.o \
|
||||
|
||||
PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/parsetok.o Parser/tokenizer.o
|
||||
PARSER_OBJS= $(POBJS) $(PEGEN_OBJS) Parser/myreadline.o Parser/parsetok.o Parser/tokenizer.o
|
||||
|
||||
PARSER_HEADERS= \
|
||||
$(PEGEN_HEADERS) \
|
||||
$(srcdir)/Include/grammar.h \
|
||||
$(srcdir)/Include/parsetok.h \
|
||||
$(srcdir)/Parser/parser.h \
|
||||
|
@ -731,7 +745,7 @@ regen-importlib: Programs/_freeze_importlib
|
|||
############################################################################
|
||||
# Regenerate all generated files
|
||||
|
||||
regen-all: regen-opcode regen-opcode-targets regen-typeslots regen-grammar \
|
||||
regen-all: regen-opcode regen-opcode-targets regen-typeslots regen-grammar regen-pegen \
|
||||
regen-token regen-keyword regen-symbol regen-ast regen-importlib clinic
|
||||
|
||||
############################################################################
|
||||
|
@ -806,6 +820,12 @@ regen-grammar: regen-token
|
|||
$(UPDATE_FILE) $(srcdir)/Include/graminit.h $(srcdir)/Include/graminit.h.new
|
||||
$(UPDATE_FILE) $(srcdir)/Python/graminit.c $(srcdir)/Python/graminit.c.new
|
||||
|
||||
.PHONY: regen-pegen
|
||||
regen-pegen:
|
||||
PYTHONPATH=$(srcdir)/Tools/peg_generator $(PYTHON_FOR_REGEN) -m pegen -c -q $(srcdir)/Grammar/python.gram \
|
||||
-o $(srcdir)/Parser/pegen/parse.new.c
|
||||
$(UPDATE_FILE) $(srcdir)/Parser/pegen/parse.c $(srcdir)/Parser/pegen/parse.new.c
|
||||
|
||||
.PHONY=regen-ast
|
||||
regen-ast:
|
||||
# Regenerate Include/Python-ast.h using Parser/asdl_c.py -h
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
Switch to a new parser, based on PEG. For more details see PEP 617. To
|
||||
temporarily switch back to the old parser, use ``-X oldparser`` or
|
||||
``PYTHONOLDPARSER=1``. In Python 3.10 we will remove the old parser
|
||||
completely, including the ``parser`` module (already deprecated) and
|
||||
anything that depends on it.
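
A minimal sketch of how to check which parser is active, using the new
``sys.flags.use_peg`` field that this commit adds (illustration only, not part
of the changeset):

    import sys

    # 1 when the new PEG parser is active (the default after this change),
    # 0 when running under -X oldparser or PYTHONOLDPARSER=1.
    print(sys.flags.use_peg)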
|
|
@ -134,6 +134,9 @@ faulthandler faulthandler.c
|
|||
# can call _PyTraceMalloc_NewReference().
|
||||
_tracemalloc _tracemalloc.c hashtable.c
|
||||
|
||||
# PEG-based parser module -- slated to be *the* parser
|
||||
_peg_parser _peg_parser.c
|
||||
|
||||
# The rest of the modules listed in this file are all commented out by
|
||||
# default. Usually they can be detected and built as dynamically
|
||||
# loaded modules by the new setup.py script added in Python 2.1. If
|
||||
|
|
|
@ -0,0 +1,107 @@
|
|||
#include <Python.h>
|
||||
#include <pegen_interface.h>
|
||||
|
||||
PyObject *
|
||||
_Py_parse_file(PyObject *self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
static char *keywords[] = {"file", "mode", NULL};
|
||||
char *filename;
|
||||
char *mode_str = "exec";
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|s", keywords, &filename, &mode_str)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int mode;
|
||||
if (strcmp(mode_str, "exec") == 0) {
|
||||
mode = Py_file_input;
|
||||
}
|
||||
else if (strcmp(mode_str, "single") == 0) {
|
||||
mode = Py_single_input;
|
||||
}
|
||||
else {
|
||||
return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'single'");
|
||||
}
|
||||
|
||||
PyArena *arena = PyArena_New();
|
||||
if (arena == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyObject *result = NULL;
|
||||
|
||||
mod_ty res = PyPegen_ASTFromFile(filename, mode, arena);
|
||||
if (res == NULL) {
|
||||
goto error;
|
||||
}
|
||||
result = PyAST_mod2obj(res);
|
||||
|
||||
error:
|
||||
PyArena_Free(arena);
|
||||
return result;
|
||||
}
|
||||
|
||||
PyObject *
|
||||
_Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
static char *keywords[] = {"string", "mode", NULL};
|
||||
char *the_string;
|
||||
char *mode_str = "exec";
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|s", keywords, &the_string, &mode_str)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int mode;
|
||||
if (strcmp(mode_str, "exec") == 0) {
|
||||
mode = Py_file_input;
|
||||
}
|
||||
else if (strcmp(mode_str, "eval") == 0) {
|
||||
mode = Py_eval_input;
|
||||
}
|
||||
else if (strcmp(mode_str, "single") == 0) {
|
||||
mode = Py_single_input;
|
||||
}
|
||||
else {
|
||||
return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'");
|
||||
}
|
||||
|
||||
PyArena *arena = PyArena_New();
|
||||
if (arena == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyObject *result = NULL;
|
||||
|
||||
PyCompilerFlags flags = _PyCompilerFlags_INIT;
|
||||
flags.cf_flags = PyCF_IGNORE_COOKIE;
|
||||
|
||||
mod_ty res = PyPegen_ASTFromString(the_string, mode, &flags, arena);
|
||||
if (res == NULL) {
|
||||
goto error;
|
||||
}
|
||||
result = PyAST_mod2obj(res);
|
||||
|
||||
error:
|
||||
PyArena_Free(arena);
|
||||
return result;
|
||||
}
|
||||
|
||||
static PyMethodDef ParseMethods[] = {
|
||||
{"parse_file", (PyCFunction)(void (*)(void))_Py_parse_file, METH_VARARGS|METH_KEYWORDS, "Parse a file."},
|
||||
{"parse_string", (PyCFunction)(void (*)(void))_Py_parse_string, METH_VARARGS|METH_KEYWORDS,"Parse a string."},
|
||||
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
static struct PyModuleDef parsemodule = {
|
||||
PyModuleDef_HEAD_INIT,
|
||||
.m_name = "peg_parser",
|
||||
.m_doc = "A parser.",
|
||||
.m_methods = ParseMethods,
|
||||
};
|
||||
|
||||
PyMODINIT_FUNC
|
||||
PyInit__peg_parser(void)
|
||||
{
|
||||
return PyModule_Create(&parsemodule);
|
||||
}
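
A minimal sketch of driving this extension module from Python (assuming a build
where ``_peg_parser`` is compiled in, as the Modules/Setup and config.c changes
in this diff arrange); the ``mode`` strings mirror the strcmp() dispatch above:

    import ast
    import _peg_parser

    tree = _peg_parser.parse_string("x = 1 + 2")            # mode defaults to "exec"
    print(ast.dump(tree))

    expr = _peg_parser.parse_string("1 + 2", mode="eval")   # returns an Expression node
    print(ast.dump(expr))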
|
|
@ -75,6 +75,8 @@ extern PyObject* PyInit__opcode(void);
|
|||
|
||||
extern PyObject* PyInit__contextvars(void);
|
||||
|
||||
extern PyObject* PyInit__peg_parser(void);
|
||||
|
||||
/* tools/freeze/makeconfig.py marker for additional "extern" */
|
||||
/* -- ADDMODULE MARKER 1 -- */
|
||||
|
||||
|
@ -169,6 +171,7 @@ struct _inittab _PyImport_Inittab[] = {
|
|||
{"_opcode", PyInit__opcode},
|
||||
|
||||
{"_contextvars", PyInit__contextvars},
|
||||
{"_peg_parser", PyInit__peg_parser},
|
||||
|
||||
/* Sentinel */
|
||||
{0, 0}
|
||||
|
|
|
@ -213,6 +213,8 @@
|
|||
<ClInclude Include="..\Include\parsetok.h" />
|
||||
<ClInclude Include="..\Include\patchlevel.h" />
|
||||
<ClInclude Include="..\Include\picklebufobject.h" />
|
||||
<ClInclude Include="..\Include\pegen_interface.h" />
|
||||
<ClInclude Include="..\Include\pyhash.h" />
|
||||
<ClInclude Include="..\Include\pyhash.h" />
|
||||
<ClInclude Include="..\Include\py_curses.h" />
|
||||
<ClInclude Include="..\Include\pyarena.h" />
|
||||
|
@ -276,6 +278,8 @@
|
|||
<ClInclude Include="..\Objects\unicodetype_db.h" />
|
||||
<ClInclude Include="..\Parser\parser.h" />
|
||||
<ClInclude Include="..\Parser\tokenizer.h" />
|
||||
<ClInclude Include="..\Parser\pegen\parse_string.h" />
|
||||
<ClInclude Include="..\Parser\pegen\pegen.h" />
|
||||
<ClInclude Include="..\PC\errmap.h" />
|
||||
<ClInclude Include="..\PC\pyconfig.h" />
|
||||
<ClInclude Include="..\Python\ceval_gil.h" />
|
||||
|
@ -338,6 +342,7 @@
|
|||
<ClCompile Include="..\Modules\_opcode.c" />
|
||||
<ClCompile Include="..\Modules\_operator.c" />
|
||||
<ClCompile Include="..\Modules\parsermodule.c" />
|
||||
<ClCompile Include="..\Modules\_peg_parser.c" />
|
||||
<ClCompile Include="..\Modules\posixmodule.c" />
|
||||
<ClCompile Include="..\Modules\rotatingtree.c" />
|
||||
<ClCompile Include="..\Modules\sha1module.c" />
|
||||
|
@ -419,6 +424,10 @@
|
|||
<ClCompile Include="..\Parser\parsetok.c" />
|
||||
<ClCompile Include="..\Parser\tokenizer.c" />
|
||||
<ClCompile Include="..\Parser\token.c" />
|
||||
<ClCompile Include="..\Parser\pegen\pegen.c" />
|
||||
<ClCompile Include="..\Parser\pegen\parse.c" />
|
||||
<ClCompile Include="..\Parser\pegen\parse_string.c" />
|
||||
<ClCompile Include="..\Parser\pegen\peg_api.c" />
|
||||
<ClCompile Include="..\PC\invalid_parameter_handler.c" />
|
||||
<ClCompile Include="..\PC\winreg.c" />
|
||||
<ClCompile Include="..\PC\config.c" />
|
||||
|
|
|
@ -902,6 +902,18 @@
|
|||
<ClCompile Include="..\Parser\grammar1.c">
|
||||
<Filter>Parser</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Parser\pegen\pegen.c">
|
||||
<Filter>Parser</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Parser\pegen\parse.c">
|
||||
<Filter>Parser</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Parser\pegen\parse_string.c">
|
||||
<Filter>Parser</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Parser\pegen\peg_api.c">
|
||||
<Filter>Parser</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Parser\listnode.c">
|
||||
<Filter>Parser</Filter>
|
||||
</ClCompile>
|
||||
|
|
|
@ -166,6 +166,14 @@
|
|||
</Copy>
|
||||
<Warning Text="Grammar updated. You will need to rebuild pythoncore to see the changes." Condition="'@(_UpdatedH)' != '' and '@(_UpdatedC)' != ''" />
|
||||
</Target>
|
||||
<Target Name="_RegenPegen" BeforeTargets="Build">
|
||||
<!-- Regenerate Parser/pegen/parse.c -->
|
||||
<Exec Command=""$PYTHONPATH=$(srcdir)/Tools/peg_generator" "$(PythonExe)" -m pegen -c -q "$(PySourcePath)Grammar\python.gram" -o "$(IntDir)parse.c"" />
|
||||
<Copy SourceFiles="$(IntDir)parse.c" DestinationFiles="$(PySourcePath)Parser\pegen\parse.c">
|
||||
<Output TaskParameter="CopiedFiles" ItemName="_UpdatedParse" />
|
||||
</Copy>
|
||||
<Warning Text="Pegen updated. You will need to rebuild pythoncore to see the changes." Condition="'@(_UpdatedParse)' != ''" />
|
||||
</Target>
|
||||
<Target Name="_RegenAST_H" AfterTargets="_RegenGrammar">
|
||||
<!-- Regenerate Include/Python-ast.h using Parser/asdl_c.py -h -->
|
||||
<Exec Command=""$(PythonExe)" "$(PySourcePath)Parser\asdl_c.py" -h "$(IntDir)Python-ast.h" "$(PySourcePath)Parser\Python.asdl"" />
|
||||
|
@ -222,4 +230,4 @@
|
|||
<Clean Include="$(IntDir)graminit.c.new" />
|
||||
</ItemGroup>
|
||||
</Target>
|
||||
</Project>
|
||||
</Project>
|
||||
|
|
File diff suppressed because it is too large
File diff suppressed because it is too large
|
@ -0,0 +1,46 @@
|
|||
#ifndef STRINGS_H
|
||||
#define STRINGS_H
|
||||
|
||||
#include <Python.h>
|
||||
#include <Python-ast.h>
|
||||
#include "pegen.h"
|
||||
|
||||
#define EXPRLIST_N_CACHED 64
|
||||
|
||||
typedef struct {
|
||||
/* Incrementally build an array of expr_ty, to be used in an
|
||||
asdl_seq. Cache some small but reasonably sized number of
|
||||
expr_ty's, and then after that start dynamically allocating,
|
||||
doubling the number allocated each time. Note that the f-string
|
||||
f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
|
||||
Constant for the literal 'a'. So you add expr_ty's about twice as
|
||||
fast as you add expressions in an f-string. */
|
||||
|
||||
Py_ssize_t allocated; /* Number we've allocated. */
|
||||
Py_ssize_t size; /* Number we've used. */
|
||||
expr_ty *p; /* Pointer to the memory we're actually
|
||||
using. Will point to 'data' until we
|
||||
start dynamically allocating. */
|
||||
expr_ty data[EXPRLIST_N_CACHED];
|
||||
} ExprList;
|
||||
|
||||
/* The FstringParser is designed to add a mix of strings and
|
||||
f-strings, and concat them together as needed. Ultimately, it
|
||||
generates an expr_ty. */
|
||||
typedef struct {
|
||||
PyObject *last_str;
|
||||
ExprList expr_list;
|
||||
int fmode;
|
||||
} FstringParser;
|
||||
|
||||
void _PyPegen_FstringParser_Init(FstringParser *);
|
||||
int _PyPegen_parsestr(Parser *, const char *, int *, int *, PyObject **,
|
||||
const char **, Py_ssize_t *);
|
||||
int _PyPegen_FstringParser_ConcatFstring(Parser *, FstringParser *, const char **,
|
||||
const char *, int, int, Token *, Token *,
|
||||
Token *);
|
||||
int _PyPegen_FstringParser_ConcatAndDel(FstringParser *, PyObject *);
|
||||
expr_ty _PyPegen_FstringParser_Finish(Parser *, FstringParser *, Token *, Token *);
|
||||
void _PyPegen_FstringParser_Dealloc(FstringParser *);
|
||||
|
||||
#endif
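
The expr_ty ratio described in the ExprList comment above can be seen directly
in the AST; a small illustration with the stock ast module:

    import ast

    # f'{0}a{1}' yields a JoinedStr with three values: two FormattedValue
    # nodes plus one Constant for the literal 'a'.
    node = ast.parse("f'{0}a{1}'", mode="eval").body
    print([type(v).__name__ for v in node.values])
    # ['FormattedValue', 'Constant', 'FormattedValue']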
|
|
@ -0,0 +1,134 @@
|
|||
#include <pegen_interface.h>
|
||||
|
||||
#include "../tokenizer.h"
|
||||
#include "pegen.h"
|
||||
|
||||
mod_ty
|
||||
PyPegen_ASTFromString(const char *str, int mode, PyCompilerFlags *flags, PyArena *arena)
|
||||
{
|
||||
PyObject *filename_ob = PyUnicode_FromString("<string>");
|
||||
if (filename_ob == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
mod_ty result = PyPegen_ASTFromStringObject(str, filename_ob, mode, flags, arena);
|
||||
Py_XDECREF(filename_ob);
|
||||
return result;
|
||||
}
|
||||
|
||||
mod_ty
|
||||
PyPegen_ASTFromStringObject(const char *str, PyObject* filename, int mode, PyCompilerFlags *flags, PyArena *arena)
|
||||
{
|
||||
if (PySys_Audit("compile", "yO", str, filename) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int iflags = flags != NULL ? flags->cf_flags : PyCF_IGNORE_COOKIE;
|
||||
mod_ty result = _PyPegen_run_parser_from_string(str, mode, filename, iflags, arena);
|
||||
return result;
|
||||
}
|
||||
|
||||
mod_ty
|
||||
PyPegen_ASTFromFile(const char *filename, int mode, PyArena *arena)
|
||||
{
|
||||
PyObject *filename_ob = PyUnicode_FromString(filename);
|
||||
if (filename_ob == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
mod_ty result = _PyPegen_run_parser_from_file(filename, mode, filename_ob, arena);
|
||||
Py_XDECREF(filename_ob);
|
||||
return result;
|
||||
}
|
||||
|
||||
mod_ty
|
||||
PyPegen_ASTFromFileObject(FILE *fp, PyObject *filename_ob, int mode,
|
||||
const char *enc, const char *ps1, const char* ps2,
|
||||
int *errcode, PyArena *arena)
|
||||
{
|
||||
if (PySys_Audit("compile", "OO", Py_None, filename_ob) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
return _PyPegen_run_parser_from_file_pointer(fp, mode, filename_ob, enc, ps1, ps2,
|
||||
errcode, arena);
|
||||
}
|
||||
|
||||
PyCodeObject *
|
||||
PyPegen_CodeObjectFromString(const char *str, int mode, PyCompilerFlags *flags)
|
||||
{
|
||||
PyArena *arena = PyArena_New();
|
||||
if (arena == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyCodeObject *result = NULL;
|
||||
|
||||
PyObject *filename_ob = PyUnicode_FromString("<string>");
|
||||
if (filename_ob == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
mod_ty res = PyPegen_ASTFromString(str, mode, flags, arena);
|
||||
if (res == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
result = PyAST_CompileObject(res, filename_ob, NULL, -1, arena);
|
||||
|
||||
error:
|
||||
Py_XDECREF(filename_ob);
|
||||
PyArena_Free(arena);
|
||||
return result;
|
||||
}
|
||||
|
||||
PyCodeObject *
|
||||
PyPegen_CodeObjectFromFile(const char *filename, int mode)
|
||||
{
|
||||
PyArena *arena = PyArena_New();
|
||||
if (arena == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyCodeObject *result = NULL;
|
||||
|
||||
PyObject *filename_ob = PyUnicode_FromString(filename);
|
||||
if (filename_ob == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
mod_ty res = PyPegen_ASTFromFile(filename, mode, arena);
|
||||
if (res == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
result = PyAST_CompileObject(res, filename_ob, NULL, -1, arena);
|
||||
|
||||
error:
|
||||
Py_XDECREF(filename_ob);
|
||||
PyArena_Free(arena);
|
||||
return result;
|
||||
}
|
||||
|
||||
PyCodeObject *
|
||||
PyPegen_CodeObjectFromFileObject(FILE *fp, PyObject *filename_ob, int mode,
|
||||
const char *ps1, const char *ps2, const char *enc,
|
||||
int *errcode)
|
||||
{
|
||||
PyArena *arena = PyArena_New();
|
||||
if (arena == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyCodeObject *result = NULL;
|
||||
|
||||
mod_ty res = PyPegen_ASTFromFileObject(fp, filename_ob, mode, enc, ps1, ps2,
|
||||
errcode, arena);
|
||||
if (res == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
result = PyAST_CompileObject(res, filename_ob, NULL, -1, arena);
|
||||
|
||||
error:
|
||||
PyArena_Free(arena);
|
||||
return result;
|
||||
}
|
File diff suppressed because it is too large
|
@ -0,0 +1,179 @@
|
|||
#ifndef PEGEN_H
|
||||
#define PEGEN_H
|
||||
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
#include <token.h>
|
||||
#include <Python-ast.h>
|
||||
#include <pyarena.h>
|
||||
|
||||
typedef struct _memo {
|
||||
int type;
|
||||
void *node;
|
||||
int mark;
|
||||
struct _memo *next;
|
||||
} Memo;
|
||||
|
||||
typedef struct {
|
||||
int type;
|
||||
PyObject *bytes;
|
||||
int lineno, col_offset, end_lineno, end_col_offset;
|
||||
Memo *memo;
|
||||
} Token;
|
||||
|
||||
typedef struct {
|
||||
char *str;
|
||||
int type;
|
||||
} KeywordToken;
|
||||
|
||||
typedef struct {
|
||||
struct tok_state *tok;
|
||||
Token **tokens;
|
||||
int mark;
|
||||
int fill, size;
|
||||
PyArena *arena;
|
||||
KeywordToken **keywords;
|
||||
int n_keyword_lists;
|
||||
int start_rule;
|
||||
int *errcode;
|
||||
int parsing_started;
|
||||
PyObject* normalize;
|
||||
int starting_lineno;
|
||||
int starting_col_offset;
|
||||
int error_indicator;
|
||||
} Parser;
|
||||
|
||||
typedef struct {
|
||||
cmpop_ty cmpop;
|
||||
expr_ty expr;
|
||||
} CmpopExprPair;
|
||||
|
||||
typedef struct {
|
||||
expr_ty key;
|
||||
expr_ty value;
|
||||
} KeyValuePair;
|
||||
|
||||
typedef struct {
|
||||
arg_ty arg;
|
||||
expr_ty value;
|
||||
} NameDefaultPair;
|
||||
|
||||
typedef struct {
|
||||
asdl_seq *plain_names;
|
||||
asdl_seq *names_with_defaults; // asdl_seq* of NameDefaultPair's
|
||||
} SlashWithDefault;
|
||||
|
||||
typedef struct {
|
||||
arg_ty vararg;
|
||||
asdl_seq *kwonlyargs; // asdl_seq* of NameDefaultPair's
|
||||
arg_ty kwarg;
|
||||
} StarEtc;
|
||||
|
||||
typedef struct {
|
||||
operator_ty kind;
|
||||
} AugOperator;
|
||||
|
||||
typedef struct {
|
||||
void *element;
|
||||
int is_keyword;
|
||||
} KeywordOrStarred;
|
||||
|
||||
void _PyPegen_clear_memo_statistics(void);
|
||||
PyObject *_PyPegen_get_memo_statistics(void);
|
||||
|
||||
int _PyPegen_insert_memo(Parser *p, int mark, int type, void *node);
|
||||
int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
|
||||
int _PyPegen_is_memoized(Parser *p, int type, void *pres);
|
||||
|
||||
int _PyPegen_lookahead_with_string(int, void *(func)(Parser *, const char *), Parser *, const char *);
|
||||
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
|
||||
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
|
||||
|
||||
Token *_PyPegen_expect_token(Parser *p, int type);
|
||||
Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
|
||||
int _PyPegen_fill_token(Parser *p);
|
||||
void *_PyPegen_async_token(Parser *p);
|
||||
void *_PyPegen_await_token(Parser *p);
|
||||
void *_PyPegen_endmarker_token(Parser *p);
|
||||
expr_ty _PyPegen_name_token(Parser *p);
|
||||
void *_PyPegen_newline_token(Parser *p);
|
||||
void *_PyPegen_indent_token(Parser *p);
|
||||
void *_PyPegen_dedent_token(Parser *p);
|
||||
expr_ty _PyPegen_number_token(Parser *p);
|
||||
void *_PyPegen_string_token(Parser *p);
|
||||
const char *_PyPegen_get_expr_name(expr_ty);
|
||||
void *_PyPegen_raise_error(Parser *p, PyObject *, const char *errmsg, ...);
|
||||
void *_PyPegen_dummy_name(Parser *p, ...);
|
||||
|
||||
#define UNUSED(expr) do { (void)(expr); } while (0)
|
||||
#define EXTRA_EXPR(head, tail) head->lineno, head->col_offset, tail->end_lineno, tail->end_col_offset, p->arena
|
||||
#define EXTRA start_lineno, start_col_offset, end_lineno, end_col_offset, p->arena
|
||||
#define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, msg, ##__VA_ARGS__)
|
||||
#define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, msg, ##__VA_ARGS__)
|
||||
|
||||
Py_LOCAL_INLINE(void *)
|
||||
CHECK_CALL(Parser *p, void *result)
|
||||
{
|
||||
if (result == NULL) {
|
||||
assert(PyErr_Occurred());
|
||||
p->error_indicator = 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* This is needed for helper functions that are allowed to
|
||||
return NULL without an error. Example: _PyPegen_seq_extract_starred_exprs */
|
||||
Py_LOCAL_INLINE(void *)
|
||||
CHECK_CALL_NULL_ALLOWED(Parser *p, void *result)
|
||||
{
|
||||
if (result == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
#define CHECK(result) CHECK_CALL(p, result)
|
||||
#define CHECK_NULL_ALLOWED(result) CHECK_CALL_NULL_ALLOWED(p, result)
|
||||
|
||||
PyObject *_PyPegen_new_identifier(Parser *, char *);
|
||||
Parser *_PyPegen_Parser_New(struct tok_state *, int, int *, PyArena *);
|
||||
void _PyPegen_Parser_Free(Parser *);
|
||||
mod_ty _PyPegen_run_parser_from_file_pointer(FILE *, int, PyObject *, const char *,
|
||||
const char *, const char *, int *, PyArena *);
|
||||
void *_PyPegen_run_parser(Parser *);
|
||||
mod_ty _PyPegen_run_parser_from_file(const char *, int, PyObject *, PyArena *);
|
||||
mod_ty _PyPegen_run_parser_from_string(const char *, int, PyObject *, int, PyArena *);
|
||||
void *_PyPegen_interactive_exit(Parser *);
|
||||
asdl_seq *_PyPegen_singleton_seq(Parser *, void *);
|
||||
asdl_seq *_PyPegen_seq_insert_in_front(Parser *, void *, asdl_seq *);
|
||||
asdl_seq *_PyPegen_seq_flatten(Parser *, asdl_seq *);
|
||||
expr_ty _PyPegen_join_names_with_dot(Parser *, expr_ty, expr_ty);
|
||||
int _PyPegen_seq_count_dots(asdl_seq *);
|
||||
alias_ty _PyPegen_alias_for_star(Parser *);
|
||||
asdl_seq *_PyPegen_map_names_to_ids(Parser *, asdl_seq *);
|
||||
CmpopExprPair *_PyPegen_cmpop_expr_pair(Parser *, cmpop_ty, expr_ty);
|
||||
asdl_int_seq *_PyPegen_get_cmpops(Parser *p, asdl_seq *);
|
||||
asdl_seq *_PyPegen_get_exprs(Parser *, asdl_seq *);
|
||||
expr_ty _PyPegen_set_expr_context(Parser *, expr_ty, expr_context_ty);
|
||||
KeyValuePair *_PyPegen_key_value_pair(Parser *, expr_ty, expr_ty);
|
||||
asdl_seq *_PyPegen_get_keys(Parser *, asdl_seq *);
|
||||
asdl_seq *_PyPegen_get_values(Parser *, asdl_seq *);
|
||||
NameDefaultPair *_PyPegen_name_default_pair(Parser *, arg_ty, expr_ty);
|
||||
SlashWithDefault *_PyPegen_slash_with_default(Parser *, asdl_seq *, asdl_seq *);
|
||||
StarEtc *_PyPegen_star_etc(Parser *, arg_ty, asdl_seq *, arg_ty);
|
||||
arguments_ty _PyPegen_make_arguments(Parser *, asdl_seq *, SlashWithDefault *,
|
||||
asdl_seq *, asdl_seq *, StarEtc *);
|
||||
arguments_ty _PyPegen_empty_arguments(Parser *);
|
||||
AugOperator *_PyPegen_augoperator(Parser*, operator_ty type);
|
||||
stmt_ty _PyPegen_function_def_decorators(Parser *, asdl_seq *, stmt_ty);
|
||||
stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_seq *, stmt_ty);
|
||||
KeywordOrStarred *_PyPegen_keyword_or_starred(Parser *, void *, int);
|
||||
asdl_seq *_PyPegen_seq_extract_starred_exprs(Parser *, asdl_seq *);
|
||||
asdl_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *);
|
||||
expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *);
|
||||
asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
|
||||
void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
|
||||
|
||||
void *_PyPegen_parse(Parser *);
|
||||
|
||||
#endif
|
|
@ -485,6 +485,9 @@ static int test_init_from_config(void)
|
|||
|
||||
config.install_signal_handlers = 0;
|
||||
|
||||
putenv("PYTHONOLDPARSER=");
|
||||
config.use_peg = 0;
|
||||
|
||||
/* FIXME: test use_environment */
|
||||
|
||||
putenv("PYTHONHASHSEED=42");
|
||||
|
|
|
@ -563,7 +563,8 @@ astfold_expr(expr_ty node_, PyArena *ctx_, _PyASTOptimizeState *state)
|
|||
CALL(fold_tuple, expr_ty, node_);
|
||||
break;
|
||||
case Name_kind:
|
||||
if (_PyUnicode_EqualToASCIIString(node_->v.Name.id, "__debug__")) {
|
||||
if (node_->v.Name.ctx == Load &&
|
||||
_PyUnicode_EqualToASCIIString(node_->v.Name.id, "__debug__")) {
|
||||
return make_const(node_, PyBool_FromLong(!state->optimize), ctx_);
|
||||
}
|
||||
break;
|
||||
|
|
|
@ -816,7 +816,12 @@ builtin_compile_impl(PyObject *module, PyObject *source, PyObject *filename,
|
|||
if (str == NULL)
|
||||
goto error;
|
||||
|
||||
int current_use_peg = PyInterpreterState_Get()->config.use_peg;
|
||||
if (flags & PyCF_TYPE_COMMENTS || feature_version >= 0) {
|
||||
PyInterpreterState_Get()->config.use_peg = 0;
|
||||
}
|
||||
result = Py_CompileStringObject(str, filename, start[compile_mode], &cf, optimize);
|
||||
PyInterpreterState_Get()->config.use_peg = current_use_peg;
|
||||
Py_XDECREF(source_copy);
|
||||
goto finally;
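
The use_peg toggle above exists because pegen cannot parse type comments yet; a
hedged sketch of the caller-visible behaviour this preserves:

    import ast

    # compile()/ast.parse() with PyCF_TYPE_COMMENTS transparently falls back
    # to the old parser, so type comments keep working under the new default.
    tree = ast.parse("x = []  # type: list[int]", type_comments=True)
    print(tree.body[0].type_comment)   # 'list[int]'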
|
||||
|
||||
|
|
|
@ -2152,6 +2152,55 @@ compiler_default_arguments(struct compiler *c, arguments_ty args)
|
|||
return funcflags;
|
||||
}
|
||||
|
||||
static int
|
||||
forbidden_name(struct compiler *c, identifier name, expr_context_ty ctx)
|
||||
{
|
||||
|
||||
if (ctx == Store && _PyUnicode_EqualToASCIIString(name, "__debug__")) {
|
||||
compiler_error(c, "cannot assign to __debug__");
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
compiler_check_debug_one_arg(struct compiler *c, arg_ty arg)
|
||||
{
|
||||
if (arg != NULL) {
|
||||
if (forbidden_name(c, arg->arg, Store))
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
compiler_check_debug_args_seq(struct compiler *c, asdl_seq *args)
|
||||
{
|
||||
if (args != NULL) {
|
||||
for (int i = 0, n = asdl_seq_LEN(args); i < n; i++) {
|
||||
if (!compiler_check_debug_one_arg(c, asdl_seq_GET(args, i)))
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
compiler_check_debug_args(struct compiler *c, arguments_ty args)
|
||||
{
|
||||
if (!compiler_check_debug_args_seq(c, args->posonlyargs))
|
||||
return 0;
|
||||
if (!compiler_check_debug_args_seq(c, args->args))
|
||||
return 0;
|
||||
if (!compiler_check_debug_one_arg(c, args->vararg))
|
||||
return 0;
|
||||
if (!compiler_check_debug_args_seq(c, args->kwonlyargs))
|
||||
return 0;
|
||||
if (!compiler_check_debug_one_arg(c, args->kwarg))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
compiler_function(struct compiler *c, stmt_ty s, int is_async)
|
||||
{
|
||||
|
@ -2189,6 +2238,9 @@ compiler_function(struct compiler *c, stmt_ty s, int is_async)
|
|||
scope_type = COMPILER_SCOPE_FUNCTION;
|
||||
}
|
||||
|
||||
if (!compiler_check_debug_args(c, args))
|
||||
return 0;
|
||||
|
||||
if (!compiler_decorators(c, decos))
|
||||
return 0;
|
||||
|
||||
|
@ -2596,6 +2648,9 @@ compiler_lambda(struct compiler *c, expr_ty e)
|
|||
arguments_ty args = e->v.Lambda.args;
|
||||
assert(e->kind == Lambda_kind);
|
||||
|
||||
if (!compiler_check_debug_args(c, args))
|
||||
return 0;
|
||||
|
||||
if (!name) {
|
||||
name = PyUnicode_InternFromString("<lambda>");
|
||||
if (!name)
|
||||
|
@ -3505,6 +3560,9 @@ compiler_nameop(struct compiler *c, identifier name, expr_context_ty ctx)
|
|||
!_PyUnicode_EqualToASCIIString(name, "True") &&
|
||||
!_PyUnicode_EqualToASCIIString(name, "False"));
|
||||
|
||||
if (forbidden_name(c, name, ctx))
|
||||
return 0;
|
||||
|
||||
mangled = _Py_Mangle(c->u->u_private, name);
|
||||
if (!mangled)
|
||||
return 0;
|
||||
|
@ -4056,6 +4114,9 @@ validate_keywords(struct compiler *c, asdl_seq *keywords)
|
|||
if (key->arg == NULL) {
|
||||
continue;
|
||||
}
|
||||
if (forbidden_name(c, key->arg, Store)) {
|
||||
return -1;
|
||||
}
|
||||
for (Py_ssize_t j = i + 1; j < nkeywords; j++) {
|
||||
keyword_ty other = ((keyword_ty)asdl_seq_GET(keywords, j));
|
||||
if (other->arg && !PyUnicode_Compare(key->arg, other->arg)) {
|
||||
|
@ -5013,6 +5074,8 @@ compiler_visit_expr1(struct compiler *c, expr_ty e)
|
|||
ADDOP_NAME(c, LOAD_ATTR, e->v.Attribute.attr, names);
|
||||
break;
|
||||
case Store:
|
||||
if (forbidden_name(c, e->v.Attribute.attr, e->v.Attribute.ctx))
|
||||
return 0;
|
||||
ADDOP_NAME(c, STORE_ATTR, e->v.Attribute.attr, names);
|
||||
break;
|
||||
case Del:
|
||||
|
@ -5183,6 +5246,8 @@ compiler_annassign(struct compiler *c, stmt_ty s)
|
|||
}
|
||||
switch (targ->kind) {
|
||||
case Name_kind:
|
||||
if (forbidden_name(c, targ->v.Name.id, Store))
|
||||
return 0;
|
||||
/* If we have a simple name in a module or class, store annotation. */
|
||||
if (s->v.AnnAssign.simple &&
|
||||
(c->u->u_scope_type == COMPILER_SCOPE_MODULE ||
|
||||
|
@ -5200,6 +5265,8 @@ compiler_annassign(struct compiler *c, stmt_ty s)
|
|||
}
|
||||
break;
|
||||
case Attribute_kind:
|
||||
if (forbidden_name(c, targ->v.Attribute.attr, Store))
|
||||
return 0;
|
||||
if (!s->v.AnnAssign.value &&
|
||||
!check_ann_expr(c, targ->v.Attribute.value)) {
|
||||
return 0;
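
A hedged illustration of what the forbidden_name() checks added above now
reject at compile time (matching the __debug__ doctests earlier in this diff):

    for src in (
        "def f(__debug__): pass",    # function argument
        "lambda __debug__: None",    # lambda argument
        "obj.__debug__ = 1",         # attribute store
        "__debug__: int = 1",        # annotated assignment
    ):
        try:
            compile(src, "<test>", "exec")
        except SyntaxError as exc:
            print(src, "->", exc.msg)  # "cannot assign to __debug__"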
|
||||
|
|
|
@ -1594,50 +1594,51 @@ const unsigned char _Py_M__importlib_bootstrap[] = {
|
|||
0,218,1,120,90,5,119,104,101,114,101,90,9,102,114,111,
|
||||
109,95,110,97,109,101,90,3,101,120,99,114,10,0,0,0,
|
||||
114,10,0,0,0,114,11,0,0,0,114,215,0,0,0,9,
|
||||
4,0,0,115,44,0,0,0,0,10,8,1,10,1,4,1,
|
||||
12,2,4,1,28,2,8,1,14,1,10,1,2,255,8,2,
|
||||
10,1,14,1,2,1,14,1,14,4,10,1,16,255,2,2,
|
||||
12,1,26,1,114,215,0,0,0,99,1,0,0,0,0,0,
|
||||
0,0,0,0,0,0,3,0,0,0,6,0,0,0,67,0,
|
||||
0,0,115,146,0,0,0,124,0,160,0,100,1,161,1,125,
|
||||
1,124,0,160,0,100,2,161,1,125,2,124,1,100,3,117,
|
||||
1,114,82,124,2,100,3,117,1,114,78,124,1,124,2,106,
|
||||
1,107,3,114,78,116,2,106,3,100,4,124,1,155,2,100,
|
||||
5,124,2,106,1,155,2,100,6,157,5,116,4,100,7,100,
|
||||
8,141,3,1,0,124,1,83,0,124,2,100,3,117,1,114,
|
||||
96,124,2,106,1,83,0,116,2,106,3,100,9,116,4,100,
|
||||
7,100,8,141,3,1,0,124,0,100,10,25,0,125,1,100,
|
||||
11,124,0,118,1,114,142,124,1,160,5,100,12,161,1,100,
|
||||
13,25,0,125,1,124,1,83,0,41,14,122,167,67,97,108,
|
||||
99,117,108,97,116,101,32,119,104,97,116,32,95,95,112,97,
|
||||
99,107,97,103,101,95,95,32,115,104,111,117,108,100,32,98,
|
||||
101,46,10,10,32,32,32,32,95,95,112,97,99,107,97,103,
|
||||
101,95,95,32,105,115,32,110,111,116,32,103,117,97,114,97,
|
||||
110,116,101,101,100,32,116,111,32,98,101,32,100,101,102,105,
|
||||
110,101,100,32,111,114,32,99,111,117,108,100,32,98,101,32,
|
||||
115,101,116,32,116,111,32,78,111,110,101,10,32,32,32,32,
|
||||
116,111,32,114,101,112,114,101,115,101,110,116,32,116,104,97,
|
||||
116,32,105,116,115,32,112,114,111,112,101,114,32,118,97,108,
|
||||
117,101,32,105,115,32,117,110,107,110,111,119,110,46,10,10,
|
||||
32,32,32,32,114,146,0,0,0,114,106,0,0,0,78,122,
|
||||
32,95,95,112,97,99,107,97,103,101,95,95,32,33,61,32,
|
||||
95,95,115,112,101,99,95,95,46,112,97,114,101,110,116,32,
|
||||
40,122,4,32,33,61,32,250,1,41,233,3,0,0,0,41,
|
||||
1,90,10,115,116,97,99,107,108,101,118,101,108,122,89,99,
|
||||
97,110,39,116,32,114,101,115,111,108,118,101,32,112,97,99,
|
||||
107,97,103,101,32,102,114,111,109,32,95,95,115,112,101,99,
|
||||
95,95,32,111,114,32,95,95,112,97,99,107,97,103,101,95,
|
||||
95,44,32,102,97,108,108,105,110,103,32,98,97,99,107,32,
|
||||
111,110,32,95,95,110,97,109,101,95,95,32,97,110,100,32,
|
||||
95,95,112,97,116,104,95,95,114,1,0,0,0,114,142,0,
|
||||
0,0,114,129,0,0,0,114,22,0,0,0,41,6,114,35,
|
||||
0,0,0,114,131,0,0,0,114,193,0,0,0,114,194,0,
|
||||
0,0,114,195,0,0,0,114,130,0,0,0,41,3,218,7,
|
||||
103,108,111,98,97,108,115,114,187,0,0,0,114,96,0,0,
|
||||
0,114,10,0,0,0,114,10,0,0,0,114,11,0,0,0,
|
||||
218,17,95,99,97,108,99,95,95,95,112,97,99,107,97,103,
|
||||
101,95,95,46,4,0,0,115,34,0,0,0,0,7,10,1,
|
||||
10,1,8,1,18,1,22,2,4,254,6,3,4,1,8,1,
|
||||
4,0,0,115,52,0,0,0,0,10,8,1,10,1,4,1,
|
||||
12,2,4,1,4,1,2,255,4,1,8,255,10,2,8,1,
|
||||
14,1,10,1,2,255,8,2,10,1,14,1,2,1,14,1,
|
||||
14,4,10,1,16,255,2,2,12,1,26,1,114,215,0,0,
|
||||
0,99,1,0,0,0,0,0,0,0,0,0,0,0,3,0,
|
||||
0,0,6,0,0,0,67,0,0,0,115,146,0,0,0,124,
|
||||
0,160,0,100,1,161,1,125,1,124,0,160,0,100,2,161,
|
||||
1,125,2,124,1,100,3,117,1,114,82,124,2,100,3,117,
|
||||
1,114,78,124,1,124,2,106,1,107,3,114,78,116,2,106,
|
||||
3,100,4,124,1,155,2,100,5,124,2,106,1,155,2,100,
|
||||
6,157,5,116,4,100,7,100,8,141,3,1,0,124,1,83,
|
||||
0,124,2,100,3,117,1,114,96,124,2,106,1,83,0,116,
|
||||
2,106,3,100,9,116,4,100,7,100,8,141,3,1,0,124,
|
||||
0,100,10,25,0,125,1,100,11,124,0,118,1,114,142,124,
|
||||
1,160,5,100,12,161,1,100,13,25,0,125,1,124,1,83,
|
||||
0,41,14,122,167,67,97,108,99,117,108,97,116,101,32,119,
|
||||
104,97,116,32,95,95,112,97,99,107,97,103,101,95,95,32,
|
||||
115,104,111,117,108,100,32,98,101,46,10,10,32,32,32,32,
|
||||
95,95,112,97,99,107,97,103,101,95,95,32,105,115,32,110,
|
||||
111,116,32,103,117,97,114,97,110,116,101,101,100,32,116,111,
|
||||
32,98,101,32,100,101,102,105,110,101,100,32,111,114,32,99,
|
||||
111,117,108,100,32,98,101,32,115,101,116,32,116,111,32,78,
|
||||
111,110,101,10,32,32,32,32,116,111,32,114,101,112,114,101,
|
||||
115,101,110,116,32,116,104,97,116,32,105,116,115,32,112,114,
|
||||
111,112,101,114,32,118,97,108,117,101,32,105,115,32,117,110,
|
||||
107,110,111,119,110,46,10,10,32,32,32,32,114,146,0,0,
|
||||
0,114,106,0,0,0,78,122,32,95,95,112,97,99,107,97,
|
||||
103,101,95,95,32,33,61,32,95,95,115,112,101,99,95,95,
|
||||
46,112,97,114,101,110,116,32,40,122,4,32,33,61,32,250,
|
||||
1,41,233,3,0,0,0,41,1,90,10,115,116,97,99,107,
|
||||
108,101,118,101,108,122,89,99,97,110,39,116,32,114,101,115,
|
||||
111,108,118,101,32,112,97,99,107,97,103,101,32,102,114,111,
|
||||
109,32,95,95,115,112,101,99,95,95,32,111,114,32,95,95,
|
||||
112,97,99,107,97,103,101,95,95,44,32,102,97,108,108,105,
|
||||
110,103,32,98,97,99,107,32,111,110,32,95,95,110,97,109,
|
||||
101,95,95,32,97,110,100,32,95,95,112,97,116,104,95,95,
|
||||
114,1,0,0,0,114,142,0,0,0,114,129,0,0,0,114,
|
||||
22,0,0,0,41,6,114,35,0,0,0,114,131,0,0,0,
|
||||
114,193,0,0,0,114,194,0,0,0,114,195,0,0,0,114,
|
||||
130,0,0,0,41,3,218,7,103,108,111,98,97,108,115,114,
|
||||
187,0,0,0,114,96,0,0,0,114,10,0,0,0,114,10,
|
||||
0,0,0,114,11,0,0,0,218,17,95,99,97,108,99,95,
|
||||
95,95,112,97,99,107,97,103,101,95,95,46,4,0,0,115,
|
||||
42,0,0,0,0,7,10,1,10,1,8,1,18,1,6,1,
|
||||
2,255,4,1,4,255,6,2,4,254,6,3,4,1,8,1,
|
||||
6,2,6,2,4,254,6,3,8,1,8,1,14,1,114,221,
|
||||
0,0,0,114,10,0,0,0,99,5,0,0,0,0,0,0,
|
||||
0,0,0,0,0,9,0,0,0,5,0,0,0,67,0,0,
|
||||
|
|
|
@ -481,10 +481,11 @@ const unsigned char _Py_M__importlib_bootstrap_external[] = {
|
|||
108,101,118,101,108,90,13,98,97,115,101,95,102,105,108,101,
|
||||
110,97,109,101,114,5,0,0,0,114,5,0,0,0,114,8,
|
||||
0,0,0,218,17,115,111,117,114,99,101,95,102,114,111,109,
|
||||
95,99,97,99,104,101,116,1,0,0,115,52,0,0,0,0,
|
||||
95,99,97,99,104,101,116,1,0,0,115,68,0,0,0,0,
|
||||
9,12,1,8,1,10,1,12,1,4,1,10,1,12,1,14,
|
||||
1,16,1,4,1,4,1,12,1,8,1,18,2,10,1,8,
|
||||
1,16,1,10,1,16,1,10,1,14,2,16,1,10,1,16,
|
||||
1,16,1,4,1,4,1,12,1,8,1,2,1,2,255,4,
|
||||
1,2,255,8,2,10,1,8,1,16,1,10,1,16,1,10,
|
||||
1,4,1,2,255,8,2,16,1,10,1,4,1,2,255,10,
|
||||
2,14,1,114,102,0,0,0,99,1,0,0,0,0,0,0,
|
||||
0,0,0,0,0,5,0,0,0,9,0,0,0,67,0,0,
|
||||
0,115,124,0,0,0,116,0,124,0,131,1,100,1,107,2,
|
||||
|
|
|
@ -68,6 +68,7 @@ static const char usage_3[] = "\
|
|||
-X opt : set implementation-specific option. The following options are available:\n\
|
||||
\n\
|
||||
-X faulthandler: enable faulthandler\n\
|
||||
-X oldparser: enable the traditional LL(1) parser; also PYTHONOLDPARSER\n\
|
||||
-X showrefcount: output the total reference count and number of used\n\
|
||||
memory blocks when the program finishes or after each statement in the\n\
|
||||
interactive interpreter. This only works on debug builds\n\
|
||||
|
@ -634,6 +635,7 @@ _PyConfig_InitCompatConfig(PyConfig *config)
|
|||
#ifdef MS_WINDOWS
|
||||
config->legacy_windows_stdio = -1;
|
||||
#endif
|
||||
config->use_peg = 1;
|
||||
}
|
||||
|
||||
|
||||
|
@ -791,6 +793,7 @@ _PyConfig_Copy(PyConfig *config, const PyConfig *config2)
|
|||
COPY_ATTR(isolated);
|
||||
COPY_ATTR(use_environment);
|
||||
COPY_ATTR(dev_mode);
|
||||
COPY_ATTR(use_peg);
|
||||
COPY_ATTR(install_signal_handlers);
|
||||
COPY_ATTR(use_hash_seed);
|
||||
COPY_ATTR(hash_seed);
|
||||
|
@ -894,6 +897,7 @@ config_as_dict(const PyConfig *config)
|
|||
SET_ITEM_INT(isolated);
|
||||
SET_ITEM_INT(use_environment);
|
||||
SET_ITEM_INT(dev_mode);
|
||||
SET_ITEM_INT(use_peg);
|
||||
SET_ITEM_INT(install_signal_handlers);
|
||||
SET_ITEM_INT(use_hash_seed);
|
||||
SET_ITEM_UINT(hash_seed);
|
||||
|
@ -1428,6 +1432,11 @@ config_read_complex_options(PyConfig *config)
|
|||
config->import_time = 1;
|
||||
}
|
||||
|
||||
if (config_get_env(config, "PYTHONOLDPARSER")
|
||||
|| config_get_xoption(config, L"oldparser")) {
|
||||
config->use_peg = 0;
|
||||
}
|
||||
|
||||
PyStatus status;
|
||||
if (config->tracemalloc < 0) {
|
||||
status = config_init_tracemalloc(config);
|
||||
|
@ -2507,6 +2516,7 @@ PyConfig_Read(PyConfig *config)
|
|||
assert(config->isolated >= 0);
|
||||
assert(config->use_environment >= 0);
|
||||
assert(config->dev_mode >= 0);
|
||||
assert(config->use_peg >= 0);
|
||||
assert(config->install_signal_handlers >= 0);
|
||||
assert(config->use_hash_seed >= 0);
|
||||
assert(config->faulthandler >= 0);
|
||||
|
|
|
@ -29,6 +29,8 @@
|
|||
#include "ast.h" // PyAST_FromNodeObject()
|
||||
#include "marshal.h" // PyMarshal_ReadLongFromFile()
|
||||
|
||||
#include <pegen_interface.h> // PyPegen_ASTFrom*
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
# include "malloc.h" // alloca()
|
||||
#endif
|
||||
|
@ -183,6 +185,7 @@ PyRun_InteractiveOneObjectEx(FILE *fp, PyObject *filename,
|
|||
PyArena *arena;
|
||||
const char *ps1 = "", *ps2 = "", *enc = NULL;
|
||||
int errcode = 0;
|
||||
int use_peg = _PyInterpreterState_GET()->config.use_peg;
|
||||
_Py_IDENTIFIER(encoding);
|
||||
_Py_IDENTIFIER(__main__);
|
||||
|
||||
|
@ -235,9 +238,17 @@ PyRun_InteractiveOneObjectEx(FILE *fp, PyObject *filename,
|
|||
Py_XDECREF(oenc);
|
||||
return -1;
|
||||
}
|
||||
mod = PyParser_ASTFromFileObject(fp, filename, enc,
|
||||
Py_single_input, ps1, ps2,
|
||||
flags, &errcode, arena);
|
||||
|
||||
if (use_peg) {
|
||||
mod = PyPegen_ASTFromFileObject(fp, filename, Py_single_input,
|
||||
enc, ps1, ps2, &errcode, arena);
|
||||
}
|
||||
else {
|
||||
mod = PyParser_ASTFromFileObject(fp, filename, enc,
|
||||
Py_single_input, ps1, ps2,
|
||||
flags, &errcode, arena);
|
||||
}
|
||||
|
||||
Py_XDECREF(v);
|
||||
Py_XDECREF(w);
|
||||
Py_XDECREF(oenc);
|
||||
|
@ -1019,6 +1030,7 @@ PyRun_StringFlags(const char *str, int start, PyObject *globals,
|
|||
mod_ty mod;
|
||||
PyArena *arena;
|
||||
PyObject *filename;
|
||||
int use_peg = _PyInterpreterState_GET()->config.use_peg;
|
||||
|
||||
filename = _PyUnicode_FromId(&PyId_string); /* borrowed */
|
||||
if (filename == NULL)
|
||||
|
@ -1028,7 +1040,13 @@ PyRun_StringFlags(const char *str, int start, PyObject *globals,
|
|||
if (arena == NULL)
|
||||
return NULL;
|
||||
|
||||
mod = PyParser_ASTFromStringObject(str, filename, start, flags, arena);
|
||||
if (use_peg) {
|
||||
mod = PyPegen_ASTFromStringObject(str, filename, start, flags, arena);
|
||||
}
|
||||
else {
|
||||
mod = PyParser_ASTFromStringObject(str, filename, start, flags, arena);
|
||||
}
|
||||
|
||||
if (mod != NULL)
|
||||
ret = run_mod(mod, filename, globals, locals, flags, arena);
|
||||
PyArena_Free(arena);
|
||||
|
@ -1043,6 +1061,7 @@ PyRun_FileExFlags(FILE *fp, const char *filename_str, int start, PyObject *globa
|
|||
mod_ty mod;
|
||||
PyArena *arena = NULL;
|
||||
PyObject *filename;
|
||||
int use_peg = _PyInterpreterState_GET()->config.use_peg;
|
||||
|
||||
filename = PyUnicode_DecodeFSDefault(filename_str);
|
||||
if (filename == NULL)
|
||||
|
@ -1052,8 +1071,15 @@ PyRun_FileExFlags(FILE *fp, const char *filename_str, int start, PyObject *globa
|
|||
if (arena == NULL)
|
||||
goto exit;
|
||||
|
||||
mod = PyParser_ASTFromFileObject(fp, filename, NULL, start, 0, 0,
|
||||
flags, NULL, arena);
|
||||
if (use_peg) {
|
||||
mod = PyPegen_ASTFromFileObject(fp, filename, start, NULL, NULL, NULL,
|
||||
NULL, arena);
|
||||
}
|
||||
else {
|
||||
mod = PyParser_ASTFromFileObject(fp, filename, NULL, start, 0, 0,
|
||||
flags, NULL, arena);
|
||||
}
|
||||
|
||||
if (closeit)
|
||||
fclose(fp);
|
||||
if (mod == NULL) {
|
||||
|
@ -1196,11 +1222,17 @@ Py_CompileStringObject(const char *str, PyObject *filename, int start,
|
|||
{
|
||||
PyCodeObject *co;
|
||||
mod_ty mod;
|
||||
int use_peg = _PyInterpreterState_GET()->config.use_peg;
|
||||
PyArena *arena = PyArena_New();
|
||||
if (arena == NULL)
|
||||
return NULL;
|
||||
|
||||
mod = PyParser_ASTFromStringObject(str, filename, start, flags, arena);
|
||||
if (use_peg) {
|
||||
mod = PyPegen_ASTFromStringObject(str, filename, start, flags, arena);
|
||||
}
|
||||
else {
|
||||
mod = PyParser_ASTFromStringObject(str, filename, start, flags, arena);
|
||||
}
|
||||
if (mod == NULL) {
|
||||
PyArena_Free(arena);
|
||||
return NULL;
|
||||
|
@ -1297,13 +1329,19 @@ _Py_SymtableStringObjectFlags(const char *str, PyObject *filename, int start, Py
|
|||
{
|
||||
struct symtable *st;
|
||||
mod_ty mod;
|
||||
int use_peg = _PyInterpreterState_GET()->config.use_peg;
|
||||
PyArena *arena;
|
||||
|
||||
arena = PyArena_New();
|
||||
if (arena == NULL)
|
||||
return NULL;
|
||||
|
||||
mod = PyParser_ASTFromStringObject(str, filename, start, flags, arena);
|
||||
if (use_peg) {
|
||||
mod = PyPegen_ASTFromStringObject(str, filename, start, flags, arena);
|
||||
}
|
||||
else {
|
||||
mod = PyParser_ASTFromStringObject(str, filename, start, flags, arena);
|
||||
}
|
||||
if (mod == NULL) {
|
||||
PyArena_Free(arena);
|
||||
return NULL;
|
||||
|
|
|
@ -2427,6 +2427,7 @@ static PyStructSequence_Field flags_fields[] = {
|
|||
{"inspect", "-i"},
|
||||
{"interactive", "-i"},
|
||||
{"optimize", "-O or -OO"},
|
||||
{"use_peg", "-p old or -p new"},
|
||||
{"dont_write_bytecode", "-B"},
|
||||
{"no_user_site", "-s"},
|
||||
{"no_site", "-S"},
|
||||
|
@ -2447,7 +2448,7 @@ static PyStructSequence_Desc flags_desc = {
|
|||
"sys.flags", /* name */
|
||||
flags__doc__, /* doc */
|
||||
flags_fields, /* fields */
|
||||
15
|
||||
16
|
||||
};
|
||||
|
||||
static PyObject*
|
||||
|
@ -2470,6 +2471,7 @@ make_flags(PyThreadState *tstate)
|
|||
SetFlag(config->inspect);
|
||||
SetFlag(config->interactive);
|
||||
SetFlag(config->optimization_level);
|
||||
SetFlag(config->use_peg);
|
||||
SetFlag(!config->write_bytecode);
|
||||
SetFlag(!config->user_site_directory);
|
||||
SetFlag(!config->site_import);
|
||||
|
|
|
@ -23,6 +23,8 @@ msi Support for packaging Python as an MSI package on Windows.
|
|||
|
||||
parser Un-parsing tool to generate code from an AST.
|
||||
|
||||
peg_generator PEG-based parser generator (pegen) used for new parser.
|
||||
|
||||
pynche A Tkinter-based color editor.
|
||||
|
||||
scripts A number of useful single-file programs, e.g. tabnanny.py
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
# A clang-format style that approximates Python's PEP 7
|
||||
BasedOnStyle: Google
|
||||
AlwaysBreakAfterReturnType: All
|
||||
AllowShortIfStatementsOnASingleLine: false
|
||||
AlignAfterOpenBracket: Align
|
||||
BreakBeforeBraces: Stroustrup
|
||||
ColumnLimit: 95
|
||||
DerivePointerAlignment: false
|
||||
IndentWidth: 4
|
||||
Language: Cpp
|
||||
PointerAlignment: Right
|
||||
ReflowComments: true
|
||||
SpaceBeforeParens: ControlStatements
|
||||
SpacesInParentheses: false
|
||||
TabWidth: 4
|
||||
UseTab: Never
|
||||
SortIncludes: false
|
|
@ -0,0 +1,3 @@
|
|||
peg_extension/parse.c
|
||||
data/xxl.py
|
||||
@data
|
|
@ -0,0 +1,116 @@
|
|||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
PYTHON ?= ../../python
|
||||
endif
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
PYTHON ?= ../../python.exe
|
||||
endif
|
||||
|
||||
CPYTHON ?= ../../Lib
|
||||
MYPY ?= mypy
|
||||
|
||||
GRAMMAR = ../../Grammar/python.gram
|
||||
TESTFILE = data/cprog.py
|
||||
TIMEFILE = data/xxl.py
|
||||
TESTDIR = .
|
||||
TESTFLAGS = --short
|
||||
|
||||
data/xxl.py:
|
||||
$(PYTHON) -m zipfile -e data/xxl.zip data
|
||||
|
||||
build: peg_extension/parse.c
|
||||
|
||||
peg_extension/parse.c: $(GRAMMAR) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h pegen/grammar_parser.py
|
||||
$(PYTHON) -m pegen -q -c $(GRAMMAR) -o peg_extension/parse.c --compile-extension
|
||||
|
||||
clean:
|
||||
-rm -f peg_extension/*.o peg_extension/*.so peg_extension/parse.c
|
||||
-rm -f data/xxl.py
|
||||
|
||||
dump: peg_extension/parse.c
|
||||
cat -n $(TESTFILE)
|
||||
$(PYTHON) -c "from peg_extension import parse; import ast; t = parse.parse_file('$(TESTFILE)', mode=1); print(ast.dump(t))"
|
||||
|
||||
regen-metaparser: pegen/metagrammar.gram pegen/*.py
|
||||
$(PYTHON) -m pegen -q -c pegen/metagrammar.gram -o pegen/grammar_parser.py
|
||||
|
||||
# Note: These targets really depend on the generated shared object in peg_extension/parse.*.so but
|
||||
# this has a different name on different systems, so we are abusing the implicit dependency on
|
||||
# parse.c by the use of --compile-extension.
|
||||
|
||||
.PHONY: test
|
||||
|
||||
test: run
|
||||
|
||||
run: peg_extension/parse.c
|
||||
$(PYTHON) -c "from peg_extension import parse; t = parse.parse_file('$(TESTFILE)'); exec(t)"
|
||||
|
||||
compile: peg_extension/parse.c
|
||||
$(PYTHON) -c "from peg_extension import parse; t = parse.parse_file('$(TESTFILE)', mode=2)"
|
||||
|
||||
parse: peg_extension/parse.c
|
||||
$(PYTHON) -c "from peg_extension import parse; t = parse.parse_file('$(TESTFILE)', mode=1)"
|
||||
|
||||
check: peg_extension/parse.c
|
||||
$(PYTHON) -c "from peg_extension import parse; t = parse.parse_file('$(TESTFILE)', mode=0)"
|
||||
|
||||
stats: peg_extension/parse.c data/xxl.py
|
||||
$(PYTHON) -c "from peg_extension import parse; t = parse.parse_file('$(TIMEFILE)', mode=0); parse.dump_memo_stats()" >@data
|
||||
$(PYTHON) scripts/joinstats.py @data
|
||||
|
||||
time: time_compile
|
||||
|
||||
time_compile: peg_extension/parse.c data/xxl.py
|
||||
$(PYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile
|
||||
|
||||
time_parse: peg_extension/parse.c data/xxl.py
|
||||
$(PYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse
|
||||
|
||||
time_check: peg_extension/parse.c data/xxl.py
|
||||
$(PYTHON) scripts/benchmark.py --parser=pegen --target=xxl check
|
||||
|
||||
time_stdlib: time_stdlib_compile
|
||||
|
||||
time_stdlib_compile: data/xxl.py
|
||||
$(PYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile
|
||||
|
||||
time_stdlib_parse: data/xxl.py
|
||||
$(PYTHON) scripts/benchmark.py --parser=cpython --target=xxl parse
|
||||
|
||||
test_local:
|
||||
$(PYTHON) scripts/test_parse_directory.py \
|
||||
-g $(GRAMMAR) \
|
||||
-d $(TESTDIR) \
|
||||
$(TESTFLAGS) \
|
||||
--exclude "*/failset/*" \
|
||||
--exclude "*/failset/**" \
|
||||
--exclude "*/failset/**/*"
|
||||
|
||||
test_global: $(CPYTHON)
|
||||
$(PYTHON) scripts/test_parse_directory.py \
|
||||
-g $(GRAMMAR) \
|
||||
-d $(CPYTHON) \
|
||||
$(TESTFLAGS) \
|
||||
--exclude "*/test2to3/*" \
|
||||
--exclude "*/test2to3/**/*" \
|
||||
--exclude "*/bad*" \
|
||||
--exclude "*/lib2to3/tests/data/*"
|
||||
|
||||
mypy: regen-metaparser
|
||||
$(MYPY) # For list of files, see mypy.ini
|
||||
|
||||
format-python:
|
||||
black pegen scripts
|
||||
|
||||
bench:
|
||||
$(PYTHON) scripts/benchmark.py --parser=pegen --target=stdlib check
|
||||
|
||||
format: format-python
|
||||
|
||||
find_max_nesting:
|
||||
$(PYTHON) scripts/find_max_nesting.py
|
||||
|
||||
tags: TAGS
|
||||
|
||||
TAGS: pegen/*.py test/test_pegen.py
|
||||
etags pegen/*.py test/test_pegen.py
|
|
@ -0,0 +1,10 @@
|
|||
if 1:
|
||||
print("Hello " + "world")
|
||||
if 0:
|
||||
print("then")
|
||||
print("clause")
|
||||
elif 1:
|
||||
pass
|
||||
elif 1:
|
||||
pass
|
||||
else: print("else-clause")
|
Binary file not shown.
|
@ -0,0 +1,26 @@
|
|||
[mypy]
|
||||
files = pegen, scripts
|
||||
|
||||
follow_imports = error
|
||||
no_implicit_optional = True
|
||||
strict_optional = True
|
||||
|
||||
#check_untyped_defs = True
|
||||
disallow_untyped_calls = True
|
||||
disallow_untyped_defs = True
|
||||
|
||||
disallow_any_generics = true
|
||||
disallow_any_unimported = True
|
||||
disallow_incomplete_defs = True
|
||||
disallow_subclassing_any = True
|
||||
|
||||
warn_unused_configs = True
|
||||
warn_unused_ignores = true
|
||||
warn_redundant_casts = true
|
||||
warn_no_return = True
|
||||
|
||||
show_traceback = True
|
||||
show_error_codes = True
|
||||
|
||||
[mypy-pegen.grammar_parser]
|
||||
strict_optional = False
|
|
@ -0,0 +1,153 @@
|
|||
#include "pegen.h"
|
||||
|
||||
PyObject *
|
||||
_build_return_object(mod_ty module, int mode, PyObject *filename_ob, PyArena *arena)
|
||||
{
|
||||
PyObject *result = NULL;
|
||||
|
||||
if (mode == 2) {
|
||||
result = (PyObject *)PyAST_CompileObject(module, filename_ob, NULL, -1, arena);
|
||||
} else if (mode == 1) {
|
||||
result = PyAST_mod2obj(module);
|
||||
} else {
|
||||
result = Py_None;
|
||||
Py_INCREF(result);
|
||||
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
parse_file(PyObject *self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
static char *keywords[] = {"file", "mode", NULL};
|
||||
const char *filename;
|
||||
int mode = 2;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|i", keywords, &filename, &mode)) {
|
||||
return NULL;
|
||||
}
|
||||
if (mode < 0 || mode > 2) {
|
||||
return PyErr_Format(PyExc_ValueError, "Bad mode, must be 0 <= mode <= 2");
|
||||
}
|
||||
|
||||
PyArena *arena = PyArena_New();
|
||||
if (arena == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyObject *result = NULL;
|
||||
|
||||
PyObject *filename_ob = PyUnicode_FromString(filename);
|
||||
if (filename_ob == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
mod_ty res = _PyPegen_run_parser_from_file(filename, Py_file_input, filename_ob, arena);
|
||||
if (res == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
result = _build_return_object(res, mode, filename_ob, arena);
|
||||
|
||||
error:
|
||||
Py_XDECREF(filename_ob);
|
||||
PyArena_Free(arena);
|
||||
return result;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
parse_string(PyObject *self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
static char *keywords[] = {"str", "mode", NULL};
|
||||
const char *the_string;
|
||||
int mode = 2;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|i", keywords, &the_string, &mode)) {
|
||||
return NULL;
|
||||
}
|
||||
if (mode < 0 || mode > 2) {
|
||||
return PyErr_Format(PyExc_ValueError, "Bad mode, must be 0 <= mode <= 2");
|
||||
}
|
||||
|
||||
PyArena *arena = PyArena_New();
|
||||
if (arena == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyObject *result = NULL;
|
||||
|
||||
PyObject *filename_ob = PyUnicode_FromString("<string>");
|
||||
if (filename_ob == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
mod_ty res = _PyPegen_run_parser_from_string(the_string, Py_file_input, filename_ob,
|
||||
PyCF_IGNORE_COOKIE, arena);
|
||||
if (res == NULL) {
|
||||
goto error;
|
||||
}
|
||||
result = _build_return_object(res, mode, filename_ob, arena);
|
||||
|
||||
error:
|
||||
Py_XDECREF(filename_ob);
|
||||
PyArena_Free(arena);
|
||||
return result;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
clear_memo_stats()
|
||||
{
|
||||
_PyPegen_clear_memo_statistics();
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
get_memo_stats()
|
||||
{
|
||||
return _PyPegen_get_memo_statistics();
|
||||
}
|
||||
|
||||
// TODO: Write to Python's sys.stdout instead of C's stdout.
|
||||
static PyObject *
|
||||
dump_memo_stats()
|
||||
{
|
||||
PyObject *list = _PyPegen_get_memo_statistics();
|
||||
if (list == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
Py_ssize_t len = PyList_Size(list);
|
||||
for (Py_ssize_t i = 0; i < len; i++) {
|
||||
PyObject *value = PyList_GetItem(list, i); // Borrowed reference.
|
||||
long count = PyLong_AsLong(value);
|
||||
if (count < 0) {
|
||||
break;
|
||||
}
|
||||
if (count > 0) {
|
||||
printf("%4ld %9ld\n", i, count);
|
||||
}
|
||||
}
|
||||
Py_DECREF(list);
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
static PyMethodDef ParseMethods[] = {
|
||||
{"parse_file", (PyCFunction)(void(*)(void))parse_file, METH_VARARGS|METH_KEYWORDS, "Parse a file."},
|
||||
{"parse_string", (PyCFunction)(void(*)(void))parse_string, METH_VARARGS|METH_KEYWORDS, "Parse a string."},
|
||||
{"clear_memo_stats", clear_memo_stats, METH_NOARGS},
|
||||
{"dump_memo_stats", dump_memo_stats, METH_NOARGS},
|
||||
{"get_memo_stats", get_memo_stats, METH_NOARGS},
|
||||
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
static struct PyModuleDef parsemodule = {
|
||||
PyModuleDef_HEAD_INIT,
|
||||
.m_name = "parse",
|
||||
.m_doc = "A parser.",
|
||||
.m_methods = ParseMethods,
|
||||
};
|
||||
|
||||
PyMODINIT_FUNC
|
||||
PyInit_parse(void)
|
||||
{
|
||||
return PyModule_Create(&parsemodule);
|
||||
}
|
|
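As a rough usage sketch of the extension module defined above (illustrative only, not part of the change; it assumes the module has been built in place as peg_extension/parse.*.so, which is what the Makefile's --compile-extension targets produce):

import ast
from peg_extension import parse

code = parse.parse_file("data/cprog.py", mode=2)  # mode=2: compiled code object
exec(code)

tree = parse.parse_string("x = 1 + 2\n", mode=1)  # mode=1: AST object
print(ast.dump(tree))

parse.parse_string("x = 1\n", mode=0)             # mode=0: syntax check only, returns None
parse.dump_memo_stats()                           # print per-type memoization hit counts
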
@ -0,0 +1,136 @@
|
|||
#!/usr/bin/env python3.8
|
||||
|
||||
"""pegen -- PEG Generator.
|
||||
|
||||
Search the web for PEG Parsers for reference.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import time
|
||||
import token
|
||||
import traceback
|
||||
|
||||
from typing import Final
|
||||
|
||||
from pegen.build import build_parser_and_generator
|
||||
from pegen.testutil import print_memstats
|
||||
|
||||
|
||||
argparser = argparse.ArgumentParser(
|
||||
prog="pegen", description="Experimental PEG-like parser generator"
|
||||
)
|
||||
argparser.add_argument("-q", "--quiet", action="store_true", help="Don't print the parsed grammar")
|
||||
argparser.add_argument(
|
||||
"-v",
|
||||
"--verbose",
|
||||
action="count",
|
||||
default=0,
|
||||
help="Print timing stats; repeat for more debug output",
|
||||
)
|
||||
argparser.add_argument(
|
||||
"-c", "--cpython", action="store_true", help="Generate C code for inclusion into CPython"
|
||||
)
|
||||
argparser.add_argument(
|
||||
"--compile-extension",
|
||||
action="store_true",
|
||||
help="Compile generated C code into an extension module",
|
||||
)
|
||||
argparser.add_argument(
|
||||
"-o",
|
||||
"--output",
|
||||
metavar="OUT",
|
||||
help="Where to write the generated parser (default parse.py or parse.c)",
|
||||
)
|
||||
argparser.add_argument("filename", help="Grammar description")
|
||||
argparser.add_argument(
|
||||
"--optimized", action="store_true", help="Compile the extension in optimized mode"
|
||||
)
|
||||
argparser.add_argument(
|
||||
"--skip-actions", action="store_true", help="Suppress code emission for rule actions",
|
||||
)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
args = argparser.parse_args()
|
||||
verbose = args.verbose
|
||||
verbose_tokenizer = verbose >= 3
|
||||
verbose_parser = verbose == 2 or verbose >= 4
|
||||
t0 = time.time()
|
||||
|
||||
output_file = args.output
|
||||
if not output_file:
|
||||
if args.cpython:
|
||||
output_file = "parse.c"
|
||||
else:
|
||||
output_file = "parse.py"
|
||||
|
||||
try:
|
||||
grammar, parser, tokenizer, gen = build_parser_and_generator(
|
||||
args.filename,
|
||||
output_file,
|
||||
args.compile_extension,
|
||||
verbose_tokenizer,
|
||||
verbose_parser,
|
||||
args.verbose,
|
||||
keep_asserts_in_extension=False if args.optimized else True,
|
||||
skip_actions=args.skip_actions,
|
||||
)
|
||||
except Exception as err:
|
||||
if args.verbose:
|
||||
raise # Show traceback
|
||||
traceback.print_exception(err.__class__, err, None)
|
||||
sys.stderr.write("For full traceback, use -v\n")
|
||||
sys.exit(1)
|
||||
|
||||
if not args.quiet:
|
||||
if args.verbose:
|
||||
print("Raw Grammar:")
|
||||
for line in repr(grammar).splitlines():
|
||||
print(" ", line)
|
||||
|
||||
print("Clean Grammar:")
|
||||
for line in str(grammar).splitlines():
|
||||
print(" ", line)
|
||||
|
||||
if args.verbose:
|
||||
print("First Graph:")
|
||||
for src, dsts in gen.first_graph.items():
|
||||
print(f" {src} -> {', '.join(dsts)}")
|
||||
print("First SCCS:")
|
||||
for scc in gen.first_sccs:
|
||||
print(" ", scc, end="")
|
||||
if len(scc) > 1:
|
||||
print(
|
||||
" # Indirectly left-recursive; leaders:",
|
||||
{name for name in scc if grammar.rules[name].leader},
|
||||
)
|
||||
else:
|
||||
name = next(iter(scc))
|
||||
if name in gen.first_graph[name]:
|
||||
print(" # Left-recursive")
|
||||
else:
|
||||
print()
|
||||
|
||||
t1 = time.time()
|
||||
|
||||
if args.verbose:
|
||||
dt = t1 - t0
|
||||
diag = tokenizer.diagnose()
|
||||
nlines = diag.end[0]
|
||||
if diag.type == token.ENDMARKER:
|
||||
nlines -= 1
|
||||
print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
|
||||
if dt:
|
||||
print(f"; {nlines / dt:.0f} lines/sec")
|
||||
else:
|
||||
print()
|
||||
print("Caches sizes:")
|
||||
print(f" token array : {len(tokenizer._tokens):10}")
|
||||
print(f" cache : {len(parser._cache):10}")
|
||||
if not print_memstats():
|
||||
print("(Can't find psutil; install it for memory stats.)")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,169 @@
|
|||
import pathlib
|
||||
import shutil
|
||||
import tokenize
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
import distutils.log
|
||||
from distutils.core import Distribution, Extension
|
||||
from distutils.command.clean import clean # type: ignore
|
||||
from distutils.command.build_ext import build_ext # type: ignore
|
||||
|
||||
from pegen.c_generator import CParserGenerator
|
||||
from pegen.grammar import Grammar
|
||||
from pegen.grammar_parser import GeneratedParser as GrammarParser
|
||||
from pegen.parser import Parser
|
||||
from pegen.parser_generator import ParserGenerator
|
||||
from pegen.python_generator import PythonParserGenerator
|
||||
from pegen.tokenizer import Tokenizer
|
||||
|
||||
MOD_DIR = pathlib.Path(__file__).parent
|
||||
|
||||
|
||||
def compile_c_extension(
|
||||
generated_source_path: str,
|
||||
build_dir: Optional[str] = None,
|
||||
verbose: bool = False,
|
||||
keep_asserts: bool = True,
|
||||
) -> str:
|
||||
"""Compile the generated source for a parser generator into an extension module.
|
||||
|
||||
The extension module will be generated in the same directory as the provided path
|
||||
for the generated source, with the same basename (in addition to extension module
|
||||
metadata). For example, for the source mydir/parser.c the generated extension
|
||||
on a Darwin system with Python 3.8 will be mydir/parser.cpython-38-darwin.so.
|
||||
|
||||
If *build_dir* is provided, that path will be used as the temporary build directory
|
||||
of distutils (this is useful in case you want to use a temporary directory).
|
||||
"""
|
||||
if verbose:
|
||||
distutils.log.set_verbosity(distutils.log.DEBUG)
|
||||
|
||||
source_file_path = pathlib.Path(generated_source_path)
|
||||
extension_name = source_file_path.stem
|
||||
extra_compile_args = []
|
||||
if keep_asserts:
|
||||
extra_compile_args.append("-UNDEBUG")
|
||||
extension = [
|
||||
Extension(
|
||||
extension_name,
|
||||
sources=[
|
||||
str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
|
||||
str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
|
||||
str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
|
||||
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen" / "pegen.c"),
|
||||
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen" / "parse_string.c"),
|
||||
str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
|
||||
generated_source_path,
|
||||
],
|
||||
include_dirs=[
|
||||
str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
|
||||
str(MOD_DIR.parent.parent.parent / "Parser"),
|
||||
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen"),
|
||||
],
|
||||
extra_compile_args=extra_compile_args,
|
||||
)
|
||||
]
|
||||
dist = Distribution({"name": extension_name, "ext_modules": extension})
|
||||
cmd = build_ext(dist)
|
||||
cmd.inplace = True
|
||||
if build_dir:
|
||||
cmd.build_temp = build_dir
|
||||
cmd.ensure_finalized()
|
||||
cmd.run()
|
||||
|
||||
extension_path = source_file_path.parent / cmd.get_ext_filename(extension_name)
|
||||
shutil.move(cmd.get_ext_fullpath(extension_name), extension_path)
|
||||
|
||||
cmd = clean(dist)
|
||||
cmd.finalize_options()
|
||||
cmd.run()
|
||||
|
||||
return extension_path
|
||||
|
||||
|
||||
def build_parser(
|
||||
grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
|
||||
) -> Tuple[Grammar, Parser, Tokenizer]:
|
||||
with open(grammar_file) as file:
|
||||
tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
|
||||
parser = GrammarParser(tokenizer, verbose=verbose_parser)
|
||||
grammar = parser.start()
|
||||
|
||||
if not grammar:
|
||||
raise parser.make_syntax_error(grammar_file)
|
||||
|
||||
return grammar, parser, tokenizer
|
||||
|
||||
|
||||
def build_generator(
|
||||
tokenizer: Tokenizer,
|
||||
grammar: Grammar,
|
||||
grammar_file: str,
|
||||
output_file: str,
|
||||
compile_extension: bool = False,
|
||||
verbose_c_extension: bool = False,
|
||||
keep_asserts_in_extension: bool = True,
|
||||
skip_actions: bool = False,
|
||||
) -> ParserGenerator:
|
||||
# TODO: Allow other extensions; pass the output type as an argument.
|
||||
if not output_file.endswith((".c", ".py")):
|
||||
raise RuntimeError("Your output file must either be a .c or .py file")
|
||||
with open(output_file, "w") as file:
|
||||
gen: ParserGenerator
|
||||
if output_file.endswith(".c"):
|
||||
gen = CParserGenerator(grammar, file, skip_actions=skip_actions)
|
||||
elif output_file.endswith(".py"):
|
||||
gen = PythonParserGenerator(grammar, file) # TODO: skip_actions
|
||||
else:
|
||||
assert False # Should have been checked above
|
||||
gen.generate(grammar_file)
|
||||
|
||||
if compile_extension and output_file.endswith(".c"):
|
||||
compile_c_extension(
|
||||
output_file, verbose=verbose_c_extension, keep_asserts=keep_asserts_in_extension
|
||||
)
|
||||
|
||||
return gen
|
||||
|
||||
|
||||
def build_parser_and_generator(
|
||||
grammar_file: str,
|
||||
output_file: str,
|
||||
compile_extension: bool = False,
|
||||
verbose_tokenizer: bool = False,
|
||||
verbose_parser: bool = False,
|
||||
verbose_c_extension: bool = False,
|
||||
keep_asserts_in_extension: bool = True,
|
||||
skip_actions: bool = False,
|
||||
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
|
||||
"""Generate rules, parser, tokenizer, parser generator for a given grammar
|
||||
|
||||
Args:
|
||||
grammar_file (string): Path for the grammar file
|
||||
output_file (string): Path for the output file
|
||||
compile_extension (bool, optional): Whether to compile the C extension.
|
||||
Defaults to False.
|
||||
verbose_tokenizer (bool, optional): Whether to display additional output
|
||||
when generating the tokenizer. Defaults to False.
|
||||
verbose_parser (bool, optional): Whether to display additional output
|
||||
when generating the parser. Defaults to False.
|
||||
verbose_c_extension (bool, optional): Whether to display additional
|
||||
output when compiling the C extension. Defaults to False.
|
||||
keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
|
||||
when compiling the extension module. Defaults to True.
|
||||
skip_actions (bool, optional): Whether to pretend no rule has any actions.
|
||||
"""
|
||||
grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
|
||||
gen = build_generator(
|
||||
tokenizer,
|
||||
grammar,
|
||||
grammar_file,
|
||||
output_file,
|
||||
compile_extension,
|
||||
verbose_c_extension,
|
||||
keep_asserts_in_extension,
|
||||
skip_actions=skip_actions,
|
||||
)
|
||||
|
||||
return grammar, parser, tokenizer, gen
|
|
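A minimal driver for the helpers above might look as follows (a sketch only; the grammar path is a placeholder matching the Makefile's GRAMMAR variable, and nothing here is part of the diff):

from pegen.build import build_parser_and_generator

grammar, parser, tokenizer, gen = build_parser_and_generator(
    grammar_file="../../Grammar/python.gram",  # placeholder path
    output_file="parse.c",
    compile_extension=False,  # set True to also build the extension in place
)
print(f"Generated {len(grammar.rules)} rules into parse.c")
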
@ -0,0 +1,605 @@
|
|||
import ast
|
||||
import re
|
||||
from typing import Any, cast, Dict, IO, Optional, List, Text, Tuple
|
||||
|
||||
from pegen.grammar import (
|
||||
Cut,
|
||||
GrammarVisitor,
|
||||
Rhs,
|
||||
Alt,
|
||||
NamedItem,
|
||||
NameLeaf,
|
||||
StringLeaf,
|
||||
Lookahead,
|
||||
PositiveLookahead,
|
||||
NegativeLookahead,
|
||||
Opt,
|
||||
Repeat0,
|
||||
Repeat1,
|
||||
Gather,
|
||||
Group,
|
||||
Rule,
|
||||
)
|
||||
from pegen import grammar
|
||||
from pegen.parser_generator import dedupe, ParserGenerator
|
||||
from pegen.tokenizer import exact_token_types
|
||||
|
||||
EXTENSION_PREFIX = """\
|
||||
#include "pegen.h"
|
||||
|
||||
"""
|
||||
|
||||
EXTENSION_SUFFIX = """
|
||||
void *
|
||||
_PyPegen_parse(Parser *p)
|
||||
{
|
||||
// Initialize keywords
|
||||
p->keywords = reserved_keywords;
|
||||
p->n_keyword_lists = n_keyword_lists;
|
||||
|
||||
return start_rule(p);
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
class CCallMakerVisitor(GrammarVisitor):
|
||||
def __init__(self, parser_generator: ParserGenerator):
|
||||
self.gen = parser_generator
|
||||
self.cache: Dict[Any, Any] = {}
|
||||
self.keyword_cache: Dict[str, int] = {}
|
||||
|
||||
def keyword_helper(self, keyword: str) -> Tuple[str, str]:
|
||||
if keyword not in self.keyword_cache:
|
||||
self.keyword_cache[keyword] = self.gen.keyword_type()
|
||||
return "keyword", f"_PyPegen_expect_token(p, {self.keyword_cache[keyword]})"
|
||||
|
||||
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[str, str]:
|
||||
name = node.value
|
||||
if name in ("NAME", "NUMBER", "STRING"):
|
||||
name = name.lower()
|
||||
return f"{name}_var", f"_PyPegen_{name}_token(p)"
|
||||
if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"):
|
||||
name = name.lower()
|
||||
return f"{name}_var", f"_PyPegen_{name}_token(p)"
|
||||
return f"{name}_var", f"{name}_rule(p)"
|
||||
|
||||
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
|
||||
val = ast.literal_eval(node.value)
|
||||
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
|
||||
return self.keyword_helper(val)
|
||||
else:
|
||||
assert val in exact_token_types, f"{node.value} is not a known literal"
|
||||
type = exact_token_types[val]
|
||||
return "literal", f"_PyPegen_expect_token(p, {type})"
|
||||
|
||||
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
if len(node.alts) == 1 and len(node.alts[0].items) == 1:
|
||||
self.cache[node] = self.visit(node.alts[0].items[0])
|
||||
else:
|
||||
name = self.gen.name_node(node)
|
||||
self.cache[node] = f"{name}_var", f"{name}_rule(p)"
|
||||
return self.cache[node]
|
||||
|
||||
def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
|
||||
name, call = self.visit(node.item)
|
||||
if node.name:
|
||||
name = node.name
|
||||
return name, call
|
||||
|
||||
def lookahead_call_helper(self, node: Lookahead, positive: int) -> Tuple[None, str]:
|
||||
name, call = self.visit(node.node)
|
||||
func, args = call.split("(", 1)
|
||||
assert args[-1] == ")"
|
||||
args = args[:-1]
|
||||
if not args.startswith("p,"):
|
||||
return None, f"_PyPegen_lookahead({positive}, {func}, {args})"
|
||||
elif args[2:].strip().isalnum():
|
||||
return None, f"_PyPegen_lookahead_with_int({positive}, {func}, {args})"
|
||||
else:
|
||||
return None, f"_PyPegen_lookahead_with_string({positive}, {func}, {args})"
|
||||
|
||||
def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
|
||||
return self.lookahead_call_helper(node, 1)
|
||||
|
||||
def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
|
||||
return self.lookahead_call_helper(node, 0)
|
||||
|
||||
def visit_Opt(self, node: Opt) -> Tuple[str, str]:
|
||||
name, call = self.visit(node.node)
|
||||
return "opt_var", f"{call}, 1" # Using comma operator!
|
||||
|
||||
def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
name = self.gen.name_loop(node.node, False)
|
||||
self.cache[node] = f"{name}_var", f"{name}_rule(p)"
|
||||
return self.cache[node]
|
||||
|
||||
def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
name = self.gen.name_loop(node.node, True)
|
||||
self.cache[node] = f"{name}_var", f"{name}_rule(p)"
|
||||
return self.cache[node]
|
||||
|
||||
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
name = self.gen.name_gather(node)
|
||||
self.cache[node] = f"{name}_var", f"{name}_rule(p)"
|
||||
return self.cache[node]
|
||||
|
||||
def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
|
||||
return self.visit(node.rhs)
|
||||
|
||||
def visit_Cut(self, node: Cut) -> Tuple[str, str]:
|
||||
return "cut_var", "1"
|
||||
|
||||
|
||||
class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||
def __init__(
|
||||
self,
|
||||
grammar: grammar.Grammar,
|
||||
file: Optional[IO[Text]],
|
||||
debug: bool = False,
|
||||
skip_actions: bool = False,
|
||||
):
|
||||
super().__init__(grammar, file)
|
||||
self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(self)
|
||||
self._varname_counter = 0
|
||||
self.debug = debug
|
||||
self.skip_actions = skip_actions
|
||||
|
||||
def unique_varname(self, name: str = "tmpvar") -> str:
|
||||
new_var = name + "_" + str(self._varname_counter)
|
||||
self._varname_counter += 1
|
||||
return new_var
|
||||
|
||||
def call_with_errorcheck_return(self, call_text: str, returnval: str) -> None:
|
||||
error_var = self.unique_varname()
|
||||
self.print(f"int {error_var} = {call_text};")
|
||||
self.print(f"if ({error_var}) {{")
|
||||
with self.indent():
|
||||
self.print(f"return {returnval};")
|
||||
self.print(f"}}")
|
||||
|
||||
def call_with_errorcheck_goto(self, call_text: str, goto_target: str) -> None:
|
||||
error_var = self.unique_varname()
|
||||
self.print(f"int {error_var} = {call_text};")
|
||||
self.print(f"if ({error_var}) {{")
|
||||
with self.indent():
|
||||
self.print(f"goto {goto_target};")
|
||||
self.print(f"}}")
|
||||
|
||||
def out_of_memory_return(
|
||||
self, expr: str, returnval: str, message: str = "Parser out of memory", cleanup_code=None
|
||||
) -> None:
|
||||
self.print(f"if ({expr}) {{")
|
||||
with self.indent():
|
||||
self.print(f'PyErr_Format(PyExc_MemoryError, "{message}");')
|
||||
if cleanup_code is not None:
|
||||
self.print(cleanup_code)
|
||||
self.print(f"return {returnval};")
|
||||
self.print(f"}}")
|
||||
|
||||
def out_of_memory_goto(
|
||||
self, expr: str, goto_target: str, message: str = "Parser out of memory"
|
||||
) -> None:
|
||||
self.print(f"if ({expr}) {{")
|
||||
with self.indent():
|
||||
self.print(f'PyErr_Format(PyExc_MemoryError, "{message}");')
|
||||
self.print(f"goto {goto_target};")
|
||||
self.print(f"}}")
|
||||
|
||||
def generate(self, filename: str) -> None:
|
||||
self.collect_todo()
|
||||
self.print(f"// @generated by pegen.py from {filename}")
|
||||
header = self.grammar.metas.get("header", EXTENSION_PREFIX)
|
||||
if header:
|
||||
self.print(header.rstrip("\n"))
|
||||
subheader = self.grammar.metas.get("subheader", "")
|
||||
if subheader:
|
||||
self.print(subheader)
|
||||
self._setup_keywords()
|
||||
for i, (rulename, rule) in enumerate(self.todo.items(), 1000):
|
||||
comment = " // Left-recursive" if rule.left_recursive else ""
|
||||
self.print(f"#define {rulename}_type {i}{comment}")
|
||||
self.print()
|
||||
for rulename, rule in self.todo.items():
|
||||
if rule.is_loop() or rule.is_gather():
|
||||
type = "asdl_seq *"
|
||||
elif rule.type:
|
||||
type = rule.type + " "
|
||||
else:
|
||||
type = "void *"
|
||||
self.print(f"static {type}{rulename}_rule(Parser *p);")
|
||||
self.print()
|
||||
while self.todo:
|
||||
for rulename, rule in list(self.todo.items()):
|
||||
del self.todo[rulename]
|
||||
self.print()
|
||||
if rule.left_recursive:
|
||||
self.print("// Left-recursive")
|
||||
self.visit(rule)
|
||||
if self.skip_actions:
|
||||
mode = 0
|
||||
else:
|
||||
mode = int(self.rules["start"].type == "mod_ty") if "start" in self.rules else 1
|
||||
if mode == 1 and self.grammar.metas.get("bytecode"):
|
||||
mode += 1
|
||||
modulename = self.grammar.metas.get("modulename", "parse")
|
||||
trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX)
|
||||
keyword_cache = self.callmakervisitor.keyword_cache
|
||||
if trailer:
|
||||
self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))
|
||||
|
||||
def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]:
|
||||
groups: Dict[int, List[Tuple[str, int]]] = {}
|
||||
for keyword_str, keyword_type in self.callmakervisitor.keyword_cache.items():
|
||||
length = len(keyword_str)
|
||||
if length in groups:
|
||||
groups[length].append((keyword_str, keyword_type))
|
||||
else:
|
||||
groups[length] = [(keyword_str, keyword_type)]
|
||||
return groups
|
||||
|
||||
def _setup_keywords(self) -> None:
|
||||
keyword_cache = self.callmakervisitor.keyword_cache
|
||||
n_keyword_lists = (
|
||||
len(max(keyword_cache.keys(), key=len)) + 1 if len(keyword_cache) > 0 else 0
|
||||
)
|
||||
self.print(f"static const int n_keyword_lists = {n_keyword_lists};")
|
||||
groups = self._group_keywords_by_length()
|
||||
self.print("static KeywordToken *reserved_keywords[] = {")
|
||||
with self.indent():
|
||||
num_groups = max(groups) + 1 if groups else 1
|
||||
for keywords_length in range(num_groups):
|
||||
if keywords_length not in groups.keys():
|
||||
self.print("NULL,")
|
||||
else:
|
||||
self.print("(KeywordToken[]) {")
|
||||
with self.indent():
|
||||
for keyword_str, keyword_type in groups[keywords_length]:
|
||||
self.print(f'{{"{keyword_str}", {keyword_type}}},')
|
||||
self.print("{NULL, -1},")
|
||||
self.print("},")
|
||||
self.print("};")
|
||||
|
||||
def _set_up_token_start_metadata_extraction(self) -> None:
|
||||
self.print("if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {")
|
||||
with self.indent():
|
||||
self.print("p->error_indicator = 1;")
|
||||
self.print("return NULL;")
|
||||
self.print("}")
|
||||
self.print("int start_lineno = p->tokens[mark]->lineno;")
|
||||
self.print("UNUSED(start_lineno); // Only used by EXTRA macro")
|
||||
self.print("int start_col_offset = p->tokens[mark]->col_offset;")
|
||||
self.print("UNUSED(start_col_offset); // Only used by EXTRA macro")
|
||||
|
||||
def _set_up_token_end_metadata_extraction(self) -> None:
|
||||
self.print("Token *token = _PyPegen_get_last_nonnwhitespace_token(p);")
|
||||
self.print("if (token == NULL) {")
|
||||
with self.indent():
|
||||
self.print("return NULL;")
|
||||
self.print("}")
|
||||
self.print(f"int end_lineno = token->end_lineno;")
|
||||
self.print("UNUSED(end_lineno); // Only used by EXTRA macro")
|
||||
self.print(f"int end_col_offset = token->end_col_offset;")
|
||||
self.print("UNUSED(end_col_offset); // Only used by EXTRA macro")
|
||||
|
||||
def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
|
||||
self.print("{")
|
||||
with self.indent():
|
||||
self.print(f"{result_type} res = NULL;")
|
||||
self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &res))")
|
||||
with self.indent():
|
||||
self.print("return res;")
|
||||
self.print("int mark = p->mark;")
|
||||
self.print("int resmark = p->mark;")
|
||||
self.print("while (1) {")
|
||||
with self.indent():
|
||||
self.call_with_errorcheck_return(
|
||||
f"_PyPegen_update_memo(p, mark, {node.name}_type, res)", "res"
|
||||
)
|
||||
self.print("p->mark = mark;")
|
||||
self.print(f"void *raw = {node.name}_raw(p);")
|
||||
self.print("if (raw == NULL || p->mark <= resmark)")
|
||||
with self.indent():
|
||||
self.print("break;")
|
||||
self.print("resmark = p->mark;")
|
||||
self.print("res = raw;")
|
||||
self.print("}")
|
||||
self.print("p->mark = resmark;")
|
||||
self.print("return res;")
|
||||
self.print("}")
|
||||
self.print(f"static {result_type}")
|
||||
self.print(f"{node.name}_raw(Parser *p)")
|
||||
|
||||
def _should_memoize(self, node: Rule) -> bool:
|
||||
return node.memo and not node.left_recursive
|
||||
|
||||
def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> None:
|
||||
memoize = self._should_memoize(node)
|
||||
|
||||
with self.indent():
|
||||
self.print("if (p->error_indicator) {")
|
||||
with self.indent():
|
||||
self.print("return NULL;")
|
||||
self.print("}")
|
||||
self.print(f"{result_type} res = NULL;")
|
||||
if memoize:
|
||||
self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &res))")
|
||||
with self.indent():
|
||||
self.print("return res;")
|
||||
self.print("int mark = p->mark;")
|
||||
if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
|
||||
self._set_up_token_start_metadata_extraction()
|
||||
self.visit(
|
||||
rhs,
|
||||
is_loop=False,
|
||||
is_gather=node.is_gather(),
|
||||
rulename=node.name if memoize else None,
|
||||
)
|
||||
if self.debug:
|
||||
self.print(f'fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark);')
|
||||
self.print("res = NULL;")
|
||||
self.print(" done:")
|
||||
with self.indent():
|
||||
if memoize:
|
||||
self.print(f"_PyPegen_insert_memo(p, mark, {node.name}_type, res);")
|
||||
self.print("return res;")
|
||||
|
||||
def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
|
||||
memoize = self._should_memoize(node)
|
||||
is_repeat1 = node.name.startswith("_loop1")
|
||||
|
||||
with self.indent():
|
||||
self.print("if (p->error_indicator) {")
|
||||
with self.indent():
|
||||
self.print("return NULL;")
|
||||
self.print("}")
|
||||
self.print(f"void *res = NULL;")
|
||||
if memoize:
|
||||
self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &res))")
|
||||
with self.indent():
|
||||
self.print("return res;")
|
||||
self.print("int mark = p->mark;")
|
||||
self.print("int start_mark = p->mark;")
|
||||
self.print("void **children = PyMem_Malloc(sizeof(void *));")
|
||||
self.out_of_memory_return(f"!children", "NULL")
|
||||
self.print("ssize_t children_capacity = 1;")
|
||||
self.print("ssize_t n = 0;")
|
||||
if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
|
||||
self._set_up_token_start_metadata_extraction()
|
||||
self.visit(
|
||||
rhs,
|
||||
is_loop=True,
|
||||
is_gather=node.is_gather(),
|
||||
rulename=node.name if memoize else None,
|
||||
)
|
||||
if is_repeat1:
|
||||
self.print("if (n == 0) {")
|
||||
with self.indent():
|
||||
self.print("PyMem_Free(children);")
|
||||
self.print("return NULL;")
|
||||
self.print("}")
|
||||
self.print("asdl_seq *seq = _Py_asdl_seq_new(n, p->arena);")
|
||||
self.out_of_memory_return(
|
||||
f"!seq",
|
||||
"NULL",
|
||||
message=f"asdl_seq_new {node.name}",
|
||||
cleanup_code="PyMem_Free(children);",
|
||||
)
|
||||
self.print("for (int i = 0; i < n; i++) asdl_seq_SET(seq, i, children[i]);")
|
||||
self.print("PyMem_Free(children);")
|
||||
if node.name:
|
||||
self.print(f"_PyPegen_insert_memo(p, start_mark, {node.name}_type, seq);")
|
||||
self.print("return seq;")
|
||||
|
||||
def visit_Rule(self, node: Rule) -> None:
|
||||
is_loop = node.is_loop()
|
||||
is_gather = node.is_gather()
|
||||
rhs = node.flatten()
|
||||
if is_loop or is_gather:
|
||||
result_type = "asdl_seq *"
|
||||
elif node.type:
|
||||
result_type = node.type
|
||||
else:
|
||||
result_type = "void *"
|
||||
|
||||
for line in str(node).splitlines():
|
||||
self.print(f"// {line}")
|
||||
if node.left_recursive and node.leader:
|
||||
self.print(f"static {result_type} {node.name}_raw(Parser *);")
|
||||
|
||||
self.print(f"static {result_type}")
|
||||
self.print(f"{node.name}_rule(Parser *p)")
|
||||
|
||||
if node.left_recursive and node.leader:
|
||||
self._set_up_rule_memoization(node, result_type)
|
||||
|
||||
self.print("{")
|
||||
if is_loop:
|
||||
self._handle_loop_rule_body(node, rhs)
|
||||
else:
|
||||
self._handle_default_rule_body(node, rhs, result_type)
|
||||
self.print("}")
|
||||
|
||||
def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None:
|
||||
name, call = self.callmakervisitor.visit(node)
|
||||
if not name:
|
||||
self.print(call)
|
||||
else:
|
||||
name = dedupe(name, names)
|
||||
self.print(f"({name} = {call})")
|
||||
|
||||
def visit_Rhs(
|
||||
self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
|
||||
) -> None:
|
||||
if is_loop:
|
||||
assert len(node.alts) == 1
|
||||
for alt in node.alts:
|
||||
self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename)
|
||||
|
||||
def join_conditions(self, keyword: str, node: Any, names: List[str]) -> None:
|
||||
self.print(f"{keyword} (")
|
||||
with self.indent():
|
||||
first = True
|
||||
for item in node.items:
|
||||
if first:
|
||||
first = False
|
||||
else:
|
||||
self.print("&&")
|
||||
self.visit(item, names=names)
|
||||
self.print(")")
|
||||
|
||||
def emit_action(self, node: Alt, cleanup_code=None) -> None:
|
||||
self.print(f"res = {node.action};")
|
||||
|
||||
self.print("if (res == NULL && PyErr_Occurred()) {")
|
||||
with self.indent():
|
||||
self.print("p->error_indicator = 1;")
|
||||
if cleanup_code:
|
||||
self.print(cleanup_code)
|
||||
self.print("return NULL;")
|
||||
self.print("}")
|
||||
|
||||
if self.debug:
|
||||
self.print(
|
||||
f'fprintf(stderr, "Hit with action [%d-%d]: %s\\n", mark, p->mark, "{node}");'
|
||||
)
|
||||
|
||||
def emit_default_action(self, is_gather: bool, names: List[str], node: Alt) -> None:
|
||||
if len(names) > 1:
|
||||
if is_gather:
|
||||
assert len(names) == 2
|
||||
self.print(f"res = _PyPegen_seq_insert_in_front(p, {names[0]}, {names[1]});")
|
||||
else:
|
||||
if self.debug:
|
||||
self.print(
|
||||
f'fprintf(stderr, "Hit without action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
|
||||
)
|
||||
self.print(f"res = _PyPegen_dummy_name(p, {', '.join(names)});")
|
||||
else:
|
||||
if self.debug:
|
||||
self.print(
|
||||
f'fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
|
||||
)
|
||||
self.print(f"res = {names[0]};")
|
||||
|
||||
def emit_dummy_action(self) -> None:
|
||||
self.print(f"res = _PyPegen_dummy_name(p);")
|
||||
|
||||
def handle_alt_normal(self, node: Alt, is_gather: bool, names: List[str]) -> None:
|
||||
self.join_conditions(keyword="if", node=node, names=names)
|
||||
self.print("{")
|
||||
# We have successfully parsed all the conditions for the option.
|
||||
with self.indent():
|
||||
# Prepare to emit the rule action and do so
|
||||
if node.action and "EXTRA" in node.action:
|
||||
self._set_up_token_end_metadata_extraction()
|
||||
if self.skip_actions:
|
||||
self.emit_dummy_action()
|
||||
elif node.action:
|
||||
self.emit_action(node)
|
||||
else:
|
||||
self.emit_default_action(is_gather, names, node)
|
||||
|
||||
# As the current option has parsed correctly, do not continue with the rest.
|
||||
self.print(f"goto done;")
|
||||
self.print("}")
|
||||
|
||||
def handle_alt_loop(
|
||||
self, node: Alt, is_gather: bool, rulename: Optional[str], names: List[str]
|
||||
) -> None:
|
||||
# Condition of the main body of the alternative
|
||||
self.join_conditions(keyword="while", node=node, names=names)
|
||||
self.print("{")
|
||||
# We have successfully parsed one item!
|
||||
with self.indent():
|
||||
# Prepare to emit the rule action and do so
|
||||
if node.action and "EXTRA" in node.action:
|
||||
self._set_up_token_end_metadata_extraction()
|
||||
if self.skip_actions:
|
||||
self.emit_dummy_action()
|
||||
elif node.action:
|
||||
self.emit_action(node, cleanup_code="PyMem_Free(children);")
|
||||
else:
|
||||
self.emit_default_action(is_gather, names, node)
|
||||
|
||||
# Add the result of the rule to the temporary buffer of children. This buffer
|
||||
# will later populate an asdl_seq with all the elements to return.
|
||||
self.print("if (n == children_capacity) {")
|
||||
with self.indent():
|
||||
self.print("children_capacity *= 2;")
|
||||
self.print("children = PyMem_Realloc(children, children_capacity*sizeof(void *));")
|
||||
self.out_of_memory_return(f"!children", "NULL", message=f"realloc {rulename}")
|
||||
self.print("}")
|
||||
self.print(f"children[n++] = res;")
|
||||
self.print("mark = p->mark;")
|
||||
self.print("}")
|
||||
|
||||
def visit_Alt(
|
||||
self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
|
||||
) -> None:
|
||||
self.print(f"{{ // {node}")
|
||||
with self.indent():
|
||||
# Prepare variable declarations for the alternative
|
||||
vars = self.collect_vars(node)
|
||||
for v, var_type in sorted(item for item in vars.items() if item[0] is not None):
|
||||
if not var_type:
|
||||
var_type = "void *"
|
||||
else:
|
||||
var_type += " "
|
||||
if v == "cut_var":
|
||||
v += " = 0" # cut_var must be initialized
|
||||
self.print(f"{var_type}{v};")
|
||||
if v == "opt_var":
|
||||
self.print("UNUSED(opt_var); // Silence compiler warnings")
|
||||
|
||||
names: List[str] = []
|
||||
if is_loop:
|
||||
self.handle_alt_loop(node, is_gather, rulename, names)
|
||||
else:
|
||||
self.handle_alt_normal(node, is_gather, names)
|
||||
|
||||
self.print("p->mark = mark;")
|
||||
if "cut_var" in names:
|
||||
self.print("if (cut_var) return NULL;")
|
||||
self.print("}")
|
||||
|
||||
def collect_vars(self, node: Alt) -> Dict[str, Optional[str]]:
|
||||
names: List[str] = []
|
||||
types = {}
|
||||
for item in node.items:
|
||||
name, type = self.add_var(item, names)
|
||||
types[name] = type
|
||||
return types
|
||||
|
||||
def add_var(self, node: NamedItem, names: List[str]) -> Tuple[str, Optional[str]]:
|
||||
name: str
|
||||
call: str
|
||||
name, call = self.callmakervisitor.visit(node.item)
|
||||
type = None
|
||||
if not name:
|
||||
return name, type
|
||||
if name.startswith("cut"):
|
||||
return name, "int"
|
||||
if name.endswith("_var"):
|
||||
rulename = name[:-4]
|
||||
rule = self.rules.get(rulename)
|
||||
if rule is not None:
|
||||
if rule.is_loop() or rule.is_gather():
|
||||
type = "asdl_seq *"
|
||||
else:
|
||||
type = rule.type
|
||||
elif name.startswith("_loop") or name.startswith("_gather"):
|
||||
type = "asdl_seq *"
|
||||
elif name in ("name_var", "string_var", "number_var"):
|
||||
type = "expr_ty"
|
||||
if node.name:
|
||||
name = node.name
|
||||
name = dedupe(name, names)
|
||||
return name, type
|
|
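For illustration, the C generator above can also be driven by hand into an in-memory buffer instead of going through build_generator (a hedged sketch; the grammar path is a placeholder, and the base ParserGenerator is assumed to accept any text file object):

import io
from pegen.build import build_parser
from pegen.c_generator import CParserGenerator

grammar, parser, tokenizer = build_parser("../../Grammar/python.gram")  # placeholder path
out = io.StringIO()
gen = CParserGenerator(grammar, out, skip_actions=True)  # emit dummy actions only
gen.generate("python.gram")
print(out.getvalue()[:400])  # first few lines of the generated C parser
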
@ -0,0 +1,153 @@
|
|||
#!/usr/bin/env python3.8
|
||||
|
||||
import argparse
|
||||
import collections
|
||||
import pprint
|
||||
import sys
|
||||
from typing import Optional, Set, Dict
|
||||
|
||||
from pegen.build import build_parser
|
||||
from pegen.grammar import (
|
||||
Alt,
|
||||
Cut,
|
||||
Gather,
|
||||
Grammar,
|
||||
GrammarVisitor,
|
||||
Group,
|
||||
Leaf,
|
||||
Lookahead,
|
||||
NamedItem,
|
||||
NameLeaf,
|
||||
NegativeLookahead,
|
||||
Opt,
|
||||
Repeat,
|
||||
Repeat0,
|
||||
Repeat1,
|
||||
Rhs,
|
||||
Rule,
|
||||
StringLeaf,
|
||||
PositiveLookahead,
|
||||
)
|
||||
|
||||
argparser = argparse.ArgumentParser(
|
||||
prog="calculate_first_sets", description="Calculate the first sets of a grammar",
|
||||
)
|
||||
argparser.add_argument("grammar_file", help="The grammar file")
|
||||
|
||||
|
||||
class FirstSetCalculator(GrammarVisitor):
|
||||
def __init__(self, rules: Dict[str, Rule]) -> None:
|
||||
self.rules = rules
|
||||
for rule in rules.values():
|
||||
rule.nullable_visit(rules)
|
||||
self.first_sets: Dict[str, Set[str]] = dict()
|
||||
self.in_process: Set[str] = set()
|
||||
|
||||
def calculate(self) -> Dict[str, Set[str]]:
|
||||
for name, rule in self.rules.items():
|
||||
self.visit(rule)
|
||||
return self.first_sets
|
||||
|
||||
def visit_Alt(self, item: Alt) -> Set[str]:
|
||||
result: Set[str] = set()
|
||||
to_remove: Set[str] = set()
|
||||
for other in item.items:
|
||||
new_terminals = self.visit(other)
|
||||
if isinstance(other.item, NegativeLookahead):
|
||||
to_remove |= new_terminals
|
||||
result |= new_terminals
|
||||
if to_remove:
|
||||
result -= to_remove
|
||||
|
||||
# If the set of new terminals can start with the empty string,
|
||||
# it means that the item is completely nullable and we should
|
||||
# also consider at least the next item in case the current
|
||||
# one fails to parse.
|
||||
|
||||
if "" in new_terminals:
|
||||
continue
|
||||
|
||||
if not isinstance(other.item, (Opt, NegativeLookahead, Repeat0)):
|
||||
break
|
||||
|
||||
# Do not allow the empty string to propagate.
|
||||
result.discard("")
|
||||
|
||||
return result
|
||||
|
||||
def visit_Cut(self, item: Cut) -> Set[str]:
|
||||
return set()
|
||||
|
||||
def visit_Group(self, item: Group) -> Set[str]:
|
||||
return self.visit(item.rhs)
|
||||
|
||||
def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]:
|
||||
return self.visit(item.node)
|
||||
|
||||
def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]:
|
||||
return self.visit(item.node)
|
||||
|
||||
def visit_NamedItem(self, item: NamedItem) -> Set[str]:
|
||||
return self.visit(item.item)
|
||||
|
||||
def visit_Opt(self, item: Opt) -> Set[str]:
|
||||
return self.visit(item.node)
|
||||
|
||||
def visit_Gather(self, item: Gather) -> Set[str]:
|
||||
return self.visit(item.node)
|
||||
|
||||
def visit_Repeat0(self, item: Repeat0) -> Set[str]:
|
||||
return self.visit(item.node)
|
||||
|
||||
def visit_Repeat1(self, item: Repeat1) -> Set[str]:
|
||||
return self.visit(item.node)
|
||||
|
||||
def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
|
||||
if item.value not in self.rules:
|
||||
return {item.value}
|
||||
|
||||
if item.value not in self.first_sets:
|
||||
self.first_sets[item.value] = self.visit(self.rules[item.value])
|
||||
return self.first_sets[item.value]
|
||||
elif item.value in self.in_process:
|
||||
return set()
|
||||
|
||||
return self.first_sets[item.value]
|
||||
|
||||
def visit_StringLeaf(self, item: StringLeaf) -> Set[str]:
|
||||
return {item.value}
|
||||
|
||||
def visit_Rhs(self, item: Rhs) -> Set[str]:
|
||||
result: Set[str] = set()
|
||||
for alt in item.alts:
|
||||
result |= self.visit(alt)
|
||||
return result
|
||||
|
||||
def visit_Rule(self, item: Rule) -> Set[str]:
|
||||
if item.name in self.in_process:
|
||||
return set()
|
||||
elif item.name not in self.first_sets:
|
||||
self.in_process.add(item.name)
|
||||
terminals = self.visit(item.rhs)
|
||||
if item.nullable:
|
||||
terminals.add("")
|
||||
self.first_sets[item.name] = terminals
|
||||
self.in_process.remove(item.name)
|
||||
return self.first_sets[item.name]
|
||||
|
||||
|
||||
def main() -> None:
|
||||
args = argparser.parse_args()
|
||||
|
||||
try:
|
||||
grammar, parser, tokenizer = build_parser(args.grammar_file)
|
||||
except Exception as err:
|
||||
print("ERROR: Failed to parse grammar file", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
first_sets = FirstSetCalculator(grammar.rules).calculate()
|
||||
pprint.pprint(first_sets)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
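A small sketch of the calculator in use (hypothetical: it assumes the scripts directory is importable under the name calculate_first_sets and uses a throwaway grammar file):

import pprint
import tempfile

from pegen.build import build_parser
from calculate_first_sets import FirstSetCalculator  # assumed import path

with tempfile.NamedTemporaryFile("w", suffix=".gram", delete=False) as f:
    f.write("start: sum NEWLINE\nsum: NUMBER '+' NUMBER | NUMBER\n")

grammar, parser, tokenizer = build_parser(f.name)
pprint.pprint(FirstSetCalculator(grammar.rules).calculate())
# Roughly: {'start': {'NUMBER'}, 'sum': {'NUMBER'}}
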
@ -0,0 +1,470 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from abc import abstractmethod
|
||||
from typing import (
|
||||
AbstractSet,
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
Iterable,
|
||||
Iterator,
|
||||
List,
|
||||
Optional,
|
||||
Set,
|
||||
Tuple,
|
||||
TYPE_CHECKING,
|
||||
TypeVar,
|
||||
Union,
|
||||
)
|
||||
|
||||
from pegen.parser import memoize, Parser
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pegen.parser_generator import ParserGenerator
|
||||
|
||||
|
||||
class GrammarError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class GrammarVisitor:
|
||||
def visit(self, node: Any, *args: Any, **kwargs: Any) -> Any:
|
||||
"""Visit a node."""
|
||||
method = "visit_" + node.__class__.__name__
|
||||
visitor = getattr(self, method, self.generic_visit)
|
||||
return visitor(node, *args, **kwargs)
|
||||
|
||||
def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> None:
|
||||
"""Called if no explicit visitor function exists for a node."""
|
||||
for value in node:
|
||||
if isinstance(value, list):
|
||||
for item in value:
|
||||
self.visit(item, *args, **kwargs)
|
||||
else:
|
||||
self.visit(value, *args, **kwargs)
|
||||
|
||||
|
||||
class Grammar:
|
||||
def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]):
|
||||
self.rules = {rule.name: rule for rule in rules}
|
||||
self.metas = dict(metas)
|
||||
|
||||
def __str__(self) -> str:
|
||||
return "\n".join(str(rule) for name, rule in self.rules.items())
|
||||
|
||||
def __repr__(self) -> str:
|
||||
lines = ["Grammar("]
|
||||
lines.append(" [")
|
||||
for rule in self.rules.values():
|
||||
lines.append(f" {repr(rule)},")
|
||||
lines.append(" ],")
|
||||
lines.append(" {repr(list(self.metas.items()))}")
|
||||
lines.append(")")
|
||||
return "\n".join(lines)
|
||||
|
||||
def __iter__(self) -> Iterator[Rule]:
|
||||
yield from self.rules.values()
|
||||
|
||||
|
||||
# Global flag whether we want actions in __str__() -- default off.
|
||||
SIMPLE_STR = True
|
||||
|
||||
|
||||
class Rule:
|
||||
def __init__(self, name: str, type: Optional[str], rhs: Rhs, memo: Optional[object] = None):
|
||||
self.name = name
|
||||
self.type = type
|
||||
self.rhs = rhs
|
||||
self.memo = bool(memo)
|
||||
self.visited = False
|
||||
self.nullable = False
|
||||
self.left_recursive = False
|
||||
self.leader = False
|
||||
|
||||
def is_loop(self) -> bool:
|
||||
return self.name.startswith("_loop")
|
||||
|
||||
def is_gather(self) -> bool:
|
||||
return self.name.startswith("_gather")
|
||||
|
||||
def __str__(self) -> str:
|
||||
if SIMPLE_STR or self.type is None:
|
||||
res = f"{self.name}: {self.rhs}"
|
||||
else:
|
||||
res = f"{self.name}[{self.type}]: {self.rhs}"
|
||||
if len(res) < 88:
|
||||
return res
|
||||
lines = [res.split(":")[0] + ":"]
|
||||
lines += [f" | {alt}" for alt in self.rhs.alts]
|
||||
return "\n".join(lines)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"Rule({self.name!r}, {self.type!r}, {self.rhs!r})"
|
||||
|
||||
def __iter__(self) -> Iterator[Rhs]:
|
||||
yield self.rhs
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
if self.visited:
|
||||
# A left-recursive rule is considered non-nullable.
|
||||
return False
|
||||
self.visited = True
|
||||
self.nullable = self.rhs.nullable_visit(rules)
|
||||
return self.nullable
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return self.rhs.initial_names()
|
||||
|
||||
def flatten(self) -> Rhs:
|
||||
# If it's a single parenthesized group, flatten it.
|
||||
rhs = self.rhs
|
||||
if (
|
||||
not self.is_loop()
|
||||
and len(rhs.alts) == 1
|
||||
and len(rhs.alts[0].items) == 1
|
||||
and isinstance(rhs.alts[0].items[0].item, Group)
|
||||
):
|
||||
rhs = rhs.alts[0].items[0].item.rhs
|
||||
return rhs
|
||||
|
||||
def collect_todo(self, gen: ParserGenerator) -> None:
|
||||
rhs = self.flatten()
|
||||
rhs.collect_todo(gen)
|
||||
|
||||
|
||||
class Leaf:
|
||||
def __init__(self, value: str):
|
||||
self.value = value
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.value
|
||||
|
||||
def __iter__(self) -> Iterable[str]:
|
||||
if False:
|
||||
yield
|
||||
|
||||
@abstractmethod
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class NameLeaf(Leaf):
|
||||
"""The value is the name."""
|
||||
|
||||
def __str__(self) -> str:
|
||||
if self.value == "ENDMARKER":
|
||||
return "$"
|
||||
return super().__str__()
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"NameLeaf({self.value!r})"
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
if self.value in rules:
|
||||
return rules[self.value].nullable_visit(rules)
|
||||
# Token or unknown; never empty.
|
||||
return False
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return {self.value}
|
||||
|
||||
|
||||
class StringLeaf(Leaf):
|
||||
"""The value is a string literal, including quotes."""
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"StringLeaf({self.value!r})"
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
# The string token '' is considered empty.
|
||||
return not self.value
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return set()
|
||||
|
||||
|
||||
class Rhs:
|
||||
def __init__(self, alts: List[Alt]):
|
||||
self.alts = alts
|
||||
self.memo: Optional[Tuple[Optional[str], str]] = None
|
||||
|
||||
def __str__(self) -> str:
|
||||
return " | ".join(str(alt) for alt in self.alts)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"Rhs({self.alts!r})"
|
||||
|
||||
def __iter__(self) -> Iterator[List[Alt]]:
|
||||
yield self.alts
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
for alt in self.alts:
|
||||
if alt.nullable_visit(rules):
|
||||
return True
|
||||
return False
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
names: Set[str] = set()
|
||||
for alt in self.alts:
|
||||
names |= alt.initial_names()
|
||||
return names
|
||||
|
||||
def collect_todo(self, gen: ParserGenerator) -> None:
|
||||
for alt in self.alts:
|
||||
alt.collect_todo(gen)
|
||||
|
||||
|
||||
class Alt:
|
||||
def __init__(self, items: List[NamedItem], *, icut: int = -1, action: Optional[str] = None):
|
||||
self.items = items
|
||||
self.icut = icut
|
||||
self.action = action
|
||||
|
||||
def __str__(self) -> str:
|
||||
core = " ".join(str(item) for item in self.items)
|
||||
if not SIMPLE_STR and self.action:
|
||||
return f"{core} {{ {self.action} }}"
|
||||
else:
|
||||
return core
|
||||
|
||||
def __repr__(self) -> str:
|
||||
args = [repr(self.items)]
|
||||
if self.icut >= 0:
|
||||
args.append(f"icut={self.icut}")
|
||||
if self.action:
|
||||
args.append(f"action={self.action!r}")
|
||||
return f"Alt({', '.join(args)})"
|
||||
|
||||
def __iter__(self) -> Iterator[List[NamedItem]]:
|
||||
yield self.items
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
for item in self.items:
|
||||
if not item.nullable_visit(rules):
|
||||
return False
|
||||
return True
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
names: Set[str] = set()
|
||||
for item in self.items:
|
||||
names |= item.initial_names()
|
||||
if not item.nullable:
|
||||
break
|
||||
return names
|
||||
|
||||
def collect_todo(self, gen: ParserGenerator) -> None:
|
||||
for item in self.items:
|
||||
item.collect_todo(gen)
|
||||
|
||||
|
||||
class NamedItem:
|
||||
def __init__(self, name: Optional[str], item: Item):
|
||||
self.name = name
|
||||
self.item = item
|
||||
self.nullable = False
|
||||
|
||||
def __str__(self) -> str:
|
||||
if not SIMPLE_STR and self.name:
|
||||
return f"{self.name}={self.item}"
|
||||
else:
|
||||
return str(self.item)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"NamedItem({self.name!r}, {self.item!r})"
|
||||
|
||||
def __iter__(self) -> Iterator[Item]:
|
||||
yield self.item
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
self.nullable = self.item.nullable_visit(rules)
|
||||
return self.nullable
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return self.item.initial_names()
|
||||
|
||||
def collect_todo(self, gen: ParserGenerator) -> None:
|
||||
gen.callmakervisitor.visit(self.item)
|
||||
|
||||
|
||||
class Lookahead:
|
||||
def __init__(self, node: Plain, sign: str):
|
||||
self.node = node
|
||||
self.sign = sign
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{self.sign}{self.node}"
|
||||
|
||||
def __iter__(self) -> Iterator[Plain]:
|
||||
yield self.node
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
return True
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return set()
|
||||
|
||||
|
||||
class PositiveLookahead(Lookahead):
|
||||
def __init__(self, node: Plain):
|
||||
super().__init__(node, "&")
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"PositiveLookahead({self.node!r})"
|
||||
|
||||
|
||||
class NegativeLookahead(Lookahead):
|
||||
def __init__(self, node: Plain):
|
||||
super().__init__(node, "!")
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"NegativeLookahead({self.node!r})"
|
||||
|
||||
|
||||
class Opt:
|
||||
def __init__(self, node: Item):
|
||||
self.node = node
|
||||
|
||||
def __str__(self) -> str:
|
||||
s = str(self.node)
|
||||
# TODO: Decide whether to use [X] or X? based on type of X
|
||||
if " " in s:
|
||||
return f"[{s}]"
|
||||
else:
|
||||
return f"{s}?"
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"Opt({self.node!r})"
|
||||
|
||||
def __iter__(self) -> Iterator[Item]:
|
||||
yield self.node
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
return True
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return self.node.initial_names()
|
||||
|
||||
|
||||
class Repeat:
|
||||
"""Shared base class for x* and x+."""
|
||||
|
||||
def __init__(self, node: Plain):
|
||||
self.node = node
|
||||
self.memo: Optional[Tuple[Optional[str], str]] = None
|
||||
|
||||
@abstractmethod
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
raise NotImplementedError
|
||||
|
||||
def __iter__(self) -> Iterator[Plain]:
|
||||
yield self.node
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return self.node.initial_names()
|
||||
|
||||
|
||||
class Repeat0(Repeat):
|
||||
def __str__(self) -> str:
|
||||
s = str(self.node)
|
||||
# TODO: Decide whether to use (X)* or X* based on type of X
|
||||
if " " in s:
|
||||
return f"({s})*"
|
||||
else:
|
||||
return f"{s}*"
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"Repeat0({self.node!r})"
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
class Repeat1(Repeat):
|
||||
def __str__(self) -> str:
|
||||
s = str(self.node)
|
||||
# TODO: Decide whether to use (X)+ or X+ based on type of X
|
||||
if " " in s:
|
||||
return f"({s})+"
|
||||
else:
|
||||
return f"{s}+"
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"Repeat1({self.node!r})"
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
class Gather(Repeat):
|
||||
def __init__(self, separator: Plain, node: Plain):
|
||||
self.separator = separator
|
||||
self.node = node
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{self.separator!s}.{self.node!s}+"
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"Gather({self.separator!r}, {self.node!r})"
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
class Group:
|
||||
def __init__(self, rhs: Rhs):
|
||||
self.rhs = rhs
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"({self.rhs})"
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"Group({self.rhs!r})"
|
||||
|
||||
def __iter__(self) -> Iterator[Rhs]:
|
||||
yield self.rhs
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
return self.rhs.nullable_visit(rules)
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return self.rhs.initial_names()
|
||||
|
||||
|
||||
class Cut:
|
||||
def __init__(self) -> None:
|
||||
pass
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"Cut()"
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"~"
|
||||
|
||||
def __iter__(self) -> Iterator[Tuple[str, str]]:
|
||||
if False:
|
||||
yield
|
||||
|
||||
def __eq__(self, other: object) -> bool:
|
||||
if not isinstance(other, Cut):
|
||||
return NotImplemented
|
||||
return True
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
return True
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return set()
|
||||
|
||||
|
||||
Plain = Union[Leaf, Group]
|
||||
Item = Union[Plain, Opt, Repeat, Lookahead, Rhs, Cut]
|
||||
RuleName = Tuple[str, str]
|
||||
MetaTuple = Tuple[str, Optional[str]]
|
||||
MetaList = List[MetaTuple]
|
||||
RuleList = List[Rule]
|
||||
NamedItemList = List[NamedItem]
|
||||
LookaheadOrCut = Union[Lookahead, Cut]
|
|
@ -0,0 +1,677 @@
|
|||
#!/usr/bin/env python3.8
|
||||
# @generated by pegen from pegen/metagrammar.gram
|
||||
|
||||
import ast
|
||||
import sys
|
||||
import tokenize
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
from pegen.parser import memoize, memoize_left_rec, logger, Parser
|
||||
from ast import literal_eval
|
||||
|
||||
from pegen.grammar import (
|
||||
Alt,
|
||||
Cut,
|
||||
Gather,
|
||||
Group,
|
||||
Item,
|
||||
Lookahead,
|
||||
LookaheadOrCut,
|
||||
MetaTuple,
|
||||
MetaList,
|
||||
NameLeaf,
|
||||
NamedItem,
|
||||
NamedItemList,
|
||||
NegativeLookahead,
|
||||
Opt,
|
||||
Plain,
|
||||
PositiveLookahead,
|
||||
Repeat0,
|
||||
Repeat1,
|
||||
Rhs,
|
||||
Rule,
|
||||
RuleList,
|
||||
RuleName,
|
||||
Grammar,
|
||||
StringLeaf,
|
||||
)
|
||||
|
||||
class GeneratedParser(Parser):
|
||||
|
||||
@memoize
|
||||
def start(self) -> Optional[Grammar]:
|
||||
# start: grammar $
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(grammar := self.grammar())
|
||||
and
|
||||
(endmarker := self.expect('ENDMARKER'))
|
||||
):
|
||||
return grammar
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def grammar(self) -> Optional[Grammar]:
|
||||
# grammar: metas rules | rules
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(metas := self.metas())
|
||||
and
|
||||
(rules := self.rules())
|
||||
):
|
||||
return Grammar ( rules , metas )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(rules := self.rules())
|
||||
):
|
||||
return Grammar ( rules , [ ] )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def metas(self) -> Optional[MetaList]:
|
||||
# metas: meta metas | meta
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(meta := self.meta())
|
||||
and
|
||||
(metas := self.metas())
|
||||
):
|
||||
return [ meta ] + metas
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(meta := self.meta())
|
||||
):
|
||||
return [ meta ]
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def meta(self) -> Optional[MetaTuple]:
|
||||
# meta: "@" NAME NEWLINE | "@" NAME NAME NEWLINE | "@" NAME STRING NEWLINE
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect("@"))
|
||||
and
|
||||
(name := self.name())
|
||||
and
|
||||
(newline := self.expect('NEWLINE'))
|
||||
):
|
||||
return ( name . string , None )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect("@"))
|
||||
and
|
||||
(a := self.name())
|
||||
and
|
||||
(b := self.name())
|
||||
and
|
||||
(newline := self.expect('NEWLINE'))
|
||||
):
|
||||
return ( a . string , b . string )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect("@"))
|
||||
and
|
||||
(name := self.name())
|
||||
and
|
||||
(string := self.string())
|
||||
and
|
||||
(newline := self.expect('NEWLINE'))
|
||||
):
|
||||
return ( name . string , literal_eval ( string . string ) )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def rules(self) -> Optional[RuleList]:
|
||||
# rules: rule rules | rule
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(rule := self.rule())
|
||||
and
|
||||
(rules := self.rules())
|
||||
):
|
||||
return [ rule ] + rules
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(rule := self.rule())
|
||||
):
|
||||
return [ rule ]
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def rule(self) -> Optional[Rule]:
|
||||
# rule: rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" alts NEWLINE
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(rulename := self.rulename())
|
||||
and
|
||||
(opt := self.memoflag(),)
|
||||
and
|
||||
(literal := self.expect(":"))
|
||||
and
|
||||
(alts := self.alts())
|
||||
and
|
||||
(newline := self.expect('NEWLINE'))
|
||||
and
|
||||
(indent := self.expect('INDENT'))
|
||||
and
|
||||
(more_alts := self.more_alts())
|
||||
and
|
||||
(dedent := self.expect('DEDENT'))
|
||||
):
|
||||
return Rule ( rulename [ 0 ] , rulename [ 1 ] , Rhs ( alts . alts + more_alts . alts ) , memo = opt )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(rulename := self.rulename())
|
||||
and
|
||||
(opt := self.memoflag(),)
|
||||
and
|
||||
(literal := self.expect(":"))
|
||||
and
|
||||
(newline := self.expect('NEWLINE'))
|
||||
and
|
||||
(indent := self.expect('INDENT'))
|
||||
and
|
||||
(more_alts := self.more_alts())
|
||||
and
|
||||
(dedent := self.expect('DEDENT'))
|
||||
):
|
||||
return Rule ( rulename [ 0 ] , rulename [ 1 ] , more_alts , memo = opt )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(rulename := self.rulename())
|
||||
and
|
||||
(opt := self.memoflag(),)
|
||||
and
|
||||
(literal := self.expect(":"))
|
||||
and
|
||||
(alts := self.alts())
|
||||
and
|
||||
(newline := self.expect('NEWLINE'))
|
||||
):
|
||||
return Rule ( rulename [ 0 ] , rulename [ 1 ] , alts , memo = opt )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def rulename(self) -> Optional[RuleName]:
|
||||
# rulename: NAME '[' NAME '*' ']' | NAME '[' NAME ']' | NAME
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(name := self.name())
|
||||
and
|
||||
(literal := self.expect('['))
|
||||
and
|
||||
(type := self.name())
|
||||
and
|
||||
(literal_1 := self.expect('*'))
|
||||
and
|
||||
(literal_2 := self.expect(']'))
|
||||
):
|
||||
return ( name . string , type . string + "*" )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(name := self.name())
|
||||
and
|
||||
(literal := self.expect('['))
|
||||
and
|
||||
(type := self.name())
|
||||
and
|
||||
(literal_1 := self.expect(']'))
|
||||
):
|
||||
return ( name . string , type . string )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(name := self.name())
|
||||
):
|
||||
return ( name . string , None )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def memoflag(self) -> Optional[str]:
|
||||
# memoflag: '(' 'memo' ')'
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect('('))
|
||||
and
|
||||
(literal_1 := self.expect('memo'))
|
||||
and
|
||||
(literal_2 := self.expect(')'))
|
||||
):
|
||||
return "memo"
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def alts(self) -> Optional[Rhs]:
|
||||
# alts: alt "|" alts | alt
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(alt := self.alt())
|
||||
and
|
||||
(literal := self.expect("|"))
|
||||
and
|
||||
(alts := self.alts())
|
||||
):
|
||||
return Rhs ( [ alt ] + alts . alts )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(alt := self.alt())
|
||||
):
|
||||
return Rhs ( [ alt ] )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def more_alts(self) -> Optional[Rhs]:
|
||||
# more_alts: "|" alts NEWLINE more_alts | "|" alts NEWLINE
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect("|"))
|
||||
and
|
||||
(alts := self.alts())
|
||||
and
|
||||
(newline := self.expect('NEWLINE'))
|
||||
and
|
||||
(more_alts := self.more_alts())
|
||||
):
|
||||
return Rhs ( alts . alts + more_alts . alts )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect("|"))
|
||||
and
|
||||
(alts := self.alts())
|
||||
and
|
||||
(newline := self.expect('NEWLINE'))
|
||||
):
|
||||
return Rhs ( alts . alts )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def alt(self) -> Optional[Alt]:
|
||||
# alt: items '$' action | items '$' | items action | items
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(items := self.items())
|
||||
and
|
||||
(literal := self.expect('$'))
|
||||
and
|
||||
(action := self.action())
|
||||
):
|
||||
return Alt ( items + [ NamedItem ( None , NameLeaf ( 'ENDMARKER' ) ) ] , action = action )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(items := self.items())
|
||||
and
|
||||
(literal := self.expect('$'))
|
||||
):
|
||||
return Alt ( items + [ NamedItem ( None , NameLeaf ( 'ENDMARKER' ) ) ] , action = None )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(items := self.items())
|
||||
and
|
||||
(action := self.action())
|
||||
):
|
||||
return Alt ( items , action = action )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(items := self.items())
|
||||
):
|
||||
return Alt ( items , action = None )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def items(self) -> Optional[NamedItemList]:
|
||||
# items: named_item items | named_item
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(named_item := self.named_item())
|
||||
and
|
||||
(items := self.items())
|
||||
):
|
||||
return [ named_item ] + items
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(named_item := self.named_item())
|
||||
):
|
||||
return [ named_item ]
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def named_item(self) -> Optional[NamedItem]:
|
||||
# named_item: NAME '=' ~ item | item | lookahead
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(name := self.name())
|
||||
and
|
||||
(literal := self.expect('='))
|
||||
and
|
||||
(cut := True)
|
||||
and
|
||||
(item := self.item())
|
||||
):
|
||||
return NamedItem ( name . string , item )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(item := self.item())
|
||||
):
|
||||
return NamedItem ( None , item )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(it := self.lookahead())
|
||||
):
|
||||
return NamedItem ( None , it )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def lookahead(self) -> Optional[LookaheadOrCut]:
|
||||
# lookahead: '&' ~ atom | '!' ~ atom | '~'
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect('&'))
|
||||
and
|
||||
(cut := True)
|
||||
and
|
||||
(atom := self.atom())
|
||||
):
|
||||
return PositiveLookahead ( atom )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect('!'))
|
||||
and
|
||||
(cut := True)
|
||||
and
|
||||
(atom := self.atom())
|
||||
):
|
||||
return NegativeLookahead ( atom )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect('~'))
|
||||
):
|
||||
return Cut ( )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def item(self) -> Optional[Item]:
|
||||
# item: '[' ~ alts ']' | atom '?' | atom '*' | atom '+' | atom '.' atom '+' | atom
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect('['))
|
||||
and
|
||||
(cut := True)
|
||||
and
|
||||
(alts := self.alts())
|
||||
and
|
||||
(literal_1 := self.expect(']'))
|
||||
):
|
||||
return Opt ( alts )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(atom := self.atom())
|
||||
and
|
||||
(literal := self.expect('?'))
|
||||
):
|
||||
return Opt ( atom )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(atom := self.atom())
|
||||
and
|
||||
(literal := self.expect('*'))
|
||||
):
|
||||
return Repeat0 ( atom )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(atom := self.atom())
|
||||
and
|
||||
(literal := self.expect('+'))
|
||||
):
|
||||
return Repeat1 ( atom )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(sep := self.atom())
|
||||
and
|
||||
(literal := self.expect('.'))
|
||||
and
|
||||
(node := self.atom())
|
||||
and
|
||||
(literal_1 := self.expect('+'))
|
||||
):
|
||||
return Gather ( sep , node )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(atom := self.atom())
|
||||
):
|
||||
return atom
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def atom(self) -> Optional[Plain]:
|
||||
# atom: '(' ~ alts ')' | NAME | STRING
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect('('))
|
||||
and
|
||||
(cut := True)
|
||||
and
|
||||
(alts := self.alts())
|
||||
and
|
||||
(literal_1 := self.expect(')'))
|
||||
):
|
||||
return Group ( alts )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(name := self.name())
|
||||
):
|
||||
return NameLeaf ( name . string )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(string := self.string())
|
||||
):
|
||||
return StringLeaf ( string . string )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def action(self) -> Optional[str]:
|
||||
# action: "{" ~ target_atoms "}"
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect("{"))
|
||||
and
|
||||
(cut := True)
|
||||
and
|
||||
(target_atoms := self.target_atoms())
|
||||
and
|
||||
(literal_1 := self.expect("}"))
|
||||
):
|
||||
return target_atoms
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def target_atoms(self) -> Optional[str]:
|
||||
# target_atoms: target_atom target_atoms | target_atom
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(target_atom := self.target_atom())
|
||||
and
|
||||
(target_atoms := self.target_atoms())
|
||||
):
|
||||
return target_atom + " " + target_atoms
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(target_atom := self.target_atom())
|
||||
):
|
||||
return target_atom
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def target_atom(self) -> Optional[str]:
|
||||
# target_atom: "{" ~ target_atoms "}" | NAME | NUMBER | STRING | "?" | ":" | !"}" OP
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect("{"))
|
||||
and
|
||||
(cut := True)
|
||||
and
|
||||
(target_atoms := self.target_atoms())
|
||||
and
|
||||
(literal_1 := self.expect("}"))
|
||||
):
|
||||
return "{" + target_atoms + "}"
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(name := self.name())
|
||||
):
|
||||
return name . string
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(number := self.number())
|
||||
):
|
||||
return number . string
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(string := self.string())
|
||||
):
|
||||
return string . string
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect("?"))
|
||||
):
|
||||
return "?"
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect(":"))
|
||||
):
|
||||
return ":"
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
self.negative_lookahead(self.expect, "}")
|
||||
and
|
||||
(op := self.op())
|
||||
):
|
||||
return op . string
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
from pegen.parser import simple_parser_main
|
||||
simple_parser_main(GeneratedParser)
|
|
@ -0,0 +1,65 @@
|
|||
import argparse
|
||||
import sys
|
||||
|
||||
from typing import Any, Iterator, Iterable, Callable
|
||||
|
||||
from pegen.build import build_parser
|
||||
from pegen.grammar import Grammar, Rule
|
||||
|
||||
argparser = argparse.ArgumentParser(
|
||||
prog="pegen", description="Pretty print the AST for a given PEG grammar"
|
||||
)
|
||||
argparser.add_argument("filename", help="Grammar description")
|
||||
|
||||
|
||||
class ASTGrammarPrinter:
|
||||
def children(self, node: Rule) -> Iterator[Any]:
|
||||
for value in node:
|
||||
if isinstance(value, list):
|
||||
yield from value
|
||||
else:
|
||||
yield value
|
||||
|
||||
def name(self, node: Rule) -> str:
|
||||
if not list(self.children(node)):
|
||||
return repr(node)
|
||||
return node.__class__.__name__
|
||||
|
||||
def print_grammar_ast(self, grammar: Grammar, printer: Callable[..., None] = print) -> None:
|
||||
for rule in grammar.rules.values():
|
||||
printer(self.print_nodes_recursively(rule))
|
||||
|
||||
def print_nodes_recursively(self, node: Rule, prefix: str = "", istail: bool = True) -> str:
|
||||
|
||||
children = list(self.children(node))
|
||||
value = self.name(node)
|
||||
|
||||
line = prefix + ("└──" if istail else "├──") + value + "\n"
|
||||
suffix = "   " if istail else "│  "
|
||||
|
||||
if not children:
|
||||
return line
|
||||
|
||||
*children, last = children
|
||||
for child in children:
|
||||
line += self.print_nodes_recursively(child, prefix + suffix, False)
|
||||
line += self.print_nodes_recursively(last, prefix + suffix, True)
|
||||
|
||||
return line
|
||||
|
||||
|
||||
def main() -> None:
|
||||
args = argparser.parse_args()
|
||||
|
||||
try:
|
||||
grammar, parser, tokenizer = build_parser(args.filename)
|
||||
except Exception as err:
|
||||
print("ERROR: Failed to parse grammar file", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
visitor = ASTGrammarPrinter()
|
||||
visitor.print_grammar_ast(grammar)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,123 @@
|
|||
@subheader """\
|
||||
from ast import literal_eval
|
||||
|
||||
from pegen.grammar import (
|
||||
Alt,
|
||||
Cut,
|
||||
Gather,
|
||||
Group,
|
||||
Item,
|
||||
Lookahead,
|
||||
LookaheadOrCut,
|
||||
MetaTuple,
|
||||
MetaList,
|
||||
NameLeaf,
|
||||
NamedItem,
|
||||
NamedItemList,
|
||||
NegativeLookahead,
|
||||
Opt,
|
||||
Plain,
|
||||
PositiveLookahead,
|
||||
Repeat0,
|
||||
Repeat1,
|
||||
Rhs,
|
||||
Rule,
|
||||
RuleList,
|
||||
RuleName,
|
||||
Grammar,
|
||||
StringLeaf,
|
||||
)
|
||||
"""
|
||||
|
||||
start[Grammar]: grammar ENDMARKER { grammar }
|
||||
|
||||
grammar[Grammar]:
|
||||
| metas rules { Grammar(rules, metas) }
|
||||
| rules { Grammar(rules, []) }
|
||||
|
||||
metas[MetaList]:
|
||||
| meta metas { [meta] + metas }
|
||||
| meta { [meta] }
|
||||
|
||||
meta[MetaTuple]:
|
||||
| "@" NAME NEWLINE { (name.string, None) }
|
||||
| "@" a=NAME b=NAME NEWLINE { (a.string, b.string) }
|
||||
| "@" NAME STRING NEWLINE { (name.string, literal_eval(string.string)) }
|
||||
|
||||
rules[RuleList]:
|
||||
| rule rules { [rule] + rules }
|
||||
| rule { [rule] }
|
||||
|
||||
rule[Rule]:
|
||||
| rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT {
|
||||
Rule(rulename[0], rulename[1], Rhs(alts.alts + more_alts.alts), memo=opt) }
|
||||
| rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT {
|
||||
Rule(rulename[0], rulename[1], more_alts, memo=opt) }
|
||||
| rulename memoflag? ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, memo=opt) }
|
||||
|
||||
rulename[RuleName]:
|
||||
| NAME '[' type=NAME '*' ']' { (name.string, type.string+"*") }
|
||||
| NAME '[' type=NAME ']' { (name.string, type.string) }
|
||||
| NAME { (name.string, None) }
|
||||
|
||||
# In the future this may return something more complicated
|
||||
memoflag[str]:
|
||||
| '(' 'memo' ')' { "memo" }
|
||||
|
||||
alts[Rhs]:
|
||||
| alt "|" alts { Rhs([alt] + alts.alts)}
|
||||
| alt { Rhs([alt]) }
|
||||
|
||||
more_alts[Rhs]:
|
||||
| "|" alts NEWLINE more_alts { Rhs(alts.alts + more_alts.alts) }
|
||||
| "|" alts NEWLINE { Rhs(alts.alts) }
|
||||
|
||||
alt[Alt]:
|
||||
| items '$' action { Alt(items + [NamedItem(None, NameLeaf('ENDMARKER'))], action=action) }
|
||||
| items '$' { Alt(items + [NamedItem(None, NameLeaf('ENDMARKER'))], action=None) }
|
||||
| items action { Alt(items, action=action) }
|
||||
| items { Alt(items, action=None) }
|
||||
|
||||
items[NamedItemList]:
|
||||
| named_item items { [named_item] + items }
|
||||
| named_item { [named_item] }
|
||||
|
||||
named_item[NamedItem]:
|
||||
| NAME '=' ~ item {NamedItem(name.string, item)}
|
||||
| item {NamedItem(None, item)}
|
||||
| it=lookahead {NamedItem(None, it)}
|
||||
|
||||
lookahead[LookaheadOrCut]:
|
||||
| '&' ~ atom {PositiveLookahead(atom)}
|
||||
| '!' ~ atom {NegativeLookahead(atom)}
|
||||
| '~' {Cut()}
|
||||
|
||||
item[Item]:
|
||||
| '[' ~ alts ']' {Opt(alts)}
|
||||
| atom '?' {Opt(atom)}
|
||||
| atom '*' {Repeat0(atom)}
|
||||
| atom '+' {Repeat1(atom)}
|
||||
| sep=atom '.' node=atom '+' {Gather(sep, node)}
|
||||
| atom {atom}
|
||||
|
||||
atom[Plain]:
|
||||
| '(' ~ alts ')' {Group(alts)}
|
||||
| NAME {NameLeaf(name.string) }
|
||||
| STRING {StringLeaf(string.string)}
|
||||
|
||||
# Mini-grammar for the actions
|
||||
|
||||
action[str]: "{" ~ target_atoms "}" { target_atoms }
|
||||
|
||||
target_atoms[str]:
|
||||
| target_atom target_atoms { target_atom + " " + target_atoms }
|
||||
| target_atom { target_atom }
|
||||
|
||||
target_atom[str]:
|
||||
| "{" ~ target_atoms "}" { "{" + target_atoms + "}" }
|
||||
| NAME { name.string }
|
||||
| NUMBER { number.string }
|
||||
| STRING { string.string }
|
||||
| "?" { "?" }
|
||||
| ":" { ":" }
|
||||
| !"}" OP { op.string }
|
|
@ -0,0 +1,310 @@
|
|||
import argparse
|
||||
import sys
|
||||
import time
|
||||
import token
|
||||
import tokenize
|
||||
import traceback
|
||||
|
||||
from abc import abstractmethod
|
||||
from typing import Any, Callable, cast, Dict, Optional, Tuple, Type, TypeVar
|
||||
|
||||
from pegen.tokenizer import exact_token_types
|
||||
from pegen.tokenizer import Mark
|
||||
from pegen.tokenizer import Tokenizer
|
||||
|
||||
T = TypeVar("T")
|
||||
P = TypeVar("P", bound="Parser")
|
||||
F = TypeVar("F", bound=Callable[..., Any])
|
||||
|
||||
|
||||
def logger(method: F) -> F:
|
||||
"""For non-memoized functions that we want to be logged.
|
||||
|
||||
(In practice these are only the non-leader left-recursive functions.)
|
||||
"""
|
||||
method_name = method.__name__
|
||||
|
||||
def logger_wrapper(self: P, *args: object) -> T:
|
||||
if not self._verbose:
|
||||
return method(self, *args)
|
||||
argsr = ",".join(repr(arg) for arg in args)
|
||||
fill = " " * self._level
|
||||
print(f"{fill}{method_name}({argsr}) .... (looking at {self.showpeek()})")
|
||||
self._level += 1
|
||||
tree = method(self, *args)
|
||||
self._level -= 1
|
||||
print(f"{fill}... {method_name}({argsr}) --> {tree!s:.200}")
|
||||
return tree
|
||||
|
||||
logger_wrapper.__wrapped__ = method # type: ignore
|
||||
return cast(F, logger_wrapper)
|
||||
|
||||
|
||||
def memoize(method: F) -> F:
|
||||
"""Memoize a symbol method."""
|
||||
method_name = method.__name__
|
||||
|
||||
def memoize_wrapper(self: P, *args: object) -> T:
|
||||
mark = self.mark()
|
||||
key = mark, method_name, args
|
||||
# Fast path: cache hit, and not verbose.
|
||||
if key in self._cache and not self._verbose:
|
||||
tree, endmark = self._cache[key]
|
||||
self.reset(endmark)
|
||||
return tree
|
||||
# Slow path: no cache hit, or verbose.
|
||||
verbose = self._verbose
|
||||
argsr = ",".join(repr(arg) for arg in args)
|
||||
fill = " " * self._level
|
||||
if key not in self._cache:
|
||||
if verbose:
|
||||
print(f"{fill}{method_name}({argsr}) ... (looking at {self.showpeek()})")
|
||||
self._level += 1
|
||||
tree = method(self, *args)
|
||||
self._level -= 1
|
||||
if verbose:
|
||||
print(f"{fill}... {method_name}({argsr}) -> {tree!s:.200}")
|
||||
endmark = self.mark()
|
||||
self._cache[key] = tree, endmark
|
||||
else:
|
||||
tree, endmark = self._cache[key]
|
||||
if verbose:
|
||||
print(f"{fill}{method_name}({argsr}) -> {tree!s:.200}")
|
||||
self.reset(endmark)
|
||||
return tree
|
||||
|
||||
memoize_wrapper.__wrapped__ = method # type: ignore
|
||||
return cast(F, memoize_wrapper)
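
The decorator above is the packrat-memoization core: results are keyed by (input position, rule name, arguments), so a rule body runs at most once per position and is replayed from the cache afterwards. A minimal standalone sketch of that idea, not pegen code, using an invented parse_number helper:

from typing import Dict, Optional, Tuple

Memo = Dict[Tuple[int, str], Tuple[Optional[str], int]]

def parse_number(src: str, pos: int, memo: Memo) -> Tuple[Optional[str], int]:
    key = (pos, "number")
    if key in memo:  # cache hit: replay the stored (tree, end position)
        return memo[key]
    end = pos
    while end < len(src) and src[end].isdigit():
        end += 1
    result = (src[pos:end], end) if end > pos else (None, pos)
    memo[key] = result  # cache miss: remember the outcome for this position
    return result

memo: Memo = {}
print(parse_number("123+45", 0, memo))  # ('123', 3), computed
print(parse_number("123+45", 0, memo))  # ('123', 3), served from the cache
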
|
||||
|
||||
|
||||
def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Optional[T]]:
|
||||
"""Memoize a left-recursive symbol method."""
|
||||
method_name = method.__name__
|
||||
|
||||
def memoize_left_rec_wrapper(self: P) -> Optional[T]:
|
||||
mark = self.mark()
|
||||
key = mark, method_name, ()
|
||||
# Fast path: cache hit, and not verbose.
|
||||
if key in self._cache and not self._verbose:
|
||||
tree, endmark = self._cache[key]
|
||||
self.reset(endmark)
|
||||
return tree
|
||||
# Slow path: no cache hit, or verbose.
|
||||
verbose = self._verbose
|
||||
fill = " " * self._level
|
||||
if key not in self._cache:
|
||||
if verbose:
|
||||
print(f"{fill}{method_name} ... (looking at {self.showpeek()})")
|
||||
self._level += 1
|
||||
|
||||
# For left-recursive rules we manipulate the cache and
|
||||
# loop until the rule shows no progress, then pick the
|
||||
# previous result. For an explanation why this works, see
|
||||
# https://github.com/PhilippeSigaud/Pegged/wiki/Left-Recursion
|
||||
# (But we use the memoization cache instead of a static
|
||||
# variable; perhaps this is similar to a paper by Warth et al.
|
||||
# http://web.cs.ucla.edu/~todd/research/pub.php?id=pepm08.)
|
||||
|
||||
# Prime the cache with a failure.
|
||||
self._cache[key] = None, mark
|
||||
lastresult, lastmark = None, mark
|
||||
depth = 0
|
||||
if verbose:
|
||||
print(f"{fill}Recursive {method_name} at {mark} depth {depth}")
|
||||
|
||||
while True:
|
||||
self.reset(mark)
|
||||
result = method(self)
|
||||
endmark = self.mark()
|
||||
depth += 1
|
||||
if verbose:
|
||||
print(
|
||||
f"{fill}Recursive {method_name} at {mark} depth {depth}: {result!s:.200} to {endmark}"
|
||||
)
|
||||
if not result:
|
||||
if verbose:
|
||||
print(f"{fill}Fail with {lastresult!s:.200} to {lastmark}")
|
||||
break
|
||||
if endmark <= lastmark:
|
||||
if verbose:
|
||||
print(f"{fill}Bailing with {lastresult!s:.200} to {lastmark}")
|
||||
break
|
||||
self._cache[key] = lastresult, lastmark = result, endmark
|
||||
|
||||
self.reset(lastmark)
|
||||
tree = lastresult
|
||||
|
||||
self._level -= 1
|
||||
if verbose:
|
||||
print(f"{fill}{method_name}() -> {tree!s:.200} [cached]")
|
||||
if tree:
|
||||
endmark = self.mark()
|
||||
else:
|
||||
endmark = mark
|
||||
self.reset(endmark)
|
||||
self._cache[key] = tree, endmark
|
||||
else:
|
||||
tree, endmark = self._cache[key]
|
||||
if verbose:
|
||||
print(f"{fill}{method_name}() -> {tree!s:.200} [fresh]")
|
||||
if tree:
|
||||
self.reset(endmark)
|
||||
return tree
|
||||
|
||||
memoize_left_rec_wrapper.__wrapped__ = method # type: ignore
|
||||
return memoize_left_rec_wrapper
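
The loop in memoize_left_rec_wrapper is the "grow the seed" treatment of left recursion described in the comment above: prime the cache with a failure, re-run the rule at the same position, and keep the new result only while it consumes more input. A minimal standalone sketch of that loop for a toy left-recursive rule expr: expr '-' NUM | NUM over a token list (everything here is illustrative, not pegen code):

from typing import List, Optional, Tuple

def expr(tokens: List[str], pos: int) -> Tuple[Optional[int], int]:
    # Prime the "cache" with a failure, then keep re-parsing at `pos`
    # until the rule stops consuming additional tokens.
    seed: Tuple[Optional[int], int] = (None, pos)
    while True:
        result = expr_body(tokens, pos, seed)
        if result[0] is None or result[1] <= seed[1]:
            return seed
        seed = result

def expr_body(tokens: List[str], pos: int,
              seed: Tuple[Optional[int], int]) -> Tuple[Optional[int], int]:
    # Alternative 1: expr '-' NUM (the recursive call is answered by the seed).
    left, end = seed
    if (left is not None and end + 1 < len(tokens)
            and tokens[end] == "-" and tokens[end + 1].isdigit()):
        return left - int(tokens[end + 1]), end + 2
    # Alternative 2: NUM.
    if pos < len(tokens) and tokens[pos].isdigit():
        return int(tokens[pos]), pos + 1
    return None, pos

print(expr(["7", "-", "2", "-", "1"], 0))  # (4, 5): left-associative (7-2)-1
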
|
||||
|
||||
|
||||
class Parser:
|
||||
"""Parsing base class."""
|
||||
|
||||
def __init__(self, tokenizer: Tokenizer, *, verbose: bool = False):
|
||||
self._tokenizer = tokenizer
|
||||
self._verbose = verbose
|
||||
self._level = 0
|
||||
self._cache: Dict[Tuple[Mark, str, Tuple[Any, ...]], Tuple[Any, Mark]] = {}
|
||||
# Pass through common tokenizer methods.
|
||||
# TODO: Rename to _mark and _reset.
|
||||
self.mark = self._tokenizer.mark
|
||||
self.reset = self._tokenizer.reset
|
||||
|
||||
@abstractmethod
|
||||
def start(self) -> Any:
|
||||
pass
|
||||
|
||||
def showpeek(self) -> str:
|
||||
tok = self._tokenizer.peek()
|
||||
return f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"
|
||||
|
||||
@memoize
|
||||
def name(self) -> Optional[tokenize.TokenInfo]:
|
||||
tok = self._tokenizer.peek()
|
||||
if tok.type == token.NAME:
|
||||
return self._tokenizer.getnext()
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def number(self) -> Optional[tokenize.TokenInfo]:
|
||||
tok = self._tokenizer.peek()
|
||||
if tok.type == token.NUMBER:
|
||||
return self._tokenizer.getnext()
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def string(self) -> Optional[tokenize.TokenInfo]:
|
||||
tok = self._tokenizer.peek()
|
||||
if tok.type == token.STRING:
|
||||
return self._tokenizer.getnext()
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def op(self) -> Optional[tokenize.TokenInfo]:
|
||||
tok = self._tokenizer.peek()
|
||||
if tok.type == token.OP:
|
||||
return self._tokenizer.getnext()
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def expect(self, type: str) -> Optional[tokenize.TokenInfo]:
|
||||
tok = self._tokenizer.peek()
|
||||
if tok.string == type:
|
||||
return self._tokenizer.getnext()
|
||||
if type in exact_token_types:
|
||||
if tok.type == exact_token_types[type]:
|
||||
return self._tokenizer.getnext()
|
||||
if type in token.__dict__:
|
||||
if tok.type == token.__dict__[type]:
|
||||
return self._tokenizer.getnext()
|
||||
if tok.type == token.OP and tok.string == type:
|
||||
return self._tokenizer.getnext()
|
||||
return None
|
||||
|
||||
def positive_lookahead(self, func: Callable[..., T], *args: object) -> T:
|
||||
mark = self.mark()
|
||||
ok = func(*args)
|
||||
self.reset(mark)
|
||||
return ok
|
||||
|
||||
def negative_lookahead(self, func: Callable[..., object], *args: object) -> bool:
|
||||
mark = self.mark()
|
||||
ok = func(*args)
|
||||
self.reset(mark)
|
||||
return not ok
|
||||
|
||||
def make_syntax_error(self, filename: str = "<unknown>") -> SyntaxError:
|
||||
tok = self._tokenizer.diagnose()
|
||||
return SyntaxError(
|
||||
"pegen parse failure", (filename, tok.start[0], 1 + tok.start[1], tok.line)
|
||||
)
|
||||
|
||||
|
||||
def simple_parser_main(parser_class: Type[Parser]) -> None:
|
||||
argparser = argparse.ArgumentParser()
|
||||
argparser.add_argument(
|
||||
"-v",
|
||||
"--verbose",
|
||||
action="count",
|
||||
default=0,
|
||||
help="Print timing stats; repeat for more debug output",
|
||||
)
|
||||
argparser.add_argument(
|
||||
"-q", "--quiet", action="store_true", help="Don't print the parsed program"
|
||||
)
|
||||
argparser.add_argument("filename", help="Input file ('-' to use stdin)")
|
||||
|
||||
args = argparser.parse_args()
|
||||
verbose = args.verbose
|
||||
verbose_tokenizer = verbose >= 3
|
||||
verbose_parser = verbose == 2 or verbose >= 4
|
||||
|
||||
t0 = time.time()
|
||||
|
||||
filename = args.filename
|
||||
if filename == "" or filename == "-":
|
||||
filename = "<stdin>"
|
||||
file = sys.stdin
|
||||
else:
|
||||
file = open(args.filename)
|
||||
try:
|
||||
tokengen = tokenize.generate_tokens(file.readline)
|
||||
tokenizer = Tokenizer(tokengen, verbose=verbose_tokenizer)
|
||||
parser = parser_class(tokenizer, verbose=verbose_parser)
|
||||
tree = parser.start()
|
||||
try:
|
||||
if file.isatty():
|
||||
endpos = 0
|
||||
else:
|
||||
endpos = file.tell()
|
||||
except IOError:
|
||||
endpos = 0
|
||||
finally:
|
||||
if file is not sys.stdin:
|
||||
file.close()
|
||||
|
||||
t1 = time.time()
|
||||
|
||||
if not tree:
|
||||
err = parser.make_syntax_error(filename)
|
||||
traceback.print_exception(err.__class__, err, None)
|
||||
sys.exit(1)
|
||||
|
||||
if not args.quiet:
|
||||
print(tree)
|
||||
|
||||
if verbose:
|
||||
dt = t1 - t0
|
||||
diag = tokenizer.diagnose()
|
||||
nlines = diag.end[0]
|
||||
if diag.type == token.ENDMARKER:
|
||||
nlines -= 1
|
||||
print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
|
||||
if endpos:
|
||||
print(f" ({endpos} bytes)", end="")
|
||||
if dt:
|
||||
print(f"; {nlines / dt:.0f} lines/sec")
|
||||
else:
|
||||
print()
|
||||
print("Caches sizes:")
|
||||
print(f" token array : {len(tokenizer._tokens):10}")
|
||||
print(f" cache : {len(parser._cache):10}")
|
||||
## print_memstats()
|
|
@ -0,0 +1,188 @@
|
|||
import contextlib
|
||||
import token
|
||||
from abc import abstractmethod
|
||||
|
||||
from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple
|
||||
|
||||
from pegen import sccutils
|
||||
from pegen.grammar import (
|
||||
Grammar,
|
||||
Rule,
|
||||
Rhs,
|
||||
Alt,
|
||||
NamedItem,
|
||||
Plain,
|
||||
NameLeaf,
|
||||
StringLeaf,
|
||||
Gather,
|
||||
)
|
||||
from pegen.grammar import GrammarError, GrammarVisitor
|
||||
|
||||
|
||||
class RuleCheckingVisitor(GrammarVisitor):
|
||||
def __init__(self, rules: Dict[str, Rule]):
|
||||
self.rules = rules
|
||||
|
||||
def visit_NameLeaf(self, node: NameLeaf) -> None:
|
||||
if node.value not in self.rules and node.value not in token.tok_name.values():
|
||||
# TODO: Add line/col info to (leaf) nodes
|
||||
raise GrammarError(f"Dangling reference to rule {node.value!r}")
|
||||
|
||||
|
||||
class ParserGenerator:
|
||||
|
||||
callmakervisitor: GrammarVisitor
|
||||
|
||||
def __init__(self, grammar: Grammar, file: Optional[IO[Text]]):
|
||||
self.grammar = grammar
|
||||
self.rules = grammar.rules
|
||||
if "trailer" not in grammar.metas and "start" not in self.rules:
|
||||
raise GrammarError("Grammar without a trailer must have a 'start' rule")
|
||||
checker = RuleCheckingVisitor(self.rules)
|
||||
for rule in self.rules.values():
|
||||
checker.visit(rule)
|
||||
self.file = file
|
||||
self.level = 0
|
||||
compute_nullables(self.rules)
|
||||
self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
|
||||
self.todo = self.rules.copy() # Rules to generate
|
||||
self.counter = 0 # For name_rule()/name_loop()
|
||||
self.keyword_counter = 499 # For keyword_type()
|
||||
|
||||
@abstractmethod
|
||||
def generate(self, filename: str) -> None:
|
||||
raise NotImplementedError
|
||||
|
||||
@contextlib.contextmanager
|
||||
def indent(self) -> Iterator[None]:
|
||||
self.level += 1
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
self.level -= 1
|
||||
|
||||
def print(self, *args: object) -> None:
|
||||
if not args:
|
||||
print(file=self.file)
|
||||
else:
|
||||
print(" " * self.level, end="", file=self.file)
|
||||
print(*args, file=self.file)
|
||||
|
||||
def printblock(self, lines: str) -> None:
|
||||
for line in lines.splitlines():
|
||||
self.print(line)
|
||||
|
||||
def collect_todo(self) -> None:
|
||||
done: Set[str] = set()
|
||||
while True:
|
||||
alltodo = list(self.todo)
|
||||
todo = [i for i in alltodo if i not in done]
|
||||
if not todo:
|
||||
break
|
||||
for rulename in todo:
|
||||
self.todo[rulename].collect_todo(self)
|
||||
done = set(alltodo)
|
||||
|
||||
def keyword_type(self) -> int:
|
||||
self.keyword_counter += 1
|
||||
return self.keyword_counter
|
||||
|
||||
def name_node(self, rhs: Rhs) -> str:
|
||||
self.counter += 1
|
||||
name = f"_tmp_{self.counter}" # TODO: Pick a nicer name.
|
||||
self.todo[name] = Rule(name, None, rhs)
|
||||
return name
|
||||
|
||||
def name_loop(self, node: Plain, is_repeat1: bool) -> str:
|
||||
self.counter += 1
|
||||
if is_repeat1:
|
||||
prefix = "_loop1_"
|
||||
else:
|
||||
prefix = "_loop0_"
|
||||
name = f"{prefix}{self.counter}" # TODO: It's ugly to signal via the name.
|
||||
self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
|
||||
return name
|
||||
|
||||
def name_gather(self, node: Gather) -> str:
|
||||
self.counter += 1
|
||||
name = f"_gather_{self.counter}"
|
||||
self.counter += 1
|
||||
extra_function_name = f"_loop0_{self.counter}"
|
||||
extra_function_alt = Alt(
|
||||
[NamedItem(None, node.separator), NamedItem("elem", node.node),], action="elem",
|
||||
)
|
||||
self.todo[extra_function_name] = Rule(
|
||||
extra_function_name, None, Rhs([extra_function_alt]),
|
||||
)
|
||||
alt = Alt(
|
||||
[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name)),],
|
||||
)
|
||||
self.todo[name] = Rule(name, None, Rhs([alt]),)
|
||||
return name
|
||||
|
||||
|
||||
def dedupe(name: str, names: List[str]) -> str:
|
||||
origname = name
|
||||
counter = 0
|
||||
while name in names:
|
||||
counter += 1
|
||||
name = f"{origname}_{counter}"
|
||||
names.append(name)
|
||||
return name
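
dedupe is what produces the literal, literal_1, literal_2, ... bindings visible in the generated parser earlier in this change; a quick illustration:

names: List[str] = []
print(dedupe("literal", names))  # literal
print(dedupe("literal", names))  # literal_1
print(dedupe("literal", names))  # literal_2
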
|
||||
|
||||
|
||||
def compute_nullables(rules: Dict[str, Rule]) -> None:
|
||||
"""Compute which rules in a grammar are nullable.
|
||||
|
||||
Thanks to TatSu (tatsu/leftrec.py) for inspiration.
|
||||
"""
|
||||
for rule in rules.values():
|
||||
rule.nullable_visit(rules)
|
||||
|
||||
|
||||
def compute_left_recursives(
|
||||
rules: Dict[str, Rule]
|
||||
) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:
|
||||
graph = make_first_graph(rules)
|
||||
sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))
|
||||
for scc in sccs:
|
||||
if len(scc) > 1:
|
||||
for name in scc:
|
||||
rules[name].left_recursive = True
|
||||
# Try to find a leader such that all cycles go through it.
|
||||
leaders = set(scc)
|
||||
for start in scc:
|
||||
for cycle in sccutils.find_cycles_in_scc(graph, scc, start):
|
||||
## print("Cycle:", " -> ".join(cycle))
|
||||
leaders -= scc - set(cycle)
|
||||
if not leaders:
|
||||
raise ValueError(
|
||||
f"SCC {scc} has no leadership candidate (no element is included in all cycles)"
|
||||
)
|
||||
## print("Leaders:", leaders)
|
||||
leader = min(leaders) # Pick an arbitrary leader from the candidates.
|
||||
rules[leader].leader = True
|
||||
else:
|
||||
name = min(scc) # The only element.
|
||||
if name in graph[name]:
|
||||
rules[name].left_recursive = True
|
||||
rules[name].leader = True
|
||||
return graph, sccs
|
||||
|
||||
|
||||
def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
|
||||
"""Compute the graph of left-invocations.
|
||||
|
||||
There's an edge from A to B if A may invoke B at its initial
|
||||
position.
|
||||
|
||||
Note that this requires the nullable flags to have been computed.
|
||||
"""
|
||||
graph = {}
|
||||
vertices: Set[str] = set()
|
||||
for rulename, rhs in rules.items():
|
||||
graph[rulename] = names = rhs.initial_names()
|
||||
vertices |= names
|
||||
for vertex in vertices:
|
||||
graph.setdefault(vertex, set())
|
||||
return graph
|
|
@ -0,0 +1,224 @@
|
|||
from typing import Any, Dict, List, Optional, IO, Text, Tuple
|
||||
|
||||
from pegen.grammar import (
|
||||
Cut,
|
||||
GrammarVisitor,
|
||||
NameLeaf,
|
||||
StringLeaf,
|
||||
Rhs,
|
||||
NamedItem,
|
||||
Lookahead,
|
||||
PositiveLookahead,
|
||||
NegativeLookahead,
|
||||
Opt,
|
||||
Repeat0,
|
||||
Repeat1,
|
||||
Gather,
|
||||
Group,
|
||||
Rule,
|
||||
Alt,
|
||||
)
|
||||
from pegen import grammar
|
||||
from pegen.parser_generator import dedupe, ParserGenerator
|
||||
|
||||
MODULE_PREFIX = """\
|
||||
#!/usr/bin/env python3.8
|
||||
# @generated by pegen from {filename}
|
||||
|
||||
import ast
|
||||
import sys
|
||||
import tokenize
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
from pegen.parser import memoize, memoize_left_rec, logger, Parser
|
||||
|
||||
"""
|
||||
MODULE_SUFFIX = """
|
||||
|
||||
if __name__ == '__main__':
|
||||
from pegen.parser import simple_parser_main
|
||||
simple_parser_main(GeneratedParser)
|
||||
"""
|
||||
|
||||
|
||||
class PythonCallMakerVisitor(GrammarVisitor):
|
||||
def __init__(self, parser_generator: ParserGenerator):
|
||||
self.gen = parser_generator
|
||||
self.cache: Dict[Any, Any] = {}
|
||||
|
||||
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
|
||||
name = node.value
|
||||
if name in ("NAME", "NUMBER", "STRING", "OP"):
|
||||
name = name.lower()
|
||||
return name, f"self.{name}()"
|
||||
if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"):
|
||||
return name.lower(), f"self.expect({name!r})"
|
||||
return name, f"self.{name}()"
|
||||
|
||||
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
|
||||
return "literal", f"self.expect({node.value})"
|
||||
|
||||
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
if len(node.alts) == 1 and len(node.alts[0].items) == 1:
|
||||
self.cache[node] = self.visit(node.alts[0].items[0])
|
||||
else:
|
||||
name = self.gen.name_node(node)
|
||||
self.cache[node] = name, f"self.{name}()"
|
||||
return self.cache[node]
|
||||
|
||||
def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
|
||||
name, call = self.visit(node.item)
|
||||
if node.name:
|
||||
name = node.name
|
||||
return name, call
|
||||
|
||||
def lookahead_call_helper(self, node: Lookahead) -> Tuple[str, str]:
|
||||
name, call = self.visit(node.node)
|
||||
head, tail = call.split("(", 1)
|
||||
assert tail[-1] == ")"
|
||||
tail = tail[:-1]
|
||||
return head, tail
|
||||
|
||||
def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
|
||||
head, tail = self.lookahead_call_helper(node)
|
||||
return None, f"self.positive_lookahead({head}, {tail})"
|
||||
|
||||
def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
|
||||
head, tail = self.lookahead_call_helper(node)
|
||||
return None, f"self.negative_lookahead({head}, {tail})"
|
||||
|
||||
def visit_Opt(self, node: Opt) -> Tuple[str, str]:
|
||||
name, call = self.visit(node.node)
|
||||
return "opt", f"{call}," # Note trailing comma!
|
||||
|
||||
def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
name = self.gen.name_loop(node.node, False)
|
||||
self.cache[node] = name, f"self.{name}()," # Also a trailing comma!
|
||||
return self.cache[node]
|
||||
|
||||
def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
name = self.gen.name_loop(node.node, True)
|
||||
self.cache[node] = name, f"self.{name}()" # But no trailing comma here!
|
||||
return self.cache[node]
|
||||
|
||||
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
name = self.gen.name_gather(node)
|
||||
self.cache[node] = name, f"self.{name}()" # No trailing comma here either!
|
||||
return self.cache[node]
|
||||
|
||||
def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
|
||||
return self.visit(node.rhs)
|
||||
|
||||
def visit_Cut(self, node: Cut) -> Tuple[str, str]:
|
||||
return "cut", "True"
|
||||
|
||||
|
||||
class PythonParserGenerator(ParserGenerator, GrammarVisitor):
|
||||
def __init__(self, grammar: grammar.Grammar, file: Optional[IO[Text]]):
|
||||
super().__init__(grammar, file)
|
||||
self.callmakervisitor = PythonCallMakerVisitor(self)
|
||||
|
||||
def generate(self, filename: str) -> None:
|
||||
header = self.grammar.metas.get("header", MODULE_PREFIX)
|
||||
if header is not None:
|
||||
self.print(header.rstrip("\n").format(filename=filename))
|
||||
subheader = self.grammar.metas.get("subheader", "")
|
||||
if subheader:
|
||||
self.print(subheader.format(filename=filename))
|
||||
self.print("class GeneratedParser(Parser):")
|
||||
while self.todo:
|
||||
for rulename, rule in list(self.todo.items()):
|
||||
del self.todo[rulename]
|
||||
self.print()
|
||||
with self.indent():
|
||||
self.visit(rule)
|
||||
trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX)
|
||||
if trailer is not None:
|
||||
self.print(trailer.rstrip("\n"))
|
||||
|
||||
def visit_Rule(self, node: Rule) -> None:
|
||||
is_loop = node.is_loop()
|
||||
is_gather = node.is_gather()
|
||||
rhs = node.flatten()
|
||||
if node.left_recursive:
|
||||
if node.leader:
|
||||
self.print("@memoize_left_rec")
|
||||
else:
|
||||
# Non-leader rules in a cycle are not memoized,
|
||||
# but they must still be logged.
|
||||
self.print("@logger")
|
||||
else:
|
||||
self.print("@memoize")
|
||||
node_type = node.type or "Any"
|
||||
self.print(f"def {node.name}(self) -> Optional[{node_type}]:")
|
||||
with self.indent():
|
||||
self.print(f"# {node.name}: {rhs}")
|
||||
if node.nullable:
|
||||
self.print(f"# nullable={node.nullable}")
|
||||
self.print("mark = self.mark()")
|
||||
if is_loop:
|
||||
self.print("children = []")
|
||||
self.visit(rhs, is_loop=is_loop, is_gather=is_gather)
|
||||
if is_loop:
|
||||
self.print("return children")
|
||||
else:
|
||||
self.print("return None")
|
||||
|
||||
def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None:
|
||||
name, call = self.callmakervisitor.visit(node.item)
|
||||
if node.name:
|
||||
name = node.name
|
||||
if not name:
|
||||
self.print(call)
|
||||
else:
|
||||
if name != "cut":
|
||||
name = dedupe(name, names)
|
||||
self.print(f"({name} := {call})")
|
||||
|
||||
def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -> None:
|
||||
if is_loop:
|
||||
assert len(node.alts) == 1
|
||||
for alt in node.alts:
|
||||
self.visit(alt, is_loop=is_loop, is_gather=is_gather)
|
||||
|
||||
def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
|
||||
names: List[str] = []
|
||||
self.print("cut = False") # TODO: Only if needed.
|
||||
if is_loop:
|
||||
self.print("while (")
|
||||
else:
|
||||
self.print("if (")
|
||||
with self.indent():
|
||||
first = True
|
||||
for item in node.items:
|
||||
if first:
|
||||
first = False
|
||||
else:
|
||||
self.print("and")
|
||||
self.visit(item, names=names)
|
||||
self.print("):")
|
||||
with self.indent():
|
||||
action = node.action
|
||||
if not action:
|
||||
if is_gather:
|
||||
assert len(names) == 2
|
||||
action = f"[{names[0]}] + {names[1]}"
|
||||
else:
|
||||
action = f"[{', '.join(names)}]"
|
||||
if is_loop:
|
||||
self.print(f"children.append({action})")
|
||||
self.print(f"mark = self.mark()")
|
||||
else:
|
||||
self.print(f"return {action}")
|
||||
self.print("self.reset(mark)")
|
||||
# Skip remaining alternatives if a cut was reached.
|
||||
self.print("if cut: return None") # TODO: Only if needed.
|
|
@ -0,0 +1,128 @@
|
|||
# Adapted from mypy (mypy/build.py) under the MIT license.
|
||||
|
||||
from typing import *
|
||||
|
||||
|
||||
def strongly_connected_components(
|
||||
vertices: AbstractSet[str], edges: Dict[str, AbstractSet[str]]
|
||||
) -> Iterator[AbstractSet[str]]:
|
||||
"""Compute Strongly Connected Components of a directed graph.
|
||||
|
||||
Args:
|
||||
vertices: the labels for the vertices
|
||||
edges: for each vertex, gives the target vertices of its outgoing edges
|
||||
|
||||
Returns:
|
||||
An iterator yielding strongly connected components, each
|
||||
represented as a set of vertices. Each input vertex will occur
|
||||
exactly once; vertices not part of an SCC are returned as
|
||||
singleton sets.
|
||||
|
||||
From http://code.activestate.com/recipes/578507/.
|
||||
"""
|
||||
identified: Set[str] = set()
|
||||
stack: List[str] = []
|
||||
index: Dict[str, int] = {}
|
||||
boundaries: List[int] = []
|
||||
|
||||
def dfs(v: str) -> Iterator[Set[str]]:
|
||||
index[v] = len(stack)
|
||||
stack.append(v)
|
||||
boundaries.append(index[v])
|
||||
|
||||
for w in edges[v]:
|
||||
if w not in index:
|
||||
yield from dfs(w)
|
||||
elif w not in identified:
|
||||
while index[w] < boundaries[-1]:
|
||||
boundaries.pop()
|
||||
|
||||
if boundaries[-1] == index[v]:
|
||||
boundaries.pop()
|
||||
scc = set(stack[index[v] :])
|
||||
del stack[index[v] :]
|
||||
identified.update(scc)
|
||||
yield scc
|
||||
|
||||
for v in vertices:
|
||||
if v not in index:
|
||||
yield from dfs(v)
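
A small illustrative call, with a made-up four-vertex graph; each vertex label maps to the set of its outgoing edges:

edges = {"a": {"b"}, "b": {"a", "c"}, "c": {"d"}, "d": set()}
for scc in strongly_connected_components(edges.keys(), edges):
    print(scc)
# Prints {'d'}, {'c'} and {'a', 'b'}: vertices outside any cycle come out as singletons.
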
|
||||
|
||||
|
||||
def topsort(
|
||||
data: Dict[AbstractSet[str], Set[AbstractSet[str]]]
|
||||
) -> Iterable[AbstractSet[AbstractSet[str]]]:
|
||||
"""Topological sort.
|
||||
|
||||
Args:
|
||||
data: A map from SCCs (represented as frozen sets of strings) to
|
||||
sets of SCCs, its dependencies. NOTE: This data structure
|
||||
is modified in place -- for normalization purposes,
|
||||
self-dependencies are removed and entries representing
|
||||
orphans are added.
|
||||
|
||||
Returns:
|
||||
An iterator yielding sets of SCCs that have an equivalent
|
||||
ordering. NOTE: The algorithm doesn't care about the internal
|
||||
structure of SCCs.
|
||||
|
||||
Example:
|
||||
Suppose the input has the following structure:
|
||||
|
||||
{A: {B, C}, B: {D}, C: {D}}
|
||||
|
||||
This is normalized to:
|
||||
|
||||
{A: {B, C}, B: {D}, C: {D}, D: {}}
|
||||
|
||||
The algorithm will yield the following values:
|
||||
|
||||
{D}
|
||||
{B, C}
|
||||
{A}
|
||||
|
||||
From http://code.activestate.com/recipes/577413/.
|
||||
"""
|
||||
# TODO: Use a faster algorithm?
|
||||
for k, v in data.items():
|
||||
v.discard(k) # Ignore self dependencies.
|
||||
for item in set.union(*data.values()) - set(data.keys()):
|
||||
data[item] = set()
|
||||
while True:
|
||||
ready = {item for item, dep in data.items() if not dep}
|
||||
if not ready:
|
||||
break
|
||||
yield ready
|
||||
data = {item: (dep - ready) for item, dep in data.items() if item not in ready}
|
||||
assert not data, "A cyclic dependency exists amongst %r" % data
|
||||
|
||||
|
||||
def find_cycles_in_scc(
|
||||
graph: Dict[str, AbstractSet[str]], scc: AbstractSet[str], start: str
|
||||
) -> Iterable[List[str]]:
|
||||
"""Find cycles in SCC emanating from start.
|
||||
|
||||
Yields lists of the form ['A', 'B', 'C', 'A'], which means there's
|
||||
a path from A -> B -> C -> A. The first item is always the start
|
||||
argument, but the last item may be another element, e.g. ['A',
|
||||
'B', 'C', 'B'] means there's a path from A to B and there's a
|
||||
cycle from B to C and back.
|
||||
"""
|
||||
# Basic input checks.
|
||||
assert start in scc, (start, scc)
|
||||
assert scc <= graph.keys(), scc - graph.keys()
|
||||
|
||||
# Reduce the graph to nodes in the SCC.
|
||||
graph = {src: {dst for dst in dsts if dst in scc} for src, dsts in graph.items() if src in scc}
|
||||
assert start in graph
|
||||
|
||||
# Recursive helper that yields cycles.
|
||||
def dfs(node: str, path: List[str]) -> Iterator[List[str]]:
|
||||
if node in path:
|
||||
yield path + [node]
|
||||
return
|
||||
path = path + [node] # TODO: Make this not quadratic.
|
||||
for child in graph[node]:
|
||||
yield from dfs(child, path)
|
||||
|
||||
yield from dfs(start, [])
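
A small driver matching the shapes in the docstring (the three-node graph is invented for illustration):

g = {"A": {"B"}, "B": {"C"}, "C": {"A", "B"}}
for cycle in find_cycles_in_scc(g, {"A", "B", "C"}, "A"):
    print(" -> ".join(cycle))
# A -> B -> C -> A
# A -> B -> C -> B   (the order of the two cycles may vary)
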
|
|
@ -0,0 +1,126 @@
|
|||
import importlib.util
|
||||
import io
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
import textwrap
|
||||
import tokenize
|
||||
|
||||
from typing import Any, cast, Dict, IO, Type, Final
|
||||
|
||||
from pegen.build import compile_c_extension
|
||||
from pegen.c_generator import CParserGenerator
|
||||
from pegen.grammar import Grammar
|
||||
from pegen.grammar_parser import GeneratedParser as GrammarParser
|
||||
from pegen.parser import Parser
|
||||
from pegen.python_generator import PythonParserGenerator
|
||||
from pegen.tokenizer import Tokenizer
|
||||
|
||||
|
||||
def generate_parser(grammar: Grammar) -> Type[Parser]:
|
||||
# Generate a parser.
|
||||
out = io.StringIO()
|
||||
genr = PythonParserGenerator(grammar, out)
|
||||
genr.generate("<string>")
|
||||
|
||||
# Load the generated parser class.
|
||||
ns: Dict[str, Any] = {}
|
||||
exec(out.getvalue(), ns)
|
||||
return ns["GeneratedParser"]
|
||||
|
||||
|
||||
def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
|
||||
# Run a parser on a file (stream).
|
||||
tokenizer = Tokenizer(tokenize.generate_tokens(file.readline)) # type: ignore # typeshed issue #3515
|
||||
parser = parser_class(tokenizer, verbose=verbose)
|
||||
result = parser.start()
|
||||
if result is None:
|
||||
raise parser.make_syntax_error()
|
||||
return result
|
||||
|
||||
|
||||
def parse_string(
|
||||
source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
|
||||
) -> Any:
|
||||
# Run the parser on a string.
|
||||
if dedent:
|
||||
source = textwrap.dedent(source)
|
||||
file = io.StringIO(source)
|
||||
return run_parser(file, parser_class, verbose=verbose) # type: ignore # typeshed issue #3515
|
||||
|
||||
|
||||
def make_parser(source: str) -> Type[Parser]:
|
||||
# Combine parse_string() and generate_parser().
|
||||
grammar = parse_string(source, GrammarParser)
|
||||
return generate_parser(grammar)
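
A usage sketch tying these helpers together: build a parser from a toy grammar string and run it on one line of input. The grammar below is illustrative only; it assumes, as the metagrammar in this change allows, rules with inline actions and a left-recursive expr rule handled by memoize_left_rec:

calc = make_parser("""
start: expr NEWLINE ENDMARKER { expr }
expr: expr '+' term { expr + term } | term { term }
term: NUMBER { int(number.string) }
""")
print(parse_string("1 + 2 + 3\n", calc))  # expected to print 6
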
|
||||
|
||||
|
||||
def import_file(full_name: str, path: str) -> Any:
|
||||
"""Import a python module from a path"""
|
||||
|
||||
spec = importlib.util.spec_from_file_location(full_name, path)
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
|
||||
# We assume this is not None and has an exec_module() method.
|
||||
# See https://docs.python.org/3/reference/import.html?highlight=exec_module#loading
|
||||
loader = cast(Any, spec.loader)
|
||||
loader.exec_module(mod)
|
||||
return mod
|
||||
|
||||
|
||||
def generate_c_parser_source(grammar: Grammar) -> str:
|
||||
out = io.StringIO()
|
||||
genr = CParserGenerator(grammar, out)
|
||||
genr.generate("<string>")
|
||||
return out.getvalue()
|
||||
|
||||
|
||||
def generate_parser_c_extension(
|
||||
grammar: Grammar, path: pathlib.PurePath, debug: bool = False
|
||||
) -> Any:
|
||||
"""Generate a parser c extension for the given grammar in the given path
|
||||
|
||||
Returns a module object with a parse_string() method.
|
||||
TODO: express that using a Protocol.
|
||||
"""
|
||||
# Make sure that the working directory is empty: reusing non-empty temporary
|
||||
# directories when generating extensions can lead to segmentation faults.
|
||||
# Check issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more
|
||||
# context.
|
||||
assert not os.listdir(path)
|
||||
source = path / "parse.c"
|
||||
with open(source, "w") as file:
|
||||
genr = CParserGenerator(grammar, file, debug=debug)
|
||||
genr.generate("parse.c")
|
||||
extension_path = compile_c_extension(str(source), build_dir=str(path / "build"))
|
||||
extension = import_file("parse", extension_path)
|
||||
return extension
|
||||
|
||||
|
||||
def print_memstats() -> bool:
|
||||
MiB: Final = 2 ** 20
|
||||
try:
|
||||
import psutil # type: ignore
|
||||
except ImportError:
|
||||
return False
|
||||
print("Memory stats:")
|
||||
process = psutil.Process()
|
||||
meminfo = process.memory_info()
|
||||
res = {}
|
||||
res["rss"] = meminfo.rss / MiB
|
||||
res["vms"] = meminfo.vms / MiB
|
||||
if sys.platform == "win32":
|
||||
res["maxrss"] = meminfo.peak_wset / MiB
|
||||
else:
|
||||
# See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process
|
||||
import resource # Since it doesn't exist on Windows.
|
||||
|
||||
rusage = resource.getrusage(resource.RUSAGE_SELF)
|
||||
if sys.platform == "darwin":
|
||||
factor = 1
|
||||
else:
|
||||
factor = 1024 # Linux
|
||||
res["maxrss"] = rusage.ru_maxrss * factor / MiB
|
||||
for key, value in res.items():
|
||||
print(f" {key:12.12s}: {value:10.0f} MiB")
|
||||
return True
|
|
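The helpers above compose naturally: make_parser() parses a grammar with the metagrammar parser and feeds the result to generate_parser(), while parse_string() runs any generated parser over a string. A minimal usage sketch, assuming pegen is importable and that this toy three-rule grammar is accepted by the metagrammar (it mirrors the style used in pegen's own tests):

from pegen.testutil import make_parser, parse_string

toy_grammar = """
start: sum NEWLINE
sum: term '+' term | term
term: NUMBER
"""
parser_class = make_parser(toy_grammar)      # parse the grammar, generate a Python parser
node = parse_string("1 + 2\n", parser_class)  # run the generated parser on a string
print(node)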
@@ -0,0 +1,86 @@
import token
import tokenize
from typing import List, Iterator

Mark = int  # NewType('Mark', int)

exact_token_types = token.EXACT_TOKEN_TYPES  # type: ignore


def shorttok(tok: tokenize.TokenInfo) -> str:
    return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"


class Tokenizer:
    """Caching wrapper for the tokenize module.

    This is pretty tied to Python's syntax.
    """

    _tokens: List[tokenize.TokenInfo]

    def __init__(self, tokengen: Iterator[tokenize.TokenInfo], *, verbose: bool = False):
        self._tokengen = tokengen
        self._tokens = []
        self._index = 0
        self._verbose = verbose
        if verbose:
            self.report(False, False)

    def getnext(self) -> tokenize.TokenInfo:
        """Return the next token and update the index."""
        cached = True
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            self._tokens.append(tok)
            cached = False
        tok = self._tokens[self._index]
        self._index += 1
        if self._verbose:
            self.report(cached, False)
        return tok

    def peek(self) -> tokenize.TokenInfo:
        """Return the next token *without* updating the index."""
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            self._tokens.append(tok)
        return self._tokens[self._index]

    def diagnose(self) -> tokenize.TokenInfo:
        if not self._tokens:
            self.getnext()
        return self._tokens[-1]

    def mark(self) -> Mark:
        return self._index

    def reset(self, index: Mark) -> None:
        if index == self._index:
            return
        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
        old_index = self._index
        self._index = index
        if self._verbose:
            self.report(True, index < old_index)

    def report(self, cached: bool, back: bool) -> None:
        if back:
            fill = "-" * self._index + "-"
        elif cached:
            fill = "-" * self._index + ">"
        else:
            fill = "-" * self._index + "*"
        if self._index == 0:
            print(f"{fill} (Bof)")
        else:
            tok = self._tokens[self._index - 1]
            print(f"{fill} {shorttok(tok)}")
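mark() and reset() are what let a generated PEG parser backtrack cheaply: a failed alternative rewinds the index while the cached tokens stay put. A small sketch of that pattern, assuming pegen is on sys.path; the input string is illustrative.

import io
import tokenize

from pegen.tokenizer import Tokenizer  # the class shown above

tok = Tokenizer(tokenize.generate_tokens(io.StringIO("1 + 2\n").readline))

pos = tok.mark()
first = tok.getnext()           # NUMBER '1'
if tok.peek().string != "*":    # this alternative does not match...
    tok.reset(pos)              # ...so rewind to the saved mark
assert tok.getnext() is first   # cached tokens are replayed, not re-tokenized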
@@ -0,0 +1,9 @@
[tool.black]
line-length = 99
target_version = ['py38']
exclude = '''
(
    /pegen/grammar_parser.py  # generated file
    | /test/test_data/  # test files
)
'''
@@ -0,0 +1,2 @@
memory-profiler==0.57.0
psutil==5.7.0
@@ -0,0 +1 @@
# This exists to let mypy find modules here
@@ -0,0 +1,28 @@
import ast
import sys
import time
import token
import tokenize

from pegen.testutil import print_memstats


def main() -> None:
    t0 = time.time()
    for filename in sys.argv[1:]:
        print(filename, end="\r")
        try:
            with open(filename) as file:
                source = file.read()
                tree = ast.parse(source, filename)
        except Exception as err:
            print(f"{filename}: {err.__class__.__name__}: {err}", file=sys.stderr)
    tok = None
    t1 = time.time()
    dt = t1 - t0
    print(f"Parsed in {dt:.3f} secs", file=sys.stderr)
    print_memstats()


if __name__ == "__main__":
    main()
@@ -0,0 +1,140 @@
#!/usr/bin/env python3.9

import argparse
import ast
import sys
import os
import resource
from time import time

import memory_profiler

sys.path.insert(0, os.getcwd())
from peg_extension import parse
from pegen.build import build_parser_and_generator
from scripts.test_parse_directory import parse_directory

argparser = argparse.ArgumentParser(
    prog="benchmark", description="Reproduce the various pegen benchmarks"
)
argparser.add_argument(
    "--parser",
    action="store",
    choices=["pegen", "cpython"],
    default="pegen",
    help="Which parser to benchmark (default is pegen)",
)
argparser.add_argument(
    "--target",
    action="store",
    choices=["xxl", "stdlib"],
    default="xxl",
    help="Which target to use for the benchmark (default is xxl.py)",
)

subcommands = argparser.add_subparsers(title="Benchmarks", dest="subcommand")
command_compile = subcommands.add_parser(
    "compile", help="Benchmark parsing and compiling to bytecode"
)
command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST")
command_check = subcommands.add_parser(
    "check", help="Benchmark parsing and throwing the tree away"
)


def benchmark(func):
    def wrapper(*args):
        times = list()
        for _ in range(3):
            start = time()
            result = func(*args)
            end = time()
            times.append(end - start)
        memory = memory_profiler.memory_usage((func, args))
        print(f"{func.__name__}")
        print(f"\tTime: {sum(times)/3:.3f} seconds on an average of 3 runs")
        print(f"\tMemory: {max(memory)} MiB on an average of 3 runs")
        return result

    return wrapper


@benchmark
def time_compile(source, parser):
    if parser == "cpython":
        return compile(source, os.path.join("data", "xxl.py"), "exec")
    else:
        return parse.parse_string(source, mode=2)


@benchmark
def time_parse(source, parser):
    if parser == "cpython":
        return ast.parse(source, os.path.join("data", "xxl.py"), "exec")
    else:
        return parse.parse_string(source, mode=1)


@benchmark
def time_check(source):
    return parse.parse_string(source, mode=0)


def run_benchmark_xxl(subcommand, parser, source):
    if subcommand == "compile":
        time_compile(source, parser)
    elif subcommand == "parse":
        time_parse(source, parser)
    elif subcommand == "check":
        time_check(source)


def run_benchmark_stdlib(subcommand, parser):
    modes = {"compile": 2, "parse": 1, "check": 0}
    extension = None
    if parser == "pegen":
        extension = build_parser_and_generator(
            "../../Grammar/python.gram",
            "peg_extension/parse.c",
            compile_extension=True,
            skip_actions=False,
        )
    for _ in range(3):
        parse_directory(
            "../../Lib",
            "../../Grammar/python.gram",
            verbose=False,
            excluded_files=[
                "*/bad*",
                "*/lib2to3/tests/data/*",
            ],
            skip_actions=False,
            tree_arg=0,
            short=True,
            extension=extension,
            mode=modes[subcommand],
            parser=parser,
        )


def main():
    args = argparser.parse_args()
    subcommand = args.subcommand
    parser = args.parser
    target = args.target

    if subcommand is None:
        argparser.error("A benchmark to run is required")
    if subcommand == "check" and parser == "cpython":
        argparser.error("Cannot use check target with the CPython parser")

    if target == "xxl":
        with open(os.path.join("data", "xxl.py"), "r") as f:
            source = f.read()
            run_benchmark_xxl(subcommand, parser, source)
    elif target == "stdlib":
        run_benchmark_stdlib(subcommand, parser)


if __name__ == "__main__":
    main()
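A hypothetical way to drive the script end to end, assuming it lives at scripts/benchmark.py, is run from the peg_generator directory, data/xxl.py exists, memory_profiler is installed (see the requirements file above), and the peg_extension has been built, since the script imports it unconditionally even for --parser=cpython:

import subprocess

# Global options must come before the subcommand because they are defined on
# the top-level argparse parser, not on the subparsers.
for subcommand in ("parse", "compile"):
    subprocess.run(
        ["python3", "scripts/benchmark.py", "--parser=cpython", "--target=xxl", subcommand],
        check=True,
    )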
@@ -0,0 +1,86 @@
#!/usr/bin/env python3.8

import argparse
import os
import json

from typing import Dict, Any
from urllib.request import urlretrieve

argparser = argparse.ArgumentParser(
    prog="download_pypi_packages", description="Helper program to download PyPI packages",
)
argparser.add_argument(
    "-n", "--number", type=int, default=100, help="Number of packages to download"
)
argparser.add_argument(
    "-a", "--all", action="store_true", help="Download all packages listed in the json file"
)


def load_json(filename: str) -> Dict[Any, Any]:
    with open(os.path.join("data", f"{filename}.json"), "r") as f:
        j = json.loads(f.read())
    return j


def remove_json(filename: str) -> None:
    path = os.path.join("data", f"{filename}.json")
    os.remove(path)


def download_package_json(package_name: str) -> None:
    url = f"https://pypi.org/pypi/{package_name}/json"
    urlretrieve(url, os.path.join("data", f"{package_name}.json"))


def download_package_code(name: str, package_json: Dict[Any, Any]) -> None:
    source_index = -1
    for idx, url_info in enumerate(package_json["urls"]):
        if url_info["python_version"] == "source":
            source_index = idx
            break
    filename = package_json["urls"][source_index]["filename"]
    url = package_json["urls"][source_index]["url"]
    urlretrieve(url, os.path.join("data", "pypi", filename))


def main() -> None:
    args = argparser.parse_args()
    number_packages = args.number
    all_packages = args.all

    top_pypi_packages = load_json("top-pypi-packages-365-days")
    if all_packages:
        top_pypi_packages = top_pypi_packages["rows"]
    elif number_packages >= 0 and number_packages <= 4000:
        top_pypi_packages = top_pypi_packages["rows"][:number_packages]
    else:
        raise AssertionError("Unknown value for NUMBER_OF_PACKAGES")

    try:
        os.mkdir(os.path.join("data", "pypi"))
    except FileExistsError:
        pass

    for package in top_pypi_packages:
        package_name = package["project"]

        print(f"Downloading JSON Data for {package_name}... ", end="")
        download_package_json(package_name)
        print("Done")

        package_json = load_json(package_name)
        try:
            print(f"Downloading and compressing package {package_name} ... ", end="")
            download_package_code(package_name, package_json)
            print("Done")
        except (IndexError, KeyError):
            print(f"Could not locate source for {package_name}")
            continue
        finally:
            remove_json(package_name)


if __name__ == "__main__":
    main()
@@ -0,0 +1,61 @@
#!/usr/bin/env python3.8
"""Find the maximum amount of nesting for an expression that can be parsed
without causing a parse error.

Starting at the INITIAL_NESTING_DEPTH, an expression containing n parentheses
around a 0 is generated then tested with both the C and Python parsers. We
continue incrementing the number of parentheses by 10 until both parsers have
failed. As soon as a single parser fails, we stop testing that parser.

The grammar file, initial nesting size, and amount by which the nested size is
incremented on each success can be controlled by changing the GRAMMAR_FILE,
INITIAL_NESTING_DEPTH, or NESTED_INCR_AMT variables.

Usage: python -m scripts.find_max_nesting
"""
import os
import sys
from tempfile import TemporaryDirectory
from pathlib import Path
from typing import Any

from _peg_parser import parse_string

GRAMMAR_FILE = "data/python.gram"
INITIAL_NESTING_DEPTH = 10
NESTED_INCR_AMT = 10


FAIL = "\033[91m"
ENDC = "\033[0m"


def check_nested_expr(nesting_depth: int) -> bool:
    expr = f"{'(' * nesting_depth}0{')' * nesting_depth}"

    try:
        parse_string(expr)
        print(f"Nesting depth of {nesting_depth} is successful")
        return True
    except Exception as err:
        print(f"{FAIL}(Failed with nesting depth of {nesting_depth}{ENDC}")
        print(f"{FAIL}\t{err}{ENDC}")
        return False


def main() -> None:
    print(f"Testing {GRAMMAR_FILE} starting at nesting depth of {INITIAL_NESTING_DEPTH}...")

    nesting_depth = INITIAL_NESTING_DEPTH
    succeeded = True
    while succeeded:
        expr = f"{'(' * nesting_depth}0{')' * nesting_depth}"
        if succeeded:
            succeeded = check_nested_expr(nesting_depth)
        nesting_depth += NESTED_INCR_AMT

    sys.exit(1)


if __name__ == "__main__":
    main()
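The probing loop itself does not depend on pegen: the same idea can be tried against whatever parser the running interpreter uses by swapping parse_string() for the builtin compile(). A hedged, self-contained variant under that assumption:

def max_nesting_with_compile(start: int = 10, step: int = 10) -> int:
    # Increase the nesting depth until compile() refuses the expression and
    # report the last depth that still parsed.
    depth = start
    last_ok = 0
    while True:
        expr = "(" * depth + "0" + ")" * depth
        try:
            compile(expr, "<string>", "eval")
        except Exception:  # typically SyntaxError, MemoryError, or RecursionError
            return last_ok
        last_ok = depth
        depth += step

print(max_nesting_with_compile())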
@@ -0,0 +1,111 @@
#!/usr/bin/env python3.8

""" Convert a grammar into a dot-file suitable for use with GraphViz

    For example:
        Generate the GraphViz file:
        # scripts/grammar_grapher.py data/python.gram > python.gv

        Then generate the graph...

        # twopi python.gv -Tpng > python_twopi.png

        or

        # dot python.gv -Tpng > python_dot.png

        NOTE: The _dot_ and _twopi_ tools seem to produce the most useful results.
              The _circo_ tool is the worst of the bunch. Don't even bother.
"""

import argparse
import sys

from typing import Any, List

sys.path.insert(0, ".")

from pegen.build import build_parser
from pegen.grammar import (
    Alt,
    Cut,
    Grammar,
    Group,
    Leaf,
    Lookahead,
    Rule,
    NameLeaf,
    NamedItem,
    Opt,
    Repeat,
    Rhs,
)

argparser = argparse.ArgumentParser(prog="graph_grammar", description="Graph a grammar tree",)
argparser.add_argument("grammar_file", help="The grammar file to graph")


def references_for_item(item: Any) -> List[Any]:
    if isinstance(item, Alt):
        return [_ref for _item in item.items for _ref in references_for_item(_item)]
    elif isinstance(item, Cut):
        return []
    elif isinstance(item, Group):
        return references_for_item(item.rhs)
    elif isinstance(item, Lookahead):
        return references_for_item(item.node)
    elif isinstance(item, NamedItem):
        return references_for_item(item.item)

    # NOTE NameLeaf must be before Leaf
    elif isinstance(item, NameLeaf):
        if item.value == "ENDMARKER":
            return []
        return [item.value]
    elif isinstance(item, Leaf):
        return []

    elif isinstance(item, Opt):
        return references_for_item(item.node)
    elif isinstance(item, Repeat):
        return references_for_item(item.node)
    elif isinstance(item, Rhs):
        return [_ref for alt in item.alts for _ref in references_for_item(alt)]
    elif isinstance(item, Rule):
        return references_for_item(item.rhs)
    else:
        raise RuntimeError(f"Unknown item: {type(item)}")


def main() -> None:
    args = argparser.parse_args()

    try:
        grammar, parser, tokenizer = build_parser(args.grammar_file)
    except Exception as err:
        print("ERROR: Failed to parse grammar file", file=sys.stderr)
        sys.exit(1)

    references = {}
    for name, rule in grammar.rules.items():
        references[name] = set(references_for_item(rule))

    # Flatten the start node if it has only a single reference
    root_node = "start"
    if start := references["start"]:
        if len(start) == 1:
            root_node = list(start)[0]
            del references["start"]

    print("digraph g1 {")
    print('\toverlap="scale";')  # Force twopi to scale the graph to avoid overlaps
    print(f'\troot="{root_node}";')
    print(f"\t{root_node} [color=green, shape=circle]")
    for name, refs in references.items():
        if refs:  # Ignore empty sets
            print(f"\t{name} -> {','.join(refs)};")
    print("}")


if __name__ == "__main__":
    main()
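The emitted dot file is simply one node per rule plus an edge per reference, with the (possibly flattened) start rule highlighted. A tiny self-contained illustration of the output the printing loop produces, using a hand-written reference map instead of a parsed grammar:

# Illustrative reference map: start -> expr -> term.
references = {"start": {"expr"}, "expr": {"term"}, "term": set()}
root_node = "expr"  # "start" has a single reference, so it gets flattened away
del references["start"]

print("digraph g1 {")
print('\toverlap="scale";')
print(f'\troot="{root_node}";')
print(f"\t{root_node} [color=green, shape=circle]")
for name, refs in references.items():
    if refs:
        print(f"\t{name} -> {','.join(refs)};")
print("}")
# Prints a two-node graph with a single edge: expr -> term.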
@@ -0,0 +1,66 @@
#!/usr/bin/env python3.8

"""Produce a report about the most-memoable types.

Reads a list of statistics from a file given as the first command-line
argument.  Each line must be two numbers, being a type and a count.  We
then read some other files and produce a list sorted by most frequent type.

There should also be something to recognize left-recursive rules.
"""

import os
import re
import sys

from typing import Dict

reporoot = os.path.dirname(os.path.dirname(__file__))
parse_c = os.path.join(reporoot, "peg_extension", "parse.c")


class TypeMapper:
    """State used to map types to names."""

    def __init__(self, filename: str) -> None:
        self.table: Dict[int, str] = {}
        with open(filename) as f:
            for line in f:
                match = re.match(r"#define (\w+)_type (\d+)", line)
                if match:
                    name, type = match.groups()
                    if "left" in line.lower():
                        name += " // Left-recursive"
                    self.table[int(type)] = name

    def lookup(self, type: int) -> str:
        return self.table.get(type, str(type))


def main() -> None:
    mapper = TypeMapper(parse_c)
    table = []
    filename = sys.argv[1]
    with open(filename) as f:
        for lineno, line in enumerate(f, 1):
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            parts = line.split()
            # Extra fields ignored
            if len(parts) < 2:
                print(f"{lineno}: bad input ({line!r})")
                continue
            try:
                type, count = map(int, parts[:2])
            except ValueError as err:
                print(f"{lineno}: non-integer input ({line!r})")
                continue
            table.append((type, count))
    table.sort(key=lambda values: -values[1])
    for type, count in table:
        print(f"{type:4d} {count:9d} {mapper.lookup(type)}")


if __name__ == "__main__":
    main()
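TypeMapper keys everything off the "#define <rule>_type <n>" lines in the generated parse.c, tagging rules whose define line mentions left recursion. A small sketch of that mapping on a made-up line in that style (the exact comment format used in parse.c is an assumption here):

import re

line = "#define expr_type 1007  // Left-recursive"  # illustrative, not from parse.c
match = re.match(r"#define (\w+)_type (\d+)", line)
assert match is not None
name, type_ = match.groups()
if "left" in line.lower():
    name += " // Left-recursive"
print({int(type_): name})  # -> {1007: 'expr // Left-recursive'}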
@@ -0,0 +1,117 @@
#!/usr/bin/env python3.8

"""Show the parse tree for a given program, nicely formatted.

Example:

$ scripts/show_parse.py a+b
Module(
    body=[
        Expr(
            value=BinOp(
                left=Name(id="a", ctx=Load()), op=Add(), right=Name(id="b", ctx=Load())
            )
        )
    ],
    type_ignores=[],
)
$

Use -v to show line numbers and column offsets.

The formatting is done using black.  You can also import this module
and call one of its functions.
"""

import argparse
import ast
import difflib
import os
import sys
import tempfile

from typing import List

parser = argparse.ArgumentParser()
parser.add_argument(
    "-d", "--diff", action="store_true", help="show diff between grammar and ast (requires -g)"
)
parser.add_argument("-g", "--grammar-file", help="grammar to use (default: use the ast module)")
parser.add_argument(
    "-m",
    "--multiline",
    action="store_true",
    help="concatenate program arguments using newline instead of space",
)
parser.add_argument("-v", "--verbose", action="store_true", help="show line/column numbers")
parser.add_argument("program", nargs="+", help="program to parse (will be concatenated)")


def format_tree(tree: ast.AST, verbose: bool = False) -> str:
    with tempfile.NamedTemporaryFile("w+") as tf:
        tf.write(ast.dump(tree, include_attributes=verbose))
        tf.write("\n")
        tf.flush()
        cmd = f"black -q {tf.name}"
        sts = os.system(cmd)
        if sts:
            raise RuntimeError(f"Command {cmd!r} failed with status 0x{sts:x}")
        tf.seek(0)
        return tf.read()


def diff_trees(a: ast.AST, b: ast.AST, verbose: bool = False) -> List[str]:
    sa = format_tree(a, verbose)
    sb = format_tree(b, verbose)
    la = sa.splitlines()
    lb = sb.splitlines()
    return list(difflib.unified_diff(la, lb, "a", "b", lineterm=""))


def show_parse(source: str, verbose: bool = False) -> str:
    tree = ast.parse(source)
    return format_tree(tree, verbose).rstrip("\n")


def print_parse(source: str, verbose: bool = False) -> None:
    print(show_parse(source, verbose))


def main() -> None:
    args = parser.parse_args()
    if args.diff and not args.grammar_file:
        parser.error("-d/--diff requires -g/--grammar-file")
    if args.multiline:
        sep = "\n"
    else:
        sep = " "
    program = sep.join(args.program)
    if args.grammar_file:
        sys.path.insert(0, os.curdir)
        from pegen.build import build_parser_and_generator

        build_parser_and_generator(args.grammar_file, "peg_parser/parse.c", compile_extension=True)
        from pegen.parse import parse_string  # type: ignore[import]

        tree = parse_string(program, mode=1)

        if args.diff:
            a = tree
            b = ast.parse(program)
            diff = diff_trees(a, b, args.verbose)
            if diff:
                for line in diff:
                    print(line)
            else:
                print("# Trees are the same")
        else:
            print(f"# Parsed using {args.grammar_file}")
            print(format_tree(tree, args.verbose))
    else:
        tree = ast.parse(program)
        print("# Parse using ast.parse()")
        print(format_tree(tree, args.verbose))


if __name__ == "__main__":
    main()
@@ -0,0 +1,289 @@
#!/usr/bin/env python3.8

import argparse
import ast
import os
import sys
import tempfile
import time
import traceback
from glob import glob
from pathlib import PurePath

from typing import List, Optional, Any

sys.path.insert(0, os.getcwd())
from pegen.build import build_parser_and_generator
from pegen.testutil import print_memstats
from scripts import show_parse

SUCCESS = "\033[92m"
FAIL = "\033[91m"
ENDC = "\033[0m"

argparser = argparse.ArgumentParser(
    prog="test_parse_directory",
    description="Helper program to test directories or files for pegen",
)
argparser.add_argument("-d", "--directory", help="Directory path containing files to test")
argparser.add_argument("-g", "--grammar-file", help="Grammar file path")
argparser.add_argument(
    "-e", "--exclude", action="append", default=[], help="Glob(s) for matching files to exclude"
)
argparser.add_argument(
    "-s", "--short", action="store_true", help="Only show errors, in a more Emacs-friendly format"
)
argparser.add_argument(
    "-v", "--verbose", action="store_true", help="Display detailed errors for failures"
)
argparser.add_argument(
    "--skip-actions", action="store_true", help="Suppress code emission for rule actions",
)
argparser.add_argument(
    "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
)


def report_status(
    succeeded: bool,
    file: str,
    verbose: bool,
    error: Optional[Exception] = None,
    short: bool = False,
) -> None:
    if short and succeeded:
        return

    if succeeded is True:
        status = "OK"
        COLOR = SUCCESS
    else:
        status = "Fail"
        COLOR = FAIL

    if short:
        lineno = 0
        offset = 0
        if isinstance(error, SyntaxError):
            lineno = error.lineno or 1
            offset = error.offset or 1
            message = error.args[0]
        else:
            message = f"{error.__class__.__name__}: {error}"
        print(f"{file}:{lineno}:{offset}: {message}")
    else:
        print(f"{COLOR}{file:60} {status}{ENDC}")

        if error and verbose:
            print(f"  {str(error.__class__.__name__)}: {error}")


def compare_trees(
    actual_tree: ast.AST, file: str, verbose: bool, include_attributes: bool = False,
) -> int:
    with open(file) as f:
        expected_tree = ast.parse(f.read())

    expected_text = ast.dump(expected_tree, include_attributes=include_attributes)
    actual_text = ast.dump(actual_tree, include_attributes=include_attributes)
    if actual_text == expected_text:
        if verbose:
            print(f"Tree for {file}:")
            print(show_parse.format_tree(actual_tree, include_attributes))
        return 0

    print(f"Diffing ASTs for {file} ...")

    expected = show_parse.format_tree(expected_tree, include_attributes)
    actual = show_parse.format_tree(actual_tree, include_attributes)

    if verbose:
        print(f"Expected for {file}:")
        print(expected)
        print(f"Actual for {file}:")
        print(actual)
        print(f"Diff for {file}:")

    diff = show_parse.diff_trees(expected_tree, actual_tree, include_attributes)
    for line in diff:
        print(line)

    return 1


def parse_directory(
    directory: str,
    grammar_file: str,
    verbose: bool,
    excluded_files: List[str],
    skip_actions: bool,
    tree_arg: int,
    short: bool,
    extension: Any,
    mode: int,
    parser: str,
) -> int:
    if parser == "cpython" and (tree_arg or mode == 0):
        print("Cannot specify tree argument or mode=0 with the cpython parser.", file=sys.stderr)
        return 1

    if not directory:
        print("You must specify a directory of files to test.", file=sys.stderr)
        return 1

    if grammar_file:
        if not os.path.exists(grammar_file):
            print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr)
            return 1

        try:
            if not extension and parser == "pegen":
                build_parser_and_generator(
                    grammar_file,
                    "peg_extension/parse.c",
                    compile_extension=True,
                    skip_actions=skip_actions,
                )
        except Exception as err:
            print(
                f"{FAIL}The following error occurred when generating the parser. Please check your grammar file.\n{ENDC}",
                file=sys.stderr,
            )
            traceback.print_exception(err.__class__, err, None)

            return 1

    else:
        print("A grammar file was not provided - attempting to use existing file...\n")

    if parser == "pegen":
        try:
            from peg_extension import parse  # type: ignore
        except:
            print(
                "An existing parser was not found. Please run `make` or specify a grammar file with the `-g` flag.",
                file=sys.stderr,
            )
            return 1

    # For a given directory, traverse files and attempt to parse each one
    # - Output success/failure for each file
    errors = 0
    files = []
    trees = {}  # Trees to compare (after everything else is done)

    t0 = time.time()
    for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
        # Only attempt to parse Python files and files that are not excluded
        should_exclude_file = False
        for pattern in excluded_files:
            if PurePath(file).match(pattern):
                should_exclude_file = True
                break

        if not should_exclude_file:
            try:
                if tree_arg:
                    mode = 1
                if parser == "cpython":
                    with open(file, "r") as f:
                        source = f.read()
                        if mode == 2:
                            compile(source, file, "exec")
                        elif mode == 1:
                            ast.parse(source, file, "exec")
                else:
                    tree = parse.parse_file(file, mode=mode)
                    if tree_arg:
                        trees[file] = tree
                if not short:
                    report_status(succeeded=True, file=file, verbose=verbose)
            except Exception as error:
                try:
                    ast.parse(file)
                except Exception:
                    if not short:
                        print(f"File {file} cannot be parsed by either pegen or the ast module.")
                else:
                    report_status(
                        succeeded=False, file=file, verbose=verbose, error=error, short=short
                    )
                    errors += 1
            files.append(file)
    t1 = time.time()

    total_seconds = t1 - t0
    total_files = len(files)

    total_bytes = 0
    total_lines = 0
    for file in files:
        # Count lines and bytes separately
        with open(file, "rb") as f:
            total_lines += sum(1 for _ in f)
            total_bytes += f.tell()

    print(
        f"Checked {total_files:,} files, {total_lines:,} lines,",
        f"{total_bytes:,} bytes in {total_seconds:,.3f} seconds.",
    )
    if total_seconds > 0:
        print(
            f"That's {total_lines / total_seconds :,.0f} lines/sec,",
            f"or {total_bytes / total_seconds :,.0f} bytes/sec.",
        )

    if parser == "pegen":
        # Dump memo stats to @data.
        with open("@data", "w") as datafile:
            for i, count in enumerate(parse.get_memo_stats()):
                if count:
                    datafile.write(f"{i:4d} {count:9d}\n")

    if short:
        print_memstats()

    if errors:
        print(f"Encountered {errors} failures.", file=sys.stderr)

    # Compare trees (the dict is empty unless -t is given)
    compare_trees_errors = 0
    for file, tree in trees.items():
        if not short:
            print("Comparing ASTs for", file)
        if compare_trees(tree, file, verbose, tree_arg >= 2) == 1:
            compare_trees_errors += 1

    if errors or compare_trees_errors:
        return 1

    return 0


def main() -> None:
    args = argparser.parse_args()
    directory = args.directory
    grammar_file = args.grammar_file
    verbose = args.verbose
    excluded_files = args.exclude
    skip_actions = args.skip_actions
    tree = args.tree
    short = args.short
    sys.exit(
        parse_directory(
            directory,
            grammar_file,
            verbose,
            excluded_files,
            skip_actions,
            tree,
            short,
            None,
            0,
            "pegen",
        )
    )


if __name__ == "__main__":
    main()
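Besides the CLI, parse_directory() is also called programmatically (benchmark.py above and test_pypi_packages.py below do exactly that). A minimal sketch of such a call, with illustrative paths that assume a built peg_extension and a checkout layout matching the relative paths used elsewhere in these scripts:

from scripts.test_parse_directory import parse_directory

status = parse_directory(
    "../../Lib/test",            # directory to walk (illustrative)
    "../../Grammar/python.gram",  # grammar file (illustrative)
    verbose=False,
    excluded_files=["*/bad*"],
    skip_actions=False,
    tree_arg=0,
    short=True,
    extension=None,
    mode=1,
    parser="pegen",
)
print("ok" if status == 0 else "failures found")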
@@ -0,0 +1,101 @@
#!/usr/bin/env python3.8

import argparse
import os
import glob
import tarfile
import zipfile
import shutil
import sys

from typing import Generator, Any

sys.path.insert(0, ".")
from pegen import build
from scripts import test_parse_directory

argparser = argparse.ArgumentParser(
    prog="test_pypi_packages", description="Helper program to test parsing PyPI packages",
)
argparser.add_argument(
    "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
)


def get_packages() -> Generator[str, None, None]:
    all_packages = (
        glob.glob("./data/pypi/*.tar.gz")
        + glob.glob("./data/pypi/*.zip")
        + glob.glob("./data/pypi/*.tgz")
    )
    for package in all_packages:
        yield package


def extract_files(filename: str) -> None:
    savedir = os.path.join("data", "pypi")
    if tarfile.is_tarfile(filename):
        tarfile.open(filename).extractall(savedir)
    elif zipfile.is_zipfile(filename):
        zipfile.ZipFile(filename).extractall(savedir)
    else:
        raise ValueError(f"Could not identify type of compressed file {filename}")


def find_dirname(package_name: str) -> str:
    for name in os.listdir(os.path.join("data", "pypi")):
        full_path = os.path.join("data", "pypi", name)
        if os.path.isdir(full_path) and name in package_name:
            return full_path
    assert False  # This is to fix mypy, should never be reached


def run_tests(dirname: str, tree: int, extension: Any) -> int:
    return test_parse_directory.parse_directory(
        dirname,
        "data/python.gram",
        verbose=False,
        excluded_files=[
            "*/failset/*",
            "*/failset/**",
            "*/failset/**/*",
            "*/test2to3/*",
            "*/test2to3/**/*",
            "*/bad*",
            "*/lib2to3/tests/data/*",
        ],
        skip_actions=False,
        tree_arg=tree,
        short=True,
        extension=extension,
    )


def main() -> None:
    args = argparser.parse_args()
    tree = args.tree

    extension = build.build_parser_and_generator(
        "data/python.gram", "peg_parser/parse.c", compile_extension=True
    )
    for package in get_packages():
        print(f"Extracting files from {package}... ", end="")
        try:
            extract_files(package)
            print("Done")
        except ValueError as e:
            print(e)
            continue

        print(f"Trying to parse all python files ... ")
        dirname = find_dirname(package)
        status = run_tests(dirname, tree, extension)
        if status == 0:
            print("Done")
            shutil.rmtree(dirname)
        else:
            print(f"Failed to parse {dirname}")


if __name__ == "__main__":
    main()
@@ -25,8 +25,10 @@ def main(regrtest_args):
        '-u',                 # Unbuffered stdout and stderr
        '-W', 'default',      # Warnings set to 'default'
        '-bb',                # Warnings about bytes/bytearray
        '-E',                 # Ignore environment variables
    ]
    if 'PYTHONOLDPARSER' not in os.environ:
        args.append('-E')     # Ignore environment variables

    # Allow user-specified interpreter options to override our defaults.
    args.extend(test.support.args_from_interpreter_flags())