bpo-40939: Use the new grammar for the grammar specification documentation (GH-19969)
(We censor the heck out of actions and some other stuff using a custom "highlighter".) Co-authored-by: Guido van Rossum <guido@python.org>
This commit is contained in:
parent
67987acd5d
commit
72cabb2aa6
|
@ -15,7 +15,7 @@ sys.path.append(os.path.abspath('includes'))
|
||||||
|
|
||||||
extensions = ['sphinx.ext.coverage', 'sphinx.ext.doctest',
|
extensions = ['sphinx.ext.coverage', 'sphinx.ext.doctest',
|
||||||
'pyspecific', 'c_annotations', 'escape4chm',
|
'pyspecific', 'c_annotations', 'escape4chm',
|
||||||
'asdl_highlight']
|
'asdl_highlight', 'peg_highlight']
|
||||||
|
|
||||||
|
|
||||||
doctest_global_setup = '''
|
doctest_global_setup = '''
|
||||||
|
|
|
@ -1,7 +1,19 @@
|
||||||
Full Grammar specification
|
Full Grammar specification
|
||||||
==========================
|
==========================
|
||||||
|
|
||||||
This is the full Python grammar, as it is read by the parser generator and used
|
This is the full Python grammar, derived directly from the grammar
|
||||||
to parse Python source files:
|
used to generate the CPython parser (see :source:`Grammar/python.gram`).
|
||||||
|
The version here omits details related to code generation and
|
||||||
|
error recovery.
|
||||||
|
|
||||||
.. literalinclude:: ../../Grammar/Grammar
|
The notation is a mixture of `EBNF
|
||||||
|
<https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form>`_
|
||||||
|
and `PEG <https://en.wikipedia.org/wiki/Parsing_expression_grammar>`_.
|
||||||
|
In particular, ``&`` followed by a symbol, token or parenthesized
|
||||||
|
group indicates a positive lookahead (i.e., is required to match but
|
||||||
|
not consumed), while ``!`` indicates a negative lookahead (i.e., is
|
||||||
|
required *not* to match). We use the ``|`` separator to mean PEG's
|
||||||
|
"ordered choice" (written as ``/`` in traditional PEG grammars).
|
||||||
|
|
||||||
|
.. literalinclude:: ../../Grammar/python.gram
|
||||||
|
:language: peg
|
||||||
|
|
|
@ -0,0 +1,75 @@
|
||||||
|
from pygments.lexer import RegexLexer, bygroups, include
|
||||||
|
from pygments.token import Comment, Generic, Keyword, Name, Operator, Punctuation, Text
|
||||||
|
|
||||||
|
from sphinx.highlighting import lexers
|
||||||
|
|
||||||
|
|
||||||
|
class PEGLexer(RegexLexer):
    """Pygments Lexer for PEG grammar (.gram) files

    This lexer strips the following elements from the grammar:

        - Meta-tags
        - Variable assignments
        - Actions
        - Lookaheads
        - Rule types
        - Rule options
        - Rules named `invalid_*` or `incorrect_*`
    """

    name = "PEG"
    aliases = ["peg"]
    filenames = ["*.gram"]
    # A rule/variable name: an identifier that does not start with a digit.
    _name = r"([^\W\d]\w*)"
    _text_ws = r"(\s*)"

    tokens = {
        "ws": [(r"\n", Text), (r"\s+", Text), (r"#.*$", Comment.Singleline)],
        # Positive lookaheads (&...) are suppressed via bygroups(None).
        "lookaheads": [
            (r"(?<=\|\s)(&\w+\s?)", bygroups(None)),
            (r"(?<=\|\s)(&'.+'\s?)", bygroups(None)),
            (r'(?<=\|\s)(&".+"\s?)', bygroups(None)),
            (r"(?<=\|\s)(&\(.+\)\s?)", bygroups(None)),
        ],
        # Meta-tags such as @keyword and triple-quoted @-blocks are hidden.
        "metas": [
            (r"(@\w+ '''(.|\n)+?''')", bygroups(None)),
            (r"^(@.*)$", bygroups(None)),
        ],
        # C actions in { ... } are hidden (non-greedy, may span lines).
        "actions": [(r"{(.|\n)+?}", bygroups(None))],
        # Quoted word tokens render as keywords; quoted punctuation as text.
        "strings": [
            (r"'\w+?'", Keyword),
            (r'"\w+?"', Keyword),
            (r"'\W+?'", Text),
            (r'"\W+?"', Text),
        ],
        # Variable assignments (name=...) are hidden.
        "variables": [(_name + _text_ws + "(=)", bygroups(None, None, None))],
        # Error-recovery rules (invalid_*/incorrect_*) are hidden entirely.
        "invalids": [
            (r"^(\s+\|\s+invalid_\w+\s*\n)", bygroups(None)),
            (r"^(\s+\|\s+incorrect_\w+\s*\n)", bygroups(None)),
            (r"^(#.*invalid syntax.*(?:.|\n)*)", bygroups(None)),
        ],
        "root": [
            include("invalids"),
            include("ws"),
            include("lookaheads"),
            include("metas"),
            include("actions"),
            include("strings"),
            include("variables"),
            # Upper-case tokens (NAME, NEWLINE, ...) render as plain text,
            # except NULL/EXTRA and macro-style calls.
            (r"\b(?!(NULL|EXTRA))([A-Z_]+)\b\s*(?!\()", Text),
            # Rule header: name, optional [type], optional (options), colon.
            # NOTE: every fragment is a raw string — '\s', '\[' and '\(' are
            # invalid escape sequences in non-raw string literals.
            (
                r"^\s*" + _name + r"\s*" + r"(\[.*\])?" + r"\s*"
                + r"(\(.+\))?" + r"\s*(:)",
                bygroups(Name.Function, None, None, Punctuation),
            ),
            (_name, Name.Function),
            (r"[\||\.|\+|\*|\?]", Operator),
            (r"{|}|\(|\)|\[|\]", Punctuation),
            (r".", Text),
        ],
    }
|
||||||
|
|
||||||
|
|
||||||
|
def setup(app):
    """Sphinx extension entry point: register the PEG lexer.

    Makes the ``peg`` highlight language available to ``literalinclude``
    and code blocks, and declares this extension parallel-read safe.
    """
    lexers["peg"] = PEGLexer()
    metadata = {"version": "1.0", "parallel_read_safe": True}
    return metadata
|
206
Grammar/Grammar
206
Grammar/Grammar
|
@ -1,206 +0,0 @@
|
||||||
# Grammar for Python
|
|
||||||
|
|
||||||
# NOTE: Editing this file has no effect except on the docs.
|
|
||||||
|
|
||||||
# Start symbols for the grammar:
|
|
||||||
# single_input is a single interactive statement;
|
|
||||||
# file_input is a module or sequence of commands read from an input file;
|
|
||||||
# eval_input is the input for the eval() functions.
|
|
||||||
# func_type_input is a PEP 484 Python 2 function type comment
|
|
||||||
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
|
||||||
# NB: due to the way TYPE_COMMENT is tokenized it will always be followed by a NEWLINE
|
|
||||||
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
|
||||||
file_input: (NEWLINE | stmt)* ENDMARKER
|
|
||||||
eval_input: testlist NEWLINE* ENDMARKER
|
|
||||||
|
|
||||||
decorator: '@' namedexpr_test NEWLINE
|
|
||||||
decorators: decorator+
|
|
||||||
decorated: decorators (classdef | funcdef | async_funcdef)
|
|
||||||
|
|
||||||
async_funcdef: ASYNC funcdef
|
|
||||||
funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] func_body_suite
|
|
||||||
|
|
||||||
parameters: '(' [typedargslist] ')'
|
|
||||||
|
|
||||||
# The following definition for typedarglist is equivalent to this set of rules:
|
|
||||||
#
|
|
||||||
# arguments = argument (',' [TYPE_COMMENT] argument)*
|
|
||||||
# argument = tfpdef ['=' test]
|
|
||||||
# kwargs = '**' tfpdef [','] [TYPE_COMMENT]
|
|
||||||
# args = '*' [tfpdef]
|
|
||||||
# kwonly_kwargs = (',' [TYPE_COMMENT] argument)* (TYPE_COMMENT | [',' [TYPE_COMMENT] [kwargs]])
|
|
||||||
# args_kwonly_kwargs = args kwonly_kwargs | kwargs
|
|
||||||
# poskeyword_args_kwonly_kwargs = arguments ( TYPE_COMMENT | [',' [TYPE_COMMENT] [args_kwonly_kwargs]])
|
|
||||||
# typedargslist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
|
|
||||||
# typedarglist = (arguments ',' [TYPE_COMMENT] '/' [',' [[TYPE_COMMENT] typedargslist_no_posonly]])|(typedargslist_no_posonly)"
|
|
||||||
#
|
|
||||||
# It needs to be fully expanded to allow our LL(1) parser to work on it.
|
|
||||||
|
|
||||||
typedargslist: (
|
|
||||||
(tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])* ',' [TYPE_COMMENT] '/' [',' [ [TYPE_COMMENT] tfpdef ['=' test] (
|
|
||||||
',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [
|
|
||||||
'*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]])
|
|
||||||
| '**' tfpdef [','] [TYPE_COMMENT]]])
|
|
||||||
| '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]])
|
|
||||||
| '**' tfpdef [','] [TYPE_COMMENT]]] )
|
|
||||||
| (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [
|
|
||||||
'*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]])
|
|
||||||
| '**' tfpdef [','] [TYPE_COMMENT]]])
|
|
||||||
| '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]])
|
|
||||||
| '**' tfpdef [','] [TYPE_COMMENT])
|
|
||||||
)
|
|
||||||
tfpdef: NAME [':' test]
|
|
||||||
|
|
||||||
# The following definition for varargslist is equivalent to this set of rules:
|
|
||||||
#
|
|
||||||
# arguments = argument (',' argument )*
|
|
||||||
# argument = vfpdef ['=' test]
|
|
||||||
# kwargs = '**' vfpdef [',']
|
|
||||||
# args = '*' [vfpdef]
|
|
||||||
# kwonly_kwargs = (',' argument )* [',' [kwargs]]
|
|
||||||
# args_kwonly_kwargs = args kwonly_kwargs | kwargs
|
|
||||||
# poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]]
|
|
||||||
# vararglist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
|
|
||||||
# varargslist = arguments ',' '/' [','[(vararglist_no_posonly)]] | (vararglist_no_posonly)
|
|
||||||
#
|
|
||||||
# It needs to be fully expanded to allow our LL(1) parser to work on it.
|
|
||||||
|
|
||||||
varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
|
|
||||||
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
|
||||||
| '**' vfpdef [',']]]
|
|
||||||
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
|
||||||
| '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
|
|
||||||
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
|
||||||
| '**' vfpdef [',']]]
|
|
||||||
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
|
||||||
| '**' vfpdef [',']
|
|
||||||
)
|
|
||||||
vfpdef: NAME
|
|
||||||
|
|
||||||
stmt: simple_stmt | compound_stmt
|
|
||||||
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
|
||||||
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
|
|
||||||
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
|
|
||||||
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
|
|
||||||
[('=' (yield_expr|testlist_star_expr))+ [TYPE_COMMENT]] )
|
|
||||||
annassign: ':' test ['=' (yield_expr|testlist_star_expr)]
|
|
||||||
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
|
|
||||||
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
|
||||||
'<<=' | '>>=' | '**=' | '//=')
|
|
||||||
# For normal and annotated assignments, additional restrictions enforced by the interpreter
|
|
||||||
del_stmt: 'del' exprlist
|
|
||||||
pass_stmt: 'pass'
|
|
||||||
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
|
||||||
break_stmt: 'break'
|
|
||||||
continue_stmt: 'continue'
|
|
||||||
return_stmt: 'return' [testlist_star_expr]
|
|
||||||
yield_stmt: yield_expr
|
|
||||||
raise_stmt: 'raise' [test ['from' test]]
|
|
||||||
import_stmt: import_name | import_from
|
|
||||||
import_name: 'import' dotted_as_names
|
|
||||||
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
|
|
||||||
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
|
|
||||||
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
|
||||||
import_as_name: NAME ['as' NAME]
|
|
||||||
dotted_as_name: dotted_name ['as' NAME]
|
|
||||||
import_as_names: import_as_name (',' import_as_name)* [',']
|
|
||||||
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
|
||||||
dotted_name: NAME ('.' NAME)*
|
|
||||||
global_stmt: 'global' NAME (',' NAME)*
|
|
||||||
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
|
|
||||||
assert_stmt: 'assert' test [',' test]
|
|
||||||
|
|
||||||
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
|
|
||||||
async_stmt: ASYNC (funcdef | with_stmt | for_stmt)
|
|
||||||
if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
|
|
||||||
while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
|
|
||||||
for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite]
|
|
||||||
try_stmt: ('try' ':' suite
|
|
||||||
((except_clause ':' suite)+
|
|
||||||
['else' ':' suite]
|
|
||||||
['finally' ':' suite] |
|
|
||||||
'finally' ':' suite))
|
|
||||||
with_stmt: 'with' with_item (',' with_item)* ':' [TYPE_COMMENT] suite
|
|
||||||
with_item: test ['as' expr]
|
|
||||||
# NB compile.c makes sure that the default except clause is last
|
|
||||||
except_clause: 'except' [test ['as' NAME]]
|
|
||||||
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
|
|
||||||
|
|
||||||
namedexpr_test: test [':=' test]
|
|
||||||
test: or_test ['if' or_test 'else' test] | lambdef
|
|
||||||
test_nocond: or_test | lambdef_nocond
|
|
||||||
lambdef: 'lambda' [varargslist] ':' test
|
|
||||||
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
|
|
||||||
or_test: and_test ('or' and_test)*
|
|
||||||
and_test: not_test ('and' not_test)*
|
|
||||||
not_test: 'not' not_test | comparison
|
|
||||||
comparison: expr (comp_op expr)*
|
|
||||||
# <> isn't actually a valid comparison operator in Python. It's here for the
|
|
||||||
# sake of a __future__ import described in PEP 401 (which really works :-)
|
|
||||||
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
|
||||||
star_expr: '*' expr
|
|
||||||
expr: xor_expr ('|' xor_expr)*
|
|
||||||
xor_expr: and_expr ('^' and_expr)*
|
|
||||||
and_expr: shift_expr ('&' shift_expr)*
|
|
||||||
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
|
||||||
arith_expr: term (('+'|'-') term)*
|
|
||||||
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
|
|
||||||
factor: ('+'|'-'|'~') factor | power
|
|
||||||
power: atom_expr ['**' factor]
|
|
||||||
atom_expr: [AWAIT] atom trailer*
|
|
||||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
|
||||||
'[' [testlist_comp] ']' |
|
|
||||||
'{' [dictorsetmaker] '}' |
|
|
||||||
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
|
||||||
testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] )
|
|
||||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
|
||||||
subscriptlist: subscript (',' subscript)* [',']
|
|
||||||
subscript: test | [test] ':' [test] [sliceop]
|
|
||||||
sliceop: ':' [test]
|
|
||||||
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
|
|
||||||
testlist: test (',' test)* [',']
|
|
||||||
dictorsetmaker: ( ((test ':' test | '**' expr)
|
|
||||||
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
|
|
||||||
((test | star_expr)
|
|
||||||
(comp_for | (',' (test | star_expr))* [','])) )
|
|
||||||
|
|
||||||
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
|
|
||||||
|
|
||||||
arglist: argument (',' argument)* [',']
|
|
||||||
|
|
||||||
# The reason that keywords are test nodes instead of NAME is that using NAME
|
|
||||||
# results in an ambiguity. ast.c makes sure it's a NAME.
|
|
||||||
# "test '=' test" is really "keyword '=' test", but we have no such token.
|
|
||||||
# These need to be in a single rule to avoid grammar that is ambiguous
|
|
||||||
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
|
|
||||||
# we explicitly match '*' here, too, to give it proper precedence.
|
|
||||||
# Illegal combinations and orderings are blocked in ast.c:
|
|
||||||
# multiple (test comp_for) arguments are blocked; keyword unpackings
|
|
||||||
# that precede iterable unpackings are blocked; etc.
|
|
||||||
argument: ( test [comp_for] |
|
|
||||||
test ':=' test |
|
|
||||||
test '=' test |
|
|
||||||
'**' test |
|
|
||||||
'*' test )
|
|
||||||
|
|
||||||
comp_iter: comp_for | comp_if
|
|
||||||
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
|
|
||||||
comp_for: [ASYNC] sync_comp_for
|
|
||||||
comp_if: 'if' test_nocond [comp_iter]
|
|
||||||
|
|
||||||
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
|
||||||
encoding_decl: NAME
|
|
||||||
|
|
||||||
yield_expr: 'yield' [yield_arg]
|
|
||||||
yield_arg: 'from' test | testlist_star_expr
|
|
||||||
|
|
||||||
# the TYPE_COMMENT in suites is only parsed for funcdefs,
|
|
||||||
# but can't go elsewhere due to ambiguity
|
|
||||||
func_body_suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT
|
|
||||||
|
|
||||||
func_type_input: func_type NEWLINE* ENDMARKER
|
|
||||||
func_type: '(' [typelist] ')' '->' test
|
|
||||||
# typelist is a modified typedargslist (see above)
|
|
||||||
typelist: (test (',' test)* [','
|
|
||||||
['*' [test] (',' test)* [',' '**' test] | '**' test]]
|
|
||||||
| '*' [test] (',' test)* [',' '**' test] | '**' test)
|
|
Loading…
Reference in New Issue