From e6b2d93f0c3891827f609ecac1ced21e1626ed0a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 27 Jul 2020 12:00:42 -0700 Subject: [PATCH] [3.9] bpo-40939: Use the new grammar for the grammar specification documentation (GH-19969) (#21641) (We censor the heck out of actions and some other stuff using a custom "highlighter".) (cherry picked from commit 72cabb2aa636272e608285f5a6ba83b62be9be4e) Co-authored-by: Pablo Galindo --- Doc/conf.py | 2 +- Doc/reference/grammar.rst | 18 +++++-- Doc/tools/extensions/peg_highlight.py | 75 +++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 4 deletions(-) create mode 100644 Doc/tools/extensions/peg_highlight.py diff --git a/Doc/conf.py b/Doc/conf.py index 12d74ea24ce..bfb2a98fc63 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -15,7 +15,7 @@ sys.path.append(os.path.abspath('includes')) extensions = ['sphinx.ext.coverage', 'sphinx.ext.doctest', 'pyspecific', 'c_annotations', 'escape4chm', - 'asdl_highlight'] + 'asdl_highlight', 'peg_highlight'] doctest_global_setup = ''' diff --git a/Doc/reference/grammar.rst b/Doc/reference/grammar.rst index 83d0f8532c6..acf83765b57 100644 --- a/Doc/reference/grammar.rst +++ b/Doc/reference/grammar.rst @@ -1,7 +1,19 @@ Full Grammar specification ========================== -This is the full Python grammar, as it is read by the parser generator and used -to parse Python source files: +This is the full Python grammar, derived directly from the grammar +used to generate the CPython parser (see :source:`Grammar/python.gram`). +The version here omits details related to code generation and +error recovery. -.. literalinclude:: ../../Grammar/Grammar +The notation is a mixture of `EBNF +`_ +and `PEG `_. +In particular, ``&`` followed by a symbol, token or parenthesized +group indicates a positive lookahead (i.e., is required to match but +not consumed), while ``!`` indicates a negative lookahead (i.e., is +required _not_ to match). We use the ``|`` separator to mean PEG's +"ordered choice" (written as ``/`` in traditional PEG grammars). + +.. literalinclude:: ../../Grammar/python.gram + :language: peg diff --git a/Doc/tools/extensions/peg_highlight.py b/Doc/tools/extensions/peg_highlight.py new file mode 100644 index 00000000000..f02515d3919 --- /dev/null +++ b/Doc/tools/extensions/peg_highlight.py @@ -0,0 +1,75 @@ +from pygments.lexer import RegexLexer, bygroups, include +from pygments.token import Comment, Generic, Keyword, Name, Operator, Punctuation, Text + +from sphinx.highlighting import lexers + + +class PEGLexer(RegexLexer): + """Pygments Lexer for PEG grammar (.gram) files + + This lexer strips the following elements from the grammar: + + - Meta-tags + - Variable assignments + - Actions + - Lookaheads + - Rule types + - Rule options + - Rules named `invalid_*` or `incorrect_*` + """ + + name = "PEG" + aliases = ["peg"] + filenames = ["*.gram"] + _name = r"([^\W\d]\w*)" + _text_ws = r"(\s*)" + + tokens = { + "ws": [(r"\n", Text), (r"\s+", Text), (r"#.*$", Comment.Singleline),], + "lookaheads": [ + (r"(?<=\|\s)(&\w+\s?)", bygroups(None)), + (r"(?<=\|\s)(&'.+'\s?)", bygroups(None)), + (r'(?<=\|\s)(&".+"\s?)', bygroups(None)), + (r"(?<=\|\s)(&\(.+\)\s?)", bygroups(None)), + ], + "metas": [ + (r"(@\w+ '''(.|\n)+?''')", bygroups(None)), + (r"^(@.*)$", bygroups(None)), + ], + "actions": [(r"{(.|\n)+?}", bygroups(None)),], + "strings": [ + (r"'\w+?'", Keyword), + (r'"\w+?"', Keyword), + (r"'\W+?'", Text), + (r'"\W+?"', Text), + ], + "variables": [(_name + _text_ws + "(=)", bygroups(None, None, None),),], + "invalids": [ + (r"^(\s+\|\s+invalid_\w+\s*\n)", bygroups(None)), + (r"^(\s+\|\s+incorrect_\w+\s*\n)", bygroups(None)), + (r"^(#.*invalid syntax.*(?:.|\n)*)", bygroups(None),), + ], + "root": [ + include("invalids"), + include("ws"), + include("lookaheads"), + include("metas"), + include("actions"), + include("strings"), + include("variables"), + (r"\b(?!(NULL|EXTRA))([A-Z_]+)\b\s*(?!\()", Text,), + ( + r"^\s*" + _name + "\s*" + "(\[.*\])?" + "\s*" + "(\(.+\))?" + "\s*(:)", + bygroups(Name.Function, None, None, Punctuation), + ), + (_name, Name.Function), + (r"[\||\.|\+|\*|\?]", Operator), + (r"{|}|\(|\)|\[|\]", Punctuation), + (r".", Text), + ], + } + + +def setup(app): + lexers["peg"] = PEGLexer() + return {"version": "1.0", "parallel_read_safe": True}