[3.9] bpo-40939: Use the new grammar for the grammar specification documentation (GH-19969) (#21641)

(We censor the heck out of actions and some other stuff using a custom "highlighter".)

(cherry picked from commit 72cabb2aa6)

Co-authored-by: Pablo Galindo <Pablogsal@gmail.com>
This commit is contained in:
Guido van Rossum 2020-07-27 12:00:42 -07:00 committed by GitHub
parent 8b052751d3
commit e6b2d93f0c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 91 additions and 4 deletions

View File

@ -15,7 +15,7 @@ sys.path.append(os.path.abspath('includes'))
extensions = ['sphinx.ext.coverage', 'sphinx.ext.doctest',
'pyspecific', 'c_annotations', 'escape4chm',
'asdl_highlight']
'asdl_highlight', 'peg_highlight']
doctest_global_setup = '''

View File

@ -1,7 +1,19 @@
Full Grammar specification
==========================
This is the full Python grammar, as it is read by the parser generator and used
to parse Python source files:
This is the full Python grammar, derived directly from the grammar
used to generate the CPython parser (see :source:`Grammar/python.gram`).
The version here omits details related to code generation and
error recovery.
.. literalinclude:: ../../Grammar/Grammar
The notation is a mixture of `EBNF
<https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form>`_
and `PEG <https://en.wikipedia.org/wiki/Parsing_expression_grammar>`_.
In particular, ``&`` followed by a symbol, token or parenthesized
group indicates a positive lookahead (i.e., is required to match but
not consumed), while ``!`` indicates a negative lookahead (i.e., is
required _not_ to match). We use the ``|`` separator to mean PEG's
"ordered choice" (written as ``/`` in traditional PEG grammars).
.. literalinclude:: ../../Grammar/python.gram
:language: peg

View File

@ -0,0 +1,75 @@
from pygments.lexer import RegexLexer, bygroups, include
from pygments.token import Comment, Generic, Keyword, Name, Operator, Punctuation, Text
from sphinx.highlighting import lexers
class PEGLexer(RegexLexer):
"""Pygments Lexer for PEG grammar (.gram) files
This lexer strips the following elements from the grammar:
- Meta-tags
- Variable assignments
- Actions
- Lookaheads
- Rule types
- Rule options
- Rules named `invalid_*` or `incorrect_*`
"""
name = "PEG"
aliases = ["peg"]
filenames = ["*.gram"]
_name = r"([^\W\d]\w*)"
_text_ws = r"(\s*)"
tokens = {
"ws": [(r"\n", Text), (r"\s+", Text), (r"#.*$", Comment.Singleline),],
"lookaheads": [
(r"(?<=\|\s)(&\w+\s?)", bygroups(None)),
(r"(?<=\|\s)(&'.+'\s?)", bygroups(None)),
(r'(?<=\|\s)(&".+"\s?)', bygroups(None)),
(r"(?<=\|\s)(&\(.+\)\s?)", bygroups(None)),
],
"metas": [
(r"(@\w+ '''(.|\n)+?''')", bygroups(None)),
(r"^(@.*)$", bygroups(None)),
],
"actions": [(r"{(.|\n)+?}", bygroups(None)),],
"strings": [
(r"'\w+?'", Keyword),
(r'"\w+?"', Keyword),
(r"'\W+?'", Text),
(r'"\W+?"', Text),
],
"variables": [(_name + _text_ws + "(=)", bygroups(None, None, None),),],
"invalids": [
(r"^(\s+\|\s+invalid_\w+\s*\n)", bygroups(None)),
(r"^(\s+\|\s+incorrect_\w+\s*\n)", bygroups(None)),
(r"^(#.*invalid syntax.*(?:.|\n)*)", bygroups(None),),
],
"root": [
include("invalids"),
include("ws"),
include("lookaheads"),
include("metas"),
include("actions"),
include("strings"),
include("variables"),
(r"\b(?!(NULL|EXTRA))([A-Z_]+)\b\s*(?!\()", Text,),
(
r"^\s*" + _name + "\s*" + "(\[.*\])?" + "\s*" + "(\(.+\))?" + "\s*(:)",
bygroups(Name.Function, None, None, Punctuation),
),
(_name, Name.Function),
(r"[\||\.|\+|\*|\?]", Operator),
(r"{|}|\(|\)|\[|\]", Punctuation),
(r".", Text),
],
}
def setup(app):
lexers["peg"] = PEGLexer()
return {"version": "1.0", "parallel_read_safe": True}