bpo-40939: Generate keyword.py using the new parser (GH-20800)
parent 33faf5c4f4
commit 9727694f08
Lib/keyword.py
@@ -1,13 +1,14 @@
-"""Keywords (from "Grammar/Grammar")
+"""Keywords (from "Grammar/python.gram")
 
 This file is automatically generated; please don't muck it up!
 
 To update the symbols in this file, 'cd' to the top directory of
 the python source tree and run:
 
-    python3 -m Parser.pgen.keywordgen Grammar/Grammar \
-                                      Grammar/Tokens \
-                                      Lib/keyword.py
+    PYTHONPATH=Tools/peg_generator python3 -m pegen.keywordgen \
+        Grammar/Grammar \
+        Grammar/Tokens \
+        Lib/keyword.py
 
 Alternatively, you can run 'make regen-keyword'.
 """
@@ -18,6 +19,7 @@ kwlist = [
     'False',
     'None',
     'True',
+    '__new_parser__',
     'and',
     'as',
     'assert',
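The regenerated Lib/keyword.py keeps the same small public surface (kwlist plus iskeyword), so existing callers are unaffected. A quick sanity check; the exact output depends on the grammar the module was generated from:

    import keyword

    # Membership test exposed by the generated module.
    print(keyword.iskeyword("and"))     # True
    print(keyword.iskeyword("spam"))    # False

    # The generated list itself; after this change it is derived from
    # Grammar/python.gram and also carries the temporary '__new_parser__' entry.
    print(len(keyword.kwlist), keyword.kwlist[:4])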
Lib/pydoc.py
@@ -1817,6 +1817,7 @@ class Helper:
         'False': '',
         'None': '',
         'True': '',
+        '__new_parser__': '',
         'and': 'BOOLEAN',
         'as': 'with',
         'assert': ('assert', ''),
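The matching row in pydoc's Helper.keywords table keeps interactive help() working for every keyword. A short check, assuming a standard installation where the pydoc help topics are available:

    import pydoc

    # Helper.keywords routes these lookups through the table patched above:
    # 'and' resolves to the BOOLEAN help topic, 'assert' to its own topic.
    pydoc.help("and")
    pydoc.help("assert")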
Makefile.pre.in
@@ -895,9 +895,10 @@ regen-token:
 
 .PHONY: regen-keyword
 regen-keyword:
-        # Regenerate Lib/keyword.py from Grammar/Grammar and Grammar/Tokens
-        # using Parser/pgen
-        PYTHONPATH=$(srcdir) $(PYTHON_FOR_REGEN) -m Parser.pgen.keywordgen $(srcdir)/Grammar/Grammar \
+        # Regenerate Lib/keyword.py from Grammar/python.gram and Grammar/Tokens
+        # using Tools/peg_generator/pegen
+        PYTHONPATH=$(srcdir)/Tools/peg_generator $(PYTHON_FOR_REGEN) -m pegen.keywordgen \
+                $(srcdir)/Grammar/python.gram \
                 $(srcdir)/Grammar/Tokens \
                 $(srcdir)/Lib/keyword.py.new
         $(UPDATE_FILE) $(srcdir)/Lib/keyword.py $(srcdir)/Lib/keyword.py.new
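As before, the recipe generates into Lib/keyword.py.new and hands the result to $(UPDATE_FILE), which only replaces Lib/keyword.py when the contents actually changed, so no-op regenerations do not dirty the tree. A minimal sketch of that compare-then-replace idea (a stand-alone illustration, not the helper script the Makefile actually invokes):

    import os

    def update_file(target: str, freshly_generated: str) -> bool:
        """Install freshly_generated over target only if the contents differ."""
        with open(freshly_generated, encoding="utf-8") as f:
            new_text = f.read()
        try:
            with open(target, encoding="utf-8") as f:
                if f.read() == new_text:
                    os.unlink(freshly_generated)   # unchanged: drop the temp file
                    return False
        except FileNotFoundError:
            pass                                   # first run: nothing to compare against
        os.replace(freshly_generated, target)
        return True

    # e.g. update_file("Lib/keyword.py", "Lib/keyword.py.new")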
Misc/NEWS.d (new entry)
@@ -0,0 +1 @@
+Use the new PEG parser when generating the stdlib :mod:`keyword` module.
PCbuild/regen.targets
@@ -205,8 +205,9 @@
     <Exec Command="&quot;$(PythonExe)&quot; $(PySourcePath)Tools\scripts\generate_token.py py &quot;$(PySourcePath)Grammar\Tokens&quot; &quot;$(PySourcePath)Lib\token.py&quot;" />
   </Target>
   <Target Name="_RegenKeywords" AfterTargets="_RegenTokens">
-    <!-- Regenerate Lib/keyword.py from Grammar/Grammar and Grammar/Tokens using Parser/pgen-->
-    <Exec Command="&quot;$(PythonExe)&quot; -m Parser.pgen.keywordgen &quot;$(PySourcePath)Grammar\Grammar&quot; &quot;$(PySourcePath)Grammar\Tokens&quot; &quot;$(IntDir)keyword.py&quot;" />
+    <!-- Regenerate Lib/keyword.py from Grammar/python.gram and Grammar/Tokens using Tools/peg_generator/pegen-->
+    <SetEnv Name="PYTHONPATH" Prefix="true" Value="$(PySourcePath)Tools\peg_generator\" />
+    <Exec Command="&quot;$(PythonExe)&quot; -m pegen.keywordgen &quot;$(PySourcePath)Grammar\python.gram&quot; &quot;$(PySourcePath)Grammar\Tokens&quot; &quot;$(IntDir)keyword.py&quot;" />
     <Copy SourceFiles="$(IntDir)keyword.py" DestinationFiles="$(PySourcePath)Lib\keyword.py">
       <Output TaskParameter="CopiedFiles" ItemName="_Updated" />
     </Copy>
Tools/peg_generator/pegen/keywordgen.py (new file)
@@ -0,0 +1,73 @@
+"""Generate Lib/keyword.py from the Grammar and Tokens files using pgen"""
+
+import argparse
+
+from .build import build_parser, generate_token_definitions
+from .c_generator import CParserGenerator
+
+TEMPLATE = r'''
+"""Keywords (from "Grammar/python.gram")
+
+This file is automatically generated; please don't muck it up!
+
+To update the symbols in this file, 'cd' to the top directory of
+the python source tree and run:
+
+    PYTHONPATH=Tools/peg_generator python3 -m pegen.keywordgen \
+        Grammar/Grammar \
+        Grammar/Tokens \
+        Lib/keyword.py
+
+Alternatively, you can run 'make regen-keyword'.
+"""
+
+__all__ = ["iskeyword", "kwlist"]
+
+kwlist = [
+    {keywords}
+]
+
+iskeyword = frozenset(kwlist).__contains__
+'''.lstrip()
+
+EXTRA_KEYWORDS = ["async", "await"]
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Generate the Lib/keywords.py file from the grammar."
+    )
+    parser.add_argument(
+        "grammar", type=str, help="The file with the grammar definition in PEG format"
+    )
+    parser.add_argument(
+        "tokens_file",
+        type=argparse.FileType("r"),
+        help="The file with the token definitions"
+    )
+    parser.add_argument(
+        "keyword_file",
+        type=argparse.FileType("w"),
+        help="The path to write the keyword definitions",
+    )
+    args = parser.parse_args()
+
+    grammar, _, _ = build_parser(args.grammar)
+    with args.tokens_file as tok_file:
+        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
+    gen: ParserGenerator = CParserGenerator(
+        grammar, all_tokens, exact_tok, non_exact_tok, file=None
+    )
+    gen.collect_todo()
+
+    with args.keyword_file as thefile:
+        all_keywords = sorted(
+            list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS
+        )
+
+        keywords = ",\n    ".join(map(repr, all_keywords))
+        thefile.write(TEMPLATE.format(keywords=keywords))
+
+
+if __name__ == "__main__":
+    main()
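The last step of main() is the only formatting trick: the keyword names collected in the generator's keyword cache, plus EXTRA_KEYWORDS, are sorted, repr()-ed, and joined so that each entry lines up under "kwlist = [" in TEMPLATE. A self-contained sketch of just that step, with a trimmed stand-in template and an illustrative keyword subset:

    # Stand-in for the real TEMPLATE above; only the kwlist block is kept.
    template = "kwlist = [\n    {keywords}\n]\n"

    collected = ["and", "True", "None", "False", "async", "await"]  # illustrative subset
    keywords = ",\n    ".join(map(repr, sorted(collected)))
    print(template.format(keywords=keywords))

The printed block matches the layout of the generated Lib/keyword.py.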