From 9727694f08cad4b019d2939224e3416312b1c0e1 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Thu, 11 Jun 2020 15:45:15 +0300 Subject: [PATCH] bpo-40939: Generate keyword.py using the new parser (GH-20800) --- Lib/keyword.py | 10 ++- Lib/pydoc.py | 1 + Makefile.pre.in | 7 +- .../2020-06-11-11-07-10.bpo-40939.-D5Asl.rst | 1 + PCbuild/regen.vcxproj | 5 +- Tools/peg_generator/pegen/keywordgen.py | 73 +++++++++++++++++++ 6 files changed, 88 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-06-11-11-07-10.bpo-40939.-D5Asl.rst create mode 100644 Tools/peg_generator/pegen/keywordgen.py diff --git a/Lib/keyword.py b/Lib/keyword.py index ddcbb25d3d3..afc3db3942c 100644 --- a/Lib/keyword.py +++ b/Lib/keyword.py @@ -1,13 +1,14 @@ -"""Keywords (from "Grammar/Grammar") +"""Keywords (from "Grammar/python.gram") This file is automatically generated; please don't muck it up! To update the symbols in this file, 'cd' to the top directory of the python source tree and run: - python3 -m Parser.pgen.keywordgen Grammar/Grammar \ - Grammar/Tokens \ - Lib/keyword.py + PYTHONPATH=Tools/peg_generator python3 -m pegen.keywordgen \ + Grammar/python.gram \ + Grammar/Tokens \ + Lib/keyword.py Alternatively, you can run 'make regen-keyword'. 
""" @@ -18,6 +19,7 @@ kwlist = [ 'False', 'None', 'True', + '__new_parser__', 'and', 'as', 'assert', diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 628f9fc7d1d..a5368bf8bfe 100755 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1817,6 +1817,7 @@ class Helper: 'False': '', 'None': '', 'True': '', + '__new_parser__': '', 'and': 'BOOLEAN', 'as': 'with', 'assert': ('assert', ''), diff --git a/Makefile.pre.in b/Makefile.pre.in index 7c16d2905fb..9a82729aa0f 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -895,9 +895,10 @@ regen-token: .PHONY: regen-keyword regen-keyword: - # Regenerate Lib/keyword.py from Grammar/Grammar and Grammar/Tokens - # using Parser/pgen - PYTHONPATH=$(srcdir) $(PYTHON_FOR_REGEN) -m Parser.pgen.keywordgen $(srcdir)/Grammar/Grammar \ + # Regenerate Lib/keyword.py from Grammar/python.gram and Grammar/Tokens + # using Tools/peg_generator/pegen + PYTHONPATH=$(srcdir)/Tools/peg_generator $(PYTHON_FOR_REGEN) -m pegen.keywordgen \ + $(srcdir)/Grammar/python.gram \ $(srcdir)/Grammar/Tokens \ $(srcdir)/Lib/keyword.py.new $(UPDATE_FILE) $(srcdir)/Lib/keyword.py $(srcdir)/Lib/keyword.py.new diff --git a/Misc/NEWS.d/next/Library/2020-06-11-11-07-10.bpo-40939.-D5Asl.rst b/Misc/NEWS.d/next/Library/2020-06-11-11-07-10.bpo-40939.-D5Asl.rst new file mode 100644 index 00000000000..0e831129dd8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-06-11-11-07-10.bpo-40939.-D5Asl.rst @@ -0,0 +1 @@ +Use the new PEG parser when generating the stdlib :mod:`keyword` module. 
\ No newline at end of file diff --git a/PCbuild/regen.vcxproj b/PCbuild/regen.vcxproj index d46fb997dbd..564a4dd7118 100644 --- a/PCbuild/regen.vcxproj +++ b/PCbuild/regen.vcxproj @@ -205,8 +205,9 @@ - - + + + diff --git a/Tools/peg_generator/pegen/keywordgen.py b/Tools/peg_generator/pegen/keywordgen.py new file mode 100644 index 00000000000..279c34b6dae --- /dev/null +++ b/Tools/peg_generator/pegen/keywordgen.py @@ -0,0 +1,73 @@ +"""Generate Lib/keyword.py from the Grammar and Tokens files using pegen""" + +import argparse + +from .build import build_parser, generate_token_definitions +from .c_generator import CParserGenerator + +TEMPLATE = r''' +"""Keywords (from "Grammar/python.gram") + +This file is automatically generated; please don't muck it up! + +To update the symbols in this file, 'cd' to the top directory of +the python source tree and run: + + PYTHONPATH=Tools/peg_generator python3 -m pegen.keywordgen \ + Grammar/python.gram \ + Grammar/Tokens \ + Lib/keyword.py + +Alternatively, you can run 'make regen-keyword'. +""" + +__all__ = ["iskeyword", "kwlist"] + +kwlist = [ + {keywords} +] + +iskeyword = frozenset(kwlist).__contains__ +'''.lstrip() + +EXTRA_KEYWORDS = ["async", "await"] + + +def main(): + parser = argparse.ArgumentParser( + description="Generate the Lib/keyword.py file from the grammar." 
+ ) + parser.add_argument( + "grammar", type=str, help="The file with the grammar definition in PEG format" + ) + parser.add_argument( + "tokens_file", + type=argparse.FileType("r"), + help="The file with the token definitions" + ) + parser.add_argument( + "keyword_file", + type=argparse.FileType("w"), + help="The path to write the keyword definitions", + ) + args = parser.parse_args() + + grammar, _, _ = build_parser(args.grammar) + with args.tokens_file as tok_file: + all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file) + gen: ParserGenerator = CParserGenerator( + grammar, all_tokens, exact_tok, non_exact_tok, file=None + ) + gen.collect_todo() + + with args.keyword_file as thefile: + all_keywords = sorted( + list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS + ) + + keywords = ",\n ".join(map(repr, all_keywords)) + thefile.write(TEMPLATE.format(keywords=keywords)) + + +if __name__ == "__main__": + main()