cpython/Tools/peg_generator/pegen/keywordgen.py

"""Generate Lib/keyword.py from the Grammar and Tokens files using pegen."""

import argparse

from .build import build_parser, generate_token_definitions
from .c_generator import CParserGenerator

# Source text of the generated keyword module.  It is rendered with
# ``str.format``: ``{keywords}`` and ``{soft_keywords}`` are replaced by the
# pre-formatted, indented list entries built in ``main()``.
# A raw string is used so the trailing backslashes of the shell command in
# the embedded docstring are emitted literally; ``.lstrip()`` drops the
# newline that follows the opening quotes so the output starts with the
# docstring.
TEMPLATE = r'''
"""Keywords (from "Grammar/python.gram")

This file is automatically generated; please don't muck it up!

To update the symbols in this file, 'cd' to the top directory of
the python source tree and run:

    PYTHONPATH=Tools/peg_generator python3 -m pegen.keywordgen \
        Grammar/python.gram \
        Grammar/Tokens \
        Lib/keyword.py

Alternatively, you can run 'make regen-keyword'.
"""

__all__ = ["iskeyword", "issoftkeyword", "kwlist", "softkwlist"]

kwlist = [
{keywords}
]

softkwlist = [
{soft_keywords}
]

iskeyword = frozenset(kwlist).__contains__
issoftkeyword = frozenset(softkwlist).__contains__
'''.lstrip()


def _format_entries(names: list[str]) -> str:
    """Render *names* as the body of a list literal, one ``repr`` per line.

    Each entry is indented by four spaces and entries are separated by
    ``",\n"``.  No trailing comma or trailing whitespace is emitted, and an
    empty list renders as the empty string.
    """
    if not names:
        return ""
    return "    " + ",\n    ".join(map(repr, names))


def main() -> None:
    """Command-line entry point: write the keyword module for a grammar.

    Expects three positional arguments: the path of the PEG grammar, the
    token-definitions file, and the output file.  The hard and soft keywords
    are read from a ``CParserGenerator`` built for the grammar, sorted, and
    substituted into ``TEMPLATE``.
    """
    parser = argparse.ArgumentParser(
        description="Generate the Lib/keyword.py file from the grammar."
    )
    parser.add_argument(
        "grammar", type=str, help="The file with the grammar definition in PEG format"
    )
    parser.add_argument(
        "tokens_file", type=argparse.FileType("r"), help="The file with the token definitions"
    )
    # NOTE: argparse.FileType("w") opens the output file while the arguments
    # are being parsed, i.e. before the grammar has been processed.
    parser.add_argument(
        "keyword_file",
        type=argparse.FileType("w"),
        help="The path to write the keyword definitions",
    )
    args = parser.parse_args()

    grammar, _, _ = build_parser(args.grammar)
    with args.tokens_file as tok_file:
        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
    # No C code is emitted here (file=None); the generator is only used for
    # the ``keywords`` / ``soft_keywords`` attributes read below, after
    # ``collect_rules()`` has run.
    gen = CParserGenerator(grammar, all_tokens, exact_tok, non_exact_tok, file=None)
    gen.collect_rules()

    with args.keyword_file as thefile:
        # Sort so the generated file is deterministic.  ``gen.keywords`` is a
        # mapping; iterating it yields the keyword strings.
        all_keywords = sorted(gen.keywords)
        all_soft_keywords = sorted(gen.soft_keywords)

        thefile.write(
            TEMPLATE.format(
                keywords=_format_entries(all_keywords),
                soft_keywords=_format_entries(all_soft_keywords),
            )
        )


# Run the generator only when this module is executed directly (for example
# via ``python3 -m pegen.keywordgen``), not when it is imported.
if __name__ == "__main__":
    main()
bpo-40939: Generate keyword.py using the new parser (GH-20800) 2020-06-11 09:45:15 -03:00			`"""Generate Lib/keyword.py from the Grammar and Tokens files using pgen"""`

			`import argparse`

			`from .build import build_parser, generate_token_definitions`
			`from .c_generator import CParserGenerator`

			`TEMPLATE = r'''`
			`"""Keywords (from "Grammar/python.gram")`

			`This file is automatically generated; please don't muck it up!`

			`To update the symbols in this file, 'cd' to the top directory of`
			`the python source tree and run:`

			`PYTHONPATH=Tools/peg_generator python3 -m pegen.keywordgen \`
Delete remaining references to Grammar/Grammar from docs (#21624) (Ironically, the file itself remains, see https://github.com/we-like-parsers/cpython/issues/135.) 2020-07-26 12:27:52 -03:00			`Grammar/python.gram \`
bpo-40939: Generate keyword.py using the new parser (GH-20800) 2020-06-11 09:45:15 -03:00			`Grammar/Tokens \`
			`Lib/keyword.py`

			`Alternatively, you can run 'make regen-keyword'.`
			`"""`

Include soft keywords in keyword.py (GH-20877) 2020-06-14 23:55:15 -03:00			`__all__ = ["iskeyword", "issoftkeyword", "kwlist", "softkwlist"]`
bpo-40939: Generate keyword.py using the new parser (GH-20800) 2020-06-11 09:45:15 -03:00
			`kwlist = [`
Include soft keywords in keyword.py (GH-20877) 2020-06-14 23:55:15 -03:00			`{keywords}`
			`]`

			`softkwlist = [`
			`{soft_keywords}`
bpo-40939: Generate keyword.py using the new parser (GH-20800) 2020-06-11 09:45:15 -03:00			`]`

			`iskeyword = frozenset(kwlist).__contains__`
Include soft keywords in keyword.py (GH-20877) 2020-06-14 23:55:15 -03:00			`issoftkeyword = frozenset(softkwlist).__contains__`
bpo-40939: Generate keyword.py using the new parser (GH-20800) 2020-06-11 09:45:15 -03:00			`'''.lstrip()`

gh-108455: Run `mypy` on `Tools/peg_generator` (#108456) Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> 2023-08-28 17:04:12 -03:00
Update pegen to use the latest upstream developments (GH-27586) 2021-08-12 13:37:30 -03:00			`def main() -> None:`
bpo-40939: Generate keyword.py using the new parser (GH-20800) 2020-06-11 09:45:15 -03:00			`parser = argparse.ArgumentParser(`
			`description="Generate the Lib/keywords.py file from the grammar."`
			`)`
			`parser.add_argument(`
			`"grammar", type=str, help="The file with the grammar definition in PEG format"`
			`)`
			`parser.add_argument(`
bpo-40939: Clean and adapt the peg_generator directory after deleting the old parser (GH-20822) 2020-06-11 21:55:35 -03:00			`"tokens_file", type=argparse.FileType("r"), help="The file with the token definitions"`
bpo-40939: Generate keyword.py using the new parser (GH-20800) 2020-06-11 09:45:15 -03:00			`)`
			`parser.add_argument(`
			`"keyword_file",`
			`type=argparse.FileType("w"),`
			`help="The path to write the keyword definitions",`
			`)`
			`args = parser.parse_args()`

			`grammar, _, _ = build_parser(args.grammar)`
			`with args.tokens_file as tok_file:`
			`all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)`
Update pegen to use the latest upstream developments (GH-27586) 2021-08-12 13:37:30 -03:00			`gen = CParserGenerator(grammar, all_tokens, exact_tok, non_exact_tok, file=None)`
Extract visitors from the grammar nodes and call makers in the peg generator (GH-28172) Simplify the peg generator logic by extracting as much visitors as possible to disentangle the flow and separate concerns. 2021-09-05 10:58:52 -03:00			`gen.collect_rules()`
bpo-40939: Generate keyword.py using the new parser (GH-20800) 2020-06-11 09:45:15 -03:00
			`with args.keyword_file as thefile:`
gh-107015: Remove async_hacks from the tokenizer (#107018) 2023-07-26 12:34:15 -03:00			`all_keywords = sorted(list(gen.keywords.keys()))`
Extract visitors from the grammar nodes and call makers in the peg generator (GH-28172) Simplify the peg generator logic by extracting as much visitors as possible to disentangle the flow and separate concerns. 2021-09-05 10:58:52 -03:00			`all_soft_keywords = sorted(gen.soft_keywords)`
bpo-40939: Generate keyword.py using the new parser (GH-20800) 2020-06-11 09:45:15 -03:00
Fix trailing whitespace in keyword.py (GH-20881) 2020-06-15 00:33:33 -03:00			`keywords = "" if not all_keywords else " " + ",\n ".join(map(repr, all_keywords))`
Update pegen to use the latest upstream developments (GH-27586) 2021-08-12 13:37:30 -03:00			`soft_keywords = (`
			`"" if not all_soft_keywords else " " + ",\n ".join(map(repr, all_soft_keywords))`
			`)`
Include soft keywords in keyword.py (GH-20877) 2020-06-14 23:55:15 -03:00			`thefile.write(TEMPLATE.format(keywords=keywords, soft_keywords=soft_keywords))`
bpo-40939: Generate keyword.py using the new parser (GH-20800) 2020-06-11 09:45:15 -03:00

			`if __name__ == "__main__":`
			`main()`