bpo-36143: Regenerate Lib/keyword.py from the Grammar and Tokens file using pgen (GH-12456)
Now that the parser generator is written in Python (Parser/pgen), we can use it to regenerate Lib/keyword.py, the module that lists the language keywords, instead of parsing the autogenerated grammar file (Python/graminit.c). This also allows checking in the CI that the autogenerated files are up to date.
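The regeneration itself is driven by the commands this commit wires up; they can be run by hand from the top of the source tree. The final `git diff` line is only a sketch of how a CI job might verify freshness, not something added by this commit:

    python3 -m Parser.pgen.keywordgen Grammar/Grammar \
                                      Grammar/Tokens \
                                      Lib/keyword.py
    # or, equivalently:
    make regen-keyword
    # hypothetical CI freshness check: fail if regeneration changed the file
    git diff --exit-code Lib/keyword.py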
Lib/keyword.py
@@ -1,98 +1,55 @@
-#! /usr/bin/env python3
-
-"""Keywords (from "graminit.c")
+"""Keywords (from "Grammar/Grammar")
 
 This file is automatically generated; please don't muck it up!
 
 To update the symbols in this file, 'cd' to the top directory of
-the python source tree after building the interpreter and run:
+the python source tree and run:
 
-    ./python Lib/keyword.py
+    python3 -m Parser.pgen.keywordgen Grammar/Grammar \
+                                      Grammar/Tokens \
+                                      Lib/keyword.py
+
+Alternatively, you can run 'make regen-keyword'.
 """
 
 __all__ = ["iskeyword", "kwlist"]
 
 kwlist = [
-#--start keywords--
-        'False',
-        'None',
-        'True',
-        'and',
-        'as',
-        'assert',
-        'break',
-        'class',
-        'continue',
-        'def',
-        'del',
-        'elif',
-        'else',
-        'except',
-        'finally',
-        'for',
-        'from',
-        'global',
-        'if',
-        'import',
-        'in',
-        'is',
-        'lambda',
-        'nonlocal',
-        'not',
-        'or',
-        'pass',
-        'raise',
-        'return',
-        'try',
-        'while',
-        'with',
-        'yield',
-#--end keywords--
-]
-
-kwlist.append('async')
-kwlist.append('await')
-kwlist.sort()
+    'False',
+    'None',
+    'True',
+    'and',
+    'as',
+    'assert',
+    'async',
+    'await',
+    'break',
+    'class',
+    'continue',
+    'def',
+    'del',
+    'elif',
+    'else',
+    'except',
+    'finally',
+    'for',
+    'from',
+    'global',
+    'if',
+    'import',
+    'in',
+    'is',
+    'lambda',
+    'nonlocal',
+    'not',
+    'or',
+    'pass',
+    'raise',
+    'return',
+    'try',
+    'while',
+    'with',
+    'yield'
+]
 
 iskeyword = frozenset(kwlist).__contains__
-
-
-def main():
-    import sys, re
-
-    args = sys.argv[1:]
-    iptfile = args and args[0] or "Python/graminit.c"
-    if len(args) > 1: optfile = args[1]
-    else: optfile = "Lib/keyword.py"
-
-    # load the output skeleton from the target, taking care to preserve its
-    # newline convention.
-    with open(optfile, newline='') as fp:
-        format = fp.readlines()
-    nl = format[0][len(format[0].strip()):] if format else '\n'
-
-    # scan the source file for keywords
-    with open(iptfile) as fp:
-        strprog = re.compile('"([^"]+)"')
-        lines = []
-        for line in fp:
-            if '{1, "' in line:
-                match = strprog.search(line)
-                if match:
-                    lines.append("        '" + match.group(1) + "'," + nl)
-    lines.sort()
-
-    # insert the lines of keywords into the skeleton
-    try:
-        start = format.index("#--start keywords--" + nl) + 1
-        end = format.index("#--end keywords--" + nl)
-        format[start:end] = lines
-    except ValueError:
-        sys.stderr.write("target does not contain format markers\n")
-        sys.exit(1)
-
-    # write the output file
-    with open(optfile, 'w', newline='') as fp:
-        fp.writelines(format)
-
-
-if __name__ == "__main__":
-    main()
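The public surface of the module is unchanged: kwlist is a sorted list and iskeyword a fast membership test. A quick sanity check against the new file, with expected values read off the list above:

    >>> import keyword
    >>> keyword.iskeyword('async')      # now baked into the generated list
    True
    >>> keyword.iskeyword('eggs')
    False
    >>> len(keyword.kwlist)             # 35 keywords in this version
    35
    >>> keyword.kwlist == sorted(keyword.kwlist)
    True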
Lib/test/test_keyword.py
@@ -1,20 +1,5 @@
 import keyword
 import unittest
-from test import support
-import filecmp
-import os
-import sys
-import subprocess
-import shutil
-import textwrap
-
-KEYWORD_FILE = support.findfile('keyword.py')
-GRAMMAR_FILE = os.path.join(os.path.split(__file__)[0],
-                            '..', '..', 'Python', 'graminit.c')
-TEST_PY_FILE = 'keyword_test.py'
-GRAMMAR_TEST_FILE = 'graminit_test.c'
-PY_FILE_WITHOUT_KEYWORDS = 'minimal_keyword.py'
-NONEXISTENT_FILE = 'not_here.txt'
 
 
 class Test_iskeyword(unittest.TestCase):
@@ -35,103 +20,17 @@ class Test_iskeyword(unittest.TestCase):
         keyword.kwlist = ['its', 'all', 'eggs', 'beans', 'and', 'a', 'slice']
         self.assertFalse(keyword.iskeyword('eggs'))
 
+    def test_all_keywords_fail_to_be_used_as_names(self):
+        for key in keyword.kwlist:
+            with self.assertRaises(SyntaxError):
+                exec(f"{key} = 42")
 
-class TestKeywordGeneration(unittest.TestCase):
+    def test_async_and_await_are_keywords(self):
+        self.assertIn("async", keyword.kwlist)
+        self.assertIn("await", keyword.kwlist)
 
-    def _copy_file_without_generated_keywords(self, source_file, dest_file):
-        with open(source_file, 'rb') as fp:
-            lines = fp.readlines()
-        nl = lines[0][len(lines[0].strip()):]
-        with open(dest_file, 'wb') as fp:
-            fp.writelines(lines[:lines.index(b"#--start keywords--" + nl) + 1])
-            fp.writelines(lines[lines.index(b"#--end keywords--" + nl):])
-
-    def _generate_keywords(self, grammar_file, target_keyword_py_file):
-        proc = subprocess.Popen([sys.executable,
-                                 KEYWORD_FILE,
-                                 grammar_file,
-                                 target_keyword_py_file], stderr=subprocess.PIPE)
-        stderr = proc.communicate()[1]
-        return proc.returncode, stderr
-
-    @unittest.skipIf(not os.path.exists(GRAMMAR_FILE),
-                     'test only works from source build directory')
-    def test_real_grammar_and_keyword_file(self):
-        self._copy_file_without_generated_keywords(KEYWORD_FILE, TEST_PY_FILE)
-        self.addCleanup(support.unlink, TEST_PY_FILE)
-        self.assertFalse(filecmp.cmp(KEYWORD_FILE, TEST_PY_FILE))
-        self.assertEqual((0, b''), self._generate_keywords(GRAMMAR_FILE,
-                                                           TEST_PY_FILE))
-        self.assertTrue(filecmp.cmp(KEYWORD_FILE, TEST_PY_FILE))
-
-    def test_grammar(self):
-        self._copy_file_without_generated_keywords(KEYWORD_FILE, TEST_PY_FILE)
-        self.addCleanup(support.unlink, TEST_PY_FILE)
-        with open(GRAMMAR_TEST_FILE, 'w') as fp:
-            # Some of these are probably implementation accidents.
-            fp.writelines(textwrap.dedent("""\
-                {2, 1},
-                {11, "encoding_decl", 0, 2, states_79,
-                 "\000\000\040\000\000\000\000\000\000\000\000\000"
-                 "\000\000\000\000\000\000\000\000\000"},
-                {1, "jello"},
-                {326, 0},
-                {1, "turnip"},
-                \t{1, "This one is tab indented"
-                {278, 0},
-                {1, "crazy but legal"
-                "also legal" {1, "
-                {1, "continue"},
-                {1, "lemon"},
-                {1, "tomato"},
-                {1, "wigii"},
-                {1, 'no good'}
-                {283, 0},
-                {1, "too many spaces"}"""))
-        self.addCleanup(support.unlink, GRAMMAR_TEST_FILE)
-        self._generate_keywords(GRAMMAR_TEST_FILE, TEST_PY_FILE)
-        expected = [
-            "        'This one is tab indented',",
-            "        'also legal',",
-            "        'continue',",
-            "        'crazy but legal',",
-            "        'jello',",
-            "        'lemon',",
-            "        'tomato',",
-            "        'turnip',",
-            "        'wigii',",
-        ]
-        with open(TEST_PY_FILE) as fp:
-            lines = fp.read().splitlines()
-        start = lines.index("#--start keywords--") + 1
-        end = lines.index("#--end keywords--")
-        actual = lines[start:end]
-        self.assertEqual(actual, expected)
-
-    def test_empty_grammar_results_in_no_keywords(self):
-        self._copy_file_without_generated_keywords(KEYWORD_FILE,
-                                                   PY_FILE_WITHOUT_KEYWORDS)
-        self.addCleanup(support.unlink, PY_FILE_WITHOUT_KEYWORDS)
-        shutil.copyfile(KEYWORD_FILE, TEST_PY_FILE)
-        self.addCleanup(support.unlink, TEST_PY_FILE)
-        self.assertEqual((0, b''), self._generate_keywords(os.devnull,
-                                                           TEST_PY_FILE))
-        self.assertTrue(filecmp.cmp(TEST_PY_FILE, PY_FILE_WITHOUT_KEYWORDS))
-
-    def test_keywords_py_without_markers_produces_error(self):
-        rc, stderr = self._generate_keywords(os.devnull, os.devnull)
-        self.assertNotEqual(rc, 0)
-        self.assertRegex(stderr, b'does not contain format markers')
-
-    def test_missing_grammar_file_produces_error(self):
-        rc, stderr = self._generate_keywords(NONEXISTENT_FILE, KEYWORD_FILE)
-        self.assertNotEqual(rc, 0)
-        self.assertRegex(stderr, b'(?ms)' + NONEXISTENT_FILE.encode())
-
-    def test_missing_keywords_py_file_produces_error(self):
-        rc, stderr = self._generate_keywords(os.devnull, NONEXISTENT_FILE)
-        self.assertNotEqual(rc, 0)
-        self.assertRegex(stderr, b'(?ms)' + NONEXISTENT_FILE.encode())
+    def test_keywords_are_sorted(self):
+        self.assertListEqual(sorted(keyword.kwlist), keyword.kwlist)
 
 
 if __name__ == "__main__":
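The kwlist-rebinding test retained above pins down a subtlety of the frozenset-based implementation: iskeyword captures the keyword set once, when the module is first imported, so later assignments to keyword.kwlist do not change its answers. A small illustration:

    import keyword

    keyword.kwlist = ['eggs', 'beans']  # rebind the module attribute...
    print(keyword.iskeyword('eggs'))    # False: the frozenset was captured at import
    print(keyword.iskeyword('if'))      # True: original keywords still recognized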
Makefile.pre.in
@@ -724,7 +724,7 @@ regen-importlib: Programs/_freeze_importlib
 # Regenerate all generated files
 
 regen-all: regen-opcode regen-opcode-targets regen-typeslots regen-grammar \
-	regen-token regen-symbol regen-ast regen-importlib clinic
+	regen-token regen-keyword regen-symbol regen-ast regen-importlib clinic
 
 ############################################################################
 # Special rules for object files
@@ -843,6 +843,15 @@ regen-token:
 		$(srcdir)/Grammar/Tokens \
 		$(srcdir)/Lib/token.py
 
+.PHONY: regen-keyword
+regen-keyword:
+	# Regenerate Lib/keyword.py from Grammar/Grammar and Grammar/Tokens
+	# using Parser/pgen
+	$(PYTHON_FOR_REGEN) -m Parser.pgen.keywordgen $(srcdir)/Grammar/Grammar \
+		$(srcdir)/Grammar/Tokens \
+		$(srcdir)/Lib/keyword.py.new
+	$(UPDATE_FILE) $(srcdir)/Lib/keyword.py $(srcdir)/Lib/keyword.py.new
+
 .PHONY: regen-symbol
 regen-symbol: $(srcdir)/Include/graminit.h
 	# Regenerate Lib/symbol.py from Include/graminit.h
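The regen-keyword rule writes to Lib/keyword.py.new and then hands both paths to $(UPDATE_FILE), which in CPython's build only overwrites the target when the contents actually differ, keeping timestamps stable and avoiding needless rebuilds. A minimal Python sketch of that compare-and-replace behavior (not the actual helper script's source):

    import os
    import sys


    def update_file(target, source):
        # Replace `target` with `source` only if their contents differ,
        # so an unchanged regeneration does not touch the file's timestamp.
        try:
            with open(target) as f:
                old = f.read()
        except FileNotFoundError:
            old = None
        with open(source) as f:
            new = f.read()
        if old == new:
            os.unlink(source)       # nothing changed; discard the .new file
            return False
        os.replace(source, target)  # atomic rename over the old file
        return True


    if __name__ == "__main__":
        changed = update_file(sys.argv[1], sys.argv[2])
        print("updated" if changed else "unchanged")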
Misc/NEWS.d entry (new file; full blurb filename not shown)
@@ -0,0 +1,2 @@
+Regenerate :mod:`keyword` from the Grammar and Tokens file using pgen. Patch
+by Pablo Galindo.
Parser/pgen/keywordgen.py (new file)
@@ -0,0 +1,60 @@
+"""Generate Lib/keyword.py from the Grammar and Tokens files using pgen"""
+
+import argparse
+
+from .pgen import ParserGenerator
+
+TEMPLATE = r'''
+"""Keywords (from "Grammar/Grammar")
+
+This file is automatically generated; please don't muck it up!
+
+To update the symbols in this file, 'cd' to the top directory of
+the python source tree and run:
+
+    python3 -m Parser.pgen.keywordgen Grammar/Grammar \
+                                      Grammar/Tokens \
+                                      Lib/keyword.py
+
+Alternatively, you can run 'make regen-keyword'.
+"""
+
+__all__ = ["iskeyword", "kwlist"]
+
+kwlist = [
+    {keywords}
+]
+
+iskeyword = frozenset(kwlist).__contains__
+'''.lstrip()
+
+EXTRA_KEYWORDS = ["async", "await"]
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Generate the Lib/keyword.py "
+                                                 "file from the grammar.")
+    parser.add_argument(
+        "grammar", type=str, help="The file with the grammar definition in EBNF format"
+    )
+    parser.add_argument(
+        "tokens", type=str, help="The file with the token definitions"
+    )
+    parser.add_argument(
+        "keyword_file",
+        type=argparse.FileType('w'),
+        help="The path to write the keyword definitions",
+    )
+    args = parser.parse_args()
+    p = ParserGenerator(args.grammar, args.tokens)
+    grammar = p.make_grammar()
+
+    with args.keyword_file as thefile:
+        all_keywords = sorted(list(grammar.keywords) + EXTRA_KEYWORDS)
+
+        keywords = ",\n    ".join(map(repr, all_keywords))
+        thefile.write(TEMPLATE.format(keywords=keywords))
+
+
+if __name__ == "__main__":
+    main()
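EXTRA_KEYWORDS is needed because 'async' and 'await' appear in Grammar/Grammar via the dedicated ASYNC and AWAIT tokens rather than as quoted keyword strings, so pgen's keyword collection does not pick them up and they are appended by hand. A quick way to confirm the generated output matches the checked-in file, run from the top of a CPython source tree (the filecmp check and the keyword.py.generated output name are illustrative, not part of the commit):

    import filecmp
    import subprocess
    import sys

    # Regenerate into a scratch file using the module added by this commit.
    subprocess.run(
        [sys.executable, "-m", "Parser.pgen.keywordgen",
         "Grammar/Grammar", "Grammar/Tokens", "keyword.py.generated"],
        check=True,
    )
    # The freshly generated file should be identical to the committed one.
    assert filecmp.cmp("Lib/keyword.py", "keyword.py.generated")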