bpo-40334: use the TOKENS file when checking dangling rules (GH-19849)
This commit is contained in:
parent
6bd99d5f00
commit
7ba08ff7b4
|
@ -17,6 +17,8 @@ from pegen.tokenizer import Tokenizer
|
|||
|
||||
MOD_DIR = pathlib.Path(__file__).resolve().parent
|
||||
|
||||
TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]
|
||||
|
||||
|
||||
def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
|
||||
flags = sysconfig.get_config_var(compiler_flags)
|
||||
|
@ -112,7 +114,8 @@ def build_parser(
|
|||
return grammar, parser, tokenizer
|
||||
|
||||
|
||||
def generate_token_definitions(tokens: IO[str]) -> Tuple[Dict[str, int], Set[str]]:
|
||||
def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
|
||||
all_tokens = {}
|
||||
exact_tokens = {}
|
||||
non_exact_tokens = set()
|
||||
numbers = itertools.count(0)
|
||||
|
@ -129,13 +132,15 @@ def generate_token_definitions(tokens: IO[str]) -> Tuple[Dict[str, int], Set[str
|
|||
if len(pieces) == 1:
|
||||
(token,) = pieces
|
||||
non_exact_tokens.add(token)
|
||||
all_tokens[index] = token
|
||||
elif len(pieces) == 2:
|
||||
_, op = pieces
|
||||
token, op = pieces
|
||||
exact_tokens[op.strip("'")] = index
|
||||
all_tokens[index] = token
|
||||
else:
|
||||
raise ValueError(f"Unexpected line found in Tokens file: {line}")
|
||||
|
||||
return exact_tokens, non_exact_tokens
|
||||
return all_tokens, exact_tokens, non_exact_tokens
|
||||
|
||||
|
||||
def build_c_generator(
|
||||
|
@ -149,10 +154,10 @@ def build_c_generator(
|
|||
skip_actions: bool = False,
|
||||
) -> ParserGenerator:
|
||||
with open(tokens_file, "r") as tok_file:
|
||||
exact_tok, non_exact_tok = generate_token_definitions(tok_file)
|
||||
all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
|
||||
with open(output_file, "w") as file:
|
||||
gen: ParserGenerator = CParserGenerator(
|
||||
grammar, exact_tok, non_exact_tok, file, skip_actions=skip_actions
|
||||
grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
|
||||
)
|
||||
gen.generate(grammar_file)
|
||||
|
||||
|
|
|
@ -265,13 +265,14 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
def __init__(
|
||||
self,
|
||||
grammar: grammar.Grammar,
|
||||
tokens: Dict[int, str],
|
||||
exact_tokens: Dict[str, int],
|
||||
non_exact_tokens: Set[str],
|
||||
file: Optional[IO[Text]],
|
||||
debug: bool = False,
|
||||
skip_actions: bool = False,
|
||||
):
|
||||
super().__init__(grammar, file)
|
||||
super().__init__(grammar, tokens, file)
|
||||
self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(
|
||||
self, exact_tokens, non_exact_tokens
|
||||
)
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
import contextlib
|
||||
import token
|
||||
from abc import abstractmethod
|
||||
|
||||
from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple
|
||||
|
@ -19,11 +18,12 @@ from pegen.grammar import GrammarError, GrammarVisitor
|
|||
|
||||
|
||||
class RuleCheckingVisitor(GrammarVisitor):
|
||||
def __init__(self, rules: Dict[str, Rule]):
|
||||
def __init__(self, rules: Dict[str, Rule], tokens: Dict[int, str]):
|
||||
self.rules = rules
|
||||
self.tokens = tokens
|
||||
|
||||
def visit_NameLeaf(self, node: NameLeaf) -> None:
|
||||
if node.value not in self.rules and node.value not in token.tok_name.values():
|
||||
if node.value not in self.rules and node.value not in self.tokens.values():
|
||||
# TODO: Add line/col info to (leaf) nodes
|
||||
raise GrammarError(f"Dangling reference to rule {node.value!r}")
|
||||
|
||||
|
@ -32,12 +32,13 @@ class ParserGenerator:
|
|||
|
||||
callmakervisitor: GrammarVisitor
|
||||
|
||||
def __init__(self, grammar: Grammar, file: Optional[IO[Text]]):
|
||||
def __init__(self, grammar: Grammar, tokens: Dict[int, str], file: Optional[IO[Text]]):
|
||||
self.grammar = grammar
|
||||
self.tokens = tokens
|
||||
self.rules = grammar.rules
|
||||
if "trailer" not in grammar.metas and "start" not in self.rules:
|
||||
raise GrammarError("Grammar without a trailer must have a 'start' rule")
|
||||
checker = RuleCheckingVisitor(self.rules)
|
||||
checker = RuleCheckingVisitor(self.rules, self.tokens)
|
||||
for rule in self.rules.values():
|
||||
checker.visit(rule)
|
||||
self.file = file
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import token
|
||||
from typing import Any, Dict, Optional, IO, Text, Tuple
|
||||
|
||||
from pegen.grammar import (
|
||||
|
@ -123,8 +124,13 @@ class PythonCallMakerVisitor(GrammarVisitor):
|
|||
|
||||
|
||||
class PythonParserGenerator(ParserGenerator, GrammarVisitor):
|
||||
def __init__(self, grammar: grammar.Grammar, file: Optional[IO[Text]]):
|
||||
super().__init__(grammar, file)
|
||||
def __init__(
|
||||
self,
|
||||
grammar: grammar.Grammar,
|
||||
file: Optional[IO[Text]],
|
||||
tokens: Dict[int, str] = token.tok_name,
|
||||
):
|
||||
super().__init__(grammar, tokens, file)
|
||||
self.callmakervisitor = PythonCallMakerVisitor(self)
|
||||
|
||||
def generate(self, filename: str) -> None:
|
||||
|
|
|
@ -17,6 +17,7 @@ from pegen.parser import Parser
|
|||
from pegen.python_generator import PythonParserGenerator
|
||||
from pegen.tokenizer import Tokenizer
|
||||
|
||||
ALL_TOKENS = token.tok_name
|
||||
EXACT_TOKENS = token.EXACT_TOKEN_TYPES # type: ignore
|
||||
NON_EXACT_TOKENS = {
|
||||
name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()
|
||||
|
@ -76,7 +77,7 @@ def import_file(full_name: str, path: str) -> Any:
|
|||
|
||||
def generate_c_parser_source(grammar: Grammar) -> str:
|
||||
out = io.StringIO()
|
||||
genr = CParserGenerator(grammar, EXACT_TOKENS, NON_EXACT_TOKENS, out)
|
||||
genr = CParserGenerator(grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, out)
|
||||
genr.generate("<string>")
|
||||
return out.getvalue()
|
||||
|
||||
|
@ -96,7 +97,9 @@ def generate_parser_c_extension(
|
|||
assert not os.listdir(path)
|
||||
source = path / "parse.c"
|
||||
with open(source, "w") as file:
|
||||
genr = CParserGenerator(grammar, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug)
|
||||
genr = CParserGenerator(
|
||||
grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug
|
||||
)
|
||||
genr.generate("parse.c")
|
||||
compile_c_extension(str(source), build_dir=str(path))
|
||||
|
||||
|
|
Loading…
Reference in New Issue