GH-111485: Factor out opcode ID generator from the main cases generator. (GH-112831)

This commit is contained in:
Mark Shannon 2023-12-08 11:48:30 +00:00 committed by GitHub
parent 15a80b15af
commit aefdebdef1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 203 additions and 72 deletions

9
Include/opcode_ids.h generated
View File

@ -1,6 +1,6 @@
// This file is generated by Tools/cases_generator/generate_cases.py
// This file is generated by Tools/cases_generator/opcode_id_generator.py
// from:
// Python/bytecodes.c
// ['./Python/bytecodes.c']
// Do not edit!
#ifndef Py_OPCODE_IDS_H
@ -55,7 +55,6 @@ extern "C" {
#define UNARY_NEGATIVE 42
#define UNARY_NOT 43
#define WITH_EXCEPT_START 44
#define HAVE_ARGUMENT 45
#define BINARY_OP 45
#define BUILD_CONST_KEY_MAP 46
#define BUILD_LIST 47
@ -200,7 +199,6 @@ extern "C" {
#define UNPACK_SEQUENCE_LIST 216
#define UNPACK_SEQUENCE_TUPLE 217
#define UNPACK_SEQUENCE_TWO_TUPLE 218
#define MIN_INSTRUMENTED_OPCODE 236
#define INSTRUMENTED_RESUME 236
#define INSTRUMENTED_END_FOR 237
#define INSTRUMENTED_END_SEND 238
@ -233,6 +231,9 @@ extern "C" {
#define SETUP_WITH 266
#define STORE_FAST_MAYBE_NULL 267
#define HAVE_ARGUMENT 45
#define MIN_INSTRUMENTED_OPCODE 236
#ifdef __cplusplus
}
#endif

View File

@ -1587,13 +1587,14 @@ regen-cases:
$(PYTHON_FOR_REGEN) \
$(srcdir)/Tools/cases_generator/generate_cases.py \
$(CASESFLAG) \
-n $(srcdir)/Include/opcode_ids.h.new \
-t $(srcdir)/Python/opcode_targets.h.new \
-m $(srcdir)/Include/internal/pycore_opcode_metadata.h.new \
-e $(srcdir)/Python/executor_cases.c.h.new \
-p $(srcdir)/Lib/_opcode_metadata.py.new \
-a $(srcdir)/Python/abstract_interp_cases.c.h.new \
$(srcdir)/Python/bytecodes.c
$(PYTHON_FOR_REGEN) \
$(srcdir)/Tools/cases_generator/opcode_id_generator.py -o $(srcdir)/Include/opcode_ids.h.new $(srcdir)/Python/bytecodes.c
$(PYTHON_FOR_REGEN) \
$(srcdir)/Tools/cases_generator/tier1_generator.py -o $(srcdir)/Python/generated_cases.c.h.new $(srcdir)/Python/bytecodes.c
$(UPDATE_FILE) $(srcdir)/Python/generated_cases.c.h $(srcdir)/Python/generated_cases.c.h.new

View File

@ -48,7 +48,12 @@ class CWriter:
if offset <= self.indents[-1] or offset > 40:
offset = self.indents[-1] + 4
self.indents.append(offset)
elif "{" in txt or is_label(txt):
if is_label(txt):
self.indents.append(self.indents[-1] + 4)
elif "{" in txt:
if 'extern "C"' in txt:
self.indents.append(self.indents[-1])
else:
self.indents.append(self.indents[-1] + 4)
def emit_text(self, txt: str) -> None:

View File

@ -101,13 +101,6 @@ arg_parser.add_argument(
arg_parser.add_argument(
"-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
)
arg_parser.add_argument(
"-n",
"--opcode_ids_h",
type=str,
help="Header file with opcode number definitions",
default=DEFAULT_OPCODE_IDS_H_OUTPUT,
)
arg_parser.add_argument(
"-t",
"--opcode_targets_h",
@ -334,42 +327,8 @@ class Generator(Analyzer):
self.opmap = opmap
self.markers = markers
def write_opcode_ids(
    self, opcode_ids_h_filename: str, opcode_targets_filename: str
) -> None:
    """Write the header file that defines the opcode IDs.

    The file gets the provenance header, an include guard with a C++
    ``extern "C"`` wrapper, and one ``#define`` per entry of
    ``self.markers`` and ``self.opmap``, ordered by number.

    ``opcode_targets_filename`` is accepted but not used here.
    """
    prologue = (
        "",
        "#ifndef Py_OPCODE_IDS_H",
        "#define Py_OPCODE_IDS_H",
        "#ifdef __cplusplus",
        'extern "C" {',
        "#endif",
        "",
        "/* Instruction opcodes for compiled code */",
    )
    epilogue = (
        "",
        "#ifdef __cplusplus",
        "}",
        "#endif",
        "#endif /* !Py_OPCODE_IDS_H */",
    )
    with open(opcode_ids_h_filename, "w") as header:
        # The formatter is stored on self so the helper methods write to it.
        self.out = Formatter(header, 0)
        self.write_provenance_header()
        for text in prologue:
            self.out.emit(text)

        # The middle element is a tie-breaker: when a marker and an op share
        # a number, the marker (1) is listed before the op (2).
        entries: list[tuple[int, int, str]] = [
            (number, 1, marker) for (marker, number) in self.markers.items()
        ]
        entries += [(number, 2, opname) for (opname, number) in self.opmap.items()]
        for number, _, label in sorted(entries):
            assert label is not None
            self.out.emit(f"#define {label:<38} {number:>3}")

        for text in epilogue:
            self.out.emit(text)
def write_opcode_targets(self, opcode_targets_filename: str) -> None:
"""Write header file that defines the jump target table"""
with open(opcode_targets_filename, "w") as f:
# Create formatter
@ -885,7 +844,7 @@ def main() -> None:
# These raise OSError if output can't be written
a.assign_opcode_ids()
a.write_opcode_ids(args.opcode_ids_h, args.opcode_targets_h)
a.write_opcode_targets(args.opcode_targets_h)
a.write_metadata(args.metadata, args.pymetadata)
a.write_executor_instructions(args.executor_cases, args.emit_line_directives)
a.write_abstract_interpreter_instructions(

View File

@ -0,0 +1,19 @@
from pathlib import Path
from typing import TextIO
# Directory three levels above this file. Made absolute so that it can be
# compared against absolute input paths even if __file__ is relative.
ROOT = Path(__file__).absolute().parent.parent.parent
# Default instruction-definition file, located under ROOT.
DEFAULT_INPUT = (ROOT / "Python/bytecodes.c").absolute()


def root_relative_path(filename: str) -> str:
    """Return *filename* as a POSIX-style path relative to ``ROOT``.

    A relative *filename* is first anchored at the current working
    directory.  If the resulting path does not lie under ``ROOT`` it cannot
    be expressed relative to it; in that case the path is returned as given
    (with POSIX separators) instead of raising ``ValueError``.
    """
    try:
        return Path(filename).absolute().relative_to(ROOT).as_posix()
    except ValueError:
        # Not under ROOT: keep the caller's own spelling of the path.
        return Path(filename).as_posix()
def write_header(generator: str, source: "str | list[str]", outfile: TextIO) -> None:
    """Write the "generated file" banner to *outfile*.

    The banner consists of four ``//`` comment lines: the generator script
    (shown relative to the repository root via ``root_relative_path``), a
    "from:" line, the source(s), and a "Do not edit!" notice.

    *source* may be a single name or a list/tuple of names.  A sequence is
    written as a comma-separated list rather than as its Python ``repr``,
    so the banner does not contain brackets and quotes.  Any other value is
    formatted exactly as before.
    """
    if isinstance(source, (list, tuple)):
        shown = ", ".join(str(item) for item in source)
    else:
        shown = f"{source}"
    outfile.write(
        f"""// This file is generated by {root_relative_path(generator)}
// from:
// {shown}
// Do not edit!
"""
    )

View File

@ -0,0 +1,153 @@
"""Generate the list of opcode IDs.
Reads the instruction definitions from bytecodes.c.
Writes the IDs to Include/opcode_ids.h by default.
"""
import argparse
import os.path
import sys
from analyzer import (
Analysis,
Instruction,
analyze_files,
)
from generators_common import (
DEFAULT_INPUT,
ROOT,
write_header,
)
from cwriter import CWriter
from typing import TextIO
# Header file written when no -o/--output option is given on the command line.
DEFAULT_OUTPUT = ROOT / "Include/opcode_ids.h"
def generate_opcode_header(filenames: str, analysis: Analysis, outfile: TextIO) -> None:
    """Number every opcode and write the numbers to *outfile* as ``#define`` lines.

    Numbering scheme:
      * a few opcodes have fixed, pre-assigned numbers;
      * ordinary instructions without an oparg come first, followed by
        those with an oparg (each group in alphabetical order);
      * specialized instructions (family members) start at 150;
      * instrumented instructions are packed so that the last one is 254;
      * pseudo instructions start at 256.

    After the opcode list, ``HAVE_ARGUMENT`` and ``MIN_INSTRUMENTED_OPCODE``
    are written.
    """
    write_header(__file__, filenames, outfile)
    out = CWriter(outfile, 0, False)
    out.emit("\n")

    instmap: dict[str, int] = {
        # 0 is reserved for cache entries. This helps debugging.
        "CACHE": 0,
        # 17 is reserved as it is the initial value for the specializing
        # counter. This helps catch cases where we attempt to execute a cache.
        "RESERVED": 17,
        # 149 is RESUME - it is hard coded as such in Tools/build/deepfreeze.py
        "RESUME": 149,
        "INSTRUMENTED_LINE": 254,
    }

    # INSTRUMENTED_LINE is implemented in ceval.c rather than bytecodes.c,
    # so it has to be added explicitly (at least until bytecodes.c gains a
    # way to declare external instructions).
    instrumented = [
        opname
        for opname in analysis.instructions
        if opname.startswith("INSTRUMENTED")
    ] + ["INSTRUMENTED_LINE"]

    specialized: set[str] = {
        member.name
        for family in analysis.families.values()
        for member in family.members
    }

    no_arg: list[str] = []
    has_arg: list[str] = []
    for inst in analysis.instructions.values():
        opname = inst.name
        if opname in specialized or opname in instrumented:
            continue
        bucket = has_arg if inst.properties.oparg else no_arg
        bucket.append(opname)

    # Specialized ops appear in their own section
    # Instrumented opcodes are at the end of the valid range
    min_internal = 150
    min_instrumented = 254 - (len(instrumented) - 1)
    assert min_internal + len(specialized) < min_instrumented

    def allocate(names: list[str], first: int) -> int:
        """Number *names* from *first* upwards, skipping numbers already taken.

        Names that already have a number keep it. Returns the number
        following the last one handed out.
        """
        candidate = first
        for opname in names:
            if opname in instmap:
                continue  # Pre-defined name
            while candidate in instmap.values():
                candidate += 1
            instmap[opname] = candidate
            candidate += 1
        return candidate

    # The has-arg group continues directly after the no-arg group.
    after_no_arg = allocate(sorted(no_arg), 1)
    allocate(sorted(has_arg), after_no_arg)
    # For compatibility
    allocate(sorted(specialized), min_internal)
    allocate(instrumented, min_instrumented)

    for offset, pseudo in enumerate(sorted(analysis.pseudos)):
        instmap[pseudo] = 256 + offset

    assert 255 not in instmap.values()

    prologue = """#ifndef Py_OPCODE_IDS_H
#define Py_OPCODE_IDS_H
#ifdef __cplusplus
extern "C" {
#endif
/* Instruction opcodes for compiled code */
"""
    out.emit(prologue)

    def emit_define(label: str, number: int) -> None:
        out.emit(f"#define {label:<38} {number:>3}\n")

    # Ordered by number, then by name.
    for opname, number in sorted(
        instmap.items(), key=lambda item: (item[1], item[0])
    ):
        emit_define(opname, number)

    out.emit("\n")
    emit_define("HAVE_ARGUMENT", len(no_arg))
    emit_define("MIN_INSTRUMENTED_OPCODE", min_instrumented)

    for closing in (
        "\n",
        "#ifdef __cplusplus\n",
        "}\n",
        "#endif\n",
        "#endif /* !Py_OPCODE_IDS_H */\n",
    ):
        out.emit(closing)
# Command-line interface: an optional output path followed by any number of
# instruction definition files.
arg_parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    description="Generate the header file with all opcode IDs.",
)

arg_parser.add_argument(
    "-o",
    "--output",
    default=DEFAULT_OUTPUT,
    help="Generated code",
    type=str,
)

# Everything left on the command line is collected as input files.
arg_parser.add_argument(
    "input",
    help="Instruction definition file(s)",
    nargs=argparse.REMAINDER,
)
if __name__ == "__main__":
    # Script entry point: parse the command line, analyze the inputs and
    # write the opcode ID header.
    args = arg_parser.parse_args()
    if not args.input:
        # No input files given: fall back to the default definition file.
        args.input.append(DEFAULT_INPUT)
    analysis = analyze_files(args.input)
    with open(args.output, "w") as header:
        generate_opcode_header(args.input, analysis, header)

View File

@ -17,33 +17,18 @@ from analyzer import (
StackItem,
analysis_error,
)
from generators_common import (
DEFAULT_INPUT,
ROOT,
write_header,
)
from cwriter import CWriter
from typing import TextIO, Iterator
from lexer import Token
from stack import StackOffset
HERE = os.path.dirname(__file__)
ROOT = os.path.join(HERE, "../..")
THIS = os.path.relpath(__file__, ROOT).replace(os.path.sep, "/")
DEFAULT_INPUT = os.path.relpath(os.path.join(ROOT, "Python/bytecodes.c"))
DEFAULT_OUTPUT = os.path.relpath(os.path.join(ROOT, "Python/generated_cases.c.h"))
def write_header(filename: str, outfile: TextIO) -> None:
    """Write the Tier 1 file banner to *outfile*.

    The banner names this generator (``THIS``) and the input *filename*,
    rejects compilation when ``TIER_TWO`` is defined, and defines
    ``TIER_ONE``.
    """
    banner = (
        f"// This file is generated by {THIS}\n"
        "// from:\n"
        f"// {filename}\n"
        "// Do not edit!\n"
        "#ifdef TIER_TWO\n"
        '#error "This file is for Tier 1 only"\n'
        "#endif\n"
        "#define TIER_ONE 1\n"
    )
    outfile.write(banner)
DEFAULT_OUTPUT = ROOT / "Python/generated_cases.c.h"
FOOTER = "#undef TIER_ONE\n"
@ -351,7 +336,15 @@ def uses_this(inst: Instruction) -> bool:
def generate_tier1(
filenames: str, analysis: Analysis, outfile: TextIO, lines: bool
) -> None:
write_header(filenames, outfile)
write_header(__file__, filenames, outfile)
outfile.write(
"""
#ifdef TIER_TWO
#error "This file is for Tier 1 only"
#endif
#define TIER_ONE 1
"""
)
out = CWriter(outfile, 2, lines)
out.emit("\n")
for name, inst in sorted(analysis.instructions.items()):