gh-109287: Desugar inst(X) to op(X); macro(X) = X (#109294)

This makes the internal representation in the code generator simpler: there's a list of ops, and a list of macros, and there's no special-casing needed for ops that aren't macros. (There's now special-casing for ops that are also macros, but that's simpler.)
This commit is contained in:
Guido van Rossum 2023-09-15 08:39:05 -07:00 committed by GitHub
parent 47af188593
commit a7a079798d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 111 additions and 164 deletions

View File

@ -170,6 +170,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 2;
case BINARY_OP_ADD_UNICODE:
return 2;
case _BINARY_OP_INPLACE_ADD_UNICODE:
return 2;
case BINARY_OP_INPLACE_ADD_UNICODE:
return 2;
case BINARY_SUBSCR:
@ -430,6 +432,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 0;
case _ITER_CHECK_LIST:
return 1;
case _ITER_JUMP_LIST:
return 1;
case _IS_ITER_EXHAUSTED_LIST:
return 1;
case _ITER_NEXT_LIST:
@ -438,6 +442,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 1;
case _ITER_CHECK_TUPLE:
return 1;
case _ITER_JUMP_TUPLE:
return 1;
case _IS_ITER_EXHAUSTED_TUPLE:
return 1;
case _ITER_NEXT_TUPLE:
@ -446,6 +452,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 1;
case _ITER_CHECK_RANGE:
return 1;
case _ITER_JUMP_RANGE:
return 1;
case _IS_ITER_EXHAUSTED_RANGE:
return 1;
case _ITER_NEXT_RANGE:
@ -702,6 +710,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 1;
case BINARY_OP_ADD_UNICODE:
return 1;
case _BINARY_OP_INPLACE_ADD_UNICODE:
return 0;
case BINARY_OP_INPLACE_ADD_UNICODE:
return 0;
case BINARY_SUBSCR:
@ -962,6 +972,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 0;
case _ITER_CHECK_LIST:
return 1;
case _ITER_JUMP_LIST:
return 1;
case _IS_ITER_EXHAUSTED_LIST:
return 2;
case _ITER_NEXT_LIST:
@ -970,6 +982,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 2;
case _ITER_CHECK_TUPLE:
return 1;
case _ITER_JUMP_TUPLE:
return 1;
case _IS_ITER_EXHAUSTED_TUPLE:
return 2;
case _ITER_NEXT_TUPLE:
@ -978,6 +992,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 2;
case _ITER_CHECK_RANGE:
return 1;
case _ITER_JUMP_RANGE:
return 1;
case _IS_ITER_EXHAUSTED_RANGE:
return 2;
case _ITER_NEXT_RANGE:
@ -1905,11 +1921,11 @@ const uint8_t _PyOpcode_Caches[256] = {
[COMPARE_OP] = 1,
[POP_JUMP_IF_FALSE] = 1,
[POP_JUMP_IF_TRUE] = 1,
[POP_JUMP_IF_NONE] = 1,
[POP_JUMP_IF_NOT_NONE] = 1,
[FOR_ITER] = 1,
[CALL] = 3,
[BINARY_OP] = 1,
[POP_JUMP_IF_NONE] = 1,
[POP_JUMP_IF_NOT_NONE] = 1,
[JUMP_BACKWARD] = 1,
};
#endif // NEED_OPCODE_METADATA

View File

@ -94,13 +94,8 @@ class Analyzer:
self.parse_file(filename, instrs_idx)
files = " + ".join(self.input_filenames)
n_instrs = 0
n_ops = 0
for instr in self.instrs.values():
if instr.kind == "op":
n_ops += 1
else:
n_instrs += 1
n_instrs = len(set(self.instrs) & set(self.macros))
n_ops = len(self.instrs) - n_instrs
print(
f"Read {n_instrs} instructions, {n_ops} ops, "
f"{len(self.macros)} macros, {len(self.pseudos)} pseudos, "
@ -145,6 +140,9 @@ class Analyzer:
match thing:
case parsing.InstDef(name=name):
macro: parsing.Macro | None = None
if thing.kind == "inst":
macro = parsing.Macro(name, [parsing.OpName(name)])
if name in self.instrs:
if not thing.override:
raise psr.make_syntax_error(
@ -152,9 +150,12 @@ class Analyzer:
f"previous definition @ {self.instrs[name].inst.context}",
thing_first_token,
)
self.everything[
instrs_idx[name]
] = OverriddenInstructionPlaceHolder(name=name)
placeholder = OverriddenInstructionPlaceHolder(name=name)
self.everything[instrs_idx[name]] = placeholder
if macro is not None:
self.warning(
f"Overriding desugared {macro.name} may not work", thing
)
if name not in self.instrs and thing.override:
raise psr.make_syntax_error(
f"Definition of '{name}' @ {thing.context} is supposed to be "
@ -164,6 +165,9 @@ class Analyzer:
self.instrs[name] = Instruction(thing)
instrs_idx[name] = len(self.everything)
self.everything.append(thing)
if macro is not None:
self.macros[macro.name] = macro
self.everything.append(macro)
case parsing.Macro(name):
self.macros[name] = thing
self.everything.append(thing)
@ -197,9 +201,9 @@ class Analyzer:
for target in targets:
if target_instr := self.instrs.get(target):
target_instr.predicted = True
elif target_macro := self.macro_instrs.get(target):
if target_macro := self.macro_instrs.get(target):
target_macro.predicted = True
else:
if not target_instr and not target_macro:
self.error(
f"Unknown instruction {target!r} predicted in {instr.name!r}",
instr.inst, # TODO: Use better location
@ -263,11 +267,7 @@ class Analyzer:
)
def effect_counts(self, name: str) -> tuple[int, int, int]:
if instr := self.instrs.get(name):
cache = instr.cache_offset
input = len(instr.input_effects)
output = len(instr.output_effects)
elif mac := self.macro_instrs.get(name):
if mac := self.macro_instrs.get(name):
cache = mac.cache_offset
input, output = 0, 0
for part in mac.parts:
@ -407,7 +407,8 @@ class Analyzer:
case parsing.OpName(name):
if name not in self.instrs:
self.error(f"Unknown instruction {name!r}", macro)
components.append(self.instrs[name])
else:
components.append(self.instrs[name])
case parsing.CacheEffect():
components.append(uop)
case _:

View File

@ -160,14 +160,9 @@ class Generator(Analyzer):
pushed: str | None = None
match thing:
case parsing.InstDef():
if thing.kind != "op" or self.instrs[thing.name].is_viable_uop():
instr = self.instrs[thing.name]
popped = effect_str(instr.input_effects)
pushed = effect_str(instr.output_effects)
else:
instr = None
popped = ""
pushed = ""
instr = self.instrs[thing.name]
popped = effect_str(instr.input_effects)
pushed = effect_str(instr.output_effects)
case parsing.Macro():
instr = self.macro_instrs[thing.name]
popped, pushed = stacking.get_stack_effect_info_for_macro(instr)
@ -208,6 +203,8 @@ class Generator(Analyzer):
for thing in self.everything:
if isinstance(thing, OverriddenInstructionPlaceHolder):
continue
if isinstance(thing, parsing.Macro) and thing.name in self.instrs:
continue
instr, popped, pushed = self.get_stack_effect_info(thing)
if instr is not None:
popped_data.append((instr, popped))
@ -255,15 +252,11 @@ class Generator(Analyzer):
ops: list[tuple[bool, str]] = [] # (has_arg, name) for each opcode
instrumented_ops: list[str] = []
specialized_ops = set()
specialized_ops: set[str] = set()
for name, family in self.families.items():
specialized_ops.update(family.members)
for instr in itertools.chain(
[instr for instr in self.instrs.values() if instr.kind != "op"],
self.macro_instrs.values(),
):
assert isinstance(instr, (Instruction, MacroInstruction, PseudoInstruction))
for instr in self.macro_instrs.values():
name = instr.name
if name in specialized_ops:
continue
@ -320,7 +313,7 @@ class Generator(Analyzer):
while opname[next_opcode] is not None:
next_opcode += 1
assert next_opcode < min_internal
assert next_opcode < min_internal, next_opcode
for i, op in enumerate(sorted(specialized_ops)):
map_op(min_internal + i, op)
@ -421,13 +414,12 @@ class Generator(Analyzer):
self.write_provenance_header()
self.out.emit("\n" + textwrap.dedent("""
#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif
""").strip())
self.out.emit("\n#include <stdbool.h> // bool")
self.out.emit("")
self.out.emit("#ifndef Py_BUILD_CORE")
self.out.emit('# error "this header requires Py_BUILD_CORE define"')
self.out.emit("#endif")
self.out.emit("")
self.out.emit("#include <stdbool.h> // bool")
self.write_pseudo_instrs()
@ -498,7 +490,10 @@ class Generator(Analyzer):
case parsing.InstDef():
self.write_metadata_for_inst(self.instrs[thing.name])
case parsing.Macro():
self.write_metadata_for_macro(self.macro_instrs[thing.name])
if thing.name not in self.instrs:
self.write_metadata_for_macro(
self.macro_instrs[thing.name]
)
case parsing.Pseudo():
self.write_metadata_for_pseudo(
self.pseudo_instrs[thing.name]
@ -513,35 +508,14 @@ class Generator(Analyzer):
";",
):
# Write macro expansion for each non-pseudo instruction
for thing in self.everything:
match thing:
case OverriddenInstructionPlaceHolder():
pass
case parsing.InstDef(name=name):
instr = self.instrs[name]
# Since an 'op' is not a bytecode, it has no expansion; but 'inst' is
if instr.kind == "inst" and instr.is_viable_uop():
# Construct a dummy Component -- input/output mappings are not used
part = Component(instr, instr.active_caches)
self.write_macro_expansions(
instr.name, [part], instr.cache_offset
)
elif instr.kind == "inst" and variable_used(
instr.inst, "oparg1"
):
assert variable_used(
instr.inst, "oparg2"
), "Half super-instr?"
self.write_super_expansions(instr.name)
case parsing.Macro():
mac = self.macro_instrs[thing.name]
self.write_macro_expansions(
mac.name, mac.parts, mac.cache_offset
)
case parsing.Pseudo():
pass
case _:
assert_never(thing)
for mac in self.macro_instrs.values():
if is_super_instruction(mac):
# Special-case the heck out of super-instructions
self.write_super_expansions(mac.name)
else:
self.write_macro_expansions(
mac.name, mac.parts, mac.cache_offset
)
with self.metadata_item(
"const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE]", "=", ";"
@ -564,19 +538,15 @@ class Generator(Analyzer):
family_member_names: set[str] = set()
for family in self.families.values():
family_member_names.update(family.members)
for instr in self.instrs.values():
if (
instr.name not in family_member_names
and instr.cache_offset > 0
and instr.kind == "inst"
and not instr.name.startswith("INSTRUMENTED_")
):
self.out.emit(f"[{instr.name}] = {instr.cache_offset},")
for mac in self.macro_instrs.values():
if mac.name not in family_member_names and mac.cache_offset > 0:
if (
mac.cache_offset > 0
and mac.name not in family_member_names
and not mac.name.startswith("INSTRUMENTED_")
):
self.out.emit(f"[{mac.name}] = {mac.cache_offset},")
# Irregular case:
self.out.emit('[JUMP_BACKWARD] = 1,')
self.out.emit("[JUMP_BACKWARD] = 1,")
deoptcodes = {}
for name, op in self.opmap.items():
@ -674,7 +644,8 @@ class Generator(Analyzer):
add("_SET_IP")
for instr in self.instrs.values():
if instr.kind == "op":
# Skip ops that are also macros -- those are desugared inst()s
if instr.name not in self.macros:
add(instr.name)
def write_macro_expansions(
@ -693,10 +664,11 @@ class Generator(Analyzer):
# It is sometimes emitted for macros that have a
# manual translation in translate_bytecode_to_trace()
# in Python/optimizer.c.
self.note(
f"Part {part.instr.name} of {name} is not a viable uop",
part.instr.inst,
)
if len(parts) > 1 or part.instr.name != name:
self.note(
f"Part {part.instr.name} of {name} is not a viable uop",
part.instr.inst,
)
return
if not part.active_caches:
if part.instr.name == "_SET_IP":
@ -792,31 +764,26 @@ class Generator(Analyzer):
self.write_provenance_header()
# Write and count instructions of all kinds
n_instrs = 0
n_macros = 0
for thing in self.everything:
match thing:
case OverriddenInstructionPlaceHolder():
self.write_overridden_instr_place_holder(thing)
case parsing.InstDef():
if thing.kind != "op":
n_instrs += 1
self.write_instr(self.instrs[thing.name])
pass
case parsing.Macro():
n_macros += 1
mac = self.macro_instrs[thing.name]
stacking.write_macro_instr(
mac, self.out, self.families.get(mac.name)
)
# self.write_macro(self.macro_instrs[thing.name])
case parsing.Pseudo():
pass
case _:
assert_never(thing)
print(
f"Wrote {n_instrs} instructions and {n_macros} macros "
f"to {output_filename}",
f"Wrote {n_macros} cases to {output_filename}",
file=sys.stderr,
)
@ -824,41 +791,21 @@ class Generator(Analyzer):
self, executor_filename: str, emit_line_directives: bool
) -> None:
"""Generate cases for the Tier 2 interpreter."""
n_instrs = 0
n_uops = 0
with open(executor_filename, "w") as f:
self.out = Formatter(f, 8, emit_line_directives)
self.write_provenance_header()
for thing in self.everything:
match thing:
case OverriddenInstructionPlaceHolder():
# TODO: Is this helpful?
self.write_overridden_instr_place_holder(thing)
case parsing.InstDef():
instr = self.instrs[thing.name]
if instr.is_viable_uop():
if instr.kind == "op":
n_uops += 1
else:
n_instrs += 1
self.out.emit("")
with self.out.block(f"case {thing.name}:"):
stacking.write_single_instr(
instr, self.out, tier=TIER_TWO
)
if instr.check_eval_breaker:
self.out.emit("CHECK_EVAL_BREAKER();")
self.out.emit("break;")
# elif instr.kind != "op":
# print(f"NOTE: {thing.name} is not a viable uop")
case parsing.Macro():
pass
case parsing.Pseudo():
pass
case _:
assert_never(thing)
for instr in self.instrs.values():
if instr.is_viable_uop():
n_uops += 1
self.out.emit("")
with self.out.block(f"case {instr.name}:"):
stacking.write_single_instr(instr, self.out, tier=TIER_TWO)
if instr.check_eval_breaker:
self.out.emit("CHECK_EVAL_BREAKER();")
self.out.emit("break;")
print(
f"Wrote {n_instrs} instructions and {n_uops} ops to {executor_filename}",
f"Wrote {n_uops} cases to {executor_filename}",
file=sys.stderr,
)
@ -869,26 +816,16 @@ class Generator(Analyzer):
with open(abstract_interpreter_filename, "w") as f:
self.out = Formatter(f, 8, emit_line_directives)
self.write_provenance_header()
for thing in self.everything:
match thing:
case OverriddenInstructionPlaceHolder():
pass
case parsing.InstDef():
instr = AbstractInstruction(self.instrs[thing.name].inst)
if (
instr.is_viable_uop()
and instr.name not in SPECIALLY_HANDLED_ABSTRACT_INSTR
):
self.out.emit("")
with self.out.block(f"case {thing.name}:"):
instr.write(self.out, tier=TIER_TWO)
self.out.emit("break;")
case parsing.Macro():
pass
case parsing.Pseudo():
pass
case _:
assert_never(thing)
for instr in self.instrs.values():
instr = AbstractInstruction(instr.inst)
if (
instr.is_viable_uop()
and instr.name not in SPECIALLY_HANDLED_ABSTRACT_INSTR
):
self.out.emit("")
with self.out.block(f"case {instr.name}:"):
instr.write(self.out, tier=TIER_TWO)
self.out.emit("break;")
print(
f"Wrote some stuff to {abstract_interpreter_filename}",
file=sys.stderr,
@ -902,24 +839,17 @@ class Generator(Analyzer):
f"{self.out.comment} TARGET({place_holder.name}) overridden by later definition"
)
def write_instr(self, instr: Instruction) -> None:
name = instr.name
self.out.emit("")
if instr.inst.override:
self.out.emit("{self.out.comment} Override")
with self.out.block(f"TARGET({name})"):
if instr.predicted:
self.out.emit(f"PREDICTED({name});")
self.out.static_assert_family_size(
instr.name, instr.family, instr.cache_offset
)
stacking.write_single_instr(instr, self.out, tier=TIER_ONE)
if not instr.always_exits:
if instr.cache_offset:
self.out.emit(f"next_instr += {instr.cache_offset};")
if instr.check_eval_breaker:
self.out.emit("CHECK_EVAL_BREAKER();")
self.out.emit(f"DISPATCH();")
def is_super_instruction(mac: MacroInstruction) -> bool:
if (
len(mac.parts) == 1
and isinstance(mac.parts[0], Component)
and variable_used(mac.parts[0].instr.inst, "oparg1")
):
assert variable_used(mac.parts[0].instr.inst, "oparg2")
return True
else:
return False
def main() -> None:

View File

@ -53,7 +53,6 @@ class Instruction:
# Parts of the underlying instruction definition
inst: parsing.InstDef
kind: typing.Literal["inst", "op"]
name: str
block: parsing.Block
block_text: list[str] # Block.text, less curlies, less PREDICT() calls
@ -77,7 +76,6 @@ class Instruction:
def __init__(self, inst: parsing.InstDef):
self.inst = inst
self.kind = inst.kind
self.name = inst.name
self.block = inst.block
self.block_text, self.check_eval_breaker, self.block_line = extract_block_text(

View File

@ -376,6 +376,8 @@ def write_macro_instr(
if not parts[-1].instr.always_exits:
if not next_instr_is_set and mac.cache_offset:
out.emit(f"next_instr += {mac.cache_offset};")
if parts[-1].instr.check_eval_breaker:
out.emit("CHECK_EVAL_BREAKER();")
out.emit("DISPATCH();")