gh-109287: Desugar inst(X) to op(X); macro(X) = X (#109294)

This makes the internal representation in the code generator simpler: there is one list of ops and one list of macros, and no special-casing is needed for ops that aren't macros. (There is now special-casing for ops that are also macros, i.e. desugared inst() definitions, but that is simpler.)
This commit is contained in:
Guido van Rossum 2023-09-15 08:39:05 -07:00 committed by GitHub
parent 47af188593
commit a7a079798d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 111 additions and 164 deletions

View File

@ -170,6 +170,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 2; return 2;
case BINARY_OP_ADD_UNICODE: case BINARY_OP_ADD_UNICODE:
return 2; return 2;
case _BINARY_OP_INPLACE_ADD_UNICODE:
return 2;
case BINARY_OP_INPLACE_ADD_UNICODE: case BINARY_OP_INPLACE_ADD_UNICODE:
return 2; return 2;
case BINARY_SUBSCR: case BINARY_SUBSCR:
@ -430,6 +432,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 0; return 0;
case _ITER_CHECK_LIST: case _ITER_CHECK_LIST:
return 1; return 1;
case _ITER_JUMP_LIST:
return 1;
case _IS_ITER_EXHAUSTED_LIST: case _IS_ITER_EXHAUSTED_LIST:
return 1; return 1;
case _ITER_NEXT_LIST: case _ITER_NEXT_LIST:
@ -438,6 +442,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 1; return 1;
case _ITER_CHECK_TUPLE: case _ITER_CHECK_TUPLE:
return 1; return 1;
case _ITER_JUMP_TUPLE:
return 1;
case _IS_ITER_EXHAUSTED_TUPLE: case _IS_ITER_EXHAUSTED_TUPLE:
return 1; return 1;
case _ITER_NEXT_TUPLE: case _ITER_NEXT_TUPLE:
@ -446,6 +452,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 1; return 1;
case _ITER_CHECK_RANGE: case _ITER_CHECK_RANGE:
return 1; return 1;
case _ITER_JUMP_RANGE:
return 1;
case _IS_ITER_EXHAUSTED_RANGE: case _IS_ITER_EXHAUSTED_RANGE:
return 1; return 1;
case _ITER_NEXT_RANGE: case _ITER_NEXT_RANGE:
@ -702,6 +710,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 1; return 1;
case BINARY_OP_ADD_UNICODE: case BINARY_OP_ADD_UNICODE:
return 1; return 1;
case _BINARY_OP_INPLACE_ADD_UNICODE:
return 0;
case BINARY_OP_INPLACE_ADD_UNICODE: case BINARY_OP_INPLACE_ADD_UNICODE:
return 0; return 0;
case BINARY_SUBSCR: case BINARY_SUBSCR:
@ -962,6 +972,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 0; return 0;
case _ITER_CHECK_LIST: case _ITER_CHECK_LIST:
return 1; return 1;
case _ITER_JUMP_LIST:
return 1;
case _IS_ITER_EXHAUSTED_LIST: case _IS_ITER_EXHAUSTED_LIST:
return 2; return 2;
case _ITER_NEXT_LIST: case _ITER_NEXT_LIST:
@ -970,6 +982,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 2; return 2;
case _ITER_CHECK_TUPLE: case _ITER_CHECK_TUPLE:
return 1; return 1;
case _ITER_JUMP_TUPLE:
return 1;
case _IS_ITER_EXHAUSTED_TUPLE: case _IS_ITER_EXHAUSTED_TUPLE:
return 2; return 2;
case _ITER_NEXT_TUPLE: case _ITER_NEXT_TUPLE:
@ -978,6 +992,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 2; return 2;
case _ITER_CHECK_RANGE: case _ITER_CHECK_RANGE:
return 1; return 1;
case _ITER_JUMP_RANGE:
return 1;
case _IS_ITER_EXHAUSTED_RANGE: case _IS_ITER_EXHAUSTED_RANGE:
return 2; return 2;
case _ITER_NEXT_RANGE: case _ITER_NEXT_RANGE:
@ -1905,11 +1921,11 @@ const uint8_t _PyOpcode_Caches[256] = {
[COMPARE_OP] = 1, [COMPARE_OP] = 1,
[POP_JUMP_IF_FALSE] = 1, [POP_JUMP_IF_FALSE] = 1,
[POP_JUMP_IF_TRUE] = 1, [POP_JUMP_IF_TRUE] = 1,
[POP_JUMP_IF_NONE] = 1,
[POP_JUMP_IF_NOT_NONE] = 1,
[FOR_ITER] = 1, [FOR_ITER] = 1,
[CALL] = 3, [CALL] = 3,
[BINARY_OP] = 1, [BINARY_OP] = 1,
[POP_JUMP_IF_NONE] = 1,
[POP_JUMP_IF_NOT_NONE] = 1,
[JUMP_BACKWARD] = 1, [JUMP_BACKWARD] = 1,
}; };
#endif // NEED_OPCODE_METADATA #endif // NEED_OPCODE_METADATA

View File

@ -94,13 +94,8 @@ class Analyzer:
self.parse_file(filename, instrs_idx) self.parse_file(filename, instrs_idx)
files = " + ".join(self.input_filenames) files = " + ".join(self.input_filenames)
n_instrs = 0 n_instrs = len(set(self.instrs) & set(self.macros))
n_ops = 0 n_ops = len(self.instrs) - n_instrs
for instr in self.instrs.values():
if instr.kind == "op":
n_ops += 1
else:
n_instrs += 1
print( print(
f"Read {n_instrs} instructions, {n_ops} ops, " f"Read {n_instrs} instructions, {n_ops} ops, "
f"{len(self.macros)} macros, {len(self.pseudos)} pseudos, " f"{len(self.macros)} macros, {len(self.pseudos)} pseudos, "
@ -145,6 +140,9 @@ class Analyzer:
match thing: match thing:
case parsing.InstDef(name=name): case parsing.InstDef(name=name):
macro: parsing.Macro | None = None
if thing.kind == "inst":
macro = parsing.Macro(name, [parsing.OpName(name)])
if name in self.instrs: if name in self.instrs:
if not thing.override: if not thing.override:
raise psr.make_syntax_error( raise psr.make_syntax_error(
@ -152,9 +150,12 @@ class Analyzer:
f"previous definition @ {self.instrs[name].inst.context}", f"previous definition @ {self.instrs[name].inst.context}",
thing_first_token, thing_first_token,
) )
self.everything[ placeholder = OverriddenInstructionPlaceHolder(name=name)
instrs_idx[name] self.everything[instrs_idx[name]] = placeholder
] = OverriddenInstructionPlaceHolder(name=name) if macro is not None:
self.warning(
f"Overriding desugared {macro.name} may not work", thing
)
if name not in self.instrs and thing.override: if name not in self.instrs and thing.override:
raise psr.make_syntax_error( raise psr.make_syntax_error(
f"Definition of '{name}' @ {thing.context} is supposed to be " f"Definition of '{name}' @ {thing.context} is supposed to be "
@ -164,6 +165,9 @@ class Analyzer:
self.instrs[name] = Instruction(thing) self.instrs[name] = Instruction(thing)
instrs_idx[name] = len(self.everything) instrs_idx[name] = len(self.everything)
self.everything.append(thing) self.everything.append(thing)
if macro is not None:
self.macros[macro.name] = macro
self.everything.append(macro)
case parsing.Macro(name): case parsing.Macro(name):
self.macros[name] = thing self.macros[name] = thing
self.everything.append(thing) self.everything.append(thing)
@ -197,9 +201,9 @@ class Analyzer:
for target in targets: for target in targets:
if target_instr := self.instrs.get(target): if target_instr := self.instrs.get(target):
target_instr.predicted = True target_instr.predicted = True
elif target_macro := self.macro_instrs.get(target): if target_macro := self.macro_instrs.get(target):
target_macro.predicted = True target_macro.predicted = True
else: if not target_instr and not target_macro:
self.error( self.error(
f"Unknown instruction {target!r} predicted in {instr.name!r}", f"Unknown instruction {target!r} predicted in {instr.name!r}",
instr.inst, # TODO: Use better location instr.inst, # TODO: Use better location
@ -263,11 +267,7 @@ class Analyzer:
) )
def effect_counts(self, name: str) -> tuple[int, int, int]: def effect_counts(self, name: str) -> tuple[int, int, int]:
if instr := self.instrs.get(name): if mac := self.macro_instrs.get(name):
cache = instr.cache_offset
input = len(instr.input_effects)
output = len(instr.output_effects)
elif mac := self.macro_instrs.get(name):
cache = mac.cache_offset cache = mac.cache_offset
input, output = 0, 0 input, output = 0, 0
for part in mac.parts: for part in mac.parts:
@ -407,6 +407,7 @@ class Analyzer:
case parsing.OpName(name): case parsing.OpName(name):
if name not in self.instrs: if name not in self.instrs:
self.error(f"Unknown instruction {name!r}", macro) self.error(f"Unknown instruction {name!r}", macro)
else:
components.append(self.instrs[name]) components.append(self.instrs[name])
case parsing.CacheEffect(): case parsing.CacheEffect():
components.append(uop) components.append(uop)

View File

@ -160,14 +160,9 @@ class Generator(Analyzer):
pushed: str | None = None pushed: str | None = None
match thing: match thing:
case parsing.InstDef(): case parsing.InstDef():
if thing.kind != "op" or self.instrs[thing.name].is_viable_uop():
instr = self.instrs[thing.name] instr = self.instrs[thing.name]
popped = effect_str(instr.input_effects) popped = effect_str(instr.input_effects)
pushed = effect_str(instr.output_effects) pushed = effect_str(instr.output_effects)
else:
instr = None
popped = ""
pushed = ""
case parsing.Macro(): case parsing.Macro():
instr = self.macro_instrs[thing.name] instr = self.macro_instrs[thing.name]
popped, pushed = stacking.get_stack_effect_info_for_macro(instr) popped, pushed = stacking.get_stack_effect_info_for_macro(instr)
@ -208,6 +203,8 @@ class Generator(Analyzer):
for thing in self.everything: for thing in self.everything:
if isinstance(thing, OverriddenInstructionPlaceHolder): if isinstance(thing, OverriddenInstructionPlaceHolder):
continue continue
if isinstance(thing, parsing.Macro) and thing.name in self.instrs:
continue
instr, popped, pushed = self.get_stack_effect_info(thing) instr, popped, pushed = self.get_stack_effect_info(thing)
if instr is not None: if instr is not None:
popped_data.append((instr, popped)) popped_data.append((instr, popped))
@ -255,15 +252,11 @@ class Generator(Analyzer):
ops: list[tuple[bool, str]] = [] # (has_arg, name) for each opcode ops: list[tuple[bool, str]] = [] # (has_arg, name) for each opcode
instrumented_ops: list[str] = [] instrumented_ops: list[str] = []
specialized_ops = set() specialized_ops: set[str] = set()
for name, family in self.families.items(): for name, family in self.families.items():
specialized_ops.update(family.members) specialized_ops.update(family.members)
for instr in itertools.chain( for instr in self.macro_instrs.values():
[instr for instr in self.instrs.values() if instr.kind != "op"],
self.macro_instrs.values(),
):
assert isinstance(instr, (Instruction, MacroInstruction, PseudoInstruction))
name = instr.name name = instr.name
if name in specialized_ops: if name in specialized_ops:
continue continue
@ -320,7 +313,7 @@ class Generator(Analyzer):
while opname[next_opcode] is not None: while opname[next_opcode] is not None:
next_opcode += 1 next_opcode += 1
assert next_opcode < min_internal assert next_opcode < min_internal, next_opcode
for i, op in enumerate(sorted(specialized_ops)): for i, op in enumerate(sorted(specialized_ops)):
map_op(min_internal + i, op) map_op(min_internal + i, op)
@ -421,13 +414,12 @@ class Generator(Analyzer):
self.write_provenance_header() self.write_provenance_header()
self.out.emit("\n" + textwrap.dedent(""" self.out.emit("")
#ifndef Py_BUILD_CORE self.out.emit("#ifndef Py_BUILD_CORE")
# error "this header requires Py_BUILD_CORE define" self.out.emit('# error "this header requires Py_BUILD_CORE define"')
#endif self.out.emit("#endif")
""").strip()) self.out.emit("")
self.out.emit("#include <stdbool.h> // bool")
self.out.emit("\n#include <stdbool.h> // bool")
self.write_pseudo_instrs() self.write_pseudo_instrs()
@ -498,7 +490,10 @@ class Generator(Analyzer):
case parsing.InstDef(): case parsing.InstDef():
self.write_metadata_for_inst(self.instrs[thing.name]) self.write_metadata_for_inst(self.instrs[thing.name])
case parsing.Macro(): case parsing.Macro():
self.write_metadata_for_macro(self.macro_instrs[thing.name]) if thing.name not in self.instrs:
self.write_metadata_for_macro(
self.macro_instrs[thing.name]
)
case parsing.Pseudo(): case parsing.Pseudo():
self.write_metadata_for_pseudo( self.write_metadata_for_pseudo(
self.pseudo_instrs[thing.name] self.pseudo_instrs[thing.name]
@ -513,35 +508,14 @@ class Generator(Analyzer):
";", ";",
): ):
# Write macro expansion for each non-pseudo instruction # Write macro expansion for each non-pseudo instruction
for thing in self.everything: for mac in self.macro_instrs.values():
match thing: if is_super_instruction(mac):
case OverriddenInstructionPlaceHolder(): # Special-case the heck out of super-instructions
pass self.write_super_expansions(mac.name)
case parsing.InstDef(name=name): else:
instr = self.instrs[name]
# Since an 'op' is not a bytecode, it has no expansion; but 'inst' is
if instr.kind == "inst" and instr.is_viable_uop():
# Construct a dummy Component -- input/output mappings are not used
part = Component(instr, instr.active_caches)
self.write_macro_expansions(
instr.name, [part], instr.cache_offset
)
elif instr.kind == "inst" and variable_used(
instr.inst, "oparg1"
):
assert variable_used(
instr.inst, "oparg2"
), "Half super-instr?"
self.write_super_expansions(instr.name)
case parsing.Macro():
mac = self.macro_instrs[thing.name]
self.write_macro_expansions( self.write_macro_expansions(
mac.name, mac.parts, mac.cache_offset mac.name, mac.parts, mac.cache_offset
) )
case parsing.Pseudo():
pass
case _:
assert_never(thing)
with self.metadata_item( with self.metadata_item(
"const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE]", "=", ";" "const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE]", "=", ";"
@ -564,19 +538,15 @@ class Generator(Analyzer):
family_member_names: set[str] = set() family_member_names: set[str] = set()
for family in self.families.values(): for family in self.families.values():
family_member_names.update(family.members) family_member_names.update(family.members)
for instr in self.instrs.values():
if (
instr.name not in family_member_names
and instr.cache_offset > 0
and instr.kind == "inst"
and not instr.name.startswith("INSTRUMENTED_")
):
self.out.emit(f"[{instr.name}] = {instr.cache_offset},")
for mac in self.macro_instrs.values(): for mac in self.macro_instrs.values():
if mac.name not in family_member_names and mac.cache_offset > 0: if (
mac.cache_offset > 0
and mac.name not in family_member_names
and not mac.name.startswith("INSTRUMENTED_")
):
self.out.emit(f"[{mac.name}] = {mac.cache_offset},") self.out.emit(f"[{mac.name}] = {mac.cache_offset},")
# Irregular case: # Irregular case:
self.out.emit('[JUMP_BACKWARD] = 1,') self.out.emit("[JUMP_BACKWARD] = 1,")
deoptcodes = {} deoptcodes = {}
for name, op in self.opmap.items(): for name, op in self.opmap.items():
@ -674,7 +644,8 @@ class Generator(Analyzer):
add("_SET_IP") add("_SET_IP")
for instr in self.instrs.values(): for instr in self.instrs.values():
if instr.kind == "op": # Skip ops that are also macros -- those are desugared inst()s
if instr.name not in self.macros:
add(instr.name) add(instr.name)
def write_macro_expansions( def write_macro_expansions(
@ -693,6 +664,7 @@ class Generator(Analyzer):
# It is sometimes emitted for macros that have a # It is sometimes emitted for macros that have a
# manual translation in translate_bytecode_to_trace() # manual translation in translate_bytecode_to_trace()
# in Python/optimizer.c. # in Python/optimizer.c.
if len(parts) > 1 or part.instr.name != name:
self.note( self.note(
f"Part {part.instr.name} of {name} is not a viable uop", f"Part {part.instr.name} of {name} is not a viable uop",
part.instr.inst, part.instr.inst,
@ -792,31 +764,26 @@ class Generator(Analyzer):
self.write_provenance_header() self.write_provenance_header()
# Write and count instructions of all kinds # Write and count instructions of all kinds
n_instrs = 0
n_macros = 0 n_macros = 0
for thing in self.everything: for thing in self.everything:
match thing: match thing:
case OverriddenInstructionPlaceHolder(): case OverriddenInstructionPlaceHolder():
self.write_overridden_instr_place_holder(thing) self.write_overridden_instr_place_holder(thing)
case parsing.InstDef(): case parsing.InstDef():
if thing.kind != "op": pass
n_instrs += 1
self.write_instr(self.instrs[thing.name])
case parsing.Macro(): case parsing.Macro():
n_macros += 1 n_macros += 1
mac = self.macro_instrs[thing.name] mac = self.macro_instrs[thing.name]
stacking.write_macro_instr( stacking.write_macro_instr(
mac, self.out, self.families.get(mac.name) mac, self.out, self.families.get(mac.name)
) )
# self.write_macro(self.macro_instrs[thing.name])
case parsing.Pseudo(): case parsing.Pseudo():
pass pass
case _: case _:
assert_never(thing) assert_never(thing)
print( print(
f"Wrote {n_instrs} instructions and {n_macros} macros " f"Wrote {n_macros} cases to {output_filename}",
f"to {output_filename}",
file=sys.stderr, file=sys.stderr,
) )
@ -824,41 +791,21 @@ class Generator(Analyzer):
self, executor_filename: str, emit_line_directives: bool self, executor_filename: str, emit_line_directives: bool
) -> None: ) -> None:
"""Generate cases for the Tier 2 interpreter.""" """Generate cases for the Tier 2 interpreter."""
n_instrs = 0
n_uops = 0 n_uops = 0
with open(executor_filename, "w") as f: with open(executor_filename, "w") as f:
self.out = Formatter(f, 8, emit_line_directives) self.out = Formatter(f, 8, emit_line_directives)
self.write_provenance_header() self.write_provenance_header()
for thing in self.everything: for instr in self.instrs.values():
match thing:
case OverriddenInstructionPlaceHolder():
# TODO: Is this helpful?
self.write_overridden_instr_place_holder(thing)
case parsing.InstDef():
instr = self.instrs[thing.name]
if instr.is_viable_uop(): if instr.is_viable_uop():
if instr.kind == "op":
n_uops += 1 n_uops += 1
else:
n_instrs += 1
self.out.emit("") self.out.emit("")
with self.out.block(f"case {thing.name}:"): with self.out.block(f"case {instr.name}:"):
stacking.write_single_instr( stacking.write_single_instr(instr, self.out, tier=TIER_TWO)
instr, self.out, tier=TIER_TWO
)
if instr.check_eval_breaker: if instr.check_eval_breaker:
self.out.emit("CHECK_EVAL_BREAKER();") self.out.emit("CHECK_EVAL_BREAKER();")
self.out.emit("break;") self.out.emit("break;")
# elif instr.kind != "op":
# print(f"NOTE: {thing.name} is not a viable uop")
case parsing.Macro():
pass
case parsing.Pseudo():
pass
case _:
assert_never(thing)
print( print(
f"Wrote {n_instrs} instructions and {n_uops} ops to {executor_filename}", f"Wrote {n_uops} cases to {executor_filename}",
file=sys.stderr, file=sys.stderr,
) )
@ -869,26 +816,16 @@ class Generator(Analyzer):
with open(abstract_interpreter_filename, "w") as f: with open(abstract_interpreter_filename, "w") as f:
self.out = Formatter(f, 8, emit_line_directives) self.out = Formatter(f, 8, emit_line_directives)
self.write_provenance_header() self.write_provenance_header()
for thing in self.everything: for instr in self.instrs.values():
match thing: instr = AbstractInstruction(instr.inst)
case OverriddenInstructionPlaceHolder():
pass
case parsing.InstDef():
instr = AbstractInstruction(self.instrs[thing.name].inst)
if ( if (
instr.is_viable_uop() instr.is_viable_uop()
and instr.name not in SPECIALLY_HANDLED_ABSTRACT_INSTR and instr.name not in SPECIALLY_HANDLED_ABSTRACT_INSTR
): ):
self.out.emit("") self.out.emit("")
with self.out.block(f"case {thing.name}:"): with self.out.block(f"case {instr.name}:"):
instr.write(self.out, tier=TIER_TWO) instr.write(self.out, tier=TIER_TWO)
self.out.emit("break;") self.out.emit("break;")
case parsing.Macro():
pass
case parsing.Pseudo():
pass
case _:
assert_never(thing)
print( print(
f"Wrote some stuff to {abstract_interpreter_filename}", f"Wrote some stuff to {abstract_interpreter_filename}",
file=sys.stderr, file=sys.stderr,
@ -902,24 +839,17 @@ class Generator(Analyzer):
f"{self.out.comment} TARGET({place_holder.name}) overridden by later definition" f"{self.out.comment} TARGET({place_holder.name}) overridden by later definition"
) )
def write_instr(self, instr: Instruction) -> None:
name = instr.name def is_super_instruction(mac: MacroInstruction) -> bool:
self.out.emit("") if (
if instr.inst.override: len(mac.parts) == 1
self.out.emit("{self.out.comment} Override") and isinstance(mac.parts[0], Component)
with self.out.block(f"TARGET({name})"): and variable_used(mac.parts[0].instr.inst, "oparg1")
if instr.predicted: ):
self.out.emit(f"PREDICTED({name});") assert variable_used(mac.parts[0].instr.inst, "oparg2")
self.out.static_assert_family_size( return True
instr.name, instr.family, instr.cache_offset else:
) return False
stacking.write_single_instr(instr, self.out, tier=TIER_ONE)
if not instr.always_exits:
if instr.cache_offset:
self.out.emit(f"next_instr += {instr.cache_offset};")
if instr.check_eval_breaker:
self.out.emit("CHECK_EVAL_BREAKER();")
self.out.emit(f"DISPATCH();")
def main() -> None: def main() -> None:

View File

@ -53,7 +53,6 @@ class Instruction:
# Parts of the underlying instruction definition # Parts of the underlying instruction definition
inst: parsing.InstDef inst: parsing.InstDef
kind: typing.Literal["inst", "op"]
name: str name: str
block: parsing.Block block: parsing.Block
block_text: list[str] # Block.text, less curlies, less PREDICT() calls block_text: list[str] # Block.text, less curlies, less PREDICT() calls
@ -77,7 +76,6 @@ class Instruction:
def __init__(self, inst: parsing.InstDef): def __init__(self, inst: parsing.InstDef):
self.inst = inst self.inst = inst
self.kind = inst.kind
self.name = inst.name self.name = inst.name
self.block = inst.block self.block = inst.block
self.block_text, self.check_eval_breaker, self.block_line = extract_block_text( self.block_text, self.check_eval_breaker, self.block_line = extract_block_text(

View File

@ -376,6 +376,8 @@ def write_macro_instr(
if not parts[-1].instr.always_exits: if not parts[-1].instr.always_exits:
if not next_instr_is_set and mac.cache_offset: if not next_instr_is_set and mac.cache_offset:
out.emit(f"next_instr += {mac.cache_offset};") out.emit(f"next_instr += {mac.cache_offset};")
if parts[-1].instr.check_eval_breaker:
out.emit("CHECK_EVAL_BREAKER();")
out.emit("DISPATCH();") out.emit("DISPATCH();")