gh-106812: Refactor cases_generator to allow uops with array stack effects (#107564)

Introducing a new file, stacking.py, that takes over several responsibilities related to symbolic evaluation of push/pop operations, with more generality.
Guido van Rossum 2023-08-04 09:35:56 -07:00 committed by GitHub
parent 407d7fda94
commit 400835ea16
12 changed files with 1798 additions and 1098 deletions
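
The "symbolic evaluation of push/pop operations" mentioned in the commit message refers to computing stack-effect sizes as expressions in oparg rather than fixed integers, so array effects such as values[oparg*2] can be represented. A minimal, hypothetical sketch of that idea follows; effect_size and as_expression are illustrative names, not functions from this commit:

def effect_size(effects: list[tuple[str, str]]) -> tuple[int, list[str]]:
    # effects: (name, size) pairs; an empty size means a single stack item.
    constant, symbolic = 0, []
    for _name, size in effects:
        if size:
            symbolic.append(size)
        else:
            constant += 1
    return constant, symbolic

def as_expression(constant: int, symbolic: list[str]) -> str:
    # Render e.g. (2, ["oparg*2"]) as "2 + oparg*2".
    terms = ([str(constant)] if constant or not symbolic else []) + symbolic
    return " + ".join(terms)

# Popping `below, values[oparg*2], above` removes "2 + oparg*2" stack items.
print(as_expression(*effect_size([("below", ""), ("values", "oparg*2"), ("above", "")])))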

View File

@ -679,9 +679,9 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
case LOAD_GLOBAL:
return ((oparg & 1) ? 1 : 0) + 1;
case LOAD_GLOBAL_MODULE:
return ((oparg & 1) ? 1 : 0) + 1;
return (oparg & 1 ? 1 : 0) + 1;
case LOAD_GLOBAL_BUILTIN:
return ((oparg & 1) ? 1 : 0) + 1;
return (oparg & 1 ? 1 : 0) + 1;
case DELETE_FAST:
return 0;
case MAKE_CELL:
@ -739,7 +739,7 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
case LOAD_METHOD:
return ((oparg & 1) ? 1 : 0) + 1;
case LOAD_ATTR_INSTANCE_VALUE:
return ((oparg & 1) ? 1 : 0) + 1;
return (oparg & 1 ? 1 : 0) + 1;
case LOAD_ATTR_MODULE:
return ((oparg & 1) ? 1 : 0) + 1;
case LOAD_ATTR_WITH_HINT:
@ -944,7 +944,18 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
}
#endif
enum InstructionFormat { INSTR_FMT_IB, INSTR_FMT_IBC, INSTR_FMT_IBC00, INSTR_FMT_IBC000, INSTR_FMT_IBC00000000, INSTR_FMT_IX, INSTR_FMT_IXC, INSTR_FMT_IXC0, INSTR_FMT_IXC00, INSTR_FMT_IXC000 };
enum InstructionFormat {
INSTR_FMT_IB,
INSTR_FMT_IBC,
INSTR_FMT_IBC00,
INSTR_FMT_IBC000,
INSTR_FMT_IBC00000000,
INSTR_FMT_IX,
INSTR_FMT_IXC,
INSTR_FMT_IXC0,
INSTR_FMT_IXC00,
INSTR_FMT_IXC000,
};
#define IS_VALID_OPCODE(OP) \
(((OP) >= 0) && ((OP) < OPCODE_METADATA_SIZE) && \

View File

@ -6,9 +6,9 @@ from test import test_tools
test_tools.skip_if_missing('cases_generator')
with test_tools.imports_under_tool('cases_generator'):
import generate_cases
import analysis
import formatting
import generate_cases
from parsing import StackEffect
@ -46,28 +46,11 @@ class TestEffects(unittest.TestCase):
(2, "(oparg<<1)"),
)
self.assertEqual(
formatting.string_effect_size(
formatting.list_effect_size(input_effects),
), "1 + oparg + oparg*2",
)
self.assertEqual(
formatting.string_effect_size(
formatting.list_effect_size(output_effects),
),
"2 + oparg*4",
)
self.assertEqual(
formatting.string_effect_size(
formatting.list_effect_size(other_effects),
),
"2 + (oparg<<1)",
)
class TestGeneratedCases(unittest.TestCase):
def setUp(self) -> None:
super().setUp()
self.maxDiff = None
self.temp_dir = tempfile.gettempdir()
self.temp_input_filename = os.path.join(self.temp_dir, "input.txt")
@ -140,7 +123,8 @@ class TestGeneratedCases(unittest.TestCase):
"""
output = """
TARGET(OP) {
PyObject *value = stack_pointer[-1];
PyObject *value;
value = stack_pointer[-1];
spam();
STACK_SHRINK(1);
DISPATCH();
@ -173,8 +157,9 @@ class TestGeneratedCases(unittest.TestCase):
"""
output = """
TARGET(OP) {
PyObject *value = stack_pointer[-1];
PyObject *value;
PyObject *res;
value = stack_pointer[-1];
spam();
stack_pointer[-1] = res;
DISPATCH();
@ -190,9 +175,11 @@ class TestGeneratedCases(unittest.TestCase):
"""
output = """
TARGET(OP) {
PyObject *right = stack_pointer[-1];
PyObject *left = stack_pointer[-2];
PyObject *right;
PyObject *left;
PyObject *res;
right = stack_pointer[-1];
left = stack_pointer[-2];
spam();
STACK_SHRINK(1);
stack_pointer[-1] = res;
@ -209,9 +196,11 @@ class TestGeneratedCases(unittest.TestCase):
"""
output = """
TARGET(OP) {
PyObject *right = stack_pointer[-1];
PyObject *left = stack_pointer[-2];
PyObject *right;
PyObject *left;
PyObject *result;
right = stack_pointer[-1];
left = stack_pointer[-2];
spam();
stack_pointer[-1] = result;
DISPATCH();
@ -235,8 +224,9 @@ class TestGeneratedCases(unittest.TestCase):
}
TARGET(OP3) {
PyObject *arg = stack_pointer[-1];
PyObject *arg;
PyObject *res;
arg = stack_pointer[-1];
DEOPT_IF(xxx, OP1);
stack_pointer[-1] = res;
CHECK_EVAL_BREAKER();
@ -281,9 +271,11 @@ class TestGeneratedCases(unittest.TestCase):
"""
output = """
TARGET(OP) {
PyObject *right = stack_pointer[-1];
PyObject *left = stack_pointer[-2];
PyObject *right;
PyObject *left;
PyObject *res;
right = stack_pointer[-1];
left = stack_pointer[-2];
if (cond) goto pop_2_label;
STACK_SHRINK(1);
stack_pointer[-1] = res;
@ -299,7 +291,8 @@ class TestGeneratedCases(unittest.TestCase):
"""
output = """
TARGET(OP) {
PyObject *value = stack_pointer[-1];
PyObject *value;
value = stack_pointer[-1];
uint16_t counter = read_u16(&next_instr[0].cache);
uint32_t extra = read_u32(&next_instr[1].cache);
STACK_SHRINK(1);
@ -338,8 +331,10 @@ class TestGeneratedCases(unittest.TestCase):
"""
output = """
TARGET(OP1) {
PyObject *right = stack_pointer[-1];
PyObject *left = stack_pointer[-2];
PyObject *right;
PyObject *left;
right = stack_pointer[-1];
left = stack_pointer[-2];
uint16_t counter = read_u16(&next_instr[0].cache);
op1(left, right);
next_instr += 1;
@ -347,38 +342,38 @@ class TestGeneratedCases(unittest.TestCase):
}
TARGET(OP) {
PyObject *_tmp_1 = stack_pointer[-1];
PyObject *_tmp_2 = stack_pointer[-2];
PyObject *_tmp_3 = stack_pointer[-3];
static_assert(INLINE_CACHE_ENTRIES_OP == 5, "incorrect cache size");
PyObject *right;
PyObject *left;
PyObject *arg2;
PyObject *res;
// OP1
right = stack_pointer[-1];
left = stack_pointer[-2];
{
PyObject *right = _tmp_1;
PyObject *left = _tmp_2;
uint16_t counter = read_u16(&next_instr[0].cache);
op1(left, right);
_tmp_2 = left;
_tmp_1 = right;
}
// OP2
arg2 = stack_pointer[-3];
{
PyObject *right = _tmp_1;
PyObject *left = _tmp_2;
PyObject *arg2 = _tmp_3;
PyObject *res;
uint32_t extra = read_u32(&next_instr[3].cache);
res = op2(arg2, left, right);
_tmp_3 = res;
}
next_instr += 5;
static_assert(INLINE_CACHE_ENTRIES_OP == 5, "incorrect cache size");
STACK_SHRINK(2);
stack_pointer[-1] = _tmp_3;
stack_pointer[-1] = res;
next_instr += 5;
DISPATCH();
}
TARGET(OP3) {
PyObject *right = stack_pointer[-1];
PyObject *left = stack_pointer[-2];
PyObject *arg2 = stack_pointer[-3];
PyObject *right;
PyObject *left;
PyObject *arg2;
PyObject *res;
right = stack_pointer[-1];
left = stack_pointer[-2];
arg2 = stack_pointer[-3];
res = op3(arg2, left, right);
STACK_SHRINK(2);
stack_pointer[-1] = res;
@ -396,9 +391,12 @@ class TestGeneratedCases(unittest.TestCase):
"""
output = """
TARGET(OP) {
PyObject *above = stack_pointer[-1];
PyObject **values = (stack_pointer - (1 + oparg*2));
PyObject *below = stack_pointer[-(2 + oparg*2)];
PyObject *above;
PyObject **values;
PyObject *below;
above = stack_pointer[-1];
values = stack_pointer - 1 - oparg*2;
below = stack_pointer[-2 - oparg*2];
spam();
STACK_SHRINK(oparg*2);
STACK_SHRINK(2);
@ -416,12 +414,13 @@ class TestGeneratedCases(unittest.TestCase):
output = """
TARGET(OP) {
PyObject *below;
PyObject **values = stack_pointer - (2) + 1;
PyObject **values;
PyObject *above;
values = stack_pointer - 1;
spam(values, oparg);
STACK_GROW(oparg*3);
stack_pointer[-2 - oparg*3] = below;
stack_pointer[-1] = above;
stack_pointer[-(2 + oparg*3)] = below;
DISPATCH();
}
"""
@ -435,8 +434,9 @@ class TestGeneratedCases(unittest.TestCase):
"""
output = """
TARGET(OP) {
PyObject **values = (stack_pointer - oparg);
PyObject **values;
PyObject *above;
values = stack_pointer - oparg;
spam(values, oparg);
STACK_GROW(1);
stack_pointer[-1] = above;
@ -453,8 +453,10 @@ class TestGeneratedCases(unittest.TestCase):
"""
output = """
TARGET(OP) {
PyObject **values = (stack_pointer - oparg);
PyObject *extra = stack_pointer[-(1 + oparg)];
PyObject **values;
PyObject *extra;
values = stack_pointer - oparg;
extra = stack_pointer[-1 - oparg];
if (oparg == 0) { STACK_SHRINK(oparg); goto pop_1_somewhere; }
STACK_SHRINK(oparg);
STACK_SHRINK(1);
@ -471,18 +473,21 @@ class TestGeneratedCases(unittest.TestCase):
"""
output = """
TARGET(OP) {
PyObject *cc = stack_pointer[-1];
PyObject *input = ((oparg & 1) == 1) ? stack_pointer[-(1 + (((oparg & 1) == 1) ? 1 : 0))] : NULL;
PyObject *aa = stack_pointer[-(2 + (((oparg & 1) == 1) ? 1 : 0))];
PyObject *cc;
PyObject *input = NULL;
PyObject *aa;
PyObject *xx;
PyObject *output = NULL;
PyObject *zz;
cc = stack_pointer[-1];
if ((oparg & 1) == 1) { input = stack_pointer[-1 - ((oparg & 1) == 1 ? 1 : 0)]; }
aa = stack_pointer[-2 - ((oparg & 1) == 1 ? 1 : 0)];
output = spam(oparg, input);
STACK_SHRINK((((oparg & 1) == 1) ? 1 : 0));
STACK_GROW(((oparg & 2) ? 1 : 0));
stack_pointer[-2 - (oparg & 2 ? 1 : 0)] = xx;
if (oparg & 2) { stack_pointer[-1 - (oparg & 2 ? 1 : 0)] = output; }
stack_pointer[-1] = zz;
if (oparg & 2) { stack_pointer[-(1 + ((oparg & 2) ? 1 : 0))] = output; }
stack_pointer[-(2 + ((oparg & 2) ? 1 : 0))] = xx;
DISPATCH();
}
"""
@ -500,29 +505,28 @@ class TestGeneratedCases(unittest.TestCase):
"""
output = """
TARGET(M) {
PyObject *_tmp_1 = stack_pointer[-1];
PyObject *_tmp_2 = stack_pointer[-2];
PyObject *_tmp_3 = stack_pointer[-3];
PyObject *right;
PyObject *middle;
PyObject *left;
PyObject *deep;
PyObject *extra = NULL;
PyObject *res;
// A
right = stack_pointer[-1];
middle = stack_pointer[-2];
left = stack_pointer[-3];
{
PyObject *right = _tmp_1;
PyObject *middle = _tmp_2;
PyObject *left = _tmp_3;
# Body of A
}
// B
{
PyObject *deep;
PyObject *extra = NULL;
PyObject *res;
# Body of B
_tmp_3 = deep;
if (oparg) { _tmp_2 = extra; }
_tmp_1 = res;
}
STACK_SHRINK(1);
STACK_GROW((oparg ? 1 : 0));
stack_pointer[-1] = _tmp_1;
if (oparg) { stack_pointer[-2] = _tmp_2; }
stack_pointer[-3] = _tmp_3;
stack_pointer[-2 - (oparg ? 1 : 0)] = deep;
if (oparg) { stack_pointer[-1 - (oparg ? 1 : 0)] = extra; }
stack_pointer[-1] = res;
DISPATCH();
}
"""

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -13,7 +13,6 @@ from instructions import (
MacroParts,
OverriddenInstructionPlaceHolder,
PseudoInstruction,
StackEffectMapping,
)
import parsing
from parsing import StackEffect
@ -34,11 +33,12 @@ class Analyzer:
input_filenames: list[str]
errors: int = 0
warnings: int = 0
def __init__(self, input_filenames: list[str]):
self.input_filenames = input_filenames
def error(self, msg: str, node: parsing.Node) -> None:
def message(self, msg: str, node: parsing.Node) -> None:
lineno = 0
filename = "<unknown file>"
if context := node.context:
@ -49,8 +49,18 @@ class Analyzer:
if token.kind != "COMMENT":
break
print(f"{filename}:{lineno}: {msg}", file=sys.stderr)
def error(self, msg: str, node: parsing.Node) -> None:
self.message("error: " + msg, node)
self.errors += 1
def warning(self, msg: str, node: parsing.Node) -> None:
self.message("warning: " + msg, node)
self.warnings += 1
def note(self, msg: str, node: parsing.Node) -> None:
self.message("note: " + msg, node)
everything: list[
parsing.InstDef
| parsing.Macro
@ -83,8 +93,15 @@ class Analyzer:
self.parse_file(filename, instrs_idx)
files = " + ".join(self.input_filenames)
n_instrs = 0
n_ops = 0
for instr in self.instrs.values():
if instr.kind == "op":
n_ops += 1
else:
n_instrs += 1
print(
f"Read {len(self.instrs)} instructions/ops, "
f"Read {n_instrs} instructions, {n_ops} ops, "
f"{len(self.macros)} macros, {len(self.pseudos)} pseudos, "
f"and {len(self.families)} families from {files}",
file=sys.stderr,
@ -270,14 +287,70 @@ class Analyzer:
self.macro_instrs = {}
self.pseudo_instrs = {}
for name, macro in self.macros.items():
self.macro_instrs[name] = self.analyze_macro(macro)
self.macro_instrs[name] = mac = self.analyze_macro(macro)
self.check_macro_consistency(mac)
for name, pseudo in self.pseudos.items():
self.pseudo_instrs[name] = self.analyze_pseudo(pseudo)
# TODO: Merge with similar code in stacking.py, write_components()
def check_macro_consistency(self, mac: MacroInstruction) -> None:
def get_var_names(instr: Instruction) -> dict[str, StackEffect]:
vars: dict[str, StackEffect] = {}
for eff in instr.input_effects + instr.output_effects:
if eff.name in vars:
if vars[eff.name] != eff:
self.error(
f"Instruction {instr.name!r} has "
f"inconsistent type/cond/size for variable "
f"{eff.name!r}: {vars[eff.name]} vs {eff}",
instr.inst,
)
else:
vars[eff.name] = eff
return vars
all_vars: dict[str, StackEffect] = {}
# print("Checking", mac.name)
prevop: Instruction | None = None
for part in mac.parts:
if not isinstance(part, Component):
continue
vars = get_var_names(part.instr)
# print(" //", part.instr.name, "//", vars)
for name, eff in vars.items():
if name in all_vars:
if all_vars[name] != eff:
self.error(
f"Macro {mac.name!r} has "
f"inconsistent type/cond/size for variable "
f"{name!r}: "
f"{all_vars[name]} vs {eff} in {part.instr.name!r}",
mac.macro,
)
else:
all_vars[name] = eff
if prevop is not None:
pushes = list(prevop.output_effects)
pops = list(reversed(part.instr.input_effects))
copies: list[tuple[StackEffect, StackEffect]] = []
while pushes and pops and pushes[-1] == pops[0]:
src, dst = pushes.pop(), pops.pop(0)
if src.name == dst.name or dst.name is UNUSED:
continue
copies.append((src, dst))
reads = set(copy[0].name for copy in copies)
writes = set(copy[1].name for copy in copies)
if reads & writes:
self.error(
f"Macro {mac.name!r} has conflicting copies "
f"(source of one copy is destination of another): "
f"{reads & writes}",
mac.macro,
)
prevop = part.instr
def analyze_macro(self, macro: parsing.Macro) -> MacroInstruction:
components = self.check_macro_components(macro)
stack, initial_sp = self.stack_analysis(components)
sp = initial_sp
parts: MacroParts = []
flags = InstructionFlags.newEmpty()
offset = 0
@ -287,20 +360,15 @@ class Analyzer:
parts.append(ceffect)
offset += ceffect.size
case Instruction() as instr:
part, sp, offset = self.analyze_instruction(
instr, stack, sp, offset
)
part, offset = self.analyze_instruction(instr, offset)
parts.append(part)
flags.add(instr.instr_flags)
case _:
typing.assert_never(component)
final_sp = sp
format = "IB"
if offset:
format += "C" + "0" * (offset - 1)
return MacroInstruction(
macro.name, stack, initial_sp, final_sp, format, flags, macro, parts, offset
)
return MacroInstruction(macro.name, format, flags, macro, parts, offset)
def analyze_pseudo(self, pseudo: parsing.Pseudo) -> PseudoInstruction:
targets = [self.instrs[target] for target in pseudo.targets]
@ -312,24 +380,15 @@ class Analyzer:
return PseudoInstruction(pseudo.name, targets, fmts[0], targets[0].instr_flags)
def analyze_instruction(
self, instr: Instruction, stack: list[StackEffect], sp: int, offset: int
) -> tuple[Component, int, int]:
input_mapping: StackEffectMapping = []
for ieffect in reversed(instr.input_effects):
sp -= 1
input_mapping.append((stack[sp], ieffect))
output_mapping: StackEffectMapping = []
for oeffect in instr.output_effects:
output_mapping.append((stack[sp], oeffect))
sp += 1
self, instr: Instruction, offset: int
) -> tuple[Component, int]:
active_effects: list[ActiveCacheEffect] = []
for ceffect in instr.cache_effects:
if ceffect.name != UNUSED:
active_effects.append(ActiveCacheEffect(ceffect, offset))
offset += ceffect.size
return (
Component(instr, input_mapping, output_mapping, active_effects),
sp,
Component(instr, active_effects),
offset,
)
@ -348,65 +407,3 @@ class Analyzer:
case _:
typing.assert_never(uop)
return components
def stack_analysis(
self, components: typing.Iterable[InstructionOrCacheEffect]
) -> tuple[list[StackEffect], int]:
"""Analyze a macro.
Ignore cache effects.
Return the list of variables (as StackEffects) and the initial stack pointer.
"""
lowest = current = highest = 0
conditions: dict[int, str] = {} # Indexed by 'current'.
last_instr: Instruction | None = None
for thing in components:
if isinstance(thing, Instruction):
last_instr = thing
for thing in components:
match thing:
case Instruction() as instr:
if any(
eff.size for eff in instr.input_effects + instr.output_effects
):
# TODO: Eventually this will be needed, at least for macros.
self.error(
f"Instruction {instr.name!r} has variable-sized stack effect, "
"which are not supported in macro instructions",
instr.inst, # TODO: Pass name+location of macro
)
if any(eff.cond for eff in instr.input_effects):
self.error(
f"Instruction {instr.name!r} has conditional input stack effect, "
"which are not supported in macro instructions",
instr.inst, # TODO: Pass name+location of macro
)
if (
any(eff.cond for eff in instr.output_effects)
and instr is not last_instr
):
self.error(
f"Instruction {instr.name!r} has conditional output stack effect, "
"but is not the last instruction in a macro",
instr.inst, # TODO: Pass name+location of macro
)
current -= len(instr.input_effects)
lowest = min(lowest, current)
for eff in instr.output_effects:
if eff.cond:
conditions[current] = eff.cond
current += 1
highest = max(highest, current)
case parsing.CacheEffect():
pass
case _:
typing.assert_never(thing)
# At this point, 'current' is the net stack effect,
# and 'lowest' and 'highest' are the extremes.
# Note that 'lowest' may be negative.
stack = [
StackEffect(f"_tmp_{i}", "", conditions.get(highest - i, ""))
for i in reversed(range(1, highest - lowest + 1))
]
return stack, -lowest

View File

@ -49,9 +49,9 @@ class InstructionFlags:
if value:
setattr(self, name, value)
def names(self, value=None):
def names(self, value=None) -> list[str]:
if value is None:
return dataclasses.asdict(self).keys()
return list(dataclasses.asdict(self).keys())
return [n for n, v in dataclasses.asdict(self).items() if v == value]
def bitmap(self) -> int:

View File

@ -2,7 +2,7 @@ import contextlib
import re
import typing
from parsing import StackEffect
from parsing import StackEffect, Family
UNUSED = "unused"
@ -19,8 +19,11 @@ class Formatter:
nominal_filename: str
def __init__(
self, stream: typing.TextIO, indent: int,
emit_line_directives: bool = False, comment: str = "//",
self,
stream: typing.TextIO,
indent: int,
emit_line_directives: bool = False,
comment: str = "//",
) -> None:
self.stream = stream
self.prefix = " " * indent
@ -93,8 +96,11 @@ class Formatter:
typ = f"{dst.type}" if dst.type else "PyObject *"
if src:
cast = self.cast(dst, src)
init = f" = {cast}{src.name}"
elif dst.cond:
initexpr = f"{cast}{src.name}"
if src.cond and src.cond != "1":
initexpr = f"{parenthesize_cond(src.cond)} ? {initexpr} : NULL"
init = f" = {initexpr}"
elif dst.cond and dst.cond != "1":
init = " = NULL"
else:
init = ""
@ -102,10 +108,7 @@ class Formatter:
self.emit(f"{typ}{sepa}{dst.name}{init};")
def assign(self, dst: StackEffect, src: StackEffect):
if src.name == UNUSED:
return
if src.size:
# Don't write sized arrays -- it's up to the user code.
if src.name == UNUSED or dst.name == UNUSED:
return
cast = self.cast(dst, src)
if re.match(r"^REG\(oparg(\d+)\)$", dst.name):
@ -122,6 +125,23 @@ class Formatter:
def cast(self, dst: StackEffect, src: StackEffect) -> str:
return f"({dst.type or 'PyObject *'})" if src.type != dst.type else ""
def static_assert_family_size(
self, name: str, family: Family | None, cache_offset: int
) -> None:
"""Emit a static_assert for the size of a family, if known.
This will fail at compile time if the cache size computed from
the instruction definition does not match the size of the struct
used by specialize.c.
"""
if family and name == family.name:
cache_size = family.size
if cache_size:
self.emit(
f"static_assert({cache_size} == {cache_offset}, "
f'"incorrect cache size");'
)
def prettify_filename(filename: str) -> str:
# Make filename more user-friendly and less platform-specific,
@ -178,11 +198,8 @@ def maybe_parenthesize(sym: str) -> str:
return f"({sym})"
def string_effect_size(arg: tuple[int, str]) -> str:
numeric, symbolic = arg
if numeric and symbolic:
return f"{numeric} + {symbolic}"
elif symbolic:
return symbolic
else:
return str(numeric)
def parenthesize_cond(cond: str) -> str:
"""Parenthesize a condition, but only if it contains ?: itself."""
if "?" in cond:
cond = f"({cond})"
return cond

View File

@ -4,14 +4,14 @@ Writes the cases to generated_cases.c.h, which is #included in ceval.c.
"""
import argparse
import contextlib
import os
import posixpath
import sys
import typing
import stacking # Early import to avoid circular import
from analysis import Analyzer
from formatting import Formatter, list_effect_size, maybe_parenthesize
from formatting import Formatter, list_effect_size
from flags import InstructionFlags, variable_used
from instructions import (
AnyInstruction,
@ -118,41 +118,7 @@ class Generator(Analyzer):
pushed = ""
case parsing.Macro():
instr = self.macro_instrs[thing.name]
parts = [comp for comp in instr.parts if isinstance(comp, Component)]
# Note: stack_analysis() already verifies that macro components
# have no variable-sized stack effects.
low = 0
sp = 0
high = 0
pushed_symbolic: list[str] = []
for comp in parts:
for effect in comp.instr.input_effects:
assert not effect.cond, effect
assert not effect.size, effect
sp -= 1
low = min(low, sp)
for effect in comp.instr.output_effects:
assert not effect.size, effect
if effect.cond:
if effect.cond in ("0", "1"):
pushed_symbolic.append(effect.cond)
else:
pushed_symbolic.append(
maybe_parenthesize(
f"{maybe_parenthesize(effect.cond)} ? 1 : 0"
)
)
sp += 1
high = max(sp, high)
if high != max(0, sp):
# If you get this, intermediate stack growth occurs,
# and stack size calculations may go awry.
# E.g. [push, pop]. The fix would be for stack size
# calculations to use the micro ops.
self.error("Macro has virtual stack growth", thing)
popped = str(-low)
pushed_symbolic.append(str(sp - low - len(pushed_symbolic)))
pushed = " + ".join(pushed_symbolic)
popped, pushed = stacking.get_stack_effect_info_for_macro(instr)
case parsing.Pseudo():
instr = self.pseudo_instrs[thing.name]
popped = pushed = None
@ -258,7 +224,8 @@ class Generator(Analyzer):
case _:
typing.assert_never(thing)
all_formats.add(format)
# Turn it into a list of enum definitions.
# Turn it into a sorted list of enum values.
format_enums = [INSTR_FMT_PREFIX + format for format in sorted(all_formats)]
with open(metadata_filename, "w") as f:
@ -276,8 +243,10 @@ class Generator(Analyzer):
self.write_stack_effect_functions()
# Write type definitions
self.out.emit(f"enum InstructionFormat {{ {', '.join(format_enums)} }};")
# Write the enum definition for instruction formats.
with self.out.block("enum InstructionFormat", ";"):
for enum in format_enums:
self.out.emit(enum + ",")
self.out.emit("")
self.out.emit(
@ -374,7 +343,7 @@ class Generator(Analyzer):
# Since an 'op' is not a bytecode, it has no expansion; but 'inst' is
if instr.kind == "inst" and instr.is_viable_uop():
# Construct a dummy Component -- input/output mappings are not used
part = Component(instr, [], [], instr.active_caches)
part = Component(instr, instr.active_caches)
self.write_macro_expansions(instr.name, [part])
elif instr.kind == "inst" and variable_used(
instr.inst, "oparg1"
@ -468,7 +437,15 @@ class Generator(Analyzer):
if isinstance(part, Component):
# All component instructions must be viable uops
if not part.instr.is_viable_uop():
print(f"NOTE: Part {part.instr.name} of {name} is not a viable uop")
# This note just reminds us about macros that cannot
# be expanded to Tier 2 uops. It is not an error.
# It is sometimes emitted for macros that have a
# manual translation in translate_bytecode_to_trace()
# in Python/optimizer.c.
self.note(
f"Part {part.instr.name} of {name} is not a viable uop",
part.instr.inst,
)
return
if not part.active_caches:
size, offset = OPARG_SIZES["OPARG_FULL"], 0
@ -512,7 +489,7 @@ class Generator(Analyzer):
instr2 = self.instrs[name2]
assert not instr1.active_caches, f"{name1} has active caches"
assert not instr2.active_caches, f"{name2} has active caches"
expansions = [
expansions: list[tuple[str, int, int]] = [
(name1, OPARG_SIZES["OPARG_TOP"], 0),
(name2, OPARG_SIZES["OPARG_BOTTOM"], 0),
]
@ -563,7 +540,6 @@ class Generator(Analyzer):
# Write and count instructions of all kinds
n_instrs = 0
n_macros = 0
n_pseudos = 0
for thing in self.everything:
match thing:
case OverriddenInstructionPlaceHolder():
@ -574,15 +550,17 @@ class Generator(Analyzer):
self.write_instr(self.instrs[thing.name])
case parsing.Macro():
n_macros += 1
self.write_macro(self.macro_instrs[thing.name])
mac = self.macro_instrs[thing.name]
stacking.write_macro_instr(mac, self.out, self.families.get(mac.name))
# self.write_macro(self.macro_instrs[thing.name])
case parsing.Pseudo():
n_pseudos += 1
pass
case _:
typing.assert_never(thing)
print(
f"Wrote {n_instrs} instructions, {n_macros} macros, "
f"and {n_pseudos} pseudos to {output_filename}",
f"Wrote {n_instrs} instructions and {n_macros} macros "
f"to {output_filename}",
file=sys.stderr,
)
@ -590,6 +568,8 @@ class Generator(Analyzer):
self, executor_filename: str, emit_line_directives: bool
) -> None:
"""Generate cases for the Tier 2 interpreter."""
n_instrs = 0
n_uops = 0
with open(executor_filename, "w") as f:
self.out = Formatter(f, 8, emit_line_directives)
self.write_provenance_header()
@ -601,6 +581,10 @@ class Generator(Analyzer):
case parsing.InstDef():
instr = self.instrs[thing.name]
if instr.is_viable_uop():
if instr.kind == "op":
n_uops += 1
else:
n_instrs += 1
self.out.emit("")
with self.out.block(f"case {thing.name}:"):
instr.write(self.out, tier=TIER_TWO)
@ -616,7 +600,7 @@ class Generator(Analyzer):
case _:
typing.assert_never(thing)
print(
f"Wrote some stuff to {executor_filename}",
f"Wrote {n_instrs} instructions and {n_uops} ops to {executor_filename}",
file=sys.stderr,
)
@ -642,69 +626,6 @@ class Generator(Analyzer):
self.out.emit("CHECK_EVAL_BREAKER();")
self.out.emit(f"DISPATCH();")
def write_macro(self, mac: MacroInstruction) -> None:
"""Write code for a macro instruction."""
last_instr: Instruction | None = None
with self.wrap_macro(mac):
cache_adjust = 0
for part in mac.parts:
match part:
case parsing.CacheEffect(size=size):
cache_adjust += size
case Component() as comp:
last_instr = comp.instr
comp.write_body(self.out)
cache_adjust += comp.instr.cache_offset
if cache_adjust:
self.out.emit(f"next_instr += {cache_adjust};")
if (
(family := self.families.get(mac.name))
and mac.name == family.name
and (cache_size := family.size)
):
self.out.emit(
f"static_assert({cache_size} == "
f'{cache_adjust}, "incorrect cache size");'
)
@contextlib.contextmanager
def wrap_macro(self, mac: MacroInstruction):
"""Boilerplate for macro instructions."""
# TODO: Somewhere (where?) make it so that if one instruction
# has an output that is input to another, and the variable names
# and types match and don't conflict with other instructions,
# that variable is declared with the right name and type in the
# outer block, rather than trusting the compiler to optimize it.
self.out.emit("")
with self.out.block(f"TARGET({mac.name})"):
if mac.predicted:
self.out.emit(f"PREDICTED({mac.name});")
# The input effects should have no conditionals.
# Only the output effects do (for now).
ieffects = [
StackEffect(eff.name, eff.type) if eff.cond else eff
for eff in mac.stack
]
for i, var in reversed(list(enumerate(ieffects))):
src = None
if i < mac.initial_sp:
src = StackEffect(f"stack_pointer[-{mac.initial_sp - i}]", "")
self.out.declare(var, src)
yield
self.out.stack_adjust(ieffects[: mac.initial_sp], mac.stack[: mac.final_sp])
for i, var in enumerate(reversed(mac.stack[: mac.final_sp]), 1):
dst = StackEffect(f"stack_pointer[-{i}]", "")
self.out.assign(dst, var)
self.out.emit(f"DISPATCH();")
def main():
"""Parse command line, parse input, analyze, write output."""

View File

@ -2,17 +2,16 @@ import dataclasses
import re
import typing
from flags import InstructionFlags, variable_used_unspecialized
from flags import InstructionFlags, variable_used, variable_used_unspecialized
from formatting import (
Formatter,
UNUSED,
string_effect_size,
list_effect_size,
maybe_parenthesize,
)
import lexer as lx
import parsing
from parsing import StackEffect
import stacking
BITS_PER_CODE_UNIT = 16
@ -61,6 +60,7 @@ class Instruction:
# Computed by constructor
always_exits: bool
has_deopt: bool
cache_offset: int
cache_effects: list[parsing.CacheEffect]
input_effects: list[StackEffect]
@ -83,6 +83,7 @@ class Instruction:
self.block
)
self.always_exits = always_exits(self.block_text)
self.has_deopt = variable_used(self.inst, "DEOPT_IF")
self.cache_effects = [
effect for effect in inst.inputs if isinstance(effect, parsing.CacheEffect)
]
@ -93,7 +94,7 @@ class Instruction:
self.output_effects = inst.outputs # For consistency/completeness
unmoved_names: set[str] = set()
for ieffect, oeffect in zip(self.input_effects, self.output_effects):
if ieffect.name == oeffect.name:
if ieffect == oeffect and ieffect.name == oeffect.name:
unmoved_names.add(ieffect.name)
else:
break
@ -141,84 +142,17 @@ class Instruction:
def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None:
"""Write one instruction, sans prologue and epilogue."""
# Write a static assertion that a family's cache size is correct
if family := self.family:
if self.name == family.name:
if cache_size := family.size:
out.emit(
f"static_assert({cache_size} == "
f'{self.cache_offset}, "incorrect cache size");'
)
out.static_assert_family_size(self.name, self.family, self.cache_offset)
# Write input stack effect variable declarations and initializations
ieffects = list(reversed(self.input_effects))
for i, ieffect in enumerate(ieffects):
isize = string_effect_size(
list_effect_size([ieff for ieff in ieffects[: i + 1]])
)
if ieffect.size:
src = StackEffect(
f"(stack_pointer - {maybe_parenthesize(isize)})", "PyObject **"
)
elif ieffect.cond:
src = StackEffect(
f"({ieffect.cond}) ? stack_pointer[-{maybe_parenthesize(isize)}] : NULL",
"",
)
else:
src = StackEffect(f"stack_pointer[-{maybe_parenthesize(isize)}]", "")
out.declare(ieffect, src)
# Write output stack effect variable declarations
isize = string_effect_size(list_effect_size(self.input_effects))
input_names = {ieffect.name for ieffect in self.input_effects}
for i, oeffect in enumerate(self.output_effects):
if oeffect.name not in input_names:
if oeffect.size:
osize = string_effect_size(
list_effect_size([oeff for oeff in self.output_effects[:i]])
)
offset = "stack_pointer"
if isize != osize:
if isize != "0":
offset += f" - ({isize})"
if osize != "0":
offset += f" + {osize}"
src = StackEffect(offset, "PyObject **")
out.declare(oeffect, src)
else:
out.declare(oeffect, None)
# out.emit(f"next_instr += OPSIZE({self.inst.name}) - 1;")
self.write_body(out, 0, self.active_caches, tier=tier)
stacking.write_single_instr(self, out, tier)
# Skip the rest if the block always exits
if self.always_exits:
return
# Write net stack growth/shrinkage
out.stack_adjust(
[ieff for ieff in self.input_effects],
[oeff for oeff in self.output_effects],
)
# Write output stack effect assignments
oeffects = list(reversed(self.output_effects))
for i, oeffect in enumerate(oeffects):
if oeffect.name in self.unmoved_names:
continue
osize = string_effect_size(
list_effect_size([oeff for oeff in oeffects[: i + 1]])
)
if oeffect.size:
dst = StackEffect(
f"stack_pointer - {maybe_parenthesize(osize)}", "PyObject **"
)
else:
dst = StackEffect(f"stack_pointer[-{maybe_parenthesize(osize)}]", "")
out.assign(dst, oeffect)
# Write cache effect
if tier == TIER_ONE and self.cache_offset:
out.emit(f"next_instr += {self.cache_offset};")
@ -274,7 +208,12 @@ class Instruction:
# These aren't DECREF'ed so they can stay.
ieffs = list(self.input_effects)
oeffs = list(self.output_effects)
while ieffs and oeffs and ieffs[0] == oeffs[0]:
while (
ieffs
and oeffs
and ieffs[0] == oeffs[0]
and ieffs[0].name == oeffs[0].name
):
ieffs.pop(0)
oeffs.pop(0)
ninputs, symbolic = list_effect_size(ieffs)
@ -307,30 +246,13 @@ class Instruction:
InstructionOrCacheEffect = Instruction | parsing.CacheEffect
StackEffectMapping = list[tuple[StackEffect, StackEffect]]
@dataclasses.dataclass
class Component:
instr: Instruction
input_mapping: StackEffectMapping
output_mapping: StackEffectMapping
active_caches: list[ActiveCacheEffect]
def write_body(self, out: Formatter) -> None:
with out.block(""):
input_names = {ieffect.name for _, ieffect in self.input_mapping}
for var, ieffect in self.input_mapping:
out.declare(ieffect, var)
for _, oeffect in self.output_mapping:
if oeffect.name not in input_names:
out.declare(oeffect, None)
self.instr.write_body(out, -4, self.active_caches)
for var, oeffect in self.output_mapping:
out.assign(var, oeffect)
MacroParts = list[Component | parsing.CacheEffect]
@ -340,9 +262,6 @@ class MacroInstruction:
"""A macro instruction."""
name: str
stack: list[StackEffect]
initial_sp: int
final_sp: int
instr_fmt: str
instr_flags: InstructionFlags
macro: parsing.Macro

View File

@ -69,12 +69,18 @@ class Block(Node):
@dataclass
class StackEffect(Node):
name: str
name: str = field(compare=False) # __eq__ only uses type, cond, size
type: str = "" # Optional `:type`
cond: str = "" # Optional `if (cond)`
size: str = "" # Optional `[size]`
# Note: size cannot be combined with type or cond
def __repr__(self):
items = [self.name, self.type, self.cond, self.size]
while items and items[-1] == "":
del items[-1]
return f"StackEffect({', '.join(repr(item) for item in items)})"
@dataclass
class Expression(Node):
@ -130,6 +136,7 @@ class Family(Node):
size: str # Variable giving the cache size in code units
members: list[str]
@dataclass
class Pseudo(Node):
name: str
@ -154,7 +161,13 @@ class Parser(PLexer):
if hdr := self.inst_header():
if block := self.block():
return InstDef(
hdr.override, hdr.register, hdr.kind, hdr.name, hdr.inputs, hdr.outputs, block
hdr.override,
hdr.register,
hdr.kind,
hdr.name,
hdr.inputs,
hdr.outputs,
block,
)
raise self.make_syntax_error("Expected block")
return None
@ -371,9 +384,7 @@ class Parser(PLexer):
raise self.make_syntax_error("Expected {")
if members := self.members():
if self.expect(lx.RBRACE) and self.expect(lx.SEMI):
return Pseudo(
tkn.text, members
)
return Pseudo(tkn.text, members)
return None
def members(self) -> list[str] | None:

View File

@ -1,4 +1,5 @@
import lexer as lx
Token = lx.Token
@ -64,7 +65,9 @@ class PLexer:
tkn = self.next()
if tkn is not None and tkn.kind == kind:
return tkn
raise self.make_syntax_error(f"Expected {kind!r} but got {tkn and tkn.text!r}", tkn)
raise self.make_syntax_error(
f"Expected {kind!r} but got {tkn and tkn.text!r}", tkn
)
def extract_line(self, lineno: int) -> str:
# Return source line `lineno` (1-based)
@ -73,18 +76,20 @@ class PLexer:
return ""
return lines[lineno - 1]
def make_syntax_error(self, message: str, tkn: Token|None = None) -> SyntaxError:
def make_syntax_error(self, message: str, tkn: Token | None = None) -> SyntaxError:
# Construct a SyntaxError instance from message and token
if tkn is None:
tkn = self.peek()
if tkn is None:
tkn = self.tokens[-1]
return lx.make_syntax_error(message,
self.filename, tkn.line, tkn.column, self.extract_line(tkn.line))
return lx.make_syntax_error(
message, self.filename, tkn.line, tkn.column, self.extract_line(tkn.line)
)
if __name__ == "__main__":
import sys
if sys.argv[1:]:
filename = sys.argv[1]
if filename == "-c" and sys.argv[2:]:

View File

@ -0,0 +1,400 @@
import dataclasses
import typing
from formatting import (
Formatter,
UNUSED,
maybe_parenthesize,
parenthesize_cond,
)
from instructions import (
ActiveCacheEffect,
Instruction,
MacroInstruction,
Component,
Tiers,
TIER_ONE,
)
from parsing import StackEffect, CacheEffect, Family
@dataclasses.dataclass
class StackOffset:
"""Represent the stack offset for a PEEK or POKE.
- At stack_pointer[0], deep and high are both empty.
(Note that that is an invalid stack reference.)
- Below stack top, only deep is non-empty.
- Above stack top, only high is non-empty.
- In complex cases, both deep and high may be non-empty.
All this would be much simpler if all stack entries were the same
size, but with conditional and array effects, they aren't.
The offsets are each represented by a list of StackEffect objects.
The name in the StackEffects is unused.
"""
deep: list[StackEffect] = dataclasses.field(default_factory=list)
high: list[StackEffect] = dataclasses.field(default_factory=list)
def clone(self) -> "StackOffset":
return StackOffset(list(self.deep), list(self.high))
def negate(self) -> "StackOffset":
return StackOffset(list(self.high), list(self.deep))
def deeper(self, eff: StackEffect) -> None:
if eff in self.high:
self.high.remove(eff)
else:
self.deep.append(eff)
def higher(self, eff: StackEffect) -> None:
if eff in self.deep:
self.deep.remove(eff)
else:
self.high.append(eff)
def as_terms(self) -> list[tuple[str, str]]:
num = 0
terms: list[tuple[str, str]] = []
for eff in self.deep:
if eff.size:
terms.append(("-", maybe_parenthesize(eff.size)))
elif eff.cond and eff.cond != "1":
terms.append(("-", f"({parenthesize_cond(eff.cond)} ? 1 : 0)"))
elif eff.cond != "0":
num -= 1
for eff in self.high:
if eff.size:
terms.append(("+", maybe_parenthesize(eff.size)))
elif eff.cond and eff.cond != "1":
terms.append(("+", f"({parenthesize_cond(eff.cond)} ? 1 : 0)"))
elif eff.cond != "0":
num += 1
if num < 0:
terms.insert(0, ("-", str(-num)))
elif num > 0:
terms.append(("+", str(num)))
return terms
def as_index(self) -> str:
terms = self.as_terms()
return make_index(terms)
def make_index(terms: list[tuple[str, str]]) -> str:
# Produce an index expression from the terms honoring PEP 8,
# surrounding binary ops with spaces but not unary minus
index = ""
for sign, term in terms:
if index:
index += f" {sign} {term}"
elif sign == "+":
index = term
else:
index = sign + term
return index or "0"
@dataclasses.dataclass
class StackItem:
offset: StackOffset
effect: StackEffect
def as_variable(self, lax: bool = False) -> str:
"""Return e.g. stack_pointer[-1]."""
terms = self.offset.as_terms()
if self.effect.size:
terms.insert(0, ("+", "stack_pointer"))
index = make_index(terms)
if self.effect.size:
res = index
else:
res = f"stack_pointer[{index}]"
if not lax:
# Check that we're not reading or writing above stack top.
# Skip this for output variable initialization (lax=True).
assert (
self.effect in self.offset.deep and not self.offset.high
), f"Push or pop above current stack level: {res}"
return res
@dataclasses.dataclass
class CopyEffect:
src: StackEffect
dst: StackEffect
class EffectManager:
"""Manage stack effects and offsets for an instruction."""
instr: Instruction
active_caches: list[ActiveCacheEffect]
peeks: list[StackItem]
pokes: list[StackItem]
copies: list[CopyEffect] # See merge()
# Track offsets from stack pointer
min_offset: StackOffset
final_offset: StackOffset
def __init__(
self,
instr: Instruction,
active_caches: list[ActiveCacheEffect],
pred: "EffectManager | None" = None,
):
self.instr = instr
self.active_caches = active_caches
self.peeks = []
self.pokes = []
self.copies = []
self.final_offset = pred.final_offset.clone() if pred else StackOffset()
for eff in reversed(instr.input_effects):
self.final_offset.deeper(eff)
self.peeks.append(StackItem(offset=self.final_offset.clone(), effect=eff))
self.min_offset = self.final_offset.clone()
for eff in instr.output_effects:
self.pokes.append(StackItem(offset=self.final_offset.clone(), effect=eff))
self.final_offset.higher(eff)
if pred:
# Replace push(x) + pop(y) with copy(x, y).
# Check that the sources and destinations are disjoint.
sources: set[str] = set()
destinations: set[str] = set()
while (
pred.pokes
and self.peeks
and pred.pokes[-1].effect == self.peeks[-1].effect
):
src = pred.pokes.pop(-1).effect
dst = self.peeks.pop(0).effect
pred.final_offset.deeper(src)
if dst.name != UNUSED:
destinations.add(dst.name)
if dst.name != src.name:
sources.add(src.name)
self.copies.append(CopyEffect(src, dst))
# TODO: Turn this into an error (pass an Analyzer instance?)
assert sources & destinations == set(), (
pred.instr.name,
self.instr.name,
sources,
destinations,
)
def adjust_deeper(self, eff: StackEffect) -> None:
for peek in self.peeks:
peek.offset.deeper(eff)
for poke in self.pokes:
poke.offset.deeper(eff)
self.min_offset.deeper(eff)
self.final_offset.deeper(eff)
def adjust_higher(self, eff: StackEffect) -> None:
for peek in self.peeks:
peek.offset.higher(eff)
for poke in self.pokes:
poke.offset.higher(eff)
self.min_offset.higher(eff)
self.final_offset.higher(eff)
def adjust(self, offset: StackOffset) -> None:
for down in offset.deep:
self.adjust_deeper(down)
for up in offset.high:
self.adjust_higher(up)
def adjust_inverse(self, offset: StackOffset) -> None:
for down in offset.deep:
self.adjust_higher(down)
for up in offset.high:
self.adjust_deeper(up)
def collect_vars(self) -> dict[str, StackEffect]:
"""Collect all variables, skipping unused ones."""
vars: dict[str, StackEffect] = {}
def add(eff: StackEffect) -> None:
if eff.name != UNUSED:
if eff.name in vars:
# TODO: Make this an error
assert vars[eff.name] == eff, (
self.instr.name,
eff.name,
vars[eff.name],
eff,
)
else:
vars[eff.name] = eff
for copy in self.copies:
add(copy.src)
add(copy.dst)
for peek in self.peeks:
add(peek.effect)
for poke in self.pokes:
add(poke.effect)
return vars
def less_than(a: StackOffset, b: StackOffset) -> bool:
# TODO: Handle more cases
if a.high != b.high:
return False
return a.deep[: len(b.deep)] == b.deep
def get_managers(parts: list[Component]) -> list[EffectManager]:
managers: list[EffectManager] = []
pred: EffectManager | None = None
for part in parts:
mgr = EffectManager(part.instr, part.active_caches, pred)
managers.append(mgr)
pred = mgr
return managers
def get_stack_effect_info_for_macro(mac: MacroInstruction) -> tuple[str, str]:
"""Get the stack effect info for a macro instruction.
Returns a tuple (popped, pushed) where each is a string giving a
symbolic expression for the number of values popped/pushed.
"""
parts = [part for part in mac.parts if isinstance(part, Component)]
managers = get_managers(parts)
popped = StackOffset()
for mgr in managers:
if less_than(mgr.min_offset, popped):
popped = mgr.min_offset.clone()
# Compute pushed = final - popped
pushed = managers[-1].final_offset.clone()
for effect in popped.deep:
pushed.higher(effect)
for effect in popped.high:
pushed.deeper(effect)
return popped.negate().as_index(), pushed.as_index()
def write_single_instr(
instr: Instruction, out: Formatter, tier: Tiers = TIER_ONE
) -> None:
try:
write_components(
[Component(instr, instr.active_caches)],
out,
tier,
)
except AssertionError as err:
raise AssertionError(f"Error writing instruction {instr.name}") from err
def write_macro_instr(
mac: MacroInstruction, out: Formatter, family: Family | None
) -> None:
parts = [part for part in mac.parts if isinstance(part, Component)]
cache_adjust = 0
for part in mac.parts:
match part:
case CacheEffect(size=size):
cache_adjust += size
case Component(instr=instr):
cache_adjust += instr.cache_offset
case _:
typing.assert_never(part)
out.emit("")
with out.block(f"TARGET({mac.name})"):
if mac.predicted:
out.emit(f"PREDICTED({mac.name});")
out.static_assert_family_size(mac.name, family, cache_adjust)
try:
write_components(parts, out, TIER_ONE)
except AssertionError as err:
raise AssertionError(f"Error writing macro {mac.name}") from err
if cache_adjust:
out.emit(f"next_instr += {cache_adjust};")
out.emit("DISPATCH();")
def write_components(
parts: list[Component],
out: Formatter,
tier: Tiers,
) -> None:
managers = get_managers(parts)
all_vars: dict[str, StackEffect] = {}
for mgr in managers:
for name, eff in mgr.collect_vars().items():
if name in all_vars:
# TODO: Turn this into an error -- variable conflict
assert all_vars[name] == eff, (
name,
mgr.instr.name,
all_vars[name],
eff,
)
else:
all_vars[name] = eff
# Declare all variables
for name, eff in all_vars.items():
out.declare(eff, None)
for mgr in managers:
if len(parts) > 1:
out.emit(f"// {mgr.instr.name}")
for copy in mgr.copies:
if copy.src.name != copy.dst.name:
out.assign(copy.dst, copy.src)
for peek in mgr.peeks:
out.assign(
peek.effect,
StackEffect(
peek.as_variable(),
peek.effect.type,
peek.effect.cond,
peek.effect.size,
),
)
# Initialize array outputs
for poke in mgr.pokes:
if poke.effect.size and poke.effect.name not in mgr.instr.unmoved_names:
out.assign(
poke.effect,
StackEffect(
poke.as_variable(lax=True),
poke.effect.type,
poke.effect.cond,
poke.effect.size,
),
)
if len(parts) == 1:
mgr.instr.write_body(out, 0, mgr.active_caches, tier)
else:
with out.block(""):
mgr.instr.write_body(out, -4, mgr.active_caches, tier)
if mgr is managers[-1]:
out.stack_adjust(mgr.final_offset.deep, mgr.final_offset.high)
# Use clone() since adjust_inverse() mutates final_offset
mgr.adjust_inverse(mgr.final_offset.clone())
for poke in mgr.pokes:
if not poke.effect.size and poke.effect.name not in mgr.instr.unmoved_names:
out.assign(
StackEffect(
poke.as_variable(),
poke.effect.type,
poke.effect.cond,
poke.effect.size,
),
poke.effect,
)
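
Assuming the working directory is Tools/cases_generator (so that parsing.py and the new stacking.py above are importable), a small usage sketch of StackOffset follows. Because StackEffect equality now ignores the name (see the field(compare=False) change above), a pushed effect cancels a matching popped effect in the offset arithmetic; the printed indices should come out as "-2 - oparg" and then "-1 - oparg":

from parsing import StackEffect
from stacking import StackOffset

off = StackOffset()
off.deeper(StackEffect("right"))                    # pop a plain value
off.deeper(StackEffect("left"))                     # pop another plain value
off.deeper(StackEffect("values", "", "", "oparg"))  # pop an array of oparg values
print(off.as_index())  # -2 - oparg

off.higher(StackEffect("res"))  # push one value; cancels one plain pop
print(off.as_index())  # -1 - oparg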