GH-120024: Tidy up case generator code a bit. (GH-122780)

This commit is contained in:
Mark Shannon 2024-08-08 10:57:59 +01:00 committed by GitHub
parent 0d9c123d1a
commit 81c739e2dc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 175 additions and 103 deletions

View File

@ -882,7 +882,6 @@
}
}
// _DO_CALL
args = &stack_pointer[-oparg];
self_or_null = maybe_self;
callable = func;
{
@ -3651,7 +3650,6 @@
}
}
// _MONITOR_CALL
args = &stack_pointer[-oparg];
{
int is_meth = !PyStackRef_IsNull(maybe_self);
PyObject *function = PyStackRef_AsPyObjectBorrow(func);
@ -3672,7 +3670,6 @@
if (err) goto error;
}
// _DO_CALL
args = &stack_pointer[-oparg];
self_or_null = maybe_self;
callable = func;
{

View File

@ -62,7 +62,6 @@ class Properties:
return not self.error_with_pop and not self.error_without_pop
SKIP_PROPERTIES = Properties(
escapes=False,
error_with_pop=False,
@ -99,7 +98,6 @@ class Skip:
class Flush:
@property
def properties(self) -> Properties:
return SKIP_PROPERTIES
@ -112,6 +110,7 @@ class Flush:
def size(self) -> int:
return 0
@dataclass
class StackItem:
name: str
@ -133,6 +132,7 @@ class StackItem:
def get_size(self) -> str:
return self.size if self.size else "1"
@dataclass
class StackEffect:
inputs: list[StackItem]
@ -150,6 +150,7 @@ class CacheEntry:
def __str__(self) -> str:
return f"{self.name}/{self.size}"
@dataclass
class Uop:
name: str
@ -163,7 +164,7 @@ class Uop:
_size: int = -1
implicitly_created: bool = False
replicated = 0
replicates : "Uop | None" = None
replicates: "Uop | None" = None
def dump(self, indent: str) -> None:
print(
@ -308,19 +309,26 @@ def override_error(
)
def convert_stack_item(item: parser.StackEffect, replace_op_arg_1: str | None) -> StackItem:
def convert_stack_item(
item: parser.StackEffect, replace_op_arg_1: str | None
) -> StackItem:
cond = item.cond
if replace_op_arg_1 and OPARG_AND_1.match(item.cond):
cond = replace_op_arg_1
return StackItem(
item.name, item.type, cond, item.size
)
return StackItem(item.name, item.type, cond, item.size)
def analyze_stack(op: parser.InstDef | parser.Pseudo, replace_op_arg_1: str | None = None) -> StackEffect:
def analyze_stack(
op: parser.InstDef | parser.Pseudo, replace_op_arg_1: str | None = None
) -> StackEffect:
inputs: list[StackItem] = [
convert_stack_item(i, replace_op_arg_1) for i in op.inputs if isinstance(i, parser.StackEffect)
convert_stack_item(i, replace_op_arg_1)
for i in op.inputs
if isinstance(i, parser.StackEffect)
]
outputs: list[StackItem] = [
convert_stack_item(i, replace_op_arg_1) for i in op.outputs
]
outputs: list[StackItem] = [convert_stack_item(i, replace_op_arg_1) for i in op.outputs]
# Mark variables with matching names at the base of the stack as "peek"
modified = False
for input, output in zip(inputs, outputs):
@ -331,9 +339,11 @@ def analyze_stack(op: parser.InstDef | parser.Pseudo, replace_op_arg_1: str | No
if isinstance(op, parser.InstDef):
output_names = [out.name for out in outputs]
for input in inputs:
if (variable_used(op, input.name) or
variable_used(op, "DECREF_INPUTS") or
(not input.peek and input.name in output_names)):
if (
variable_used(op, input.name)
or variable_used(op, "DECREF_INPUTS")
or (not input.peek and input.name in output_names)
):
input.used = True
for output in outputs:
if variable_used(op, output.name):
@ -359,9 +369,9 @@ def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None]
def find_assignment_target(idx: int) -> list[lexer.Token]:
"""Find the tokens that make up the left-hand side of an assignment"""
offset = 1
for tkn in reversed(node.block.tokens[:idx-1]):
for tkn in reversed(node.block.tokens[: idx - 1]):
if tkn.kind == "SEMI" or tkn.kind == "LBRACE" or tkn.kind == "RBRACE":
return node.block.tokens[idx-offset:idx-1]
return node.block.tokens[idx - offset : idx - 1]
offset += 1
return []
@ -370,42 +380,54 @@ def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None]
if tkn.kind != "IDENTIFIER" or tkn.text != "PyStackRef_FromPyObjectNew":
continue
if idx == 0 or node.block.tokens[idx-1].kind != "EQUALS":
if idx == 0 or node.block.tokens[idx - 1].kind != "EQUALS":
raise analysis_error("Expected '=' before PyStackRef_FromPyObjectNew", tkn)
lhs = find_assignment_target(idx)
if len(lhs) == 0:
raise analysis_error("PyStackRef_FromPyObjectNew() must be assigned to an output", tkn)
raise analysis_error(
"PyStackRef_FromPyObjectNew() must be assigned to an output", tkn
)
if lhs[0].kind == "TIMES" or any(t.kind == "ARROW" or t.kind == "LBRACKET" for t in lhs[1:]):
if lhs[0].kind == "TIMES" or any(
t.kind == "ARROW" or t.kind == "LBRACKET" for t in lhs[1:]
):
# Don't handle: *ptr = ..., ptr->field = ..., or ptr[field] = ...
# Assume that they are visible to the GC.
refs[tkn] = None
continue
if len(lhs) != 1 or lhs[0].kind != "IDENTIFIER":
raise analysis_error("PyStackRef_FromPyObjectNew() must be assigned to an output", tkn)
raise analysis_error(
"PyStackRef_FromPyObjectNew() must be assigned to an output", tkn
)
name = lhs[0].text
if not any(var.name == name for var in node.outputs):
raise analysis_error(f"PyStackRef_FromPyObjectNew() must be assigned to an output, not '{name}'", tkn)
raise analysis_error(
f"PyStackRef_FromPyObjectNew() must be assigned to an output, not '{name}'",
tkn,
)
refs[tkn] = name
return refs
def variable_used(node: parser.InstDef, name: str) -> bool:
"""Determine whether a variable with a given name is used in a node."""
return any(
token.kind == "IDENTIFIER" and token.text == name for token in node.block.tokens
)
def oparg_used(node: parser.InstDef) -> bool:
"""Determine whether `oparg` is used in a node."""
return any(
token.kind == "IDENTIFIER" and token.text == "oparg" for token in node.tokens
)
def tier_variable(node: parser.InstDef) -> int | None:
"""Determine whether a tier variable is used in a node."""
for token in node.tokens:
@ -416,6 +438,7 @@ def tier_variable(node: parser.InstDef) -> int | None:
return int(token.text[-1])
return None
def has_error_with_pop(op: parser.InstDef) -> bool:
return (
variable_used(op, "ERROR_IF")
@ -424,6 +447,7 @@ def has_error_with_pop(op: parser.InstDef) -> bool:
or variable_used(op, "resume_with_error")
)
def has_error_without_pop(op: parser.InstDef) -> bool:
return (
variable_used(op, "ERROR_NO_POP")
@ -606,8 +630,10 @@ def stack_effect_only_peeks(instr: parser.InstDef) -> bool:
for s, other in zip(stack_inputs, instr.outputs)
)
OPARG_AND_1 = re.compile("\\(*oparg *& *1")
def effect_depends_on_oparg_1(op: parser.InstDef) -> bool:
for effect in op.inputs:
if isinstance(effect, parser.CacheEffect):
@ -623,6 +649,7 @@ def effect_depends_on_oparg_1(op: parser.InstDef) -> bool:
return True
return False
def compute_properties(op: parser.InstDef) -> Properties:
has_free = (
variable_used(op, "PyCell_New")
@ -667,7 +694,12 @@ def compute_properties(op: parser.InstDef) -> Properties:
)
def make_uop(name: str, op: parser.InstDef, inputs: list[parser.InputEffect], uops: dict[str, Uop]) -> Uop:
def make_uop(
name: str,
op: parser.InstDef,
inputs: list[parser.InputEffect],
uops: dict[str, Uop],
) -> Uop:
result = Uop(
name=name,
context=op.context,
@ -685,7 +717,9 @@ def make_uop(name: str, op: parser.InstDef, inputs: list[parser.InputEffect], uo
properties = compute_properties(op)
if properties.oparg:
# May not need oparg anymore
properties.oparg = any(token.text == "oparg" for token in op.block.tokens)
properties.oparg = any(
token.text == "oparg" for token in op.block.tokens
)
rep = Uop(
name=name_x,
context=op.context,
@ -736,8 +770,10 @@ def add_op(op: parser.InstDef, uops: dict[str, Uop]) -> None:
def add_instruction(
where: lexer.Token, name: str, parts: list[Part],
instructions: dict[str, Instruction]
where: lexer.Token,
name: str,
parts: list[Part],
instructions: dict[str, Instruction],
) -> None:
instructions[name] = Instruction(where, name, parts, None)
@ -781,7 +817,9 @@ def add_macro(
parts.append(Flush())
else:
if part.name not in uops:
raise analysis_error(f"No Uop named {part.name}", macro.tokens[0])
raise analysis_error(
f"No Uop named {part.name}", macro.tokens[0]
)
parts.append(uops[part.name])
case parser.CacheEffect():
parts.append(Skip(part.size))

View File

@ -58,12 +58,13 @@ def emit_to(out: CWriter, tkn_iter: Iterator[Token], end: str) -> None:
parens -= 1
out.emit(tkn)
ReplacementFunctionType = Callable[
[Token, Iterator[Token], Uop, Stack, Instruction | None], None
]
class Emitter:
class Emitter:
out: CWriter
_replacers: dict[str, ReplacementFunctionType]
@ -176,7 +177,6 @@ class Emitter:
else:
self.out.emit(f"PyStackRef_CLOSE({var.name});\n")
def sync_sp(
self,
tkn: Token,
@ -190,7 +190,6 @@ class Emitter:
next(tkn_iter)
stack.flush(self.out)
def check_eval_breaker(
self,
tkn: Token,
@ -227,7 +226,6 @@ class Emitter:
# unused portions of the stack to NULL.
stack.flush_single_var(self.out, target, uop.stack.outputs)
def emit_tokens(
self,
uop: Uop,
@ -248,6 +246,7 @@ class Emitter:
def emit(self, txt: str | Token) -> None:
self.out.emit(txt)
def cflags(p: Properties) -> str:
flags: list[str] = []
if p.oparg:

View File

@ -91,6 +91,7 @@ def emit_stack_effect_function(
def generate_stack_effect_functions(analysis: Analysis, out: CWriter) -> None:
popped_data: list[tuple[str, str]] = []
pushed_data: list[tuple[str, str]] = []
def add(inst: Instruction | PseudoInstruction) -> None:
stack = get_stack_effect(inst)
popped = (-stack.base_offset).to_c()

View File

@ -88,8 +88,8 @@ def emit_default(out: CWriter, uop: Uop) -> None:
else:
out.emit(f"{var.name} = sym_new_not_null(ctx);\n")
class OptimizerEmitter(Emitter):
class OptimizerEmitter(Emitter):
pass
@ -139,7 +139,7 @@ def write_uop(
local = locals[var.name]
else:
local = Local.local(var)
out.emit(stack.push(local))
stack.push(local)
out.start_line()
stack.flush(out, cast_type="_Py_UopsSymbol *", extract_bits=True)
except StackError as ex:
@ -161,8 +161,9 @@ def generate_abstract_interpreter(
out.emit("\n")
base_uop_names = set([uop.name for uop in base.uops.values()])
for abstract_uop_name in abstract.uops:
assert abstract_uop_name in base_uop_names,\
f"All abstract uops should override base uops, but {abstract_uop_name} is not."
assert (
abstract_uop_name in base_uop_names
), f"All abstract uops should override base uops, but {abstract_uop_name} is not."
for uop in base.uops.values():
override: Uop | None = None
@ -192,7 +193,7 @@ def generate_abstract_interpreter(
def generate_tier2_abstract_from_files(
filenames: list[str], outfilename: str, debug: bool=False
filenames: list[str], outfilename: str, debug: bool = False
) -> None:
assert len(filenames) == 2, "Need a base file and an abstract cases file."
base = analyze_files([filenames[0]])
@ -211,7 +212,7 @@ arg_parser.add_argument(
)
arg_parser.add_argument("input", nargs='*', help="Abstract interpreter definition file")
arg_parser.add_argument("input", nargs="*", help="Abstract interpreter definition file")
arg_parser.add_argument(
"base", nargs="*", help="The base instruction definition file(s)"

View File

@ -66,6 +66,7 @@ class Node:
assert context is not None
return context.owner.tokens[context.begin]
@dataclass
class Block(Node):
# This just holds a context which has the list of tokens.
@ -426,7 +427,9 @@ class Parser(PLexer):
raise self.make_syntax_error("Expected {")
if members := self.members():
if self.expect(lx.RBRACE) and self.expect(lx.SEMI):
return Pseudo(tkn.text, inp, outp, flags, members)
return Pseudo(
tkn.text, inp, outp, flags, members
)
return None
def members(self) -> list[str] | None:

View File

@ -18,7 +18,6 @@ from cwriter import CWriter
from typing import TextIO
DEFAULT_OUTPUT = ROOT / "Lib/_opcode_metadata.py"

View File

@ -38,9 +38,9 @@ def var_size(var: StackItem) -> str:
else:
return "1"
@dataclass
class Local:
item: StackItem
cached: bool
in_memory: bool
@ -75,6 +75,7 @@ class Local:
def is_array(self) -> bool:
return self.item.is_array()
@dataclass
class StackOffset:
"The stack offset of the virtual base of the stack from the physical stack pointer"
@ -183,44 +184,37 @@ class Stack:
)
if var.name in UNUSED:
if popped.name not in UNUSED and popped.name in self.defined:
raise StackError(f"Value is declared unused, but is already cached by prior operation")
raise StackError(
f"Value is declared unused, but is already cached by prior operation"
)
return "", popped
if not var.used:
return "", popped
self.defined.add(var.name)
# Always define array variables as it is free, and their offset might have changed
if var.is_array():
return (
f"{var.name} = &stack_pointer[{self.top_offset.to_c()}];\n",
Local.redefinition(var, popped)
)
if not popped.defined:
return (
f"{var.name} = stack_pointer[{self.top_offset.to_c()}];\n",
Local.redefinition(var, popped)
)
else:
if popped.defined:
if popped.name == var.name:
return "", popped
else:
return (
f"{var.name} = {popped.name};\n",
Local.redefinition(var, popped)
)
defn = f"{var.name} = {popped.name};\n"
else:
if var.is_array():
defn = f"{var.name} = &stack_pointer[{self.top_offset.to_c()}];\n"
else:
defn = f"{var.name} = stack_pointer[{self.top_offset.to_c()}];\n"
return defn, Local.redefinition(var, popped)
self.base_offset.pop(var)
if var.name in UNUSED or not var.used:
return "", Local.unused(var)
self.defined.add(var.name)
cast = f"({var.type})" if (not indirect and var.type) else ""
bits = ".bits" if cast and not extract_bits else ""
assign = (
f"{var.name} = {cast}{indirect}stack_pointer[{self.base_offset.to_c()}]{bits};"
)
assign = f"{var.name} = {cast}{indirect}stack_pointer[{self.base_offset.to_c()}]{bits};"
if var.condition:
if var.condition == "1":
assign = f"{assign}\n"
elif var.condition == "0":
return "", Local.unused(var)
return "", Local.unused(var)
else:
assign = f"if ({var.condition}) {{ {assign} }}\n"
else:
@ -228,21 +222,12 @@ class Stack:
in_memory = var.is_array() or var.peek
return assign, Local(var, not var.is_array(), in_memory, True)
def push(self, var: Local) -> str:
def push(self, var: Local) -> None:
self.variables.append(var)
if var.is_array() and not var.defined and var.item.used:
assert var.in_memory
assert not var.cached
c_offset = self.top_offset.to_c()
self.top_offset.push(var.item)
self.top_offset.push(var.item)
if var.item.used:
self.defined.add(var.name)
var.defined = True
return f"{var.name} = &stack_pointer[{c_offset}];\n"
else:
self.top_offset.push(var.item)
if var.item.used:
self.defined.add(var.name)
return ""
def define_output_arrays(self, outputs: list[StackItem]) -> str:
res = []
@ -257,24 +242,38 @@ class Stack:
return "\n".join(res)
@staticmethod
def _do_emit(out: CWriter, var: StackItem, base_offset: StackOffset,
cast_type: str = "uintptr_t", extract_bits: bool = False) -> None:
def _do_emit(
out: CWriter,
var: StackItem,
base_offset: StackOffset,
cast_type: str = "uintptr_t",
extract_bits: bool = False,
) -> None:
cast = f"({cast_type})" if var.type else ""
bits = ".bits" if cast and not extract_bits else ""
if var.condition == "0":
return
if var.condition and var.condition != "1":
out.emit(f"if ({var.condition}) ")
out.emit(
f"stack_pointer[{base_offset.to_c()}]{bits} = {cast}{var.name};\n"
)
out.emit(f"stack_pointer[{base_offset.to_c()}]{bits} = {cast}{var.name};\n")
@staticmethod
def _do_flush(out: CWriter, variables: list[Local], base_offset: StackOffset, top_offset: StackOffset,
cast_type: str = "uintptr_t", extract_bits: bool = False) -> None:
def _do_flush(
out: CWriter,
variables: list[Local],
base_offset: StackOffset,
top_offset: StackOffset,
cast_type: str = "uintptr_t",
extract_bits: bool = False,
) -> None:
out.start_line()
for var in variables:
if var.cached and not var.in_memory and not var.item.peek and not var.name in UNUSED:
if (
var.cached
and not var.in_memory
and not var.item.peek
and not var.name in UNUSED
):
Stack._do_emit(out, var.item, base_offset, cast_type, extract_bits)
base_offset.push(var.item)
if base_offset.to_c() != top_offset.to_c():
@ -286,31 +285,55 @@ class Stack:
out.emit("assert(WITHIN_STACK_BOUNDS());\n")
out.start_line()
def flush_locally(self, out: CWriter, cast_type: str = "uintptr_t", extract_bits: bool = False) -> None:
self._do_flush(out, self.variables[:], self.base_offset.copy(), self.top_offset.copy(), cast_type, extract_bits)
def flush_locally(
self, out: CWriter, cast_type: str = "uintptr_t", extract_bits: bool = False
) -> None:
self._do_flush(
out,
self.variables[:],
self.base_offset.copy(),
self.top_offset.copy(),
cast_type,
extract_bits,
)
def flush(self, out: CWriter, cast_type: str = "uintptr_t", extract_bits: bool = False) -> None:
self._do_flush(out, self.variables, self.base_offset, self.top_offset, cast_type, extract_bits)
def flush(
self, out: CWriter, cast_type: str = "uintptr_t", extract_bits: bool = False
) -> None:
self._do_flush(
out,
self.variables,
self.base_offset,
self.top_offset,
cast_type,
extract_bits,
)
self.variables = []
self.base_offset.clear()
self.top_offset.clear()
def flush_single_var(self, out: CWriter, var_name: str, outputs: list[StackItem],
cast_type: str = "uintptr_t", extract_bits: bool = False) -> None:
def flush_single_var(
self,
out: CWriter,
var_name: str,
outputs: list[StackItem],
cast_type: str = "uintptr_t",
extract_bits: bool = False,
) -> None:
assert any(var.name == var_name for var in outputs)
base_offset = self.base_offset.copy()
top_offset = self.top_offset.copy()
for var in self.variables:
base_offset.push(var.item)
for var in outputs:
if any(var == v.item for v in self.variables):
for output in outputs:
if any(output == v.item for v in self.variables):
# The variable is already on the stack, such as a peeked value
# in the tier1 generator
continue
if var.name == var_name:
Stack._do_emit(out, var, base_offset, cast_type, extract_bits)
base_offset.push(var)
top_offset.push(var)
if output.name == var_name:
Stack._do_emit(out, output, base_offset, cast_type, extract_bits)
base_offset.push(output)
top_offset.push(output)
if base_offset.to_c() != top_offset.to_c():
print("base", base_offset, "top", top_offset)
assert False
@ -324,7 +347,8 @@ class Stack:
def get_stack_effect(inst: Instruction | PseudoInstruction) -> Stack:
stack = Stack()
def stacks(inst : Instruction | PseudoInstruction) -> Iterator[StackEffect]:
def stacks(inst: Instruction | PseudoInstruction) -> Iterator[StackEffect]:
if isinstance(inst, Instruction):
for uop in inst.parts:
if isinstance(uop, Uop):

View File

@ -30,6 +30,7 @@ def write_opcode_targets(analysis: Analysis, out: CWriter) -> None:
out.emit(target)
out.emit("};\n")
arg_parser = argparse.ArgumentParser(
description="Generate the file with dispatch targets.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,

View File

@ -33,6 +33,7 @@ DEFAULT_OUTPUT = ROOT / "Python/generated_cases.c.h"
FOOTER = "#undef TIER_ONE\n"
def declare_variable(var: StackItem, out: CWriter) -> None:
type, null = type_and_null(var)
space = " " if type[-1].isalnum() else ""
@ -61,8 +62,14 @@ def declare_variables(inst: Instruction, out: CWriter) -> None:
required.remove(var.name)
declare_variable(var, out)
def write_uop(
uop: Part, emitter: Emitter, offset: int, stack: Stack, inst: Instruction, braces: bool
uop: Part,
emitter: Emitter,
offset: int,
stack: Stack,
inst: Instruction,
braces: bool,
) -> int:
# out.emit(stack.as_comment() + "\n")
if isinstance(uop, Skip):
@ -123,7 +130,7 @@ def write_uop(
if output.name in uop.deferred_refs.values():
# We've already spilled this when emitting tokens
output.cached = False
emitter.emit(stack.push(output))
stack.push(output)
if braces:
emitter.out.start_line()
emitter.emit("}\n")

View File

@ -19,7 +19,7 @@ from generators_common import (
emit_to,
write_header,
type_and_null,
Emitter
Emitter,
)
from cwriter import CWriter
from typing import TextIO, Iterator
@ -62,7 +62,6 @@ def declare_variables(uop: Uop, out: CWriter) -> None:
class Tier2Emitter(Emitter):
def __init__(self, out: CWriter):
super().__init__(out)
self._replacers["oparg"] = self.oparg
@ -110,10 +109,10 @@ class Tier2Emitter(Emitter):
next(tkn_iter) # Semi colon
self.emit(") {\n")
self.emit("UOP_STAT_INC(uopcode, miss);\n")
self.emit("JUMP_TO_JUMP_TARGET();\n");
self.emit("JUMP_TO_JUMP_TARGET();\n")
self.emit("}\n")
def exit_if( # type: ignore[override]
def exit_if( # type: ignore[override]
self,
tkn: Token,
tkn_iter: Iterator[Token],
@ -150,6 +149,7 @@ class Tier2Emitter(Emitter):
assert one.text == "1"
self.out.emit_at(uop.name[-1], tkn)
def write_uop(uop: Uop, emitter: Emitter, stack: Stack) -> None:
locals: dict[str, Local] = {}
try:
@ -186,7 +186,7 @@ def write_uop(uop: Uop, emitter: Emitter, stack: Stack) -> None:
if output.name in uop.deferred_refs.values():
# We've already spilled this when emitting tokens
output.cached = False
emitter.emit(stack.push(output))
stack.push(output)
except StackError as ex:
raise analysis_error(ex.args[0], uop.body[0]) from None
@ -219,7 +219,9 @@ def generate_tier2(
continue
why_not_viable = uop.why_not_viable()
if why_not_viable is not None:
out.emit(f"/* {uop.name} is not a viable micro-op for tier 2 because it {why_not_viable} */\n\n")
out.emit(
f"/* {uop.name} is not a viable micro-op for tier 2 because it {why_not_viable} */\n\n"
)
continue
out.emit(f"case {uop.name}: {{\n")
declare_variables(uop, out)