gh-106581: Fix two bugs in the code generator's copy optimization (#108380)

I was comparing the last preceding poke with the *last* peek,
rather than the *first* peek.

Unfortunately this bug obscured another bug:
When the last preceding poke is UNUSED, the first peek disappears,
leaving the variable unassigned. This is how I fixed it:

- Rename CopyEffect to CopyItem.
- Change CopyItem to contain StackItems instead of StackEffects.
- Update those StackItems when adjusting the manager higher or lower.
- Assert that those StackItems' offsets are equivalent.
- Other clever things.

---------

Co-authored-by: Irit Katriel <1055913+iritkatriel@users.noreply.github.com>
This commit is contained in:
Guido van Rossum 2023-08-24 12:10:51 -07:00 committed by GitHub
parent c494fb333b
commit 88941d665f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 62 additions and 20 deletions

View File

@ -3833,7 +3833,6 @@
DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL);
}
// _CHECK_STACK_SPACE
callable = stack_pointer[-2 - oparg];
{
PyFunctionObject *func = (PyFunctionObject *)callable;
PyCodeObject *code = (PyCodeObject *)func->func_code;
@ -3841,8 +3840,6 @@
}
// _INIT_CALL_PY_EXACT_ARGS
args = stack_pointer - oparg;
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
{
int argcount = oparg;
if (self_or_null != NULL) {

View File

@ -83,6 +83,27 @@ class StackOffset:
terms = self.as_terms()
return make_index(terms)
def equivalent_to(self, other: "StackOffset") -> bool:
if self.deep == other.deep and self.high == other.high:
return True
deep = list(self.deep)
for x in other.deep:
try:
deep.remove(x)
except ValueError:
return False
if deep:
return False
high = list(self.high)
for x in other.high:
try:
high.remove(x)
except ValueError:
return False
if high:
return False
return True
def make_index(terms: list[tuple[str, str]]) -> str:
# Produce an index expression from the terms honoring PEP 8,
@ -131,9 +152,9 @@ class StackItem:
@dataclasses.dataclass
class CopyEffect:
src: StackEffect
dst: StackEffect
class CopyItem:
src: StackItem
dst: StackItem
class EffectManager:
@ -143,7 +164,7 @@ class EffectManager:
active_caches: list[ActiveCacheEffect]
peeks: list[StackItem]
pokes: list[StackItem]
copies: list[CopyEffect] # See merge()
copies: list[CopyItem] # See merge()
# Track offsets from stack pointer
min_offset: StackOffset
final_offset: StackOffset
@ -179,16 +200,18 @@ class EffectManager:
while (
pred.pokes
and self.peeks
and pred.pokes[-1].effect == self.peeks[-1].effect
and pred.pokes[-1].effect == self.peeks[0].effect
):
src = pred.pokes.pop(-1).effect
dst = self.peeks.pop(0).effect
pred.final_offset.deeper(src)
if dst.name != UNUSED:
destinations.add(dst.name)
if dst.name != src.name:
sources.add(src.name)
self.copies.append(CopyEffect(src, dst))
src = pred.pokes.pop(-1)
dst = self.peeks.pop(0)
assert src.offset.equivalent_to(dst.offset), (src, dst)
pred.final_offset.deeper(src.effect)
if dst.effect.name != src.effect.name:
if dst.effect.name != UNUSED:
destinations.add(dst.effect.name)
if src.effect.name != UNUSED:
sources.add(src.effect.name)
self.copies.append(CopyItem(src, dst))
# TODO: Turn this into an error (pass an Analyzer instance?)
assert sources & destinations == set(), (
pred.instr.name,
@ -202,11 +225,27 @@ class EffectManager:
else:
pred = None # Break
# Fix up patterns of copies through UNUSED,
# e.g. cp(a, UNUSED) + cp(UNUSED, b) -> cp(a, b).
if any(copy.src.effect.name == UNUSED for copy in self.copies):
pred = self.pred
while pred is not None:
for copy in self.copies:
if copy.src.effect.name == UNUSED:
for pred_copy in pred.copies:
if pred_copy.dst == copy.src:
copy.src = pred_copy.src
break
pred = pred.pred
def adjust_deeper(self, eff: StackEffect) -> None:
for peek in self.peeks:
peek.offset.deeper(eff)
for poke in self.pokes:
poke.offset.deeper(eff)
for copy in self.copies:
copy.src.offset.deeper(eff)
copy.dst.offset.deeper(eff)
self.min_offset.deeper(eff)
self.final_offset.deeper(eff)
@ -215,6 +254,9 @@ class EffectManager:
peek.offset.higher(eff)
for poke in self.pokes:
poke.offset.higher(eff)
for copy in self.copies:
copy.src.offset.higher(eff)
copy.dst.offset.higher(eff)
self.min_offset.higher(eff)
self.final_offset.higher(eff)
@ -248,8 +290,8 @@ class EffectManager:
vars[eff.name] = eff
for copy in self.copies:
add(copy.src)
add(copy.dst)
add(copy.src.effect)
add(copy.dst.effect)
for peek in self.peeks:
add(peek.effect)
for poke in self.pokes:
@ -365,8 +407,11 @@ def write_components(
out.emit(f"// {mgr.instr.name}")
for copy in mgr.copies:
if copy.src.name != copy.dst.name:
out.assign(copy.dst, copy.src)
copy_src_effect = copy.src.effect
if copy_src_effect.name != copy.dst.effect.name:
if copy_src_effect.name == UNUSED:
copy_src_effect = copy.src.as_stack_effect()
out.assign(copy.dst.effect, copy_src_effect)
for peek in mgr.peeks:
out.assign(
peek.effect,