GH-95150: Use position and exception tables for code hashing and equality (GH-95509)

This commit is contained in:
Brandt Bucher 2022-08-01 11:02:56 -07:00 committed by GitHub
parent a95e60db74
commit c7e5bbaee8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 68 additions and 5 deletions

View File

@ -428,6 +428,27 @@ class CodeTest(unittest.TestCase):
self.assertIsNone(line)
self.assertEqual(end_line, new_code.co_firstlineno + 1)
def test_code_equality(self):
# Check that code objects differing only in co_linetable and/or
# co_exceptiontable do not compare equal.
#
# f is only compiled, never called, so a/b/c/d are never looked up.
# NOTE(review): the try/except/else/finally shape is presumably there so
# that f's code object has a non-empty exception table -- confirm.
def f():
try:
a()
except:
b()
else:
c()
finally:
d()
# code_a: the unmodified code object of f.
code_a = f.__code__
# code_b: copy of code_a with an empty line table.
code_b = code_a.replace(co_linetable=b"")
# code_c: copy of code_a with an empty exception table.
code_c = code_a.replace(co_exceptiontable=b"")
# code_d: copy with both the line table and the exception table empty.
code_d = code_b.replace(co_exceptiontable=b"")
# All six pairings of the four variants are asserted to be unequal.
self.assertNotEqual(code_a, code_b)
self.assertNotEqual(code_a, code_c)
self.assertNotEqual(code_a, code_d)
self.assertNotEqual(code_b, code_c)
self.assertNotEqual(code_b, code_d)
self.assertNotEqual(code_c, code_d)
def isinterned(s):
# Return True when s is the very object that sys.intern() returns for a
# string equal to s. The concatenate-then-slice expression rebuilds a
# string with the same contents as s.
# NOTE(review): presumably this is done so a distinct object (not s
# itself) is handed to sys.intern -- confirm.
return s is sys.intern(('_' + s + '_')[1:-1])

View File

@ -615,7 +615,7 @@ if 1:
exec(code, ns)
f1 = ns['f1']
f2 = ns['f2']
self.assertIs(f1.__code__, f2.__code__)
self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
self.check_constant(f1, const)
self.assertEqual(repr(f1()), repr(const))
@ -628,7 +628,7 @@ if 1:
# Note: "lambda: ..." emits "LOAD_CONST Ellipsis",
# whereas "lambda: Ellipsis" emits "LOAD_GLOBAL Ellipsis"
f1, f2 = lambda: ..., lambda: ...
self.assertIs(f1.__code__, f2.__code__)
self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
self.check_constant(f1, Ellipsis)
self.assertEqual(repr(f1()), repr(Ellipsis))
@ -643,7 +643,7 @@ if 1:
# {0} is converted to a constant frozenset({0}) by the peephole
# optimizer
f1, f2 = lambda x: x in {0}, lambda x: x in {0}
self.assertIs(f1.__code__, f2.__code__)
self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
self.check_constant(f1, frozenset({0}))
self.assertTrue(f1(0))
@ -1302,6 +1302,27 @@ f(
self.assertIsNotNone(end_column)
self.assertLessEqual((line, column), (end_line, end_column))
@support.cpython_only
def test_column_offset_deduplication(self):
# GH-95150: Code with different column offsets shouldn't be merged!
for source in [
"lambda: a",
"(a for b in c)",
"[a for b in c]",
"{a for b in c}",
"{a: b for c in d}",
]:
with self.subTest(source):
# Compile the same snippet twice inside one tuple expression, so the
# two copies have identical text but start at different columns.
code = compile(f"{source}, {source}", "<test>", "eval")
# The test expects two separate constants, each a code object,
# i.e. the two copies were not collapsed into a single constant.
self.assertEqual(len(code.co_consts), 2)
self.assertIsInstance(code.co_consts[0], types.CodeType)
self.assertIsInstance(code.co_consts[1], types.CodeType)
# The two code objects must compare unequal ...
self.assertNotEqual(code.co_consts[0], code.co_consts[1])
# ... and must report different position information.
self.assertNotEqual(
list(code.co_consts[0].co_positions()),
list(code.co_consts[1].co_positions()),
)
class TestExpressionStackSize(unittest.TestCase):
# These tests check that the computed stack size for a code object

View File

@ -2012,7 +2012,8 @@ def fib(n):
a, b = 0, 1
"""
try:
self.assertEqual(compile(s1, '<string>', 'exec'), compile(s2, '<string>', 'exec'))
compile(s1, '<string>', 'exec')
compile(s2, '<string>', 'exec')
except SyntaxError:
self.fail("Indented statement over multiple lines is valid")

View File

@ -0,0 +1,3 @@
Update code object hashing and equality to consider all debugging and
exception handling tables. This fixes an issue where certain non-identical
code objects could be "deduplicated" during compilation.

View File

@ -1695,6 +1695,15 @@ code_richcompare(PyObject *self, PyObject *other, int op)
eq = PyObject_RichCompareBool(co->co_localsplusnames,
cp->co_localsplusnames, Py_EQ);
if (eq <= 0) goto unequal;
eq = PyObject_RichCompareBool(co->co_linetable, cp->co_linetable, Py_EQ);
if (eq <= 0) {
goto unequal;
}
eq = PyObject_RichCompareBool(co->co_exceptiontable,
cp->co_exceptiontable, Py_EQ);
if (eq <= 0) {
goto unequal;
}
if (op == Py_EQ)
res = Py_True;
@ -1727,7 +1736,15 @@ code_hash(PyCodeObject *co)
if (h2 == -1) return -1;
h3 = PyObject_Hash(co->co_localsplusnames);
if (h3 == -1) return -1;
h = h0 ^ h1 ^ h2 ^ h3 ^
Py_hash_t h4 = PyObject_Hash(co->co_linetable);
if (h4 == -1) {
return -1;
}
Py_hash_t h5 = PyObject_Hash(co->co_exceptiontable);
if (h5 == -1) {
return -1;
}
h = h0 ^ h1 ^ h2 ^ h3 ^ h4 ^ h5 ^
co->co_argcount ^ co->co_posonlyargcount ^ co->co_kwonlyargcount ^
co->co_flags;
if (h == -1) h = -2;