bpo-37596: Make `set` and `frozenset` marshalling deterministic (GH-27926)

This commit is contained in:
Brandt Bucher 2021-08-25 04:14:34 -07:00 committed by GitHub
parent 7ecd3425d4
commit 33d95c6fac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 59 additions and 0 deletions

View File

@ -344,6 +344,31 @@ class BugsTestCase(unittest.TestCase):
for i in range(len(data)):
self.assertRaises(EOFError, marshal.loads, data[0: i])
def test_deterministic_sets(self):
    # bpo-37596: Reproducible builds depend on marshal writing the elements
    # of sets and frozensets in a consistent order, even when hash
    # randomization has scrambled them.
    element_specs = (
        "float('nan'), b'a', b'b', b'c', 'x', 'y', 'z'",
        # Also test for bad interactions with backreferencing:
        "('string', 1), ('string', 2), ('string', 3)",
    )

    def output_for_seed(code, seed):
        # Run `code` via "-c" with PYTHONHASHSEED set to `seed` and return
        # the middle item of assert_python_ok's result (presumably the
        # captured stdout -- confirm against test.support.script_helper).
        _, out, _ = assert_python_ok("-c", code, PYTHONHASHSEED=seed)
        return out

    for kind in ("set", "frozenset"):
        for spec in element_specs:
            expr = f"{kind}([{spec}])"
            with self.subTest(expr):
                # Precondition: the printed form of the container must
                # differ between hash seeds 0 and 1. If this assertion
                # fails, the element specs above need replacing with ones
                # whose order is actually seed-dependent.
                show = f"print({expr})"
                repr_seed0 = output_for_seed(show, "0")
                repr_seed1 = output_for_seed(show, "1")
                self.assertNotEqual(repr_seed0, repr_seed1)
                # The real check: the marshalled output must be the same
                # under both seeds.
                dump = f"import marshal; print(marshal.dumps({expr}))"
                dump_seed0 = output_for_seed(dump, "0")
                dump_seed1 = output_for_seed(dump, "1")
                self.assertEqual(dump_seed0, dump_seed1)
# 2**31, i.e. one past the largest signed 32-bit integer.
LARGE_SIZE = 1 << 31
# 4 when sys.maxsize fits in 32 bits, otherwise 8 (presumably the pointer
# width in bytes -- confirm against the code that uses this name).
pointer_size = 4 if sys.maxsize <= 0xFFFFFFFF else 8

View File

@ -0,0 +1,2 @@
Ensure that :class:`set` and :class:`frozenset` objects are always
:mod:`marshalled <marshal>` reproducibly.

View File

@ -503,9 +503,41 @@ w_complex_object(PyObject *v, char flag, WFILE *p)
W_TYPE(TYPE_SET, p);
n = PySet_GET_SIZE(v);
W_SIZE(n, p);
// bpo-37596: To support reproducible builds, sets and frozensets need
// to have their elements serialized in a consistent order (even when
// they have been scrambled by hash randomization). To ensure this, we
// use an order equivalent to sorted(v, key=marshal.dumps):
PyObject *pairs = PyList_New(0);
if (pairs == NULL) {
p->error = WFERR_NOMEMORY;
return;
}
while (_PySet_NextEntry(v, &pos, &value, &hash)) {
PyObject *dump = PyMarshal_WriteObjectToString(value, p->version);
if (dump == NULL) {
p->error = WFERR_UNMARSHALLABLE;
goto anyset_done;
}
PyObject *pair = PyTuple_Pack(2, dump, value);
Py_DECREF(dump);
if (pair == NULL || PyList_Append(pairs, pair)) {
p->error = WFERR_NOMEMORY;
Py_XDECREF(pair);
goto anyset_done;
}
Py_DECREF(pair);
}
if (PyList_Sort(pairs)) {
p->error = WFERR_NOMEMORY;
goto anyset_done;
}
for (Py_ssize_t i = 0; i < n; i++) {
PyObject *pair = PyList_GET_ITEM(pairs, i);
value = PyTuple_GET_ITEM(pair, 1);
w_object(value, p);
}
anyset_done:
Py_DECREF(pairs);
}
else if (PyCode_Check(v)) {
PyCodeObject *co = (PyCodeObject *)v;