From 33d95c6facdfda3c8c0feffa7a99184e4abc2f63 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 25 Aug 2021 04:14:34 -0700 Subject: [PATCH] bpo-37596: Make `set` and `frozenset` marshalling deterministic (GH-27926) --- Lib/test/test_marshal.py | 25 +++++++++++++++ .../2021-08-23-21-39-59.bpo-37596.ojRcwB.rst | 2 ++ Python/marshal.c | 32 +++++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2021-08-23-21-39-59.bpo-37596.ojRcwB.rst diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py index d20b9d2c1ff..bdfe79fbbec 100644 --- a/Lib/test/test_marshal.py +++ b/Lib/test/test_marshal.py @@ -344,6 +344,31 @@ class BugsTestCase(unittest.TestCase): for i in range(len(data)): self.assertRaises(EOFError, marshal.loads, data[0: i]) + def test_deterministic_sets(self): + # bpo-37596: To support reproducible builds, sets and frozensets need to + # have their elements serialized in a consistent order (even when they + # have been scrambled by hash randomization): + for kind in ("set", "frozenset"): + for elements in ( + "float('nan'), b'a', b'b', b'c', 'x', 'y', 'z'", + # Also test for bad interactions with backreferencing: + "('string', 1), ('string', 2), ('string', 3)", + ): + s = f"{kind}([{elements}])" + with self.subTest(s): + # First, make sure that our test case still has different + # orders under hash seeds 0 and 1. If this check fails, we + # need to update this test with different elements: + args = ["-c", f"print({s})"] + _, repr_0, _ = assert_python_ok(*args, PYTHONHASHSEED="0") + _, repr_1, _ = assert_python_ok(*args, PYTHONHASHSEED="1") + self.assertNotEqual(repr_0, repr_1) + # Then, perform the actual test: + args = ["-c", f"import marshal; print(marshal.dumps({s}))"] + _, dump_0, _ = assert_python_ok(*args, PYTHONHASHSEED="0") + _, dump_1, _ = assert_python_ok(*args, PYTHONHASHSEED="1") + self.assertEqual(dump_0, dump_1) + LARGE_SIZE = 2**31 pointer_size = 8 if sys.maxsize > 0xFFFFFFFF else 4 diff --git a/Misc/NEWS.d/next/Library/2021-08-23-21-39-59.bpo-37596.ojRcwB.rst b/Misc/NEWS.d/next/Library/2021-08-23-21-39-59.bpo-37596.ojRcwB.rst new file mode 100644 index 00000000000..81fdfeb6294 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-08-23-21-39-59.bpo-37596.ojRcwB.rst @@ -0,0 +1,2 @@ +Ensure that :class:`set` and :class:`frozenset` objects are always +:mod:`marshalled ` reproducibly. diff --git a/Python/marshal.c b/Python/marshal.c index 1260704c74c..b69c4d09641 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -503,9 +503,41 @@ w_complex_object(PyObject *v, char flag, WFILE *p) W_TYPE(TYPE_SET, p); n = PySet_GET_SIZE(v); W_SIZE(n, p); + // bpo-37596: To support reproducible builds, sets and frozensets need + // to have their elements serialized in a consistent order (even when + // they have been scrambled by hash randomization). To ensure this, we + // use an order equivalent to sorted(v, key=marshal.dumps): + PyObject *pairs = PyList_New(0); + if (pairs == NULL) { + p->error = WFERR_NOMEMORY; + return; + } while (_PySet_NextEntry(v, &pos, &value, &hash)) { + PyObject *dump = PyMarshal_WriteObjectToString(value, p->version); + if (dump == NULL) { + p->error = WFERR_UNMARSHALLABLE; + goto anyset_done; + } + PyObject *pair = PyTuple_Pack(2, dump, value); + Py_DECREF(dump); + if (pair == NULL || PyList_Append(pairs, pair)) { + p->error = WFERR_NOMEMORY; + Py_XDECREF(pair); + goto anyset_done; + } + Py_DECREF(pair); + } + if (PyList_Sort(pairs)) { + p->error = WFERR_NOMEMORY; + goto anyset_done; + } + for (Py_ssize_t i = 0; i < n; i++) { + PyObject *pair = PyList_GET_ITEM(pairs, i); + value = PyTuple_GET_ITEM(pair, 1); w_object(value, p); } + anyset_done: + Py_DECREF(pairs); } else if (PyCode_Check(v)) { PyCodeObject *co = (PyCodeObject *)v;