From c0c2aa7644ebd4953682784dbb9904fe955ff647 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 9 Sep 2024 21:28:55 +0300 Subject: [PATCH] gh-122213: Add notes for pickle serialization errors (GH-122214) This allows identifying the source of the error. --- Doc/whatsnew/3.14.rst | 3 + Lib/pickle.py | 172 ++++++++++---- Lib/test/pickletester.py | 217 ++++++++++++++++-- ...-07-24-08-48-22.gh-issue-122213.o3pdgA.rst | 2 + Modules/_pickle.c | 145 +++++++++--- 5 files changed, 441 insertions(+), 98 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-07-24-08-48-22.gh-issue-122213.o3pdgA.rst diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index e1bd52370d7..ebe4b91c502 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -221,6 +221,9 @@ pickle * Set the default protocol version on the :mod:`pickle` module to 5. For more details, please see :ref:`pickle protocols <pickle-protocols>`. +* Add notes for pickle serialization errors that allow identifying the source + of the error. + (Contributed by Serhiy Storchaka in :gh:`122213`.) 
symtable -------- diff --git a/Lib/pickle.py b/Lib/pickle.py index f40b8e3adbe..ed8138beb90 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -600,18 +600,22 @@ class _Pickler: self.save_global(obj, rv) return - # Assert that reduce() returned a tuple - if not isinstance(rv, tuple): - raise PicklingError(f'__reduce__ must return a string or tuple, not {_T(rv)}') + try: + # Assert that reduce() returned a tuple + if not isinstance(rv, tuple): + raise PicklingError(f'__reduce__ must return a string or tuple, not {_T(rv)}') - # Assert that it returned an appropriately sized tuple - l = len(rv) - if not (2 <= l <= 6): - raise PicklingError("tuple returned by __reduce__ " - "must contain 2 through 6 elements") + # Assert that it returned an appropriately sized tuple + l = len(rv) + if not (2 <= l <= 6): + raise PicklingError("tuple returned by __reduce__ " + "must contain 2 through 6 elements") - # Save the reduce() output and finally memoize the object - self.save_reduce(obj=obj, *rv) + # Save the reduce() output and finally memoize the object + self.save_reduce(obj=obj, *rv) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} object') + raise def persistent_id(self, obj): # This exists so a subclass can override it @@ -652,13 +656,25 @@ class _Pickler: raise PicklingError(f"first argument to __newobj_ex__() " f"must be {obj.__class__!r}, not {cls!r}") if self.proto >= 4: - save(cls) - save(args) - save(kwargs) + try: + save(cls) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} class') + raise + try: + save(args) + save(kwargs) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} __new__ arguments') + raise write(NEWOBJ_EX) else: func = partial(cls.__new__, cls, *args, **kwargs) - save(func) + try: + save(func) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} reconstructor') + raise save(()) write(REDUCE) elif self.proto >= 2 and func_name == "__newobj__": @@ -695,12 
+711,28 @@ class _Pickler: raise PicklingError(f"first argument to __newobj__() " f"must be {obj.__class__!r}, not {cls!r}") args = args[1:] - save(cls) - save(args) + try: + save(cls) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} class') + raise + try: + save(args) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} __new__ arguments') + raise write(NEWOBJ) else: - save(func) - save(args) + try: + save(func) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} reconstructor') + raise + try: + save(args) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} reconstructor arguments') + raise write(REDUCE) if obj is not None: @@ -718,23 +750,35 @@ class _Pickler: # items and dict items (as (key, value) tuples), or None. if listitems is not None: - self._batch_appends(listitems) + self._batch_appends(listitems, obj) if dictitems is not None: - self._batch_setitems(dictitems) + self._batch_setitems(dictitems, obj) if state is not None: if state_setter is None: - save(state) + try: + save(state) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} state') + raise write(BUILD) else: # If a state_setter is specified, call it instead of load_build # to update obj's with its previous state. # First, push state_setter and its tuple of expected arguments # (obj, state) onto the stack. - save(state_setter) + try: + save(state_setter) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} state setter') + raise save(obj) # simple BINGET opcode as obj is already memoized. - save(state) + try: + save(state) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} state') + raise write(TUPLE2) # Trigger a state_setter(obj, state) function call. 
write(REDUCE) @@ -914,8 +958,12 @@ class _Pickler: save = self.save memo = self.memo if n <= 3 and self.proto >= 2: - for element in obj: - save(element) + for i, element in enumerate(obj): + try: + save(element) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} item {i}') + raise # Subtle. Same as in the big comment below. if id(obj) in memo: get = self.get(memo[id(obj)][0]) @@ -929,8 +977,12 @@ class _Pickler: # has more than 3 elements. write = self.write write(MARK) - for element in obj: - save(element) + for i, element in enumerate(obj): + try: + save(element) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} item {i}') + raise if id(obj) in memo: # Subtle. d was not in memo when we entered save_tuple(), so @@ -960,38 +1012,52 @@ class _Pickler: self.write(MARK + LIST) self.memoize(obj) - self._batch_appends(obj) + self._batch_appends(obj, obj) dispatch[list] = save_list _BATCHSIZE = 1000 - def _batch_appends(self, items): + def _batch_appends(self, items, obj): # Helper to batch up APPENDS sequences save = self.save write = self.write if not self.bin: - for x in items: - save(x) + for i, x in enumerate(items): + try: + save(x) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} item {i}') + raise write(APPEND) return it = iter(items) + start = 0 while True: tmp = list(islice(it, self._BATCHSIZE)) n = len(tmp) if n > 1: write(MARK) - for x in tmp: - save(x) + for i, x in enumerate(tmp, start): + try: + save(x) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} item {i}') + raise write(APPENDS) elif n: - save(tmp[0]) + try: + save(tmp[0]) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} item {start}') + raise write(APPEND) # else tmp is empty, and we're done if n < self._BATCHSIZE: return + start += n def save_dict(self, obj): if self.bin: @@ -1000,11 +1066,11 @@ class _Pickler: self.write(MARK + DICT) self.memoize(obj) - 
self._batch_setitems(obj.items()) + self._batch_setitems(obj.items(), obj) dispatch[dict] = save_dict - def _batch_setitems(self, items): + def _batch_setitems(self, items, obj): # Helper to batch up SETITEMS sequences; proto >= 1 only save = self.save write = self.write @@ -1012,7 +1078,11 @@ class _Pickler: if not self.bin: for k, v in items: save(k) - save(v) + try: + save(v) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} item {k!r}') + raise write(SETITEM) return @@ -1024,12 +1094,20 @@ class _Pickler: write(MARK) for k, v in tmp: save(k) - save(v) + try: + save(v) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} item {k!r}') + raise write(SETITEMS) elif n: k, v = tmp[0] save(k) - save(v) + try: + save(v) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} item {k!r}') + raise write(SETITEM) # else tmp is empty, and we're done if n < self._BATCHSIZE: @@ -1052,8 +1130,12 @@ class _Pickler: n = len(batch) if n > 0: write(MARK) - for item in batch: - save(item) + try: + for item in batch: + save(item) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} element') + raise write(ADDITEMS) if n < self._BATCHSIZE: return @@ -1068,8 +1150,12 @@ class _Pickler: return write(MARK) - for item in obj: - save(item) + try: + for item in obj: + save(item) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} element') + raise if id(obj) in self.memo: # If the object is already in the memo, this means it is diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index ca6dd6b8b1a..c7dbd997894 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -1614,6 +1614,8 @@ class AbstractPicklingErrorTests: self.dumps(obj, proto) self.assertEqual(str(cm.exception), '__reduce__ must return a string or tuple, not list') + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) obj = REX((print,)) for proto 
in protocols: @@ -1622,6 +1624,8 @@ class AbstractPicklingErrorTests: self.dumps(obj, proto) self.assertEqual(str(cm.exception), 'tuple returned by __reduce__ must contain 2 through 6 elements') + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) obj = REX((print, (), None, None, None, None, None)) for proto in protocols: @@ -1630,6 +1634,8 @@ class AbstractPicklingErrorTests: self.dumps(obj, proto) self.assertEqual(str(cm.exception), 'tuple returned by __reduce__ must contain 2 through 6 elements') + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) def test_bad_reconstructor(self): obj = REX((42, ())) @@ -1640,13 +1646,18 @@ class AbstractPicklingErrorTests: self.assertEqual(str(cm.exception), 'first item of the tuple returned by __reduce__ ' 'must be callable, not int') + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) def test_unpickleable_reconstructor(self): obj = REX((UnpickleableCallable(), ())) for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX reconstructor', + 'when serializing test.pickletester.REX object']) def test_bad_reconstructor_args(self): obj = REX((print, [])) @@ -1657,13 +1668,19 @@ class AbstractPicklingErrorTests: self.assertEqual(str(cm.exception), 'second item of the tuple returned by __reduce__ ' 'must be a tuple, not list') + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) def test_unpickleable_reconstructor_args(self): obj = REX((print, (1, 2, UNPICKLEABLE))) for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + self.assertEqual(cm.exception.__notes__, 
[ + 'when serializing tuple item 2', + 'when serializing test.pickletester.REX reconstructor arguments', + 'when serializing test.pickletester.REX object']) def test_bad_newobj_args(self): obj = REX((copyreg.__newobj__, ())) @@ -1674,6 +1691,8 @@ class AbstractPicklingErrorTests: self.assertIn(str(cm.exception), { 'tuple index out of range', '__newobj__ expected at least 1 argument, got 0'}) + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) obj = REX((copyreg.__newobj__, [REX])) for proto in protocols[2:]: @@ -1683,6 +1702,8 @@ class AbstractPicklingErrorTests: self.assertEqual(str(cm.exception), 'second item of the tuple returned by __reduce__ ' 'must be a tuple, not list') + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) def test_bad_newobj_class(self): obj = REX((copyreg.__newobj__, (NoNew(),))) @@ -1693,6 +1714,8 @@ class AbstractPicklingErrorTests: self.assertIn(str(cm.exception), { 'first argument to __newobj__() has no __new__', f'first argument to __newobj__() must be a class, not {__name__}.NoNew'}) + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) def test_wrong_newobj_class(self): obj = REX((copyreg.__newobj__, (str,))) @@ -1702,21 +1725,42 @@ class AbstractPicklingErrorTests: self.dumps(obj, proto) self.assertEqual(str(cm.exception), f'first argument to __newobj__() must be {REX!r}, not {str!r}') + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) def test_unpickleable_newobj_class(self): class LocalREX(REX): pass obj = LocalREX((copyreg.__newobj__, (LocalREX,))) for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(pickle.PicklingError): + with self.assertRaises(pickle.PicklingError) as cm: self.dumps(obj, proto) + if proto >= 2: + self.assertEqual(cm.exception.__notes__, [ + f'when serializing {LocalREX.__module__}.{LocalREX.__qualname__} 
class', + f'when serializing {LocalREX.__module__}.{LocalREX.__qualname__} object']) + else: + self.assertEqual(cm.exception.__notes__, [ + 'when serializing tuple item 0', + f'when serializing {LocalREX.__module__}.{LocalREX.__qualname__} reconstructor arguments', + f'when serializing {LocalREX.__module__}.{LocalREX.__qualname__} object']) def test_unpickleable_newobj_args(self): obj = REX((copyreg.__newobj__, (REX, 1, 2, UNPICKLEABLE))) for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + if proto >= 2: + self.assertEqual(cm.exception.__notes__, [ + 'when serializing tuple item 2', + 'when serializing test.pickletester.REX __new__ arguments', + 'when serializing test.pickletester.REX object']) + else: + self.assertEqual(cm.exception.__notes__, [ + 'when serializing tuple item 3', + 'when serializing test.pickletester.REX reconstructor arguments', + 'when serializing test.pickletester.REX object']) def test_bad_newobj_ex_args(self): obj = REX((copyreg.__newobj_ex__, ())) @@ -1727,6 +1771,8 @@ class AbstractPicklingErrorTests: self.assertIn(str(cm.exception), { 'not enough values to unpack (expected 3, got 0)', '__newobj_ex__ expected 3 arguments, got 0'}) + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) obj = REX((copyreg.__newobj_ex__, 42)) for proto in protocols[2:]: @@ -1736,6 +1782,8 @@ class AbstractPicklingErrorTests: self.assertEqual(str(cm.exception), 'second item of the tuple returned by __reduce__ ' 'must be a tuple, not int') + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) obj = REX((copyreg.__newobj_ex__, (REX, 42, {}))) if self.pickler is pickle._Pickler: @@ -1745,6 +1793,8 @@ class AbstractPicklingErrorTests: self.dumps(obj, proto) self.assertEqual(str(cm.exception), 'Value after * must be an iterable, not int') + 
self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) else: for proto in protocols[2:]: with self.subTest(proto=proto): @@ -1752,6 +1802,8 @@ class AbstractPicklingErrorTests: self.dumps(obj, proto) self.assertEqual(str(cm.exception), 'second argument to __newobj_ex__() must be a tuple, not int') + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) obj = REX((copyreg.__newobj_ex__, (REX, (), []))) if self.pickler is pickle._Pickler: @@ -1761,6 +1813,8 @@ class AbstractPicklingErrorTests: self.dumps(obj, proto) self.assertEqual(str(cm.exception), 'functools.partial() argument after ** must be a mapping, not list') + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) else: for proto in protocols[2:]: with self.subTest(proto=proto): @@ -1768,6 +1822,8 @@ class AbstractPicklingErrorTests: self.dumps(obj, proto) self.assertEqual(str(cm.exception), 'third argument to __newobj_ex__() must be a dict, not list') + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) def test_bad_newobj_ex__class(self): obj = REX((copyreg.__newobj_ex__, (NoNew(), (), {}))) @@ -1778,6 +1834,8 @@ class AbstractPicklingErrorTests: self.assertIn(str(cm.exception), { 'first argument to __newobj_ex__() has no __new__', f'first argument to __newobj_ex__() must be a class, not {__name__}.NoNew'}) + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) def test_wrong_newobj_ex_class(self): if self.pickler is not pickle._Pickler: @@ -1789,35 +1847,95 @@ class AbstractPicklingErrorTests: self.dumps(obj, proto) self.assertEqual(str(cm.exception), f'first argument to __newobj_ex__() must be {REX}, not {str}') + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) def test_unpickleable_newobj_ex_class(self): class LocalREX(REX): pass obj = 
LocalREX((copyreg.__newobj_ex__, (LocalREX, (), {}))) for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(pickle.PicklingError): + with self.assertRaises(pickle.PicklingError) as cm: self.dumps(obj, proto) + if proto >= 4: + self.assertEqual(cm.exception.__notes__, [ + f'when serializing {LocalREX.__module__}.{LocalREX.__qualname__} class', + f'when serializing {LocalREX.__module__}.{LocalREX.__qualname__} object']) + elif proto >= 2: + self.assertEqual(cm.exception.__notes__, [ + 'when serializing tuple item 0', + 'when serializing tuple item 1', + 'when serializing functools.partial state', + 'when serializing functools.partial object', + f'when serializing {LocalREX.__module__}.{LocalREX.__qualname__} reconstructor', + f'when serializing {LocalREX.__module__}.{LocalREX.__qualname__} object']) + else: + self.assertEqual(cm.exception.__notes__, [ + 'when serializing tuple item 0', + f'when serializing {LocalREX.__module__}.{LocalREX.__qualname__} reconstructor arguments', + f'when serializing {LocalREX.__module__}.{LocalREX.__qualname__} object']) def test_unpickleable_newobj_ex_args(self): obj = REX((copyreg.__newobj_ex__, (REX, (1, 2, UNPICKLEABLE), {}))) for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + if proto >= 4: + self.assertEqual(cm.exception.__notes__, [ + 'when serializing tuple item 2', + 'when serializing test.pickletester.REX __new__ arguments', + 'when serializing test.pickletester.REX object']) + elif proto >= 2: + self.assertEqual(cm.exception.__notes__, [ + 'when serializing tuple item 3', + 'when serializing tuple item 1', + 'when serializing functools.partial state', + 'when serializing functools.partial object', + 'when serializing test.pickletester.REX reconstructor', + 'when serializing test.pickletester.REX object']) + else: + self.assertEqual(cm.exception.__notes__, [ + 'when serializing tuple 
item 2', + 'when serializing tuple item 1', + 'when serializing test.pickletester.REX reconstructor arguments', + 'when serializing test.pickletester.REX object']) def test_unpickleable_newobj_ex_kwargs(self): obj = REX((copyreg.__newobj_ex__, (REX, (), {'a': UNPICKLEABLE}))) for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + if proto >= 4: + self.assertEqual(cm.exception.__notes__, [ + "when serializing dict item 'a'", + 'when serializing test.pickletester.REX __new__ arguments', + 'when serializing test.pickletester.REX object']) + elif proto >= 2: + self.assertEqual(cm.exception.__notes__, [ + "when serializing dict item 'a'", + 'when serializing tuple item 2', + 'when serializing functools.partial state', + 'when serializing functools.partial object', + 'when serializing test.pickletester.REX reconstructor', + 'when serializing test.pickletester.REX object']) + else: + self.assertEqual(cm.exception.__notes__, [ + "when serializing dict item 'a'", + 'when serializing tuple item 2', + 'when serializing test.pickletester.REX reconstructor arguments', + 'when serializing test.pickletester.REX object']) def test_unpickleable_state(self): obj = REX_state(UNPICKLEABLE) for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX_state state', + 'when serializing test.pickletester.REX_state object']) def test_bad_state_setter(self): if self.pickler is pickle._Pickler: @@ -1830,20 +1948,28 @@ class AbstractPicklingErrorTests: self.assertEqual(str(cm.exception), 'sixth item of the tuple returned by __reduce__ ' 'must be callable, not int') + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) def 
test_unpickleable_state_setter(self): obj = REX((print, (), 'state', None, None, UnpickleableCallable())) for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX state setter', + 'when serializing test.pickletester.REX object']) def test_unpickleable_state_with_state_setter(self): obj = REX((print, (), UNPICKLEABLE, None, None, print)) for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX state', + 'when serializing test.pickletester.REX object']) def test_bad_object_list_items(self): # Issue4176: crash when 4th and 5th items of __reduce__() @@ -1857,6 +1983,8 @@ class AbstractPicklingErrorTests: "'int' object is not iterable", 'fourth item of the tuple returned by __reduce__ ' 'must be an iterator, not int'}) + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) if self.pickler is not pickle._Pickler: # Python implementation is less strict and also accepts iterables. 
@@ -1868,13 +1996,18 @@ class AbstractPicklingErrorTests: self.assertEqual(str(cm.exception), 'fourth item of the tuple returned by __reduce__ ' 'must be an iterator, not int') + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) def test_unpickleable_object_list_items(self): obj = REX_six([1, 2, UNPICKLEABLE]) for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX_six item 2', + 'when serializing test.pickletester.REX_six object']) def test_bad_object_dict_items(self): # Issue4176: crash when 4th and 5th items of __reduce__() @@ -1888,6 +2021,8 @@ class AbstractPicklingErrorTests: "'int' object is not iterable", 'fifth item of the tuple returned by __reduce__ ' 'must be an iterator, not int'}) + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) for proto in protocols: obj = REX((dict, (), None, None, iter([('a',)]))) @@ -1897,6 +2032,8 @@ class AbstractPicklingErrorTests: self.assertIn(str(cm.exception), { 'not enough values to unpack (expected 2, got 1)', 'dict items iterator must return 2-tuples'}) + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) if self.pickler is not pickle._Pickler: # Python implementation is less strict and also accepts iterables. 
@@ -1907,66 +2044,106 @@ class AbstractPicklingErrorTests: self.dumps(obj, proto) self.assertEqual(str(cm.exception), 'dict items iterator must return 2-tuples') + self.assertEqual(cm.exception.__notes__, [ + 'when serializing test.pickletester.REX object']) def test_unpickleable_object_dict_items(self): obj = REX_seven({'a': UNPICKLEABLE}) for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + self.assertEqual(cm.exception.__notes__, [ + "when serializing test.pickletester.REX_seven item 'a'", + 'when serializing test.pickletester.REX_seven object']) def test_unpickleable_list_items(self): obj = [1, [2, 3, UNPICKLEABLE]] for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + self.assertEqual(cm.exception.__notes__, [ + 'when serializing list item 2', + 'when serializing list item 1']) for n in [0, 1, 1000, 1005]: obj = [*range(n), UNPICKLEABLE] for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + self.assertEqual(cm.exception.__notes__, [ + f'when serializing list item {n}']) def test_unpickleable_tuple_items(self): obj = (1, (2, 3, UNPICKLEABLE)) for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + self.assertEqual(cm.exception.__notes__, [ + 'when serializing tuple item 2', + 'when serializing tuple item 1']) obj = (*range(10), UNPICKLEABLE) for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + self.assertEqual(cm.exception.__notes__, [ + 'when serializing tuple item 10']) def test_unpickleable_dict_items(self): obj = 
{'a': {'b': UNPICKLEABLE}} for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + self.assertEqual(cm.exception.__notes__, [ + "when serializing dict item 'b'", + "when serializing dict item 'a'"]) for n in [0, 1, 1000, 1005]: obj = dict.fromkeys(range(n)) obj['a'] = UNPICKLEABLE for proto in protocols: with self.subTest(proto=proto, n=n): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + self.assertEqual(cm.exception.__notes__, [ + "when serializing dict item 'a'"]) def test_unpickleable_set_items(self): obj = {UNPICKLEABLE} for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + if proto >= 4: + self.assertEqual(cm.exception.__notes__, [ + 'when serializing set element']) + else: + self.assertEqual(cm.exception.__notes__, [ + 'when serializing list item 0', + 'when serializing tuple item 0', + 'when serializing set reconstructor arguments']) def test_unpickleable_frozenset_items(self): obj = frozenset({frozenset({UNPICKLEABLE})}) for proto in protocols: with self.subTest(proto=proto): - with self.assertRaises(CustomError): + with self.assertRaises(CustomError) as cm: self.dumps(obj, proto) + if proto >= 4: + self.assertEqual(cm.exception.__notes__, [ + 'when serializing frozenset element', + 'when serializing frozenset element']) + else: + self.assertEqual(cm.exception.__notes__, [ + 'when serializing list item 0', + 'when serializing tuple item 0', + 'when serializing frozenset reconstructor arguments', + 'when serializing list item 0', + 'when serializing tuple item 0', + 'when serializing frozenset reconstructor arguments']) def test_global_lookup_error(self): # Global name does not exist diff --git a/Misc/NEWS.d/next/Library/2024-07-24-08-48-22.gh-issue-122213.o3pdgA.rst 
b/Misc/NEWS.d/next/Library/2024-07-24-08-48-22.gh-issue-122213.o3pdgA.rst new file mode 100644 index 00000000000..833a2a676f9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-24-08-48-22.gh-issue-122213.o3pdgA.rst @@ -0,0 +1,2 @@ +Add notes for pickle serialization errors that allow identifying the source +of the error. diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 9863baf0a29..18affdd4875 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -15,6 +15,7 @@ #include "pycore_long.h" // _PyLong_AsByteArray() #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_object.h" // _PyNone_Type +#include "pycore_pyerrors.h" // _PyErr_FormatNote #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_runtime.h" // _Py_ID() #include "pycore_setobject.h" // _PySet_NextEntry() @@ -2764,8 +2765,10 @@ store_tuple_elements(PickleState *state, PicklerObject *self, PyObject *t, if (element == NULL) return -1; - if (save(state, self, element, 0) < 0) + if (save(state, self, element, 0) < 0) { + _PyErr_FormatNote("when serializing %T item %zd", t, i); return -1; + } } return 0; @@ -2884,11 +2887,12 @@ save_tuple(PickleState *state, PicklerObject *self, PyObject *obj) * Returns 0 on success, <0 on error. */ static int -batch_list(PickleState *state, PicklerObject *self, PyObject *iter) +batch_list(PickleState *state, PicklerObject *self, PyObject *iter, PyObject *origobj) { PyObject *obj = NULL; PyObject *firstitem = NULL; int i, n; + Py_ssize_t total = 0; const char mark_op = MARK; const char append_op = APPEND; @@ -2903,7 +2907,7 @@ batch_list(PickleState *state, PicklerObject *self, PyObject *iter) if (self->proto == 0) { /* APPENDS isn't available; do one at a time. 
*/ - for (;;) { + for (;; total++) { obj = PyIter_Next(iter); if (obj == NULL) { if (PyErr_Occurred()) @@ -2912,8 +2916,10 @@ batch_list(PickleState *state, PicklerObject *self, PyObject *iter) } i = save(state, self, obj, 0); Py_DECREF(obj); - if (i < 0) + if (i < 0) { + _PyErr_FormatNote("when serializing %T item %zd", origobj, total); return -1; + } if (_Pickler_Write(self, &append_op, 1) < 0) return -1; } @@ -2939,8 +2945,10 @@ batch_list(PickleState *state, PicklerObject *self, PyObject *iter) goto error; /* Only one item to write */ - if (save(state, self, firstitem, 0) < 0) + if (save(state, self, firstitem, 0) < 0) { + _PyErr_FormatNote("when serializing %T item %zd", origobj, total); goto error; + } if (_Pickler_Write(self, &append_op, 1) < 0) goto error; Py_CLEAR(firstitem); @@ -2953,16 +2961,22 @@ batch_list(PickleState *state, PicklerObject *self, PyObject *iter) if (_Pickler_Write(self, &mark_op, 1) < 0) goto error; - if (save(state, self, firstitem, 0) < 0) + if (save(state, self, firstitem, 0) < 0) { + _PyErr_FormatNote("when serializing %T item %zd", origobj, total); goto error; + } Py_CLEAR(firstitem); + total++; n = 1; /* Fetch and save up to BATCHSIZE items */ while (obj) { - if (save(state, self, obj, 0) < 0) + if (save(state, self, obj, 0) < 0) { + _PyErr_FormatNote("when serializing %T item %zd", origobj, total); goto error; + } Py_CLEAR(obj); + total++; n += 1; if (n == BATCHSIZE) @@ -3018,8 +3032,10 @@ batch_list_exact(PickleState *state, PicklerObject *self, PyObject *obj) Py_INCREF(item); int err = save(state, self, item, 0); Py_DECREF(item); - if (err < 0) + if (err < 0) { + _PyErr_FormatNote("when serializing %T item 0", obj); return -1; + } if (_Pickler_Write(self, &append_op, 1) < 0) return -1; return 0; @@ -3036,8 +3052,10 @@ batch_list_exact(PickleState *state, PicklerObject *self, PyObject *obj) Py_INCREF(item); int err = save(state, self, item, 0); Py_DECREF(item); - if (err < 0) + if (err < 0) { + _PyErr_FormatNote("when 
serializing %T item %zd", obj, total); return -1; + } total++; if (++this_batch == BATCHSIZE) break; @@ -3097,7 +3115,7 @@ save_list(PickleState *state, PicklerObject *self, PyObject *obj) Py_DECREF(iter); goto error; } - status = batch_list(state, self, iter); + status = batch_list(state, self, iter, obj); _Py_LeaveRecursiveCall(); Py_DECREF(iter); } @@ -3125,7 +3143,7 @@ save_list(PickleState *state, PicklerObject *self, PyObject *obj) * ugly to bear. */ static int -batch_dict(PickleState *state, PicklerObject *self, PyObject *iter) +batch_dict(PickleState *state, PicklerObject *self, PyObject *iter, PyObject *origobj) { PyObject *obj = NULL; PyObject *firstitem = NULL; @@ -3153,8 +3171,13 @@ batch_dict(PickleState *state, PicklerObject *self, PyObject *iter) return -1; } i = save(state, self, PyTuple_GET_ITEM(obj, 0), 0); - if (i >= 0) + if (i >= 0) { i = save(state, self, PyTuple_GET_ITEM(obj, 1), 0); + if (i < 0) { + _PyErr_FormatNote("when serializing %T item %R", + origobj, PyTuple_GET_ITEM(obj, 0)); + } + } Py_DECREF(obj); if (i < 0) return -1; @@ -3190,8 +3213,11 @@ batch_dict(PickleState *state, PicklerObject *self, PyObject *iter) /* Only one item to write */ if (save(state, self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0) goto error; - if (save(state, self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0) + if (save(state, self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0) { + _PyErr_FormatNote("when serializing %T item %R", + origobj, PyTuple_GET_ITEM(firstitem, 0)); goto error; + } if (_Pickler_Write(self, &setitem_op, 1) < 0) goto error; Py_CLEAR(firstitem); @@ -3206,8 +3232,11 @@ batch_dict(PickleState *state, PicklerObject *self, PyObject *iter) if (save(state, self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0) goto error; - if (save(state, self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0) + if (save(state, self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0) { + _PyErr_FormatNote("when serializing %T item %R", + origobj, PyTuple_GET_ITEM(firstitem, 0)); goto error; + } 
Py_CLEAR(firstitem); n = 1; @@ -3218,9 +3247,13 @@ batch_dict(PickleState *state, PicklerObject *self, PyObject *iter) "iterator must return 2-tuples"); goto error; } - if (save(state, self, PyTuple_GET_ITEM(obj, 0), 0) < 0 || - save(state, self, PyTuple_GET_ITEM(obj, 1), 0) < 0) + if (save(state, self, PyTuple_GET_ITEM(obj, 0), 0) < 0) goto error; + if (save(state, self, PyTuple_GET_ITEM(obj, 1), 0) < 0) { + _PyErr_FormatNote("when serializing %T item %R", + origobj, PyTuple_GET_ITEM(obj, 0)); + goto error; + } Py_CLEAR(obj); n += 1; @@ -3281,6 +3314,7 @@ batch_dict_exact(PickleState *state, PicklerObject *self, PyObject *obj) goto error; } if (save(state, self, value, 0) < 0) { + _PyErr_FormatNote("when serializing %T item %R", obj, key); goto error; } Py_CLEAR(key); @@ -3302,6 +3336,7 @@ batch_dict_exact(PickleState *state, PicklerObject *self, PyObject *obj) goto error; } if (save(state, self, value, 0) < 0) { + _PyErr_FormatNote("when serializing %T item %R", obj, key); goto error; } Py_CLEAR(key); @@ -3376,7 +3411,7 @@ save_dict(PickleState *state, PicklerObject *self, PyObject *obj) Py_DECREF(iter); goto error; } - status = batch_dict(state, self, iter); + status = batch_dict(state, self, iter, obj); _Py_LeaveRecursiveCall(); Py_DECREF(iter); } @@ -3446,8 +3481,10 @@ save_set(PickleState *state, PicklerObject *self, PyObject *obj) while (_PySet_NextEntryRef(obj, &ppos, &item, &hash)) { err = save(state, self, item, 0); Py_CLEAR(item); - if (err < 0) + if (err < 0) { + _PyErr_FormatNote("when serializing %T element", obj); break; + } if (++i == BATCHSIZE) break; } @@ -3519,6 +3556,7 @@ save_frozenset(PickleState *state, PicklerObject *self, PyObject *obj) break; } if (save(state, self, item, 0) < 0) { + _PyErr_FormatNote("when serializing %T element", obj); Py_DECREF(item); Py_DECREF(iter); return -1; @@ -4058,10 +4096,17 @@ save_reduce(PickleState *st, PicklerObject *self, PyObject *args, } if (self->proto >= 4) { - if (save(st, self, cls, 0) < 0 || - 
save(st, self, args, 0) < 0 || - save(st, self, kwargs, 0) < 0 || - _Pickler_Write(self, &newobj_ex_op, 1) < 0) { + if (save(st, self, cls, 0) < 0) { + _PyErr_FormatNote("when serializing %T class", obj); + return -1; + } + if (save(st, self, args, 0) < 0 || + save(st, self, kwargs, 0) < 0) + { + _PyErr_FormatNote("when serializing %T __new__ arguments", obj); + return -1; + } + if (_Pickler_Write(self, &newobj_ex_op, 1) < 0) { return -1; } } @@ -4098,14 +4143,18 @@ save_reduce(PickleState *st, PicklerObject *self, PyObject *args, } if (save(st, self, callable, 0) < 0 || - save(st, self, newargs, 0) < 0 || - _Pickler_Write(self, &reduce_op, 1) < 0) { + save(st, self, newargs, 0) < 0) + { + _PyErr_FormatNote("when serializing %T reconstructor", obj); Py_DECREF(newargs); Py_DECREF(callable); return -1; } Py_DECREF(newargs); Py_DECREF(callable); + if (_Pickler_Write(self, &reduce_op, 1) < 0) { + return -1; + } } } else if (use_newobj) { @@ -4173,6 +4222,7 @@ save_reduce(PickleState *st, PicklerObject *self, PyObject *args, /* Save the class and its __new__ arguments. */ if (save(st, self, cls, 0) < 0) { + _PyErr_FormatNote("when serializing %T class", obj); return -1; } @@ -4182,18 +4232,27 @@ save_reduce(PickleState *st, PicklerObject *self, PyObject *args, p = save(st, self, newargtup, 0); Py_DECREF(newargtup); - if (p < 0) + if (p < 0) { + _PyErr_FormatNote("when serializing %T __new__ arguments", obj); return -1; + } /* Add NEWOBJ opcode. */ if (_Pickler_Write(self, &newobj_op, 1) < 0) return -1; } else { /* Not using NEWOBJ. 
*/ - if (save(st, self, callable, 0) < 0 || - save(st, self, argtup, 0) < 0 || - _Pickler_Write(self, &reduce_op, 1) < 0) + if (save(st, self, callable, 0) < 0) { + _PyErr_FormatNote("when serializing %T reconstructor", obj); return -1; + } + if (save(st, self, argtup, 0) < 0) { + _PyErr_FormatNote("when serializing %T reconstructor arguments", obj); + return -1; + } + if (_Pickler_Write(self, &reduce_op, 1) < 0) { + return -1; + } } /* obj can be NULL when save_reduce() is used directly. A NULL obj means @@ -4218,16 +4277,19 @@ save_reduce(PickleState *st, PicklerObject *self, PyObject *args, return -1; } - if (listitems && batch_list(st, self, listitems) < 0) + if (listitems && batch_list(st, self, listitems, obj) < 0) return -1; - if (dictitems && batch_dict(st, self, dictitems) < 0) + if (dictitems && batch_dict(st, self, dictitems, obj) < 0) return -1; if (state) { if (state_setter == NULL) { - if (save(st, self, state, 0) < 0 || - _Pickler_Write(self, &build_op, 1) < 0) + if (save(st, self, state, 0) < 0) { + _PyErr_FormatNote("when serializing %T state", obj); + return -1; + } + if (_Pickler_Write(self, &build_op, 1) < 0) return -1; } else { @@ -4243,9 +4305,18 @@ save_reduce(PickleState *st, PicklerObject *self, PyObject *args, const char tupletwo_op = TUPLE2; const char pop_op = POP; - if (save(st, self, state_setter, 0) < 0 || - save(st, self, obj, 0) < 0 || save(st, self, state, 0) < 0 || - _Pickler_Write(self, &tupletwo_op, 1) < 0 || + if (save(st, self, state_setter, 0) < 0) { + _PyErr_FormatNote("when serializing %T state setter", obj); + return -1; + } + if (save(st, self, obj, 0) < 0) { + return -1; + } + if (save(st, self, state, 0) < 0) { + _PyErr_FormatNote("when serializing %T state", obj); + return -1; + } + if (_Pickler_Write(self, &tupletwo_op, 1) < 0 || _Pickler_Write(self, &reduce_op, 1) < 0 || _Pickler_Write(self, &pop_op, 1) < 0) return -1; @@ -4453,10 +4524,14 @@ save(PickleState *st, PicklerObject *self, PyObject *obj, int pers_save) if 
(!PyTuple_Check(reduce_value)) { PyErr_Format(st->PicklingError, "__reduce__ must return a string or tuple, not %T", reduce_value); + _PyErr_FormatNote("when serializing %T object", obj); goto error; } status = save_reduce(st, self, reduce_value, obj); + if (status < 0) { + _PyErr_FormatNote("when serializing %T object", obj); + } if (0) { error: