gh-122163: Add notes for JSON serialization errors (GH-122165)

This makes it possible to identify the source of the error.
This commit is contained in:
Serhiy Storchaka 2024-07-23 20:02:54 +03:00 committed by GitHub
parent c908d1f87d
commit e6b25e9a09
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 135 additions and 66 deletions

View File

@ -112,6 +112,13 @@ Added support for converting any objects that have the
:meth:`!as_integer_ratio` method to a :class:`~fractions.Fraction`.
(Contributed by Serhiy Storchaka in :gh:`82017`.)
json
----
Add notes for JSON serialization errors that make it possible to identify
the source of the error.
(Contributed by Serhiy Storchaka in :gh:`122163`.)
os
--

View File

@ -161,7 +161,8 @@ extern PyObject* _Py_Offer_Suggestions(PyObject* exception);
PyAPI_FUNC(Py_ssize_t) _Py_UTF8_Edit_Cost(PyObject *str_a, PyObject *str_b,
Py_ssize_t max_cost);
void _PyErr_FormatNote(const char *format, ...);
// Export for '_json' shared extension
PyAPI_FUNC(void) _PyErr_FormatNote(const char *format, ...);
/* Context manipulation (PEP 3134) */

View File

@ -293,37 +293,40 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
else:
newline_indent = None
separator = _item_separator
first = True
for value in lst:
if first:
first = False
else:
for i, value in enumerate(lst):
if i:
buf = separator
if isinstance(value, str):
yield buf + _encoder(value)
elif value is None:
yield buf + 'null'
elif value is True:
yield buf + 'true'
elif value is False:
yield buf + 'false'
elif isinstance(value, int):
# Subclasses of int/float may override __repr__, but we still
# want to encode them as integers/floats in JSON. One example
# within the standard library is IntEnum.
yield buf + _intstr(value)
elif isinstance(value, float):
# see comment above for int
yield buf + _floatstr(value)
else:
yield buf
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
try:
if isinstance(value, str):
yield buf + _encoder(value)
elif value is None:
yield buf + 'null'
elif value is True:
yield buf + 'true'
elif value is False:
yield buf + 'false'
elif isinstance(value, int):
# Subclasses of int/float may override __repr__, but we still
# want to encode them as integers/floats in JSON. One example
# within the standard library is IntEnum.
yield buf + _intstr(value)
elif isinstance(value, float):
# see comment above for int
yield buf + _floatstr(value)
else:
chunks = _iterencode(value, _current_indent_level)
yield from chunks
yield buf
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
yield from chunks
except GeneratorExit:
raise
except BaseException as exc:
exc.add_note(f'when serializing {type(lst).__name__} item {i}')
raise
if newline_indent is not None:
_current_indent_level -= 1
yield '\n' + _indent * _current_indent_level
@ -382,28 +385,34 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
yield item_separator
yield _encoder(key)
yield _key_separator
if isinstance(value, str):
yield _encoder(value)
elif value is None:
yield 'null'
elif value is True:
yield 'true'
elif value is False:
yield 'false'
elif isinstance(value, int):
# see comment for int/float in _make_iterencode
yield _intstr(value)
elif isinstance(value, float):
# see comment for int/float in _make_iterencode
yield _floatstr(value)
else:
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
try:
if isinstance(value, str):
yield _encoder(value)
elif value is None:
yield 'null'
elif value is True:
yield 'true'
elif value is False:
yield 'false'
elif isinstance(value, int):
# see comment for int/float in _make_iterencode
yield _intstr(value)
elif isinstance(value, float):
# see comment for int/float in _make_iterencode
yield _floatstr(value)
else:
chunks = _iterencode(value, _current_indent_level)
yield from chunks
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
yield from chunks
except GeneratorExit:
raise
except BaseException as exc:
exc.add_note(f'when serializing {type(dct).__name__} item {key!r}')
raise
if newline_indent is not None:
_current_indent_level -= 1
yield '\n' + _indent * _current_indent_level
@ -436,8 +445,14 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = o
o = _default(o)
yield from _iterencode(o, _current_indent_level)
newobj = _default(o)
try:
yield from _iterencode(newobj, _current_indent_level)
except GeneratorExit:
raise
except BaseException as exc:
exc.add_note(f'when serializing {type(o).__name__} object')
raise
if markers is not None:
del markers[markerid]
return _iterencode

View File

@ -8,6 +8,24 @@ class TestDefault:
self.dumps(type, default=repr),
self.dumps(repr(type)))
def test_bad_default(self):
# Check that an exception raised while serializing through a chain of
# ``default`` conversions carries one note per level of that chain.
def default(obj):
# Conversion chain driven by this hook:
#   type -> collections module -> [...] -> Ellipsis -> NotImplemented
# and converting NotImplemented itself raises ValueError.
if obj is NotImplemented:
raise ValueError
if obj is ...:
return NotImplemented
if obj is type:
return collections
# Any other object (here: the collections module) becomes a list
# whose only item is Ellipsis.
return [...]
with self.assertRaises(ValueError) as cm:
self.dumps(type, default=default)
# The expected notes are listed innermost first: the note for the object
# closest to the failure comes first and the note for the top-level
# object passed to dumps() comes last.
self.assertEqual(cm.exception.__notes__,
['when serializing ellipsis object',
'when serializing list item 0',
'when serializing module object',
'when serializing type object'])
def test_ordereddict(self):
od = collections.OrderedDict(a=1, b=2, c=3, d=4)
od.move_to_end('b')

View File

@ -100,8 +100,27 @@ class TestFail:
def test_not_serializable(self):
import sys
with self.assertRaisesRegex(TypeError,
'Object of type module is not JSON serializable'):
'Object of type module is not JSON serializable') as cm:
self.dumps(sys)
self.assertFalse(hasattr(cm.exception, '__notes__'))
with self.assertRaises(TypeError) as cm:
self.dumps([1, [2, 3, sys]])
self.assertEqual(cm.exception.__notes__,
['when serializing list item 2',
'when serializing list item 1'])
with self.assertRaises(TypeError) as cm:
self.dumps((1, (2, 3, sys)))
self.assertEqual(cm.exception.__notes__,
['when serializing tuple item 2',
'when serializing tuple item 1'])
with self.assertRaises(TypeError) as cm:
self.dumps({'a': {'b': sys}})
self.assertEqual(cm.exception.__notes__,
["when serializing dict item 'b'",
"when serializing dict item 'a'"])
def test_truncated_input(self):
test_cases = [

View File

@ -12,8 +12,8 @@ class TestRecursion:
x.append(x)
try:
self.dumps(x)
except ValueError:
pass
except ValueError as exc:
self.assertEqual(exc.__notes__, ["when serializing list item 0"])
else:
self.fail("didn't raise ValueError on list recursion")
x = []
@ -21,8 +21,8 @@ class TestRecursion:
x.append(y)
try:
self.dumps(x)
except ValueError:
pass
except ValueError as exc:
self.assertEqual(exc.__notes__, ["when serializing list item 0"]*2)
else:
self.fail("didn't raise ValueError on alternating list recursion")
y = []
@ -35,8 +35,8 @@ class TestRecursion:
x["test"] = x
try:
self.dumps(x)
except ValueError:
pass
except ValueError as exc:
self.assertEqual(exc.__notes__, ["when serializing dict item 'test'"])
else:
self.fail("didn't raise ValueError on dict recursion")
x = {}
@ -60,8 +60,10 @@ class TestRecursion:
enc.recurse = True
try:
enc.encode(JSONTestObject)
except ValueError:
pass
except ValueError as exc:
self.assertEqual(exc.__notes__,
["when serializing list item 0",
"when serializing type object"])
else:
self.fail("didn't raise ValueError on default recursion")

View File

@ -0,0 +1,2 @@
Add notes for JSON serialization errors that make it possible to identify
the source of the error.

View File

@ -11,6 +11,7 @@
#include "Python.h"
#include "pycore_ceval.h" // _Py_EnterRecursiveCall()
#include "pycore_runtime.h" // _PyRuntime
#include "pycore_pyerrors.h" // _PyErr_FormatNote
#include "pycore_global_strings.h" // _Py_ID()
#include <stdbool.h> // bool
@ -1461,6 +1462,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
Py_DECREF(newobj);
if (rv) {
_PyErr_FormatNote("when serializing %T object", obj);
Py_XDECREF(ident);
return -1;
}
@ -1477,7 +1479,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
static int
encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
PyObject *key, PyObject *value,
PyObject *dct, PyObject *key, PyObject *value,
PyObject *newline_indent,
PyObject *item_separator)
{
@ -1535,6 +1537,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
return -1;
}
if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
_PyErr_FormatNote("when serializing %T item %R", dct, key);
return -1;
}
return 0;
@ -1606,7 +1609,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
key = PyTuple_GET_ITEM(item, 0);
value = PyTuple_GET_ITEM(item, 1);
if (encoder_encode_key_value(s, writer, &first, key, value,
if (encoder_encode_key_value(s, writer, &first, dct, key, value,
new_newline_indent,
current_item_separator) < 0)
goto bail;
@ -1616,7 +1619,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
} else {
Py_ssize_t pos = 0;
while (PyDict_Next(dct, &pos, &key, &value)) {
if (encoder_encode_key_value(s, writer, &first, key, value,
if (encoder_encode_key_value(s, writer, &first, dct, key, value,
new_newline_indent,
current_item_separator) < 0)
goto bail;
@ -1710,8 +1713,10 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
goto bail;
}
if (encoder_listencode_obj(s, writer, obj, new_newline_indent))
if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) {
_PyErr_FormatNote("when serializing %T item %zd", seq, i);
goto bail;
}
}
if (ident != NULL) {
if (PyDict_DelItem(s->markers, ident))