gh-122163: Add notes for JSON serialization errors (GH-122165)

This makes it possible to identify the source of the error.
This commit is contained in:
Serhiy Storchaka 2024-07-23 20:02:54 +03:00 committed by GitHub
parent c908d1f87d
commit e6b25e9a09
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 135 additions and 66 deletions

View File

@ -112,6 +112,13 @@ Added support for converting any objects that have the
:meth:`!as_integer_ratio` method to a :class:`~fractions.Fraction`. :meth:`!as_integer_ratio` method to a :class:`~fractions.Fraction`.
(Contributed by Serhiy Storchaka in :gh:`82017`.) (Contributed by Serhiy Storchaka in :gh:`82017`.)
json
----
Add notes for JSON serialization errors that make it possible to identify
the source of the error.
(Contributed by Serhiy Storchaka in :gh:`122163`.)
os os
-- --

View File

@ -161,7 +161,8 @@ extern PyObject* _Py_Offer_Suggestions(PyObject* exception);
PyAPI_FUNC(Py_ssize_t) _Py_UTF8_Edit_Cost(PyObject *str_a, PyObject *str_b, PyAPI_FUNC(Py_ssize_t) _Py_UTF8_Edit_Cost(PyObject *str_a, PyObject *str_b,
Py_ssize_t max_cost); Py_ssize_t max_cost);
void _PyErr_FormatNote(const char *format, ...); // Export for '_json' shared extension
PyAPI_FUNC(void) _PyErr_FormatNote(const char *format, ...);
/* Context manipulation (PEP 3134) */ /* Context manipulation (PEP 3134) */

View File

@ -293,37 +293,40 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
else: else:
newline_indent = None newline_indent = None
separator = _item_separator separator = _item_separator
first = True for i, value in enumerate(lst):
for value in lst: if i:
if first:
first = False
else:
buf = separator buf = separator
if isinstance(value, str): try:
yield buf + _encoder(value) if isinstance(value, str):
elif value is None: yield buf + _encoder(value)
yield buf + 'null' elif value is None:
elif value is True: yield buf + 'null'
yield buf + 'true' elif value is True:
elif value is False: yield buf + 'true'
yield buf + 'false' elif value is False:
elif isinstance(value, int): yield buf + 'false'
# Subclasses of int/float may override __repr__, but we still elif isinstance(value, int):
# want to encode them as integers/floats in JSON. One example # Subclasses of int/float may override __repr__, but we still
# within the standard library is IntEnum. # want to encode them as integers/floats in JSON. One example
yield buf + _intstr(value) # within the standard library is IntEnum.
elif isinstance(value, float): yield buf + _intstr(value)
# see comment above for int elif isinstance(value, float):
yield buf + _floatstr(value) # see comment above for int
else: yield buf + _floatstr(value)
yield buf
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else: else:
chunks = _iterencode(value, _current_indent_level) yield buf
yield from chunks if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
yield from chunks
except GeneratorExit:
raise
except BaseException as exc:
exc.add_note(f'when serializing {type(lst).__name__} item {i}')
raise
if newline_indent is not None: if newline_indent is not None:
_current_indent_level -= 1 _current_indent_level -= 1
yield '\n' + _indent * _current_indent_level yield '\n' + _indent * _current_indent_level
@ -382,28 +385,34 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
yield item_separator yield item_separator
yield _encoder(key) yield _encoder(key)
yield _key_separator yield _key_separator
if isinstance(value, str): try:
yield _encoder(value) if isinstance(value, str):
elif value is None: yield _encoder(value)
yield 'null' elif value is None:
elif value is True: yield 'null'
yield 'true' elif value is True:
elif value is False: yield 'true'
yield 'false' elif value is False:
elif isinstance(value, int): yield 'false'
# see comment for int/float in _make_iterencode elif isinstance(value, int):
yield _intstr(value) # see comment for int/float in _make_iterencode
elif isinstance(value, float): yield _intstr(value)
# see comment for int/float in _make_iterencode elif isinstance(value, float):
yield _floatstr(value) # see comment for int/float in _make_iterencode
else: yield _floatstr(value)
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else: else:
chunks = _iterencode(value, _current_indent_level) if isinstance(value, (list, tuple)):
yield from chunks chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
yield from chunks
except GeneratorExit:
raise
except BaseException as exc:
exc.add_note(f'when serializing {type(dct).__name__} item {key!r}')
raise
if newline_indent is not None: if newline_indent is not None:
_current_indent_level -= 1 _current_indent_level -= 1
yield '\n' + _indent * _current_indent_level yield '\n' + _indent * _current_indent_level
@ -436,8 +445,14 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
if markerid in markers: if markerid in markers:
raise ValueError("Circular reference detected") raise ValueError("Circular reference detected")
markers[markerid] = o markers[markerid] = o
o = _default(o) newobj = _default(o)
yield from _iterencode(o, _current_indent_level) try:
yield from _iterencode(newobj, _current_indent_level)
except GeneratorExit:
raise
except BaseException as exc:
exc.add_note(f'when serializing {type(o).__name__} object')
raise
if markers is not None: if markers is not None:
del markers[markerid] del markers[markerid]
return _iterencode return _iterencode

View File

@ -8,6 +8,24 @@ class TestDefault:
self.dumps(type, default=repr), self.dumps(type, default=repr),
self.dumps(repr(type))) self.dumps(repr(type)))
def test_bad_default(self):
def default(obj):
if obj is NotImplemented:
raise ValueError
if obj is ...:
return NotImplemented
if obj is type:
return collections
return [...]
with self.assertRaises(ValueError) as cm:
self.dumps(type, default=default)
self.assertEqual(cm.exception.__notes__,
['when serializing ellipsis object',
'when serializing list item 0',
'when serializing module object',
'when serializing type object'])
def test_ordereddict(self): def test_ordereddict(self):
od = collections.OrderedDict(a=1, b=2, c=3, d=4) od = collections.OrderedDict(a=1, b=2, c=3, d=4)
od.move_to_end('b') od.move_to_end('b')

View File

@ -100,8 +100,27 @@ class TestFail:
def test_not_serializable(self): def test_not_serializable(self):
import sys import sys
with self.assertRaisesRegex(TypeError, with self.assertRaisesRegex(TypeError,
'Object of type module is not JSON serializable'): 'Object of type module is not JSON serializable') as cm:
self.dumps(sys) self.dumps(sys)
self.assertFalse(hasattr(cm.exception, '__notes__'))
with self.assertRaises(TypeError) as cm:
self.dumps([1, [2, 3, sys]])
self.assertEqual(cm.exception.__notes__,
['when serializing list item 2',
'when serializing list item 1'])
with self.assertRaises(TypeError) as cm:
self.dumps((1, (2, 3, sys)))
self.assertEqual(cm.exception.__notes__,
['when serializing tuple item 2',
'when serializing tuple item 1'])
with self.assertRaises(TypeError) as cm:
self.dumps({'a': {'b': sys}})
self.assertEqual(cm.exception.__notes__,
["when serializing dict item 'b'",
"when serializing dict item 'a'"])
def test_truncated_input(self): def test_truncated_input(self):
test_cases = [ test_cases = [

View File

@ -12,8 +12,8 @@ class TestRecursion:
x.append(x) x.append(x)
try: try:
self.dumps(x) self.dumps(x)
except ValueError: except ValueError as exc:
pass self.assertEqual(exc.__notes__, ["when serializing list item 0"])
else: else:
self.fail("didn't raise ValueError on list recursion") self.fail("didn't raise ValueError on list recursion")
x = [] x = []
@ -21,8 +21,8 @@ class TestRecursion:
x.append(y) x.append(y)
try: try:
self.dumps(x) self.dumps(x)
except ValueError: except ValueError as exc:
pass self.assertEqual(exc.__notes__, ["when serializing list item 0"]*2)
else: else:
self.fail("didn't raise ValueError on alternating list recursion") self.fail("didn't raise ValueError on alternating list recursion")
y = [] y = []
@ -35,8 +35,8 @@ class TestRecursion:
x["test"] = x x["test"] = x
try: try:
self.dumps(x) self.dumps(x)
except ValueError: except ValueError as exc:
pass self.assertEqual(exc.__notes__, ["when serializing dict item 'test'"])
else: else:
self.fail("didn't raise ValueError on dict recursion") self.fail("didn't raise ValueError on dict recursion")
x = {} x = {}
@ -60,8 +60,10 @@ class TestRecursion:
enc.recurse = True enc.recurse = True
try: try:
enc.encode(JSONTestObject) enc.encode(JSONTestObject)
except ValueError: except ValueError as exc:
pass self.assertEqual(exc.__notes__,
["when serializing list item 0",
"when serializing type object"])
else: else:
self.fail("didn't raise ValueError on default recursion") self.fail("didn't raise ValueError on default recursion")

View File

@ -0,0 +1,2 @@
Add notes for JSON serialization errors that make it possible to identify
the source of the error.

View File

@ -11,6 +11,7 @@
#include "Python.h" #include "Python.h"
#include "pycore_ceval.h" // _Py_EnterRecursiveCall() #include "pycore_ceval.h" // _Py_EnterRecursiveCall()
#include "pycore_runtime.h" // _PyRuntime #include "pycore_runtime.h" // _PyRuntime
#include "pycore_pyerrors.h" // _PyErr_FormatNote
#include "pycore_global_strings.h" // _Py_ID() #include "pycore_global_strings.h" // _Py_ID()
#include <stdbool.h> // bool #include <stdbool.h> // bool
@ -1461,6 +1462,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
Py_DECREF(newobj); Py_DECREF(newobj);
if (rv) { if (rv) {
_PyErr_FormatNote("when serializing %T object", obj);
Py_XDECREF(ident); Py_XDECREF(ident);
return -1; return -1;
} }
@ -1477,7 +1479,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
static int static int
encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first, encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
PyObject *key, PyObject *value, PyObject *dct, PyObject *key, PyObject *value,
PyObject *newline_indent, PyObject *newline_indent,
PyObject *item_separator) PyObject *item_separator)
{ {
@ -1535,6 +1537,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
return -1; return -1;
} }
if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) { if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
_PyErr_FormatNote("when serializing %T item %R", dct, key);
return -1; return -1;
} }
return 0; return 0;
@ -1606,7 +1609,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
key = PyTuple_GET_ITEM(item, 0); key = PyTuple_GET_ITEM(item, 0);
value = PyTuple_GET_ITEM(item, 1); value = PyTuple_GET_ITEM(item, 1);
if (encoder_encode_key_value(s, writer, &first, key, value, if (encoder_encode_key_value(s, writer, &first, dct, key, value,
new_newline_indent, new_newline_indent,
current_item_separator) < 0) current_item_separator) < 0)
goto bail; goto bail;
@ -1616,7 +1619,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
} else { } else {
Py_ssize_t pos = 0; Py_ssize_t pos = 0;
while (PyDict_Next(dct, &pos, &key, &value)) { while (PyDict_Next(dct, &pos, &key, &value)) {
if (encoder_encode_key_value(s, writer, &first, key, value, if (encoder_encode_key_value(s, writer, &first, dct, key, value,
new_newline_indent, new_newline_indent,
current_item_separator) < 0) current_item_separator) < 0)
goto bail; goto bail;
@ -1710,8 +1713,10 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
goto bail; goto bail;
} }
if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) {
_PyErr_FormatNote("when serializing %T item %zd", seq, i);
goto bail; goto bail;
}
} }
if (ident != NULL) { if (ident != NULL) {
if (PyDict_DelItem(s->markers, ident)) if (PyDict_DelItem(s->markers, ident))