mirror of https://github.com/python/cpython
gh-95382: Improve performance of json encoder with indent (GH-118105)
This commit is contained in:
parent
7758be4318
commit
05adfbba2a
|
@ -244,15 +244,18 @@ class JSONEncoder(object):
|
|||
return text
|
||||
|
||||
|
||||
if (_one_shot and c_make_encoder is not None
|
||||
and self.indent is None):
|
||||
if self.indent is None or isinstance(self.indent, str):
|
||||
indent = self.indent
|
||||
else:
|
||||
indent = ' ' * self.indent
|
||||
if _one_shot and c_make_encoder is not None:
|
||||
_iterencode = c_make_encoder(
|
||||
markers, self.default, _encoder, self.indent,
|
||||
markers, self.default, _encoder, indent,
|
||||
self.key_separator, self.item_separator, self.sort_keys,
|
||||
self.skipkeys, self.allow_nan)
|
||||
else:
|
||||
_iterencode = _make_iterencode(
|
||||
markers, self.default, _encoder, self.indent, floatstr,
|
||||
markers, self.default, _encoder, indent, floatstr,
|
||||
self.key_separator, self.item_separator, self.sort_keys,
|
||||
self.skipkeys, _one_shot)
|
||||
return _iterencode(o, 0)
|
||||
|
@ -272,9 +275,6 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
|
|||
_intstr=int.__repr__,
|
||||
):
|
||||
|
||||
if _indent is not None and not isinstance(_indent, str):
|
||||
_indent = ' ' * _indent
|
||||
|
||||
def _iterencode_list(lst, _current_indent_level):
|
||||
if not lst:
|
||||
yield '[]'
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Improve performance of :func:`json.dumps` and :func:`json.dump` when using the argument *indent*. Depending on the data, encoding using
|
||||
:func:`json.dumps` with *indent* can be 2 to 3 times faster.
|
136
Modules/_json.c
136
Modules/_json.c
|
@ -85,11 +85,11 @@ encoder_dealloc(PyObject *self);
|
|||
static int
|
||||
encoder_clear(PyEncoderObject *self);
|
||||
static int
|
||||
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level);
|
||||
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent);
|
||||
static int
|
||||
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level);
|
||||
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent);
|
||||
static int
|
||||
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level);
|
||||
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent);
|
||||
static PyObject *
|
||||
_encoded_const(PyObject *obj);
|
||||
static void
|
||||
|
@ -1251,6 +1251,17 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|||
return (PyObject *)s;
|
||||
}
|
||||
|
||||
/* Build the string "\n" + indent * indent_level.
 *
 * indent:       object that is repeated with PySequence_Repeat(); the
 *               visible caller only passes s->indent when it is not None.
 * indent_level: number of repetitions; when it is 0 the result is just "\n".
 *
 * Returns a new reference that the caller must release, or NULL if the
 * initial newline string could not be created.
 * NOTE(review): a failing PySequence_Repeat()/append is expected to leave
 * newline_indent set to NULL too (PyUnicode_AppendAndDel semantics) --
 * confirm against the C API documentation. */
static PyObject *
|
||||
_create_newline_indent(PyObject *indent, Py_ssize_t indent_level)
|
||||
{
|
||||
PyObject *newline_indent = PyUnicode_FromOrdinal('\n');
|
||||
/* Skip the repeat/append when allocation failed or when there is no
   indentation to add at level 0. */
if (newline_indent != NULL && indent_level) {
|
||||
PyUnicode_AppendAndDel(&newline_indent,
|
||||
PySequence_Repeat(indent, indent_level));
|
||||
}
|
||||
return newline_indent;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
|
@ -1267,10 +1278,20 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
|
|||
_PyUnicodeWriter_Init(&writer);
|
||||
writer.overallocate = 1;
|
||||
|
||||
if (encoder_listencode_obj(self, &writer, obj, indent_level)) {
|
||||
PyObject *newline_indent = NULL;
|
||||
if (self->indent != Py_None) {
|
||||
newline_indent = _create_newline_indent(self->indent, indent_level);
|
||||
if (newline_indent == NULL) {
|
||||
_PyUnicodeWriter_Dealloc(&writer);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
if (encoder_listencode_obj(self, &writer, obj, newline_indent)) {
|
||||
_PyUnicodeWriter_Dealloc(&writer);
|
||||
Py_XDECREF(newline_indent);
|
||||
return NULL;
|
||||
}
|
||||
Py_XDECREF(newline_indent);
|
||||
|
||||
result = PyTuple_New(1);
|
||||
if (result == NULL ||
|
||||
|
@ -1358,7 +1379,7 @@ _steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen)
|
|||
|
||||
static int
|
||||
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
|
||||
PyObject *obj, Py_ssize_t indent_level)
|
||||
PyObject *obj, PyObject *newline_indent)
|
||||
{
|
||||
/* Encode Python object obj to a JSON term */
|
||||
PyObject *newobj;
|
||||
|
@ -1394,14 +1415,14 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
|
|||
else if (PyList_Check(obj) || PyTuple_Check(obj)) {
|
||||
if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
|
||||
return -1;
|
||||
rv = encoder_listencode_list(s, writer, obj, indent_level);
|
||||
rv = encoder_listencode_list(s, writer, obj, newline_indent);
|
||||
_Py_LeaveRecursiveCall();
|
||||
return rv;
|
||||
}
|
||||
else if (PyDict_Check(obj)) {
|
||||
if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
|
||||
return -1;
|
||||
rv = encoder_listencode_dict(s, writer, obj, indent_level);
|
||||
rv = encoder_listencode_dict(s, writer, obj, newline_indent);
|
||||
_Py_LeaveRecursiveCall();
|
||||
return rv;
|
||||
}
|
||||
|
@ -1435,7 +1456,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
|
|||
Py_XDECREF(ident);
|
||||
return -1;
|
||||
}
|
||||
rv = encoder_listencode_obj(s, writer, newobj, indent_level);
|
||||
rv = encoder_listencode_obj(s, writer, newobj, newline_indent);
|
||||
_Py_LeaveRecursiveCall();
|
||||
|
||||
Py_DECREF(newobj);
|
||||
|
@ -1456,7 +1477,9 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
|
|||
|
||||
static int
|
||||
encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
|
||||
PyObject *key, PyObject *value, Py_ssize_t indent_level)
|
||||
PyObject *key, PyObject *value,
|
||||
PyObject *newline_indent,
|
||||
PyObject *item_separator)
|
||||
{
|
||||
PyObject *keystr = NULL;
|
||||
PyObject *encoded;
|
||||
|
@ -1493,7 +1516,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
|
|||
*first = false;
|
||||
}
|
||||
else {
|
||||
if (_PyUnicodeWriter_WriteStr(writer, s->item_separator) < 0) {
|
||||
if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) {
|
||||
Py_DECREF(keystr);
|
||||
return -1;
|
||||
}
|
||||
|
@ -1511,7 +1534,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
|
|||
if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
|
||||
return -1;
|
||||
}
|
||||
if (encoder_listencode_obj(s, writer, value, indent_level) < 0) {
|
||||
if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
|
@ -1519,13 +1542,15 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
|
|||
|
||||
static int
|
||||
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
|
||||
PyObject *dct, Py_ssize_t indent_level)
|
||||
PyObject *dct, PyObject *newline_indent)
|
||||
{
|
||||
/* Encode Python dict dct to a JSON term */
|
||||
PyObject *ident = NULL;
|
||||
PyObject *items = NULL;
|
||||
PyObject *key, *value;
|
||||
bool first = true;
|
||||
PyObject *new_newline_indent = NULL;
|
||||
PyObject *separator_indent = NULL;
|
||||
|
||||
if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
|
||||
return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
|
||||
|
@ -1549,14 +1574,21 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
|
|||
if (_PyUnicodeWriter_WriteChar(writer, '{'))
|
||||
goto bail;
|
||||
|
||||
PyObject *current_item_separator = s->item_separator; // borrowed reference
|
||||
if (s->indent != Py_None) {
|
||||
/* TODO: DOES NOT RUN */
|
||||
indent_level += 1;
|
||||
/*
|
||||
newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
|
||||
separator = _item_separator + newline_indent
|
||||
buf += newline_indent
|
||||
*/
|
||||
new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
|
||||
if (new_newline_indent == NULL) {
|
||||
goto bail;
|
||||
}
|
||||
separator_indent = PyUnicode_Concat(current_item_separator, new_newline_indent);
|
||||
if (separator_indent == NULL) {
|
||||
goto bail;
|
||||
}
|
||||
// update item separator with a borrowed reference
|
||||
current_item_separator = separator_indent;
|
||||
if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
|
||||
goto bail;
|
||||
}
|
||||
}
|
||||
|
||||
if (s->sort_keys || !PyDict_CheckExact(dct)) {
|
||||
|
@ -1574,7 +1606,9 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
|
|||
|
||||
key = PyTuple_GET_ITEM(item, 0);
|
||||
value = PyTuple_GET_ITEM(item, 1);
|
||||
if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
|
||||
if (encoder_encode_key_value(s, writer, &first, key, value,
|
||||
new_newline_indent,
|
||||
current_item_separator) < 0)
|
||||
goto bail;
|
||||
}
|
||||
Py_CLEAR(items);
|
||||
|
@ -1582,7 +1616,9 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
|
|||
} else {
|
||||
Py_ssize_t pos = 0;
|
||||
while (PyDict_Next(dct, &pos, &key, &value)) {
|
||||
if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
|
||||
if (encoder_encode_key_value(s, writer, &first, key, value,
|
||||
new_newline_indent,
|
||||
current_item_separator) < 0)
|
||||
goto bail;
|
||||
}
|
||||
}
|
||||
|
@ -1592,12 +1628,15 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
|
|||
goto bail;
|
||||
Py_CLEAR(ident);
|
||||
}
|
||||
/* TODO DOES NOT RUN; dead code
|
||||
if (s->indent != Py_None) {
|
||||
indent_level -= 1;
|
||||
Py_CLEAR(new_newline_indent);
|
||||
Py_CLEAR(separator_indent);
|
||||
|
||||
if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
|
||||
goto bail;
|
||||
}
|
||||
}
|
||||
|
||||
yield '\n' + (' ' * (_indent * _current_indent_level))
|
||||
}*/
|
||||
if (_PyUnicodeWriter_WriteChar(writer, '}'))
|
||||
goto bail;
|
||||
return 0;
|
||||
|
@ -1605,16 +1644,20 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
|
|||
bail:
|
||||
Py_XDECREF(items);
|
||||
Py_XDECREF(ident);
|
||||
Py_XDECREF(separator_indent);
|
||||
Py_XDECREF(new_newline_indent);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int
|
||||
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
|
||||
PyObject *seq, Py_ssize_t indent_level)
|
||||
PyObject *seq, PyObject *newline_indent)
|
||||
{
|
||||
PyObject *ident = NULL;
|
||||
PyObject *s_fast = NULL;
|
||||
Py_ssize_t i;
|
||||
PyObject *new_newline_indent = NULL;
|
||||
PyObject *separator_indent = NULL;
|
||||
|
||||
ident = NULL;
|
||||
s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
|
||||
|
@ -1643,22 +1686,31 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
|
|||
|
||||
if (_PyUnicodeWriter_WriteChar(writer, '['))
|
||||
goto bail;
|
||||
|
||||
PyObject *separator = s->item_separator; // borrowed reference
|
||||
if (s->indent != Py_None) {
|
||||
/* TODO: DOES NOT RUN */
|
||||
indent_level += 1;
|
||||
/*
|
||||
newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
|
||||
separator = _item_separator + newline_indent
|
||||
buf += newline_indent
|
||||
*/
|
||||
new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
|
||||
if (new_newline_indent == NULL) {
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
|
||||
goto bail;
|
||||
}
|
||||
|
||||
separator_indent = PyUnicode_Concat(separator, new_newline_indent);
|
||||
if (separator_indent == NULL) {
|
||||
goto bail;
|
||||
}
|
||||
separator = separator_indent; // assign separator with borrowed reference
|
||||
}
|
||||
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
|
||||
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
|
||||
if (i) {
|
||||
if (_PyUnicodeWriter_WriteStr(writer, s->item_separator))
|
||||
if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
|
||||
goto bail;
|
||||
}
|
||||
if (encoder_listencode_obj(s, writer, obj, indent_level))
|
||||
if (encoder_listencode_obj(s, writer, obj, new_newline_indent))
|
||||
goto bail;
|
||||
}
|
||||
if (ident != NULL) {
|
||||
|
@ -1667,12 +1719,14 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
|
|||
Py_CLEAR(ident);
|
||||
}
|
||||
|
||||
/* TODO: DOES NOT RUN
|
||||
if (s->indent != Py_None) {
|
||||
indent_level -= 1;
|
||||
Py_CLEAR(new_newline_indent);
|
||||
Py_CLEAR(separator_indent);
|
||||
if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
|
||||
goto bail;
|
||||
}
|
||||
}
|
||||
|
||||
yield '\n' + (' ' * (_indent * _current_indent_level))
|
||||
}*/
|
||||
if (_PyUnicodeWriter_WriteChar(writer, ']'))
|
||||
goto bail;
|
||||
Py_DECREF(s_fast);
|
||||
|
@ -1681,6 +1735,8 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
|
|||
bail:
|
||||
Py_XDECREF(ident);
|
||||
Py_DECREF(s_fast);
|
||||
Py_XDECREF(separator_indent);
|
||||
Py_XDECREF(new_newline_indent);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -1721,7 +1777,7 @@ encoder_clear(PyEncoderObject *self)
|
|||
return 0;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
|
||||
PyDoc_STRVAR(encoder_doc, "Encoder(markers, default, encoder, indent, key_separator, item_separator, sort_keys, skipkeys, allow_nan)");
|
||||
|
||||
static PyType_Slot PyEncoderType_slots[] = {
|
||||
{Py_tp_doc, (void *)encoder_doc},
|
||||
|
|
Loading…
Reference in New Issue