gh-95382: Improve performance of json encoder with indent (GH-118105)

This commit is contained in:
Pieter Eendebak 2024-05-06 10:04:39 +02:00 committed by GitHub
parent 7758be4318
commit 05adfbba2a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 105 additions and 47 deletions

View File

@ -244,15 +244,18 @@ class JSONEncoder(object):
return text return text
if (_one_shot and c_make_encoder is not None if self.indent is None or isinstance(self.indent, str):
and self.indent is None): indent = self.indent
else:
indent = ' ' * self.indent
if _one_shot and c_make_encoder is not None:
_iterencode = c_make_encoder( _iterencode = c_make_encoder(
markers, self.default, _encoder, self.indent, markers, self.default, _encoder, indent,
self.key_separator, self.item_separator, self.sort_keys, self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, self.allow_nan) self.skipkeys, self.allow_nan)
else: else:
_iterencode = _make_iterencode( _iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr, markers, self.default, _encoder, indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys, self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot) self.skipkeys, _one_shot)
return _iterencode(o, 0) return _iterencode(o, 0)
@ -272,9 +275,6 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_intstr=int.__repr__, _intstr=int.__repr__,
): ):
if _indent is not None and not isinstance(_indent, str):
_indent = ' ' * _indent
def _iterencode_list(lst, _current_indent_level): def _iterencode_list(lst, _current_indent_level):
if not lst: if not lst:
yield '[]' yield '[]'

View File

@ -0,0 +1,2 @@
Improve performance of :func:`json.dumps` and :func:`json.dump` when using the argument *indent*. Depending on the data, encoding with
:func:`json.dumps` and *indent* can be 2 to 3 times faster.

View File

@ -85,11 +85,11 @@ encoder_dealloc(PyObject *self);
static int static int
encoder_clear(PyEncoderObject *self); encoder_clear(PyEncoderObject *self);
static int static int
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level); encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent);
static int static int
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level); encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent);
static int static int
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level); encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent);
static PyObject * static PyObject *
_encoded_const(PyObject *obj); _encoded_const(PyObject *obj);
static void static void
@ -1251,6 +1251,17 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return (PyObject *)s; return (PyObject *)s;
} }
/* Build the prefix written before an item at nesting depth indent_level:
 * a single newline followed by indent repeated indent_level times.
 *
 * Returns a reference owned by the caller, which is expected to test the
 * result for NULL before using it.
 */
static PyObject *
_create_newline_indent(PyObject *indent, Py_ssize_t indent_level)
{
    PyObject *line_prefix = PyUnicode_FromOrdinal('\n');

    /* Nothing to append at depth 0, or when the newline itself could not
       be created. */
    if (line_prefix == NULL || !indent_level) {
        return line_prefix;
    }

    /* The repeated indent is handed over unchecked; PyUnicode_AppendAndDel
       takes over that reference.
       NOTE(review): per the C API docs it also resets line_prefix to NULL
       on error -- confirm. */
    PyObject *padding = PySequence_Repeat(indent, indent_level);
    PyUnicode_AppendAndDel(&line_prefix, padding);
    return line_prefix;
}
static PyObject * static PyObject *
encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds) encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
{ {
@ -1267,10 +1278,20 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
_PyUnicodeWriter_Init(&writer); _PyUnicodeWriter_Init(&writer);
writer.overallocate = 1; writer.overallocate = 1;
if (encoder_listencode_obj(self, &writer, obj, indent_level)) { PyObject *newline_indent = NULL;
if (self->indent != Py_None) {
newline_indent = _create_newline_indent(self->indent, indent_level);
if (newline_indent == NULL) {
_PyUnicodeWriter_Dealloc(&writer);
return NULL;
}
}
if (encoder_listencode_obj(self, &writer, obj, newline_indent)) {
_PyUnicodeWriter_Dealloc(&writer); _PyUnicodeWriter_Dealloc(&writer);
Py_XDECREF(newline_indent);
return NULL; return NULL;
} }
Py_XDECREF(newline_indent);
result = PyTuple_New(1); result = PyTuple_New(1);
if (result == NULL || if (result == NULL ||
@ -1358,7 +1379,7 @@ _steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen)
static int static int
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
PyObject *obj, Py_ssize_t indent_level) PyObject *obj, PyObject *newline_indent)
{ {
/* Encode Python object obj to a JSON term */ /* Encode Python object obj to a JSON term */
PyObject *newobj; PyObject *newobj;
@ -1394,14 +1415,14 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
else if (PyList_Check(obj) || PyTuple_Check(obj)) { else if (PyList_Check(obj) || PyTuple_Check(obj)) {
if (_Py_EnterRecursiveCall(" while encoding a JSON object")) if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
return -1; return -1;
rv = encoder_listencode_list(s, writer, obj, indent_level); rv = encoder_listencode_list(s, writer, obj, newline_indent);
_Py_LeaveRecursiveCall(); _Py_LeaveRecursiveCall();
return rv; return rv;
} }
else if (PyDict_Check(obj)) { else if (PyDict_Check(obj)) {
if (_Py_EnterRecursiveCall(" while encoding a JSON object")) if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
return -1; return -1;
rv = encoder_listencode_dict(s, writer, obj, indent_level); rv = encoder_listencode_dict(s, writer, obj, newline_indent);
_Py_LeaveRecursiveCall(); _Py_LeaveRecursiveCall();
return rv; return rv;
} }
@ -1435,7 +1456,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
Py_XDECREF(ident); Py_XDECREF(ident);
return -1; return -1;
} }
rv = encoder_listencode_obj(s, writer, newobj, indent_level); rv = encoder_listencode_obj(s, writer, newobj, newline_indent);
_Py_LeaveRecursiveCall(); _Py_LeaveRecursiveCall();
Py_DECREF(newobj); Py_DECREF(newobj);
@ -1456,7 +1477,9 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
static int static int
encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first, encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
PyObject *key, PyObject *value, Py_ssize_t indent_level) PyObject *key, PyObject *value,
PyObject *newline_indent,
PyObject *item_separator)
{ {
PyObject *keystr = NULL; PyObject *keystr = NULL;
PyObject *encoded; PyObject *encoded;
@ -1493,7 +1516,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
*first = false; *first = false;
} }
else { else {
if (_PyUnicodeWriter_WriteStr(writer, s->item_separator) < 0) { if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) {
Py_DECREF(keystr); Py_DECREF(keystr);
return -1; return -1;
} }
@ -1511,7 +1534,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
return -1; return -1;
} }
if (encoder_listencode_obj(s, writer, value, indent_level) < 0) { if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
return -1; return -1;
} }
return 0; return 0;
@ -1519,13 +1542,15 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
static int static int
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
PyObject *dct, Py_ssize_t indent_level) PyObject *dct, PyObject *newline_indent)
{ {
/* Encode Python dict dct a JSON term */ /* Encode Python dict dct a JSON term */
PyObject *ident = NULL; PyObject *ident = NULL;
PyObject *items = NULL; PyObject *items = NULL;
PyObject *key, *value; PyObject *key, *value;
bool first = true; bool first = true;
PyObject *new_newline_indent = NULL;
PyObject *separator_indent = NULL;
if (PyDict_GET_SIZE(dct) == 0) /* Fast path */ if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
@ -1549,14 +1574,21 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
if (_PyUnicodeWriter_WriteChar(writer, '{')) if (_PyUnicodeWriter_WriteChar(writer, '{'))
goto bail; goto bail;
PyObject *current_item_separator = s->item_separator; // borrowed reference
if (s->indent != Py_None) { if (s->indent != Py_None) {
/* TODO: DOES NOT RUN */ new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
indent_level += 1; if (new_newline_indent == NULL) {
/* goto bail;
newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) }
separator = _item_separator + newline_indent separator_indent = PyUnicode_Concat(current_item_separator, new_newline_indent);
buf += newline_indent if (separator_indent == NULL) {
*/ goto bail;
}
// update item separator with a borrowed reference
current_item_separator = separator_indent;
if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
goto bail;
}
} }
if (s->sort_keys || !PyDict_CheckExact(dct)) { if (s->sort_keys || !PyDict_CheckExact(dct)) {
@ -1574,7 +1606,9 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
key = PyTuple_GET_ITEM(item, 0); key = PyTuple_GET_ITEM(item, 0);
value = PyTuple_GET_ITEM(item, 1); value = PyTuple_GET_ITEM(item, 1);
if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0) if (encoder_encode_key_value(s, writer, &first, key, value,
new_newline_indent,
current_item_separator) < 0)
goto bail; goto bail;
} }
Py_CLEAR(items); Py_CLEAR(items);
@ -1582,7 +1616,9 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
} else { } else {
Py_ssize_t pos = 0; Py_ssize_t pos = 0;
while (PyDict_Next(dct, &pos, &key, &value)) { while (PyDict_Next(dct, &pos, &key, &value)) {
if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0) if (encoder_encode_key_value(s, writer, &first, key, value,
new_newline_indent,
current_item_separator) < 0)
goto bail; goto bail;
} }
} }
@ -1592,12 +1628,15 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
goto bail; goto bail;
Py_CLEAR(ident); Py_CLEAR(ident);
} }
/* TODO DOES NOT RUN; dead code
if (s->indent != Py_None) { if (s->indent != Py_None) {
indent_level -= 1; Py_CLEAR(new_newline_indent);
Py_CLEAR(separator_indent);
if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
goto bail;
}
}
yield '\n' + (' ' * (_indent * _current_indent_level))
}*/
if (_PyUnicodeWriter_WriteChar(writer, '}')) if (_PyUnicodeWriter_WriteChar(writer, '}'))
goto bail; goto bail;
return 0; return 0;
@ -1605,16 +1644,20 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
bail: bail:
Py_XDECREF(items); Py_XDECREF(items);
Py_XDECREF(ident); Py_XDECREF(ident);
Py_XDECREF(separator_indent);
Py_XDECREF(new_newline_indent);
return -1; return -1;
} }
static int static int
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
PyObject *seq, Py_ssize_t indent_level) PyObject *seq, PyObject *newline_indent)
{ {
PyObject *ident = NULL; PyObject *ident = NULL;
PyObject *s_fast = NULL; PyObject *s_fast = NULL;
Py_ssize_t i; Py_ssize_t i;
PyObject *new_newline_indent = NULL;
PyObject *separator_indent = NULL;
ident = NULL; ident = NULL;
s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
@ -1643,22 +1686,31 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
if (_PyUnicodeWriter_WriteChar(writer, '[')) if (_PyUnicodeWriter_WriteChar(writer, '['))
goto bail; goto bail;
PyObject *separator = s->item_separator; // borrowed reference
if (s->indent != Py_None) { if (s->indent != Py_None) {
/* TODO: DOES NOT RUN */ new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
indent_level += 1; if (new_newline_indent == NULL) {
/* goto bail;
newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) }
separator = _item_separator + newline_indent
buf += newline_indent if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
*/ goto bail;
}
separator_indent = PyUnicode_Concat(separator, new_newline_indent);
if (separator_indent == NULL) {
goto bail;
}
separator = separator_indent; // assign separator with borrowed reference
} }
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
if (i) { if (i) {
if (_PyUnicodeWriter_WriteStr(writer, s->item_separator)) if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
goto bail; goto bail;
} }
if (encoder_listencode_obj(s, writer, obj, indent_level)) if (encoder_listencode_obj(s, writer, obj, new_newline_indent))
goto bail; goto bail;
} }
if (ident != NULL) { if (ident != NULL) {
@ -1667,12 +1719,14 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
Py_CLEAR(ident); Py_CLEAR(ident);
} }
/* TODO: DOES NOT RUN
if (s->indent != Py_None) { if (s->indent != Py_None) {
indent_level -= 1; Py_CLEAR(new_newline_indent);
Py_CLEAR(separator_indent);
if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
goto bail;
}
}
yield '\n' + (' ' * (_indent * _current_indent_level))
}*/
if (_PyUnicodeWriter_WriteChar(writer, ']')) if (_PyUnicodeWriter_WriteChar(writer, ']'))
goto bail; goto bail;
Py_DECREF(s_fast); Py_DECREF(s_fast);
@ -1681,6 +1735,8 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
bail: bail:
Py_XDECREF(ident); Py_XDECREF(ident);
Py_DECREF(s_fast); Py_DECREF(s_fast);
Py_XDECREF(separator_indent);
Py_XDECREF(new_newline_indent);
return -1; return -1;
} }
@ -1721,7 +1777,7 @@ encoder_clear(PyEncoderObject *self)
return 0; return 0;
} }
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); PyDoc_STRVAR(encoder_doc, "Encoder(markers, default, encoder, indent, key_separator, item_separator, sort_keys, skipkeys, allow_nan)");
static PyType_Slot PyEncoderType_slots[] = { static PyType_Slot PyEncoderType_slots[] = {
{Py_tp_doc, (void *)encoder_doc}, {Py_tp_doc, (void *)encoder_doc},