gh-95382: Improve performance of json encoder with indent (GH-118105)

This commit is contained in:
Pieter Eendebak 2024-05-06 10:04:39 +02:00 committed by GitHub
parent 7758be4318
commit 05adfbba2a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 105 additions and 47 deletions

View File

@ -244,15 +244,18 @@ class JSONEncoder(object):
return text
if (_one_shot and c_make_encoder is not None
and self.indent is None):
if self.indent is None or isinstance(self.indent, str):
indent = self.indent
else:
indent = ' ' * self.indent
if _one_shot and c_make_encoder is not None:
_iterencode = c_make_encoder(
markers, self.default, _encoder, self.indent,
markers, self.default, _encoder, indent,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, self.allow_nan)
else:
_iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr,
markers, self.default, _encoder, indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot)
return _iterencode(o, 0)
@ -272,9 +275,6 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_intstr=int.__repr__,
):
if _indent is not None and not isinstance(_indent, str):
_indent = ' ' * _indent
def _iterencode_list(lst, _current_indent_level):
if not lst:
yield '[]'

View File

@ -0,0 +1,2 @@
Improve performance of :func:`json.dumps` and :func:`json.dump` when using the argument *indent*. Depending on the data the encoding using
:func:`json.dumps` with *indent* can be up to 2 to 3 times faster.

View File

@ -85,11 +85,11 @@ encoder_dealloc(PyObject *self);
static int
encoder_clear(PyEncoderObject *self);
static int
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level);
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent);
static int
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level);
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent);
static int
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level);
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent);
static PyObject *
_encoded_const(PyObject *obj);
static void
@ -1251,6 +1251,17 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return (PyObject *)s;
}
static PyObject *
_create_newline_indent(PyObject *indent, Py_ssize_t indent_level)
{
PyObject *newline_indent = PyUnicode_FromOrdinal('\n');
if (newline_indent != NULL && indent_level) {
PyUnicode_AppendAndDel(&newline_indent,
PySequence_Repeat(indent, indent_level));
}
return newline_indent;
}
static PyObject *
encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
{
@ -1267,10 +1278,20 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
_PyUnicodeWriter_Init(&writer);
writer.overallocate = 1;
if (encoder_listencode_obj(self, &writer, obj, indent_level)) {
PyObject *newline_indent = NULL;
if (self->indent != Py_None) {
newline_indent = _create_newline_indent(self->indent, indent_level);
if (newline_indent == NULL) {
_PyUnicodeWriter_Dealloc(&writer);
return NULL;
}
}
if (encoder_listencode_obj(self, &writer, obj, newline_indent)) {
_PyUnicodeWriter_Dealloc(&writer);
Py_XDECREF(newline_indent);
return NULL;
}
Py_XDECREF(newline_indent);
result = PyTuple_New(1);
if (result == NULL ||
@ -1358,7 +1379,7 @@ _steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen)
static int
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
PyObject *obj, Py_ssize_t indent_level)
PyObject *obj, PyObject *newline_indent)
{
/* Encode Python object obj to a JSON term */
PyObject *newobj;
@ -1394,14 +1415,14 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
else if (PyList_Check(obj) || PyTuple_Check(obj)) {
if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
return -1;
rv = encoder_listencode_list(s, writer, obj, indent_level);
rv = encoder_listencode_list(s, writer, obj, newline_indent);
_Py_LeaveRecursiveCall();
return rv;
}
else if (PyDict_Check(obj)) {
if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
return -1;
rv = encoder_listencode_dict(s, writer, obj, indent_level);
rv = encoder_listencode_dict(s, writer, obj, newline_indent);
_Py_LeaveRecursiveCall();
return rv;
}
@ -1435,7 +1456,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
Py_XDECREF(ident);
return -1;
}
rv = encoder_listencode_obj(s, writer, newobj, indent_level);
rv = encoder_listencode_obj(s, writer, newobj, newline_indent);
_Py_LeaveRecursiveCall();
Py_DECREF(newobj);
@ -1456,7 +1477,9 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
static int
encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
PyObject *key, PyObject *value, Py_ssize_t indent_level)
PyObject *key, PyObject *value,
PyObject *newline_indent,
PyObject *item_separator)
{
PyObject *keystr = NULL;
PyObject *encoded;
@ -1493,7 +1516,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
*first = false;
}
else {
if (_PyUnicodeWriter_WriteStr(writer, s->item_separator) < 0) {
if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) {
Py_DECREF(keystr);
return -1;
}
@ -1511,7 +1534,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
return -1;
}
if (encoder_listencode_obj(s, writer, value, indent_level) < 0) {
if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
return -1;
}
return 0;
@ -1519,13 +1542,15 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
static int
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
PyObject *dct, Py_ssize_t indent_level)
PyObject *dct, PyObject *newline_indent)
{
/* Encode Python dict dct a JSON term */
PyObject *ident = NULL;
PyObject *items = NULL;
PyObject *key, *value;
bool first = true;
PyObject *new_newline_indent = NULL;
PyObject *separator_indent = NULL;
if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
@ -1549,14 +1574,21 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
if (_PyUnicodeWriter_WriteChar(writer, '{'))
goto bail;
PyObject *current_item_separator = s->item_separator; // borrowed reference
if (s->indent != Py_None) {
/* TODO: DOES NOT RUN */
indent_level += 1;
/*
newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
separator = _item_separator + newline_indent
buf += newline_indent
*/
new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
if (new_newline_indent == NULL) {
goto bail;
}
separator_indent = PyUnicode_Concat(current_item_separator, new_newline_indent);
if (separator_indent == NULL) {
goto bail;
}
// update item separator with a borrowed reference
current_item_separator = separator_indent;
if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
goto bail;
}
}
if (s->sort_keys || !PyDict_CheckExact(dct)) {
@ -1574,7 +1606,9 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
key = PyTuple_GET_ITEM(item, 0);
value = PyTuple_GET_ITEM(item, 1);
if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
if (encoder_encode_key_value(s, writer, &first, key, value,
new_newline_indent,
current_item_separator) < 0)
goto bail;
}
Py_CLEAR(items);
@ -1582,7 +1616,9 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
} else {
Py_ssize_t pos = 0;
while (PyDict_Next(dct, &pos, &key, &value)) {
if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
if (encoder_encode_key_value(s, writer, &first, key, value,
new_newline_indent,
current_item_separator) < 0)
goto bail;
}
}
@ -1592,12 +1628,15 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
goto bail;
Py_CLEAR(ident);
}
/* TODO DOES NOT RUN; dead code
if (s->indent != Py_None) {
indent_level -= 1;
Py_CLEAR(new_newline_indent);
Py_CLEAR(separator_indent);
if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
goto bail;
}
}
yield '\n' + (' ' * (_indent * _current_indent_level))
}*/
if (_PyUnicodeWriter_WriteChar(writer, '}'))
goto bail;
return 0;
@ -1605,16 +1644,20 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
bail:
Py_XDECREF(items);
Py_XDECREF(ident);
Py_XDECREF(separator_indent);
Py_XDECREF(new_newline_indent);
return -1;
}
static int
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
PyObject *seq, Py_ssize_t indent_level)
PyObject *seq, PyObject *newline_indent)
{
PyObject *ident = NULL;
PyObject *s_fast = NULL;
Py_ssize_t i;
PyObject *new_newline_indent = NULL;
PyObject *separator_indent = NULL;
ident = NULL;
s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
@ -1643,22 +1686,31 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
if (_PyUnicodeWriter_WriteChar(writer, '['))
goto bail;
PyObject *separator = s->item_separator; // borrowed reference
if (s->indent != Py_None) {
/* TODO: DOES NOT RUN */
indent_level += 1;
/*
newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
separator = _item_separator + newline_indent
buf += newline_indent
*/
new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
if (new_newline_indent == NULL) {
goto bail;
}
if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
goto bail;
}
separator_indent = PyUnicode_Concat(separator, new_newline_indent);
if (separator_indent == NULL) {
goto bail;
}
separator = separator_indent; // assign separator with borrowed reference
}
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
if (i) {
if (_PyUnicodeWriter_WriteStr(writer, s->item_separator))
if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
goto bail;
}
if (encoder_listencode_obj(s, writer, obj, indent_level))
if (encoder_listencode_obj(s, writer, obj, new_newline_indent))
goto bail;
}
if (ident != NULL) {
@ -1667,12 +1719,14 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
Py_CLEAR(ident);
}
/* TODO: DOES NOT RUN
if (s->indent != Py_None) {
indent_level -= 1;
Py_CLEAR(new_newline_indent);
Py_CLEAR(separator_indent);
if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
goto bail;
}
}
yield '\n' + (' ' * (_indent * _current_indent_level))
}*/
if (_PyUnicodeWriter_WriteChar(writer, ']'))
goto bail;
Py_DECREF(s_fast);
@ -1681,6 +1735,8 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
bail:
Py_XDECREF(ident);
Py_DECREF(s_fast);
Py_XDECREF(separator_indent);
Py_XDECREF(new_newline_indent);
return -1;
}
@ -1721,7 +1777,7 @@ encoder_clear(PyEncoderObject *self)
return 0;
}
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
PyDoc_STRVAR(encoder_doc, "Encoder(markers, default, encoder, indent, key_separator, item_separator, sort_keys, skipkeys, allow_nan)");
static PyType_Slot PyEncoderType_slots[] = {
{Py_tp_doc, (void *)encoder_doc},