Issue #12911: Fix memory consumption when calculating the repr() of huge tuples or lists.

This introduces a small private API for this common pattern.
The issue has been discovered thanks to Martin's huge-mem buildbot.
This commit is contained in:
Antoine Pitrou 2011-10-06 18:57:27 +02:00
parent bb2095f1e2
commit eeb7eea1f9
13 changed files with 271 additions and 87 deletions

View File

@ -100,7 +100,7 @@
#include "warnings.h"
#include "weakrefobject.h"
#include "structseq.h"
#include "accu.h"
#include "codecs.h"
#include "pyerrors.h"

35
Include/accu.h Normal file
View File

@ -0,0 +1,35 @@
#ifndef Py_LIMITED_API
#ifndef Py_ACCU_H
#define Py_ACCU_H
/*** This is a private API for use by the interpreter and the stdlib.
*** Its definition may be changed or removed at any moment.
***/
/*
* A two-level accumulator of unicode objects that avoids both the overhead
* of keeping a huge number of small separate objects, and the quadratic
* behaviour of using a naive repeated concatenation scheme.
*/
#ifdef __cplusplus
extern "C" {
#endif
/* State of one accumulation in progress.
 *
 * Strings are first appended to `small`; once enough of them are pending
 * they are joined into a single string which is appended to `large`, and
 * `small` is emptied again.
 */
typedef struct {
PyObject *large; /* A list of previously accumulated large strings;
                    NULL until the first flush creates it */
PyObject *small; /* Pending small strings (a list created by
                    _PyAccu_Init) */
} _PyAccu;
/* Prepare `acc` for use.  Returns 0 on success, -1 if the list of pending
 * strings could not be allocated. */
PyAPI_FUNC(int) _PyAccu_Init(_PyAccu *acc);
/* Append the unicode object `unicode` to the accumulator; the accumulator
 * keeps its own reference.  Returns 0 on success, non-zero on failure. */
PyAPI_FUNC(int) _PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode);
/* Flush the pending strings and return the list of accumulated large
 * strings as a new reference (NULL on failure).  Both fields of `acc` are
 * reset to NULL. */
PyAPI_FUNC(PyObject *) _PyAccu_FinishAsList(_PyAccu *acc);
/* Return the concatenation of everything accumulated, as a new reference
 * (NULL on failure).  Both fields of `acc` are NULL afterwards. */
PyAPI_FUNC(PyObject *) _PyAccu_Finish(_PyAccu *acc);
/* Release whatever `acc` still references.  Harmless on an accumulator
 * whose fields are already NULL. */
PyAPI_FUNC(void) _PyAccu_Destroy(_PyAccu *acc);
#ifdef __cplusplus
}
#endif
#endif /* Py_ACCU_H */
#endif /* Py_LIMITED_API */

View File

@ -59,6 +59,17 @@ class ListTest(list_tests.CommonTest):
self.assertRaises((MemoryError, OverflowError), mul, lst, n)
self.assertRaises((MemoryError, OverflowError), imul, lst, n)
def test_repr_large(self):
# Check the repr of large list objects
def check(n):
l = [0] * n
s = repr(l)
self.assertEqual(s,
'[' + ', '.join(['0'] * n) + ']')
check(10) # check our checking code
check(1000000)
def test_main(verbose=None):
support.run_unittest(ListTest)

View File

@ -154,6 +154,16 @@ class TupleTest(seq_tests.CommonTest):
# Trying to untrack an unfinished tuple could crash Python
self._not_tracked(tuple(gc.collect() for i in range(101)))
def test_repr_large(self):
# Check the repr of large tuple objects
def check(n):
l = (0,) * n
s = repr(l)
self.assertEqual(s,
'(' + ', '.join(['0'] * n) + ')')
check(10) # check our checking code
check(1000000)
def test_main():
support.run_unittest(TupleTest)

View File

@ -342,6 +342,7 @@ PYTHON_OBJS= \
# Objects
OBJECT_OBJS= \
Objects/abstract.o \
Objects/accu.o \
Objects/boolobject.o \
Objects/bytes_methods.o \
Objects/bytearrayobject.o \
@ -664,6 +665,7 @@ PYTHON_HEADERS= \
Include/Python-ast.h \
Include/Python.h \
Include/abstract.h \
Include/accu.h \
Include/asdl.h \
Include/ast.h \
Include/bltinmodule.h \

View File

@ -10,6 +10,9 @@ What's New in Python 3.2.3?
Core and Builtins
-----------------
- Issue #12911: Fix memory consumption when calculating the repr() of huge
tuples or lists.
- Issue #7732: Don't open a directory as a file anymore while importing a
module. Ignore the directory if its name matches the module name (e.g.
"__init__.py") and raise an ImportError instead.

114
Objects/accu.c Normal file
View File

@ -0,0 +1,114 @@
/* Accumulator struct implementation */
#include "Python.h"
/* Concatenate the unicode objects contained in `lst`, i.e. compute
 * ''.join(lst).
 *
 * Returns a new reference, or NULL if either the empty separator could not
 * be created or the join itself failed.
 */
static PyObject *
join_list_unicode(PyObject *lst)
{
    PyObject *sep, *ret;

    sep = PyUnicode_FromStringAndSize("", 0);
    /* Bail out early: passing a NULL separator on and then calling
       Py_DECREF(NULL) would crash instead of reporting the error. */
    if (sep == NULL)
        return NULL;
    ret = PyUnicode_Join(sep, lst);
    Py_DECREF(sep);
    return ret;
}
/* Set up an accumulator for use.
 *
 * Only the list of pending small strings is allocated here; the list of
 * large strings stays NULL until it is first needed.
 * Returns 0 on success, -1 if the pending list could not be created.
 */
int
_PyAccu_Init(_PyAccu *acc)
{
    acc->large = NULL;
    acc->small = PyList_New(0);
    return (acc->small != NULL) ? 0 : -1;
}
/* Collapse all pending small strings into one string and move it to the
 * list of large strings, creating that list on first use.
 *
 * Does nothing when no small strings are pending.
 * Returns 0 on success and a non-zero value on failure.
 */
static int
flush_accumulator(_PyAccu *acc)
{
    Py_ssize_t pending = PyList_GET_SIZE(acc->small);
    PyObject *chunk;
    int status;

    if (pending == 0)
        return 0;

    if (acc->large == NULL) {
        acc->large = PyList_New(0);
        if (acc->large == NULL)
            return -1;
    }

    chunk = join_list_unicode(acc->small);
    if (chunk == NULL)
        return -1;

    /* Empty the pending list first; only then record the joined chunk. */
    if (PyList_SetSlice(acc->small, 0, pending, NULL) != 0)
        status = -1;
    else
        status = PyList_Append(acc->large, chunk);
    Py_DECREF(chunk);
    return status;
}
/* Add one unicode object to the accumulator.
 *
 * The object is appended to the pending list (which takes its own
 * reference); once 100000 strings are pending they are flushed into a
 * single large string.
 * Returns 0 on success and a non-zero value on failure.
 */
int
_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode)
{
    assert(PyUnicode_Check(unicode));

    if (PyList_Append(acc->small, unicode) != 0)
        return -1;

    /* On 64-bit builds every item of a list of unicode objects costs
     *   - 8 bytes for the list slot, plus
     *   - 56 bytes for the unicode object header,
     * i.e. 64 bytes of overhead.  100000 such items therefore waste more
     * than 6MB compared to one concatenated string, which is why the
     * pending list is flushed at that size.
     */
    if (PyList_GET_SIZE(acc->small) >= 100000)
        return flush_accumulator(acc);
    return 0;
}
/* Finish the accumulation and hand the list of large strings over to the
 * caller.
 *
 * Pending small strings are flushed first.  Whatever the outcome, both
 * fields of `acc` are NULL when this function returns.
 * Returns a new reference to a list, or NULL on failure.
 */
PyObject *
_PyAccu_FinishAsList(_PyAccu *acc)
{
    int ret;
    PyObject *res;

    ret = flush_accumulator(acc);
    Py_CLEAR(acc->small);
    if (ret) {
        Py_CLEAR(acc->large);
        return NULL;
    }
    /* Transfer ownership of the list to the caller. */
    res = acc->large;
    acc->large = NULL;
    if (res == NULL) {
        /* Nothing was ever accumulated, so the lazily created list does
           not exist.  Return an empty list rather than NULL, which would
           signal an error without any exception having been set. */
        res = PyList_New(0);
    }
    return res;
}
/* Finish the accumulation and return everything accumulated as a single
 * unicode object.
 *
 * Returns a new reference, or NULL on failure.  Both fields of `acc` are
 * NULL when this function returns.
 */
PyObject *
_PyAccu_Finish(_PyAccu *acc)
{
    PyObject *pieces, *joined;

    if (acc->large != NULL) {
        /* At least one flush happened: gather all the large chunks. */
        pieces = _PyAccu_FinishAsList(acc);
        if (pieces == NULL)
            return NULL;
    }
    else {
        /* No flush so far: take over the pending list and join it
           directly. */
        pieces = acc->small;
        acc->small = NULL;
    }

    joined = join_list_unicode(pieces);
    Py_DECREF(pieces);
    return joined;
}
/* Drop the accumulator's references to both of its lists and reset the
 * fields to NULL.  Py_CLEAR ignores fields that are already NULL. */
void
_PyAccu_Destroy(_PyAccu *acc)
{
Py_CLEAR(acc->small);
Py_CLEAR(acc->large);
}

View File

@ -321,70 +321,59 @@ static PyObject *
list_repr(PyListObject *v)
{
Py_ssize_t i;
PyObject *s, *temp;
PyObject *pieces = NULL, *result = NULL;
PyObject *s = NULL;
_PyAccu acc;
static PyObject *sep = NULL;
if (Py_SIZE(v) == 0) {
return PyUnicode_FromString("[]");
}
if (sep == NULL) {
sep = PyUnicode_FromString(", ");
if (sep == NULL)
return NULL;
}
i = Py_ReprEnter((PyObject*)v);
if (i != 0) {
return i > 0 ? PyUnicode_FromString("[...]") : NULL;
}
if (Py_SIZE(v) == 0) {
result = PyUnicode_FromString("[]");
goto Done;
}
if (_PyAccu_Init(&acc))
goto error;
pieces = PyList_New(0);
if (pieces == NULL)
goto Done;
s = PyUnicode_FromString("[");
if (s == NULL || _PyAccu_Accumulate(&acc, s))
goto error;
Py_CLEAR(s);
/* Do repr() on each element. Note that this may mutate the list,
so must refetch the list size on each iteration. */
for (i = 0; i < Py_SIZE(v); ++i) {
int status;
if (Py_EnterRecursiveCall(" while getting the repr of a list"))
goto Done;
goto error;
s = PyObject_Repr(v->ob_item[i]);
Py_LeaveRecursiveCall();
if (s == NULL)
goto Done;
status = PyList_Append(pieces, s);
Py_DECREF(s); /* append created a new ref */
if (status < 0)
goto Done;
if (i > 0 && _PyAccu_Accumulate(&acc, sep))
goto error;
if (s == NULL || _PyAccu_Accumulate(&acc, s))
goto error;
Py_CLEAR(s);
}
/* Add "[]" decorations to the first and last items. */
assert(PyList_GET_SIZE(pieces) > 0);
s = PyUnicode_FromString("[");
if (s == NULL)
goto Done;
temp = PyList_GET_ITEM(pieces, 0);
PyUnicode_AppendAndDel(&s, temp);
PyList_SET_ITEM(pieces, 0, s);
if (s == NULL)
goto Done;
s = PyUnicode_FromString("]");
if (s == NULL)
goto Done;
temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1);
PyUnicode_AppendAndDel(&temp, s);
PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp);
if (temp == NULL)
goto Done;
if (s == NULL || _PyAccu_Accumulate(&acc, s))
goto error;
Py_CLEAR(s);
/* Paste them all together with ", " between. */
s = PyUnicode_FromString(", ");
if (s == NULL)
goto Done;
result = PyUnicode_Join(s, pieces);
Py_DECREF(s);
Done:
Py_XDECREF(pieces);
Py_ReprLeave((PyObject *)v);
return result;
return _PyAccu_Finish(&acc);
error:
_PyAccu_Destroy(&acc);
Py_XDECREF(s);
Py_ReprLeave((PyObject *)v);
return NULL;
}
static Py_ssize_t

View File

@ -240,13 +240,20 @@ static PyObject *
tuplerepr(PyTupleObject *v)
{
Py_ssize_t i, n;
PyObject *s, *temp;
PyObject *pieces, *result = NULL;
PyObject *s = NULL;
_PyAccu acc;
static PyObject *sep = NULL;
n = Py_SIZE(v);
if (n == 0)
return PyUnicode_FromString("()");
if (sep == NULL) {
sep = PyUnicode_FromString(", ");
if (sep == NULL)
return NULL;
}
/* While not mutable, it is still possible to end up with a cycle in a
tuple through an object that stores itself within a tuple (and thus
infinitely asks for the repr of itself). This should only be
@ -256,52 +263,42 @@ tuplerepr(PyTupleObject *v)
return i > 0 ? PyUnicode_FromString("(...)") : NULL;
}
pieces = PyTuple_New(n);
if (pieces == NULL)
return NULL;
if (_PyAccu_Init(&acc))
goto error;
s = PyUnicode_FromString("(");
if (s == NULL || _PyAccu_Accumulate(&acc, s))
goto error;
Py_CLEAR(s);
/* Do repr() on each element. */
for (i = 0; i < n; ++i) {
if (Py_EnterRecursiveCall(" while getting the repr of a tuple"))
goto Done;
goto error;
s = PyObject_Repr(v->ob_item[i]);
Py_LeaveRecursiveCall();
if (s == NULL)
goto Done;
PyTuple_SET_ITEM(pieces, i, s);
if (i > 0 && _PyAccu_Accumulate(&acc, sep))
goto error;
if (s == NULL || _PyAccu_Accumulate(&acc, s))
goto error;
Py_CLEAR(s);
}
if (n > 1)
s = PyUnicode_FromString(")");
else
s = PyUnicode_FromString(",)");
if (s == NULL || _PyAccu_Accumulate(&acc, s))
goto error;
Py_CLEAR(s);
/* Add "()" decorations to the first and last items. */
assert(n > 0);
s = PyUnicode_FromString("(");
if (s == NULL)
goto Done;
temp = PyTuple_GET_ITEM(pieces, 0);
PyUnicode_AppendAndDel(&s, temp);
PyTuple_SET_ITEM(pieces, 0, s);
if (s == NULL)
goto Done;
s = PyUnicode_FromString(n == 1 ? ",)" : ")");
if (s == NULL)
goto Done;
temp = PyTuple_GET_ITEM(pieces, n-1);
PyUnicode_AppendAndDel(&temp, s);
PyTuple_SET_ITEM(pieces, n-1, temp);
if (temp == NULL)
goto Done;
/* Paste them all together with ", " between. */
s = PyUnicode_FromString(", ");
if (s == NULL)
goto Done;
result = PyUnicode_Join(s, pieces);
Py_DECREF(s);
Done:
Py_DECREF(pieces);
Py_ReprLeave((PyObject *)v);
return result;
return _PyAccu_Finish(&acc);
error:
_PyAccu_Destroy(&acc);
Py_XDECREF(s);
Py_ReprLeave((PyObject *)v);
return NULL;
}
/* The addend 82520, was selected from the range(0, 1000000) for

View File

@ -205,6 +205,10 @@ SOURCE=..\..\Objects\abstract.c
# End Source File
# Begin Source File
SOURCE=..\..\Objects\accu.c
# End Source File
# Begin Source File
SOURCE=..\..\Parser\acceler.c
# End Source File
# Begin Source File

View File

@ -444,6 +444,9 @@
<File
RelativePath="..\..\Objects\abstract.c">
</File>
<File
RelativePath="..\..\Objects\accu.c">
</File>
<File
RelativePath="..\..\Parser\acceler.c">
</File>

View File

@ -634,6 +634,10 @@
RelativePath="..\..\Include\abstract.h"
>
</File>
<File
RelativePath="..\..\Include\accu.h"
>
</File>
<File
RelativePath="..\..\Include\asdl.h"
>
@ -1446,6 +1450,10 @@
RelativePath="..\..\Objects\abstract.c"
>
</File>
<File
RelativePath="..\..\Objects\accu.c"
>
</File>
<File
RelativePath="..\..\Objects\boolobject.c"
>

View File

@ -634,6 +634,10 @@
RelativePath="..\Include\abstract.h"
>
</File>
<File
RelativePath="..\Include\accu.h"
>
</File>
<File
RelativePath="..\Include\asdl.h"
>
@ -1446,6 +1450,10 @@
RelativePath="..\Objects\abstract.c"
>
</File>
<File
RelativePath="..\Objects\accu.c"
>
</File>
<File
RelativePath="..\Objects\boolobject.c"
>