mirror of https://github.com/python/cpython
Issue #12911: Fix memory consumption when calculating the repr() of huge tuples or lists.
This introduces a small private API for this common pattern. The issue has been discovered thanks to Martin's huge-mem buildbot.
This commit is contained in:
parent
bb2095f1e2
commit
eeb7eea1f9
|
@ -100,7 +100,7 @@
|
|||
#include "warnings.h"
|
||||
#include "weakrefobject.h"
|
||||
#include "structseq.h"
|
||||
|
||||
#include "accu.h"
|
||||
|
||||
#include "codecs.h"
|
||||
#include "pyerrors.h"
|
||||
|
@ -141,7 +141,7 @@ PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name);
|
|||
#endif
|
||||
|
||||
/* Argument must be a char or an int in [-128, 127] or [0, 255]. */
|
||||
#define Py_CHARMASK(c) ((unsigned char)((c) & 0xff))
|
||||
#define Py_CHARMASK(c) ((unsigned char)((c) & 0xff))
|
||||
|
||||
#include "pyfpe.h"
|
||||
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
#ifndef Py_LIMITED_API
|
||||
#ifndef Py_ACCU_H
|
||||
#define Py_ACCU_H
|
||||
|
||||
/*** This is a private API for use by the interpreter and the stdlib.
|
||||
*** Its definition may be changed or removed at any moment.
|
||||
***/
|
||||
|
||||
/*
|
||||
* A two-level accumulator of unicode objects that avoids both the overhead
|
||||
* of keeping a huge number of small separate objects, and the quadratic
|
||||
* behaviour of using a naive repeated concatenation scheme.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
PyObject *large; /* A list of previously accumulated large strings */
|
||||
PyObject *small; /* Pending small strings */
|
||||
} _PyAccu;
|
||||
|
||||
PyAPI_FUNC(int) _PyAccu_Init(_PyAccu *acc);
|
||||
PyAPI_FUNC(int) _PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode);
|
||||
PyAPI_FUNC(PyObject *) _PyAccu_FinishAsList(_PyAccu *acc);
|
||||
PyAPI_FUNC(PyObject *) _PyAccu_Finish(_PyAccu *acc);
|
||||
PyAPI_FUNC(void) _PyAccu_Destroy(_PyAccu *acc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* Py_ACCU_H */
|
||||
#endif /* Py_LIMITED_API */
|
|
@ -59,6 +59,17 @@ class ListTest(list_tests.CommonTest):
|
|||
self.assertRaises((MemoryError, OverflowError), mul, lst, n)
|
||||
self.assertRaises((MemoryError, OverflowError), imul, lst, n)
|
||||
|
||||
def test_repr_large(self):
|
||||
# Check the repr of large list objects
|
||||
def check(n):
|
||||
l = [0] * n
|
||||
s = repr(l)
|
||||
self.assertEqual(s,
|
||||
'[' + ', '.join(['0'] * n) + ']')
|
||||
check(10) # check our checking code
|
||||
check(1000000)
|
||||
|
||||
|
||||
def test_main(verbose=None):
|
||||
support.run_unittest(ListTest)
|
||||
|
||||
|
|
|
@ -154,6 +154,16 @@ class TupleTest(seq_tests.CommonTest):
|
|||
# Trying to untrack an unfinished tuple could crash Python
|
||||
self._not_tracked(tuple(gc.collect() for i in range(101)))
|
||||
|
||||
def test_repr_large(self):
|
||||
# Check the repr of large list objects
|
||||
def check(n):
|
||||
l = (0,) * n
|
||||
s = repr(l)
|
||||
self.assertEqual(s,
|
||||
'(' + ', '.join(['0'] * n) + ')')
|
||||
check(10) # check our checking code
|
||||
check(1000000)
|
||||
|
||||
def test_main():
|
||||
support.run_unittest(TupleTest)
|
||||
|
||||
|
|
|
@ -342,6 +342,7 @@ PYTHON_OBJS= \
|
|||
# Objects
|
||||
OBJECT_OBJS= \
|
||||
Objects/abstract.o \
|
||||
Objects/accu.o \
|
||||
Objects/boolobject.o \
|
||||
Objects/bytes_methods.o \
|
||||
Objects/bytearrayobject.o \
|
||||
|
@ -664,6 +665,7 @@ PYTHON_HEADERS= \
|
|||
Include/Python-ast.h \
|
||||
Include/Python.h \
|
||||
Include/abstract.h \
|
||||
Include/accu.h \
|
||||
Include/asdl.h \
|
||||
Include/ast.h \
|
||||
Include/bltinmodule.h \
|
||||
|
|
|
@ -10,6 +10,9 @@ What's New in Python 3.2.3?
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #12911: Fix memory consumption when calculating the repr() of huge
|
||||
tuples or lists.
|
||||
|
||||
- Issue #7732: Don't open a directory as a file anymore while importing a
|
||||
module. Ignore the direcotry if its name matchs the module name (e.g.
|
||||
"__init__.py") and raise a ImportError instead.
|
||||
|
|
|
@ -0,0 +1,114 @@
|
|||
/* Accumulator struct implementation */
|
||||
|
||||
#include "Python.h"
|
||||
|
||||
static PyObject *
|
||||
join_list_unicode(PyObject *lst)
|
||||
{
|
||||
/* return ''.join(lst) */
|
||||
PyObject *sep, *ret;
|
||||
sep = PyUnicode_FromStringAndSize("", 0);
|
||||
ret = PyUnicode_Join(sep, lst);
|
||||
Py_DECREF(sep);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
_PyAccu_Init(_PyAccu *acc)
|
||||
{
|
||||
/* Lazily allocated */
|
||||
acc->large = NULL;
|
||||
acc->small = PyList_New(0);
|
||||
if (acc->small == NULL)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
flush_accumulator(_PyAccu *acc)
|
||||
{
|
||||
Py_ssize_t nsmall = PyList_GET_SIZE(acc->small);
|
||||
if (nsmall) {
|
||||
int ret;
|
||||
PyObject *joined;
|
||||
if (acc->large == NULL) {
|
||||
acc->large = PyList_New(0);
|
||||
if (acc->large == NULL)
|
||||
return -1;
|
||||
}
|
||||
joined = join_list_unicode(acc->small);
|
||||
if (joined == NULL)
|
||||
return -1;
|
||||
if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) {
|
||||
Py_DECREF(joined);
|
||||
return -1;
|
||||
}
|
||||
ret = PyList_Append(acc->large, joined);
|
||||
Py_DECREF(joined);
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode)
|
||||
{
|
||||
Py_ssize_t nsmall;
|
||||
assert(PyUnicode_Check(unicode));
|
||||
|
||||
if (PyList_Append(acc->small, unicode))
|
||||
return -1;
|
||||
nsmall = PyList_GET_SIZE(acc->small);
|
||||
/* Each item in a list of unicode objects has an overhead (in 64-bit
|
||||
* builds) of:
|
||||
* - 8 bytes for the list slot
|
||||
* - 56 bytes for the header of the unicode object
|
||||
* that is, 64 bytes. 100000 such objects waste more than 6MB
|
||||
* compared to a single concatenated string.
|
||||
*/
|
||||
if (nsmall < 100000)
|
||||
return 0;
|
||||
return flush_accumulator(acc);
|
||||
}
|
||||
|
||||
PyObject *
|
||||
_PyAccu_FinishAsList(_PyAccu *acc)
|
||||
{
|
||||
int ret;
|
||||
PyObject *res;
|
||||
|
||||
ret = flush_accumulator(acc);
|
||||
Py_CLEAR(acc->small);
|
||||
if (ret) {
|
||||
Py_CLEAR(acc->large);
|
||||
return NULL;
|
||||
}
|
||||
res = acc->large;
|
||||
acc->large = NULL;
|
||||
return res;
|
||||
}
|
||||
|
||||
PyObject *
|
||||
_PyAccu_Finish(_PyAccu *acc)
|
||||
{
|
||||
PyObject *list, *res;
|
||||
if (acc->large == NULL) {
|
||||
list = acc->small;
|
||||
acc->small = NULL;
|
||||
}
|
||||
else {
|
||||
list = _PyAccu_FinishAsList(acc);
|
||||
if (!list)
|
||||
return NULL;
|
||||
}
|
||||
res = join_list_unicode(list);
|
||||
Py_DECREF(list);
|
||||
return res;
|
||||
}
|
||||
|
||||
void
|
||||
_PyAccu_Destroy(_PyAccu *acc)
|
||||
{
|
||||
Py_CLEAR(acc->small);
|
||||
Py_CLEAR(acc->large);
|
||||
}
|
|
@ -321,70 +321,59 @@ static PyObject *
|
|||
list_repr(PyListObject *v)
|
||||
{
|
||||
Py_ssize_t i;
|
||||
PyObject *s, *temp;
|
||||
PyObject *pieces = NULL, *result = NULL;
|
||||
PyObject *s = NULL;
|
||||
_PyAccu acc;
|
||||
static PyObject *sep = NULL;
|
||||
|
||||
if (Py_SIZE(v) == 0) {
|
||||
return PyUnicode_FromString("[]");
|
||||
}
|
||||
|
||||
if (sep == NULL) {
|
||||
sep = PyUnicode_FromString(", ");
|
||||
if (sep == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
i = Py_ReprEnter((PyObject*)v);
|
||||
if (i != 0) {
|
||||
return i > 0 ? PyUnicode_FromString("[...]") : NULL;
|
||||
}
|
||||
|
||||
if (Py_SIZE(v) == 0) {
|
||||
result = PyUnicode_FromString("[]");
|
||||
goto Done;
|
||||
}
|
||||
if (_PyAccu_Init(&acc))
|
||||
goto error;
|
||||
|
||||
pieces = PyList_New(0);
|
||||
if (pieces == NULL)
|
||||
goto Done;
|
||||
s = PyUnicode_FromString("[");
|
||||
if (s == NULL || _PyAccu_Accumulate(&acc, s))
|
||||
goto error;
|
||||
Py_CLEAR(s);
|
||||
|
||||
/* Do repr() on each element. Note that this may mutate the list,
|
||||
so must refetch the list size on each iteration. */
|
||||
for (i = 0; i < Py_SIZE(v); ++i) {
|
||||
int status;
|
||||
if (Py_EnterRecursiveCall(" while getting the repr of a list"))
|
||||
goto Done;
|
||||
goto error;
|
||||
s = PyObject_Repr(v->ob_item[i]);
|
||||
Py_LeaveRecursiveCall();
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
status = PyList_Append(pieces, s);
|
||||
Py_DECREF(s); /* append created a new ref */
|
||||
if (status < 0)
|
||||
goto Done;
|
||||
if (i > 0 && _PyAccu_Accumulate(&acc, sep))
|
||||
goto error;
|
||||
if (s == NULL || _PyAccu_Accumulate(&acc, s))
|
||||
goto error;
|
||||
Py_CLEAR(s);
|
||||
}
|
||||
|
||||
/* Add "[]" decorations to the first and last items. */
|
||||
assert(PyList_GET_SIZE(pieces) > 0);
|
||||
s = PyUnicode_FromString("[");
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
temp = PyList_GET_ITEM(pieces, 0);
|
||||
PyUnicode_AppendAndDel(&s, temp);
|
||||
PyList_SET_ITEM(pieces, 0, s);
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
|
||||
s = PyUnicode_FromString("]");
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1);
|
||||
PyUnicode_AppendAndDel(&temp, s);
|
||||
PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp);
|
||||
if (temp == NULL)
|
||||
goto Done;
|
||||
if (s == NULL || _PyAccu_Accumulate(&acc, s))
|
||||
goto error;
|
||||
Py_CLEAR(s);
|
||||
|
||||
/* Paste them all together with ", " between. */
|
||||
s = PyUnicode_FromString(", ");
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
result = PyUnicode_Join(s, pieces);
|
||||
Py_DECREF(s);
|
||||
|
||||
Done:
|
||||
Py_XDECREF(pieces);
|
||||
Py_ReprLeave((PyObject *)v);
|
||||
return result;
|
||||
return _PyAccu_Finish(&acc);
|
||||
|
||||
error:
|
||||
_PyAccu_Destroy(&acc);
|
||||
Py_XDECREF(s);
|
||||
Py_ReprLeave((PyObject *)v);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static Py_ssize_t
|
||||
|
|
|
@ -240,13 +240,20 @@ static PyObject *
|
|||
tuplerepr(PyTupleObject *v)
|
||||
{
|
||||
Py_ssize_t i, n;
|
||||
PyObject *s, *temp;
|
||||
PyObject *pieces, *result = NULL;
|
||||
PyObject *s = NULL;
|
||||
_PyAccu acc;
|
||||
static PyObject *sep = NULL;
|
||||
|
||||
n = Py_SIZE(v);
|
||||
if (n == 0)
|
||||
return PyUnicode_FromString("()");
|
||||
|
||||
if (sep == NULL) {
|
||||
sep = PyUnicode_FromString(", ");
|
||||
if (sep == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* While not mutable, it is still possible to end up with a cycle in a
|
||||
tuple through an object that stores itself within a tuple (and thus
|
||||
infinitely asks for the repr of itself). This should only be
|
||||
|
@ -256,52 +263,42 @@ tuplerepr(PyTupleObject *v)
|
|||
return i > 0 ? PyUnicode_FromString("(...)") : NULL;
|
||||
}
|
||||
|
||||
pieces = PyTuple_New(n);
|
||||
if (pieces == NULL)
|
||||
return NULL;
|
||||
if (_PyAccu_Init(&acc))
|
||||
goto error;
|
||||
|
||||
s = PyUnicode_FromString("(");
|
||||
if (s == NULL || _PyAccu_Accumulate(&acc, s))
|
||||
goto error;
|
||||
Py_CLEAR(s);
|
||||
|
||||
/* Do repr() on each element. */
|
||||
for (i = 0; i < n; ++i) {
|
||||
if (Py_EnterRecursiveCall(" while getting the repr of a tuple"))
|
||||
goto Done;
|
||||
goto error;
|
||||
s = PyObject_Repr(v->ob_item[i]);
|
||||
Py_LeaveRecursiveCall();
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
PyTuple_SET_ITEM(pieces, i, s);
|
||||
if (i > 0 && _PyAccu_Accumulate(&acc, sep))
|
||||
goto error;
|
||||
if (s == NULL || _PyAccu_Accumulate(&acc, s))
|
||||
goto error;
|
||||
Py_CLEAR(s);
|
||||
}
|
||||
if (n > 1)
|
||||
s = PyUnicode_FromString(")");
|
||||
else
|
||||
s = PyUnicode_FromString(",)");
|
||||
if (s == NULL || _PyAccu_Accumulate(&acc, s))
|
||||
goto error;
|
||||
Py_CLEAR(s);
|
||||
|
||||
/* Add "()" decorations to the first and last items. */
|
||||
assert(n > 0);
|
||||
s = PyUnicode_FromString("(");
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
temp = PyTuple_GET_ITEM(pieces, 0);
|
||||
PyUnicode_AppendAndDel(&s, temp);
|
||||
PyTuple_SET_ITEM(pieces, 0, s);
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
|
||||
s = PyUnicode_FromString(n == 1 ? ",)" : ")");
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
temp = PyTuple_GET_ITEM(pieces, n-1);
|
||||
PyUnicode_AppendAndDel(&temp, s);
|
||||
PyTuple_SET_ITEM(pieces, n-1, temp);
|
||||
if (temp == NULL)
|
||||
goto Done;
|
||||
|
||||
/* Paste them all together with ", " between. */
|
||||
s = PyUnicode_FromString(", ");
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
result = PyUnicode_Join(s, pieces);
|
||||
Py_DECREF(s);
|
||||
|
||||
Done:
|
||||
Py_DECREF(pieces);
|
||||
Py_ReprLeave((PyObject *)v);
|
||||
return result;
|
||||
return _PyAccu_Finish(&acc);
|
||||
|
||||
error:
|
||||
_PyAccu_Destroy(&acc);
|
||||
Py_XDECREF(s);
|
||||
Py_ReprLeave((PyObject *)v);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* The addend 82520, was selected from the range(0, 1000000) for
|
||||
|
|
|
@ -205,6 +205,10 @@ SOURCE=..\..\Objects\abstract.c
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\Objects\accu.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\Parser\acceler.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
|
|
@ -444,6 +444,9 @@
|
|||
<File
|
||||
RelativePath="..\..\Objects\abstract.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\Objects\accu.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\Parser\acceler.c">
|
||||
</File>
|
||||
|
|
|
@ -634,6 +634,10 @@
|
|||
RelativePath="..\..\Include\abstract.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\Include\accu.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\Include\asdl.h"
|
||||
>
|
||||
|
@ -1446,6 +1450,10 @@
|
|||
RelativePath="..\..\Objects\abstract.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\Objects\accu.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\Objects\boolobject.c"
|
||||
>
|
||||
|
|
|
@ -634,6 +634,10 @@
|
|||
RelativePath="..\Include\abstract.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\Include\accu.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\Include\asdl.h"
|
||||
>
|
||||
|
@ -1446,6 +1450,10 @@
|
|||
RelativePath="..\Objects\abstract.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\Objects\accu.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\Objects\boolobject.c"
|
||||
>
|
||||
|
|
Loading…
Reference in New Issue