mirror of https://github.com/python/cpython
bpo-33234 Improve list() pre-sizing for inputs with known lengths (GH-9846)
The list() constructor isn't taking full advantage of known input lengths or length hints. This commit makes the constructor pre-size and not over-allocate when the input size is known (the input collection implements __len__). One of the main advantages is that this provides about 12% in memory savings, due to the difference between over-allocating and allocating exactly the input size. For efficiency purposes, and to avoid a performance regression for small generators and collections, the size of the input object is calculated using __len__ and not __length_hint__, as the latter is considerably slower.
This commit is contained in:
parent
569d12f448
commit
372d705d95
|
@ -1,5 +1,6 @@
|
||||||
import sys
|
import sys
|
||||||
from test import list_tests
|
from test import list_tests
|
||||||
|
from test.support import cpython_only
|
||||||
import pickle
|
import pickle
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
|
@ -157,5 +158,13 @@ class ListTest(list_tests.CommonTest):
|
||||||
with self.assertRaises(TypeError):
|
with self.assertRaises(TypeError):
|
||||||
(3,) + L([1,2])
|
(3,) + L([1,2])
|
||||||
|
|
||||||
|
@cpython_only
|
||||||
|
def test_preallocation(self):
|
||||||
|
iterable = [0] * 10
|
||||||
|
iter_size = sys.getsizeof(iterable)
|
||||||
|
|
||||||
|
self.assertEqual(iter_size, sys.getsizeof(list([0] * 10)))
|
||||||
|
self.assertEqual(iter_size, sys.getsizeof(list(range(10))))
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
The list constructor will pre-size and not over-allocate when
|
||||||
|
the input length is known.
|
|
@ -76,6 +76,33 @@ list_resize(PyListObject *self, Py_ssize_t newsize)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
list_preallocate_exact(PyListObject *self, Py_ssize_t size)
|
||||||
|
{
|
||||||
|
assert(self->ob_item == NULL);
|
||||||
|
|
||||||
|
PyObject **items;
|
||||||
|
size_t allocated;
|
||||||
|
|
||||||
|
allocated = (size_t)size;
|
||||||
|
if (allocated > (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (size == 0) {
|
||||||
|
allocated = 0;
|
||||||
|
}
|
||||||
|
items = (PyObject **)PyMem_New(PyObject*, allocated);
|
||||||
|
if (items == NULL) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
self->ob_item = items;
|
||||||
|
self->allocated = allocated;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Debug statistic to compare allocations with reuse through the free list */
|
/* Debug statistic to compare allocations with reuse through the free list */
|
||||||
#undef SHOW_ALLOC_COUNT
|
#undef SHOW_ALLOC_COUNT
|
||||||
#ifdef SHOW_ALLOC_COUNT
|
#ifdef SHOW_ALLOC_COUNT
|
||||||
|
@ -2683,6 +2710,19 @@ list___init___impl(PyListObject *self, PyObject *iterable)
|
||||||
(void)_list_clear(self);
|
(void)_list_clear(self);
|
||||||
}
|
}
|
||||||
if (iterable != NULL) {
|
if (iterable != NULL) {
|
||||||
|
if (_PyObject_HasLen(iterable)) {
|
||||||
|
Py_ssize_t iter_len = PyObject_Size(iterable);
|
||||||
|
if (iter_len == -1) {
|
||||||
|
if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
PyErr_Clear();
|
||||||
|
}
|
||||||
|
if (iter_len > 0 && self->ob_item == NULL
|
||||||
|
&& list_preallocate_exact(self, iter_len)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
PyObject *rv = list_extend(self, iterable);
|
PyObject *rv = list_extend(self, iterable);
|
||||||
if (rv == NULL)
|
if (rv == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
Loading…
Reference in New Issue