mirror of https://github.com/python/cpython
Optimizations for bytes reallocation.
This uses up to 12.5% overallocation, not entirely unlike list_resize(). Could probably use more tweaks for odd allocation patterns, TBD. Also add __alloc__() method which returns the actually allocated size. PS. I'm now convinced that we need something like "".join(); later.
This commit is contained in:
parent
5584245f58
commit
a0867f79bb
|
@ -21,6 +21,7 @@ extern "C" {
|
||||||
/* Object layout */
|
/* Object layout */
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject_VAR_HEAD
|
PyObject_VAR_HEAD
|
||||||
|
Py_ssize_t ob_alloc; /* How many bytes allocated */
|
||||||
char *ob_bytes;
|
char *ob_bytes;
|
||||||
} PyBytesObject;
|
} PyBytesObject;
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#define PY_SSIZE_T_CLEAN
|
#define PY_SSIZE_T_CLEAN
|
||||||
#include "Python.h"
|
#include "Python.h"
|
||||||
|
#include "structmember.h"
|
||||||
|
|
||||||
/* Direct API functions */
|
/* Direct API functions */
|
||||||
|
|
||||||
|
@ -25,7 +26,6 @@ PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
|
||||||
if (new == NULL)
|
if (new == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
new->ob_size = size;
|
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
new->ob_bytes = NULL;
|
new->ob_bytes = NULL;
|
||||||
else {
|
else {
|
||||||
|
@ -37,6 +37,7 @@ PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
|
||||||
if (bytes != NULL)
|
if (bytes != NULL)
|
||||||
memcpy(new->ob_bytes, bytes, size);
|
memcpy(new->ob_bytes, bytes, size);
|
||||||
}
|
}
|
||||||
|
new->ob_size = new->ob_alloc = size;
|
||||||
|
|
||||||
return (PyObject *)new;
|
return (PyObject *)new;
|
||||||
}
|
}
|
||||||
|
@ -63,12 +64,31 @@ int
|
||||||
PyBytes_Resize(PyObject *self, Py_ssize_t size)
|
PyBytes_Resize(PyObject *self, Py_ssize_t size)
|
||||||
{
|
{
|
||||||
void *sval;
|
void *sval;
|
||||||
|
Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
|
||||||
|
|
||||||
assert(self != NULL);
|
assert(self != NULL);
|
||||||
assert(PyBytes_Check(self));
|
assert(PyBytes_Check(self));
|
||||||
assert(size >= 0);
|
assert(size >= 0);
|
||||||
|
|
||||||
sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, size);
|
if (size < alloc / 2) {
|
||||||
|
/* Major downsize; resize down to exact size */
|
||||||
|
alloc = size;
|
||||||
|
}
|
||||||
|
else if (size <= alloc) {
|
||||||
|
/* Within allocated size; quick exit */
|
||||||
|
((PyBytesObject *)self)->ob_size = size;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
else if (size <= alloc * 1.125) {
|
||||||
|
/* Moderate upsize; overallocate similar to list_resize() */
|
||||||
|
alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* Major upsize; resize up to exact size */
|
||||||
|
alloc = size;
|
||||||
|
}
|
||||||
|
|
||||||
|
sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
|
||||||
if (sval == NULL) {
|
if (sval == NULL) {
|
||||||
PyErr_NoMemory();
|
PyErr_NoMemory();
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -76,6 +96,7 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
|
||||||
|
|
||||||
((PyBytesObject *)self)->ob_bytes = sval;
|
((PyBytesObject *)self)->ob_bytes = sval;
|
||||||
((PyBytesObject *)self)->ob_size = size;
|
((PyBytesObject *)self)->ob_size = size;
|
||||||
|
((PyBytesObject *)self)->ob_alloc = alloc;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -133,7 +154,9 @@ bytes_iconcat(PyBytesObject *self, PyObject *other)
|
||||||
size = mysize + osize;
|
size = mysize + osize;
|
||||||
if (size < 0)
|
if (size < 0)
|
||||||
return PyErr_NoMemory();
|
return PyErr_NoMemory();
|
||||||
if (PyBytes_Resize((PyObject *)self, size) < 0)
|
if (size <= self->ob_alloc)
|
||||||
|
self->ob_size = size;
|
||||||
|
else if (PyBytes_Resize((PyObject *)self, size) < 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
memcpy(self->ob_bytes + mysize, ((PyBytesObject *)other)->ob_bytes, osize);
|
memcpy(self->ob_bytes + mysize, ((PyBytesObject *)other)->ob_bytes, osize);
|
||||||
Py_INCREF(self);
|
Py_INCREF(self);
|
||||||
|
@ -178,7 +201,9 @@ bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
|
||||||
size = mysize * count;
|
size = mysize * count;
|
||||||
if (count != 0 && size / count != mysize)
|
if (count != 0 && size / count != mysize)
|
||||||
return PyErr_NoMemory();
|
return PyErr_NoMemory();
|
||||||
if (PyBytes_Resize((PyObject *)self, size) < 0)
|
if (size <= self->ob_alloc)
|
||||||
|
self->ob_size = size;
|
||||||
|
else if (PyBytes_Resize((PyObject *)self, size) < 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if (mysize == 1)
|
if (mysize == 1)
|
||||||
|
@ -372,9 +397,11 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
|
||||||
PyObject *it;
|
PyObject *it;
|
||||||
PyObject *(*iternext)(PyObject *);
|
PyObject *(*iternext)(PyObject *);
|
||||||
|
|
||||||
/* Empty previous contents (yes, do this first of all!) */
|
if (self->ob_size != 0) {
|
||||||
if (PyBytes_Resize((PyObject *)self, 0) < 0)
|
/* Empty previous contents (yes, do this first of all!) */
|
||||||
return -1;
|
if (PyBytes_Resize((PyObject *)self, 0) < 0)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
/* Parse arguments */
|
/* Parse arguments */
|
||||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
|
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
|
||||||
|
@ -410,7 +437,9 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
|
||||||
}
|
}
|
||||||
bytes = PyString_AS_STRING(encoded);
|
bytes = PyString_AS_STRING(encoded);
|
||||||
size = PyString_GET_SIZE(encoded);
|
size = PyString_GET_SIZE(encoded);
|
||||||
if (PyBytes_Resize((PyObject *)self, size) < 0) {
|
if (size <= self->ob_alloc)
|
||||||
|
self->ob_size = size;
|
||||||
|
else if (PyBytes_Resize((PyObject *)self, size) < 0) {
|
||||||
Py_DECREF(encoded);
|
Py_DECREF(encoded);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
@ -492,8 +521,9 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Append the byte */
|
/* Append the byte */
|
||||||
/* XXX Speed this up */
|
if (self->ob_size < self->ob_alloc)
|
||||||
if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
|
self->ob_size++;
|
||||||
|
else if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
|
||||||
goto error;
|
goto error;
|
||||||
self->ob_bytes[self->ob_size-1] = value;
|
self->ob_bytes[self->ob_size-1] = value;
|
||||||
}
|
}
|
||||||
|
@ -673,6 +703,17 @@ bytes_decode(PyObject *self, PyObject *args)
|
||||||
return PyCodec_Decode(self, encoding, errors);
|
return PyCodec_Decode(self, encoding, errors);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyDoc_STRVAR(alloc_doc,
|
||||||
|
"B.__alloc__() -> int\n\
|
||||||
|
\n\
|
||||||
|
Returns the number of bytes actually allocated.");
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
bytes_alloc(PyBytesObject *self)
|
||||||
|
{
|
||||||
|
return PyInt_FromSsize_t(self->ob_alloc);
|
||||||
|
}
|
||||||
|
|
||||||
static PySequenceMethods bytes_as_sequence = {
|
static PySequenceMethods bytes_as_sequence = {
|
||||||
(lenfunc)bytes_length, /*sq_length*/
|
(lenfunc)bytes_length, /*sq_length*/
|
||||||
(binaryfunc)bytes_concat, /*sq_concat*/
|
(binaryfunc)bytes_concat, /*sq_concat*/
|
||||||
|
@ -704,7 +745,8 @@ static PyBufferProcs bytes_as_buffer = {
|
||||||
static PyMethodDef
|
static PyMethodDef
|
||||||
bytes_methods[] = {
|
bytes_methods[] = {
|
||||||
{"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
|
{"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
|
||||||
{NULL, NULL}
|
{"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
|
||||||
|
{NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
PyDoc_STRVAR(bytes_doc,
|
PyDoc_STRVAR(bytes_doc,
|
||||||
|
|
Loading…
Reference in New Issue