Add use_bytearray attribute to _PyBytesWriter

Issue #25399: Add a new use_bytearray attribute to _PyBytesWriter to use a
bytearray buffer, instead of using a bytes object.
This commit is contained in:
Victor Stinner 2015-10-14 09:41:48 +02:00
parent 199c9a6f4b
commit 661aaccf9d
2 changed files with 76 additions and 35 deletions

View File

@ -128,17 +128,21 @@ PyAPI_FUNC(Py_ssize_t) _PyBytes_InsertThousandsGrouping(char *buffer,
A _PyBytesWriter variable must be declared at the end of variables in a A _PyBytesWriter variable must be declared at the end of variables in a
function to optimize the memory allocation on the stack. */ function to optimize the memory allocation on the stack. */
typedef struct { typedef struct {
/* bytes object */ /* bytes, bytearray or NULL (when the small buffer is used) */
PyObject *buffer; PyObject *buffer;
/* Number of allocated size */ /* Number of allocated size. */
Py_ssize_t allocated; Py_ssize_t allocated;
/* Minimum number of allocated bytes, /* Minimum number of allocated bytes,
incremented by _PyBytesWriter_Prepare() */ incremented by _PyBytesWriter_Prepare() */
Py_ssize_t min_size; Py_ssize_t min_size;
/* If non-zero, overallocate the buffer (default: 0). */ /* If non-zero, use a bytearray instead of a bytes object for buffer. */
int use_bytearray;
/* If non-zero, overallocate the buffer (default: 0).
This flag must be zero if use_bytearray is non-zero. */
int overallocate; int overallocate;
/* Stack buffer */ /* Stack buffer */
@ -153,7 +157,7 @@ typedef struct {
PyAPI_FUNC(void) _PyBytesWriter_Init(_PyBytesWriter *writer); PyAPI_FUNC(void) _PyBytesWriter_Init(_PyBytesWriter *writer);
/* Get the buffer content and reset the writer. /* Get the buffer content and reset the writer.
Return a bytes object. Return a bytes object, or a bytearray object if use_bytearray is non-zero.
Raise an exception and return NULL on error. */ Raise an exception and return NULL on error. */
PyAPI_FUNC(PyObject *) _PyBytesWriter_Finish(_PyBytesWriter *writer, PyAPI_FUNC(PyObject *) _PyBytesWriter_Finish(_PyBytesWriter *writer,
void *str); void *str);

View File

@ -3852,11 +3852,8 @@ bytes_iter(PyObject *seq)
void void
_PyBytesWriter_Init(_PyBytesWriter *writer) _PyBytesWriter_Init(_PyBytesWriter *writer)
{ {
writer->buffer = NULL; /* Set all attributes before small_buffer to 0 */
writer->allocated = 0; memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
writer->min_size = 0;
writer->overallocate = 0;
writer->use_small_buffer = 0;
#ifdef Py_DEBUG #ifdef Py_DEBUG
memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer)); memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
#endif #endif
@ -3871,14 +3868,18 @@ _PyBytesWriter_Dealloc(_PyBytesWriter *writer)
Py_LOCAL_INLINE(char*) Py_LOCAL_INLINE(char*)
_PyBytesWriter_AsString(_PyBytesWriter *writer) _PyBytesWriter_AsString(_PyBytesWriter *writer)
{ {
if (!writer->use_small_buffer) { if (writer->use_small_buffer) {
assert(writer->buffer != NULL);
return PyBytes_AS_STRING(writer->buffer);
}
else {
assert(writer->buffer == NULL); assert(writer->buffer == NULL);
return writer->small_buffer; return writer->small_buffer;
} }
else if (writer->use_bytearray) {
assert(writer->buffer != NULL);
return PyByteArray_AS_STRING(writer->buffer);
}
else {
assert(writer->buffer != NULL);
return PyBytes_AS_STRING(writer->buffer);
}
} }
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
@ -3897,18 +3898,28 @@ _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
#ifdef Py_DEBUG #ifdef Py_DEBUG
char *start, *end; char *start, *end;
if (!writer->use_small_buffer) { if (writer->use_small_buffer) {
assert(writer->buffer != NULL);
assert(PyBytes_CheckExact(writer->buffer));
assert(Py_REFCNT(writer->buffer) == 1);
}
else {
assert(writer->buffer == NULL); assert(writer->buffer == NULL);
} }
else {
assert(writer->buffer != NULL);
if (writer->use_bytearray)
assert(PyByteArray_CheckExact(writer->buffer));
else
assert(PyBytes_CheckExact(writer->buffer));
assert(Py_REFCNT(writer->buffer) == 1);
}
start = _PyBytesWriter_AsString(writer); if (writer->use_bytearray) {
/* bytearray has its own overallocation algorithm,
writer overallocation must be disabled */
assert(!writer->overallocate);
}
assert(0 <= writer->allocated);
assert(0 <= writer->min_size && writer->min_size <= writer->allocated); assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
/* the last byte must always be null */ /* the last byte must always be null */
start = _PyBytesWriter_AsString(writer);
assert(start[writer->allocated] == 0); assert(start[writer->allocated] == 0);
end = start + writer->allocated; end = start + writer->allocated;
@ -3932,8 +3943,7 @@ _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
if (writer->min_size > PY_SSIZE_T_MAX - size) { if (writer->min_size > PY_SSIZE_T_MAX - size) {
PyErr_NoMemory(); PyErr_NoMemory();
_PyBytesWriter_Dealloc(writer); goto error;
return NULL;
} }
writer->min_size += size; writer->min_size += size;
@ -3950,23 +3960,38 @@ _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
pos = _PyBytesWriter_GetPos(writer, str); pos = _PyBytesWriter_GetPos(writer, str);
if (!writer->use_small_buffer) { if (!writer->use_small_buffer) {
/* Note: Don't use a bytearray object because the conversion from if (writer->use_bytearray) {
byterray to bytes requires to copy all bytes. */ if (PyByteArray_Resize(writer->buffer, allocated))
if (_PyBytes_Resize(&writer->buffer, allocated)) { goto error;
assert(writer->buffer == NULL); /* writer->allocated can be smaller than writer->buffer->ob_alloc,
return NULL; but we cannot use ob_alloc because bytes may need to be moved
to use the whole buffer. bytearray uses an internal optimization
to avoid moving or copying bytes when bytes are removed at the
beginning (ex: del bytearray[:1]). */
}
else {
if (_PyBytes_Resize(&writer->buffer, allocated))
goto error;
} }
} }
else { else {
/* convert from stack buffer to bytes object buffer */ /* convert from stack buffer to bytes object buffer */
assert(writer->buffer == NULL); assert(writer->buffer == NULL);
writer->buffer = PyBytes_FromStringAndSize(NULL, allocated); if (writer->use_bytearray)
writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
else
writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
if (writer->buffer == NULL) if (writer->buffer == NULL)
return NULL; goto error;
if (pos != 0) { if (pos != 0) {
Py_MEMCPY(PyBytes_AS_STRING(writer->buffer), char *dest;
if (writer->use_bytearray)
dest = PyByteArray_AS_STRING(writer->buffer);
else
dest = PyBytes_AS_STRING(writer->buffer);
Py_MEMCPY(dest,
writer->small_buffer, writer->small_buffer,
pos); pos);
} }
@ -3981,6 +4006,10 @@ _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
str = _PyBytesWriter_AsString(writer) + pos; str = _PyBytesWriter_AsString(writer) + pos;
_PyBytesWriter_CheckConsistency(writer, str); _PyBytesWriter_CheckConsistency(writer, str);
return str; return str;
error:
_PyBytesWriter_Dealloc(writer);
return NULL;
} }
/* Allocate the buffer to write size bytes. /* Allocate the buffer to write size bytes.
@ -4013,7 +4042,7 @@ _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
_PyBytesWriter_CheckConsistency(writer, str); _PyBytesWriter_CheckConsistency(writer, str);
pos = _PyBytesWriter_GetPos(writer, str); pos = _PyBytesWriter_GetPos(writer, str);
if (pos == 0) { if (pos == 0 && !writer->use_bytearray) {
Py_CLEAR(writer->buffer); Py_CLEAR(writer->buffer);
/* Get the empty byte string singleton */ /* Get the empty byte string singleton */
result = PyBytes_FromStringAndSize(NULL, 0); result = PyBytes_FromStringAndSize(NULL, 0);
@ -4026,9 +4055,17 @@ _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
writer->buffer = NULL; writer->buffer = NULL;
if (pos != writer->allocated) { if (pos != writer->allocated) {
if (_PyBytes_Resize(&result, pos)) { if (writer->use_bytearray) {
assert(result == NULL); if (PyByteArray_Resize(result, pos)) {
return NULL; Py_DECREF(result);
return NULL;
}
}
else {
if (_PyBytes_Resize(&result, pos)) {
assert(result == NULL);
return NULL;
}
} }
} }
} }