From fdfbf781140f22619b0ef6bfeac792496774bb69 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 9 Oct 2015 00:33:49 +0200 Subject: [PATCH] Issue #25318: Add _PyBytesWriter API Add a new private API to optimize Unicode encoders. It uses a small buffer allocated on the stack and supports overallocation. Use _PyBytesWriter API for UCS1 (ASCII and Latin1) and UTF-8 encoders. Enable overallocation for the UTF-8 encoder with error handlers. unicode_encode_ucs1(): initialize collend to collstart+1 to not check the current character twice, we already know that it is not ASCII. --- Include/unicodeobject.h | 2 +- Objects/stringlib/codecs.h | 84 +++------- Objects/unicodeobject.c | 316 +++++++++++++++++++++++++++++-------- 3 files changed, 269 insertions(+), 133 deletions(-) diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index d0e01426146..adcb64c350f 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -908,7 +908,7 @@ typedef struct { /* minimum character (default: 127, ASCII) */ Py_UCS4 min_char; - /* If non-zero, overallocate the buffer by 25% (default: 0). */ + /* If non-zero, overallocate the buffer (default: 0). 
*/ unsigned char overallocate; /* If readonly is 1, buffer is a shared string (cannot be modified) diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index 562191c18e9..d7a991855bd 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -263,10 +263,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, #define MAX_SHORT_UNICHARS 300 /* largest size we'll do on the stack */ Py_ssize_t i; /* index into s of next input byte */ - PyObject *result; /* result string object */ char *p; /* next free byte in output buffer */ - Py_ssize_t nallocated; /* number of result bytes allocated */ - Py_ssize_t nneeded; /* number of result bytes needed */ #if STRINGLIB_SIZEOF_CHAR > 1 PyObject *error_handler_obj = NULL; PyObject *exc = NULL; @@ -275,39 +272,25 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, #endif #if STRINGLIB_SIZEOF_CHAR == 1 const Py_ssize_t max_char_size = 2; - char stackbuf[MAX_SHORT_UNICHARS * 2]; #elif STRINGLIB_SIZEOF_CHAR == 2 const Py_ssize_t max_char_size = 3; - char stackbuf[MAX_SHORT_UNICHARS * 3]; #else /* STRINGLIB_SIZEOF_CHAR == 4 */ const Py_ssize_t max_char_size = 4; - char stackbuf[MAX_SHORT_UNICHARS * 4]; #endif + _PyBytesWriter writer; assert(size >= 0); + _PyBytesWriter_Init(&writer); - if (size <= MAX_SHORT_UNICHARS) { - /* Write into the stack buffer; nallocated can't overflow. - * At the end, we'll allocate exactly as much heap space as it - * turns out we need. - */ - nallocated = Py_SAFE_DOWNCAST(sizeof(stackbuf), size_t, int); - result = NULL; /* will allocate after we're done */ - p = stackbuf; - } - else { - if (size > PY_SSIZE_T_MAX / max_char_size) { - /* integer overflow */ - return PyErr_NoMemory(); - } - /* Overallocate on the heap, and give the excess back at the end. 
*/ - nallocated = size * max_char_size; - result = PyBytes_FromStringAndSize(NULL, nallocated); - if (result == NULL) - return NULL; - p = PyBytes_AS_STRING(result); + if (size > PY_SSIZE_T_MAX / max_char_size) { + /* integer overflow */ + return PyErr_NoMemory(); } + p = _PyBytesWriter_Alloc(&writer, size * max_char_size); + if (p == NULL) + return NULL; + for (i = 0; i < size;) { Py_UCS4 ch = data[i++]; @@ -338,6 +321,9 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, while ((endpos < size) && Py_UNICODE_IS_SURROGATE(data[endpos])) endpos++; + /* Only overallocate the buffer if it's not the last write */ + writer.overallocate = (endpos < size); + switch (error_handler) { case _Py_ERROR_REPLACE: @@ -387,29 +373,10 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, repsize = PyUnicode_GET_LENGTH(rep); if (repsize > max_char_size) { - Py_ssize_t offset; - - if (result == NULL) - offset = p - stackbuf; - else - offset = p - PyBytes_AS_STRING(result); - - if (nallocated > PY_SSIZE_T_MAX - repsize + max_char_size) { - /* integer overflow */ - PyErr_NoMemory(); + p = _PyBytesWriter_Prepare(&writer, p, + repsize - max_char_size); + if (p == NULL) goto error; - } - nallocated += repsize - max_char_size; - if (result != NULL) { - if (_PyBytes_Resize(&result, nallocated) < 0) - goto error; - } else { - result = PyBytes_FromStringAndSize(NULL, nallocated); - if (result == NULL) - goto error; - Py_MEMCPY(PyBytes_AS_STRING(result), stackbuf, offset); - } - p = PyBytes_AS_STRING(result) + offset; } if (PyBytes_Check(rep)) { @@ -437,6 +404,10 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, i = newpos; } + + /* If overallocation was disabled, ensure that it was the last + write. Otherwise, we missed an optimization */ + assert(writer.overallocate || i == size); } else #if STRINGLIB_SIZEOF_CHAR > 2 @@ -461,31 +432,18 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, #endif /* STRINGLIB_SIZEOF_CHAR > 1 */ } - if (result == NULL) { - /* This was stack allocated. 
*/ - nneeded = p - stackbuf; - assert(nneeded <= nallocated); - result = PyBytes_FromStringAndSize(stackbuf, nneeded); - } - else { - /* Cut back to size actually needed. */ - nneeded = p - PyBytes_AS_STRING(result); - assert(nneeded <= nallocated); - _PyBytes_Resize(&result, nneeded); - } - #if STRINGLIB_SIZEOF_CHAR > 1 Py_XDECREF(error_handler_obj); Py_XDECREF(exc); #endif - return result; + return _PyBytesWriter_Finish(&writer, p); #if STRINGLIB_SIZEOF_CHAR > 1 error: Py_XDECREF(rep); Py_XDECREF(error_handler_obj); Py_XDECREF(exc); - Py_XDECREF(result); + _PyBytesWriter_Dealloc(&writer); return NULL; #endif diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3d7840403c1..010a610afe2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -163,6 +163,14 @@ extern "C" { *_to++ = (to_type) *_iter++; \ } while (0) +#ifdef MS_WINDOWS + /* On Windows, overallocate by 50% is the best factor */ +# define OVERALLOCATE_FACTOR 2 +#else + /* On Linux, overallocate by 25% is the best factor */ +# define OVERALLOCATE_FACTOR 4 +#endif + /* This dictionary holds all interned unicode strings. Note that references to strings in this dictionary are *not* counted in the string's ob_refcnt. When the interned string reaches a refcnt of 0 the string deallocation @@ -338,6 +346,216 @@ PyUnicode_GetMax(void) #endif } +/* The _PyBytesWriter structure is big: it contains an embedded "stack buffer". + A _PyBytesWriter variable must be declared at the end of variables in a + function to optimize the memory allocation on the stack. */ +typedef struct { + /* bytes object */ + PyObject *buffer; + + /* Number of allocated bytes */ + Py_ssize_t allocated; + + /* Current size of the buffer (can be smaller than the allocated size) */ + Py_ssize_t size; + + /* If non-zero, overallocate the buffer (default: 0). 
*/ + int overallocate; + + /* Non-zero if the data lives in stack_buffer instead of the bytes object */ + int use_stack_buffer; + char stack_buffer[512]; +} _PyBytesWriter; + +static void +_PyBytesWriter_Init(_PyBytesWriter *writer) +{ + writer->buffer = NULL; + writer->allocated = 0; + writer->size = 0; + writer->overallocate = 0; + writer->use_stack_buffer = 0; +#ifdef Py_DEBUG + memset(writer->stack_buffer, 0xCB, sizeof(writer->stack_buffer)); +#endif +} + +static void +_PyBytesWriter_Dealloc(_PyBytesWriter *writer) +{ + Py_CLEAR(writer->buffer); +} + +static char* +_PyBytesWriter_AsString(_PyBytesWriter *writer) +{ + if (!writer->use_stack_buffer) { + assert(writer->buffer != NULL); + return PyBytes_AS_STRING(writer->buffer); + } + else { + assert(writer->buffer == NULL); + return writer->stack_buffer; + } +} + +Py_LOCAL_INLINE(Py_ssize_t) +_PyBytesWriter_GetPos(_PyBytesWriter *writer, char *str) +{ + char *start = _PyBytesWriter_AsString(writer); + assert(str != NULL); + assert(str >= start); + return str - start; +} + +Py_LOCAL_INLINE(void) +_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str) +{ +#ifdef Py_DEBUG + char *start, *end; + + if (!writer->use_stack_buffer) { + assert(writer->buffer != NULL); + assert(PyBytes_CheckExact(writer->buffer)); + assert(Py_REFCNT(writer->buffer) == 1); + } + else { + assert(writer->buffer == NULL); + } + + start = _PyBytesWriter_AsString(writer); + assert(0 <= writer->size && writer->size <= writer->allocated); + /* the byte just past the allocated area must always be null */ + assert(start[writer->allocated] == 0); + + end = start + writer->allocated; + assert(str != NULL); + assert(start <= str && str <= end); +#endif +} + +/* Add *size* bytes to the buffer. + str is the current pointer inside the buffer. + Return the updated current pointer inside the buffer. + Raise an exception and return NULL on error. 
*/ +static char* +_PyBytesWriter_Prepare(_PyBytesWriter *writer, char *str, Py_ssize_t size) +{ + Py_ssize_t allocated, pos; + + _PyBytesWriter_CheckConsistency(writer, str); + assert(size >= 0); + + if (size == 0) { + /* nothing to do */ + return str; + } + + if (writer->size > PY_SSIZE_T_MAX - size) { + PyErr_NoMemory(); + _PyBytesWriter_Dealloc(writer); + return NULL; + } + writer->size += size; + + allocated = writer->allocated; + if (writer->size <= allocated) + return str; + + allocated = writer->size; + if (writer->overallocate + && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) { + /* overallocate to limit the number of realloc() calls */ + allocated += allocated / OVERALLOCATE_FACTOR; + } + + pos = _PyBytesWriter_GetPos(writer, str); + if (!writer->use_stack_buffer) { + /* Note: Don't use a bytearray object because the conversion from + bytearray to bytes requires copying all bytes. */ + if (_PyBytes_Resize(&writer->buffer, allocated)) { + assert(writer->buffer == NULL); + return NULL; + } + } + else { + /* convert from stack buffer to bytes object buffer */ + assert(writer->buffer == NULL); + + writer->buffer = PyBytes_FromStringAndSize(NULL, allocated); + if (writer->buffer == NULL) + return NULL; + + if (pos != 0) { + Py_MEMCPY(PyBytes_AS_STRING(writer->buffer), + writer->stack_buffer, + pos); + } + +#ifdef Py_DEBUG + memset(writer->stack_buffer, 0xDB, sizeof(writer->stack_buffer)); +#endif + + writer->use_stack_buffer = 0; + } + writer->allocated = allocated; + + str = _PyBytesWriter_AsString(writer) + pos; + _PyBytesWriter_CheckConsistency(writer, str); + return str; +} + +/* Allocate the buffer to write size bytes. + Return the pointer to the beginning of buffer data. + Raise an exception and return NULL on error. 
*/ +static char* +_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size) +{ + /* ensure that _PyBytesWriter_Alloc() is only called once */ + assert(writer->size == 0 && writer->buffer == NULL); + assert(size >= 0); + + writer->use_stack_buffer = 1; +#if Py_DEBUG + /* the last byte is reserved, it must be '\0' */ + writer->stack_buffer[sizeof(writer->stack_buffer) - 1] = 0; + writer->allocated = sizeof(writer->stack_buffer) - 1; +#else + writer->allocated = sizeof(writer->stack_buffer); +#endif + return _PyBytesWriter_Prepare(writer, writer->stack_buffer, size); +} + +/* Get the buffer content and reset the writer. + Return a bytes object. + Raise an exception and return NULL on error. */ +static PyObject * +_PyBytesWriter_Finish(_PyBytesWriter *writer, char *str) +{ + Py_ssize_t pos; + PyObject *result; + + _PyBytesWriter_CheckConsistency(writer, str); + + pos = _PyBytesWriter_GetPos(writer, str); + if (!writer->use_stack_buffer) { + if (pos != writer->allocated) { + if (_PyBytes_Resize(&writer->buffer, pos)) { + assert(writer->buffer == NULL); + return NULL; + } + } + + result = writer->buffer; + writer->buffer = NULL; + } + else { + result = PyBytes_FromStringAndSize(writer->stack_buffer, pos); + } + + return result; +} + #ifdef Py_DEBUG int _PyUnicode_CheckConsistency(PyObject *op, int check_content) @@ -6460,17 +6678,15 @@ unicode_encode_ucs1(PyObject *unicode, Py_ssize_t pos=0, size; int kind; void *data; - /* output object */ - PyObject *res; /* pointer into the output */ char *str; - /* current output position */ - Py_ssize_t ressize; const char *encoding = (limit == 256) ? "latin-1" : "ascii"; const char *reason = (limit == 256) ? 
"ordinal not in range(256)" : "ordinal not in range(128)"; PyObject *error_handler_obj = NULL; PyObject *exc = NULL; _Py_error_handler error_handler = _Py_ERROR_UNKNOWN; + /* output object */ + _PyBytesWriter writer; if (PyUnicode_READY(unicode) == -1) return NULL; @@ -6481,11 +6697,11 @@ unicode_encode_ucs1(PyObject *unicode, replacements, if we need more, we'll resize */ if (size == 0) return PyBytes_FromStringAndSize(NULL, 0); - res = PyBytes_FromStringAndSize(NULL, size); - if (res == NULL) + + _PyBytesWriter_Init(&writer); + str = _PyBytesWriter_Alloc(&writer, size); + if (str == NULL) return NULL; - str = PyBytes_AS_STRING(res); - ressize = size; while (pos < size) { Py_UCS4 ch = PyUnicode_READ(kind, data, pos); @@ -6499,15 +6715,18 @@ unicode_encode_ucs1(PyObject *unicode, else { Py_ssize_t requiredsize; PyObject *repunicode; - Py_ssize_t repsize, newpos, respos, i; + Py_ssize_t repsize, newpos, i; /* startpos for collecting unencodable chars */ Py_ssize_t collstart = pos; - Py_ssize_t collend = pos; + Py_ssize_t collend = collstart + 1; /* find all unecodable characters */ while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit)) ++collend; + /* Only overallocate the buffer if it's not the last write */ + writer.overallocate = (collend < size); + /* cache callback name lookup (if not done yet, i.e. 
it's the first error) */ if (error_handler == _Py_ERROR_UNKNOWN) error_handler = get_error_handler(errors); @@ -6526,8 +6745,7 @@ unicode_encode_ucs1(PyObject *unicode, break; case _Py_ERROR_XMLCHARREFREPLACE: - respos = str - PyBytes_AS_STRING(res); - requiredsize = respos; + requiredsize = 0; /* determine replacement size */ for (i = collstart; i < collend; ++i) { Py_ssize_t incr; @@ -6553,17 +6771,11 @@ unicode_encode_ucs1(PyObject *unicode, goto overflow; requiredsize += incr; } - if (requiredsize > PY_SSIZE_T_MAX - (size - collend)) - goto overflow; - requiredsize += size - collend; - if (requiredsize > ressize) { - if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize) - requiredsize = 2*ressize; - if (_PyBytes_Resize(&res, requiredsize)) - goto onError; - str = PyBytes_AS_STRING(res) + respos; - ressize = requiredsize; - } + + str = _PyBytesWriter_Prepare(&writer, str, requiredsize-1); + if (str == NULL) + goto onError; + /* generate replacement */ for (i = collstart; i < collend; ++i) { str += sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i)); @@ -6598,20 +6810,9 @@ unicode_encode_ucs1(PyObject *unicode, if (PyBytes_Check(repunicode)) { /* Directly copy bytes result to output. */ repsize = PyBytes_Size(repunicode); - if (repsize > 1) { - /* Make room for all additional bytes. */ - respos = str - PyBytes_AS_STRING(res); - if (ressize > PY_SSIZE_T_MAX - repsize - 1) { - Py_DECREF(repunicode); - goto overflow; - } - if (_PyBytes_Resize(&res, ressize+repsize-1)) { - Py_DECREF(repunicode); - goto onError; - } - str = PyBytes_AS_STRING(res) + respos; - ressize += repsize-1; - } + str = _PyBytesWriter_Prepare(&writer, str, repsize-1); + if (str == NULL) + goto onError; memcpy(str, PyBytes_AsString(repunicode), repsize); str += repsize; pos = newpos; @@ -6622,24 +6823,11 @@ unicode_encode_ucs1(PyObject *unicode, /* need more space? 
(at least enough for what we have+the replacement+the rest of the string, so we won't have to check space for encodable characters) */ - respos = str - PyBytes_AS_STRING(res); repsize = PyUnicode_GET_LENGTH(repunicode); - requiredsize = respos; - if (requiredsize > PY_SSIZE_T_MAX - repsize) - goto overflow; - requiredsize += repsize; - if (requiredsize > PY_SSIZE_T_MAX - (size - collend)) - goto overflow; - requiredsize += size - collend; - if (requiredsize > ressize) { - if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize) - requiredsize = 2*ressize; - if (_PyBytes_Resize(&res, requiredsize)) { - Py_DECREF(repunicode); + if (repsize > 1) { + str = _PyBytesWriter_Prepare(&writer, str, repsize-1); + if (str == NULL) goto onError; - } - str = PyBytes_AS_STRING(res) + respos; - ressize = requiredsize; } /* check if there is anything unencodable in the replacement @@ -6657,26 +6845,23 @@ unicode_encode_ucs1(PyObject *unicode, pos = newpos; Py_DECREF(repunicode); } + + /* If overallocation was disabled, ensure that it was the last + write. 
Otherwise, we missed an optimization */ + assert(writer.overallocate || pos == size); } } - /* Resize if we allocated to much */ - size = str - PyBytes_AS_STRING(res); - if (size < ressize) { /* If this falls res will be NULL */ - assert(size >= 0); - if (_PyBytes_Resize(&res, size) < 0) - goto onError; - } Py_XDECREF(error_handler_obj); Py_XDECREF(exc); - return res; + return _PyBytesWriter_Finish(&writer, str); overflow: PyErr_SetString(PyExc_OverflowError, "encoded result is too long for a Python string"); onError: - Py_XDECREF(res); + _PyBytesWriter_Dealloc(&writer); Py_XDECREF(error_handler_obj); Py_XDECREF(exc); return NULL; @@ -13366,13 +13551,6 @@ int _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, Py_ssize_t length, Py_UCS4 maxchar) { -#ifdef MS_WINDOWS - /* On Windows, overallocate by 50% is the best factor */ -# define OVERALLOCATE_FACTOR 2 -#else - /* On Linux, overallocate by 25% is the best factor */ -# define OVERALLOCATE_FACTOR 4 -#endif Py_ssize_t newlen; PyObject *newbuffer;