From eaaaf136d20e1dd5160c2083fb81313d0818ea5d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 13 Oct 2015 10:51:47 +0200 Subject: [PATCH] Issue #25384: Use _PyBytesWriter API in binascii This API avoids a final call to _PyBytes_Resize() for output smaller than 512 bytes. Small optimization: disable overallocation in binascii.rledecode_hqx() for the last write. --- Modules/binascii.c | 194 +++++++++++++++++++-------------------------- 1 file changed, 83 insertions(+), 111 deletions(-) diff --git a/Modules/binascii.c b/Modules/binascii.c index 3e26a7a9d21..dfa55356dc9 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -346,9 +346,10 @@ binascii_b2a_uu_impl(PyModuleDef *module, Py_buffer *data) int leftbits = 0; unsigned char this_ch; unsigned int leftchar = 0; - PyObject *rv; - Py_ssize_t bin_len; + Py_ssize_t bin_len, out_len; + _PyBytesWriter writer; + _PyBytesWriter_Init(&writer); bin_data = data->buf; bin_len = data->len; if ( bin_len > 45 ) { @@ -358,9 +359,10 @@ binascii_b2a_uu_impl(PyModuleDef *module, Py_buffer *data) } /* We're lazy and allocate to much (fixed up later) */ - if ( (rv=PyBytes_FromStringAndSize(NULL, 2 + (bin_len+2)/3*4)) == NULL ) + out_len = 2 + (bin_len + 2) / 3 * 4; + ascii_data = _PyBytesWriter_Alloc(&writer, out_len); + if (ascii_data == NULL) return NULL; - ascii_data = (unsigned char *)PyBytes_AS_STRING(rv); /* Store the length */ *ascii_data++ = ' ' + (bin_len & 077); @@ -382,12 +384,7 @@ binascii_b2a_uu_impl(PyModuleDef *module, Py_buffer *data) } *ascii_data++ = '\n'; /* Append a courtesy newline */ - if (_PyBytes_Resize(&rv, - (ascii_data - - (unsigned char *)PyBytes_AS_STRING(rv))) < 0) { - Py_CLEAR(rv); - } - return rv; + return _PyBytesWriter_Finish(&writer, ascii_data); } @@ -433,9 +430,9 @@ binascii_a2b_base64_impl(PyModuleDef *module, Py_buffer *data) int leftbits = 0; unsigned char this_ch; unsigned int leftchar = 0; - PyObject *rv; Py_ssize_t ascii_len, bin_len; int quad_pos = 0; + _PyBytesWriter writer; ascii_data = data->buf; ascii_len = data->len; @@ -447,11 +444,12 @@ binascii_a2b_base64_impl(PyModuleDef *module, Py_buffer *data) bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */ + _PyBytesWriter_Init(&writer); + /* Allocate the buffer */ - if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL ) + bin_data = _PyBytesWriter_Alloc(&writer, bin_len); + if (bin_data == NULL) return NULL; - bin_data = (unsigned char *)PyBytes_AS_STRING(rv); - bin_len = 0; for( ; ascii_len > 0; ascii_len--, ascii_data++) { this_ch = *ascii_data; @@ -496,31 +494,17 @@ binascii_a2b_base64_impl(PyModuleDef *module, Py_buffer *data) if ( leftbits >= 8 ) { leftbits -= 8; *bin_data++ = (leftchar >> leftbits) & 0xff; - bin_len++; leftchar &= ((1 << leftbits) - 1); } } if (leftbits != 0) { PyErr_SetString(Error, "Incorrect padding"); - Py_DECREF(rv); + _PyBytesWriter_Dealloc(&writer); return NULL; } - /* And set string size correctly. If the result string is empty - ** (because the input was all invalid) return the shared empty - ** string instead; _PyBytes_Resize() won't do this for us. - */ - if (bin_len > 0) { - if (_PyBytes_Resize(&rv, bin_len) < 0) { - Py_CLEAR(rv); - } - } - else { - Py_DECREF(rv); - rv = PyBytes_FromStringAndSize("", 0); - } - return rv; + return _PyBytesWriter_Finish(&writer, bin_data); } @@ -542,11 +526,12 @@ binascii_b2a_base64_impl(PyModuleDef *module, Py_buffer *data, int newline) int leftbits = 0; unsigned char this_ch; unsigned int leftchar = 0; - PyObject *rv; Py_ssize_t bin_len, out_len; + _PyBytesWriter writer; bin_data = data->buf; bin_len = data->len; + _PyBytesWriter_Init(&writer); assert(bin_len >= 0); @@ -561,9 +546,9 @@ binascii_b2a_base64_impl(PyModuleDef *module, Py_buffer *data, int newline) out_len = bin_len*2 + 2; if (newline) out_len++; - if ( (rv=PyBytes_FromStringAndSize(NULL, out_len)) == NULL ) + ascii_data = _PyBytesWriter_Alloc(&writer, out_len); + if (ascii_data == NULL) return NULL; - ascii_data = (unsigned char *)PyBytes_AS_STRING(rv); for( ; bin_len > 0 ; bin_len--, bin_data++ ) { /* Shift the data into our buffer */ @@ -588,12 +573,7 @@ binascii_b2a_base64_impl(PyModuleDef *module, Py_buffer *data, int newline) if (newline) *ascii_data++ = '\n'; /* Append a courtesy newline */ - if (_PyBytes_Resize(&rv, - (ascii_data - - (unsigned char *)PyBytes_AS_STRING(rv))) < 0) { - Py_CLEAR(rv); - } - return rv; + return _PyBytesWriter_Finish(&writer, ascii_data); } /*[clinic input] @@ -613,12 +593,14 @@ binascii_a2b_hqx_impl(PyModuleDef *module, Py_buffer *data) int leftbits = 0; unsigned char this_ch; unsigned int leftchar = 0; - PyObject *rv; + PyObject *res; Py_ssize_t len; int done = 0; + _PyBytesWriter writer; ascii_data = data->buf; len = data->len; + _PyBytesWriter_Init(&writer); assert(len >= 0); @@ -628,9 +610,9 @@ binascii_a2b_hqx_impl(PyModuleDef *module, Py_buffer *data) /* Allocate a string that is too big (fixed later) Add two to the initial length to prevent interning which would preclude subsequent resizing. */ - if ( (rv=PyBytes_FromStringAndSize(NULL, len+2)) == NULL ) + bin_data = _PyBytesWriter_Alloc(&writer, len + 2); + if (bin_data == NULL) return NULL; - bin_data = (unsigned char *)PyBytes_AS_STRING(rv); for( ; len > 0 ; len--, ascii_data++ ) { /* Get the byte and look it up */ @@ -639,7 +621,7 @@ binascii_a2b_hqx_impl(PyModuleDef *module, Py_buffer *data) continue; if ( this_ch == FAIL ) { PyErr_SetString(Error, "Illegal char"); - Py_DECREF(rv); + _PyBytesWriter_Dealloc(&writer); return NULL; } if ( this_ch == DONE ) { @@ -661,21 +643,14 @@ binascii_a2b_hqx_impl(PyModuleDef *module, Py_buffer *data) if ( leftbits && !done ) { PyErr_SetString(Incomplete, "String has incomplete number of bytes"); - Py_DECREF(rv); + _PyBytesWriter_Dealloc(&writer); return NULL; } - if (_PyBytes_Resize(&rv, - (bin_data - - (unsigned char *)PyBytes_AS_STRING(rv))) < 0) { - Py_CLEAR(rv); - } - if (rv) { - PyObject *rrv = Py_BuildValue("Oi", rv, done); - Py_DECREF(rv); - return rrv; - } - return NULL; + res = _PyBytesWriter_Finish(&writer, bin_data); + if (res == NULL) + return NULL; + return Py_BuildValue("Ni", res, done); } @@ -693,10 +668,11 @@ binascii_rlecode_hqx_impl(PyModuleDef *module, Py_buffer *data) /*[clinic end generated code: output=0905da344dbf0648 input=e1f1712447a82b09]*/ { unsigned char *in_data, *out_data; - PyObject *rv; unsigned char ch; Py_ssize_t in, inend, len; + _PyBytesWriter writer; + _PyBytesWriter_Init(&writer); in_data = data->buf; len = data->len; @@ -706,9 +682,9 @@ binascii_rlecode_hqx_impl(PyModuleDef *module, Py_buffer *data) return PyErr_NoMemory(); /* Worst case: output is twice as big as input (fixed later) */ - if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL ) + out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2); + if (out_data == NULL) return NULL; - out_data = (unsigned char *)PyBytes_AS_STRING(rv); for( in=0; inbuf; len = data->len; + _PyBytesWriter_Init(&writer); assert(len >= 0); @@ -772,9 +745,9 @@ binascii_b2a_hqx_impl(PyModuleDef *module, Py_buffer *data) return PyErr_NoMemory(); /* Allocate a buffer that is at least large enough */ - if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL ) + ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2); + if (ascii_data == NULL) return NULL; - ascii_data = (unsigned char *)PyBytes_AS_STRING(rv); for( ; len > 0 ; len--, bin_data++ ) { /* Shift into our buffer, and output any 6bits ready */ @@ -791,12 +764,8 @@ binascii_b2a_hqx_impl(PyModuleDef *module, Py_buffer *data) leftchar <<= (6-leftbits); *ascii_data++ = table_b2a_hqx[leftchar & 0x3f]; } - if (_PyBytes_Resize(&rv, - (ascii_data - - (unsigned char *)PyBytes_AS_STRING(rv))) < 0) { - Py_CLEAR(rv); - } - return rv; + + return _PyBytesWriter_Finish(&writer, ascii_data); } @@ -815,11 +784,12 @@ binascii_rledecode_hqx_impl(PyModuleDef *module, Py_buffer *data) { unsigned char *in_data, *out_data; unsigned char in_byte, in_repeat; - PyObject *rv; Py_ssize_t in_len, out_len, out_len_left; + _PyBytesWriter writer; in_data = data->buf; in_len = data->len; + _PyBytesWriter_Init(&writer); assert(in_len >= 0); @@ -830,45 +800,49 @@ binascii_rledecode_hqx_impl(PyModuleDef *module, Py_buffer *data) return PyErr_NoMemory(); /* Allocate a buffer of reasonable size. Resized when needed */ - out_len = in_len*2; - if ( (rv=PyBytes_FromStringAndSize(NULL, out_len)) == NULL ) + out_len = in_len * 2; + out_data = _PyBytesWriter_Alloc(&writer, out_len); + if (out_data == NULL) return NULL; - out_len_left = out_len; - out_data = (unsigned char *)PyBytes_AS_STRING(rv); + + /* Use overallocation */ + writer.overallocate = 1; + out_len_left = writer.allocated; /* ** We need two macros here to get/put bytes and handle ** end-of-buffer for input and output strings. */ -#define INBYTE(b) \ - do { \ - if ( --in_len < 0 ) { \ - PyErr_SetString(Incomplete, ""); \ - Py_DECREF(rv); \ - return NULL; \ - } \ - b = *in_data++; \ +#define INBYTE(b) \ + do { \ + if ( --in_len < 0 ) { \ + PyErr_SetString(Incomplete, ""); \ + goto error; \ + } \ + b = *in_data++; \ } while(0) -#define OUTBYTE(b) \ - do { \ - if ( --out_len_left < 0 ) { \ - if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \ - if (_PyBytes_Resize(&rv, 2*out_len) < 0) \ - { Py_XDECREF(rv); return NULL; } \ - out_data = (unsigned char *)PyBytes_AS_STRING(rv) \ - + out_len; \ - out_len_left = out_len-1; \ - out_len = out_len * 2; \ - } \ - *out_data++ = b; \ +#define OUTBYTE(b) \ + do { \ + if ( --out_len_left < 0 ) { \ + if (in_len <= 0) { \ + /* We are done after this write, no need to \ + overallocate the buffer anymore */ \ + writer.overallocate = 0; \ + } \ + out_data = _PyBytesWriter_Prepare(&writer, out_data, 1); \ + if (out_data == NULL) \ + goto error; \ + out_len_left = writer.allocated; \ + } \ + *out_data++ = b; \ } while(0) - /* - ** Handle first byte separately (since we have to get angry - ** in case of an orphaned RLE code). - */ - INBYTE(in_byte); + /* + ** Handle first byte separately (since we have to get angry + ** in case of an orphaned RLE code). + */ + INBYTE(in_byte); if (in_byte == RUNCHAR) { INBYTE(in_repeat); @@ -877,8 +851,7 @@ binascii_rledecode_hqx_impl(PyModuleDef *module, Py_buffer *data) ** of the string only). This is a programmer error. */ PyErr_SetString(Error, "Orphaned RLE code at start"); - Py_DECREF(rv); - return NULL; + goto error; } OUTBYTE(RUNCHAR); } else { @@ -904,12 +877,11 @@ binascii_rledecode_hqx_impl(PyModuleDef *module, Py_buffer *data) OUTBYTE(in_byte); } } - if (_PyBytes_Resize(&rv, - (out_data - - (unsigned char *)PyBytes_AS_STRING(rv))) < 0) { - Py_CLEAR(rv); - } - return rv; + return _PyBytesWriter_Finish(&writer, out_data); + +error: + _PyBytesWriter_Dealloc(&writer); + return NULL; }