Issue #25384: Use _PyBytesWriter API in binascii

This API avoids a final call to _PyBytes_Resize() for output smaller than 512
bytes.

Small optimization: disable overallocation in binascii.rledecode_hqx() for the
last write.
This commit is contained in:
Victor Stinner 2015-10-13 10:51:47 +02:00
parent fcd8794ab5
commit eaaaf136d2
1 changed file with 83 additions and 111 deletions

View File

@ -346,9 +346,10 @@ binascii_b2a_uu_impl(PyModuleDef *module, Py_buffer *data)
int leftbits = 0; int leftbits = 0;
unsigned char this_ch; unsigned char this_ch;
unsigned int leftchar = 0; unsigned int leftchar = 0;
PyObject *rv; Py_ssize_t bin_len, out_len;
Py_ssize_t bin_len; _PyBytesWriter writer;
_PyBytesWriter_Init(&writer);
bin_data = data->buf; bin_data = data->buf;
bin_len = data->len; bin_len = data->len;
if ( bin_len > 45 ) { if ( bin_len > 45 ) {
@ -358,9 +359,10 @@ binascii_b2a_uu_impl(PyModuleDef *module, Py_buffer *data)
} }
/* We're lazy and allocate to much (fixed up later) */ /* We're lazy and allocate to much (fixed up later) */
if ( (rv=PyBytes_FromStringAndSize(NULL, 2 + (bin_len+2)/3*4)) == NULL ) out_len = 2 + (bin_len + 2) / 3 * 4;
ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
if (ascii_data == NULL)
return NULL; return NULL;
ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
/* Store the length */ /* Store the length */
*ascii_data++ = ' ' + (bin_len & 077); *ascii_data++ = ' ' + (bin_len & 077);
@ -382,12 +384,7 @@ binascii_b2a_uu_impl(PyModuleDef *module, Py_buffer *data)
} }
*ascii_data++ = '\n'; /* Append a courtesy newline */ *ascii_data++ = '\n'; /* Append a courtesy newline */
if (_PyBytes_Resize(&rv, return _PyBytesWriter_Finish(&writer, ascii_data);
(ascii_data -
(unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Py_CLEAR(rv);
}
return rv;
} }
@ -433,9 +430,9 @@ binascii_a2b_base64_impl(PyModuleDef *module, Py_buffer *data)
int leftbits = 0; int leftbits = 0;
unsigned char this_ch; unsigned char this_ch;
unsigned int leftchar = 0; unsigned int leftchar = 0;
PyObject *rv;
Py_ssize_t ascii_len, bin_len; Py_ssize_t ascii_len, bin_len;
int quad_pos = 0; int quad_pos = 0;
_PyBytesWriter writer;
ascii_data = data->buf; ascii_data = data->buf;
ascii_len = data->len; ascii_len = data->len;
@ -447,11 +444,12 @@ binascii_a2b_base64_impl(PyModuleDef *module, Py_buffer *data)
bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */ bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
_PyBytesWriter_Init(&writer);
/* Allocate the buffer */ /* Allocate the buffer */
if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL ) bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
if (bin_data == NULL)
return NULL; return NULL;
bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
bin_len = 0;
for( ; ascii_len > 0; ascii_len--, ascii_data++) { for( ; ascii_len > 0; ascii_len--, ascii_data++) {
this_ch = *ascii_data; this_ch = *ascii_data;
@ -496,31 +494,17 @@ binascii_a2b_base64_impl(PyModuleDef *module, Py_buffer *data)
if ( leftbits >= 8 ) { if ( leftbits >= 8 ) {
leftbits -= 8; leftbits -= 8;
*bin_data++ = (leftchar >> leftbits) & 0xff; *bin_data++ = (leftchar >> leftbits) & 0xff;
bin_len++;
leftchar &= ((1 << leftbits) - 1); leftchar &= ((1 << leftbits) - 1);
} }
} }
if (leftbits != 0) { if (leftbits != 0) {
PyErr_SetString(Error, "Incorrect padding"); PyErr_SetString(Error, "Incorrect padding");
Py_DECREF(rv); _PyBytesWriter_Dealloc(&writer);
return NULL; return NULL;
} }
/* And set string size correctly. If the result string is empty return _PyBytesWriter_Finish(&writer, bin_data);
** (because the input was all invalid) return the shared empty
** string instead; _PyBytes_Resize() won't do this for us.
*/
if (bin_len > 0) {
if (_PyBytes_Resize(&rv, bin_len) < 0) {
Py_CLEAR(rv);
}
}
else {
Py_DECREF(rv);
rv = PyBytes_FromStringAndSize("", 0);
}
return rv;
} }
@ -542,11 +526,12 @@ binascii_b2a_base64_impl(PyModuleDef *module, Py_buffer *data, int newline)
int leftbits = 0; int leftbits = 0;
unsigned char this_ch; unsigned char this_ch;
unsigned int leftchar = 0; unsigned int leftchar = 0;
PyObject *rv;
Py_ssize_t bin_len, out_len; Py_ssize_t bin_len, out_len;
_PyBytesWriter writer;
bin_data = data->buf; bin_data = data->buf;
bin_len = data->len; bin_len = data->len;
_PyBytesWriter_Init(&writer);
assert(bin_len >= 0); assert(bin_len >= 0);
@ -561,9 +546,9 @@ binascii_b2a_base64_impl(PyModuleDef *module, Py_buffer *data, int newline)
out_len = bin_len*2 + 2; out_len = bin_len*2 + 2;
if (newline) if (newline)
out_len++; out_len++;
if ( (rv=PyBytes_FromStringAndSize(NULL, out_len)) == NULL ) ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
if (ascii_data == NULL)
return NULL; return NULL;
ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
for( ; bin_len > 0 ; bin_len--, bin_data++ ) { for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
/* Shift the data into our buffer */ /* Shift the data into our buffer */
@ -588,12 +573,7 @@ binascii_b2a_base64_impl(PyModuleDef *module, Py_buffer *data, int newline)
if (newline) if (newline)
*ascii_data++ = '\n'; /* Append a courtesy newline */ *ascii_data++ = '\n'; /* Append a courtesy newline */
if (_PyBytes_Resize(&rv, return _PyBytesWriter_Finish(&writer, ascii_data);
(ascii_data -
(unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Py_CLEAR(rv);
}
return rv;
} }
/*[clinic input] /*[clinic input]
@ -613,12 +593,14 @@ binascii_a2b_hqx_impl(PyModuleDef *module, Py_buffer *data)
int leftbits = 0; int leftbits = 0;
unsigned char this_ch; unsigned char this_ch;
unsigned int leftchar = 0; unsigned int leftchar = 0;
PyObject *rv; PyObject *res;
Py_ssize_t len; Py_ssize_t len;
int done = 0; int done = 0;
_PyBytesWriter writer;
ascii_data = data->buf; ascii_data = data->buf;
len = data->len; len = data->len;
_PyBytesWriter_Init(&writer);
assert(len >= 0); assert(len >= 0);
@ -628,9 +610,9 @@ binascii_a2b_hqx_impl(PyModuleDef *module, Py_buffer *data)
/* Allocate a string that is too big (fixed later) /* Allocate a string that is too big (fixed later)
Add two to the initial length to prevent interning which Add two to the initial length to prevent interning which
would preclude subsequent resizing. */ would preclude subsequent resizing. */
if ( (rv=PyBytes_FromStringAndSize(NULL, len+2)) == NULL ) bin_data = _PyBytesWriter_Alloc(&writer, len + 2);
if (bin_data == NULL)
return NULL; return NULL;
bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
for( ; len > 0 ; len--, ascii_data++ ) { for( ; len > 0 ; len--, ascii_data++ ) {
/* Get the byte and look it up */ /* Get the byte and look it up */
@ -639,7 +621,7 @@ binascii_a2b_hqx_impl(PyModuleDef *module, Py_buffer *data)
continue; continue;
if ( this_ch == FAIL ) { if ( this_ch == FAIL ) {
PyErr_SetString(Error, "Illegal char"); PyErr_SetString(Error, "Illegal char");
Py_DECREF(rv); _PyBytesWriter_Dealloc(&writer);
return NULL; return NULL;
} }
if ( this_ch == DONE ) { if ( this_ch == DONE ) {
@ -661,21 +643,14 @@ binascii_a2b_hqx_impl(PyModuleDef *module, Py_buffer *data)
if ( leftbits && !done ) { if ( leftbits && !done ) {
PyErr_SetString(Incomplete, PyErr_SetString(Incomplete,
"String has incomplete number of bytes"); "String has incomplete number of bytes");
Py_DECREF(rv); _PyBytesWriter_Dealloc(&writer);
return NULL; return NULL;
} }
if (_PyBytes_Resize(&rv,
(bin_data -
(unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Py_CLEAR(rv);
}
if (rv) {
PyObject *rrv = Py_BuildValue("Oi", rv, done);
Py_DECREF(rv);
return rrv;
}
return NULL; res = _PyBytesWriter_Finish(&writer, bin_data);
if (res == NULL)
return NULL;
return Py_BuildValue("Ni", res, done);
} }
@ -693,10 +668,11 @@ binascii_rlecode_hqx_impl(PyModuleDef *module, Py_buffer *data)
/*[clinic end generated code: output=0905da344dbf0648 input=e1f1712447a82b09]*/ /*[clinic end generated code: output=0905da344dbf0648 input=e1f1712447a82b09]*/
{ {
unsigned char *in_data, *out_data; unsigned char *in_data, *out_data;
PyObject *rv;
unsigned char ch; unsigned char ch;
Py_ssize_t in, inend, len; Py_ssize_t in, inend, len;
_PyBytesWriter writer;
_PyBytesWriter_Init(&writer);
in_data = data->buf; in_data = data->buf;
len = data->len; len = data->len;
@ -706,9 +682,9 @@ binascii_rlecode_hqx_impl(PyModuleDef *module, Py_buffer *data)
return PyErr_NoMemory(); return PyErr_NoMemory();
/* Worst case: output is twice as big as input (fixed later) */ /* Worst case: output is twice as big as input (fixed later) */
if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL ) out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
if (out_data == NULL)
return NULL; return NULL;
out_data = (unsigned char *)PyBytes_AS_STRING(rv);
for( in=0; in<len; in++) { for( in=0; in<len; in++) {
ch = in_data[in]; ch = in_data[in];
@ -734,12 +710,8 @@ binascii_rlecode_hqx_impl(PyModuleDef *module, Py_buffer *data)
} }
} }
} }
if (_PyBytes_Resize(&rv,
(out_data - return _PyBytesWriter_Finish(&writer, out_data);
(unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Py_CLEAR(rv);
}
return rv;
} }
@ -760,11 +732,12 @@ binascii_b2a_hqx_impl(PyModuleDef *module, Py_buffer *data)
int leftbits = 0; int leftbits = 0;
unsigned char this_ch; unsigned char this_ch;
unsigned int leftchar = 0; unsigned int leftchar = 0;
PyObject *rv;
Py_ssize_t len; Py_ssize_t len;
_PyBytesWriter writer;
bin_data = data->buf; bin_data = data->buf;
len = data->len; len = data->len;
_PyBytesWriter_Init(&writer);
assert(len >= 0); assert(len >= 0);
@ -772,9 +745,9 @@ binascii_b2a_hqx_impl(PyModuleDef *module, Py_buffer *data)
return PyErr_NoMemory(); return PyErr_NoMemory();
/* Allocate a buffer that is at least large enough */ /* Allocate a buffer that is at least large enough */
if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL ) ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
if (ascii_data == NULL)
return NULL; return NULL;
ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
for( ; len > 0 ; len--, bin_data++ ) { for( ; len > 0 ; len--, bin_data++ ) {
/* Shift into our buffer, and output any 6bits ready */ /* Shift into our buffer, and output any 6bits ready */
@ -791,12 +764,8 @@ binascii_b2a_hqx_impl(PyModuleDef *module, Py_buffer *data)
leftchar <<= (6-leftbits); leftchar <<= (6-leftbits);
*ascii_data++ = table_b2a_hqx[leftchar & 0x3f]; *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
} }
if (_PyBytes_Resize(&rv,
(ascii_data - return _PyBytesWriter_Finish(&writer, ascii_data);
(unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Py_CLEAR(rv);
}
return rv;
} }
@ -815,11 +784,12 @@ binascii_rledecode_hqx_impl(PyModuleDef *module, Py_buffer *data)
{ {
unsigned char *in_data, *out_data; unsigned char *in_data, *out_data;
unsigned char in_byte, in_repeat; unsigned char in_byte, in_repeat;
PyObject *rv;
Py_ssize_t in_len, out_len, out_len_left; Py_ssize_t in_len, out_len, out_len_left;
_PyBytesWriter writer;
in_data = data->buf; in_data = data->buf;
in_len = data->len; in_len = data->len;
_PyBytesWriter_Init(&writer);
assert(in_len >= 0); assert(in_len >= 0);
@ -830,45 +800,49 @@ binascii_rledecode_hqx_impl(PyModuleDef *module, Py_buffer *data)
return PyErr_NoMemory(); return PyErr_NoMemory();
/* Allocate a buffer of reasonable size. Resized when needed */ /* Allocate a buffer of reasonable size. Resized when needed */
out_len = in_len*2; out_len = in_len * 2;
if ( (rv=PyBytes_FromStringAndSize(NULL, out_len)) == NULL ) out_data = _PyBytesWriter_Alloc(&writer, out_len);
if (out_data == NULL)
return NULL; return NULL;
out_len_left = out_len;
out_data = (unsigned char *)PyBytes_AS_STRING(rv); /* Use overallocation */
writer.overallocate = 1;
out_len_left = writer.allocated;
/* /*
** We need two macros here to get/put bytes and handle ** We need two macros here to get/put bytes and handle
** end-of-buffer for input and output strings. ** end-of-buffer for input and output strings.
*/ */
#define INBYTE(b) \ #define INBYTE(b) \
do { \ do { \
if ( --in_len < 0 ) { \ if ( --in_len < 0 ) { \
PyErr_SetString(Incomplete, ""); \ PyErr_SetString(Incomplete, ""); \
Py_DECREF(rv); \ goto error; \
return NULL; \ } \
} \ b = *in_data++; \
b = *in_data++; \
} while(0) } while(0)
#define OUTBYTE(b) \ #define OUTBYTE(b) \
do { \ do { \
if ( --out_len_left < 0 ) { \ if ( --out_len_left < 0 ) { \
if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \ if (in_len <= 0) { \
if (_PyBytes_Resize(&rv, 2*out_len) < 0) \ /* We are done after this write, no need to \
{ Py_XDECREF(rv); return NULL; } \ overallocate the buffer anymore */ \
out_data = (unsigned char *)PyBytes_AS_STRING(rv) \ writer.overallocate = 0; \
+ out_len; \ } \
out_len_left = out_len-1; \ out_data = _PyBytesWriter_Prepare(&writer, out_data, 1); \
out_len = out_len * 2; \ if (out_data == NULL) \
} \ goto error; \
*out_data++ = b; \ out_len_left = writer.allocated; \
} \
*out_data++ = b; \
} while(0) } while(0)
/* /*
** Handle first byte separately (since we have to get angry ** Handle first byte separately (since we have to get angry
** in case of an orphaned RLE code). ** in case of an orphaned RLE code).
*/ */
INBYTE(in_byte); INBYTE(in_byte);
if (in_byte == RUNCHAR) { if (in_byte == RUNCHAR) {
INBYTE(in_repeat); INBYTE(in_repeat);
@ -877,8 +851,7 @@ binascii_rledecode_hqx_impl(PyModuleDef *module, Py_buffer *data)
** of the string only). This is a programmer error. ** of the string only). This is a programmer error.
*/ */
PyErr_SetString(Error, "Orphaned RLE code at start"); PyErr_SetString(Error, "Orphaned RLE code at start");
Py_DECREF(rv); goto error;
return NULL;
} }
OUTBYTE(RUNCHAR); OUTBYTE(RUNCHAR);
} else { } else {
@ -904,12 +877,11 @@ binascii_rledecode_hqx_impl(PyModuleDef *module, Py_buffer *data)
OUTBYTE(in_byte); OUTBYTE(in_byte);
} }
} }
if (_PyBytes_Resize(&rv, return _PyBytesWriter_Finish(&writer, out_data);
(out_data -
(unsigned char *)PyBytes_AS_STRING(rv))) < 0) { error:
Py_CLEAR(rv); _PyBytesWriter_Dealloc(&writer);
} return NULL;
return rv;
} }