bpo-34749: Improved performance of binascii.a2b_base64(). (GH-9444)
https://bugs.python.org/issue34749
This commit is contained in:
parent
0d4f4352ef
commit
1c5e68e714
|
@ -0,0 +1,2 @@
|
||||||
|
:func:`binascii.a2b_base64` is now up to 2 times faster. Patch by Sergey
|
||||||
|
Fedoseev.
|
|
@ -130,7 +130,7 @@ static const unsigned char table_a2b_hqx[256] = {
|
||||||
static const unsigned char table_b2a_hqx[] =
|
static const unsigned char table_b2a_hqx[] =
|
||||||
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
|
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
|
||||||
|
|
||||||
static const char table_a2b_base64[] = {
|
static const unsigned char table_a2b_base64[] = {
|
||||||
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
|
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
|
||||||
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
|
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
|
||||||
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
|
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
|
||||||
|
@ -138,7 +138,16 @@ static const char table_a2b_base64[] = {
|
||||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
|
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
|
||||||
15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
|
15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
|
||||||
-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
|
-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
|
||||||
41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
|
41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,
|
||||||
|
|
||||||
|
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
|
||||||
|
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
|
||||||
|
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
|
||||||
|
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
|
||||||
|
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
|
||||||
|
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
|
||||||
|
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
|
||||||
|
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
|
||||||
};
|
};
|
||||||
|
|
||||||
#define BASE64_PAD '='
|
#define BASE64_PAD '='
|
||||||
|
@ -413,32 +422,6 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
|
||||||
return _PyBytesWriter_Finish(&writer, ascii_data);
|
return _PyBytesWriter_Finish(&writer, ascii_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static int
|
|
||||||
binascii_find_valid(const unsigned char *s, Py_ssize_t slen, int num)
|
|
||||||
{
|
|
||||||
/* Finds & returns the (num+1)th
|
|
||||||
** valid character for base64, or -1 if none.
|
|
||||||
*/
|
|
||||||
|
|
||||||
int ret = -1;
|
|
||||||
unsigned char c, b64val;
|
|
||||||
|
|
||||||
while ((slen > 0) && (ret == -1)) {
|
|
||||||
c = *s;
|
|
||||||
b64val = table_a2b_base64[c & 0x7f];
|
|
||||||
if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
|
|
||||||
if (num == 0)
|
|
||||||
ret = *s;
|
|
||||||
num--;
|
|
||||||
}
|
|
||||||
|
|
||||||
s++;
|
|
||||||
slen--;
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*[clinic input]
|
/*[clinic input]
|
||||||
binascii.a2b_base64
|
binascii.a2b_base64
|
||||||
|
|
||||||
|
@ -452,88 +435,74 @@ static PyObject *
|
||||||
binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
|
binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
|
||||||
/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
|
/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
|
||||||
{
|
{
|
||||||
const unsigned char *ascii_data;
|
assert(data->len >= 0);
|
||||||
unsigned char *bin_data;
|
|
||||||
unsigned char *bin_data_start;
|
|
||||||
int leftbits = 0;
|
|
||||||
unsigned char this_ch;
|
|
||||||
unsigned int leftchar = 0;
|
|
||||||
Py_ssize_t ascii_len, bin_len;
|
|
||||||
int quad_pos = 0;
|
|
||||||
_PyBytesWriter writer;
|
|
||||||
binascii_state *state;
|
|
||||||
|
|
||||||
ascii_data = data->buf;
|
const unsigned char *ascii_data = data->buf;
|
||||||
ascii_len = data->len;
|
size_t ascii_len = data->len;
|
||||||
|
|
||||||
assert(ascii_len >= 0);
|
|
||||||
|
|
||||||
if (ascii_len > PY_SSIZE_T_MAX - 3)
|
|
||||||
return PyErr_NoMemory();
|
|
||||||
|
|
||||||
bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
|
|
||||||
|
|
||||||
_PyBytesWriter_Init(&writer);
|
|
||||||
|
|
||||||
/* Allocate the buffer */
|
/* Allocate the buffer */
|
||||||
bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
|
Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
|
||||||
|
_PyBytesWriter writer;
|
||||||
|
_PyBytesWriter_Init(&writer);
|
||||||
|
unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
|
||||||
if (bin_data == NULL)
|
if (bin_data == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
bin_data_start = bin_data;
|
unsigned char *bin_data_start = bin_data;
|
||||||
|
|
||||||
for( ; ascii_len > 0; ascii_len--, ascii_data++) {
|
int quad_pos = 0;
|
||||||
this_ch = *ascii_data;
|
unsigned char leftchar = 0;
|
||||||
|
int pads = 0;
|
||||||
if (this_ch > 0x7f ||
|
for (size_t i = 0; i < ascii_len; i++) {
|
||||||
this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
|
unsigned char this_ch = ascii_data[i];
|
||||||
continue;
|
|
||||||
|
|
||||||
/* Check for pad sequences and ignore
|
/* Check for pad sequences and ignore
|
||||||
** the invalid ones.
|
** the invalid ones.
|
||||||
*/
|
*/
|
||||||
if (this_ch == BASE64_PAD) {
|
if (this_ch == BASE64_PAD) {
|
||||||
if ( (quad_pos < 2) ||
|
if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
|
||||||
((quad_pos == 2) &&
|
|
||||||
(binascii_find_valid(ascii_data, ascii_len, 1)
|
|
||||||
!= BASE64_PAD)) )
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
/* A pad sequence means no more input.
|
/* A pad sequence means no more input.
|
||||||
** We've already interpreted the data
|
** We've already interpreted the data
|
||||||
** from the quad at this point.
|
** from the quad at this point.
|
||||||
*/
|
*/
|
||||||
leftbits = 0;
|
goto done;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
this_ch = table_a2b_base64[*ascii_data];
|
this_ch = table_a2b_base64[this_ch];
|
||||||
if ( this_ch == (unsigned char) -1 )
|
if (this_ch >= 64) {
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
pads = 0;
|
||||||
|
|
||||||
/*
|
switch (quad_pos) {
|
||||||
** Shift it in on the low end, and see if there's
|
case 0:
|
||||||
** a byte ready for output.
|
quad_pos = 1;
|
||||||
*/
|
leftchar = this_ch;
|
||||||
quad_pos = (quad_pos + 1) & 0x03;
|
break;
|
||||||
leftchar = (leftchar << 6) | (this_ch);
|
case 1:
|
||||||
leftbits += 6;
|
quad_pos = 2;
|
||||||
|
*bin_data++ = (leftchar << 2) | (this_ch >> 4);
|
||||||
if ( leftbits >= 8 ) {
|
leftchar = this_ch & 0x0f;
|
||||||
leftbits -= 8;
|
break;
|
||||||
*bin_data++ = (leftchar >> leftbits) & 0xff;
|
case 2:
|
||||||
leftchar &= ((1 << leftbits) - 1);
|
quad_pos = 3;
|
||||||
|
*bin_data++ = (leftchar << 4) | (this_ch >> 2);
|
||||||
|
leftchar = this_ch & 0x03;
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
quad_pos = 0;
|
||||||
|
*bin_data++ = (leftchar << 6) | (this_ch);
|
||||||
|
leftchar = 0;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (leftbits != 0) {
|
if (quad_pos != 0) {
|
||||||
state = PyModule_GetState(module);
|
binascii_state *state = PyModule_GetState(module);
|
||||||
if (state == NULL) {
|
if (state == NULL) {
|
||||||
return NULL;
|
/* error already set, from PyModule_GetState */
|
||||||
}
|
} else if (quad_pos == 1) {
|
||||||
if (leftbits == 6) {
|
|
||||||
/*
|
/*
|
||||||
** There is exactly one extra valid, non-padding, base64 character.
|
** There is exactly one extra valid, non-padding, base64 character.
|
||||||
** This is an invalid length, as there is no possible input that
|
** This is an invalid length, as there is no possible input that
|
||||||
|
@ -551,6 +520,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
done:
|
||||||
return _PyBytesWriter_Finish(&writer, bin_data);
|
return _PyBytesWriter_Finish(&writer, bin_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue