Make binascii use byte strings everywhere (in and out).

This commit is contained in:
Guido van Rossum 2007-05-22 20:24:57 +00:00
parent 6dd15d3ab0
commit 0e225aa09b
2 changed files with 116 additions and 89 deletions

View File

@ -7,10 +7,10 @@ import binascii
class BinASCIITest(unittest.TestCase):
# Create binary test data
data = "The quick brown fox jumps over the lazy dog.\r\n"
data = b"The quick brown fox jumps over the lazy dog.\r\n"
# Be slow so we don't depend on other modules
data += "".join(map(chr, range(256)))
data += "\r\nHello world.\n"
data += bytes(range(256))
data += b"\r\nHello world.\n"
def test_exceptions(self):
# Check module exceptions
@ -40,10 +40,10 @@ class BinASCIITest(unittest.TestCase):
b = self.data[i:i+MAX_BASE64]
a = binascii.b2a_base64(b)
lines.append(a)
res = ""
res = bytes()
for line in lines:
b = binascii.a2b_base64(line)
res = res + b
res += b
self.assertEqual(res, self.data)
def test_base64invalid(self):
@ -56,24 +56,23 @@ class BinASCIITest(unittest.TestCase):
a = binascii.b2a_base64(b)
lines.append(a)
fillers = ""
valid = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/"
fillers = bytes()
valid = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/"
for i in range(256):
c = chr(i)
if c not in valid:
fillers += c
if i not in valid:
fillers.append(i)
def addnoise(line):
noise = fillers
ratio = len(line) // len(noise)
res = ""
res = bytes()
while line and noise:
if len(line) // len(noise) > ratio:
c, line = line[0], line[1:]
else:
c, noise = noise[0], noise[1:]
res += c
res.append(c)
return res + noise + line
res = ""
res = bytes()
for line in map(addnoise, lines):
b = binascii.a2b_base64(line)
res += b
@ -81,7 +80,7 @@ class BinASCIITest(unittest.TestCase):
# Test base64 with just invalid characters, which should return
# empty strings. TBD: shouldn't it raise an exception instead?
self.assertEqual(binascii.a2b_base64(fillers), '')
self.assertEqual(binascii.a2b_base64(fillers), b'')
def test_uu(self):
MAX_UU = 45
@ -90,23 +89,23 @@ class BinASCIITest(unittest.TestCase):
b = self.data[i:i+MAX_UU]
a = binascii.b2a_uu(b)
lines.append(a)
res = ""
res = bytes()
for line in lines:
b = binascii.a2b_uu(line)
res += b
self.assertEqual(res, self.data)
self.assertEqual(binascii.a2b_uu("\x7f"), "\x00"*31)
self.assertEqual(binascii.a2b_uu("\x80"), "\x00"*32)
self.assertEqual(binascii.a2b_uu("\xff"), "\x00"*31)
self.assertRaises(binascii.Error, binascii.a2b_uu, "\xff\x00")
self.assertRaises(binascii.Error, binascii.a2b_uu, "!!!!")
self.assertEqual(binascii.a2b_uu(b"\x7f"), b"\x00"*31)
self.assertEqual(binascii.a2b_uu(b"\x80"), b"\x00"*32)
self.assertEqual(binascii.a2b_uu(b"\xff"), b"\x00"*31)
self.assertRaises(binascii.Error, binascii.a2b_uu, b"\xff\x00")
self.assertRaises(binascii.Error, binascii.a2b_uu, b"!!!!")
self.assertRaises(binascii.Error, binascii.b2a_uu, 46*"!")
self.assertRaises(binascii.Error, binascii.b2a_uu, 46*b"!")
def test_crc32(self):
crc = binascii.crc32("Test the CRC-32 of")
crc = binascii.crc32(" this string.", crc)
crc = binascii.crc32(b"Test the CRC-32 of")
crc = binascii.crc32(b" this string.", crc)
self.assertEqual(crc, 1571220330)
self.assertRaises(TypeError, binascii.crc32)
@ -115,16 +114,16 @@ class BinASCIITest(unittest.TestCase):
def test_hex(self):
# test hexlification
s = '{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000'
s = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000'
t = binascii.b2a_hex(s)
u = binascii.a2b_hex(t)
self.assertEqual(s, u)
self.assertRaises(TypeError, binascii.a2b_hex, t[:-1])
self.assertRaises(TypeError, binascii.a2b_hex, t[:-1] + 'q')
self.assertRaises(TypeError, binascii.a2b_hex, t[:-1] + b'q')
# Verify the treatment of Unicode strings
if test_support.have_unicode:
self.assertEqual(binascii.hexlify(str('a', 'ascii')), '61')
self.assertEqual(binascii.hexlify('a'), b'61')
def test_qp(self):
# A test for SF bug 534347 (segfaults without the proper fix)
@ -134,28 +133,29 @@ class BinASCIITest(unittest.TestCase):
pass
else:
self.fail("binascii.a2b_qp(**{1:1}) didn't raise TypeError")
self.assertEqual(binascii.a2b_qp("= "), "= ")
self.assertEqual(binascii.a2b_qp("=="), "=")
self.assertEqual(binascii.a2b_qp("=AX"), "=AX")
self.assertEqual(binascii.a2b_qp(b"= "), b"= ")
self.assertEqual(binascii.a2b_qp(b"=="), b"=")
self.assertEqual(binascii.a2b_qp(b"=AX"), b"=AX")
self.assertRaises(TypeError, binascii.b2a_qp, foo="bar")
self.assertEqual(binascii.a2b_qp("=00\r\n=00"), "\x00\r\n\x00")
self.assertEqual(binascii.a2b_qp(b"=00\r\n=00"), b"\x00\r\n\x00")
self.assertEqual(
binascii.b2a_qp("\xff\r\n\xff\n\xff"),
"=FF\r\n=FF\r\n=FF"
binascii.b2a_qp(b"\xff\r\n\xff\n\xff"),
b"=FF\r\n=FF\r\n=FF"
)
self.assertEqual(
binascii.b2a_qp("0"*75+"\xff\r\n\xff\r\n\xff"),
"0"*75+"=\r\n=FF\r\n=FF\r\n=FF"
binascii.b2a_qp(b"0"*75+b"\xff\r\n\xff\r\n\xff"),
b"0"*75+b"=\r\n=FF\r\n=FF\r\n=FF"
)
self.assertEqual(binascii.b2a_qp('\0\n'), '=00\n')
self.assertEqual(binascii.b2a_qp('\0\n', quotetabs=True), '=00\n')
self.assertEqual(binascii.b2a_qp('foo\tbar\t\n'), 'foo\tbar=09\n')
self.assertEqual(binascii.b2a_qp('foo\tbar\t\n', quotetabs=True), 'foo=09bar=09\n')
self.assertEqual(binascii.b2a_qp(b'\0\n'), b'=00\n')
self.assertEqual(binascii.b2a_qp(b'\0\n', quotetabs=True), b'=00\n')
self.assertEqual(binascii.b2a_qp(b'foo\tbar\t\n'), b'foo\tbar=09\n')
self.assertEqual(binascii.b2a_qp(b'foo\tbar\t\n', quotetabs=True),
b'foo=09bar=09\n')
self.assertEqual(binascii.b2a_qp('.'), '=2E')
self.assertEqual(binascii.b2a_qp('.\n'), '=2E\n')
self.assertEqual(binascii.b2a_qp('a.\n'), 'a.\n')
self.assertEqual(binascii.b2a_qp(b'.'), b'=2E')
self.assertEqual(binascii.b2a_qp(b'.\n'), b'=2E\n')
self.assertEqual(binascii.b2a_qp(b'a.\n'), b'a.\n')
def test_empty_string(self):
# A test for SF bug #1022953. Make sure SystemError is not raised.
@ -164,7 +164,10 @@ class BinASCIITest(unittest.TestCase):
'a2b_hqx', 'a2b_base64', 'rlecode_hqx', 'b2a_uu',
'rledecode_hqx']:
f = getattr(binascii, n)
f('')
try:
f(b'')
except SystemError as err:
self.fail("%s(b'') raises SystemError: %s" % (n, err))
binascii.crc_hqx('', 0)
def test_main():

View File

@ -138,7 +138,7 @@ static char table_a2b_base64[] = {
#define BASE64_PAD '='
/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3)
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)
static unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
@ -200,9 +200,9 @@ binascii_a2b_uu(PyObject *self, PyObject *args)
ascii_len--;
/* Allocate the buffer */
if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
return NULL;
bin_data = (unsigned char *)PyString_AsString(rv);
bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
/* XXX is it really best to add NULs if there's no more data */
@ -277,9 +277,9 @@ binascii_b2a_uu(PyObject *self, PyObject *args)
}
/* We're lazy and allocate too much (fixed up later) */
if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
return NULL;
ascii_data = (unsigned char *)PyString_AsString(rv);
ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
/* Store the length */
*ascii_data++ = ' ' + (bin_len & 077);
@ -301,8 +301,12 @@ binascii_b2a_uu(PyObject *self, PyObject *args)
}
*ascii_data++ = '\n'; /* Append a courtesy newline */
_PyString_Resize(&rv, (ascii_data -
(unsigned char *)PyString_AsString(rv)));
if (PyBytes_Resize(rv,
(ascii_data -
(unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Py_DECREF(rv);
rv = NULL;
}
return rv;
}
@ -351,9 +355,9 @@ binascii_a2b_base64(PyObject *self, PyObject *args)
bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
/* Allocate the buffer */
if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
return NULL;
bin_data = (unsigned char *)PyString_AsString(rv);
bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
bin_len = 0;
for( ; ascii_len > 0; ascii_len--, ascii_data++) {
@ -412,13 +416,17 @@ binascii_a2b_base64(PyObject *self, PyObject *args)
/* And set string size correctly. If the result string is empty
** (because the input was all invalid) return the shared empty
** string instead; _PyString_Resize() won't do this for us.
** string instead; PyBytes_Resize() won't do this for us.
*/
if (bin_len > 0)
_PyString_Resize(&rv, bin_len);
if (bin_len > 0) {
if (PyBytes_Resize(rv, bin_len) < 0) {
Py_DECREF(rv);
rv = NULL;
}
}
else {
Py_DECREF(rv);
rv = PyString_FromString("");
rv = PyBytes_FromStringAndSize("", 0);
}
return rv;
}
@ -445,9 +453,9 @@ binascii_b2a_base64(PyObject *self, PyObject *args)
/* We're lazy and allocate too much (fixed up later).
"+3" leaves room for up to two pad characters and a trailing
newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
return NULL;
ascii_data = (unsigned char *)PyString_AsString(rv);
ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
/* Shift the data into our buffer */
@ -471,8 +479,12 @@ binascii_b2a_base64(PyObject *self, PyObject *args)
}
*ascii_data++ = '\n'; /* Append a courtesy newline */
_PyString_Resize(&rv, (ascii_data -
(unsigned char *)PyString_AsString(rv)));
if (PyBytes_Resize(rv,
(ascii_data -
(unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Py_DECREF(rv);
rv = NULL;
}
return rv;
}
@ -495,9 +507,9 @@ binascii_a2b_hqx(PyObject *self, PyObject *args)
/* Allocate a string that is too big (fixed later)
Add two to the initial length to prevent interning which
would preclude subsequent resizing. */
if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL )
if ( (rv=PyBytes_FromStringAndSize(NULL, len+2)) == NULL )
return NULL;
bin_data = (unsigned char *)PyString_AsString(rv);
bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
for( ; len > 0 ; len--, ascii_data++ ) {
/* Get the byte and look it up */
@ -531,8 +543,12 @@ binascii_a2b_hqx(PyObject *self, PyObject *args)
Py_DECREF(rv);
return NULL;
}
_PyString_Resize(
&rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
if (PyBytes_Resize(rv,
(bin_data -
(unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Py_DECREF(rv);
rv = NULL;
}
if (rv) {
PyObject *rrv = Py_BuildValue("Oi", rv, done);
Py_DECREF(rv);
@ -556,9 +572,9 @@ binascii_rlecode_hqx(PyObject *self, PyObject *args)
return NULL;
/* Worst case: output is twice as big as input (fixed later) */
if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL )
return NULL;
out_data = (unsigned char *)PyString_AsString(rv);
out_data = (unsigned char *)PyBytes_AS_STRING(rv);
for( in=0; in<len; in++) {
ch = in_data[in];
@ -584,8 +600,12 @@ binascii_rlecode_hqx(PyObject *self, PyObject *args)
}
}
}
_PyString_Resize(&rv, (out_data -
(unsigned char *)PyString_AsString(rv)));
if (PyBytes_Resize(rv,
(out_data -
(unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Py_DECREF(rv);
rv = NULL;
}
return rv;
}
@ -605,9 +625,9 @@ binascii_b2a_hqx(PyObject *self, PyObject *args)
return NULL;
/* Allocate a buffer that is at least large enough */
if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL )
return NULL;
ascii_data = (unsigned char *)PyString_AsString(rv);
ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
for( ; len > 0 ; len--, bin_data++ ) {
/* Shift into our buffer, and output any 6bits ready */
@ -624,8 +644,12 @@ binascii_b2a_hqx(PyObject *self, PyObject *args)
leftchar <<= (6-leftbits);
*ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
}
_PyString_Resize(&rv, (ascii_data -
(unsigned char *)PyString_AsString(rv)));
if (PyBytes_Resize(rv,
(ascii_data -
(unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Py_DECREF(rv);
rv = NULL;
}
return rv;
}
@ -644,14 +668,14 @@ binascii_rledecode_hqx(PyObject *self, PyObject *args)
/* Empty string is a special case */
if ( in_len == 0 )
return PyString_FromString("");
return PyBytes_FromStringAndSize("", 0);
/* Allocate a buffer of reasonable size. Resized when needed */
out_len = in_len*2;
if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
if ( (rv=PyBytes_FromStringAndSize(NULL, out_len)) == NULL )
return NULL;
out_len_left = out_len;
out_data = (unsigned char *)PyString_AsString(rv);
out_data = (unsigned char *)PyBytes_AS_STRING(rv);
/*
** We need two macros here to get/put bytes and handle
@ -670,9 +694,9 @@ binascii_rledecode_hqx(PyObject *self, PyObject *args)
#define OUTBYTE(b) \
do { \
if ( --out_len_left < 0 ) { \
_PyString_Resize(&rv, 2*out_len); \
if ( rv == NULL ) return NULL; \
out_data = (unsigned char *)PyString_AsString(rv) \
if (PyBytes_Resize(rv, 2*out_len) < 0) \
{ Py_DECREF(rv); return NULL; } \
out_data = (unsigned char *)PyBytes_AS_STRING(rv) \
+ out_len; \
out_len_left = out_len-1; \
out_len = out_len * 2; \
@ -720,8 +744,12 @@ binascii_rledecode_hqx(PyObject *self, PyObject *args)
OUTBYTE(in_byte);
}
}
_PyString_Resize(&rv, (out_data -
(unsigned char *)PyString_AsString(rv)));
if (PyBytes_Resize(rv,
(out_data -
(unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Py_DECREF(rv);
rv = NULL;
}
return rv;
}
@ -912,12 +940,10 @@ binascii_hexlify(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen))
return NULL;
retval = PyString_FromStringAndSize(NULL, arglen*2);
retval = PyBytes_FromStringAndSize(NULL, arglen*2);
if (!retval)
return NULL;
retbuf = PyString_AsString(retval);
if (!retbuf)
goto finally;
retbuf = PyBytes_AS_STRING(retval);
/* make hex version of string, taken from shamodule.c */
for (i=j=0; i < arglen; i++) {
@ -978,12 +1004,10 @@ binascii_unhexlify(PyObject *self, PyObject *args)
return NULL;
}
retval = PyString_FromStringAndSize(NULL, (arglen/2));
retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
if (!retval)
return NULL;
retbuf = PyString_AsString(retval);
if (!retbuf)
goto finally;
retbuf = PyBytes_AS_STRING(retval);
for (i=j=0; i < arglen; i += 2) {
int top = to_int(Py_CHARMASK(argbuf[i]));
@ -1095,7 +1119,7 @@ binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
out++;
}
}
if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
PyMem_Free(odata);
return NULL;
}
@ -1295,7 +1319,7 @@ binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
}
}
}
if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
PyMem_Free(odata);
return NULL;
}