long(string, base) now takes time linear in len(string) when base is a

power of 2.  Enabled the tail end of test_long() in pickletester.py
because it no longer takes forever when run from test_pickle.py.
This commit is contained in:
Tim Peters 2003-02-02 07:51:32 +00:00
parent 06dd8cf5e4
commit bf2674be0e
4 changed files with 113 additions and 29 deletions

View File

@ -1427,9 +1427,6 @@ def encode_long(x):
binary = _binascii.unhexlify(ashex)
return binary[::-1]
# XXX OOPS! This is still quadratic-time. While hex(n) is linear-time
# XXX in the # of digits in n, long(s, 16) is still quadratic-time
# XXX in len(s).
def decode_long(data):
r"""Decode a long from a two's complement little-endian binary string.
@ -1453,7 +1450,7 @@ def decode_long(data):
if nbytes == 0:
return 0L
ashex = _binascii.hexlify(data[::-1])
n = long(ashex, 16) # quadratic time
n = long(ashex, 16) # quadratic time before Python 2.3; linear now
if data[-1] >= '\x80':
n -= 1L << (nbytes * 8)
return n

View File

@ -247,7 +247,7 @@ class AbstractPickleTests(unittest.TestCase):
def test_long(self):
for proto in protocols:
# 256 bytes is where LONG4 begins
# 256 bytes is where LONG4 begins.
for nbits in 1, 8, 8*254, 8*255, 8*256, 8*257:
nbase = 1L << nbits
for npos in nbase-1, nbase, nbase+1:
@ -257,20 +257,11 @@ class AbstractPickleTests(unittest.TestCase):
self.assertEqual(n, got)
# Try a monster. This is quadratic-time in protos 0 & 1, so don't
# bother with those.
# XXX Damn. pickle.py is still quadratic-time here, due to
# XXX long(string, 16). cPickle runs this in an eyeblink, but I
# XXX gave up waiting for pickle.py to get beyond "loading". Giving
# XXX up for now.
return
print "building long"
nbase = long("deadbeeffeedface", 16)
nbase += nbase << 1000000
for n in nbase, -nbase:
print "dumping"
p = self.dumps(n, 2)
print "loading"
got = self.loads(p)
print "checking"
self.assertEqual(n, got)
def test_reduce(self):

View File

@ -12,6 +12,9 @@ What's New in Python 2.3 alpha 2?
Core and builtins
-----------------
- long(string, base) takes time linear in len(string) when base is a power
of 2 now. It used to take time quadratic in len(string).
- filter returns now Unicode results for Unicode arguments.
- raw_input can now return Unicode objects.

View File

@ -1090,6 +1090,95 @@ long_format(PyObject *aa, int base, int addL)
return (PyObject *)str;
}
/* *str points to the first digit in a string of base base digits. base
* is a power of 2 (2, 4, 8, 16, or 32). *str is set to point to the first
* non-digit (which may be *str!). A normalized long is returned.
* The point to this routine is that it takes time linear in the number of
* string characters.
*/
static PyLongObject *
long_from_binary_base(char **str, int base)
{
char *p = *str;
char *start = p;
int bits_per_char;
int n;
PyLongObject *z;
twodigits accum;
int bits_in_accum;
digit *pdigit;
assert(base >= 2 && base <= 32 && (base & (base - 1)) == 0);
n = base;
for (bits_per_char = -1; n; ++bits_per_char)
n >>= 1;
/* n <- total # of bits needed, while setting p to end-of-string */
n = 0;
for (;;) {
int k = -1;
char ch = *p;
if (ch <= '9')
k = ch - '0';
else if (ch >= 'a')
k = ch - 'a' + 10;
else if (ch >= 'A')
k = ch - 'A' + 10;
if (k < 0 || k >= base)
break;
n += bits_per_char;
if (n < 0) {
PyErr_SetString(PyExc_ValueError,
"long string too large to convert");
return NULL;
}
++p;
}
*str = p;
/* n <- # of Python digits needed, = ceiling(n/SHIFT). */
n = (n + SHIFT - 1) / SHIFT;
z = _PyLong_New(n);
if (z == NULL)
return NULL;
/* Read string from right, and fill in long from left; i.e.,
* from least to most significant in both.
*/
accum = 0;
bits_in_accum = 0;
pdigit = z->ob_digit;
while (--p >= start) {
unsigned char ch = (unsigned char)*p;
digit k;
if (ch <= '9')
k = ch - '0';
else if (ch >= 'a')
k = ch - 'a' + 10;
else {
assert(ch >= 'A');
k = ch - 'A' + 10;
}
assert(k >= 0 && k <= base);
accum |= k << bits_in_accum;
bits_in_accum += bits_per_char;
if (bits_in_accum >= SHIFT) {
*pdigit++ = (digit)(accum & MASK);
assert(pdigit - z->ob_digit <= n);
accum >>= SHIFT;
bits_in_accum -= SHIFT;
assert(bits_in_accum < SHIFT);
}
}
if (bits_in_accum) {
assert(bits_in_accum <= SHIFT);
*pdigit++ = (digit)accum;
assert(pdigit - z->ob_digit <= n);
}
while (pdigit - z->ob_digit < n)
*pdigit++ = 0;
return long_normalize(z);
}
PyObject *
PyLong_FromString(char *str, char **pend, int base)
{
@ -1122,23 +1211,27 @@ PyLong_FromString(char *str, char **pend, int base)
}
if (base == 16 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
str += 2;
z = _PyLong_New(0);
start = str;
for ( ; z != NULL; ++str) {
int k = -1;
PyLongObject *temp;
if ((base & (base - 1)) == 0)
z = long_from_binary_base(&str, base);
else {
z = _PyLong_New(0);
for ( ; z != NULL; ++str) {
int k = -1;
PyLongObject *temp;
if (*str <= '9')
k = *str - '0';
else if (*str >= 'a')
k = *str - 'a' + 10;
else if (*str >= 'A')
k = *str - 'A' + 10;
if (k < 0 || k >= base)
break;
temp = muladd1(z, (digit)base, (digit)k);
Py_DECREF(z);
z = temp;
if (*str <= '9')
k = *str - '0';
else if (*str >= 'a')
k = *str - 'a' + 10;
else if (*str >= 'A')
k = *str - 'A' + 10;
if (k < 0 || k >= base)
break;
temp = muladd1(z, (digit)base, (digit)k);
Py_DECREF(z);
z = temp;
}
}
if (z == NULL)
return NULL;