In rare cases of strings specifying true values near sys.maxint, and oddball bases (not decimal or a power of 2), int(string, base) could deliver insane answers. This repairs all such problems, and also speeds string->int significantly. On my box, here are % speedups for decimal strings of various lengths: length speedup ------ ------- 1 12.4% 2 15.7% 3 20.6% 4 28.1% 5 33.2% 6 37.5% 7 41.9% 8 46.3% 9 51.2% 10 19.5% 11 19.9% 12 23.9% 13 23.7% 14 23.3% 15 24.9% 16 25.3% 17 28.3% 18 27.9% 19 35.7% Note that the difference between 9 and 10 is the difference between short and long Python ints on a 32-bit box. The patch doesn't actually do anything to speed conversion to long: the speedup is due to detecting "unsigned long" overflow more quickly. This is a bugfix candidate, but it's a non-trivial patch and it would be painful to separate the "bug fix" from the "speed up" parts.
This commit is contained in:
parent
b63588c188
commit
b713ec2531
|
@ -709,6 +709,84 @@ class BuiltinTest(unittest.TestCase):
|
|||
self.assertEqual(int('0123', 0), 83)
|
||||
self.assertEqual(int('0x123', 16), 291)
|
||||
|
||||
# SF bug 1334662: int(string, base) wrong answers
|
||||
# Various representations of 2**32 evaluated to 0
|
||||
# rather than 2**32 in previous versions
|
||||
|
||||
self.assertEqual(int('100000000000000000000000000000000', 2), 4294967296L)
|
||||
self.assertEqual(int('102002022201221111211', 3), 4294967296L)
|
||||
self.assertEqual(int('10000000000000000', 4), 4294967296L)
|
||||
self.assertEqual(int('32244002423141', 5), 4294967296L)
|
||||
self.assertEqual(int('1550104015504', 6), 4294967296L)
|
||||
self.assertEqual(int('211301422354', 7), 4294967296L)
|
||||
self.assertEqual(int('40000000000', 8), 4294967296L)
|
||||
self.assertEqual(int('12068657454', 9), 4294967296L)
|
||||
self.assertEqual(int('4294967296', 10), 4294967296L)
|
||||
self.assertEqual(int('1904440554', 11), 4294967296L)
|
||||
self.assertEqual(int('9ba461594', 12), 4294967296L)
|
||||
self.assertEqual(int('535a79889', 13), 4294967296L)
|
||||
self.assertEqual(int('2ca5b7464', 14), 4294967296L)
|
||||
self.assertEqual(int('1a20dcd81', 15), 4294967296L)
|
||||
self.assertEqual(int('100000000', 16), 4294967296L)
|
||||
self.assertEqual(int('a7ffda91', 17), 4294967296L)
|
||||
self.assertEqual(int('704he7g4', 18), 4294967296L)
|
||||
self.assertEqual(int('4f5aff66', 19), 4294967296L)
|
||||
self.assertEqual(int('3723ai4g', 20), 4294967296L)
|
||||
self.assertEqual(int('281d55i4', 21), 4294967296L)
|
||||
self.assertEqual(int('1fj8b184', 22), 4294967296L)
|
||||
self.assertEqual(int('1606k7ic', 23), 4294967296L)
|
||||
self.assertEqual(int('mb994ag', 24), 4294967296L)
|
||||
self.assertEqual(int('hek2mgl', 25), 4294967296L)
|
||||
self.assertEqual(int('dnchbnm', 26), 4294967296L)
|
||||
self.assertEqual(int('b28jpdm', 27), 4294967296L)
|
||||
self.assertEqual(int('8pfgih4', 28), 4294967296L)
|
||||
self.assertEqual(int('76beigg', 29), 4294967296L)
|
||||
self.assertEqual(int('5qmcpqg', 30), 4294967296L)
|
||||
self.assertEqual(int('4q0jto4', 31), 4294967296L)
|
||||
self.assertEqual(int('4000000', 32), 4294967296L)
|
||||
self.assertEqual(int('3aokq94', 33), 4294967296L)
|
||||
self.assertEqual(int('2qhxjli', 34), 4294967296L)
|
||||
self.assertEqual(int('2br45qb', 35), 4294967296L)
|
||||
self.assertEqual(int('1z141z4', 36), 4294967296L)
|
||||
|
||||
# SF bug 1334662: int(string, base) wrong answers
|
||||
# Checks for proper evaluation of 2**32 + 1
|
||||
self.assertEqual(int('100000000000000000000000000000001', 2), 4294967297L)
|
||||
self.assertEqual(int('102002022201221111212', 3), 4294967297L)
|
||||
self.assertEqual(int('10000000000000001', 4), 4294967297L)
|
||||
self.assertEqual(int('32244002423142', 5), 4294967297L)
|
||||
self.assertEqual(int('1550104015505', 6), 4294967297L)
|
||||
self.assertEqual(int('211301422355', 7), 4294967297L)
|
||||
self.assertEqual(int('40000000001', 8), 4294967297L)
|
||||
self.assertEqual(int('12068657455', 9), 4294967297L)
|
||||
self.assertEqual(int('4294967297', 10), 4294967297L)
|
||||
self.assertEqual(int('1904440555', 11), 4294967297L)
|
||||
self.assertEqual(int('9ba461595', 12), 4294967297L)
|
||||
self.assertEqual(int('535a7988a', 13), 4294967297L)
|
||||
self.assertEqual(int('2ca5b7465', 14), 4294967297L)
|
||||
self.assertEqual(int('1a20dcd82', 15), 4294967297L)
|
||||
self.assertEqual(int('100000001', 16), 4294967297L)
|
||||
self.assertEqual(int('a7ffda92', 17), 4294967297L)
|
||||
self.assertEqual(int('704he7g5', 18), 4294967297L)
|
||||
self.assertEqual(int('4f5aff67', 19), 4294967297L)
|
||||
self.assertEqual(int('3723ai4h', 20), 4294967297L)
|
||||
self.assertEqual(int('281d55i5', 21), 4294967297L)
|
||||
self.assertEqual(int('1fj8b185', 22), 4294967297L)
|
||||
self.assertEqual(int('1606k7id', 23), 4294967297L)
|
||||
self.assertEqual(int('mb994ah', 24), 4294967297L)
|
||||
self.assertEqual(int('hek2mgm', 25), 4294967297L)
|
||||
self.assertEqual(int('dnchbnn', 26), 4294967297L)
|
||||
self.assertEqual(int('b28jpdn', 27), 4294967297L)
|
||||
self.assertEqual(int('8pfgih5', 28), 4294967297L)
|
||||
self.assertEqual(int('76beigh', 29), 4294967297L)
|
||||
self.assertEqual(int('5qmcpqh', 30), 4294967297L)
|
||||
self.assertEqual(int('4q0jto5', 31), 4294967297L)
|
||||
self.assertEqual(int('4000001', 32), 4294967297L)
|
||||
self.assertEqual(int('3aokq95', 33), 4294967297L)
|
||||
self.assertEqual(int('2qhxjlj', 34), 4294967297L)
|
||||
self.assertEqual(int('2br45qc', 35), 4294967297L)
|
||||
self.assertEqual(int('1z141z5', 36), 4294967297L)
|
||||
|
||||
def test_intconversion(self):
|
||||
# Test __int__()
|
||||
class Foo0:
|
||||
|
|
|
@ -390,6 +390,7 @@ Fredrik Lundh
|
|||
Mark Lutz
|
||||
Jim Lynch
|
||||
Mikael Lyngvig
|
||||
Alan McIntyre
|
||||
Andrew I MacIntyre
|
||||
Tim MacKenzie
|
||||
Nick Maclaren
|
||||
|
|
|
@ -12,6 +12,11 @@ What's New in Python 2.5 alpha 3?
|
|||
Core and builtins
|
||||
-----------------
|
||||
|
||||
- Bug #1334662: ``int(string, base)`` could deliver a wrong answer
|
||||
when ``base`` was not 2, 4, 8, 10, 16 or 32, and ``string`` represented
|
||||
an integer close to ``sys.maxint``. This was repaired by patch
|
||||
#1335972, which also gives a nice speedup.
|
||||
|
||||
- Patch #1337051: reduced size of frame objects.
|
||||
|
||||
- PyErr_NewException now accepts a tuple of base classes as its
|
||||
|
|
|
@ -15,6 +15,94 @@
|
|||
|
||||
/* strtol and strtoul, renamed to avoid conflicts */
|
||||
|
||||
|
||||
#include <ctype.h>
|
||||
#ifndef DONT_HAVE_ERRNO_H
|
||||
#include <errno.h>
|
||||
#endif
|
||||
|
||||
/* Static overflow check values for bases 2 through 36.
|
||||
* smallmax[base] is the largest unsigned long i such that
|
||||
* i * base doesn't overflow unsigned long.
|
||||
*/
|
||||
static unsigned long smallmax[] = {
|
||||
0, /* bases 0 and 1 are invalid */
|
||||
0,
|
||||
ULONG_MAX / 2,
|
||||
ULONG_MAX / 3,
|
||||
ULONG_MAX / 4,
|
||||
ULONG_MAX / 5,
|
||||
ULONG_MAX / 6,
|
||||
ULONG_MAX / 7,
|
||||
ULONG_MAX / 8,
|
||||
ULONG_MAX / 9,
|
||||
ULONG_MAX / 10,
|
||||
ULONG_MAX / 11,
|
||||
ULONG_MAX / 12,
|
||||
ULONG_MAX / 13,
|
||||
ULONG_MAX / 14,
|
||||
ULONG_MAX / 15,
|
||||
ULONG_MAX / 16,
|
||||
ULONG_MAX / 17,
|
||||
ULONG_MAX / 18,
|
||||
ULONG_MAX / 19,
|
||||
ULONG_MAX / 20,
|
||||
ULONG_MAX / 21,
|
||||
ULONG_MAX / 22,
|
||||
ULONG_MAX / 23,
|
||||
ULONG_MAX / 24,
|
||||
ULONG_MAX / 25,
|
||||
ULONG_MAX / 26,
|
||||
ULONG_MAX / 27,
|
||||
ULONG_MAX / 28,
|
||||
ULONG_MAX / 29,
|
||||
ULONG_MAX / 30,
|
||||
ULONG_MAX / 31,
|
||||
ULONG_MAX / 32,
|
||||
ULONG_MAX / 33,
|
||||
ULONG_MAX / 34,
|
||||
ULONG_MAX / 35,
|
||||
ULONG_MAX / 36,
|
||||
};
|
||||
|
||||
/* maximum digits that can't ever overflow for bases 2 through 36,
|
||||
* calculated by [int(math.floor(math.log(2**32, i))) for i in range(2, 37)].
|
||||
* Note that this is pessimistic if sizeof(long) > 4.
|
||||
*/
|
||||
static int digitlimit[] = {
|
||||
0, 0, 32, 20, 16, 13, 12, 11, 10, 10, /* 0 - 9 */
|
||||
9, 9, 8, 8, 8, 8, 8, 7, 7, 7, /* 10 - 19 */
|
||||
7, 7, 7, 7, 6, 6, 6, 6, 6, 6, /* 20 - 29 */
|
||||
6, 6, 6, 6, 6, 6, 6}; /* 30 - 36 */
|
||||
|
||||
/* char-to-digit conversion for bases 2-36; all non-digits are 37 */
|
||||
static int digitlookup[] = {
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 37, 37, 37, 37, 37, 37,
|
||||
37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
|
||||
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37,
|
||||
37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
|
||||
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
|
||||
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37
|
||||
};
|
||||
|
||||
/*
|
||||
** strtoul
|
||||
** This is a general purpose routine for converting
|
||||
|
@ -28,45 +116,24 @@
|
|||
** Errors due to bad pointers will probably result in
|
||||
** exceptions - we don't check for them.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#ifndef DONT_HAVE_ERRNO_H
|
||||
#include <errno.h>
|
||||
#endif
|
||||
|
||||
unsigned long
|
||||
PyOS_strtoul(register char *str, char **ptr, int base)
|
||||
{
|
||||
register unsigned long result; /* return value of the function */
|
||||
register unsigned long result = 0; /* return value of the function */
|
||||
register int c; /* current input character */
|
||||
register unsigned long temp; /* used in overflow testing */
|
||||
int ovf; /* true if overflow occurred */
|
||||
|
||||
result = 0;
|
||||
ovf = 0;
|
||||
|
||||
/* catch silly bases */
|
||||
if (base != 0 && (base < 2 || base > 36))
|
||||
{
|
||||
if (ptr)
|
||||
*ptr = str;
|
||||
return 0;
|
||||
}
|
||||
register int ovlimit; /* required digits to overflow */
|
||||
|
||||
/* skip leading white space */
|
||||
while (*str && isspace(Py_CHARMASK(*str)))
|
||||
str++;
|
||||
++str;
|
||||
|
||||
/* check for leading 0 or 0x for auto-base or base 16 */
|
||||
switch (base)
|
||||
{
|
||||
switch (base) {
|
||||
case 0: /* look for leading 0, 0x or 0X */
|
||||
if (*str == '0')
|
||||
{
|
||||
str++;
|
||||
if (*str == 'x' || *str == 'X')
|
||||
{
|
||||
str++;
|
||||
if (*str == '0') {
|
||||
++str;
|
||||
if (*str == 'x' || *str == 'X') {
|
||||
++str;
|
||||
base = 16;
|
||||
}
|
||||
else
|
||||
|
@ -77,49 +144,72 @@ PyOS_strtoul(register char *str, char **ptr, int base)
|
|||
break;
|
||||
|
||||
case 16: /* skip leading 0x or 0X */
|
||||
if (*str == '0' && (*(str+1) == 'x' || *(str+1) == 'X'))
|
||||
str += 2;
|
||||
if (*str == '0') {
|
||||
++str;
|
||||
if (*str == 'x' || *str == 'X')
|
||||
++str;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* do the conversion */
|
||||
while ((c = Py_CHARMASK(*str)) != '\0')
|
||||
{
|
||||
if (isdigit(c) && c - '0' < base)
|
||||
c -= '0';
|
||||
else
|
||||
{
|
||||
if (isupper(c))
|
||||
c = tolower(c);
|
||||
if (c >= 'a' && c <= 'z')
|
||||
c -= 'a' - 10;
|
||||
else /* non-"digit" character */
|
||||
break;
|
||||
if (c >= base) /* non-"digit" character */
|
||||
break;
|
||||
/* catch silly bases */
|
||||
if (base < 2 || base > 36) {
|
||||
if (ptr)
|
||||
*ptr = str;
|
||||
return 0;
|
||||
}
|
||||
temp = result;
|
||||
|
||||
/* skip leading zeroes */
|
||||
while (*str == '0')
|
||||
++str;
|
||||
|
||||
/* base is guaranteed to be in [2, 36] at this point */
|
||||
ovlimit = digitlimit[base];
|
||||
|
||||
/* do the conversion until non-digit character encountered */
|
||||
while ((c = digitlookup[Py_CHARMASK(*str)]) < base) {
|
||||
if (ovlimit > 0) /* no overflow check required */
|
||||
result = result * base + c;
|
||||
if(base == 10) {
|
||||
if(((long)(result - c) / base != (long)temp)) /* overflow */
|
||||
ovf = 1;
|
||||
else { /* requires overflow check */
|
||||
register unsigned long temp_result;
|
||||
|
||||
if (ovlimit < 0) /* guaranteed overflow */
|
||||
goto overflowed;
|
||||
|
||||
/* there could be an overflow */
|
||||
/* check overflow just from shifting */
|
||||
if (result > smallmax[base])
|
||||
goto overflowed;
|
||||
|
||||
result *= base;
|
||||
|
||||
/* check overflow from the digit's value */
|
||||
temp_result = result + c;
|
||||
if (temp_result < result)
|
||||
goto overflowed;
|
||||
|
||||
result = temp_result;
|
||||
}
|
||||
else {
|
||||
if ((result - c) / base != temp) /* overflow */
|
||||
ovf = 1;
|
||||
}
|
||||
str++;
|
||||
|
||||
++str;
|
||||
--ovlimit;
|
||||
}
|
||||
|
||||
/* set pointer to point to the last character scanned */
|
||||
if (ptr)
|
||||
*ptr = str;
|
||||
if (ovf)
|
||||
{
|
||||
result = (unsigned long) ~0L;
|
||||
errno = ERANGE;
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
overflowed:
|
||||
if (ptr) {
|
||||
/* spool through remaining digit characters */
|
||||
while (digitlookup[Py_CHARMASK(*str)] < base)
|
||||
++str;
|
||||
*ptr = str;
|
||||
}
|
||||
errno = ERANGE;
|
||||
return (unsigned long)-1;
|
||||
}
|
||||
|
||||
long
|
||||
|
|
Loading…
Reference in New Issue