Finished backporting PEP 3127, Integer Literal Support and Syntax.

Added 0b and 0o literals to tokenizer.
Modified PyOS_strtoul to support 0b and 0o inputs.
Modified PyLong_FromString to support guessing 0b and 0o inputs.
Renamed test_hexoct.py to test_int_literal.py and added binary tests.
Added upper and lower case 0b, 0O, and 0X tests to test_int_literal.py
This commit is contained in:
Eric Smith 2008-03-17 17:32:20 +00:00
parent 7cfbf0c421
commit 9ff19b5434
7 changed files with 336 additions and 125 deletions

View File

@ -853,6 +853,12 @@ class BuiltinTest(unittest.TestCase):
self.assertRaises(ValueError, int, "0x", 16)
self.assertRaises(ValueError, int, "0x", 0)
self.assertRaises(ValueError, int, "0o", 8)
self.assertRaises(ValueError, int, "0o", 0)
self.assertRaises(ValueError, int, "0b", 2)
self.assertRaises(ValueError, int, "0b", 0)
# SF bug 1334662: int(string, base) wrong answers
# Various representations of 2**32 evaluated to 0
@ -894,6 +900,45 @@ class BuiltinTest(unittest.TestCase):
self.assertEqual(int('2br45qb', 35), 4294967296L)
self.assertEqual(int('1z141z4', 36), 4294967296L)
# tests with base 0
# this fails on 3.0, but in 2.x the old octal syntax is allowed
self.assertEqual(int(' 0123 ', 0), 83)
self.assertEqual(int(' 0123 ', 0), 83)
self.assertEqual(int('000', 0), 0)
self.assertEqual(int('0o123', 0), 83)
self.assertEqual(int('0x123', 0), 291)
self.assertEqual(int('0b100', 0), 4)
self.assertEqual(int(' 0O123 ', 0), 83)
self.assertEqual(int(' 0X123 ', 0), 291)
self.assertEqual(int(' 0B100 ', 0), 4)
# without base still base 10
self.assertEqual(int('0123'), 123)
self.assertEqual(int('0123', 10), 123)
# tests with prefix and base != 0
self.assertEqual(int('0x123', 16), 291)
self.assertEqual(int('0o123', 8), 83)
self.assertEqual(int('0b100', 2), 4)
self.assertEqual(int('0X123', 16), 291)
self.assertEqual(int('0O123', 8), 83)
self.assertEqual(int('0B100', 2), 4)
# the code has special checks for the first character after the
# type prefix
self.assertRaises(ValueError, int, '0b2', 2)
self.assertRaises(ValueError, int, '0b02', 2)
self.assertRaises(ValueError, int, '0B2', 2)
self.assertRaises(ValueError, int, '0B02', 2)
self.assertRaises(ValueError, int, '0o8', 8)
self.assertRaises(ValueError, int, '0o08', 8)
self.assertRaises(ValueError, int, '0O8', 8)
self.assertRaises(ValueError, int, '0O08', 8)
self.assertRaises(ValueError, int, '0xg', 16)
self.assertRaises(ValueError, int, '0x0g', 16)
self.assertRaises(ValueError, int, '0Xg', 16)
self.assertRaises(ValueError, int, '0X0g', 16)
# SF bug 1334662: int(string, base) wrong answers
# Checks for proper evaluation of 2**32 + 1
self.assertEqual(int('100000000000000000000000000000001', 2), 4294967297L)

View File

@ -180,7 +180,9 @@ if 1:
def test_literals_with_leading_zeroes(self):
for arg in ["077787", "0xj", "0x.", "0e", "090000000000000",
"080000000000000", "000000000000009", "000000000000008"]:
"080000000000000", "000000000000009", "000000000000008",
"0b42", "0BADCAFE", "0o123456789", "0b1.1", "0o4.2",
"0b101j2", "0o153j2", "0b100e1", "0o777e1"]:
self.assertRaises(SyntaxError, eval, arg)
self.assertEqual(eval("0777"), 511)
@ -208,6 +210,10 @@ if 1:
self.assertEqual(eval("000000000000007"), 7)
self.assertEqual(eval("000000000000008."), 8.)
self.assertEqual(eval("000000000000009."), 9.)
self.assertEqual(eval("0b101010"), 42)
self.assertEqual(eval("-0b000000000010"), -2)
self.assertEqual(eval("0o777"), 511)
self.assertEqual(eval("-0o0000010"), -8)
def test_unary_minus(self):
# Verify treatment of unary minus on negative numbers SF bug #660455

View File

@ -1,116 +0,0 @@
"""Test correct treatment of hex/oct constants.
This is complex because of changes due to PEP 237.
"""
import sys
platform_long_is_32_bits = sys.maxint == 2147483647
import unittest
from test import test_support
import warnings
warnings.filterwarnings("ignore", "hex/oct constants", FutureWarning,
"<string>")
class TextHexOct(unittest.TestCase):
def test_hex_baseline(self):
# Baseline tests
self.assertEqual(0x0, 0)
self.assertEqual(0x10, 16)
if platform_long_is_32_bits:
self.assertEqual(0x7fffffff, 2147483647)
else:
self.assertEqual(0x7fffffffffffffff, 9223372036854775807)
# Ditto with a minus sign and parentheses
self.assertEqual(-(0x0), 0)
self.assertEqual(-(0x10), -16)
if platform_long_is_32_bits:
self.assertEqual(-(0x7fffffff), -2147483647)
else:
self.assertEqual(-(0x7fffffffffffffff), -9223372036854775807)
# Ditto with a minus sign and NO parentheses
self.assertEqual(-0x0, 0)
self.assertEqual(-0x10, -16)
if platform_long_is_32_bits:
self.assertEqual(-0x7fffffff, -2147483647)
else:
self.assertEqual(-0x7fffffffffffffff, -9223372036854775807)
def test_hex_unsigned(self):
if platform_long_is_32_bits:
# Positive constants
self.assertEqual(0x80000000, 2147483648L)
self.assertEqual(0xffffffff, 4294967295L)
# Ditto with a minus sign and parentheses
self.assertEqual(-(0x80000000), -2147483648L)
self.assertEqual(-(0xffffffff), -4294967295L)
# Ditto with a minus sign and NO parentheses
# This failed in Python 2.2 through 2.2.2 and in 2.3a1
self.assertEqual(-0x80000000, -2147483648L)
self.assertEqual(-0xffffffff, -4294967295L)
else:
# Positive constants
self.assertEqual(0x8000000000000000, 9223372036854775808L)
self.assertEqual(0xffffffffffffffff, 18446744073709551615L)
# Ditto with a minus sign and parentheses
self.assertEqual(-(0x8000000000000000), -9223372036854775808L)
self.assertEqual(-(0xffffffffffffffff), -18446744073709551615L)
# Ditto with a minus sign and NO parentheses
# This failed in Python 2.2 through 2.2.2 and in 2.3a1
self.assertEqual(-0x8000000000000000, -9223372036854775808L)
self.assertEqual(-0xffffffffffffffff, -18446744073709551615L)
def test_oct_baseline(self):
# Baseline tests
self.assertEqual(00, 0)
self.assertEqual(020, 16)
if platform_long_is_32_bits:
self.assertEqual(017777777777, 2147483647)
else:
self.assertEqual(0777777777777777777777, 9223372036854775807)
# Ditto with a minus sign and parentheses
self.assertEqual(-(00), 0)
self.assertEqual(-(020), -16)
if platform_long_is_32_bits:
self.assertEqual(-(017777777777), -2147483647)
else:
self.assertEqual(-(0777777777777777777777), -9223372036854775807)
# Ditto with a minus sign and NO parentheses
self.assertEqual(-00, 0)
self.assertEqual(-020, -16)
if platform_long_is_32_bits:
self.assertEqual(-017777777777, -2147483647)
else:
self.assertEqual(-0777777777777777777777, -9223372036854775807)
def test_oct_unsigned(self):
if platform_long_is_32_bits:
# Positive constants
self.assertEqual(020000000000, 2147483648L)
self.assertEqual(037777777777, 4294967295L)
# Ditto with a minus sign and parentheses
self.assertEqual(-(020000000000), -2147483648L)
self.assertEqual(-(037777777777), -4294967295L)
# Ditto with a minus sign and NO parentheses
# This failed in Python 2.2 through 2.2.2 and in 2.3a1
self.assertEqual(-020000000000, -2147483648L)
self.assertEqual(-037777777777, -4294967295L)
else:
# Positive constants
self.assertEqual(01000000000000000000000, 9223372036854775808L)
self.assertEqual(01777777777777777777777, 18446744073709551615L)
# Ditto with a minus sign and parentheses
self.assertEqual(-(01000000000000000000000), -9223372036854775808L)
self.assertEqual(-(01777777777777777777777), -18446744073709551615L)
# Ditto with a minus sign and NO parentheses
# This failed in Python 2.2 through 2.2.2 and in 2.3a1
self.assertEqual(-01000000000000000000000, -9223372036854775808L)
self.assertEqual(-01777777777777777777777, -18446744073709551615L)
def test_main():
test_support.run_unittest(TextHexOct)
if __name__ == "__main__":
test_main()

View File

@ -0,0 +1,191 @@
"""Test correct treatment of hex/oct constants.
This is complex because of changes due to PEP 237.
"""
import unittest
from test import test_support
import warnings
warnings.filterwarnings("ignore", "hex/oct constants", FutureWarning,
"<string>")
class TextHexOctBin(unittest.TestCase):
def test_hex_baseline(self):
# A few upper/lowercase tests
self.assertEqual(0x0, 0X0)
self.assertEqual(0x1, 0X1)
self.assertEqual(0x123456789abcdef, 0X123456789abcdef)
# Baseline tests
self.assertEqual(0x0, 0)
self.assertEqual(0x10, 16)
self.assertEqual(0x7fffffff, 2147483647)
self.assertEqual(0x7fffffffffffffff, 9223372036854775807)
# Ditto with a minus sign and parentheses
self.assertEqual(-(0x0), 0)
self.assertEqual(-(0x10), -16)
self.assertEqual(-(0x7fffffff), -2147483647)
self.assertEqual(-(0x7fffffffffffffff), -9223372036854775807)
# Ditto with a minus sign and NO parentheses
self.assertEqual(-0x0, 0)
self.assertEqual(-0x10, -16)
self.assertEqual(-0x7fffffff, -2147483647)
self.assertEqual(-0x7fffffffffffffff, -9223372036854775807)
def test_hex_unsigned(self):
# Positive constants
self.assertEqual(0x80000000, 2147483648L)
self.assertEqual(0xffffffff, 4294967295L)
# Ditto with a minus sign and parentheses
self.assertEqual(-(0x80000000), -2147483648L)
self.assertEqual(-(0xffffffff), -4294967295L)
# Ditto with a minus sign and NO parentheses
# This failed in Python 2.2 through 2.2.2 and in 2.3a1
self.assertEqual(-0x80000000, -2147483648L)
self.assertEqual(-0xffffffff, -4294967295L)
# Positive constants
self.assertEqual(0x8000000000000000, 9223372036854775808L)
self.assertEqual(0xffffffffffffffff, 18446744073709551615L)
# Ditto with a minus sign and parentheses
self.assertEqual(-(0x8000000000000000), -9223372036854775808L)
self.assertEqual(-(0xffffffffffffffff), -18446744073709551615L)
# Ditto with a minus sign and NO parentheses
# This failed in Python 2.2 through 2.2.2 and in 2.3a1
self.assertEqual(-0x8000000000000000, -9223372036854775808L)
self.assertEqual(-0xffffffffffffffff, -18446744073709551615L)
def test_oct_baseline(self):
# Baseline tests
self.assertEqual(00, 0)
self.assertEqual(020, 16)
self.assertEqual(017777777777, 2147483647)
self.assertEqual(0777777777777777777777, 9223372036854775807)
# Ditto with a minus sign and parentheses
self.assertEqual(-(00), 0)
self.assertEqual(-(020), -16)
self.assertEqual(-(017777777777), -2147483647)
self.assertEqual(-(0777777777777777777777), -9223372036854775807)
# Ditto with a minus sign and NO parentheses
self.assertEqual(-00, 0)
self.assertEqual(-020, -16)
self.assertEqual(-017777777777, -2147483647)
self.assertEqual(-0777777777777777777777, -9223372036854775807)
def test_oct_baseline_new(self):
# A few upper/lowercase tests
self.assertEqual(0o0, 0O0)
self.assertEqual(0o1, 0O1)
self.assertEqual(0o1234567, 0O1234567)
# Baseline tests
self.assertEqual(0o0, 0)
self.assertEqual(0o20, 16)
self.assertEqual(0o17777777777, 2147483647)
self.assertEqual(0o777777777777777777777, 9223372036854775807)
# Ditto with a minus sign and parentheses
self.assertEqual(-(0o0), 0)
self.assertEqual(-(0o20), -16)
self.assertEqual(-(0o17777777777), -2147483647)
self.assertEqual(-(0o777777777777777777777), -9223372036854775807)
# Ditto with a minus sign and NO parentheses
self.assertEqual(-0o0, 0)
self.assertEqual(-0o20, -16)
self.assertEqual(-0o17777777777, -2147483647)
self.assertEqual(-0o777777777777777777777, -9223372036854775807)
def test_oct_unsigned(self):
# Positive constants
self.assertEqual(020000000000, 2147483648L)
self.assertEqual(037777777777, 4294967295L)
# Ditto with a minus sign and parentheses
self.assertEqual(-(020000000000), -2147483648L)
self.assertEqual(-(037777777777), -4294967295L)
# Ditto with a minus sign and NO parentheses
# This failed in Python 2.2 through 2.2.2 and in 2.3a1
self.assertEqual(-020000000000, -2147483648L)
self.assertEqual(-037777777777, -4294967295L)
# Positive constants
self.assertEqual(01000000000000000000000, 9223372036854775808L)
self.assertEqual(01777777777777777777777, 18446744073709551615L)
# Ditto with a minus sign and parentheses
self.assertEqual(-(01000000000000000000000), -9223372036854775808L)
self.assertEqual(-(01777777777777777777777), -18446744073709551615L)
# Ditto with a minus sign and NO parentheses
# This failed in Python 2.2 through 2.2.2 and in 2.3a1
self.assertEqual(-01000000000000000000000, -9223372036854775808L)
self.assertEqual(-01777777777777777777777, -18446744073709551615L)
def test_oct_unsigned_new(self):
# Positive constants
self.assertEqual(0o20000000000, 2147483648L)
self.assertEqual(0o37777777777, 4294967295L)
# Ditto with a minus sign and parentheses
self.assertEqual(-(0o20000000000), -2147483648L)
self.assertEqual(-(0o37777777777), -4294967295L)
# Ditto with a minus sign and NO parentheses
# This failed in Python 2.2 through 2.2.2 and in 2.3a1
self.assertEqual(-0o20000000000, -2147483648L)
self.assertEqual(-0o37777777777, -4294967295L)
# Positive constants
self.assertEqual(0o1000000000000000000000, 9223372036854775808L)
self.assertEqual(0o1777777777777777777777, 18446744073709551615L)
# Ditto with a minus sign and parentheses
self.assertEqual(-(0o1000000000000000000000), -9223372036854775808L)
self.assertEqual(-(0o1777777777777777777777), -18446744073709551615L)
# Ditto with a minus sign and NO parentheses
# This failed in Python 2.2 through 2.2.2 and in 2.3a1
self.assertEqual(-0o1000000000000000000000, -9223372036854775808L)
self.assertEqual(-0o1777777777777777777777, -18446744073709551615L)
def test_bin_baseline(self):
# A few upper/lowercase tests
self.assertEqual(0b0, 0B0)
self.assertEqual(0b1, 0B1)
self.assertEqual(0b10101010101, 0B10101010101)
# Baseline tests
self.assertEqual(0b0, 0)
self.assertEqual(0b10000, 16)
self.assertEqual(0b1111111111111111111111111111111, 2147483647)
self.assertEqual(0b111111111111111111111111111111111111111111111111111111111111111, 9223372036854775807)
# Ditto with a minus sign and parentheses
self.assertEqual(-(0b0), 0)
self.assertEqual(-(0b10000), -16)
self.assertEqual(-(0b1111111111111111111111111111111), -2147483647)
self.assertEqual(-(0b111111111111111111111111111111111111111111111111111111111111111), -9223372036854775807)
# Ditto with a minus sign and NO parentheses
self.assertEqual(-0b0, 0)
self.assertEqual(-0b10000, -16)
self.assertEqual(-0b1111111111111111111111111111111, -2147483647)
self.assertEqual(-0b111111111111111111111111111111111111111111111111111111111111111, -9223372036854775807)
def test_bin_unsigned(self):
# Positive constants
self.assertEqual(0b10000000000000000000000000000000, 2147483648L)
self.assertEqual(0b11111111111111111111111111111111, 4294967295L)
# Ditto with a minus sign and parentheses
self.assertEqual(-(0b10000000000000000000000000000000), -2147483648L)
self.assertEqual(-(0b11111111111111111111111111111111), -4294967295L)
# Ditto with a minus sign and NO parentheses
# This failed in Python 2.2 through 2.2.2 and in 2.3a1
self.assertEqual(-0b10000000000000000000000000000000, -2147483648L)
self.assertEqual(-0b11111111111111111111111111111111, -4294967295L)
# Positive constants
self.assertEqual(0b1000000000000000000000000000000000000000000000000000000000000000, 9223372036854775808L)
self.assertEqual(0b1111111111111111111111111111111111111111111111111111111111111111, 18446744073709551615L)
# Ditto with a minus sign and parentheses
self.assertEqual(-(0b1000000000000000000000000000000000000000000000000000000000000000), -9223372036854775808L)
self.assertEqual(-(0b1111111111111111111111111111111111111111111111111111111111111111), -18446744073709551615L)
# Ditto with a minus sign and NO parentheses
# This failed in Python 2.2 through 2.2.2 and in 2.3a1
self.assertEqual(-0b1000000000000000000000000000000000000000000000000000000000000000, -9223372036854775808L)
self.assertEqual(-0b1111111111111111111111111111111111111111111111111111111111111111, -18446744073709551615L)
def test_main():
test_support.run_unittest(TextHexOctBin)
if __name__ == "__main__":
test_main()

View File

@ -1465,14 +1465,27 @@ PyLong_FromString(char *str, char **pend, int base)
while (*str != '\0' && isspace(Py_CHARMASK(*str)))
str++;
if (base == 0) {
/* No base given. Deduce the base from the contents
of the string */
if (str[0] != '0')
base = 10;
else if (str[1] == 'x' || str[1] == 'X')
base = 16;
else if (str[1] == 'o' || str[1] == 'O')
base = 8;
else if (str[1] == 'b' || str[1] == 'B')
base = 2;
else
/* "old" (C-style) octal literal, still valid in
2.x, although illegal in 3.x */
base = 8;
}
if (base == 16 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
/* Whether or not we were deducing the base, skip leading chars
as needed */
if (str[0] == '0' &&
((base == 16 && (str[1] == 'x' || str[1] == 'X')) ||
(base == 8 && (str[1] == 'o' || str[1] == 'O')) ||
(base == 2 && (str[1] == 'b' || str[1] == 'B'))))
str += 2;
start = str;

View File

@ -1335,7 +1335,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
/* Number */
if (isdigit(c)) {
if (c == '0') {
/* Hex or octal -- maybe. */
/* Hex, octal or binary -- maybe. */
c = tok_nextc(tok);
if (c == '.')
goto fraction;
@ -1356,6 +1356,30 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
c = tok_nextc(tok);
} while (isxdigit(c));
}
else if (c == 'o' || c == 'O') {
/* Octal */
c = tok_nextc(tok);
if (c < '0' || c > '8') {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while ('0' <= c && c < '8');
}
else if (c == 'b' || c == 'B') {
/* Binary */
c = tok_nextc(tok);
if (c != '0' && c != '1') {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while (c == '0' || c == '1');
}
else {
int found_decimal = 0;
/* Octal; c is first char of it */

View File

@ -83,9 +83,9 @@ static int digitlimit[] = {
** This is a general purpose routine for converting
** an ascii string to an integer in an arbitrary base.
** Leading white space is ignored. If 'base' is zero
** it looks for a leading 0, 0x or 0X to tell which
** base. If these are absent it defaults to 10.
** Base must be 0 or between 2 and 36 (inclusive).
** it looks for a leading 0, 0b, 0B, 0o, 0O, 0x or 0X
** to tell which base. If these are absent it defaults
** to 10. Base must be 0 or between 2 and 36 (inclusive).
** If 'ptr' is non-NULL it will contain a pointer to
** the end of the scan.
** Errors due to bad pointers will probably result in
@ -104,7 +104,7 @@ PyOS_strtoul(register char *str, char **ptr, int base)
/* check for leading 0 or 0x for auto-base or base 16 */
switch (base) {
case 0: /* look for leading 0, 0x or 0X */
case 0: /* look for leading 0, 0b, 0o or 0x */
if (*str == '0') {
++str;
if (*str == 'x' || *str == 'X') {
@ -116,14 +116,62 @@ PyOS_strtoul(register char *str, char **ptr, int base)
}
++str;
base = 16;
}
else
} else if (*str == 'o' || *str == 'O') {
/* there must be at least one digit after 0o */
if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 8) {
if (ptr)
*ptr = str;
return 0;
}
++str;
base = 8;
} else if (*str == 'b' || *str == 'B') {
/* there must be at least one digit after 0b */
if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 2) {
if (ptr)
*ptr = str;
return 0;
}
++str;
base = 2;
} else {
base = 8;
}
}
else
base = 10;
break;
case 2: /* skip leading 0b or 0B */
if (*str == '0') {
++str;
if (*str == 'b' || *str == 'B') {
/* there must be at least one digit after 0b */
if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 2) {
if (ptr)
*ptr = str;
return 0;
}
++str;
}
}
break;
case 8: /* skip leading 0o or 0O */
if (*str == '0') {
++str;
if (*str == 'o' || *str == 'O') {
/* there must be at least one digit after 0o */
if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 8) {
if (ptr)
*ptr = str;
return 0;
}
++str;
}
}
break;
case 16: /* skip leading 0x or 0X */
if (*str == '0') {
++str;