mirror of https://github.com/python/cpython
Issue #26331: Implement the parsing part of PEP 515.
Thanks to Georg Brandl for the patch.
This commit is contained in:
parent
ee73a65745
commit
a721abac29
|
@ -345,7 +345,7 @@ Decimal objects
|
|||
*value* can be an integer, string, tuple, :class:`float`, or another :class:`Decimal`
|
||||
object. If no *value* is given, returns ``Decimal('0')``. If *value* is a
|
||||
string, it should conform to the decimal numeric string syntax after leading
|
||||
and trailing whitespace characters are removed::
|
||||
and trailing whitespace characters, as well as underscores throughout, are removed::
|
||||
|
||||
sign ::= '+' | '-'
|
||||
digit ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
|
||||
|
@ -394,6 +394,10 @@ Decimal objects
|
|||
:class:`float` arguments raise an exception if the :exc:`FloatOperation`
|
||||
trap is set. By default the trap is off.
|
||||
|
||||
.. versionchanged:: 3.6
|
||||
Underscores are allowed for grouping, as with integral and floating-point
|
||||
literals in code.
|
||||
|
||||
Decimal floating point objects share many properties with the other built-in
|
||||
numeric types such as :class:`float` and :class:`int`. All of the usual math
|
||||
operations and special methods apply. Likewise, decimal objects can be
|
||||
|
@ -1075,8 +1079,8 @@ In addition to the three supplied contexts, new contexts can be created with the
|
|||
Decimal('4.44')
|
||||
|
||||
This method implements the to-number operation of the IBM specification.
|
||||
If the argument is a string, no leading or trailing whitespace is
|
||||
permitted.
|
||||
If the argument is a string, neither leading or trailing whitespace nor
|
||||
underscores anywhere in the string are permitted.
|
||||
|
||||
.. method:: create_decimal_from_float(f)
|
||||
|
||||
|
|
|
@ -271,6 +271,9 @@ are always available. They are listed here in alphabetical order.
|
|||
|
||||
The complex type is described in :ref:`typesnumeric`.
|
||||
|
||||
.. versionchanged:: 3.6
|
||||
Grouping digits with underscores as in code literals is allowed.
|
||||
|
||||
|
||||
.. function:: delattr(object, name)
|
||||
|
||||
|
@ -531,11 +534,14 @@ are always available. They are listed here in alphabetical order.
|
|||
|
||||
The float type is described in :ref:`typesnumeric`.
|
||||
|
||||
.. index::
|
||||
.. versionchanged:: 3.6
|
||||
Grouping digits with underscores as in code literals is allowed.
|
||||
|
||||
|
||||
.. index::
|
||||
single: __format__
|
||||
single: string; format() (built-in function)
|
||||
|
||||
|
||||
.. function:: format(value[, format_spec])
|
||||
|
||||
Convert a *value* to a "formatted" representation, as controlled by
|
||||
|
@ -702,6 +708,10 @@ are always available. They are listed here in alphabetical order.
|
|||
:meth:`base.__int__ <object.__int__>` instead of :meth:`base.__index__
|
||||
<object.__index__>`.
|
||||
|
||||
.. versionchanged:: 3.6
|
||||
Grouping digits with underscores as in code literals is allowed.
|
||||
|
||||
|
||||
.. function:: isinstance(object, classinfo)
|
||||
|
||||
Return true if the *object* argument is an instance of the *classinfo*
|
||||
|
|
|
@ -721,20 +721,24 @@ Integer literals
|
|||
Integer literals are described by the following lexical definitions:
|
||||
|
||||
.. productionlist::
|
||||
integer: `decimalinteger` | `octinteger` | `hexinteger` | `bininteger`
|
||||
decimalinteger: `nonzerodigit` `digit`* | "0"+
|
||||
integer: `decinteger` | `bininteger` | `octinteger` | `hexinteger`
|
||||
decinteger: `nonzerodigit` (["_"] `digit`)* | "0"+ (["_"] "0")*
|
||||
bininteger: "0" ("b" | "B") (["_"] `bindigit`)+
|
||||
octinteger: "0" ("o" | "O") (["_"] `octdigit`)+
|
||||
hexinteger: "0" ("x" | "X") (["_"] `hexdigit`)+
|
||||
nonzerodigit: "1"..."9"
|
||||
digit: "0"..."9"
|
||||
octinteger: "0" ("o" | "O") `octdigit`+
|
||||
hexinteger: "0" ("x" | "X") `hexdigit`+
|
||||
bininteger: "0" ("b" | "B") `bindigit`+
|
||||
bindigit: "0" | "1"
|
||||
octdigit: "0"..."7"
|
||||
hexdigit: `digit` | "a"..."f" | "A"..."F"
|
||||
bindigit: "0" | "1"
|
||||
|
||||
There is no limit for the length of integer literals apart from what can be
|
||||
stored in available memory.
|
||||
|
||||
Underscores are ignored for determining the numeric value of the literal. They
|
||||
can be used to group digits for enhanced readability. One underscore can occur
|
||||
between digits, and after base specifiers like ``0x``.
|
||||
|
||||
Note that leading zeros in a non-zero decimal number are not allowed. This is
|
||||
for disambiguation with C-style octal literals, which Python used before version
|
||||
3.0.
|
||||
|
@ -743,6 +747,10 @@ Some examples of integer literals::
|
|||
|
||||
7 2147483647 0o177 0b100110111
|
||||
3 79228162514264337593543950336 0o377 0xdeadbeef
|
||||
100_000_000_000 0b_1110_0101
|
||||
|
||||
.. versionchanged:: 3.6
|
||||
Underscores are now allowed for grouping purposes in literals.
|
||||
|
||||
|
||||
.. _floating:
|
||||
|
@ -754,23 +762,28 @@ Floating point literals are described by the following lexical definitions:
|
|||
|
||||
.. productionlist::
|
||||
floatnumber: `pointfloat` | `exponentfloat`
|
||||
pointfloat: [`intpart`] `fraction` | `intpart` "."
|
||||
exponentfloat: (`intpart` | `pointfloat`) `exponent`
|
||||
intpart: `digit`+
|
||||
fraction: "." `digit`+
|
||||
exponent: ("e" | "E") ["+" | "-"] `digit`+
|
||||
pointfloat: [`digitpart`] `fraction` | `digitpart` "."
|
||||
exponentfloat: (`digitpart` | `pointfloat`) `exponent`
|
||||
digitpart: `digit` (["_"] `digit`)*
|
||||
fraction: "." `digitpart`
|
||||
exponent: ("e" | "E") ["+" | "-"] `digitpart`
|
||||
|
||||
Note that the integer and exponent parts are always interpreted using radix 10.
|
||||
For example, ``077e010`` is legal, and denotes the same number as ``77e10``. The
|
||||
allowed range of floating point literals is implementation-dependent. Some
|
||||
examples of floating point literals::
|
||||
allowed range of floating point literals is implementation-dependent. As in
|
||||
integer literals, underscores are supported for digit grouping.
|
||||
|
||||
3.14 10. .001 1e100 3.14e-10 0e0
|
||||
Some examples of floating point literals::
|
||||
|
||||
3.14 10. .001 1e100 3.14e-10 0e0 3.14_15_93
|
||||
|
||||
Note that numeric literals do not include a sign; a phrase like ``-1`` is
|
||||
actually an expression composed of the unary operator ``-`` and the literal
|
||||
``1``.
|
||||
|
||||
.. versionchanged:: 3.6
|
||||
Underscores are now allowed for grouping purposes in literals.
|
||||
|
||||
|
||||
.. _imaginary:
|
||||
|
||||
|
@ -780,7 +793,7 @@ Imaginary literals
|
|||
Imaginary literals are described by the following lexical definitions:
|
||||
|
||||
.. productionlist::
|
||||
imagnumber: (`floatnumber` | `intpart`) ("j" | "J")
|
||||
imagnumber: (`floatnumber` | `digitpart`) ("j" | "J")
|
||||
|
||||
An imaginary literal yields a complex number with a real part of 0.0. Complex
|
||||
numbers are represented as a pair of floating point numbers and have the same
|
||||
|
@ -788,7 +801,7 @@ restrictions on their range. To create a complex number with a nonzero real
|
|||
part, add a floating point number to it, e.g., ``(3+4j)``. Some examples of
|
||||
imaginary literals::
|
||||
|
||||
3.14j 10.j 10j .001j 1e100j 3.14e-10j
|
||||
3.14j 10.j 10j .001j 1e100j 3.14e-10j 3.14_15_93j
|
||||
|
||||
|
||||
.. _operators:
|
||||
|
|
|
@ -124,6 +124,29 @@ Windows improvements:
|
|||
New Features
|
||||
============
|
||||
|
||||
.. _pep-515:
|
||||
|
||||
PEP 515: Underscores in Numeric Literals
|
||||
========================================
|
||||
|
||||
Prior to PEP 515, there was no support for writing long numeric
|
||||
literals with some form of separator to improve readability. For
|
||||
instance, how big is ``1000000000000000``? With :pep:`515`, though,
|
||||
you can use underscores to separate digits as desired to make numeric
|
||||
literals easier to read: ``1_000_000_000_000_000``. Underscores can be
|
||||
used with other numeric literals beyond integers, e.g.
|
||||
``0x_FF_FF_FF_FF``.
|
||||
|
||||
Single underscores are allowed between digits and after any base
|
||||
specifier. Leading, trailing, or multiple underscores in a row
|
||||
are not allowed.
|
||||
|
||||
.. seealso::
|
||||
|
||||
:pep:`515` - Underscores in Numeric Literals
|
||||
PEP written by Georg Brandl & Serhiy Storchaka.
|
||||
|
||||
|
||||
.. _pep-523:
|
||||
|
||||
PEP 523: Adding a frame evaluation API to CPython
|
||||
|
|
|
@ -19,6 +19,10 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
|
|||
int *type);
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(PyObject *) _Py_string_to_number_with_underscores(
|
||||
const char *str, Py_ssize_t len, const char *what, PyObject *obj, void *arg,
|
||||
PyObject *(*innerfunc)(const char *, Py_ssize_t, void *));
|
||||
|
||||
PyAPI_FUNC(double) _Py_parse_inf_or_nan(const char *p, char **endptr);
|
||||
#endif
|
||||
|
||||
|
|
|
@ -589,7 +589,7 @@ class Decimal(object):
|
|||
# From a string
|
||||
# REs insist on real strings, so we can too.
|
||||
if isinstance(value, str):
|
||||
m = _parser(value.strip())
|
||||
m = _parser(value.strip().replace("_", ""))
|
||||
if m is None:
|
||||
if context is None:
|
||||
context = getcontext()
|
||||
|
@ -4125,7 +4125,7 @@ class Context(object):
|
|||
This will make it round up for that operation.
|
||||
"""
|
||||
rounding = self.rounding
|
||||
self.rounding= type
|
||||
self.rounding = type
|
||||
return rounding
|
||||
|
||||
def create_decimal(self, num='0'):
|
||||
|
@ -4134,10 +4134,10 @@ class Context(object):
|
|||
This method implements the to-number operation of the
|
||||
IBM Decimal specification."""
|
||||
|
||||
if isinstance(num, str) and num != num.strip():
|
||||
if isinstance(num, str) and (num != num.strip() or '_' in num):
|
||||
return self._raise_error(ConversionSyntax,
|
||||
"no trailing or leading whitespace is "
|
||||
"permitted.")
|
||||
"trailing or leading whitespace and "
|
||||
"underscores are not permitted.")
|
||||
|
||||
d = Decimal(num, context=self)
|
||||
if d._isnan() and len(d._int) > self.prec - self.clamp:
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
import unittest
|
||||
from test import support
|
||||
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
|
||||
INVALID_UNDERSCORE_LITERALS)
|
||||
|
||||
from random import random
|
||||
from math import atan2, isnan, copysign
|
||||
|
@ -377,6 +379,18 @@ class ComplexTest(unittest.TestCase):
|
|||
self.assertAlmostEqual(complex(complex1(1j)), 2j)
|
||||
self.assertRaises(TypeError, complex, complex2(1j))
|
||||
|
||||
def test_underscores(self):
|
||||
# check underscores
|
||||
for lit in VALID_UNDERSCORE_LITERALS:
|
||||
if not any(ch in lit for ch in 'xXoObB'):
|
||||
self.assertEqual(complex(lit), eval(lit))
|
||||
self.assertEqual(complex(lit), complex(lit.replace('_', '')))
|
||||
for lit in INVALID_UNDERSCORE_LITERALS:
|
||||
if lit in ('0_7', '09_99'): # octals are not recognized here
|
||||
continue
|
||||
if not any(ch in lit for ch in 'xXoObB'):
|
||||
self.assertRaises(ValueError, complex, lit)
|
||||
|
||||
def test_hash(self):
|
||||
for x in range(-30, 30):
|
||||
self.assertEqual(hash(x), hash(complex(x, 0)))
|
||||
|
|
|
@ -554,6 +554,10 @@ class ExplicitConstructionTest(unittest.TestCase):
|
|||
self.assertEqual(str(Decimal(' -7.89')), '-7.89')
|
||||
self.assertEqual(str(Decimal(" 3.45679 ")), '3.45679')
|
||||
|
||||
# underscores
|
||||
self.assertEqual(str(Decimal('1_3.3e4_0')), '1.33E+41')
|
||||
self.assertEqual(str(Decimal('1_0_0_0')), '1000')
|
||||
|
||||
# unicode whitespace
|
||||
for lead in ["", ' ', '\u00a0', '\u205f']:
|
||||
for trail in ["", ' ', '\u00a0', '\u205f']:
|
||||
|
@ -578,6 +582,9 @@ class ExplicitConstructionTest(unittest.TestCase):
|
|||
# embedded NUL
|
||||
self.assertRaises(InvalidOperation, Decimal, "12\u00003")
|
||||
|
||||
# underscores don't prevent errors
|
||||
self.assertRaises(InvalidOperation, Decimal, "1_2_\u00003")
|
||||
|
||||
@cpython_only
|
||||
def test_from_legacy_strings(self):
|
||||
import _testcapi
|
||||
|
@ -772,6 +779,9 @@ class ExplicitConstructionTest(unittest.TestCase):
|
|||
self.assertRaises(InvalidOperation, nc.create_decimal, "xyz")
|
||||
self.assertRaises(ValueError, nc.create_decimal, (1, "xyz", -25))
|
||||
self.assertRaises(TypeError, nc.create_decimal, "1234", "5678")
|
||||
# no whitespace and underscore stripping is done with this method
|
||||
self.assertRaises(InvalidOperation, nc.create_decimal, " 1234")
|
||||
self.assertRaises(InvalidOperation, nc.create_decimal, "12_34")
|
||||
|
||||
# too many NaN payload digits
|
||||
nc.prec = 3
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
|
||||
import fractions
|
||||
import operator
|
||||
import os
|
||||
|
@ -9,6 +8,8 @@ import time
|
|||
import unittest
|
||||
|
||||
from test import support
|
||||
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
|
||||
INVALID_UNDERSCORE_LITERALS)
|
||||
from math import isinf, isnan, copysign, ldexp
|
||||
|
||||
INF = float("inf")
|
||||
|
@ -60,6 +61,27 @@ class GeneralFloatCases(unittest.TestCase):
|
|||
float(b'.' + b'1'*1000)
|
||||
float('.' + '1'*1000)
|
||||
|
||||
def test_underscores(self):
|
||||
for lit in VALID_UNDERSCORE_LITERALS:
|
||||
if not any(ch in lit for ch in 'jJxXoObB'):
|
||||
self.assertEqual(float(lit), eval(lit))
|
||||
self.assertEqual(float(lit), float(lit.replace('_', '')))
|
||||
for lit in INVALID_UNDERSCORE_LITERALS:
|
||||
if lit in ('0_7', '09_99'): # octals are not recognized here
|
||||
continue
|
||||
if not any(ch in lit for ch in 'jJxXoObB'):
|
||||
self.assertRaises(ValueError, float, lit)
|
||||
# Additional test cases; nan and inf are never valid as literals,
|
||||
# only in the float() constructor, but we don't allow underscores
|
||||
# in or around them.
|
||||
self.assertRaises(ValueError, float, '_NaN')
|
||||
self.assertRaises(ValueError, float, 'Na_N')
|
||||
self.assertRaises(ValueError, float, 'IN_F')
|
||||
self.assertRaises(ValueError, float, '-_INF')
|
||||
self.assertRaises(ValueError, float, '-INF_')
|
||||
# Check that we handle bytes values correctly.
|
||||
self.assertRaises(ValueError, float, b'0_.\xff9')
|
||||
|
||||
def test_non_numeric_input_types(self):
|
||||
# Test possible non-numeric types for the argument x, including
|
||||
# subclasses of the explicitly documented accepted types.
|
||||
|
|
|
@ -16,6 +16,87 @@ from collections import ChainMap
|
|||
from test import ann_module2
|
||||
import test
|
||||
|
||||
# These are shared with test_tokenize and other test modules.
|
||||
#
|
||||
# Note: since several test cases filter out floats by looking for "e" and ".",
|
||||
# don't add hexadecimal literals that contain "e" or "E".
|
||||
VALID_UNDERSCORE_LITERALS = [
|
||||
'0_0_0',
|
||||
'4_2',
|
||||
'1_0000_0000',
|
||||
'0b1001_0100',
|
||||
'0xffff_ffff',
|
||||
'0o5_7_7',
|
||||
'1_00_00.5',
|
||||
'1_00_00.5e5',
|
||||
'1_00_00e5_1',
|
||||
'1e1_0',
|
||||
'.1_4',
|
||||
'.1_4e1',
|
||||
'0b_0',
|
||||
'0x_f',
|
||||
'0o_5',
|
||||
'1_00_00j',
|
||||
'1_00_00.5j',
|
||||
'1_00_00e5_1j',
|
||||
'.1_4j',
|
||||
'(1_2.5+3_3j)',
|
||||
'(.5_6j)',
|
||||
]
|
||||
INVALID_UNDERSCORE_LITERALS = [
|
||||
# Trailing underscores:
|
||||
'0_',
|
||||
'42_',
|
||||
'1.4j_',
|
||||
'0x_',
|
||||
'0b1_',
|
||||
'0xf_',
|
||||
'0o5_',
|
||||
'0 if 1_Else 1',
|
||||
# Underscores in the base selector:
|
||||
'0_b0',
|
||||
'0_xf',
|
||||
'0_o5',
|
||||
# Old-style octal, still disallowed:
|
||||
'0_7',
|
||||
'09_99',
|
||||
# Multiple consecutive underscores:
|
||||
'4_______2',
|
||||
'0.1__4',
|
||||
'0.1__4j',
|
||||
'0b1001__0100',
|
||||
'0xffff__ffff',
|
||||
'0x___',
|
||||
'0o5__77',
|
||||
'1e1__0',
|
||||
'1e1__0j',
|
||||
# Underscore right before a dot:
|
||||
'1_.4',
|
||||
'1_.4j',
|
||||
# Underscore right after a dot:
|
||||
'1._4',
|
||||
'1._4j',
|
||||
'._5',
|
||||
'._5j',
|
||||
# Underscore right after a sign:
|
||||
'1.0e+_1',
|
||||
'1.0e+_1j',
|
||||
# Underscore right before j:
|
||||
'1.4_j',
|
||||
'1.4e5_j',
|
||||
# Underscore right before e:
|
||||
'1_e1',
|
||||
'1.4_e1',
|
||||
'1.4_e1j',
|
||||
# Underscore right after e:
|
||||
'1e_1',
|
||||
'1.4e_1',
|
||||
'1.4e_1j',
|
||||
# Complex cases with parens:
|
||||
'(1+1.5_j_)',
|
||||
'(1+1.5_j)',
|
||||
]
|
||||
|
||||
|
||||
class TokenTests(unittest.TestCase):
|
||||
|
||||
|
@ -95,6 +176,14 @@ class TokenTests(unittest.TestCase):
|
|||
self.assertEqual(1 if 0else 0, 0)
|
||||
self.assertRaises(SyntaxError, eval, "0 if 1Else 0")
|
||||
|
||||
def test_underscore_literals(self):
|
||||
for lit in VALID_UNDERSCORE_LITERALS:
|
||||
self.assertEqual(eval(lit), eval(lit.replace('_', '')))
|
||||
for lit in INVALID_UNDERSCORE_LITERALS:
|
||||
self.assertRaises(SyntaxError, eval, lit)
|
||||
# Sanity check: no literal begins with an underscore
|
||||
self.assertRaises(NameError, eval, "_0")
|
||||
|
||||
def test_string_literals(self):
|
||||
x = ''; y = ""; self.assertTrue(len(x) == 0 and x == y)
|
||||
x = '\''; y = "'"; self.assertTrue(len(x) == 1 and x == y and ord(x) == 39)
|
||||
|
|
|
@ -2,6 +2,8 @@ import sys
|
|||
|
||||
import unittest
|
||||
from test import support
|
||||
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
|
||||
INVALID_UNDERSCORE_LITERALS)
|
||||
|
||||
L = [
|
||||
('0', 0),
|
||||
|
@ -212,6 +214,25 @@ class IntTestCases(unittest.TestCase):
|
|||
self.assertEqual(int('2br45qc', 35), 4294967297)
|
||||
self.assertEqual(int('1z141z5', 36), 4294967297)
|
||||
|
||||
def test_underscores(self):
|
||||
for lit in VALID_UNDERSCORE_LITERALS:
|
||||
if any(ch in lit for ch in '.eEjJ'):
|
||||
continue
|
||||
self.assertEqual(int(lit, 0), eval(lit))
|
||||
self.assertEqual(int(lit, 0), int(lit.replace('_', ''), 0))
|
||||
for lit in INVALID_UNDERSCORE_LITERALS:
|
||||
if any(ch in lit for ch in '.eEjJ'):
|
||||
continue
|
||||
self.assertRaises(ValueError, int, lit, 0)
|
||||
# Additional test cases with bases != 0, only for the constructor:
|
||||
self.assertEqual(int("1_00", 3), 9)
|
||||
self.assertEqual(int("0_100"), 100) # not valid as a literal!
|
||||
self.assertEqual(int(b"1_00"), 100) # byte underscore
|
||||
self.assertRaises(ValueError, int, "_100")
|
||||
self.assertRaises(ValueError, int, "+_100")
|
||||
self.assertRaises(ValueError, int, "1__00")
|
||||
self.assertRaises(ValueError, int, "100_")
|
||||
|
||||
@support.cpython_only
|
||||
def test_small_ints(self):
|
||||
# Bug #3236: Return small longs from PyLong_FromString
|
||||
|
|
|
@ -3,7 +3,9 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
|
|||
STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
|
||||
open as tokenize_open, Untokenizer)
|
||||
from io import BytesIO
|
||||
from unittest import TestCase, mock, main
|
||||
from unittest import TestCase, mock
|
||||
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
|
||||
INVALID_UNDERSCORE_LITERALS)
|
||||
import os
|
||||
import token
|
||||
|
||||
|
@ -185,6 +187,21 @@ def k(x):
|
|||
NUMBER '3.14e159' (1, 4) (1, 12)
|
||||
""")
|
||||
|
||||
def test_underscore_literals(self):
|
||||
def number_token(s):
|
||||
f = BytesIO(s.encode('utf-8'))
|
||||
for toktype, token, start, end, line in tokenize(f.readline):
|
||||
if toktype == NUMBER:
|
||||
return token
|
||||
return 'invalid token'
|
||||
for lit in VALID_UNDERSCORE_LITERALS:
|
||||
if '(' in lit:
|
||||
# this won't work with compound complex inputs
|
||||
continue
|
||||
self.assertEqual(number_token(lit), lit)
|
||||
for lit in INVALID_UNDERSCORE_LITERALS:
|
||||
self.assertNotEqual(number_token(lit), lit)
|
||||
|
||||
def test_string(self):
|
||||
# String literals
|
||||
self.check_tokenize("x = ''; y = \"\"", """\
|
||||
|
@ -1529,11 +1546,10 @@ class TestRoundtrip(TestCase):
|
|||
tempdir = os.path.dirname(fn) or os.curdir
|
||||
testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
|
||||
|
||||
# Tokenize is broken on test_unicode_identifiers.py because regular
|
||||
# expressions are broken on the obscure unicode identifiers in it.
|
||||
# *sigh* With roundtrip extended to test the 5-tuple mode of
|
||||
# untokenize, 7 more testfiles fail. Remove them also until the
|
||||
# failure is diagnosed.
|
||||
# Tokenize is broken on test_unicode_identifiers.py because regular expressions are
|
||||
# broken on the obscure unicode identifiers in it. *sigh*
|
||||
# With roundtrip extended to test the 5-tuple mode of untokenize,
|
||||
# 7 more testfiles fail. Remove them also until the failure is diagnosed.
|
||||
|
||||
testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
|
||||
for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
|
||||
|
@ -1565,4 +1581,4 @@ class TestRoundtrip(TestCase):
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
unittest.main()
|
||||
|
|
|
@ -48,6 +48,7 @@ class TypesTests(unittest.TestCase):
|
|||
def test_float_constructor(self):
|
||||
self.assertRaises(ValueError, float, '')
|
||||
self.assertRaises(ValueError, float, '5\0')
|
||||
self.assertRaises(ValueError, float, '5_5\0')
|
||||
|
||||
def test_zero_division(self):
|
||||
try: 5.0 / 0.0
|
||||
|
|
|
@ -120,16 +120,17 @@ Comment = r'#[^\r\n]*'
|
|||
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
|
||||
Name = r'\w+'
|
||||
|
||||
Hexnumber = r'0[xX][0-9a-fA-F]+'
|
||||
Binnumber = r'0[bB][01]+'
|
||||
Octnumber = r'0[oO][0-7]+'
|
||||
Decnumber = r'(?:0+|[1-9][0-9]*)'
|
||||
Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
|
||||
Binnumber = r'0[bB](?:_?[01])+'
|
||||
Octnumber = r'0[oO](?:_?[0-7])+'
|
||||
Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
|
||||
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
|
||||
Exponent = r'[eE][-+]?[0-9]+'
|
||||
Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
|
||||
Expfloat = r'[0-9]+' + Exponent
|
||||
Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
|
||||
Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
|
||||
r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
|
||||
Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
|
||||
Floatnumber = group(Pointfloat, Expfloat)
|
||||
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
|
||||
Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
|
||||
Number = group(Imagnumber, Floatnumber, Intnumber)
|
||||
|
||||
# Return the empty string, plus all of the valid string prefixes.
|
||||
|
|
|
@ -17,6 +17,8 @@ Core and Builtins
|
|||
efficient bytecode. Patch by Demur Rumed, design by Serhiy Storchaka,
|
||||
reviewed by Serhiy Storchaka and Victor Stinner.
|
||||
|
||||
- Issue #26331: Implement tokenizing support for PEP 515. Patch by Georg Brandl.
|
||||
|
||||
- Issue #27999: Make "global after use" a SyntaxError, and ditto for nonlocal.
|
||||
Patch by Ivan Levkivskyi.
|
||||
|
||||
|
@ -2678,7 +2680,7 @@ Library
|
|||
- Issue #24774: Fix docstring in http.server.test. Patch from Chiu-Hsiang Hsu.
|
||||
|
||||
- Issue #21159: Improve message in configparser.InterpolationMissingOptionError.
|
||||
Patch from Å?ukasz Langa.
|
||||
Patch from Łukasz Langa.
|
||||
|
||||
- Issue #20362: Honour TestCase.longMessage correctly in assertRegex.
|
||||
Patch from Ilia Kurenkov.
|
||||
|
@ -4606,7 +4608,7 @@ Library
|
|||
Based on patch by Martin Panter.
|
||||
|
||||
- Issue #17293: uuid.getnode() now determines MAC address on AIX using netstat.
|
||||
Based on patch by Aivars KalvÄ?ns.
|
||||
Based on patch by Aivars Kalvāns.
|
||||
|
||||
- Issue #22769: Fixed ttk.Treeview.tag_has() when called without arguments.
|
||||
|
||||
|
|
|
@ -1889,12 +1889,13 @@ is_space(enum PyUnicode_Kind kind, void *data, Py_ssize_t pos)
|
|||
/* Return the ASCII representation of a numeric Unicode string. The numeric
|
||||
string may contain ascii characters in the range [1, 127], any Unicode
|
||||
space and any unicode digit. If strip_ws is true, leading and trailing
|
||||
whitespace is stripped.
|
||||
whitespace is stripped. If ignore_underscores is true, underscores are
|
||||
ignored.
|
||||
|
||||
Return NULL if malloc fails and an empty string if invalid characters
|
||||
are found. */
|
||||
static char *
|
||||
numeric_as_ascii(const PyObject *u, int strip_ws)
|
||||
numeric_as_ascii(const PyObject *u, int strip_ws, int ignore_underscores)
|
||||
{
|
||||
enum PyUnicode_Kind kind;
|
||||
void *data;
|
||||
|
@ -1929,6 +1930,9 @@ numeric_as_ascii(const PyObject *u, int strip_ws)
|
|||
|
||||
for (; j < len; j++) {
|
||||
ch = PyUnicode_READ(kind, data, j);
|
||||
if (ignore_underscores && ch == '_') {
|
||||
continue;
|
||||
}
|
||||
if (0 < ch && ch <= 127) {
|
||||
*cp++ = ch;
|
||||
continue;
|
||||
|
@ -2011,7 +2015,7 @@ PyDecType_FromUnicode(PyTypeObject *type, const PyObject *u,
|
|||
PyObject *dec;
|
||||
char *s;
|
||||
|
||||
s = numeric_as_ascii(u, 0);
|
||||
s = numeric_as_ascii(u, 0, 0);
|
||||
if (s == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
@ -2031,7 +2035,7 @@ PyDecType_FromUnicodeExactWS(PyTypeObject *type, const PyObject *u,
|
|||
PyObject *dec;
|
||||
char *s;
|
||||
|
||||
s = numeric_as_ascii(u, 1);
|
||||
s = numeric_as_ascii(u, 1, 1);
|
||||
if (s == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -759,29 +759,12 @@ static PyMemberDef complex_members[] = {
|
|||
};
|
||||
|
||||
static PyObject *
|
||||
complex_subtype_from_string(PyTypeObject *type, PyObject *v)
|
||||
complex_from_string_inner(const char *s, Py_ssize_t len, void *type)
|
||||
{
|
||||
const char *s, *start;
|
||||
char *end;
|
||||
double x=0.0, y=0.0, z;
|
||||
int got_bracket=0;
|
||||
PyObject *s_buffer = NULL;
|
||||
Py_ssize_t len;
|
||||
|
||||
if (PyUnicode_Check(v)) {
|
||||
s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v);
|
||||
if (s_buffer == NULL)
|
||||
return NULL;
|
||||
s = PyUnicode_AsUTF8AndSize(s_buffer, &len);
|
||||
if (s == NULL)
|
||||
goto error;
|
||||
}
|
||||
else {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"complex() argument must be a string or a number, not '%.200s'",
|
||||
Py_TYPE(v)->tp_name);
|
||||
return NULL;
|
||||
}
|
||||
const char *start;
|
||||
char *end;
|
||||
|
||||
/* position on first nonblank */
|
||||
start = s;
|
||||
|
@ -822,7 +805,7 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
|
|||
if (PyErr_ExceptionMatches(PyExc_ValueError))
|
||||
PyErr_Clear();
|
||||
else
|
||||
goto error;
|
||||
return NULL;
|
||||
}
|
||||
if (end != s) {
|
||||
/* all 4 forms starting with <float> land here */
|
||||
|
@ -835,7 +818,7 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
|
|||
if (PyErr_ExceptionMatches(PyExc_ValueError))
|
||||
PyErr_Clear();
|
||||
else
|
||||
goto error;
|
||||
return NULL;
|
||||
}
|
||||
if (end != s)
|
||||
/* <float><signed-float>j */
|
||||
|
@ -890,17 +873,45 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
|
|||
if (s-start != len)
|
||||
goto parse_error;
|
||||
|
||||
Py_XDECREF(s_buffer);
|
||||
return complex_subtype_from_doubles(type, x, y);
|
||||
return complex_subtype_from_doubles((PyTypeObject *)type, x, y);
|
||||
|
||||
parse_error:
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"complex() arg is a malformed string");
|
||||
error:
|
||||
Py_XDECREF(s_buffer);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
complex_subtype_from_string(PyTypeObject *type, PyObject *v)
|
||||
{
|
||||
const char *s;
|
||||
PyObject *s_buffer = NULL, *result = NULL;
|
||||
Py_ssize_t len;
|
||||
|
||||
if (PyUnicode_Check(v)) {
|
||||
s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v);
|
||||
if (s_buffer == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
s = PyUnicode_AsUTF8AndSize(s_buffer, &len);
|
||||
if (s == NULL) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
else {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"complex() argument must be a string or a number, not '%.200s'",
|
||||
Py_TYPE(v)->tp_name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
result = _Py_string_to_number_with_underscores(s, len, "complex", v, type,
|
||||
complex_from_string_inner);
|
||||
exit:
|
||||
Py_DECREF(s_buffer);
|
||||
return result;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
complex_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
|
|
|
@ -124,11 +124,43 @@ PyFloat_FromDouble(double fval)
|
|||
return (PyObject *) op;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
float_from_string_inner(const char *s, Py_ssize_t len, void *obj)
|
||||
{
|
||||
double x;
|
||||
const char *end;
|
||||
const char *last = s + len;
|
||||
/* strip space */
|
||||
while (s < last && Py_ISSPACE(*s)) {
|
||||
s++;
|
||||
}
|
||||
|
||||
while (s < last - 1 && Py_ISSPACE(last[-1])) {
|
||||
last--;
|
||||
}
|
||||
|
||||
/* We don't care about overflow or underflow. If the platform
|
||||
* supports them, infinities and signed zeroes (on underflow) are
|
||||
* fine. */
|
||||
x = PyOS_string_to_double(s, (char **)&end, NULL);
|
||||
if (end != last) {
|
||||
PyErr_Format(PyExc_ValueError,
|
||||
"could not convert string to float: "
|
||||
"%R", obj);
|
||||
return NULL;
|
||||
}
|
||||
else if (x == -1.0 && PyErr_Occurred()) {
|
||||
return NULL;
|
||||
}
|
||||
else {
|
||||
return PyFloat_FromDouble(x);
|
||||
}
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyFloat_FromString(PyObject *v)
|
||||
{
|
||||
const char *s, *last, *end;
|
||||
double x;
|
||||
const char *s;
|
||||
PyObject *s_buffer = NULL;
|
||||
Py_ssize_t len;
|
||||
Py_buffer view = {NULL, NULL};
|
||||
|
@ -169,27 +201,8 @@ PyFloat_FromString(PyObject *v)
|
|||
Py_TYPE(v)->tp_name);
|
||||
return NULL;
|
||||
}
|
||||
last = s + len;
|
||||
/* strip space */
|
||||
while (s < last && Py_ISSPACE(*s))
|
||||
s++;
|
||||
while (s < last - 1 && Py_ISSPACE(last[-1]))
|
||||
last--;
|
||||
/* We don't care about overflow or underflow. If the platform
|
||||
* supports them, infinities and signed zeroes (on underflow) are
|
||||
* fine. */
|
||||
x = PyOS_string_to_double(s, (char **)&end, NULL);
|
||||
if (end != last) {
|
||||
PyErr_Format(PyExc_ValueError,
|
||||
"could not convert string to float: "
|
||||
"%R", v);
|
||||
result = NULL;
|
||||
}
|
||||
else if (x == -1.0 && PyErr_Occurred())
|
||||
result = NULL;
|
||||
else
|
||||
result = PyFloat_FromDouble(x);
|
||||
|
||||
result = _Py_string_to_number_with_underscores(s, len, "float", v, v,
|
||||
float_from_string_inner);
|
||||
PyBuffer_Release(&view);
|
||||
Py_XDECREF(s_buffer);
|
||||
return result;
|
||||
|
|
|
@ -2004,12 +2004,18 @@ unsigned char _PyLong_DigitValue[256] = {
|
|||
* non-digit (which may be *str!). A normalized int is returned.
|
||||
* The point to this routine is that it takes time linear in the number of
|
||||
* string characters.
|
||||
*
|
||||
* Return values:
|
||||
* -1 on syntax error (exception needs to be set, *res is untouched)
|
||||
* 0 else (exception may be set, in that case *res is set to NULL)
|
||||
*/
|
||||
static PyLongObject *
|
||||
long_from_binary_base(const char **str, int base)
|
||||
static int
|
||||
long_from_binary_base(const char **str, int base, PyLongObject **res)
|
||||
{
|
||||
const char *p = *str;
|
||||
const char *start = p;
|
||||
char prev = 0;
|
||||
int digits = 0;
|
||||
int bits_per_char;
|
||||
Py_ssize_t n;
|
||||
PyLongObject *z;
|
||||
|
@ -2019,23 +2025,43 @@ long_from_binary_base(const char **str, int base)
|
|||
|
||||
assert(base >= 2 && base <= 32 && (base & (base - 1)) == 0);
|
||||
n = base;
|
||||
for (bits_per_char = -1; n; ++bits_per_char)
|
||||
for (bits_per_char = -1; n; ++bits_per_char) {
|
||||
n >>= 1;
|
||||
/* n <- total # of bits needed, while setting p to end-of-string */
|
||||
while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base)
|
||||
}
|
||||
/* count digits and set p to end-of-string */
|
||||
while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base || *p == '_') {
|
||||
if (*p == '_') {
|
||||
if (prev == '_') {
|
||||
*str = p - 1;
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
++digits;
|
||||
}
|
||||
prev = *p;
|
||||
++p;
|
||||
}
|
||||
if (prev == '_') {
|
||||
/* Trailing underscore not allowed. */
|
||||
*str = p - 1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
*str = p;
|
||||
/* n <- # of Python digits needed, = ceiling(n/PyLong_SHIFT). */
|
||||
n = (p - start) * bits_per_char + PyLong_SHIFT - 1;
|
||||
n = digits * bits_per_char + PyLong_SHIFT - 1;
|
||||
if (n / bits_per_char < p - start) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"int string too large to convert");
|
||||
return NULL;
|
||||
*res = NULL;
|
||||
return 0;
|
||||
}
|
||||
n = n / PyLong_SHIFT;
|
||||
z = _PyLong_New(n);
|
||||
if (z == NULL)
|
||||
return NULL;
|
||||
if (z == NULL) {
|
||||
*res = NULL;
|
||||
return 0;
|
||||
}
|
||||
/* Read string from right, and fill in int from left; i.e.,
|
||||
* from least to most significant in both.
|
||||
*/
|
||||
|
@ -2043,7 +2069,11 @@ long_from_binary_base(const char **str, int base)
|
|||
bits_in_accum = 0;
|
||||
pdigit = z->ob_digit;
|
||||
while (--p >= start) {
|
||||
int k = (int)_PyLong_DigitValue[Py_CHARMASK(*p)];
|
||||
int k;
|
||||
if (*p == '_') {
|
||||
continue;
|
||||
}
|
||||
k = (int)_PyLong_DigitValue[Py_CHARMASK(*p)];
|
||||
assert(k >= 0 && k < base);
|
||||
accum |= (twodigits)k << bits_in_accum;
|
||||
bits_in_accum += bits_per_char;
|
||||
|
@ -2062,7 +2092,8 @@ long_from_binary_base(const char **str, int base)
|
|||
}
|
||||
while (pdigit - z->ob_digit < n)
|
||||
*pdigit++ = 0;
|
||||
return long_normalize(z);
|
||||
*res = long_normalize(z);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Parses an int from a bytestring. Leading and trailing whitespace will be
|
||||
|
@ -2087,23 +2118,29 @@ PyLong_FromString(const char *str, char **pend, int base)
|
|||
"int() arg 2 must be >= 2 and <= 36");
|
||||
return NULL;
|
||||
}
|
||||
while (*str != '\0' && Py_ISSPACE(Py_CHARMASK(*str)))
|
||||
while (*str != '\0' && Py_ISSPACE(Py_CHARMASK(*str))) {
|
||||
str++;
|
||||
if (*str == '+')
|
||||
}
|
||||
if (*str == '+') {
|
||||
++str;
|
||||
}
|
||||
else if (*str == '-') {
|
||||
++str;
|
||||
sign = -1;
|
||||
}
|
||||
if (base == 0) {
|
||||
if (str[0] != '0')
|
||||
if (str[0] != '0') {
|
||||
base = 10;
|
||||
else if (str[1] == 'x' || str[1] == 'X')
|
||||
}
|
||||
else if (str[1] == 'x' || str[1] == 'X') {
|
||||
base = 16;
|
||||
else if (str[1] == 'o' || str[1] == 'O')
|
||||
}
|
||||
else if (str[1] == 'o' || str[1] == 'O') {
|
||||
base = 8;
|
||||
else if (str[1] == 'b' || str[1] == 'B')
|
||||
}
|
||||
else if (str[1] == 'b' || str[1] == 'B') {
|
||||
base = 2;
|
||||
}
|
||||
else {
|
||||
/* "old" (C-style) octal literal, now invalid.
|
||||
it might still be zero though */
|
||||
|
@ -2114,12 +2151,26 @@ PyLong_FromString(const char *str, char **pend, int base)
|
|||
if (str[0] == '0' &&
|
||||
((base == 16 && (str[1] == 'x' || str[1] == 'X')) ||
|
||||
(base == 8 && (str[1] == 'o' || str[1] == 'O')) ||
|
||||
(base == 2 && (str[1] == 'b' || str[1] == 'B'))))
|
||||
(base == 2 && (str[1] == 'b' || str[1] == 'B')))) {
|
||||
str += 2;
|
||||
/* One underscore allowed here. */
|
||||
if (*str == '_') {
|
||||
++str;
|
||||
}
|
||||
}
|
||||
if (str[0] == '_') {
|
||||
/* May not start with underscores. */
|
||||
goto onError;
|
||||
}
|
||||
|
||||
start = str;
|
||||
if ((base & (base - 1)) == 0)
|
||||
z = long_from_binary_base(&str, base);
|
||||
if ((base & (base - 1)) == 0) {
|
||||
int res = long_from_binary_base(&str, base, &z);
|
||||
if (res < 0) {
|
||||
/* Syntax error. */
|
||||
goto onError;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/***
|
||||
Binary bases can be converted in time linear in the number of digits, because
|
||||
|
@ -2208,11 +2259,13 @@ digit beyond the first.
|
|||
***/
|
||||
twodigits c; /* current input character */
|
||||
Py_ssize_t size_z;
|
||||
int digits = 0;
|
||||
int i;
|
||||
int convwidth;
|
||||
twodigits convmultmax, convmult;
|
||||
digit *pz, *pzstop;
|
||||
const char* scan;
|
||||
const char *scan, *lastdigit;
|
||||
char prev = 0;
|
||||
|
||||
static double log_base_BASE[37] = {0.0e0,};
|
||||
static int convwidth_base[37] = {0,};
|
||||
|
@ -2226,8 +2279,9 @@ digit beyond the first.
|
|||
log((double)PyLong_BASE));
|
||||
for (;;) {
|
||||
twodigits next = convmax * base;
|
||||
if (next > PyLong_BASE)
|
||||
if (next > PyLong_BASE) {
|
||||
break;
|
||||
}
|
||||
convmax = next;
|
||||
++i;
|
||||
}
|
||||
|
@ -2238,21 +2292,43 @@ digit beyond the first.
|
|||
|
||||
/* Find length of the string of numeric characters. */
|
||||
scan = str;
|
||||
while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base)
|
||||
lastdigit = str;
|
||||
|
||||
while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base || *scan == '_') {
|
||||
if (*scan == '_') {
|
||||
if (prev == '_') {
|
||||
/* Only one underscore allowed. */
|
||||
str = lastdigit + 1;
|
||||
goto onError;
|
||||
}
|
||||
}
|
||||
else {
|
||||
++digits;
|
||||
lastdigit = scan;
|
||||
}
|
||||
prev = *scan;
|
||||
++scan;
|
||||
}
|
||||
if (prev == '_') {
|
||||
/* Trailing underscore not allowed. */
|
||||
/* Set error pointer to first underscore. */
|
||||
str = lastdigit + 1;
|
||||
goto onError;
|
||||
}
|
||||
|
||||
/* Create an int object that can contain the largest possible
|
||||
* integer with this base and length. Note that there's no
|
||||
* need to initialize z->ob_digit -- no slot is read up before
|
||||
* being stored into.
|
||||
*/
|
||||
size_z = (Py_ssize_t)((scan - str) * log_base_BASE[base]) + 1;
|
||||
size_z = (Py_ssize_t)(digits * log_base_BASE[base]) + 1;
|
||||
/* Uncomment next line to test exceedingly rare copy code */
|
||||
/* size_z = 1; */
|
||||
assert(size_z > 0);
|
||||
z = _PyLong_New(size_z);
|
||||
if (z == NULL)
|
||||
if (z == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
Py_SIZE(z) = 0;
|
||||
|
||||
/* `convwidth` consecutive input digits are treated as a single
|
||||
|
@ -2263,9 +2339,17 @@ digit beyond the first.
|
|||
|
||||
/* Work ;-) */
|
||||
while (str < scan) {
|
||||
if (*str == '_') {
|
||||
str++;
|
||||
continue;
|
||||
}
|
||||
/* grab up to convwidth digits from the input string */
|
||||
c = (digit)_PyLong_DigitValue[Py_CHARMASK(*str++)];
|
||||
for (i = 1; i < convwidth && str != scan; ++i, ++str) {
|
||||
for (i = 1; i < convwidth && str != scan; ++str) {
|
||||
if (*str == '_') {
|
||||
continue;
|
||||
}
|
||||
i++;
|
||||
c = (twodigits)(c * base +
|
||||
(int)_PyLong_DigitValue[Py_CHARMASK(*str)]);
|
||||
assert(c < PyLong_BASE);
|
||||
|
@ -2277,9 +2361,10 @@ digit beyond the first.
|
|||
*/
|
||||
if (i != convwidth) {
|
||||
convmult = base;
|
||||
for ( ; i > 1; --i)
|
||||
for ( ; i > 1; --i) {
|
||||
convmult *= base;
|
||||
}
|
||||
}
|
||||
|
||||
/* Multiply z by convmult, and add c. */
|
||||
pz = z->ob_digit;
|
||||
|
@ -2316,41 +2401,51 @@ digit beyond the first.
|
|||
}
|
||||
}
|
||||
}
|
||||
if (z == NULL)
|
||||
if (z == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
if (error_if_nonzero) {
|
||||
/* reset the base to 0, else the exception message
|
||||
doesn't make too much sense */
|
||||
base = 0;
|
||||
if (Py_SIZE(z) != 0)
|
||||
if (Py_SIZE(z) != 0) {
|
||||
goto onError;
|
||||
}
|
||||
/* there might still be other problems, therefore base
|
||||
remains zero here for the same reason */
|
||||
}
|
||||
if (str == start)
|
||||
if (str == start) {
|
||||
goto onError;
|
||||
if (sign < 0)
|
||||
}
|
||||
if (sign < 0) {
|
||||
Py_SIZE(z) = -(Py_SIZE(z));
|
||||
while (*str && Py_ISSPACE(Py_CHARMASK(*str)))
|
||||
}
|
||||
while (*str && Py_ISSPACE(Py_CHARMASK(*str))) {
|
||||
str++;
|
||||
if (*str != '\0')
|
||||
}
|
||||
if (*str != '\0') {
|
||||
goto onError;
|
||||
}
|
||||
long_normalize(z);
|
||||
z = maybe_small_long(z);
|
||||
if (z == NULL)
|
||||
if (z == NULL) {
|
||||
return NULL;
|
||||
if (pend != NULL)
|
||||
}
|
||||
if (pend != NULL) {
|
||||
*pend = (char *)str;
|
||||
}
|
||||
return (PyObject *) z;
|
||||
|
||||
onError:
|
||||
if (pend != NULL)
|
||||
if (pend != NULL) {
|
||||
*pend = (char *)str;
|
||||
}
|
||||
Py_XDECREF(z);
|
||||
slen = strlen(orig_str) < 200 ? strlen(orig_str) : 200;
|
||||
strobj = PyUnicode_FromStringAndSize(orig_str, slen);
|
||||
if (strobj == NULL)
|
||||
if (strobj == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
PyErr_Format(PyExc_ValueError,
|
||||
"invalid literal for int() with base %d: %.200R",
|
||||
base, strobj);
|
||||
|
|
|
@ -1333,6 +1333,28 @@ verify_identifier(struct tok_state *tok)
|
|||
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
tok_decimal_tail(struct tok_state *tok)
|
||||
{
|
||||
int c;
|
||||
|
||||
while (1) {
|
||||
do {
|
||||
c = tok_nextc(tok);
|
||||
} while (isdigit(c));
|
||||
if (c != '_') {
|
||||
break;
|
||||
}
|
||||
c = tok_nextc(tok);
|
||||
if (!isdigit(c)) {
|
||||
tok->done = E_TOKEN;
|
||||
tok_backup(tok, c);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
/* Get next token, after space stripping etc. */
|
||||
|
||||
static int
|
||||
|
@ -1353,18 +1375,21 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
tok->atbol = 0;
|
||||
for (;;) {
|
||||
c = tok_nextc(tok);
|
||||
if (c == ' ')
|
||||
if (c == ' ') {
|
||||
col++, altcol++;
|
||||
}
|
||||
else if (c == '\t') {
|
||||
col = (col/tok->tabsize + 1) * tok->tabsize;
|
||||
altcol = (altcol/tok->alttabsize + 1)
|
||||
* tok->alttabsize;
|
||||
}
|
||||
else if (c == '\014') /* Control-L (formfeed) */
|
||||
else if (c == '\014') {/* Control-L (formfeed) */
|
||||
col = altcol = 0; /* For Emacs users */
|
||||
else
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
tok_backup(tok, c);
|
||||
if (c == '#' || c == '\n') {
|
||||
/* Lines with only whitespace and/or comments
|
||||
|
@ -1372,10 +1397,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
not passed to the parser as NEWLINE tokens,
|
||||
except *totally* empty lines in interactive
|
||||
mode, which signal the end of a command group. */
|
||||
if (col == 0 && c == '\n' && tok->prompt != NULL)
|
||||
if (col == 0 && c == '\n' && tok->prompt != NULL) {
|
||||
blankline = 0; /* Let it through */
|
||||
else
|
||||
}
|
||||
else {
|
||||
blankline = 1; /* Ignore completely */
|
||||
}
|
||||
/* We can't jump back right here since we still
|
||||
may need to skip to the end of a comment */
|
||||
}
|
||||
|
@ -1383,10 +1410,11 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
if (col == tok->indstack[tok->indent]) {
|
||||
/* No change */
|
||||
if (altcol != tok->altindstack[tok->indent]) {
|
||||
if (indenterror(tok))
|
||||
if (indenterror(tok)) {
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (col > tok->indstack[tok->indent]) {
|
||||
/* Indent -- always one */
|
||||
if (tok->indent+1 >= MAXINDENT) {
|
||||
|
@ -1395,9 +1423,10 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
return ERRORTOKEN;
|
||||
}
|
||||
if (altcol <= tok->altindstack[tok->indent]) {
|
||||
if (indenterror(tok))
|
||||
if (indenterror(tok)) {
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
}
|
||||
tok->pendin++;
|
||||
tok->indstack[++tok->indent] = col;
|
||||
tok->altindstack[tok->indent] = altcol;
|
||||
|
@ -1415,12 +1444,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
return ERRORTOKEN;
|
||||
}
|
||||
if (altcol != tok->altindstack[tok->indent]) {
|
||||
if (indenterror(tok))
|
||||
if (indenterror(tok)) {
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tok->start = tok->cur;
|
||||
|
||||
|
@ -1462,9 +1492,11 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
tok->start = tok->cur - 1;
|
||||
|
||||
/* Skip comment */
|
||||
if (c == '#')
|
||||
while (c != EOF && c != '\n')
|
||||
if (c == '#') {
|
||||
while (c != EOF && c != '\n') {
|
||||
c = tok_nextc(tok);
|
||||
}
|
||||
}
|
||||
|
||||
/* Check for EOF and errors now */
|
||||
if (c == EOF) {
|
||||
|
@ -1481,27 +1513,35 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
saw_b = 1;
|
||||
/* Since this is a backwards compatibility support literal we don't
|
||||
want to support it in arbitrary order like byte literals. */
|
||||
else if (!(saw_b || saw_u || saw_r || saw_f) && (c == 'u' || c == 'U'))
|
||||
else if (!(saw_b || saw_u || saw_r || saw_f)
|
||||
&& (c == 'u'|| c == 'U')) {
|
||||
saw_u = 1;
|
||||
}
|
||||
/* ur"" and ru"" are not supported */
|
||||
else if (!(saw_r || saw_u) && (c == 'r' || c == 'R'))
|
||||
else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) {
|
||||
saw_r = 1;
|
||||
else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F'))
|
||||
}
|
||||
else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) {
|
||||
saw_f = 1;
|
||||
else
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
c = tok_nextc(tok);
|
||||
if (c == '"' || c == '\'')
|
||||
if (c == '"' || c == '\'') {
|
||||
goto letter_quote;
|
||||
}
|
||||
}
|
||||
while (is_potential_identifier_char(c)) {
|
||||
if (c >= 128)
|
||||
if (c >= 128) {
|
||||
nonascii = 1;
|
||||
}
|
||||
c = tok_nextc(tok);
|
||||
}
|
||||
tok_backup(tok, c);
|
||||
if (nonascii && !verify_identifier(tok))
|
||||
if (nonascii && !verify_identifier(tok)) {
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
*p_start = tok->start;
|
||||
*p_end = tok->cur;
|
||||
|
||||
|
@ -1510,11 +1550,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
/* Current token length is 5. */
|
||||
if (tok->async_def) {
|
||||
/* We're inside an 'async def' function. */
|
||||
if (memcmp(tok->start, "async", 5) == 0)
|
||||
if (memcmp(tok->start, "async", 5) == 0) {
|
||||
return ASYNC;
|
||||
if (memcmp(tok->start, "await", 5) == 0)
|
||||
}
|
||||
if (memcmp(tok->start, "await", 5) == 0) {
|
||||
return AWAIT;
|
||||
}
|
||||
}
|
||||
else if (memcmp(tok->start, "async", 5) == 0) {
|
||||
/* The current token is 'async'.
|
||||
Look ahead one token.*/
|
||||
|
@ -1546,8 +1588,9 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
/* Newline */
|
||||
if (c == '\n') {
|
||||
tok->atbol = 1;
|
||||
if (blankline || tok->level > 0)
|
||||
if (blankline || tok->level > 0) {
|
||||
goto nextline;
|
||||
}
|
||||
*p_start = tok->start;
|
||||
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
|
||||
tok->cont_line = 0;
|
||||
|
@ -1570,11 +1613,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
*p_start = tok->start;
|
||||
*p_end = tok->cur;
|
||||
return ELLIPSIS;
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
tok_backup(tok, c);
|
||||
}
|
||||
tok_backup(tok, '.');
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
tok_backup(tok, c);
|
||||
}
|
||||
*p_start = tok->start;
|
||||
|
@ -1588,9 +1633,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
/* Hex, octal or binary -- maybe. */
|
||||
c = tok_nextc(tok);
|
||||
if (c == 'x' || c == 'X') {
|
||||
|
||||
/* Hex */
|
||||
c = tok_nextc(tok);
|
||||
do {
|
||||
if (c == '_') {
|
||||
c = tok_nextc(tok);
|
||||
}
|
||||
if (!isxdigit(c)) {
|
||||
tok->done = E_TOKEN;
|
||||
tok_backup(tok, c);
|
||||
|
@ -1599,10 +1647,15 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
do {
|
||||
c = tok_nextc(tok);
|
||||
} while (isxdigit(c));
|
||||
} while (c == '_');
|
||||
}
|
||||
else if (c == 'o' || c == 'O') {
|
||||
/* Octal */
|
||||
c = tok_nextc(tok);
|
||||
do {
|
||||
if (c == '_') {
|
||||
c = tok_nextc(tok);
|
||||
}
|
||||
if (c < '0' || c >= '8') {
|
||||
tok->done = E_TOKEN;
|
||||
tok_backup(tok, c);
|
||||
|
@ -1611,10 +1664,15 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
do {
|
||||
c = tok_nextc(tok);
|
||||
} while ('0' <= c && c < '8');
|
||||
} while (c == '_');
|
||||
}
|
||||
else if (c == 'b' || c == 'B') {
|
||||
/* Binary */
|
||||
c = tok_nextc(tok);
|
||||
do {
|
||||
if (c == '_') {
|
||||
c = tok_nextc(tok);
|
||||
}
|
||||
if (c != '0' && c != '1') {
|
||||
tok->done = E_TOKEN;
|
||||
tok_backup(tok, c);
|
||||
|
@ -1623,24 +1681,45 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
do {
|
||||
c = tok_nextc(tok);
|
||||
} while (c == '0' || c == '1');
|
||||
} while (c == '_');
|
||||
}
|
||||
else {
|
||||
int nonzero = 0;
|
||||
/* maybe old-style octal; c is first char of it */
|
||||
/* in any case, allow '0' as a literal */
|
||||
while (c == '0')
|
||||
while (1) {
|
||||
if (c == '_') {
|
||||
c = tok_nextc(tok);
|
||||
while (isdigit(c)) {
|
||||
nonzero = 1;
|
||||
if (!isdigit(c)) {
|
||||
tok->done = E_TOKEN;
|
||||
tok_backup(tok, c);
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
}
|
||||
if (c != '0') {
|
||||
break;
|
||||
}
|
||||
c = tok_nextc(tok);
|
||||
}
|
||||
if (c == '.')
|
||||
if (isdigit(c)) {
|
||||
nonzero = 1;
|
||||
c = tok_decimal_tail(tok);
|
||||
if (c == 0) {
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
}
|
||||
if (c == '.') {
|
||||
c = tok_nextc(tok);
|
||||
goto fraction;
|
||||
else if (c == 'e' || c == 'E')
|
||||
}
|
||||
else if (c == 'e' || c == 'E') {
|
||||
goto exponent;
|
||||
else if (c == 'j' || c == 'J')
|
||||
}
|
||||
else if (c == 'j' || c == 'J') {
|
||||
goto imaginary;
|
||||
}
|
||||
else if (nonzero) {
|
||||
/* Old-style octal: now disallowed. */
|
||||
tok->done = E_TOKEN;
|
||||
tok_backup(tok, c);
|
||||
return ERRORTOKEN;
|
||||
|
@ -1649,17 +1728,22 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
}
|
||||
else {
|
||||
/* Decimal */
|
||||
do {
|
||||
c = tok_nextc(tok);
|
||||
} while (isdigit(c));
|
||||
c = tok_decimal_tail(tok);
|
||||
if (c == 0) {
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
{
|
||||
/* Accept floating point numbers. */
|
||||
if (c == '.') {
|
||||
c = tok_nextc(tok);
|
||||
fraction:
|
||||
/* Fraction */
|
||||
do {
|
||||
c = tok_nextc(tok);
|
||||
} while (isdigit(c));
|
||||
if (isdigit(c)) {
|
||||
c = tok_decimal_tail(tok);
|
||||
if (c == 0) {
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (c == 'e' || c == 'E') {
|
||||
int e;
|
||||
|
@ -1681,16 +1765,18 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
*p_end = tok->cur;
|
||||
return NUMBER;
|
||||
}
|
||||
do {
|
||||
c = tok_nextc(tok);
|
||||
} while (isdigit(c));
|
||||
c = tok_decimal_tail(tok);
|
||||
if (c == 0) {
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
if (c == 'j' || c == 'J')
|
||||
}
|
||||
if (c == 'j' || c == 'J') {
|
||||
/* Imaginary part */
|
||||
imaginary:
|
||||
c = tok_nextc(tok);
|
||||
}
|
||||
}
|
||||
}
|
||||
tok_backup(tok, c);
|
||||
*p_start = tok->start;
|
||||
*p_end = tok->cur;
|
||||
|
@ -1708,22 +1794,27 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
c = tok_nextc(tok);
|
||||
if (c == quote) {
|
||||
c = tok_nextc(tok);
|
||||
if (c == quote)
|
||||
if (c == quote) {
|
||||
quote_size = 3;
|
||||
else
|
||||
}
|
||||
else {
|
||||
end_quote_size = 1; /* empty string found */
|
||||
}
|
||||
if (c != quote)
|
||||
}
|
||||
if (c != quote) {
|
||||
tok_backup(tok, c);
|
||||
}
|
||||
|
||||
/* Get rest of string */
|
||||
while (end_quote_size != quote_size) {
|
||||
c = tok_nextc(tok);
|
||||
if (c == EOF) {
|
||||
if (quote_size == 3)
|
||||
if (quote_size == 3) {
|
||||
tok->done = E_EOFS;
|
||||
else
|
||||
}
|
||||
else {
|
||||
tok->done = E_EOLS;
|
||||
}
|
||||
tok->cur = tok->inp;
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
|
@ -1732,14 +1823,16 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
tok->cur = tok->inp;
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
if (c == quote)
|
||||
if (c == quote) {
|
||||
end_quote_size += 1;
|
||||
}
|
||||
else {
|
||||
end_quote_size = 0;
|
||||
if (c == '\\')
|
||||
if (c == '\\') {
|
||||
tok_nextc(tok); /* skip escaped char */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*p_start = tok->start;
|
||||
*p_end = tok->cur;
|
||||
|
@ -1767,7 +1860,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
int token3 = PyToken_ThreeChars(c, c2, c3);
|
||||
if (token3 != OP) {
|
||||
token = token3;
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
tok_backup(tok, c3);
|
||||
}
|
||||
*p_start = tok->start;
|
||||
|
|
27
Python/ast.c
27
Python/ast.c
|
@ -4018,7 +4018,7 @@ ast_for_stmt(struct compiling *c, const node *n)
|
|||
}
|
||||
|
||||
static PyObject *
|
||||
parsenumber(struct compiling *c, const char *s)
|
||||
parsenumber_raw(struct compiling *c, const char *s)
|
||||
{
|
||||
const char *end;
|
||||
long x;
|
||||
|
@ -4060,6 +4060,31 @@ parsenumber(struct compiling *c, const char *s)
|
|||
}
|
||||
}
|
||||
|
||||
/* Convert the text of a numeric literal into a Python object, accepting
 * underscores inside the literal.
 *
 * If `s` contains no underscore it is handed to parsenumber_raw() unchanged.
 * Otherwise every underscore is dropped into a temporary copy and that copy
 * is parsed instead.  No check of underscore placement is done here.
 * NOTE(review): presumably the tokenizer has already rejected misplaced
 * underscores before this is reached -- confirm against the caller.
 *
 * Returns the result of parsenumber_raw(), or NULL with MemoryError set if
 * the temporary copy cannot be allocated.
 */
static PyObject *
parsenumber(struct compiling *c, const char *s)
{
    char *dup, *end;
    PyObject *res = NULL;

    assert(s != NULL);

    if (strchr(s, '_') == NULL) {
        return parsenumber_raw(c, s);
    }
    /* Create a duplicate without underscores.  The copy can never be longer
       than the original, so strlen(s) + 1 bytes always suffice. */
    dup = PyMem_Malloc(strlen(s) + 1);
    if (dup == NULL) {
        /* Without this check the copy loop below would write through NULL. */
        return PyErr_NoMemory();
    }
    end = dup;
    for (; *s; s++) {
        if (*s != '_') {
            *end++ = *s;
        }
    }
    *end = '\0';
    res = parsenumber_raw(c, dup);
    PyMem_Free(dup);
    return res;
}
|
||||
|
||||
static PyObject *
|
||||
decode_utf8(struct compiling *c, const char **sPtr, const char *end)
|
||||
{
|
||||
|
|
|
@ -370,6 +370,72 @@ PyOS_string_to_double(const char *s,
|
|||
return result;
|
||||
}
|
||||
|
||||
/* Remove underscores that follow the underscore placement rule from
|
||||
the string and then call the `innerfunc` function on the result.
|
||||
It should return a new object or NULL on exception.
|
||||
|
||||
`what` is used for the error message emitted when underscores are detected
|
||||
that don't follow the rule. `arg` is an opaque pointer passed to the inner
|
||||
function.
|
||||
|
||||
This is used to implement underscore-agnostic conversion for floats
|
||||
and complex numbers.
|
||||
*/
|
||||
PyObject *
|
||||
_Py_string_to_number_with_underscores(
|
||||
const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
|
||||
PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
|
||||
{
|
||||
char prev;
|
||||
const char *p, *last;
|
||||
char *dup, *end;
|
||||
PyObject *result;
|
||||
|
||||
if (strchr(s, '_') == NULL) {
|
||||
return innerfunc(s, orig_len, arg);
|
||||
}
|
||||
|
||||
dup = PyMem_Malloc(orig_len + 1);
|
||||
end = dup;
|
||||
prev = '\0';
|
||||
last = s + orig_len;
|
||||
for (p = s; *p; p++) {
|
||||
if (*p == '_') {
|
||||
/* Underscores are only allowed after digits. */
|
||||
if (!(prev >= '0' && prev <= '9')) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
else {
|
||||
*end++ = *p;
|
||||
/* Underscores are only allowed before digits. */
|
||||
if (prev == '_' && !(*p >= '0' && *p <= '9')) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
prev = *p;
|
||||
}
|
||||
/* Underscores are not allowed at the end. */
|
||||
if (prev == '_') {
|
||||
goto error;
|
||||
}
|
||||
/* No embedded NULs allowed. */
|
||||
if (p != last) {
|
||||
goto error;
|
||||
}
|
||||
*end = '\0';
|
||||
result = innerfunc(dup, end - dup, arg);
|
||||
PyMem_Free(dup);
|
||||
return result;
|
||||
|
||||
error:
|
||||
PyMem_Free(dup);
|
||||
PyErr_Format(PyExc_ValueError,
|
||||
"could not convert string to %s: "
|
||||
"%R", what, obj);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifdef PY_NO_SHORT_FLOAT_REPR
|
||||
|
||||
/* Given a string that may have a decimal point in the current
|
||||
|
|
Loading…
Reference in New Issue