Issue20284: Implement PEP461

This commit is contained in:
Ethan Furman 2015-01-23 20:05:18 -08:00
parent 8861502e07
commit b95b56150f
10 changed files with 1185 additions and 158 deletions

View File

@ -3057,6 +3057,197 @@ place, and instead produce new objects.
always produces a new object, even if no changes were made. always produces a new object, even if no changes were made.
.. _bytes-formatting:
``printf``-style Bytes Formatting
----------------------------------
.. index::
single: formatting, bytes (%)
single: formatting, bytearray (%)
single: interpolation, bytes (%)
single: interpolation, bytearray (%)
single: bytes; formatting
single: bytearray; formatting
single: bytes; interpolation
single: bytearray; interpolation
single: printf-style formatting
single: sprintf-style formatting
single: % formatting
single: % interpolation
.. note::
The formatting operations described here exhibit a variety of quirks that
lead to a number of common errors (such as failing to display tuples and
dictionaries correctly). If the value being printed may be a tuple or
dictionary, wrap it in a tuple.
Bytes objects (``bytes``/``bytearray``) have one unique built-in operation:
the ``%`` operator (modulo).
This is also known as the bytes *formatting* or *interpolation* operator.
Given ``format % values`` (where *format* is a bytes object), ``%`` conversion
specifications in *format* are replaced with zero or more elements of *values*.
The effect is similar to using :c:func:`sprintf` in the C language.
If *format* requires a single argument, *values* may be a single non-tuple
object. [5]_ Otherwise, *values* must be a tuple with exactly the number of
items specified by the format bytes object, or a single mapping object (for
example, a dictionary).
A conversion specifier contains two or more characters and has the following
components, which must occur in this order:
#. The ``'%'`` character, which marks the start of the specifier.
#. Mapping key (optional), consisting of a parenthesised sequence of characters
(for example, ``(somename)``).
#. Conversion flags (optional), which affect the result of some conversion
types.
#. Minimum field width (optional). If specified as an ``'*'`` (asterisk), the
actual width is read from the next element of the tuple in *values*, and the
object to convert comes after the minimum field width and optional precision.
#. Precision (optional), given as a ``'.'`` (dot) followed by the precision. If
specified as ``'*'`` (an asterisk), the actual precision is read from the next
element of the tuple in *values*, and the value to convert comes after the
precision.
#. Length modifier (optional).
#. Conversion type.
When the right argument is a dictionary (or other mapping type), then the
formats in the bytes object *must* include a parenthesised mapping key into that
dictionary inserted immediately after the ``'%'`` character. The mapping key
selects the value to be formatted from the mapping. For example:
>>> print(b'%(language)s has %(number)03d quote types.' %
... {b'language': b"Python", b"number": 2})
b'Python has 002 quote types.'
In this case no ``*`` specifiers may occur in a format (since they require a
sequential parameter list).
The conversion flag characters are:
+---------+---------------------------------------------------------------------+
| Flag | Meaning |
+=========+=====================================================================+
| ``'#'`` | The value conversion will use the "alternate form" (where defined |
| | below). |
+---------+---------------------------------------------------------------------+
| ``'0'`` | The conversion will be zero padded for numeric values. |
+---------+---------------------------------------------------------------------+
| ``'-'`` | The converted value is left adjusted (overrides the ``'0'`` |
| | conversion if both are given). |
+---------+---------------------------------------------------------------------+
| ``' '`` | (a space) A blank should be left before a positive number (or empty |
| | string) produced by a signed conversion. |
+---------+---------------------------------------------------------------------+
| ``'+'`` | A sign character (``'+'`` or ``'-'``) will precede the conversion |
| | (overrides a "space" flag). |
+---------+---------------------------------------------------------------------+
A length modifier (``h``, ``l``, or ``L``) may be present, but is ignored as it
is not necessary for Python -- so e.g. ``%ld`` is identical to ``%d``.
The conversion types are:
+------------+-----------------------------------------------------+-------+
| Conversion | Meaning | Notes |
+============+=====================================================+=======+
| ``'d'`` | Signed integer decimal. | |
+------------+-----------------------------------------------------+-------+
| ``'i'`` | Signed integer decimal. | |
+------------+-----------------------------------------------------+-------+
| ``'o'`` | Signed octal value. | \(1) |
+------------+-----------------------------------------------------+-------+
| ``'u'`` | Obsolete type -- it is identical to ``'d'``. | \(7) |
+------------+-----------------------------------------------------+-------+
| ``'x'`` | Signed hexadecimal (lowercase). | \(2) |
+------------+-----------------------------------------------------+-------+
| ``'X'`` | Signed hexadecimal (uppercase). | \(2) |
+------------+-----------------------------------------------------+-------+
| ``'e'`` | Floating point exponential format (lowercase). | \(3) |
+------------+-----------------------------------------------------+-------+
| ``'E'`` | Floating point exponential format (uppercase). | \(3) |
+------------+-----------------------------------------------------+-------+
| ``'f'`` | Floating point decimal format. | \(3) |
+------------+-----------------------------------------------------+-------+
| ``'F'`` | Floating point decimal format. | \(3) |
+------------+-----------------------------------------------------+-------+
| ``'g'`` | Floating point format. Uses lowercase exponential | \(4) |
| | format if exponent is less than -4 or not less than | |
| | precision, decimal format otherwise. | |
+------------+-----------------------------------------------------+-------+
| ``'G'`` | Floating point format. Uses uppercase exponential | \(4) |
| | format if exponent is less than -4 or not less than | |
| | precision, decimal format otherwise. | |
+------------+-----------------------------------------------------+-------+
| ``'c'`` | Single byte (accepts integer or single | |
| | byte objects). | |
+------------+-----------------------------------------------------+-------+
| ``'b'`` | Bytes (any object that follows the | \(5) |
| | :ref:`buffer protocol <bufferobjects>` or has | |
| | :meth:`__bytes__`). | |
+------------+-----------------------------------------------------+-------+
| ``'s'`` | ``'s'`` is an alias for ``'b'`` and should only | \(6) |
| | be used for Python2/3 code bases. | |
+------------+-----------------------------------------------------+-------+
| ``'a'`` | Bytes (converts any Python object using | \(5) |
| | ``repr(obj).encode('ascii','backslashreplace')``). | |
+------------+-----------------------------------------------------+-------+
| ``'%'`` | No argument is converted, results in a ``'%'`` | |
| | character in the result. | |
+------------+-----------------------------------------------------+-------+
Notes:
(1)
The alternate form causes a leading octal specifier (``'0o'``) to be
inserted before the first digit.
(2)
The alternate form causes a leading ``'0x'`` or ``'0X'`` (depending on whether
the ``'x'`` or ``'X'`` format was used) to be inserted before the first digit.
(3)
The alternate form causes the result to always contain a decimal point, even if
no digits follow it.
The precision determines the number of digits after the decimal point and
defaults to 6.
(4)
The alternate form causes the result to always contain a decimal point, and
trailing zeroes are not removed as they would otherwise be.
The precision determines the number of significant digits before and after the
decimal point and defaults to 6.
(5)
If precision is ``N``, the output is truncated to ``N`` characters.
(6)
``b'%s'`` is deprecated, but will not be removed during the 3.x series.
(7)
See :pep:`237`.
.. note::
The bytearray version of this method does *not* operate in place - it
always produces a new object, even if no changes were made.
.. seealso:: :pep:`461`.
.. versionadded:: 3.5
.. _typememoryview: .. _typememoryview:
Memory Views Memory Views

View File

@ -62,6 +62,7 @@ PyAPI_FUNC(void) PyBytes_Concat(PyObject **, PyObject *);
PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *); PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *);
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API
PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t); PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t);
PyAPI_FUNC(PyObject *) _PyBytes_Format(PyObject *, PyObject *);
#endif #endif
PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t, PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
const char *, Py_ssize_t, const char *, Py_ssize_t,

View File

@ -2245,6 +2245,8 @@ PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
Py_UNICODE c Py_UNICODE c
); );
PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);
/* Create a copy of a unicode string ending with a nul character. Return NULL /* Create a copy of a unicode string ending with a nul character. Return NULL
and raise a MemoryError exception on memory allocation failure, otherwise and raise a MemoryError exception on memory allocation failure, otherwise
return a new allocated buffer (use PyMem_Free() to free the buffer). */ return a new allocated buffer (use PyMem_Free() to free the buffer). */

View File

@ -461,6 +461,28 @@ class BaseBytesTest:
self.assertEqual(b.rindex(i, 3, 9), 7) self.assertEqual(b.rindex(i, 3, 9), 7)
self.assertRaises(ValueError, b.rindex, w, 1, 3) self.assertRaises(ValueError, b.rindex, w, 1, 3)
def test_mod(self):
b = b'hello, %b!'
orig = b
b = b % b'world'
self.assertEqual(b, b'hello, world!')
self.assertEqual(orig, b'hello, %b!')
self.assertFalse(b is orig)
b = b'%s / 100 = %d%%'
a = b % (b'seventy-nine', 79)
self.assertEquals(a, b'seventy-nine / 100 = 79%')
def test_imod(self):
b = b'hello, %b!'
orig = b
b %= b'world'
self.assertEqual(b, b'hello, world!')
self.assertEqual(orig, b'hello, %b!')
self.assertFalse(b is orig)
b = b'%s / 100 = %d%%'
b %= (b'seventy-nine', 79)
self.assertEquals(b, b'seventy-nine / 100 = 79%')
def test_replace(self): def test_replace(self):
b = self.type2test(b'mississippi') b = self.type2test(b'mississippi')
self.assertEqual(b.replace(b'i', b'a'), b'massassappa') self.assertEqual(b.replace(b'i', b'a'), b'massassappa')
@ -990,6 +1012,28 @@ class ByteArrayTest(BaseBytesTest, unittest.TestCase):
b[8:] = b b[8:] = b
self.assertEqual(b, bytearray(list(range(8)) + list(range(256)))) self.assertEqual(b, bytearray(list(range(8)) + list(range(256))))
def test_mod(self):
b = bytearray(b'hello, %b!')
orig = b
b = b % b'world'
self.assertEqual(b, b'hello, world!')
self.assertEqual(orig, bytearray(b'hello, %b!'))
self.assertFalse(b is orig)
b = bytearray(b'%s / 100 = %d%%')
a = b % (b'seventy-nine', 79)
self.assertEquals(a, bytearray(b'seventy-nine / 100 = 79%'))
def test_imod(self):
b = bytearray(b'hello, %b!')
orig = b
b %= b'world'
self.assertEqual(b, b'hello, world!')
self.assertEqual(orig, bytearray(b'hello, %b!'))
self.assertFalse(b is orig)
b = bytearray(b'%s / 100 = %d%%')
b %= (b'seventy-nine', 79)
self.assertEquals(b, bytearray(b'seventy-nine / 100 = 79%'))
def test_iconcat(self): def test_iconcat(self):
b = bytearray(b"abc") b = bytearray(b"abc")
b1 = b b1 = b

View File

@ -9,7 +9,7 @@ maxsize = support.MAX_Py_ssize_t
# test string formatting operator (I am not sure if this is being tested # test string formatting operator (I am not sure if this is being tested
# elsewhere but, surely, some of the given cases are *not* tested because # elsewhere but, surely, some of the given cases are *not* tested because
# they crash python) # they crash python)
# test on unicode strings as well # test on bytes object as well
def testformat(formatstr, args, output=None, limit=None, overflowok=False): def testformat(formatstr, args, output=None, limit=None, overflowok=False):
if verbose: if verbose:
@ -46,181 +46,209 @@ def testformat(formatstr, args, output=None, limit=None, overflowok=False):
if verbose: if verbose:
print('yes') print('yes')
def testcommon(formatstr, args, output=None, limit=None, overflowok=False):
    """Run one formatting check against every applicable string-like type.

    If *formatstr* is a str, the check is first run on the str itself and
    then on its ASCII-encoded bytes and bytearray forms; otherwise only the
    bytes and bytearray forms are checked.  A str *output* is ASCII-encoded
    to build the expected bytes/bytearray results.  *limit* and *overflowok*
    are passed through to testformat() unchanged.
    """
    # if formatstr is a str, test str, bytes, and bytearray;
    # otherwise, test bytes and bytearray
    if isinstance(formatstr, str):
        testformat(formatstr, args, output, limit, overflowok)
        b_format = formatstr.encode('ascii')
    else:
        b_format = formatstr
    ba_format = bytearray(b_format)
    # A lone non-tuple argument is wrapped so the bytes and bytearray checks
    # always receive a tuple of arguments.
    if not isinstance(args, tuple):
        args = (args, )
    b_args = tuple(args)
    if output is None:
        b_output = ba_output = None
    else:
        if isinstance(output, str):
            b_output = output.encode('ascii')
        else:
            b_output = output
        ba_output = bytearray(b_output)
    testformat(b_format, b_args, b_output, limit, overflowok)
    testformat(ba_format, b_args, ba_output, limit, overflowok)
class FormatTest(unittest.TestCase): class FormatTest(unittest.TestCase):
def test_format(self):
testformat("%.1d", (1,), "1") def test_common_format(self):
testformat("%.*d", (sys.maxsize,1), overflowok=True) # expect overflow # test the format identifiers that work the same across
testformat("%.100d", (1,), '00000000000000000000000000000000000000' # str, bytes, and bytearrays (integer, float, oct, hex)
testcommon("%.1d", (1,), "1")
testcommon("%.*d", (sys.maxsize,1), overflowok=True) # expect overflow
testcommon("%.100d", (1,), '00000000000000000000000000000000000000'
'000000000000000000000000000000000000000000000000000000' '000000000000000000000000000000000000000000000000000000'
'00000001', overflowok=True) '00000001', overflowok=True)
testformat("%#.117x", (1,), '0x00000000000000000000000000000000000' testcommon("%#.117x", (1,), '0x00000000000000000000000000000000000'
'000000000000000000000000000000000000000000000000000000' '000000000000000000000000000000000000000000000000000000'
'0000000000000000000000000001', '0000000000000000000000000001',
overflowok=True) overflowok=True)
testformat("%#.118x", (1,), '0x00000000000000000000000000000000000' testcommon("%#.118x", (1,), '0x00000000000000000000000000000000000'
'000000000000000000000000000000000000000000000000000000' '000000000000000000000000000000000000000000000000000000'
'00000000000000000000000000001', '00000000000000000000000000001',
overflowok=True) overflowok=True)
testformat("%f", (1.0,), "1.000000") testcommon("%f", (1.0,), "1.000000")
# these are trying to test the limits of the internal magic-number-length # these are trying to test the limits of the internal magic-number-length
# formatting buffer, if that number changes then these tests are less # formatting buffer, if that number changes then these tests are less
# effective # effective
testformat("%#.*g", (109, -1.e+49/3.)) testcommon("%#.*g", (109, -1.e+49/3.))
testformat("%#.*g", (110, -1.e+49/3.)) testcommon("%#.*g", (110, -1.e+49/3.))
testformat("%#.*g", (110, -1.e+100/3.)) testcommon("%#.*g", (110, -1.e+100/3.))
# test some ridiculously large precision, expect overflow # test some ridiculously large precision, expect overflow
testformat('%12.*f', (123456, 1.0)) testcommon('%12.*f', (123456, 1.0))
# check for internal overflow validation on length of precision # check for internal overflow validation on length of precision
# these tests should no longer cause overflow in Python # these tests should no longer cause overflow in Python
# 2.7/3.1 and later. # 2.7/3.1 and later.
testformat("%#.*g", (110, -1.e+100/3.)) testcommon("%#.*g", (110, -1.e+100/3.))
testformat("%#.*G", (110, -1.e+100/3.)) testcommon("%#.*G", (110, -1.e+100/3.))
testformat("%#.*f", (110, -1.e+100/3.)) testcommon("%#.*f", (110, -1.e+100/3.))
testformat("%#.*F", (110, -1.e+100/3.)) testcommon("%#.*F", (110, -1.e+100/3.))
# Formatting of integers. Overflow is not ok # Formatting of integers. Overflow is not ok
testformat("%x", 10, "a") testcommon("%x", 10, "a")
testformat("%x", 100000000000, "174876e800") testcommon("%x", 100000000000, "174876e800")
testformat("%o", 10, "12") testcommon("%o", 10, "12")
testformat("%o", 100000000000, "1351035564000") testcommon("%o", 100000000000, "1351035564000")
testformat("%d", 10, "10") testcommon("%d", 10, "10")
testformat("%d", 100000000000, "100000000000") testcommon("%d", 100000000000, "100000000000")
big = 123456789012345678901234567890 big = 123456789012345678901234567890
testformat("%d", big, "123456789012345678901234567890") testcommon("%d", big, "123456789012345678901234567890")
testformat("%d", -big, "-123456789012345678901234567890") testcommon("%d", -big, "-123456789012345678901234567890")
testformat("%5d", -big, "-123456789012345678901234567890") testcommon("%5d", -big, "-123456789012345678901234567890")
testformat("%31d", -big, "-123456789012345678901234567890") testcommon("%31d", -big, "-123456789012345678901234567890")
testformat("%32d", -big, " -123456789012345678901234567890") testcommon("%32d", -big, " -123456789012345678901234567890")
testformat("%-32d", -big, "-123456789012345678901234567890 ") testcommon("%-32d", -big, "-123456789012345678901234567890 ")
testformat("%032d", -big, "-0123456789012345678901234567890") testcommon("%032d", -big, "-0123456789012345678901234567890")
testformat("%-032d", -big, "-123456789012345678901234567890 ") testcommon("%-032d", -big, "-123456789012345678901234567890 ")
testformat("%034d", -big, "-000123456789012345678901234567890") testcommon("%034d", -big, "-000123456789012345678901234567890")
testformat("%034d", big, "0000123456789012345678901234567890") testcommon("%034d", big, "0000123456789012345678901234567890")
testformat("%0+34d", big, "+000123456789012345678901234567890") testcommon("%0+34d", big, "+000123456789012345678901234567890")
testformat("%+34d", big, " +123456789012345678901234567890") testcommon("%+34d", big, " +123456789012345678901234567890")
testformat("%34d", big, " 123456789012345678901234567890") testcommon("%34d", big, " 123456789012345678901234567890")
testformat("%.2d", big, "123456789012345678901234567890") testcommon("%.2d", big, "123456789012345678901234567890")
testformat("%.30d", big, "123456789012345678901234567890") testcommon("%.30d", big, "123456789012345678901234567890")
testformat("%.31d", big, "0123456789012345678901234567890") testcommon("%.31d", big, "0123456789012345678901234567890")
testformat("%32.31d", big, " 0123456789012345678901234567890") testcommon("%32.31d", big, " 0123456789012345678901234567890")
testformat("%d", float(big), "123456________________________", 6) testcommon("%d", float(big), "123456________________________", 6)
big = 0x1234567890abcdef12345 # 21 hex digits big = 0x1234567890abcdef12345 # 21 hex digits
testformat("%x", big, "1234567890abcdef12345") testcommon("%x", big, "1234567890abcdef12345")
testformat("%x", -big, "-1234567890abcdef12345") testcommon("%x", -big, "-1234567890abcdef12345")
testformat("%5x", -big, "-1234567890abcdef12345") testcommon("%5x", -big, "-1234567890abcdef12345")
testformat("%22x", -big, "-1234567890abcdef12345") testcommon("%22x", -big, "-1234567890abcdef12345")
testformat("%23x", -big, " -1234567890abcdef12345") testcommon("%23x", -big, " -1234567890abcdef12345")
testformat("%-23x", -big, "-1234567890abcdef12345 ") testcommon("%-23x", -big, "-1234567890abcdef12345 ")
testformat("%023x", -big, "-01234567890abcdef12345") testcommon("%023x", -big, "-01234567890abcdef12345")
testformat("%-023x", -big, "-1234567890abcdef12345 ") testcommon("%-023x", -big, "-1234567890abcdef12345 ")
testformat("%025x", -big, "-0001234567890abcdef12345") testcommon("%025x", -big, "-0001234567890abcdef12345")
testformat("%025x", big, "00001234567890abcdef12345") testcommon("%025x", big, "00001234567890abcdef12345")
testformat("%0+25x", big, "+0001234567890abcdef12345") testcommon("%0+25x", big, "+0001234567890abcdef12345")
testformat("%+25x", big, " +1234567890abcdef12345") testcommon("%+25x", big, " +1234567890abcdef12345")
testformat("%25x", big, " 1234567890abcdef12345") testcommon("%25x", big, " 1234567890abcdef12345")
testformat("%.2x", big, "1234567890abcdef12345") testcommon("%.2x", big, "1234567890abcdef12345")
testformat("%.21x", big, "1234567890abcdef12345") testcommon("%.21x", big, "1234567890abcdef12345")
testformat("%.22x", big, "01234567890abcdef12345") testcommon("%.22x", big, "01234567890abcdef12345")
testformat("%23.22x", big, " 01234567890abcdef12345") testcommon("%23.22x", big, " 01234567890abcdef12345")
testformat("%-23.22x", big, "01234567890abcdef12345 ") testcommon("%-23.22x", big, "01234567890abcdef12345 ")
testformat("%X", big, "1234567890ABCDEF12345") testcommon("%X", big, "1234567890ABCDEF12345")
testformat("%#X", big, "0X1234567890ABCDEF12345") testcommon("%#X", big, "0X1234567890ABCDEF12345")
testformat("%#x", big, "0x1234567890abcdef12345") testcommon("%#x", big, "0x1234567890abcdef12345")
testformat("%#x", -big, "-0x1234567890abcdef12345") testcommon("%#x", -big, "-0x1234567890abcdef12345")
testformat("%#.23x", -big, "-0x001234567890abcdef12345") testcommon("%#.23x", -big, "-0x001234567890abcdef12345")
testformat("%#+.23x", big, "+0x001234567890abcdef12345") testcommon("%#+.23x", big, "+0x001234567890abcdef12345")
testformat("%# .23x", big, " 0x001234567890abcdef12345") testcommon("%# .23x", big, " 0x001234567890abcdef12345")
testformat("%#+.23X", big, "+0X001234567890ABCDEF12345") testcommon("%#+.23X", big, "+0X001234567890ABCDEF12345")
testformat("%#-+.23X", big, "+0X001234567890ABCDEF12345") testcommon("%#-+.23X", big, "+0X001234567890ABCDEF12345")
testformat("%#-+26.23X", big, "+0X001234567890ABCDEF12345") testcommon("%#-+26.23X", big, "+0X001234567890ABCDEF12345")
testformat("%#-+27.23X", big, "+0X001234567890ABCDEF12345 ") testcommon("%#-+27.23X", big, "+0X001234567890ABCDEF12345 ")
testformat("%#+27.23X", big, " +0X001234567890ABCDEF12345") testcommon("%#+27.23X", big, " +0X001234567890ABCDEF12345")
# next one gets two leading zeroes from precision, and another from the # next one gets two leading zeroes from precision, and another from the
# 0 flag and the width # 0 flag and the width
testformat("%#+027.23X", big, "+0X0001234567890ABCDEF12345") testcommon("%#+027.23X", big, "+0X0001234567890ABCDEF12345")
# same, except no 0 flag # same, except no 0 flag
testformat("%#+27.23X", big, " +0X001234567890ABCDEF12345") testcommon("%#+27.23X", big, " +0X001234567890ABCDEF12345")
big = 0o12345670123456701234567012345670 # 32 octal digits big = 0o12345670123456701234567012345670 # 32 octal digits
testformat("%o", big, "12345670123456701234567012345670") testcommon("%o", big, "12345670123456701234567012345670")
testformat("%o", -big, "-12345670123456701234567012345670") testcommon("%o", -big, "-12345670123456701234567012345670")
testformat("%5o", -big, "-12345670123456701234567012345670") testcommon("%5o", -big, "-12345670123456701234567012345670")
testformat("%33o", -big, "-12345670123456701234567012345670") testcommon("%33o", -big, "-12345670123456701234567012345670")
testformat("%34o", -big, " -12345670123456701234567012345670") testcommon("%34o", -big, " -12345670123456701234567012345670")
testformat("%-34o", -big, "-12345670123456701234567012345670 ") testcommon("%-34o", -big, "-12345670123456701234567012345670 ")
testformat("%034o", -big, "-012345670123456701234567012345670") testcommon("%034o", -big, "-012345670123456701234567012345670")
testformat("%-034o", -big, "-12345670123456701234567012345670 ") testcommon("%-034o", -big, "-12345670123456701234567012345670 ")
testformat("%036o", -big, "-00012345670123456701234567012345670") testcommon("%036o", -big, "-00012345670123456701234567012345670")
testformat("%036o", big, "000012345670123456701234567012345670") testcommon("%036o", big, "000012345670123456701234567012345670")
testformat("%0+36o", big, "+00012345670123456701234567012345670") testcommon("%0+36o", big, "+00012345670123456701234567012345670")
testformat("%+36o", big, " +12345670123456701234567012345670") testcommon("%+36o", big, " +12345670123456701234567012345670")
testformat("%36o", big, " 12345670123456701234567012345670") testcommon("%36o", big, " 12345670123456701234567012345670")
testformat("%.2o", big, "12345670123456701234567012345670") testcommon("%.2o", big, "12345670123456701234567012345670")
testformat("%.32o", big, "12345670123456701234567012345670") testcommon("%.32o", big, "12345670123456701234567012345670")
testformat("%.33o", big, "012345670123456701234567012345670") testcommon("%.33o", big, "012345670123456701234567012345670")
testformat("%34.33o", big, " 012345670123456701234567012345670") testcommon("%34.33o", big, " 012345670123456701234567012345670")
testformat("%-34.33o", big, "012345670123456701234567012345670 ") testcommon("%-34.33o", big, "012345670123456701234567012345670 ")
testformat("%o", big, "12345670123456701234567012345670") testcommon("%o", big, "12345670123456701234567012345670")
testformat("%#o", big, "0o12345670123456701234567012345670") testcommon("%#o", big, "0o12345670123456701234567012345670")
testformat("%#o", -big, "-0o12345670123456701234567012345670") testcommon("%#o", -big, "-0o12345670123456701234567012345670")
testformat("%#.34o", -big, "-0o0012345670123456701234567012345670") testcommon("%#.34o", -big, "-0o0012345670123456701234567012345670")
testformat("%#+.34o", big, "+0o0012345670123456701234567012345670") testcommon("%#+.34o", big, "+0o0012345670123456701234567012345670")
testformat("%# .34o", big, " 0o0012345670123456701234567012345670") testcommon("%# .34o", big, " 0o0012345670123456701234567012345670")
testformat("%#+.34o", big, "+0o0012345670123456701234567012345670") testcommon("%#+.34o", big, "+0o0012345670123456701234567012345670")
testformat("%#-+.34o", big, "+0o0012345670123456701234567012345670") testcommon("%#-+.34o", big, "+0o0012345670123456701234567012345670")
testformat("%#-+37.34o", big, "+0o0012345670123456701234567012345670") testcommon("%#-+37.34o", big, "+0o0012345670123456701234567012345670")
testformat("%#+37.34o", big, "+0o0012345670123456701234567012345670") testcommon("%#+37.34o", big, "+0o0012345670123456701234567012345670")
# next one gets one leading zero from precision # next one gets one leading zero from precision
testformat("%.33o", big, "012345670123456701234567012345670") testcommon("%.33o", big, "012345670123456701234567012345670")
# base marker shouldn't change that, since "0" is redundant # base marker shouldn't change that, since "0" is redundant
testformat("%#.33o", big, "0o012345670123456701234567012345670") testcommon("%#.33o", big, "0o012345670123456701234567012345670")
# but reduce precision, and base marker should add a zero # but reduce precision, and base marker should add a zero
testformat("%#.32o", big, "0o12345670123456701234567012345670") testcommon("%#.32o", big, "0o12345670123456701234567012345670")
# one leading zero from precision, and another from "0" flag & width # one leading zero from precision, and another from "0" flag & width
testformat("%034.33o", big, "0012345670123456701234567012345670") testcommon("%034.33o", big, "0012345670123456701234567012345670")
# base marker shouldn't change that # base marker shouldn't change that
testformat("%0#34.33o", big, "0o012345670123456701234567012345670") testcommon("%0#34.33o", big, "0o012345670123456701234567012345670")
# Some small ints, in both Python int and flavors). # Some small ints, in both Python int and flavors).
testformat("%d", 42, "42") testcommon("%d", 42, "42")
testformat("%d", -42, "-42") testcommon("%d", -42, "-42")
testformat("%d", 42, "42") testcommon("%d", 42, "42")
testformat("%d", -42, "-42") testcommon("%d", -42, "-42")
testformat("%d", 42.0, "42") testcommon("%d", 42.0, "42")
testformat("%#x", 1, "0x1") testcommon("%#x", 1, "0x1")
testformat("%#x", 1, "0x1") testcommon("%#x", 1, "0x1")
testformat("%#X", 1, "0X1") testcommon("%#X", 1, "0X1")
testformat("%#X", 1, "0X1") testcommon("%#X", 1, "0X1")
testformat("%#o", 1, "0o1") testcommon("%#o", 1, "0o1")
testformat("%#o", 1, "0o1") testcommon("%#o", 1, "0o1")
testformat("%#o", 0, "0o0") testcommon("%#o", 0, "0o0")
testformat("%#o", 0, "0o0") testcommon("%#o", 0, "0o0")
testformat("%o", 0, "0") testcommon("%o", 0, "0")
testformat("%o", 0, "0") testcommon("%o", 0, "0")
testformat("%d", 0, "0") testcommon("%d", 0, "0")
testformat("%d", 0, "0") testcommon("%d", 0, "0")
testformat("%#x", 0, "0x0") testcommon("%#x", 0, "0x0")
testformat("%#x", 0, "0x0") testcommon("%#x", 0, "0x0")
testformat("%#X", 0, "0X0") testcommon("%#X", 0, "0X0")
testformat("%#X", 0, "0X0") testcommon("%#X", 0, "0X0")
testformat("%x", 0x42, "42") testcommon("%x", 0x42, "42")
testformat("%x", -0x42, "-42") testcommon("%x", -0x42, "-42")
testformat("%x", 0x42, "42") testcommon("%x", 0x42, "42")
testformat("%x", -0x42, "-42") testcommon("%x", -0x42, "-42")
testformat("%o", 0o42, "42") testcommon("%o", 0o42, "42")
testformat("%o", -0o42, "-42") testcommon("%o", -0o42, "-42")
testformat("%o", 0o42, "42") testcommon("%o", 0o42, "42")
testformat("%o", -0o42, "-42") testcommon("%o", -0o42, "-42")
# alternate float formatting
testcommon('%g', 1.1, '1.1')
testcommon('%#g', 1.1, '1.10000')
def test_str_format(self):
testformat("%r", "\u0378", "'\\u0378'") # non printable testformat("%r", "\u0378", "'\\u0378'") # non printable
testformat("%a", "\u0378", "'\\u0378'") # non printable testformat("%a", "\u0378", "'\\u0378'") # non printable
testformat("%r", "\u0374", "'\u0374'") # printable testformat("%r", "\u0374", "'\u0374'") # printable
testformat("%a", "\u0374", "'\\u0374'") # printable testformat("%a", "\u0374", "'\\u0374'") # printable
# alternate float formatting # Test exception for unknown format characters, etc.
testformat('%g', 1.1, '1.1')
testformat('%#g', 1.1, '1.10000')
# Test exception for unknown format characters
if verbose: if verbose:
print('Testing exceptions') print('Testing exceptions')
def test_exc(formatstr, args, exception, excmsg): def test_exc(formatstr, args, exception, excmsg):
@ -247,8 +275,83 @@ class FormatTest(unittest.TestCase):
test_exc('%g', '1', TypeError, "a float is required") test_exc('%g', '1', TypeError, "a float is required")
test_exc('no format', '1', TypeError, test_exc('no format', '1', TypeError,
"not all arguments converted during string formatting") "not all arguments converted during string formatting")
test_exc('no format', '1', TypeError,
"not all arguments converted during string formatting") if maxsize == 2**31-1:
# crashes 2.2.1 and earlier:
try:
"%*d"%(maxsize, -127)
except MemoryError:
pass
else:
raise TestFailed('"%*d"%(maxsize, -127) should fail')
def test_bytes_and_bytearray_format(self):
# %c will insert a single byte, either from an int in range(256), or
# from a bytes argument of length 1, not from a str.
testcommon(b"%c", 7, b"\x07")
testcommon(b"%c", b"Z", b"Z")
testcommon(b"%c", bytearray(b"Z"), b"Z")
# %b will insert a series of bytes, either from a type that supports
# the Py_buffer protocol, or something that has a __bytes__ method
class FakeBytes(object):
def __bytes__(self):
return b'123'
fb = FakeBytes()
testcommon(b"%b", b"abc", b"abc")
testcommon(b"%b", bytearray(b"def"), b"def")
testcommon(b"%b", fb, b"123")
# # %s is an alias for %b -- should only be used for Py2/3 code
testcommon(b"%s", b"abc", b"abc")
testcommon(b"%s", bytearray(b"def"), b"def")
testcommon(b"%s", fb, b"123")
# %a will give the equivalent of
# repr(some_obj).encode('ascii', 'backslashreplace')
testcommon(b"%a", 3.14, b"3.14")
testcommon(b"%a", b"ghi", b"b'ghi'")
testcommon(b"%a", "jkl", b"'jkl'")
testcommon(b"%a", "\u0544", b"'\\u0544'")
# Test exception for unknown format characters, etc.
if verbose:
print('Testing exceptions')
def test_exc(formatstr, args, exception, excmsg):
try:
testformat(formatstr, args)
except exception as exc:
if str(exc) == excmsg:
if verbose:
print("yes")
else:
if verbose: print('no')
print('Unexpected ', exception, ':', repr(str(exc)))
except:
if verbose: print('no')
print('Unexpected exception')
raise
else:
raise TestFailed('did not get expected exception: %s' % excmsg)
test_exc(b'%d', '1', TypeError,
"%d format: a number is required, not str")
test_exc(b'%d', b'1', TypeError,
"%d format: a number is required, not bytes")
test_exc(b'%g', '1', TypeError, "float argument required, not str")
test_exc(b'%g', b'1', TypeError, "float argument required, not bytes")
test_exc(b'no format', 7, TypeError,
"not all arguments converted during bytes formatting")
test_exc(b'no format', b'1', TypeError,
"not all arguments converted during bytes formatting")
test_exc(b'no format', bytearray(b'1'), TypeError,
"not all arguments converted during bytes formatting")
test_exc(b"%c", 256, TypeError,
"%c requires an integer in range(256) or a single byte")
test_exc(b"%c", b"Za", TypeError,
"%c requires an integer in range(256) or a single byte")
test_exc(b"%c", "Yb", TypeError,
"%c requires an integer in range(256) or a single byte")
test_exc(b"%b", "Xc", TypeError,
"%b requires bytes, or an object that implements __bytes__, not 'str'")
test_exc(b"%s", "Wd", TypeError,
"%b requires bytes, or an object that implements __bytes__, not 'str'")
if maxsize == 2**31-1: if maxsize == 2**31-1:
# crashes 2.2.1 and earlier: # crashes 2.2.1 and earlier:

View File

@ -14,6 +14,9 @@ Core and Builtins
atomic memory access if available. Patch written by Vitor de Lima and Gustavo atomic memory access if available. Patch written by Vitor de Lima and Gustavo
Temple. Temple.
- Issue #20284: %-interpolation (aka printf) formatting added for bytes and
bytearray.
- Issue #23048: Fix jumping out of an infinite while loop in the pdb. - Issue #23048: Fix jumping out of an infinite while loop in the pdb.
- Issue #20335: bytes constructor now raises TypeError when encoding or errors - Issue #20335: bytes constructor now raises TypeError when encoding or errors

View File

@ -686,8 +686,9 @@ PyObject_Format(PyObject *obj, PyObject *format_spec)
Py_DECREF(meth); Py_DECREF(meth);
if (result && !PyUnicode_Check(result)) { if (result && !PyUnicode_Check(result)) {
PyErr_SetString(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"__format__ method did not return string"); "__format__ must return a str, not %.200s",
Py_TYPE(result)->tp_name);
Py_DECREF(result); Py_DECREF(result);
result = NULL; result = NULL;
goto done; goto done;

View File

@ -4,6 +4,7 @@
#include "Python.h" #include "Python.h"
#include "structmember.h" #include "structmember.h"
#include "bytes_methods.h" #include "bytes_methods.h"
#include "bytesobject.h"
/*[clinic input] /*[clinic input]
class bytearray "PyByteArrayObject *" "&PyByteArray_Type" class bytearray "PyByteArrayObject *" "&PyByteArray_Type"
@ -294,6 +295,31 @@ PyByteArray_Concat(PyObject *a, PyObject *b)
return (PyObject *)result; return (PyObject *)result;
} }
/* Implement ``bytearray % args`` (printf-style formatting).
 *
 * The contents of `self` are copied into a temporary bytes object, that
 * object is formatted with _PyBytes_Format(), and the formatted bytes are
 * converted back into a new bytearray.
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
bytearray_format(PyByteArrayObject *self, PyObject *args)
{
    PyObject *bytes_in, *bytes_out, *res;

    if (self == NULL || !PyByteArray_Check(self) || args == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }
    /* Copy with an explicit length: a bytearray may contain NUL bytes, and
       a strlen()-based copy would silently truncate the format at the
       first one. */
    bytes_in = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(self),
                                         PyByteArray_GET_SIZE(self));
    if (bytes_in == NULL)
        return NULL;
    bytes_out = _PyBytes_Format(bytes_in, args);
    Py_DECREF(bytes_in);
    if (bytes_out == NULL)
        return NULL;
    /* PyByteArray_FromObject() returns NULL with an exception set on
       failure, so its result can be handed straight back. */
    res = PyByteArray_FromObject(bytes_out);
    Py_DECREF(bytes_out);
    return res;
}
/* Functions stuffed into the type object */ /* Functions stuffed into the type object */
static Py_ssize_t static Py_ssize_t
@ -3723,6 +3749,21 @@ bytearray_methods[] = {
{NULL} {NULL}
}; };
/* nb_remainder slot for bytearray: `v % w` formats v with the arguments w.
   When the left operand is not a bytearray, NotImplemented is returned. */
static PyObject *
bytearray_mod(PyObject *v, PyObject *w)
{
    if (PyByteArray_Check(v)) {
        return bytearray_format((PyByteArrayObject *)v, w);
    }
    Py_RETURN_NOTIMPLEMENTED;
}
static PyNumberMethods bytearray_as_number = {
0, /*nb_add*/
0, /*nb_subtract*/
0, /*nb_multiply*/
bytearray_mod, /*nb_remainder*/
};
PyDoc_STRVAR(bytearray_doc, PyDoc_STRVAR(bytearray_doc,
"bytearray(iterable_of_ints) -> bytearray\n\ "bytearray(iterable_of_ints) -> bytearray\n\
bytearray(string, encoding[, errors]) -> bytearray\n\ bytearray(string, encoding[, errors]) -> bytearray\n\
@ -3751,7 +3792,7 @@ PyTypeObject PyByteArray_Type = {
0, /* tp_setattr */ 0, /* tp_setattr */
0, /* tp_reserved */ 0, /* tp_reserved */
(reprfunc)bytearray_repr, /* tp_repr */ (reprfunc)bytearray_repr, /* tp_repr */
0, /* tp_as_number */ &bytearray_as_number, /* tp_as_number */
&bytearray_as_sequence, /* tp_as_sequence */ &bytearray_as_sequence, /* tp_as_sequence */
&bytearray_as_mapping, /* tp_as_mapping */ &bytearray_as_mapping, /* tp_as_mapping */
0, /* tp_hash */ 0, /* tp_hash */

View File

@ -400,6 +400,634 @@ PyBytes_FromFormat(const char *format, ...)
return ret; return ret;
} }
/* Helpers for formatstring */
/* Fetch the next formatting argument and advance *p_argidx.

   A negative arglen means `args` is a single (non-tuple) value, which is
   returned as-is; otherwise the item at the current index of the tuple is
   returned via PyTuple_GetItem().  When the index has reached arglen, a
   TypeError is set and NULL is returned. */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
{
    const Py_ssize_t current = *p_argidx;

    if (current >= arglen) {
        PyErr_SetString(PyExc_TypeError,
                        "not enough arguments for format string");
        return NULL;
    }
    *p_argidx = current + 1;
    return (arglen < 0) ? args : PyTuple_GetItem(args, current);
}
/* Conversion flags, stored as a bitmask while a '%' specifier is parsed.
* Each bit is set when the corresponding flag character is seen:
* F_LJUST '-' (also set when a '*' width argument is negative)
* F_SIGN '+'
* F_BLANK ' '
* F_ALT '#'
* F_ZERO '0'
*/
#define F_LJUST (1<<0)
#define F_SIGN (1<<1)
#define F_BLANK (1<<2)
#define F_ALT (1<<3)
#define F_ZERO (1<<4)
/* Render `v` as a float for one of the e/E/f/F/g/G conversions.

   v      object converted with PyFloat_AsDouble(); a failure there is
          reported as TypeError
   flags  bitmask of format flags; only F_ALT is looked at
   prec   precision; a negative value selects the default of 6
   type   the conversion character, passed on to PyOS_double_to_string()

   Returns a new reference to a PyBytes object, or NULL on failure. */
static PyObject *
formatfloat(PyObject *v, int flags, int prec, int type)
{
    PyObject *bytes;
    char *text;
    int dtsf_flags = 0;
    double value = PyFloat_AsDouble(v);

    if (value == -1.0 && PyErr_Occurred()) {
        PyErr_Format(PyExc_TypeError, "float argument required, "
                     "not %.200s", Py_TYPE(v)->tp_name);
        return NULL;
    }

    if (flags & F_ALT)
        dtsf_flags = Py_DTSF_ALT;

    text = PyOS_double_to_string(value, type, (prec < 0) ? 6 : prec,
                                 dtsf_flags, NULL);
    if (text == NULL)
        return NULL;

    bytes = PyBytes_FromStringAndSize(text, strlen(text));
    PyMem_Free(text);
    return bytes;
}
/* format_long emulates the format codes d, u, o, x and X, and the F_ALT
 * flag, for Python's long (unbounded) ints.
 *
 * The digits are produced by _PyUnicode_FormatLong() and then encoded to
 * ASCII bytes.
 *
 *     val     object to be converted
 *     flags   bitmask of format flags; only F_ALT is looked at
 *     prec    minimum number of digits; 0-fill on left if needed
 *     type    a character in [duoxX]; u acts the same as d
 *     pbuf    on success, *pbuf is set to point at the returned object's
 *             character data
 *     plen    on success, *plen is set to the number of chars at *pbuf
 *
 * Return value: a new PyBytes*, or NULL if error (in which case *pbuf and
 * *plen are left untouched).  The caller must decref the returned object
 * when done using *pbuf.
 *
 * The string starting at *pbuf is of the form
 *     "-"? ("0x" | "0X")? digit+
 * "0x"/"0X" are present only for x and X conversions, with F_ALT set in
 * flags.
 *
 * CAUTION:  o, x and X conversions on regular ints can never
 * produce a '-' sign, but can for Python's unbounded ints.
 */
static PyObject *
format_long(PyObject *val, int flags, int prec, int type,
            char **pbuf, int *plen)
{
    PyObject *ascii;
    PyObject *text = _PyUnicode_FormatLong(val, flags & F_ALT, prec, type);

    if (text == NULL)
        return NULL;

    ascii = _PyUnicode_AsASCIIString(text, "strict");
    Py_DECREF(text);
    if (ascii != NULL) {
        *pbuf = PyBytes_AS_STRING(ascii);
        *plen = PyBytes_GET_SIZE(ascii);
    }
    return ascii;
}
/* Format the argument of a %c conversion into buf.

   `v` may be a bytes or bytearray object (parsed with the "c" converter of
   PyArg_Parse) or an object convertible with PyLong_AsLong() whose value
   lies in 0..255.  The byte is written to buf[0] followed by a NUL in
   buf[1]; buflen is not consulted, so the buffer is presumed to be at
   least 2 characters long.

   Returns 1 (the number of bytes produced), or -1 with an exception set. */
Py_LOCAL_INLINE(int)
formatchar(char *buf, size_t buflen, PyObject *v)
{
    PyObject *converted = NULL;
    int status = -1;

    /* convert bytearray to bytes */
    if (PyByteArray_Check(v)) {
        converted = PyBytes_FromObject(v);
        if (converted == NULL)
            return -1;
        v = converted;
    }

    if (PyBytes_Check(v)) {
        if (!PyArg_Parse(v, "c;%c requires an integer in range(256) or a single byte", &buf[0]))
            goto done;
    }
    else {
        long ival = PyLong_AsLong(v);
        /* A failed conversion and an out-of-range value are reported with
           the same TypeError, replacing any exception already set. */
        if ((ival == -1 && PyErr_Occurred()) || ival < 0 || ival > 255) {
            PyErr_SetString(PyExc_TypeError,
                "%c requires an integer in range(256) or a single byte");
            goto done;
        }
        buf[0] = ival;
    }
    buf[1] = '\0';
    status = 1;

done:
    Py_XDECREF(converted);
    return status;
}
/* Convert the argument of a %b / %s conversion to a bytes object.

   Accepted inputs, tried in this order:
     - a bytearray, which is copied into a new bytes object;
     - a bytes object, which is returned with a new reference;
     - any object with a __bytes__ special method, whose result must itself
       be a bytes object.

   Returns a new reference, or NULL with an exception set (TypeError when
   the object is none of the above). */
static PyObject *
format_obj(PyObject *v)
{
    PyObject *result;
    PyObject *func;
    _Py_IDENTIFIER(__bytes__);

    /* convert bytearray to bytes */
    if (PyByteArray_Check(v))
        return PyBytes_FromObject(v);

    /* is it a bytes object? */
    if (PyBytes_Check(v)) {
        Py_INCREF(v);
        return v;
    }

    /* does it support __bytes__? */
    func = _PyObject_LookupSpecial(v, &PyId___bytes__);
    if (func != NULL) {
        result = PyObject_CallFunctionObjArgs(func, NULL);
        Py_DECREF(func);
        if (result == NULL)
            return NULL;
        if (!PyBytes_Check(result)) {
            PyErr_Format(PyExc_TypeError,
                         "__bytes__ returned non-bytes (type %.200s)",
                         Py_TYPE(result)->tp_name);
            Py_DECREF(result);
            return NULL;
        }
        return result;
    }
    /* The lookup itself may have raised; propagate that exception instead
       of masking it with the generic TypeError below. */
    if (PyErr_Occurred())
        return NULL;

    PyErr_Format(PyExc_TypeError,
        "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
        Py_TYPE(v)->tp_name);
    return NULL;
}
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN (size_t)120

/* Implement printf-style formatting for bytes: ``format % args``.

   format  must be a bytes object holding the format specification
   args    a tuple of values, a single non-tuple value, or a mapping
           (an object with mp_subscript that is not a tuple, bytes, str
           or bytearray) used for ``%(key)`` lookups with bytes keys

   Supported conversions: %% a s b i d u o x X e E f F g G c, with the
   flags - + space # 0, an optional width and an optional .precision
   (either of which may be '*' to take the value from the arguments).

   Returns a new bytes object, or NULL with an exception set. */
PyObject *
_PyBytes_Format(PyObject *format, PyObject *args)
{
    char *fmt, *res;
    Py_ssize_t arglen, argidx;
    Py_ssize_t reslen, rescnt, fmtcnt;
    int args_owned = 0;   /* true while `args` is a value fetched from dict */
    PyObject *result;
    PyObject *repr;
    PyObject *dict = NULL;

    if (format == NULL || !PyBytes_Check(format) || args == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }
    fmt = PyBytes_AS_STRING(format);
    fmtcnt = PyBytes_GET_SIZE(format);
    /* reslen is the allocated size of result, rescnt the room still free */
    reslen = rescnt = fmtcnt + 100;
    result = PyBytes_FromStringAndSize((char *)NULL, reslen);
    if (result == NULL)
        return NULL;
    res = PyBytes_AsString(result);
    if (PyTuple_Check(args)) {
        arglen = PyTuple_GET_SIZE(args);
        argidx = 0;
    }
    else {
        /* single value: getnextarg() hands it out exactly once */
        arglen = -1;
        argidx = -2;
    }
    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
        !PyByteArray_Check(args)) {
        dict = args;
    }
    while (--fmtcnt >= 0) {
        if (*fmt != '%') {
            if (--rescnt < 0) {
                rescnt = fmtcnt + 100;
                reslen += rescnt;
                if (_PyBytes_Resize(&result, reslen)) {
                    /* result is already NULL here; only the value looked
                       up from the mapping is still owned. */
                    if (args_owned) {
                        Py_DECREF(args);
                    }
                    return NULL;
                }
                res = PyBytes_AS_STRING(result)
                    + reslen - rescnt;
                --rescnt;
            }
            *res++ = *fmt++;
        }
        else {
            /* Got a format specifier */
            int flags = 0;
            Py_ssize_t width = -1;
            int prec = -1;
            int c = '\0';
            int fill;
            int isnumok;
            PyObject *v = NULL;
            PyObject *temp = NULL;   /* owns the bytes that pbuf points into */
            char *pbuf;
            int sign;
            Py_ssize_t len;
            char formatbuf[FORMATBUFLEN];
                 /* For format{int,char}() */

            fmt++;
            if (*fmt == '(') {
                char *keystart;
                Py_ssize_t keylen;
                PyObject *key;
                int pcount = 1;

                if (dict == NULL) {
                    PyErr_SetString(PyExc_TypeError,
                             "format requires a mapping");
                    goto error;
                }
                ++fmt;
                --fmtcnt;
                keystart = fmt;
                /* Skip over balanced parentheses */
                while (pcount > 0 && --fmtcnt >= 0) {
                    if (*fmt == ')')
                        --pcount;
                    else if (*fmt == '(')
                        ++pcount;
                    fmt++;
                }
                keylen = fmt - keystart - 1;
                if (fmtcnt < 0 || pcount > 0) {
                    PyErr_SetString(PyExc_ValueError,
                               "incomplete format key");
                    goto error;
                }
                key = PyBytes_FromStringAndSize(keystart,
                                                keylen);
                if (key == NULL)
                    goto error;
                if (args_owned) {
                    Py_DECREF(args);
                    args_owned = 0;
                }
                args = PyObject_GetItem(dict, key);
                Py_DECREF(key);
                if (args == NULL) {
                    goto error;
                }
                args_owned = 1;
                arglen = -1;
                argidx = -2;
            }
            /* flags */
            while (--fmtcnt >= 0) {
                switch (c = *fmt++) {
                case '-': flags |= F_LJUST; continue;
                case '+': flags |= F_SIGN; continue;
                case ' ': flags |= F_BLANK; continue;
                case '#': flags |= F_ALT; continue;
                case '0': flags |= F_ZERO; continue;
                }
                break;
            }
            /* width */
            if (c == '*') {
                v = getnextarg(args, arglen, &argidx);
                if (v == NULL)
                    goto error;
                if (!PyLong_Check(v)) {
                    PyErr_SetString(PyExc_TypeError,
                                    "* wants int");
                    goto error;
                }
                width = PyLong_AsSsize_t(v);
                if (width == -1 && PyErr_Occurred())
                    goto error;
                if (width < 0) {
                    flags |= F_LJUST;
                    width = -width;
                }
                if (--fmtcnt >= 0)
                    c = *fmt++;
            }
            else if (c >= 0 && isdigit(c)) {
                width = c - '0';
                while (--fmtcnt >= 0) {
                    c = Py_CHARMASK(*fmt++);
                    if (!isdigit(c))
                        break;
                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
                        PyErr_SetString(
                            PyExc_ValueError,
                            "width too big");
                        goto error;
                    }
                    width = width*10 + (c - '0');
                }
            }
            /* precision */
            if (c == '.') {
                prec = 0;
                if (--fmtcnt >= 0)
                    c = *fmt++;
                if (c == '*') {
                    Py_ssize_t precarg;

                    v = getnextarg(args, arglen, &argidx);
                    if (v == NULL)
                        goto error;
                    if (!PyLong_Check(v)) {
                        PyErr_SetString(
                            PyExc_TypeError,
                            "* wants int");
                        goto error;
                    }
                    precarg = PyLong_AsSsize_t(v);
                    if (precarg == -1 && PyErr_Occurred())
                        goto error;
                    /* prec is an int: reject values that would be
                       truncated, matching the check on literal digits. */
                    if (precarg > INT_MAX) {
                        PyErr_SetString(
                            PyExc_ValueError,
                            "prec too big");
                        goto error;
                    }
                    prec = (precarg < 0) ? 0 : (int)precarg;
                    if (--fmtcnt >= 0)
                        c = *fmt++;
                }
                else if (c >= 0 && isdigit(c)) {
                    prec = c - '0';
                    while (--fmtcnt >= 0) {
                        c = Py_CHARMASK(*fmt++);
                        if (!isdigit(c))
                            break;
                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
                            PyErr_SetString(
                                PyExc_ValueError,
                                "prec too big");
                            goto error;
                        }
                        prec = prec*10 + (c - '0');
                    }
                }
            } /* prec */
            /* a C length modifier is accepted and ignored */
            if (fmtcnt >= 0) {
                if (c == 'h' || c == 'l' || c == 'L') {
                    if (--fmtcnt >= 0)
                        c = *fmt++;
                }
            }
            if (fmtcnt < 0) {
                PyErr_SetString(PyExc_ValueError,
                                "incomplete format");
                goto error;
            }
            if (c != '%') {
                v = getnextarg(args, arglen, &argidx);
                if (v == NULL)
                    goto error;
            }
            sign = 0;
            fill = ' ';
            switch (c) {
            case '%':
                pbuf = "%";
                len = 1;
                break;
            case 'a':
                /* repr(v), encoded to ASCII with backslash escapes */
                temp = PyObject_Repr(v);
                if (temp == NULL)
                    goto error;
                repr = PyUnicode_AsEncodedObject(temp, "ascii",
                                                 "backslashreplace");
                Py_DECREF(temp);
                temp = NULL;
                if (repr == NULL)
                    goto error;
                /* Normalise the encoder's result to a bytes object that
                   temp owns until the end of this iteration, so pbuf
                   stays valid while it is copied into the result. */
                temp = format_obj(repr);
                Py_DECREF(repr);
                if (temp == NULL)
                    goto error;
                pbuf = PyBytes_AS_STRING(temp);
                len = PyBytes_GET_SIZE(temp);
                if (prec >= 0 && len > prec)
                    len = prec;
                break;
            case 's':
                /* %s is only for 2/3 code; 3 only code should use %b */
                /* fall through */
            case 'b':
                temp = format_obj(v);
                if (temp == NULL)
                    goto error;
                pbuf = PyBytes_AS_STRING(temp);
                len = PyBytes_GET_SIZE(temp);
                if (prec >= 0 && len > prec)
                    len = prec;
                break;
            case 'i':
            case 'd':
            case 'u':
            case 'o':
            case 'x':
            case 'X':
                if (c == 'i')
                    c = 'd';
                isnumok = 0;
                if (PyNumber_Check(v)) {
                    PyObject *iobj=NULL;

                    if ((PyLong_Check(v))) {
                        iobj = v;
                        Py_INCREF(iobj);
                    }
                    else {
                        iobj = PyNumber_Long(v);
                    }
                    if (iobj!=NULL) {
                        if (PyLong_Check(iobj)) {
                            int ilen;

                            isnumok = 1;
                            temp = format_long(iobj, flags, prec, c,
                                    &pbuf, &ilen);
                            Py_DECREF(iobj);
                            len = ilen;
                            if (!temp)
                                goto error;
                            sign = 1;
                        }
                        else {
                            Py_DECREF(iobj);
                        }
                    }
                }
                if (!isnumok) {
                    PyErr_Format(PyExc_TypeError,
                        "%%%c format: a number is required, "
                        "not %.200s", c, Py_TYPE(v)->tp_name);
                    goto error;
                }
                if (flags & F_ZERO)
                    fill = '0';
                break;
            case 'e':
            case 'E':
            case 'f':
            case 'F':
            case 'g':
            case 'G':
                temp = formatfloat(v, flags, prec, c);
                if (temp == NULL)
                    goto error;
                pbuf = PyBytes_AS_STRING(temp);
                len = PyBytes_GET_SIZE(temp);
                sign = 1;
                if (flags & F_ZERO)
                    fill = '0';
                break;
            case 'c':
                pbuf = formatbuf;
                len = formatchar(pbuf, sizeof(formatbuf), v);
                if (len < 0)
                    goto error;
                break;
            default:
                PyErr_Format(PyExc_ValueError,
                  "unsupported format character '%c' (0x%x) "
                  "at index %zd",
                  c, c,
                  (Py_ssize_t)(fmt - 1 -
                               PyBytes_AsString(format)));
                goto error;
            }
            /* For numeric conversions, split the sign off the digits so
               it can be placed before or after the padding. */
            if (sign) {
                if (*pbuf == '-' || *pbuf == '+') {
                    sign = *pbuf++;
                    len--;
                }
                else if (flags & F_SIGN)
                    sign = '+';
                else if (flags & F_BLANK)
                    sign = ' ';
                else
                    sign = 0;
            }
            if (width < len)
                width = len;
            if (rescnt - (sign != 0) < width) {
                reslen -= rescnt;
                rescnt = width + fmtcnt + 100;
                reslen += rescnt;
                if (reslen < 0) {
                    Py_DECREF(result);
                    Py_XDECREF(temp);
                    if (args_owned) {
                        Py_DECREF(args);
                    }
                    return PyErr_NoMemory();
                }
                if (_PyBytes_Resize(&result, reslen)) {
                    Py_XDECREF(temp);
                    if (args_owned) {
                        Py_DECREF(args);
                    }
                    return NULL;
                }
                res = PyBytes_AS_STRING(result)
                    + reslen - rescnt;
            }
            if (sign) {
                /* with zero fill the sign goes before the padding */
                if (fill != ' ')
                    *res++ = sign;
                rescnt--;
                if (width > len)
                    width--;
            }
            if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
                assert(pbuf[0] == '0');
                assert(pbuf[1] == c);
                if (fill != ' ') {
                    *res++ = *pbuf++;
                    *res++ = *pbuf++;
                }
                rescnt -= 2;
                width -= 2;
                if (width < 0)
                    width = 0;
                len -= 2;
            }
            if (width > len && !(flags & F_LJUST)) {
                do {
                    --rescnt;
                    *res++ = fill;
                } while (--width > len);
            }
            if (fill == ' ') {
                /* with space fill the sign and 0x prefix follow the padding */
                if (sign)
                    *res++ = sign;
                if ((flags & F_ALT) &&
                    (c == 'x' || c == 'X')) {
                    assert(pbuf[0] == '0');
                    assert(pbuf[1] == c);
                    *res++ = *pbuf++;
                    *res++ = *pbuf++;
                }
            }
            Py_MEMCPY(res, pbuf, len);
            res += len;
            rescnt -= len;
            /* left-justified: pad on the right with spaces */
            while (--width >= len) {
                --rescnt;
                *res++ = ' ';
            }
            if (dict && (argidx < arglen) && c != '%') {
                PyErr_SetString(PyExc_TypeError,
                           "not all arguments converted during bytes formatting");
                Py_XDECREF(temp);
                goto error;
            }
            Py_XDECREF(temp);
        } /* '%' */
    } /* until end */
    if (argidx < arglen && !dict) {
        PyErr_SetString(PyExc_TypeError,
                        "not all arguments converted during bytes formatting");
        goto error;
    }
    if (args_owned) {
        Py_DECREF(args);
    }
    /* trim the unused tail of the result buffer */
    if (_PyBytes_Resize(&result, reslen - rescnt))
        return NULL;
    return result;

 error:
    Py_DECREF(result);
    if (args_owned) {
        Py_DECREF(args);
    }
    return NULL;
}
/* =-= */
static void static void
bytes_dealloc(PyObject *op) bytes_dealloc(PyObject *op)
{ {
@ -2995,6 +3623,21 @@ bytes_methods[] = {
{NULL, NULL} /* sentinel */ {NULL, NULL} /* sentinel */
}; };
/* nb_remainder slot for bytes: `v % w` formats v with the arguments w.
   When the left operand is not a bytes object, NotImplemented is
   returned. */
static PyObject *
bytes_mod(PyObject *v, PyObject *w)
{
    if (PyBytes_Check(v)) {
        return _PyBytes_Format(v, w);
    }
    Py_RETURN_NOTIMPLEMENTED;
}
static PyNumberMethods bytes_as_number = {
0, /*nb_add*/
0, /*nb_subtract*/
0, /*nb_multiply*/
bytes_mod, /*nb_remainder*/
};
static PyObject * static PyObject *
str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
@ -3286,7 +3929,7 @@ PyTypeObject PyBytes_Type = {
0, /* tp_setattr */ 0, /* tp_setattr */
0, /* tp_reserved */ 0, /* tp_reserved */
(reprfunc)bytes_repr, /* tp_repr */ (reprfunc)bytes_repr, /* tp_repr */
0, /* tp_as_number */ &bytes_as_number, /* tp_as_number */
&bytes_as_sequence, /* tp_as_sequence */ &bytes_as_sequence, /* tp_as_sequence */
&bytes_as_mapping, /* tp_as_mapping */ &bytes_as_mapping, /* tp_as_mapping */
(hashfunc)bytes_hash, /* tp_hash */ (hashfunc)bytes_hash, /* tp_hash */
@ -3377,14 +4020,14 @@ PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
} }
/* The following function breaks the notion that strings are immutable: /* The following function breaks the notion that bytes are immutable:
it changes the size of a string. We get away with this only if there it changes the size of a bytes object. We get away with this only if there
is only one module referencing the object. You can also think of it is only one module referencing the object. You can also think of it
as creating a new string object and destroying the old one, only as creating a new bytes object and destroying the old one, only
more efficiently. In any case, don't use this if the string may more efficiently. In any case, don't use this if the bytes object may
already be known to some other part of the code... already be known to some other part of the code...
Note that if there's not enough memory to resize the string, the original Note that if there's not enough memory to resize the bytes object, the
string object at *pv is deallocated, *pv is set to NULL, an "out of original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
memory" exception is set, and -1 is returned. Else (on success) 0 is memory" exception is set, and -1 is returned. Else (on success) 0 is
returned, and the value in *pv may or may not be the same as on input. returned, and the value in *pv may or may not be the same as on input.
As always, an extra byte is allocated for a trailing \0 byte (newsize As always, an extra byte is allocated for a trailing \0 byte (newsize

View File

@ -13893,8 +13893,8 @@ formatfloat(PyObject *v, struct unicode_format_arg_t *arg,
* CAUTION: o, x and X conversions on regular ints can never * CAUTION: o, x and X conversions on regular ints can never
* produce a '-' sign, but can for Python's unbounded ints. * produce a '-' sign, but can for Python's unbounded ints.
*/ */
static PyObject* PyObject *
formatlong(PyObject *val, struct unicode_format_arg_t *arg) _PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
{ {
PyObject *result = NULL; PyObject *result = NULL;
char *buf; char *buf;
@ -13904,8 +13904,6 @@ formatlong(PyObject *val, struct unicode_format_arg_t *arg)
Py_ssize_t llen; Py_ssize_t llen;
int numdigits; /* len == numnondigits + numdigits */ int numdigits; /* len == numnondigits + numdigits */
int numnondigits = 0; int numnondigits = 0;
int prec = arg->prec;
int type = arg->ch;
/* Avoid exceeding SSIZE_T_MAX */ /* Avoid exceeding SSIZE_T_MAX */
if (prec > INT_MAX-3) { if (prec > INT_MAX-3) {
@ -13954,7 +13952,7 @@ formatlong(PyObject *val, struct unicode_format_arg_t *arg)
if (llen > INT_MAX) { if (llen > INT_MAX) {
Py_DECREF(result); Py_DECREF(result);
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"string too large in _PyBytes_FormatLong"); "string too large in _PyUnicode_FormatLong");
return NULL; return NULL;
} }
len = (int)llen; len = (int)llen;
@ -13964,7 +13962,7 @@ formatlong(PyObject *val, struct unicode_format_arg_t *arg)
assert(numdigits > 0); assert(numdigits > 0);
/* Get rid of base marker unless F_ALT */ /* Get rid of base marker unless F_ALT */
if (((arg->flags & F_ALT) == 0 && if (((alt) == 0 &&
(type == 'o' || type == 'x' || type == 'X'))) { (type == 'o' || type == 'x' || type == 'X'))) {
assert(buf[sign] == '0'); assert(buf[sign] == '0');
assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' || assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
@ -14099,7 +14097,7 @@ mainformatlong(PyObject *v,
return 1; return 1;
} }
res = formatlong(iobj, arg); res = _PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type);
Py_DECREF(iobj); Py_DECREF(iobj);
if (res == NULL) if (res == NULL)
return -1; return -1;