diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index a403d6f6a9f..55f53513f3b 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -3057,6 +3057,197 @@ place, and instead produce new objects. always produces a new object, even if no changes were made. +.. _bytes-formatting: + +``printf``-style Bytes Formatting +---------------------------------- + +.. index:: + single: formatting, bytes (%) + single: formatting, bytearray (%) + single: interpolation, bytes (%) + single: interpolation, bytearray (%) + single: bytes; formatting + single: bytearray; formatting + single: bytes; interpolation + single: bytearray; interpolation + single: printf-style formatting + single: sprintf-style formatting + single: % formatting + single: % interpolation + +.. note:: + + The formatting operations described here exhibit a variety of quirks that + lead to a number of common errors (such as failing to display tuples and + dictionaries correctly). If the value being printed may be a tuple or + dictionary, wrap it in a tuple. + +Bytes objects (``bytes``/``bytearray``) have one unique built-in operation: +the ``%`` operator (modulo). +This is also known as the bytes *formatting* or *interpolation* operator. +Given ``format % values`` (where *format* is a bytes object), ``%`` conversion +specifications in *format* are replaced with zero or more elements of *values*. +The effect is similar to using the :c:func:`sprintf` in the C language. + +If *format* requires a single argument, *values* may be a single non-tuple +object. [5]_ Otherwise, *values* must be a tuple with exactly the number of +items specified by the format bytes object, or a single mapping object (for +example, a dictionary). + +A conversion specifier contains two or more characters and has the following +components, which must occur in this order: + +#. The ``'%'`` character, which marks the start of the specifier. + +#. Mapping key (optional), consisting of a parenthesised sequence of characters + (for example, ``(somename)``). + +#. Conversion flags (optional), which affect the result of some conversion + types. + +#. Minimum field width (optional). If specified as an ``'*'`` (asterisk), the + actual width is read from the next element of the tuple in *values*, and the + object to convert comes after the minimum field width and optional precision. + +#. Precision (optional), given as a ``'.'`` (dot) followed by the precision. If + specified as ``'*'`` (an asterisk), the actual precision is read from the next + element of the tuple in *values*, and the value to convert comes after the + precision. + +#. Length modifier (optional). + +#. Conversion type. + +When the right argument is a dictionary (or other mapping type), then the +formats in the bytes object *must* include a parenthesised mapping key into that +dictionary inserted immediately after the ``'%'`` character. The mapping key +selects the value to be formatted from the mapping. For example: + + >>> print(b'%(language)s has %(number)03d quote types.' % + ... {b'language': b"Python", b"number": 2}) + b'Python has 002 quote types.' + +In this case no ``*`` specifiers may occur in a format (since they require a +sequential parameter list). + +The conversion flag characters are: + ++---------+---------------------------------------------------------------------+ +| Flag | Meaning | ++=========+=====================================================================+ +| ``'#'`` | The value conversion will use the "alternate form" (where defined | +| | below). | ++---------+---------------------------------------------------------------------+ +| ``'0'`` | The conversion will be zero padded for numeric values. | ++---------+---------------------------------------------------------------------+ +| ``'-'`` | The converted value is left adjusted (overrides the ``'0'`` | +| | conversion if both are given). | ++---------+---------------------------------------------------------------------+ +| ``' '`` | (a space) A blank should be left before a positive number (or empty | +| | string) produced by a signed conversion. | ++---------+---------------------------------------------------------------------+ +| ``'+'`` | A sign character (``'+'`` or ``'-'``) will precede the conversion | +| | (overrides a "space" flag). | ++---------+---------------------------------------------------------------------+ + +A length modifier (``h``, ``l``, or ``L``) may be present, but is ignored as it +is not necessary for Python -- so e.g. ``%ld`` is identical to ``%d``. + +The conversion types are: + ++------------+-----------------------------------------------------+-------+ +| Conversion | Meaning | Notes | ++============+=====================================================+=======+ +| ``'d'`` | Signed integer decimal. | | ++------------+-----------------------------------------------------+-------+ +| ``'i'`` | Signed integer decimal. | | ++------------+-----------------------------------------------------+-------+ +| ``'o'`` | Signed octal value. | \(1) | ++------------+-----------------------------------------------------+-------+ +| ``'u'`` | Obsolete type -- it is identical to ``'d'``. | \(7) | ++------------+-----------------------------------------------------+-------+ +| ``'x'`` | Signed hexadecimal (lowercase). | \(2) | ++------------+-----------------------------------------------------+-------+ +| ``'X'`` | Signed hexadecimal (uppercase). | \(2) | ++------------+-----------------------------------------------------+-------+ +| ``'e'`` | Floating point exponential format (lowercase). | \(3) | ++------------+-----------------------------------------------------+-------+ +| ``'E'`` | Floating point exponential format (uppercase). | \(3) | ++------------+-----------------------------------------------------+-------+ +| ``'f'`` | Floating point decimal format. | \(3) | ++------------+-----------------------------------------------------+-------+ +| ``'F'`` | Floating point decimal format. | \(3) | ++------------+-----------------------------------------------------+-------+ +| ``'g'`` | Floating point format. Uses lowercase exponential | \(4) | +| | format if exponent is less than -4 or not less than | | +| | precision, decimal format otherwise. | | ++------------+-----------------------------------------------------+-------+ +| ``'G'`` | Floating point format. Uses uppercase exponential | \(4) | +| | format if exponent is less than -4 or not less than | | +| | precision, decimal format otherwise. | | ++------------+-----------------------------------------------------+-------+ +| ``'c'`` | Single byte (accepts integer or single | | +| | byte objects). | | ++------------+-----------------------------------------------------+-------+ +| ``'b'`` | Bytes (any object that follows the | \(5) | +| | :ref:`buffer protocol ` or has | | +| | :meth:`__bytes__`). | | ++------------+-----------------------------------------------------+-------+ +| ``'s'`` | ``'s'`` is an alias for ``'b'`` and should only | \(6) | +| | be used for Python2/3 code bases. | | ++------------+-----------------------------------------------------+-------+ +| ``'a'`` | Bytes (converts any Python object using | \(5) | +| | ``repr(obj).encode('ascii','backslashreplace)``). | | ++------------+-----------------------------------------------------+-------+ +| ``'%'`` | No argument is converted, results in a ``'%'`` | | +| | character in the result. | | ++------------+-----------------------------------------------------+-------+ + +Notes: + +(1) + The alternate form causes a leading zero (``'0'``) to be inserted between + left-hand padding and the formatting of the number if the leading character + of the result is not already a zero. + +(2) + The alternate form causes a leading ``'0x'`` or ``'0X'`` (depending on whether + the ``'x'`` or ``'X'`` format was used) to be inserted between left-hand padding + and the formatting of the number if the leading character of the result is not + already a zero. + +(3) + The alternate form causes the result to always contain a decimal point, even if + no digits follow it. + + The precision determines the number of digits after the decimal point and + defaults to 6. + +(4) + The alternate form causes the result to always contain a decimal point, and + trailing zeroes are not removed as they would otherwise be. + + The precision determines the number of significant digits before and after the + decimal point and defaults to 6. + +(5) + If precision is ``N``, the output is truncated to ``N`` characters. + +(6) + ``b'%s'`` is deprecated, but will not be removed during the 3.x series. + +(7) + See :pep:`237`. + +.. note:: + + The bytearray version of this method does *not* operate in place - it + always produces a new object, even if no changes were made. + +.. seealso:: :pep:`461`. +.. versionadded:: 3.5 + .. _typememoryview: Memory Views diff --git a/Include/bytesobject.h b/Include/bytesobject.h index 0ee8d366d4a..e379bace37e 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -62,6 +62,7 @@ PyAPI_FUNC(void) PyBytes_Concat(PyObject **, PyObject *); PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *); #ifndef Py_LIMITED_API PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t); +PyAPI_FUNC(PyObject *) _PyBytes_Format(PyObject *, PyObject *); #endif PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t, const char *, Py_ssize_t, diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 729f5848163..d2ffabee162 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -2245,6 +2245,8 @@ PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr( Py_UNICODE c ); +PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int); + /* Create a copy of a unicode string ending with a nul character. Return NULL and raise a MemoryError exception on memory allocation failure, otherwise return a new allocated buffer (use PyMem_Free() to free the buffer). */ diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index e8dde5df761..995a908718b 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -461,6 +461,28 @@ class BaseBytesTest: self.assertEqual(b.rindex(i, 3, 9), 7) self.assertRaises(ValueError, b.rindex, w, 1, 3) + def test_mod(self): + b = b'hello, %b!' + orig = b + b = b % b'world' + self.assertEqual(b, b'hello, world!') + self.assertEqual(orig, b'hello, %b!') + self.assertFalse(b is orig) + b = b'%s / 100 = %d%%' + a = b % (b'seventy-nine', 79) + self.assertEquals(a, b'seventy-nine / 100 = 79%') + + def test_imod(self): + b = b'hello, %b!' + orig = b + b %= b'world' + self.assertEqual(b, b'hello, world!') + self.assertEqual(orig, b'hello, %b!') + self.assertFalse(b is orig) + b = b'%s / 100 = %d%%' + b %= (b'seventy-nine', 79) + self.assertEquals(b, b'seventy-nine / 100 = 79%') + def test_replace(self): b = self.type2test(b'mississippi') self.assertEqual(b.replace(b'i', b'a'), b'massassappa') @@ -990,6 +1012,28 @@ class ByteArrayTest(BaseBytesTest, unittest.TestCase): b[8:] = b self.assertEqual(b, bytearray(list(range(8)) + list(range(256)))) + def test_mod(self): + b = bytearray(b'hello, %b!') + orig = b + b = b % b'world' + self.assertEqual(b, b'hello, world!') + self.assertEqual(orig, bytearray(b'hello, %b!')) + self.assertFalse(b is orig) + b = bytearray(b'%s / 100 = %d%%') + a = b % (b'seventy-nine', 79) + self.assertEquals(a, bytearray(b'seventy-nine / 100 = 79%')) + + def test_imod(self): + b = bytearray(b'hello, %b!') + orig = b + b %= b'world' + self.assertEqual(b, b'hello, world!') + self.assertEqual(orig, bytearray(b'hello, %b!')) + self.assertFalse(b is orig) + b = bytearray(b'%s / 100 = %d%%') + b %= (b'seventy-nine', 79) + self.assertEquals(b, bytearray(b'seventy-nine / 100 = 79%')) + def test_iconcat(self): b = bytearray(b"abc") b1 = b diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index 631bf35ad72..9d1f5d33029 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -9,7 +9,7 @@ maxsize = support.MAX_Py_ssize_t # test string formatting operator (I am not sure if this is being tested # elsewhere but, surely, some of the given cases are *not* tested because # they crash python) -# test on unicode strings as well +# test on bytes object as well def testformat(formatstr, args, output=None, limit=None, overflowok=False): if verbose: @@ -46,181 +46,209 @@ def testformat(formatstr, args, output=None, limit=None, overflowok=False): if verbose: print('yes') +def testcommon(formatstr, args, output=None, limit=None, overflowok=False): + # if formatstr is a str, test str, bytes, and bytearray; + # otherwise, test bytes and bytearry + if isinstance(formatstr, str): + testformat(formatstr, args, output, limit, overflowok) + b_format = formatstr.encode('ascii') + else: + b_format = formatstr + ba_format = bytearray(b_format) + b_args = [] + if not isinstance(args, tuple): + args = (args, ) + b_args = tuple(args) + if output is None: + b_output = ba_output = None + else: + if isinstance(output, str): + b_output = output.encode('ascii') + else: + b_output = output + ba_output = bytearray(b_output) + testformat(b_format, b_args, b_output, limit, overflowok) + testformat(ba_format, b_args, ba_output, limit, overflowok) + class FormatTest(unittest.TestCase): - def test_format(self): - testformat("%.1d", (1,), "1") - testformat("%.*d", (sys.maxsize,1), overflowok=True) # expect overflow - testformat("%.100d", (1,), '00000000000000000000000000000000000000' + + def test_common_format(self): + # test the format identifiers that work the same across + # str, bytes, and bytearrays (integer, float, oct, hex) + testcommon("%.1d", (1,), "1") + testcommon("%.*d", (sys.maxsize,1), overflowok=True) # expect overflow + testcommon("%.100d", (1,), '00000000000000000000000000000000000000' '000000000000000000000000000000000000000000000000000000' '00000001', overflowok=True) - testformat("%#.117x", (1,), '0x00000000000000000000000000000000000' + testcommon("%#.117x", (1,), '0x00000000000000000000000000000000000' '000000000000000000000000000000000000000000000000000000' '0000000000000000000000000001', overflowok=True) - testformat("%#.118x", (1,), '0x00000000000000000000000000000000000' + testcommon("%#.118x", (1,), '0x00000000000000000000000000000000000' '000000000000000000000000000000000000000000000000000000' '00000000000000000000000000001', overflowok=True) - testformat("%f", (1.0,), "1.000000") + testcommon("%f", (1.0,), "1.000000") # these are trying to test the limits of the internal magic-number-length # formatting buffer, if that number changes then these tests are less # effective - testformat("%#.*g", (109, -1.e+49/3.)) - testformat("%#.*g", (110, -1.e+49/3.)) - testformat("%#.*g", (110, -1.e+100/3.)) + testcommon("%#.*g", (109, -1.e+49/3.)) + testcommon("%#.*g", (110, -1.e+49/3.)) + testcommon("%#.*g", (110, -1.e+100/3.)) # test some ridiculously large precision, expect overflow - testformat('%12.*f', (123456, 1.0)) + testcommon('%12.*f', (123456, 1.0)) # check for internal overflow validation on length of precision # these tests should no longer cause overflow in Python # 2.7/3.1 and later. - testformat("%#.*g", (110, -1.e+100/3.)) - testformat("%#.*G", (110, -1.e+100/3.)) - testformat("%#.*f", (110, -1.e+100/3.)) - testformat("%#.*F", (110, -1.e+100/3.)) + testcommon("%#.*g", (110, -1.e+100/3.)) + testcommon("%#.*G", (110, -1.e+100/3.)) + testcommon("%#.*f", (110, -1.e+100/3.)) + testcommon("%#.*F", (110, -1.e+100/3.)) # Formatting of integers. Overflow is not ok - testformat("%x", 10, "a") - testformat("%x", 100000000000, "174876e800") - testformat("%o", 10, "12") - testformat("%o", 100000000000, "1351035564000") - testformat("%d", 10, "10") - testformat("%d", 100000000000, "100000000000") + testcommon("%x", 10, "a") + testcommon("%x", 100000000000, "174876e800") + testcommon("%o", 10, "12") + testcommon("%o", 100000000000, "1351035564000") + testcommon("%d", 10, "10") + testcommon("%d", 100000000000, "100000000000") big = 123456789012345678901234567890 - testformat("%d", big, "123456789012345678901234567890") - testformat("%d", -big, "-123456789012345678901234567890") - testformat("%5d", -big, "-123456789012345678901234567890") - testformat("%31d", -big, "-123456789012345678901234567890") - testformat("%32d", -big, " -123456789012345678901234567890") - testformat("%-32d", -big, "-123456789012345678901234567890 ") - testformat("%032d", -big, "-0123456789012345678901234567890") - testformat("%-032d", -big, "-123456789012345678901234567890 ") - testformat("%034d", -big, "-000123456789012345678901234567890") - testformat("%034d", big, "0000123456789012345678901234567890") - testformat("%0+34d", big, "+000123456789012345678901234567890") - testformat("%+34d", big, " +123456789012345678901234567890") - testformat("%34d", big, " 123456789012345678901234567890") - testformat("%.2d", big, "123456789012345678901234567890") - testformat("%.30d", big, "123456789012345678901234567890") - testformat("%.31d", big, "0123456789012345678901234567890") - testformat("%32.31d", big, " 0123456789012345678901234567890") - testformat("%d", float(big), "123456________________________", 6) + testcommon("%d", big, "123456789012345678901234567890") + testcommon("%d", -big, "-123456789012345678901234567890") + testcommon("%5d", -big, "-123456789012345678901234567890") + testcommon("%31d", -big, "-123456789012345678901234567890") + testcommon("%32d", -big, " -123456789012345678901234567890") + testcommon("%-32d", -big, "-123456789012345678901234567890 ") + testcommon("%032d", -big, "-0123456789012345678901234567890") + testcommon("%-032d", -big, "-123456789012345678901234567890 ") + testcommon("%034d", -big, "-000123456789012345678901234567890") + testcommon("%034d", big, "0000123456789012345678901234567890") + testcommon("%0+34d", big, "+000123456789012345678901234567890") + testcommon("%+34d", big, " +123456789012345678901234567890") + testcommon("%34d", big, " 123456789012345678901234567890") + testcommon("%.2d", big, "123456789012345678901234567890") + testcommon("%.30d", big, "123456789012345678901234567890") + testcommon("%.31d", big, "0123456789012345678901234567890") + testcommon("%32.31d", big, " 0123456789012345678901234567890") + testcommon("%d", float(big), "123456________________________", 6) big = 0x1234567890abcdef12345 # 21 hex digits - testformat("%x", big, "1234567890abcdef12345") - testformat("%x", -big, "-1234567890abcdef12345") - testformat("%5x", -big, "-1234567890abcdef12345") - testformat("%22x", -big, "-1234567890abcdef12345") - testformat("%23x", -big, " -1234567890abcdef12345") - testformat("%-23x", -big, "-1234567890abcdef12345 ") - testformat("%023x", -big, "-01234567890abcdef12345") - testformat("%-023x", -big, "-1234567890abcdef12345 ") - testformat("%025x", -big, "-0001234567890abcdef12345") - testformat("%025x", big, "00001234567890abcdef12345") - testformat("%0+25x", big, "+0001234567890abcdef12345") - testformat("%+25x", big, " +1234567890abcdef12345") - testformat("%25x", big, " 1234567890abcdef12345") - testformat("%.2x", big, "1234567890abcdef12345") - testformat("%.21x", big, "1234567890abcdef12345") - testformat("%.22x", big, "01234567890abcdef12345") - testformat("%23.22x", big, " 01234567890abcdef12345") - testformat("%-23.22x", big, "01234567890abcdef12345 ") - testformat("%X", big, "1234567890ABCDEF12345") - testformat("%#X", big, "0X1234567890ABCDEF12345") - testformat("%#x", big, "0x1234567890abcdef12345") - testformat("%#x", -big, "-0x1234567890abcdef12345") - testformat("%#.23x", -big, "-0x001234567890abcdef12345") - testformat("%#+.23x", big, "+0x001234567890abcdef12345") - testformat("%# .23x", big, " 0x001234567890abcdef12345") - testformat("%#+.23X", big, "+0X001234567890ABCDEF12345") - testformat("%#-+.23X", big, "+0X001234567890ABCDEF12345") - testformat("%#-+26.23X", big, "+0X001234567890ABCDEF12345") - testformat("%#-+27.23X", big, "+0X001234567890ABCDEF12345 ") - testformat("%#+27.23X", big, " +0X001234567890ABCDEF12345") + testcommon("%x", big, "1234567890abcdef12345") + testcommon("%x", -big, "-1234567890abcdef12345") + testcommon("%5x", -big, "-1234567890abcdef12345") + testcommon("%22x", -big, "-1234567890abcdef12345") + testcommon("%23x", -big, " -1234567890abcdef12345") + testcommon("%-23x", -big, "-1234567890abcdef12345 ") + testcommon("%023x", -big, "-01234567890abcdef12345") + testcommon("%-023x", -big, "-1234567890abcdef12345 ") + testcommon("%025x", -big, "-0001234567890abcdef12345") + testcommon("%025x", big, "00001234567890abcdef12345") + testcommon("%0+25x", big, "+0001234567890abcdef12345") + testcommon("%+25x", big, " +1234567890abcdef12345") + testcommon("%25x", big, " 1234567890abcdef12345") + testcommon("%.2x", big, "1234567890abcdef12345") + testcommon("%.21x", big, "1234567890abcdef12345") + testcommon("%.22x", big, "01234567890abcdef12345") + testcommon("%23.22x", big, " 01234567890abcdef12345") + testcommon("%-23.22x", big, "01234567890abcdef12345 ") + testcommon("%X", big, "1234567890ABCDEF12345") + testcommon("%#X", big, "0X1234567890ABCDEF12345") + testcommon("%#x", big, "0x1234567890abcdef12345") + testcommon("%#x", -big, "-0x1234567890abcdef12345") + testcommon("%#.23x", -big, "-0x001234567890abcdef12345") + testcommon("%#+.23x", big, "+0x001234567890abcdef12345") + testcommon("%# .23x", big, " 0x001234567890abcdef12345") + testcommon("%#+.23X", big, "+0X001234567890ABCDEF12345") + testcommon("%#-+.23X", big, "+0X001234567890ABCDEF12345") + testcommon("%#-+26.23X", big, "+0X001234567890ABCDEF12345") + testcommon("%#-+27.23X", big, "+0X001234567890ABCDEF12345 ") + testcommon("%#+27.23X", big, " +0X001234567890ABCDEF12345") # next one gets two leading zeroes from precision, and another from the # 0 flag and the width - testformat("%#+027.23X", big, "+0X0001234567890ABCDEF12345") + testcommon("%#+027.23X", big, "+0X0001234567890ABCDEF12345") # same, except no 0 flag - testformat("%#+27.23X", big, " +0X001234567890ABCDEF12345") + testcommon("%#+27.23X", big, " +0X001234567890ABCDEF12345") big = 0o12345670123456701234567012345670 # 32 octal digits - testformat("%o", big, "12345670123456701234567012345670") - testformat("%o", -big, "-12345670123456701234567012345670") - testformat("%5o", -big, "-12345670123456701234567012345670") - testformat("%33o", -big, "-12345670123456701234567012345670") - testformat("%34o", -big, " -12345670123456701234567012345670") - testformat("%-34o", -big, "-12345670123456701234567012345670 ") - testformat("%034o", -big, "-012345670123456701234567012345670") - testformat("%-034o", -big, "-12345670123456701234567012345670 ") - testformat("%036o", -big, "-00012345670123456701234567012345670") - testformat("%036o", big, "000012345670123456701234567012345670") - testformat("%0+36o", big, "+00012345670123456701234567012345670") - testformat("%+36o", big, " +12345670123456701234567012345670") - testformat("%36o", big, " 12345670123456701234567012345670") - testformat("%.2o", big, "12345670123456701234567012345670") - testformat("%.32o", big, "12345670123456701234567012345670") - testformat("%.33o", big, "012345670123456701234567012345670") - testformat("%34.33o", big, " 012345670123456701234567012345670") - testformat("%-34.33o", big, "012345670123456701234567012345670 ") - testformat("%o", big, "12345670123456701234567012345670") - testformat("%#o", big, "0o12345670123456701234567012345670") - testformat("%#o", -big, "-0o12345670123456701234567012345670") - testformat("%#.34o", -big, "-0o0012345670123456701234567012345670") - testformat("%#+.34o", big, "+0o0012345670123456701234567012345670") - testformat("%# .34o", big, " 0o0012345670123456701234567012345670") - testformat("%#+.34o", big, "+0o0012345670123456701234567012345670") - testformat("%#-+.34o", big, "+0o0012345670123456701234567012345670") - testformat("%#-+37.34o", big, "+0o0012345670123456701234567012345670") - testformat("%#+37.34o", big, "+0o0012345670123456701234567012345670") + testcommon("%o", big, "12345670123456701234567012345670") + testcommon("%o", -big, "-12345670123456701234567012345670") + testcommon("%5o", -big, "-12345670123456701234567012345670") + testcommon("%33o", -big, "-12345670123456701234567012345670") + testcommon("%34o", -big, " -12345670123456701234567012345670") + testcommon("%-34o", -big, "-12345670123456701234567012345670 ") + testcommon("%034o", -big, "-012345670123456701234567012345670") + testcommon("%-034o", -big, "-12345670123456701234567012345670 ") + testcommon("%036o", -big, "-00012345670123456701234567012345670") + testcommon("%036o", big, "000012345670123456701234567012345670") + testcommon("%0+36o", big, "+00012345670123456701234567012345670") + testcommon("%+36o", big, " +12345670123456701234567012345670") + testcommon("%36o", big, " 12345670123456701234567012345670") + testcommon("%.2o", big, "12345670123456701234567012345670") + testcommon("%.32o", big, "12345670123456701234567012345670") + testcommon("%.33o", big, "012345670123456701234567012345670") + testcommon("%34.33o", big, " 012345670123456701234567012345670") + testcommon("%-34.33o", big, "012345670123456701234567012345670 ") + testcommon("%o", big, "12345670123456701234567012345670") + testcommon("%#o", big, "0o12345670123456701234567012345670") + testcommon("%#o", -big, "-0o12345670123456701234567012345670") + testcommon("%#.34o", -big, "-0o0012345670123456701234567012345670") + testcommon("%#+.34o", big, "+0o0012345670123456701234567012345670") + testcommon("%# .34o", big, " 0o0012345670123456701234567012345670") + testcommon("%#+.34o", big, "+0o0012345670123456701234567012345670") + testcommon("%#-+.34o", big, "+0o0012345670123456701234567012345670") + testcommon("%#-+37.34o", big, "+0o0012345670123456701234567012345670") + testcommon("%#+37.34o", big, "+0o0012345670123456701234567012345670") # next one gets one leading zero from precision - testformat("%.33o", big, "012345670123456701234567012345670") + testcommon("%.33o", big, "012345670123456701234567012345670") # base marker shouldn't change that, since "0" is redundant - testformat("%#.33o", big, "0o012345670123456701234567012345670") + testcommon("%#.33o", big, "0o012345670123456701234567012345670") # but reduce precision, and base marker should add a zero - testformat("%#.32o", big, "0o12345670123456701234567012345670") + testcommon("%#.32o", big, "0o12345670123456701234567012345670") # one leading zero from precision, and another from "0" flag & width - testformat("%034.33o", big, "0012345670123456701234567012345670") + testcommon("%034.33o", big, "0012345670123456701234567012345670") # base marker shouldn't change that - testformat("%0#34.33o", big, "0o012345670123456701234567012345670") + testcommon("%0#34.33o", big, "0o012345670123456701234567012345670") # Some small ints, in both Python int and flavors). - testformat("%d", 42, "42") - testformat("%d", -42, "-42") - testformat("%d", 42, "42") - testformat("%d", -42, "-42") - testformat("%d", 42.0, "42") - testformat("%#x", 1, "0x1") - testformat("%#x", 1, "0x1") - testformat("%#X", 1, "0X1") - testformat("%#X", 1, "0X1") - testformat("%#o", 1, "0o1") - testformat("%#o", 1, "0o1") - testformat("%#o", 0, "0o0") - testformat("%#o", 0, "0o0") - testformat("%o", 0, "0") - testformat("%o", 0, "0") - testformat("%d", 0, "0") - testformat("%d", 0, "0") - testformat("%#x", 0, "0x0") - testformat("%#x", 0, "0x0") - testformat("%#X", 0, "0X0") - testformat("%#X", 0, "0X0") - testformat("%x", 0x42, "42") - testformat("%x", -0x42, "-42") - testformat("%x", 0x42, "42") - testformat("%x", -0x42, "-42") - testformat("%o", 0o42, "42") - testformat("%o", -0o42, "-42") - testformat("%o", 0o42, "42") - testformat("%o", -0o42, "-42") + testcommon("%d", 42, "42") + testcommon("%d", -42, "-42") + testcommon("%d", 42, "42") + testcommon("%d", -42, "-42") + testcommon("%d", 42.0, "42") + testcommon("%#x", 1, "0x1") + testcommon("%#x", 1, "0x1") + testcommon("%#X", 1, "0X1") + testcommon("%#X", 1, "0X1") + testcommon("%#o", 1, "0o1") + testcommon("%#o", 1, "0o1") + testcommon("%#o", 0, "0o0") + testcommon("%#o", 0, "0o0") + testcommon("%o", 0, "0") + testcommon("%o", 0, "0") + testcommon("%d", 0, "0") + testcommon("%d", 0, "0") + testcommon("%#x", 0, "0x0") + testcommon("%#x", 0, "0x0") + testcommon("%#X", 0, "0X0") + testcommon("%#X", 0, "0X0") + testcommon("%x", 0x42, "42") + testcommon("%x", -0x42, "-42") + testcommon("%x", 0x42, "42") + testcommon("%x", -0x42, "-42") + testcommon("%o", 0o42, "42") + testcommon("%o", -0o42, "-42") + testcommon("%o", 0o42, "42") + testcommon("%o", -0o42, "-42") + # alternate float formatting + testcommon('%g', 1.1, '1.1') + testcommon('%#g', 1.1, '1.10000') + + def test_str_format(self): testformat("%r", "\u0378", "'\\u0378'") # non printable testformat("%a", "\u0378", "'\\u0378'") # non printable testformat("%r", "\u0374", "'\u0374'") # printable testformat("%a", "\u0374", "'\\u0374'") # printable - # alternate float formatting - testformat('%g', 1.1, '1.1') - testformat('%#g', 1.1, '1.10000') - - # Test exception for unknown format characters + # Test exception for unknown format characters, etc. if verbose: print('Testing exceptions') def test_exc(formatstr, args, exception, excmsg): @@ -247,8 +275,83 @@ class FormatTest(unittest.TestCase): test_exc('%g', '1', TypeError, "a float is required") test_exc('no format', '1', TypeError, "not all arguments converted during string formatting") - test_exc('no format', '1', TypeError, - "not all arguments converted during string formatting") + + if maxsize == 2**31-1: + # crashes 2.2.1 and earlier: + try: + "%*d"%(maxsize, -127) + except MemoryError: + pass + else: + raise TestFailed('"%*d"%(maxsize, -127) should fail') + + def test_bytes_and_bytearray_format(self): + # %c will insert a single byte, either from an int in range(256), or + # from a bytes argument of length 1, not from a str. + testcommon(b"%c", 7, b"\x07") + testcommon(b"%c", b"Z", b"Z") + testcommon(b"%c", bytearray(b"Z"), b"Z") + # %b will insert a series of bytes, either from a type that supports + # the Py_buffer protocol, or something that has a __bytes__ method + class FakeBytes(object): + def __bytes__(self): + return b'123' + fb = FakeBytes() + testcommon(b"%b", b"abc", b"abc") + testcommon(b"%b", bytearray(b"def"), b"def") + testcommon(b"%b", fb, b"123") + # # %s is an alias for %b -- should only be used for Py2/3 code + testcommon(b"%s", b"abc", b"abc") + testcommon(b"%s", bytearray(b"def"), b"def") + testcommon(b"%s", fb, b"123") + # %a will give the equivalent of + # repr(some_obj).encode('ascii', 'backslashreplace') + testcommon(b"%a", 3.14, b"3.14") + testcommon(b"%a", b"ghi", b"b'ghi'") + testcommon(b"%a", "jkl", b"'jkl'") + testcommon(b"%a", "\u0544", b"'\\u0544'") + + # Test exception for unknown format characters, etc. + if verbose: + print('Testing exceptions') + def test_exc(formatstr, args, exception, excmsg): + try: + testformat(formatstr, args) + except exception as exc: + if str(exc) == excmsg: + if verbose: + print("yes") + else: + if verbose: print('no') + print('Unexpected ', exception, ':', repr(str(exc))) + except: + if verbose: print('no') + print('Unexpected exception') + raise + else: + raise TestFailed('did not get expected exception: %s' % excmsg) + test_exc(b'%d', '1', TypeError, + "%d format: a number is required, not str") + test_exc(b'%d', b'1', TypeError, + "%d format: a number is required, not bytes") + test_exc(b'%g', '1', TypeError, "float argument required, not str") + test_exc(b'%g', b'1', TypeError, "float argument required, not bytes") + test_exc(b'no format', 7, TypeError, + "not all arguments converted during bytes formatting") + test_exc(b'no format', b'1', TypeError, + "not all arguments converted during bytes formatting") + test_exc(b'no format', bytearray(b'1'), TypeError, + "not all arguments converted during bytes formatting") + test_exc(b"%c", 256, TypeError, + "%c requires an integer in range(256) or a single byte") + test_exc(b"%c", b"Za", TypeError, + "%c requires an integer in range(256) or a single byte") + test_exc(b"%c", "Yb", TypeError, + "%c requires an integer in range(256) or a single byte") + test_exc(b"%b", "Xc", TypeError, + "%b requires bytes, or an object that implements __bytes__, not 'str'") + test_exc(b"%s", "Wd", TypeError, + "%b requires bytes, or an object that implements __bytes__, not 'str'") if maxsize == 2**31-1: # crashes 2.2.1 and earlier: diff --git a/Misc/NEWS b/Misc/NEWS index 91643f390e4..127a7478ff1 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -14,6 +14,9 @@ Core and Builtins atomic memory access if available. Patch written by Vitor de Lima and Gustavo Temple. +- Issue #20284: %-interpolation (aka printf) formatting added for bytes and + bytearray. + - Issue #23048: Fix jumping out of an infinite while loop in the pdb. - Issue #20335: bytes constructor now raises TypeError when encoding or errors diff --git a/Objects/abstract.c b/Objects/abstract.c index 323c985b3ec..9ab60f23ca2 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -686,8 +686,9 @@ PyObject_Format(PyObject *obj, PyObject *format_spec) Py_DECREF(meth); if (result && !PyUnicode_Check(result)) { - PyErr_SetString(PyExc_TypeError, - "__format__ method did not return string"); + PyErr_Format(PyExc_TypeError, + "__format__ must return a str, not %.200s", + Py_TYPE(result)->tp_name); Py_DECREF(result); result = NULL; goto done; diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 47d480f6178..b9a87d6a315 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -4,6 +4,7 @@ #include "Python.h" #include "structmember.h" #include "bytes_methods.h" +#include "bytesobject.h" /*[clinic input] class bytearray "PyByteArrayObject *" "&PyByteArray_Type" @@ -294,6 +295,31 @@ PyByteArray_Concat(PyObject *a, PyObject *b) return (PyObject *)result; } +static PyObject * +bytearray_format(PyByteArrayObject *self, PyObject *args) +{ + PyObject *bytes_in, *bytes_out, *res; + char *bytestring; + + if (self == NULL || !PyByteArray_Check(self) || args == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + bytestring = PyByteArray_AS_STRING(self); + bytes_in = PyBytes_FromString(bytestring); + if (bytes_in == NULL) + return NULL; + bytes_out = _PyBytes_Format(bytes_in, args); + Py_DECREF(bytes_in); + if (bytes_out == NULL) + return NULL; + res = PyByteArray_FromObject(bytes_out); + Py_DECREF(bytes_out); + if (res == NULL) + return NULL; + return res; +} + /* Functions stuffed into the type object */ static Py_ssize_t @@ -3723,6 +3749,21 @@ bytearray_methods[] = { {NULL} }; +static PyObject * +bytearray_mod(PyObject *v, PyObject *w) +{ + if (!PyByteArray_Check(v)) + Py_RETURN_NOTIMPLEMENTED; + return bytearray_format((PyByteArrayObject *)v, w); +} + +static PyNumberMethods bytearray_as_number = { + 0, /*nb_add*/ + 0, /*nb_subtract*/ + 0, /*nb_multiply*/ + bytearray_mod, /*nb_remainder*/ +}; + PyDoc_STRVAR(bytearray_doc, "bytearray(iterable_of_ints) -> bytearray\n\ bytearray(string, encoding[, errors]) -> bytearray\n\ @@ -3751,7 +3792,7 @@ PyTypeObject PyByteArray_Type = { 0, /* tp_setattr */ 0, /* tp_reserved */ (reprfunc)bytearray_repr, /* tp_repr */ - 0, /* tp_as_number */ + &bytearray_as_number, /* tp_as_number */ &bytearray_as_sequence, /* tp_as_sequence */ &bytearray_as_mapping, /* tp_as_mapping */ 0, /* tp_hash */ diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index a5b9feb9444..bf919b53de1 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -400,6 +400,634 @@ PyBytes_FromFormat(const char *format, ...) return ret; } +/* Helpers for formatstring */ + +Py_LOCAL_INLINE(PyObject *) +getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) +{ + Py_ssize_t argidx = *p_argidx; + if (argidx < arglen) { + (*p_argidx)++; + if (arglen < 0) + return args; + else + return PyTuple_GetItem(args, argidx); + } + PyErr_SetString(PyExc_TypeError, + "not enough arguments for format string"); + return NULL; +} + +/* Format codes + * F_LJUST '-' + * F_SIGN '+' + * F_BLANK ' ' + * F_ALT '#' + * F_ZERO '0' + */ +#define F_LJUST (1<<0) +#define F_SIGN (1<<1) +#define F_BLANK (1<<2) +#define F_ALT (1<<3) +#define F_ZERO (1<<4) + +/* Returns a new reference to a PyBytes object, or NULL on failure. */ + +static PyObject * +formatfloat(PyObject *v, int flags, int prec, int type) +{ + char *p; + PyObject *result; + double x; + + x = PyFloat_AsDouble(v); + if (x == -1.0 && PyErr_Occurred()) { + PyErr_Format(PyExc_TypeError, "float argument required, " + "not %.200s", Py_TYPE(v)->tp_name); + return NULL; + } + + if (prec < 0) + prec = 6; + + p = PyOS_double_to_string(x, type, prec, + (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL); + + if (p == NULL) + return NULL; + result = PyBytes_FromStringAndSize(p, strlen(p)); + PyMem_Free(p); + return result; +} + +/* format_long emulates the format codes d, u, o, x and X, and + * the F_ALT flag, for Python's long (unbounded) ints. It's not used for + * Python's regular ints. + * Return value: a new PyBytes*, or NULL if error. + * . *pbuf is set to point into it, + * *plen set to the # of chars following that. + * Caller must decref it when done using pbuf. + * The string starting at *pbuf is of the form + * "-"? ("0x" | "0X")? digit+ + * "0x"/"0X" are present only for x and X conversions, with F_ALT + * set in flags. The case of hex digits will be correct, + * There will be at least prec digits, zero-filled on the left if + * necessary to get that many. + * val object to be converted + * flags bitmask of format flags; only F_ALT is looked at + * prec minimum number of digits; 0-fill on left if needed + * type a character in [duoxX]; u acts the same as d + * + * CAUTION: o, x and X conversions on regular ints can never + * produce a '-' sign, but can for Python's unbounded ints. + */ + +static PyObject * +format_long(PyObject *val, int flags, int prec, int type, + char **pbuf, int *plen) +{ + PyObject *s; + PyObject *result = NULL; + + s = _PyUnicode_FormatLong(val, flags & F_ALT, prec, type); + if (!s) + return NULL; + result = _PyUnicode_AsASCIIString(s, "strict"); + Py_DECREF(s); + if (!result) + return NULL; + *pbuf = PyBytes_AS_STRING(result); + *plen = PyBytes_GET_SIZE(result); + return result; +} + +Py_LOCAL_INLINE(int) +formatchar(char *buf, size_t buflen, PyObject *v) +{ + PyObject *w = NULL; + /* convert bytearray to bytes */ + if (PyByteArray_Check(v)) { + w = PyBytes_FromObject(v); + if (w == NULL) + goto error; + v = w; + } + /* presume that the buffer is at least 2 characters long */ + if (PyBytes_Check(v)) { + if (!PyArg_Parse(v, "c;%c requires an integer in range(256) or a single byte", &buf[0])) + goto error; + } + else { + long ival = PyLong_AsLong(v); + if (ival == -1 && PyErr_Occurred()) { + PyErr_SetString(PyExc_TypeError, + "%c requires an integer in range(256) or a single byte"); + goto error; + } + if (ival < 0 || ival > 255) { + PyErr_SetString(PyExc_TypeError, + "%c requires an integer in range(256) or a single byte"); + goto error; + } + buf[0] = ival; + } + Py_XDECREF(w); + buf[1] = '\0'; + return 1; + + error: + Py_XDECREF(w); + return -1; +} + +static PyObject * +format_obj(PyObject *v) +{ + PyObject *result = NULL, *w = NULL; + PyObject *func; + _Py_IDENTIFIER(__bytes__); + /* convert bytearray to bytes */ + if (PyByteArray_Check(v)) { + w = PyBytes_FromObject(v); + if (w == NULL) + return NULL; + v = w; + } + /* is it a bytes object? */ + if (PyBytes_Check(v)) { + result = v; + Py_INCREF(v); + Py_XDECREF(w); + return result; + } + /* does it support __bytes__? */ + func = _PyObject_LookupSpecial(v, &PyId___bytes__); + if (func != NULL) { + result = PyObject_CallFunctionObjArgs(func, NULL); + Py_DECREF(func); + if (result == NULL) + return NULL; + if (!PyBytes_Check(result)) { + PyErr_Format(PyExc_TypeError, + "__bytes__ returned non-bytes (type %.200s)", + Py_TYPE(result)->tp_name); + Py_DECREF(result); + return NULL; + } + return result; + } + PyErr_Format(PyExc_TypeError, + "%%b requires bytes, or an object that implements __bytes__, not '%.100s'", + Py_TYPE(v)->tp_name); + return NULL; +} + +/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) + + FORMATBUFLEN is the length of the buffer in which the ints & + chars are formatted. XXX This is a magic number. Each formatting + routine does bounds checking to ensure no overflow, but a better + solution may be to malloc a buffer of appropriate size for each + format. For now, the current solution is sufficient. +*/ +#define FORMATBUFLEN (size_t)120 + +PyObject * +_PyBytes_Format(PyObject *format, PyObject *args) +{ + char *fmt, *res; + Py_ssize_t arglen, argidx; + Py_ssize_t reslen, rescnt, fmtcnt; + int args_owned = 0; + PyObject *result; + PyObject *repr; + PyObject *dict = NULL; + if (format == NULL || !PyBytes_Check(format) || args == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + fmt = PyBytes_AS_STRING(format); + fmtcnt = PyBytes_GET_SIZE(format); + reslen = rescnt = fmtcnt + 100; + result = PyBytes_FromStringAndSize((char *)NULL, reslen); + if (result == NULL) + return NULL; + res = PyBytes_AsString(result); + if (PyTuple_Check(args)) { + arglen = PyTuple_GET_SIZE(args); + argidx = 0; + } + else { + arglen = -1; + argidx = -2; + } + if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript && + !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) && + !PyByteArray_Check(args)) { + dict = args; + } + while (--fmtcnt >= 0) { + if (*fmt != '%') { + if (--rescnt < 0) { + rescnt = fmtcnt + 100; + reslen += rescnt; + if (_PyBytes_Resize(&result, reslen)) + return NULL; + res = PyBytes_AS_STRING(result) + + reslen - rescnt; + --rescnt; + } + *res++ = *fmt++; + } + else { + /* Got a format specifier */ + int flags = 0; + Py_ssize_t width = -1; + int prec = -1; + int c = '\0'; + int fill; + int isnumok; + PyObject *v = NULL; + PyObject *temp = NULL; + Py_buffer buf; + char *pbuf; + int sign; + Py_ssize_t len; + char formatbuf[FORMATBUFLEN]; + /* For format{int,char}() */ + + buf.obj = NULL; + fmt++; + if (*fmt == '(') { + char *keystart; + Py_ssize_t keylen; + PyObject *key; + int pcount = 1; + + if (dict == NULL) { + PyErr_SetString(PyExc_TypeError, + "format requires a mapping"); + goto error; + } + ++fmt; + --fmtcnt; + keystart = fmt; + /* Skip over balanced parentheses */ + while (pcount > 0 && --fmtcnt >= 0) { + if (*fmt == ')') + --pcount; + else if (*fmt == '(') + ++pcount; + fmt++; + } + keylen = fmt - keystart - 1; + if (fmtcnt < 0 || pcount > 0) { + PyErr_SetString(PyExc_ValueError, + "incomplete format key"); + goto error; + } + key = PyBytes_FromStringAndSize(keystart, + keylen); + if (key == NULL) + goto error; + if (args_owned) { + Py_DECREF(args); + args_owned = 0; + } + args = PyObject_GetItem(dict, key); + Py_DECREF(key); + if (args == NULL) { + goto error; + } + args_owned = 1; + arglen = -1; + argidx = -2; + } + while (--fmtcnt >= 0) { + switch (c = *fmt++) { + case '-': flags |= F_LJUST; continue; + case '+': flags |= F_SIGN; continue; + case ' ': flags |= F_BLANK; continue; + case '#': flags |= F_ALT; continue; + case '0': flags |= F_ZERO; continue; + } + break; + } + if (c == '*') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto error; + if (!PyLong_Check(v)) { + PyErr_SetString(PyExc_TypeError, + "* wants int"); + goto error; + } + width = PyLong_AsSsize_t(v); + if (width == -1 && PyErr_Occurred()) + goto error; + if (width < 0) { + flags |= F_LJUST; + width = -width; + } + if (--fmtcnt >= 0) + c = *fmt++; + } + else if (c >= 0 && isdigit(c)) { + width = c - '0'; + while (--fmtcnt >= 0) { + c = Py_CHARMASK(*fmt++); + if (!isdigit(c)) + break; + if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) { + PyErr_SetString( + PyExc_ValueError, + "width too big"); + goto error; + } + width = width*10 + (c - '0'); + } + } + if (c == '.') { + prec = 0; + if (--fmtcnt >= 0) + c = *fmt++; + if (c == '*') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto error; + if (!PyLong_Check(v)) { + PyErr_SetString( + PyExc_TypeError, + "* wants int"); + goto error; + } + prec = PyLong_AsSsize_t(v); + if (prec == -1 && PyErr_Occurred()) + goto error; + if (prec < 0) + prec = 0; + if (--fmtcnt >= 0) + c = *fmt++; + } + else if (c >= 0 && isdigit(c)) { + prec = c - '0'; + while (--fmtcnt >= 0) { + c = Py_CHARMASK(*fmt++); + if (!isdigit(c)) + break; + if (prec > (INT_MAX - ((int)c - '0')) / 10) { + PyErr_SetString( + PyExc_ValueError, + "prec too big"); + goto error; + } + prec = prec*10 + (c - '0'); + } + } + } /* prec */ + if (fmtcnt >= 0) { + if (c == 'h' || c == 'l' || c == 'L') { + if (--fmtcnt >= 0) + c = *fmt++; + } + } + if (fmtcnt < 0) { + PyErr_SetString(PyExc_ValueError, + "incomplete format"); + goto error; + } + if (c != '%') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto error; + } + sign = 0; + fill = ' '; + switch (c) { + case '%': + pbuf = "%"; + len = 1; + break; + case 'a': + temp = PyObject_Repr(v); + if (temp == NULL) + goto error; + repr = PyUnicode_AsEncodedObject(temp, "ascii", "backslashreplace"); + if (repr == NULL) { + Py_DECREF(temp); + goto error; + } + if (_getbuffer(repr, &buf) < 0) { + temp = format_obj(repr); + if (temp == NULL) { + Py_DECREF(repr); + goto error; + } + Py_DECREF(repr); + repr = temp; + } + pbuf = PyBytes_AS_STRING(repr); + len = PyBytes_GET_SIZE(repr); + Py_DECREF(repr); + if (prec >= 0 && len > prec) + len = prec; + break; + case 's': + // %s is only for 2/3 code; 3 only code should use %b + case 'b': + temp = format_obj(v); + if (temp == NULL) + goto error; + pbuf = PyBytes_AS_STRING(temp); + len = PyBytes_GET_SIZE(temp); + if (prec >= 0 && len > prec) + len = prec; + break; + case 'i': + case 'd': + case 'u': + case 'o': + case 'x': + case 'X': + if (c == 'i') + c = 'd'; + isnumok = 0; + if (PyNumber_Check(v)) { + PyObject *iobj=NULL; + + if ((PyLong_Check(v))) { + iobj = v; + Py_INCREF(iobj); + } + else { + iobj = PyNumber_Long(v); + } + if (iobj!=NULL) { + if (PyLong_Check(iobj)) { + int ilen; + + isnumok = 1; + temp = format_long(iobj, flags, prec, c, + &pbuf, &ilen); + Py_DECREF(iobj); + len = ilen; + if (!temp) + goto error; + sign = 1; + } + else { + Py_DECREF(iobj); + } + } + } + if (!isnumok) { + PyErr_Format(PyExc_TypeError, + "%%%c format: a number is required, " + "not %.200s", c, Py_TYPE(v)->tp_name); + goto error; + } + if (flags & F_ZERO) + fill = '0'; + break; + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + temp = formatfloat(v, flags, prec, c); + if (temp == NULL) + goto error; + pbuf = PyBytes_AS_STRING(temp); + len = PyBytes_GET_SIZE(temp); + sign = 1; + if (flags & F_ZERO) + fill = '0'; + break; + case 'c': + pbuf = formatbuf; + len = formatchar(pbuf, sizeof(formatbuf), v); + if (len < 0) + goto error; + break; + default: + PyErr_Format(PyExc_ValueError, + "unsupported format character '%c' (0x%x) " + "at index %zd", + c, c, + (Py_ssize_t)(fmt - 1 - + PyBytes_AsString(format))); + goto error; + } + if (sign) { + if (*pbuf == '-' || *pbuf == '+') { + sign = *pbuf++; + len--; + } + else if (flags & F_SIGN) + sign = '+'; + else if (flags & F_BLANK) + sign = ' '; + else + sign = 0; + } + if (width < len) + width = len; + if (rescnt - (sign != 0) < width) { + reslen -= rescnt; + rescnt = width + fmtcnt + 100; + reslen += rescnt; + if (reslen < 0) { + Py_DECREF(result); + PyBuffer_Release(&buf); + Py_XDECREF(temp); + return PyErr_NoMemory(); + } + if (_PyBytes_Resize(&result, reslen)) { + PyBuffer_Release(&buf); + Py_XDECREF(temp); + return NULL; + } + res = PyBytes_AS_STRING(result) + + reslen - rescnt; + } + if (sign) { + if (fill != ' ') + *res++ = sign; + rescnt--; + if (width > len) + width--; + } + if ((flags & F_ALT) && (c == 'x' || c == 'X')) { + assert(pbuf[0] == '0'); + assert(pbuf[1] == c); + if (fill != ' ') { + *res++ = *pbuf++; + *res++ = *pbuf++; + } + rescnt -= 2; + width -= 2; + if (width < 0) + width = 0; + len -= 2; + } + if (width > len && !(flags & F_LJUST)) { + do { + --rescnt; + *res++ = fill; + } while (--width > len); + } + if (fill == ' ') { + if (sign) + *res++ = sign; + if ((flags & F_ALT) && + (c == 'x' || c == 'X')) { + assert(pbuf[0] == '0'); + assert(pbuf[1] == c); + *res++ = *pbuf++; + *res++ = *pbuf++; + } + } + Py_MEMCPY(res, pbuf, len); + res += len; + rescnt -= len; + while (--width >= len) { + --rescnt; + *res++ = ' '; + } + if (dict && (argidx < arglen) && c != '%') { + PyErr_SetString(PyExc_TypeError, + "not all arguments converted during bytes formatting"); + PyBuffer_Release(&buf); + Py_XDECREF(temp); + goto error; + } + PyBuffer_Release(&buf); + Py_XDECREF(temp); + } /* '%' */ + } /* until end */ + if (argidx < arglen && !dict) { + PyErr_SetString(PyExc_TypeError, + "not all arguments converted during bytes formatting"); + goto error; + } + if (args_owned) { + Py_DECREF(args); + } + if (_PyBytes_Resize(&result, reslen - rescnt)) + return NULL; + return result; + + error: + Py_DECREF(result); + if (args_owned) { + Py_DECREF(args); + } + return NULL; +} + +/* =-= */ + static void bytes_dealloc(PyObject *op) { @@ -2995,6 +3623,21 @@ bytes_methods[] = { {NULL, NULL} /* sentinel */ }; +static PyObject * +bytes_mod(PyObject *v, PyObject *w) +{ + if (!PyBytes_Check(v)) + Py_RETURN_NOTIMPLEMENTED; + return _PyBytes_Format(v, w); +} + +static PyNumberMethods bytes_as_number = { + 0, /*nb_add*/ + 0, /*nb_subtract*/ + 0, /*nb_multiply*/ + bytes_mod, /*nb_remainder*/ +}; + static PyObject * str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); @@ -3286,7 +3929,7 @@ PyTypeObject PyBytes_Type = { 0, /* tp_setattr */ 0, /* tp_reserved */ (reprfunc)bytes_repr, /* tp_repr */ - 0, /* tp_as_number */ + &bytes_as_number, /* tp_as_number */ &bytes_as_sequence, /* tp_as_sequence */ &bytes_as_mapping, /* tp_as_mapping */ (hashfunc)bytes_hash, /* tp_hash */ @@ -3377,14 +4020,14 @@ PyBytes_ConcatAndDel(PyObject **pv, PyObject *w) } -/* The following function breaks the notion that strings are immutable: - it changes the size of a string. We get away with this only if there +/* The following function breaks the notion that bytes are immutable: + it changes the size of a bytes object. We get away with this only if there is only one module referencing the object. You can also think of it - as creating a new string object and destroying the old one, only - more efficiently. In any case, don't use this if the string may + as creating a new bytes object and destroying the old one, only + more efficiently. In any case, don't use this if the bytes object may already be known to some other part of the code... - Note that if there's not enough memory to resize the string, the original - string object at *pv is deallocated, *pv is set to NULL, an "out of + Note that if there's not enough memory to resize the bytes object, the + original bytes object at *pv is deallocated, *pv is set to NULL, an "out of memory" exception is set, and -1 is returned. Else (on success) 0 is returned, and the value in *pv may or may not be the same as on input. As always, an extra byte is allocated for a trailing \0 byte (newsize diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3d51f340c5d..ee0c044edb7 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13893,8 +13893,8 @@ formatfloat(PyObject *v, struct unicode_format_arg_t *arg, * CAUTION: o, x and X conversions on regular ints can never * produce a '-' sign, but can for Python's unbounded ints. */ -static PyObject* -formatlong(PyObject *val, struct unicode_format_arg_t *arg) +PyObject * +_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type) { PyObject *result = NULL; char *buf; @@ -13904,8 +13904,6 @@ formatlong(PyObject *val, struct unicode_format_arg_t *arg) Py_ssize_t llen; int numdigits; /* len == numnondigits + numdigits */ int numnondigits = 0; - int prec = arg->prec; - int type = arg->ch; /* Avoid exceeding SSIZE_T_MAX */ if (prec > INT_MAX-3) { @@ -13954,7 +13952,7 @@ formatlong(PyObject *val, struct unicode_format_arg_t *arg) if (llen > INT_MAX) { Py_DECREF(result); PyErr_SetString(PyExc_ValueError, - "string too large in _PyBytes_FormatLong"); + "string too large in _PyUnicode_FormatLong"); return NULL; } len = (int)llen; @@ -13964,7 +13962,7 @@ formatlong(PyObject *val, struct unicode_format_arg_t *arg) assert(numdigits > 0); /* Get rid of base marker unless F_ALT */ - if (((arg->flags & F_ALT) == 0 && + if (((alt) == 0 && (type == 'o' || type == 'x' || type == 'X'))) { assert(buf[sign] == '0'); assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' || @@ -14099,7 +14097,7 @@ mainformatlong(PyObject *v, return 1; } - res = formatlong(iobj, arg); + res = _PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type); Py_DECREF(iobj); if (res == NULL) return -1;