From 0c2f9305640f7655ba0cd5f478948b2763b376b3 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Wed, 29 May 2019 11:46:58 -0700 Subject: [PATCH] bpo-22385: Support output separators in hex methods. (#13578) * bpo-22385: Support output separators in hex methods. Also in binascii.hexlify aka b2a_hex. The underlying implementation behind all hex generation in CPython uses the same pystrhex.c implementation. This adds support to bytes, bytearray, and memoryview objects. The binascii module functions exist rather than being slated for deprecation because they return bytes rather than requiring an intermediate step through a str object. This change was inspired by MicroPython which supports sep in its binascii implementation (and does not yet support the .hex methods). https://bugs.python.org/issue22385 --- Doc/library/binascii.rst | 22 +++- Doc/library/stdtypes.rst | 18 +++ Include/pystrhex.h | 3 + Lib/test/test_binascii.py | 12 ++ Lib/test/test_bytes.py | 57 +++++++++ Lib/test/test_doctest.py | 5 +- .../2019-05-25-17-18-26.bpo-22385.VeVvhJ.rst | 4 + Modules/binascii.c | 33 +++-- Modules/clinic/binascii.c.h | 115 +++++++++++++++--- Objects/bytearrayobject.c | 34 ++++-- Objects/bytesobject.c | 34 ++++-- Objects/clinic/bytearrayobject.c.h | 71 ++++++++++- Objects/clinic/bytesobject.c.h | 71 ++++++++++- Objects/clinic/memoryobject.c.h | 74 +++++++++++ Objects/memoryobject.c | 45 +++++-- Python/pystrhex.c | 90 ++++++++++++-- 16 files changed, 624 insertions(+), 64 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-05-25-17-18-26.bpo-22385.VeVvhJ.rst create mode 100644 Objects/clinic/memoryobject.c.h diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 89ecddc7780..98d8679fa3d 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -145,8 +145,8 @@ The :mod:`binascii` module defines the following functions: platforms, use ``crc32(data) & 0xffffffff``. -.. function:: b2a_hex(data) - hexlify(data) +.. function:: b2a_hex(data[, sep[, bytes_per_sep=1]]) + hexlify(data[, sep[, bytes_per_sep=1]]) Return the hexadecimal representation of the binary *data*. Every byte of *data* is converted into the corresponding 2-digit hex representation. The @@ -155,6 +155,24 @@ The :mod:`binascii` module defines the following functions: Similar functionality (but returning a text string) is also conveniently accessible using the :meth:`bytes.hex` method. + If *sep* is specified, it must be a single character str or bytes object. + It will be inserted in the output after every *bytes_per_sep* input bytes. + Separator placement is counted from the right end of the output by default, + if you wish to count from the left, supply a negative *bytes_per_sep* value. + + >>> import binascii + >>> binascii.b2a_hex(b'\xb9\x01\xef') + b'b901ef' + >>> binascii.hexlify(b'\xb9\x01\xef', '-') + b'b9-01-ef' + >>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2) + b'b9_01ef' + >>> binascii.b2a_hex(b'\xb9\x01\xef', b' ', -2) + b'b901 ef' + + .. versionchanged:: 3.8 + The *sep* and *bytes_per_sep* parameters were added. + .. function:: a2b_hex(hexstr) unhexlify(hexstr) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 293a1ab6a0d..fcb0da74e15 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -2404,8 +2404,26 @@ data and are closely related to string objects in a variety of other ways. >>> b'\xf0\xf1\xf2'.hex() 'f0f1f2' + If you want to make the hex string easier to read, you can specify a + single character separator *sep* parameter to include in the output. + By default between each byte. A second optional *bytes_per_sep* + parameter controls the spacing. Positive values calculate the + separator position from the right, negative values from the left. + + >>> value = b'\xf0\xf1\xf2' + >>> value.hex('-') + 'f0-f1-f2' + >>> value.hex('_', 2) + 'f0_f1f2' + >>> b'UUDDLRLRAB'.hex(' ', -4) + '55554444 4c524c52 4142' + .. versionadded:: 3.5 + .. versionchanged:: 3.8 + :meth:`bytes.hex` now supports optional *sep* and *bytes_per_sep* + parameters to insert separators between bytes in the hex output. + Since bytes objects are sequences of integers (akin to a tuple), for a bytes object *b*, ``b[0]`` will be an integer, while ``b[0:1]`` will be a bytes object of length 1. (This contrasts with text strings, where both indexing diff --git a/Include/pystrhex.h b/Include/pystrhex.h index 66a30e2233c..a4f36305bac 100644 --- a/Include/pystrhex.h +++ b/Include/pystrhex.h @@ -10,6 +10,9 @@ extern "C" { PyAPI_FUNC(PyObject*) _Py_strhex(const char* argbuf, const Py_ssize_t arglen); /* Returns a bytes() containing the ASCII hex representation of argbuf. */ PyAPI_FUNC(PyObject*) _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen); +/* These variants include support for a separator between every N bytes: */ +PyAPI_FUNC(PyObject*) _Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group); +PyAPI_FUNC(PyObject*) _Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group); #endif /* !Py_LIMITED_API */ #ifdef __cplusplus diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 572e50c3e25..08de5c9fc7c 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -240,6 +240,18 @@ class BinASCIITest(unittest.TestCase): self.assertEqual(binascii.hexlify(self.type2test(s)), t) self.assertEqual(binascii.unhexlify(self.type2test(t)), u) + def test_hex_separator(self): + """Test that hexlify and b2a_hex are binary versions of bytes.hex.""" + # Logic of separators is tested in test_bytes.py. This checks that + # arg parsing works and exercises the direct to bytes object code + # path within pystrhex.c. + s = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000' + self.assertEqual(binascii.hexlify(self.type2test(s)), s.hex().encode('ascii')) + expected8 = s.hex('.', 8).encode('ascii') + self.assertEqual(binascii.hexlify(self.type2test(s), '.', 8), expected8) + expected1 = s.hex(':').encode('ascii') + self.assertEqual(binascii.b2a_hex(self.type2test(s), ':'), expected1) + def test_qp(self): type2test = self.type2test a2b_qp = binascii.a2b_qp diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 9502a8f974b..bbd45c75298 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -417,6 +417,63 @@ class BaseBytesTest: self.assertEqual(self.type2test(b"\x1a\x2b\x30").hex(), '1a2b30') self.assertEqual(memoryview(b"\x1a\x2b\x30").hex(), '1a2b30') + def test_hex_separator_basics(self): + three_bytes = self.type2test(b'\xb9\x01\xef') + self.assertEqual(three_bytes.hex(), 'b901ef') + with self.assertRaises(ValueError): + three_bytes.hex('') + with self.assertRaises(ValueError): + three_bytes.hex('xx') + self.assertEqual(three_bytes.hex(':', 0), 'b901ef') + with self.assertRaises(TypeError): + three_bytes.hex(None, 0) + with self.assertRaises(ValueError): + three_bytes.hex('\xff') + with self.assertRaises(ValueError): + three_bytes.hex(b'\xff') + with self.assertRaises(ValueError): + three_bytes.hex(b'\x80') + with self.assertRaises(ValueError): + three_bytes.hex(chr(0x100)) + self.assertEqual(three_bytes.hex(':', 0), 'b901ef') + self.assertEqual(three_bytes.hex(b'\x00'), 'b9\x0001\x00ef') + self.assertEqual(three_bytes.hex('\x00'), 'b9\x0001\x00ef') + self.assertEqual(three_bytes.hex(b'\x7f'), 'b9\x7f01\x7fef') + self.assertEqual(three_bytes.hex('\x7f'), 'b9\x7f01\x7fef') + self.assertEqual(three_bytes.hex(':', 3), 'b901ef') + self.assertEqual(three_bytes.hex(':', 4), 'b901ef') + self.assertEqual(three_bytes.hex(':', -4), 'b901ef') + self.assertEqual(three_bytes.hex(':'), 'b9:01:ef') + self.assertEqual(three_bytes.hex(b'$'), 'b9$01$ef') + self.assertEqual(three_bytes.hex(':', 1), 'b9:01:ef') + self.assertEqual(three_bytes.hex(':', -1), 'b9:01:ef') + self.assertEqual(three_bytes.hex(':', 2), 'b9:01ef') + self.assertEqual(three_bytes.hex(':', 1), 'b9:01:ef') + self.assertEqual(three_bytes.hex('*', -2), 'b901*ef') + + value = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000' + self.assertEqual(value.hex('.', 8), '7b7305000000776f.726c646902000000.730500000068656c.6c6f690100000030') + + def test_hex_separator_five_bytes(self): + five_bytes = self.type2test(range(90,95)) + self.assertEqual(five_bytes.hex(), '5a5b5c5d5e') + + def test_hex_separator_six_bytes(self): + six_bytes = self.type2test(x*3 for x in range(1, 7)) + self.assertEqual(six_bytes.hex(), '0306090c0f12') + self.assertEqual(six_bytes.hex('.', 1), '03.06.09.0c.0f.12') + self.assertEqual(six_bytes.hex(' ', 2), '0306 090c 0f12') + self.assertEqual(six_bytes.hex('-', 3), '030609-0c0f12') + self.assertEqual(six_bytes.hex(':', 4), '0306:090c0f12') + self.assertEqual(six_bytes.hex(':', 5), '03:06090c0f12') + self.assertEqual(six_bytes.hex(':', 6), '0306090c0f12') + self.assertEqual(six_bytes.hex(':', 95), '0306090c0f12') + self.assertEqual(six_bytes.hex('_', -3), '030609_0c0f12') + self.assertEqual(six_bytes.hex(':', -4), '0306090c:0f12') + self.assertEqual(six_bytes.hex(b'@', -5), '0306090c0f@12') + self.assertEqual(six_bytes.hex(':', -6), '0306090c0f12') + self.assertEqual(six_bytes.hex(' ', -95), '0306090c0f12') + def test_join(self): self.assertEqual(self.type2test(b"").join([]), b"") self.assertEqual(self.type2test(b"").join([b""]), b"") diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py index f1013f25725..5ea18f52c4f 100644 --- a/Lib/test/test_doctest.py +++ b/Lib/test/test_doctest.py @@ -665,11 +665,13 @@ plain ol' Python and is guaranteed to be available. True >>> real_tests = [t for t in tests if len(t.examples) > 0] >>> len(real_tests) # objects that actually have doctests - 9 + 12 >>> for t in real_tests: ... print('{} {}'.format(len(t.examples), t.name)) ... 1 builtins.bin + 5 builtins.bytearray.hex + 5 builtins.bytes.hex 3 builtins.float.as_integer_ratio 2 builtins.float.fromhex 2 builtins.float.hex @@ -677,6 +679,7 @@ plain ol' Python and is guaranteed to be available. 1 builtins.int 3 builtins.int.as_integer_ratio 2 builtins.int.bit_length + 5 builtins.memoryview.hex 1 builtins.oct Note here that 'bin', 'oct', and 'hex' are functions; 'float.as_integer_ratio', diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-05-25-17-18-26.bpo-22385.VeVvhJ.rst b/Misc/NEWS.d/next/Core and Builtins/2019-05-25-17-18-26.bpo-22385.VeVvhJ.rst new file mode 100644 index 00000000000..e10690b3a56 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-05-25-17-18-26.bpo-22385.VeVvhJ.rst @@ -0,0 +1,4 @@ +The `bytes.hex`, `bytearray.hex`, and `memoryview.hex` methods as well as +the `binascii.hexlify` and `b2a_hex` functions now have the ability to +include an optional separator between hex bytes. This functionality was +inspired by MicroPython's hexlify implementation. diff --git a/Modules/binascii.c b/Modules/binascii.c index d22ab7b4683..1c7dc35882d 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1159,19 +1159,33 @@ binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc) binascii.b2a_hex data: Py_buffer - / + sep: object = NULL + An optional single character or byte to separate hex bytes. + bytes_per_sep: int = 1 + How many bytes between separators. Positive values count from the + right, negative values count from the left. Hexadecimal representation of binary data. The return value is a bytes object. This function is also available as "hexlify()". + +Example: +>>> binascii.b2a_hex(b'\xb9\x01\xef') +b'b901ef' +>>> binascii.hexlify(b'\xb9\x01\xef', ':') +b'b9:01:ef' +>>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2) +b'b9_01ef' [clinic start generated code]*/ static PyObject * -binascii_b2a_hex_impl(PyObject *module, Py_buffer *data) -/*[clinic end generated code: output=92fec1a95c9897a0 input=96423cfa299ff3b1]*/ +binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep, + int bytes_per_sep) +/*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/ { - return _Py_strhex_bytes((const char *)data->buf, data->len); + return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len, + sep, bytes_per_sep); } /*[clinic input] @@ -1179,14 +1193,17 @@ binascii.hexlify = binascii.b2a_hex Hexadecimal representation of binary data. -The return value is a bytes object. +The return value is a bytes object. This function is also +available as "b2a_hex()". [clinic start generated code]*/ static PyObject * -binascii_hexlify_impl(PyObject *module, Py_buffer *data) -/*[clinic end generated code: output=749e95e53c14880c input=2e3afae7f083f061]*/ +binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep, + int bytes_per_sep) +/*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/ { - return _Py_strhex_bytes((const char *)data->buf, data->len); + return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len, + sep, bytes_per_sep); } /*[clinic input] diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index 4043d89d97d..d4850485690 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -432,34 +432,78 @@ exit: } PyDoc_STRVAR(binascii_b2a_hex__doc__, -"b2a_hex($module, data, /)\n" +"b2a_hex($module, /, data, sep=None, bytes_per_sep=1)\n" "--\n" "\n" "Hexadecimal representation of binary data.\n" "\n" +" sep\n" +" An optional single character or byte to separate hex bytes.\n" +" bytes_per_sep\n" +" How many bytes between separators. Positive values count from the\n" +" right, negative values count from the left.\n" +"\n" "The return value is a bytes object. This function is also\n" -"available as \"hexlify()\"."); +"available as \"hexlify()\".\n" +"\n" +"Example:\n" +">>> binascii.b2a_hex(b\'\\xb9\\x01\\xef\')\n" +"b\'b901ef\'\n" +">>> binascii.hexlify(b\'\\xb9\\x01\\xef\', \':\')\n" +"b\'b9:01:ef\'\n" +">>> binascii.b2a_hex(b\'\\xb9\\x01\\xef\', b\'_\', 2)\n" +"b\'b9_01ef\'"); #define BINASCII_B2A_HEX_METHODDEF \ - {"b2a_hex", (PyCFunction)binascii_b2a_hex, METH_O, binascii_b2a_hex__doc__}, + {"b2a_hex", (PyCFunction)(void(*)(void))binascii_b2a_hex, METH_FASTCALL|METH_KEYWORDS, binascii_b2a_hex__doc__}, static PyObject * -binascii_b2a_hex_impl(PyObject *module, Py_buffer *data); +binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep, + int bytes_per_sep); static PyObject * -binascii_b2a_hex(PyObject *module, PyObject *arg) +binascii_b2a_hex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + static const char * const _keywords[] = {"data", "sep", "bytes_per_sep", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "b2a_hex", 0}; + PyObject *argsbuf[3]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; + PyObject *sep = NULL; + int bytes_per_sep = 1; - if (PyObject_GetBuffer(arg, &data, PyBUF_SIMPLE) != 0) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 3, 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { goto exit; } if (!PyBuffer_IsContiguous(&data, 'C')) { - _PyArg_BadArgument("b2a_hex", 0, "contiguous buffer", arg); + _PyArg_BadArgument("b2a_hex", 1, "contiguous buffer", args[0]); goto exit; } - return_value = binascii_b2a_hex_impl(module, &data); + if (!noptargs) { + goto skip_optional_pos; + } + if (args[1]) { + sep = args[1]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (PyFloat_Check(args[2])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + bytes_per_sep = _PyLong_AsInt(args[2]); + if (bytes_per_sep == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + return_value = binascii_b2a_hex_impl(module, &data, sep, bytes_per_sep); exit: /* Cleanup for data */ @@ -471,33 +515,70 @@ exit: } PyDoc_STRVAR(binascii_hexlify__doc__, -"hexlify($module, data, /)\n" +"hexlify($module, /, data, sep=None, bytes_per_sep=1)\n" "--\n" "\n" "Hexadecimal representation of binary data.\n" "\n" -"The return value is a bytes object."); +" sep\n" +" An optional single character or byte to separate hex bytes.\n" +" bytes_per_sep\n" +" How many bytes between separators. Positive values count from the\n" +" right, negative values count from the left.\n" +"\n" +"The return value is a bytes object. This function is also\n" +"available as \"b2a_hex()\"."); #define BINASCII_HEXLIFY_METHODDEF \ - {"hexlify", (PyCFunction)binascii_hexlify, METH_O, binascii_hexlify__doc__}, + {"hexlify", (PyCFunction)(void(*)(void))binascii_hexlify, METH_FASTCALL|METH_KEYWORDS, binascii_hexlify__doc__}, static PyObject * -binascii_hexlify_impl(PyObject *module, Py_buffer *data); +binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep, + int bytes_per_sep); static PyObject * -binascii_hexlify(PyObject *module, PyObject *arg) +binascii_hexlify(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + static const char * const _keywords[] = {"data", "sep", "bytes_per_sep", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "hexlify", 0}; + PyObject *argsbuf[3]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; + PyObject *sep = NULL; + int bytes_per_sep = 1; - if (PyObject_GetBuffer(arg, &data, PyBUF_SIMPLE) != 0) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 3, 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { goto exit; } if (!PyBuffer_IsContiguous(&data, 'C')) { - _PyArg_BadArgument("hexlify", 0, "contiguous buffer", arg); + _PyArg_BadArgument("hexlify", 1, "contiguous buffer", args[0]); goto exit; } - return_value = binascii_hexlify_impl(module, &data); + if (!noptargs) { + goto skip_optional_pos; + } + if (args[1]) { + sep = args[1]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (PyFloat_Check(args[2])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + bytes_per_sep = _PyLong_AsInt(args[2]); + if (bytes_per_sep == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + return_value = binascii_hexlify_impl(module, &data, sep, bytes_per_sep); exit: /* Cleanup for data */ @@ -720,4 +801,4 @@ exit: return return_value; } -/*[clinic end generated code: output=a4a38e162605aca2 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=f7b8049edb130c63 input=a9049054013a1b77]*/ diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index c7c28312b00..b9fcc01b703 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -2020,18 +2020,36 @@ bytearray_fromhex_impl(PyTypeObject *type, PyObject *string) return result; } -PyDoc_STRVAR(hex__doc__, -"B.hex() -> string\n\ -\n\ -Create a string of hexadecimal numbers from a bytearray object.\n\ -Example: bytearray([0xb9, 0x01, 0xef]).hex() -> 'b901ef'."); +/*[clinic input] +bytearray.hex + + sep: object = NULL + An optional single character or byte to separate hex bytes. + bytes_per_sep: int = 1 + How many bytes between separators. Positive values count from the + right, negative values count from the left. + +Create a str of hexadecimal numbers from a bytearray object. + +Example: +>>> value = bytearray([0xb9, 0x01, 0xef]) +>>> value.hex() +'b901ef' +>>> value.hex(':') +'b9:01:ef' +>>> value.hex(':', 2) +'b9:01ef' +>>> value.hex(':', -2) +'b901:ef' +[clinic start generated code]*/ static PyObject * -bytearray_hex(PyBytesObject *self, PyObject *Py_UNUSED(ignored)) +bytearray_hex_impl(PyByteArrayObject *self, PyObject *sep, int bytes_per_sep) +/*[clinic end generated code: output=29c4e5ef72c565a0 input=814c15830ac8c4b5]*/ { char* argbuf = PyByteArray_AS_STRING(self); Py_ssize_t arglen = PyByteArray_GET_SIZE(self); - return _Py_strhex(argbuf, arglen); + return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep); } static PyObject * @@ -2160,7 +2178,7 @@ bytearray_methods[] = { {"find", (PyCFunction)bytearray_find, METH_VARARGS, _Py_find__doc__}, BYTEARRAY_FROMHEX_METHODDEF - {"hex", (PyCFunction)bytearray_hex, METH_NOARGS, hex__doc__}, + BYTEARRAY_HEX_METHODDEF {"index", (PyCFunction)bytearray_index, METH_VARARGS, _Py_index__doc__}, BYTEARRAY_INSERT_METHODDEF {"isalnum", stringlib_isalnum, METH_NOARGS, diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 6f34037154d..bf7c7da423b 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2416,18 +2416,36 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) return NULL; } -PyDoc_STRVAR(hex__doc__, -"B.hex() -> string\n\ -\n\ -Create a string of hexadecimal numbers from a bytes object.\n\ -Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'."); +/*[clinic input] +bytes.hex + + sep: object = NULL + An optional single character or byte to separate hex bytes. + bytes_per_sep: int = 1 + How many bytes between separators. Positive values count from the + right, negative values count from the left. + +Create a str of hexadecimal numbers from a bytes object. + +Example: +>>> value = b'\xb9\x01\xef' +>>> value.hex() +'b901ef' +>>> value.hex(':') +'b9:01:ef' +>>> value.hex(':', 2) +'b9:01ef' +>>> value.hex(':', -2) +'b901:ef' +[clinic start generated code]*/ static PyObject * -bytes_hex(PyBytesObject *self, PyObject *Py_UNUSED(ignored)) +bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep) +/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/ { char* argbuf = PyBytes_AS_STRING(self); Py_ssize_t arglen = PyBytes_GET_SIZE(self); - return _Py_strhex(argbuf, arglen); + return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep); } static PyObject * @@ -2452,7 +2470,7 @@ bytes_methods[] = { {"find", (PyCFunction)bytes_find, METH_VARARGS, _Py_find__doc__}, BYTES_FROMHEX_METHODDEF - {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__}, + BYTES_HEX_METHODDEF {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__}, {"isalnum", stringlib_isalnum, METH_NOARGS, _Py_isalnum__doc__}, diff --git a/Objects/clinic/bytearrayobject.c.h b/Objects/clinic/bytearrayobject.c.h index da1dc6a40af..08c6eb53f58 100644 --- a/Objects/clinic/bytearrayobject.c.h +++ b/Objects/clinic/bytearrayobject.c.h @@ -867,6 +867,75 @@ exit: return return_value; } +PyDoc_STRVAR(bytearray_hex__doc__, +"hex($self, /, sep=None, bytes_per_sep=1)\n" +"--\n" +"\n" +"Create a str of hexadecimal numbers from a bytearray object.\n" +"\n" +" sep\n" +" An optional single character or byte to separate hex bytes.\n" +" bytes_per_sep\n" +" How many bytes between separators. Positive values count from the\n" +" right, negative values count from the left.\n" +"\n" +"Example:\n" +">>> value = bytearray([0xb9, 0x01, 0xef])\n" +">>> value.hex()\n" +"\'b901ef\'\n" +">>> value.hex(\':\')\n" +"\'b9:01:ef\'\n" +">>> value.hex(\':\', 2)\n" +"\'b9:01ef\'\n" +">>> value.hex(\':\', -2)\n" +"\'b901:ef\'"); + +#define BYTEARRAY_HEX_METHODDEF \ + {"hex", (PyCFunction)(void(*)(void))bytearray_hex, METH_FASTCALL|METH_KEYWORDS, bytearray_hex__doc__}, + +static PyObject * +bytearray_hex_impl(PyByteArrayObject *self, PyObject *sep, int bytes_per_sep); + +static PyObject * +bytearray_hex(PyByteArrayObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"sep", "bytes_per_sep", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "hex", 0}; + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; + PyObject *sep = NULL; + int bytes_per_sep = 1; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 2, 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (args[0]) { + sep = args[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (PyFloat_Check(args[1])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + bytes_per_sep = _PyLong_AsInt(args[1]); + if (bytes_per_sep == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + return_value = bytearray_hex_impl(self, sep, bytes_per_sep); + +exit: + return return_value; +} + PyDoc_STRVAR(bytearray_reduce__doc__, "__reduce__($self, /)\n" "--\n" @@ -942,4 +1011,4 @@ bytearray_sizeof(PyByteArrayObject *self, PyObject *Py_UNUSED(ignored)) { return bytearray_sizeof_impl(self); } -/*[clinic end generated code: output=272fcb836b92da32 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7848247e5469ba1b input=a9049054013a1b77]*/ diff --git a/Objects/clinic/bytesobject.c.h b/Objects/clinic/bytesobject.c.h index b03078305a6..69c35063c6c 100644 --- a/Objects/clinic/bytesobject.c.h +++ b/Objects/clinic/bytesobject.c.h @@ -686,4 +686,73 @@ bytes_fromhex(PyTypeObject *type, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=af9f51b9b185567d input=a9049054013a1b77]*/ + +PyDoc_STRVAR(bytes_hex__doc__, +"hex($self, /, sep=None, bytes_per_sep=1)\n" +"--\n" +"\n" +"Create a str of hexadecimal numbers from a bytes object.\n" +"\n" +" sep\n" +" An optional single character or byte to separate hex bytes.\n" +" bytes_per_sep\n" +" How many bytes between separators. Positive values count from the\n" +" right, negative values count from the left.\n" +"\n" +"Example:\n" +">>> value = b\'\\xb9\\x01\\xef\'\n" +">>> value.hex()\n" +"\'b901ef\'\n" +">>> value.hex(\':\')\n" +"\'b9:01:ef\'\n" +">>> value.hex(\':\', 2)\n" +"\'b9:01ef\'\n" +">>> value.hex(\':\', -2)\n" +"\'b901:ef\'"); + +#define BYTES_HEX_METHODDEF \ + {"hex", (PyCFunction)(void(*)(void))bytes_hex, METH_FASTCALL|METH_KEYWORDS, bytes_hex__doc__}, + +static PyObject * +bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep); + +static PyObject * +bytes_hex(PyBytesObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"sep", "bytes_per_sep", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "hex", 0}; + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; + PyObject *sep = NULL; + int bytes_per_sep = 1; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 2, 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (args[0]) { + sep = args[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (PyFloat_Check(args[1])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + bytes_per_sep = _PyLong_AsInt(args[1]); + if (bytes_per_sep == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + return_value = bytes_hex_impl(self, sep, bytes_per_sep); + +exit: + return return_value; +} +/*[clinic end generated code: output=2d0a3733e13e753a input=a9049054013a1b77]*/ diff --git a/Objects/clinic/memoryobject.c.h b/Objects/clinic/memoryobject.c.h new file mode 100644 index 00000000000..64fce10acb5 --- /dev/null +++ b/Objects/clinic/memoryobject.c.h @@ -0,0 +1,74 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +PyDoc_STRVAR(memoryview_hex__doc__, +"hex($self, /, sep=None, bytes_per_sep=1)\n" +"--\n" +"\n" +"Return the data in the buffer as a str of hexadecimal numbers.\n" +"\n" +" sep\n" +" An optional single character or byte to separate hex bytes.\n" +" bytes_per_sep\n" +" How many bytes between separators. Positive values count from the\n" +" right, negative values count from the left.\n" +"\n" +"Example:\n" +">>> value = memoryview(b\'\\xb9\\x01\\xef\')\n" +">>> value.hex()\n" +"\'b901ef\'\n" +">>> value.hex(\':\')\n" +"\'b9:01:ef\'\n" +">>> value.hex(\':\', 2)\n" +"\'b9:01ef\'\n" +">>> value.hex(\':\', -2)\n" +"\'b901:ef\'"); + +#define MEMORYVIEW_HEX_METHODDEF \ + {"hex", (PyCFunction)(void(*)(void))memoryview_hex, METH_FASTCALL|METH_KEYWORDS, memoryview_hex__doc__}, + +static PyObject * +memoryview_hex_impl(PyMemoryViewObject *self, PyObject *sep, + int bytes_per_sep); + +static PyObject * +memoryview_hex(PyMemoryViewObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"sep", "bytes_per_sep", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "hex", 0}; + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; + PyObject *sep = NULL; + int bytes_per_sep = 1; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 2, 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (args[0]) { + sep = args[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (PyFloat_Check(args[1])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + bytes_per_sep = _PyLong_AsInt(args[1]); + if (bytes_per_sep == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + return_value = memoryview_hex_impl(self, sep, bytes_per_sep); + +exit: + return return_value; +} +/*[clinic end generated code: output=5e44e2bcf01057b5 input=a9049054013a1b77]*/ diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c index 6bbb413c3c1..3955c58ee5d 100644 --- a/Objects/memoryobject.c +++ b/Objects/memoryobject.c @@ -7,6 +7,12 @@ #include "pystrhex.h" #include +/*[clinic input] +class memoryview "PyMemoryViewObject *" "&PyMemoryView_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e2e49d2192835219]*/ + +#include "clinic/memoryobject.c.h" /****************************************************************************/ /* ManagedBuffer Object */ @@ -2160,8 +2166,33 @@ memory_tobytes(PyMemoryViewObject *self, PyObject *args, PyObject *kwds) return bytes; } +/*[clinic input] +memoryview.hex + + sep: object = NULL + An optional single character or byte to separate hex bytes. + bytes_per_sep: int = 1 + How many bytes between separators. Positive values count from the + right, negative values count from the left. + +Return the data in the buffer as a str of hexadecimal numbers. + +Example: +>>> value = memoryview(b'\xb9\x01\xef') +>>> value.hex() +'b901ef' +>>> value.hex(':') +'b9:01:ef' +>>> value.hex(':', 2) +'b9:01ef' +>>> value.hex(':', -2) +'b901:ef' +[clinic start generated code]*/ + static PyObject * -memory_hex(PyMemoryViewObject *self, PyObject *dummy) +memoryview_hex_impl(PyMemoryViewObject *self, PyObject *sep, + int bytes_per_sep) +/*[clinic end generated code: output=430ca760f94f3ca7 input=539f6a3a5fb56946]*/ { Py_buffer *src = VIEW_ADDR(self); PyObject *bytes; @@ -2170,7 +2201,7 @@ memory_hex(PyMemoryViewObject *self, PyObject *dummy) CHECK_RELEASED(self); if (MV_C_CONTIGUOUS(self->flags)) { - return _Py_strhex(src->buf, src->len); + return _Py_strhex_with_sep(src->buf, src->len, sep, bytes_per_sep); } bytes = PyBytes_FromStringAndSize(NULL, src->len); @@ -2182,7 +2213,9 @@ memory_hex(PyMemoryViewObject *self, PyObject *dummy) return NULL; } - ret = _Py_strhex(PyBytes_AS_STRING(bytes), PyBytes_GET_SIZE(bytes)); + ret = _Py_strhex_with_sep( + PyBytes_AS_STRING(bytes), PyBytes_GET_SIZE(bytes), + sep, bytes_per_sep); Py_DECREF(bytes); return ret; @@ -3090,10 +3123,6 @@ When order is 'C' or 'F', the data of the original array is converted to C or\n\ Fortran order. For contiguous views, 'A' returns an exact copy of the physical\n\ memory. In particular, in-memory Fortran order is preserved. For non-contiguous\n\ views, the data is converted to C first. order=None is the same as order='C'."); -PyDoc_STRVAR(memory_hex_doc, -"hex($self, /)\n--\n\ -\n\ -Return the data in the buffer as a string of hexadecimal numbers."); PyDoc_STRVAR(memory_tolist_doc, "tolist($self, /)\n--\n\ \n\ @@ -3110,7 +3139,7 @@ Return a readonly version of the memoryview."); static PyMethodDef memory_methods[] = { {"release", (PyCFunction)memory_release, METH_NOARGS, memory_release_doc}, {"tobytes", (PyCFunction)(void(*)(void))memory_tobytes, METH_VARARGS|METH_KEYWORDS, memory_tobytes_doc}, - {"hex", (PyCFunction)memory_hex, METH_NOARGS, memory_hex_doc}, + MEMORYVIEW_HEX_METHODDEF {"tolist", (PyCFunction)memory_tolist, METH_NOARGS, memory_tolist_doc}, {"cast", (PyCFunction)(void(*)(void))memory_cast, METH_VARARGS|METH_KEYWORDS, memory_cast_doc}, {"toreadonly", (PyCFunction)memory_toreadonly, METH_NOARGS, memory_toreadonly_doc}, diff --git a/Python/pystrhex.c b/Python/pystrhex.c index 028f187c707..695a3c394e9 100644 --- a/Python/pystrhex.c +++ b/Python/pystrhex.c @@ -5,40 +5,96 @@ #include "pystrhex.h" static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen, - int return_bytes) + const PyObject* sep, int bytes_per_sep_group, + const int return_bytes) { PyObject *retval; Py_UCS1* retbuf; - Py_ssize_t i, j; + Py_ssize_t i, j, resultlen = 0; + Py_UCS1 sep_char; + unsigned int abs_bytes_per_sep; + + if (sep) { + Py_ssize_t seplen = PyObject_Length(sep); + if (seplen < 0) { + return NULL; + } + if (seplen != 1) { + PyErr_SetString(PyExc_ValueError, "sep must be length 1."); + return NULL; + } + if (PyUnicode_Check(sep)) { + if (PyUnicode_READY(sep)) + return NULL; + if (PyUnicode_KIND(sep) != PyUnicode_1BYTE_KIND) { + PyErr_SetString(PyExc_ValueError, "sep must be ASCII."); + return NULL; + } + sep_char = PyUnicode_READ_CHAR(sep, 0); + } else if (PyBytes_Check(sep)) { + sep_char = PyBytes_AS_STRING(sep)[0]; + } else { + PyErr_SetString(PyExc_TypeError, "sep must be str or bytes."); + return NULL; + } + if (sep_char > 127 && !return_bytes) { + PyErr_SetString(PyExc_ValueError, "sep must be ASCII."); + return NULL; + } + } else { + bytes_per_sep_group = 0; + } assert(arglen >= 0); - if (arglen > PY_SSIZE_T_MAX / 2) + abs_bytes_per_sep = abs(bytes_per_sep_group); + if (bytes_per_sep_group && arglen > 0) { + /* How many sep characters we'll be inserting. */ + resultlen = (arglen - 1) / abs_bytes_per_sep; + } + /* Bounds checking for our Py_ssize_t indices. */ + if (arglen >= PY_SSIZE_T_MAX / 2 - resultlen) { return PyErr_NoMemory(); + } + resultlen += arglen * 2; + + if (abs_bytes_per_sep >= arglen) { + bytes_per_sep_group = 0; + abs_bytes_per_sep = 0; + } if (return_bytes) { /* If _PyBytes_FromSize() were public we could avoid malloc+copy. */ - retbuf = (Py_UCS1*) PyMem_Malloc(arglen*2); + retbuf = (Py_UCS1*) PyMem_Malloc(resultlen); if (!retbuf) return PyErr_NoMemory(); retval = NULL; /* silence a compiler warning, assigned later. */ } else { - retval = PyUnicode_New(arglen*2, 127); + retval = PyUnicode_New(resultlen, 127); if (!retval) return NULL; retbuf = PyUnicode_1BYTE_DATA(retval); } - /* make hex version of string, taken from shamodule.c */ - for (i=j=0; i < arglen; i++) { + /* Hexlify */ + for (i=j=0; i < arglen; ++i) { + assert(j < resultlen); unsigned char c; c = (argbuf[i] >> 4) & 0xf; retbuf[j++] = Py_hexdigits[c]; c = argbuf[i] & 0xf; retbuf[j++] = Py_hexdigits[c]; + if (bytes_per_sep_group && i < arglen - 1) { + Py_ssize_t anchor; + anchor = (bytes_per_sep_group > 0) ? (arglen - 1 - i) : (i + 1); + if (anchor % abs_bytes_per_sep == 0) { + retbuf[j++] = sep_char; + } + } } + assert(j == resultlen); if (return_bytes) { - retval = PyBytes_FromStringAndSize((const char *)retbuf, arglen*2); + retval = PyBytes_FromStringAndSize((const char *)retbuf, resultlen); PyMem_Free(retbuf); } #ifdef Py_DEBUG @@ -52,12 +108,26 @@ static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen, PyObject * _Py_strhex(const char* argbuf, const Py_ssize_t arglen) { - return _Py_strhex_impl(argbuf, arglen, 0); + return _Py_strhex_impl(argbuf, arglen, NULL, 0, 0); } /* Same as above but returns a bytes() instead of str() to avoid the * need to decode the str() when bytes are needed. */ PyObject * _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen) { - return _Py_strhex_impl(argbuf, arglen, 1); + return _Py_strhex_impl(argbuf, arglen, NULL, 0, 1); +} + +/* These variants include support for a separator between every N bytes: */ + +PyObject * _Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group) +{ + return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 0); +} + +/* Same as above but returns a bytes() instead of str() to avoid the + * need to decode the str() when bytes are needed. */ +PyObject * _Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group) +{ + return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 1); }