bpo-22385: Support output separators in hex methods. (#13578)

* bpo-22385: Support output separators in hex methods.

Also in binascii.hexlify aka b2a_hex.

The underlying implementation behind all hex generation in CPython uses the
same pystrhex.c implementation.  This adds support to bytes, bytearray,
and memoryview objects.

The binascii module functions exist rather than being slated for deprecation
because they return bytes rather than requiring an intermediate step through a
str object.

This change was inspired by MicroPython which supports sep in its binascii
implementation (and does not yet support the .hex methods).

https://bugs.python.org/issue22385
This commit is contained in:
Gregory P. Smith 2019-05-29 11:46:58 -07:00 committed by GitHub
parent aacc77fbd7
commit 0c2f930564
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 624 additions and 64 deletions

View File

@ -145,8 +145,8 @@ The :mod:`binascii` module defines the following functions:
platforms, use ``crc32(data) & 0xffffffff``. platforms, use ``crc32(data) & 0xffffffff``.
.. function:: b2a_hex(data) .. function:: b2a_hex(data[, sep[, bytes_per_sep=1]])
hexlify(data) hexlify(data[, sep[, bytes_per_sep=1]])
Return the hexadecimal representation of the binary *data*. Every byte of Return the hexadecimal representation of the binary *data*. Every byte of
*data* is converted into the corresponding 2-digit hex representation. The *data* is converted into the corresponding 2-digit hex representation. The
@ -155,6 +155,24 @@ The :mod:`binascii` module defines the following functions:
Similar functionality (but returning a text string) is also conveniently Similar functionality (but returning a text string) is also conveniently
accessible using the :meth:`bytes.hex` method. accessible using the :meth:`bytes.hex` method.
If *sep* is specified, it must be a single character str or bytes object.
It will be inserted in the output after every *bytes_per_sep* input bytes.
Separator placement is counted from the right end of the output by default;
if you wish to count from the left, supply a negative *bytes_per_sep* value.
>>> import binascii
>>> binascii.b2a_hex(b'\xb9\x01\xef')
b'b901ef'
>>> binascii.hexlify(b'\xb9\x01\xef', '-')
b'b9-01-ef'
>>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
b'b9_01ef'
>>> binascii.b2a_hex(b'\xb9\x01\xef', b' ', -2)
b'b901 ef'
.. versionchanged:: 3.8
The *sep* and *bytes_per_sep* parameters were added.
.. function:: a2b_hex(hexstr) .. function:: a2b_hex(hexstr)
unhexlify(hexstr) unhexlify(hexstr)

View File

@ -2404,8 +2404,26 @@ data and are closely related to string objects in a variety of other ways.
>>> b'\xf0\xf1\xf2'.hex() >>> b'\xf0\xf1\xf2'.hex()
'f0f1f2' 'f0f1f2'
If you want to make the hex string easier to read, you can specify a
single character separator *sep* parameter to include in the output.
By default, this separator is inserted between each byte. A second optional *bytes_per_sep*
parameter controls the spacing. Positive values calculate the
separator position from the right, negative values from the left.
>>> value = b'\xf0\xf1\xf2'
>>> value.hex('-')
'f0-f1-f2'
>>> value.hex('_', 2)
'f0_f1f2'
>>> b'UUDDLRLRAB'.hex(' ', -4)
'55554444 4c524c52 4142'
.. versionadded:: 3.5 .. versionadded:: 3.5
.. versionchanged:: 3.8
:meth:`bytes.hex` now supports optional *sep* and *bytes_per_sep*
parameters to insert separators between bytes in the hex output.
Since bytes objects are sequences of integers (akin to a tuple), for a bytes Since bytes objects are sequences of integers (akin to a tuple), for a bytes
object *b*, ``b[0]`` will be an integer, while ``b[0:1]`` will be a bytes object *b*, ``b[0]`` will be an integer, while ``b[0:1]`` will be a bytes
object of length 1. (This contrasts with text strings, where both indexing object of length 1. (This contrasts with text strings, where both indexing

View File

@ -10,6 +10,9 @@ extern "C" {
PyAPI_FUNC(PyObject*) _Py_strhex(const char* argbuf, const Py_ssize_t arglen); PyAPI_FUNC(PyObject*) _Py_strhex(const char* argbuf, const Py_ssize_t arglen);
/* Returns a bytes() containing the ASCII hex representation of argbuf. */ /* Returns a bytes() containing the ASCII hex representation of argbuf. */
PyAPI_FUNC(PyObject*) _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen); PyAPI_FUNC(PyObject*) _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen);
/* These variants include support for a separator between every N bytes: */
PyAPI_FUNC(PyObject*) _Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group);
PyAPI_FUNC(PyObject*) _Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group);
#endif /* !Py_LIMITED_API */ #endif /* !Py_LIMITED_API */
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -240,6 +240,18 @@ class BinASCIITest(unittest.TestCase):
self.assertEqual(binascii.hexlify(self.type2test(s)), t) self.assertEqual(binascii.hexlify(self.type2test(s)), t)
self.assertEqual(binascii.unhexlify(self.type2test(t)), u) self.assertEqual(binascii.unhexlify(self.type2test(t)), u)
def test_hex_separator(self):
"""Test that hexlify and b2a_hex are binary versions of bytes.hex."""
# Logic of separators is tested in test_bytes.py. This checks that
# arg parsing works and exercises the direct to bytes object code
# path within pystrhex.c.
s = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000'
self.assertEqual(binascii.hexlify(self.type2test(s)), s.hex().encode('ascii'))
expected8 = s.hex('.', 8).encode('ascii')
self.assertEqual(binascii.hexlify(self.type2test(s), '.', 8), expected8)
expected1 = s.hex(':').encode('ascii')
self.assertEqual(binascii.b2a_hex(self.type2test(s), ':'), expected1)
def test_qp(self): def test_qp(self):
type2test = self.type2test type2test = self.type2test
a2b_qp = binascii.a2b_qp a2b_qp = binascii.a2b_qp

View File

@ -417,6 +417,63 @@ class BaseBytesTest:
self.assertEqual(self.type2test(b"\x1a\x2b\x30").hex(), '1a2b30') self.assertEqual(self.type2test(b"\x1a\x2b\x30").hex(), '1a2b30')
self.assertEqual(memoryview(b"\x1a\x2b\x30").hex(), '1a2b30') self.assertEqual(memoryview(b"\x1a\x2b\x30").hex(), '1a2b30')
def test_hex_separator_basics(self):
three_bytes = self.type2test(b'\xb9\x01\xef')
self.assertEqual(three_bytes.hex(), 'b901ef')
with self.assertRaises(ValueError):
three_bytes.hex('')
with self.assertRaises(ValueError):
three_bytes.hex('xx')
self.assertEqual(three_bytes.hex(':', 0), 'b901ef')
with self.assertRaises(TypeError):
three_bytes.hex(None, 0)
with self.assertRaises(ValueError):
three_bytes.hex('\xff')
with self.assertRaises(ValueError):
three_bytes.hex(b'\xff')
with self.assertRaises(ValueError):
three_bytes.hex(b'\x80')
with self.assertRaises(ValueError):
three_bytes.hex(chr(0x100))
self.assertEqual(three_bytes.hex(':', 0), 'b901ef')
self.assertEqual(three_bytes.hex(b'\x00'), 'b9\x0001\x00ef')
self.assertEqual(three_bytes.hex('\x00'), 'b9\x0001\x00ef')
self.assertEqual(three_bytes.hex(b'\x7f'), 'b9\x7f01\x7fef')
self.assertEqual(three_bytes.hex('\x7f'), 'b9\x7f01\x7fef')
self.assertEqual(three_bytes.hex(':', 3), 'b901ef')
self.assertEqual(three_bytes.hex(':', 4), 'b901ef')
self.assertEqual(three_bytes.hex(':', -4), 'b901ef')
self.assertEqual(three_bytes.hex(':'), 'b9:01:ef')
self.assertEqual(three_bytes.hex(b'$'), 'b9$01$ef')
self.assertEqual(three_bytes.hex(':', 1), 'b9:01:ef')
self.assertEqual(three_bytes.hex(':', -1), 'b9:01:ef')
self.assertEqual(three_bytes.hex(':', 2), 'b9:01ef')
self.assertEqual(three_bytes.hex(':', 1), 'b9:01:ef')
self.assertEqual(three_bytes.hex('*', -2), 'b901*ef')
value = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000'
self.assertEqual(value.hex('.', 8), '7b7305000000776f.726c646902000000.730500000068656c.6c6f690100000030')
def test_hex_separator_five_bytes(self):
five_bytes = self.type2test(range(90,95))
self.assertEqual(five_bytes.hex(), '5a5b5c5d5e')
def test_hex_separator_six_bytes(self):
six_bytes = self.type2test(x*3 for x in range(1, 7))
self.assertEqual(six_bytes.hex(), '0306090c0f12')
self.assertEqual(six_bytes.hex('.', 1), '03.06.09.0c.0f.12')
self.assertEqual(six_bytes.hex(' ', 2), '0306 090c 0f12')
self.assertEqual(six_bytes.hex('-', 3), '030609-0c0f12')
self.assertEqual(six_bytes.hex(':', 4), '0306:090c0f12')
self.assertEqual(six_bytes.hex(':', 5), '03:06090c0f12')
self.assertEqual(six_bytes.hex(':', 6), '0306090c0f12')
self.assertEqual(six_bytes.hex(':', 95), '0306090c0f12')
self.assertEqual(six_bytes.hex('_', -3), '030609_0c0f12')
self.assertEqual(six_bytes.hex(':', -4), '0306090c:0f12')
self.assertEqual(six_bytes.hex(b'@', -5), '0306090c0f@12')
self.assertEqual(six_bytes.hex(':', -6), '0306090c0f12')
self.assertEqual(six_bytes.hex(' ', -95), '0306090c0f12')
def test_join(self): def test_join(self):
self.assertEqual(self.type2test(b"").join([]), b"") self.assertEqual(self.type2test(b"").join([]), b"")
self.assertEqual(self.type2test(b"").join([b""]), b"") self.assertEqual(self.type2test(b"").join([b""]), b"")

View File

@ -665,11 +665,13 @@ plain ol' Python and is guaranteed to be available.
True True
>>> real_tests = [t for t in tests if len(t.examples) > 0] >>> real_tests = [t for t in tests if len(t.examples) > 0]
>>> len(real_tests) # objects that actually have doctests >>> len(real_tests) # objects that actually have doctests
9 12
>>> for t in real_tests: >>> for t in real_tests:
... print('{} {}'.format(len(t.examples), t.name)) ... print('{} {}'.format(len(t.examples), t.name))
... ...
1 builtins.bin 1 builtins.bin
5 builtins.bytearray.hex
5 builtins.bytes.hex
3 builtins.float.as_integer_ratio 3 builtins.float.as_integer_ratio
2 builtins.float.fromhex 2 builtins.float.fromhex
2 builtins.float.hex 2 builtins.float.hex
@ -677,6 +679,7 @@ plain ol' Python and is guaranteed to be available.
1 builtins.int 1 builtins.int
3 builtins.int.as_integer_ratio 3 builtins.int.as_integer_ratio
2 builtins.int.bit_length 2 builtins.int.bit_length
5 builtins.memoryview.hex
1 builtins.oct 1 builtins.oct
Note here that 'bin', 'oct', and 'hex' are functions; 'float.as_integer_ratio', Note here that 'bin', 'oct', and 'hex' are functions; 'float.as_integer_ratio',

View File

@ -0,0 +1,4 @@
The :meth:`bytes.hex`, :meth:`bytearray.hex`, and :meth:`memoryview.hex`
methods as well as the :func:`binascii.hexlify` and :func:`binascii.b2a_hex`
functions now have the ability to include an optional separator between hex
bytes. This functionality was inspired by MicroPython's hexlify implementation.

View File

@ -1159,19 +1159,33 @@ binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
binascii.b2a_hex binascii.b2a_hex
data: Py_buffer data: Py_buffer
/ sep: object = NULL
An optional single character or byte to separate hex bytes.
bytes_per_sep: int = 1
How many bytes between separators. Positive values count from the
right, negative values count from the left.
Hexadecimal representation of binary data. Hexadecimal representation of binary data.
The return value is a bytes object. This function is also The return value is a bytes object. This function is also
available as "hexlify()". available as "hexlify()".
Example:
>>> binascii.b2a_hex(b'\xb9\x01\xef')
b'b901ef'
>>> binascii.hexlify(b'\xb9\x01\xef', ':')
b'b9:01:ef'
>>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
b'b9_01ef'
[clinic start generated code]*/ [clinic start generated code]*/
static PyObject * static PyObject *
binascii_b2a_hex_impl(PyObject *module, Py_buffer *data) binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
/*[clinic end generated code: output=92fec1a95c9897a0 input=96423cfa299ff3b1]*/ int bytes_per_sep)
/*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
{ {
return _Py_strhex_bytes((const char *)data->buf, data->len); return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
sep, bytes_per_sep);
} }
/*[clinic input] /*[clinic input]
@ -1179,14 +1193,17 @@ binascii.hexlify = binascii.b2a_hex
Hexadecimal representation of binary data. Hexadecimal representation of binary data.
The return value is a bytes object. The return value is a bytes object. This function is also
available as "b2a_hex()".
[clinic start generated code]*/ [clinic start generated code]*/
static PyObject * static PyObject *
binascii_hexlify_impl(PyObject *module, Py_buffer *data) binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
/*[clinic end generated code: output=749e95e53c14880c input=2e3afae7f083f061]*/ int bytes_per_sep)
/*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
{ {
return _Py_strhex_bytes((const char *)data->buf, data->len); return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
sep, bytes_per_sep);
} }
/*[clinic input] /*[clinic input]

View File

@ -432,34 +432,78 @@ exit:
} }
PyDoc_STRVAR(binascii_b2a_hex__doc__, PyDoc_STRVAR(binascii_b2a_hex__doc__,
"b2a_hex($module, data, /)\n" "b2a_hex($module, /, data, sep=None, bytes_per_sep=1)\n"
"--\n" "--\n"
"\n" "\n"
"Hexadecimal representation of binary data.\n" "Hexadecimal representation of binary data.\n"
"\n" "\n"
" sep\n"
" An optional single character or byte to separate hex bytes.\n"
" bytes_per_sep\n"
" How many bytes between separators. Positive values count from the\n"
" right, negative values count from the left.\n"
"\n"
"The return value is a bytes object. This function is also\n" "The return value is a bytes object. This function is also\n"
"available as \"hexlify()\"."); "available as \"hexlify()\".\n"
"\n"
"Example:\n"
">>> binascii.b2a_hex(b\'\\xb9\\x01\\xef\')\n"
"b\'b901ef\'\n"
">>> binascii.hexlify(b\'\\xb9\\x01\\xef\', \':\')\n"
"b\'b9:01:ef\'\n"
">>> binascii.b2a_hex(b\'\\xb9\\x01\\xef\', b\'_\', 2)\n"
"b\'b9_01ef\'");
#define BINASCII_B2A_HEX_METHODDEF \ #define BINASCII_B2A_HEX_METHODDEF \
{"b2a_hex", (PyCFunction)binascii_b2a_hex, METH_O, binascii_b2a_hex__doc__}, {"b2a_hex", (PyCFunction)(void(*)(void))binascii_b2a_hex, METH_FASTCALL|METH_KEYWORDS, binascii_b2a_hex__doc__},
static PyObject * static PyObject *
binascii_b2a_hex_impl(PyObject *module, Py_buffer *data); binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
int bytes_per_sep);
static PyObject * static PyObject *
binascii_b2a_hex(PyObject *module, PyObject *arg) binascii_b2a_hex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{ {
PyObject *return_value = NULL; PyObject *return_value = NULL;
static const char * const _keywords[] = {"data", "sep", "bytes_per_sep", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "b2a_hex", 0};
PyObject *argsbuf[3];
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
Py_buffer data = {NULL, NULL}; Py_buffer data = {NULL, NULL};
PyObject *sep = NULL;
int bytes_per_sep = 1;
if (PyObject_GetBuffer(arg, &data, PyBUF_SIMPLE) != 0) { args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 3, 0, argsbuf);
if (!args) {
goto exit;
}
if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) {
goto exit; goto exit;
} }
if (!PyBuffer_IsContiguous(&data, 'C')) { if (!PyBuffer_IsContiguous(&data, 'C')) {
_PyArg_BadArgument("b2a_hex", 0, "contiguous buffer", arg); _PyArg_BadArgument("b2a_hex", 1, "contiguous buffer", args[0]);
goto exit; goto exit;
} }
return_value = binascii_b2a_hex_impl(module, &data); if (!noptargs) {
goto skip_optional_pos;
}
if (args[1]) {
sep = args[1];
if (!--noptargs) {
goto skip_optional_pos;
}
}
if (PyFloat_Check(args[2])) {
PyErr_SetString(PyExc_TypeError,
"integer argument expected, got float" );
goto exit;
}
bytes_per_sep = _PyLong_AsInt(args[2]);
if (bytes_per_sep == -1 && PyErr_Occurred()) {
goto exit;
}
skip_optional_pos:
return_value = binascii_b2a_hex_impl(module, &data, sep, bytes_per_sep);
exit: exit:
/* Cleanup for data */ /* Cleanup for data */
@ -471,33 +515,70 @@ exit:
} }
PyDoc_STRVAR(binascii_hexlify__doc__, PyDoc_STRVAR(binascii_hexlify__doc__,
"hexlify($module, data, /)\n" "hexlify($module, /, data, sep=None, bytes_per_sep=1)\n"
"--\n" "--\n"
"\n" "\n"
"Hexadecimal representation of binary data.\n" "Hexadecimal representation of binary data.\n"
"\n" "\n"
"The return value is a bytes object."); " sep\n"
" An optional single character or byte to separate hex bytes.\n"
" bytes_per_sep\n"
" How many bytes between separators. Positive values count from the\n"
" right, negative values count from the left.\n"
"\n"
"The return value is a bytes object. This function is also\n"
"available as \"b2a_hex()\".");
#define BINASCII_HEXLIFY_METHODDEF \ #define BINASCII_HEXLIFY_METHODDEF \
{"hexlify", (PyCFunction)binascii_hexlify, METH_O, binascii_hexlify__doc__}, {"hexlify", (PyCFunction)(void(*)(void))binascii_hexlify, METH_FASTCALL|METH_KEYWORDS, binascii_hexlify__doc__},
static PyObject * static PyObject *
binascii_hexlify_impl(PyObject *module, Py_buffer *data); binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
int bytes_per_sep);
static PyObject * static PyObject *
binascii_hexlify(PyObject *module, PyObject *arg) binascii_hexlify(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{ {
PyObject *return_value = NULL; PyObject *return_value = NULL;
static const char * const _keywords[] = {"data", "sep", "bytes_per_sep", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "hexlify", 0};
PyObject *argsbuf[3];
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
Py_buffer data = {NULL, NULL}; Py_buffer data = {NULL, NULL};
PyObject *sep = NULL;
int bytes_per_sep = 1;
if (PyObject_GetBuffer(arg, &data, PyBUF_SIMPLE) != 0) { args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 3, 0, argsbuf);
if (!args) {
goto exit;
}
if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) {
goto exit; goto exit;
} }
if (!PyBuffer_IsContiguous(&data, 'C')) { if (!PyBuffer_IsContiguous(&data, 'C')) {
_PyArg_BadArgument("hexlify", 0, "contiguous buffer", arg); _PyArg_BadArgument("hexlify", 1, "contiguous buffer", args[0]);
goto exit; goto exit;
} }
return_value = binascii_hexlify_impl(module, &data); if (!noptargs) {
goto skip_optional_pos;
}
if (args[1]) {
sep = args[1];
if (!--noptargs) {
goto skip_optional_pos;
}
}
if (PyFloat_Check(args[2])) {
PyErr_SetString(PyExc_TypeError,
"integer argument expected, got float" );
goto exit;
}
bytes_per_sep = _PyLong_AsInt(args[2]);
if (bytes_per_sep == -1 && PyErr_Occurred()) {
goto exit;
}
skip_optional_pos:
return_value = binascii_hexlify_impl(module, &data, sep, bytes_per_sep);
exit: exit:
/* Cleanup for data */ /* Cleanup for data */
@ -720,4 +801,4 @@ exit:
return return_value; return return_value;
} }
/*[clinic end generated code: output=a4a38e162605aca2 input=a9049054013a1b77]*/ /*[clinic end generated code: output=f7b8049edb130c63 input=a9049054013a1b77]*/

View File

@ -2020,18 +2020,36 @@ bytearray_fromhex_impl(PyTypeObject *type, PyObject *string)
return result; return result;
} }
PyDoc_STRVAR(hex__doc__, /*[clinic input]
"B.hex() -> string\n\ bytearray.hex
\n\
Create a string of hexadecimal numbers from a bytearray object.\n\ sep: object = NULL
Example: bytearray([0xb9, 0x01, 0xef]).hex() -> 'b901ef'."); An optional single character or byte to separate hex bytes.
bytes_per_sep: int = 1
How many bytes between separators. Positive values count from the
right, negative values count from the left.
Create a str of hexadecimal numbers from a bytearray object.
Example:
>>> value = bytearray([0xb9, 0x01, 0xef])
>>> value.hex()
'b901ef'
>>> value.hex(':')
'b9:01:ef'
>>> value.hex(':', 2)
'b9:01ef'
>>> value.hex(':', -2)
'b901:ef'
[clinic start generated code]*/
static PyObject * static PyObject *
bytearray_hex(PyBytesObject *self, PyObject *Py_UNUSED(ignored)) bytearray_hex_impl(PyByteArrayObject *self, PyObject *sep, int bytes_per_sep)
/*[clinic end generated code: output=29c4e5ef72c565a0 input=814c15830ac8c4b5]*/
{ {
char* argbuf = PyByteArray_AS_STRING(self); char* argbuf = PyByteArray_AS_STRING(self);
Py_ssize_t arglen = PyByteArray_GET_SIZE(self); Py_ssize_t arglen = PyByteArray_GET_SIZE(self);
return _Py_strhex(argbuf, arglen); return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
} }
static PyObject * static PyObject *
@ -2160,7 +2178,7 @@ bytearray_methods[] = {
{"find", (PyCFunction)bytearray_find, METH_VARARGS, {"find", (PyCFunction)bytearray_find, METH_VARARGS,
_Py_find__doc__}, _Py_find__doc__},
BYTEARRAY_FROMHEX_METHODDEF BYTEARRAY_FROMHEX_METHODDEF
{"hex", (PyCFunction)bytearray_hex, METH_NOARGS, hex__doc__}, BYTEARRAY_HEX_METHODDEF
{"index", (PyCFunction)bytearray_index, METH_VARARGS, _Py_index__doc__}, {"index", (PyCFunction)bytearray_index, METH_VARARGS, _Py_index__doc__},
BYTEARRAY_INSERT_METHODDEF BYTEARRAY_INSERT_METHODDEF
{"isalnum", stringlib_isalnum, METH_NOARGS, {"isalnum", stringlib_isalnum, METH_NOARGS,

View File

@ -2416,18 +2416,36 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
return NULL; return NULL;
} }
PyDoc_STRVAR(hex__doc__, /*[clinic input]
"B.hex() -> string\n\ bytes.hex
\n\
Create a string of hexadecimal numbers from a bytes object.\n\ sep: object = NULL
Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'."); An optional single character or byte to separate hex bytes.
bytes_per_sep: int = 1
How many bytes between separators. Positive values count from the
right, negative values count from the left.
Create a str of hexadecimal numbers from a bytes object.
Example:
>>> value = b'\xb9\x01\xef'
>>> value.hex()
'b901ef'
>>> value.hex(':')
'b9:01:ef'
>>> value.hex(':', 2)
'b9:01ef'
>>> value.hex(':', -2)
'b901:ef'
[clinic start generated code]*/
static PyObject * static PyObject *
bytes_hex(PyBytesObject *self, PyObject *Py_UNUSED(ignored)) bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
{ {
char* argbuf = PyBytes_AS_STRING(self); char* argbuf = PyBytes_AS_STRING(self);
Py_ssize_t arglen = PyBytes_GET_SIZE(self); Py_ssize_t arglen = PyBytes_GET_SIZE(self);
return _Py_strhex(argbuf, arglen); return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
} }
static PyObject * static PyObject *
@ -2452,7 +2470,7 @@ bytes_methods[] = {
{"find", (PyCFunction)bytes_find, METH_VARARGS, {"find", (PyCFunction)bytes_find, METH_VARARGS,
_Py_find__doc__}, _Py_find__doc__},
BYTES_FROMHEX_METHODDEF BYTES_FROMHEX_METHODDEF
{"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__}, BYTES_HEX_METHODDEF
{"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__}, {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
{"isalnum", stringlib_isalnum, METH_NOARGS, {"isalnum", stringlib_isalnum, METH_NOARGS,
_Py_isalnum__doc__}, _Py_isalnum__doc__},

View File

@ -867,6 +867,75 @@ exit:
return return_value; return return_value;
} }
PyDoc_STRVAR(bytearray_hex__doc__,
"hex($self, /, sep=None, bytes_per_sep=1)\n"
"--\n"
"\n"
"Create a str of hexadecimal numbers from a bytearray object.\n"
"\n"
" sep\n"
" An optional single character or byte to separate hex bytes.\n"
" bytes_per_sep\n"
" How many bytes between separators. Positive values count from the\n"
" right, negative values count from the left.\n"
"\n"
"Example:\n"
">>> value = bytearray([0xb9, 0x01, 0xef])\n"
">>> value.hex()\n"
"\'b901ef\'\n"
">>> value.hex(\':\')\n"
"\'b9:01:ef\'\n"
">>> value.hex(\':\', 2)\n"
"\'b9:01ef\'\n"
">>> value.hex(\':\', -2)\n"
"\'b901:ef\'");
#define BYTEARRAY_HEX_METHODDEF \
{"hex", (PyCFunction)(void(*)(void))bytearray_hex, METH_FASTCALL|METH_KEYWORDS, bytearray_hex__doc__},
static PyObject *
bytearray_hex_impl(PyByteArrayObject *self, PyObject *sep, int bytes_per_sep);
static PyObject *
bytearray_hex(PyByteArrayObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
static const char * const _keywords[] = {"sep", "bytes_per_sep", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "hex", 0};
PyObject *argsbuf[2];
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0;
PyObject *sep = NULL;
int bytes_per_sep = 1;
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 2, 0, argsbuf);
if (!args) {
goto exit;
}
if (!noptargs) {
goto skip_optional_pos;
}
if (args[0]) {
sep = args[0];
if (!--noptargs) {
goto skip_optional_pos;
}
}
if (PyFloat_Check(args[1])) {
PyErr_SetString(PyExc_TypeError,
"integer argument expected, got float" );
goto exit;
}
bytes_per_sep = _PyLong_AsInt(args[1]);
if (bytes_per_sep == -1 && PyErr_Occurred()) {
goto exit;
}
skip_optional_pos:
return_value = bytearray_hex_impl(self, sep, bytes_per_sep);
exit:
return return_value;
}
PyDoc_STRVAR(bytearray_reduce__doc__, PyDoc_STRVAR(bytearray_reduce__doc__,
"__reduce__($self, /)\n" "__reduce__($self, /)\n"
"--\n" "--\n"
@ -942,4 +1011,4 @@ bytearray_sizeof(PyByteArrayObject *self, PyObject *Py_UNUSED(ignored))
{ {
return bytearray_sizeof_impl(self); return bytearray_sizeof_impl(self);
} }
/*[clinic end generated code: output=272fcb836b92da32 input=a9049054013a1b77]*/ /*[clinic end generated code: output=7848247e5469ba1b input=a9049054013a1b77]*/

View File

@ -686,4 +686,73 @@ bytes_fromhex(PyTypeObject *type, PyObject *arg)
exit: exit:
return return_value; return return_value;
} }
/*[clinic end generated code: output=af9f51b9b185567d input=a9049054013a1b77]*/
PyDoc_STRVAR(bytes_hex__doc__,
"hex($self, /, sep=None, bytes_per_sep=1)\n"
"--\n"
"\n"
"Create a str of hexadecimal numbers from a bytes object.\n"
"\n"
" sep\n"
" An optional single character or byte to separate hex bytes.\n"
" bytes_per_sep\n"
" How many bytes between separators. Positive values count from the\n"
" right, negative values count from the left.\n"
"\n"
"Example:\n"
">>> value = b\'\\xb9\\x01\\xef\'\n"
">>> value.hex()\n"
"\'b901ef\'\n"
">>> value.hex(\':\')\n"
"\'b9:01:ef\'\n"
">>> value.hex(\':\', 2)\n"
"\'b9:01ef\'\n"
">>> value.hex(\':\', -2)\n"
"\'b901:ef\'");
#define BYTES_HEX_METHODDEF \
{"hex", (PyCFunction)(void(*)(void))bytes_hex, METH_FASTCALL|METH_KEYWORDS, bytes_hex__doc__},
static PyObject *
bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep);
static PyObject *
bytes_hex(PyBytesObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
static const char * const _keywords[] = {"sep", "bytes_per_sep", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "hex", 0};
PyObject *argsbuf[2];
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0;
PyObject *sep = NULL;
int bytes_per_sep = 1;
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 2, 0, argsbuf);
if (!args) {
goto exit;
}
if (!noptargs) {
goto skip_optional_pos;
}
if (args[0]) {
sep = args[0];
if (!--noptargs) {
goto skip_optional_pos;
}
}
if (PyFloat_Check(args[1])) {
PyErr_SetString(PyExc_TypeError,
"integer argument expected, got float" );
goto exit;
}
bytes_per_sep = _PyLong_AsInt(args[1]);
if (bytes_per_sep == -1 && PyErr_Occurred()) {
goto exit;
}
skip_optional_pos:
return_value = bytes_hex_impl(self, sep, bytes_per_sep);
exit:
return return_value;
}
/*[clinic end generated code: output=2d0a3733e13e753a input=a9049054013a1b77]*/

74
Objects/clinic/memoryobject.c.h generated Normal file
View File

@ -0,0 +1,74 @@
/*[clinic input]
preserve
[clinic start generated code]*/
PyDoc_STRVAR(memoryview_hex__doc__,
"hex($self, /, sep=None, bytes_per_sep=1)\n"
"--\n"
"\n"
"Return the data in the buffer as a str of hexadecimal numbers.\n"
"\n"
" sep\n"
" An optional single character or byte to separate hex bytes.\n"
" bytes_per_sep\n"
" How many bytes between separators. Positive values count from the\n"
" right, negative values count from the left.\n"
"\n"
"Example:\n"
">>> value = memoryview(b\'\\xb9\\x01\\xef\')\n"
">>> value.hex()\n"
"\'b901ef\'\n"
">>> value.hex(\':\')\n"
"\'b9:01:ef\'\n"
">>> value.hex(\':\', 2)\n"
"\'b9:01ef\'\n"
">>> value.hex(\':\', -2)\n"
"\'b901:ef\'");
#define MEMORYVIEW_HEX_METHODDEF \
{"hex", (PyCFunction)(void(*)(void))memoryview_hex, METH_FASTCALL|METH_KEYWORDS, memoryview_hex__doc__},
static PyObject *
memoryview_hex_impl(PyMemoryViewObject *self, PyObject *sep,
int bytes_per_sep);
static PyObject *
memoryview_hex(PyMemoryViewObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
static const char * const _keywords[] = {"sep", "bytes_per_sep", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "hex", 0};
PyObject *argsbuf[2];
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0;
PyObject *sep = NULL;
int bytes_per_sep = 1;
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 2, 0, argsbuf);
if (!args) {
goto exit;
}
if (!noptargs) {
goto skip_optional_pos;
}
if (args[0]) {
sep = args[0];
if (!--noptargs) {
goto skip_optional_pos;
}
}
if (PyFloat_Check(args[1])) {
PyErr_SetString(PyExc_TypeError,
"integer argument expected, got float" );
goto exit;
}
bytes_per_sep = _PyLong_AsInt(args[1]);
if (bytes_per_sep == -1 && PyErr_Occurred()) {
goto exit;
}
skip_optional_pos:
return_value = memoryview_hex_impl(self, sep, bytes_per_sep);
exit:
return return_value;
}
/*[clinic end generated code: output=5e44e2bcf01057b5 input=a9049054013a1b77]*/

View File

@ -7,6 +7,12 @@
#include "pystrhex.h" #include "pystrhex.h"
#include <stddef.h> #include <stddef.h>
/*[clinic input]
class memoryview "PyMemoryViewObject *" "&PyMemoryView_Type"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e2e49d2192835219]*/
#include "clinic/memoryobject.c.h"
/****************************************************************************/ /****************************************************************************/
/* ManagedBuffer Object */ /* ManagedBuffer Object */
@ -2160,8 +2166,33 @@ memory_tobytes(PyMemoryViewObject *self, PyObject *args, PyObject *kwds)
return bytes; return bytes;
} }
/*[clinic input]
memoryview.hex
sep: object = NULL
An optional single character or byte to separate hex bytes.
bytes_per_sep: int = 1
How many bytes between separators. Positive values count from the
right, negative values count from the left.
Return the data in the buffer as a str of hexadecimal numbers.
Example:
>>> value = memoryview(b'\xb9\x01\xef')
>>> value.hex()
'b901ef'
>>> value.hex(':')
'b9:01:ef'
>>> value.hex(':', 2)
'b9:01ef'
>>> value.hex(':', -2)
'b901:ef'
[clinic start generated code]*/
static PyObject * static PyObject *
memory_hex(PyMemoryViewObject *self, PyObject *dummy) memoryview_hex_impl(PyMemoryViewObject *self, PyObject *sep,
int bytes_per_sep)
/*[clinic end generated code: output=430ca760f94f3ca7 input=539f6a3a5fb56946]*/
{ {
Py_buffer *src = VIEW_ADDR(self); Py_buffer *src = VIEW_ADDR(self);
PyObject *bytes; PyObject *bytes;
@ -2170,7 +2201,7 @@ memory_hex(PyMemoryViewObject *self, PyObject *dummy)
CHECK_RELEASED(self); CHECK_RELEASED(self);
if (MV_C_CONTIGUOUS(self->flags)) { if (MV_C_CONTIGUOUS(self->flags)) {
return _Py_strhex(src->buf, src->len); return _Py_strhex_with_sep(src->buf, src->len, sep, bytes_per_sep);
} }
bytes = PyBytes_FromStringAndSize(NULL, src->len); bytes = PyBytes_FromStringAndSize(NULL, src->len);
@ -2182,7 +2213,9 @@ memory_hex(PyMemoryViewObject *self, PyObject *dummy)
return NULL; return NULL;
} }
ret = _Py_strhex(PyBytes_AS_STRING(bytes), PyBytes_GET_SIZE(bytes)); ret = _Py_strhex_with_sep(
PyBytes_AS_STRING(bytes), PyBytes_GET_SIZE(bytes),
sep, bytes_per_sep);
Py_DECREF(bytes); Py_DECREF(bytes);
return ret; return ret;
@ -3090,10 +3123,6 @@ When order is 'C' or 'F', the data of the original array is converted to C or\n\
Fortran order. For contiguous views, 'A' returns an exact copy of the physical\n\ Fortran order. For contiguous views, 'A' returns an exact copy of the physical\n\
memory. In particular, in-memory Fortran order is preserved. For non-contiguous\n\ memory. In particular, in-memory Fortran order is preserved. For non-contiguous\n\
views, the data is converted to C first. order=None is the same as order='C'."); views, the data is converted to C first. order=None is the same as order='C'.");
PyDoc_STRVAR(memory_hex_doc,
"hex($self, /)\n--\n\
\n\
Return the data in the buffer as a string of hexadecimal numbers.");
PyDoc_STRVAR(memory_tolist_doc, PyDoc_STRVAR(memory_tolist_doc,
"tolist($self, /)\n--\n\ "tolist($self, /)\n--\n\
\n\ \n\
@ -3110,7 +3139,7 @@ Return a readonly version of the memoryview.");
static PyMethodDef memory_methods[] = { static PyMethodDef memory_methods[] = {
{"release", (PyCFunction)memory_release, METH_NOARGS, memory_release_doc}, {"release", (PyCFunction)memory_release, METH_NOARGS, memory_release_doc},
{"tobytes", (PyCFunction)(void(*)(void))memory_tobytes, METH_VARARGS|METH_KEYWORDS, memory_tobytes_doc}, {"tobytes", (PyCFunction)(void(*)(void))memory_tobytes, METH_VARARGS|METH_KEYWORDS, memory_tobytes_doc},
{"hex", (PyCFunction)memory_hex, METH_NOARGS, memory_hex_doc}, MEMORYVIEW_HEX_METHODDEF
{"tolist", (PyCFunction)memory_tolist, METH_NOARGS, memory_tolist_doc}, {"tolist", (PyCFunction)memory_tolist, METH_NOARGS, memory_tolist_doc},
{"cast", (PyCFunction)(void(*)(void))memory_cast, METH_VARARGS|METH_KEYWORDS, memory_cast_doc}, {"cast", (PyCFunction)(void(*)(void))memory_cast, METH_VARARGS|METH_KEYWORDS, memory_cast_doc},
{"toreadonly", (PyCFunction)memory_toreadonly, METH_NOARGS, memory_toreadonly_doc}, {"toreadonly", (PyCFunction)memory_toreadonly, METH_NOARGS, memory_toreadonly_doc},

View File

@ -5,40 +5,96 @@
#include "pystrhex.h" #include "pystrhex.h"
static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen, static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen,
int return_bytes) const PyObject* sep, int bytes_per_sep_group,
const int return_bytes)
{ {
PyObject *retval; PyObject *retval;
Py_UCS1* retbuf; Py_UCS1* retbuf;
Py_ssize_t i, j; Py_ssize_t i, j, resultlen = 0;
Py_UCS1 sep_char;
unsigned int abs_bytes_per_sep;
if (sep) {
Py_ssize_t seplen = PyObject_Length(sep);
if (seplen < 0) {
return NULL;
}
if (seplen != 1) {
PyErr_SetString(PyExc_ValueError, "sep must be length 1.");
return NULL;
}
if (PyUnicode_Check(sep)) {
if (PyUnicode_READY(sep))
return NULL;
if (PyUnicode_KIND(sep) != PyUnicode_1BYTE_KIND) {
PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
return NULL;
}
sep_char = PyUnicode_READ_CHAR(sep, 0);
} else if (PyBytes_Check(sep)) {
sep_char = PyBytes_AS_STRING(sep)[0];
} else {
PyErr_SetString(PyExc_TypeError, "sep must be str or bytes.");
return NULL;
}
if (sep_char > 127 && !return_bytes) {
PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
return NULL;
}
} else {
bytes_per_sep_group = 0;
}
assert(arglen >= 0); assert(arglen >= 0);
if (arglen > PY_SSIZE_T_MAX / 2) abs_bytes_per_sep = abs(bytes_per_sep_group);
if (bytes_per_sep_group && arglen > 0) {
/* How many sep characters we'll be inserting. */
resultlen = (arglen - 1) / abs_bytes_per_sep;
}
/* Bounds checking for our Py_ssize_t indices. */
if (arglen >= PY_SSIZE_T_MAX / 2 - resultlen) {
return PyErr_NoMemory(); return PyErr_NoMemory();
}
resultlen += arglen * 2;
if (abs_bytes_per_sep >= arglen) {
bytes_per_sep_group = 0;
abs_bytes_per_sep = 0;
}
if (return_bytes) { if (return_bytes) {
/* If _PyBytes_FromSize() were public we could avoid malloc+copy. */ /* If _PyBytes_FromSize() were public we could avoid malloc+copy. */
retbuf = (Py_UCS1*) PyMem_Malloc(arglen*2); retbuf = (Py_UCS1*) PyMem_Malloc(resultlen);
if (!retbuf) if (!retbuf)
return PyErr_NoMemory(); return PyErr_NoMemory();
retval = NULL; /* silence a compiler warning, assigned later. */ retval = NULL; /* silence a compiler warning, assigned later. */
} else { } else {
retval = PyUnicode_New(arglen*2, 127); retval = PyUnicode_New(resultlen, 127);
if (!retval) if (!retval)
return NULL; return NULL;
retbuf = PyUnicode_1BYTE_DATA(retval); retbuf = PyUnicode_1BYTE_DATA(retval);
} }
/* make hex version of string, taken from shamodule.c */ /* Hexlify */
for (i=j=0; i < arglen; i++) { for (i=j=0; i < arglen; ++i) {
assert(j < resultlen);
unsigned char c; unsigned char c;
c = (argbuf[i] >> 4) & 0xf; c = (argbuf[i] >> 4) & 0xf;
retbuf[j++] = Py_hexdigits[c]; retbuf[j++] = Py_hexdigits[c];
c = argbuf[i] & 0xf; c = argbuf[i] & 0xf;
retbuf[j++] = Py_hexdigits[c]; retbuf[j++] = Py_hexdigits[c];
if (bytes_per_sep_group && i < arglen - 1) {
Py_ssize_t anchor;
anchor = (bytes_per_sep_group > 0) ? (arglen - 1 - i) : (i + 1);
if (anchor % abs_bytes_per_sep == 0) {
retbuf[j++] = sep_char;
} }
}
}
assert(j == resultlen);
if (return_bytes) { if (return_bytes) {
retval = PyBytes_FromStringAndSize((const char *)retbuf, arglen*2); retval = PyBytes_FromStringAndSize((const char *)retbuf, resultlen);
PyMem_Free(retbuf); PyMem_Free(retbuf);
} }
#ifdef Py_DEBUG #ifdef Py_DEBUG
@ -52,12 +108,26 @@ static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen,
PyObject * _Py_strhex(const char* argbuf, const Py_ssize_t arglen) PyObject * _Py_strhex(const char* argbuf, const Py_ssize_t arglen)
{ {
return _Py_strhex_impl(argbuf, arglen, 0); return _Py_strhex_impl(argbuf, arglen, NULL, 0, 0);
} }
/* Same as above but returns a bytes() instead of str() to avoid the /* Same as above but returns a bytes() instead of str() to avoid the
* need to decode the str() when bytes are needed. */ * need to decode the str() when bytes are needed. */
PyObject * _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen) PyObject * _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen)
{ {
return _Py_strhex_impl(argbuf, arglen, 1); return _Py_strhex_impl(argbuf, arglen, NULL, 0, 1);
}
/* Separator-aware variant of _Py_strhex(): returns a str of hex digits in
 * which sep is inserted between groups of bytes_per_group input bytes.
 * sep must be a length-1 str or bytes object, or NULL for no separator.
 * A positive group size counts groups from the right end of the data, a
 * negative one from the left.  Returns NULL with an exception set on
 * failure. */
PyObject *
_Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen,
                    const PyObject* sep, const int bytes_per_group)
{
    PyObject *hex_str = _Py_strhex_impl(argbuf, arglen, sep,
                                        bytes_per_group, 0);
    return hex_str;
}
/* Separator-aware hexlify that produces a bytes object rather than a str,
 * sparing callers that want bytes a separate str-to-bytes conversion.
 * sep must be a length-1 str or bytes object, or NULL for no separator;
 * it is inserted between groups of bytes_per_group input bytes (positive
 * sizes group from the right, negative sizes from the left). */
PyObject *
_Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen,
                          const PyObject* sep, const int bytes_per_group)
{
    PyObject *hex_bytes = _Py_strhex_impl(argbuf, arglen, sep,
                                          bytes_per_group, 1);
    return hex_bytes;
}