From 8cb6569fe14ba8e57ab1a2bea68594747852a9d1 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sat, 25 Apr 2015 23:22:26 +0000 Subject: [PATCH] Implements issue #9951: Adds a hex() method to bytes, bytearray, & memoryview. Also updates a few internal implementations of the same thing to use the new built-in code. Contributed by Arnon Yaari. --- Doc/library/stdtypes.rst | 37 ++++++++++++++++++++++++++++++ Doc/whatsnew/3.5.rst | 3 +++ Lib/test/test_bytes.py | 8 +++++++ Lib/test/test_doctest.py | 2 +- Makefile.pre.in | 2 ++ Misc/ACKS | 1 + Misc/NEWS | 1 + Modules/sha1module.c | 23 ++----------------- Modules/sha256module.c | 23 ++----------------- Modules/sha512module.c | 23 ++----------------- Objects/bytearrayobject.c | 15 ++++++++++++ Objects/bytesobject.c | 16 +++++++++++++ Objects/memoryobject.c | 14 +++++++++++ PCbuild/pythoncore.vcxproj | 2 ++ PCbuild/pythoncore.vcxproj.filters | 6 +++++ 15 files changed, 112 insertions(+), 64 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 26ff30988e7..2c66fca31bf 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -2289,6 +2289,19 @@ the bytes type has an additional class method to read data in that format: >>> bytes.fromhex('2Ef0 F1f2 ') b'.\xf0\xf1\xf2' +A reverse conversion function exists to transform a bytes object into its +hexadecimal representation. + +.. method:: bytes.hex() + + Return a string object containing two hexadecimal digits for each + byte in the instance. + + >>> b'\xf0\xf1\xf2'.hex() + 'f0f1f2' + + .. versionadded:: 3.5 + Since bytes objects are sequences of integers (akin to a tuple), for a bytes object *b*, ``b[0]`` will be an integer, while ``b[0:1]`` will be a bytes object of length 1. (This contrasts with text strings, where both indexing @@ -2344,6 +2357,19 @@ the bytearray type has an additional class method to read data in that format: >>> bytearray.fromhex('2Ef0 F1f2 ') bytearray(b'.\xf0\xf1\xf2') +A reverse conversion function exists to transform a bytearray object into its +hexadecimal representation. + +.. method:: bytearray.hex() + + Return a string object containing two hexadecimal digits for each + byte in the instance. + + >>> bytearray(b'\xf0\xf1\xf2').hex() + 'f0f1f2' + + .. versionadded:: 3.5 + Since bytearray objects are sequences of integers (akin to a list), for a bytearray object *b*, ``b[0]`` will be an integer, while ``b[0:1]`` will be a bytearray object of length 1. (This contrasts with text strings, where @@ -3458,6 +3484,17 @@ copying. supports all format strings, including those that are not in :mod:`struct` module syntax. + .. method:: hex() + + Return a string object containing two hexadecimal digits for each + byte in the buffer. :: + + >>> m = memoryview(b"abc") + >>> m.hex() + '616263' + + .. versionadded:: 3.5 + .. method:: tolist() Return the data in the buffer as a list of elements. :: diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst index f364317ba71..da7c5bba915 100644 --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -80,6 +80,9 @@ New built-in features: * ``bytes % args``, ``bytearray % args``: :pep:`461` - Adding ``%`` formatting to bytes and bytearray +* ``b'\xf0\x9f\x90\x8d'.hex()``, ``bytearray(b'\xf0\x9f\x90\x8d').hex()``, + ``memoryview(b'\xf0\x9f\x90\x8d').hex()``: :issue:`9951` - A ``hex`` method + has been added to bytes, bytearray, and memoryview. Implementation improvements: diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index ad283002da8..1c832aa9e69 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -301,6 +301,14 @@ class BaseBytesTest: self.assertRaises(ValueError, self.type2test.fromhex, '\x00') self.assertRaises(ValueError, self.type2test.fromhex, '12 \x00 34') + def test_hex(self): + self.assertRaises(TypeError, self.type2test.hex) + self.assertRaises(TypeError, self.type2test.hex, 1) + self.assertEquals(self.type2test(b"").hex(), "") + self.assertEquals(bytearray([0x1a, 0x2b, 0x30]).hex(), '1a2b30') + self.assertEquals(self.type2test(b"\x1a\x2b\x30").hex(), '1a2b30') + self.assertEquals(memoryview(b"\x1a\x2b\x30").hex(), '1a2b30') + def test_join(self): self.assertEqual(self.type2test(b"").join([]), b"") self.assertEqual(self.type2test(b"").join([b""]), b"") diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py index bbe5a13628f..b92c4b53f02 100644 --- a/Lib/test/test_doctest.py +++ b/Lib/test/test_doctest.py @@ -659,7 +659,7 @@ plain ol' Python and is guaranteed to be available. >>> import builtins >>> tests = doctest.DocTestFinder().find(builtins) - >>> 790 < len(tests) < 800 # approximate number of objects with docstrings + >>> 790 < len(tests) < 810 # approximate number of objects with docstrings True >>> real_tests = [t for t in tests if len(t.examples) > 0] >>> len(real_tests) # objects that actually have doctests diff --git a/Makefile.pre.in b/Makefile.pre.in index b78bcf189ba..34b413f7078 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -402,6 +402,7 @@ PYTHON_OBJS= \ Python/getopt.o \ Python/pystrcmp.o \ Python/pystrtod.o \ + Python/pystrhex.o \ Python/dtoa.o \ Python/formatter_unicode.o \ Python/fileutils.o \ @@ -919,6 +920,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/pystate.h \ $(srcdir)/Include/pystrcmp.h \ $(srcdir)/Include/pystrtod.h \ + $(srcdir)/Include/pystrhex.h \ $(srcdir)/Include/pythonrun.h \ $(srcdir)/Include/pythread.h \ $(srcdir)/Include/pytime.h \ diff --git a/Misc/ACKS b/Misc/ACKS index 0a04a07e2f9..ca0fcab1802 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1547,6 +1547,7 @@ Doug Wyatt Robert Xiao Florent Xicluna Hirokazu Yamamoto +Arnon Yaari Ka-Ping Yee Jason Yeo EungJun Yi diff --git a/Misc/NEWS b/Misc/NEWS index c559ea528ad..1d54c5f221f 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -11,6 +11,7 @@ Core and Builtins ----------------- - Issue #24022: Fix tokenizer crash when processing undecodable source code. +- Issue #9951: Added a hex() method to bytes, bytearray, and memoryview. Library ------- diff --git a/Modules/sha1module.c b/Modules/sha1module.c index d1f89364d7b..74b94ba270a 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -18,6 +18,7 @@ #include "Python.h" #include "hashlib.h" +#include "pystrhex.h" /*[clinic input] module _sha1 @@ -364,32 +365,12 @@ SHA1Type_hexdigest_impl(SHA1object *self) { unsigned char digest[SHA1_DIGESTSIZE]; struct sha1_state temp; - PyObject *retval; - Py_UCS1 *hex_digest; - int i, j; /* Get the raw (binary) digest value */ temp = self->hash_state; sha1_done(&temp, digest); - /* Create a new string */ - retval = PyUnicode_New(SHA1_DIGESTSIZE * 2, 127); - if (!retval) - return NULL; - hex_digest = PyUnicode_1BYTE_DATA(retval); - - /* Make hex version of the digest */ - for(i=j=0; i> 4) & 0xf; - hex_digest[j++] = Py_hexdigits[c]; - c = (digest[i] & 0xf); - hex_digest[j++] = Py_hexdigits[c]; - } -#ifdef Py_DEBUG - assert(_PyUnicode_CheckConsistency(retval, 1)); -#endif - return retval; + return _Py_strhex((const char *)digest, SHA1_DIGESTSIZE); } /*[clinic input] diff --git a/Modules/sha256module.c b/Modules/sha256module.c index 957fd2b8f9b..8c4def05726 100644 --- a/Modules/sha256module.c +++ b/Modules/sha256module.c @@ -19,6 +19,7 @@ #include "Python.h" #include "structmember.h" #include "hashlib.h" +#include "pystrhex.h" /*[clinic input] module _sha256 @@ -454,32 +455,12 @@ SHA256Type_hexdigest_impl(SHAobject *self) { unsigned char digest[SHA_DIGESTSIZE]; SHAobject temp; - PyObject *retval; - Py_UCS1 *hex_digest; - int i, j; /* Get the raw (binary) digest value */ SHAcopy(self, &temp); sha_final(digest, &temp); - /* Create a new string */ - retval = PyUnicode_New(self->digestsize * 2, 127); - if (!retval) - return NULL; - hex_digest = PyUnicode_1BYTE_DATA(retval); - - /* Make hex version of the digest */ - for(i=j=0; idigestsize; i++) { - unsigned char c; - c = (digest[i] >> 4) & 0xf; - hex_digest[j++] = Py_hexdigits[c]; - c = (digest[i] & 0xf); - hex_digest[j++] = Py_hexdigits[c]; - } -#ifdef Py_DEBUG - assert(_PyUnicode_CheckConsistency(retval, 1)); -#endif - return retval; + return _Py_strhex((const char *)digest, self->digestsize); } /*[clinic input] diff --git a/Modules/sha512module.c b/Modules/sha512module.c index 4533c003d59..8237d867f4a 100644 --- a/Modules/sha512module.c +++ b/Modules/sha512module.c @@ -19,6 +19,7 @@ #include "Python.h" #include "structmember.h" #include "hashlib.h" +#include "pystrhex.h" /*[clinic input] module _sha512 @@ -521,32 +522,12 @@ SHA512Type_hexdigest_impl(SHAobject *self) { unsigned char digest[SHA_DIGESTSIZE]; SHAobject temp; - PyObject *retval; - Py_UCS1 *hex_digest; - int i, j; /* Get the raw (binary) digest value */ SHAcopy(self, &temp); sha512_final(digest, &temp); - /* Create a new string */ - retval = PyUnicode_New(self->digestsize * 2, 127); - if (!retval) - return NULL; - hex_digest = PyUnicode_1BYTE_DATA(retval); - - /* Make hex version of the digest */ - for (i=j=0; idigestsize; i++) { - unsigned char c; - c = (digest[i] >> 4) & 0xf; - hex_digest[j++] = Py_hexdigits[c]; - c = (digest[i] & 0xf); - hex_digest[j++] = Py_hexdigits[c]; - } -#ifdef Py_DEBUG - assert(_PyUnicode_CheckConsistency(retval, 1)); -#endif - return retval; + return _Py_strhex((const char *)digest, self->digestsize); } /*[clinic input] diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index d76f15f3acb..14444a2e8e2 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -5,6 +5,7 @@ #include "structmember.h" #include "bytes_methods.h" #include "bytesobject.h" +#include "pystrhex.h" /*[clinic input] class bytearray "PyByteArrayObject *" "&PyByteArray_Type" @@ -2872,6 +2873,19 @@ bytearray_fromhex_impl(PyObject*cls, PyObject *string) return NULL; } +PyDoc_STRVAR(hex__doc__, +"B.hex() -> string\n\ +\n\ +Create a string of hexadecimal numbers from a bytearray object.\n\ +Example: bytearray([0xb9, 0x01, 0xef]).hex() -> 'b901ef'."); + +static PyObject * +bytearray_hex(PyBytesObject *self) +{ + char* argbuf = PyByteArray_AS_STRING(self); + Py_ssize_t arglen = PyByteArray_GET_SIZE(self); + return _Py_strhex(argbuf, arglen); +} static PyObject * _common_reduce(PyByteArrayObject *self, int proto) @@ -3002,6 +3016,7 @@ bytearray_methods[] = { BYTEARRAY_EXTEND_METHODDEF {"find", (PyCFunction)bytearray_find, METH_VARARGS, find__doc__}, BYTEARRAY_FROMHEX_METHODDEF + {"hex", (PyCFunction)bytearray_hex, METH_NOARGS, hex__doc__}, {"index", (PyCFunction)bytearray_index, METH_VARARGS, index__doc__}, BYTEARRAY_INSERT_METHODDEF {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS, diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index d981e0ee698..d2b52c77b33 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -5,6 +5,7 @@ #include "Python.h" #include "bytes_methods.h" +#include "pystrhex.h" #include /*[clinic input] @@ -3036,6 +3037,20 @@ bytes_fromhex_impl(PyTypeObject *type, PyObject *string) return NULL; } +PyDoc_STRVAR(hex__doc__, +"B.hex() -> string\n\ +\n\ +Create a string of hexadecimal numbers from a bytes object.\n\ +Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'."); + +static PyObject * +bytes_hex(PyBytesObject *self) +{ + char* argbuf = PyBytes_AS_STRING(self); + Py_ssize_t arglen = PyBytes_GET_SIZE(self); + return _Py_strhex(argbuf, arglen); +} + static PyObject * bytes_getnewargs(PyBytesObject *v) { @@ -3057,6 +3072,7 @@ bytes_methods[] = { expandtabs__doc__}, {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__}, BYTES_FROMHEX_METHODDEF + {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__}, {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__}, {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS, _Py_isalnum__doc__}, diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c index b6951ab88e0..54fd05f6f61 100644 --- a/Objects/memoryobject.c +++ b/Objects/memoryobject.c @@ -1,6 +1,7 @@ /* Memoryview object implementation */ #include "Python.h" +#include "pystrhex.h" #include @@ -2157,6 +2158,14 @@ memory_tobytes(PyMemoryViewObject *self, PyObject *dummy) return bytes; } +static PyObject * +memory_hex(PyMemoryViewObject *self, PyObject *dummy) +{ + Py_buffer *src = VIEW_ADDR(self); + CHECK_RELEASED(self); + return _Py_strhex(src->buf, src->len); +} + static PyObject * memory_repr(PyMemoryViewObject *self) { @@ -3061,6 +3070,10 @@ PyDoc_STRVAR(memory_tobytes_doc, "tobytes($self, /)\n--\n\ \n\ Return the data in the buffer as a byte string."); +PyDoc_STRVAR(memory_hex_doc, +"hex($self, /)\n--\n\ +\n\ +Return the data in the buffer as a string of hexadecimal numbers."); PyDoc_STRVAR(memory_tolist_doc, "tolist($self, /)\n--\n\ \n\ @@ -3073,6 +3086,7 @@ Cast a memoryview to a new format or shape."); static PyMethodDef memory_methods[] = { {"release", (PyCFunction)memory_release, METH_NOARGS, memory_release_doc}, {"tobytes", (PyCFunction)memory_tobytes, METH_NOARGS, memory_tobytes_doc}, + {"hex", (PyCFunction)memory_hex, METH_NOARGS, memory_hex_doc}, {"tolist", (PyCFunction)memory_tolist, METH_NOARGS, memory_tolist_doc}, {"cast", (PyCFunction)memory_cast, METH_VARARGS|METH_KEYWORDS, memory_cast_doc}, {"__enter__", memory_enter, METH_NOARGS, NULL}, diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index a5690f6b06e..bcefe94c979 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -147,6 +147,7 @@ + @@ -376,6 +377,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index b4154054068..3b275bb0de5 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -243,6 +243,9 @@ Include + + Include + Include @@ -908,6 +911,9 @@ Python + + Python + Python