From c36c3789dee99b3e2d01ee47731b62200157ba16 Mon Sep 17 00:00:00 2001 From: Alexandre Vassalotti Date: Sat, 9 Jan 2010 20:35:09 +0000 Subject: [PATCH] Issue #1023290: Added API for the conversion of longs to bytes and vice-versa. --- Doc/library/stdtypes.rst | 63 +++++++++++ Lib/test/test_long.py | 232 +++++++++++++++++++++++++++++++++++++++ Misc/NEWS | 3 + Objects/longobject.c | 199 +++++++++++++++++++++++++++++++++ 4 files changed, 497 insertions(+) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index d2fb5f9ce08..4e7ab3a798b 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -457,6 +457,69 @@ Additional Methods on Integer Types .. versionadded:: 3.1 + .. method:: int.to_bytes(length, byteorder, [\*, signed=False]) + + Return an array of bytes representing an integer. + + >>> (1024).to_bytes(2, byteorder='big') + b'\x04\x00' + >>> (1024).to_bytes(10, byteorder='big') + b'\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00' + >>> (-1024).to_bytes(10, byteorder='big', signed=True) + b'\xff\xff\xff\xff\xff\xff\xff\xff\xfc\x00' + >>> x = 1000 + >>> x.to_bytes((x.bit_length() // 8) + 1, byteorder='little') + b'\xe8\x03' + + The integer is represented using *length* bytes. An :exc:`OverflowError` + is raised if the integer is not representable with the given number of + bytes. + + The *byteorder* argument determines the byte order used to represent the + integer. If *byteorder* is ``"big"``, the most significant byte is at the + beginning of the byte array. If *byteorder* is ``"little"``, the most + significant byte is at the end of the byte array. To request the native + byte order of the host system, use :data:`sys.byteorder` as the byte order + value. + + The *signed* argument determines whether two's complement is used to + represent the integer. If *signed* is ``False`` and a negative integer is + given, an :exc:`OverflowError` is raised. The default value for *signed* + is ``False``. + + .. versionadded:: 3.2 + + .. 
classmethod:: int.from_bytes(bytes, byteorder, [\*, signed=False]) + + Return the integer represented by the given array of bytes. + + >>> int.from_bytes(b'\x00\x10', byteorder='big') + 16 + >>> int.from_bytes(b'\x00\x10', byteorder='little') + 4096 + >>> int.from_bytes(b'\xfc\x00', byteorder='big', signed=True) + -1024 + >>> int.from_bytes(b'\xfc\x00', byteorder='big', signed=False) + 64512 + >>> int.from_bytes([255, 0, 0], byteorder='big') + 16711680 + + The argument *bytes* must either support the buffer protocol or be an + iterable producing bytes. :class:`bytes` and :class:`bytearray` are + examples of built-in objects that support the buffer protocol. + + The *byteorder* argument determines the byte order used to represent the + integer. If *byteorder* is ``"big"``, the most significant byte is at the + beginning of the byte array. If *byteorder* is ``"little"``, the most + significant byte is at the end of the byte array. To request the native + byte order of the host system, use :data:`sys.byteorder` as the byte order + value. + + The *signed* argument indicates whether two's complement is used to + represent the integer. + + .. 
versionadded:: 3.2
+
 
 Additional Methods on Float
 ---------------------------
diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py
index 9974a5fa9f8..b4ab74f0847 100644
--- a/Lib/test/test_long.py
+++ b/Lib/test/test_long.py
@@ -4,6 +4,7 @@ import sys
 
 import random
 import math
+import array
 
 # Used for lazy formatting of failure messages
 class Frm(object):
@@ -900,6 +901,237 @@ class LongTest(unittest.TestCase):
         for e in bad_exponents:
             self.assertRaises(TypeError, round, 3, e)
 
+    def test_to_bytes(self):
+        def check(tests, byteorder, signed=False):  # tests: {int: bytes}
+            for test, expected in tests.items():
+                try:
+                    self.assertEqual(
+                        test.to_bytes(len(expected), byteorder, signed=signed),
+                        expected)
+                except Exception as err:
+                    raise AssertionError(
+                        "failed to convert {0} with byteorder={1} and signed={2}"
+                        .format(test, byteorder, signed)) from err
+
+        # Convert integers to signed big-endian byte arrays.
+        tests1 = {
+            0: b'\x00',
+            1: b'\x01',
+            -1: b'\xff',
+            -127: b'\x81',
+            -128: b'\x80',
+            -129: b'\xff\x7f',
+            127: b'\x7f',
+            129: b'\x00\x81',
+            -255: b'\xff\x01',
+            -256: b'\xff\x00',
+            255: b'\x00\xff',
+            256: b'\x01\x00',
+            32767: b'\x7f\xff',
+            -32768: b'\xff\x80\x00',
+            65535: b'\x00\xff\xff',
+            -65536: b'\xff\x00\x00',
+            -8388608: b'\x80\x00\x00'
+        }
+        check(tests1, 'big', signed=True)
+
+        # Convert integers to signed little-endian byte arrays.
+        tests2 = {
+            0: b'\x00',
+            1: b'\x01',
+            -1: b'\xff',
+            -127: b'\x81',
+            -128: b'\x80',
+            -129: b'\x7f\xff',
+            127: b'\x7f',
+            129: b'\x81\x00',
+            -255: b'\x01\xff',
+            -256: b'\x00\xff',
+            255: b'\xff\x00',
+            256: b'\x00\x01',
+            32767: b'\xff\x7f',
+            -32768: b'\x00\x80',
+            65535: b'\xff\xff\x00',
+            -65536: b'\x00\x00\xff',
+            -8388608: b'\x00\x00\x80'
+        }
+        check(tests2, 'little', signed=True)
+
+        # Convert integers to unsigned big-endian byte arrays.
+        tests3 = {
+            0: b'\x00',
+            1: b'\x01',
+            127: b'\x7f',
+            128: b'\x80',
+            255: b'\xff',
+            256: b'\x01\x00',
+            32767: b'\x7f\xff',
+            32768: b'\x80\x00',
+            65535: b'\xff\xff',
+            65536: b'\x01\x00\x00'
+        }
+        check(tests3, 'big', signed=False)
+
+        # Convert integers to unsigned little-endian byte arrays.
+        tests4 = {
+            0: b'\x00',
+            1: b'\x01',
+            127: b'\x7f',
+            128: b'\x80',
+            255: b'\xff',
+            256: b'\x00\x01',
+            32767: b'\xff\x7f',
+            32768: b'\x00\x80',
+            65535: b'\xff\xff',
+            65536: b'\x00\x00\x01'
+        }
+        check(tests4, 'little', signed=False)
+
+        self.assertRaises(OverflowError, (256).to_bytes, 1, 'big', signed=False)
+        self.assertRaises(OverflowError, (256).to_bytes, 1, 'big', signed=True)
+        self.assertRaises(OverflowError, (256).to_bytes, 1, 'little', signed=False)
+        self.assertRaises(OverflowError, (256).to_bytes, 1, 'little', signed=True)
+        self.assertRaises(OverflowError, (-1).to_bytes, 2, 'big', signed=False)
+        self.assertRaises(OverflowError, (-1).to_bytes, 2, 'little', signed=False)
+        self.assertEqual((0).to_bytes(0, 'big'), b'')  # zero needs no bytes
+        self.assertEqual((1).to_bytes(5, 'big'), b'\x00\x00\x00\x00\x01')
+        self.assertEqual((0).to_bytes(5, 'big'), b'\x00\x00\x00\x00\x00')
+        self.assertEqual((-1).to_bytes(5, 'big', signed=True),
+                         b'\xff\xff\xff\xff\xff')
+        self.assertRaises(OverflowError, (1).to_bytes, 0, 'big')
+
+    def test_from_bytes(self):
+        def check(tests, byteorder, signed=False):  # tests: {bytes: int}
+            for test, expected in tests.items():
+                try:
+                    self.assertEqual(
+                        int.from_bytes(test, byteorder, signed=signed),
+                        expected)
+                except Exception as err:
+                    raise AssertionError(
+                        "failed to convert {0} with byteorder={1!r} and signed={2}"
+                        .format(test, byteorder, signed)) from err
+
+        # Convert signed big-endian byte arrays to integers.
+        tests1 = {
+            b'': 0,
+            b'\x00': 0,
+            b'\x00\x00': 0,
+            b'\x01': 1,
+            b'\x00\x01': 1,
+            b'\xff': -1,
+            b'\xff\xff': -1,
+            b'\x81': -127,
+            b'\x80': -128,
+            b'\xff\x7f': -129,
+            b'\x7f': 127,
+            b'\x00\x81': 129,
+            b'\xff\x01': -255,
+            b'\xff\x00': -256,
+            b'\x00\xff': 255,
+            b'\x01\x00': 256,
+            b'\x7f\xff': 32767,
+            b'\x80\x00': -32768,
+            b'\x00\xff\xff': 65535,
+            b'\xff\x00\x00': -65536,
+            b'\x80\x00\x00': -8388608
+        }
+        check(tests1, 'big', signed=True)
+
+        # Convert signed little-endian byte arrays to integers.
+        tests2 = {
+            b'': 0,
+            b'\x00': 0,
+            b'\x00\x00': 0,
+            b'\x01': 1,
+            b'\x01\x00': 1,
+            b'\xff': -1,
+            b'\xff\xff': -1,
+            b'\x81': -127,
+            b'\x80': -128,
+            b'\x7f\xff': -129,
+            b'\x7f': 127,
+            b'\x81\x00': 129,
+            b'\x01\xff': -255,
+            b'\x00\xff': -256,
+            b'\xff\x00': 255,
+            b'\x00\x01': 256,
+            b'\xff\x7f': 32767,
+            b'\x00\x80': -32768,
+            b'\xff\xff\x00': 65535,
+            b'\x00\x00\xff': -65536,
+            b'\x00\x00\x80': -8388608
+        }
+        check(tests2, 'little', signed=True)
+
+        # Convert unsigned big-endian byte arrays to integers.
+        tests3 = {
+            b'': 0,
+            b'\x00': 0,
+            b'\x01': 1,
+            b'\x7f': 127,
+            b'\x80': 128,
+            b'\xff': 255,
+            b'\x01\x00': 256,
+            b'\x7f\xff': 32767,
+            b'\x80\x00': 32768,
+            b'\xff\xff': 65535,
+            b'\x01\x00\x00': 65536,
+        }
+        check(tests3, 'big', signed=False)
+
+        # Convert unsigned little-endian byte arrays to integers.
+        tests4 = {
+            b'': 0,
+            b'\x00': 0,
+            b'\x01': 1,
+            b'\x7f': 127,
+            b'\x80': 128,
+            b'\xff': 255,
+            b'\x00\x01': 256,
+            b'\xff\x7f': 32767,
+            b'\x00\x80': 32768,
+            b'\xff\xff': 65535,
+            b'\x00\x00\x01': 65536,
+        }
+        check(tests4, 'little', signed=False)
+
+        class myint(int):  # from_bytes() on a subclass must return it
+            pass
+
+        self.assertTrue(type(myint.from_bytes(b'\x00', 'big')) is myint)
+        self.assertEqual(myint.from_bytes(b'\x01', 'big'), 1)
+        self.assertTrue(
+            type(myint.from_bytes(b'\x00', 'big', signed=False)) is myint)
+        self.assertEqual(myint.from_bytes(b'\x01', 'big', signed=False), 1)
+        self.assertTrue(type(myint.from_bytes(b'\x00', 'little')) is myint)
+        self.assertEqual(myint.from_bytes(b'\x01', 'little'), 1)
+        self.assertTrue(type(myint.from_bytes(
+            b'\x00', 'little', signed=False)) is myint)
+        self.assertEqual(myint.from_bytes(b'\x01', 'little', signed=False), 1)
+        self.assertEqual(
+            int.from_bytes([255, 0, 0], 'big', signed=True), -65536)
+        self.assertEqual(
+            int.from_bytes((255, 0, 0), 'big', signed=True), -65536)
+        self.assertEqual(int.from_bytes(
+            bytearray(b'\xff\x00\x00'), 'big', signed=True), -65536)
+        self.assertEqual(int.from_bytes(
+            bytearray(b'\x00\x00\xff'), 'little', signed=True), -65536)
+        self.assertEqual(int.from_bytes(
+            array.array('B', b'\xff\x00\x00'), 'big', signed=True), -65536)
+        self.assertEqual(int.from_bytes(
+            memoryview(b'\xff\x00\x00'), 'big', signed=True), -65536)
+        self.assertRaises(ValueError, int.from_bytes, [256], 'big')
+        self.assertRaises(ValueError, int.from_bytes, [0], 'big\x00')
+        self.assertRaises(ValueError, int.from_bytes, [0], 'little\x00')
+        self.assertRaises(TypeError, int.from_bytes, "", 'big')
+        self.assertRaises(TypeError, int.from_bytes, "\x00", 'big')
+        self.assertRaises(TypeError, int.from_bytes, 0, 'big')
+        self.assertRaises(TypeError, int.from_bytes, 0, 'big', True)
+        self.assertRaises(TypeError, myint.from_bytes, "", 'big')
+        self.assertRaises(TypeError, myint.from_bytes, "\x00", 'big')
+        self.assertRaises(TypeError, myint.from_bytes, 0, 'big')
+        self.assertRaises(TypeError, myint.from_bytes, 0, 'big', True)
 
 
 def test_main():
diff --git a/Misc/NEWS b/Misc/NEWS
index 5fd5775f13d..adf9fb3c21c 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -149,6 +149,9 @@ Core and Builtins
 - Issue #6687: PyBytes_FromObject() no longer accepts an integer as its
   argument to construct a null-initialized bytes object.
 
+- Issue #1023290: Add from_bytes() and to_bytes() methods to integers.
+  These methods allow the conversion of integers to bytes, and vice-versa.
+
 C-API
 -----
 
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 2e2eca8e45b..cfe7b5c7484 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -4296,6 +4296,201 @@ long_is_finite(PyObject *v)
 }
 #endif
 
+
+PyDoc_STRVAR(long_to_bytes_doc,
+"int.to_bytes(length, byteorder, *, signed=False) -> bytes\n\
+\n\
+Return an array of bytes representing an integer.\n\
+\n\
+The integer is represented using length bytes. An OverflowError is\n\
+raised if the integer is not representable with the given number of\n\
+bytes.\n\
+\n\
+The byteorder argument determines the byte order used to represent the\n\
+integer. If byteorder is 'big', the most significant byte is at the\n\
+beginning of the byte array. If byteorder is 'little', the most\n\
+significant byte is at the end of the byte array. To request the native\n\
+byte order of the host system, use `sys.byteorder' as the byte order value.\n\
+\n\
+The signed keyword-only argument determines whether two's complement is\n\
+used to represent the integer. 
If signed is False and a negative integer\n\
+is given, an OverflowError is raised.");
+
+static PyObject *
+long_to_bytes(PyLongObject *v, PyObject *args, PyObject *kwds)
+{
+    PyObject *byteorder_str;
+    PyObject *is_signed_obj = NULL;
+    Py_ssize_t length;
+    int little_endian;
+    int is_signed;
+    PyObject *bytes;
+    static PyObject *little_str = NULL, *big_str = NULL;
+    static char *kwlist[] = {"length", "byteorder", "signed", NULL};
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "nO|O:to_bytes", kwlist,
+                                     &length, &byteorder_str,
+                                     &is_signed_obj))
+        return NULL;
+
+    if (args != NULL && Py_SIZE(args) > 2) {
+        PyErr_SetString(PyExc_TypeError,
+            "'signed' is a keyword-only argument");
+        return NULL;
+    }
+    if (little_str == NULL)
+        little_str = PyUnicode_InternFromString("little");
+    if (big_str == NULL)    /* retried separately if only this one failed */
+        big_str = PyUnicode_InternFromString("big");
+    if (little_str == NULL || big_str == NULL)
+        return NULL;
+
+    little_endian = PyObject_RichCompareBool(byteorder_str, little_str, Py_EQ);
+    if (little_endian < 0)  /* the comparison itself raised */
+        return NULL;
+    if (!little_endian) {
+        int is_big = PyObject_RichCompareBool(byteorder_str, big_str, Py_EQ);
+        if (is_big < 0)
+            return NULL;
+        if (!is_big) {
+            PyErr_SetString(PyExc_ValueError,
+                "byteorder must be either 'little' or 'big'");
+            return NULL;
+        }
+    }
+
+    /* signed defaults to False; PyObject_IsTrue() returns -1 on error. */
+    is_signed = 0;
+    if (is_signed_obj != NULL) {
+        is_signed = PyObject_IsTrue(is_signed_obj);
+        if (is_signed < 0)
+            return NULL;
+    }
+
+    if (length < 0) {
+        PyErr_SetString(PyExc_ValueError,
+            "length argument must be non-negative");
+        return NULL;
+    }
+
+    bytes = PyBytes_FromStringAndSize(NULL, length);
+    if (bytes == NULL)
+        return NULL;
+
+    if (_PyLong_AsByteArray(v, (unsigned char *)PyBytes_AS_STRING(bytes),
+                            length, little_endian, is_signed) < 0) {
+        Py_DECREF(bytes);
+        return NULL;
+    }
+
+    return bytes;
+}
+
+PyDoc_STRVAR(long_from_bytes_doc,
+"int.from_bytes(bytes, byteorder, *, signed=False) -> int\n\
+\n\
+Return the integer represented by the given array of bytes.\n\
+\n\
+The bytes argument must either support the buffer protocol or be an\n\
+iterable object producing bytes. Bytes and bytearray are examples of\n\
+built-in objects that support the buffer protocol.\n\
+\n\
+The byteorder argument determines the byte order used to represent the\n\
+integer. If byteorder is 'big', the most significant byte is at the\n\
+beginning of the byte array. If byteorder is 'little', the most\n\
+significant byte is at the end of the byte array. To request the native\n\
+byte order of the host system, use `sys.byteorder' as the byte order value.\n\
+\n\
+The signed keyword-only argument indicates whether two's complement is\n\
+used to represent the integer.");
+
+static PyObject *
+long_from_bytes(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    PyObject *byteorder_str;
+    PyObject *is_signed_obj = NULL;
+    int little_endian;
+    int is_signed;
+    PyObject *obj;
+    PyObject *bytes;
+    PyObject *long_obj;
+    static PyObject *little_str = NULL, *big_str = NULL;
+    static char *kwlist[] = {"bytes", "byteorder", "signed", NULL};
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|O:from_bytes", kwlist,
+                                     &obj, &byteorder_str,
+                                     &is_signed_obj))
+        return NULL;
+
+    if (args != NULL && Py_SIZE(args) > 2) {
+        PyErr_SetString(PyExc_TypeError,
+            "'signed' is a keyword-only argument");
+        return NULL;
+    }
+    if (little_str == NULL)
+        little_str = PyUnicode_InternFromString("little");
+    if (big_str == NULL)    /* retried separately if only this one failed */
+        big_str = PyUnicode_InternFromString("big");
+    if (little_str == NULL || big_str == NULL)
+        return NULL;
+
+    little_endian = PyObject_RichCompareBool(byteorder_str, little_str, Py_EQ);
+    if (little_endian < 0)  /* the comparison itself raised */
+        return NULL;
+    if (!little_endian) {
+        int is_big = PyObject_RichCompareBool(byteorder_str, big_str, Py_EQ);
+        if (is_big < 0)
+            return NULL;
+        if (!is_big) {
+            PyErr_SetString(PyExc_ValueError,
+                "byteorder must be either 'little' or 'big'");
+            return NULL;
+        }
+    }
+
+    /* signed defaults to False; PyObject_IsTrue() returns -1 on error. */
+    is_signed = 0;
+    if (is_signed_obj != NULL) {
+        is_signed = PyObject_IsTrue(is_signed_obj);
+        if (is_signed < 0)
+            return NULL;
+    }
+
+    bytes = PyObject_Bytes(obj);
+    if (bytes == NULL)
+        return NULL;
+
+    long_obj = _PyLong_FromByteArray(
+        (unsigned char *)PyBytes_AS_STRING(bytes), Py_SIZE(bytes),
+        little_endian, is_signed);
+    Py_DECREF(bytes);
+    if (long_obj == NULL)   /* decoding failed; Py_SIZE() below needs non-NULL */
+        return NULL;
+
+    /* If from_bytes() was used on subclass, allocate new subclass
+     * instance, initialize it with decoded long value and return it. */
+    if (type != &PyLong_Type && PyType_IsSubtype(type, &PyLong_Type)) {
+        PyLongObject *newobj;
+        Py_ssize_t i, n = ABS(Py_SIZE(long_obj));   /* n: number of digits */
+
+        newobj = (PyLongObject *)type->tp_alloc(type, n);
+        if (newobj == NULL) {
+            Py_DECREF(long_obj);
+            return NULL;
+        }
+        assert(PyLong_Check(newobj));
+        Py_SIZE(newobj) = Py_SIZE(long_obj);    /* keeps the sign as well */
+        for (i = 0; i < n; i++) {
+            newobj->ob_digit[i] =
+                ((PyLongObject *)long_obj)->ob_digit[i];
+        }
+        Py_DECREF(long_obj);
+        return (PyObject *)newobj;
+    }
+
+    return long_obj;
+}
+
 static PyMethodDef long_methods[] = {
     {"conjugate", (PyCFunction)long_long, METH_NOARGS,
      "Returns self, the complex conjugate of any int."},
@@ -4305,6 +4500,10 @@ static PyMethodDef long_methods[] = {
     {"is_finite", (PyCFunction)long_is_finite, METH_NOARGS,
      "Returns always True."},
 #endif
+    {"to_bytes", (PyCFunction)long_to_bytes,
+     METH_VARARGS|METH_KEYWORDS, long_to_bytes_doc},
+    {"from_bytes", (PyCFunction)long_from_bytes,
+     METH_VARARGS|METH_KEYWORDS|METH_CLASS, long_from_bytes_doc},
     {"__trunc__", (PyCFunction)long_long, METH_NOARGS,
      "Truncating an Integral returns itself."},
     {"__floor__", (PyCFunction)long_long, METH_NOARGS,