From c36c3789dee99b3e2d01ee47731b62200157ba16 Mon Sep 17 00:00:00 2001
From: Alexandre Vassalotti <alexandre@peadrop.com>
Date: Sat, 9 Jan 2010 20:35:09 +0000
Subject: [PATCH] Issue #1023290: Added API for the conversion of longs to
 bytes and vice-versa.

---
 Doc/library/stdtypes.rst |  63 +++++++++++
 Lib/test/test_long.py    | 232 +++++++++++++++++++++++++++++++++++++++
 Misc/NEWS                |   3 +
 Objects/longobject.c     | 199 +++++++++++++++++++++++++++++++++
 4 files changed, 497 insertions(+)

diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
index d2fb5f9ce08..4e7ab3a798b 100644
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -457,6 +457,69 @@ Additional Methods on Integer Types
 
     .. versionadded:: 3.1
 
+.. method:: int.to_bytes(length, byteorder, \*, signed=False)
+
+    Return an array of bytes representing an integer.
+
+        >>> (1024).to_bytes(2, byteorder='big')
+        b'\x04\x00'
+        >>> (1024).to_bytes(10, byteorder='big')
+        b'\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00'
+        >>> (-1024).to_bytes(10, byteorder='big', signed=True)
+        b'\xff\xff\xff\xff\xff\xff\xff\xff\xfc\x00'
+        >>> x = 1000
+        >>> x.to_bytes((x.bit_length() // 8) + 1, byteorder='little')
+        b'\xe8\x03'
+
+    The integer is represented using *length* bytes.  An :exc:`OverflowError`
+    is raised if the integer is not representable with the given number of
+    bytes.
+
+    The *byteorder* argument determines the byte order used to represent the
+    integer.  If *byteorder* is ``"big"``, the most significant byte is at the
+    beginning of the byte array.  If *byteorder* is ``"little"``, the most
+    significant byte is at the end of the byte array.  To request the native
+    byte order of the host system, use :data:`sys.byteorder` as the byte order
+    value.
+
+    The *signed* argument determines whether two's complement is used to
+    represent the integer.  If *signed* is ``False`` and a negative integer is
+    given, an :exc:`OverflowError` is raised. The default value for *signed*
+    is ``False``.
+
+    .. versionadded:: 3.2
+
+.. classmethod:: int.from_bytes(bytes, byteorder, \*, signed=False)
+
+    Return the integer represented by the given array of bytes.
+
+        >>> int.from_bytes(b'\x00\x10', byteorder='big')
+        16
+        >>> int.from_bytes(b'\x00\x10', byteorder='little')
+        4096
+        >>> int.from_bytes(b'\xfc\x00', byteorder='big', signed=True)
+        -1024
+        >>> int.from_bytes(b'\xfc\x00', byteorder='big', signed=False)
+        64512
+        >>> int.from_bytes([255, 0, 0], byteorder='big')
+        16711680
+
+    The argument *bytes* must either support the buffer protocol or be an
+    iterable producing bytes. :class:`bytes` and :class:`bytearray` are
+    examples of built-in objects that support the buffer protocol.
+
+    The *byteorder* argument determines the byte order used to represent the
+    integer.  If *byteorder* is ``"big"``, the most significant byte is at the
+    beginning of the byte array.  If *byteorder* is ``"little"``, the most
+    significant byte is at the end of the byte array.  To request the native
+    byte order of the host system, use :data:`sys.byteorder` as the byte order
+    value.
+
+    The *signed* argument indicates whether two's complement is used to
+    represent the integer.
+
+    .. versionadded:: 3.2
+
 
 Additional Methods on Float
 ---------------------------
diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py
index 9974a5fa9f8..b4ab74f0847 100644
--- a/Lib/test/test_long.py
+++ b/Lib/test/test_long.py
@@ -4,6 +4,7 @@ import sys
 
 import random
 import math
+import array
 
 # Used for lazy formatting of failure messages
 class Frm(object):
@@ -900,6 +901,237 @@ class LongTest(unittest.TestCase):
         for e in bad_exponents:
             self.assertRaises(TypeError, round, 3, e)
 
+    def test_to_bytes(self):
+        def check(tests, byteorder, signed=False):
+            for test, expected in tests.items():
+                try:
+                    self.assertEqual(
+                        test.to_bytes(len(expected), byteorder, signed=signed),
+                        expected)
+                except Exception as err:
+                    raise AssertionError(
+                        "failed to convert {0} with byteorder={1} and signed={2}"
+                        .format(test, byteorder, signed)) from err
+
+        # Convert integers to signed big-endian byte arrays.
+        tests1 = {
+            0: b'\x00',
+            1: b'\x01',
+            -1: b'\xff',
+            -127: b'\x81',
+            -128: b'\x80',
+            -129: b'\xff\x7f',
+            127: b'\x7f',
+            129: b'\x00\x81',
+            -255: b'\xff\x01',
+            -256: b'\xff\x00',
+            255: b'\x00\xff',
+            256: b'\x01\x00',
+            32767: b'\x7f\xff',
+            -32768: b'\xff\x80\x00',
+            65535: b'\x00\xff\xff',
+            -65536: b'\xff\x00\x00',
+            -8388608: b'\x80\x00\x00'
+        }
+        check(tests1, 'big', signed=True)
+
+        # Convert integers to signed little-endian byte arrays.
+        tests2 = {
+            0: b'\x00',
+            1: b'\x01',
+            -1: b'\xff',
+            -127: b'\x81',
+            -128: b'\x80',
+            -129: b'\x7f\xff',
+            127: b'\x7f',
+            129: b'\x81\x00',
+            -255: b'\x01\xff',
+            -256: b'\x00\xff',
+            255: b'\xff\x00',
+            256: b'\x00\x01',
+            32767: b'\xff\x7f',
+            -32768: b'\x00\x80',
+            65535: b'\xff\xff\x00',
+            -65536: b'\x00\x00\xff',
+            -8388608: b'\x00\x00\x80'
+        }
+        check(tests2, 'little', signed=True)
+
+        # Convert integers to unsigned big-endian byte arrays.
+        tests3 = {
+            0: b'\x00',
+            1: b'\x01',
+            127: b'\x7f',
+            128: b'\x80',
+            255: b'\xff',
+            256: b'\x01\x00',
+            32767: b'\x7f\xff',
+            32768: b'\x80\x00',
+            65535: b'\xff\xff',
+            65536: b'\x01\x00\x00'
+        }
+        check(tests3, 'big', signed=False)
+
+        # Convert integers to unsigned little-endian byte arrays.
+        tests4 = {
+            0: b'\x00',
+            1: b'\x01',
+            127: b'\x7f',
+            128: b'\x80',
+            255: b'\xff',
+            256: b'\x00\x01',
+            32767: b'\xff\x7f',
+            32768: b'\x00\x80',
+            65535: b'\xff\xff',
+            65536: b'\x00\x00\x01'
+        }
+        check(tests4, 'little', signed=False)
+
+        self.assertRaises(OverflowError, (256).to_bytes, 1, 'big', signed=False)
+        self.assertRaises(OverflowError, (256).to_bytes, 1, 'big', signed=True)
+        self.assertRaises(OverflowError, (256).to_bytes, 1, 'little', signed=False)
+        self.assertRaises(OverflowError, (256).to_bytes, 1, 'little', signed=True)
+        self.assertRaises(OverflowError, (-1).to_bytes, 2, 'big', signed=False)
+        self.assertRaises(OverflowError, (-1).to_bytes, 2, 'little', signed=False)
+        self.assertEqual((0).to_bytes(0, 'big'), b'')  # 0 fits in 0 bytes
+        self.assertEqual((1).to_bytes(5, 'big'), b'\x00\x00\x00\x00\x01')
+        self.assertEqual((0).to_bytes(5, 'big'), b'\x00\x00\x00\x00\x00')
+        self.assertEqual((-1).to_bytes(5, 'big', signed=True),
+                         b'\xff\xff\xff\xff\xff')
+        self.assertRaises(OverflowError, (1).to_bytes, 0, 'big')
+
+    def test_from_bytes(self):
+        def check(tests, byteorder, signed=False):
+            for test, expected in tests.items():
+                try:
+                    self.assertEqual(
+                        int.from_bytes(test, byteorder, signed=signed),
+                        expected)
+                except Exception as err:
+                    raise AssertionError(
+                        "failed to convert {0} with byteorder={1!r} and signed={2}"
+                        .format(test, byteorder, signed)) from err
+
+        # Convert signed big-endian byte arrays to integers.
+        tests1 = {
+            b'': 0,
+            b'\x00': 0,
+            b'\x00\x00': 0,
+            b'\x01': 1,
+            b'\x00\x01': 1,
+            b'\xff': -1,
+            b'\xff\xff': -1,
+            b'\x81': -127,
+            b'\x80': -128,
+            b'\xff\x7f': -129,
+            b'\x7f': 127,
+            b'\x00\x81': 129,
+            b'\xff\x01': -255,
+            b'\xff\x00': -256,
+            b'\x00\xff': 255,
+            b'\x01\x00': 256,
+            b'\x7f\xff': 32767,
+            b'\x80\x00': -32768,
+            b'\x00\xff\xff': 65535,
+            b'\xff\x00\x00': -65536,
+            b'\x80\x00\x00': -8388608
+        }
+        check(tests1, 'big', signed=True)
+
+        # Convert signed little-endian byte arrays to integers.
+        tests2 = {
+            b'': 0,
+            b'\x00': 0,
+            b'\x00\x00': 0,
+            b'\x01': 1,
+            b'\x00\x01': 256,
+            b'\xff': -1,
+            b'\xff\xff': -1,
+            b'\x81': -127,
+            b'\x80': -128,
+            b'\x7f\xff': -129,
+            b'\x7f': 127,
+            b'\x81\x00': 129,
+            b'\x01\xff': -255,
+            b'\x00\xff': -256,
+            b'\xff\x00': 255,
+            b'\x00\x01': 256,
+            b'\xff\x7f': 32767,
+            b'\x00\x80': -32768,
+            b'\xff\xff\x00': 65535,
+            b'\x00\x00\xff': -65536,
+            b'\x00\x00\x80': -8388608
+        }
+        check(tests2, 'little', signed=True)
+
+        # Convert unsigned big-endian byte arrays to integers.
+        tests3 = {
+            b'': 0,
+            b'\x00': 0,
+            b'\x01': 1,
+            b'\x7f': 127,
+            b'\x80': 128,
+            b'\xff': 255,
+            b'\x01\x00': 256,
+            b'\x7f\xff': 32767,
+            b'\x80\x00': 32768,
+            b'\xff\xff': 65535,
+            b'\x01\x00\x00': 65536,
+        }
+        check(tests3, 'big', signed=False)
+
+        # Convert unsigned little-endian byte arrays to integers.
+        tests4 = {
+            b'': 0,
+            b'\x00': 0,
+            b'\x01': 1,
+            b'\x7f': 127,
+            b'\x80': 128,
+            b'\xff': 255,
+            b'\x00\x01': 256,
+            b'\xff\x7f': 32767,
+            b'\x00\x80': 32768,
+            b'\xff\xff': 65535,
+            b'\x00\x00\x01': 65536,
+        }
+        check(tests4, 'little', signed=False)
+
+        class myint(int):
+            pass
+
+        self.assertTrue(type(myint.from_bytes(b'\x00', 'big')) is myint)
+        self.assertEqual(myint.from_bytes(b'\x01', 'big'), 1)
+        self.assertTrue(
+            type(myint.from_bytes(b'\x00', 'big', signed=False)) is myint)
+        self.assertEqual(myint.from_bytes(b'\x01', 'big', signed=False), 1)
+        self.assertTrue(type(myint.from_bytes(b'\x00', 'little')) is myint)
+        self.assertEqual(myint.from_bytes(b'\x01', 'little'), 1)
+        self.assertTrue(type(myint.from_bytes(
+            b'\x00', 'little', signed=False)) is myint)
+        self.assertEqual(myint.from_bytes(b'\x01', 'little', signed=False), 1)
+        self.assertEqual(
+            int.from_bytes([255, 0, 0], 'big', signed=True), -65536)
+        self.assertEqual(
+            int.from_bytes((255, 0, 0), 'big', signed=True), -65536)
+        self.assertEqual(int.from_bytes(
+            bytearray(b'\xff\x00\x00'), 'big', signed=True), -65536)
+        self.assertEqual(int.from_bytes(
+            bytearray(b'\x00\x00\xff'), 'little', signed=True), -65536)
+        self.assertEqual(int.from_bytes(
+            array.array('B', b'\xff\x00\x00'), 'big', signed=True), -65536)
+        self.assertEqual(int.from_bytes(
+            memoryview(b'\xff\x00\x00'), 'big', signed=True), -65536)
+        self.assertRaises(ValueError, int.from_bytes, [256], 'big')
+        self.assertRaises(ValueError, int.from_bytes, [0], 'big\x00')
+        self.assertRaises(ValueError, int.from_bytes, [0], 'little\x00')
+        self.assertRaises(TypeError, int.from_bytes, "", 'big')
+        self.assertRaises(TypeError, int.from_bytes, "\x00", 'big')
+        self.assertRaises(TypeError, int.from_bytes, 0, 'big')
+        self.assertRaises(TypeError, int.from_bytes, b'\x00', 'big', True)
+        self.assertRaises(TypeError, myint.from_bytes, "", 'big')
+        self.assertRaises(TypeError, myint.from_bytes, "\x00", 'big')
+        self.assertRaises(TypeError, myint.from_bytes, 0, 'big')
+        self.assertRaises(TypeError, myint.from_bytes, b'\x00', 'big', True)
 
 
 def test_main():
diff --git a/Misc/NEWS b/Misc/NEWS
index 5fd5775f13d..adf9fb3c21c 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -149,6 +149,9 @@ Core and Builtins
 - Issue #6687: PyBytes_FromObject() no longer accepts an integer as its
   argument to construct a null-initialized bytes object.
 
+- Issue #1023290: Add from_bytes() and to_bytes() methods to integers.
+  These methods allow the conversion of integers to bytes, and vice-versa.
+
 C-API
 -----
 
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 2e2eca8e45b..cfe7b5c7484 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -4296,6 +4296,201 @@ long_is_finite(PyObject *v)
 }
 #endif
 
+
+PyDoc_STRVAR(long_to_bytes_doc,
+"int.to_bytes(length, byteorder, *, signed=False) -> bytes\n\
+\n\
+Return an array of bytes representing an integer.\n\
+\n\
+The integer is represented using length bytes.  An OverflowError is\n\
+raised if the integer is not representable with the given number of\n\
+bytes.\n\
+\n\
+The byteorder argument determines the byte order used to represent the\n\
+integer.  If byteorder is 'big', the most significant byte is at the\n\
+beginning of the byte array.  If byteorder is 'little', the most\n\
+significant byte is at the end of the byte array.  To request the native\n\
+byte order of the host system, use `sys.byteorder' as the byte order value.\n\
+\n\
+The signed keyword-only argument determines whether two's complement is\n\
+used to represent the integer.  If signed is False and a negative integer\n\
+is given, an OverflowError is raised.");
+
+static PyObject *
+long_to_bytes(PyLongObject *v, PyObject *args, PyObject *kwds)
+{
+	PyObject *byteorder_str;
+	PyObject *is_signed_obj = NULL;
+	Py_ssize_t length;
+	int little_endian;
+	int is_signed = 0;	/* default when 'signed' is omitted */
+	int cmp;
+	PyObject *bytes;
+	static PyObject *little_str = NULL, *big_str = NULL;
+	static char *kwlist[] = {"length", "byteorder", "signed", NULL};
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwds, "nO|O:to_bytes", kwlist,
+					 &length, &byteorder_str,
+					 &is_signed_obj))
+		return NULL;
+
+	if (args != NULL && Py_SIZE(args) > 2) {
+		PyErr_SetString(PyExc_TypeError,
+			"'signed' is a keyword-only argument");
+		return NULL;
+	}
+	/* Intern each name on its own so that a failure on a previous call
+	   cannot leave one of the cached pointers NULL. */
+	if (little_str == NULL &&
+	    (little_str = PyUnicode_InternFromString("little")) == NULL)
+		return NULL;
+	if (big_str == NULL &&
+	    (big_str = PyUnicode_InternFromString("big")) == NULL)
+		return NULL;
+
+	/* A negative result means the comparison itself raised. */
+	cmp = PyObject_RichCompareBool(byteorder_str, little_str, Py_EQ);
+	little_endian = (cmp > 0);
+	if (cmp == 0)
+		cmp = PyObject_RichCompareBool(byteorder_str, big_str, Py_EQ);
+	if (cmp < 0)
+		return NULL;
+	if (cmp == 0) {
+		PyErr_SetString(PyExc_ValueError,
+			"byteorder must be either 'little' or 'big'");
+		return NULL;
+	}
+
+	if (is_signed_obj != NULL) {
+		is_signed = PyObject_IsTrue(is_signed_obj);
+		if (is_signed < 0)
+			return NULL;
+	}
+
+	if (length < 0) {
+		PyErr_SetString(PyExc_ValueError,
+				"length argument must be non-negative");
+		return NULL;
+	}
+
+	bytes = PyBytes_FromStringAndSize(NULL, length);
+	if (bytes == NULL)
+		return NULL;
+
+	if (_PyLong_AsByteArray(v, (unsigned char *)PyBytes_AS_STRING(bytes),
+				length, little_endian, is_signed) < 0) {
+		Py_DECREF(bytes);
+		return NULL;
+	}
+
+	return bytes;
+}
+
+PyDoc_STRVAR(long_from_bytes_doc,
+"int.from_bytes(bytes, byteorder, *, signed=False) -> int\n\
+\n\
+Return the integer represented by the given array of bytes.\n\
+\n\
+The bytes argument must either support the buffer protocol or be an\n\
+iterable object producing bytes.  Bytes and bytearray are examples of\n\
+built-in objects that support the buffer protocol.\n\
+\n\
+The byteorder argument determines the byte order used to represent the\n\
+integer.  If byteorder is 'big', the most significant byte is at the\n\
+beginning of the byte array.  If byteorder is 'little', the most\n\
+significant byte is at the end of the byte array.  To request the native\n\
+byte order of the host system, use `sys.byteorder' as the byte order value.\n\
+\n\
+The signed keyword-only argument indicates whether two's complement is\n\
+used to represent the integer.");
+
+static PyObject *
+long_from_bytes(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+	PyObject *byteorder_str;
+	PyObject *is_signed_obj = NULL;
+	int little_endian;
+	int is_signed = 0;	/* default when 'signed' is omitted */
+	int cmp;
+	PyObject *obj;
+	PyObject *bytes;
+	PyObject *long_obj;
+	static PyObject *little_str = NULL, *big_str = NULL;
+	static char *kwlist[] = {"bytes", "byteorder", "signed", NULL};
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|O:from_bytes", kwlist,
+					 &obj, &byteorder_str,
+					 &is_signed_obj))
+		return NULL;
+
+	if (args != NULL && Py_SIZE(args) > 2) {
+		PyErr_SetString(PyExc_TypeError,
+			"'signed' is a keyword-only argument");
+		return NULL;
+	}
+	/* Intern each name on its own so that a failure on a previous call
+	   cannot leave one of the cached pointers NULL. */
+	if (little_str == NULL &&
+	    (little_str = PyUnicode_InternFromString("little")) == NULL)
+		return NULL;
+	if (big_str == NULL &&
+	    (big_str = PyUnicode_InternFromString("big")) == NULL)
+		return NULL;
+
+	/* A negative result means the comparison itself raised. */
+	cmp = PyObject_RichCompareBool(byteorder_str, little_str, Py_EQ);
+	little_endian = (cmp > 0);
+	if (cmp == 0)
+		cmp = PyObject_RichCompareBool(byteorder_str, big_str, Py_EQ);
+	if (cmp < 0)
+		return NULL;
+	if (cmp == 0) {
+		PyErr_SetString(PyExc_ValueError,
+			"byteorder must be either 'little' or 'big'");
+		return NULL;
+	}
+
+	if (is_signed_obj != NULL) {
+		is_signed = PyObject_IsTrue(is_signed_obj);
+		if (is_signed < 0)
+			return NULL;
+	}
+
+	bytes = PyObject_Bytes(obj);
+	if (bytes == NULL)
+		return NULL;
+
+	long_obj = _PyLong_FromByteArray(
+		(unsigned char *)PyBytes_AS_STRING(bytes), Py_SIZE(bytes),
+		little_endian, is_signed);
+	Py_DECREF(bytes);
+	if (long_obj == NULL)
+		return NULL;
+
+	/* If from_bytes() was used on subclass, allocate new subclass
+	 * instance, initialize it with decoded long value and return it.
+	 */
+	if (type != &PyLong_Type && PyType_IsSubtype(type, &PyLong_Type)) {
+		PyLongObject *newobj;
+		Py_ssize_t i, n = ABS(Py_SIZE(long_obj));
+
+		newobj = (PyLongObject *)type->tp_alloc(type, n);
+		if (newobj == NULL) {
+			Py_DECREF(long_obj);
+			return NULL;
+		}
+		assert(PyLong_Check(newobj));
+		Py_SIZE(newobj) = Py_SIZE(long_obj);
+		for (i = 0; i < n; i++)
+			newobj->ob_digit[i] =
+				((PyLongObject *)long_obj)->ob_digit[i];
+		Py_DECREF(long_obj);
+		return (PyObject *)newobj;
+	}
+
+	return long_obj;
+}
+
 static PyMethodDef long_methods[] = {
 	{"conjugate",	(PyCFunction)long_long,	METH_NOARGS,
 	 "Returns self, the complex conjugate of any int."},
@@ -4305,6 +4500,10 @@ static PyMethodDef long_methods[] = {
 	{"is_finite",	(PyCFunction)long_is_finite,	METH_NOARGS,
 	 "Returns always True."},
 #endif
+	{"to_bytes",	(PyCFunction)long_to_bytes,
+	 METH_VARARGS|METH_KEYWORDS, long_to_bytes_doc},
+	{"from_bytes",	(PyCFunction)long_from_bytes,
+	 METH_VARARGS|METH_KEYWORDS|METH_CLASS, long_from_bytes_doc},
 	{"__trunc__",	(PyCFunction)long_long,	METH_NOARGS,
          "Truncating an Integral returns itself."},
 	{"__floor__",	(PyCFunction)long_long,	METH_NOARGS,