From 7861dfd26a41e40c2b4361eb0bb1356b9b4a064b Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Mon, 12 Feb 2024 20:13:13 +0000 Subject: [PATCH] gh-111140: Adds PyLong_AsNativeBytes and PyLong_FromNative[Unsigned]Bytes functions (GH-114886) --- Doc/c-api/long.rst | 66 ++++++ Doc/whatsnew/3.13.rst | 7 +- Include/cpython/longobject.h | 36 ++- Lib/test/test_capi/test_long.py | 145 ++++++++++++ ...-02-05-17-11-15.gh-issue-111140.WMEjid.rst | 2 + Modules/_io/textio.c | 2 +- Modules/_pickle.c | 3 +- Modules/_randommodule.c | 3 +- Modules/_sqlite/util.c | 2 +- Modules/_struct.c | 20 +- Modules/_testcapi/long.c | 48 +++- Modules/_tkinter.c | 3 +- Modules/cjkcodecs/multibytecodec.c | 6 +- Objects/longobject.c | 216 +++++++++++++++++- 14 files changed, 533 insertions(+), 26 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index f42e23db89a..c39823e5e67 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -113,6 +113,28 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. retrieved from the resulting value using :c:func:`PyLong_AsVoidPtr`. +.. c:function:: PyObject* PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, int endianness) + + Create a Python integer from the value contained in the first *n_bytes* of + *buffer*, interpreted as a two's-complement signed number. + + *endianness* may be passed ``-1`` for the native endian that CPython was + compiled with, or else ``0`` for big endian and ``1`` for little. + + .. versionadded:: 3.13 + + +.. c:function:: PyObject* PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n_bytes, int endianness) + + Create a Python integer from the value contained in the first *n_bytes* of + *buffer*, interpreted as an unsigned number. + + *endianness* may be passed ``-1`` for the native endian that CPython was + compiled with, or else ``0`` for big endian and ``1`` for little. + + .. 
versionadded:: 3.13 + + .. XXX alias PyLong_AS_LONG (for now) .. c:function:: long PyLong_AsLong(PyObject *obj) @@ -332,6 +354,50 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Returns ``NULL`` on error. Use :c:func:`PyErr_Occurred` to disambiguate. +.. c:function:: Py_ssize_t PyLong_AsNativeBytes(PyObject *pylong, void* buffer, Py_ssize_t n_bytes, int endianness) + + Copy the Python integer value to a native *buffer* of size *n_bytes*:: + + int value; + Py_ssize_t bytes = PyLong_AsNativeBytes(pylong, &value, sizeof(value), -1); + if (bytes < 0) { + // Error occurred + return NULL; + } + else if (bytes > sizeof(value)) { + // Overflow occurred, but 'value' contains as much as could fit + } + + *endianness* may be passed ``-1`` for the native endian that CPython was + compiled with, or ``0`` for big endian and ``1`` for little. + + Return ``-1`` with an exception raised if *pylong* cannot be interpreted as + an integer. Otherwise, return the size of the buffer required to store the + value. If this is equal to or less than *n_bytes*, the entire value was + copied. + + Unless an exception is raised, all *n_bytes* of the buffer will be written + with as much of the value as can fit. This allows the caller to ignore all + non-negative results if the intent is to match the typical behavior of a + C-style downcast. + + Values are always copied as two's-complement, and sufficient size will be + requested for a sign bit. For example, this may cause a value that fits into + 8 bytes when treated as unsigned to request 9 bytes, even though all eight + bytes were copied into the buffer. What has been omitted is the zero sign + bit, which is redundant when the intention is to treat the value as unsigned. + + Passing *n_bytes* of zero will always return the requested buffer size. + + .. note:: + + When the value does not fit in the provided buffer, the requested size + returned from the function may be larger than necessary. 
Passing 0 to this + function is not an accurate way to determine the bit length of a value. + + .. versionadded:: 3.13 + + .. c:function:: int PyUnstable_Long_IsCompact(const PyLongObject* op) Return 1 if *op* is compact, 0 otherwise. diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 191657061f7..b96720df0a2 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -587,6 +587,7 @@ Tier 2 IR by Mark Shannon and Guido van Rossum. Tier 2 optimizer by Ken Jin.) + Deprecated ========== @@ -1526,6 +1527,11 @@ New Features (Contributed by Victor Stinner and Petr Viktorin in :gh:`110850`.) +* Add :c:func:`PyLong_AsNativeBytes`, :c:func:`PyLong_FromNativeBytes` and + :c:func:`PyLong_FromUnsignedNativeBytes` functions to simplify converting + between native integer types and Python :class:`int` objects. + (Contributed by Steve Dower in :gh:`111140`.) + Porting to Python 3.13 ---------------------- @@ -1585,7 +1591,6 @@ Porting to Python 3.13 platforms, the ``HAVE_STDDEF_H`` macro is only defined on Windows. (Contributed by Victor Stinner in :gh:`108765`.) - Deprecated ---------- diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h index fd1be29ed39..07251db6bcc 100644 --- a/Include/cpython/longobject.h +++ b/Include/cpython/longobject.h @@ -4,6 +4,40 @@ PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base); +/* PyLong_AsNativeBytes: Copy the integer value to a native variable. + buffer points to the first byte of the variable. + n_bytes is the number of bytes available in the buffer. Pass 0 to request + the required size for the value. + endianness is -1 for native endian, 0 for big endian or 1 for little. + Big endian mode will write the most significant byte into the address + directly referenced by buffer; little endian will write the least significant + byte into that address. + + If an exception is raised, returns a negative value. + Otherwise, returns the number of bytes that are required to store the value. 
+ To check that the full value is represented, ensure that the return value is + equal or less than n_bytes. + All n_bytes are guaranteed to be written (unless an exception occurs), and + so ignoring a positive return value is the equivalent of a downcast in C. + In cases where the full value could not be represented, the returned value + may be larger than necessary - this function is not an accurate way to + calculate the bit length of an integer object. + */ +PyAPI_FUNC(Py_ssize_t) PyLong_AsNativeBytes(PyObject* v, void* buffer, + Py_ssize_t n_bytes, int endianness); + +/* PyLong_FromNativeBytes: Create an int value from a native integer + n_bytes is the number of bytes to read from the buffer. Passing 0 will + always produce the zero int. + PyLong_FromUnsignedNativeBytes always produces a non-negative int. + endianness is -1 for native endian, 0 for big endian or 1 for little. + + Returns the int object, or NULL with an exception set. */ +PyAPI_FUNC(PyObject*) PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, + int endianness); +PyAPI_FUNC(PyObject*) PyLong_FromUnsignedNativeBytes(const void* buffer, + size_t n_bytes, int endianness); + PyAPI_FUNC(int) PyUnstable_Long_IsCompact(const PyLongObject* op); PyAPI_FUNC(Py_ssize_t) PyUnstable_Long_CompactValue(const PyLongObject* op); @@ -50,7 +84,7 @@ PyAPI_FUNC(PyObject *) _PyLong_FromByteArray( */ PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v, unsigned char* bytes, size_t n, - int little_endian, int is_signed); + int little_endian, int is_signed, int with_exceptions); /* For use by the gcd function in mathmodule.c */ PyAPI_FUNC(PyObject *) _PyLong_GCD(PyObject *, PyObject *); diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 8e3ef25d1ff..fc82cbfa66e 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -1,5 +1,6 @@ import unittest import sys +import test.support as support from test.support import import_helper @@ -423,6 +424,150 @@ 
class LongTests(unittest.TestCase): self.assertRaises(OverflowError, asvoidptr, -2**1000) # CRASHES asvoidptr(NULL) + def test_long_asnativebytes(self): + import math + from _testcapi import ( + pylong_asnativebytes as asnativebytes, + SIZE_MAX, + ) + + # Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot + SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8) + MAX_SSIZE = 2 ** (SZ * 8 - 1) - 1 + MAX_USIZE = 2 ** (SZ * 8) - 1 + if support.verbose: + print(f"SIZEOF_SIZE={SZ}\n{MAX_SSIZE=:016X}\n{MAX_USIZE=:016X}") + + # These tests check that the requested buffer size is correct + for v, expect in [ + (0, SZ), + (512, SZ), + (-512, SZ), + (MAX_SSIZE, SZ), + (MAX_USIZE, SZ + 1), + (-MAX_SSIZE, SZ), + (-MAX_USIZE, SZ + 1), + (2**255-1, 32), + (-(2**255-1), 32), + (2**256-1, 33), + (-(2**256-1), 33), + ]: + with self.subTest(f"sizeof-{v:X}"): + buffer = bytearray(1) + self.assertEqual(expect, asnativebytes(v, buffer, 0, -1), + "PyLong_AsNativeBytes(v, NULL, 0, -1)") + # Also check via the __index__ path + self.assertEqual(expect, asnativebytes(Index(v), buffer, 0, -1), + "PyLong_AsNativeBytes(Index(v), NULL, 0, -1)") + + # We request as many bytes as `expect_be` contains, and always check + # the result (both big and little endian). 
We check the return value + # independently, since the buffer should always be filled correctly even + # if we need more bytes + for v, expect_be, expect_n in [ + (0, b'\x00', 1), + (0, b'\x00' * 2, 2), + (0, b'\x00' * 8, min(8, SZ)), + (1, b'\x01', 1), + (1, b'\x00' * 10 + b'\x01', min(11, SZ)), + (42, b'\x2a', 1), + (42, b'\x00' * 10 + b'\x2a', min(11, SZ)), + (-1, b'\xff', 1), + (-1, b'\xff' * 10, min(11, SZ)), + (-42, b'\xd6', 1), + (-42, b'\xff' * 10 + b'\xd6', min(11, SZ)), + # Extracts 255 into a single byte, but requests sizeof(Py_ssize_t) + (255, b'\xff', SZ), + (255, b'\x00\xff', 2), + (256, b'\x01\x00', 2), + # Extracts successfully (unsigned), but requests 9 bytes + (2**63, b'\x80' + b'\x00' * 7, 9), + # "Extracts", but requests 9 bytes + (-2**63, b'\x80' + b'\x00' * 7, 9), + (2**63, b'\x00\x80' + b'\x00' * 7, 9), + (-2**63, b'\xff\x80' + b'\x00' * 7, 9), + + (2**255-1, b'\x7f' + b'\xff' * 31, 32), + (-(2**255-1), b'\x80' + b'\x00' * 30 + b'\x01', 32), + # Request extra bytes, but result says we only needed 32 + (-(2**255-1), b'\xff\x80' + b'\x00' * 30 + b'\x01', 32), + (-(2**255-1), b'\xff\xff\x80' + b'\x00' * 30 + b'\x01', 32), + + # Extracting 256 bits of integer will request 33 bytes, but still + # copy as many bits as possible into the buffer. 
So we *can* copy + # into a 32-byte buffer, though negative number may be unrecoverable + (2**256-1, b'\xff' * 32, 33), + (2**256-1, b'\x00' + b'\xff' * 32, 33), + (-(2**256-1), b'\x00' * 31 + b'\x01', 33), + (-(2**256-1), b'\xff' + b'\x00' * 31 + b'\x01', 33), + (-(2**256-1), b'\xff\xff' + b'\x00' * 31 + b'\x01', 33), + + # The classic "Windows HRESULT as negative number" case + # HRESULT hr; + # PyLong_CopyBits(<-2147467259>, &hr, sizeof(HRESULT)) + # assert(hr == E_FAIL) + (-2147467259, b'\x80\x00\x40\x05', 4), + ]: + with self.subTest(f"{v:X}-{len(expect_be)}bytes"): + n = len(expect_be) + buffer = bytearray(n) + expect_le = expect_be[::-1] + + self.assertEqual(expect_n, asnativebytes(v, buffer, n, 0), + f"PyLong_AsNativeBytes(v, buffer, {n}, )") + self.assertEqual(expect_be, buffer[:n], "") + self.assertEqual(expect_n, asnativebytes(v, buffer, n, 1), + f"PyLong_AsNativeBytes(v, buffer, {n}, )") + self.assertEqual(expect_le, buffer[:n], "") + + # Check a few error conditions. These are validated in code, but are + # unspecified in docs, so if we make changes to the implementation, it's + # fine to just update these tests rather than preserve the behaviour. 
+ with self.assertRaises(SystemError): + asnativebytes(1, buffer, 0, 2) + with self.assertRaises(TypeError): + asnativebytes('not a number', buffer, 0, -1) + + def test_long_fromnativebytes(self): + import math + from _testcapi import ( + pylong_fromnativebytes as fromnativebytes, + SIZE_MAX, + ) + + # Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot + SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8) + MAX_SSIZE = 2 ** (SZ * 8 - 1) - 1 + MAX_USIZE = 2 ** (SZ * 8) - 1 + + for v_be, expect_s, expect_u in [ + (b'\x00', 0, 0), + (b'\x01', 1, 1), + (b'\xff', -1, 255), + (b'\x00\xff', 255, 255), + (b'\xff\xff', -1, 65535), + ]: + with self.subTest(f"{expect_s}-{expect_u:X}-{len(v_be)}bytes"): + n = len(v_be) + v_le = v_be[::-1] + + self.assertEqual(expect_s, fromnativebytes(v_be, n, 0, 1), + f"PyLong_FromNativeBytes(buffer, {n}, )") + self.assertEqual(expect_s, fromnativebytes(v_le, n, 1, 1), + f"PyLong_FromNativeBytes(buffer, {n}, )") + self.assertEqual(expect_u, fromnativebytes(v_be, n, 0, 0), + f"PyLong_FromUnsignedNativeBytes(buffer, {n}, )") + self.assertEqual(expect_u, fromnativebytes(v_le, n, 1, 0), + f"PyLong_FromUnsignedNativeBytes(buffer, {n}, )") + + # Check native endian when the result would be the same either + # way and we can test it. 
+ if v_be == v_le: + self.assertEqual(expect_s, fromnativebytes(v_be, n, -1, 1), + f"PyLong_FromNativeBytes(buffer, {n}, )") + self.assertEqual(expect_u, fromnativebytes(v_be, n, -1, 0), + f"PyLong_FromUnsignedNativeBytes(buffer, {n}, )") + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/C API/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst b/Misc/NEWS.d/next/C API/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst new file mode 100644 index 00000000000..a8aa191b5eb --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst @@ -0,0 +1,2 @@ +Adds :c:func:`PyLong_AsNativeBytes`, :c:func:`PyLong_FromNativeBytes` and +:c:func:`PyLong_FromUnsignedNativeBytes` functions. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index d794af8de2b..a3239ec0f52 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -2393,7 +2393,7 @@ textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj) return -1; if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer), - PY_LITTLE_ENDIAN, 0) < 0) { + PY_LITTLE_ENDIAN, 0, 1) < 0) { Py_DECREF(cookieLong); return -1; } diff --git a/Modules/_pickle.c b/Modules/_pickle.c index f210c0ca205..0d832611681 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2162,7 +2162,8 @@ save_long(PicklerObject *self, PyObject *obj) pdata = (unsigned char *)PyBytes_AS_STRING(repr); i = _PyLong_AsByteArray((PyLongObject *)obj, pdata, nbytes, - 1 /* little endian */ , 1 /* signed */ ); + 1 /* little endian */ , 1 /* signed */ , + 1 /* with exceptions */); if (i < 0) goto error; /* If the int is negative, this may be a byte more than diff --git a/Modules/_randommodule.c b/Modules/_randommodule.c index 5481ed9b348..4463157d622 100644 --- a/Modules/_randommodule.c +++ b/Modules/_randommodule.c @@ -342,7 +342,8 @@ random_seed(RandomObject *self, PyObject *arg) res = _PyLong_AsByteArray((PyLongObject *)n, (unsigned char *)key, keyused * 4, PY_LITTLE_ENDIAN, - 0); /* unsigned */ + 0, /* 
unsigned */ + 1); /* with exceptions */ if (res == -1) { goto Done; } diff --git a/Modules/_sqlite/util.c b/Modules/_sqlite/util.c index 833a666301d..9e8613ef679 100644 --- a/Modules/_sqlite/util.c +++ b/Modules/_sqlite/util.c @@ -162,7 +162,7 @@ _pysqlite_long_as_int64(PyObject * py_val) sqlite_int64 int64val; if (_PyLong_AsByteArray((PyLongObject *)py_val, (unsigned char *)&int64val, sizeof(int64val), - IS_LITTLE_ENDIAN, 1 /* signed */) >= 0) { + IS_LITTLE_ENDIAN, 1 /* signed */, 0) >= 0) { return int64val; } } diff --git a/Modules/_struct.c b/Modules/_struct.c index bd16fa89f18..fa2cd37e003 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -1000,9 +1000,10 @@ bp_longlong(_structmodulestate *state, char *p, PyObject *v, const formatdef *f) (unsigned char *)p, 8, 0, /* little_endian */ - 1 /* signed */); + 1, /* signed */ + 0 /* !with_exceptions */); Py_DECREF(v); - if (res == -1 && PyErr_Occurred()) { + if (res < 0) { PyErr_Format(state->StructError, "'%c' format requires %lld <= number <= %lld", f->format, @@ -1024,9 +1025,10 @@ bp_ulonglong(_structmodulestate *state, char *p, PyObject *v, const formatdef *f (unsigned char *)p, 8, 0, /* little_endian */ - 0 /* signed */); + 0, /* signed */ + 0 /* !with_exceptions */); Py_DECREF(v); - if (res == -1 && PyErr_Occurred()) { + if (res < 0) { PyErr_Format(state->StructError, "'%c' format requires 0 <= number <= %llu", f->format, @@ -1260,9 +1262,10 @@ lp_longlong(_structmodulestate *state, char *p, PyObject *v, const formatdef *f) (unsigned char *)p, 8, 1, /* little_endian */ - 1 /* signed */); + 1, /* signed */ + 0 /* !with_exceptions */); Py_DECREF(v); - if (res == -1 && PyErr_Occurred()) { + if (res < 0) { PyErr_Format(state->StructError, "'%c' format requires %lld <= number <= %lld", f->format, @@ -1284,9 +1287,10 @@ lp_ulonglong(_structmodulestate *state, char *p, PyObject *v, const formatdef *f (unsigned char *)p, 8, 1, /* little_endian */ - 0 /* signed */); + 0, /* signed */ + 0 /* !with_exceptions */); 
Py_DECREF(v); - if (res == -1 && PyErr_Occurred()) { + if (res < 0) { PyErr_Format(state->StructError, "'%c' format requires 0 <= number <= %llu", f->format, diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 32ad8d32ab8..dc21cf9f475 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -776,6 +776,51 @@ pylong_asvoidptr(PyObject *module, PyObject *arg) return Py_NewRef((PyObject *)value); } +static PyObject * +pylong_asnativebytes(PyObject *module, PyObject *args) +{ + PyObject *v; + Py_buffer buffer; + Py_ssize_t n, endianness; + if (!PyArg_ParseTuple(args, "Ow*nn", &v, &buffer, &n, &endianness)) { + return NULL; + } + if (buffer.readonly) { + PyErr_SetString(PyExc_TypeError, "buffer must be writable"); + PyBuffer_Release(&buffer); + return NULL; + } + if (buffer.len < n) { + PyErr_SetString(PyExc_ValueError, "buffer must be at least 'n' bytes"); + PyBuffer_Release(&buffer); + return NULL; + } + Py_ssize_t res = PyLong_AsNativeBytes(v, buffer.buf, n, (int)endianness); + PyBuffer_Release(&buffer); + return res >= 0 ? PyLong_FromSsize_t(res) : NULL; +} + +static PyObject * +pylong_fromnativebytes(PyObject *module, PyObject *args) +{ + Py_buffer buffer; + Py_ssize_t n, endianness, signed_; + if (!PyArg_ParseTuple(args, "y*nnn", &buffer, &n, &endianness, &signed_)) { + return NULL; + } + if (buffer.len < n) { + PyErr_SetString(PyExc_ValueError, "buffer must be at least 'n' bytes"); + PyBuffer_Release(&buffer); + return NULL; + } + PyObject *res = signed_ + ? 
PyLong_FromNativeBytes(buffer.buf, n, (int)endianness) + : PyLong_FromUnsignedNativeBytes(buffer.buf, n, (int)endianness); + PyBuffer_Release(&buffer); + return res; +} + + static PyMethodDef test_methods[] = { _TESTCAPI_TEST_LONG_AND_OVERFLOW_METHODDEF _TESTCAPI_TEST_LONG_API_METHODDEF @@ -804,6 +849,8 @@ static PyMethodDef test_methods[] = { {"pylong_as_size_t", pylong_as_size_t, METH_O}, {"pylong_asdouble", pylong_asdouble, METH_O}, {"pylong_asvoidptr", pylong_asvoidptr, METH_O}, + {"pylong_asnativebytes", pylong_asnativebytes, METH_VARARGS}, + {"pylong_fromnativebytes", pylong_fromnativebytes, METH_VARARGS}, {NULL}, }; @@ -813,6 +860,5 @@ _PyTestCapi_Init_Long(PyObject *mod) if (PyModule_AddFunctions(mod, test_methods) < 0) { return -1; } - return 0; } diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index f6181168a85..e3789867dc0 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -926,7 +926,8 @@ AsObj(PyObject *value) (unsigned char *)(void *)&wideValue, sizeof(wideValue), PY_LITTLE_ENDIAN, - /* signed */ 1) == 0) { + /* signed */ 1, + /* with_exceptions */ 1) == 0) { return Tcl_NewWideIntObj(wideValue); } PyErr_Clear(); diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index 5d3c16a9842..2125da43796 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -973,7 +973,8 @@ _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEn if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes), 1 /* little-endian */ , - 0 /* unsigned */ ) < 0) { + 0 /* unsigned */ , + 1 /* with_exceptions */) < 0) { goto errorexit; } @@ -1255,7 +1256,8 @@ _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDe if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes), 1 /* little-endian */ , - 0 /* unsigned */ ) < 0) { + 0 /* unsigned */ , + 1 /* with_exceptions */) < 0) { return NULL; } diff --git a/Objects/longobject.c 
b/Objects/longobject.c index e655ba19e8f..932111f5842 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -928,7 +928,8 @@ _PyLong_FromByteArray(const unsigned char* bytes, size_t n, int _PyLong_AsByteArray(PyLongObject* v, unsigned char* bytes, size_t n, - int little_endian, int is_signed) + int little_endian, int is_signed, + int with_exceptions) { Py_ssize_t i; /* index into v->long_value.ob_digit */ Py_ssize_t ndigits; /* number of digits */ @@ -945,8 +946,10 @@ _PyLong_AsByteArray(PyLongObject* v, ndigits = _PyLong_DigitCount(v); if (_PyLong_IsNegative(v)) { if (!is_signed) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative int to unsigned"); + if (with_exceptions) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative int to unsigned"); + } return -1; } do_twos_comp = 1; @@ -967,7 +970,12 @@ _PyLong_AsByteArray(PyLongObject* v, /* Copy over all the Python digits. It's crucial that every Python digit except for the MSD contribute exactly PyLong_SHIFT bits to the total, so first assert that the int is - normalized. */ + normalized. + NOTE: PyLong_AsNativeBytes() assumes that this function will fill in 'n' + bytes even if it eventually fails to convert the whole number. Make sure + you account for that if you are changing this algorithm to return without + doing that. + */ assert(ndigits == 0 || v->long_value.ob_digit[ndigits - 1] != 0); j = 0; accum = 0; @@ -1052,11 +1060,203 @@ _PyLong_AsByteArray(PyLongObject* v, return 0; Overflow: - PyErr_SetString(PyExc_OverflowError, "int too big to convert"); + if (with_exceptions) { + PyErr_SetString(PyExc_OverflowError, "int too big to convert"); + } return -1; } +// Refactored out for readability, not reuse +static inline int +_fits_in_n_bits(Py_ssize_t v, Py_ssize_t n) +{ + if (n >= (Py_ssize_t)sizeof(Py_ssize_t) * 8) { + return 1; + } + // If all bits above n are the same, we fit. + // (Use n-1 if we require the sign bit to be consistent.) 
+ Py_ssize_t v_extended = v >> ((int)n - 1); + return v_extended == 0 || v_extended == -1; +} + +static inline int +_resolve_endianness(int *endianness) +{ + if (*endianness < 0) { + *endianness = PY_LITTLE_ENDIAN; + } + if (*endianness != 0 && *endianness != 1) { + PyErr_SetString(PyExc_SystemError, "invalid 'endianness' value"); + return -1; + } + return 0; +} + +Py_ssize_t +PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) +{ + PyLongObject *v; + union { + Py_ssize_t v; + unsigned char b[sizeof(Py_ssize_t)]; + } cv; + int do_decref = 0; + Py_ssize_t res = 0; + + if (vv == NULL || n < 0) { + PyErr_BadInternalCall(); + return -1; + } + + int little_endian = endianness; + if (_resolve_endianness(&little_endian) < 0) { + return -1; + } + + if (PyLong_Check(vv)) { + v = (PyLongObject *)vv; + } + else { + v = (PyLongObject *)_PyNumber_Index(vv); + if (v == NULL) { + return -1; + } + do_decref = 1; + } + + if (_PyLong_IsCompact(v)) { + res = 0; + cv.v = _PyLong_CompactValue(v); + /* Most paths result in res = sizeof(compact value). Only the case + * where 0 < n < sizeof(compact value) do we need to check and adjust + * our return value. */ + res = sizeof(cv.b); + if (n <= 0) { + // nothing to do! + } + else if (n <= sizeof(cv.b)) { +#if PY_LITTLE_ENDIAN + if (little_endian) { + memcpy(buffer, cv.b, n); + } + else { + for (Py_ssize_t i = 0; i < n; ++i) { + ((unsigned char*)buffer)[n - i - 1] = cv.b[i]; + } + } +#else + if (little_endian) { + for (Py_ssize_t i = 0; i < n; ++i) { + ((unsigned char*)buffer)[i] = cv.b[sizeof(cv.b) - i - 1]; + } + } + else { + memcpy(buffer, &cv.b[sizeof(cv.b) - n], n); + } +#endif + + /* If we fit, return the requested number of bytes */ + if (_fits_in_n_bits(cv.v, n * 8)) { + res = n; + } + } + else { + unsigned char fill = cv.v < 0 ? 
0xFF : 0x00; +#if PY_LITTLE_ENDIAN + if (little_endian) { + memcpy(buffer, cv.b, sizeof(cv.b)); + memset((char *)buffer + sizeof(cv.b), fill, n - sizeof(cv.b)); + } + else { + unsigned char *b = (unsigned char *)buffer; + for (Py_ssize_t i = 0; i < n - (int)sizeof(cv.b); ++i) { + *b++ = fill; + } + for (Py_ssize_t i = sizeof(cv.b); i > 0; --i) { + *b++ = cv.b[i - 1]; + } + } +#else + if (little_endian) { + unsigned char *b = (unsigned char *)buffer; + for (Py_ssize_t i = sizeof(cv.b); i > 0; --i) { + *b++ = cv.b[i - 1]; + } + for (Py_ssize_t i = 0; i < n - sizeof(cv.b); ++i) { + *b++ = fill; + } + } + else { + memset(buffer, fill, n - sizeof(cv.b)); + memcpy((char *)buffer + n - sizeof(cv.b), cv.b, sizeof(cv.b)); + } +#endif + } + } + else { + if (n > 0) { + _PyLong_AsByteArray(v, buffer, (size_t)n, little_endian, 1, 0); + } + + // More efficient calculation for number of bytes required? + size_t nb = _PyLong_NumBits((PyObject *)v); + /* Normally this would be((nb - 1) / 8) + 1 to avoid rounding up + * multiples of 8 to the next byte, but we add an implied bit for + * the sign and it cancels out. 
*/ + size_t n_needed = (nb / 8) + 1; + res = (Py_ssize_t)n_needed; + if ((size_t)res != n_needed) { + PyErr_SetString(PyExc_OverflowError, + "value too large to convert"); + res = -1; + } + } + + if (do_decref) { + Py_DECREF(v); + } + + return res; +} + + +PyObject * +PyLong_FromNativeBytes(const void* buffer, size_t n, int endianness) +{ + if (!buffer) { + PyErr_BadInternalCall(); + return NULL; + } + + int little_endian = endianness; + if (_resolve_endianness(&little_endian) < 0) { + return NULL; + } + + return _PyLong_FromByteArray((const unsigned char *)buffer, n, + little_endian, 1); +} + + +PyObject * +PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n, int endianness) +{ + if (!buffer) { + PyErr_BadInternalCall(); + return NULL; + } + + int little_endian = endianness; + if (_resolve_endianness(&little_endian) < 0) { + return NULL; + } + + return _PyLong_FromByteArray((const unsigned char *)buffer, n, + little_endian, 0); +} + + /* Create a new int object from a C pointer */ PyObject * @@ -1231,7 +1431,7 @@ PyLong_AsLongLong(PyObject *vv) } else { res = _PyLong_AsByteArray((PyLongObject *)v, (unsigned char *)&bytes, - SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 1); + SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 1, 1); } if (do_decref) { Py_DECREF(v); @@ -1270,7 +1470,7 @@ PyLong_AsUnsignedLongLong(PyObject *vv) } else { res = _PyLong_AsByteArray((PyLongObject *)vv, (unsigned char *)&bytes, - SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 0); + SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 0, 1); } /* Plan 9 can't handle long long in ? : expressions */ @@ -6068,7 +6268,7 @@ int_to_bytes_impl(PyObject *self, Py_ssize_t length, PyObject *byteorder, if (_PyLong_AsByteArray((PyLongObject *)self, (unsigned char *)PyBytes_AS_STRING(bytes), - length, little_endian, is_signed) < 0) { + length, little_endian, is_signed, 1) < 0) { Py_DECREF(bytes); return NULL; }