Two new private longobject API functions,

_PyLong_FromByteArray and _PyLong_AsByteArray. Untested and probably buggy -- they compile OK, but nothing calls them yet. Will soon be called by the struct module, to implement cross-platform 'q' and 'Q'. If other people have uses for them, we could move them into the public API. See longobject.h for usage details.
2001-06-11 21:23:58 +00:00 · 2001-06-11 21:23:58 +00:00 · 2a9b367385
parent 0b9bc20e66
commit 2a9b367385
2 changed files with 253 additions and 0 deletions
--- a/Include/longobject.h
+++ b/Include/longobject.h
@ -44,6 +44,46 @@ extern DL_IMPORT(unsigned LONG_LONG) PyLong_AsUnsignedLongLong(PyObject *);
 DL_IMPORT(PyObject *) PyLong_FromString(char *, char **, int);
 DL_IMPORT(PyObject *) PyLong_FromUnicode(Py_UNICODE*, int, int);

+/* _PyLong_FromByteArray:  View the n unsigned bytes as a binary integer in
+   base 256, and return a Python long with the same numeric value.
+   If n is 0, the integer is 0.  Else:
+   If little_endian is 1/true, bytes[n-1] is the MSB and bytes[0] the LSB;
+   else (little_endian is 0/false) bytes[0] is the MSB and bytes[n-1] the
+   LSB.
+   If is_signed is 0/false, view the bytes as a non-negative integer.
+   If is_signed is 1/true, view the bytes as a 2's-complement integer,
+   non-negative if bit 0x80 of the MSB is clear, negative if set.
+   The bytes array is only read, never written.
+   Error returns:
+   + Return NULL with the appropriate exception set if there's not
+     enough memory to create the Python long.
+   + Return NULL via PyErr_NoMemory() if the value would need more than
+     INT_MAX Python long digits.
+*/
+extern DL_IMPORT(PyObject *) _PyLong_FromByteArray(
+	const unsigned char* bytes, size_t n,
+	int little_endian, int is_signed);
+
+/* _PyLong_AsByteArray: Convert the least-significant 8*n bits of long
+   v to a base-256 integer, stored in array bytes.  Normally return 0,
+   return -1 on error.
+   v must be non-NULL and must be a Python long (this is only checked by
+   an assert).
+   If little_endian is 1/true, store the MSB at bytes[n-1] and the LSB at
+   bytes[0]; else (little_endian is 0/false) store the MSB at bytes[0] and
+   the LSB at bytes[n-1].
+   If is_signed is 0/false, it's an error if v < 0; else (v >= 0) n bytes
+   are filled and there's nothing special about bit 0x80 of the MSB.
+   If is_signed is 1/true, bytes is filled with the 2's-complement
+   representation of v's value.  Bit 0x80 of the MSB is the sign bit.
+   Error returns (-1):
+   + is_signed is 0 and v < 0.  TypeError is set in this case, and bytes
+     isn't altered.
+   + n isn't big enough to hold the full mathematical value of v.  For
+     example, if is_signed is 0 and there are more digits in v than
+     fit in n; or if is_signed is 1, v < 0, and n is just 1 bit shy of
+     being large enough to hold a sign bit.  OverflowError is set in this
+     case, but bytes holds the least-significant n bytes of the true value.
+*/
+extern DL_IMPORT(int) _PyLong_AsByteArray(PyLongObject* v,
+	unsigned char* bytes, size_t n,
+	int little_endian, int is_signed);
+
 #ifdef __cplusplus
 }
 #endif
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@ -211,6 +211,219 @@ PyLong_AsUnsignedLong(PyObject *vv)
 	return x;
 }

+/* Build a Python long from n raw bytes viewed as one base-256 integer.
+
+   bytes          the input bytes; only read, never written.
+   n              number of bytes; n == 0 yields the long 0.
+   little_endian  true: bytes[0] is the LSB and bytes[n-1] the MSB;
+                  false: bytes[0] is the MSB and bytes[n-1] the LSB.
+   is_signed      true: the bytes are a 2's-complement value, negative iff
+                  bit 0x80 of the MSB is set; false: non-negative value.
+
+   Returns the resulting long, or NULL with an exception set when the
+   long can't be allocated or the value would need more than INT_MAX
+   Python digits (PyErr_NoMemory in the latter case). */
+PyObject *
+_PyLong_FromByteArray(const unsigned char* bytes, size_t n,
+		      int little_endian, int is_signed)
+{
+	const unsigned char* pstartbyte;/* LSB of bytes */
+	int incr;			/* direction to move pstartbyte */
+	const unsigned char* pendbyte;	/* MSB of bytes */
+	size_t numsignificantbytes;	/* number of bytes that matter */
+	size_t ndigits;			/* number of Python long digits */
+	PyLongObject* v;		/* result */
+	int idigit = 0;  		/* next free index in v->ob_digit */
+
+	if (n == 0)
+		return PyLong_FromLong(0L);
+
+	if (little_endian) {
+		pstartbyte = bytes;
+		pendbyte = bytes + n - 1;
+		incr = 1;
+	}
+	else {
+		pstartbyte = bytes + n - 1;
+		pendbyte = bytes;
+		incr = -1;
+	}
+
+	/* From here on is_signed means "the value is negative", i.e. the
+	   bytes must be 2's-complemented to get the magnitude. */
+	if (is_signed)
+		is_signed = *pendbyte >= 0x80;
+
+	/* Compute numsignificantbytes.  This consists of finding the most
+	   significant byte.  Leading 0 bytes are insignificant if the number
+	   is positive, and leading 0xff bytes if negative. */
+	{
+		size_t i;
+		const unsigned char* p = pendbyte;
+		const int pincr = -incr;  /* search MSB to LSB */
+		const unsigned char insignificant = is_signed ? 0xff : 0x00;
+
+		for (i = 0; i < n; ++i, p += pincr) {
+			if (*p != insignificant)
+				break;
+		}
+		numsignificantbytes = n - i;
+		/* 2's-comp is a bit tricky here, e.g. 0xff00 == -0x0100, so
+		   actually has 2 significant bytes.  OTOH, 0xff0001 ==
+		   -0x00ffff, so we wouldn't *need* to bump it there; but we
+		   do for 0xffff = -0x0001.  To be safe without bothering to
+		   check every case, bump it regardless. */
+		if (is_signed && numsignificantbytes < n)
+			++numsignificantbytes;
+	}
+
+	/* How many Python long digits do we need?  We have
+	   8*numsignificantbytes bits, and each Python long digit has SHIFT
+	   bits, so it's the ceiling of the quotient.
+	   Reject byte counts for which the bit count itself would wrap
+	   around in size_t; otherwise ndigits would come out too small and
+	   the copy loop below would run off the end of v->ob_digit. */
+	if (numsignificantbytes > ((size_t)-1 - (SHIFT - 1)) / 8)
+		return PyErr_NoMemory();
+	ndigits = (numsignificantbytes * 8 + SHIFT - 1) / SHIFT;
+	if (ndigits > (size_t)INT_MAX)
+		return PyErr_NoMemory();
+	v = _PyLong_New((int)ndigits);
+	if (v == NULL)
+		return NULL;
+
+	/* Copy the bits over.  The tricky parts are computing 2's-comp on
+	   the fly for signed numbers, and dealing with the mismatch between
+	   8-bit bytes and (probably) 15-bit Python digits.*/
+	{
+		size_t i;
+		unsigned int carry = 1;		/* for 2's-comp calculation */
+		twodigits accum = 0;		/* sliding register */
+		unsigned int accumbits = 0; 	/* number of bits in accum */
+		const unsigned char* p = pstartbyte;
+
+		for (i = 0; i < numsignificantbytes; ++i, p += incr) {
+			unsigned int thisbyte = *p;
+			/* Compute correction for 2's comp, if needed:
+			   invert the byte and add the running carry
+			   (which starts at 1). */
+			if (is_signed) {
+				thisbyte = (0xff ^ thisbyte) + carry;
+				carry = thisbyte >> 8;
+				thisbyte &= 0xff;
+			}
+			/* Because we're going LSB to MSB, thisbyte is
+			   more significant than what's already in accum,
+			   so needs to be prepended to accum.  Widen before
+			   shifting so the shift is done in twodigits
+			   rather than in unsigned int. */
+			accum |= (twodigits)thisbyte << accumbits;
+			accumbits += 8;
+			if (accumbits >= SHIFT) {
+				/* There's enough to fill a Python digit. */
+				assert(idigit < (int)ndigits);
+				v->ob_digit[idigit] = (digit)(accum & MASK);
+				++idigit;
+				accum >>= SHIFT;
+				accumbits -= SHIFT;
+				assert(accumbits < SHIFT);
+			}
+		}
+		/* Whatever is left over forms the final, partial digit. */
+		assert(accumbits < SHIFT);
+		if (accumbits) {
+			assert(idigit < (int)ndigits);
+			v->ob_digit[idigit] = (digit)accum;
+			++idigit;
+		}
+	}
+
+	/* A negative ob_size marks a negative long; long_normalize strips
+	   any leading zero digits. */
+	v->ob_size = is_signed ? -idigit : idigit;
+	return (PyObject *)long_normalize(v);
+}
+
+/* Store long v into bytes[0..n-1] as a base-256 integer.
+
+   v              the long to convert; must be non-NULL and a Python long
+                  (checked only by assert), and must be normalized (its
+                  most-significant digit is nonzero).
+   bytes, n       output array and its length in bytes.
+   little_endian  true: LSB goes to bytes[0]; false: LSB goes to bytes[n-1].
+   is_signed      true: store the 2's-complement representation, with bit
+                  0x80 of the MSB acting as sign bit; false: store the plain
+                  magnitude (v must then be >= 0).
+
+   Returns 0 on success.  Returns -1 with TypeError set if v < 0 and
+   is_signed is false (bytes is untouched), or -1 with OverflowError set
+   if v doesn't fit in n bytes (bytes then holds the least-significant
+   bytes that were produced before the overflow was detected). */
+int
+_PyLong_AsByteArray(PyLongObject* v,
+		    unsigned char* bytes, size_t n,
+		    int little_endian, int is_signed)
+{
+	int i;			/* index into v->ob_digit */
+	int ndigits;		/* |v->ob_size| */
+	twodigits accum;	/* sliding register */
+	unsigned int accumbits; /* # bits in accum */
+	int do_twos_comp;	/* store 2's-comp?  is_signed and v < 0 */
+	twodigits carry;	/* for computing 2's-comp */
+	size_t j;		/* # bytes filled */
+	unsigned char* p;	/* pointer to next byte in bytes */
+	int pincr;		/* direction to move p */
+
+	assert(v != NULL && PyLong_Check(v));
+
+	if (v->ob_size < 0) {
+		ndigits = -(v->ob_size);
+		if (!is_signed) {
+			PyErr_SetString(PyExc_TypeError,
+				"can't convert negative long to unsigned");
+			return -1;
+		}
+		do_twos_comp = 1;
+	}
+	else {
+		ndigits = v->ob_size;
+		do_twos_comp = 0;
+	}
+
+	if (little_endian) {
+		p = bytes;
+		pincr = 1;
+	}
+	else {
+		p = bytes + n - 1;
+		pincr = -1;
+	}
+
+	/* Copy over all the Python digits.
+	   Every digit except the most-significant one must contribute
+	   exactly SHIFT bits, so the long has to be normalized. */
+	assert(ndigits == 0 || v->ob_digit[ndigits - 1] != 0);
+	j = 0;
+	accum = 0;
+	accumbits = 0;
+	carry = do_twos_comp ? 1 : 0;
+	for (i = 0; i < ndigits; ++i) {
+		twodigits thisdigit = v->ob_digit[i];
+		if (do_twos_comp) {
+			/* Invert the digit and add the running carry. */
+			thisdigit = (thisdigit ^ MASK) + carry;
+			carry = thisdigit >> SHIFT;
+			thisdigit &= MASK;
+		}
+		/* Because we're going LSB to MSB, thisdigit is more
+		   significant than what's already in accum, so needs to be
+		   prepended to accum. */
+		accum |= thisdigit << accumbits;
+		accumbits += SHIFT;
+
+		/* The most-significant digit usually isn't full:  its
+		   leading bits are mere copies of the sign (0 for v >= 0,
+		   1 for v < 0).  They needn't be stored, and counting them
+		   as payload would report overflow for values that fit
+		   (e.g. -1 in one byte, or 2**15 in two unsigned bytes).
+		   So don't count them; the code after the loop makes sure
+		   a signed result still ends up with a sign bit. */
+		if (i == ndigits - 1) {
+			twodigits probe = (twodigits)1 << (SHIFT - 1);
+			unsigned int nsignbits = 0;
+			while (nsignbits < (unsigned int)SHIFT &&
+			       ((thisdigit & probe) != 0) == do_twos_comp) {
+				++nsignbits;
+				probe >>= 1;
+			}
+			accumbits -= nsignbits;
+		}
+
+		/* Store as many bytes as possible. */
+		while (accumbits >= 8) {
+			if (j >= n)
+				goto Overflow;
+			++j;
+			*p = (unsigned char)(accum & 0xff);
+			p += pincr;
+			accumbits -= 8;
+			accum >>= 8;
+		}
+	}
+
+	assert(accumbits < 8);
+	assert(carry == 0);  /* else do_twos_comp and *every* digit was 0 */
+	if (accumbits > 0) {
+		/* Store the straggler.  Since fewer than 8 significant bits
+		   remain, this byte always has room for a sign bit. */
+		if (j >= n)
+			goto Overflow;
+		++j;
+		if (do_twos_comp) {
+			/* Fill leading bits of the byte with sign bits
+			   (appropriately pretending that the long had an
+			   infinite supply of sign bits). */
+			accum |= (~(twodigits)0) << accumbits;
+		}
+		*p = (unsigned char)(accum & 0xff);
+		p += pincr;
+	}
+	else if (j == n && is_signed) {
+		/* The significant bits filled the array exactly (or n is 0),
+		   so neither the straggler code above nor the fill loop
+		   below gets to supply a sign bit.  The value fits only if
+		   the top stored bit already is the right sign bit; with
+		   n == 0 only a non-negative value (i.e. 0) fits. */
+		int sign_bit_set = n > 0 && (*(p - pincr) & 0x80) != 0;
+		if (sign_bit_set != do_twos_comp)
+			goto Overflow;
+	}
+
+	/* Fill remaining bytes with copies of the sign bit. */
+	for ( ; j < n; ++j, p += pincr)
+		*p = (unsigned char)(do_twos_comp ? 0xff : 0);
+
+	return 0;
+
+Overflow:
+	PyErr_SetString(PyExc_OverflowError, "long too big to convert");
+	return -1;
+}
+
 /* Get a C double from a long int object. */

 double