Added q/Q standard (x-platform 8-byte ints) mode in struct module.

This completes the q/Q project.

longobject.c _PyLong_AsByteArray:  The original code had a gross bug:
the most-significant Python digit doesn't necessarily have SHIFT
significant bits, and you really need to count how many copies of the sign
bit it has else spurious overflow errors result.

test_struct.py:  This now does exhaustive std q/Q testing at, and on both
sides of, all relevant power-of-2 boundaries, both positive and negative.

NEWS:  Added brief dict news while I was at it.
This commit is contained in:
Tim Peters 2001-06-12 01:22:22 +00:00
parent ac4797a12e
commit 7a3bfc3a47
5 changed files with 337 additions and 77 deletions

View File

@ -72,7 +72,8 @@ Notes:
\item[(1)]
The \character{q} and \character{Q} conversion codes are available in
native mode only if the platform C compiler supports C \ctype{long long},
or, on Windows, \ctype{__int64}.
or, on Windows, \ctype{__int64}. They're always available in standard
modes.
\end{description}
@ -100,8 +101,8 @@ passed in to \function{pack()} is too long, the stored representation
is truncated. If the string is too short, padding is used to ensure
that exactly enough bytes are used to satisfy the count.
For the \character{I} and \character{L} format characters, the return
value is a Python long integer.
For the \character{I}, \character{L}, \character{q} and \character{Q}
format characters, the return value is a Python long integer.
For the \character{P} format character, the return value is a Python
integer or long integer, depending on the size needed to hold a
@ -139,10 +140,12 @@ Native size and alignment are determined using the C compiler's
order.
Standard size and alignment are as follows: no alignment is required
for any type (so you have to use pad bytes); \ctype{short} is 2 bytes;
\ctype{int} and \ctype{long} are 4 bytes. \ctype{float} and
\ctype{double} are 32-bit and 64-bit IEEE floating point numbers,
respectively.
for any type (so you have to use pad bytes);
\ctype{short} is 2 bytes;
\ctype{int} and \ctype{long} are 4 bytes;
\ctype{long long} (\ctype{__int64} on Windows) is 8 bytes;
\ctype{float} and \ctype{double} are 32-bit and 64-bit
IEEE floating point numbers, respectively.
Note the difference between \character{@} and \character{=}: both use
native byte order, but the size and alignment of the latter is

View File

@ -12,6 +12,16 @@ def simple_err(func, *args):
func.__name__, args)
## pdb.set_trace()
def any_err(func, *args):
    # Call func(*args) and insist that it fails with struct.error,
    # OverflowError or TypeError.  Any other exception propagates to the
    # caller; a normal return is reported as a test failure.
    try:
        func(*args)
    except (struct.error, OverflowError, TypeError):
        return
    raise TestFailed("%s%s did not raise error" % (
        func.__name__, args))
##      pdb.set_trace()
simple_err(struct.calcsize, 'Z')
sz = struct.calcsize('i')
@ -113,7 +123,8 @@ for fmt, arg, big, lil, asy in tests:
raise TestFailed, "unpack(%s, %s) -> (%s,) # expected (%s,)" % (
`fmt`, `res`, `rev`, `arg`)
# Some q/Q sanity checks.
###########################################################################
# q/Q tests.
has_native_qQ = 1
try:
@ -124,18 +135,22 @@ except struct.error:
if verbose:
print "Platform has native q/Q?", has_native_qQ and "Yes." or "No."
simple_err(struct.pack, "Q", -1) # can't pack -1 as unsigned regardless
any_err(struct.pack, "Q", -1) # can't pack -1 as unsigned regardless
simple_err(struct.pack, "q", "a") # can't pack string as 'q' regardless
simple_err(struct.pack, "Q", "a") # ditto, but 'Q'
def string_reverse(s):
    # Return a new string holding the characters of s in reverse order.
    last = len(s) - 1
    return "".join([s[last - i] for i in range(len(s))])
def bigendian_to_native(value):
if isbigendian:
return value
chars = list(value)
chars.reverse()
return "".join(chars)
else:
return string_reverse(value)
if has_native_qQ:
def test_native_qQ():
bytes = struct.calcsize('q')
# The expected values here are in big-endian format, primarily because
# I'm on a little-endian machine and so this is the clearest way (for
@ -156,3 +171,147 @@ if has_native_qQ:
verify(retrieved == input,
"%r-unpack of %r gave %r, not %r" %
(format, got, retrieved, input))
if has_native_qQ:
test_native_qQ()
# Standard q/Q (8 bytes; should work on all platforms).
# Inclusive bounds of the values that fit in 8 bytes: MIN_Q..MAX_Q for the
# unsigned 'Q' code, MIN_q..MAX_q for the signed 'q' code.
MIN_Q, MAX_Q = 0, 2L**64 - 1
MIN_q, MAX_q = -(2L**63), 2L**63 - 1
import binascii
def test_one_qQ(x, pack=struct.pack,
unpack=struct.unpack,
unhexlify=binascii.unhexlify):
if verbose:
print "trying std q/Q on", x, "==", hex(x)
# Try 'q'.
if MIN_q <= x <= MAX_q:
# Try '>q'.
expected = long(x)
if x < 0:
expected += 1L << 64
assert expected > 0
expected = hex(expected)[2:-1] # chop "0x" and trailing 'L'
if len(expected) & 1:
expected = "0" + expected
expected = unhexlify(expected)
expected = "\x00" * (8 - len(expected)) + expected
# >q pack work?
got = pack(">q", x)
verify(got == expected,
"'>q'-pack of %r gave %r, not %r" %
(x, got, expected))
# >q unpack work?
retrieved = unpack(">q", got)[0]
verify(x == retrieved,
"'>q'-unpack of %r gave %r, not %r" %
(got, retrieved, x))
# Adding any byte should cause a "too big" error.
any_err(unpack, ">q", '\x01' + got)
# Try '<q'.
expected = string_reverse(expected)
# <q pack work?
got = pack("<q", x)
verify(got == expected,
"'<q'-pack of %r gave %r, not %r" %
(x, got, expected))
# <q unpack work?
retrieved = unpack("<q", got)[0]
verify(x == retrieved,
"'<q'-unpack of %r gave %r, not %r" %
(got, retrieved, x))
# Adding any byte should cause a "too big" error.
any_err(unpack, "<q", '\x01' + got)
else:
# x is out of q's range -- verify pack realizes that.
any_err(pack, '>q', x)
any_err(pack, '<q', x)
# Much the same for 'Q'.
if MIN_Q <= x <= MAX_Q:
# Try '>Q'.
expected = long(x)
expected = hex(expected)[2:-1] # chop "0x" and trailing 'L'
if len(expected) & 1:
expected = "0" + expected
expected = unhexlify(expected)
expected = "\x00" * (8 - len(expected)) + expected
# >Q pack work?
got = pack(">Q", x)
verify(got == expected,
"'>Q'-pack of %r gave %r, not %r" %
(x, got, expected))
# >Q unpack work?
retrieved = unpack(">Q", got)[0]
verify(x == retrieved,
"'>Q'-unpack of %r gave %r, not %r" %
(got, retrieved, x))
# Adding any byte should cause a "too big" error.
any_err(unpack, ">Q", '\x01' + got)
# Try '<Q'.
expected = string_reverse(expected)
# <Q pack work?
got = pack("<Q", x)
verify(got == expected,
"'<Q'-pack of %r gave %r, not %r" %
(x, got, expected))
# <Q unpack work?
retrieved = unpack("<Q", got)[0]
verify(x == retrieved,
"'<Q'-unpack of %r gave %r, not %r" %
(got, retrieved, x))
# Adding any byte should cause a "too big" error.
any_err(unpack, "<Q", '\x01' + got)
else:
# x is out of Q's range -- verify pack realizes that.
any_err(pack, '>Q', x)
any_err(pack, '<Q', x)
def test_std_qQ():
from random import randrange
# Create all interesting powers of 2.
values = []
for exp in range(70):
values.append(1L << exp)
# Add some random 64-bit values.
for i in range(50):
val = 0L
for j in range(8):
val = (val << 8) | randrange(256)
values.append(val)
# Try all those, and their negations, and +-1 from them. Note
# that this tests all power-of-2 boundaries in range, and a few out
# of range, plus +-(2**n +- 1).
for base in values:
for val in -base, base:
for incr in -1, 0, 1:
x = val + incr
try:
x = int(x)
except OverflowError:
pass
test_one_qQ(x)
test_std_qQ()

View File

@ -84,6 +84,9 @@ Core
sortdict(dict) function for a simple way to display a dict in sorted
order.
- Many other small changes to dicts were made, resulting in faster
operation along the most common code paths.
- Dictionary objects now support the "in" operator: "x in dict" means
the same as dict.has_key(x).
@ -119,7 +122,7 @@ Core
- Collisions in dicts are resolved via a new approach, which can help
dramatically in bad cases. For example, looking up every key in a dict
d with d.keys() = [i << 16 for i in range(20000)] is approximately 500x
d with d.keys() == [i << 16 for i in range(20000)] is approximately 500x
faster now. Thanks to Christian Tismer for pointing out the cause and
the nature of an effective cure (last December! better late than never).
@ -145,8 +148,8 @@ Library
native mode, these can be used only when the platform C compiler supports
these types (when HAVE_LONG_LONG is #define'd by the Python config
process), and then they inherit the sizes and alignments of the C types.
XXX TODO In standard mode, 'q' and 'Q' are supported on all platforms, and
XXX TODO are 8-byte integral types.
In standard mode, 'q' and 'Q' are supported on all platforms, and are
8-byte integral types.
Tests

View File

@ -80,6 +80,34 @@ typedef struct { char c; LONG_LONG x; } s_long_long;
#pragma options align=reset
#endif
/* Coerce v to a Python long.  On success the result is a reference the
   caller owns and must decref.  On failure NULL is returned; StructError
   is set here when v offers no nb_long slot or the slot yields something
   other than a long. */

static PyObject *
get_pylong(PyObject *v)
{
	PyNumberMethods *slots;
	PyObject *as_long;

	assert(v != NULL);

	/* Plain ints are widened to a fresh long. */
	if (PyInt_Check(v))
		return PyLong_FromLong(PyInt_AS_LONG(v));

	/* Already a long: hand back a new reference to the same object. */
	if (PyLong_Check(v)) {
		Py_INCREF(v);
		return v;
	}

	/* Anything else gets one chance via its nb_long slot. */
	slots = v->ob_type->tp_as_number;
	if (slots != NULL && slots->nb_long != NULL) {
		as_long = slots->nb_long(v);
		if (as_long == NULL)
			return NULL;
		if (PyLong_Check(as_long))
			return as_long;
		/* The slot returned a non-long; drop it and complain. */
		Py_DECREF(as_long);
	}

	PyErr_SetString(StructError,
			"cannot convert argument to long");
	return NULL;
}
/* Helper routine to get a Python integer and raise the appropriate error
if it isn't one */
@ -123,33 +151,13 @@ static int
get_longlong(PyObject *v, LONG_LONG *p)
{
LONG_LONG x;
int v_needs_decref = 0;
if (PyInt_Check(v)) {
x = (LONG_LONG)PyInt_AS_LONG(v);
*p = x;
return 0;
}
if (!PyLong_Check(v)) {
PyNumberMethods *m = v->ob_type->tp_as_number;
if (m != NULL && m->nb_long != NULL) {
v = m->nb_long(v);
if (v == NULL)
return -1;
v_needs_decref = 1;
}
if (!PyLong_Check(v)) {
PyErr_SetString(StructError,
"cannot convert argument to long");
if (v_needs_decref)
Py_DECREF(v);
return -1;
}
}
v = get_pylong(v);
if (v == NULL)
return -1;
assert(PyLong_Check(v));
x = PyLong_AsLongLong(v);
if (v_needs_decref)
Py_DECREF(v);
Py_DECREF(v);
if (x == (LONG_LONG)-1 && PyErr_Occurred())
return -1;
*p = x;
@ -162,39 +170,13 @@ static int
get_ulonglong(PyObject *v, unsigned LONG_LONG *p)
{
unsigned LONG_LONG x;
int v_needs_decref = 0;
if (PyInt_Check(v)) {
long i = PyInt_AS_LONG(v);
if (i < 0) {
PyErr_SetString(StructError, "can't convert negative "
"int to unsigned");
return -1;
}
x = (unsigned LONG_LONG)i;
*p = x;
return 0;
}
if (!PyLong_Check(v)) {
PyNumberMethods *m = v->ob_type->tp_as_number;
if (m != NULL && m->nb_long != NULL) {
v = m->nb_long(v);
if (v == NULL)
return -1;
v_needs_decref = 1;
}
if (!PyLong_Check(v)) {
PyErr_SetString(StructError,
"cannot convert argument to long");
if (v_needs_decref)
Py_DECREF(v);
return -1;
}
}
v = get_pylong(v);
if (v == NULL)
return -1;
assert(PyLong_Check(v));
x = PyLong_AsUnsignedLongLong(v);
if (v_needs_decref)
Py_DECREF(v);
Py_DECREF(v);
if (x == (unsigned LONG_LONG)-1 && PyErr_Occurred())
return -1;
*p = x;
@ -500,7 +482,7 @@ typedef struct _formatdef {
TYPE is one of char, byte, ubyte, etc.
*/
/* Native mode routines. */
/* Native mode routines. ****************************************************/
static PyObject *
nu_char(const char *p, const formatdef *f)
@ -797,6 +779,8 @@ static formatdef native_table[] = {
{0}
};
/* Big-endian routines. *****************************************************/
static PyObject *
bu_int(const char *p, const formatdef *f)
{
@ -825,6 +809,24 @@ bu_uint(const char *p, const formatdef *f)
return PyInt_FromLong((long)x);
}
/* Unpack the 8 bytes at p as a big-endian signed integer; the result is
   whatever _PyLong_FromByteArray returns. */
static PyObject *
bu_longlong(const char *p, const formatdef *f)
{
	const unsigned char *bytes = (const unsigned char *)p;
	const int little_endian = 0;
	const int is_signed = 1;

	return _PyLong_FromByteArray(bytes, 8, little_endian, is_signed);
}
/* Unpack the 8 bytes at p as a big-endian unsigned integer; the result is
   whatever _PyLong_FromByteArray returns. */
static PyObject *
bu_ulonglong(const char *p, const formatdef *f)
{
	const unsigned char *bytes = (const unsigned char *)p;
	const int little_endian = 0;
	const int is_signed = 0;

	return _PyLong_FromByteArray(bytes, 8, little_endian, is_signed);
}
static PyObject *
bu_float(const char *p, const formatdef *f)
{
@ -867,6 +869,34 @@ bp_uint(char *p, PyObject *v, const formatdef *f)
return 0;
}
/* Pack v at p as a big-endian signed 8-byte integer.
   Returns -1 if v can't be converted to a Python long; otherwise returns
   the result of _PyLong_AsByteArray. */
static int
bp_longlong(char *p, PyObject *v, const formatdef *f)
{
	int res;
	v = get_pylong(v);
	/* get_pylong returns NULL on failure; bail out before the NULL
	   reaches _PyLong_AsByteArray or Py_DECREF. */
	if (v == NULL)
		return -1;
	res = _PyLong_AsByteArray((PyLongObject *)v,
				  (unsigned char *)p,
				  8,
				  0, /* little_endian */
				  1  /* signed */);
	Py_DECREF(v);
	return res;
}
/* Pack v at p as a big-endian unsigned 8-byte integer.
   Returns -1 if v can't be converted to a Python long; otherwise returns
   the result of _PyLong_AsByteArray. */
static int
bp_ulonglong(char *p, PyObject *v, const formatdef *f)
{
	int res;
	v = get_pylong(v);
	/* get_pylong returns NULL on failure; bail out before the NULL
	   reaches _PyLong_AsByteArray or Py_DECREF. */
	if (v == NULL)
		return -1;
	res = _PyLong_AsByteArray((PyLongObject *)v,
				  (unsigned char *)p,
				  8,
				  0, /* little_endian */
				  0  /* signed */);
	Py_DECREF(v);
	return res;
}
static int
bp_float(char *p, PyObject *v, const formatdef *f)
{
@ -904,11 +934,15 @@ static formatdef bigendian_table[] = {
{'I', 4, 0, bu_uint, bp_uint},
{'l', 4, 0, bu_int, bp_int},
{'L', 4, 0, bu_uint, bp_uint},
{'q', 8, 0, bu_longlong, bp_longlong},
{'Q', 8, 0, bu_ulonglong, bp_ulonglong},
{'f', 4, 0, bu_float, bp_float},
{'d', 8, 0, bu_double, bp_double},
{0}
};
/* Little-endian routines. *****************************************************/
static PyObject *
lu_int(const char *p, const formatdef *f)
{
@ -937,6 +971,24 @@ lu_uint(const char *p, const formatdef *f)
return PyInt_FromLong((long)x);
}
/* Unpack the 8 bytes at p as a little-endian signed integer; the result is
   whatever _PyLong_FromByteArray returns. */
static PyObject *
lu_longlong(const char *p, const formatdef *f)
{
	const unsigned char *bytes = (const unsigned char *)p;
	const int little_endian = 1;
	const int is_signed = 1;

	return _PyLong_FromByteArray(bytes, 8, little_endian, is_signed);
}
/* Unpack the 8 bytes at p as a little-endian unsigned integer; the result
   is whatever _PyLong_FromByteArray returns. */
static PyObject *
lu_ulonglong(const char *p, const formatdef *f)
{
	const unsigned char *bytes = (const unsigned char *)p;
	const int little_endian = 1;
	const int is_signed = 0;

	return _PyLong_FromByteArray(bytes, 8, little_endian, is_signed);
}
static PyObject *
lu_float(const char *p, const formatdef *f)
{
@ -979,6 +1031,34 @@ lp_uint(char *p, PyObject *v, const formatdef *f)
return 0;
}
/* Pack v at p as a little-endian signed 8-byte integer.
   Returns -1 if v can't be converted to a Python long; otherwise returns
   the result of _PyLong_AsByteArray. */
static int
lp_longlong(char *p, PyObject *v, const formatdef *f)
{
	int res;
	v = get_pylong(v);
	/* get_pylong returns NULL on failure; bail out before the NULL
	   reaches _PyLong_AsByteArray or Py_DECREF. */
	if (v == NULL)
		return -1;
	res = _PyLong_AsByteArray((PyLongObject*)v,
				  (unsigned char *)p,
				  8,
				  1, /* little_endian */
				  1  /* signed */);
	Py_DECREF(v);
	return res;
}
/* Pack v at p as a little-endian unsigned 8-byte integer.
   Returns -1 if v can't be converted to a Python long; otherwise returns
   the result of _PyLong_AsByteArray. */
static int
lp_ulonglong(char *p, PyObject *v, const formatdef *f)
{
	int res;
	v = get_pylong(v);
	/* get_pylong returns NULL on failure; bail out before the NULL
	   reaches _PyLong_AsByteArray or Py_DECREF. */
	if (v == NULL)
		return -1;
	res = _PyLong_AsByteArray((PyLongObject*)v,
				  (unsigned char *)p,
				  8,
				  1, /* little_endian */
				  0  /* signed */);
	Py_DECREF(v);
	return res;
}
static int
lp_float(char *p, PyObject *v, const formatdef *f)
{
@ -1016,6 +1096,8 @@ static formatdef lilendian_table[] = {
{'I', 4, 0, lu_uint, lp_uint},
{'l', 4, 0, lu_int, lp_int},
{'L', 4, 0, lu_uint, lp_uint},
{'q', 8, 0, lu_longlong, lp_longlong},
{'Q', 8, 0, lu_ulonglong, lp_ulonglong},
{'f', 4, 0, lu_float, lp_float},
{'d', 8, 0, lu_double, lp_double},
{0}

View File

@ -364,20 +364,33 @@ _PyLong_AsByteArray(PyLongObject* v,
accumbits = 0;
carry = do_twos_comp ? 1 : 0;
for (i = 0; i < ndigits; ++i) {
unsigned int oldaccumbits = accumbits;
twodigits thisdigit = v->ob_digit[i];
if (do_twos_comp) {
thisdigit = (thisdigit ^ MASK) + carry;
carry = thisdigit >> SHIFT;
thisdigit &= MASK;
}
if (i < ndigits - 1)
accumbits += SHIFT;
else {
/* The most-significant digit may be partly empty. */
twodigits bitmask = 1 << (SHIFT - 1);
twodigits signbit = do_twos_comp << (SHIFT - 1);
unsigned int nsignbits = 0;
while ((thisdigit & bitmask) == signbit && bitmask) {
++nsignbits;
bitmask >>= 1;
signbit >>= 1;
}
accumbits += SHIFT - nsignbits;
}
/* Because we're going LSB to MSB, thisdigit is more
significant than what's already in accum, so needs to be
prepended to accum. */
accum |= thisdigit << accumbits;
accumbits += SHIFT;
accum |= thisdigit << oldaccumbits;
/* Store as many bytes as possible. */
assert(accumbits >= 8);
do {
while (accumbits >= 8) {
if (j >= n)
goto Overflow;
++j;
@ -385,13 +398,13 @@ _PyLong_AsByteArray(PyLongObject* v,
p += pincr;
accumbits -= 8;
accum >>= 8;
} while (accumbits >= 8);
}
}
/* Store the straggler (if any). */
assert(accumbits < 8);
assert(carry == 0); /* else do_twos_comp and *every* digit was 0 */
if (accum) {
if (accumbits > 0) {
if (j >= n)
goto Overflow;
++j;