This patch addresses two main issues: (1) There exist some non-fatal
errors in some of the hash algorithms. For exmaple, in float_hash and complex_hash a certain part of the value is not included in the hash calculation. See Tim's, Guido's, and my discussion of this on python-dev in May under the title "fix float_hash and complex_hash for 64-bit *nix" (2) The hash algorithms that use pointers (e.g. func_hash, code_hash) are universally not correct on Win64 (they assume that sizeof(long) == sizeof(void*)) As well, this patch significantly cleans up the hash code. It adds the two function _Py_HashDouble and _PyHash_VoidPtr that the various hashing routine are changed to use. These help maintain the hash function invariant: (a==b) => (hash(a)==hash(b))) I have added Lib/test/test_hash.py and Lib/test/output/test_hash to test this for some cases.
This commit is contained in:
parent
b46696c0ed
commit
13634cf7a4
|
@ -293,6 +293,10 @@ extern DL_IMPORT(void) Py_ReprLeave Py_PROTO((PyObject *));
|
|||
/* tstate dict key for PyObject_Compare helper */
|
||||
extern PyObject *_PyCompareState_Key;
|
||||
|
||||
/* Helpers for hash functions */
|
||||
extern DL_IMPORT(long) _Py_HashDouble Py_PROTO((double));
|
||||
extern DL_IMPORT(long) _Py_HashPointer Py_PROTO((void*));
|
||||
|
||||
/* Flag bits for printing: */
|
||||
#define Py_PRINT_RAW 1 /* No string quotes etc. */
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
test_hash
|
|
@ -0,0 +1,26 @@
|
|||
# test the invariant that
|
||||
# iff a==b then hash(a)==hash(b)
|
||||
#
|
||||
|
||||
import test_support
|
||||
|
||||
|
||||
def same_hash(*objlist):
|
||||
# hash each object given an raise TestFailed if
|
||||
# the hash values are not all the same
|
||||
hashed = map(hash, objlist)
|
||||
for h in hashed[1:]:
|
||||
if h != hashed[0]:
|
||||
raise TestFailed, "hashed values differ: %s" % `objlist`
|
||||
|
||||
|
||||
|
||||
same_hash(1, 1L, 1.0, 1.0+0.0j)
|
||||
same_hash(int(1), long(1), float(1), complex(1))
|
||||
|
||||
same_hash(long(1.23e300), float(1.23e300))
|
||||
|
||||
same_hash(float(0.5), complex(0.5, 0.0))
|
||||
|
||||
|
||||
|
|
@ -864,10 +864,7 @@ instance_hash(inst)
|
|||
func = instance_getattr(inst, cmpstr);
|
||||
if (func == NULL) {
|
||||
PyErr_Clear();
|
||||
outcome = (long)inst;
|
||||
if (outcome == -1)
|
||||
outcome = -2;
|
||||
return outcome;
|
||||
return _Py_HashPointer(inst);
|
||||
}
|
||||
PyErr_SetString(PyExc_TypeError, "unhashable instance");
|
||||
return -1;
|
||||
|
|
|
@ -285,8 +285,7 @@ complex_hash(v)
|
|||
PyComplexObject *v;
|
||||
{
|
||||
double intpart, fractpart;
|
||||
int expo;
|
||||
long hipart, x;
|
||||
long x;
|
||||
/* This is designed so that Python numbers with the same
|
||||
value hash to the same value, otherwise comparisons
|
||||
of mapping keys will turn out weird */
|
||||
|
@ -302,7 +301,7 @@ complex_hash(v)
|
|||
#endif
|
||||
|
||||
if (fractpart == 0.0 && v->cval.imag == 0.0) {
|
||||
if (intpart > 0x7fffffffL || -intpart > 0x7fffffffL) {
|
||||
if (intpart > LONG_MAX || -intpart > LONG_MAX) {
|
||||
/* Convert to long int and use its hash... */
|
||||
PyObject *w = PyLong_FromDouble(v->cval.real);
|
||||
if (w == NULL)
|
||||
|
@ -314,36 +313,18 @@ complex_hash(v)
|
|||
x = (long)intpart;
|
||||
}
|
||||
else {
|
||||
fractpart = frexp(fractpart, &expo);
|
||||
fractpart = fractpart * 2147483648.0; /* 2**31 */
|
||||
hipart = (long)fractpart; /* Take the top 32 bits */
|
||||
fractpart = (fractpart - (double)hipart) * 2147483648.0;
|
||||
/* Get the next 32 bits */
|
||||
x = hipart + (long)fractpart + (long)intpart + (expo << 15);
|
||||
/* Combine everything */
|
||||
x = _Py_HashDouble(v->cval.real);
|
||||
if (x == -1)
|
||||
return -1;
|
||||
|
||||
if (v->cval.imag != 0.0) { /* Hash the imaginary part */
|
||||
/* XXX Note that this hashes complex(x, y)
|
||||
to the same value as complex(y, x).
|
||||
Still better than it used to be :-) */
|
||||
#ifdef MPW
|
||||
{
|
||||
extended e;
|
||||
fractpart = modf(v->cval.imag, &e);
|
||||
intpart = e;
|
||||
}
|
||||
#else
|
||||
fractpart = modf(v->cval.imag, &intpart);
|
||||
#endif
|
||||
fractpart = frexp(fractpart, &expo);
|
||||
fractpart = fractpart * 2147483648.0; /* 2**31 */
|
||||
hipart = (long)fractpart; /* Take the top 32 bits */
|
||||
fractpart =
|
||||
(fractpart - (double)hipart) * 2147483648.0;
|
||||
/* Get the next 32 bits */
|
||||
x ^= hipart + (long)fractpart +
|
||||
(long)intpart + (expo << 15);
|
||||
/* Combine everything */
|
||||
long y = _Py_HashDouble(v->cval.imag);
|
||||
if (y == -1)
|
||||
return -1;
|
||||
x += y;
|
||||
}
|
||||
}
|
||||
if (x == -1)
|
||||
|
|
|
@ -59,7 +59,13 @@ PERFORMANCE OF THIS SOFTWARE.
|
|||
#endif
|
||||
|
||||
#ifndef LONG_MAX
|
||||
#if SIZEOF_LONG == 4
|
||||
#define LONG_MAX 0X7FFFFFFFL
|
||||
#elif SIZEOF_LONG == 8
|
||||
#define LONG_MAX 0X7FFFFFFFFFFFFFFFL
|
||||
#else
|
||||
#error "could not set LONG_MAX"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef LONG_MIN
|
||||
|
@ -357,12 +363,12 @@ float_compare(v, w)
|
|||
return (i < j) ? -1 : (i > j) ? 1 : 0;
|
||||
}
|
||||
|
||||
|
||||
static long
|
||||
float_hash(v)
|
||||
PyFloatObject *v;
|
||||
{
|
||||
double intpart, fractpart;
|
||||
int expo;
|
||||
long x;
|
||||
/* This is designed so that Python numbers with the same
|
||||
value hash to the same value, otherwise comparisons
|
||||
|
@ -379,7 +385,7 @@ float_hash(v)
|
|||
#endif
|
||||
|
||||
if (fractpart == 0.0) {
|
||||
if (intpart > 0x7fffffffL || -intpart > 0x7fffffffL) {
|
||||
if (intpart > LONG_MAX || -intpart > LONG_MAX) {
|
||||
/* Convert to long int and use its hash... */
|
||||
PyObject *w = PyLong_FromDouble(v->ob_fval);
|
||||
if (w == NULL)
|
||||
|
@ -393,14 +399,9 @@ float_hash(v)
|
|||
else {
|
||||
/* Note -- if you change this code, also change the copy
|
||||
in complexobject.c */
|
||||
long hipart;
|
||||
fractpart = frexp(fractpart, &expo);
|
||||
fractpart = fractpart * 2147483648.0; /* 2**31 */
|
||||
hipart = (long)fractpart; /* Take the top 32 bits */
|
||||
fractpart = (fractpart - (double)hipart) * 2147483648.0;
|
||||
/* Get the next 32 bits */
|
||||
x = hipart + (long)fractpart + (long)intpart + (expo << 15);
|
||||
/* Combine everything */
|
||||
x = _Py_HashDouble(v->ob_fval);
|
||||
if (x == -1)
|
||||
return -1;
|
||||
}
|
||||
if (x == -1)
|
||||
x = -2;
|
||||
|
|
|
@ -231,10 +231,12 @@ static long
|
|||
func_hash(f)
|
||||
PyFunctionObject *f;
|
||||
{
|
||||
long h;
|
||||
long h,x;
|
||||
h = PyObject_Hash(f->func_code);
|
||||
if (h == -1) return h;
|
||||
h = h ^ (long)f->func_globals;
|
||||
x = _Py_HashPointer(f->func_globals);
|
||||
if (x == -1) return x;
|
||||
h ^= x;
|
||||
if (h == -1) h = -2;
|
||||
return h;
|
||||
}
|
||||
|
|
|
@ -172,7 +172,7 @@ static long
|
|||
meth_hash(a)
|
||||
PyCFunctionObject *a;
|
||||
{
|
||||
long x;
|
||||
long x,y;
|
||||
if (a->m_self == NULL)
|
||||
x = 0;
|
||||
else {
|
||||
|
@ -180,7 +180,13 @@ meth_hash(a)
|
|||
if (x == -1)
|
||||
return -1;
|
||||
}
|
||||
return x ^ (long) a->m_ml->ml_meth;
|
||||
y = _Py_HashPointer(a->m_ml->ml_meth);
|
||||
if (y == -1)
|
||||
return -1;
|
||||
x ^= y;
|
||||
if (x == -1)
|
||||
x = -2;
|
||||
return x;
|
||||
}
|
||||
|
||||
PyTypeObject PyCFunction_Type = {
|
||||
|
|
|
@ -33,6 +33,8 @@ PERFORMANCE OF THIS SOFTWARE.
|
|||
|
||||
#include "Python.h"
|
||||
|
||||
#include "mymath.h"
|
||||
|
||||
/* just for trashcan: */
|
||||
#include "compile.h"
|
||||
#include "frameobject.h"
|
||||
|
@ -507,6 +509,62 @@ PyObject_Compare(v, w)
|
|||
return result;
|
||||
}
|
||||
|
||||
|
||||
/* Set of hash utility functions to help maintaining the invariant that
|
||||
iff a==b then hash(a)==hash(b)
|
||||
|
||||
All the utility functions (_Py_Hash*()) return "-1" to signify an error.
|
||||
*/
|
||||
|
||||
long
|
||||
_Py_HashDouble(v)
|
||||
double v;
|
||||
{
|
||||
/* Use frexp to get at the bits in the double.
|
||||
* Since the VAX D double format has 56 mantissa bits, which is the
|
||||
* most of any double format in use, each of these parts may have as
|
||||
* many as (but no more than) 56 significant bits.
|
||||
* So, assuming sizeof(long) >= 4, each part can be broken into two longs;
|
||||
* frexp and multiplication are used to do that.
|
||||
* Also, since the Cray double format has 15 exponent bits, which is the
|
||||
* most of any double format in use, shifting the exponent field left by
|
||||
* 15 won't overflow a long (again assuming sizeof(long) >= 4).
|
||||
*/
|
||||
int expo;
|
||||
long hipart;
|
||||
|
||||
v = frexp(v, &expo);
|
||||
v = v * 2147483648.0; /* 2**31 */
|
||||
hipart = (long)v; /* Take the top 32 bits */
|
||||
v = (v - (double)hipart) * 2147483648.0; /* Get the next 32 bits */
|
||||
|
||||
return hipart + (long)v + (expo << 15); /* Combine everything */
|
||||
}
|
||||
|
||||
long
|
||||
_Py_HashPointer(p)
|
||||
void *p;
|
||||
{
|
||||
#if SIZEOF_LONG >= SIZEOF_VOID_P
|
||||
return (long)p;
|
||||
#else
|
||||
/* convert to a Python long and hash that */
|
||||
PyObject* longobj;
|
||||
long x;
|
||||
|
||||
if ((longobj = PyLong_FromVoidPtr(p)) == NULL) {
|
||||
x = -1;
|
||||
goto finally;
|
||||
}
|
||||
x = PyObject_Hash(longobj);
|
||||
|
||||
finally:
|
||||
Py_XDECREF(longobj);
|
||||
return x;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
long
|
||||
PyObject_Hash(v)
|
||||
PyObject *v;
|
||||
|
@ -514,8 +572,9 @@ PyObject_Hash(v)
|
|||
PyTypeObject *tp = v->ob_type;
|
||||
if (tp->tp_hash != NULL)
|
||||
return (*tp->tp_hash)(v);
|
||||
if (tp->tp_compare == NULL)
|
||||
return (long) v; /* Use address as hash value */
|
||||
if (tp->tp_compare == NULL) {
|
||||
return _Py_HashPointer(v); /* Use address as hash value */
|
||||
}
|
||||
/* If there's a cmp but no hash defined, the object can't be hashed */
|
||||
PyErr_SetString(PyExc_TypeError, "unhashable type");
|
||||
return -1;
|
||||
|
|
|
@ -423,7 +423,7 @@ PyHKEY_hashFunc(PyObject *ob)
|
|||
/* Just use the address.
|
||||
XXX - should we use the handle value?
|
||||
*/
|
||||
return (long)ob;
|
||||
return _Py_HashPointer(ob);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue