Fix UCNs on machines with >= 32-bit longs
Originally submitted by Bill Tutt. Note: This code is actually going to be replaced in 2.0 by /F's new database. Until then, this patch keeps the test suite working.
This commit is contained in:
parent
1962fb59f3
commit
4d6381dfee
|
@ -11,7 +11,7 @@
|
|||
* perfect_hash.py:
|
||||
* http://starship.python.net/crew/amk/python/code/perfect-hash.html
|
||||
*
|
||||
* Generated on: Wed Jun 28 03:34:07 2000
|
||||
* Generated on: Fri Jul 14 08:00:58 2000
|
||||
*/
|
||||
|
||||
#define k_cHashElements 18836
|
||||
|
@ -26,20 +26,36 @@ static long f1(const char *key, unsigned int cch)
|
|||
{
|
||||
register int len;
|
||||
register unsigned char *p;
|
||||
register long x;
|
||||
register unsigned long x;
|
||||
|
||||
len = cch;
|
||||
p = (unsigned char *) key;
|
||||
x = 1694245428;
|
||||
x = 0x64fc2234;
|
||||
while (--len >= 0)
|
||||
x = (1000003*x) ^ toupper(*(p++));
|
||||
{
|
||||
/* (1000003 * x) ^ toupper(*(p++))
|
||||
* translated to handle > 32 bit longs
|
||||
*/
|
||||
x = (0xf4243 * x);
|
||||
x = x & 0xFFFFFFFF;
|
||||
x = x ^ toupper(*(p++));
|
||||
}
|
||||
x ^= cch + 10;
|
||||
if (x == -1)
|
||||
x = -2;
|
||||
if (x == 0xFFFFFFFF)
|
||||
x = 0xfffffffe;
|
||||
if (x & 0x80000000)
|
||||
{
|
||||
/* Emulate 32-bit signed (2's complement) modulo operation */
|
||||
x = (~x & 0xFFFFFFFF) + 1;
|
||||
x %= k_cHashElements;
|
||||
if (x != 0)
|
||||
{
|
||||
x = x + (~k_cHashElements & 0xFFFFFFFF) + 1;
|
||||
x = (~x & 0xFFFFFFFF) + 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
x %= k_cHashElements;
|
||||
/* ensure the returned value is positive so we mimic Python's % operator */
|
||||
if (x < 0)
|
||||
x += k_cHashElements;
|
||||
return x;
|
||||
}
|
||||
|
||||
|
@ -48,20 +64,36 @@ static long f2(const char *key, unsigned int cch)
|
|||
{
|
||||
register int len;
|
||||
register unsigned char *p;
|
||||
register long x;
|
||||
register unsigned long x;
|
||||
|
||||
len = cch;
|
||||
p = (unsigned char *) key;
|
||||
x = -1917331657;
|
||||
x = 0x8db7d737;
|
||||
while (--len >= 0)
|
||||
x = (1000003*x) ^ toupper(*(p++));
|
||||
{
|
||||
/* (1000003 * x) ^ toupper(*(p++))
|
||||
* translated to handle > 32 bit longs
|
||||
*/
|
||||
x = (0xf4243 * x);
|
||||
x = x & 0xFFFFFFFF;
|
||||
x = x ^ toupper(*(p++));
|
||||
}
|
||||
x ^= cch + 10;
|
||||
if (x == -1)
|
||||
x = -2;
|
||||
if (x == 0xFFFFFFFF)
|
||||
x = 0xfffffffe;
|
||||
if (x & 0x80000000)
|
||||
{
|
||||
/* Emulate 32-bit signed (2's complement) modulo operation */
|
||||
x = (~x & 0xFFFFFFFF) + 1;
|
||||
x %= k_cHashElements;
|
||||
if (x != 0)
|
||||
{
|
||||
x = x + (~k_cHashElements & 0xFFFFFFFF) + 1;
|
||||
x = (~x & 0xFFFFFFFF) + 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
x %= k_cHashElements;
|
||||
/* ensure the returned value is positive so we mimic Python's % operator */
|
||||
if (x < 0)
|
||||
x += k_cHashElements;
|
||||
return x;
|
||||
}
|
||||
|
||||
|
|
|
@ -12,8 +12,8 @@ import perfect_hash
|
|||
|
||||
# These variables determine which hash function is tried first.
|
||||
# Yields a multiple of 1.7875 for UnicodeData.txt on 2000/06/24/
|
||||
f1Seed = 1694245428
|
||||
f2Seed = -1917331657
|
||||
f1Seed = 0x64fc2234
|
||||
f2Seed = 0x8db7d737
|
||||
|
||||
# Maximum allowed multiplier, if this isn't None then instead of continually
|
||||
# increasing C, it resets it back to initC to keep searching for
|
||||
|
|
|
@ -73,25 +73,29 @@ class Hash:
|
|||
key = str(key)
|
||||
if self.caseInsensitive:
|
||||
key = string.upper(key)
|
||||
x = perfhash.hash(self.seed, len(self.junk), key) % self.N
|
||||
#h = hash(self.junk + key) % self.N
|
||||
#assert x == h
|
||||
x = perfhash.hash(self.seed, len(self.junk), key, self.N)
|
||||
return x
|
||||
|
||||
def generate_code(self):
|
||||
s = """{
|
||||
register int len;
|
||||
register unsigned char *p;
|
||||
register long x;
|
||||
register unsigned long x;
|
||||
|
||||
len = cch;
|
||||
p = (unsigned char *) key;
|
||||
x = %(junkSeed)d;
|
||||
x = %(junkSeed)s;
|
||||
while (--len >= 0)
|
||||
x = (1000003*x) ^ """ % \
|
||||
{
|
||||
/* (1000003 * x) ^ toupper(*(p++))
|
||||
* translated to handle > 32 bit longs
|
||||
*/
|
||||
x = (0xf4243 * x);
|
||||
x = x & 0xFFFFFFFF;
|
||||
x = x ^ """ % \
|
||||
{
|
||||
"lenJunk" : len(self.junk),
|
||||
"junkSeed" : self.seed,
|
||||
"junkSeed" : hex(self.seed),
|
||||
}
|
||||
|
||||
if self.caseInsensitive:
|
||||
|
@ -99,20 +103,29 @@ class Hash:
|
|||
else:
|
||||
s = s + "*(p++);"
|
||||
s = s + """
|
||||
}
|
||||
x ^= cch + %(lenJunk)d;
|
||||
if (x == -1)
|
||||
x = -2;
|
||||
if (x == 0xFFFFFFFF)
|
||||
x = 0xfffffffe;
|
||||
if (x & 0x80000000)
|
||||
{
|
||||
/* Emulate 32-bit signed (2's complement) modulo operation */
|
||||
x = (~x & 0xFFFFFFFF) + 1;
|
||||
x %%= k_cHashElements;
|
||||
if (x != 0)
|
||||
{
|
||||
x = x + (~k_cHashElements & 0xFFFFFFFF) + 1;
|
||||
x = (~x & 0xFFFFFFFF) + 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
x %%= k_cHashElements;
|
||||
/* ensure the returned value is positive so we mimic Python's %% operator */
|
||||
if (x < 0)
|
||||
x += k_cHashElements;
|
||||
return x;
|
||||
}
|
||||
""" % { "lenJunk" : len(self.junk),
|
||||
"junkSeed" : self.seed, }
|
||||
"junkSeed" : hex(self.seed), }
|
||||
return s
|
||||
|
||||
|
||||
WHITE, GREY, BLACK = 0,1,2
|
||||
class Graph:
|
||||
"""Graph class. This class isn't particularly efficient or general,
|
||||
|
@ -139,8 +152,8 @@ class Graph:
|
|||
value 'value'"""
|
||||
|
||||
if vertex1 > vertex2: vertex1, vertex2 = vertex2, vertex1
|
||||
# if self.edges.has_key( (vertex1, vertex2) ):
|
||||
# raise ValueError, 'Collision: vertices already connected'
|
||||
if self.edges.has_key( (vertex1, vertex2) ):
|
||||
raise ValueError, 'Collision: vertices already connected'
|
||||
self.edges[ (vertex1, vertex2) ] = value
|
||||
|
||||
# Add vertices to each other's reachable list
|
||||
|
@ -341,8 +354,8 @@ typedef struct %(structName)s
|
|||
""" % (self.cHashElements, self.cchMax, self.cKeys)
|
||||
|
||||
code = code + """
|
||||
static const %s G[k_cHashElements];
|
||||
static const %s %s[k_cKeys];
|
||||
staticforward const %s G[k_cHashElements];
|
||||
staticforward const %s %s[k_cKeys];
|
||||
""" % (self.type, dataArrayType, dataArrayName)
|
||||
|
||||
code = code + """
|
||||
|
@ -553,7 +566,7 @@ def generate_hash(keys, caseInsensitive=0,
|
|||
# edge.
|
||||
for k, v in keys:
|
||||
h1 = f1(k) ; h2 = f2(k)
|
||||
G.connect( h1,h2, v)
|
||||
G.connect( h1, h2, v)
|
||||
|
||||
# Check if the resulting graph is acyclic; if it is,
|
||||
# we're done with step 1.
|
||||
|
@ -598,8 +611,9 @@ def generate_hash(keys, caseInsensitive=0,
|
|||
sys.stderr.write('Found perfect hash function!\n')
|
||||
sys.stderr.write('\nIn order to regenerate this hash function, \n')
|
||||
sys.stderr.write('you need to pass these following values back in:\n')
|
||||
sys.stderr.write('f1 seed: %s\n' % repr(f1.seed))
|
||||
sys.stderr.write('f2 seed: %s\n' % repr(f2.seed))
|
||||
sys.stderr.write('f1 seed: %s\n' % hex(f1.seed))
|
||||
sys.stderr.write('f2 seed: %s\n' % hex(f2.seed))
|
||||
sys.stderr.write('initial multipler: %s\n' % c)
|
||||
|
||||
return PerfectHash(cchMaxKey, f1, f2, G, N, len(keys), maxHashValue)
|
||||
|
||||
|
|
|
@ -5,11 +5,13 @@ static PyObject * hashFunction(PyObject *self, PyObject *args, PyObject *kw)
|
|||
PyStringObject *a;
|
||||
register int len;
|
||||
register unsigned char *p;
|
||||
register long x;
|
||||
long lSeed;
|
||||
register unsigned long x;
|
||||
unsigned long ulSeed;
|
||||
unsigned long cchSeed;
|
||||
unsigned long cHashElements;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "iiO:hash", &lSeed, &cchSeed, &a))
|
||||
if (!PyArg_ParseTuple(args, "llOl:hash",
|
||||
&ulSeed, &cchSeed, &a, &cHashElements))
|
||||
return NULL;
|
||||
if (!PyString_Check(a))
|
||||
{
|
||||
|
@ -19,13 +21,35 @@ static PyObject * hashFunction(PyObject *self, PyObject *args, PyObject *kw)
|
|||
|
||||
len = a->ob_size;
|
||||
p = (unsigned char *) a->ob_sval;
|
||||
x = lSeed;
|
||||
x = ulSeed;
|
||||
while (--len >= 0)
|
||||
x = (1000003*x) ^ *p++;
|
||||
{
|
||||
/* (1000003 * x) ^ *p++
|
||||
* translated to handle > 32 bit longs
|
||||
*/
|
||||
x = (0xf4243 * x);
|
||||
x = x & 0xFFFFFFFF;
|
||||
x = x ^ *p++;
|
||||
}
|
||||
x ^= a->ob_size + cchSeed;
|
||||
if (x == -1)
|
||||
x = -2;
|
||||
return PyInt_FromLong(x);
|
||||
if (x == 0xFFFFFFFF)
|
||||
x = 0xfffffffe;
|
||||
if (x & 0x80000000)
|
||||
{
|
||||
/* Emulate Python 32-bit signed (2's complement)
|
||||
* modulo operation
|
||||
*/
|
||||
x = (~x & 0xFFFFFFFF) + 1;
|
||||
x %= cHashElements;
|
||||
if (x != 0)
|
||||
{
|
||||
x = x + (~cHashElements & 0xFFFFFFFF) + 1;
|
||||
x = (~x & 0xFFFFFFFF) + 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
x %= cHashElements;
|
||||
return PyInt_FromLong((long)x);
|
||||
}
|
||||
|
||||
static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw)
|
||||
|
@ -33,7 +57,7 @@ static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw)
|
|||
PyStringObject *a;
|
||||
register int len;
|
||||
register unsigned char *p;
|
||||
register long x;
|
||||
register unsigned long x;
|
||||
|
||||
if (!PyString_Check(args))
|
||||
{
|
||||
|
@ -45,10 +69,17 @@ static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw)
|
|||
|
||||
len = a->ob_size;
|
||||
p = (unsigned char *) a->ob_sval;
|
||||
x = *p << 7;
|
||||
x = (*p << 7) & 0xFFFFFFFF;
|
||||
while (--len >= 0)
|
||||
x = (1000003*x) ^ *p++;
|
||||
return PyInt_FromLong(x);
|
||||
{
|
||||
/* (1000003 * x) ^ *p++
|
||||
* translated to handle > 32 bit longs
|
||||
*/
|
||||
x = (0xf4243 * x);
|
||||
x = x & 0xFFFFFFFF;
|
||||
x = x ^ *p++;
|
||||
}
|
||||
return PyInt_FromLong((long)x);
|
||||
}
|
||||
|
||||
|
||||
|
@ -68,5 +99,16 @@ void initperfhash(void)
|
|||
m = Py_InitModule4("perfhash", hashMethods,
|
||||
NULL, NULL, PYTHON_API_VERSION);
|
||||
if ( m == NULL )
|
||||
Py_FatalError("can't initialize module hashModule");
|
||||
Py_FatalError("can't initialize module perfhash");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue