Fix UCNs on machines with >= 32-bit longs

originally submitted by Bill Tutt

Note: This code is actually going to be replaced in 2.0 by /F's new
database.  Until then, this patch keeps the test suite working.
This commit is contained in:
Jeremy Hylton 2000-07-26 03:56:06 +00:00
parent 1962fb59f3
commit 4d6381dfee
4 changed files with 144 additions and 56 deletions

View File

@ -11,7 +11,7 @@
* perfect_hash.py:
* http://starship.python.net/crew/amk/python/code/perfect-hash.html
*
* Generated on: Wed Jun 28 03:34:07 2000
* Generated on: Fri Jul 14 08:00:58 2000
*/
#define k_cHashElements 18836
@ -26,20 +26,36 @@ static long f1(const char *key, unsigned int cch)
{
register int len;
register unsigned char *p;
register long x;
register unsigned long x;
len = cch;
p = (unsigned char *) key;
x = 1694245428;
x = 0x64fc2234;
while (--len >= 0)
x = (1000003*x) ^ toupper(*(p++));
{
/* (1000003 * x) ^ toupper(*(p++))
* translated to handle > 32 bit longs
*/
x = (0xf4243 * x);
x = x & 0xFFFFFFFF;
x = x ^ toupper(*(p++));
}
x ^= cch + 10;
if (x == -1)
x = -2;
x %= k_cHashElements;
/* ensure the returned value is positive so we mimic Python's % operator */
if (x < 0)
x += k_cHashElements;
if (x == 0xFFFFFFFF)
x = 0xfffffffe;
if (x & 0x80000000)
{
/* Emulate 32-bit signed (2's complement) modulo operation */
x = (~x & 0xFFFFFFFF) + 1;
x %= k_cHashElements;
if (x != 0)
{
x = x + (~k_cHashElements & 0xFFFFFFFF) + 1;
x = (~x & 0xFFFFFFFF) + 1;
}
}
else
x %= k_cHashElements;
return x;
}
@ -48,20 +64,36 @@ static long f2(const char *key, unsigned int cch)
{
register int len;
register unsigned char *p;
register long x;
register unsigned long x;
len = cch;
p = (unsigned char *) key;
x = -1917331657;
x = 0x8db7d737;
while (--len >= 0)
x = (1000003*x) ^ toupper(*(p++));
{
/* (1000003 * x) ^ toupper(*(p++))
* translated to handle > 32 bit longs
*/
x = (0xf4243 * x);
x = x & 0xFFFFFFFF;
x = x ^ toupper(*(p++));
}
x ^= cch + 10;
if (x == -1)
x = -2;
x %= k_cHashElements;
/* ensure the returned value is positive so we mimic Python's % operator */
if (x < 0)
x += k_cHashElements;
if (x == 0xFFFFFFFF)
x = 0xfffffffe;
if (x & 0x80000000)
{
/* Emulate 32-bit signed (2's complement) modulo operation */
x = (~x & 0xFFFFFFFF) + 1;
x %= k_cHashElements;
if (x != 0)
{
x = x + (~k_cHashElements & 0xFFFFFFFF) + 1;
x = (~x & 0xFFFFFFFF) + 1;
}
}
else
x %= k_cHashElements;
return x;
}

View File

@ -12,8 +12,8 @@ import perfect_hash
# These variables determine which hash function is tried first.
# Yields a multiple of 1.7875 for UnicodeData.txt on 2000/06/24/
f1Seed = 1694245428
f2Seed = -1917331657
f1Seed = 0x64fc2234
f2Seed = 0x8db7d737
# Maximum allowed multiplier; if this isn't None then instead of continually
# increasing C, it resets it back to initC to keep searching for

View File

@ -73,25 +73,29 @@ class Hash:
key = str(key)
if self.caseInsensitive:
key = string.upper(key)
x = perfhash.hash(self.seed, len(self.junk), key) % self.N
#h = hash(self.junk + key) % self.N
#assert x == h
x = perfhash.hash(self.seed, len(self.junk), key, self.N)
return x
def generate_code(self):
s = """{
register int len;
register unsigned char *p;
register long x;
register unsigned long x;
len = cch;
p = (unsigned char *) key;
x = %(junkSeed)d;
x = %(junkSeed)s;
while (--len >= 0)
x = (1000003*x) ^ """ % \
{
/* (1000003 * x) ^ toupper(*(p++))
* translated to handle > 32 bit longs
*/
x = (0xf4243 * x);
x = x & 0xFFFFFFFF;
x = x ^ """ % \
{
"lenJunk" : len(self.junk),
"junkSeed" : self.seed,
"junkSeed" : hex(self.seed),
}
if self.caseInsensitive:
@ -99,20 +103,29 @@ class Hash:
else:
s = s + "*(p++);"
s = s + """
}
x ^= cch + %(lenJunk)d;
if (x == -1)
x = -2;
x %%= k_cHashElements;
/* ensure the returned value is positive so we mimic Python's %% operator */
if (x < 0)
x += k_cHashElements;
if (x == 0xFFFFFFFF)
x = 0xfffffffe;
if (x & 0x80000000)
{
/* Emulate 32-bit signed (2's complement) modulo operation */
x = (~x & 0xFFFFFFFF) + 1;
x %%= k_cHashElements;
if (x != 0)
{
x = x + (~k_cHashElements & 0xFFFFFFFF) + 1;
x = (~x & 0xFFFFFFFF) + 1;
}
}
else
x %%= k_cHashElements;
return x;
}
""" % { "lenJunk" : len(self.junk),
"junkSeed" : self.seed, }
"junkSeed" : hex(self.seed), }
return s
WHITE, GREY, BLACK = 0,1,2
class Graph:
"""Graph class. This class isn't particularly efficient or general,
@ -139,8 +152,8 @@ class Graph:
value 'value'"""
if vertex1 > vertex2: vertex1, vertex2 = vertex2, vertex1
# if self.edges.has_key( (vertex1, vertex2) ):
# raise ValueError, 'Collision: vertices already connected'
if self.edges.has_key( (vertex1, vertex2) ):
raise ValueError, 'Collision: vertices already connected'
self.edges[ (vertex1, vertex2) ] = value
# Add vertices to each other's reachable list
@ -341,8 +354,8 @@ typedef struct %(structName)s
""" % (self.cHashElements, self.cchMax, self.cKeys)
code = code + """
static const %s G[k_cHashElements];
static const %s %s[k_cKeys];
staticforward const %s G[k_cHashElements];
staticforward const %s %s[k_cKeys];
""" % (self.type, dataArrayType, dataArrayName)
code = code + """
@ -553,7 +566,7 @@ def generate_hash(keys, caseInsensitive=0,
# edge.
for k, v in keys:
h1 = f1(k) ; h2 = f2(k)
G.connect( h1,h2, v)
G.connect( h1, h2, v)
# Check if the resulting graph is acyclic; if it is,
# we're done with step 1.
@ -598,8 +611,9 @@ def generate_hash(keys, caseInsensitive=0,
sys.stderr.write('Found perfect hash function!\n')
sys.stderr.write('\nIn order to regenerate this hash function, \n')
sys.stderr.write('you need to pass these following values back in:\n')
sys.stderr.write('f1 seed: %s\n' % repr(f1.seed))
sys.stderr.write('f2 seed: %s\n' % repr(f2.seed))
sys.stderr.write('f1 seed: %s\n' % hex(f1.seed))
sys.stderr.write('f2 seed: %s\n' % hex(f2.seed))
sys.stderr.write('initial multipler: %s\n' % c)
return PerfectHash(cchMaxKey, f1, f2, G, N, len(keys), maxHashValue)

View File

@ -5,11 +5,13 @@ static PyObject * hashFunction(PyObject *self, PyObject *args, PyObject *kw)
PyStringObject *a;
register int len;
register unsigned char *p;
register long x;
long lSeed;
register unsigned long x;
unsigned long ulSeed;
unsigned long cchSeed;
unsigned long cHashElements;
if (!PyArg_ParseTuple(args, "iiO:hash", &lSeed, &cchSeed, &a))
if (!PyArg_ParseTuple(args, "llOl:hash",
&ulSeed, &cchSeed, &a, &cHashElements))
return NULL;
if (!PyString_Check(a))
{
@ -19,13 +21,35 @@ static PyObject * hashFunction(PyObject *self, PyObject *args, PyObject *kw)
len = a->ob_size;
p = (unsigned char *) a->ob_sval;
x = lSeed;
x = ulSeed;
while (--len >= 0)
x = (1000003*x) ^ *p++;
{
/* (1000003 * x) ^ *p++
* translated to handle > 32 bit longs
*/
x = (0xf4243 * x);
x = x & 0xFFFFFFFF;
x = x ^ *p++;
}
x ^= a->ob_size + cchSeed;
if (x == -1)
x = -2;
return PyInt_FromLong(x);
if (x == 0xFFFFFFFF)
x = 0xfffffffe;
if (x & 0x80000000)
{
/* Emulate Python 32-bit signed (2's complement)
* modulo operation
*/
x = (~x & 0xFFFFFFFF) + 1;
x %= cHashElements;
if (x != 0)
{
x = x + (~cHashElements & 0xFFFFFFFF) + 1;
x = (~x & 0xFFFFFFFF) + 1;
}
}
else
x %= cHashElements;
return PyInt_FromLong((long)x);
}
static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw)
@ -33,7 +57,7 @@ static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw)
PyStringObject *a;
register int len;
register unsigned char *p;
register long x;
register unsigned long x;
if (!PyString_Check(args))
{
@ -45,10 +69,17 @@ static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw)
len = a->ob_size;
p = (unsigned char *) a->ob_sval;
x = *p << 7;
x = (*p << 7) & 0xFFFFFFFF;
while (--len >= 0)
x = (1000003*x) ^ *p++;
return PyInt_FromLong(x);
{
/* (1000003 * x) ^ *p++
* translated to handle > 32 bit longs
*/
x = (0xf4243 * x);
x = x & 0xFFFFFFFF;
x = x ^ *p++;
}
return PyInt_FromLong((long)x);
}
@ -68,5 +99,16 @@ void initperfhash(void)
m = Py_InitModule4("perfhash", hashMethods,
NULL, NULL, PYTHON_API_VERSION);
if ( m == NULL )
Py_FatalError("can't initialize module hashModule");
Py_FatalError("can't initialize module perfhash");
}