gethash/cmpname both looked beyond the end of the character name.

This patch makes u"\N{x}" a bit less dependent on pure luck...
This commit is contained in:
Fredrik Lundh 2001-01-19 19:45:02 +00:00
parent 5458fcf9c5
commit 7c1e4bbe25
1 changed file with 14 additions and 16 deletions

View File

@ -11,18 +11,19 @@
/* database code (cut and pasted from the unidb package) */ /* database code (cut and pasted from the unidb package) */
static unsigned long static unsigned long
gethash(const char *s) gethash(const char *s, int len)
{ {
int i;
unsigned long h = 0; unsigned long h = 0;
unsigned long i; unsigned long ix;
while (*s) { for (i = 0; i < len; i++) {
/* magic value 47 was chosen to minimize the number /* magic value 47 was chosen to minimize the number
of collisions for the uninames dataset. see the of collisions for the uninames dataset. see the
makeunicodedata script for more background */ makeunicodedata script for more background */
h = (h * 47) + (unsigned char) toupper(*s++); h = (h * 47) + (unsigned char) toupper(s[i]);
i = h & 0xff000000; ix = h & 0xff000000;
if (i) if (ix)
h = (h ^ ((i>>24) & 0xff)) & 0x00ffffff; h = (h ^ ((ix>>24) & 0xff)) & 0x00ffffff;
} }
return h; return h;
} }
@ -80,21 +81,18 @@ getname(Py_UCS4 code, char* buffer, int buflen)
} }
static int static int
cmpname(int code, const char* name) cmpname(int code, const char* name, int namelen)
{ {
/* check if code corresponds to the given name */ /* check if code corresponds to the given name */
int i; int i;
char buffer[NAME_MAXLEN]; char buffer[NAME_MAXLEN];
if (!getname(code, buffer, sizeof(buffer))) if (!getname(code, buffer, sizeof(buffer)))
return 0; return 0;
i = 0; for (i = 0; i < namelen; i++) {
for (;;) {
if (toupper(name[i]) != buffer[i]) if (toupper(name[i]) != buffer[i])
return 0; return 0;
if (!name[i] || !buffer[i])
return 1;
i++;
} }
return buffer[namelen] == '\0';
} }
static int static int
@ -108,12 +106,12 @@ getcode(const char* name, int namelen, Py_UCS4* code)
only minor changes. see the makeunicodedata script for more only minor changes. see the makeunicodedata script for more
details */ details */
h = (unsigned int) gethash(name); h = (unsigned int) gethash(name, namelen);
i = (~h) & mask; i = (~h) & mask;
v = code_hash[i]; v = code_hash[i];
if (!v) if (!v)
return 0; return 0;
if (cmpname(v, name)) { if (cmpname(v, name, namelen)) {
*code = v; *code = v;
return 1; return 1;
} }
@ -125,7 +123,7 @@ getcode(const char* name, int namelen, Py_UCS4* code)
v = code_hash[i]; v = code_hash[i];
if (!v) if (!v)
return -1; return -1;
if (cmpname(v, name)) { if (cmpname(v, name, namelen)) {
*code = v; *code = v;
return 1; return 1;
} }