From 27cbcd6241d787b5e99c6ed05ec8377051f397aa Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Mon, 10 Dec 2012 18:15:46 -0800 Subject: [PATCH 1/2] Fix the internals of our hash functions to used unsigned values during hash computation as the overflow behavior of signed integers is undefined. In practice we require compiling everything with -fwrapv which forces overflow to be defined as twos compliment but this keeps the code cleaner for checkers or in the case where someone has compiled it without -fwrapv or their compiler's equivalent. Found by Clang trunk's Undefined Behavior Sanitizer (UBSan). Cleanup only - no functionality or hash values change. --- Include/pyport.h | 2 +- Objects/bytesobject.c | 2 +- Objects/setobject.c | 12 ++++++------ Objects/tupleobject.c | 10 +++++----- Objects/unicodeobject.c | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Include/pyport.h b/Include/pyport.h index 9666cf28f97..c74ff9c7a4b 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -145,7 +145,7 @@ Used in: PY_LONG_LONG #endif /* Prime multiplier used in string and various other hashes. */ -#define _PyHASH_MULTIPLIER 1000003 /* 0xf4243 */ +#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */ /* Parameters used for the numeric hash implementation. See notes for _PyHash_Double in Objects/object.c. Numeric hashes are based on diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 796e400a822..ef3a5a12a23 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -873,7 +873,7 @@ bytes_hash(PyBytesObject *a) { register Py_ssize_t len; register unsigned char *p; - register Py_hash_t x; + register Py_uhash_t x; /* Unsigned for defined overflow behavior. */ #ifdef Py_DEBUG assert(_Py_HashSecret_Initialized); diff --git a/Objects/setobject.c b/Objects/setobject.c index 3abeefb97d9..d8401f4ae74 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -77,7 +77,7 @@ NULL if the rich comparison returns an error. static setentry * set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash) { - register Py_ssize_t i; + register size_t i; /* Unsigned for defined overflow behavior. */ register size_t perturb; register setentry *freeslot; register size_t mask = so->mask; @@ -159,7 +159,7 @@ set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash) static setentry * set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash) { - register Py_ssize_t i; + register size_t i; /* Unsigned for defined overflow behavior. */ register size_t perturb; register setentry *freeslot; register size_t mask = so->mask; @@ -768,7 +768,7 @@ static Py_hash_t frozenset_hash(PyObject *self) { PySetObject *so = (PySetObject *)self; - Py_hash_t h, hash = 1927868237L; + Py_uhash_t h, hash = 1927868237UL; setentry *entry; Py_ssize_t pos = 0; @@ -783,11 +783,11 @@ frozenset_hash(PyObject *self) hashes so that many distinct combinations collapse to only a handful of distinct hash values. */ h = entry->hash; - hash ^= (h ^ (h << 16) ^ 89869747L) * 3644798167u; + hash ^= (h ^ (h << 16) ^ 89869747UL) * 3644798167UL; } - hash = hash * 69069L + 907133923L; + hash = hash * 69069UL + 907133923UL; if (hash == -1) - hash = 590923713L; + hash = 590923713UL; so->hash = hash; return hash; } diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index b3454600c9b..c725227979b 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -315,11 +315,11 @@ error: static Py_hash_t tuplehash(PyTupleObject *v) { - register Py_hash_t x, y; + register Py_uhash_t x, y; /* Unsigned for defined overflow behavior. */ register Py_ssize_t len = Py_SIZE(v); register PyObject **p; - Py_hash_t mult = _PyHASH_MULTIPLIER; - x = 0x345678L; + Py_uhash_t mult = _PyHASH_MULTIPLIER; + x = 0x345678UL; p = v->ob_item; while (--len >= 0) { y = PyObject_Hash(*p++); @@ -327,9 +327,9 @@ tuplehash(PyTupleObject *v) return -1; x = (x ^ y) * mult; /* the cast might truncate len; that doesn't change hash stability */ - mult += (Py_hash_t)(82520L + len + len); + mult += (Py_uhash_t)(82520UL + len + len); } - x += 97531L; + x += 97531UL; if (x == -1) x = -2; return x; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 565d2982708..bb45b20b90c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7686,7 +7686,7 @@ unicode_hash(PyUnicodeObject *self) { Py_ssize_t len; Py_UNICODE *p; - Py_hash_t x; + Py_uhash_t x; /* Unsigned for defined overflow behavior. */ #ifdef Py_DEBUG assert(_Py_HashSecret_Initialized); From a6be61ec71be600e1d270aea17f1d90dccf6088b Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Mon, 10 Dec 2012 18:34:09 -0800 Subject: [PATCH 2/2] Keep y a Py_hash_t instead of Py_uhash_t as it is compared with == -1 and the compiler logic will do the right thing with just x as a Py_uhash_t. This matches what was already done in the 3.3 version. cleanup only - no functionality or hash values change. --- Objects/tupleobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index c725227979b..9e914cb6344 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -315,7 +315,8 @@ error: static Py_hash_t tuplehash(PyTupleObject *v) { - register Py_uhash_t x, y; /* Unsigned for defined overflow behavior. */ + register Py_uhash_t x; /* Unsigned for defined overflow behavior. */ + register Py_hash_t y; register Py_ssize_t len = Py_SIZE(v); register PyObject **p; Py_uhash_t mult = _PyHASH_MULTIPLIER;