From 828451dfde324f9499ffebc023a22b84dc5a125b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 6 Dec 2023 15:09:22 +0100 Subject: [PATCH] gh-111545: Add Py_HashPointer() function (#112096) * Implement _Py_HashPointerRaw() as a static inline function. * Add Py_HashPointer() tests to test_capi.test_hash. * Keep _Py_HashPointer() function as an alias to Py_HashPointer(). --- Doc/c-api/hash.rst | 10 ++++ Doc/whatsnew/3.13.rst | 3 ++ Include/cpython/pyhash.h | 6 ++- Include/internal/pycore_pyhash.h | 16 ++++++- Lib/test/test_capi/test_hash.py | 48 ++++++++++++++++++- ...-11-15-01-26-59.gh-issue-111545.iAoFtA.rst | 2 + Modules/_testcapi/hash.c | 16 +++++++ Python/hashtable.c | 2 +- Python/pyhash.c | 22 ++------- 9 files changed, 103 insertions(+), 22 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst index 3bfaf8b9f54..91d88ae27bc 100644 --- a/Doc/c-api/hash.rst +++ b/Doc/c-api/hash.rst @@ -49,3 +49,13 @@ See also the :c:member:`PyTypeObject.tp_hash` member. :pep:`456` "Secure and interchangeable hash algorithm". .. versionadded:: 3.4 + + +.. c:function:: Py_hash_t Py_HashPointer(const void *ptr) + + Hash a pointer value: process the pointer value as an integer (cast it to + ``uintptr_t`` internally). The pointer is not dereferenced. + + The function cannot fail: it cannot return ``-1``. + + .. versionadded:: 3.13 diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 534813f3659..c9facad6375 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1249,6 +1249,9 @@ New Features :exc:`KeyError` if the key missing. (Contributed by Stefan Behnel and Victor Stinner in :gh:`111262`.) +* Add :c:func:`Py_HashPointer` function to hash a pointer. + (Contributed by Victor Stinner in :gh:`111545`.) + Porting to Python 3.13 ---------------------- diff --git a/Include/cpython/pyhash.h b/Include/cpython/pyhash.h index 6f7113daa5f..396c208e1b1 100644 --- a/Include/cpython/pyhash.h +++ b/Include/cpython/pyhash.h @@ -21,7 +21,9 @@ /* Helpers for hash functions */ PyAPI_FUNC(Py_hash_t) _Py_HashDouble(PyObject *, double); -PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*); + +// Kept for backward compatibility +#define _Py_HashPointer Py_HashPointer /* hash function definition */ @@ -33,3 +35,5 @@ typedef struct { } PyHash_FuncDef; PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); + +PyAPI_FUNC(Py_hash_t) Py_HashPointer(const void *ptr); diff --git a/Include/internal/pycore_pyhash.h b/Include/internal/pycore_pyhash.h index c3b72d90de3..0ce08900e96 100644 --- a/Include/internal/pycore_pyhash.h +++ b/Include/internal/pycore_pyhash.h @@ -5,8 +5,20 @@ # error "this header requires Py_BUILD_CORE define" #endif -// Similar to _Py_HashPointer(), but don't replace -1 with -2 -extern Py_hash_t _Py_HashPointerRaw(const void*); +// Similar to Py_HashPointer(), but don't replace -1 with -2. +static inline Py_hash_t +_Py_HashPointerRaw(const void *ptr) +{ + uintptr_t x = (uintptr_t)ptr; + Py_BUILD_ASSERT(sizeof(x) == sizeof(ptr)); + + // Bottom 3 or 4 bits are likely to be 0; rotate x by 4 to the right + // to avoid excessive hash collisions for dicts and sets. + x = (x >> 4) | (x << (8 * sizeof(uintptr_t) - 4)); + + Py_BUILD_ASSERT(sizeof(x) == sizeof(Py_hash_t)); + return (Py_hash_t)x; +} // Export for '_datetime' shared extension PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t); diff --git a/Lib/test/test_capi/test_hash.py b/Lib/test/test_capi/test_hash.py index 59dec15bc21..8436da7c32d 100644 --- a/Lib/test/test_capi/test_hash.py +++ b/Lib/test/test_capi/test_hash.py @@ -4,7 +4,8 @@ from test.support import import_helper _testcapi = import_helper.import_module('_testcapi') -SIZEOF_PY_HASH_T = _testcapi.SIZEOF_VOID_P +SIZEOF_VOID_P = _testcapi.SIZEOF_VOID_P +SIZEOF_PY_HASH_T = SIZEOF_VOID_P class CAPITest(unittest.TestCase): @@ -31,3 +32,48 @@ class CAPITest(unittest.TestCase): self.assertEqual(func_def.name, hash_info.algorithm) self.assertEqual(func_def.hash_bits, hash_info.hash_bits) self.assertEqual(func_def.seed_bits, hash_info.seed_bits) + + def test_hash_pointer(self): + # Test Py_HashPointer() + hash_pointer = _testcapi.hash_pointer + + UHASH_T_MASK = ((2 ** (8 * SIZEOF_PY_HASH_T)) - 1) + HASH_T_MAX = (2 ** (8 * SIZEOF_PY_HASH_T - 1) - 1) + + def python_hash_pointer(x): + # Py_HashPointer() rotates the pointer bits by 4 bits to the right + x = (x >> 4) | ((x & 15) << (8 * SIZEOF_VOID_P - 4)) + + # Convert unsigned uintptr_t (Py_uhash_t) to signed Py_hash_t + if HASH_T_MAX < x: + x = (~x) + 1 + x &= UHASH_T_MASK + x = (~x) + 1 + return x + + if SIZEOF_VOID_P == 8: + values = ( + 0xABCDEF1234567890, + 0x1234567890ABCDEF, + 0xFEE4ABEDD1CECA5E, + ) + else: + values = ( + 0x12345678, + 0x1234ABCD, + 0xDEADCAFE, + ) + + for value in values: + expected = python_hash_pointer(value) + with self.subTest(value=value): + self.assertEqual(hash_pointer(value), expected, + f"hash_pointer({value:x}) = " + f"{hash_pointer(value):x} != {expected:x}") + + # Py_HashPointer(NULL) returns 0 + self.assertEqual(hash_pointer(0), 0) + + # Py_HashPointer((void*)(uintptr_t)-1) doesn't return -1 but -2 + VOID_P_MAX = -1 & (2 ** (8 * SIZEOF_VOID_P) - 1) + self.assertEqual(hash_pointer(VOID_P_MAX), -2) diff --git a/Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst b/Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst new file mode 100644 index 00000000000..7bde2498acf --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst @@ -0,0 +1,2 @@ +Add :c:func:`Py_HashPointer` function to hash a pointer. Patch by Victor +Stinner. diff --git a/Modules/_testcapi/hash.c b/Modules/_testcapi/hash.c index d0b8127020c..aee76787dcd 100644 --- a/Modules/_testcapi/hash.c +++ b/Modules/_testcapi/hash.c @@ -44,8 +44,24 @@ hash_getfuncdef(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) return result; } + +static PyObject * +hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg) +{ + void *ptr = PyLong_AsVoidPtr(arg); + if (ptr == NULL && PyErr_Occurred()) { + return NULL; + } + + Py_hash_t hash = Py_HashPointer(ptr); + Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash)); + return PyLong_FromLongLong(hash); +} + + static PyMethodDef test_methods[] = { {"hash_getfuncdef", hash_getfuncdef, METH_NOARGS}, + {"hash_pointer", hash_pointer, METH_O}, {NULL}, }; diff --git a/Python/hashtable.c b/Python/hashtable.c index 8f5e8168ba1..faf68fe4ff0 100644 --- a/Python/hashtable.c +++ b/Python/hashtable.c @@ -45,7 +45,7 @@ */ #include "Python.h" -#include "pycore_hashtable.h" +#include "pycore_hashtable.h" // export _Py_hashtable_new() #include "pycore_pyhash.h" // _Py_HashPointerRaw() #define HASHTABLE_MIN_SIZE 16 diff --git a/Python/pyhash.c b/Python/pyhash.c index f9060b8003a..141407c2656 100644 --- a/Python/pyhash.c +++ b/Python/pyhash.c @@ -83,8 +83,6 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0}; */ -Py_hash_t _Py_HashPointer(const void *); - Py_hash_t _Py_HashDouble(PyObject *inst, double v) { @@ -132,23 +130,13 @@ _Py_HashDouble(PyObject *inst, double v) } Py_hash_t -_Py_HashPointerRaw(const void *p) +Py_HashPointer(const void *ptr) { - size_t y = (size_t)p; - /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid - excessive hash collisions for dicts and sets */ - y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4)); - return (Py_hash_t)y; -} - -Py_hash_t -_Py_HashPointer(const void *p) -{ - Py_hash_t x = _Py_HashPointerRaw(p); - if (x == -1) { - x = -2; + Py_hash_t hash = _Py_HashPointerRaw(ptr); + if (hash == -1) { + hash = -2; } - return x; + return hash; } Py_hash_t