gh-122854: Add Py_HashBuffer() function (#122855)

This commit is contained in:
Victor Stinner 2024-08-30 17:42:27 +02:00 committed by GitHub
parent 3d60dfbe17
commit d8e69b2c1b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 80 additions and 25 deletions

View File

@ -89,6 +89,25 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`.
.. versionadded:: 3.13 .. versionadded:: 3.13
.. c:function:: Py_hash_t Py_HashBuffer(const void *ptr, Py_ssize_t len)
Compute and return the hash value of a buffer of *len* bytes
starting at address *ptr*. The hash is guaranteed to match that of
:class:`bytes`, :class:`memoryview`, and other built-in objects
that implement the :ref:`buffer protocol <bufferobjects>`.
Use this function to implement hashing for immutable objects whose
:c:member:`~PyTypeObject.tp_richcompare` function compares the object's
contents against another object's buffer.
*len* must be greater than or equal to ``0``.
This function always succeeds.
.. versionadded:: 3.14
.. c:function:: Py_hash_t PyObject_GenericHash(PyObject *obj) .. c:function:: Py_hash_t PyObject_GenericHash(PyObject *obj)
Generic hashing function that is meant to be put into a type Generic hashing function that is meant to be put into a type

View File

@ -489,6 +489,9 @@ New Features
similar to ``sep.join(iterable)`` in Python. similar to ``sep.join(iterable)`` in Python.
(Contributed by Victor Stinner in :gh:`121645`.) (Contributed by Victor Stinner in :gh:`121645`.)
* Add :c:func:`Py_HashBuffer` to compute and return the hash value of a buffer.
(Contributed by Antoine Pitrou and Victor Stinner in :gh:`122854`.)
Porting to Python 3.14 Porting to Python 3.14
---------------------- ----------------------

View File

@ -45,3 +45,5 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
PyAPI_FUNC(Py_hash_t) Py_HashPointer(const void *ptr); PyAPI_FUNC(Py_hash_t) Py_HashPointer(const void *ptr);
PyAPI_FUNC(Py_hash_t) PyObject_GenericHash(PyObject *); PyAPI_FUNC(Py_hash_t) PyObject_GenericHash(PyObject *);
PyAPI_FUNC(Py_hash_t) Py_HashBuffer(const void *ptr, Py_ssize_t len);

View File

@ -20,9 +20,6 @@ _Py_HashPointerRaw(const void *ptr)
return (Py_hash_t)x; return (Py_hash_t)x;
} }
// Export for '_datetime' shared extension
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
/* Hash secret /* Hash secret
* *
* memory layout on 64 bit systems * memory layout on 64 bit systems

View File

@ -78,6 +78,16 @@ class CAPITest(unittest.TestCase):
VOID_P_MAX = -1 & (2 ** (8 * SIZEOF_VOID_P) - 1) VOID_P_MAX = -1 & (2 ** (8 * SIZEOF_VOID_P) - 1)
self.assertEqual(hash_pointer(VOID_P_MAX), -2) self.assertEqual(hash_pointer(VOID_P_MAX), -2)
# Verify that _testcapi.hash_buffer() returns the same value as the
# built-in hash() for several bytes objects.
def test_hash_buffer(self):
hash_buffer = _testcapi.hash_buffer
# Helper: assert that hash_buffer(data) equals hash(data).
def check(data):
self.assertEqual(hash_buffer(data), hash(data))
# Inputs exercised: empty, 3 bytes, and 1024 bytes.
check(b'')
check(b'abc')
check(b'x' * 1024)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@ -0,0 +1,2 @@
Add :c:func:`Py_HashBuffer` to compute and return the hash value of a buffer.
Patch by Antoine Pitrou and Victor Stinner.

View File

@ -3842,7 +3842,7 @@ datetime_date_replace_impl(PyDateTime_Date *self, int year, int month,
static Py_hash_t static Py_hash_t
generic_hash(unsigned char *data, int len) generic_hash(unsigned char *data, int len)
{ {
return _Py_HashBytes(data, len); return Py_HashBuffer(data, len);
} }

View File

@ -25,7 +25,6 @@
#include <stdbool.h> #include <stdbool.h>
#include "Python.h" #include "Python.h"
#include "pycore_hashtable.h" #include "pycore_hashtable.h"
#include "pycore_pyhash.h" // _Py_HashBytes()
#include "pycore_strhex.h" // _Py_strhex() #include "pycore_strhex.h" // _Py_strhex()
#include "hashlib.h" #include "hashlib.h"
@ -186,7 +185,7 @@ static const py_hashentry_t py_hashes[] = {
static Py_uhash_t static Py_uhash_t
py_hashentry_t_hash_name(const void *key) { py_hashentry_t_hash_name(const void *key) {
return _Py_HashBytes(key, strlen((const char *)key)); return Py_HashBuffer(key, strlen((const char *)key));
} }
static int static int

View File

@ -2944,7 +2944,7 @@ pattern_hash(PatternObject *self)
return -1; return -1;
} }
hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize); hash2 = Py_HashBuffer(self->code, sizeof(self->code[0]) * self->codesize);
hash ^= hash2; hash ^= hash2;
hash ^= self->flags; hash ^= self->flags;

View File

@ -45,6 +45,14 @@ hash_getfuncdef(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
} }
/* Helper for the hash test functions: pass a Py_hash_t value to
   PyLong_FromLongLong() and return that call's result unchanged. */
static PyObject *
long_from_hash(Py_hash_t hash)
{
// Guard: long long must be at least as wide as the hash type, so the
// conversion on the next line cannot truncate the value.
Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash));
return PyLong_FromLongLong(hash);
}
static PyObject * static PyObject *
hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg) hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg)
{ {
@ -54,8 +62,21 @@ hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg)
} }
Py_hash_t hash = Py_HashPointer(ptr); Py_hash_t hash = Py_HashPointer(ptr);
Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash)); return long_from_hash(hash);
return PyLong_FromLongLong(hash); }
static PyObject *
hash_buffer(PyObject *Py_UNUSED(module), PyObject *args)
{
char *ptr;
Py_ssize_t len;
if (!PyArg_ParseTuple(args, "y#", &ptr, &len)) {
return NULL;
}
Py_hash_t hash = Py_HashBuffer(ptr, len);
return long_from_hash(hash);
} }
@ -64,14 +85,14 @@ object_generichash(PyObject *Py_UNUSED(module), PyObject *arg)
{ {
NULLABLE(arg); NULLABLE(arg);
Py_hash_t hash = PyObject_GenericHash(arg); Py_hash_t hash = PyObject_GenericHash(arg);
Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash)); return long_from_hash(hash);
return PyLong_FromLongLong(hash);
} }
static PyMethodDef test_methods[] = { static PyMethodDef test_methods[] = {
{"hash_getfuncdef", hash_getfuncdef, METH_NOARGS}, {"hash_getfuncdef", hash_getfuncdef, METH_NOARGS},
{"hash_pointer", hash_pointer, METH_O}, {"hash_pointer", hash_pointer, METH_O},
{"hash_buffer", hash_buffer, METH_VARARGS},
{"object_generichash", object_generichash, METH_O}, {"object_generichash", object_generichash, METH_O},
{NULL}, {NULL},
}; };

View File

@ -15,7 +15,6 @@
#endif #endif
#include <Python.h> #include <Python.h>
#include "pycore_pyhash.h" // _Py_HashBytes()
#include <stdlib.h> #include <stdlib.h>
#include <inttypes.h> #include <inttypes.h>
@ -45,7 +44,7 @@ static int fuzz_builtin_int(const char* data, size_t size) {
/* Pick a random valid base. (When the fuzzed function takes extra /* Pick a random valid base. (When the fuzzed function takes extra
parameters, it's somewhat normal to hash the input to generate those parameters, it's somewhat normal to hash the input to generate those
parameters. We want to exercise all code paths, so we do so here.) */ parameters. We want to exercise all code paths, so we do so here.) */
int base = _Py_HashBytes(data, size) % 37; int base = Py_HashBuffer(data, size) % 37;
if (base == 1) { if (base == 1) {
// 1 is the only number between 0 and 36 that is not a valid base. // 1 is the only number between 0 and 36 that is not a valid base.
base = 0; base = 0;

View File

@ -1598,7 +1598,7 @@ _Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS _Py_COMP_DIAG_IGNORE_DEPR_DECLS
if (a->ob_shash == -1) { if (a->ob_shash == -1) {
/* Can't fail */ /* Can't fail */
a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a)); a->ob_shash = Py_HashBuffer(a->ob_sval, Py_SIZE(a));
} }
return a->ob_shash; return a->ob_shash;
_Py_COMP_DIAG_POP _Py_COMP_DIAG_POP

View File

@ -2561,12 +2561,12 @@ hash_const(const void *key)
if (PySlice_Check(op)) { if (PySlice_Check(op)) {
PySliceObject *s = (PySliceObject *)op; PySliceObject *s = (PySliceObject *)op;
PyObject *data[3] = { s->start, s->stop, s->step }; PyObject *data[3] = { s->start, s->stop, s->step };
return _Py_HashBytes(&data, sizeof(data)); return Py_HashBuffer(&data, sizeof(data));
} }
else if (PyTuple_CheckExact(op)) { else if (PyTuple_CheckExact(op)) {
Py_ssize_t size = PyTuple_GET_SIZE(op); Py_ssize_t size = PyTuple_GET_SIZE(op);
PyObject **data = _PyTuple_ITEMS(op); PyObject **data = _PyTuple_ITEMS(op);
return _Py_HashBytes(data, sizeof(PyObject *) * size); return Py_HashBuffer(data, sizeof(PyObject *) * size);
} }
Py_hash_t h = PyObject_Hash(op); Py_hash_t h = PyObject_Hash(op);
if (h == -1) { if (h == -1) {

View File

@ -3087,7 +3087,7 @@ memory_hash(PyObject *_self)
} }
/* Can't fail */ /* Can't fail */
self->hash = _Py_HashBytes(mem, view->len); self->hash = Py_HashBuffer(mem, view->len);
if (mem != view->buf) if (mem != view->buf)
PyMem_Free(mem); PyMem_Free(mem);

View File

@ -11688,7 +11688,7 @@ unicode_hash(PyObject *self)
if (hash != -1) { if (hash != -1) {
return hash; return hash;
} }
x = _Py_HashBytes(PyUnicode_DATA(self), x = Py_HashBuffer(PyUnicode_DATA(self),
PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self)); PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));
FT_ATOMIC_STORE_SSIZE_RELAXED(_PyUnicode_HASH(self), x); FT_ATOMIC_STORE_SSIZE_RELAXED(_PyUnicode_HASH(self), x);

View File

@ -1174,7 +1174,7 @@ hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep)
static Py_uhash_t static Py_uhash_t
hashtable_hash_str(const void *key) hashtable_hash_str(const void *key)
{ {
return _Py_HashBytes(key, strlen((const char *)key)); return Py_HashBuffer(key, strlen((const char *)key));
} }
static int static int

View File

@ -22,7 +22,7 @@ extern PyHash_FuncDef PyHash_Func;
static PyHash_FuncDef PyHash_Func; static PyHash_FuncDef PyHash_Func;
#endif #endif
/* Count _Py_HashBytes() calls */ /* Count Py_HashBuffer() calls */
#ifdef Py_HASH_STATS #ifdef Py_HASH_STATS
#define Py_HASH_STATS_MAX 32 #define Py_HASH_STATS_MAX 32
static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0}; static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
@ -146,9 +146,8 @@ PyObject_GenericHash(PyObject *obj)
} }
Py_hash_t Py_hash_t
_Py_HashBytes(const void *src, Py_ssize_t len) Py_HashBuffer(const void *ptr, Py_ssize_t len)
{ {
Py_hash_t x;
/* /*
We make the hash of the empty string be 0, rather than using We make the hash of the empty string be 0, rather than using
(prefix ^ suffix), since this slightly obfuscates the hash secret (prefix ^ suffix), since this slightly obfuscates the hash secret
@ -161,11 +160,12 @@ _Py_HashBytes(const void *src, Py_ssize_t len)
hashstats[(len <= Py_HASH_STATS_MAX) ? len : 0]++; hashstats[(len <= Py_HASH_STATS_MAX) ? len : 0]++;
#endif #endif
Py_hash_t x;
#if Py_HASH_CUTOFF > 0 #if Py_HASH_CUTOFF > 0
if (len < Py_HASH_CUTOFF) { if (len < Py_HASH_CUTOFF) {
/* Optimize hashing of very small strings with inline DJBX33A. */ /* Optimize hashing of very small strings with inline DJBX33A. */
Py_uhash_t hash; Py_uhash_t hash;
const unsigned char *p = src; const unsigned char *p = ptr;
hash = 5381; /* DJBX33A starts with 5381 */ hash = 5381; /* DJBX33A starts with 5381 */
switch(len) { switch(len) {
@ -186,10 +186,13 @@ _Py_HashBytes(const void *src, Py_ssize_t len)
} }
else else
#endif /* Py_HASH_CUTOFF */ #endif /* Py_HASH_CUTOFF */
x = PyHash_Func.hash(src, len); {
x = PyHash_Func.hash(ptr, len);
}
if (x == -1) if (x == -1) {
return -2; return -2;
}
return x; return x;
} }