mirror of https://github.com/python/cpython
gh-122854: Add Py_HashBuffer() function (#122855)
This commit is contained in:
parent
3d60dfbe17
commit
d8e69b2c1b
|
@ -89,6 +89,25 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`.
|
||||||
|
|
||||||
.. versionadded:: 3.13
|
.. versionadded:: 3.13
|
||||||
|
|
||||||
|
|
||||||
|
.. c:function:: Py_hash_t Py_HashBuffer(const void *ptr, Py_ssize_t len)
|
||||||
|
|
||||||
|
Compute and return the hash value of a buffer of *len* bytes
|
||||||
|
starting at address *ptr*. The hash is guaranteed to match that of
|
||||||
|
:class:`bytes`, :class:`memoryview`, and other built-in objects
|
||||||
|
that implement the :ref:`buffer protocol <bufferobjects>`.
|
||||||
|
|
||||||
|
Use this function to implement hashing for immutable objects whose
|
||||||
|
:c:member:`~PyTypeObject.tp_richcompare` function compares to another
|
||||||
|
object's buffer.
|
||||||
|
|
||||||
|
*len* must be greater than or equal to ``0``.
|
||||||
|
|
||||||
|
This function always succeeds.
|
||||||
|
|
||||||
|
.. versionadded:: 3.14
|
||||||
|
|
||||||
|
|
||||||
.. c:function:: Py_hash_t PyObject_GenericHash(PyObject *obj)
|
.. c:function:: Py_hash_t PyObject_GenericHash(PyObject *obj)
|
||||||
|
|
||||||
Generic hashing function that is meant to be put into a type
|
Generic hashing function that is meant to be put into a type
|
||||||
|
|
|
@ -489,6 +489,9 @@ New Features
|
||||||
similar to ``sep.join(iterable)`` in Python.
|
similar to ``sep.join(iterable)`` in Python.
|
||||||
(Contributed by Victor Stinner in :gh:`121645`.)
|
(Contributed by Victor Stinner in :gh:`121645`.)
|
||||||
|
|
||||||
|
* Add :c:func:`Py_HashBuffer` to compute and return the hash value of a buffer.
|
||||||
|
(Contributed by Antoine Pitrou and Victor Stinner in :gh:`122854`.)
|
||||||
|
|
||||||
|
|
||||||
Porting to Python 3.14
|
Porting to Python 3.14
|
||||||
----------------------
|
----------------------
|
||||||
|
|
|
@ -45,3 +45,5 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
|
||||||
|
|
||||||
PyAPI_FUNC(Py_hash_t) Py_HashPointer(const void *ptr);
|
PyAPI_FUNC(Py_hash_t) Py_HashPointer(const void *ptr);
|
||||||
PyAPI_FUNC(Py_hash_t) PyObject_GenericHash(PyObject *);
|
PyAPI_FUNC(Py_hash_t) PyObject_GenericHash(PyObject *);
|
||||||
|
|
||||||
|
PyAPI_FUNC(Py_hash_t) Py_HashBuffer(const void *ptr, Py_ssize_t len);
|
||||||
|
|
|
@ -20,9 +20,6 @@ _Py_HashPointerRaw(const void *ptr)
|
||||||
return (Py_hash_t)x;
|
return (Py_hash_t)x;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Export for '_datetime' shared extension
|
|
||||||
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
|
|
||||||
|
|
||||||
/* Hash secret
|
/* Hash secret
|
||||||
*
|
*
|
||||||
* memory layout on 64 bit systems
|
* memory layout on 64 bit systems
|
||||||
|
|
|
@ -78,6 +78,16 @@ class CAPITest(unittest.TestCase):
|
||||||
VOID_P_MAX = -1 & (2 ** (8 * SIZEOF_VOID_P) - 1)
|
VOID_P_MAX = -1 & (2 ** (8 * SIZEOF_VOID_P) - 1)
|
||||||
self.assertEqual(hash_pointer(VOID_P_MAX), -2)
|
self.assertEqual(hash_pointer(VOID_P_MAX), -2)
|
||||||
|
|
||||||
|
def test_hash_buffer(self):
|
||||||
|
hash_buffer = _testcapi.hash_buffer
|
||||||
|
|
||||||
|
def check(data):
|
||||||
|
self.assertEqual(hash_buffer(data), hash(data))
|
||||||
|
|
||||||
|
check(b'')
|
||||||
|
check(b'abc')
|
||||||
|
check(b'x' * 1024)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Add :c:func:`Py_HashBuffer` to compute and return the hash value of a buffer.
|
||||||
|
Patch by Antoine Pitrou and Victor Stinner.
|
|
@ -3842,7 +3842,7 @@ datetime_date_replace_impl(PyDateTime_Date *self, int year, int month,
|
||||||
static Py_hash_t
|
static Py_hash_t
|
||||||
generic_hash(unsigned char *data, int len)
|
generic_hash(unsigned char *data, int len)
|
||||||
{
|
{
|
||||||
return _Py_HashBytes(data, len);
|
return Py_HashBuffer(data, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,6 @@
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include "Python.h"
|
#include "Python.h"
|
||||||
#include "pycore_hashtable.h"
|
#include "pycore_hashtable.h"
|
||||||
#include "pycore_pyhash.h" // _Py_HashBytes()
|
|
||||||
#include "pycore_strhex.h" // _Py_strhex()
|
#include "pycore_strhex.h" // _Py_strhex()
|
||||||
#include "hashlib.h"
|
#include "hashlib.h"
|
||||||
|
|
||||||
|
@ -186,7 +185,7 @@ static const py_hashentry_t py_hashes[] = {
|
||||||
|
|
||||||
static Py_uhash_t
|
static Py_uhash_t
|
||||||
py_hashentry_t_hash_name(const void *key) {
|
py_hashentry_t_hash_name(const void *key) {
|
||||||
return _Py_HashBytes(key, strlen((const char *)key));
|
return Py_HashBuffer(key, strlen((const char *)key));
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
|
|
@ -2944,7 +2944,7 @@ pattern_hash(PatternObject *self)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
|
hash2 = Py_HashBuffer(self->code, sizeof(self->code[0]) * self->codesize);
|
||||||
hash ^= hash2;
|
hash ^= hash2;
|
||||||
|
|
||||||
hash ^= self->flags;
|
hash ^= self->flags;
|
||||||
|
|
|
@ -45,6 +45,14 @@ hash_getfuncdef(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
long_from_hash(Py_hash_t hash)
|
||||||
|
{
|
||||||
|
Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash));
|
||||||
|
return PyLong_FromLongLong(hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg)
|
hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg)
|
||||||
{
|
{
|
||||||
|
@ -54,8 +62,21 @@ hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg)
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_hash_t hash = Py_HashPointer(ptr);
|
Py_hash_t hash = Py_HashPointer(ptr);
|
||||||
Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash));
|
return long_from_hash(hash);
|
||||||
return PyLong_FromLongLong(hash);
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
hash_buffer(PyObject *Py_UNUSED(module), PyObject *args)
|
||||||
|
{
|
||||||
|
char *ptr;
|
||||||
|
Py_ssize_t len;
|
||||||
|
if (!PyArg_ParseTuple(args, "y#", &ptr, &len)) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_hash_t hash = Py_HashBuffer(ptr, len);
|
||||||
|
return long_from_hash(hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -64,14 +85,14 @@ object_generichash(PyObject *Py_UNUSED(module), PyObject *arg)
|
||||||
{
|
{
|
||||||
NULLABLE(arg);
|
NULLABLE(arg);
|
||||||
Py_hash_t hash = PyObject_GenericHash(arg);
|
Py_hash_t hash = PyObject_GenericHash(arg);
|
||||||
Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash));
|
return long_from_hash(hash);
|
||||||
return PyLong_FromLongLong(hash);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static PyMethodDef test_methods[] = {
|
static PyMethodDef test_methods[] = {
|
||||||
{"hash_getfuncdef", hash_getfuncdef, METH_NOARGS},
|
{"hash_getfuncdef", hash_getfuncdef, METH_NOARGS},
|
||||||
{"hash_pointer", hash_pointer, METH_O},
|
{"hash_pointer", hash_pointer, METH_O},
|
||||||
|
{"hash_buffer", hash_buffer, METH_VARARGS},
|
||||||
{"object_generichash", object_generichash, METH_O},
|
{"object_generichash", object_generichash, METH_O},
|
||||||
{NULL},
|
{NULL},
|
||||||
};
|
};
|
||||||
|
|
|
@ -15,7 +15,6 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <Python.h>
|
#include <Python.h>
|
||||||
#include "pycore_pyhash.h" // _Py_HashBytes()
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
|
|
||||||
|
@ -45,7 +44,7 @@ static int fuzz_builtin_int(const char* data, size_t size) {
|
||||||
/* Pick a random valid base. (When the fuzzed function takes extra
|
/* Pick a random valid base. (When the fuzzed function takes extra
|
||||||
parameters, it's somewhat normal to hash the input to generate those
|
parameters, it's somewhat normal to hash the input to generate those
|
||||||
parameters. We want to exercise all code paths, so we do so here.) */
|
parameters. We want to exercise all code paths, so we do so here.) */
|
||||||
int base = _Py_HashBytes(data, size) % 37;
|
int base = Py_HashBuffer(data, size) % 37;
|
||||||
if (base == 1) {
|
if (base == 1) {
|
||||||
// 1 is the only number between 0 and 36 that is not a valid base.
|
// 1 is the only number between 0 and 36 that is not a valid base.
|
||||||
base = 0;
|
base = 0;
|
||||||
|
|
|
@ -1598,7 +1598,7 @@ _Py_COMP_DIAG_PUSH
|
||||||
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
|
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
|
||||||
if (a->ob_shash == -1) {
|
if (a->ob_shash == -1) {
|
||||||
/* Can't fail */
|
/* Can't fail */
|
||||||
a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
|
a->ob_shash = Py_HashBuffer(a->ob_sval, Py_SIZE(a));
|
||||||
}
|
}
|
||||||
return a->ob_shash;
|
return a->ob_shash;
|
||||||
_Py_COMP_DIAG_POP
|
_Py_COMP_DIAG_POP
|
||||||
|
|
|
@ -2561,12 +2561,12 @@ hash_const(const void *key)
|
||||||
if (PySlice_Check(op)) {
|
if (PySlice_Check(op)) {
|
||||||
PySliceObject *s = (PySliceObject *)op;
|
PySliceObject *s = (PySliceObject *)op;
|
||||||
PyObject *data[3] = { s->start, s->stop, s->step };
|
PyObject *data[3] = { s->start, s->stop, s->step };
|
||||||
return _Py_HashBytes(&data, sizeof(data));
|
return Py_HashBuffer(&data, sizeof(data));
|
||||||
}
|
}
|
||||||
else if (PyTuple_CheckExact(op)) {
|
else if (PyTuple_CheckExact(op)) {
|
||||||
Py_ssize_t size = PyTuple_GET_SIZE(op);
|
Py_ssize_t size = PyTuple_GET_SIZE(op);
|
||||||
PyObject **data = _PyTuple_ITEMS(op);
|
PyObject **data = _PyTuple_ITEMS(op);
|
||||||
return _Py_HashBytes(data, sizeof(PyObject *) * size);
|
return Py_HashBuffer(data, sizeof(PyObject *) * size);
|
||||||
}
|
}
|
||||||
Py_hash_t h = PyObject_Hash(op);
|
Py_hash_t h = PyObject_Hash(op);
|
||||||
if (h == -1) {
|
if (h == -1) {
|
||||||
|
|
|
@ -3087,7 +3087,7 @@ memory_hash(PyObject *_self)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Can't fail */
|
/* Can't fail */
|
||||||
self->hash = _Py_HashBytes(mem, view->len);
|
self->hash = Py_HashBuffer(mem, view->len);
|
||||||
|
|
||||||
if (mem != view->buf)
|
if (mem != view->buf)
|
||||||
PyMem_Free(mem);
|
PyMem_Free(mem);
|
||||||
|
|
|
@ -11688,7 +11688,7 @@ unicode_hash(PyObject *self)
|
||||||
if (hash != -1) {
|
if (hash != -1) {
|
||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
x = _Py_HashBytes(PyUnicode_DATA(self),
|
x = Py_HashBuffer(PyUnicode_DATA(self),
|
||||||
PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));
|
PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));
|
||||||
|
|
||||||
FT_ATOMIC_STORE_SSIZE_RELAXED(_PyUnicode_HASH(self), x);
|
FT_ATOMIC_STORE_SSIZE_RELAXED(_PyUnicode_HASH(self), x);
|
||||||
|
|
|
@ -1174,7 +1174,7 @@ hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep)
|
||||||
static Py_uhash_t
|
static Py_uhash_t
|
||||||
hashtable_hash_str(const void *key)
|
hashtable_hash_str(const void *key)
|
||||||
{
|
{
|
||||||
return _Py_HashBytes(key, strlen((const char *)key));
|
return Py_HashBuffer(key, strlen((const char *)key));
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
|
|
@ -22,7 +22,7 @@ extern PyHash_FuncDef PyHash_Func;
|
||||||
static PyHash_FuncDef PyHash_Func;
|
static PyHash_FuncDef PyHash_Func;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Count _Py_HashBytes() calls */
|
/* Count Py_HashBuffer() calls */
|
||||||
#ifdef Py_HASH_STATS
|
#ifdef Py_HASH_STATS
|
||||||
#define Py_HASH_STATS_MAX 32
|
#define Py_HASH_STATS_MAX 32
|
||||||
static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
|
static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
|
||||||
|
@ -146,9 +146,8 @@ PyObject_GenericHash(PyObject *obj)
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_hash_t
|
Py_hash_t
|
||||||
_Py_HashBytes(const void *src, Py_ssize_t len)
|
Py_HashBuffer(const void *ptr, Py_ssize_t len)
|
||||||
{
|
{
|
||||||
Py_hash_t x;
|
|
||||||
/*
|
/*
|
||||||
We make the hash of the empty string be 0, rather than using
|
We make the hash of the empty string be 0, rather than using
|
||||||
(prefix ^ suffix), since this slightly obfuscates the hash secret
|
(prefix ^ suffix), since this slightly obfuscates the hash secret
|
||||||
|
@ -161,11 +160,12 @@ _Py_HashBytes(const void *src, Py_ssize_t len)
|
||||||
hashstats[(len <= Py_HASH_STATS_MAX) ? len : 0]++;
|
hashstats[(len <= Py_HASH_STATS_MAX) ? len : 0]++;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
Py_hash_t x;
|
||||||
#if Py_HASH_CUTOFF > 0
|
#if Py_HASH_CUTOFF > 0
|
||||||
if (len < Py_HASH_CUTOFF) {
|
if (len < Py_HASH_CUTOFF) {
|
||||||
/* Optimize hashing of very small strings with inline DJBX33A. */
|
/* Optimize hashing of very small strings with inline DJBX33A. */
|
||||||
Py_uhash_t hash;
|
Py_uhash_t hash;
|
||||||
const unsigned char *p = src;
|
const unsigned char *p = ptr;
|
||||||
hash = 5381; /* DJBX33A starts with 5381 */
|
hash = 5381; /* DJBX33A starts with 5381 */
|
||||||
|
|
||||||
switch(len) {
|
switch(len) {
|
||||||
|
@ -186,10 +186,13 @@ _Py_HashBytes(const void *src, Py_ssize_t len)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif /* Py_HASH_CUTOFF */
|
#endif /* Py_HASH_CUTOFF */
|
||||||
x = PyHash_Func.hash(src, len);
|
{
|
||||||
|
x = PyHash_Func.hash(ptr, len);
|
||||||
|
}
|
||||||
|
|
||||||
if (x == -1)
|
if (x == -1) {
|
||||||
return -2;
|
return -2;
|
||||||
|
}
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue