mirror of https://github.com/python/cpython
bpo-29410: Change the default hash algorithm to SipHash13. (GH-28752)
Co-authored-by: Erlend Egeberg Aasland <erlend.aasland@innova.no> Co-authored-by: Christian Heimes <christian@python.org>
This commit is contained in:
parent
a1c3c9e824
commit
ad970e8623
|
@ -416,15 +416,19 @@ Libraries options
|
||||||
Security Options
|
Security Options
|
||||||
----------------
|
----------------
|
||||||
|
|
||||||
.. cmdoption:: --with-hash-algorithm=[fnv|siphash24]
|
.. cmdoption:: --with-hash-algorithm=[fnv|siphash13|siphash24]
|
||||||
|
|
||||||
Select hash algorithm for use in ``Python/pyhash.c``:
|
Select hash algorithm for use in ``Python/pyhash.c``:
|
||||||
|
|
||||||
* ``siphash24`` (default).
|
* ``siphash13`` (default);
|
||||||
* ``fnv``;
|
* ``siphash24``;
|
||||||
|
* ``fnv``.
|
||||||
|
|
||||||
.. versionadded:: 3.4
|
.. versionadded:: 3.4
|
||||||
|
|
||||||
|
.. versionadded:: 3.11
|
||||||
|
``siphash13`` is added and it is the new default.
|
||||||
|
|
||||||
.. cmdoption:: --with-builtin-hashlib-hashes=md5,sha1,sha256,sha512,sha3,blake2
|
.. cmdoption:: --with-builtin-hashlib-hashes=md5,sha1,sha256,sha512,sha3,blake2
|
||||||
|
|
||||||
Built-in hash modules:
|
Built-in hash modules:
|
||||||
|
|
|
@ -175,6 +175,11 @@ Other CPython Implementation Changes
|
||||||
support :class:`typing.SupportsComplex` and :class:`typing.SupportsBytes` protocols.
|
support :class:`typing.SupportsComplex` and :class:`typing.SupportsBytes` protocols.
|
||||||
(Contributed by Mark Dickinson and Dong-hee Na in :issue:`24234`.)
|
(Contributed by Mark Dickinson and Dong-hee Na in :issue:`24234`.)
|
||||||
|
|
||||||
|
* ``siphash13`` is added as a new internal hashing algorithms. It's has similar security
|
||||||
|
properties as ``siphash24`` but it is slightly faster for long inputs. ``str``, ``bytes``,
|
||||||
|
and some other types now use it as default algorithm for ``hash()``. :pep:`552`
|
||||||
|
hash-based pyc files now use ``siphash13``, too.
|
||||||
|
(Contributed by Inada Naoki in :issue:`29410`.)
|
||||||
|
|
||||||
New Modules
|
New Modules
|
||||||
===========
|
===========
|
||||||
|
|
|
@ -114,11 +114,10 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
|
||||||
|
|
||||||
/* hash algorithm selection
|
/* hash algorithm selection
|
||||||
*
|
*
|
||||||
* The values for Py_HASH_SIPHASH24 and Py_HASH_FNV are hard-coded in the
|
* The values for Py_HASH_* are hard-coded in the
|
||||||
* configure script.
|
* configure script.
|
||||||
*
|
*
|
||||||
* - FNV is available on all platforms and architectures.
|
* - FNV and SIPHASH* are available on all platforms and architectures.
|
||||||
* - SIPHASH24 only works on platforms that don't require aligned memory for integers.
|
|
||||||
* - With EXTERNAL embedders can provide an alternative implementation with::
|
* - With EXTERNAL embedders can provide an alternative implementation with::
|
||||||
*
|
*
|
||||||
* PyHash_FuncDef PyHash_Func = {...};
|
* PyHash_FuncDef PyHash_Func = {...};
|
||||||
|
@ -128,10 +127,11 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
|
||||||
#define Py_HASH_EXTERNAL 0
|
#define Py_HASH_EXTERNAL 0
|
||||||
#define Py_HASH_SIPHASH24 1
|
#define Py_HASH_SIPHASH24 1
|
||||||
#define Py_HASH_FNV 2
|
#define Py_HASH_FNV 2
|
||||||
|
#define Py_HASH_SIPHASH13 3
|
||||||
|
|
||||||
#ifndef Py_HASH_ALGORITHM
|
#ifndef Py_HASH_ALGORITHM
|
||||||
# ifndef HAVE_ALIGNED_REQUIRED
|
# ifndef HAVE_ALIGNED_REQUIRED
|
||||||
# define Py_HASH_ALGORITHM Py_HASH_SIPHASH24
|
# define Py_HASH_ALGORITHM Py_HASH_SIPHASH13
|
||||||
# else
|
# else
|
||||||
# define Py_HASH_ALGORITHM Py_HASH_FNV
|
# define Py_HASH_ALGORITHM Py_HASH_FNV
|
||||||
# endif /* uint64_t && uint32_t && aligned */
|
# endif /* uint64_t && uint32_t && aligned */
|
||||||
|
|
|
@ -42,8 +42,8 @@ def pysiphash(uint64):
|
||||||
|
|
||||||
def skip_unless_internalhash(test):
|
def skip_unless_internalhash(test):
|
||||||
"""Skip decorator for tests that depend on SipHash24 or FNV"""
|
"""Skip decorator for tests that depend on SipHash24 or FNV"""
|
||||||
ok = sys.hash_info.algorithm in {"fnv", "siphash24"}
|
ok = sys.hash_info.algorithm in {"fnv", "siphash13", "siphash24"}
|
||||||
msg = "Requires SipHash24 or FNV"
|
msg = "Requires SipHash13, SipHash24 or FNV"
|
||||||
return test if ok else unittest.skip(msg)(test)
|
return test if ok else unittest.skip(msg)(test)
|
||||||
|
|
||||||
|
|
||||||
|
@ -206,6 +206,19 @@ class StringlikeHashRandomizationTests(HashRandomizationTests):
|
||||||
# seed 42, 'abc'
|
# seed 42, 'abc'
|
||||||
[-678966196, 573763426263223372, -820489388, -4282905804826039665],
|
[-678966196, 573763426263223372, -820489388, -4282905804826039665],
|
||||||
],
|
],
|
||||||
|
'siphash13': [
|
||||||
|
# NOTE: PyUCS2 layout depends on endianness
|
||||||
|
# seed 0, 'abc'
|
||||||
|
[69611762, -4594863902769663758, 69611762, -4594863902769663758],
|
||||||
|
# seed 42, 'abc'
|
||||||
|
[-975800855, 3869580338025362921, -975800855, 3869580338025362921],
|
||||||
|
# seed 42, 'abcdefghijk'
|
||||||
|
[-595844228, 7764564197781545852, -595844228, 7764564197781545852],
|
||||||
|
# seed 0, 'äú∑ℇ'
|
||||||
|
[-1093288643, -2810468059467891395, -1041341092, 4925090034378237276],
|
||||||
|
# seed 42, 'äú∑ℇ'
|
||||||
|
[-585999602, -2845126246016066802, -817336969, -2219421378907968137],
|
||||||
|
],
|
||||||
'siphash24': [
|
'siphash24': [
|
||||||
# NOTE: PyUCS2 layout depends on endianness
|
# NOTE: PyUCS2 layout depends on endianness
|
||||||
# seed 0, 'abc'
|
# seed 0, 'abc'
|
||||||
|
|
|
@ -351,8 +351,8 @@ class ImportTests(unittest.TestCase):
|
||||||
self.assertEqual(_frozen_importlib.__spec__.origin, "frozen")
|
self.assertEqual(_frozen_importlib.__spec__.origin, "frozen")
|
||||||
|
|
||||||
def test_source_hash(self):
|
def test_source_hash(self):
|
||||||
self.assertEqual(_imp.source_hash(42, b'hi'), b'\xc6\xe7Z\r\x03:}\xab')
|
self.assertEqual(_imp.source_hash(42, b'hi'), b'\xfb\xd9G\x05\xaf$\x9b~')
|
||||||
self.assertEqual(_imp.source_hash(43, b'hi'), b'\x85\x9765\xf8\x9a\x8b9')
|
self.assertEqual(_imp.source_hash(43, b'hi'), b'\xd0/\x87C\xccC\xff\xe2')
|
||||||
|
|
||||||
def test_pyc_invalidation_mode_from_cmdline(self):
|
def test_pyc_invalidation_mode_from_cmdline(self):
|
||||||
cases = [
|
cases = [
|
||||||
|
|
|
@ -508,7 +508,7 @@ class SysModuleTest(unittest.TestCase):
|
||||||
self.assertIsInstance(sys.hash_info.nan, int)
|
self.assertIsInstance(sys.hash_info.nan, int)
|
||||||
self.assertIsInstance(sys.hash_info.imag, int)
|
self.assertIsInstance(sys.hash_info.imag, int)
|
||||||
algo = sysconfig.get_config_var("Py_HASH_ALGORITHM")
|
algo = sysconfig.get_config_var("Py_HASH_ALGORITHM")
|
||||||
if sys.hash_info.algorithm in {"fnv", "siphash24"}:
|
if sys.hash_info.algorithm in {"fnv", "siphash13", "siphash24"}:
|
||||||
self.assertIn(sys.hash_info.hash_bits, {32, 64})
|
self.assertIn(sys.hash_info.hash_bits, {32, 64})
|
||||||
self.assertIn(sys.hash_info.seed_bits, {32, 64, 128})
|
self.assertIn(sys.hash_info.seed_bits, {32, 64, 128})
|
||||||
|
|
||||||
|
@ -516,8 +516,10 @@ class SysModuleTest(unittest.TestCase):
|
||||||
self.assertEqual(sys.hash_info.algorithm, "siphash24")
|
self.assertEqual(sys.hash_info.algorithm, "siphash24")
|
||||||
elif algo == 2:
|
elif algo == 2:
|
||||||
self.assertEqual(sys.hash_info.algorithm, "fnv")
|
self.assertEqual(sys.hash_info.algorithm, "fnv")
|
||||||
|
elif algo == 3:
|
||||||
|
self.assertEqual(sys.hash_info.algorithm, "siphash13")
|
||||||
else:
|
else:
|
||||||
self.assertIn(sys.hash_info.algorithm, {"fnv", "siphash24"})
|
self.assertIn(sys.hash_info.algorithm, {"fnv", "siphash13", "siphash24"})
|
||||||
else:
|
else:
|
||||||
# PY_HASH_EXTERNAL
|
# PY_HASH_EXTERNAL
|
||||||
self.assertEqual(algo, 0)
|
self.assertEqual(algo, 0)
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Add SipHash13 for string hash algorithm and use it by default.
|
|
@ -364,13 +364,66 @@ static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
|
||||||
d = ROTATE(d, t) ^ c; \
|
d = ROTATE(d, t) ^ c; \
|
||||||
a = ROTATE(a, 32);
|
a = ROTATE(a, 32);
|
||||||
|
|
||||||
#define DOUBLE_ROUND(v0,v1,v2,v3) \
|
#define SINGLE_ROUND(v0,v1,v2,v3) \
|
||||||
HALF_ROUND(v0,v1,v2,v3,13,16); \
|
|
||||||
HALF_ROUND(v2,v1,v0,v3,17,21); \
|
|
||||||
HALF_ROUND(v0,v1,v2,v3,13,16); \
|
HALF_ROUND(v0,v1,v2,v3,13,16); \
|
||||||
HALF_ROUND(v2,v1,v0,v3,17,21);
|
HALF_ROUND(v2,v1,v0,v3,17,21);
|
||||||
|
|
||||||
|
#define DOUBLE_ROUND(v0,v1,v2,v3) \
|
||||||
|
SINGLE_ROUND(v0,v1,v2,v3); \
|
||||||
|
SINGLE_ROUND(v0,v1,v2,v3);
|
||||||
|
|
||||||
|
|
||||||
|
static uint64_t
|
||||||
|
siphash13(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) {
|
||||||
|
uint64_t b = (uint64_t)src_sz << 56;
|
||||||
|
const uint8_t *in = (const uint8_t*)src;
|
||||||
|
|
||||||
|
uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
|
||||||
|
uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
|
||||||
|
uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
|
||||||
|
uint64_t v3 = k1 ^ 0x7465646279746573ULL;
|
||||||
|
|
||||||
|
uint64_t t;
|
||||||
|
uint8_t *pt;
|
||||||
|
|
||||||
|
while (src_sz >= 8) {
|
||||||
|
uint64_t mi;
|
||||||
|
memcpy(&mi, in, sizeof(mi));
|
||||||
|
mi = _le64toh(mi);
|
||||||
|
in += sizeof(mi);
|
||||||
|
src_sz -= sizeof(mi);
|
||||||
|
v3 ^= mi;
|
||||||
|
SINGLE_ROUND(v0,v1,v2,v3);
|
||||||
|
v0 ^= mi;
|
||||||
|
}
|
||||||
|
|
||||||
|
t = 0;
|
||||||
|
pt = (uint8_t *)&t;
|
||||||
|
switch (src_sz) {
|
||||||
|
case 7: pt[6] = in[6]; /* fall through */
|
||||||
|
case 6: pt[5] = in[5]; /* fall through */
|
||||||
|
case 5: pt[4] = in[4]; /* fall through */
|
||||||
|
case 4: memcpy(pt, in, sizeof(uint32_t)); break;
|
||||||
|
case 3: pt[2] = in[2]; /* fall through */
|
||||||
|
case 2: pt[1] = in[1]; /* fall through */
|
||||||
|
case 1: pt[0] = in[0]; /* fall through */
|
||||||
|
}
|
||||||
|
b |= _le64toh(t);
|
||||||
|
|
||||||
|
v3 ^= b;
|
||||||
|
SINGLE_ROUND(v0,v1,v2,v3);
|
||||||
|
v0 ^= b;
|
||||||
|
v2 ^= 0xff;
|
||||||
|
SINGLE_ROUND(v0,v1,v2,v3);
|
||||||
|
SINGLE_ROUND(v0,v1,v2,v3);
|
||||||
|
SINGLE_ROUND(v0,v1,v2,v3);
|
||||||
|
|
||||||
|
/* modified */
|
||||||
|
t = (v0 ^ v1) ^ (v2 ^ v3);
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
|
||||||
static uint64_t
|
static uint64_t
|
||||||
siphash24(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) {
|
siphash24(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) {
|
||||||
uint64_t b = (uint64_t)src_sz << 56;
|
uint64_t b = (uint64_t)src_sz << 56;
|
||||||
|
@ -419,14 +472,26 @@ siphash24(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) {
|
||||||
t = (v0 ^ v1) ^ (v2 ^ v3);
|
t = (v0 ^ v1) ^ (v2 ^ v3);
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
uint64_t
|
uint64_t
|
||||||
_Py_KeyedHash(uint64_t key, const void *src, Py_ssize_t src_sz)
|
_Py_KeyedHash(uint64_t key, const void *src, Py_ssize_t src_sz)
|
||||||
{
|
{
|
||||||
return siphash24(key, 0, src, src_sz);
|
return siphash13(key, 0, src, src_sz);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH13
|
||||||
|
static Py_hash_t
|
||||||
|
pysiphash(const void *src, Py_ssize_t src_sz) {
|
||||||
|
return (Py_hash_t)siphash13(
|
||||||
|
_le64toh(_Py_HashSecret.siphash.k0), _le64toh(_Py_HashSecret.siphash.k1),
|
||||||
|
src, src_sz);
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyHash_FuncDef PyHash_Func = {pysiphash, "siphash13", 64, 128};
|
||||||
|
#endif
|
||||||
|
|
||||||
#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
|
#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
|
||||||
static Py_hash_t
|
static Py_hash_t
|
||||||
pysiphash(const void *src, Py_ssize_t src_sz) {
|
pysiphash(const void *src, Py_ssize_t src_sz) {
|
||||||
|
|
|
@ -1561,9 +1561,9 @@ Optional Packages:
|
||||||
--with-undefined-behavior-sanitizer
|
--with-undefined-behavior-sanitizer
|
||||||
enable UndefinedBehaviorSanitizer undefined
|
enable UndefinedBehaviorSanitizer undefined
|
||||||
behaviour detector, 'ubsan' (default is no)
|
behaviour detector, 'ubsan' (default is no)
|
||||||
--with-hash-algorithm=[fnv|siphash24]
|
--with-hash-algorithm=[fnv|siphash13|siphash24]
|
||||||
select hash algorithm for use in Python/pyhash.c
|
select hash algorithm for use in Python/pyhash.c
|
||||||
(default is SipHash24)
|
(default is SipHash13)
|
||||||
--with-tzpath=<list of absolute paths separated by pathsep>
|
--with-tzpath=<list of absolute paths separated by pathsep>
|
||||||
Select the default time zone search path for zoneinfo.TZPATH
|
Select the default time zone search path for zoneinfo.TZPATH
|
||||||
|
|
||||||
|
@ -10431,6 +10431,10 @@ if test "${with_hash_algorithm+set}" = set; then :
|
||||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $withval" >&5
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $withval" >&5
|
||||||
$as_echo "$withval" >&6; }
|
$as_echo "$withval" >&6; }
|
||||||
case "$withval" in
|
case "$withval" in
|
||||||
|
siphash13)
|
||||||
|
$as_echo "#define Py_HASH_ALGORITHM 3" >>confdefs.h
|
||||||
|
|
||||||
|
;;
|
||||||
siphash24)
|
siphash24)
|
||||||
$as_echo "#define Py_HASH_ALGORITHM 1" >>confdefs.h
|
$as_echo "#define Py_HASH_ALGORITHM 1" >>confdefs.h
|
||||||
|
|
||||||
|
|
|
@ -3036,16 +3036,19 @@ fi
|
||||||
# str, bytes and memoryview hash algorithm
|
# str, bytes and memoryview hash algorithm
|
||||||
AH_TEMPLATE(Py_HASH_ALGORITHM,
|
AH_TEMPLATE(Py_HASH_ALGORITHM,
|
||||||
[Define hash algorithm for str, bytes and memoryview.
|
[Define hash algorithm for str, bytes and memoryview.
|
||||||
SipHash24: 1, FNV: 2, externally defined: 0])
|
SipHash24: 1, FNV: 2, SipHash13: 3, externally defined: 0])
|
||||||
|
|
||||||
AC_MSG_CHECKING(for --with-hash-algorithm)
|
AC_MSG_CHECKING(for --with-hash-algorithm)
|
||||||
dnl quadrigraphs "@<:@" and "@:>@" produce "[" and "]" in the output
|
dnl quadrigraphs "@<:@" and "@:>@" produce "[" and "]" in the output
|
||||||
AC_ARG_WITH(hash_algorithm,
|
AC_ARG_WITH(hash_algorithm,
|
||||||
AS_HELP_STRING([--with-hash-algorithm=@<:@fnv|siphash24@:>@],
|
AS_HELP_STRING([--with-hash-algorithm=@<:@fnv|siphash13|siphash24@:>@],
|
||||||
[select hash algorithm for use in Python/pyhash.c (default is SipHash24)]),
|
[select hash algorithm for use in Python/pyhash.c (default is SipHash13)]),
|
||||||
[
|
[
|
||||||
AC_MSG_RESULT($withval)
|
AC_MSG_RESULT($withval)
|
||||||
case "$withval" in
|
case "$withval" in
|
||||||
|
siphash13)
|
||||||
|
AC_DEFINE(Py_HASH_ALGORITHM, 3)
|
||||||
|
;;
|
||||||
siphash24)
|
siphash24)
|
||||||
AC_DEFINE(Py_HASH_ALGORITHM, 1)
|
AC_DEFINE(Py_HASH_ALGORITHM, 1)
|
||||||
;;
|
;;
|
||||||
|
|
|
@ -1439,7 +1439,7 @@
|
||||||
#undef Py_ENABLE_SHARED
|
#undef Py_ENABLE_SHARED
|
||||||
|
|
||||||
/* Define hash algorithm for str, bytes and memoryview. SipHash24: 1, FNV: 2,
|
/* Define hash algorithm for str, bytes and memoryview. SipHash24: 1, FNV: 2,
|
||||||
externally defined: 0 */
|
SipHash13: 3, externally defined: 0 */
|
||||||
#undef Py_HASH_ALGORITHM
|
#undef Py_HASH_ALGORITHM
|
||||||
|
|
||||||
/* Define if you want to enable tracing references for debugging purpose */
|
/* Define if you want to enable tracing references for debugging purpose */
|
||||||
|
|
Loading…
Reference in New Issue