Issue #19183: Implement PEP 456 'secure and interchangeable hash algorithm'.

Python now uses SipHash24 on all major platforms.
This commit is contained in:
Christian Heimes 2013-11-20 11:46:18 +01:00
parent fe32aec25a
commit 985ecdcfc2
27 changed files with 1029 additions and 242 deletions

View File

@ -594,9 +594,20 @@ always available.
| :const:`imag` | multiplier used for the imaginary part of a |
| | complex number |
+---------------------+--------------------------------------------------+
| :const:`algorithm` | name of the algorithm for hashing of str, bytes, |
| | and memoryview |
+---------------------+--------------------------------------------------+
| :const:`hash_bits` | internal output size of the hash algorithm |
+---------------------+--------------------------------------------------+
| :const:`seed_bits` | size of the seed key of the hash algorithm |
+---------------------+--------------------------------------------------+
.. versionadded:: 3.2
.. versionchanged:: 3.4
Added *algorithm*, *hash_bits* and *seed_bits*
.. data:: hexversion

View File

@ -609,6 +609,35 @@ the following note::
http://creativecommons.org/publicdomain/zero/1.0/
SipHash24
---------
The file :file:`Python/pyhash.c` contains Marek Majkowski's implementation of
Dan Bernstein's SipHash24 algorithm. It contains the following note::
<MIT License>
Copyright (c) 2013 Marek Majkowski <marek@popcount.org>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
</MIT License>
Original location:
https://github.com/majek/csiphash/
Solution inspired by code from:
Samuel Neves (supercop/crypto_auth/siphash24/little)
djb (supercop/crypto_auth/siphash24/little2)
Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)
strtod and dtoa
---------------

View File

@ -116,6 +116,7 @@ CPython implementation improvements:
* :ref:`PEP 442: Safe object finalization <pep-442>`
* :ref:`PEP 445: Configurable memory allocators <pep-445>`
* :pep:`456` Secure and interchangeable hash algorithm
* Improve finalization of Python modules to avoid setting their globals
to None, in most cases (:issue:`18214`).
* A more efficient :mod:`marshal` format (:issue:`16475`).

View File

@ -68,6 +68,7 @@
#include "object.h"
#include "objimpl.h"
#include "typeslots.h"
#include "pyhash.h"
#include "pydebug.h"

View File

@ -562,23 +562,6 @@ PyAPI_FUNC(PyObject *) PyObject_Dir(PyObject *);
PyAPI_FUNC(int) Py_ReprEnter(PyObject *);
PyAPI_FUNC(void) Py_ReprLeave(PyObject *);
/* Helpers for hash functions */
#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t);
#endif
typedef struct {
Py_hash_t prefix;
Py_hash_t suffix;
} _Py_HashSecret_t;
PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
#ifdef Py_DEBUG
PyAPI_DATA(int) _Py_HashSecret_Initialized;
#endif
/* Helper for passing objects to printf and the like */
#define PyObject_REPR(obj) _PyUnicode_AsString(PyObject_Repr(obj))

147
Include/pyhash.h Normal file
View File

@ -0,0 +1,147 @@
#ifndef Py_HASH_H
#define Py_HASH_H
#ifdef __cplusplus
extern "C" {
#endif
/* Helpers for hash functions */
#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
#endif
/* Prime multiplier used in string and various other hashes. */
#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */
/* Parameters used for the numeric hash implementation. See notes for
_Py_HashDouble in Objects/object.c. Numeric hashes are based on
reduction modulo the prime 2**_PyHASH_BITS - 1. */
#if SIZEOF_VOID_P >= 8
# define _PyHASH_BITS 61
#else
# define _PyHASH_BITS 31
#endif
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
#define _PyHASH_INF 314159
#define _PyHASH_NAN 0
#define _PyHASH_IMAG _PyHASH_MULTIPLIER
/* hash secret
*
* memory layout on 64 bit systems
* cccccccc cccccccc cccccccc uc -- unsigned char[24]
* pppppppp ssssssss ........ fnv -- two Py_hash_t
* k0k0k0k0 k1k1k1k1 ........ siphash -- two PY_UINT64_T
* ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t
* ........ ........ eeeeeeee pyexpat XML hash salt
*
* memory layout on 32 bit systems
* cccccccc cccccccc cccccccc uc
* ppppssss ........ ........ fnv -- two Py_hash_t
* k0k0k0k0 k1k1k1k1 ........ siphash -- two PY_UINT64_T (*)
* ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t
* ........ ........ eeee.... pyexpat XML hash salt
*
* (*) The siphash member may not be available on 32 bit platforms without
* an unsigned int64 data type.
*/
/* Hash secret shared by all hash consumers.
 *
 * Every member of the union is a different view of the same bytes; each
 * consumer reads only the member named after it.  The uc member forces the
 * union to be at least 24 bytes large.
 */
typedef union {
/* ensure 24 bytes */
unsigned char uc[24];
/* two Py_hash_t for FNV */
struct {
Py_hash_t prefix;
Py_hash_t suffix;
} fnv;
#ifdef PY_UINT64_T
/* two uint64 for SipHash24 */
/* only declared when the platform provides PY_UINT64_T */
struct {
PY_UINT64_T k0;
PY_UINT64_T k1;
} siphash;
#endif
/* a different (!) Py_hash_t for small string optimization */
/* the 16 bytes of padding place suffix behind the first 16 bytes of the
   secret */
struct {
unsigned char padding[16];
Py_hash_t suffix;
} djbx33a;
/* salt for pyexpat's XML hash; same offset (16) as djbx33a.suffix */
struct {
unsigned char padding[16];
Py_hash_t hashsalt;
} expat;
} _Py_HashSecret_t;
PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
#ifdef Py_DEBUG
PyAPI_DATA(int) _Py_HashSecret_Initialized;
#endif
/* hash function definition */
#ifndef Py_LIMITED_API
/* Description of the hash algorithm used for str, bytes and memoryview. */
typedef struct {
/* hash function: takes a buffer and its length in bytes */
Py_hash_t (*const hash)(const void *, Py_ssize_t);
/* name of the algorithm */
const char *name;
/* internal output size of the hash algorithm, in bits */
const int hash_bits;
/* size of the seed key of the hash algorithm, in bits */
const int seed_bits;
} PyHash_FuncDef;
PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
#endif
/* cutoff for small string DJBX33A optimization in range [1, cutoff).
*
* About 50% of the strings in a typical Python application are smaller than
* 6 to 7 chars. However DJBX33A is vulnerable to hash collision attacks.
* NEVER use DJBX33A for long strings!
*
* A Py_HASH_CUTOFF of 0 disables small string optimization. 32 bit platforms
* should use a smaller cutoff because it is easier to create colliding
* strings. A cutoff of 7 on 64bit platforms and 5 on 32bit platforms should
* provide a decent safety margin.
*/
/* Default to 0 (small string optimization disabled) unless the build
 * provides its own Py_HASH_CUTOFF; a provided value must lie in 0...7. */
#ifndef Py_HASH_CUTOFF
#  define Py_HASH_CUTOFF 0
#elif (Py_HASH_CUTOFF > 7 || Py_HASH_CUTOFF < 0)
#  error Py_HASH_CUTOFF must be in range 0...7.
#endif /* Py_HASH_CUTOFF */
/* hash algorithm selection
*
* The values for Py_HASH_SIPHASH24 and Py_HASH_FNV are hard-coded in the
* configure script.
*
* - FNV is available on all platforms and architectures.
* - SIPHASH24 only works on platforms that provide PY_UINT64_T and don't
* require aligned memory for integers.
* - With EXTERNAL embedders can provide an alternative implementation with::
*
* PyHash_FuncDef PyHash_Func = {...};
*
* XXX: Figure out __declspec() for extern PyHash_FuncDef.
*/
/* Identifiers of the selectable hash algorithms. */
#define Py_HASH_EXTERNAL 0
#define Py_HASH_SIPHASH24 1
#define Py_HASH_FNV 2

/* Pick a default only when the build did not choose an algorithm itself:
 * fall back to FNV when a 64 bit or 32 bit unsigned integer type is missing
 * or the platform requires aligned memory access, else use SipHash24. */
#if !defined(Py_HASH_ALGORITHM)
#  if (!defined(PY_UINT64_T) || !defined(PY_UINT32_T) \
       || defined(HAVE_ALIGNED_REQUIRED))
#    define Py_HASH_ALGORITHM Py_HASH_FNV
#  else
#    define Py_HASH_ALGORITHM Py_HASH_SIPHASH24
#  endif /* no uint64_t || no uint32_t || aligned access required */
#endif /* !Py_HASH_ALGORITHM */
#ifdef __cplusplus
}
#endif
#endif /* !Py_HASH_H */

View File

@ -144,23 +144,6 @@ Used in: PY_LONG_LONG
#endif
#endif
/* Prime multiplier used in string and various other hashes. */
#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */
/* Parameters used for the numeric hash implementation. See notes for
_Py_HashDouble in Objects/object.c. Numeric hashes are based on
reduction modulo the prime 2**_PyHASH_BITS - 1. */
#if SIZEOF_VOID_P >= 8
#define _PyHASH_BITS 61
#else
#define _PyHASH_BITS 31
#endif
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
#define _PyHASH_INF 314159
#define _PyHASH_NAN 0
#define _PyHASH_IMAG _PyHASH_MULTIPLIER
/* uintptr_t is the C9X name for an unsigned integral type such that a
* legitimate void* can be cast to uintptr_t and then back to void* again
* without loss of information. Similarly for intptr_t, wrt a signed
@ -199,8 +182,10 @@ typedef Py_intptr_t Py_ssize_t;
#endif
/* Py_hash_t is the same size as a pointer. */
#define SIZEOF_PY_HASH_T SIZEOF_SIZE_T
typedef Py_ssize_t Py_hash_t;
/* Py_uhash_t is the unsigned equivalent needed to calculate numeric hash. */
#define SIZEOF_PY_UHASH_T SIZEOF_SIZE_T
typedef size_t Py_uhash_t;
/* Largest possible value of size_t.

View File

@ -601,6 +601,8 @@ def main(tests=None, **kwargs):
print("==", platform.python_implementation(), *sys.version.split())
print("== ", platform.platform(aliased=True),
"%s-endian" % sys.byteorder)
print("== ", "hash algorithm:", sys.hash_info.algorithm,
"64bit" if sys.maxsize > 2**32 else "32bit")
print("== ", os.getcwd())
print("Testing with flags:", sys.flags)

View File

@ -12,6 +12,40 @@ from collections import Hashable
IS_64BIT = sys.maxsize > 2**32
def lcg(x, length=16):
    """Return `length` pseudo-random bytes from a linear congruential generator.

    A seed of 0 yields `length` zero bytes.  Otherwise each step advances the
    31-bit state and emits bits 16..23 of the new state as one output byte.
    """
    if x == 0:
        return bytes(length)
    buf = bytearray(length)
    state = x
    idx = 0
    while idx < length:
        state = (214013 * state + 2531011) & 0x7fffffff
        buf[idx] = (state >> 16) & 0xff
        idx += 1
    return bytes(buf)
def pysiphash(uint64):
    """Convert SipHash24 output to Py_hash_t

    Returns the pair (int32, int64): the signed 64 bit reading of the value
    and the signed 32 bit reading of its upper half XOR-ed into its lower
    half.
    """
    assert 0 <= uint64 < (1 << 64)
    int64_max = (1 << 63) - 1
    int32_max = (1 << 31) - 1
    # simple unsigned to signed int64
    int64 = uint64 - (1 << 64) if uint64 > int64_max else uint64
    # mangle uint64 to uint32
    folded = (uint64 ^ (uint64 >> 32)) & 0xffffffff
    # simple unsigned to signed int32
    int32 = folded - (1 << 32) if folded > int32_max else folded
    return int32, int64
def skip_unless_internalhash(test):
    """Skip decorator for tests that depend on SipHash24 or FNV"""
    if sys.hash_info.algorithm in {"fnv", "siphash24"}:
        # built-in algorithm in use: run the test unchanged
        return test
    return unittest.skip("Requires SipHash24 or FNV")(test)
class HashEqualityTestCase(unittest.TestCase):
@ -138,7 +172,7 @@ class HashRandomizationTests:
# an object to be tested
def get_hash_command(self, repr_):
return 'print(hash(%s))' % repr_
return 'print(hash(eval(%s.decode("utf-8"))))' % repr_.encode("utf-8")
def get_hash(self, repr_, seed=None):
env = os.environ.copy()
@ -161,12 +195,67 @@ class HashRandomizationTests:
self.assertNotEqual(run1, run2)
class StringlikeHashRandomizationTests(HashRandomizationTests):
repr_ = None
repr_long = None
# 32bit little, 64bit little, 32bit big, 64bit big
known_hashes = {
'djba33x': [ # only used for small strings
# seed 0, 'abc'
[193485960, 193485960, 193485960, 193485960],
# seed 42, 'abc'
[-678966196, 573763426263223372, -820489388, -4282905804826039665],
],
'siphash24': [
# seed 0, 'abc'
[2025351752, 4596069200710135518, 1433332804,
-3481057401533226760],
# seed 42, 'abc'
[-774632014, -4501618152524544106, 1054608210,
-1493500025205289231],
# seed 42, 'abcdefghijk'
[-1436007334, 4436719588892876975, -1436007334,
4436719588892876975],
# seed 0, 'äú∑ℇ', PyUCS2 layout depends on endianness
[1386693832, 5749986484189612790, 1776982909,
-5915111450199468540],
# seed 42, 'äú∑ℇ'
[1260387190, -2947981342227738144, 1430287772,
-4296699217652516017],
],
'fnv': [
# seed 0, 'abc'
[-1600925533, 1453079729188098211, -1600925533,
1453079729188098211],
# seed 42, 'abc'
[-206076799, -4410911502303878509, -1024014457,
-3570150969479994130],
# seed 42, 'abcdefghijk'
[811136751, -5046230049376118746, -77208053 ,
-4779029615281019666],
# seed 0, 'äú∑ℇ'
[44402817, 8998297579845987431, -1956240331,
-782697888614047887],
# seed 42, 'äú∑ℇ'
[-283066365, -4576729883824601543, -271871407, None],
]
}
def get_expected_hash(self, position, length):
    """Look up the known hash for this platform.

    `position` selects the row within the algorithm's table and `length` is
    the input length, which decides whether the small string algorithm
    applies.  Columns are: 32bit little, 64bit little, 32bit big, 64bit big.
    """
    if length < sys.hash_info.cutoff:
        table = self.known_hashes["djba33x"]
    else:
        table = self.known_hashes[sys.hash_info.algorithm]
    if sys.byteorder == 'little':
        first_column = 0
    else:
        assert(sys.byteorder == 'big')
        first_column = 2
    column = first_column + (1 if IS_64BIT else 0)
    return table[position][column]
def test_null_hash(self):
# PYTHONHASHSEED=0 disables the randomized hash
if IS_64BIT:
known_hash_of_obj = 1453079729188098211
else:
known_hash_of_obj = -1600925533
known_hash_of_obj = self.get_expected_hash(0, 3)
# Randomization is enabled by default:
self.assertNotEqual(self.get_hash(self.repr_), known_hash_of_obj)
@ -174,39 +263,53 @@ class StringlikeHashRandomizationTests(HashRandomizationTests):
# It can also be disabled by setting the seed to 0:
self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj)
@skip_unless_internalhash
def test_fixed_hash(self):
# test a fixed seed for the randomized hash
# Note that all types share the same values:
if IS_64BIT:
if sys.byteorder == 'little':
h = -4410911502303878509
else:
h = -3570150969479994130
else:
if sys.byteorder == 'little':
h = -206076799
else:
h = -1024014457
h = self.get_expected_hash(1, 3)
self.assertEqual(self.get_hash(self.repr_, seed=42), h)
@skip_unless_internalhash
def test_long_fixed_hash(self):
if self.repr_long is None:
return
h = self.get_expected_hash(2, 11)
self.assertEqual(self.get_hash(self.repr_long, seed=42), h)
class StrHashRandomizationTests(StringlikeHashRandomizationTests,
unittest.TestCase):
repr_ = repr('abc')
repr_long = repr('abcdefghijk')
repr_ucs2 = repr('äú∑ℇ')
@skip_unless_internalhash
def test_empty_string(self):
self.assertEqual(hash(""), 0)
@skip_unless_internalhash
def test_ucs2_string(self):
h = self.get_expected_hash(3, 6)
self.assertEqual(self.get_hash(self.repr_ucs2, seed=0), h)
h = self.get_expected_hash(4, 6)
self.assertEqual(self.get_hash(self.repr_ucs2, seed=42), h)
class BytesHashRandomizationTests(StringlikeHashRandomizationTests,
unittest.TestCase):
repr_ = repr(b'abc')
repr_long = repr(b'abcdefghijk')
@skip_unless_internalhash
def test_empty_string(self):
self.assertEqual(hash(b""), 0)
class MemoryviewHashRandomizationTests(StringlikeHashRandomizationTests,
unittest.TestCase):
repr_ = "memoryview(b'abc')"
repr_long = "memoryview(b'abcdefghijk')"
@skip_unless_internalhash
def test_empty_string(self):
self.assertEqual(hash(memoryview(b"")), 0)
@ -224,5 +327,22 @@ class DatetimeTimeTests(DatetimeTests, unittest.TestCase):
repr_ = repr(datetime.time(0))
class HashDistributionTestCase(unittest.TestCase):
    """Sanity check that str hashes spread over the low output bits."""

    def test_hash_distribution(self):
        # check for hash collision
        base = "abcdefghabcdefg"
        for end in range(1, len(base)):
            prefix = base[:end]
            # hash the prefix followed by each of the first 256 code points
            hashes = [hash(prefix + chr(code)) for code in range(256)]
            low_nibbles = {h & 0xf for h in hashes}
            low_bytes = {h & 0xff for h in hashes}
            # SipHash24 distribution depends on key, usually > 60%
            self.assertGreater(len(low_nibbles), 8, prefix)
            self.assertGreater(len(low_bytes), 128, prefix)
if __name__ == "__main__":
unittest.main()

View File

@ -8,6 +8,7 @@ import operator
import codecs
import gc
import sysconfig
import platform
# count the number of test runs, used to create unique
# strings to intern in test_intern()
@ -431,7 +432,7 @@ class SysModuleTest(unittest.TestCase):
self.assertEqual(type(sys.int_info.sizeof_digit), int)
self.assertIsInstance(sys.hexversion, int)
self.assertEqual(len(sys.hash_info), 5)
self.assertEqual(len(sys.hash_info), 9)
self.assertLess(sys.hash_info.modulus, 2**sys.hash_info.width)
# sys.hash_info.modulus should be a prime; we do a quick
# probable primality test (doesn't exclude the possibility of
@ -446,6 +447,26 @@ class SysModuleTest(unittest.TestCase):
self.assertIsInstance(sys.hash_info.inf, int)
self.assertIsInstance(sys.hash_info.nan, int)
self.assertIsInstance(sys.hash_info.imag, int)
algo = sysconfig.get_config_var("PY_HASH_ALGORITHM")
if sys.hash_info.algorithm in {"fnv", "siphash24"}:
self.assertIn(sys.hash_info.hash_bits, {32, 64})
self.assertIn(sys.hash_info.seed_bits, {32, 64, 128})
if algo == 1:
self.assertEqual(sys.hash_info.algorithm, "siphash24")
elif algo == 2:
self.assertEqual(sys.hash_info.algorithm, "fnv")
else:
processor = platform.processor().lower()
if processor in {"sparc", "mips"}:
self.assertEqual(sys.hash_info.algorithm, "fnv")
else:
self.assertEqual(sys.hash_info.algorithm, "siphash24")
else:
# PY_HASH_EXTERNAL
self.assertEqual(algo, 0)
self.assertGreaterEqual(sys.hash_info.cutoff, 0)
self.assertLess(sys.hash_info.cutoff, 8)
self.assertIsInstance(sys.maxsize, int)
self.assertIsInstance(sys.maxunicode, int)

View File

@ -366,6 +366,7 @@ PYTHON_OBJS= \
Python/pyarena.o \
Python/pyctype.o \
Python/pyfpe.o \
Python/pyhash.o \
Python/pymath.o \
Python/pystate.o \
Python/pythonrun.o \
@ -868,6 +869,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/pydebug.h \
$(srcdir)/Include/pyerrors.h \
$(srcdir)/Include/pyfpe.h \
$(srcdir)/Include/pyhash.h \
$(srcdir)/Include/pymath.h \
$(srcdir)/Include/pygetopt.h \
$(srcdir)/Include/pymacro.h \

View File

@ -802,6 +802,7 @@ Nick Maclaren
Don MacMillen
Tomasz Maćkowiak
Steve Majewski
Marek Majkowski
Grzegorz Makarewicz
David Malcolm
Greg Malcolm

View File

@ -10,6 +10,9 @@ Projected release date: 2013-11-24
Core and Builtins
-----------------
- Issue #19183: Implement PEP 456 'secure and interchangeable hash algorithm'.
Python now uses SipHash24 on all major platforms.
- Issue #12892: The utf-16* and utf-32* encoders no longer allow surrogate code
points (U+D800-U+DFFF) to be encoded. The utf-32* decoders no longer decode
byte sequences that correspond to surrogate code points. The surrogatepass

View File

@ -1218,7 +1218,7 @@ newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
* has a backport of this feature where we also define XML_HAS_SET_HASH_SALT
* to indicate that we can still use it. */
XML_SetHashSalt(self->itself,
(unsigned long)_Py_HashSecret.prefix);
(unsigned long)_Py_HashSecret.expat.hashsalt);
#endif
XML_SetUserData(self->itself, (void *)self);
XML_SetUnknownEncodingHandler(self->itself,

View File

@ -897,7 +897,7 @@ bytes_hash(PyBytesObject *a)
{
if (a->ob_shash == -1) {
/* Can't fail */
a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
}
return a->ob_shash;
}

View File

@ -2742,7 +2742,7 @@ memory_hash(PyMemoryViewObject *self)
}
/* Can't fail */
self->hash = _Py_HashBytes((unsigned char *)mem, view->len);
self->hash = _Py_HashBytes(mem, view->len);
if (mem != view->buf)
PyMem_Free(mem);

View File

@ -731,150 +731,6 @@ PyObject_RichCompareBool(PyObject *v, PyObject *w, int op)
return ok;
}
/* Set of hash utility functions to help maintaining the invariant that
if a==b then hash(a)==hash(b)
All the utility functions (_Py_Hash*()) return "-1" to signify an error.
*/
/* For numeric types, the hash of a number x is based on the reduction
of x modulo the prime P = 2**_PyHASH_BITS - 1. It's designed so that
hash(x) == hash(y) whenever x and y are numerically equal, even if
x and y have different types.
A quick summary of the hashing strategy:
(1) First define the 'reduction of x modulo P' for any rational
number x; this is a standard extension of the usual notion of
reduction modulo P for integers. If x == p/q (written in lowest
terms), the reduction is interpreted as the reduction of p times
the inverse of the reduction of q, all modulo P; if q is exactly
divisible by P then define the reduction to be infinity. So we've
got a well-defined map
reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.
(2) Now for a rational number x, define hash(x) by:
reduce(x) if x >= 0
-reduce(-x) if x < 0
If the result of the reduction is infinity (this is impossible for
integers, floats and Decimals) then use the predefined hash value
_PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
_PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
hashes of float and Decimal infinities and nans.
A selling point for the above strategy is that it makes it possible
to compute hashes of decimal and binary floating-point numbers
efficiently, even if the exponent of the binary or decimal number
is large. The key point is that
reduce(x * y) == reduce(x) * reduce(y) (modulo _PyHASH_MODULUS)
provided that {reduce(x), reduce(y)} != {0, infinity}. The reduction of a
binary or decimal float is never infinity, since the denominator is a power
of 2 (for binary) or a divisor of a power of 10 (for decimal). So we have,
for nonnegative x,
reduce(x * 2**e) == reduce(x) * reduce(2**e) % _PyHASH_MODULUS
reduce(x * 10**e) == reduce(x) * reduce(10**e) % _PyHASH_MODULUS
and reduce(10**e) can be computed efficiently by the usual modular
exponentiation algorithm. For reduce(2**e) it's even better: since
P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication
by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits.
*/
Py_hash_t
_Py_HashDouble(double v)
{
int e, sign;
double m;
Py_uhash_t x, y;
if (!Py_IS_FINITE(v)) {
if (Py_IS_INFINITY(v))
return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
else
return _PyHASH_NAN;
}
m = frexp(v, &e);
sign = 1;
if (m < 0) {
sign = -1;
m = -m;
}
/* process 28 bits at a time; this should work well both for binary
and hexadecimal floating point. */
x = 0;
while (m) {
x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
m *= 268435456.0; /* 2**28 */
e -= 28;
y = (Py_uhash_t)m; /* pull out integer part */
m -= y;
x += y;
if (x >= _PyHASH_MODULUS)
x -= _PyHASH_MODULUS;
}
/* adjust for the exponent; first reduce it modulo _PyHASH_BITS */
e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);
x = x * sign;
if (x == (Py_uhash_t)-1)
x = (Py_uhash_t)-2;
return (Py_hash_t)x;
}
Py_hash_t
_Py_HashPointer(void *p)
{
Py_hash_t x;
size_t y = (size_t)p;
/* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid
excessive hash collisions for dicts and sets */
y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4));
x = (Py_hash_t)y;
if (x == -1)
x = -2;
return x;
}
Py_hash_t
_Py_HashBytes(unsigned char *p, Py_ssize_t len)
{
Py_uhash_t x;
Py_ssize_t i;
/*
We make the hash of the empty string be 0, rather than using
(prefix ^ suffix), since this slightly obfuscates the hash secret
*/
#ifdef Py_DEBUG
assert(_Py_HashSecret_Initialized);
#endif
if (len == 0) {
return 0;
}
x = (Py_uhash_t) _Py_HashSecret.prefix;
x ^= (Py_uhash_t) *p << 7;
for (i = 0; i < len; i++)
x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
x ^= (Py_uhash_t) len;
x ^= (Py_uhash_t) _Py_HashSecret.suffix;
if (x == -1)
x = -2;
return x;
}
Py_hash_t
PyObject_HashNotImplemented(PyObject *v)
{
@ -883,8 +739,6 @@ PyObject_HashNotImplemented(PyObject *v)
return -1;
}
_Py_HashSecret_t _Py_HashSecret;
Py_hash_t
PyObject_Hash(PyObject *v)
{

View File

@ -11386,39 +11386,8 @@ unicode_hash(PyObject *self)
_PyUnicode_HASH(self) = 0;
return 0;
}
/* The hash function as a macro, gets expanded three times below. */
#define HASH(P) \
x ^= (Py_uhash_t) *P << 7; \
while (--len >= 0) \
x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *P++; \
x = (Py_uhash_t) _Py_HashSecret.prefix;
switch (PyUnicode_KIND(self)) {
case PyUnicode_1BYTE_KIND: {
const unsigned char *c = PyUnicode_1BYTE_DATA(self);
HASH(c);
break;
}
case PyUnicode_2BYTE_KIND: {
const Py_UCS2 *s = PyUnicode_2BYTE_DATA(self);
HASH(s);
break;
}
default: {
Py_UCS4 *l;
assert(PyUnicode_KIND(self) == PyUnicode_4BYTE_KIND &&
"Impossible switch case in unicode_hash");
l = PyUnicode_4BYTE_DATA(self);
HASH(l);
break;
}
}
x ^= (Py_uhash_t) PyUnicode_GET_LENGTH(self);
x ^= (Py_uhash_t) _Py_HashSecret.suffix;
if (x == -1)
x = -2;
x = _Py_HashBytes(PyUnicode_DATA(self),
PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));
_PyUnicode_HASH(self) = x;
return x;
}

View File

@ -412,6 +412,7 @@
<ClInclude Include="..\Include\patchlevel.h" />
<ClInclude Include="..\Include\pgen.h" />
<ClInclude Include="..\Include\pgenheaders.h" />
<ClInclude Include="..\Include\pyhash.h" />
<ClInclude Include="..\Include\py_curses.h" />
<ClInclude Include="..\Include\pyarena.h" />
<ClInclude Include="..\Include\pycapsule.h" />
@ -616,6 +617,7 @@
<ClCompile Include="..\PC\dl_nt.c" />
<ClCompile Include="..\PC\getpathp.c" />
<ClCompile Include="..\PC\msvcrtmodule.c" />
<ClCompile Include="..\Python\pyhash.c" />
<ClCompile Include="..\Python\random.c" />
<ClCompile Include="..\Python\_warnings.c" />
<ClCompile Include="..\Python\asdl.c" />

View File

@ -421,6 +421,9 @@
<ClInclude Include="..\Python\ceval_gil.h">
<Filter>Python</Filter>
</ClInclude>
<ClInclude Include="..\Include\pyhash.h">
<Filter>Include</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\Modules\_bisectmodule.c">
@ -931,6 +934,9 @@
<ClCompile Include="..\Modules\_stat.c">
<Filter>Modules</Filter>
</ClCompile>
<ClCompile Include="..\Python\pyhash.c">
<Filter>Python</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="..\PC\python_nt.rc">

430
Python/pyhash.c Normal file
View File

@ -0,0 +1,430 @@
/* Set of hash utility functions to help maintaining the invariant that
if a==b then hash(a)==hash(b)
All the utility functions (_Py_Hash*()) return "-1" to signify an error.
*/
#include "Python.h"
#ifdef __APPLE__
# include <libkern/OSByteOrder.h>
#elif defined(HAVE_LE64TOH) && defined(HAVE_ENDIAN_H)
# include <endian.h>
#elif defined(HAVE_LE64TOH) && defined(HAVE_SYS_ENDIAN_H)
# include <sys/endian.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
_Py_HashSecret_t _Py_HashSecret;
#if Py_HASH_ALGORITHM == Py_HASH_EXTERNAL
extern PyHash_FuncDef PyHash_Func;
#else
static PyHash_FuncDef PyHash_Func;
#endif
/* Count _Py_HashBytes() calls */
#ifdef Py_HASH_STATS
#define Py_HASH_STATS_MAX 32
static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
#endif
/* For numeric types, the hash of a number x is based on the reduction
of x modulo the prime P = 2**_PyHASH_BITS - 1. It's designed so that
hash(x) == hash(y) whenever x and y are numerically equal, even if
x and y have different types.
A quick summary of the hashing strategy:
(1) First define the 'reduction of x modulo P' for any rational
number x; this is a standard extension of the usual notion of
reduction modulo P for integers. If x == p/q (written in lowest
terms), the reduction is interpreted as the reduction of p times
the inverse of the reduction of q, all modulo P; if q is exactly
divisible by P then define the reduction to be infinity. So we've
got a well-defined map
reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.
(2) Now for a rational number x, define hash(x) by:
reduce(x) if x >= 0
-reduce(-x) if x < 0
If the result of the reduction is infinity (this is impossible for
integers, floats and Decimals) then use the predefined hash value
_PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
_PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
hashes of float and Decimal infinities and nans.
A selling point for the above strategy is that it makes it possible
to compute hashes of decimal and binary floating-point numbers
efficiently, even if the exponent of the binary or decimal number
is large. The key point is that
reduce(x * y) == reduce(x) * reduce(y) (modulo _PyHASH_MODULUS)
provided that {reduce(x), reduce(y)} != {0, infinity}. The reduction of a
binary or decimal float is never infinity, since the denominator is a power
of 2 (for binary) or a divisor of a power of 10 (for decimal). So we have,
for nonnegative x,
reduce(x * 2**e) == reduce(x) * reduce(2**e) % _PyHASH_MODULUS
reduce(x * 10**e) == reduce(x) * reduce(10**e) % _PyHASH_MODULUS
and reduce(10**e) can be computed efficiently by the usual modular
exponentiation algorithm. For reduce(2**e) it's even better: since
P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication
by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits.
*/
/* Hash a C double by reducing it modulo _PyHASH_MODULUS.
 *
 * Infinities hash to +/-_PyHASH_INF and NaN to _PyHASH_NAN.  The result is
 * never -1: a computed -1 is replaced by -2 before returning.
 */
Py_hash_t
_Py_HashDouble(double v)
{
int e, sign;
double m;
Py_uhash_t x, y;
/* non-finite values have fixed hashes */
if (!Py_IS_FINITE(v)) {
if (Py_IS_INFINITY(v))
return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
else
return _PyHASH_NAN;
}
/* split v into mantissa m and binary exponent e; hash the magnitude and
   remember the sign for later */
m = frexp(v, &e);
sign = 1;
if (m < 0) {
sign = -1;
m = -m;
}
/* process 28 bits at a time; this should work well both for binary
and hexadecimal floating point. */
x = 0;
while (m) {
/* rotate x left by 28 bits within its _PyHASH_BITS-wide field */
x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
m *= 268435456.0; /* 2**28 */
e -= 28;
y = (Py_uhash_t)m; /* pull out integer part */
m -= y;
x += y;
/* keep x below the modulus */
if (x >= _PyHASH_MODULUS)
x -= _PyHASH_MODULUS;
}
/* adjust for the exponent; first reduce it modulo _PyHASH_BITS */
e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
/* rotate x left by e bits within its _PyHASH_BITS-wide field */
x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);
/* reapply the sign that was stripped from the mantissa */
x = x * sign;
/* never hand out -1 as a hash value */
if (x == (Py_uhash_t)-1)
x = (Py_uhash_t)-2;
return (Py_hash_t)x;
}
Py_hash_t
_Py_HashPointer(void *p)
{
Py_hash_t x;
size_t y = (size_t)p;
/* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid
excessive hash collisions for dicts and sets */
y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4));
x = (Py_hash_t)y;
if (x == -1)
x = -2;
return x;
}
/* Hash len bytes starting at src.
 *
 * An empty buffer always hashes to 0.  When Py_HASH_CUTOFF is greater than
 * 0, buffers shorter than the cutoff are hashed with an inline DJBX33A;
 * everything else goes through PyHash_Func.hash.  The result is never -1:
 * a computed -1 is replaced by -2.
 */
Py_hash_t
_Py_HashBytes(const void *src, Py_ssize_t len)
{
Py_hash_t x;
/*
We make the hash of the empty string be 0, rather than using
(prefix ^ suffix), since this slightly obfuscates the hash secret
*/
if (len == 0) {
return 0;
}
#ifdef Py_HASH_STATS
/* one counter per length up to Py_HASH_STATS_MAX; slot 0 collects all
   longer inputs (length 0 has already returned above) */
hashstats[(len <= Py_HASH_STATS_MAX) ? len : 0]++;
#endif
#if Py_HASH_CUTOFF > 0
if (len < Py_HASH_CUTOFF) {
/* Optimize hashing of very small strings with inline DJBX33A. */
Py_uhash_t hash;
const unsigned char *p = src;
hash = 5381; /* DJBX33A starts with 5381 */
/* enter at the case matching len and fall through, so that exactly
   len bytes are mixed in */
switch(len) {
/* ((hash << 5) + hash) + *p == hash * 33 + *p */
case 7: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
case 6: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
case 5: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
case 4: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
case 3: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
case 2: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
case 1: hash = ((hash << 5) + hash) + *p++; break;
default:
assert(0);
}
/* mix in the length and the secret reserved for the small string path */
hash ^= len;
hash ^= (Py_uhash_t) _Py_HashSecret.djbx33a.suffix;
x = (Py_hash_t)hash;
}
else
#endif /* Py_HASH_CUTOFF */
/* this statement is the body of the 'else' above when the cutoff is
   enabled, and runs unconditionally otherwise */
x = PyHash_Func.hash(src, len);
/* never hand out -1 as a hash value */
if (x == -1)
return -2;
return x;
}
/* Print the _Py_HashBytes() call statistics to stderr.
 *
 * Does nothing unless Py_HASH_STATS is defined.  One line is printed per
 * length 1..Py_HASH_STATS_MAX with its call count and a running total,
 * followed by a final line for the counter kept in slot 0.
 */
void
_PyHash_Fini(void)
{
#ifdef Py_HASH_STATS
    Py_ssize_t running = 0;
    int bucket;

    fprintf(stderr, "len calls total\n");
    for (bucket = 1; bucket <= Py_HASH_STATS_MAX; bucket++) {
        running += hashstats[bucket];
        fprintf(stderr,
                "%2i %8" PY_FORMAT_SIZE_T "d %8" PY_FORMAT_SIZE_T "d\n",
                bucket, hashstats[bucket], running);
    }
    running += hashstats[0];
    fprintf(stderr, "> %8" PY_FORMAT_SIZE_T "d %8" PY_FORMAT_SIZE_T "d\n",
            hashstats[0], running);
#endif
}
/* Return a pointer to the definition of the hash function in use. */
PyHash_FuncDef *
PyHash_GetFuncDef(void)
{
    PyHash_FuncDef *funcdef = &PyHash_Func;

    return funcdef;
}
/* PY_UHASH_CPY(dst, src): copy SIZEOF_PY_UHASH_T bytes from src to dst.
 *
 * With MSVC the copy is spelled out byte by byte (optimized memcpy() for
 * Windows); elsewhere it is a plain memcpy().  Parameters are parenthesized
 * so that expression arguments such as `buf + 1` index correctly.  The MSVC
 * form evaluates its arguments several times, so arguments must not have
 * side effects.
 */
#ifdef _MSC_VER
#  if SIZEOF_PY_UHASH_T == 4
#    define PY_UHASH_CPY(dst, src) do {                                    \
       (dst)[0] = (src)[0]; (dst)[1] = (src)[1];                           \
       (dst)[2] = (src)[2]; (dst)[3] = (src)[3];                           \
     } while(0)
#  elif SIZEOF_PY_UHASH_T == 8
#    define PY_UHASH_CPY(dst, src) do {                                    \
       (dst)[0] = (src)[0]; (dst)[1] = (src)[1];                           \
       (dst)[2] = (src)[2]; (dst)[3] = (src)[3];                           \
       (dst)[4] = (src)[4]; (dst)[5] = (src)[5];                           \
       (dst)[6] = (src)[6]; (dst)[7] = (src)[7];                           \
     } while(0)
#  else
#    error SIZEOF_PY_UHASH_T must be 4 or 8
#  endif /* SIZEOF_PY_UHASH_T */
#else /* not Windows */
#  define PY_UHASH_CPY(dst, src) memcpy((dst), (src), SIZEOF_PY_UHASH_T)
#endif /* _MSC_VER */
#if Py_HASH_ALGORITHM == Py_HASH_FNV
/* **************************************************************************
* Modified Fowler-Noll-Vo (FNV) hash function
*/
/* Modified FNV hash of len bytes starting at src.
 *
 * The input is consumed in Py_uhash_t sized blocks followed by a byte by
 * byte tail of 1..SIZEOF_PY_UHASH_T bytes.  The secret prefix seeds the
 * state; the length and the secret suffix are mixed in at the end.
 *
 * Returns 0 for an empty (or negative-length) input without touching src,
 * matching the empty-input convention of _Py_HashBytes().  The result is
 * never -1: a computed -1 is replaced by -2.
 */
static Py_hash_t
fnv(const void *src, Py_ssize_t len)
{
    const unsigned char *p = src;
    Py_uhash_t x;
    Py_ssize_t remainder, blocks;
    union {
        Py_uhash_t value;
        unsigned char bytes[SIZEOF_PY_UHASH_T];
    } block;

#ifdef Py_DEBUG
    assert(_Py_HashSecret_Initialized);
#endif
    /* Without this guard len == 0 gives remainder == SIZEOF_PY_UHASH_T and
     * blocks == -1, so the block loop below would never terminate properly
     * and would read far past the buffer; *p would also be read from an
     * empty buffer. */
    if (len <= 0) {
        return 0;
    }

    remainder = len % SIZEOF_PY_UHASH_T;
    if (remainder == 0) {
        /* Process at least one block byte by byte to reduce hash collisions
         * for strings with common prefixes. */
        remainder = SIZEOF_PY_UHASH_T;
    }
    blocks = (len - remainder) / SIZEOF_PY_UHASH_T;

    x = (Py_uhash_t) _Py_HashSecret.fnv.prefix;
    x ^= (Py_uhash_t) *p << 7;
    while (blocks--) {
        /* copy into the union instead of casting p: p may be unaligned */
        PY_UHASH_CPY(block.bytes, p);
        x = (_PyHASH_MULTIPLIER * x) ^ block.value;
        p += SIZEOF_PY_UHASH_T;
    }
    /* add remainder */
    for (; remainder > 0; remainder--)
        x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
    x ^= (Py_uhash_t) len;
    x ^= (Py_uhash_t) _Py_HashSecret.fnv.suffix;
    /* never hand out -1 as a hash value */
    if (x == (Py_uhash_t)-1) {
        x = (Py_uhash_t)-2;
    }
    return (Py_hash_t)x;
}
/* Descriptor handed out by PyHash_GetFuncDef() when the FNV variant is
 * compiled in: the hash entry point, its name, and two sizes expressed as
 * 8 and 16 times SIZEOF_PY_HASH_T.
 * NOTE(review): the two numbers are presumably hash_bits followed by
 * seed_bits (one word of output, prefix + suffix words of seed) -- confirm
 * against the PyHash_FuncDef declaration. */
static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
16 * SIZEOF_PY_HASH_T};
#endif /* Py_HASH_ALGORITHM == Py_HASH_FNV */
#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
/* **************************************************************************
<MIT License>
Copyright (c) 2013 Marek Majkowski <marek@popcount.org>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
</MIT License>
Original location:
https://github.com/majek/csiphash/
Solution inspired by code from:
Samuel Neves (supercop/crypto_auth/siphash24/little)
djb (supercop/crypto_auth/siphash24/little2)
Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)
Modified for Python by Christian Heimes:
- C89 / MSVC compatibility
- PY_UINT64_T, PY_UINT32_T and PY_UINT8_T
- _rotl64() on Windows
- letoh64() fallback
*/
/* Unsigned byte type used for the byte-wise accesses in the SipHash code. */
typedef unsigned char PY_UINT8_T;
/* _le64toh(x): byte swap little endian to host endian.
 *
 * Endian conversion not only ensures that the hash function returns the same
 * value on all platforms. It is also required for a good dispersion of
 * the hash values' least significant bits.
 *
 * Selection order: a plain cast when PY_LITTLE_ENDIAN is true, then
 * OSSwapLittleToHostInt64() on Apple, then le64toh() when HAVE_LETOH64 is
 * defined, otherwise a portable shift-and-mask byte swap.  The fallback
 * expands its argument eight times, so the argument must be free of side
 * effects.
 *
 * NOTE(review): the configure check for le64toh() appears to define
 * HAVE_HTOLE64 rather than HAVE_LETOH64, in which case the le64toh() branch
 * below is never selected and big-endian non-Apple builds always use the
 * fallback -- confirm which macro name is intended.
 */
#if PY_LITTLE_ENDIAN
# define _le64toh(x) ((PY_UINT64_T)(x))
#elif defined(__APPLE__)
# define _le64toh(x) OSSwapLittleToHostInt64(x)
#elif defined(HAVE_LETOH64)
# define _le64toh(x) le64toh(x)
#else
# define _le64toh(x) (((PY_UINT64_T)(x) << 56) | \
(((PY_UINT64_T)(x) << 40) & 0xff000000000000ULL) | \
(((PY_UINT64_T)(x) << 24) & 0xff0000000000ULL) | \
(((PY_UINT64_T)(x) << 8) & 0xff00000000ULL) | \
(((PY_UINT64_T)(x) >> 8) & 0xff000000ULL) | \
(((PY_UINT64_T)(x) >> 24) & 0xff0000ULL) | \
(((PY_UINT64_T)(x) >> 40) & 0xff00ULL) | \
((PY_UINT64_T)(x) >> 56))
#endif
/* ROTATE(x, b): rotate the 64-bit value x left by b bits.
 * MSVC uses the _rotl64() intrinsic; elsewhere the rotation is spelled with
 * two shifts, so b must stay within 1..63 (this file uses 13, 16, 17, 21
 * and 32). */
#ifdef _MSC_VER
# define ROTATE(x, b) _rotl64(x, b)
#else
# define ROTATE(x, b) (PY_UINT64_T)( ((x) << (b)) | ( (x) >> (64 - (b))) )
#endif

/* HALF_ROUND(a, b, c, d, s, t): one half of a SipRound.
 * Updates the four state words in place: a += b, c += d, then b and d are
 * rotated by s and t and XOR-ed with the new a and c, and a is rotated by 32.
 * All four state arguments must be modifiable lvalues.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and stays correct under an unbraced if/else or loop; the
 * caller supplies the terminating semicolon. */
#define HALF_ROUND(a,b,c,d,s,t) do { \
    a += b; c += d; \
    b = ROTATE(b, s) ^ a; \
    d = ROTATE(d, t) ^ c; \
    a = ROTATE(a, 32); \
} while (0)

/* DOUBLE_ROUND(v0, v1, v2, v3): two full SipRounds (four half rounds) over
 * the state words, updating them in place.  Like HALF_ROUND it expands to a
 * single statement and needs a trailing semicolon at the call site. */
#define DOUBLE_ROUND(v0,v1,v2,v3) do { \
    HALF_ROUND(v0,v1,v2,v3,13,16); \
    HALF_ROUND(v2,v1,v0,v3,17,21); \
    HALF_ROUND(v0,v1,v2,v3,13,16); \
    HALF_ROUND(v2,v1,v0,v3,17,21); \
} while (0)
/* SipHash-2-4 based hash of the src_sz bytes starting at src.
 *
 * The 128-bit key is taken from _Py_HashSecret.siphash.k0 / .k1 (read as
 * little-endian).  The input is absorbed in 8-byte little-endian words, one
 * DOUBLE_ROUND per word; the final word holds the remaining 0..7 bytes with
 * src_sz in its most significant byte.  Two further DOUBLE_ROUNDs finalize
 * the state.
 *
 * Input bytes are loaded with memcpy() rather than through casted
 * PY_UINT64_T / PY_UINT32_T pointers, so src may have any alignment and no
 * object is accessed through an incompatible pointer type.
 *
 * Returns the XOR of the four state words converted to Py_hash_t; when
 * SIZEOF_VOID_P is 4 the upper 32 bits are first folded into the lower ones.
 * No value is special-cased here (in particular -1 is not remapped).
 */
static Py_hash_t
siphash24(const void *src, Py_ssize_t src_sz) {
    PY_UINT64_T k0 = _le64toh(_Py_HashSecret.siphash.k0);
    PY_UINT64_T k1 = _le64toh(_Py_HashSecret.siphash.k1);
    /* total length goes into the top byte of the last message word */
    PY_UINT64_T b = (PY_UINT64_T)src_sz << 56;
    const PY_UINT8_T *in = (const PY_UINT8_T *)src;

    PY_UINT64_T v0 = k0 ^ 0x736f6d6570736575ULL;
    PY_UINT64_T v1 = k1 ^ 0x646f72616e646f6dULL;
    PY_UINT64_T v2 = k0 ^ 0x6c7967656e657261ULL;
    PY_UINT64_T v3 = k1 ^ 0x7465646279746573ULL;

    PY_UINT64_T t;
    PY_UINT8_T *pt;

    /* full 8-byte words */
    while (src_sz >= 8) {
        PY_UINT64_T mi;
        memcpy(&mi, in, sizeof(mi));   /* alignment-safe load */
        mi = _le64toh(mi);
        in += 8;
        src_sz -= 8;
        v3 ^= mi;
        DOUBLE_ROUND(v0,v1,v2,v3);
        v0 ^= mi;
    }

    /* remaining 0..7 bytes, copied into the low-address bytes of t */
    t = 0;
    pt = (PY_UINT8_T *)&t;
    switch (src_sz) {
        case 7: pt[6] = in[6]; /* fall through */
        case 6: pt[5] = in[5]; /* fall through */
        case 5: pt[4] = in[4]; /* fall through */
        case 4: memcpy(pt, in, 4); break;
        case 3: pt[2] = in[2]; /* fall through */
        case 2: pt[1] = in[1]; /* fall through */
        case 1: pt[0] = in[0]; /* fall through */
        default: break;
    }
    b |= _le64toh(t);

    v3 ^= b;
    DOUBLE_ROUND(v0,v1,v2,v3);
    v0 ^= b;
    v2 ^= 0xff;
    DOUBLE_ROUND(v0,v1,v2,v3);
    DOUBLE_ROUND(v0,v1,v2,v3);

    /* modified */
    t = (v0 ^ v1) ^ (v2 ^ v3);
#if SIZEOF_VOID_P == 4
    t ^= (t >> 32);
#endif
    return (Py_hash_t)t;
}
/* Descriptor handed out by PyHash_GetFuncDef() when SipHash24 is compiled
 * in: the hash entry point, its name, and the sizes 64 and 128.
 * NOTE(review): the two numbers are presumably hash_bits (64-bit internal
 * output) followed by seed_bits (k0 + k1) -- confirm against the
 * PyHash_FuncDef declaration. */
static PyHash_FuncDef PyHash_Func = {siphash24, "siphash24", 64, 128};
#endif /* Py_HASH_ALGORITHM == Py_HASH_SIPHASH24 */
#ifdef __cplusplus
}
#endif

View File

@ -104,6 +104,7 @@ extern int _PyLong_Init(void);
extern void PyLong_Fini(void);
extern int _PyFaulthandler_Init(void);
extern void _PyFaulthandler_Fini(void);
extern void _PyHash_Fini(void);
#ifdef WITH_THREAD
extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *);
@ -650,6 +651,8 @@ Py_Finalize(void)
#ifdef COUNT_ALLOCS
dump_counts(stdout);
#endif
/* dump hash stats */
_PyHash_Fini();
PRINT_TOTAL_REFS();

View File

@ -95,7 +95,7 @@ static int urandom_fd = -1;
/* Read size bytes from /dev/urandom into buffer.
Call Py_FatalError() on error. */
static void
dev_urandom_noraise(char *buffer, Py_ssize_t size)
dev_urandom_noraise(unsigned char *buffer, Py_ssize_t size)
{
int fd;
Py_ssize_t n;
@ -249,8 +249,9 @@ void
_PyRandom_Init(void)
{
char *env;
void *secret = &_Py_HashSecret;
unsigned char *secret = (unsigned char *)&_Py_HashSecret.uc;
Py_ssize_t secret_size = sizeof(_Py_HashSecret_t);
assert(secret_size == sizeof(_Py_HashSecret.uc));
if (_Py_HashSecret_Initialized)
return;
@ -278,17 +279,17 @@ _PyRandom_Init(void)
memset(secret, 0, secret_size);
}
else {
lcg_urandom(seed, (unsigned char*)secret, secret_size);
lcg_urandom(seed, secret, secret_size);
}
}
else {
#ifdef MS_WINDOWS
(void)win32_urandom((unsigned char *)secret, secret_size, 0);
(void)win32_urandom(secret, secret_size, 0);
#else /* #ifdef MS_WINDOWS */
# ifdef __VMS
vms_urandom((unsigned char *)secret, secret_size, 0);
vms_urandom(secret, secret_size, 0);
# else
dev_urandom_noraise((char*)secret, secret_size);
dev_urandom_noraise(secret, secret_size);
# endif
#endif
}

View File

@ -658,7 +658,7 @@ PyDoc_STRVAR(hash_info_doc,
"hash_info\n\
\n\
A struct sequence providing parameters used for computing\n\
numeric hashes. The attributes are read only.");
hashes. The attributes are read only.");
static PyStructSequence_Field hash_info_fields[] = {
{"width", "width of the type used for hashing, in bits"},
@ -667,6 +667,11 @@ static PyStructSequence_Field hash_info_fields[] = {
{"inf", "value to be used for hash of a positive infinity"},
{"nan", "value to be used for hash of a nan"},
{"imag", "multiplier used for the imaginary part of a complex number"},
{"algorithm", "name of the algorithm for hashing of str, bytes and "
"memoryviews"},
{"hash_bits", "internal output size of hash algorithm"},
{"seed_bits", "seed size of hash algorithm"},
{"cutoff", "small string optimization cutoff"},
{NULL, NULL}
};
@ -674,7 +679,7 @@ static PyStructSequence_Desc hash_info_desc = {
"sys.hash_info",
hash_info_doc,
hash_info_fields,
5,
9,
};
static PyObject *
@ -682,9 +687,11 @@ get_hash_info(void)
{
PyObject *hash_info;
int field = 0;
PyHash_FuncDef *hashfunc;
hash_info = PyStructSequence_New(&Hash_InfoType);
if (hash_info == NULL)
return NULL;
hashfunc = PyHash_GetFuncDef();
PyStructSequence_SET_ITEM(hash_info, field++,
PyLong_FromLong(8*sizeof(Py_hash_t)));
PyStructSequence_SET_ITEM(hash_info, field++,
@ -695,6 +702,14 @@ get_hash_info(void)
PyLong_FromLong(_PyHASH_NAN));
PyStructSequence_SET_ITEM(hash_info, field++,
PyLong_FromLong(_PyHASH_IMAG));
PyStructSequence_SET_ITEM(hash_info, field++,
PyUnicode_FromString(hashfunc->name));
PyStructSequence_SET_ITEM(hash_info, field++,
PyLong_FromLong(hashfunc->hash_bits));
PyStructSequence_SET_ITEM(hash_info, field++,
PyLong_FromLong(hashfunc->seed_bits));
PyStructSequence_SET_ITEM(hash_info, field++,
PyLong_FromLong(Py_HASH_CUTOFF));
if (PyErr_Occurred()) {
Py_CLEAR(hash_info);
return NULL;
@ -1338,6 +1353,7 @@ exec_prefix -- prefix used to find the machine-specific Python library\n\
executable -- absolute path of the executable binary of the Python interpreter\n\
float_info -- a struct sequence with information about the float implementation.\n\
float_repr_style -- string indicating the style of repr() output for floats\n\
hash_info -- a struct sequence with information about the hash algorithm.\n\
hexversion -- version information encoded as a single integer\n\
implementation -- Python implementation information.\n\
int_info -- a struct sequence with information about the int implementation.\n\

116
configure vendored
View File

@ -792,6 +792,7 @@ with_suffix
enable_shared
enable_profiling
with_pydebug
with_hash_algorithm
with_libs
with_system_expat
with_system_ffi
@ -1465,6 +1466,8 @@ Optional Packages:
compiler
--with-suffix=.exe set executable suffix
--with-pydebug build with Py_DEBUG defined
--with-hash-algorithm=[fnv|siphash24]
select hash algorithm
--with-libs='lib1 ...' link against additional libs
--with-system-expat build pyexpat module using an installed expat
library
@ -6956,7 +6959,8 @@ sys/param.h sys/select.h sys/sendfile.h sys/socket.h sys/statvfs.h \
sys/stat.h sys/syscall.h sys/sys_domain.h sys/termio.h sys/time.h \
sys/times.h sys/types.h sys/uio.h sys/un.h sys/utsname.h sys/wait.h pty.h \
libutil.h sys/resource.h netpacket/packet.h sysexits.h bluetooth.h \
bluetooth/bluetooth.h linux/tipc.h spawn.h util.h alloca.h
bluetooth/bluetooth.h linux/tipc.h spawn.h util.h alloca.h endian.h \
sys/endian.h
do :
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
@ -7330,6 +7334,43 @@ $as_echo "#define HAVE_MAKEDEV 1" >>confdefs.h
fi
# byte swapping
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for le64toh" >&5
$as_echo_n "checking for le64toh... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifdef HAVE_ENDIAN_H
#include <endian.h>
#elif defined(HAVE_SYS_ENDIAN_H)
#include <sys/endian.h>
#endif
int
main ()
{
le64toh(1)
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
ac_cv_has_le64toh=yes
else
ac_cv_has_le64toh=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_has_le64toh" >&5
$as_echo "$ac_cv_has_le64toh" >&6; }
if test "$ac_cv_has_le64toh" = "yes"; then
$as_echo "#define HAVE_HTOLE64 1" >>confdefs.h
fi
# Enabling LFS on Solaris (2.6 to 9) with gcc 2.95 triggers a bug in
# the system headers: If _XOPEN_SOURCE and _LARGEFILE_SOURCE are
# defined, but the compiler does not support pragma redefine_extname,
@ -8987,6 +9028,79 @@ rm -f core conftest.err conftest.$ac_objext \
*) ;;
esac
# check for systems that require aligned memory access
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking aligned memory access is required" >&5
$as_echo_n "checking aligned memory access is required... " >&6; }
if test "$cross_compiling" = yes; then :
aligned_required=yes
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int main()
{
char s[16];
int i, *p1, *p2;
for (i=0; i < 16; i++)
s[i] = i;
p1 = (int*)(s+1);
p2 = (int*)(s+2);
if (*p1 == *p2)
return 1;
return 0;
}
_ACEOF
if ac_fn_c_try_run "$LINENO"; then :
aligned_required=no
else
aligned_required=yes
fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
conftest.$ac_objext conftest.beam conftest.$ac_ext
fi
if test "$aligned_required" = yes ; then
$as_echo "#define HAVE_ALIGNED_REQUIRED 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $aligned_required" >&5
$as_echo "$aligned_required" >&6; }
# str, bytes and memoryview hash algorithm
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-hash-algorithm" >&5
$as_echo_n "checking for --with-hash-algorithm... " >&6; }
# Check whether --with-hash_algorithm was given.
if test "${with_hash_algorithm+set}" = set; then :
withval=$with_hash_algorithm;
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $withval" >&5
$as_echo "$withval" >&6; }
case "$withval" in
siphash24)
$as_echo "#define Py_HASH_ALGORITHM 1" >>confdefs.h
;;
fnv)
$as_echo "#define Py_HASH_ALGORITHM 2" >>confdefs.h
;;
*)
as_fn_error $? "unknown hash algorithm '$withval'" "$LINENO" 5
;;
esac
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: default" >&5
$as_echo "default" >&6; }
fi
# Most SVR4 platforms (e.g. Solaris) need -lsocket and -lnsl.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for t_open in -lnsl" >&5
$as_echo_n "checking for t_open in -lnsl... " >&6; }

View File

@ -1543,7 +1543,8 @@ sys/param.h sys/select.h sys/sendfile.h sys/socket.h sys/statvfs.h \
sys/stat.h sys/syscall.h sys/sys_domain.h sys/termio.h sys/time.h \
sys/times.h sys/types.h sys/uio.h sys/un.h sys/utsname.h sys/wait.h pty.h \
libutil.h sys/resource.h netpacket/packet.h sysexits.h bluetooth.h \
bluetooth/bluetooth.h linux/tipc.h spawn.h util.h alloca.h)
bluetooth/bluetooth.h linux/tipc.h spawn.h util.h alloca.h endian.h \
sys/endian.h)
CPPFLAGS=$ac_save_cppflags
AC_HEADER_DIRENT
AC_HEADER_MAJOR
@ -1614,6 +1615,22 @@ if test "$ac_cv_has_makedev" = "yes"; then
AC_DEFINE(HAVE_MAKEDEV, 1, [Define this if you have the makedev macro.])
fi
# byte swapping
AC_MSG_CHECKING(for le64toh)
AC_LINK_IFELSE([AC_LANG_PROGRAM([[
#ifdef HAVE_ENDIAN_H
#include <endian.h>
#elif defined(HAVE_SYS_ENDIAN_H)
#include <sys/endian.h>
#endif
]], [[
le64toh(1) ]])
],[ac_cv_has_le64toh=yes],[ac_cv_has_le64toh=no])
AC_MSG_RESULT($ac_cv_has_le64toh)
if test "$ac_cv_has_le64toh" = "yes"; then
AC_DEFINE(HAVE_HTOLE64, 1, [Define this if you have le64toh()])
fi
# Enabling LFS on Solaris (2.6 to 9) with gcc 2.95 triggers a bug in
# the system headers: If _XOPEN_SOURCE and _LARGEFILE_SOURCE are
# defined, but the compiler does not support pragma redefine_extname,
@ -2229,6 +2246,59 @@ case "$ac_sys_system" in
*) ;;
esac
# check for systems that require aligned memory access
AC_MSG_CHECKING(aligned memory access is required)
AC_TRY_RUN([
int main()
{
char s[16];
int i, *p1, *p2;
for (i=0; i < 16; i++)
s[i] = i;
p1 = (int*)(s+1);
p2 = (int*)(s+2);
if (*p1 == *p2)
return 1;
return 0;
}
],
[aligned_required=no],
[aligned_required=yes],
[aligned_required=yes])
if test "$aligned_required" = yes ; then
AC_DEFINE([HAVE_ALIGNED_REQUIRED], [1],
[Define if aligned memory access is required])
fi
AC_MSG_RESULT($aligned_required)
# str, bytes and memoryview hash algorithm
AH_TEMPLATE(Py_HASH_ALGORITHM,
[Define hash algorithm for str, bytes and memoryview.
SipHash24: 1, FNV: 2, externally defined: 0])
AC_MSG_CHECKING(for --with-hash-algorithm)
dnl quadrigraphs "@<:@" and "@:>@" produce "[" and "]" in the output
AC_ARG_WITH(hash_algorithm,
AS_HELP_STRING([--with-hash-algorithm=@<:@fnv|siphash24@:>@],
[select hash algorithm]),
[
AC_MSG_RESULT($withval)
case "$withval" in
siphash24)
AC_DEFINE(Py_HASH_ALGORITHM, 1)
;;
fnv)
AC_DEFINE(Py_HASH_ALGORITHM, 2)
;;
*)
AC_MSG_ERROR([unknown hash algorithm '$withval'])
;;
esac
],
[AC_MSG_RESULT(default)])
# Most SVR4 platforms (e.g. Solaris) need -lsocket and -lnsl.
AC_CHECK_LIB(nsl, t_open, [LIBS="-lnsl $LIBS"]) # SVR4
AC_CHECK_LIB(socket, socket, [LIBS="-lsocket $LIBS"], [], $LIBS) # SVR4 sockets

View File

@ -49,6 +49,9 @@
/* Define to 1 if you have the `alarm' function. */
#undef HAVE_ALARM
/* Define if aligned memory access is required */
#undef HAVE_ALIGNED_REQUIRED
/* Define to 1 if you have the <alloca.h> header file. */
#undef HAVE_ALLOCA_H
@ -199,6 +202,9 @@
/* Defined when any dynamic module loading is enabled. */
#undef HAVE_DYNAMIC_LOADING
/* Define to 1 if you have the <endian.h> header file. */
#undef HAVE_ENDIAN_H
/* Define if you have the 'epoll' functions. */
#undef HAVE_EPOLL
@ -408,6 +414,9 @@
/* Define if you have the 'hstrerror' function. */
#undef HAVE_HSTRERROR
/* Define this if you have le64toh() */
#undef HAVE_HTOLE64
/* Define to 1 if you have the `hypot' function. */
#undef HAVE_HYPOT
@ -927,6 +936,9 @@
*/
#undef HAVE_SYS_DIR_H
/* Define to 1 if you have the <sys/endian.h> header file. */
#undef HAVE_SYS_ENDIAN_H
/* Define to 1 if you have the <sys/epoll.h> header file. */
#undef HAVE_SYS_EPOLL_H
@ -1193,6 +1205,10 @@
/* Defined if Python is built as a shared library. */
#undef Py_ENABLE_SHARED
/* Define hash algorithm for str, bytes and memoryview. SipHash24: 1, FNV: 2,
externally defined: 0 */
#undef Py_HASH_ALGORITHM
/* assume C89 semantics that RETSIGTYPE is always void */
#undef RETSIGTYPE