diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst index d5531c4a755..3bf30bb22e3 100644 --- a/Doc/library/hashlib.rst +++ b/Doc/library/hashlib.rst @@ -32,6 +32,10 @@ digests. The modern term is secure hash. Some algorithms have known hash collision weaknesses, refer to the "See also" section at the end. + +Hash algorithms +--------------- + There is one constructor method named for each type of :dfn:`hash`. All return a hash object with the same simple interface. For example: use :func:`sha1` to create a SHA1 hash object. You can now feed this object with :term:`bytes-like @@ -174,6 +178,43 @@ A hash object has the following methods: compute the digests of data sharing a common initial substring. +Key Derivation Function +----------------------- + +Key derivation and key stretching algorithms are designed for secure password +hashing. Naive algorithms such as ``sha1(password)`` are not resistant +against brute-force attacks. A good password hashing function must be tunable, +slow and include a salt. + + +.. function:: pbkdf2_hmac(name, password, salt, rounds, dklen=None) + + The function provides PKCS#5 password-based key derivation function 2. It + uses HMAC as pseudorandom function. + + The string *name* is the desired name of the hash digest algorithm for + HMAC, e.g. 'sha1' or 'sha256'. *password* and *salt* are interpreted as + buffers of bytes. Applications and libraries should limit *password* to + a sensible value (e.g. 1024). *salt* should be about 16 or more bytes from + a proper source, e.g. :func:`os.urandom`. + + The number of *rounds* should be chosen based on the hash algorithm and + computing power. As of 2013 a value of at least 100,000 rounds of SHA-256 + have been suggested. + + *dklen* is the length of the derived key. If *dklen* is ``None`` then the + digest size of the hash algorithm *name* is used, e.g. 64 for SHA-512. + + >>> import hashlib, binascii + >>> dk = hashlib.pbkdf2_hmac('sha256', b'password', b'salt', 100000) + >>> binascii.hexlify(dk) + b'0394a2ede332c9a13eb82e9b24631604c31df978b4e2f0fbd2c549944f9d79a5' + + .. versionadded:: 3.4 + + .. note:: *pbkdf2_hmac* is only available with OpenSSL 1.0 and newer. + + .. seealso:: Module :mod:`hmac` @@ -189,3 +230,5 @@ A hash object has the following methods: Wikipedia article with information on which algorithms have known issues and what that means regarding their use. + http://www.ietf.org/rfc/rfc2898.txt + PKCS #5: Password-Based Cryptography Specification Version 2.0 diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst index 0e227150acc..50eef8432e2 100644 --- a/Doc/whatsnew/3.4.rst +++ b/Doc/whatsnew/3.4.rst @@ -261,6 +261,14 @@ functools New :func:`functools.singledispatch` decorator: see the :pep:`443`. +hashlib +------- + +New :func:`hashlib.pbkdf2_hmac` function. + +(Contributed by Christian Heimes in :issue:`18582`) + + inspect ------- diff --git a/Lib/hashlib.py b/Lib/hashlib.py index a1bd8b2de8c..73882d1872b 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -147,6 +147,14 @@ except ImportError: new = __py_new __get_hash = __get_builtin_constructor +# PBKDF2 requires OpenSSL 1.0+ with HMAC and SHA +try: + from _hashlib import pbkdf2_hmac +except ImportError: + pass +else: + __all__ += ('pbkdf2_hmac',) + for __func_name in __always_supported: # try them all, some may not work due to the OpenSSL # version not supporting that algorithm. diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 6d5c5a48428..3eadee1cc7f 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -545,6 +545,88 @@ class HashLibTestCase(unittest.TestCase): self.assertEqual(expected_hash, hasher.hexdigest()) + pbkdf2_test_vectors = [ + (b'password', b'salt', 1, None), + (b'password', b'salt', 2, None), + (b'password', b'salt', 4096, None), + # too slow, it takes over a minute on a fast CPU. + #(b'password', b'salt', 16777216, None), + (b'passwordPASSWORDpassword', b'saltSALTsaltSALTsaltSALTsaltSALTsalt', + 4096, -1), + (b'pass\0word', b'sa\0lt', 4096, 16), + ] + + pbkdf2_results = { + "sha1": [ + # offical test vectors from RFC 6070 + (bytes.fromhex('0c60c80f961f0e71f3a9b524af6012062fe037a6'), None), + (bytes.fromhex('ea6c014dc72d6f8ccd1ed92ace1d41f0d8de8957'), None), + (bytes.fromhex('4b007901b765489abead49d926f721d065a429c1'), None), + #(bytes.fromhex('eefe3d61cd4da4e4e9945b3d6ba2158c2634e984'), None), + (bytes.fromhex('3d2eec4fe41c849b80c8d83662c0e44a8b291a964c' + 'f2f07038'), 25), + (bytes.fromhex('56fa6aa75548099dcc37d7f03425e0c3'), None),], + "sha256": [ + (bytes.fromhex('120fb6cffcf8b32c43e7225256c4f837' + 'a86548c92ccc35480805987cb70be17b'), None), + (bytes.fromhex('ae4d0c95af6b46d32d0adff928f06dd0' + '2a303f8ef3c251dfd6e2d85a95474c43'), None), + (bytes.fromhex('c5e478d59288c841aa530db6845c4c8d' + '962893a001ce4e11a4963873aa98134a'), None), + #(bytes.fromhex('cf81c66fe8cfc04d1f31ecb65dab4089' + # 'f7f179e89b3b0bcb17ad10e3ac6eba46'), None), + (bytes.fromhex('348c89dbcbd32b2f32d814b8116e84cf2b17' + '347ebc1800181c4e2a1fb8dd53e1c635518c7dac47e9'), 40), + (bytes.fromhex('89b69d0516f829893c696226650a8687'), None),], + "sha512": [ + (bytes.fromhex('867f70cf1ade02cff3752599a3a53dc4af34c7a669815ae5' + 'd513554e1c8cf252c02d470a285a0501bad999bfe943c08f' + '050235d7d68b1da55e63f73b60a57fce'), None), + (bytes.fromhex('e1d9c16aa681708a45f5c7c4e215ceb66e011a2e9f004071' + '3f18aefdb866d53cf76cab2868a39b9f7840edce4fef5a82' + 'be67335c77a6068e04112754f27ccf4e'), None), + (bytes.fromhex('d197b1b33db0143e018b12f3d1d1479e6cdebdcc97c5c0f8' + '7f6902e072f457b5143f30602641b3d55cd335988cb36b84' + '376060ecd532e039b742a239434af2d5'), None), + (bytes.fromhex('8c0511f4c6e597c6ac6315d8f0362e225f3c501495ba23b8' + '68c005174dc4ee71115b59f9e60cd9532fa33e0f75aefe30' + '225c583a186cd82bd4daea9724a3d3b8'), 64), + (bytes.fromhex('9d9e9c4cd21fe4be24d5b8244c759665'), None),], + } + + @unittest.skipUnless(hasattr(hashlib, 'pbkdf2_hmac'), + 'pbkdf2_hmac required for this test.') + def test_pbkdf2_hmac(self): + pbkdf2 = hashlib.pbkdf2_hmac + + for digest_name, results in self.pbkdf2_results.items(): + for i, vector in enumerate(self.pbkdf2_test_vectors): + password, salt, rounds, dklen = vector + expected, overwrite_dklen = results[i] + if overwrite_dklen: + dklen = overwrite_dklen + out = pbkdf2(digest_name, password, salt, rounds, dklen) + self.assertEqual(out, expected, + (digest_name, password, salt, rounds, dklen)) + out = pbkdf2(digest_name, memoryview(password), + memoryview(salt), rounds, dklen) + out = pbkdf2(digest_name, bytearray(password), + bytearray(salt), rounds, dklen) + self.assertEqual(out, expected) + if dklen is None: + out = pbkdf2(digest_name, password, salt, rounds) + self.assertEqual(out, expected, + (digest_name, password, salt, rounds)) + + self.assertRaises(TypeError, pbkdf2, b'sha1', b'pass', b'salt', 1) + self.assertRaises(TypeError, pbkdf2, 'sha1', 'pass', 'salt', 1) + self.assertRaises(ValueError, pbkdf2, 'sha1', b'pass', b'salt', 0) + self.assertRaises(ValueError, pbkdf2, 'sha1', b'pass', b'salt', -1) + self.assertRaises(ValueError, pbkdf2, 'sha1', b'pass', b'salt', 1, 0) + self.assertRaises(ValueError, pbkdf2, 'sha1', b'pass', b'salt', 1, -1) + with self.assertRaisesRegex(ValueError, 'unsupported hash type'): + pbkdf2('unknown', b'pass', b'salt', 1) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS b/Misc/NEWS index b94989f1433..68df3590dd0 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -42,6 +42,9 @@ Core and Builtins Library ------- +- Issue #18582: Add 'pbkdf2_hmac' to the hashlib module. It implements PKCS#5 + password-based key derivation functions with HMAC as pseudorandom function. + - Issue #19131: The aifc module now correctly reads and writes sampwidth of compressed streams. diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index c8db5ed2c86..bec84ce926d 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -22,6 +22,7 @@ #include /* We use the object interface to discover what hashes OpenSSL supports. */ #include +#include "openssl/err.h" #define MUNCH_SIZE INT_MAX @@ -61,6 +62,34 @@ DEFINE_CONSTS_FOR_NEW(sha384) DEFINE_CONSTS_FOR_NEW(sha512) #endif +static PyObject * +_setException(PyObject *exc) +{ + unsigned long errcode; + const char *lib, *func, *reason; + + errcode = ERR_peek_last_error(); + if (!errcode) { + PyErr_SetString(exc, "unknown reasons"); + return NULL; + } + ERR_clear_error(); + + lib = ERR_lib_error_string(errcode); + func = ERR_func_error_string(errcode); + reason = ERR_reason_error_string(errcode); + + if (lib && func) { + PyErr_Format(exc, "[%s: %s] %s", lib, func, reason); + } + else if (lib) { + PyErr_Format(exc, "[%s] %s", lib, reason); + } + else { + PyErr_SetString(exc, reason); + } + return NULL; +} static EVPobject * newEVPobject(PyObject *name) @@ -466,6 +495,109 @@ EVP_new(PyObject *self, PyObject *args, PyObject *kwdict) return ret_obj; } +#if (OPENSSL_VERSION_NUMBER >= 0x10000000 && !defined(OPENSSL_NO_HMAC) \ + && !defined(OPENSSL_NO_SHA)) +#define PY_PBKDF2_HMAC 1 + +PyDoc_STRVAR(pbkdf2_hmac__doc__, +"pbkdf2_hmac(hash_name, password, salt, iterations, dklen=None) -> key\n\ +\n\ +Password based key derivation function 2 (PKCS #5 v2.0) with HMAC as\n\ +pseudorandom function."); + +static PyObject * +pbkdf2_hmac(PyObject *self, PyObject *args, PyObject *kwdict) +{ + static char *kwlist[] = {"hash_name", "password", "salt", "iterations", + "dklen", NULL}; + PyObject *key_obj = NULL, *dklen_obj = Py_None; + char *name, *key; + Py_buffer password, salt; + long iterations, dklen; + int retval; + const EVP_MD *digest; + + if (!PyArg_ParseTupleAndKeywords(args, kwdict, "sy*y*l|O:pbkdf2_hmac", + kwlist, &name, &password, &salt, + &iterations, &dklen_obj)) { + return NULL; + } + + digest = EVP_get_digestbyname(name); + if (digest == NULL) { + PyErr_SetString(PyExc_ValueError, "unsupported hash type"); + goto end; + } + + if (password.len > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, + "password is too long."); + goto end; + } + + if (salt.len > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, + "salt is too long."); + goto end; + } + + if (iterations < 1) { + PyErr_SetString(PyExc_ValueError, + "iteration value must be greater than 0."); + goto end; + } + if (iterations > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, + "iteration value is too great."); + goto end; + } + + if (dklen_obj == Py_None) { + dklen = EVP_MD_size(digest); + } else { + dklen = PyLong_AsLong(dklen_obj); + if ((dklen == -1) && PyErr_Occurred()) { + goto end; + } + } + if (dklen < 1) { + PyErr_SetString(PyExc_ValueError, + "key length must be greater than 0."); + goto end; + } + if (dklen > INT_MAX) { + /* INT_MAX is always smaller than dkLen max (2^32 - 1) * hLen */ + PyErr_SetString(PyExc_OverflowError, + "key length is too great."); + goto end; + } + + key_obj = PyBytes_FromStringAndSize(NULL, dklen); + if (key_obj == NULL) { + goto end; + } + key = PyBytes_AS_STRING(key_obj); + + Py_BEGIN_ALLOW_THREADS + retval = PKCS5_PBKDF2_HMAC((char*)password.buf, password.len, + (unsigned char *)salt.buf, salt.len, + iterations, digest, dklen, + (unsigned char *)key); + Py_END_ALLOW_THREADS + + if (!retval) { + Py_CLEAR(key_obj); + _setException(PyExc_ValueError); + goto end; + } + + end: + PyBuffer_Release(&password); + PyBuffer_Release(&salt); + return key_obj; +} + +#endif /* State for our callback function so that it can accumulate a result. */ typedef struct _internal_name_mapper_state { @@ -588,6 +720,10 @@ GEN_CONSTRUCTOR(sha512) static struct PyMethodDef EVP_functions[] = { {"new", (PyCFunction)EVP_new, METH_VARARGS|METH_KEYWORDS, EVP_new__doc__}, +#ifdef PY_PBKDF2_HMAC + {"pbkdf2_hmac", (PyCFunction)pbkdf2_hmac, METH_VARARGS|METH_KEYWORDS, + pbkdf2_hmac__doc__}, +#endif CONSTRUCTOR_METH_DEF(md5), CONSTRUCTOR_METH_DEF(sha1), #ifdef _OPENSSL_SUPPORTS_SHA2