diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst index 73e6e4ea100..b7b13710ab7 100644 --- a/Doc/library/hashlib.rst +++ b/Doc/library/hashlib.rst @@ -95,6 +95,12 @@ A hash object has the following methods: a single call with the concatenation of all the arguments: ``m.update(a); m.update(b)`` is equivalent to ``m.update(a+b)``. + .. versionchanged:: 2.7 + + The Python GIL is released to allow other threads to run while + hash updates on data of 2048 bytes or more are taking place when + using hash algorithms supplied by OpenSSL. + .. method:: hash.digest() diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index e7ce1984869..4ba07b1cea4 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -2,11 +2,16 @@ # # $Id$ # -# Copyright (C) 2005 Gregory P. Smith (greg@krypto.org) +# Copyright (C) 2005-2009 Gregory P. Smith (greg@krypto.org) # Licensed to PSF under a Contributor Agreement. # import hashlib +import StringIO +try: + import threading +except ImportError: + threading = None import unittest from test import test_support from test.test_support import _4G, precisionbigmemtest @@ -61,10 +66,10 @@ class HashLibTestCase(unittest.TestCase): def check(self, name, data, digest): # test the direct constructors computed = getattr(hashlib, name)(data).hexdigest() - self.assert_(computed == digest) + self.assertEqual(computed, digest) # test the general new() interface computed = hashlib.new(name, data).hexdigest() - self.assert_(computed == digest) + self.assertEqual(computed, digest) def check_no_unicode(self, algorithm_name): # Unicode objects are not allowed as input. 
@@ -211,6 +216,44 @@ class HashLibTestCase(unittest.TestCase): "e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973eb"+ "de0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b") + def test_threaded_hashing(self): + if not threading: + raise unittest.SkipTest('No threading module.') + + # Updating the same hash object from several threads at once + # using data chunk sizes containing the same byte sequences. + # + # If the internal locks are working to prevent multiple + # updates on the same object from running at once, the resulting + # hash will be the same as doing it single threaded upfront. + hasher = hashlib.sha1() + num_threads = 5 + smallest_data = 'swineflu' + data = smallest_data*200000 + expected_hash = hashlib.sha1(data*num_threads).hexdigest() + + def hash_in_chunks(chunk_size, event): + index = 0 + while index < len(data): + hasher.update(data[index:index+chunk_size]) + index += chunk_size + event.set() + + events = [] + for threadnum in xrange(num_threads): + chunk_size = len(data) // (10**threadnum) + assert chunk_size > 0 + assert chunk_size % len(smallest_data) == 0 + event = threading.Event() + events.append(event) + threading.Thread(target=hash_in_chunks, + args=(chunk_size, event)).start() + + for event in events: + event.wait() + + self.assertEqual(expected_hash, hasher.hexdigest()) + def test_main(): test_support.run_unittest(HashLibTestCase) diff --git a/Misc/NEWS b/Misc/NEWS index 64e473ac589..e372a623609 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -905,6 +905,9 @@ C-API Extension Modules ----------------- +- Issue #4751: For hashlib algorithms provided by OpenSSL, the Python + GIL is now released during computation on data lengths >= 2048 bytes. + - Issue #3745: Fix hashlib to always reject unicode and non buffer-api supporting objects as input no matter how it was compiled (built in implementations or external openssl library). 
diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 7b5a2e52bc8..8dbaa208e49 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -1,7 +1,7 @@ /* Module that wraps all OpenSSL hash algorithms */ /* - * Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org) + * Copyright (C) 2005-2009 Gregory P. Smith (greg@krypto.org) * Licensed to PSF under a Contributor Agreement. * * Derived from a skeleton of shamodule.c containing work performed by: @@ -17,25 +17,49 @@ #include "structmember.h" #include "hashlib.h" +#ifdef WITH_THREAD +#include "pythread.h" + #define ENTER_HASHLIB(obj) \ + if ((obj)->lock) \ + { \ + if (!PyThread_acquire_lock((obj)->lock, 0)) \ + { \ + Py_BEGIN_ALLOW_THREADS \ + PyThread_acquire_lock((obj)->lock, 1); \ + Py_END_ALLOW_THREADS \ + } \ + } + #define LEAVE_HASHLIB(obj) \ + if ((obj)->lock) \ + { \ + PyThread_release_lock((obj)->lock); \ + } +#else + #define ENTER_HASHLIB(obj) + #define LEAVE_HASHLIB(obj) +#endif + /* EVP is the preferred interface to hashing in OpenSSL */ #include <openssl/evp.h> #define MUNCH_SIZE INT_MAX +/* TODO(gps): We should probably make this a module or EVPobject attribute + * to allow the user to optimize based on the platform they're using. */ +#define HASHLIB_GIL_MINSIZE 2048 #ifndef HASH_OBJ_CONSTRUCTOR #define HASH_OBJ_CONSTRUCTOR 0 #endif + typedef struct { PyObject_HEAD PyObject *name; /* name of this hash algorithm */ EVP_MD_CTX ctx; /* OpenSSL message digest context */ - /* - * TODO investigate performance impact of including a lock for this object - * here and releasing the Python GIL while hash updates are in progress. - * (perhaps only release GIL if input length will take long to process?) 
- */ +#ifdef WITH_THREAD + PyThread_type_lock lock; /* OpenSSL context lock */ +#endif } EVPobject; @@ -64,26 +88,57 @@ newEVPobject(PyObject *name) if (retval != NULL) { Py_INCREF(name); retval->name = name; +#ifdef WITH_THREAD + retval->lock = NULL; +#endif } return retval; } +static void +EVP_hash(EVPobject *self, const void *vp, Py_ssize_t len) +{ + unsigned int process; + const unsigned char *cp = (const unsigned char *)vp; + while (0 < len) + { + if (len > (Py_ssize_t)MUNCH_SIZE) + process = MUNCH_SIZE; + else + process = Py_SAFE_DOWNCAST(len, Py_ssize_t, unsigned int); + EVP_DigestUpdate(&self->ctx, (const void*)cp, process); + len -= process; + cp += process; + } +} + /* Internal methods for a hash object */ static void -EVP_dealloc(PyObject *ptr) +EVP_dealloc(EVPobject *self) { - EVP_MD_CTX_cleanup(&((EVPobject *)ptr)->ctx); - Py_XDECREF(((EVPobject *)ptr)->name); - PyObject_Del(ptr); +#ifdef WITH_THREAD + if (self->lock != NULL) + PyThread_free_lock(self->lock); +#endif + EVP_MD_CTX_cleanup(&self->ctx); + Py_XDECREF(self->name); + PyObject_Del(self); } +static void locked_EVP_MD_CTX_copy(EVP_MD_CTX *new_ctx_p, EVPobject *self) +{ + ENTER_HASHLIB(self); + EVP_MD_CTX_copy(new_ctx_p, &self->ctx); + LEAVE_HASHLIB(self); +} /* External methods for a hash object */ PyDoc_STRVAR(EVP_copy__doc__, "Return a copy of the hash object."); + static PyObject * EVP_copy(EVPobject *self, PyObject *unused) { @@ -92,7 +147,7 @@ EVP_copy(EVPobject *self, PyObject *unused) if ( (newobj = newEVPobject(self->name))==NULL) return NULL; - EVP_MD_CTX_copy(&newobj->ctx, &self->ctx); + locked_EVP_MD_CTX_copy(&newobj->ctx, self); return (PyObject *)newobj; } @@ -107,7 +162,7 @@ EVP_digest(EVPobject *self, PyObject *unused) PyObject *retval; unsigned int digest_size; - EVP_MD_CTX_copy(&temp_ctx, &self->ctx); + locked_EVP_MD_CTX_copy(&temp_ctx, self); digest_size = EVP_MD_CTX_size(&temp_ctx); EVP_DigestFinal(&temp_ctx, digest, NULL); @@ -129,7 +184,7 @@ EVP_hexdigest(EVPobject *self, 
PyObject *unused) unsigned int i, j, digest_size; /* Get the raw (binary) digest value */ - EVP_MD_CTX_copy(&temp_ctx, &self->ctx); + locked_EVP_MD_CTX_copy(&temp_ctx, self); digest_size = EVP_MD_CTX_size(&temp_ctx); EVP_DigestFinal(&temp_ctx, digest, NULL); @@ -174,20 +229,27 @@ EVP_update(EVPobject *self, PyObject *args) GET_BUFFER_VIEW_OR_ERROUT(obj, &view, NULL); - if (view.len > 0 && view.len <= MUNCH_SIZE) { - EVP_DigestUpdate(&self->ctx, (unsigned char*)view.buf, - Py_SAFE_DOWNCAST(view.len, Py_ssize_t, unsigned int)); - } else { - Py_ssize_t len = view.len; - unsigned char *cp = (unsigned char *)view.buf; - while (len > 0) { - unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len; - EVP_DigestUpdate(&self->ctx, cp, process); - len -= process; - cp += process; - } +#ifdef WITH_THREAD + if (self->lock == NULL && view.len >= HASHLIB_GIL_MINSIZE) + { + self->lock = PyThread_allocate_lock(); + /* fail? lock = NULL and we fail over to non-threaded code. */ } + if (self->lock != NULL) + { + Py_BEGIN_ALLOW_THREADS + PyThread_acquire_lock(self->lock, 1); + EVP_hash(self, view.buf, view.len); + PyThread_release_lock(self->lock); + Py_END_ALLOW_THREADS + } else { + EVP_hash(self, view.buf, view.len); + } +#else + EVP_hash(self, view.buf, view.len); +#endif + PyBuffer_Release(&view); Py_INCREF(Py_None); @@ -205,13 +267,17 @@ static PyMethodDef EVP_methods[] = { static PyObject * EVP_get_block_size(EVPobject *self, void *closure) { - return PyInt_FromLong(EVP_MD_CTX_block_size(&((EVPobject *)self)->ctx)); + long block_size; + block_size = EVP_MD_CTX_block_size(&self->ctx); + return PyLong_FromLong(block_size); } static PyObject * EVP_get_digest_size(EVPobject *self, void *closure) { - return PyInt_FromLong(EVP_MD_CTX_size(&((EVPobject *)self)->ctx)); + long size; + size = EVP_MD_CTX_size(&self->ctx); + return PyLong_FromLong(size); } static PyMemberDef EVP_members[] = { @@ -286,19 +352,14 @@ EVP_tp_init(EVPobject *self, PyObject *args, PyObject *kwds) 
Py_INCREF(self->name); if (data_obj) { - if (view.len > 0 && view.len <= MUNCH_SIZE) { - EVP_DigestUpdate(&self->ctx, (unsigned char*)view.buf, - Py_SAFE_DOWNCAST(view.len, Py_ssize_t, unsigned int)); + if (view.len >= HASHLIB_GIL_MINSIZE) + { + Py_BEGIN_ALLOW_THREADS + EVP_hash(self, view.buf, view.len); + Py_END_ALLOW_THREADS } else { - Py_ssize_t len = view.len; - unsigned char *cp = (unsigned char*)view.buf; - while (len > 0) { - unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len; - EVP_DigestUpdate(&self->ctx, cp, process); - len -= process; - cp += process; - } - } + EVP_hash(self, view.buf, view.len); + } PyBuffer_Release(&view); } @@ -329,7 +390,7 @@ static PyTypeObject EVPtype = { sizeof(EVPobject), /*tp_basicsize*/ 0, /*tp_itemsize*/ /* methods */ - EVP_dealloc, /*tp_dealloc*/ + (destructor)EVP_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ @@ -389,17 +450,13 @@ EVPnew(PyObject *name_obj, } if (cp && len) { - if (len > 0 && len <= MUNCH_SIZE) { - EVP_DigestUpdate(&self->ctx, cp, Py_SAFE_DOWNCAST(len, Py_ssize_t, - unsigned int)); + if (len >= HASHLIB_GIL_MINSIZE) + { + Py_BEGIN_ALLOW_THREADS + EVP_hash(self, cp, len); + Py_END_ALLOW_THREADS } else { - Py_ssize_t offset = 0; - while (len > 0) { - unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len; - EVP_DigestUpdate(&self->ctx, cp + offset, process); - len -= process; - offset += process; - } + EVP_hash(self, cp, len); } }