Merge the refactoring I did when committing r72267 to trunk into the
already committed issue4751 support in py3k (r68411).
This commit is contained in:
Gregory P. Smith 2009-05-04 00:45:33 +00:00
parent c1651a0b96
commit 3f61d61b35
3 changed files with 78 additions and 34 deletions

View File

@ -105,6 +105,12 @@ A hash object has the following methods:
concatenation of all the arguments: ``m.update(a); m.update(b)`` is concatenation of all the arguments: ``m.update(a); m.update(b)`` is
equivalent to ``m.update(a+b)``. equivalent to ``m.update(a+b)``.
.. versionchanged:: 2.7
   The Python GIL is released to allow other threads to run while
   hash updates on data larger than 2048 bytes are taking place when
   using hash algorithms supplied by OpenSSL.
.. method:: hash.digest() .. method:: hash.digest()

View File

@ -2,11 +2,16 @@
# #
# $Id$ # $Id$
# #
# Copyright (C) 2005 Gregory P. Smith (greg@krypto.org) # Copyright (C) 2005-2009 Gregory P. Smith (greg@krypto.org)
# Licensed to PSF under a Contributor Agreement. # Licensed to PSF under a Contributor Agreement.
# #
import hashlib import hashlib
from io import StringIO
try:
import threading
except ImportError:
threading = None
import unittest import unittest
from test import support from test import support
from test.support import _4G, precisionbigmemtest from test.support import _4G, precisionbigmemtest
@ -224,6 +229,45 @@ class HashLibTestCase(unittest.TestCase):
m = hashlib.md5(b'x' * gil_minsize) m = hashlib.md5(b'x' * gil_minsize)
self.assertEquals(m.hexdigest(), 'cfb767f225d58469c5de3632a8803958') self.assertEquals(m.hexdigest(), 'cfb767f225d58469c5de3632a8803958')
def test_threaded_hashing(self):
    """Update one hash object from several threads at once.

    Each thread feeds the shared hasher the same data stream, split
    into chunks of a different size.  If the object's internal lock
    correctly serializes concurrent update() calls, the final digest
    must equal the digest of the whole stream hashed up front in a
    single thread.
    """
    if not threading:
        raise unittest.SkipTest('No threading module.')

    shared_hasher = hashlib.sha1()
    num_threads = 5
    smallest_data = b'swineflu'
    data = smallest_data * 200000
    expected_hash = hashlib.sha1(data * num_threads).hexdigest()

    def hash_in_chunks(chunk_size, event):
        # Push the whole stream through the shared hasher in
        # chunk_size pieces, then signal completion.
        for start in range(0, len(data), chunk_size):
            shared_hasher.update(data[start:start + chunk_size])
        event.set()

    done_events = []
    for threadnum in range(num_threads):
        chunk_size = len(data) // (10 ** threadnum)
        assert chunk_size > 0
        # Chunks must align on the repeated byte pattern so every
        # interleaving hashes the same total byte sequence.
        assert chunk_size % len(smallest_data) == 0
        event = threading.Event()
        done_events.append(event)
        threading.Thread(target=hash_in_chunks,
                         args=(chunk_size, event)).start()

    for event in done_events:
        event.wait()

    self.assertEqual(expected_hash, shared_hasher.hexdigest())
def test_main(): def test_main():
support.run_unittest(HashLibTestCase) support.run_unittest(HashLibTestCase)

View File

@ -1,7 +1,7 @@
/* Module that wraps all OpenSSL hash algorithms */ /* Module that wraps all OpenSSL hash algorithms */
/* /*
* Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org) * Copyright (C) 2005-2009 Gregory P. Smith (greg@krypto.org)
* Licensed to PSF under a Contributor Agreement. * Licensed to PSF under a Contributor Agreement.
* *
* Derived from a skeleton of shamodule.c containing work performed by: * Derived from a skeleton of shamodule.c containing work performed by:
@ -17,21 +17,8 @@
#include "structmember.h" #include "structmember.h"
#include "hashlib.h" #include "hashlib.h"
/* EVP is the preferred interface to hashing in OpenSSL */
#include <openssl/evp.h>
#define MUNCH_SIZE INT_MAX
#ifndef HASH_OBJ_CONSTRUCTOR
#define HASH_OBJ_CONSTRUCTOR 0
#endif
#define HASHLIB_GIL_MINSIZE 2048
#ifdef WITH_THREAD #ifdef WITH_THREAD
#include "pythread.h" #include "pythread.h"
#define ENTER_HASHLIB(obj) \ #define ENTER_HASHLIB(obj) \
if ((obj)->lock) { \ if ((obj)->lock) { \
if (!PyThread_acquire_lock((obj)->lock, 0)) { \ if (!PyThread_acquire_lock((obj)->lock, 0)) { \
@ -49,6 +36,20 @@
#define LEAVE_HASHLIB(obj) #define LEAVE_HASHLIB(obj)
#endif #endif
/* EVP is the preferred interface to hashing in OpenSSL */
#include <openssl/evp.h>
#define MUNCH_SIZE INT_MAX
/* TODO(gps): We should probably make this a module or EVPobject attribute
* to allow the user to optimize based on the platform they're using. */
#define HASHLIB_GIL_MINSIZE 2048
#ifndef HASH_OBJ_CONSTRUCTOR
#define HASH_OBJ_CONSTRUCTOR 0
#endif
typedef struct { typedef struct {
PyObject_HEAD PyObject_HEAD
PyObject *name; /* name of this hash algorithm */ PyObject *name; /* name of this hash algorithm */
@ -122,11 +123,18 @@ EVP_dealloc(EVPobject *self)
PyObject_Del(self); PyObject_Del(self);
} }
/* Copy self's EVP context into new_ctx_p while holding self's update
 * lock via ENTER_HASHLIB/LEAVE_HASHLIB (a no-op when no lock has been
 * allocated for this object), so a concurrent update() presumably
 * cannot mutate the context mid-copy. */
static void locked_EVP_MD_CTX_copy(EVP_MD_CTX *new_ctx_p, EVPobject *self)
{
ENTER_HASHLIB(self);
EVP_MD_CTX_copy(new_ctx_p, &self->ctx);
LEAVE_HASHLIB(self);
}
/* External methods for a hash object */ /* External methods for a hash object */
PyDoc_STRVAR(EVP_copy__doc__, "Return a copy of the hash object."); PyDoc_STRVAR(EVP_copy__doc__, "Return a copy of the hash object.");
static PyObject * static PyObject *
EVP_copy(EVPobject *self, PyObject *unused) EVP_copy(EVPobject *self, PyObject *unused)
{ {
@ -135,9 +143,7 @@ EVP_copy(EVPobject *self, PyObject *unused)
if ( (newobj = newEVPobject(self->name))==NULL) if ( (newobj = newEVPobject(self->name))==NULL)
return NULL; return NULL;
ENTER_HASHLIB(self); locked_EVP_MD_CTX_copy(&newobj->ctx, self);
EVP_MD_CTX_copy(&newobj->ctx, &self->ctx);
LEAVE_HASHLIB(self);
return (PyObject *)newobj; return (PyObject *)newobj;
} }
@ -152,9 +158,7 @@ EVP_digest(EVPobject *self, PyObject *unused)
PyObject *retval; PyObject *retval;
unsigned int digest_size; unsigned int digest_size;
ENTER_HASHLIB(self); locked_EVP_MD_CTX_copy(&temp_ctx, self);
EVP_MD_CTX_copy(&temp_ctx, &self->ctx);
LEAVE_HASHLIB(self);
digest_size = EVP_MD_CTX_size(&temp_ctx); digest_size = EVP_MD_CTX_size(&temp_ctx);
EVP_DigestFinal(&temp_ctx, digest, NULL); EVP_DigestFinal(&temp_ctx, digest, NULL);
@ -176,9 +180,7 @@ EVP_hexdigest(EVPobject *self, PyObject *unused)
unsigned int i, j, digest_size; unsigned int i, j, digest_size;
/* Get the raw (binary) digest value */ /* Get the raw (binary) digest value */
ENTER_HASHLIB(self); locked_EVP_MD_CTX_copy(&temp_ctx, self);
EVP_MD_CTX_copy(&temp_ctx, &self->ctx);
LEAVE_HASHLIB(self);
digest_size = EVP_MD_CTX_size(&temp_ctx); digest_size = EVP_MD_CTX_size(&temp_ctx);
EVP_DigestFinal(&temp_ctx, digest, NULL); EVP_DigestFinal(&temp_ctx, digest, NULL);
@ -221,11 +223,7 @@ EVP_update(EVPobject *self, PyObject *args)
#ifdef WITH_THREAD #ifdef WITH_THREAD
if (self->lock == NULL && view.len >= HASHLIB_GIL_MINSIZE) { if (self->lock == NULL && view.len >= HASHLIB_GIL_MINSIZE) {
self->lock = PyThread_allocate_lock(); self->lock = PyThread_allocate_lock();
if (self->lock == NULL) { /* fail? lock = NULL and we fail over to non-threaded code. */
PyBuffer_Release(&view);
PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
return NULL;
}
} }
if (self->lock != NULL) { if (self->lock != NULL) {
@ -257,9 +255,7 @@ static PyObject *
EVP_get_block_size(EVPobject *self, void *closure) EVP_get_block_size(EVPobject *self, void *closure)
{ {
long block_size; long block_size;
ENTER_HASHLIB(self);
block_size = EVP_MD_CTX_block_size(&self->ctx); block_size = EVP_MD_CTX_block_size(&self->ctx);
LEAVE_HASHLIB(self);
return PyLong_FromLong(block_size); return PyLong_FromLong(block_size);
} }
@ -267,9 +263,7 @@ static PyObject *
EVP_get_digest_size(EVPobject *self, void *closure) EVP_get_digest_size(EVPobject *self, void *closure)
{ {
long size; long size;
ENTER_HASHLIB(self);
size = EVP_MD_CTX_size(&self->ctx); size = EVP_MD_CTX_size(&self->ctx);
LEAVE_HASHLIB(self);
return PyLong_FromLong(size); return PyLong_FromLong(size);
} }