Merge refactoring I did when committing r72267 to trunk into the
already committed issue4751 support in py3k r68411.
Gregory P. Smith 2009-05-04 00:45:33 +00:00
parent c1651a0b96
commit 3f61d61b35
3 changed files with 78 additions and 34 deletions

View File

@@ -105,6 +105,12 @@ A hash object has the following methods:
    concatenation of all the arguments: ``m.update(a); m.update(b)`` is
    equivalent to ``m.update(a+b)``.
 
+   .. versionchanged:: 2.7
+      The Python GIL is released to allow other threads to run while
+      hash updates on data larger than 2048 bytes are taking place when
+      using hash algorithms supplied by OpenSSL.
+
 .. method:: hash.digest()
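The practical effect of the documented change: once an update() call is large enough to release the GIL, hashing work can overlap across threads. A minimal timing sketch, not part of the commit; any speedup depends on core count and on OpenSSL providing the algorithm:

import hashlib
import threading
import time

CHUNK = b'x' * (1024 * 1024)   # 1 MiB per update(), far above the 2048-byte threshold
N_UPDATES = 200

def hash_many():
    h = hashlib.sha1()
    for _ in range(N_UPDATES):
        h.update(CHUNK)

# Serial: two full hashing passes back to back.
start = time.time()
hash_many()
hash_many()
serial = time.time() - start

# Parallel: the same two passes in two threads; with the GIL released
# during each large update they can run concurrently.
start = time.time()
threads = [threading.Thread(target=hash_many) for _ in range(2)]
for t in threads:
    t.start()
for t in threads:
    t.join()
parallel = time.time() - start

print('serial: %.2fs  parallel: %.2fs' % (serial, parallel))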

View File

@@ -2,11 +2,16 @@
 #
 # $Id$
 #
-# Copyright (C) 2005 Gregory P. Smith (greg@krypto.org)
+# Copyright (C) 2005-2009 Gregory P. Smith (greg@krypto.org)
 # Licensed to PSF under a Contributor Agreement.
 #
 
 import hashlib
+from io import StringIO
+try:
+    import threading
+except ImportError:
+    threading = None
 import unittest
 from test import support
 from test.support import _4G, precisionbigmemtest
@@ -224,6 +229,45 @@ class HashLibTestCase(unittest.TestCase):
         m = hashlib.md5(b'x' * gil_minsize)
         self.assertEquals(m.hexdigest(), 'cfb767f225d58469c5de3632a8803958')
 
+    def test_threaded_hashing(self):
+        if not threading:
+            raise unittest.SkipTest('No threading module.')
+
+        # Updating the same hash object from several threads at once
+        # using data chunk sizes containing the same byte sequences.
+        #
+        # If the internal locks are working to prevent multiple
+        # updates on the same object from running at once, the resulting
+        # hash will be the same as doing it single threaded upfront.
+        hasher = hashlib.sha1()
+        num_threads = 5
+        smallest_data = b'swineflu'
+        data = smallest_data*200000
+        expected_hash = hashlib.sha1(data*num_threads).hexdigest()
+
+        def hash_in_chunks(chunk_size, event):
+            index = 0
+            while index < len(data):
+                hasher.update(data[index:index+chunk_size])
+                index += chunk_size
+            event.set()
+
+        events = []
+        for threadnum in range(num_threads):
+            chunk_size = len(data) // (10**threadnum)
+            assert chunk_size > 0
+            assert chunk_size % len(smallest_data) == 0
+            event = threading.Event()
+            events.append(event)
+            threading.Thread(target=hash_in_chunks,
+                             args=(chunk_size, event)).start()
+
+        for event in events:
+            event.wait()
+
+        self.assertEqual(expected_hash, hasher.hexdigest())
+
 def test_main():
     support.run_unittest(HashLibTestCase)
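A note on why the test asserts chunk_size % len(smallest_data) == 0: the lock only serializes individual update() calls, so the five threads' chunks interleave in an arbitrary order. Because every chunk is a whole number of b'swineflu' blocks, any such interleaving is still one long run of those blocks, which must hash equal to sha1(data*num_threads). A standalone check of that invariant, added here for illustration only:

import hashlib

block = b'swineflu'

# One possible interleaving: 3 + 2 + 5 blocks arriving from different
# "threads" still equals 10 blocks hashed in one shot, by the
# m.update(a); m.update(b) == m.update(a+b) equivalence.
interleaved = hashlib.sha1()
for piece in (block * 3, block * 2, block * 5):
    interleaved.update(piece)

assert interleaved.hexdigest() == hashlib.sha1(block * 10).hexdigest()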

View File

@@ -1,7 +1,7 @@
 /* Module that wraps all OpenSSL hash algorithms */
 
 /*
- * Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org)
+ * Copyright (C) 2005-2009 Gregory P. Smith (greg@krypto.org)
  * Licensed to PSF under a Contributor Agreement.
  *
  * Derived from a skeleton of shamodule.c containing work performed by:
@@ -17,21 +17,8 @@
 #include "structmember.h"
 #include "hashlib.h"
 
-/* EVP is the preferred interface to hashing in OpenSSL */
-#include <openssl/evp.h>
-
-#define MUNCH_SIZE INT_MAX
-
-#ifndef HASH_OBJ_CONSTRUCTOR
-#define HASH_OBJ_CONSTRUCTOR 0
-#endif
-
-#define HASHLIB_GIL_MINSIZE 2048
-
 #ifdef WITH_THREAD
 #include "pythread.h"
 #define ENTER_HASHLIB(obj) \
     if ((obj)->lock) { \
         if (!PyThread_acquire_lock((obj)->lock, 0)) { \
@@ -49,6 +36,20 @@
 #define LEAVE_HASHLIB(obj)
 #endif
 
+/* EVP is the preferred interface to hashing in OpenSSL */
+#include <openssl/evp.h>
+
+#define MUNCH_SIZE INT_MAX
+
+/* TODO(gps): We should probably make this a module or EVPobject attribute
+ * to allow the user to optimize based on the platform they're using. */
+#define HASHLIB_GIL_MINSIZE 2048
+
+#ifndef HASH_OBJ_CONSTRUCTOR
+#define HASH_OBJ_CONSTRUCTOR 0
+#endif
+
 typedef struct {
     PyObject_HEAD
     PyObject *name;    /* name of this hash algorithm */
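The TODO above concerns where the 2048-byte cutoff should sit: below it, update() keeps the GIL and skips lock traffic; at or above it, the call pays the lock and GIL bookkeeping in exchange for potential parallelism. A rough sketch for probing the crossover on a given machine; illustration only, and the numbers are entirely platform- and OpenSSL-dependent:

import hashlib
import time

def bench(chunk_size, total=16 * 1024 * 1024):
    # Hash `total` bytes in updates of `chunk_size` bytes each.
    chunk = b'x' * chunk_size
    h = hashlib.sha1()
    start = time.time()
    for _ in range(total // chunk_size):
        h.update(chunk)
    return time.time() - start

for size in (64, 1024, 2048, 65536):
    print('%6d-byte updates: %.3fs' % (size, bench(size)))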
@@ -122,11 +123,18 @@ EVP_dealloc(EVPobject *self)
     PyObject_Del(self);
 }
 
+static void locked_EVP_MD_CTX_copy(EVP_MD_CTX *new_ctx_p, EVPobject *self)
+{
+    ENTER_HASHLIB(self);
+    EVP_MD_CTX_copy(new_ctx_p, &self->ctx);
+    LEAVE_HASHLIB(self);
+}
+
 /* External methods for a hash object */
 
 PyDoc_STRVAR(EVP_copy__doc__, "Return a copy of the hash object.");
 
 static PyObject *
 EVP_copy(EVPobject *self, PyObject *unused)
 {
@@ -135,9 +143,7 @@ EVP_copy(EVPobject *self, PyObject *unused)
     if ( (newobj = newEVPobject(self->name))==NULL)
         return NULL;
 
-    ENTER_HASHLIB(self);
-    EVP_MD_CTX_copy(&newobj->ctx, &self->ctx);
-    LEAVE_HASHLIB(self);
+    locked_EVP_MD_CTX_copy(&newobj->ctx, self);
 
     return (PyObject *)newobj;
 }
@@ -152,9 +158,7 @@ EVP_digest(EVPobject *self, PyObject *unused)
     PyObject *retval;
     unsigned int digest_size;
 
-    ENTER_HASHLIB(self);
-    EVP_MD_CTX_copy(&temp_ctx, &self->ctx);
-    LEAVE_HASHLIB(self);
+    locked_EVP_MD_CTX_copy(&temp_ctx, self);
 
     digest_size = EVP_MD_CTX_size(&temp_ctx);
     EVP_DigestFinal(&temp_ctx, digest, NULL);
@@ -176,9 +180,7 @@ EVP_hexdigest(EVPobject *self, PyObject *unused)
     unsigned int i, j, digest_size;
 
     /* Get the raw (binary) digest value */
-    ENTER_HASHLIB(self);
-    EVP_MD_CTX_copy(&temp_ctx, &self->ctx);
-    LEAVE_HASHLIB(self);
+    locked_EVP_MD_CTX_copy(&temp_ctx, self);
 
     digest_size = EVP_MD_CTX_size(&temp_ctx);
     EVP_DigestFinal(&temp_ctx, digest, NULL);
@@ -221,11 +223,7 @@ EVP_update(EVPobject *self, PyObject *args)
 #ifdef WITH_THREAD
     if (self->lock == NULL && view.len >= HASHLIB_GIL_MINSIZE) {
         self->lock = PyThread_allocate_lock();
-        if (self->lock == NULL) {
-            PyBuffer_Release(&view);
-            PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
-            return NULL;
-        }
+        /* fail? lock = NULL and we fail over to non-threaded code. */
     }
 
     if (self->lock != NULL) {
@@ -257,9 +255,7 @@ static PyObject *
 EVP_get_block_size(EVPobject *self, void *closure)
 {
     long block_size;
-    ENTER_HASHLIB(self);
     block_size = EVP_MD_CTX_block_size(&self->ctx);
-    LEAVE_HASHLIB(self);
     return PyLong_FromLong(block_size);
 }
@@ -267,9 +263,7 @@ static PyObject *
 EVP_get_digest_size(EVPobject *self, void *closure)
 {
     long size;
-    ENTER_HASHLIB(self);
     size = EVP_MD_CTX_size(&self->ctx);
-    LEAVE_HASHLIB(self);
     return PyLong_FromLong(size);
 }
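Taken together, the locking added here makes the following pattern safe from Python: one thread feeding a shared hash object while another takes intermediate digests, since digest() and hexdigest() now snapshot the context through locked_EVP_MD_CTX_copy(). A sketch of that usage, illustration only, assuming a WITH_THREAD build with OpenSSL-backed sha1:

import hashlib
import threading

h = hashlib.sha1()
done = threading.Event()

def feed():
    # 4096-byte updates exceed HASHLIB_GIL_MINSIZE, so the object
    # allocates its lock and runs the updates with the GIL released.
    for _ in range(1000):
        h.update(b'x' * 4096)
    done.set()

threading.Thread(target=feed).start()
while not done.is_set():
    # Copies the context under the object's lock, so each value
    # reflects the stream after some whole number of update() calls.
    snapshot = h.hexdigest()
print('final:', h.hexdigest())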