- Issue #3745: Fix hashlib to always reject unicode objects and objects that
  do not support the buffer API as input, no matter how it was compiled
  (built-in implementations or the external OpenSSL library).
  (Backported from a py3k branch.)
This commit is contained in:
Gregory P. Smith 2009-02-13 03:00:00 +00:00
parent b516c126ef
commit ea38826ab2
10 changed files with 240 additions and 101 deletions

View File

@ -39,11 +39,11 @@ class HashLibTestCase(unittest.TestCase):
h = hashlib.new(name)
self.assert_(hexstr(h.digest()) == h.hexdigest())
def test_large_update(self):
aas = 'a' * 128
bees = 'b' * 127
cees = 'c' * 126
abcs = aas + bees + cees
for name in self.supported_hash_names:
m1 = hashlib.new(name)
@ -52,8 +52,11 @@ class HashLibTestCase(unittest.TestCase):
m1.update(cees)
m2 = hashlib.new(name)
m2.update(aas + bees + cees)
self.assertEqual(m1.digest(), m2.digest())
m2.update(abcs)
self.assertEqual(m1.digest(), m2.digest(), name+' update problem.')
m3 = hashlib.new(name, abcs)
self.assertEqual(m1.digest(), m3.digest(), name+' new problem.')
def check(self, name, data, digest):
# test the direct constructors
@ -63,6 +66,18 @@ class HashLibTestCase(unittest.TestCase):
computed = hashlib.new(name, data).hexdigest()
self.assert_(computed == digest)
def check_no_unicode(self, algorithm_name):
# Unicode objects are not allowed as input.
# Both entry points are exercised with the unicode literal u'spam':
#   1. the constructor looked up by name on the hashlib module, and
#   2. the generic hashlib.new(algorithm_name, data) factory.
# Each must raise TypeError.
self.assertRaises(TypeError, getattr(hashlib, algorithm_name), u'spam')
self.assertRaises(TypeError, hashlib.new, algorithm_name, u'spam')
def test_no_unicode(self):
# Run the unicode-rejection check (check_no_unicode) once for each of the
# algorithm names listed below.
self.check_no_unicode('md5')
self.check_no_unicode('sha1')
self.check_no_unicode('sha224')
self.check_no_unicode('sha256')
self.check_no_unicode('sha384')
self.check_no_unicode('sha512')
def test_case_md5_0(self):
self.check('md5', '', 'd41d8cd98f00b204e9800998ecf8427e')

View File

@ -508,6 +508,10 @@ C-API
Extension Modules
-----------------
- Issue #3745: Fix hashlib to always reject unicode objects and objects that
do not support the buffer API as input, no matter how it was compiled
(built-in implementations or the external OpenSSL library).
- Issue #4397: Fix occasional test_socket failure on OS X.
- Issue #4279: Fix build of parsermodule under Cygwin.

View File

@ -1,7 +1,7 @@
/* Module that wraps all OpenSSL hash algorithms */
/*
* Copyright (C) 2005 Gregory P. Smith (greg@krypto.org)
* Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org)
* Licensed to PSF under a Contributor Agreement.
*
* Derived from a skeleton of shamodule.c containing work performed by:
@ -15,6 +15,7 @@
#include "Python.h"
#include "structmember.h"
#include "hashlib.h"
/* EVP is the preferred interface to hashing in OpenSSL */
#include <openssl/evp.h>
@ -30,6 +31,11 @@ typedef struct {
PyObject_HEAD
PyObject *name; /* name of this hash algorithm */
EVP_MD_CTX ctx; /* OpenSSL message digest context */
/*
* TODO investigate performance impact of including a lock for this object
* here and releasing the Python GIL while hash updates are in progress.
* (perhaps only release GIL if input length will take long to process?)
*/
} EVPobject;
@ -160,24 +166,30 @@ PyDoc_STRVAR(EVP_update__doc__,
static PyObject *
EVP_update(EVPobject *self, PyObject *args)
{
unsigned char *cp;
Py_ssize_t len;
PyObject *obj;
Py_buffer view;
if (!PyArg_ParseTuple(args, "s#:update", &cp, &len))
if (!PyArg_ParseTuple(args, "O:update", &obj))
return NULL;
if (len > 0 && len <= MUNCH_SIZE) {
EVP_DigestUpdate(&self->ctx, cp, Py_SAFE_DOWNCAST(len, Py_ssize_t,
unsigned int));
GET_BUFFER_VIEW_OR_ERROUT(obj, &view, NULL);
if (view.len > 0 && view.len <= MUNCH_SIZE) {
EVP_DigestUpdate(&self->ctx, (unsigned char*)view.buf,
Py_SAFE_DOWNCAST(view.len, Py_ssize_t, unsigned int));
} else {
Py_ssize_t offset = 0;
while (len) {
Py_ssize_t len = view.len;
unsigned char *cp = (unsigned char *)view.buf;
while (len > 0) {
unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len;
EVP_DigestUpdate(&self->ctx, cp + offset, process);
EVP_DigestUpdate(&self->ctx, cp, process);
len -= process;
offset += process;
cp += process;
}
}
PyBuffer_Release(&view);
Py_INCREF(Py_None);
return Py_None;
}
@ -241,24 +253,31 @@ EVP_tp_init(EVPobject *self, PyObject *args, PyObject *kwds)
{
static char *kwlist[] = {"name", "string", NULL};
PyObject *name_obj = NULL;
PyObject *data_obj = NULL;
Py_buffer view;
char *nameStr;
unsigned char *cp = NULL;
Py_ssize_t len = 0;
const EVP_MD *digest;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s#:HASH", kwlist,
&name_obj, &cp, &len)) {
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:HASH", kwlist,
&name_obj, &data_obj)) {
return -1;
}
if (data_obj)
GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view, -1);
if (!PyArg_Parse(name_obj, "s", &nameStr)) {
PyErr_SetString(PyExc_TypeError, "name must be a string");
if (data_obj)
PyBuffer_Release(&view);
return -1;
}
digest = EVP_get_digestbyname(nameStr);
if (!digest) {
PyErr_SetString(PyExc_ValueError, "unknown hash function");
if (data_obj)
PyBuffer_Release(&view);
return -1;
}
EVP_DigestInit(&self->ctx, digest);
@ -266,21 +285,23 @@ EVP_tp_init(EVPobject *self, PyObject *args, PyObject *kwds)
self->name = name_obj;
Py_INCREF(self->name);
if (cp && len) {
if (len > 0 && len <= MUNCH_SIZE) {
EVP_DigestUpdate(&self->ctx, cp, Py_SAFE_DOWNCAST(len, Py_ssize_t,
unsigned int));
if (data_obj) {
if (view.len > 0 && view.len <= MUNCH_SIZE) {
EVP_DigestUpdate(&self->ctx, (unsigned char*)view.buf,
Py_SAFE_DOWNCAST(view.len, Py_ssize_t, unsigned int));
} else {
Py_ssize_t offset = 0;
while (len) {
Py_ssize_t len = view.len;
unsigned char *cp = (unsigned char*)view.buf;
while (len > 0) {
unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len;
EVP_DigestUpdate(&self->ctx, cp + offset, process);
EVP_DigestUpdate(&self->ctx, cp, process);
len -= process;
offset += process;
cp += process;
}
}
PyBuffer_Release(&view);
}
return 0;
}
#endif
@ -373,7 +394,7 @@ EVPnew(PyObject *name_obj,
unsigned int));
} else {
Py_ssize_t offset = 0;
while (len) {
while (len > 0) {
unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len;
EVP_DigestUpdate(&self->ctx, cp + offset, process);
len -= process;
@ -400,13 +421,14 @@ EVP_new(PyObject *self, PyObject *args, PyObject *kwdict)
{
static char *kwlist[] = {"name", "string", NULL};
PyObject *name_obj = NULL;
PyObject *data_obj = NULL;
Py_buffer view = { 0 };
PyObject *ret_obj;
char *name;
const EVP_MD *digest;
unsigned char *cp = NULL;
Py_ssize_t len = 0;
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "O|s#:new", kwlist,
&name_obj, &cp, &len)) {
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "O|O:new", kwlist,
&name_obj, &data_obj)) {
return NULL;
}
@ -415,9 +437,17 @@ EVP_new(PyObject *self, PyObject *args, PyObject *kwdict)
return NULL;
}
if (data_obj)
GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view, NULL);
digest = EVP_get_digestbyname(name);
return EVPnew(name_obj, digest, NULL, cp, len);
ret_obj = EVPnew(name_obj, digest, NULL, (unsigned char*)view.buf,
Py_SAFE_DOWNCAST(view.len, Py_ssize_t, unsigned int));
if (data_obj)
PyBuffer_Release(&view);
return ret_obj;
}
/*
@ -431,18 +461,27 @@ EVP_new(PyObject *self, PyObject *args, PyObject *kwdict)
static PyObject * \
EVP_new_ ## NAME (PyObject *self, PyObject *args) \
{ \
unsigned char *cp = NULL; \
Py_ssize_t len = 0; \
PyObject *data_obj = NULL; \
Py_buffer view = { 0 }; \
PyObject *ret_obj; \
\
if (!PyArg_ParseTuple(args, "|s#:" #NAME , &cp, &len)) { \
if (!PyArg_ParseTuple(args, "|O:" #NAME , &data_obj)) { \
return NULL; \
} \
\
return EVPnew( \
CONST_ ## NAME ## _name_obj, \
NULL, \
CONST_new_ ## NAME ## _ctx_p, \
cp, len); \
if (data_obj) \
GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view, NULL); \
\
ret_obj = EVPnew( \
CONST_ ## NAME ## _name_obj, \
NULL, \
CONST_new_ ## NAME ## _ctx_p, \
(unsigned char*)view.buf, \
Py_SAFE_DOWNCAST(view.len, Py_ssize_t, unsigned int)); \
\
if (data_obj) \
PyBuffer_Release(&view); \
return ret_obj; \
}
/* a PyMethodDef structure for the constructor */

28
Modules/hashlib.h Normal file
View File

@ -0,0 +1,28 @@
/* Common code for use by all hashlib related modules. */
/*
 * Given a PyObject* obj, fill in the Py_buffer* viewp with the result
 * of PyObject_GetBuffer.  On any error this sets an exception and issues a
 * `return error_return;` from the function that invoked the macro:
 *
 *   - TypeError   if obj is a unicode object (it must be encoded first);
 *   - TypeError   if obj does not support the buffer API;
 *   - the exception left by PyObject_GetBuffer if it fails;
 *   - BufferError if the resulting view has more than one dimension
 *                 (the view is released before returning).
 *
 * On success the view has been acquired and it is the caller's job to call
 * PyBuffer_Release(viewp) on every subsequent exit path.
 *
 * The macro performs no NULL check on obj; callers whose data argument is
 * optional must guard the invocation themselves (if (data_obj) ...).
 *
 * The expansion is a single do { ... } while(0) statement WITHOUT a trailing
 * semicolon, so the semicolon written at the call site terminates it and the
 * macro is safe inside an unbraced if/else.
 */
#define GET_BUFFER_VIEW_OR_ERROUT(obj, viewp, error_return) do { \
        if (PyUnicode_Check((obj))) { \
            PyErr_SetString(PyExc_TypeError, \
                            "Unicode-objects must be encoded before hashing");\
            return error_return; \
        } \
        if (!PyObject_CheckBuffer((obj))) { \
            PyErr_SetString(PyExc_TypeError, \
                            "object supporting the buffer API required"); \
            return error_return; \
        } \
        if (PyObject_GetBuffer((obj), (viewp), PyBUF_SIMPLE) == -1) { \
            return error_return; \
        } \
        if ((viewp)->ndim > 1) { \
            PyErr_SetString(PyExc_BufferError, \
                            "Buffer must be single dimension"); \
            PyBuffer_Release((viewp)); \
            return error_return; \
        } \
    } while(0)

View File

@ -321,10 +321,10 @@ md5_init(md5_state_t *pms)
}
void
md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes)
md5_append(md5_state_t *pms, const md5_byte_t *data, unsigned int nbytes)
{
const md5_byte_t *p = data;
int left = nbytes;
unsigned int left = nbytes;
int offset = (pms->count[0] >> 3) & 63;
md5_word_t nbits = (md5_word_t)(nbytes << 3);
@ -333,7 +333,7 @@ md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes)
/* this special case is handled recursively */
if (nbytes > INT_MAX - offset) {
int overlap;
unsigned int overlap;
/* handle the append in two steps to prevent overflow */
overlap = 64 - offset;
@ -351,7 +351,7 @@ md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes)
/* Process an initial partial block. */
if (offset) {
int copy = (offset + nbytes > 64 ? 64 - offset : nbytes);
unsigned int copy = (offset + nbytes > 64 ? 64 - offset : nbytes);
memcpy(pms->buf + offset, p, copy);
if (offset + copy < 64)

View File

@ -79,7 +79,7 @@ extern "C"
void md5_init(md5_state_t *pms);
/* Append a string to the message. */
void md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes);
void md5_append(md5_state_t *pms, const md5_byte_t *data, unsigned int nbytes);
/* Finish the message and return the digest. */
void md5_finish(md5_state_t *pms, md5_byte_t digest[16]);

View File

@ -12,6 +12,7 @@
#include "Python.h"
#include "structmember.h"
#include "md5.h"
#include "hashlib.h"
typedef struct {
PyObject_HEAD
@ -50,14 +51,18 @@ md5_dealloc(md5object *md5p)
static PyObject *
md5_update(md5object *self, PyObject *args)
{
unsigned char *cp;
int len;
PyObject *data_obj;
Py_buffer view;
if (!PyArg_ParseTuple(args, "s#:update", &cp, &len))
if (!PyArg_ParseTuple(args, "O:update", &data_obj))
return NULL;
md5_append(&self->md5, cp, len);
GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view, NULL);
md5_append(&self->md5, (unsigned char*)view.buf,
Py_SAFE_DOWNCAST(view.len, Py_ssize_t, unsigned int));
PyBuffer_Release(&view);
Py_INCREF(Py_None);
return Py_None;
}
@ -261,18 +266,25 @@ static PyObject *
MD5_new(PyObject *self, PyObject *args)
{
md5object *md5p;
unsigned char *cp = NULL;
int len = 0;
PyObject *data_obj = NULL;
Py_buffer view;
if (!PyArg_ParseTuple(args, "|s#:new", &cp, &len))
if (!PyArg_ParseTuple(args, "|O:new", &data_obj))
return NULL;
if ((md5p = newmd5object()) == NULL)
GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view, NULL);
if ((md5p = newmd5object()) == NULL) {
PyBuffer_Release(&view);
return NULL;
}
if (cp)
md5_append(&md5p->md5, cp, len);
if (data_obj) {
md5_append(&md5p->md5, (unsigned char*)view.buf,
Py_SAFE_DOWNCAST(view.len, Py_ssize_t, unsigned int));
}
PyBuffer_Release(&view);
return (PyObject *)md5p;
}

View File

@ -18,6 +18,7 @@
#include "Python.h"
#include "structmember.h"
#include "hashlib.h"
/* Endianness testing and definitions */
@ -480,14 +481,17 @@ PyDoc_STRVAR(SHA256_update__doc__,
static PyObject *
SHA256_update(SHAobject *self, PyObject *args)
{
unsigned char *cp;
int len;
PyObject *obj;
Py_buffer buf;
if (!PyArg_ParseTuple(args, "s#:update", &cp, &len))
if (!PyArg_ParseTuple(args, "O:update", &obj))
return NULL;
sha_update(self, cp, len);
GET_BUFFER_VIEW_OR_ERROUT(obj, &buf, NULL);
sha_update(self, buf.buf, buf.len);
PyBuffer_Release(&buf);
Py_INCREF(Py_None);
return Py_None;
}
@ -614,14 +618,17 @@ SHA256_new(PyObject *self, PyObject *args, PyObject *kwdict)
{
static char *kwlist[] = {"string", NULL};
SHAobject *new;
unsigned char *cp = NULL;
int len;
PyObject *data_obj = NULL;
Py_buffer buf;
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|s#:new", kwlist,
&cp, &len)) {
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|O:new", kwlist,
&data_obj)) {
return NULL;
}
if (data_obj)
GET_BUFFER_VIEW_OR_ERROUT(data_obj, &buf, NULL);
if ((new = newSHA256object()) == NULL)
return NULL;
@ -631,8 +638,10 @@ SHA256_new(PyObject *self, PyObject *args, PyObject *kwdict)
Py_DECREF(new);
return NULL;
}
if (cp)
sha_update(new, cp, len);
if (data_obj) {
sha_update(new, buf.buf, buf.len);
PyBuffer_Release(&buf);
}
return (PyObject *)new;
}
@ -645,14 +654,17 @@ SHA224_new(PyObject *self, PyObject *args, PyObject *kwdict)
{
static char *kwlist[] = {"string", NULL};
SHAobject *new;
unsigned char *cp = NULL;
int len;
PyObject *data_obj = NULL;
Py_buffer buf;
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|s#:new", kwlist,
&cp, &len)) {
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|O:new", kwlist,
&data_obj)) {
return NULL;
}
if (data_obj)
GET_BUFFER_VIEW_OR_ERROUT(data_obj, &buf, NULL);
if ((new = newSHA224object()) == NULL)
return NULL;
@ -662,8 +674,10 @@ SHA224_new(PyObject *self, PyObject *args, PyObject *kwdict)
Py_DECREF(new);
return NULL;
}
if (cp)
sha_update(new, cp, len);
if (data_obj) {
sha_update(new, buf.buf, buf.len);
PyBuffer_Release(&buf);
}
return (PyObject *)new;
}

View File

@ -18,6 +18,7 @@
#include "Python.h"
#include "structmember.h"
#include "hashlib.h"
#ifdef PY_LONG_LONG /* If no PY_LONG_LONG, don't compile anything! */
@ -546,14 +547,17 @@ PyDoc_STRVAR(SHA512_update__doc__,
static PyObject *
SHA512_update(SHAobject *self, PyObject *args)
{
unsigned char *cp;
int len;
PyObject *obj;
Py_buffer buf;
if (!PyArg_ParseTuple(args, "s#:update", &cp, &len))
if (!PyArg_ParseTuple(args, "O:update", &obj))
return NULL;
sha512_update(self, cp, len);
GET_BUFFER_VIEW_OR_ERROUT(obj, &buf, NULL);
sha512_update(self, buf.buf, buf.len);
PyBuffer_Release(&buf);
Py_INCREF(Py_None);
return Py_None;
}
@ -680,14 +684,17 @@ SHA512_new(PyObject *self, PyObject *args, PyObject *kwdict)
{
static char *kwlist[] = {"string", NULL};
SHAobject *new;
unsigned char *cp = NULL;
int len;
PyObject *data_obj = NULL;
Py_buffer buf;
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|s#:new", kwlist,
&cp, &len)) {
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|O:new", kwlist,
&data_obj)) {
return NULL;
}
if (data_obj)
GET_BUFFER_VIEW_OR_ERROUT(data_obj, &buf, NULL);
if ((new = newSHA512object()) == NULL)
return NULL;
@ -697,8 +704,10 @@ SHA512_new(PyObject *self, PyObject *args, PyObject *kwdict)
Py_DECREF(new);
return NULL;
}
if (cp)
sha512_update(new, cp, len);
if (data_obj) {
sha512_update(new, buf.buf, buf.len);
PyBuffer_Release(&buf);
}
return (PyObject *)new;
}
@ -711,14 +720,17 @@ SHA384_new(PyObject *self, PyObject *args, PyObject *kwdict)
{
static char *kwlist[] = {"string", NULL};
SHAobject *new;
unsigned char *cp = NULL;
int len;
PyObject *data_obj = NULL;
Py_buffer buf;
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|s#:new", kwlist,
&cp, &len)) {
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|O:new", kwlist,
&data_obj)) {
return NULL;
}
if (data_obj)
GET_BUFFER_VIEW_OR_ERROUT(data_obj, &buf, NULL);
if ((new = newSHA384object()) == NULL)
return NULL;
@ -728,8 +740,10 @@ SHA384_new(PyObject *self, PyObject *args, PyObject *kwdict)
Py_DECREF(new);
return NULL;
}
if (cp)
sha512_update(new, cp, len);
if (data_obj) {
sha512_update(new, buf.buf, buf.len);
PyBuffer_Release(&buf);
}
return (PyObject *)new;
}

View File

@ -17,6 +17,7 @@
#include "Python.h"
#include "structmember.h"
#include "hashlib.h"
/* Endianness testing and definitions */
@ -236,9 +237,9 @@ sha_init(SHAobject *sha_info)
/* update the SHA digest */
static void
sha_update(SHAobject *sha_info, SHA_BYTE *buffer, int count)
sha_update(SHAobject *sha_info, SHA_BYTE *buffer, unsigned int count)
{
int i;
unsigned int i;
SHA_INT32 clo;
clo = sha_info->count_lo + ((SHA_INT32) count << 3);
@ -428,14 +429,18 @@ PyDoc_STRVAR(SHA_update__doc__,
static PyObject *
SHA_update(SHAobject *self, PyObject *args)
{
unsigned char *cp;
int len;
PyObject *data_obj;
Py_buffer view;
if (!PyArg_ParseTuple(args, "s#:update", &cp, &len))
if (!PyArg_ParseTuple(args, "O:update", &data_obj))
return NULL;
sha_update(self, cp, len);
GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view, NULL);
sha_update(self, (unsigned char*)view.buf,
Py_SAFE_DOWNCAST(view.len, Py_ssize_t, unsigned int));
PyBuffer_Release(&view);
Py_INCREF(Py_None);
return Py_None;
}
@ -535,26 +540,34 @@ SHA_new(PyObject *self, PyObject *args, PyObject *kwdict)
{
static char *kwlist[] = {"string", NULL};
SHAobject *new;
unsigned char *cp = NULL;
int len;
PyObject *data_obj = NULL;
Py_buffer view;
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|s#:new", kwlist,
&cp, &len)) {
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|O:new", kwlist,
&data_obj)) {
return NULL;
}
if ((new = newSHAobject()) == NULL)
GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view, NULL);
if ((new = newSHAobject()) == NULL) {
PyBuffer_Release(&view);
return NULL;
}
sha_init(new);
if (PyErr_Occurred()) {
Py_DECREF(new);
PyBuffer_Release(&view);
return NULL;
}
if (cp)
sha_update(new, cp, len);
if (data_obj) {
sha_update(new, (unsigned char*)view.buf,
Py_SAFE_DOWNCAST(view.len, Py_ssize_t, unsigned int));
}
PyBuffer_Release(&view);
return (PyObject *)new;
}