From 3ce5d9207e66d61d4b0502cf47ed2d2bcdd2212f Mon Sep 17 00:00:00 2001
From: Neal Norwitz
Date: Sun, 24 Aug 2008 07:08:55 +0000
Subject: [PATCH] Closes release blocker #3627.

Merged revisions 65335 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

TESTED=./python -E -tt ./Lib/test/regrtest.py -uall (both debug and opt)

........
  r65335 | neal.norwitz | 2008-07-31 10:17:14 -0700 (Thu, 31 Jul 2008) | 1 line

  Security patches from Apple: prevent int overflow when allocating memory
........
---
 Lib/test/seq_tests.py     | 12 +++--
 Lib/test/support.py       | 25 ++++++++++
 Lib/test/test_bigmem.py   | 97 +++++++++++++++++++++++++++++++++++++--
 Misc/NEWS                 |  2 +
 Modules/gcmodule.c        |  7 ++-
 Modules/mmapmodule.c      |  2 +-
 Objects/bytearrayobject.c |  5 ++
 Objects/bytesobject.c     |  8 +++-
 Objects/longobject.c      |  2 +
 Objects/tupleobject.c     |  5 +-
 Objects/unicodeobject.c   | 62 +++++++++++++++++++------
 11 files changed, 201 insertions(+), 26 deletions(-)

diff --git a/Lib/test/seq_tests.py b/Lib/test/seq_tests.py
index 962cfe12ea2..5148d4bac23 100644
--- a/Lib/test/seq_tests.py
+++ b/Lib/test/seq_tests.py
@@ -304,11 +304,13 @@ class CommonTest(unittest.TestCase):
         self.assertEqual(id(s), id(s*1))
 
     def test_bigrepeat(self):
-        x = self.type2test([0])
-        x *= 2**16
-        self.assertRaises(MemoryError, x.__mul__, 2**16)
-        if hasattr(x, '__imul__'):
-            self.assertRaises(MemoryError, x.__imul__, 2**16)
+        import sys
+        if sys.maxsize <= 2147483647:
+            x = self.type2test([0])
+            x *= 2**16
+            self.assertRaises(MemoryError, x.__mul__, 2**16)
+            if hasattr(x, '__imul__'):
+                self.assertRaises(MemoryError, x.__imul__, 2**16)
 
     def test_subscript(self):
         a = self.type2test([10, 11])
diff --git a/Lib/test/support.py b/Lib/test/support.py
index d02698485e0..24aff5ebd9e 100644
--- a/Lib/test/support.py
+++ b/Lib/test/support.py
@@ -68,6 +68,7 @@ verbose = 1              # Flag set to 0 by regrtest.py
 use_resources = None     # Flag set to [] by regrtest.py
 max_memuse = 0           # Disable bigmem tests (they will still be run with
                          # small sizes, to make sure they work.)
+real_max_memuse = 0
 
 # _original_stdout is meant to hold stdout at the time regrtest began.
 # This may be "the real" stdout, or IDLE's emulation of stdout, or whatever.
@@ -599,12 +600,14 @@ def run_with_locale(catstr, *locales):
 _1M = 1024*1024
 _1G = 1024 * _1M
 _2G = 2 * _1G
+_4G = 4 * _1G
 
 MAX_Py_ssize_t = sys.maxsize
 
 def set_memlimit(limit):
     import re
     global max_memuse
+    global real_max_memuse
     sizes = {
         'k': 1024,
         'm': _1M,
@@ -616,6 +619,7 @@ def set_memlimit(limit):
     if m is None:
         raise ValueError('Invalid memory limit %r' % (limit,))
     memlimit = int(float(m.group(1)) * sizes[m.group(3).lower()])
+    real_max_memuse = memlimit
     if memlimit > MAX_Py_ssize_t:
         memlimit = MAX_Py_ssize_t
     if memlimit < _2G - 1:
@@ -661,6 +665,27 @@ def bigmemtest(minsize, memuse, overhead=5*_1M):
         return wrapper
     return decorator
 
+def precisionbigmemtest(size, memuse, overhead=5*_1M):
+    def decorator(f):
+        def wrapper(self):
+            if not real_max_memuse:
+                maxsize = 5147
+            else:
+                maxsize = size
+
+            if real_max_memuse and real_max_memuse < maxsize * memuse:
+                if verbose:
+                    sys.stderr.write("Skipping %s because of memory "
+                                     "constraint\n" % (f.__name__,))
+                return
+
+            return f(self, maxsize)
+        wrapper.size = size
+        wrapper.memuse = memuse
+        wrapper.overhead = overhead
+        return wrapper
+    return decorator
+
 def bigaddrspacetest(f):
     """Decorator for tests that fill the address space."""
     def wrapper(self):
diff --git a/Lib/test/test_bigmem.py b/Lib/test/test_bigmem.py
index 30e1bdb18cb..5edc13907bc 100644
--- a/Lib/test/test_bigmem.py
+++ b/Lib/test/test_bigmem.py
@@ -1,5 +1,5 @@
 from test import support
-from test.support import bigmemtest, _1G, _2G
+from test.support import bigmemtest, _1G, _2G, _4G, precisionbigmemtest
 
 import unittest
 import operator
@@ -53,6 +53,22 @@ class StrTest(unittest.TestCase):
         self.assertEquals(s[lpadsize:-rpadsize], SUBSTR)
         self.assertEquals(s.strip(), SUBSTR.strip())
 
+    @precisionbigmemtest(size=_2G - 1, memuse=1)
+    def test_center_unicode(self, size):
+        SUBSTR = ' abc def ghi'
+        try:
+            s = SUBSTR.center(size)
+        except OverflowError:
+            pass # acceptable on 32-bit
+        else:
+            self.assertEquals(len(s), size)
+            lpadsize = rpadsize = (len(s) - len(SUBSTR)) // 2
+            if len(s) % 2:
+                lpadsize += 1
+            self.assertEquals(s[lpadsize:-rpadsize], SUBSTR)
+            self.assertEquals(s.strip(), SUBSTR.strip())
+            del s
+
     @bigmemtest(minsize=_2G, memuse=2)
     def test_count(self, size):
         SUBSTR = ' abc def ghi'
@@ -69,10 +85,51 @@ class StrTest(unittest.TestCase):
         s = b'.' * size
         self.assertEquals(len(s.decode('utf-8')), size)
 
+    def basic_encode_test(self, size, enc, c='.', expectedsize=None):
+        if expectedsize is None:
+            expectedsize = size
+
+        s = c * size
+        self.assertEquals(len(s.encode(enc)), expectedsize)
+
     @bigmemtest(minsize=_2G + 2, memuse=3)
     def test_encode(self, size):
-        s = '.' * size
-        self.assertEquals(len(s.encode('utf-8')), size)
+        return self.basic_encode_test(size, 'utf-8')
+
+    @precisionbigmemtest(size=_4G / 6 + 2, memuse=2)
+    def test_encode_raw_unicode_escape(self, size):
+        try:
+            return self.basic_encode_test(size, 'raw_unicode_escape')
+        except MemoryError:
+            pass # acceptable on 32-bit
+
+    @precisionbigmemtest(size=_4G / 5 + 70, memuse=3)
+    def test_encode_utf7(self, size):
+        try:
+            return self.basic_encode_test(size, 'utf7')
+        except MemoryError:
+            pass # acceptable on 32-bit
+
+    @precisionbigmemtest(size=_4G / 4 + 5, memuse=6)
+    def test_encode_utf32(self, size):
+        try:
+            return self.basic_encode_test(size, 'utf32', expectedsize=4*size+4)
+        except MemoryError:
+            pass # acceptable on 32-bit
+
+    @precisionbigmemtest(size=_2G-1, memuse=2)
+    def test_decodeascii(self, size):
+        return self.basic_encode_test(size, 'ascii', c='A')
+
+    @precisionbigmemtest(size=_4G / 5, memuse=6+2)
+    def test_unicode_repr_oflw(self, size):
+        try:
+            s = "\uAAAA"*size
+            r = repr(s)
+        except MemoryError:
+            pass # acceptable on 32-bit
+        else:
+            self.failUnless(s == eval(r))
 
     @bigmemtest(minsize=_2G, memuse=2)
     def test_endswith(self, size):
@@ -458,6 +515,11 @@ class StrTest(unittest.TestCase):
         self.assertEquals(s.count('\\'), size)
         self.assertEquals(s.count('0'), size * 2)
 
+    @bigmemtest(minsize=2**32 / 5, memuse=6+2)
+    def test_unicode_repr(self, size):
+        s = "\uAAAA" * size
+        self.failUnless(len(repr(s)) > size)
+
     # This test is meaningful even with size < 2G, as long as the
     # doubled string is > 2G (but it tests more if both are > 2G :)
     @bigmemtest(minsize=_1G + 2, memuse=3)
@@ -641,6 +703,35 @@ class TupleTest(unittest.TestCase):
     def test_repeat_large(self, size):
         return self.basic_test_repeat(size)
 
+    @bigmemtest(minsize=_1G - 1, memuse=12)
+    def test_repeat_large_2(self, size):
+        return self.basic_test_repeat(size)
+
+    @precisionbigmemtest(size=_1G - 1, memuse=9)
+    def test_from_2G_generator(self, size):
+        try:
+            t = tuple(range(size))
+        except MemoryError:
+            pass # acceptable on 32-bit
+        else:
+            count = 0
+            for item in t:
+                self.assertEquals(item, count)
+                count += 1
+            self.assertEquals(count, size)
+
+    @precisionbigmemtest(size=_1G - 25, memuse=9)
+    def test_from_almost_2G_generator(self, size):
+        try:
+            t = tuple(range(size))
+            count = 0
+            for item in t:
+                self.assertEquals(item, count)
+                count += 1
+            self.assertEquals(count, size)
+        except MemoryError:
+            pass # acceptable, expected on 32-bit
+
     # Like test_concat, split in two.
     def basic_test_repr(self, size):
         t = (0,) * size
diff --git a/Misc/NEWS b/Misc/NEWS
index 314ce5015dc..cf9b8dd85d9 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,8 @@ What's New in Python 3.0 release candidate 1
 Core and Builtins
 -----------------
 
+- Apply security patches from Apple.
+
 - Fix crashes on memory allocation failure found with failmalloc.
 
 - Fix memory leaks found with valgrind and update suppressions file.
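Note on the C changes that follow: the allocation guards below all use the same idiom -- compare the untrusted count against PY_SSIZE_T_MAX minus the fixed header overhead *before* doing any arithmetic, so the comparison itself can never wrap. A minimal standalone sketch of that idiom (illustrative only, not part of the patch; the constant and struct are stand-ins for PY_SSIZE_T_MAX and PyGC_Head):

/* Sketch of the check-before-add idiom (stand-in names; not patch code). */
#include <stdint.h>
#include <stdio.h>

#define FAKE_SSIZE_T_MAX ((size_t)PTRDIFF_MAX)   /* stand-in for PY_SSIZE_T_MAX */

struct fake_header { void *prev; void *next; };  /* stand-in for PyGC_Head */

/* Returns 1 if basicsize + sizeof(struct fake_header) would exceed the limit. */
static int alloc_would_overflow(size_t basicsize)
{
    /* Subtract the fixed overhead from the limit instead of adding it to the
       untrusted size: this comparison cannot overflow. */
    return basicsize > FAKE_SSIZE_T_MAX - sizeof(struct fake_header);
}

int main(void)
{
    printf("%d\n", alloc_would_overflow(100));         /* 0: allocation is safe */
    printf("%d\n", alloc_would_overflow((size_t)-1));  /* 1: must be rejected */
    return 0;
}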
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 51bcd79f922..f7eef4dedb7 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -1318,7 +1318,10 @@ PyObject *
 _PyObject_GC_Malloc(size_t basicsize)
 {
     PyObject *op;
-    PyGC_Head *g = (PyGC_Head *)PyObject_MALLOC(
+    PyGC_Head *g;
+    if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head))
+        return PyErr_NoMemory();
+    g = (PyGC_Head *)PyObject_MALLOC(
         sizeof(PyGC_Head) + basicsize);
     if (g == NULL)
         return PyErr_NoMemory();
@@ -1361,6 +1364,8 @@ _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
 {
     const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
     PyGC_Head *g = AS_GC(op);
+    if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head))
+        return (PyVarObject *)PyErr_NoMemory();
     g = (PyGC_Head *)PyObject_REALLOC(g, sizeof(PyGC_Head) + basicsize);
     if (g == NULL)
         return (PyVarObject *)PyErr_NoMemory();
diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c
index 8abf0ff991b..9adef9bebb2 100644
--- a/Modules/mmapmodule.c
+++ b/Modules/mmapmodule.c
@@ -245,7 +245,7 @@ mmap_read_method(mmap_object *self,
         return(NULL);
 
     /* silently 'adjust' out-of-range requests */
-    if ((self->pos + num_bytes) > self->size) {
+    if (num_bytes > self->size - self->pos) {
         num_bytes -= (self->pos+num_bytes) - self->size;
     }
     result = PyByteArray_FromStringAndSize(self->data+self->pos, num_bytes);
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index da11249235d..201d294e8ad 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -121,6 +121,11 @@ PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
         return NULL;
     }
 
+    /* Prevent buffer overflow when setting alloc to size+1. */
+    if (size == PY_SSIZE_T_MAX) {
+        return PyErr_NoMemory();
+    }
+
     new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
     if (new == NULL)
         return NULL;
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index bfb4ff8fd3c..24228eae962 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -83,6 +83,12 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
         return (PyObject *)op;
     }
 
+    if (size > PY_SSIZE_T_MAX - sizeof(PyBytesObject)) {
+        PyErr_SetString(PyExc_OverflowError,
+            "byte string is too large");
+        return NULL;
+    }
+
     /* Inline PyObject_NewVar */
     op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
     if (op == NULL)
@@ -111,7 +117,7 @@ PyBytes_FromString(const char *str)
 
     assert(str != NULL);
     size = strlen(str);
-    if (size > PY_SSIZE_T_MAX) {
+    if (size > PY_SSIZE_T_MAX - sizeof(PyBytesObject)) {
         PyErr_SetString(PyExc_OverflowError,
             "byte string is too long");
         return NULL;
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 3aa518b9175..8f7ad4c93a1 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -139,6 +139,8 @@ _PyLong_New(Py_ssize_t size)
         PyErr_NoMemory();
         return NULL;
     }
+    /* XXX(nnorwitz): This can overflow --
+       PyObject_NEW_VAR / _PyObject_VAR_SIZE need to detect overflow */
     return (PyLongObject*)PyObject_INIT_VAR(result, &PyLong_Type, size);
 }
 
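The mmapmodule.c change above is the same idea applied to a bounds test: `pos + num_bytes` can wrap around, while `num_bytes > size - pos` cannot as long as pos <= size. A small self-contained demonstration (illustrative only, with unsigned stand-in types rather than the module's own fields):

/* Sketch: why the mmap bounds test is rewritten as a subtraction. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    size_t size = 1000, pos = 900;
    size_t num_bytes = SIZE_MAX - 100;          /* absurdly large request */

    /* Old form: pos + num_bytes wraps around to 799, so the guard that
       should clamp the request never fires. */
    int old_guard = (pos + num_bytes) > size;   /* 0: overflow slips through */

    /* New form: pure subtraction; cannot wrap while pos <= size. */
    int new_guard = num_bytes > size - pos;     /* 1: request gets clamped */

    printf("old guard fires: %d, new guard fires: %d\n", old_guard, new_guard);
    return 0;
}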
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index 7ff957e6660..963d90e1a9e 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -60,11 +60,12 @@ PyTuple_New(register Py_ssize_t size)
         Py_ssize_t nbytes = size * sizeof(PyObject *);
         /* Check for overflow */
         if (nbytes / sizeof(PyObject *) != (size_t)size ||
-            (nbytes += sizeof(PyTupleObject) - sizeof(PyObject *))
-            <= 0)
+            (nbytes > PY_SSIZE_T_MAX - sizeof(PyTupleObject) - sizeof(PyObject *)))
         {
             return PyErr_NoMemory();
         }
+        nbytes += sizeof(PyTupleObject) - sizeof(PyObject *);
+
         op = PyObject_GC_NewVar(PyTupleObject, &PyTuple_Type, size);
         if (op == NULL)
             return NULL;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index c85a063db58..847b61df918 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -311,6 +311,11 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize_t length)
         return unicode_empty;
     }
 
+    /* Ensure we won't overflow the size. */
+    if (length > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) {
+        return (PyUnicodeObject *)PyErr_NoMemory();
+    }
+
     /* Unicode freelist & memory allocation */
     if (free_list) {
         unicode = free_list;
@@ -1860,6 +1865,9 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
     if (size == 0)
         return PyBytes_FromStringAndSize(NULL, 0);
 
+    if (cbAllocated / 5 != size)
+        return PyErr_NoMemory();
+
     v = PyByteArray_FromStringAndSize(NULL, cbAllocated);
     if (v == NULL)
         return NULL;
@@ -2452,8 +2460,9 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s,
 {
     PyObject *v, *result;
     unsigned char *p;
+    Py_ssize_t nsize, bytesize;
 #ifndef Py_UNICODE_WIDE
-    int i, pairs;
+    Py_ssize_t i, pairs;
 #else
     const int pairs = 0;
 #endif
@@ -2481,8 +2490,11 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s,
             0xDC00 <= s[i+1] && s[i+1] <= 0xDFFF)
             pairs++;
 #endif
-    v = PyByteArray_FromStringAndSize(NULL,
-                  4 * (size - pairs + (byteorder == 0)));
+    nsize = (size - pairs + (byteorder == 0));
+    bytesize = nsize * 4;
+    if (bytesize / 4 != nsize)
+        return PyErr_NoMemory();
+    v = PyByteArray_FromStringAndSize(NULL, bytesize);
     if (v == NULL)
         return NULL;
 
@@ -2726,8 +2738,9 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s,
 {
     PyObject *v, *result;
     unsigned char *p;
+    Py_ssize_t nsize, bytesize;
 #ifdef Py_UNICODE_WIDE
-    int i, pairs;
+    Py_ssize_t i, pairs;
 #else
     const int pairs = 0;
 #endif
@@ -2750,8 +2763,15 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s,
     if (s[i] >= 0x10000)
         pairs++;
 #endif
-    v = PyByteArray_FromStringAndSize(NULL,
-                  2 * (size + pairs + (byteorder == 0)));
+    /* 2 * (size + pairs + (byteorder == 0)) */
+    if (size > PY_SSIZE_T_MAX ||
+        size > PY_SSIZE_T_MAX - pairs - (byteorder == 0))
+        return PyErr_NoMemory();
+    nsize = size + pairs + (byteorder == 0);
+    bytesize = nsize * 2;
+    if (bytesize / 2 != nsize)
+        return PyErr_NoMemory();
+    v = PyByteArray_FromStringAndSize(NULL, bytesize);
     if (v == NULL)
         return NULL;
 
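The UTF-16/UTF-32 hunks above guard the `nsize * 2` and `nsize * 4` products with a multiply-then-divide-back test: if the product wrapped, dividing it by the same factor no longer yields nsize. A standalone sketch of that test (illustrative only; unsigned types keep the sketch free of undefined behaviour, while the patch applies the same comparison to Py_ssize_t values):

/* Sketch of the multiply-then-divide-back overflow test. */
#include <stdint.h>
#include <stdio.h>

/* Returns 1 and stores nsize * 4 in *bytesize if the product fits,
   0 if the multiplication would overflow. */
static int mul4_checked(uint64_t nsize, uint64_t *bytesize)
{
    uint64_t product = nsize * 4;
    if (product / 4 != nsize)
        return 0;               /* product wrapped around */
    *bytesize = product;
    return 1;
}

int main(void)
{
    uint64_t bytesize;
    printf("%d\n", mul4_checked(1000, &bytesize));            /* 1: 4000 fits */
    printf("%d\n", mul4_checked(UINT64_MAX / 3, &bytesize));  /* 0: overflow */
    return 0;
}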
@@ -3082,6 +3102,12 @@ PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
     PyObject *repr, *result;
     char *p;
 
+#ifdef Py_UNICODE_WIDE
+    const Py_ssize_t expandsize = 10;
+#else
+    const Py_ssize_t expandsize = 6;
+#endif
+
     /* XXX(nnorwitz): rather than over-allocating, it would be
        better to choose a different scheme.  Perhaps scan the
        first N-chars of the string and allocate based on that size.
@@ -3100,12 +3126,12 @@ PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
        escape.
     */
+    if (size > (PY_SSIZE_T_MAX - 2 - 1) / expandsize)
+        return PyErr_NoMemory();
+
     repr = PyByteArray_FromStringAndSize(NULL,
-#ifdef Py_UNICODE_WIDE
-        + 10*size
-#else
-        + 6*size
-#endif
+        2
+        + expandsize*size
         + 1);
     if (repr == NULL)
         return NULL;
 
@@ -3353,10 +3379,15 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
     char *q;
 
 #ifdef Py_UNICODE_WIDE
-    repr = PyByteArray_FromStringAndSize(NULL, 10 * size);
+    const Py_ssize_t expandsize = 10;
 #else
-    repr = PyByteArray_FromStringAndSize(NULL, 6 * size);
+    const Py_ssize_t expandsize = 6;
 #endif
+
+    if (size > PY_SSIZE_T_MAX / expandsize)
+        return PyErr_NoMemory();
+
+    repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
     if (repr == NULL)
         return NULL;
     if (size == 0)
@@ -5747,6 +5778,11 @@ PyUnicodeObject *pad(PyUnicodeObject *self,
         return self;
     }
 
+    if (left > PY_SSIZE_T_MAX - self->length ||
+        right > PY_SSIZE_T_MAX - (left + self->length)) {
+        PyErr_SetString(PyExc_OverflowError, "padded string is too long");
+        return NULL;
+    }
     u = _PyUnicode_New(left + self->length + right);
     if (u) {
         if (left)
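The unicode-escape and raw-unicode-escape encoders above bound the worst-case expansion by dividing PY_SSIZE_T_MAX by expandsize instead of multiplying size by it, so the limit is computed without any overflow-prone arithmetic. A minimal sketch of that rearrangement (illustrative only; MAX and the function name are stand-ins, not CPython API):

/* Sketch: bounding a worst-case expansion with a division. */
#include <stdint.h>
#include <stdio.h>

#define MAX ((uint64_t)INT64_MAX)   /* stand-in for PY_SSIZE_T_MAX */

/* 1 if expandsize*size + 2 + 1 would exceed MAX, 0 if the allocation is safe.
   The division happens on the constant side, so no intermediate result can
   overflow. */
static int escape_alloc_too_big(uint64_t size, uint64_t expandsize)
{
    return size > (MAX - 2 - 1) / expandsize;
}

int main(void)
{
    printf("%d\n", escape_alloc_too_big(1u << 20, 10));  /* 0: safe to allocate */
    printf("%d\n", escape_alloc_too_big(MAX / 6, 10));   /* 1: would overflow */
    return 0;
}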