From 9e719b6eba2ef9317c48a1c1bd5e636469c0db56 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 28 Feb 2011 23:48:16 +0000 Subject: [PATCH] Merged revisions 88460,88464,88466,88486,88511,88652 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/py3k MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ........ r88460 | antoine.pitrou | 2011-02-21 19:03:13 +0100 (lun., 21 févr. 2011) | 4 lines Issue #10276: Fix the results of zlib.crc32() and zlib.adler32() on buffers larger than 4GB. Patch by Nadeem Vawda. ........ r88464 | antoine.pitrou | 2011-02-21 20:05:08 +0100 (lun., 21 févr. 2011) | 3 lines Fix issues on 32-bit systems introduced by r88460 ........ r88466 | antoine.pitrou | 2011-02-21 20:28:40 +0100 (lun., 21 févr. 2011) | 3 lines Fix compile error under MSVC introduced by r88460. ........ r88486 | antoine.pitrou | 2011-02-22 00:41:12 +0100 (mar., 22 févr. 2011) | 5 lines Issue #4681: Allow mmap() to work on file sizes and offsets larger than 4GB, even on 32-bit builds. Initial patch by Ross Lagerwall, adapted for 32-bit Windows. ........ r88511 | antoine.pitrou | 2011-02-22 22:42:56 +0100 (mar., 22 févr. 2011) | 4 lines Issue #11277: finally fix Snow Leopard crash following r88460. (probably an OS-related issue with mmap) ........ r88652 | antoine.pitrou | 2011-02-26 16:58:05 +0100 (sam., 26 févr. 2011) | 4 lines Issue #9931: Fix hangs in GUI tests under Windows in certain conditions. Patch by Hirokazu Yamamoto. ........ 
--- Lib/test/support.py | 34 ++++++++++++++ Lib/test/test_mmap.py | 61 ++++++++++++++++++++++-- Lib/test/test_zlib.py | 31 +++++++++++- Misc/NEWS | 7 +++ Modules/mmapmodule.c | 107 +++++++++++++++++++++++------------------- Modules/zlibmodule.c | 24 +++++++++- 6 files changed, 210 insertions(+), 54 deletions(-) diff --git a/Lib/test/support.py b/Lib/test/support.py index 9a4e1b1f11e..9fb3ee00c10 100644 --- a/Lib/test/support.py +++ b/Lib/test/support.py @@ -198,6 +198,36 @@ def forget(modname): # is exited) but there is a .pyo file. unlink(os.path.join(dirname, modname + '.pyo')) +# On some platforms, should not run gui test even if it is allowed +# in `use_resources'. +if sys.platform.startswith('win'): + import ctypes + import ctypes.wintypes + def _is_gui_available(): + UOI_FLAGS = 1 + WSF_VISIBLE = 0x0001 + class USEROBJECTFLAGS(ctypes.Structure): + _fields_ = [("fInherit", ctypes.wintypes.BOOL), + ("fReserved", ctypes.wintypes.BOOL), + ("dwFlags", ctypes.wintypes.DWORD)] + dll = ctypes.windll.user32 + h = dll.GetProcessWindowStation() + if not h: + raise ctypes.WinError() + uof = USEROBJECTFLAGS() + needed = ctypes.wintypes.DWORD() + res = dll.GetUserObjectInformationW(h, + UOI_FLAGS, + ctypes.byref(uof), + ctypes.sizeof(uof), + ctypes.byref(needed)) + if not res: + raise ctypes.WinError() + return bool(uof.dwFlags & WSF_VISIBLE) +else: + def _is_gui_available(): + return True + def is_resource_enabled(resource): """Test whether a resource is enabled. Known resources are set by regrtest.py.""" @@ -208,6 +238,8 @@ def requires(resource, msg=None): If the caller's module is __main__ then automatically return True. 
The possibility of False being returned occurs when regrtest.py is executing.""" + if resource == 'gui' and not _is_gui_available(): + raise unittest.SkipTest("Cannot use the 'gui' resource") # see if the caller's module is __main__ - if so, treat as if # the resource was set if sys._getframe(1).f_globals.get("__name__") == "__main__": @@ -869,6 +901,8 @@ def _id(obj): return obj def requires_resource(resource): + if resource == 'gui' and not _is_gui_available(): + return unittest.skip("resource 'gui' is not available") if is_resource_enabled(resource): return _id else: diff --git a/Lib/test/test_mmap.py b/Lib/test/test_mmap.py index 62569b98792..d6addff0a38 100644 --- a/Lib/test/test_mmap.py +++ b/Lib/test/test_mmap.py @@ -1,6 +1,6 @@ -from test.support import TESTFN, run_unittest, import_module +from test.support import TESTFN, run_unittest, import_module, unlink, requires import unittest -import os, re, itertools, socket +import os, re, itertools, socket, sys # Skip test if we can't import mmap. mmap = import_module('mmap') @@ -636,8 +636,63 @@ class MmapTests(unittest.TestCase): finally: s.close() + +class LargeMmapTests(unittest.TestCase): + + def setUp(self): + unlink(TESTFN) + + def tearDown(self): + unlink(TESTFN) + + def _working_largefile(self): + # Only run if the current filesystem supports large files. 
+ f = open(TESTFN, 'wb', buffering=0) + try: + f.seek(0x80000001) + f.write(b'x') + f.flush() + except (IOError, OverflowError): + raise unittest.SkipTest("filesystem does not have largefile support") + finally: + f.close() + unlink(TESTFN) + + def test_large_offset(self): + if sys.platform[:3] == 'win' or sys.platform == 'darwin': + requires('largefile', + 'test requires %s bytes and a long time to run' % str(0x180000000)) + self._working_largefile() + with open(TESTFN, 'wb') as f: + f.seek(0x14FFFFFFF) + f.write(b" ") + + with open(TESTFN, 'rb') as f: + m = mmap.mmap(f.fileno(), 0, offset=0x140000000, access=mmap.ACCESS_READ) + try: + self.assertEqual(m[0xFFFFFFF], 32) + finally: + m.close() + + def test_large_filesize(self): + if sys.platform[:3] == 'win' or sys.platform == 'darwin': + requires('largefile', + 'test requires %s bytes and a long time to run' % str(0x180000000)) + self._working_largefile() + with open(TESTFN, 'wb') as f: + f.seek(0x17FFFFFFF) + f.write(b" ") + + with open(TESTFN, 'rb') as f: + m = mmap.mmap(f.fileno(), 0x10000, access=mmap.ACCESS_READ) + try: + self.assertEqual(m.size(), 0x180000000) + finally: + m.close() + + def test_main(): - run_unittest(MmapTests) + run_unittest(MmapTests, LargeMmapTests) if __name__ == '__main__': test_main() diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index 4b16efb2103..b27049cb862 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -2,10 +2,16 @@ import unittest from test import support import binascii import random -from test.support import precisionbigmemtest, _1G +import sys +from test.support import precisionbigmemtest, _1G, _4G zlib = support.import_module('zlib') +try: + import mmap +except ImportError: + mmap = None + class ChecksumTestCase(unittest.TestCase): # checksum test cases @@ -57,6 +63,28 @@ class ChecksumTestCase(unittest.TestCase): self.assertEqual(binascii.crc32(b'spam'), zlib.crc32(b'spam')) +# Issue #10276 - check that inputs >=4GB are handled correctly. 
+class ChecksumBigBufferTestCase(unittest.TestCase): + + def setUp(self): + with open(support.TESTFN, "wb+") as f: + f.seek(_4G) + f.write(b"asdf") + with open(support.TESTFN, "rb") as f: + self.mapping = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) + + def tearDown(self): + self.mapping.close() + support.unlink(support.TESTFN) + + @unittest.skipUnless(mmap, "mmap() is not available.") + @unittest.skipUnless(sys.maxsize > _4G, "Can't run on a 32-bit system.") + @unittest.skipUnless(support.is_resource_enabled("largefile"), + "May use lots of disk space.") + def test_big_buffer(self): + self.assertEqual(zlib.crc32(self.mapping), 3058686908) + self.assertEqual(zlib.adler32(self.mapping), 82837919) + class ExceptionTestCase(unittest.TestCase): # make sure we generate some expected errors @@ -567,6 +595,7 @@ LAERTES def test_main(): support.run_unittest( ChecksumTestCase, + ChecksumBigBufferTestCase, ExceptionTestCase, CompressTestCase, CompressObjectTestCase diff --git a/Misc/NEWS b/Misc/NEWS index 2f05aebf99a..d241c2e7e74 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -37,6 +37,13 @@ Core and Builtins Library ------- +- Issue #10276: Fix the results of zlib.crc32() and zlib.adler32() on buffers + larger than 4GB. Patch by Nadeem Vawda. + +- Issue #4681: Allow mmap() to work on file sizes and offsets larger than + 4GB, even on 32-bit builds. Initial patch by Ross Lagerwall, adapted for + 32-bit Windows. + - email.header.Header was incorrectly encoding folding white space when rfc2047-encoding header values with embedded newlines, leaving them without folding whitespace. 
It now uses the continuation_ws, as it diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index e47e41cea0b..c8c8cb221a7 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -90,7 +90,11 @@ typedef struct { char * data; size_t size; size_t pos; /* relative to offset */ - size_t offset; +#ifdef MS_WINDOWS + PY_LONG_LONG offset; +#else + off_t offset; +#endif int exports; #ifdef MS_WINDOWS @@ -435,7 +439,11 @@ mmap_size_method(mmap_object *self, PyErr_SetFromErrno(mmap_module_error); return NULL; } - return PyLong_FromSsize_t(buf.st_size); +#ifdef HAVE_LARGEFILE_SUPPORT + return PyLong_FromLongLong(buf.st_size); +#else + return PyLong_FromLong(buf.st_size); +#endif } #endif /* UNIX */ } @@ -469,17 +477,10 @@ mmap_resize_method(mmap_object *self, CloseHandle(self->map_handle); self->map_handle = NULL; /* Move to the desired EOF position */ -#if SIZEOF_SIZE_T > 4 newSizeHigh = (DWORD)((self->offset + new_size) >> 32); newSizeLow = (DWORD)((self->offset + new_size) & 0xFFFFFFFF); off_hi = (DWORD)(self->offset >> 32); off_lo = (DWORD)(self->offset & 0xFFFFFFFF); -#else - newSizeHigh = 0; - newSizeLow = (DWORD)(self->offset + new_size); - off_hi = 0; - off_lo = (DWORD)self->offset; -#endif SetFilePointer(self->file_handle, newSizeLow, &newSizeHigh, FILE_BEGIN); /* Change the size of the file */ @@ -1020,6 +1021,12 @@ _GetMapSize(PyObject *o, const char* param) } #ifdef UNIX +#ifdef HAVE_LARGEFILE_SUPPORT +#define _Py_PARSE_OFF_T "L" +#else +#define _Py_PARSE_OFF_T "l" +#endif + static PyObject * new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) { @@ -1027,8 +1034,9 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) struct stat st; #endif mmap_object *m_obj; - PyObject *map_size_obj = NULL, *offset_obj = NULL; - Py_ssize_t map_size, offset; + PyObject *map_size_obj = NULL; + Py_ssize_t map_size; + off_t offset = 0; int fd, flags = MAP_SHARED, prot = PROT_WRITE | PROT_READ; int devzero = -1; int access = 
(int)ACCESS_DEFAULT; @@ -1036,16 +1044,18 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) "flags", "prot", "access", "offset", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|iiiO", keywords, + if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|iii" _Py_PARSE_OFF_T, keywords, &fd, &map_size_obj, &flags, &prot, - &access, &offset_obj)) + &access, &offset)) return NULL; map_size = _GetMapSize(map_size_obj, "size"); if (map_size < 0) return NULL; - offset = _GetMapSize(offset_obj, "offset"); - if (offset < 0) + if (offset < 0) { + PyErr_SetString(PyExc_OverflowError, + "memory mapped offset must be positive"); return NULL; + } if ((access != (int)ACCESS_DEFAULT) && ((flags != MAP_SHARED) || (prot != (PROT_WRITE | PROT_READ)))) @@ -1090,8 +1100,14 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) "mmap offset is greater than file size"); return NULL; } - map_size = st.st_size - offset; - } else if ((size_t)offset + (size_t)map_size > st.st_size) { + off_t calc_size = st.st_size - offset; + map_size = calc_size; + if (map_size != calc_size) { + PyErr_SetString(PyExc_ValueError, + "mmap length is too large"); + return NULL; + } + } else if (offset + (size_t)map_size > st.st_size) { PyErr_SetString(PyExc_ValueError, "mmap length is greater than file size"); return NULL; @@ -1152,12 +1168,19 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) #endif /* UNIX */ #ifdef MS_WINDOWS + +/* A note on sizes and offsets: while the actual map size must hold in a + Py_ssize_t, both the total file size and the start offset can be longer + than a Py_ssize_t, so we use PY_LONG_LONG which is always 64-bit. 
+*/ + static PyObject * new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) { mmap_object *m_obj; - PyObject *map_size_obj = NULL, *offset_obj = NULL; - Py_ssize_t map_size, offset; + PyObject *map_size_obj = NULL; + Py_ssize_t map_size; + PY_LONG_LONG offset = 0, size; DWORD off_hi; /* upper 32 bits of offset */ DWORD off_lo; /* lower 32 bits of offset */ DWORD size_hi; /* upper 32 bits of size */ @@ -1172,9 +1195,9 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) "tagname", "access", "offset", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|ziO", keywords, + if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|ziL", keywords, &fileno, &map_size_obj, - &tagname, &access, &offset_obj)) { + &tagname, &access, &offset)) { return NULL; } @@ -1199,9 +1222,11 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) map_size = _GetMapSize(map_size_obj, "size"); if (map_size < 0) return NULL; - offset = _GetMapSize(offset_obj, "offset"); - if (offset < 0) + if (offset < 0) { + PyErr_SetString(PyExc_OverflowError, + "memory mapped offset must be positive"); return NULL; + } /* assume -1 and 0 both mean invalid filedescriptor to 'anonymously' map memory. 
@@ -1265,28 +1290,26 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) return PyErr_SetFromWindowsErr(dwErr); } -#if SIZEOF_SIZE_T > 4 - m_obj->size = (((size_t)high)<<32) + low; -#else - if (high) - /* File is too large to map completely */ - m_obj->size = (size_t)-1; - else - m_obj->size = low; -#endif - if (offset >= m_obj->size) { + size = (((PY_LONG_LONG) high) << 32) + low; + if (offset >= size) { PyErr_SetString(PyExc_ValueError, "mmap offset is greater than file size"); Py_DECREF(m_obj); return NULL; } - m_obj->size -= offset; + if (size - offset > PY_SSIZE_T_MAX) + /* Map area (size - offset bytes; offset < size was checked above) is too large to fit in memory */ + m_obj->size = (Py_ssize_t) -1; + else + m_obj->size = (Py_ssize_t) (size - offset); } else { m_obj->size = map_size; + size = offset + map_size; } } else { m_obj->size = map_size; + size = offset + map_size; } /* set the initial position */ @@ -1307,22 +1330,10 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) m_obj->tagname = NULL; m_obj->access = (access_mode)access; - /* DWORD is a 4-byte int. If we're on a box where size_t consumes - * more than 4 bytes, we need to break it apart. Else (size_t - * consumes 4 bytes), C doesn't define what happens if we shift - * right by 32, so we need different code. - */ -#if SIZEOF_SIZE_T > 4 - size_hi = (DWORD)((offset + m_obj->size) >> 32); - size_lo = (DWORD)((offset + m_obj->size) & 0xFFFFFFFF); + size_hi = (DWORD)(size >> 32); + size_lo = (DWORD)(size & 0xFFFFFFFF); off_hi = (DWORD)(offset >> 32); off_lo = (DWORD)(offset & 0xFFFFFFFF); -#else - size_hi = 0; - size_lo = (DWORD)(offset + m_obj->size); - off_hi = 0; - off_lo = (DWORD)offset; -#endif /* For files, it would be sufficient to pass 0 as size. For anonymous maps, we have to pass the size explicitly. 
*/ m_obj->map_handle = CreateFileMapping(m_obj->file_handle, diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index 54ab9a14995..c78cf42d1db 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -931,8 +931,18 @@ PyZlib_adler32(PyObject *self, PyObject *args) /* Releasing the GIL for very small buffers is inefficient and may lower performance */ if (pbuf.len > 1024*5) { + unsigned char *buf = pbuf.buf; + Py_ssize_t len = pbuf.len; + Py_BEGIN_ALLOW_THREADS - adler32val = adler32(adler32val, pbuf.buf, pbuf.len); + /* Avoid truncation of length for very large buffers. adler32() takes + length as an unsigned int, which may be narrower than Py_ssize_t. */ + while (len > (size_t) UINT_MAX) { + adler32val = adler32(adler32val, buf, UINT_MAX); + buf += (size_t) UINT_MAX; + len -= (size_t) UINT_MAX; + } + adler32val = adler32(adler32val, buf, len); Py_END_ALLOW_THREADS } else { adler32val = adler32(adler32val, pbuf.buf, pbuf.len); @@ -959,8 +969,18 @@ PyZlib_crc32(PyObject *self, PyObject *args) /* Releasing the GIL for very small buffers is inefficient and may lower performance */ if (pbuf.len > 1024*5) { + unsigned char *buf = pbuf.buf; + Py_ssize_t len = pbuf.len; + Py_BEGIN_ALLOW_THREADS - signed_val = crc32(crc32val, pbuf.buf, pbuf.len); + /* Avoid truncation of length for very large buffers. crc32() takes + length as an unsigned int, which may be narrower than Py_ssize_t. */ + while (len > (size_t) UINT_MAX) { + crc32val = crc32(crc32val, buf, UINT_MAX); + buf += (size_t) UINT_MAX; + len -= (size_t) UINT_MAX; + } + signed_val = crc32(crc32val, buf, len); Py_END_ALLOW_THREADS } else { signed_val = crc32(crc32val, pbuf.buf, pbuf.len);