From 2bcbc141173e5cd2ed36693a71bed8d5a1a54dd4 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Tue, 7 Jan 2014 18:30:07 -0800 Subject: [PATCH] Fixes Issue #19081: When a zipimport .zip file in sys.path being imported from is modified during the lifetime of the Python process after zipimport has already cached the zip's table of contents we detect this and recover rather than read bad data from the .zip (causing odd import errors). --- Lib/test/test_zipimport.py | 104 +++++++++++--- Misc/NEWS | 5 + Modules/zipimport.c | 287 ++++++++++++++++++++++++++++++------- 3 files changed, 329 insertions(+), 67 deletions(-) diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py index 37603b9bcde..0459596b2a6 100644 --- a/Lib/test/test_zipimport.py +++ b/Lib/test/test_zipimport.py @@ -46,6 +46,27 @@ pyc_file = imp.cache_from_source(TESTMOD + '.py') pyc_ext = ('.pyc' if __debug__ else '.pyo') +def _write_zip_package(zipname, files, + data_to_prepend=b"", compression=ZIP_STORED): + z = ZipFile(zipname, "w") + try: + for name, (mtime, data) in files.items(): + zinfo = ZipInfo(name, time.localtime(mtime)) + zinfo.compress_type = compression + z.writestr(zinfo, data) + finally: + z.close() + + if data_to_prepend: + # Prepend data to the start of the zipfile + with open(zipname, "rb") as f: + zip_data = f.read() + + with open(zipname, "wb") as f: + f.write(data_to_prepend) + f.write(zip_data) + + class UncompressedZipImportTestCase(ImportHooksBaseTestCase): compression = ZIP_STORED @@ -58,23 +79,9 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase): ImportHooksBaseTestCase.setUp(self) def doTest(self, expected_ext, files, *modules, **kw): - z = ZipFile(TEMP_ZIP, "w") + _write_zip_package(TEMP_ZIP, files, data_to_prepend=kw.get("stuff"), + compression=self.compression) try: - for name, (mtime, data) in files.items(): - zinfo = ZipInfo(name, time.localtime(mtime)) - zinfo.compress_type = self.compression - z.writestr(zinfo, data) - z.close() - - stuff = kw.get("stuff", None) - if stuff is not None: - # Prepend 'stuff' to the start of the zipfile - with open(TEMP_ZIP, "rb") as f: - data = f.read() - with open(TEMP_ZIP, "wb") as f: - f.write(stuff) - f.write(data) - sys.path.insert(0, TEMP_ZIP) mod = __import__(".".join(modules), globals(), locals(), @@ -89,7 +96,8 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase): self.assertEqual(file, os.path.join(TEMP_ZIP, *modules) + expected_ext) finally: - z.close() + while TEMP_ZIP in sys.path: + sys.path.remove(TEMP_ZIP) os.remove(TEMP_ZIP) def testAFakeZlib(self): @@ -395,10 +403,67 @@ class CompressedZipImportTestCase(UncompressedZipImportTestCase): compression = ZIP_DEFLATED +class ZipFileModifiedAfterImportTestCase(ImportHooksBaseTestCase): + def setUp(self): + zipimport._zip_directory_cache.clear() + zipimport._zip_stat_cache.clear() + ImportHooksBaseTestCase.setUp(self) + + def tearDown(self): + ImportHooksBaseTestCase.tearDown(self) + if os.path.exists(TEMP_ZIP): + os.remove(TEMP_ZIP) + + def testZipFileChangesAfterFirstImport(self): + """Alter the zip file after caching its index and try an import.""" + packdir = TESTPACK + os.sep + files = {packdir + "__init__" + pyc_ext: (NOW, test_pyc), + packdir + TESTMOD + ".py": (NOW, "test_value = 38\n"), + "ziptest_a.py": (NOW, "test_value = 23\n"), + "ziptest_b.py": (NOW, "test_value = 42\n"), + "ziptest_c.py": (NOW, "test_value = 1337\n")} + zipfile_path = TEMP_ZIP + _write_zip_package(zipfile_path, files) + self.assertTrue(os.path.exists(zipfile_path)) + sys.path.insert(0, zipfile_path) + + # Import something out of the zipfile and confirm it is correct. + testmod = __import__(TESTPACK + "." + TESTMOD, + globals(), locals(), ["__dummy__"]) + self.assertEqual(testmod.test_value, 38) + # Import something else out of the zipfile and confirm it is correct. + ziptest_b = __import__("ziptest_b", globals(), locals(), ["test_value"]) + self.assertEqual(ziptest_b.test_value, 42) + + # Truncate and fill the zip file with non-zip garbage. + with open(zipfile_path, "rb") as orig_zip_file: + orig_zip_file_contents = orig_zip_file.read() + with open(zipfile_path, "wb") as byebye_valid_zip_file: + byebye_valid_zip_file.write(b"Tear down this wall!\n"*1987) + # Now that the zipfile has been replaced, import something else from it + # which should fail as the file contents are now garbage. + with self.assertRaises(ImportError): + ziptest_a = __import__("ziptest_a", globals(), locals(), + ["test_value"]) + self.assertEqual(ziptest_a.test_value, 23) + + # Now lets make it a valid zipfile that has some garbage at the start. + # This alters all of the offsets within the file + with open(zipfile_path, "wb") as new_zip_file: + new_zip_file.write(b"X"*1991) # The year Python was created. + new_zip_file.write(orig_zip_file_contents) + + # Now that the zip file has been "restored" to a valid but different + # zipfile the zipimporter should *successfully* re-read the new zip + # file's end of file central index and be able to import from it again. + ziptest_c = __import__("ziptest_c", globals(), locals(), ["test_value"]) + self.assertEqual(ziptest_c.test_value, 1337) + + class BadFileZipImportTestCase(unittest.TestCase): def assertZipFailure(self, filename): - self.assertRaises(zipimport.ZipImportError, - zipimport.zipimporter, filename) + with self.assertRaises(zipimport.ZipImportError): + zipimport.zipimporter(filename) def testNoFile(self): self.assertZipFailure('AdfjdkFJKDFJjdklfjs') @@ -472,6 +537,7 @@ def test_main(): UncompressedZipImportTestCase, CompressedZipImportTestCase, BadFileZipImportTestCase, + ZipFileModifiedAfterImportTestCase, ) finally: support.unlink(TESTMOD) diff --git a/Misc/NEWS b/Misc/NEWS index 0a2139b5a55..c17e493f76f 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,11 @@ What's New in Python 3.3.4 release candidate 1? Core and Builtins ----------------- +- Issue #19081: When a zipimport .zip file in sys.path being imported from + is modified during the lifetime of the Python process after zipimport has + already cached the zip's table of contents we detect this and recover + rather than read bad data from the .zip (causing odd import errors). + - Issue #17432: Drop UCS2 from names of Unicode functions in python3.def. - Issue #19969: PyBytes_FromFormatV() now raises an OverflowError if "%c" diff --git a/Modules/zipimport.c b/Modules/zipimport.c index 2feb2a827c8..bfac46233db 100644 --- a/Modules/zipimport.c +++ b/Modules/zipimport.c @@ -45,10 +45,16 @@ struct _zipimporter { static PyObject *ZipImportError; /* read_directory() cache */ static PyObject *zip_directory_cache = NULL; +static PyObject *zip_stat_cache = NULL; +/* posix.fstat or nt.fstat function. Used due to posixmodule.c's + * superior fstat implementation over libc's on Windows. */ +static PyObject *fstat_function = NULL; /* posix.fstat() or nt.fstat() */ /* forward decls */ -static PyObject *read_directory(PyObject *archive); -static PyObject *get_data(PyObject *archive, PyObject *toc_entry); +static FILE *fopen_rb_and_stat(PyObject *path, PyObject **py_stat_p); +static FILE *safely_reopen_archive(ZipImporter *self); +static PyObject *read_directory(FILE *fp, PyObject *archive); +static PyObject *get_data(FILE *fp, PyObject *archive, PyObject *toc_entry); static PyObject *get_module_code(ZipImporter *self, PyObject *fullname, int *p_ispackage, PyObject **p_modpath); @@ -128,11 +134,39 @@ zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds) files = PyDict_GetItem(zip_directory_cache, filename); if (files == NULL) { - files = read_directory(filename); - if (files == NULL) + PyObject *zip_stat = NULL; + FILE *fp = fopen_rb_and_stat(filename, &zip_stat); + if (fp == NULL) { + if (!PyErr_Occurred()) + PyErr_Format(ZipImportError, "can't open Zip file: %R", + filename); + + Py_XDECREF(zip_stat); goto error; - if (PyDict_SetItem(zip_directory_cache, filename, files) != 0) + } + + if (Py_VerboseFlag) + PySys_FormatStderr("# zipimport: %U not cached, " + "reading TOC.\n", filename); + + files = read_directory(fp, filename); + fclose(fp); + if (files == NULL) { + Py_XDECREF(zip_stat); goto error; + } + if (PyDict_SetItem(zip_directory_cache, filename, files) != 0) { + Py_DECREF(files); + Py_XDECREF(zip_stat); + goto error; + } + if (zip_stat && PyDict_SetItem(zip_stat_cache, filename, + zip_stat) != 0) { + Py_DECREF(files); + Py_DECREF(zip_stat); + goto error; + } + Py_XDECREF(zip_stat); } else Py_INCREF(files); @@ -554,10 +588,11 @@ zipimporter_get_data(PyObject *obj, PyObject *args) { ZipImporter *self = (ZipImporter *)obj; PyObject *path, *key; + FILE *fp; #ifdef ALTSEP _Py_IDENTIFIER(replace); #endif - PyObject *toc_entry; + PyObject *toc_entry, *data; Py_ssize_t path_start, path_len, len; if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &path)) @@ -585,15 +620,23 @@ zipimporter_get_data(PyObject *obj, PyObject *args) key = PyUnicode_Substring(path, path_start, path_len); if (key == NULL) goto error; + + fp = safely_reopen_archive(self); + if (fp == NULL) + goto error; + toc_entry = PyDict_GetItem(self->files, key); if (toc_entry == NULL) { PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key); Py_DECREF(key); + fclose(fp); goto error; } Py_DECREF(key); Py_DECREF(path); - return get_data(self->archive, toc_entry); + data = get_data(fp, self->archive, toc_entry); + fclose(fp); + return data; error: Py_DECREF(path); return NULL; @@ -618,6 +661,7 @@ zipimporter_get_source(PyObject *obj, PyObject *args) PyObject *toc_entry; PyObject *fullname, *subname, *path, *fullpath; enum zi_module_info mi; + FILE *fp; if (!PyArg_ParseTuple(args, "U:zipimporter.get_source", &fullname)) return NULL; @@ -647,11 +691,18 @@ zipimporter_get_source(PyObject *obj, PyObject *args) if (fullpath == NULL) return NULL; + fp = safely_reopen_archive(self); + if (fp == NULL) { + Py_DECREF(fullpath); + return NULL; + } + toc_entry = PyDict_GetItem(self->files, fullpath); Py_DECREF(fullpath); if (toc_entry != NULL) { PyObject *res, *bytes; - bytes = get_data(self->archive, toc_entry); + bytes = get_data(fp, self->archive, toc_entry); + fclose(fp); if (bytes == NULL) return NULL; res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes), @@ -659,10 +710,10 @@ zipimporter_get_source(PyObject *obj, PyObject *args) Py_DECREF(bytes); return res; } + fclose(fp); /* we have the module, but no source */ - Py_INCREF(Py_None); - return Py_None; + Py_RETURN_NONE; } PyDoc_STRVAR(doc_find_module, @@ -828,10 +879,135 @@ get_long(unsigned char *buf) { return x; } -/* - read_directory(archive) -> files dict (new reference) +/* Return 1 if objects a and b fail a Py_EQ test for an attr. */ +static int +compare_obj_attr_strings(PyObject *obj_a, PyObject *obj_b, char *attr_name) +{ + int problem = 0; + PyObject *attr_a = PyObject_GetAttrString(obj_a, attr_name); + PyObject *attr_b = PyObject_GetAttrString(obj_b, attr_name); + if (attr_a == NULL || attr_b == NULL) + problem = 1; + else + problem = (PyObject_RichCompareBool(attr_a, attr_b, Py_EQ) != 1); + Py_XDECREF(attr_a); + Py_XDECREF(attr_b); + return problem; +} - Given a path to a Zip archive, build a dict, mapping file names +/* + * Returns an open FILE * on success. + * Returns NULL on error with the Python error context set. + */ +static FILE * +safely_reopen_archive(ZipImporter *self) +{ + FILE *fp; + PyObject *stat_now = NULL; + + fp = fopen_rb_and_stat(self->archive, &stat_now); + if (!fp) { + PyErr_Format(ZipImportError, + "zipimport: can not open file %U", self->archive); + Py_XDECREF(stat_now); + return NULL; + } + + if (stat_now != NULL) { + int problem = 0; + PyObject *files; + PyObject *prev_stat = PyDict_GetItem(zip_stat_cache, self->archive); + /* Test stat_now vs the old cached stat on some key attributes. */ + if (prev_stat != NULL) { + problem = compare_obj_attr_strings(prev_stat, stat_now, + "st_ino"); + problem |= compare_obj_attr_strings(prev_stat, stat_now, + "st_size"); + problem |= compare_obj_attr_strings(prev_stat, stat_now, + "st_mtime"); + } else { + if (Py_VerboseFlag) + PySys_FormatStderr("# zipimport: no stat data for %U!\n", + self->archive); + problem = 1; + } + + if (problem) { + if (Py_VerboseFlag) + PySys_FormatStderr("# zipimport: %U modified since last" + " import, rereading TOC.\n", self->archive); + files = read_directory(fp, self->archive); + if (files == NULL) { + Py_DECREF(stat_now); + fclose(fp); + return NULL; + } + if (PyDict_SetItem(zip_directory_cache, self->archive, + files) != 0) { + Py_DECREF(files); + Py_DECREF(stat_now); + fclose(fp); + return NULL; + } + if (stat_now && PyDict_SetItem(zip_stat_cache, self->archive, + stat_now) != 0) { + Py_DECREF(files); + Py_DECREF(stat_now); + fclose(fp); + return NULL; + } + Py_XDECREF(self->files); /* free the old value. */ + self->files = files; + } else { + /* No problem, discard the new stat data. */ + Py_DECREF(stat_now); + } + } /* stat succeeded */ + + return fp; +} + +/* + fopen_rb_and_stat(path, &py_stat) -> FILE * + + Opens path in "rb" mode and populates the Python py_stat stat_result + with information about the opened file. *py_stat may not be changed + if there is no fstat_function or if fstat_function fails. + + Returns NULL and does nothing to *py_stat if the open failed. +*/ +static FILE * +fopen_rb_and_stat(PyObject *path, PyObject **py_stat_p) +{ + FILE *fp; + assert(py_stat_p != NULL); + assert(*py_stat_p == NULL); + + fp = _Py_fopen(path, "rb"); + if (fp == NULL) { + if (!PyErr_Occurred()) + PyErr_Format(ZipImportError, + "zipimport: can not open file %U", path); + return NULL; + } + + if (fstat_function) { + PyObject *stat_result = PyObject_CallFunction(fstat_function, + "i", fileno(fp)); + if (stat_result == NULL) { + PyErr_Clear(); /* We can function without it. */ + } else { + *py_stat_p = stat_result; + } + } + + return fp; +} + +/* + read_directory(fp, archive) -> files dict (new reference) + + Given an open Zip archive, build a dict, mapping file names (local to the archive, using SEP as a separator) to toc entries. A toc_entry is a tuple: @@ -851,10 +1027,9 @@ get_long(unsigned char *buf) { data_size and file_offset are 0. */ static PyObject * -read_directory(PyObject *archive) +read_directory(FILE *fp, PyObject *archive) { PyObject *files = NULL; - FILE *fp; unsigned short flags; short compress, time, date, name_size; long crc, data_size, file_size, header_size; @@ -869,27 +1044,18 @@ read_directory(PyObject *archive) const char *charset; int bootstrap; - fp = _Py_fopen(archive, "rb"); - if (fp == NULL) { - if (!PyErr_Occurred()) - PyErr_Format(ZipImportError, "can't open Zip file: %R", archive); - return NULL; - } - + assert(fp != NULL); if (fseek(fp, -22, SEEK_END) == -1) { - fclose(fp); PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); return NULL; } header_position = ftell(fp); if (fread(endof_central_dir, 1, 22, fp) != 22) { - fclose(fp); PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); return NULL; } if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) { /* Bad: End of Central Dir signature */ - fclose(fp); PyErr_Format(ZipImportError, "not a Zip file: %R", archive); return NULL; } @@ -983,19 +1149,16 @@ read_directory(PyObject *archive) goto error; count++; } - fclose(fp); if (Py_VerboseFlag) PySys_FormatStderr("# zipimport: found %ld names in %R\n", count, archive); return files; fseek_error: - fclose(fp); Py_XDECREF(files); Py_XDECREF(nameobj); PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); return NULL; error: - fclose(fp); Py_XDECREF(files); Py_XDECREF(nameobj); return NULL; @@ -1034,14 +1197,13 @@ get_decompress_func(void) return decompress; } -/* Given a path to a Zip file and a toc_entry, return the (uncompressed) +/* Given a FILE* to a Zip file and a toc_entry, return the (uncompressed) data as a new reference. */ static PyObject * -get_data(PyObject *archive, PyObject *toc_entry) +get_data(FILE *fp, PyObject *archive, PyObject *toc_entry) { PyObject *raw_data, *data = NULL, *decompress; char *buf; - FILE *fp; int err; Py_ssize_t bytes_read = 0; long l; @@ -1055,17 +1217,8 @@ get_data(PyObject *archive, PyObject *toc_entry) return NULL; } - fp = _Py_fopen(archive, "rb"); - if (!fp) { - if (!PyErr_Occurred()) - PyErr_Format(PyExc_IOError, - "zipimport: can not open file %U", archive); - return NULL; - } - /* Check to make sure the local file header is correct */ if (fseek(fp, file_offset, 0) == -1) { - fclose(fp); PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); return NULL; } @@ -1076,11 +1229,9 @@ get_data(PyObject *archive, PyObject *toc_entry) PyErr_Format(ZipImportError, "bad local file header in %U", archive); - fclose(fp); return NULL; } if (fseek(fp, file_offset + 26, 0) == -1) { - fclose(fp); PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); return NULL; } @@ -1095,7 +1246,6 @@ get_data(PyObject *archive, PyObject *toc_entry) raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size); if (raw_data == NULL) { - fclose(fp); return NULL; } buf = PyBytes_AsString(raw_data); @@ -1104,11 +1254,9 @@ get_data(PyObject *archive, PyObject *toc_entry) if (err == 0) { bytes_read = fread(buf, 1, data_size, fp); } else { - fclose(fp); PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); return NULL; } - fclose(fp); if (err || bytes_read != data_size) { PyErr_SetString(PyExc_IOError, "zipimport: can't read data"); @@ -1329,12 +1477,12 @@ get_mtime_of_source(ZipImporter *self, PyObject *path) /* Return the code object for the module named by 'fullname' from the Zip archive as a new reference. */ static PyObject * -get_code_from_data(ZipImporter *self, int ispackage, int isbytecode, +get_code_from_data(ZipImporter *self, FILE *fp, int ispackage, int isbytecode, time_t mtime, PyObject *toc_entry) { PyObject *data, *modpath, *code; - data = get_data(self->archive, toc_entry); + data = get_data(fp, self->archive, toc_entry); if (data == NULL) return NULL; @@ -1356,6 +1504,7 @@ get_module_code(ZipImporter *self, PyObject *fullname, PyObject *code = NULL, *toc_entry, *subname; PyObject *path, *fullpath = NULL; struct st_zip_searchorder *zso; + FILE *fp; subname = get_subname(fullname); if (subname == NULL) @@ -1366,6 +1515,12 @@ get_module_code(ZipImporter *self, PyObject *fullname, if (path == NULL) return NULL; + fp = safely_reopen_archive(self); + if (fp == NULL) { + Py_DECREF(path); + return NULL; + } + for (zso = zip_searchorder; *zso->suffix; zso++) { code = NULL; @@ -1376,6 +1531,7 @@ get_module_code(ZipImporter *self, PyObject *fullname, if (Py_VerboseFlag > 1) PySys_FormatStderr("# trying %U%c%U\n", self->archive, (int)SEP, fullpath); + toc_entry = PyDict_GetItem(self->files, fullpath); if (toc_entry != NULL) { time_t mtime = 0; @@ -1391,7 +1547,7 @@ get_module_code(ZipImporter *self, PyObject *fullname, Py_CLEAR(fullpath); if (p_ispackage != NULL) *p_ispackage = ispackage; - code = get_code_from_data(self, ispackage, + code = get_code_from_data(self, fp, ispackage, isbytecode, mtime, toc_entry); if (code == Py_None) { @@ -1411,6 +1567,7 @@ get_module_code(ZipImporter *self, PyObject *fullname, } PyErr_Format(ZipImportError, "can't find module %R", fullname); exit: + fclose(fp); Py_DECREF(path); Py_XDECREF(fullpath); return code; @@ -1428,6 +1585,8 @@ This module exports three objects:\n\ subclass of ImportError, so it can be caught as ImportError, too.\n\ - _zip_directory_cache: a dict, mapping archive paths to zip directory\n\ info dicts, as used in zipimporter._files.\n\ +- _zip_stat_cache: a dict, mapping archive paths to stat_result\n\ + info for the .zip the last time anything was imported from it.\n\ \n\ It is usually not needed to use the zipimport module explicitly; it is\n\ used by the builtin import mechanism for sys.path items that are paths\n\ @@ -1487,6 +1646,7 @@ PyInit_zipimport(void) (PyObject *)&ZipImporter_Type) < 0) return NULL; + Py_XDECREF(zip_directory_cache); /* Avoid embedded interpreter leaks. */ zip_directory_cache = PyDict_New(); if (zip_directory_cache == NULL) return NULL; @@ -1494,5 +1654,36 @@ PyInit_zipimport(void) if (PyModule_AddObject(mod, "_zip_directory_cache", zip_directory_cache) < 0) return NULL; + + Py_XDECREF(zip_stat_cache); /* Avoid embedded interpreter leaks. */ + zip_stat_cache = PyDict_New(); + if (zip_stat_cache == NULL) + return NULL; + Py_INCREF(zip_stat_cache); + if (PyModule_AddObject(mod, "_zip_stat_cache", zip_stat_cache) < 0) + return NULL; + + { + /* We cannot import "os" here as that is a .py/.pyc file that could + * live within a zipped up standard library. Import the posix or nt + * builtin that provides the fstat() function we want instead. */ + PyObject *os_like_module; + Py_CLEAR(fstat_function); /* Avoid embedded interpreter leaks. */ + os_like_module = PyImport_ImportModule("posix"); + if (os_like_module == NULL) { + PyErr_Clear(); + os_like_module = PyImport_ImportModule("nt"); + } + if (os_like_module != NULL) { + fstat_function = PyObject_GetAttrString(os_like_module, "fstat"); + Py_DECREF(os_like_module); + } + if (fstat_function == NULL) { + PyErr_Clear(); /* non-fatal, we'll go on without it. */ + if (Py_VerboseFlag) + PySys_WriteStderr("# zipimport unable to use os.fstat().\n"); + } + } + return mod; }