From 0e6b7b5cd2f1bae9bdceac4d2bedfd6674250ac1 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 16 Feb 2013 17:43:45 +0200 Subject: [PATCH] Issue #8745: Small speed up zipimport on Windows. Patch by Catalin Iacob. --- Lib/test/test_zipimport.py | 2 ++ Misc/NEWS | 2 ++ Modules/zipimport.c | 27 ++++++++++++++++++--------- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py index 9c3f6d43c6c..a60d3e05142 100644 --- a/Lib/test/test_zipimport.py +++ b/Lib/test/test_zipimport.py @@ -196,6 +196,7 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase): for name, (mtime, data) in files.items(): zinfo = ZipInfo(name, time.localtime(mtime)) zinfo.compress_type = self.compression + zinfo.comment = b"spam" z.writestr(zinfo, data) z.close() @@ -245,6 +246,7 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase): for name, (mtime, data) in files.items(): zinfo = ZipInfo(name, time.localtime(mtime)) zinfo.compress_type = self.compression + zinfo.comment = b"eggs" z.writestr(zinfo, data) z.close() diff --git a/Misc/NEWS b/Misc/NEWS index dc869dfd6c9..2cc2aa439ba 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,8 @@ What's New in Python 3.4.0 Alpha 1? Core and Builtins ----------------- +- Issue #8745: Small speed up zipimport on Windows. Patch by Catalin Iacob. + - Issue #5308: Raise ValueError when marshalling too large object (a sequence with size >= 2**31), instead of producing illegal marshal data. diff --git a/Modules/zipimport.c b/Modules/zipimport.c index 2feb2a827c8..56e5c4c7a99 100644 --- a/Modules/zipimport.c +++ b/Modules/zipimport.c @@ -862,6 +862,7 @@ read_directory(PyObject *archive) long l, count; Py_ssize_t i; char name[MAXPATHLEN + 5]; + char dummy[8]; /* Buffer to read unused header values into */ PyObject *nameobj = NULL; char *p, endof_central_dir[22]; Py_ssize_t arc_offset; /* Absolute offset to start of the zip-archive. */ @@ -905,17 +906,23 @@ read_directory(PyObject *archive) /* Start of Central Directory */ count = 0; + if (fseek(fp, header_offset, 0) == -1) + goto file_error; for (;;) { PyObject *t; int err; - if (fseek(fp, header_offset, 0) == -1) /* Start of file header */ - goto fseek_error; + /* Start of file header */ l = PyMarshal_ReadLongFromFile(fp); if (l != 0x02014B50) break; /* Bad: Central Dir File Header */ - if (fseek(fp, header_offset + 8, 0) == -1) - goto fseek_error; + + /* On Windows, calling fseek to skip over the fields we don't use is + slower than reading the data into a dummy buffer because fseek flushes + stdio's internal buffers. See issue #8745. */ + if (fread(dummy, 1, 4, fp) != 4) /* Skip unused fields, avoid fseek */ + goto file_error; + flags = (unsigned short)PyMarshal_ReadShortFromFile(fp); compress = PyMarshal_ReadShortFromFile(fp); time = PyMarshal_ReadShortFromFile(fp); @@ -924,11 +931,11 @@ read_directory(PyObject *archive) data_size = PyMarshal_ReadLongFromFile(fp); file_size = PyMarshal_ReadLongFromFile(fp); name_size = PyMarshal_ReadShortFromFile(fp); - header_size = 46 + name_size + + header_size = name_size + PyMarshal_ReadShortFromFile(fp) + PyMarshal_ReadShortFromFile(fp); - if (fseek(fp, header_offset + 42, 0) == -1) - goto fseek_error; + if (fread(dummy, 1, 8, fp) != 8) /* Skip unused fields, avoid fseek */ + goto file_error; file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset; if (name_size > MAXPATHLEN) name_size = MAXPATHLEN; @@ -941,7 +948,9 @@ read_directory(PyObject *archive) p++; } *p = 0; /* Add terminating null byte */ - header_offset += header_size; + for (; i < header_size; i++) /* Skip the rest of the header */ + if(getc(fp) == EOF) /* Avoid fseek */ + goto file_error; bootstrap = 0; if (flags & 0x0800) @@ -988,7 +997,7 @@ read_directory(PyObject *archive) PySys_FormatStderr("# zipimport: found %ld names in %R\n", count, archive); return files; -fseek_error: +file_error: fclose(fp); Py_XDECREF(files); Py_XDECREF(nameobj);