From 0e6b7b5cd2f1bae9bdceac4d2bedfd6674250ac1 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sat, 16 Feb 2013 17:43:45 +0200
Subject: [PATCH] Issue #8745: Small speed up zipimport on Windows. Patch by
 Catalin Iacob.

---
 Lib/test/test_zipimport.py |  2 ++
 Misc/NEWS                  |  2 ++
 Modules/zipimport.c        | 27 ++++++++++++++++++---------
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py
index 9c3f6d43c6c..a60d3e05142 100644
--- a/Lib/test/test_zipimport.py
+++ b/Lib/test/test_zipimport.py
@@ -196,6 +196,7 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
             for name, (mtime, data) in files.items():
                 zinfo = ZipInfo(name, time.localtime(mtime))
                 zinfo.compress_type = self.compression
+                zinfo.comment = b"spam"
                 z.writestr(zinfo, data)
             z.close()
 
@@ -245,6 +246,7 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
             for name, (mtime, data) in files.items():
                 zinfo = ZipInfo(name, time.localtime(mtime))
                 zinfo.compress_type = self.compression
+                zinfo.comment = b"eggs"
                 z.writestr(zinfo, data)
             z.close()
 
diff --git a/Misc/NEWS b/Misc/NEWS
index dc869dfd6c9..2cc2aa439ba 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@ What's New in Python 3.4.0 Alpha 1?
 Core and Builtins
 -----------------
 
+- Issue #8745: Small speed up zipimport on Windows. Patch by Catalin Iacob.
+
 - Issue #5308: Raise ValueError when marshalling too large object (a sequence
   with size >= 2**31), instead of producing illegal marshal data.
 
diff --git a/Modules/zipimport.c b/Modules/zipimport.c
index 2feb2a827c8..56e5c4c7a99 100644
--- a/Modules/zipimport.c
+++ b/Modules/zipimport.c
@@ -862,6 +862,7 @@ read_directory(PyObject *archive)
     long l, count;
     Py_ssize_t i;
     char name[MAXPATHLEN + 5];
+    char dummy[8]; /* Buffer to read unused header values into */
     PyObject *nameobj = NULL;
     char *p, endof_central_dir[22];
     Py_ssize_t arc_offset;  /* Absolute offset to start of the zip-archive. */
@@ -905,17 +906,23 @@ read_directory(PyObject *archive)
 
     /* Start of Central Directory */
     count = 0;
+    if (fseek(fp, header_offset, 0) == -1)
+        goto file_error;
     for (;;) {
         PyObject *t;
         int err;
 
-        if (fseek(fp, header_offset, 0) == -1)  /* Start of file header */
-            goto fseek_error;
+        /* Start of file header */
         l = PyMarshal_ReadLongFromFile(fp);
         if (l != 0x02014B50)
             break;              /* Bad: Central Dir File Header */
-        if (fseek(fp, header_offset + 8, 0) == -1)
-            goto fseek_error;
+
+        /* On Windows, calling fseek to skip over the fields we don't use is
+        slower than reading the data into a dummy buffer because fseek flushes
+        stdio's internal buffers. See issue #8745. */
+        if (fread(dummy, 1, 4, fp) != 4) /* Skip unused fields, avoid fseek */
+            goto file_error;
+
         flags = (unsigned short)PyMarshal_ReadShortFromFile(fp);
         compress = PyMarshal_ReadShortFromFile(fp);
         time = PyMarshal_ReadShortFromFile(fp);
@@ -924,11 +931,11 @@ read_directory(PyObject *archive)
         data_size = PyMarshal_ReadLongFromFile(fp);
         file_size = PyMarshal_ReadLongFromFile(fp);
         name_size = PyMarshal_ReadShortFromFile(fp);
-        header_size = 46 + name_size +
+        header_size = name_size +
            PyMarshal_ReadShortFromFile(fp) +
            PyMarshal_ReadShortFromFile(fp);
-        if (fseek(fp, header_offset + 42, 0) == -1)
-            goto fseek_error;
+        if (fread(dummy, 1, 8, fp) != 8) /* Skip unused fields, avoid fseek */
+            goto file_error;
         file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
         if (name_size > MAXPATHLEN)
             name_size = MAXPATHLEN;
@@ -941,7 +948,9 @@ read_directory(PyObject *archive)
             p++;
         }
         *p = 0;         /* Add terminating null byte */
-        header_offset += header_size;
+        for (; i < header_size; i++) /* Skip the rest of the header */
+            if(getc(fp) == EOF) /* Avoid fseek */
+                goto file_error;
 
         bootstrap = 0;
         if (flags & 0x0800)
@@ -988,7 +997,7 @@ read_directory(PyObject *archive)
         PySys_FormatStderr("# zipimport: found %ld names in %R\n",
                            count, archive);
     return files;
-fseek_error:
+file_error:
     fclose(fp);
     Py_XDECREF(files);
     Py_XDECREF(nameobj);