From e24f96a05973ddbb59d88c03570aef8545c5ef10 Mon Sep 17 00:00:00 2001
From: Senthil Kumaran <senthil@uthcode.com>
Date: Tue, 13 Mar 2012 19:29:33 -0700
Subject: [PATCH] Issue10050 - urlretrieve uses newer urlopen. reporthook of
 urlretrieve takes block number, block read size, file_size

---
 Doc/howto/urllib2.rst          |  7 +++
 Doc/library/urllib.request.rst | 39 +++++++++--------
 Lib/test/test_urllib.py        | 34 +++++++--------
 Lib/urllib/request.py          | 80 ++++++++++++++++++++++++++++++----
 Misc/ACKS                      |  1 +
 5 files changed, 119 insertions(+), 42 deletions(-)

diff --git a/Doc/howto/urllib2.rst b/Doc/howto/urllib2.rst
index 76286bdc273..058cf967ecd 100644
--- a/Doc/howto/urllib2.rst
+++ b/Doc/howto/urllib2.rst
@@ -56,6 +56,13 @@ The simplest way to use urllib.request is as follows::
     response = urllib.request.urlopen('http://python.org/')
     html = response.read()
 
+If you wish to retrieve a resource via URL and store it in a temporary location,
+you can do so via the :func:`urlretrieve` function::
+
+    import urllib.request
+    local_filename, headers = urllib.request.urlretrieve('http://python.org/')
+    html = open(local_filename)
+
 Many uses of urllib will be that simple (note that instead of an 'http:' URL we
 could have used an URL starting with 'ftp:', 'file:', etc.).  However, it's the
 purpose of this tutorial to explain the more complicated cases, concentrating on
diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
index 29a8413a323..d624f8a50b6 100644
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -1124,16 +1124,14 @@ The following functions and classes are ported from the Python 2 module
 ``urllib`` (as opposed to ``urllib2``).  They might become deprecated at
 some point in the future.
 
-
 .. function:: urlretrieve(url, filename=None, reporthook=None, data=None)
 
-   Copy a network object denoted by a URL to a local file, if necessary. If the URL
-   points to a local file, or a valid cached copy of the object exists, the object
-   is not copied.  Return a tuple ``(filename, headers)`` where *filename* is the
+   Copy a network object denoted by a URL to a local file. If the URL
+   points to a local file, the object will not be copied unless *filename* is supplied.
+   Return a tuple ``(filename, headers)`` where *filename* is the
    local file name under which the object can be found, and *headers* is whatever
    the :meth:`info` method of the object returned by :func:`urlopen` returned (for
-   a remote object, possibly cached). Exceptions are the same as for
-   :func:`urlopen`.
+   a remote object). Exceptions are the same as for :func:`urlopen`.
 
    The second argument, if present, specifies the file location to copy to (if
    absent, the location will be a tempfile with a generated name). The third
@@ -1144,11 +1142,18 @@ some point in the future.
    third argument may be ``-1`` on older FTP servers which do not return a file
    size in response to a retrieval request.
 
+   The following example illustrates the most common usage scenario::
+
+      >>> import urllib.request
+      >>> local_filename, headers = urllib.request.urlretrieve('http://python.org/')
+      >>> html = open(local_filename)
+      >>> html.close()
+
    If the *url* uses the :file:`http:` scheme identifier, the optional *data*
-   argument may be given to specify a ``POST`` request (normally the request type
-   is ``GET``).  The *data* argument must in standard
-   :mimetype:`application/x-www-form-urlencoded` format; see the :func:`urlencode`
-   function below.
+   argument may be given to specify a ``POST`` request (normally the request
+   type is ``GET``).  The *data* argument must be in standard
+   :mimetype:`application/x-www-form-urlencoded` format; see the
+   :func:`urlencode` function below.
 
    :func:`urlretrieve` will raise :exc:`ContentTooShortError` when it detects that
    the amount of data available  was less than the expected amount (which is the
@@ -1156,20 +1161,20 @@ some point in the future.
    the  download is interrupted.
 
    The *Content-Length* is treated as a lower bound: if there's more data  to read,
-   :func:`urlretrieve` reads more data, but if less data is available,  it raises
-   the exception.
+   urlretrieve reads more data, but if less data is available,  it raises the
+   exception.
 
    You can still retrieve the downloaded data in this case, it is stored  in the
    :attr:`content` attribute of the exception instance.
 
-   If no *Content-Length* header was supplied, :func:`urlretrieve` can not check
-   the size of the data it has downloaded, and just returns it.  In this case
-   you just have to assume that the download was successful.
+   If no *Content-Length* header was supplied, urlretrieve cannot check the size
+   of the data it has downloaded, and just returns it.  In this case you just have
+   to assume that the download was successful.
 
 .. function:: urlcleanup()
 
-   Clear the cache that may have been built up by previous calls to
-   :func:`urlretrieve`.
+   Clean up temporary files that may have been left behind by previous
+   calls to :func:`urlretrieve`.
 
 .. class:: URLopener(proxies=None, **x509)
 
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index b2680edd422..85f8f841dfe 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -384,11 +384,11 @@ class urlretrieve_FileTests(unittest.TestCase):
 
     def test_reporthook(self):
         # Make sure that the reporthook works.
-        def hooktester(count, block_size, total_size, count_holder=[0]):
-            self.assertIsInstance(count, int)
-            self.assertIsInstance(block_size, int)
-            self.assertIsInstance(total_size, int)
-            self.assertEqual(count, count_holder[0])
+        def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
+            self.assertIsInstance(block_count, int)
+            self.assertIsInstance(block_read_size, int)
+            self.assertIsInstance(file_size, int)
+            self.assertEqual(block_count, count_holder[0])
             count_holder[0] = count_holder[0] + 1
         second_temp = "%s.2" % support.TESTFN
         self.registerFileForCleanUp(second_temp)
@@ -399,8 +399,8 @@ class urlretrieve_FileTests(unittest.TestCase):
     def test_reporthook_0_bytes(self):
         # Test on zero length file. Should call reporthook only 1 time.
         report = []
-        def hooktester(count, block_size, total_size, _report=report):
-            _report.append((count, block_size, total_size))
+        def hooktester(block_count, block_read_size, file_size, _report=report):
+            _report.append((block_count, block_read_size, file_size))
         srcFileName = self.createNewTempFile()
         urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
             support.TESTFN, hooktester)
@@ -410,31 +410,31 @@ class urlretrieve_FileTests(unittest.TestCase):
     def test_reporthook_5_bytes(self):
         # Test on 5 byte file. Should call reporthook only 2 times (once when
         # the "network connection" is established and once when the block is
-        # read). Since the block size is 8192 bytes, only one block read is
-        # required to read the entire file.
+        # read).
         report = []
-        def hooktester(count, block_size, total_size, _report=report):
-            _report.append((count, block_size, total_size))
+        def hooktester(block_count, block_read_size, file_size, _report=report):
+            _report.append((block_count, block_read_size, file_size))
         srcFileName = self.createNewTempFile(b"x" * 5)
         urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
             support.TESTFN, hooktester)
         self.assertEqual(len(report), 2)
-        self.assertEqual(report[0][1], 8192)
-        self.assertEqual(report[0][2], 5)
+        self.assertEqual(report[0][1], 0)
+        self.assertEqual(report[1][1], 5)
 
     def test_reporthook_8193_bytes(self):
         # Test on 8193 byte file. Should call reporthook only 3 times (once
         # when the "network connection" is established, once for the next 8192
         # bytes, and once for the last byte).
         report = []
-        def hooktester(count, block_size, total_size, _report=report):
-            _report.append((count, block_size, total_size))
+        def hooktester(block_count, block_read_size, file_size, _report=report):
+            _report.append((block_count, block_read_size, file_size))
         srcFileName = self.createNewTempFile(b"x" * 8193)
         urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
             support.TESTFN, hooktester)
         self.assertEqual(len(report), 3)
-        self.assertEqual(report[0][1], 8192)
-        self.assertEqual(report[0][2], 8193)
+        self.assertEqual(report[0][1], 0)
+        self.assertEqual(report[1][1], 8192)
+        self.assertEqual(report[2][1], 1)
 
 
 class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 90dfcffe568..c220a7d0115 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -94,6 +94,9 @@ import socket
 import sys
 import time
 import collections
+import tempfile
+import contextlib
+
 
 from urllib.error import URLError, HTTPError, ContentTooShortError
 from urllib.parse import (
@@ -156,17 +159,78 @@ def install_opener(opener):
     global _opener
     _opener = opener
 
-# TODO(jhylton): Make this work with the same global opener.
-_urlopener = None
+_url_tempfiles = []
 def urlretrieve(url, filename=None, reporthook=None, data=None):
-    global _urlopener
-    if not _urlopener:
-        _urlopener = FancyURLopener()
-    return _urlopener.retrieve(url, filename, reporthook, data)
+    """
+    Retrieve a URL into a temporary location on disk.
+
+    Requires a URL argument. If a filename is passed, it is used as
+    the temporary file location. The reporthook argument should be
+    a callable that accepts a block number, a read size, and the
+    total file size of the URL target. The data argument should be
+    valid URL encoded data.
+
+    If a filename is passed and the URL points to a local resource,
+    the result is a copy from local file to new file.
+
+    Returns a tuple containing the path to the newly created
+    data file as well as the resulting HTTPMessage object.
+    """
+    url_type, path = splittype(url)
+
+    with contextlib.closing(urlopen(url, data)) as fp:
+        headers = fp.info()
+
+        # Just return the local path and the "headers" for file://
+        # URLs. No sense in performing a copy unless requested.
+        if url_type == "file" and not filename:
+            return os.path.normpath(path), headers
+
+        # Handle temporary file setup.
+        if filename:
+            tfp = open(filename, 'wb')
+        else:
+            tfp = tempfile.NamedTemporaryFile(delete=False)
+            filename = tfp.name
+            _url_tempfiles.append(filename)
+
+        with tfp:
+            result = filename, headers
+            bs = 1024*8
+            size = -1
+            read = 0
+            blocknum = 0
+            if "content-length" in headers:
+                size = int(headers["Content-Length"])
+
+            if reporthook:
+                reporthook(blocknum, 0, size)
+
+            while True:
+                block = fp.read(bs)
+                if not block:
+                    break
+                read += len(block)
+                tfp.write(block)
+                blocknum += 1
+                if reporthook:
+                    reporthook(blocknum, len(block), size)
+
+    if size >= 0 and read < size:
+        raise ContentTooShortError(
+            "retrieval incomplete: got only %i out of %i bytes"
+            % (read, size), result)
+
+    return result
 
 def urlcleanup():
-    if _urlopener:
-        _urlopener.cleanup()
+    for temp_file in _url_tempfiles:
+        try:
+            os.unlink(temp_file)
+        except EnvironmentError:
+            pass
+
+    del _url_tempfiles[:]
     global _opener
     if _opener:
         _opener = None
diff --git a/Misc/ACKS b/Misc/ACKS
index 48bdde4cdb2..a11d4ebdd87 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -288,6 +288,7 @@ Julien Élie
 Lance Ellinghaus
 David Ely
 Jeff Epler
+Jeff McNeil
 Tom Epperly
 Stoffel Erasmus
 Jürgen A. Erhard