mirror of https://github.com/python/cpython
Patch [ 1062060 ] fix for 1016880 urllib.urlretrieve silently truncates dwnld
This commit is contained in:
parent
568973181a
commit
b925602f16
|
@ -142,6 +142,25 @@ If the \var{url} uses the \file{http:} scheme identifier, the optional
|
|||
(normally the request type is \code{GET}). The \var{data} argument
|
||||
must be in standard \mimetype{application/x-www-form-urlencoded} format;
|
||||
see the \function{urlencode()} function below.
|
||||
|
||||
\versionchanged[
|
||||
\function{urlretrieve()} will raise \exception{ContentTooShortError}
|
||||
when it detects that the amount of data available
|
||||
was less than the expected amount (which is the size reported by a
|
||||
\var{Content-Length} header). This can occur, for example, when the
|
||||
download is interrupted.
|
||||
|
||||
The \var{Content-Length} is treated as a lower bound: if there's more data
|
||||
to read, urlretrieve reads more data, but if less data is available,
|
||||
it raises the exception.
|
||||
|
||||
You can still retrieve the downloaded data in this case; it is stored
|
||||
in the \member{content} attribute of the exception instance.
|
||||
|
||||
If no \var{Content-Length} header was supplied, urlretrieve cannot
|
||||
check the size of the data it has downloaded, and just returns it.
|
||||
In this case you just have to assume that the download was successful.]{2.5}
|
||||
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{datadesc}{_urlopener}
|
||||
|
@ -283,6 +302,15 @@ subclass may override this method to support more appropriate behavior
|
|||
if needed.}
|
||||
\end{classdesc}
|
||||
|
||||
\begin{excclassdesc}{ContentTooShortError}{msg\optional{, content}}
|
||||
This exception is raised when the \function{urlretrieve()} function
|
||||
detects that the amount of the downloaded data is less than the
|
||||
expected amount (given by the \var{Content-Length} header). The
|
||||
\member{content} attribute stores the downloaded (and supposedly
|
||||
truncated) data.
|
||||
\versionadded{2.5}
|
||||
\end{excclassdesc}
|
||||
|
||||
Restrictions:
|
||||
|
||||
\begin{itemize}
|
||||
|
@ -317,7 +345,7 @@ Web client using these functions without using threads.
|
|||
\item
|
||||
The data returned by \function{urlopen()} or \function{urlretrieve()}
|
||||
is the raw data returned by the server. This may be binary data
|
||||
(e.g. an image), plain text or (for example) HTML\index{HTML}. The
|
||||
(such as an image), plain text or (for example) HTML\index{HTML}. The
|
||||
HTTP\indexii{HTTP}{protocol} protocol provides type information in the
|
||||
reply header, which can be inspected by looking at the
|
||||
\mailheader{Content-Type} header. For the
|
||||
|
|
|
@ -86,6 +86,11 @@ def urlcleanup():
|
|||
if _urlopener:
|
||||
_urlopener.cleanup()
|
||||
|
||||
# exception raised when downloaded size does not match content-length
|
||||
class ContentTooShortError(IOError):
    """Signal that a download delivered fewer bytes than Content-Length.

    Raised by urlretrieve() when the amount of data read is smaller than
    the size announced by the server's Content-Length header.

    Arguments:
    message -- human-readable description, forwarded to IOError.
    content -- whatever the raiser wants to hand back to the caller about
               the partial download; stored unchanged on the ``content``
               attribute.  Optional (defaults to None), matching the
               documented signature ``ContentTooShortError(msg[, content])``.
    """

    def __init__(self, message, content=None):
        IOError.__init__(self, message)
        # Keep the partial result reachable so callers can still use it.
        self.content = content
|
||||
|
||||
ftpcache = {}
|
||||
class URLopener:
|
||||
|
@ -228,24 +233,33 @@ class URLopener:
|
|||
self.tempcache[url] = result
|
||||
bs = 1024*8
|
||||
size = -1
|
||||
read = 0
|
||||
blocknum = 1
|
||||
if reporthook:
|
||||
if "content-length" in headers:
|
||||
size = int(headers["Content-Length"])
|
||||
reporthook(0, bs, size)
|
||||
block = fp.read(bs)
|
||||
read += len(block)
|
||||
if reporthook:
|
||||
reporthook(1, bs, size)
|
||||
while block:
|
||||
tfp.write(block)
|
||||
block = fp.read(bs)
|
||||
blocknum = blocknum + 1
|
||||
read += len(block)
|
||||
blocknum += 1
|
||||
if reporthook:
|
||||
reporthook(blocknum, bs, size)
|
||||
fp.close()
|
||||
tfp.close()
|
||||
del fp
|
||||
del tfp
|
||||
|
||||
# raise exception if actual size does not match content-length header
|
||||
if size >= 0 and read < size:
|
||||
raise ContentTooShortError("retrieval incomplete: got only %i out "
|
||||
"of %i bytes" % (read, size), result)
|
||||
|
||||
return result
|
||||
|
||||
# Each method named open_<type> knows how to open that type of URL
|
||||
|
|
|
@ -193,6 +193,10 @@ Extension Modules
|
|||
Library
|
||||
-------
|
||||
|
||||
- Patch #1062060: urllib.urlretrieve() now raises a new exception, named
|
||||
ContentTooShortError, when the actually downloaded size does not
|
||||
match the Content-Length header.
|
||||
|
||||
- Bug #1121494: distutils.dir_utils.mkpath now accepts Unicode strings.
|
||||
|
||||
- Bug #1178484: Return complete lines from codec stream readers
|
||||
|
|
Loading…
Reference in New Issue