mirror of https://github.com/python/cpython
GH-84850: Remove `urllib.request.URLopener` and `FancyURLopener` (#125739)
This commit is contained in:
parent
a99dd23c1f
commit
4d771977b1
|
@ -145,10 +145,6 @@ although there is currently no date scheduled for their removal.
|
||||||
* ``splitvalue()``
|
* ``splitvalue()``
|
||||||
* ``to_bytes()``
|
* ``to_bytes()``
|
||||||
|
|
||||||
* :mod:`urllib.request`: :class:`~urllib.request.URLopener` and
|
|
||||||
:class:`~urllib.request.FancyURLopener` style of invoking requests is
|
|
||||||
deprecated. Use newer :func:`~urllib.request.urlopen` functions and methods.
|
|
||||||
|
|
||||||
* :mod:`wsgiref`: ``SimpleHandler.stdout.write()`` should not do partial
|
* :mod:`wsgiref`: ``SimpleHandler.stdout.write()`` should not do partial
|
||||||
writes.
|
writes.
|
||||||
|
|
||||||
|
|
|
@ -67,8 +67,7 @@ The :mod:`urllib.request` module defines the following functions:
|
||||||
the response headers as it is specified in the documentation for
|
the response headers as it is specified in the documentation for
|
||||||
:class:`~http.client.HTTPResponse`.
|
:class:`~http.client.HTTPResponse`.
|
||||||
|
|
||||||
For FTP, file, and data URLs and requests explicitly handled by legacy
|
For FTP, file, and data URLs, this function
|
||||||
:class:`URLopener` and :class:`FancyURLopener` classes, this function
|
|
||||||
returns a :class:`urllib.response.addinfourl` object.
|
returns a :class:`urllib.response.addinfourl` object.
|
||||||
|
|
||||||
Raises :exc:`~urllib.error.URLError` on protocol errors.
|
Raises :exc:`~urllib.error.URLError` on protocol errors.
|
||||||
|
@ -1339,7 +1338,7 @@ environment settings::
|
||||||
|
|
||||||
>>> import urllib.request
|
>>> import urllib.request
|
||||||
>>> proxies = {'http': 'http://proxy.example.com:8080/'}
|
>>> proxies = {'http': 'http://proxy.example.com:8080/'}
|
||||||
>>> opener = urllib.request.FancyURLopener(proxies)
|
>>> opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxies))
|
||||||
>>> with opener.open("http://www.python.org") as f:
|
>>> with opener.open("http://www.python.org") as f:
|
||||||
... f.read().decode('utf-8')
|
... f.read().decode('utf-8')
|
||||||
...
|
...
|
||||||
|
@ -1347,7 +1346,7 @@ environment settings::
|
||||||
The following example uses no proxies at all, overriding environment settings::
|
The following example uses no proxies at all, overriding environment settings::
|
||||||
|
|
||||||
>>> import urllib.request
|
>>> import urllib.request
|
||||||
>>> opener = urllib.request.FancyURLopener({})
|
>>> opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))
|
||||||
>>> with opener.open("http://www.python.org/") as f:
|
>>> with opener.open("http://www.python.org/") as f:
|
||||||
... f.read().decode('utf-8')
|
... f.read().decode('utf-8')
|
||||||
...
|
...
|
||||||
|
@ -1412,121 +1411,6 @@ some point in the future.
|
||||||
Cleans up temporary files that may have been left behind by previous
|
Cleans up temporary files that may have been left behind by previous
|
||||||
calls to :func:`urlretrieve`.
|
calls to :func:`urlretrieve`.
|
||||||
|
|
||||||
.. class:: URLopener(proxies=None, **x509)
|
|
||||||
|
|
||||||
.. deprecated:: 3.3
|
|
||||||
|
|
||||||
Base class for opening and reading URLs. Unless you need to support opening
|
|
||||||
objects using schemes other than :file:`http:`, :file:`ftp:`, or :file:`file:`,
|
|
||||||
you probably want to use :class:`FancyURLopener`.
|
|
||||||
|
|
||||||
By default, the :class:`URLopener` class sends a :mailheader:`User-Agent` header
|
|
||||||
of ``urllib/VVV``, where *VVV* is the :mod:`urllib` version number.
|
|
||||||
Applications can define their own :mailheader:`User-Agent` header by subclassing
|
|
||||||
:class:`URLopener` or :class:`FancyURLopener` and setting the class attribute
|
|
||||||
:attr:`version` to an appropriate string value in the subclass definition.
|
|
||||||
|
|
||||||
The optional *proxies* parameter should be a dictionary mapping scheme names to
|
|
||||||
proxy URLs, where an empty dictionary turns proxies off completely. Its default
|
|
||||||
value is ``None``, in which case environmental proxy settings will be used if
|
|
||||||
present, as discussed in the definition of :func:`urlopen`, above.
|
|
||||||
|
|
||||||
Additional keyword parameters, collected in *x509*, may be used for
|
|
||||||
authentication of the client when using the :file:`https:` scheme. The keywords
|
|
||||||
*key_file* and *cert_file* are supported to provide an SSL key and certificate;
|
|
||||||
both are needed to support client authentication.
|
|
||||||
|
|
||||||
:class:`URLopener` objects will raise an :exc:`OSError` exception if the server
|
|
||||||
returns an error code.
|
|
||||||
|
|
||||||
.. method:: open(fullurl, data=None)
|
|
||||||
|
|
||||||
Open *fullurl* using the appropriate protocol. This method sets up cache and
|
|
||||||
proxy information, then calls the appropriate open method with its input
|
|
||||||
arguments. If the scheme is not recognized, :meth:`open_unknown` is called.
|
|
||||||
The *data* argument has the same meaning as the *data* argument of
|
|
||||||
:func:`urlopen`.
|
|
||||||
|
|
||||||
This method always quotes *fullurl* using :func:`~urllib.parse.quote`.
|
|
||||||
|
|
||||||
.. method:: open_unknown(fullurl, data=None)
|
|
||||||
|
|
||||||
Overridable interface to open unknown URL types.
|
|
||||||
|
|
||||||
|
|
||||||
.. method:: retrieve(url, filename=None, reporthook=None, data=None)
|
|
||||||
|
|
||||||
Retrieves the contents of *url* and places it in *filename*. The return value
|
|
||||||
is a tuple consisting of a local filename and either an
|
|
||||||
:class:`email.message.Message` object containing the response headers (for remote
|
|
||||||
URLs) or ``None`` (for local URLs). The caller must then open and read the
|
|
||||||
contents of *filename*. If *filename* is not given and the URL refers to a
|
|
||||||
local file, the input filename is returned. If the URL is non-local and
|
|
||||||
*filename* is not given, the filename is the output of :func:`tempfile.mktemp`
|
|
||||||
with a suffix that matches the suffix of the last path component of the input
|
|
||||||
URL. If *reporthook* is given, it must be a function accepting three numeric
|
|
||||||
parameters: A chunk number, the maximum size chunks are read in and the total size of the download
|
|
||||||
(-1 if unknown). It will be called once at the start and after each chunk of data is read from the
|
|
||||||
network. *reporthook* is ignored for local URLs.
|
|
||||||
|
|
||||||
If the *url* uses the :file:`http:` scheme identifier, the optional *data*
|
|
||||||
argument may be given to specify a ``POST`` request (normally the request type
|
|
||||||
is ``GET``). The *data* argument must in standard
|
|
||||||
:mimetype:`application/x-www-form-urlencoded` format; see the
|
|
||||||
:func:`urllib.parse.urlencode` function.
|
|
||||||
|
|
||||||
|
|
||||||
.. attribute:: version
|
|
||||||
|
|
||||||
Variable that specifies the user agent of the opener object. To get
|
|
||||||
:mod:`urllib` to tell servers that it is a particular user agent, set this in a
|
|
||||||
subclass as a class variable or in the constructor before calling the base
|
|
||||||
constructor.
|
|
||||||
|
|
||||||
|
|
||||||
.. class:: FancyURLopener(...)
|
|
||||||
|
|
||||||
.. deprecated:: 3.3
|
|
||||||
|
|
||||||
:class:`FancyURLopener` subclasses :class:`URLopener` providing default handling
|
|
||||||
for the following HTTP response codes: 301, 302, 303, 307 and 401. For the 30x
|
|
||||||
response codes listed above, the :mailheader:`Location` header is used to fetch
|
|
||||||
the actual URL. For 401 response codes (authentication required), basic HTTP
|
|
||||||
authentication is performed. For the 30x response codes, recursion is bounded
|
|
||||||
by the value of the *maxtries* attribute, which defaults to 10.
|
|
||||||
|
|
||||||
For all other response codes, the method :meth:`~BaseHandler.http_error_default` is called
|
|
||||||
which you can override in subclasses to handle the error appropriately.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
According to the letter of :rfc:`2616`, 301 and 302 responses to POST requests
|
|
||||||
must not be automatically redirected without confirmation by the user. In
|
|
||||||
reality, browsers do allow automatic redirection of these responses, changing
|
|
||||||
the POST to a GET, and :mod:`urllib` reproduces this behaviour.
|
|
||||||
|
|
||||||
The parameters to the constructor are the same as those for :class:`URLopener`.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
When performing basic authentication, a :class:`FancyURLopener` instance calls
|
|
||||||
its :meth:`prompt_user_passwd` method. The default implementation asks the
|
|
||||||
users for the required information on the controlling terminal. A subclass may
|
|
||||||
override this method to support more appropriate behavior if needed.
|
|
||||||
|
|
||||||
The :class:`FancyURLopener` class offers one additional method that should be
|
|
||||||
overloaded to provide the appropriate behavior:
|
|
||||||
|
|
||||||
.. method:: prompt_user_passwd(host, realm)
|
|
||||||
|
|
||||||
Return information needed to authenticate the user at the given host in the
|
|
||||||
specified security realm. The return value should be a tuple, ``(user,
|
|
||||||
password)``, which can be used for basic authentication.
|
|
||||||
|
|
||||||
The implementation prompts for this information on the terminal; an application
|
|
||||||
should override this method to use an appropriate interaction model in the local
|
|
||||||
environment.
|
|
||||||
|
|
||||||
|
|
||||||
:mod:`urllib.request` Restrictions
|
:mod:`urllib.request` Restrictions
|
||||||
----------------------------------
|
----------------------------------
|
||||||
|
@ -1578,8 +1462,7 @@ some point in the future.
|
||||||
you try to fetch a file whose read permissions make it inaccessible; the FTP
|
you try to fetch a file whose read permissions make it inaccessible; the FTP
|
||||||
code will try to read it, fail with a 550 error, and then perform a directory
|
code will try to read it, fail with a 550 error, and then perform a directory
|
||||||
listing for the unreadable file. If fine-grained control is needed, consider
|
listing for the unreadable file. If fine-grained control is needed, consider
|
||||||
using the :mod:`ftplib` module, subclassing :class:`FancyURLopener`, or changing
|
using the :mod:`ftplib` module.
|
||||||
*_urlopener* to meet your needs.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -769,6 +769,10 @@ urllib
|
||||||
* Remove deprecated :class:`!Quoter` class from :mod:`urllib.parse`.
|
* Remove deprecated :class:`!Quoter` class from :mod:`urllib.parse`.
|
||||||
It had previously raised a :exc:`DeprecationWarning` since Python 3.11.
|
It had previously raised a :exc:`DeprecationWarning` since Python 3.11.
|
||||||
(Contributed by Nikita Sobolev in :gh:`118827`.)
|
(Contributed by Nikita Sobolev in :gh:`118827`.)
|
||||||
|
* Remove deprecated :class:`!URLopener` and :class:`!FancyURLopener` classes
|
||||||
|
from :mod:`urllib.request`. They had previously raised a
|
||||||
|
:exc:`DeprecationWarning` since Python 3.3.
|
||||||
|
(Contributed by Barney Gale in :gh:`84850`.)
|
||||||
|
|
||||||
Others
|
Others
|
||||||
------
|
------
|
||||||
|
|
|
@ -7,11 +7,9 @@ import http.client
|
||||||
import email.message
|
import email.message
|
||||||
import io
|
import io
|
||||||
import unittest
|
import unittest
|
||||||
from unittest.mock import patch
|
|
||||||
from test import support
|
from test import support
|
||||||
from test.support import os_helper
|
from test.support import os_helper
|
||||||
from test.support import socket_helper
|
from test.support import socket_helper
|
||||||
from test.support import warnings_helper
|
|
||||||
import os
|
import os
|
||||||
try:
|
try:
|
||||||
import ssl
|
import ssl
|
||||||
|
@ -20,7 +18,6 @@ except ImportError:
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
from base64 import b64encode
|
|
||||||
import collections
|
import collections
|
||||||
|
|
||||||
|
|
||||||
|
@ -35,32 +32,6 @@ def hexescape(char):
|
||||||
hex_repr = "0%s" % hex_repr
|
hex_repr = "0%s" % hex_repr
|
||||||
return "%" + hex_repr
|
return "%" + hex_repr
|
||||||
|
|
||||||
# Shortcut for testing FancyURLopener
|
|
||||||
_urlopener = None
|
|
||||||
|
|
||||||
|
|
||||||
def urlopen(url, data=None, proxies=None):
|
|
||||||
"""urlopen(url [, data]) -> open file-like object"""
|
|
||||||
global _urlopener
|
|
||||||
if proxies is not None:
|
|
||||||
opener = urllib.request.FancyURLopener(proxies=proxies)
|
|
||||||
elif not _urlopener:
|
|
||||||
opener = FancyURLopener()
|
|
||||||
_urlopener = opener
|
|
||||||
else:
|
|
||||||
opener = _urlopener
|
|
||||||
if data is None:
|
|
||||||
return opener.open(url)
|
|
||||||
else:
|
|
||||||
return opener.open(url, data)
|
|
||||||
|
|
||||||
|
|
||||||
def FancyURLopener():
|
|
||||||
with warnings_helper.check_warnings(
|
|
||||||
('FancyURLopener style of invoking requests is deprecated.',
|
|
||||||
DeprecationWarning)):
|
|
||||||
return urllib.request.FancyURLopener()
|
|
||||||
|
|
||||||
|
|
||||||
def fakehttp(fakedata, mock_close=False):
|
def fakehttp(fakedata, mock_close=False):
|
||||||
class FakeSocket(io.BytesIO):
|
class FakeSocket(io.BytesIO):
|
||||||
|
@ -119,26 +90,6 @@ class FakeHTTPMixin(object):
|
||||||
http.client.HTTPConnection = self._connection_class
|
http.client.HTTPConnection = self._connection_class
|
||||||
|
|
||||||
|
|
||||||
class FakeFTPMixin(object):
|
|
||||||
def fakeftp(self):
|
|
||||||
class FakeFtpWrapper(object):
|
|
||||||
def __init__(self, user, passwd, host, port, dirs, timeout=None,
|
|
||||||
persistent=True):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def retrfile(self, file, type):
|
|
||||||
return io.BytesIO(), 0
|
|
||||||
|
|
||||||
def close(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
self._ftpwrapper_class = urllib.request.ftpwrapper
|
|
||||||
urllib.request.ftpwrapper = FakeFtpWrapper
|
|
||||||
|
|
||||||
def unfakeftp(self):
|
|
||||||
urllib.request.ftpwrapper = self._ftpwrapper_class
|
|
||||||
|
|
||||||
|
|
||||||
class urlopen_FileTests(unittest.TestCase):
|
class urlopen_FileTests(unittest.TestCase):
|
||||||
"""Test urlopen() opening a temporary file.
|
"""Test urlopen() opening a temporary file.
|
||||||
|
|
||||||
|
@ -158,7 +109,7 @@ class urlopen_FileTests(unittest.TestCase):
|
||||||
f.close()
|
f.close()
|
||||||
self.pathname = os_helper.TESTFN
|
self.pathname = os_helper.TESTFN
|
||||||
self.quoted_pathname = urllib.parse.quote(self.pathname)
|
self.quoted_pathname = urllib.parse.quote(self.pathname)
|
||||||
self.returned_obj = urlopen("file:%s" % self.quoted_pathname)
|
self.returned_obj = urllib.request.urlopen("file:%s" % self.quoted_pathname)
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
"""Shut down the open object"""
|
"""Shut down the open object"""
|
||||||
|
@ -205,7 +156,7 @@ class urlopen_FileTests(unittest.TestCase):
|
||||||
self.assertIsInstance(self.returned_obj.headers, email.message.Message)
|
self.assertIsInstance(self.returned_obj.headers, email.message.Message)
|
||||||
|
|
||||||
def test_url(self):
|
def test_url(self):
|
||||||
self.assertEqual(self.returned_obj.url, self.quoted_pathname)
|
self.assertEqual(self.returned_obj.url, "file://" + self.quoted_pathname)
|
||||||
|
|
||||||
def test_status(self):
|
def test_status(self):
|
||||||
self.assertIsNone(self.returned_obj.status)
|
self.assertIsNone(self.returned_obj.status)
|
||||||
|
@ -214,7 +165,7 @@ class urlopen_FileTests(unittest.TestCase):
|
||||||
self.assertIsInstance(self.returned_obj.info(), email.message.Message)
|
self.assertIsInstance(self.returned_obj.info(), email.message.Message)
|
||||||
|
|
||||||
def test_geturl(self):
|
def test_geturl(self):
|
||||||
self.assertEqual(self.returned_obj.geturl(), self.quoted_pathname)
|
self.assertEqual(self.returned_obj.geturl(), "file://" + self.quoted_pathname)
|
||||||
|
|
||||||
def test_getcode(self):
|
def test_getcode(self):
|
||||||
self.assertIsNone(self.returned_obj.getcode())
|
self.assertIsNone(self.returned_obj.getcode())
|
||||||
|
@ -339,13 +290,13 @@ class ProxyTests_withOrderedEnv(unittest.TestCase):
|
||||||
self.assertEqual('http://somewhere:3128', proxies['http'])
|
self.assertEqual('http://somewhere:3128', proxies['http'])
|
||||||
|
|
||||||
|
|
||||||
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
|
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin):
|
||||||
"""Test urlopen() opening a fake http connection."""
|
"""Test urlopen() opening a fake http connection."""
|
||||||
|
|
||||||
def check_read(self, ver):
|
def check_read(self, ver):
|
||||||
self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
|
self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
|
||||||
try:
|
try:
|
||||||
fp = urlopen("http://python.org/")
|
fp = urllib.request.urlopen("http://python.org/")
|
||||||
self.assertEqual(fp.readline(), b"Hello!")
|
self.assertEqual(fp.readline(), b"Hello!")
|
||||||
self.assertEqual(fp.readline(), b"")
|
self.assertEqual(fp.readline(), b"")
|
||||||
self.assertEqual(fp.geturl(), 'http://python.org/')
|
self.assertEqual(fp.geturl(), 'http://python.org/')
|
||||||
|
@ -366,8 +317,8 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
|
||||||
def test_willclose(self):
|
def test_willclose(self):
|
||||||
self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
|
self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
|
||||||
try:
|
try:
|
||||||
resp = urlopen("http://www.python.org")
|
resp = urllib.request.urlopen("http://www.python.org")
|
||||||
self.assertTrue(resp.fp.will_close)
|
self.assertTrue(resp.will_close)
|
||||||
finally:
|
finally:
|
||||||
self.unfakehttp()
|
self.unfakehttp()
|
||||||
|
|
||||||
|
@ -392,9 +343,6 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
|
||||||
with self.assertRaisesRegex(
|
with self.assertRaisesRegex(
|
||||||
InvalidURL, f"contain control.*{escaped_char_repr}"):
|
InvalidURL, f"contain control.*{escaped_char_repr}"):
|
||||||
urllib.request.urlopen(f"https:{schemeless_url}")
|
urllib.request.urlopen(f"https:{schemeless_url}")
|
||||||
# This code path quotes the URL so there is no injection.
|
|
||||||
resp = urlopen(f"http:{schemeless_url}")
|
|
||||||
self.assertNotIn(char, resp.geturl())
|
|
||||||
finally:
|
finally:
|
||||||
self.unfakehttp()
|
self.unfakehttp()
|
||||||
|
|
||||||
|
@ -416,11 +364,6 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
|
||||||
urllib.request.urlopen(f"http:{schemeless_url}")
|
urllib.request.urlopen(f"http:{schemeless_url}")
|
||||||
with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
|
with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
|
||||||
urllib.request.urlopen(f"https:{schemeless_url}")
|
urllib.request.urlopen(f"https:{schemeless_url}")
|
||||||
# This code path quotes the URL so there is no injection.
|
|
||||||
resp = urlopen(f"http:{schemeless_url}")
|
|
||||||
self.assertNotIn(' ', resp.geturl())
|
|
||||||
self.assertNotIn('\r', resp.geturl())
|
|
||||||
self.assertNotIn('\n', resp.geturl())
|
|
||||||
finally:
|
finally:
|
||||||
self.unfakehttp()
|
self.unfakehttp()
|
||||||
|
|
||||||
|
@ -435,9 +378,9 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
|
||||||
InvalidURL = http.client.InvalidURL
|
InvalidURL = http.client.InvalidURL
|
||||||
with self.assertRaisesRegex(
|
with self.assertRaisesRegex(
|
||||||
InvalidURL, f"contain control.*{escaped_char_repr}"):
|
InvalidURL, f"contain control.*{escaped_char_repr}"):
|
||||||
urlopen(f"http:{schemeless_url}")
|
urllib.request.urlopen(f"http:{schemeless_url}")
|
||||||
with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"):
|
with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"):
|
||||||
urlopen(f"https:{schemeless_url}")
|
urllib.request.urlopen(f"https:{schemeless_url}")
|
||||||
finally:
|
finally:
|
||||||
self.unfakehttp()
|
self.unfakehttp()
|
||||||
|
|
||||||
|
@ -450,9 +393,9 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
|
||||||
InvalidURL = http.client.InvalidURL
|
InvalidURL = http.client.InvalidURL
|
||||||
with self.assertRaisesRegex(
|
with self.assertRaisesRegex(
|
||||||
InvalidURL, r"contain control.*\\r"):
|
InvalidURL, r"contain control.*\\r"):
|
||||||
urlopen(f"http:{schemeless_url}")
|
urllib.request.urlopen(f"http:{schemeless_url}")
|
||||||
with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
|
with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
|
||||||
urlopen(f"https:{schemeless_url}")
|
urllib.request.urlopen(f"https:{schemeless_url}")
|
||||||
finally:
|
finally:
|
||||||
self.unfakehttp()
|
self.unfakehttp()
|
||||||
|
|
||||||
|
@ -476,7 +419,7 @@ Connection: close
|
||||||
Content-Type: text/html; charset=iso-8859-1
|
Content-Type: text/html; charset=iso-8859-1
|
||||||
''', mock_close=True)
|
''', mock_close=True)
|
||||||
try:
|
try:
|
||||||
self.assertRaises(OSError, urlopen, "http://python.org/")
|
self.assertRaises(OSError, urllib.request.urlopen, "http://python.org/")
|
||||||
finally:
|
finally:
|
||||||
self.unfakehttp()
|
self.unfakehttp()
|
||||||
|
|
||||||
|
@ -492,20 +435,20 @@ Content-Type: text/html; charset=iso-8859-1
|
||||||
try:
|
try:
|
||||||
msg = "Redirection to url 'file:"
|
msg = "Redirection to url 'file:"
|
||||||
with self.assertRaisesRegex(urllib.error.HTTPError, msg):
|
with self.assertRaisesRegex(urllib.error.HTTPError, msg):
|
||||||
urlopen("http://python.org/")
|
urllib.request.urlopen("http://python.org/")
|
||||||
finally:
|
finally:
|
||||||
self.unfakehttp()
|
self.unfakehttp()
|
||||||
|
|
||||||
def test_redirect_limit_independent(self):
|
def test_redirect_limit_independent(self):
|
||||||
# Ticket #12923: make sure independent requests each use their
|
# Ticket #12923: make sure independent requests each use their
|
||||||
# own retry limit.
|
# own retry limit.
|
||||||
for i in range(FancyURLopener().maxtries):
|
for i in range(urllib.request.HTTPRedirectHandler.max_redirections):
|
||||||
self.fakehttp(b'''HTTP/1.1 302 Found
|
self.fakehttp(b'''HTTP/1.1 302 Found
|
||||||
Location: file://guidocomputer.athome.com:/python/license
|
Location: file://guidocomputer.athome.com:/python/license
|
||||||
Connection: close
|
Connection: close
|
||||||
''', mock_close=True)
|
''', mock_close=True)
|
||||||
try:
|
try:
|
||||||
self.assertRaises(urllib.error.HTTPError, urlopen,
|
self.assertRaises(urllib.error.HTTPError, urllib.request.urlopen,
|
||||||
"http://something")
|
"http://something")
|
||||||
finally:
|
finally:
|
||||||
self.unfakehttp()
|
self.unfakehttp()
|
||||||
|
@ -515,14 +458,14 @@ Connection: close
|
||||||
# data. (#1680230)
|
# data. (#1680230)
|
||||||
self.fakehttp(b'')
|
self.fakehttp(b'')
|
||||||
try:
|
try:
|
||||||
self.assertRaises(OSError, urlopen, "http://something")
|
self.assertRaises(OSError, urllib.request.urlopen, "http://something")
|
||||||
finally:
|
finally:
|
||||||
self.unfakehttp()
|
self.unfakehttp()
|
||||||
|
|
||||||
def test_missing_localfile(self):
|
def test_missing_localfile(self):
|
||||||
# Test for #10836
|
# Test for #10836
|
||||||
with self.assertRaises(urllib.error.URLError) as e:
|
with self.assertRaises(urllib.error.URLError) as e:
|
||||||
urlopen('file://localhost/a/file/which/doesnot/exists.py')
|
urllib.request.urlopen('file://localhost/a/file/which/doesnot/exists.py')
|
||||||
self.assertTrue(e.exception.filename)
|
self.assertTrue(e.exception.filename)
|
||||||
self.assertTrue(e.exception.reason)
|
self.assertTrue(e.exception.reason)
|
||||||
|
|
||||||
|
@ -531,71 +474,28 @@ Connection: close
|
||||||
tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
|
tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
|
||||||
try:
|
try:
|
||||||
self.assertTrue(os.path.exists(tmp_file))
|
self.assertTrue(os.path.exists(tmp_file))
|
||||||
with urlopen(tmp_fileurl) as fobj:
|
with urllib.request.urlopen(tmp_fileurl) as fobj:
|
||||||
self.assertTrue(fobj)
|
self.assertTrue(fobj)
|
||||||
finally:
|
finally:
|
||||||
os.close(fd)
|
os.close(fd)
|
||||||
os.unlink(tmp_file)
|
os.unlink(tmp_file)
|
||||||
self.assertFalse(os.path.exists(tmp_file))
|
self.assertFalse(os.path.exists(tmp_file))
|
||||||
with self.assertRaises(urllib.error.URLError):
|
with self.assertRaises(urllib.error.URLError):
|
||||||
urlopen(tmp_fileurl)
|
urllib.request.urlopen(tmp_fileurl)
|
||||||
|
|
||||||
def test_ftp_nohost(self):
|
def test_ftp_nohost(self):
|
||||||
test_ftp_url = 'ftp:///path'
|
test_ftp_url = 'ftp:///path'
|
||||||
with self.assertRaises(urllib.error.URLError) as e:
|
with self.assertRaises(urllib.error.URLError) as e:
|
||||||
urlopen(test_ftp_url)
|
urllib.request.urlopen(test_ftp_url)
|
||||||
self.assertFalse(e.exception.filename)
|
self.assertFalse(e.exception.filename)
|
||||||
self.assertTrue(e.exception.reason)
|
self.assertTrue(e.exception.reason)
|
||||||
|
|
||||||
def test_ftp_nonexisting(self):
|
def test_ftp_nonexisting(self):
|
||||||
with self.assertRaises(urllib.error.URLError) as e:
|
with self.assertRaises(urllib.error.URLError) as e:
|
||||||
urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
|
urllib.request.urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
|
||||||
self.assertFalse(e.exception.filename)
|
self.assertFalse(e.exception.filename)
|
||||||
self.assertTrue(e.exception.reason)
|
self.assertTrue(e.exception.reason)
|
||||||
|
|
||||||
@patch.object(urllib.request, 'MAXFTPCACHE', 0)
|
|
||||||
def test_ftp_cache_pruning(self):
|
|
||||||
self.fakeftp()
|
|
||||||
try:
|
|
||||||
urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
|
|
||||||
urlopen('ftp://localhost')
|
|
||||||
finally:
|
|
||||||
self.unfakeftp()
|
|
||||||
|
|
||||||
def test_userpass_inurl(self):
|
|
||||||
self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
|
|
||||||
try:
|
|
||||||
fp = urlopen("http://user:pass@python.org/")
|
|
||||||
self.assertEqual(fp.readline(), b"Hello!")
|
|
||||||
self.assertEqual(fp.readline(), b"")
|
|
||||||
self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
|
|
||||||
self.assertEqual(fp.getcode(), 200)
|
|
||||||
finally:
|
|
||||||
self.unfakehttp()
|
|
||||||
|
|
||||||
def test_userpass_inurl_w_spaces(self):
|
|
||||||
self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
|
|
||||||
try:
|
|
||||||
userpass = "a b:c d"
|
|
||||||
url = "http://{}@python.org/".format(userpass)
|
|
||||||
fakehttp_wrapper = http.client.HTTPConnection
|
|
||||||
authorization = ("Authorization: Basic %s\r\n" %
|
|
||||||
b64encode(userpass.encode("ASCII")).decode("ASCII"))
|
|
||||||
fp = urlopen(url)
|
|
||||||
# The authorization header must be in place
|
|
||||||
self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
|
|
||||||
self.assertEqual(fp.readline(), b"Hello!")
|
|
||||||
self.assertEqual(fp.readline(), b"")
|
|
||||||
# the spaces are quoted in URL so no match
|
|
||||||
self.assertNotEqual(fp.geturl(), url)
|
|
||||||
self.assertEqual(fp.getcode(), 200)
|
|
||||||
finally:
|
|
||||||
self.unfakehttp()
|
|
||||||
|
|
||||||
def test_URLopener_deprecation(self):
|
|
||||||
with warnings_helper.check_warnings(('',DeprecationWarning)):
|
|
||||||
urllib.request.URLopener()
|
|
||||||
|
|
||||||
|
|
||||||
class urlopen_DataTests(unittest.TestCase):
|
class urlopen_DataTests(unittest.TestCase):
|
||||||
"""Test urlopen() opening a data URL."""
|
"""Test urlopen() opening a data URL."""
|
||||||
|
@ -1620,56 +1520,6 @@ class Utility_Tests(unittest.TestCase):
|
||||||
self.assertIsInstance(urllib.request.thishost(), tuple)
|
self.assertIsInstance(urllib.request.thishost(), tuple)
|
||||||
|
|
||||||
|
|
||||||
class URLopener_Tests(FakeHTTPMixin, unittest.TestCase):
|
|
||||||
"""Testcase to test the open method of URLopener class."""
|
|
||||||
|
|
||||||
def test_quoted_open(self):
|
|
||||||
class DummyURLopener(urllib.request.URLopener):
|
|
||||||
def open_spam(self, url):
|
|
||||||
return url
|
|
||||||
with warnings_helper.check_warnings(
|
|
||||||
('DummyURLopener style of invoking requests is deprecated.',
|
|
||||||
DeprecationWarning)):
|
|
||||||
self.assertEqual(DummyURLopener().open(
|
|
||||||
'spam://example/ /'),'//example/%20/')
|
|
||||||
|
|
||||||
# test the safe characters are not quoted by urlopen
|
|
||||||
self.assertEqual(DummyURLopener().open(
|
|
||||||
"spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
|
|
||||||
"//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
|
|
||||||
|
|
||||||
@warnings_helper.ignore_warnings(category=DeprecationWarning)
|
|
||||||
def test_urlopener_retrieve_file(self):
|
|
||||||
with os_helper.temp_dir() as tmpdir:
|
|
||||||
fd, tmpfile = tempfile.mkstemp(dir=tmpdir)
|
|
||||||
os.close(fd)
|
|
||||||
fileurl = "file:" + urllib.request.pathname2url(tmpfile)
|
|
||||||
filename, _ = urllib.request.URLopener().retrieve(fileurl)
|
|
||||||
# Some buildbots have TEMP folder that uses a lowercase drive letter.
|
|
||||||
self.assertEqual(os.path.normcase(filename), os.path.normcase(tmpfile))
|
|
||||||
|
|
||||||
@warnings_helper.ignore_warnings(category=DeprecationWarning)
|
|
||||||
def test_urlopener_retrieve_remote(self):
|
|
||||||
url = "http://www.python.org/file.txt"
|
|
||||||
self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
|
|
||||||
self.addCleanup(self.unfakehttp)
|
|
||||||
filename, _ = urllib.request.URLopener().retrieve(url)
|
|
||||||
self.assertEqual(os.path.splitext(filename)[1], ".txt")
|
|
||||||
|
|
||||||
@warnings_helper.ignore_warnings(category=DeprecationWarning)
|
|
||||||
def test_local_file_open(self):
|
|
||||||
# bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
|
|
||||||
class DummyURLopener(urllib.request.URLopener):
|
|
||||||
def open_local_file(self, url):
|
|
||||||
return url
|
|
||||||
for url in ('local_file://example', 'local-file://example'):
|
|
||||||
self.assertRaises(OSError, urllib.request.urlopen, url)
|
|
||||||
self.assertRaises(OSError, urllib.request.URLopener().open, url)
|
|
||||||
self.assertRaises(OSError, urllib.request.URLopener().retrieve, url)
|
|
||||||
self.assertRaises(OSError, DummyURLopener().open, url)
|
|
||||||
self.assertRaises(OSError, DummyURLopener().retrieve, url)
|
|
||||||
|
|
||||||
|
|
||||||
class RequestTests(unittest.TestCase):
|
class RequestTests(unittest.TestCase):
|
||||||
"""Unit tests for urllib.request.Request."""
|
"""Unit tests for urllib.request.Request."""
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,7 @@ from test.support import socket_helper
|
||||||
|
|
||||||
import contextlib
|
import contextlib
|
||||||
import socket
|
import socket
|
||||||
|
import urllib.error
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import os
|
import os
|
||||||
|
@ -101,13 +102,10 @@ class urlopenNetworkTests(unittest.TestCase):
|
||||||
# test getcode() with the fancy opener to get 404 error codes
|
# test getcode() with the fancy opener to get 404 error codes
|
||||||
URL = self.url + "XXXinvalidXXX"
|
URL = self.url + "XXXinvalidXXX"
|
||||||
with socket_helper.transient_internet(URL):
|
with socket_helper.transient_internet(URL):
|
||||||
with self.assertWarns(DeprecationWarning):
|
with self.assertRaises(urllib.error.URLError) as e:
|
||||||
open_url = urllib.request.FancyURLopener().open(URL)
|
with urllib.request.urlopen(URL):
|
||||||
try:
|
pass
|
||||||
code = open_url.getcode()
|
self.assertEqual(e.exception.code, 404)
|
||||||
finally:
|
|
||||||
open_url.close()
|
|
||||||
self.assertEqual(code, 404)
|
|
||||||
|
|
||||||
@support.requires_resource('walltime')
|
@support.requires_resource('walltime')
|
||||||
def test_bad_address(self):
|
def test_bad_address(self):
|
||||||
|
|
|
@ -83,6 +83,7 @@ f = urllib.request.urlopen('https://www.python.org/')
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import bisect
|
import bisect
|
||||||
|
import contextlib
|
||||||
import email
|
import email
|
||||||
import hashlib
|
import hashlib
|
||||||
import http.client
|
import http.client
|
||||||
|
@ -94,15 +95,13 @@ import string
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import tempfile
|
import tempfile
|
||||||
import contextlib
|
|
||||||
import warnings
|
|
||||||
|
|
||||||
|
|
||||||
from urllib.error import URLError, HTTPError, ContentTooShortError
|
from urllib.error import URLError, HTTPError, ContentTooShortError
|
||||||
from urllib.parse import (
|
from urllib.parse import (
|
||||||
urlparse, urlsplit, urljoin, unwrap, quote, unquote,
|
urlparse, urlsplit, urljoin, unwrap, quote, unquote,
|
||||||
_splittype, _splithost, _splitport, _splituser, _splitpasswd,
|
_splittype, _splithost, _splitport, _splituser, _splitpasswd,
|
||||||
_splitattr, _splitquery, _splitvalue, _splittag, _to_bytes,
|
_splitattr, _splitvalue, _splittag,
|
||||||
unquote_to_bytes, urlunparse)
|
unquote_to_bytes, urlunparse)
|
||||||
from urllib.response import addinfourl, addclosehook
|
from urllib.response import addinfourl, addclosehook
|
||||||
|
|
||||||
|
@ -128,7 +127,7 @@ __all__ = [
|
||||||
'urlopen', 'install_opener', 'build_opener',
|
'urlopen', 'install_opener', 'build_opener',
|
||||||
'pathname2url', 'url2pathname', 'getproxies',
|
'pathname2url', 'url2pathname', 'getproxies',
|
||||||
# Legacy interface
|
# Legacy interface
|
||||||
'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
|
'urlretrieve', 'urlcleanup',
|
||||||
]
|
]
|
||||||
|
|
||||||
# used in User-Agent header sent
|
# used in User-Agent header sent
|
||||||
|
@ -165,8 +164,7 @@ def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
|
||||||
the reason phrase returned by the server --- instead of the response
|
the reason phrase returned by the server --- instead of the response
|
||||||
headers as it is specified in the documentation for HTTPResponse.
|
headers as it is specified in the documentation for HTTPResponse.
|
||||||
|
|
||||||
For FTP, file, and data URLs and requests explicitly handled by legacy
|
For FTP, file, and data URLs, this function returns a
|
||||||
URLopener and FancyURLopener classes, this function returns a
|
|
||||||
urllib.response.addinfourl object.
|
urllib.response.addinfourl object.
|
||||||
|
|
||||||
Note that None may be returned if no handler handles the request (though
|
Note that None may be returned if no handler handles the request (though
|
||||||
|
@ -940,6 +938,7 @@ class AbstractBasicAuthHandler:
|
||||||
for mo in AbstractBasicAuthHandler.rx.finditer(header):
|
for mo in AbstractBasicAuthHandler.rx.finditer(header):
|
||||||
scheme, quote, realm = mo.groups()
|
scheme, quote, realm = mo.groups()
|
||||||
if quote not in ['"', "'"]:
|
if quote not in ['"', "'"]:
|
||||||
|
import warnings
|
||||||
warnings.warn("Basic Auth Realm was unquoted",
|
warnings.warn("Basic Auth Realm was unquoted",
|
||||||
UserWarning, 3)
|
UserWarning, 3)
|
||||||
|
|
||||||
|
@ -1495,7 +1494,7 @@ class FileHandler(BaseHandler):
|
||||||
origurl = 'file://' + filename
|
origurl = 'file://' + filename
|
||||||
return addinfourl(open(localfile, 'rb'), headers, origurl)
|
return addinfourl(open(localfile, 'rb'), headers, origurl)
|
||||||
except OSError as exp:
|
except OSError as exp:
|
||||||
raise URLError(exp)
|
raise URLError(exp, exp.filename)
|
||||||
raise URLError('file not on local host')
|
raise URLError('file not on local host')
|
||||||
|
|
||||||
def _safe_gethostbyname(host):
|
def _safe_gethostbyname(host):
|
||||||
|
@ -1647,8 +1646,6 @@ class DataHandler(BaseHandler):
|
||||||
|
|
||||||
# Code move from the old urllib module
|
# Code move from the old urllib module
|
||||||
|
|
||||||
MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
|
|
||||||
|
|
||||||
# Helper for non-unix systems
|
# Helper for non-unix systems
|
||||||
if os.name == 'nt':
|
if os.name == 'nt':
|
||||||
from nturl2path import url2pathname, pathname2url
|
from nturl2path import url2pathname, pathname2url
|
||||||
|
@ -1668,678 +1665,6 @@ else:
|
||||||
return quote(pathname)
|
return quote(pathname)
|
||||||
|
|
||||||
|
|
||||||
ftpcache = {}
|
|
||||||
|
|
||||||
|
|
||||||
class URLopener:
|
|
||||||
"""Class to open URLs.
|
|
||||||
This is a class rather than just a subroutine because we may need
|
|
||||||
more than one set of global protocol-specific options.
|
|
||||||
Note -- this is a base class for those who don't want the
|
|
||||||
automatic handling of errors type 302 (relocated) and 401
|
|
||||||
(authorization needed)."""
|
|
||||||
|
|
||||||
__tempfiles = None
|
|
||||||
|
|
||||||
version = "Python-urllib/%s" % __version__
|
|
||||||
|
|
||||||
# Constructor
|
|
||||||
def __init__(self, proxies=None, **x509):
|
|
||||||
msg = "%(class)s style of invoking requests is deprecated. " \
|
|
||||||
"Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
|
|
||||||
warnings.warn(msg, DeprecationWarning, stacklevel=3)
|
|
||||||
if proxies is None:
|
|
||||||
proxies = getproxies()
|
|
||||||
assert hasattr(proxies, 'keys'), "proxies must be a mapping"
|
|
||||||
self.proxies = proxies
|
|
||||||
self.key_file = x509.get('key_file')
|
|
||||||
self.cert_file = x509.get('cert_file')
|
|
||||||
self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
|
|
||||||
self.__tempfiles = []
|
|
||||||
self.__unlink = os.unlink # See cleanup()
|
|
||||||
self.tempcache = None
|
|
||||||
# Undocumented feature: if you assign {} to tempcache,
|
|
||||||
# it is used to cache files retrieved with
|
|
||||||
# self.retrieve(). This is not enabled by default
|
|
||||||
# since it does not work for changing documents (and I
|
|
||||||
# haven't got the logic to check expiration headers
|
|
||||||
# yet).
|
|
||||||
self.ftpcache = ftpcache
|
|
||||||
# Undocumented feature: you can use a different
|
|
||||||
# ftp cache by assigning to the .ftpcache member;
|
|
||||||
# in case you want logically independent URL openers
|
|
||||||
# XXX This is not threadsafe. Bah.
|
|
||||||
|
|
||||||
def __del__(self):
|
|
||||||
self.close()
|
|
||||||
|
|
||||||
def close(self):
|
|
||||||
self.cleanup()
|
|
||||||
|
|
||||||
def cleanup(self):
|
|
||||||
# This code sometimes runs when the rest of this module
|
|
||||||
# has already been deleted, so it can't use any globals
|
|
||||||
# or import anything.
|
|
||||||
if self.__tempfiles:
|
|
||||||
for file in self.__tempfiles:
|
|
||||||
try:
|
|
||||||
self.__unlink(file)
|
|
||||||
except OSError:
|
|
||||||
pass
|
|
||||||
del self.__tempfiles[:]
|
|
||||||
if self.tempcache:
|
|
||||||
self.tempcache.clear()
|
|
||||||
|
|
||||||
def addheader(self, *args):
|
|
||||||
"""Add a header to be used by the HTTP interface only
|
|
||||||
e.g. u.addheader('Accept', 'sound/basic')"""
|
|
||||||
self.addheaders.append(args)
|
|
||||||
|
|
||||||
# External interface
|
|
||||||
def open(self, fullurl, data=None):
|
|
||||||
"""Use URLopener().open(file) instead of open(file, 'r')."""
|
|
||||||
fullurl = unwrap(_to_bytes(fullurl))
|
|
||||||
fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
|
|
||||||
if self.tempcache and fullurl in self.tempcache:
|
|
||||||
filename, headers = self.tempcache[fullurl]
|
|
||||||
fp = open(filename, 'rb')
|
|
||||||
return addinfourl(fp, headers, fullurl)
|
|
||||||
urltype, url = _splittype(fullurl)
|
|
||||||
if not urltype:
|
|
||||||
urltype = 'file'
|
|
||||||
if urltype in self.proxies:
|
|
||||||
proxy = self.proxies[urltype]
|
|
||||||
urltype, proxyhost = _splittype(proxy)
|
|
||||||
host, selector = _splithost(proxyhost)
|
|
||||||
url = (host, fullurl) # Signal special case to open_*()
|
|
||||||
else:
|
|
||||||
proxy = None
|
|
||||||
name = 'open_' + urltype
|
|
||||||
self.type = urltype
|
|
||||||
name = name.replace('-', '_')
|
|
||||||
if not hasattr(self, name) or name == 'open_local_file':
|
|
||||||
if proxy:
|
|
||||||
return self.open_unknown_proxy(proxy, fullurl, data)
|
|
||||||
else:
|
|
||||||
return self.open_unknown(fullurl, data)
|
|
||||||
try:
|
|
||||||
if data is None:
|
|
||||||
return getattr(self, name)(url)
|
|
||||||
else:
|
|
||||||
return getattr(self, name)(url, data)
|
|
||||||
except (HTTPError, URLError):
|
|
||||||
raise
|
|
||||||
except OSError as msg:
|
|
||||||
raise OSError('socket error', msg) from msg
|
|
||||||
|
|
||||||
def open_unknown(self, fullurl, data=None):
|
|
||||||
"""Overridable interface to open unknown URL type."""
|
|
||||||
type, url = _splittype(fullurl)
|
|
||||||
raise OSError('url error', 'unknown url type', type)
|
|
||||||
|
|
||||||
def open_unknown_proxy(self, proxy, fullurl, data=None):
|
|
||||||
"""Overridable interface to open unknown URL type."""
|
|
||||||
type, url = _splittype(fullurl)
|
|
||||||
raise OSError('url error', 'invalid proxy for %s' % type, proxy)
|
|
||||||
|
|
||||||
# External interface
|
|
||||||
def retrieve(self, url, filename=None, reporthook=None, data=None):
|
|
||||||
"""retrieve(url) returns (filename, headers) for a local object
|
|
||||||
or (tempfilename, headers) for a remote object."""
|
|
||||||
url = unwrap(_to_bytes(url))
|
|
||||||
if self.tempcache and url in self.tempcache:
|
|
||||||
return self.tempcache[url]
|
|
||||||
type, url1 = _splittype(url)
|
|
||||||
if filename is None and (not type or type == 'file'):
|
|
||||||
try:
|
|
||||||
fp = self.open_local_file(url1)
|
|
||||||
hdrs = fp.info()
|
|
||||||
fp.close()
|
|
||||||
return url2pathname(_splithost(url1)[1]), hdrs
|
|
||||||
except OSError:
|
|
||||||
pass
|
|
||||||
fp = self.open(url, data)
|
|
||||||
try:
|
|
||||||
headers = fp.info()
|
|
||||||
if filename:
|
|
||||||
tfp = open(filename, 'wb')
|
|
||||||
else:
|
|
||||||
garbage, path = _splittype(url)
|
|
||||||
garbage, path = _splithost(path or "")
|
|
||||||
path, garbage = _splitquery(path or "")
|
|
||||||
path, garbage = _splitattr(path or "")
|
|
||||||
suffix = os.path.splitext(path)[1]
|
|
||||||
(fd, filename) = tempfile.mkstemp(suffix)
|
|
||||||
self.__tempfiles.append(filename)
|
|
||||||
tfp = os.fdopen(fd, 'wb')
|
|
||||||
try:
|
|
||||||
result = filename, headers
|
|
||||||
if self.tempcache is not None:
|
|
||||||
self.tempcache[url] = result
|
|
||||||
bs = 1024*8
|
|
||||||
size = -1
|
|
||||||
read = 0
|
|
||||||
blocknum = 0
|
|
||||||
if "content-length" in headers:
|
|
||||||
size = int(headers["Content-Length"])
|
|
||||||
if reporthook:
|
|
||||||
reporthook(blocknum, bs, size)
|
|
||||||
while block := fp.read(bs):
|
|
||||||
read += len(block)
|
|
||||||
tfp.write(block)
|
|
||||||
blocknum += 1
|
|
||||||
if reporthook:
|
|
||||||
reporthook(blocknum, bs, size)
|
|
||||||
finally:
|
|
||||||
tfp.close()
|
|
||||||
finally:
|
|
||||||
fp.close()
|
|
||||||
|
|
||||||
# raise exception if actual size does not match content-length header
|
|
||||||
if size >= 0 and read < size:
|
|
||||||
raise ContentTooShortError(
|
|
||||||
"retrieval incomplete: got only %i out of %i bytes"
|
|
||||||
% (read, size), result)
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Each method named open_<type> knows how to open that type of URL
|
|
||||||
|
|
||||||
def _open_generic_http(self, connection_factory, url, data):
|
|
||||||
"""Make an HTTP connection using connection_class.
|
|
||||||
|
|
||||||
This is an internal method that should be called from
|
|
||||||
open_http() or open_https().
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
- connection_factory should take a host name and return an
|
|
||||||
HTTPConnection instance.
|
|
||||||
- url is the url to retrieval or a host, relative-path pair.
|
|
||||||
- data is payload for a POST request or None.
|
|
||||||
"""
|
|
||||||
|
|
||||||
user_passwd = None
|
|
||||||
proxy_passwd= None
|
|
||||||
if isinstance(url, str):
|
|
||||||
host, selector = _splithost(url)
|
|
||||||
if host:
|
|
||||||
user_passwd, host = _splituser(host)
|
|
||||||
host = unquote(host)
|
|
||||||
realhost = host
|
|
||||||
else:
|
|
||||||
host, selector = url
|
|
||||||
# check whether the proxy contains authorization information
|
|
||||||
proxy_passwd, host = _splituser(host)
|
|
||||||
# now we proceed with the url we want to obtain
|
|
||||||
urltype, rest = _splittype(selector)
|
|
||||||
url = rest
|
|
||||||
user_passwd = None
|
|
||||||
if urltype.lower() != 'http':
|
|
||||||
realhost = None
|
|
||||||
else:
|
|
||||||
realhost, rest = _splithost(rest)
|
|
||||||
if realhost:
|
|
||||||
user_passwd, realhost = _splituser(realhost)
|
|
||||||
if user_passwd:
|
|
||||||
selector = "%s://%s%s" % (urltype, realhost, rest)
|
|
||||||
if proxy_bypass(realhost):
|
|
||||||
host = realhost
|
|
||||||
|
|
||||||
if not host: raise OSError('http error', 'no host given')
|
|
||||||
|
|
||||||
if proxy_passwd:
|
|
||||||
proxy_passwd = unquote(proxy_passwd)
|
|
||||||
proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
|
|
||||||
else:
|
|
||||||
proxy_auth = None
|
|
||||||
|
|
||||||
if user_passwd:
|
|
||||||
user_passwd = unquote(user_passwd)
|
|
||||||
auth = base64.b64encode(user_passwd.encode()).decode('ascii')
|
|
||||||
else:
|
|
||||||
auth = None
|
|
||||||
http_conn = connection_factory(host)
|
|
||||||
headers = {}
|
|
||||||
if proxy_auth:
|
|
||||||
headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
|
|
||||||
if auth:
|
|
||||||
headers["Authorization"] = "Basic %s" % auth
|
|
||||||
if realhost:
|
|
||||||
headers["Host"] = realhost
|
|
||||||
|
|
||||||
# Add Connection:close as we don't support persistent connections yet.
|
|
||||||
# This helps in closing the socket and avoiding ResourceWarning
|
|
||||||
|
|
||||||
headers["Connection"] = "close"
|
|
||||||
|
|
||||||
for header, value in self.addheaders:
|
|
||||||
headers[header] = value
|
|
||||||
|
|
||||||
if data is not None:
|
|
||||||
headers["Content-Type"] = "application/x-www-form-urlencoded"
|
|
||||||
http_conn.request("POST", selector, data, headers)
|
|
||||||
else:
|
|
||||||
http_conn.request("GET", selector, headers=headers)
|
|
||||||
|
|
||||||
try:
|
|
||||||
response = http_conn.getresponse()
|
|
||||||
except http.client.BadStatusLine:
|
|
||||||
# something went wrong with the HTTP status line
|
|
||||||
raise URLError("http protocol error: bad status line")
|
|
||||||
|
|
||||||
# According to RFC 2616, "2xx" code indicates that the client's
|
|
||||||
# request was successfully received, understood, and accepted.
|
|
||||||
if 200 <= response.status < 300:
|
|
||||||
return addinfourl(response, response.msg, "http:" + url,
|
|
||||||
response.status)
|
|
||||||
else:
|
|
||||||
return self.http_error(
|
|
||||||
url, response.fp,
|
|
||||||
response.status, response.reason, response.msg, data)
|
|
||||||
|
|
||||||
def open_http(self, url, data=None):
|
|
||||||
"""Use HTTP protocol."""
|
|
||||||
return self._open_generic_http(http.client.HTTPConnection, url, data)
|
|
||||||
|
|
||||||
def http_error(self, url, fp, errcode, errmsg, headers, data=None):
|
|
||||||
"""Handle http errors.
|
|
||||||
|
|
||||||
Derived class can override this, or provide specific handlers
|
|
||||||
named http_error_DDD where DDD is the 3-digit error code."""
|
|
||||||
# First check if there's a specific handler for this error
|
|
||||||
name = 'http_error_%d' % errcode
|
|
||||||
if hasattr(self, name):
|
|
||||||
method = getattr(self, name)
|
|
||||||
if data is None:
|
|
||||||
result = method(url, fp, errcode, errmsg, headers)
|
|
||||||
else:
|
|
||||||
result = method(url, fp, errcode, errmsg, headers, data)
|
|
||||||
if result: return result
|
|
||||||
return self.http_error_default(url, fp, errcode, errmsg, headers)
|
|
||||||
|
|
||||||
def http_error_default(self, url, fp, errcode, errmsg, headers):
|
|
||||||
"""Default error handler: close the connection and raise OSError."""
|
|
||||||
fp.close()
|
|
||||||
raise HTTPError(url, errcode, errmsg, headers, None)
|
|
||||||
|
|
||||||
if _have_ssl:
|
|
||||||
def _https_connection(self, host):
|
|
||||||
if self.key_file or self.cert_file:
|
|
||||||
http_version = http.client.HTTPSConnection._http_vsn
|
|
||||||
context = http.client._create_https_context(http_version)
|
|
||||||
context.load_cert_chain(self.cert_file, self.key_file)
|
|
||||||
# cert and key file means the user wants to authenticate.
|
|
||||||
# enable TLS 1.3 PHA implicitly even for custom contexts.
|
|
||||||
if context.post_handshake_auth is not None:
|
|
||||||
context.post_handshake_auth = True
|
|
||||||
else:
|
|
||||||
context = None
|
|
||||||
return http.client.HTTPSConnection(host, context=context)
|
|
||||||
|
|
||||||
def open_https(self, url, data=None):
|
|
||||||
"""Use HTTPS protocol."""
|
|
||||||
return self._open_generic_http(self._https_connection, url, data)
|
|
||||||
|
|
||||||
def open_file(self, url):
|
|
||||||
"""Use local file or FTP depending on form of URL."""
|
|
||||||
if not isinstance(url, str):
|
|
||||||
raise URLError('file error: proxy support for file protocol currently not implemented')
|
|
||||||
if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
|
|
||||||
raise ValueError("file:// scheme is supported only on localhost")
|
|
||||||
else:
|
|
||||||
return self.open_local_file(url)
|
|
||||||
|
|
||||||
def open_local_file(self, url):
|
|
||||||
"""Use local file."""
|
|
||||||
import email.utils
|
|
||||||
import mimetypes
|
|
||||||
host, file = _splithost(url)
|
|
||||||
localname = url2pathname(file)
|
|
||||||
try:
|
|
||||||
stats = os.stat(localname)
|
|
||||||
except OSError as e:
|
|
||||||
raise URLError(e.strerror, e.filename)
|
|
||||||
size = stats.st_size
|
|
||||||
modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
|
|
||||||
mtype = mimetypes.guess_type(url)[0]
|
|
||||||
headers = email.message_from_string(
|
|
||||||
'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
|
|
||||||
(mtype or 'text/plain', size, modified))
|
|
||||||
if not host:
|
|
||||||
urlfile = file
|
|
||||||
if file[:1] == '/':
|
|
||||||
urlfile = 'file://' + file
|
|
||||||
return addinfourl(open(localname, 'rb'), headers, urlfile)
|
|
||||||
host, port = _splitport(host)
|
|
||||||
if (not port
|
|
||||||
and socket.gethostbyname(host) in ((localhost(),) + thishost())):
|
|
||||||
urlfile = file
|
|
||||||
if file[:1] == '/':
|
|
||||||
urlfile = 'file://' + file
|
|
||||||
elif file[:2] == './':
|
|
||||||
raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
|
|
||||||
return addinfourl(open(localname, 'rb'), headers, urlfile)
|
|
||||||
raise URLError('local file error: not on local host')
|
|
||||||
|
|
||||||
def open_ftp(self, url):
|
|
||||||
"""Use FTP protocol."""
|
|
||||||
if not isinstance(url, str):
|
|
||||||
raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
|
|
||||||
import mimetypes
|
|
||||||
host, path = _splithost(url)
|
|
||||||
if not host: raise URLError('ftp error: no host given')
|
|
||||||
host, port = _splitport(host)
|
|
||||||
user, host = _splituser(host)
|
|
||||||
if user: user, passwd = _splitpasswd(user)
|
|
||||||
else: passwd = None
|
|
||||||
host = unquote(host)
|
|
||||||
user = unquote(user or '')
|
|
||||||
passwd = unquote(passwd or '')
|
|
||||||
host = socket.gethostbyname(host)
|
|
||||||
if not port:
|
|
||||||
import ftplib
|
|
||||||
port = ftplib.FTP_PORT
|
|
||||||
else:
|
|
||||||
port = int(port)
|
|
||||||
path, attrs = _splitattr(path)
|
|
||||||
path = unquote(path)
|
|
||||||
dirs = path.split('/')
|
|
||||||
dirs, file = dirs[:-1], dirs[-1]
|
|
||||||
if dirs and not dirs[0]: dirs = dirs[1:]
|
|
||||||
if dirs and not dirs[0]: dirs[0] = '/'
|
|
||||||
key = user, host, port, '/'.join(dirs)
|
|
||||||
# XXX thread unsafe!
|
|
||||||
if len(self.ftpcache) > MAXFTPCACHE:
|
|
||||||
# Prune the cache, rather arbitrarily
|
|
||||||
for k in list(self.ftpcache):
|
|
||||||
if k != key:
|
|
||||||
v = self.ftpcache[k]
|
|
||||||
del self.ftpcache[k]
|
|
||||||
v.close()
|
|
||||||
try:
|
|
||||||
if key not in self.ftpcache:
|
|
||||||
self.ftpcache[key] = \
|
|
||||||
ftpwrapper(user, passwd, host, port, dirs)
|
|
||||||
if not file: type = 'D'
|
|
||||||
else: type = 'I'
|
|
||||||
for attr in attrs:
|
|
||||||
attr, value = _splitvalue(attr)
|
|
||||||
if attr.lower() == 'type' and \
|
|
||||||
value in ('a', 'A', 'i', 'I', 'd', 'D'):
|
|
||||||
type = value.upper()
|
|
||||||
(fp, retrlen) = self.ftpcache[key].retrfile(file, type)
|
|
||||||
mtype = mimetypes.guess_type("ftp:" + url)[0]
|
|
||||||
headers = ""
|
|
||||||
if mtype:
|
|
||||||
headers += "Content-Type: %s\n" % mtype
|
|
||||||
if retrlen is not None and retrlen >= 0:
|
|
||||||
headers += "Content-Length: %d\n" % retrlen
|
|
||||||
headers = email.message_from_string(headers)
|
|
||||||
return addinfourl(fp, headers, "ftp:" + url)
|
|
||||||
except ftperrors() as exp:
|
|
||||||
raise URLError(f'ftp error: {exp}') from exp
|
|
||||||
|
|
||||||
def open_data(self, url, data=None):
|
|
||||||
"""Use "data" URL."""
|
|
||||||
if not isinstance(url, str):
|
|
||||||
raise URLError('data error: proxy support for data protocol currently not implemented')
|
|
||||||
# ignore POSTed data
|
|
||||||
#
|
|
||||||
# syntax of data URLs:
|
|
||||||
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
|
|
||||||
# mediatype := [ type "/" subtype ] *( ";" parameter )
|
|
||||||
# data := *urlchar
|
|
||||||
# parameter := attribute "=" value
|
|
||||||
try:
|
|
||||||
[type, data] = url.split(',', 1)
|
|
||||||
except ValueError:
|
|
||||||
raise OSError('data error', 'bad data URL')
|
|
||||||
if not type:
|
|
||||||
type = 'text/plain;charset=US-ASCII'
|
|
||||||
semi = type.rfind(';')
|
|
||||||
if semi >= 0 and '=' not in type[semi:]:
|
|
||||||
encoding = type[semi+1:]
|
|
||||||
type = type[:semi]
|
|
||||||
else:
|
|
||||||
encoding = ''
|
|
||||||
msg = []
|
|
||||||
msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
|
|
||||||
time.gmtime(time.time())))
|
|
||||||
msg.append('Content-type: %s' % type)
|
|
||||||
if encoding == 'base64':
|
|
||||||
# XXX is this encoding/decoding ok?
|
|
||||||
data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
|
|
||||||
else:
|
|
||||||
data = unquote(data)
|
|
||||||
msg.append('Content-Length: %d' % len(data))
|
|
||||||
msg.append('')
|
|
||||||
msg.append(data)
|
|
||||||
msg = '\n'.join(msg)
|
|
||||||
headers = email.message_from_string(msg)
|
|
||||||
f = io.StringIO(msg)
|
|
||||||
#f.fileno = None # needed for addinfourl
|
|
||||||
return addinfourl(f, headers, url)
|
|
||||||
|
|
||||||
|
|
||||||
class FancyURLopener(URLopener):
|
|
||||||
"""Derived class with handlers for errors we can handle (perhaps)."""
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
URLopener.__init__(self, *args, **kwargs)
|
|
||||||
self.auth_cache = {}
|
|
||||||
self.tries = 0
|
|
||||||
self.maxtries = 10
|
|
||||||
|
|
||||||
def http_error_default(self, url, fp, errcode, errmsg, headers):
|
|
||||||
"""Default error handling -- don't raise an exception."""
|
|
||||||
return addinfourl(fp, headers, "http:" + url, errcode)
|
|
||||||
|
|
||||||
def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
|
|
||||||
"""Error 302 -- relocated (temporarily)."""
|
|
||||||
self.tries += 1
|
|
||||||
try:
|
|
||||||
if self.maxtries and self.tries >= self.maxtries:
|
|
||||||
if hasattr(self, "http_error_500"):
|
|
||||||
meth = self.http_error_500
|
|
||||||
else:
|
|
||||||
meth = self.http_error_default
|
|
||||||
return meth(url, fp, 500,
|
|
||||||
"Internal Server Error: Redirect Recursion",
|
|
||||||
headers)
|
|
||||||
result = self.redirect_internal(url, fp, errcode, errmsg,
|
|
||||||
headers, data)
|
|
||||||
return result
|
|
||||||
finally:
|
|
||||||
self.tries = 0
|
|
||||||
|
|
||||||
def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
|
|
||||||
if 'location' in headers:
|
|
||||||
newurl = headers['location']
|
|
||||||
elif 'uri' in headers:
|
|
||||||
newurl = headers['uri']
|
|
||||||
else:
|
|
||||||
return
|
|
||||||
fp.close()
|
|
||||||
|
|
||||||
# In case the server sent a relative URL, join with original:
|
|
||||||
newurl = urljoin(self.type + ":" + url, newurl)
|
|
||||||
|
|
||||||
urlparts = urlparse(newurl)
|
|
||||||
|
|
||||||
# For security reasons, we don't allow redirection to anything other
|
|
||||||
# than http, https and ftp.
|
|
||||||
|
|
||||||
# We are using newer HTTPError with older redirect_internal method
|
|
||||||
# This older method will get deprecated in 3.3
|
|
||||||
|
|
||||||
if urlparts.scheme not in ('http', 'https', 'ftp', ''):
|
|
||||||
raise HTTPError(newurl, errcode,
|
|
||||||
errmsg +
|
|
||||||
" Redirection to url '%s' is not allowed." % newurl,
|
|
||||||
headers, fp)
|
|
||||||
|
|
||||||
return self.open(newurl)
|
|
||||||
|
|
||||||
def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
|
|
||||||
"""Error 301 -- also relocated (permanently)."""
|
|
||||||
return self.http_error_302(url, fp, errcode, errmsg, headers, data)
|
|
||||||
|
|
||||||
def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
|
|
||||||
"""Error 303 -- also relocated (essentially identical to 302)."""
|
|
||||||
return self.http_error_302(url, fp, errcode, errmsg, headers, data)
|
|
||||||
|
|
||||||
def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
|
|
||||||
"""Error 307 -- relocated, but turn POST into error."""
|
|
||||||
if data is None:
|
|
||||||
return self.http_error_302(url, fp, errcode, errmsg, headers, data)
|
|
||||||
else:
|
|
||||||
return self.http_error_default(url, fp, errcode, errmsg, headers)
|
|
||||||
|
|
||||||
def http_error_308(self, url, fp, errcode, errmsg, headers, data=None):
|
|
||||||
"""Error 308 -- relocated, but turn POST into error."""
|
|
||||||
if data is None:
|
|
||||||
return self.http_error_301(url, fp, errcode, errmsg, headers, data)
|
|
||||||
else:
|
|
||||||
return self.http_error_default(url, fp, errcode, errmsg, headers)
|
|
||||||
|
|
||||||
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
|
|
||||||
retry=False):
|
|
||||||
"""Error 401 -- authentication required.
|
|
||||||
This function supports Basic authentication only."""
|
|
||||||
if 'www-authenticate' not in headers:
|
|
||||||
URLopener.http_error_default(self, url, fp,
|
|
||||||
errcode, errmsg, headers)
|
|
||||||
stuff = headers['www-authenticate']
|
|
||||||
match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
|
|
||||||
if not match:
|
|
||||||
URLopener.http_error_default(self, url, fp,
|
|
||||||
errcode, errmsg, headers)
|
|
||||||
scheme, realm = match.groups()
|
|
||||||
if scheme.lower() != 'basic':
|
|
||||||
URLopener.http_error_default(self, url, fp,
|
|
||||||
errcode, errmsg, headers)
|
|
||||||
if not retry:
|
|
||||||
URLopener.http_error_default(self, url, fp, errcode, errmsg,
|
|
||||||
headers)
|
|
||||||
name = 'retry_' + self.type + '_basic_auth'
|
|
||||||
if data is None:
|
|
||||||
return getattr(self,name)(url, realm)
|
|
||||||
else:
|
|
||||||
return getattr(self,name)(url, realm, data)
|
|
||||||
|
|
||||||
def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
|
|
||||||
retry=False):
|
|
||||||
"""Error 407 -- proxy authentication required.
|
|
||||||
This function supports Basic authentication only."""
|
|
||||||
if 'proxy-authenticate' not in headers:
|
|
||||||
URLopener.http_error_default(self, url, fp,
|
|
||||||
errcode, errmsg, headers)
|
|
||||||
stuff = headers['proxy-authenticate']
|
|
||||||
match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
|
|
||||||
if not match:
|
|
||||||
URLopener.http_error_default(self, url, fp,
|
|
||||||
errcode, errmsg, headers)
|
|
||||||
scheme, realm = match.groups()
|
|
||||||
if scheme.lower() != 'basic':
|
|
||||||
URLopener.http_error_default(self, url, fp,
|
|
||||||
errcode, errmsg, headers)
|
|
||||||
if not retry:
|
|
||||||
URLopener.http_error_default(self, url, fp, errcode, errmsg,
|
|
||||||
headers)
|
|
||||||
name = 'retry_proxy_' + self.type + '_basic_auth'
|
|
||||||
if data is None:
|
|
||||||
return getattr(self,name)(url, realm)
|
|
||||||
else:
|
|
||||||
return getattr(self,name)(url, realm, data)
|
|
||||||
|
|
||||||
def retry_proxy_http_basic_auth(self, url, realm, data=None):
|
|
||||||
host, selector = _splithost(url)
|
|
||||||
newurl = 'http://' + host + selector
|
|
||||||
proxy = self.proxies['http']
|
|
||||||
urltype, proxyhost = _splittype(proxy)
|
|
||||||
proxyhost, proxyselector = _splithost(proxyhost)
|
|
||||||
i = proxyhost.find('@') + 1
|
|
||||||
proxyhost = proxyhost[i:]
|
|
||||||
user, passwd = self.get_user_passwd(proxyhost, realm, i)
|
|
||||||
if not (user or passwd): return None
|
|
||||||
proxyhost = "%s:%s@%s" % (quote(user, safe=''),
|
|
||||||
quote(passwd, safe=''), proxyhost)
|
|
||||||
self.proxies['http'] = 'http://' + proxyhost + proxyselector
|
|
||||||
if data is None:
|
|
||||||
return self.open(newurl)
|
|
||||||
else:
|
|
||||||
return self.open(newurl, data)
|
|
||||||
|
|
||||||
def retry_proxy_https_basic_auth(self, url, realm, data=None):
|
|
||||||
host, selector = _splithost(url)
|
|
||||||
newurl = 'https://' + host + selector
|
|
||||||
proxy = self.proxies['https']
|
|
||||||
urltype, proxyhost = _splittype(proxy)
|
|
||||||
proxyhost, proxyselector = _splithost(proxyhost)
|
|
||||||
i = proxyhost.find('@') + 1
|
|
||||||
proxyhost = proxyhost[i:]
|
|
||||||
user, passwd = self.get_user_passwd(proxyhost, realm, i)
|
|
||||||
if not (user or passwd): return None
|
|
||||||
proxyhost = "%s:%s@%s" % (quote(user, safe=''),
|
|
||||||
quote(passwd, safe=''), proxyhost)
|
|
||||||
self.proxies['https'] = 'https://' + proxyhost + proxyselector
|
|
||||||
if data is None:
|
|
||||||
return self.open(newurl)
|
|
||||||
else:
|
|
||||||
return self.open(newurl, data)
|
|
||||||
|
|
||||||
def retry_http_basic_auth(self, url, realm, data=None):
|
|
||||||
host, selector = _splithost(url)
|
|
||||||
i = host.find('@') + 1
|
|
||||||
host = host[i:]
|
|
||||||
user, passwd = self.get_user_passwd(host, realm, i)
|
|
||||||
if not (user or passwd): return None
|
|
||||||
host = "%s:%s@%s" % (quote(user, safe=''),
|
|
||||||
quote(passwd, safe=''), host)
|
|
||||||
newurl = 'http://' + host + selector
|
|
||||||
if data is None:
|
|
||||||
return self.open(newurl)
|
|
||||||
else:
|
|
||||||
return self.open(newurl, data)
|
|
||||||
|
|
||||||
def retry_https_basic_auth(self, url, realm, data=None):
|
|
||||||
host, selector = _splithost(url)
|
|
||||||
i = host.find('@') + 1
|
|
||||||
host = host[i:]
|
|
||||||
user, passwd = self.get_user_passwd(host, realm, i)
|
|
||||||
if not (user or passwd): return None
|
|
||||||
host = "%s:%s@%s" % (quote(user, safe=''),
|
|
||||||
quote(passwd, safe=''), host)
|
|
||||||
newurl = 'https://' + host + selector
|
|
||||||
if data is None:
|
|
||||||
return self.open(newurl)
|
|
||||||
else:
|
|
||||||
return self.open(newurl, data)
|
|
||||||
|
|
||||||
def get_user_passwd(self, host, realm, clear_cache=0):
|
|
||||||
key = realm + '@' + host.lower()
|
|
||||||
if key in self.auth_cache:
|
|
||||||
if clear_cache:
|
|
||||||
del self.auth_cache[key]
|
|
||||||
else:
|
|
||||||
return self.auth_cache[key]
|
|
||||||
user, passwd = self.prompt_user_passwd(host, realm)
|
|
||||||
if user or passwd: self.auth_cache[key] = (user, passwd)
|
|
||||||
return user, passwd
|
|
||||||
|
|
||||||
def prompt_user_passwd(self, host, realm):
|
|
||||||
"""Override this in a GUI environment!"""
|
|
||||||
import getpass
|
|
||||||
try:
|
|
||||||
user = input("Enter username for %s at %s: " % (realm, host))
|
|
||||||
passwd = getpass.getpass("Enter password for %s in %s at %s: " %
|
|
||||||
(user, realm, host))
|
|
||||||
return user, passwd
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print()
|
|
||||||
return None, None
|
|
||||||
|
|
||||||
|
|
||||||
# Utility functions
|
# Utility functions
|
||||||
|
|
||||||
_localhost = None
|
_localhost = None
|
||||||
|
@ -2485,9 +1810,7 @@ def getproxies_environment():
|
||||||
"""Return a dictionary of scheme -> proxy server URL mappings.
|
"""Return a dictionary of scheme -> proxy server URL mappings.
|
||||||
|
|
||||||
Scan the environment for variables named <scheme>_proxy;
|
Scan the environment for variables named <scheme>_proxy;
|
||||||
this seems to be the standard convention. If you need a
|
this seems to be the standard convention.
|
||||||
different way, you can pass a proxies dictionary to the
|
|
||||||
[Fancy]URLopener constructor.
|
|
||||||
"""
|
"""
|
||||||
# in order to prefer lowercase variables, process environment in
|
# in order to prefer lowercase variables, process environment in
|
||||||
# two passes: first matches any, second pass matches lowercase only
|
# two passes: first matches any, second pass matches lowercase only
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
Remove :class:`!URLopener` and :class:`!FancyURLopener` classes from
|
||||||
|
:mod:`urllib.request`. They had previously raised :exc:`DeprecationWarning`
|
||||||
|
since Python 3.3.
|
Loading…
Reference in New Issue