#!/usr/bin/env python

import unittest
from test import test_support
from test.test_urllib2 import sanepathname2url

import socket
import urllib2
import os
import sys

TIMEOUT = 60  # seconds


def _retry_thrice(func, exc, *args, **kwargs):
    for i in range(3):
        try:
            return func(*args, **kwargs)
        except exc, last_exc:
            continue
        except:
            raise
    raise last_exc

def _wrap_with_retry_thrice(func, exc):
    def wrapped(*args, **kwargs):
        return _retry_thrice(func, exc, *args, **kwargs)
    return wrapped

# Connecting to remote hosts is flaky. Make it more robust by retrying
# the connection several times.
_urlopen_with_retry = _wrap_with_retry_thrice(urllib2.urlopen, urllib2.URLError)
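
# Illustrative only (not used by the tests below): the same wrapper works for
# any callable that can raise URLError.  A hypothetical opener built from an
# explicit handler list could be retried the same way:
#
#     _opener = urllib2.build_opener(urllib2.HTTPHandler)
#     _open_with_retry = _wrap_with_retry_thrice(_opener.open, urllib2.URLError)
#
# _retry_thrice() calls the wrapped function up to three times, retrying only
# when the given exception type is raised (any other exception propagates
# immediately), and re-raises the last failure if every attempt fails.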


class AuthTests(unittest.TestCase):
    """Tests urllib2 authentication features."""

## Disabled at the moment since there is no page under python.org which
## could be used for HTTP authentication.
#
#    def test_basic_auth(self):
#        import httplib
#
#        test_url = "http://www.python.org/test/test_urllib2/basic_auth"
#        test_hostport = "www.python.org"
#        test_realm = 'Test Realm'
#        test_user = 'test.test_urllib2net'
#        test_password = 'blah'
#
#        # failure
#        try:
#            _urlopen_with_retry(test_url)
#        except urllib2.HTTPError, exc:
#            self.assertEqual(exc.code, 401)
#        else:
#            self.fail("urlopen() should have failed with 401")
#
#        # success
#        auth_handler = urllib2.HTTPBasicAuthHandler()
#        auth_handler.add_password(test_realm, test_hostport,
#                                  test_user, test_password)
#        opener = urllib2.build_opener(auth_handler)
#        f = opener.open('http://localhost/')
#        response = _urlopen_with_retry("http://www.python.org/")
#
#        # The 'userinfo' URL component is deprecated by RFC 3986 for security
#        # reasons, so let's not implement it!  (It is already implemented for
#        # proxy specification strings, that is, URLs or authorities that
#        # specify a proxy, so we must keep that.)
#        self.assertRaises(httplib.InvalidURL,
#                          urllib2.urlopen, "http://evil:thing@example.com")


class CloseSocketTest(unittest.TestCase):

    def test_close(self):
        import httplib

        # calling .close() on urllib2's response objects should close the
        # underlying socket

        # delve deep into response to fetch socket._socketobject
        response = _urlopen_with_retry("http://www.python.org/")
        abused_fileobject = response.fp
        self.assertTrue(abused_fileobject.__class__ is socket._fileobject)
        httpresponse = abused_fileobject._sock
        self.assertTrue(httpresponse.__class__ is httplib.HTTPResponse)
        fileobject = httpresponse.fp
        self.assertTrue(fileobject.__class__ is socket._fileobject)

        self.assertTrue(not fileobject.closed)
        response.close()
        self.assertTrue(fileobject.closed)


class OtherNetworkTests(unittest.TestCase):
    def setUp(self):
        if 0:  # for debugging
            import logging
            logger = logging.getLogger("test_urllib2net")
            logger.addHandler(logging.StreamHandler())

    # XXX The rest of these tests aren't very good -- they don't check much.
    # They do sometimes catch some major disasters, though.

    def test_ftp(self):
        urls = [
            'ftp://ftp.kernel.org/pub/linux/kernel/README',
            'ftp://ftp.kernel.org/pub/linux/kernel/non-existent-file',
            #'ftp://ftp.kernel.org/pub/leenox/kernel/test',
            'ftp://gatekeeper.research.compaq.com/pub/DEC/SRC'
                '/research-reports/00README-Legal-Rules-Regs',
            ]
        self._test_urls(urls, self._extra_handlers())

    def test_file(self):
        TESTFN = test_support.TESTFN
        f = open(TESTFN, 'w')
        try:
            f.write('hi there\n')
            f.close()
            urls = [
                'file:'+sanepathname2url(os.path.abspath(TESTFN)),
                ('file:///nonsensename/etc/passwd', None, urllib2.URLError),
                ]
            self._test_urls(urls, self._extra_handlers(), retry=True)
        finally:
            os.remove(TESTFN)

    # XXX The following test depends on machine configurations that are
    # internal to CNRI.  Need to set up a public server with the right
    # authentication configuration for test purposes.

##     def test_cnri(self):
##         if socket.gethostname() == 'bitdiddle':
##             localhost = 'bitdiddle.cnri.reston.va.us'
##         elif socket.gethostname() == 'bitdiddle.concentric.net':
##             localhost = 'localhost'
##         else:
##             localhost = None
##         if localhost is not None:
##             urls = [
##                 'file://%s/etc/passwd' % localhost,
##                 'http://%s/simple/' % localhost,
##                 'http://%s/digest/' % localhost,
##                 'http://%s/not/found.h' % localhost,
##                 ]

##             bauth = HTTPBasicAuthHandler()
##             bauth.add_password('basic_test_realm', localhost, 'jhylton',
##                                'password')
##             dauth = HTTPDigestAuthHandler()
##             dauth.add_password('digest_test_realm', localhost, 'jhylton',
##                                'password')

##             self._test_urls(urls, self._extra_handlers()+[bauth, dauth])

    def test_urlwithfrag(self):
        urlwith_frag = "http://docs.python.org/glossary.html#glossary"
        with test_support.transient_internet(urlwith_frag):
            req = urllib2.Request(urlwith_frag)
            res = urllib2.urlopen(req)
            self.assertEqual(res.geturl(),
                             "http://docs.python.org/glossary.html")

    def test_fileno(self):
        req = urllib2.Request("http://www.python.org")
        opener = urllib2.build_opener()
        res = opener.open(req)
        try:
            res.fileno()
        except AttributeError:
            self.fail("HTTPResponse object should return a valid fileno")
        finally:
            res.close()

    def test_custom_headers(self):
        url = "http://www.example.com"
        with test_support.transient_internet(url):
            opener = urllib2.build_opener()
            request = urllib2.Request(url)
            self.assertFalse(request.header_items())
            opener.open(request)
            self.assertTrue(request.header_items())
            self.assertTrue(request.has_header('User-agent'))
            request.add_header('User-Agent', 'Test-Agent')
            opener.open(request)
            self.assertEqual(request.get_header('User-agent'), 'Test-Agent')

    def _test_urls(self, urls, handlers, retry=True):
        import time
        import logging
        debug = logging.getLogger("test_urllib2").debug

        urlopen = urllib2.build_opener(*handlers).open
        if retry:
            urlopen = _wrap_with_retry_thrice(urlopen, urllib2.URLError)
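
        # Each item in `urls` is either a plain URL string or a
        # (url, request_data, expected_exception) tuple; a tuple marks a URL
        # that is expected to fail with the given exception type.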

        for url in urls:
            if isinstance(url, tuple):
                url, req, expected_err = url
            else:
                req = expected_err = None
            with test_support.transient_internet(url):
                debug(url)
                try:
                    f = urlopen(url, req, TIMEOUT)
                except EnvironmentError as err:
                    debug(err)
                    if expected_err:
                        msg = ("Didn't get expected error(s) %s for %s %s, got %s: %s" %
                               (expected_err, url, req, type(err), err))
                        self.assertIsInstance(err, expected_err, msg)
                except urllib2.URLError as err:
                    if isinstance(err[0], socket.timeout):
                        print >>sys.stderr, "<timeout: %s>" % url
                        continue
                    else:
                        raise
                else:
                    try:
                        with test_support.transient_internet(url):
                            buf = f.read()
                            debug("read %d bytes" % len(buf))
                    except socket.timeout:
                        print >>sys.stderr, "<timeout: %s>" % url
                    f.close()
            debug("******** next url coming up...")
            time.sleep(0.1)

    def _extra_handlers(self):
        handlers = []
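
        # CacheFTPHandler reuses FTP connections between requests; setTimeout
        # shortens how long cached connections are retained, presumably so
        # sockets are not held open long after a test URL finishes.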
        cfh = urllib2.CacheFTPHandler()
        cfh.setTimeout(1)
        handlers.append(cfh)

        return handlers


class TimeoutTest(unittest.TestCase):
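    # These tests reach through urllib2's response wrappers
    # (u.fp._sock.fp._sock for HTTP, u.fp.fp._sock for FTP) to check the
    # timeout that actually ended up on the underlying socket.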

    def test_http_basic(self):
        self.assertTrue(socket.getdefaulttimeout() is None)
        url = "http://www.python.org"
        with test_support.transient_internet(url, timeout=None):
            u = _urlopen_with_retry(url)
            self.assertTrue(u.fp._sock.fp._sock.gettimeout() is None)

    def test_http_default_timeout(self):
        self.assertTrue(socket.getdefaulttimeout() is None)
        url = "http://www.python.org"
        with test_support.transient_internet(url):
            socket.setdefaulttimeout(60)
            try:
                u = _urlopen_with_retry(url)
            finally:
                socket.setdefaulttimeout(None)
            self.assertEqual(u.fp._sock.fp._sock.gettimeout(), 60)

    def test_http_no_timeout(self):
        self.assertTrue(socket.getdefaulttimeout() is None)
        url = "http://www.python.org"
        with test_support.transient_internet(url):
            socket.setdefaulttimeout(60)
            try:
                u = _urlopen_with_retry(url, timeout=None)
            finally:
                socket.setdefaulttimeout(None)
            self.assertTrue(u.fp._sock.fp._sock.gettimeout() is None)

    def test_http_timeout(self):
        url = "http://www.python.org"
        with test_support.transient_internet(url):
            u = _urlopen_with_retry(url, timeout=120)
            self.assertEqual(u.fp._sock.fp._sock.gettimeout(), 120)

    FTP_HOST = "ftp://ftp.mirror.nl/pub/gnu/"

    def test_ftp_basic(self):
        self.assertTrue(socket.getdefaulttimeout() is None)
        with test_support.transient_internet(self.FTP_HOST, timeout=None):
            u = _urlopen_with_retry(self.FTP_HOST)
            self.assertTrue(u.fp.fp._sock.gettimeout() is None)

    def test_ftp_default_timeout(self):
        self.assertTrue(socket.getdefaulttimeout() is None)
        with test_support.transient_internet(self.FTP_HOST):
            socket.setdefaulttimeout(60)
            try:
                u = _urlopen_with_retry(self.FTP_HOST)
            finally:
                socket.setdefaulttimeout(None)
            self.assertEqual(u.fp.fp._sock.gettimeout(), 60)

    def test_ftp_no_timeout(self):
        self.assertTrue(socket.getdefaulttimeout() is None)
        with test_support.transient_internet(self.FTP_HOST):
            socket.setdefaulttimeout(60)
            try:
                u = _urlopen_with_retry(self.FTP_HOST, timeout=None)
            finally:
                socket.setdefaulttimeout(None)
            self.assertTrue(u.fp.fp._sock.gettimeout() is None)

    def test_ftp_timeout(self):
        with test_support.transient_internet(self.FTP_HOST):
            u = _urlopen_with_retry(self.FTP_HOST, timeout=60)
            self.assertEqual(u.fp.fp._sock.gettimeout(), 60)


def test_main():
    test_support.requires("network")
    test_support.run_unittest(AuthTests,
                              OtherNetworkTests,
                              CloseSocketTest,
                              TimeoutTest,
                              )

if __name__ == "__main__":
    test_main()