Make a new urllib package.

It consists of code from urllib, urllib2, urlparse, and robotparser.
The old modules have all been removed.  The new package has five
submodules: urllib.parse, urllib.request, urllib.response,
urllib.error, and urllib.robotparser.  The urllib.request.urlopen()
function uses the url opener from urllib2.

Note that the unittests have not been renamed for the
beta, but they will be renamed in the future.

Joint work with Senthil Kumaran.
This commit is contained in:
Jeremy Hylton 2008-06-18 20:49:58 +00:00
parent a656d2cd89
commit 1afc169616
40 changed files with 3190 additions and 3536 deletions

View File

@ -35,7 +35,7 @@ from operator import attrgetter
from io import StringIO
import sys
import os
import urllib
import urllib.parse
import email.parser
__all__ = ["MiniFieldStorage", "FieldStorage",
@ -216,8 +216,8 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
else:
continue
if len(nv[1]) or keep_blank_values:
name = urllib.unquote(nv[0].replace('+', ' '))
value = urllib.unquote(nv[1].replace('+', ' '))
name = urllib.parse.unquote(nv[0].replace('+', ' '))
value = urllib.parse.unquote(nv[1].replace('+', ' '))
r.append((name, value))
return r

View File

@ -7,8 +7,9 @@ Implements the Distutils 'register' command (register with the repository).
__revision__ = "$Id$"
import os, string, urllib2, getpass, urlparse
import os, string, getpass
import io
import urllib.parse, urllib.request
from distutils.core import PyPIRCCommand
from distutils.errors import *
@ -94,7 +95,8 @@ class register(PyPIRCCommand):
def classifiers(self):
''' Fetch the list of classifiers from the server.
'''
response = urllib2.urlopen(self.repository+'?:action=list_classifiers')
url = self.repository+'?:action=list_classifiers'
response = urllib.request.urlopen(url)
print(response.read())
def verify_metadata(self):
@ -166,8 +168,8 @@ Your selection [default 1]: ''', end=' ')
password = getpass.getpass('Password: ')
# set up the authentication
auth = urllib2.HTTPPasswordMgr()
host = urlparse.urlparse(self.repository)[1]
auth = urllib.request.HTTPPasswordMgr()
host = urllib.parse.urlparse(self.repository)[1]
auth.add_password(self.realm, host, username, password)
# send the info to the server and report the result
code, result = self.post_to_server(self.build_post_data('submit'),
@ -276,20 +278,20 @@ Your selection [default 1]: ''', end=' ')
'Content-type': 'multipart/form-data; boundary=%s; charset=utf-8'%boundary,
'Content-length': str(len(body))
}
req = urllib2.Request(self.repository, body, headers)
req = urllib.request.Request(self.repository, body, headers)
# handle HTTP and include the Basic Auth handler
opener = urllib2.build_opener(
urllib2.HTTPBasicAuthHandler(password_mgr=auth)
opener = urllib.request.build_opener(
urllib.request.HTTPBasicAuthHandler(password_mgr=auth)
)
data = ''
try:
result = opener.open(req)
except urllib2.HTTPError as e:
except urllib.error.HTTPError as e:
if self.show_response:
data = e.fp.read()
result = e.code, e.msg
except urllib2.URLError as e:
except urllib.error.URLError as e:
result = 500, str(e)
else:
if self.show_response:

View File

@ -13,7 +13,7 @@ import platform
import configparser
import http.client
import base64
import urlparse
import urllib.parse
class upload(PyPIRCCommand):
@ -145,10 +145,11 @@ class upload(PyPIRCCommand):
self.announce("Submitting %s to %s" % (filename, self.repository), log.INFO)
# build the Request
# We can't use urllib2 since we need to send the Basic
# We can't use urllib since we need to send the Basic
# auth right with the first request
# TODO(jhylton): Can we fix urllib?
schema, netloc, url, params, query, fragments = \
urlparse.urlparse(self.repository)
urllib.parse.urlparse(self.repository)
assert not params and not query and not fragments
if schema == 'http':
http = http.client.HTTPConnection(netloc)

View File

@ -25,6 +25,7 @@ import time
import base64
import random
import socket
import urllib.parse
import warnings
from io import StringIO
@ -218,8 +219,7 @@ def encode_rfc2231(s, charset=None, language=None):
charset is given but not language, the string is encoded using the empty
string for language.
"""
import urllib
s = urllib.quote(s, safe='')
s = urllib.parse.quote(s, safe='')
if charset is None and language is None:
return s
if language is None:
@ -234,7 +234,6 @@ def decode_params(params):
params is a sequence of 2-tuples containing (param name, string value).
"""
import urllib
# Copy params so we don't mess with the original
params = params[:]
new_params = []
@ -272,7 +271,7 @@ def decode_params(params):
# language specifiers at the beginning of the string.
for num, s, encoded in continuations:
if encoded:
s = urllib.unquote(s)
s = urllib.parse.unquote(s)
extended = True
value.append(s)
value = quote(EMPTYSTRING.join(value))

View File

@ -70,7 +70,7 @@ import io
import socket
import email.parser
import email.message
from urlparse import urlsplit
from urllib.parse import urlsplit
import warnings
__all__ = ["HTTPResponse", "HTTPConnection",

View File

@ -28,7 +28,10 @@ http://wwwsearch.sf.net/):
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
import re, urlparse, copy, time, urllib
import copy
import re
import time
import urllib.parse, urllib.request
try:
import threading as _threading
except ImportError:
@ -580,7 +583,7 @@ def request_host(request):
"""
url = request.get_full_url()
host = urlparse.urlparse(url)[1]
host = urllib.parse.urlparse(url)[1]
if host == "":
host = request.get_header("Host", "")
@ -602,13 +605,11 @@ def eff_request_host(request):
def request_path(request):
"""request-URI, as defined by RFC 2965."""
url = request.get_full_url()
#scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url)
#req_path = escape_path("".join(urlparse.urlparse(url)[2:]))
path, parameters, query, frag = urlparse.urlparse(url)[2:]
path, parameters, query, frag = urllib.parse.urlparse(url)[2:]
if parameters:
path = "%s;%s" % (path, parameters)
path = escape_path(path)
req_path = urlparse.urlunparse(("", "", path, "", query, frag))
req_path = urllib.parse.urlunparse(("", "", path, "", query, frag))
if not req_path.startswith("/"):
# fix bad RFC 2396 absoluteURI
req_path = "/"+req_path
@ -644,7 +645,7 @@ def escape_path(path):
# And here, kind of: draft-fielding-uri-rfc2396bis-03
# (And in draft IRI specification: draft-duerst-iri-05)
# (And here, for new URI schemes: RFC 2718)
path = urllib.quote(path, HTTP_PATH_SAFE)
path = urllib.parse.quote(path, HTTP_PATH_SAFE)
path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
return path
@ -1197,8 +1198,7 @@ class CookieJar:
"""Collection of HTTP cookies.
You may not need to know about this class: try
urllib2.build_opener(HTTPCookieProcessor).open(url).
urllib.request.build_opener(HTTPCookieProcessor).open(url).
"""
non_word_re = re.compile(r"\W")

View File

@ -93,7 +93,7 @@ import cgi
import time
import socket # For gethostbyaddr()
import shutil
import urllib
import urllib.parse
import select
import mimetypes
import posixpath
@ -683,7 +683,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
return None
list.sort(key=lambda a: a.lower())
r = []
displaypath = cgi.escape(urllib.unquote(self.path))
displaypath = cgi.escape(urllib.parse.unquote(self.path))
r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
@ -699,7 +699,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
displayname = name + "@"
# Note: a link to a directory displays with @ and links with /
r.append('<li><a href="%s">%s</a>\n'
% (urllib.quote(linkname), cgi.escape(displayname)))
% (urllib.parse.quote(linkname), cgi.escape(displayname)))
r.append("</ul>\n<hr>\n</body>\n</html>\n")
enc = sys.getfilesystemencoding()
encoded = ''.join(r).encode(enc)
@ -723,7 +723,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
# abandon query parameters
path = path.split('?',1)[0]
path = path.split('#',1)[0]
path = posixpath.normpath(urllib.unquote(path))
path = posixpath.normpath(urllib.parse.unquote(path))
words = path.split('/')
words = filter(None, words)
path = os.getcwd()
@ -947,7 +947,7 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
env['SERVER_PROTOCOL'] = self.protocol_version
env['SERVER_PORT'] = str(self.server.server_port)
env['REQUEST_METHOD'] = self.command
uqrest = urllib.unquote(rest)
uqrest = urllib.parse.unquote(rest)
env['PATH_INFO'] = uqrest
env['PATH_TRANSLATED'] = self.translate_path(uqrest)
env['SCRIPT_NAME'] = scriptname

View File

@ -2,7 +2,7 @@
Do not import directly; use urllib instead."""
import urllib
import urllib.parse
import os
__all__ = ["url2pathname","pathname2url"]
@ -13,7 +13,7 @@ def url2pathname(pathname):
#
# XXXX The .. handling should be fixed...
#
tp = urllib.splittype(pathname)[0]
# splittype() is provided by the urllib.parse submodule.
tp = urllib.parse.splittype(pathname)[0]
if tp and tp != 'file':
raise RuntimeError('Cannot convert non-local URL to pathname')
# Turn starting /// into /, an empty hostname means current host
@ -47,7 +47,7 @@ def url2pathname(pathname):
i = i + 1
rv = ':' + ':'.join(components)
# and finally unquote slashes and other funny characters
return urllib.unquote(rv)
return urllib.parseunquote(rv)
def pathname2url(pathname):
"""OS-specific conversion from a file system path to a relative URL
@ -73,8 +73,8 @@ def pathname2url(pathname):
return '/'.join(components)
def _pncomp2url(component):
component = urllib.quote(component[:31], safe='') # We want to quote slashes
return component
# We want to quote slashes
return urllib.parsequote(component[:31], safe='')
def test():
for url in ["index.html",

View File

@ -24,7 +24,7 @@ read_mime_types(file) -- parse one file, return a dictionary or None
import os
import posixpath
import urllib
import urllib.parse
__all__ = [
"guess_type","guess_extension","guess_all_extensions",
@ -104,7 +104,7 @@ class MimeTypes:
Optional `strict' argument when False adds a bunch of commonly found,
but non-standard types.
"""
scheme, url = urllib.splittype(url)
scheme, url = urllib.parse.splittype(url)
if scheme == 'data':
# syntax of data URLs:
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data

View File

@ -725,7 +725,7 @@ def dash_R(the_module, test, indirect_test, huntrleaks):
def dash_R_cleanup(fs, ps, pic, abcs):
import gc, copyreg
import _strptime, linecache
import urlparse, urllib, urllib2, mimetypes, doctest
import urllib.parse, urllib.request, mimetypes, doctest
import struct, filecmp, _abcoll
from distutils.dir_util import _path_created
from weakref import WeakSet
@ -758,9 +758,8 @@ def dash_R_cleanup(fs, ps, pic, abcs):
_path_created.clear()
re.purge()
_strptime._regex_cache.clear()
urlparse.clear_cache()
urllib.urlcleanup()
urllib2.install_opener(None)
urllib.parse.clear_cache()
urllib.request.urlcleanup()
linecache.clearcache()
mimetypes._default_mime_types()
filecmp._cache.clear()

View File

@ -352,10 +352,10 @@ def check_syntax_error(testcase, statement):
testcase.fail('Missing SyntaxError: "%s"' % statement)
def open_urlresource(url, *args, **kw):
import urllib, urlparse
import urllib.request, urllib.parse
requires('urlfetch')
filename = urlparse.urlparse(url)[2].split('/')[-1] # '/': it's URL!
filename = urllib.parse.urlparse(url)[2].split('/')[-1] # '/': it's URL!
for path in [os.path.curdir, os.path.pardir]:
fn = os.path.join(path, filename)
@ -363,7 +363,7 @@ def open_urlresource(url, *args, **kw):
return open(fn, *args, **kw)
print('\tfetching %s ...' % url, file=get_original_stdout())
fn, _ = urllib.urlretrieve(url, filename)
fn, _ = urllib.request.urlretrieve(url, filename)
return open(fn, *args, **kw)

View File

@ -111,7 +111,7 @@ class AllTest(unittest.TestCase):
self.check_all("re")
self.check_all("reprlib")
self.check_all("rlcompleter")
self.check_all("robotparser")
self.check_all("urllib.robotparser")
self.check_all("sched")
self.check_all("shelve")
self.check_all("shlex")
@ -134,8 +134,6 @@ class AllTest(unittest.TestCase):
self.check_all("traceback")
self.check_all("tty")
self.check_all("unittest")
self.check_all("urllib")
self.check_all("urlparse")
self.check_all("uu")
self.check_all("warnings")
self.check_all("wave")

View File

@ -1,6 +1,6 @@
"""Tests for http/cookiejar.py."""
import re, os, time, urllib2
import re, os, time, urllib.request
from unittest import TestCase
from test import support
@ -206,7 +206,7 @@ def interact_netscape(cookiejar, url, *set_cookie_hdrs):
def _interact(cookiejar, url, set_cookie_hdrs, hdr_name):
"""Perform a single request / response cycle, returning Cookie: header."""
req = urllib2.Request(url)
req = urllib.request.Request(url)
cookiejar.add_cookie_header(req)
cookie_hdr = req.get_header("Cookie", "")
headers = []
@ -330,7 +330,7 @@ class CookieTests(TestCase):
("http://foo/", "foo.local", True),
("http://foo/", ".local", True),
]:
request = urllib2.Request(url)
request = urllib.request.Request(url)
r = pol.domain_return_ok(domain, request)
if ok: self.assert_(r)
else: self.assert_(not r)
@ -547,46 +547,48 @@ class CookieTests(TestCase):
def test_request_path(self):
# with parameters
req = urllib2.Request("http://www.example.com/rheum/rhaponicum;"
"foo=bar;sing=song?apples=pears&spam=eggs#ni")
req = urllib.request.Request(
"http://www.example.com/rheum/rhaponicum;"
"foo=bar;sing=song?apples=pears&spam=eggs#ni")
self.assertEquals(request_path(req), "/rheum/rhaponicum;"
"foo=bar;sing=song?apples=pears&spam=eggs#ni")
# without parameters
req = urllib2.Request("http://www.example.com/rheum/rhaponicum?"
"apples=pears&spam=eggs#ni")
req = urllib.request.Request(
"http://www.example.com/rheum/rhaponicum?"
"apples=pears&spam=eggs#ni")
self.assertEquals(request_path(req), "/rheum/rhaponicum?"
"apples=pears&spam=eggs#ni")
# missing final slash
req = urllib2.Request("http://www.example.com")
req = urllib.request.Request("http://www.example.com")
self.assertEquals(request_path(req), "/")
def test_request_port(self):
req = urllib2.Request("http://www.acme.com:1234/",
headers={"Host": "www.acme.com:4321"})
req = urllib.request.Request("http://www.acme.com:1234/",
headers={"Host": "www.acme.com:4321"})
self.assertEquals(request_port(req), "1234")
req = urllib2.Request("http://www.acme.com/",
headers={"Host": "www.acme.com:4321"})
req = urllib.request.Request("http://www.acme.com/",
headers={"Host": "www.acme.com:4321"})
self.assertEquals(request_port(req), DEFAULT_HTTP_PORT)
def test_request_host(self):
# this request is illegal (RFC2616, 14.2.3)
req = urllib2.Request("http://1.1.1.1/",
headers={"Host": "www.acme.com:80"})
req = urllib.request.Request("http://1.1.1.1/",
headers={"Host": "www.acme.com:80"})
# libwww-perl wants this response, but that seems wrong (RFC 2616,
# section 5.2, point 1., and RFC 2965 section 1, paragraph 3)
#self.assertEquals(request_host(req), "www.acme.com")
self.assertEquals(request_host(req), "1.1.1.1")
req = urllib2.Request("http://www.acme.com/",
headers={"Host": "irrelevant.com"})
req = urllib.request.Request("http://www.acme.com/",
headers={"Host": "irrelevant.com"})
self.assertEquals(request_host(req), "www.acme.com")
# not actually sure this one is valid Request object, so maybe should
# remove test for no host in url in request_host function?
req = urllib2.Request("/resource.html",
headers={"Host": "www.acme.com"})
req = urllib.request.Request("/resource.html",
headers={"Host": "www.acme.com"})
self.assertEquals(request_host(req), "www.acme.com")
# port shouldn't be in request-host
req = urllib2.Request("http://www.acme.com:2345/resource.html",
headers={"Host": "www.acme.com:5432"})
req = urllib.request.Request("http://www.acme.com:2345/resource.html",
headers={"Host": "www.acme.com:5432"})
self.assertEquals(request_host(req), "www.acme.com")
def test_is_HDN(self):
@ -766,24 +768,24 @@ class CookieTests(TestCase):
blocked_domains=["acme.com"],
allowed_domains=["www.acme.com"]))
req = urllib2.Request("http://acme.com/")
req = urllib.request.Request("http://acme.com/")
headers = ["Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/"]
res = FakeResponse(headers, "http://acme.com/")
c.extract_cookies(res, req)
self.assertEquals(len(c), 0)
req = urllib2.Request("http://www.acme.com/")
req = urllib.request.Request("http://www.acme.com/")
res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req)
self.assertEquals(len(c), 1)
req = urllib2.Request("http://www.coyote.com/")
req = urllib.request.Request("http://www.coyote.com/")
res = FakeResponse(headers, "http://www.coyote.com/")
c.extract_cookies(res, req)
self.assertEquals(len(c), 1)
# set a cookie with non-allowed domain...
req = urllib2.Request("http://www.coyote.com/")
req = urllib.request.Request("http://www.coyote.com/")
res = FakeResponse(headers, "http://www.coyote.com/")
cookies = c.make_cookies(res, req)
c.set_cookie(cookies[0])
@ -798,7 +800,7 @@ class CookieTests(TestCase):
c = CookieJar(policy=pol)
headers = ["Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/"]
req = urllib2.Request("http://www.acme.com/")
req = urllib.request.Request("http://www.acme.com/")
res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req)
self.assertEquals(len(c), 0)
@ -808,11 +810,11 @@ class CookieTests(TestCase):
self.assertEquals(len(c), 1)
c.clear()
req = urllib2.Request("http://www.roadrunner.net/")
req = urllib.request.Request("http://www.roadrunner.net/")
res = FakeResponse(headers, "http://www.roadrunner.net/")
c.extract_cookies(res, req)
self.assertEquals(len(c), 1)
req = urllib2.Request("http://www.roadrunner.net/")
req = urllib.request.Request("http://www.roadrunner.net/")
c.add_cookie_header(req)
self.assert_((req.has_header("Cookie") and
req.has_header("Cookie2")))
@ -823,7 +825,7 @@ class CookieTests(TestCase):
self.assertEquals(len(c), 1)
# set a cookie with blocked domain...
req = urllib2.Request("http://www.acme.com/")
req = urllib.request.Request("http://www.acme.com/")
res = FakeResponse(headers, "http://www.acme.com/")
cookies = c.make_cookies(res, req)
c.set_cookie(cookies[0])
@ -866,7 +868,7 @@ class CookieTests(TestCase):
url = "http://www.acme.com"
c = CookieJar(DefaultCookiePolicy(rfc2965=True))
interact_2965(c, url, "foo=bar; Version=1")
req = urllib2.Request(url)
req = urllib.request.Request(url)
self.assertEquals(len(c), 1)
c.add_cookie_header(req)
self.assert_(req.has_header("Cookie"))
@ -1009,7 +1011,7 @@ class CookieTests(TestCase):
def cookiejar_from_cookie_headers(headers):
c = CookieJar()
req = urllib2.Request("http://www.example.com/")
req = urllib.request.Request("http://www.example.com/")
r = FakeResponse(headers, "http://www.example.com/")
c.extract_cookies(r, req)
return c
@ -1080,9 +1082,9 @@ class LWPCookieTests(TestCase):
c = CookieJar(DefaultCookiePolicy(rfc2965 = True))
#req = urllib2.Request("http://1.1.1.1/",
#req = urllib.request.Request("http://1.1.1.1/",
# headers={"Host": "www.acme.com:80"})
req = urllib2.Request("http://www.acme.com:80/",
req = urllib.request.Request("http://www.acme.com:80/",
headers={"Host": "www.acme.com:80"})
headers.append(
@ -1091,7 +1093,7 @@ class LWPCookieTests(TestCase):
res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req)
req = urllib2.Request("http://www.acme.com/")
req = urllib.request.Request("http://www.acme.com/")
c.add_cookie_header(req)
self.assertEqual(req.get_header("Cookie"), "CUSTOMER=WILE_E_COYOTE")
@ -1101,7 +1103,7 @@ class LWPCookieTests(TestCase):
res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req)
req = urllib2.Request("http://www.acme.com/foo/bar")
req = urllib.request.Request("http://www.acme.com/foo/bar")
c.add_cookie_header(req)
h = req.get_header("Cookie")
@ -1112,7 +1114,7 @@ class LWPCookieTests(TestCase):
res = FakeResponse(headers, "http://www.acme.com")
c.extract_cookies(res, req)
req = urllib2.Request("http://www.acme.com/")
req = urllib.request.Request("http://www.acme.com/")
c.add_cookie_header(req)
h = req.get_header("Cookie")
@ -1120,7 +1122,7 @@ class LWPCookieTests(TestCase):
"CUSTOMER=WILE_E_COYOTE" in h and
"SHIPPING=FEDEX" not in h)
req = urllib2.Request("http://www.acme.com/foo/")
req = urllib.request.Request("http://www.acme.com/foo/")
c.add_cookie_header(req)
h = req.get_header("Cookie")
@ -1155,13 +1157,13 @@ class LWPCookieTests(TestCase):
c = CookieJar()
headers = []
req = urllib2.Request("http://www.acme.com/")
req = urllib.request.Request("http://www.acme.com/")
headers.append("Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/")
res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req)
req = urllib2.Request("http://www.acme.com/")
req = urllib.request.Request("http://www.acme.com/")
c.add_cookie_header(req)
self.assertEquals(req.get_header("Cookie"),
@ -1172,7 +1174,7 @@ class LWPCookieTests(TestCase):
res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req)
req = urllib2.Request("http://www.acme.com/ammo")
req = urllib.request.Request("http://www.acme.com/ammo")
c.add_cookie_header(req)
self.assert_(re.search(r"PART_NUMBER=RIDING_ROCKET_0023;\s*"
@ -1503,7 +1505,7 @@ class LWPCookieTests(TestCase):
# Some additional Netscape cookies tests.
c = CookieJar()
headers = []
req = urllib2.Request("http://foo.bar.acme.com/foo")
req = urllib.request.Request("http://foo.bar.acme.com/foo")
# Netscape allows a host part that contains dots
headers.append("Set-Cookie: Customer=WILE_E_COYOTE; domain=.acme.com")
@ -1517,7 +1519,7 @@ class LWPCookieTests(TestCase):
res = FakeResponse(headers, "http://www.acme.com/foo")
c.extract_cookies(res, req)
req = urllib2.Request("http://foo.bar.acme.com/foo")
req = urllib.request.Request("http://foo.bar.acme.com/foo")
c.add_cookie_header(req)
self.assert_(
"PART_NUMBER=3,4" in req.get_header("Cookie") and
@ -1559,12 +1561,12 @@ class LWPCookieTests(TestCase):
c = CookieJar(DefaultCookiePolicy(rfc2965 = True))
headers = []
req = urllib2.Request("http://www.ants.com/")
req = urllib.request.Request("http://www.ants.com/")
headers.append("Set-Cookie: JSESSIONID=ABCDERANDOM123; Path=")
res = FakeResponse(headers, "http://www.ants.com/")
c.extract_cookies(res, req)
req = urllib2.Request("http://www.ants.com/")
req = urllib.request.Request("http://www.ants.com/")
c.add_cookie_header(req)
self.assertEquals(req.get_header("Cookie"),
@ -1572,7 +1574,7 @@ class LWPCookieTests(TestCase):
self.assertEquals(req.get_header("Cookie2"), '$Version="1"')
# missing path in the request URI
req = urllib2.Request("http://www.ants.com:8080")
req = urllib.request.Request("http://www.ants.com:8080")
c.add_cookie_header(req)
self.assertEquals(req.get_header("Cookie"),
@ -1585,7 +1587,7 @@ class LWPCookieTests(TestCase):
# Check session cookies are deleted properly by
# CookieJar.clear_session_cookies method
req = urllib2.Request('http://www.perlmeister.com/scripts')
req = urllib.request.Request('http://www.perlmeister.com/scripts')
headers = []
headers.append("Set-Cookie: s1=session;Path=/scripts")
headers.append("Set-Cookie: p1=perm; Domain=.perlmeister.com;"

View File

@ -11,7 +11,7 @@ import os
import sys
import base64
import shutil
import urllib
import urllib.parse
import http.client
import tempfile
import threading
@ -322,7 +322,8 @@ class CGIHTTPServerTestCase(BaseTestCase):
(res.read(), res.getheader('Content-type'), res.status))
def test_post(self):
params = urllib.urlencode({'spam' : 1, 'eggs' : 'python', 'bacon' : 123456})
params = urllib.parse.urlencode(
{'spam' : 1, 'eggs' : 'python', 'bacon' : 123456})
headers = {'Content-type' : 'application/x-www-form-urlencoded'}
res = self.request('/cgi-bin/file2.py', 'POST', params, headers)

View File

@ -247,22 +247,22 @@ class ImportHooksTestCase(ImportHooksBaseTestCase):
i = ImpWrapper()
sys.meta_path.append(i)
sys.path_hooks.append(ImpWrapper)
mnames = ("colorsys", "urlparse", "distutils.core")
mnames = ("colorsys", "urllib.parse", "distutils.core")
for mname in mnames:
parent = mname.split(".")[0]
for n in list(sys.modules.keys()):
for n in list(sys.modules):
if n.startswith(parent):
del sys.modules[n]
for mname in mnames:
m = __import__(mname, globals(), locals(), ["__dummy__"])
m.__loader__ # to make sure we actually handled the import
# Delete urllib from modules because urlparse was imported above.
# Without this hack, test_socket_ssl fails if run in this order:
# regrtest.py test_codecmaps_tw test_importhooks test_socket_ssl
try:
del sys.modules['urllib']
except KeyError:
pass
## # Delete urllib from modules because urlparse was imported above.
## # Without this hack, test_socket_ssl fails if run in this order:
## # regrtest.py test_codecmaps_tw test_importhooks test_socket_ssl
## try:
## del sys.modules['urllib']
## except KeyError:
## pass
def test_main():
support.run_unittest(ImportHooksTestCase)

View File

@ -156,16 +156,6 @@ class PyclbrTest(TestCase):
# These were once about the 10 longest modules
cm('random', ignore=('Random',)) # from _random import Random as CoreGenerator
cm('cgi', ignore=('log',)) # set with = in module
cm('urllib', ignore=('_CFNumberToInt32',
'_CStringFromCFString',
'_CFSetup',
'getproxies_registry',
'proxy_bypass_registry',
'proxy_bypass_macosx_sysconf',
'open_https',
'_https_connection',
'getproxies_macosx_sysconf',
'getproxies_internetconfig',)) # not on all platforms
cm('pickle')
cm('aifc', ignore=('openfp',)) # set with = in module
cm('sre_parse', ignore=('dump',)) # from sre_constants import *

View File

@ -1,5 +1,6 @@
import unittest, robotparser
import io
import unittest
import urllib.robotparser
from test import support
class RobotTestCase(unittest.TestCase):
@ -34,7 +35,7 @@ def RobotTest(index, robots_txt, good_urls, bad_urls,
agent="test_robotparser"):
lines = io.StringIO(robots_txt).readlines()
parser = robotparser.RobotFileParser()
parser = urllib.robotparser.RobotFileParser()
parser.parse(lines)
for url in good_urls:
tests.addTest(RobotTestCase(index, parser, url, 1, agent))
@ -140,7 +141,7 @@ class TestCase(unittest.TestCase):
support.requires('network')
# whole site is password-protected.
url = 'http://mueblesmoraleda.com'
parser = robotparser.RobotFileParser()
parser = urllib.robotparser.RobotFileParser()
parser.set_url(url)
parser.read()
self.assertEqual(parser.can_fetch("*", url+"/robots.txt"), False)

View File

@ -10,7 +10,7 @@ import subprocess
import time
import os
import pprint
import urllib, urlparse
import urllib.parse, urllib.request
import shutil
import traceback
import asyncore
@ -440,8 +440,8 @@ else:
"""
# abandon query parameters
path = urlparse.urlparse(path)[2]
path = os.path.normpath(urllib.unquote(path))
path = urllib.parse.urlparse(path)[2]
path = os.path.normpath(urllib.parse.unquote(path))
words = path.split('/')
words = filter(None, words)
path = self.root
@ -943,7 +943,7 @@ else:
# now fetch the same data from the HTTPS server
url = 'https://%s:%d/%s' % (
HOST, server.port, os.path.split(CERTFILE)[1])
f = urllib.urlopen(url)
f = urllib.request.urlopen(url)
dlen = f.info().get("content-length")
if dlen and (int(dlen) > 0):
d2 = f.read(int(dlen))

View File

@ -1,6 +1,7 @@
"""Regression tests for urllib"""
import urllib
import urllib.parse
import urllib.request
import http.client
import email.message
import io
@ -16,6 +17,23 @@ def hexescape(char):
hex_repr = "0%s" % hex_repr
return "%" + hex_repr
# Shortcut for testing FancyURLopener.
# The opener built without explicit proxies is cached here and reused.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """Open *url* through a FancyURLopener and return the file-like result.

    When *proxies* is given, a fresh opener configured with those proxies
    is used for this call only.  Otherwise a module-level opener is created
    on first use and shared by later calls.  *data*, when not None, is
    passed through to the opener's open() method.
    """
    global _urlopener
    if proxies is not None:
        # Explicit proxies: one-off opener, never cached.
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = urllib.request.FancyURLopener()
        opener = _urlopener
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
class urlopen_FileTests(unittest.TestCase):
"""Test urlopen() opening a temporary file.
@ -25,15 +43,16 @@ class urlopen_FileTests(unittest.TestCase):
"""
def setUp(self):
"""Setup of a temp file to use for testing"""
self.text = bytes("test_urllib: %s\n" % self.__class__.__name__, "ascii")
FILE = open(support.TESTFN, 'wb')
# Create a temp file to use for testing
self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
"ascii")
f = open(support.TESTFN, 'wb')
try:
FILE.write(self.text)
f.write(self.text)
finally:
FILE.close()
f.close()
self.pathname = support.TESTFN
self.returned_obj = urllib.urlopen("file:%s" % self.pathname)
self.returned_obj = urlopen("file:%s" % self.pathname)
def tearDown(self):
"""Shut down the open object"""
@ -119,7 +138,7 @@ class urlopen_HttpTests(unittest.TestCase):
def test_read(self):
self.fakehttp(b"Hello!")
try:
fp = urllib.urlopen("http://python.org/")
fp = urlopen("http://python.org/")
self.assertEqual(fp.readline(), b"Hello!")
self.assertEqual(fp.readline(), b"")
self.assertEqual(fp.geturl(), 'http://python.org/')
@ -136,7 +155,7 @@ Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
try:
self.assertRaises(IOError, urllib.urlopen, "http://python.org/")
self.assertRaises(IOError, urlopen, "http://python.org/")
finally:
self.unfakehttp()
@ -145,7 +164,7 @@ Content-Type: text/html; charset=iso-8859-1
# data. (#1680230)
self.fakehttp(b'')
try:
self.assertRaises(IOError, urllib.urlopen, "http://something")
self.assertRaises(IOError, urlopen, "http://something")
finally:
self.unfakehttp()
@ -180,7 +199,8 @@ class urlretrieve_FileTests(unittest.TestCase):
except: pass
def constructLocalFileUrl(self, filePath):
return "file://%s" % urllib.pathname2url(os.path.abspath(filePath))
return "file://%s" % urllib.request.pathname2url(
os.path.abspath(filePath))
def createNewTempFile(self, data=b""):
"""Creates a new temporary file containing the specified data,
@ -204,7 +224,7 @@ class urlretrieve_FileTests(unittest.TestCase):
def test_basic(self):
# Make sure that a local file just gets its own location returned and
# a headers value is returned.
result = urllib.urlretrieve("file:%s" % support.TESTFN)
result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
self.assertEqual(result[0], support.TESTFN)
self.assert_(isinstance(result[1], email.message.Message),
"did not get a email.message.Message instance as second "
@ -214,7 +234,7 @@ class urlretrieve_FileTests(unittest.TestCase):
# Test that setting the filename argument works.
second_temp = "%s.2" % support.TESTFN
self.registerFileForCleanUp(second_temp)
result = urllib.urlretrieve(self.constructLocalFileUrl(
result = urllib.request.urlretrieve(self.constructLocalFileUrl(
support.TESTFN), second_temp)
self.assertEqual(second_temp, result[0])
self.assert_(os.path.exists(second_temp), "copy of the file was not "
@ -238,7 +258,8 @@ class urlretrieve_FileTests(unittest.TestCase):
count_holder[0] = count_holder[0] + 1
second_temp = "%s.2" % support.TESTFN
self.registerFileForCleanUp(second_temp)
urllib.urlretrieve(self.constructLocalFileUrl(support.TESTFN),
urllib.request.urlretrieve(
self.constructLocalFileUrl(support.TESTFN),
second_temp, hooktester)
def test_reporthook_0_bytes(self):
@ -247,7 +268,7 @@ class urlretrieve_FileTests(unittest.TestCase):
def hooktester(count, block_size, total_size, _report=report):
_report.append((count, block_size, total_size))
srcFileName = self.createNewTempFile()
urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
support.TESTFN, hooktester)
self.assertEqual(len(report), 1)
self.assertEqual(report[0][2], 0)
@ -261,7 +282,7 @@ class urlretrieve_FileTests(unittest.TestCase):
def hooktester(count, block_size, total_size, _report=report):
_report.append((count, block_size, total_size))
srcFileName = self.createNewTempFile(b"x" * 5)
urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
support.TESTFN, hooktester)
self.assertEqual(len(report), 2)
self.assertEqual(report[0][1], 8192)
@ -275,7 +296,7 @@ class urlretrieve_FileTests(unittest.TestCase):
def hooktester(count, block_size, total_size, _report=report):
_report.append((count, block_size, total_size))
srcFileName = self.createNewTempFile(b"x" * 8193)
urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
support.TESTFN, hooktester)
self.assertEqual(len(report), 3)
self.assertEqual(report[0][1], 8192)
@ -284,10 +305,10 @@ class urlretrieve_FileTests(unittest.TestCase):
class QuotingTests(unittest.TestCase):
"""Tests for urllib.quote() and urllib.quote_plus()
According to RFC 2396 ("Uniform Resource Identifiers), to escape a
character you write it as '%' + <2 character US-ASCII hex value>. The Python
code of ``'%' + hex(ord(<character>))[2:]`` escapes a character properly.
Case does not matter on the hex letters.
According to RFC 2396 (Uniform Resource Identifiers), to escape a
character you write it as '%' + <2 character US-ASCII hex value>.
The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
character properly. Case does not matter on the hex letters.
The various character sets specified are:
@ -313,24 +334,24 @@ class QuotingTests(unittest.TestCase):
"abcdefghijklmnopqrstuvwxyz",
"0123456789",
"_.-"])
result = urllib.quote(do_not_quote)
result = urllib.parse.quote(do_not_quote)
self.assertEqual(do_not_quote, result,
"using quote(): %s != %s" % (do_not_quote, result))
result = urllib.quote_plus(do_not_quote)
result = urllib.parse.quote_plus(do_not_quote)
self.assertEqual(do_not_quote, result,
"using quote_plus(): %s != %s" % (do_not_quote, result))
def test_default_safe(self):
# Test '/' is default value for 'safe' parameter
self.assertEqual(urllib.quote.__defaults__[0], '/')
self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
def test_safe(self):
# Test setting 'safe' parameter does what it should do
quote_by_default = "<>"
result = urllib.quote(quote_by_default, safe=quote_by_default)
result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
self.assertEqual(quote_by_default, result,
"using quote(): %s != %s" % (quote_by_default, result))
result = urllib.quote_plus(quote_by_default, safe=quote_by_default)
result = urllib.parse.quote_plus(quote_by_default, safe=quote_by_default)
self.assertEqual(quote_by_default, result,
"using quote_plus(): %s != %s" %
(quote_by_default, result))
@ -343,11 +364,11 @@ class QuotingTests(unittest.TestCase):
should_quote.append(chr(127)) # For 0x7F
should_quote = ''.join(should_quote)
for char in should_quote:
result = urllib.quote(char)
result = urllib.parse.quote(char)
self.assertEqual(hexescape(char), result,
"using quote(): %s should be escaped to %s, not %s" %
(char, hexescape(char), result))
result = urllib.quote_plus(char)
result = urllib.parse.quote_plus(char)
self.assertEqual(hexescape(char), result,
"using quote_plus(): "
"%s should be escapes to %s, not %s" %
@ -355,7 +376,7 @@ class QuotingTests(unittest.TestCase):
del should_quote
partial_quote = "ab[]cd"
expected = "ab%5B%5Dcd"
result = urllib.quote(partial_quote)
result = urllib.parse.quote(partial_quote)
self.assertEqual(expected, result,
"using quote(): %s != %s" % (expected, result))
self.assertEqual(expected, result,
@ -364,26 +385,26 @@ class QuotingTests(unittest.TestCase):
def test_quoting_space(self):
# Make sure quote() and quote_plus() handle spaces as specified in
# their unique way
result = urllib.quote(' ')
result = urllib.parse.quote(' ')
self.assertEqual(result, hexescape(' '),
"using quote(): %s != %s" % (result, hexescape(' ')))
result = urllib.quote_plus(' ')
result = urllib.parse.quote_plus(' ')
self.assertEqual(result, '+',
"using quote_plus(): %s != +" % result)
given = "a b cd e f"
expect = given.replace(' ', hexescape(' '))
result = urllib.quote(given)
result = urllib.parse.quote(given)
self.assertEqual(expect, result,
"using quote(): %s != %s" % (expect, result))
expect = given.replace(' ', '+')
result = urllib.quote_plus(given)
result = urllib.parse.quote_plus(given)
self.assertEqual(expect, result,
"using quote_plus(): %s != %s" % (expect, result))
def test_quoting_plus(self):
self.assertEqual(urllib.quote_plus('alpha+beta gamma'),
self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
'alpha%2Bbeta+gamma')
self.assertEqual(urllib.quote_plus('alpha+beta gamma', '+'),
self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
'alpha+beta+gamma')
class UnquotingTests(unittest.TestCase):
@ -399,21 +420,21 @@ class UnquotingTests(unittest.TestCase):
for num in range(128):
given = hexescape(chr(num))
expect = chr(num)
result = urllib.unquote(given)
result = urllib.parse.unquote(given)
self.assertEqual(expect, result,
"using unquote(): %s != %s" % (expect, result))
result = urllib.unquote_plus(given)
result = urllib.parse.unquote_plus(given)
self.assertEqual(expect, result,
"using unquote_plus(): %s != %s" %
(expect, result))
escape_list.append(given)
escape_string = ''.join(escape_list)
del escape_list
result = urllib.unquote(escape_string)
result = urllib.parse.unquote(escape_string)
self.assertEqual(result.count('%'), 1,
"using quote(): not all characters escaped; %s" %
result)
result = urllib.unquote(escape_string)
result = urllib.parse.unquote(escape_string)
self.assertEqual(result.count('%'), 1,
"using unquote(): not all characters escaped: "
"%s" % result)
@ -423,10 +444,10 @@ class UnquotingTests(unittest.TestCase):
# interspersed
given = 'ab%sd' % hexescape('c')
expect = "abcd"
result = urllib.unquote(given)
result = urllib.parse.unquote(given)
self.assertEqual(expect, result,
"using quote(): %s != %s" % (expect, result))
result = urllib.unquote_plus(given)
result = urllib.parse.unquote_plus(given)
self.assertEqual(expect, result,
"using unquote_plus(): %s != %s" % (expect, result))
@ -434,16 +455,16 @@ class UnquotingTests(unittest.TestCase):
# Test difference between unquote() and unquote_plus()
given = "are+there+spaces..."
expect = given
result = urllib.unquote(given)
result = urllib.parse.unquote(given)
self.assertEqual(expect, result,
"using unquote(): %s != %s" % (expect, result))
expect = given.replace('+', ' ')
result = urllib.unquote_plus(given)
result = urllib.parse.unquote_plus(given)
self.assertEqual(expect, result,
"using unquote_plus(): %s != %s" % (expect, result))
def test_unquote_with_unicode(self):
r = urllib.unquote('br%C3%BCckner_sapporo_20050930.doc')
r = urllib.parse.unquote('br%C3%BCckner_sapporo_20050930.doc')
self.assertEqual(r, 'br\xc3\xbcckner_sapporo_20050930.doc')
class urlencode_Tests(unittest.TestCase):
@ -462,7 +483,7 @@ class urlencode_Tests(unittest.TestCase):
"""
expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
result = urllib.urlencode(given)
result = urllib.parse.urlencode(given)
for expected in expect_somewhere:
self.assert_(expected in result,
"testing %s: %s not found in %s" %
@ -495,20 +516,20 @@ class urlencode_Tests(unittest.TestCase):
# Make sure keys and values are quoted using quote_plus()
given = {"&":"="}
expect = "%s=%s" % (hexescape('&'), hexescape('='))
result = urllib.urlencode(given)
result = urllib.parse.urlencode(given)
self.assertEqual(expect, result)
given = {"key name":"A bunch of pluses"}
expect = "key+name=A+bunch+of+pluses"
result = urllib.urlencode(given)
result = urllib.parse.urlencode(given)
self.assertEqual(expect, result)
def test_doseq(self):
# Test that passing True for 'doseq' parameter works correctly
given = {'sequence':['1', '2', '3']}
expect = "sequence=%s" % urllib.quote_plus(str(['1', '2', '3']))
result = urllib.urlencode(given)
expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
result = urllib.parse.urlencode(given)
self.assertEqual(expect, result)
result = urllib.urlencode(given, True)
result = urllib.parse.urlencode(given, True)
for value in given["sequence"]:
expect = "sequence=%s" % value
self.assert_(expect in result,
@ -523,11 +544,11 @@ class Pathname_Tests(unittest.TestCase):
# Make sure simple tests pass
expected_path = os.path.join("parts", "of", "a", "path")
expected_url = "parts/of/a/path"
result = urllib.pathname2url(expected_path)
result = urllib.request.pathname2url(expected_path)
self.assertEqual(expected_url, result,
"pathname2url() failed; %s != %s" %
(result, expected_url))
result = urllib.url2pathname(expected_url)
result = urllib.request.url2pathname(expected_url)
self.assertEqual(expected_path, result,
"url2pathame() failed; %s != %s" %
(result, expected_path))
@ -536,25 +557,25 @@ class Pathname_Tests(unittest.TestCase):
# Test automatic quoting and unquoting works for pathnam2url() and
# url2pathname() respectively
given = os.path.join("needs", "quot=ing", "here")
expect = "needs/%s/here" % urllib.quote("quot=ing")
result = urllib.pathname2url(given)
expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
result = urllib.request.pathname2url(given)
self.assertEqual(expect, result,
"pathname2url() failed; %s != %s" %
(expect, result))
expect = given
result = urllib.url2pathname(result)
result = urllib.request.url2pathname(result)
self.assertEqual(expect, result,
"url2pathname() failed; %s != %s" %
(expect, result))
given = os.path.join("make sure", "using_quote")
expect = "%s/using_quote" % urllib.quote("make sure")
result = urllib.pathname2url(given)
expect = "%s/using_quote" % urllib.parse.quote("make sure")
result = urllib.request.pathname2url(given)
self.assertEqual(expect, result,
"pathname2url() failed; %s != %s" %
(expect, result))
given = "make+sure/using_unquote"
expect = os.path.join("make+sure", "using_unquote")
result = urllib.url2pathname(given)
result = urllib.request.url2pathname(given)
self.assertEqual(expect, result,
"url2pathname() failed; %s != %s" %
(expect, result))

View File

@ -5,8 +5,8 @@ import os
import io
import socket
import urllib2
from urllib2 import Request, OpenerDirector
import urllib.request
from urllib.request import Request, OpenerDirector
# XXX
# Request
@ -17,10 +17,10 @@ class TrivialTests(unittest.TestCase):
def test_trivial(self):
# A couple trivial tests
self.assertRaises(ValueError, urllib2.urlopen, 'bogus url')
self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')
# XXX Name hacking to get this to work on Windows.
fname = os.path.abspath(urllib2.__file__).replace('\\', '/')
fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')
if fname[1:2] == ":":
fname = fname[2:]
# And more hacking to get it to work on MacOS. This assumes
@ -29,18 +29,21 @@ class TrivialTests(unittest.TestCase):
fname = '/' + fname.replace(':', '/')
file_url = "file://%s" % fname
f = urllib2.urlopen(file_url)
f = urllib.request.urlopen(file_url)
buf = f.read()
f.close()
def test_parse_http_list(self):
tests = [('a,b,c', ['a', 'b', 'c']),
('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
('a, b, "c", "d", "e,f", g, h', ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
('a="b\\"c", d="e\\,f", g="h\\\\i"', ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
tests = [
('a,b,c', ['a', 'b', 'c']),
('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
('a, b, "c", "d", "e,f", g, h',
['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
('a="b\\"c", d="e\\,f", g="h\\\\i"',
['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
for string, list in tests:
self.assertEquals(urllib2.parse_http_list(string), list)
self.assertEquals(urllib.request.parse_http_list(string), list)
def test_request_headers_dict():
@ -107,7 +110,7 @@ def test_request_headers_methods():
def test_password_manager(self):
"""
>>> mgr = urllib2.HTTPPasswordMgr()
>>> mgr = urllib.request.HTTPPasswordMgr()
>>> add = mgr.add_password
>>> add("Some Realm", "http://example.com/", "joe", "password")
>>> add("Some Realm", "http://example.com/ni", "ni", "ni")
@ -172,7 +175,7 @@ def test_password_manager(self):
def test_password_manager_default_port(self):
"""
>>> mgr = urllib2.HTTPPasswordMgr()
>>> mgr = urllib.request.HTTPPasswordMgr()
>>> add = mgr.add_password
The point to note here is that we can't guess the default port if there's
@ -288,7 +291,7 @@ class MockHandler:
res = MockResponse(200, "OK", {}, "")
return self.parent.error("http", args[0], res, code, "", {})
elif action == "raise":
raise urllib2.URLError("blah")
raise urllib.error.URLError("blah")
assert False
def close(self): pass
def add_parent(self, parent):
@ -337,7 +340,7 @@ def build_test_opener(*handler_instances):
opener.add_handler(h)
return opener
class MockHTTPHandler(urllib2.BaseHandler):
class MockHTTPHandler(urllib.request.BaseHandler):
# useful for testing redirections and auth
# sends supplied headers and code as first response
# sends 200 OK as second response
@ -392,7 +395,7 @@ class OpenerDirectorTests(unittest.TestCase):
# TypeError in real code; here, returning self from these mock
# methods would either cause no exception, or AttributeError.
from urllib2 import URLError
from urllib.error import URLError
o = OpenerDirector()
meth_spec = [
@ -400,7 +403,7 @@ class OpenerDirectorTests(unittest.TestCase):
[("redirect_request", "return self")],
]
handlers = add_ordered_mock_handlers(o, meth_spec)
o.add_handler(urllib2.UnknownHandler())
o.add_handler(urllib.request.UnknownHandler())
for scheme in "do", "proxy", "redirect":
self.assertRaises(URLError, o.open, scheme+"://example.com/")
@ -458,7 +461,7 @@ class OpenerDirectorTests(unittest.TestCase):
handlers = add_ordered_mock_handlers(o, meth_spec)
req = Request("http://example.com/")
self.assertRaises(urllib2.URLError, o.open, req)
self.assertRaises(urllib.error.URLError, o.open, req)
self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])
## def test_error(self):
@ -529,8 +532,7 @@ class OpenerDirectorTests(unittest.TestCase):
def sanepathname2url(path):
import urllib
urlpath = urllib.pathname2url(path)
urlpath = urllib.request.pathname2url(path)
if os.name == "nt" and urlpath.startswith("///"):
urlpath = urlpath[2:]
# XXX don't ask me about the mac...
@ -545,7 +547,7 @@ class HandlerTests(unittest.TestCase):
self.filename, self.filetype = filename, filetype
return io.StringIO(self.data), len(self.data)
class NullFTPHandler(urllib2.FTPHandler):
class NullFTPHandler(urllib.request.FTPHandler):
def __init__(self, data): self.data = data
def connect_ftp(self, user, passwd, host, port, dirs,
timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
@ -587,7 +589,7 @@ class HandlerTests(unittest.TestCase):
def test_file(self):
import email.utils, socket
h = urllib2.FileHandler()
h = urllib.request.FileHandler()
o = h.parent = MockOpener()
TESTFN = support.TESTFN
@ -644,12 +646,12 @@ class HandlerTests(unittest.TestCase):
finally:
f.close()
self.assertRaises(urllib2.URLError,
self.assertRaises(urllib.error.URLError,
h.file_open, Request(url))
finally:
os.remove(TESTFN)
h = urllib2.FileHandler()
h = urllib.request.FileHandler()
o = h.parent = MockOpener()
# XXXX why does // mean ftp (and /// mean not ftp!), and where
# is file: scheme specified? I think this is really a bug, and
@ -668,7 +670,7 @@ class HandlerTests(unittest.TestCase):
try:
h.file_open(req)
# XXXX remove OSError when bug fixed
except (urllib2.URLError, OSError):
except (urllib.error.URLError, OSError):
self.assert_(not ftp)
else:
self.assert_(o.req is req)
@ -685,6 +687,7 @@ class HandlerTests(unittest.TestCase):
return ''
class MockHTTPClass:
def __init__(self):
self.level = 0
self.req_headers = []
self.data = None
self.raise_on_endheaders = False
@ -707,7 +710,7 @@ class HandlerTests(unittest.TestCase):
def getresponse(self):
return MockHTTPResponse(MockFile(), {}, 200, "OK")
h = urllib2.AbstractHTTPHandler()
h = urllib.request.AbstractHTTPHandler()
o = h.parent = MockOpener()
url = "http://example.com/"
@ -737,7 +740,7 @@ class HandlerTests(unittest.TestCase):
# check socket.error converted to URLError
http.raise_on_endheaders = True
self.assertRaises(urllib2.URLError, h.do_open, http, req)
self.assertRaises(urllib.error.URLError, h.do_open, http, req)
# check adding of standard headers
o.addheaders = [("Spam", "eggs")]
@ -768,7 +771,7 @@ class HandlerTests(unittest.TestCase):
self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
def test_errors(self):
h = urllib2.HTTPErrorProcessor()
h = urllib.request.HTTPErrorProcessor()
o = h.parent = MockOpener()
url = "http://example.com/"
@ -794,7 +797,7 @@ class HandlerTests(unittest.TestCase):
def test_cookies(self):
cj = MockCookieJar()
h = urllib2.HTTPCookieProcessor(cj)
h = urllib.request.HTTPCookieProcessor(cj)
o = h.parent = MockOpener()
req = Request("http://example.com/")
@ -810,7 +813,7 @@ class HandlerTests(unittest.TestCase):
def test_redirect(self):
from_url = "http://example.com/a.html"
to_url = "http://example.com/b.html"
h = urllib2.HTTPRedirectHandler()
h = urllib.request.HTTPRedirectHandler()
o = h.parent = MockOpener()
# ordinary redirect behaviour
@ -825,7 +828,7 @@ class HandlerTests(unittest.TestCase):
try:
method(req, MockFile(), code, "Blah",
MockHeaders({"location": to_url}))
except urllib2.HTTPError:
except urllib.error.HTTPError:
# 307 in response to POST requires user OK
self.assert_(code == 307 and data is not None)
self.assertEqual(o.req.get_full_url(), to_url)
@ -860,9 +863,9 @@ class HandlerTests(unittest.TestCase):
while 1:
redirect(h, req, "http://example.com/")
count = count + 1
except urllib2.HTTPError:
except urllib.error.HTTPError:
# don't stop until max_repeats, because cookies may introduce state
self.assertEqual(count, urllib2.HTTPRedirectHandler.max_repeats)
self.assertEqual(count, urllib.request.HTTPRedirectHandler.max_repeats)
# detect endless non-repeating chain of redirects
req = Request(from_url, origin_req_host="example.com")
@ -871,9 +874,9 @@ class HandlerTests(unittest.TestCase):
while 1:
redirect(h, req, "http://example.com/%d" % count)
count = count + 1
except urllib2.HTTPError:
except urllib.error.HTTPError:
self.assertEqual(count,
urllib2.HTTPRedirectHandler.max_redirections)
urllib.request.HTTPRedirectHandler.max_redirections)
def test_cookie_redirect(self):
# cookies shouldn't leak into redirected requests
@ -883,16 +886,16 @@ class HandlerTests(unittest.TestCase):
cj = CookieJar()
interact_netscape(cj, "http://www.example.com/", "spam=eggs")
hh = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
hdeh = urllib2.HTTPDefaultErrorHandler()
hrh = urllib2.HTTPRedirectHandler()
cp = urllib2.HTTPCookieProcessor(cj)
hdeh = urllib.request.HTTPDefaultErrorHandler()
hrh = urllib.request.HTTPRedirectHandler()
cp = urllib.request.HTTPCookieProcessor(cj)
o = build_test_opener(hh, hdeh, hrh, cp)
o.open("http://www.example.com/")
self.assert_(not hh.req.has_header("Cookie"))
def test_proxy(self):
o = OpenerDirector()
ph = urllib2.ProxyHandler(dict(http="proxy.example.com:3128"))
ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
o.add_handler(ph)
meth_spec = [
[("http_open", "return response")]
@ -910,7 +913,7 @@ class HandlerTests(unittest.TestCase):
def test_basic_auth(self, quote_char='"'):
opener = OpenerDirector()
password_manager = MockPasswordManager()
auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
realm = "ACME Widget Store"
http_handler = MockHTTPHandler(
401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
@ -928,10 +931,10 @@ class HandlerTests(unittest.TestCase):
def test_proxy_basic_auth(self):
opener = OpenerDirector()
ph = urllib2.ProxyHandler(dict(http="proxy.example.com:3128"))
ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
opener.add_handler(ph)
password_manager = MockPasswordManager()
auth_handler = urllib2.ProxyBasicAuthHandler(password_manager)
auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
realm = "ACME Networks"
http_handler = MockHTTPHandler(
407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
@ -958,15 +961,15 @@ class HandlerTests(unittest.TestCase):
self.recorded = []
def record(self, info):
self.recorded.append(info)
class TestDigestAuthHandler(urllib2.HTTPDigestAuthHandler):
class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
def http_error_401(self, *args, **kwds):
self.parent.record("digest")
urllib2.HTTPDigestAuthHandler.http_error_401(self,
urllib.request.HTTPDigestAuthHandler.http_error_401(self,
*args, **kwds)
class TestBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
def http_error_401(self, *args, **kwds):
self.parent.record("basic")
urllib2.HTTPBasicAuthHandler.http_error_401(self,
urllib.request.HTTPBasicAuthHandler.http_error_401(self,
*args, **kwds)
opener = RecordingOpenerDirector()
@ -1030,13 +1033,13 @@ class HandlerTests(unittest.TestCase):
class MiscTests(unittest.TestCase):
def test_build_opener(self):
class MyHTTPHandler(urllib2.HTTPHandler): pass
class FooHandler(urllib2.BaseHandler):
class MyHTTPHandler(urllib.request.HTTPHandler): pass
class FooHandler(urllib.request.BaseHandler):
def foo_open(self): pass
class BarHandler(urllib2.BaseHandler):
class BarHandler(urllib.request.BaseHandler):
def bar_open(self): pass
build_opener = urllib2.build_opener
build_opener = urllib.request.build_opener
o = build_opener(FooHandler, BarHandler)
self.opener_has_handler(o, FooHandler)
@ -1054,14 +1057,14 @@ class MiscTests(unittest.TestCase):
# a particular case of overriding: default handlers can be passed
# in explicitly
o = build_opener()
self.opener_has_handler(o, urllib2.HTTPHandler)
o = build_opener(urllib2.HTTPHandler)
self.opener_has_handler(o, urllib2.HTTPHandler)
o = build_opener(urllib2.HTTPHandler())
self.opener_has_handler(o, urllib2.HTTPHandler)
self.opener_has_handler(o, urllib.request.HTTPHandler)
o = build_opener(urllib.request.HTTPHandler)
self.opener_has_handler(o, urllib.request.HTTPHandler)
o = build_opener(urllib.request.HTTPHandler())
self.opener_has_handler(o, urllib.request.HTTPHandler)
# Issue2670: multiple handlers sharing the same base class
class MyOtherHTTPHandler(urllib2.HTTPHandler): pass
class MyOtherHTTPHandler(urllib.request.HTTPHandler): pass
o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
self.opener_has_handler(o, MyHTTPHandler)
self.opener_has_handler(o, MyOtherHTTPHandler)
@ -1077,7 +1080,7 @@ class MiscTests(unittest.TestCase):
def test_main(verbose=None):
from test import test_urllib2
support.run_doctest(test_urllib2, verbose)
support.run_doctest(urllib2, verbose)
support.run_doctest(urllib.request, verbose)
tests = (TrivialTests,
OpenerDirectorTests,
HandlerTests,

View File

@ -2,8 +2,8 @@
import email
import threading
import urlparse
import urllib2
import urllib.parse
import urllib.request
import http.server
import unittest
import hashlib
@ -45,7 +45,7 @@ class LoopbackHttpServerThread(threading.Thread):
self._stop_server = False
self.ready = threading.Event()
request_handler.protocol_version = "HTTP/1.0"
self.httpd = LoopbackHttpServer(('127.0.0.1', 0),
self.httpd = LoopbackHttpServer(("127.0.0.1", 0),
request_handler)
#print "Serving HTTP on %s port %s" % (self.httpd.server_name,
# self.httpd.server_port)
@ -154,11 +154,11 @@ class DigestAuthHandler:
if len(self._users) == 0:
return True
if 'Proxy-Authorization' not in request_handler.headers:
if "Proxy-Authorization" not in request_handler.headers:
return self._return_auth_challenge(request_handler)
else:
auth_dict = self._create_auth_dict(
request_handler.headers['Proxy-Authorization']
request_handler.headers["Proxy-Authorization"]
)
if auth_dict["username"] in self._users:
password = self._users[ auth_dict["username"] ]
@ -199,12 +199,12 @@ class FakeProxyHandler(http.server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
# Uncomment the next line for debugging.
#sys.stderr.write(format % args)
# sys.stderr.write(format % args)
pass
def do_GET(self):
(scm, netloc, path, params, query, fragment) = urlparse.urlparse(
self.path, 'http')
(scm, netloc, path, params, query, fragment) = urllib.parse.urlparse(
self.path, "http")
self.short_path = path
if self.digest_auth_handler.handle_request(self):
self.send_response(200, "OK")
@ -234,9 +234,10 @@ class ProxyAuthTests(unittest.TestCase):
self.server.start()
self.server.ready.wait()
proxy_url = "http://127.0.0.1:%d" % self.server.port
handler = urllib2.ProxyHandler({"http" : proxy_url})
self._digest_auth_handler = urllib2.ProxyDigestAuthHandler()
self.opener = urllib2.build_opener(handler, self._digest_auth_handler)
handler = urllib.request.ProxyHandler({"http" : proxy_url})
self._digest_auth_handler = urllib.request.ProxyDigestAuthHandler()
self.opener = urllib.request.build_opener(
handler, self._digest_auth_handler)
def tearDown(self):
self.server.stop()
@ -245,13 +246,13 @@ class ProxyAuthTests(unittest.TestCase):
self._digest_auth_handler.add_password(self.REALM, self.URL,
self.USER, self.PASSWD+"bad")
FakeProxyHandler.digest_auth_handler.set_qop("auth")
self.assertRaises(urllib2.HTTPError,
self.assertRaises(urllib.error.HTTPError,
self.opener.open,
self.URL)
def test_proxy_with_no_password_raises_httperror(self):
FakeProxyHandler.digest_auth_handler.set_qop("auth")
self.assertRaises(urllib2.HTTPError,
self.assertRaises(urllib.error.HTTPError,
self.opener.open,
self.URL)
@ -270,7 +271,7 @@ class ProxyAuthTests(unittest.TestCase):
FakeProxyHandler.digest_auth_handler.set_qop("auth-int")
try:
result = self.opener.open(self.URL)
except urllib2.URLError:
except urllib.error.URLError:
# It's okay if we don't support auth-int, but we certainly
# shouldn't receive any kind of exception here other than
# a URLError.
@ -296,7 +297,7 @@ def GetRequestHandler(responses):
self.wfile.write(body)
def do_POST(self):
content_length = self.headers['Content-Length']
content_length = self.headers["Content-Length"]
post_data = self.rfile.read(int(content_length))
self.do_GET()
self.requests.append(post_data)
@ -311,7 +312,7 @@ def GetRequestHandler(responses):
for (header, value) in headers:
self.send_header(header, value % self.port)
if body:
self.send_header('Content-type', 'text/plain')
self.send_header("Content-type", "text/plain")
self.end_headers()
return body
self.end_headers()
@ -332,7 +333,22 @@ class TestUrlopen(unittest.TestCase):
for transparent redirection have been written.
"""
def start_server(self, responses):
def setUp(self):
self.server = None
def tearDown(self):
if self.server is not None:
self.server.stop()
def urlopen(self, url, data=None):
f = urllib.request.urlopen(url, data)
result = f.read()
f.close()
return result
def start_server(self, responses=None):
if responses is None:
responses = [(200, [], b"we don't care")]
handler = GetRequestHandler(responses)
self.server = LoopbackHttpServerThread(handler)
@ -342,106 +358,71 @@ class TestUrlopen(unittest.TestCase):
handler.port = port
return handler
def test_redirection(self):
expected_response = b'We got here...'
expected_response = b"We got here..."
responses = [
(302, [('Location', 'http://localhost:%s/somewhere_else')], ''),
(302, [("Location", "http://localhost:%s/somewhere_else")], ""),
(200, [], expected_response)
]
handler = self.start_server(responses)
try:
f = urllib2.urlopen('http://localhost:%s/' % handler.port)
data = f.read()
f.close()
self.assertEquals(data, expected_response)
self.assertEquals(handler.requests, ['/', '/somewhere_else'])
finally:
self.server.stop()
data = self.urlopen("http://localhost:%s/" % handler.port)
self.assertEquals(data, expected_response)
self.assertEquals(handler.requests, ["/", "/somewhere_else"])
def test_404(self):
expected_response = b'Bad bad bad...'
expected_response = b"Bad bad bad..."
handler = self.start_server([(404, [], expected_response)])
try:
try:
urllib2.urlopen('http://localhost:%s/weeble' % handler.port)
except urllib2.URLError as f:
data = f.read()
f.close()
else:
self.fail('404 should raise URLError')
self.assertEquals(data, expected_response)
self.assertEquals(handler.requests, ['/weeble'])
finally:
self.server.stop()
self.urlopen("http://localhost:%s/weeble" % handler.port)
except urllib.error.URLError as f:
data = f.read()
f.close()
else:
self.fail("404 should raise URLError")
self.assertEquals(data, expected_response)
self.assertEquals(handler.requests, ["/weeble"])
def test_200(self):
expected_response = b'pycon 2008...'
expected_response = b"pycon 2008..."
handler = self.start_server([(200, [], expected_response)])
try:
f = urllib2.urlopen('http://localhost:%s/bizarre' % handler.port)
data = f.read()
f.close()
self.assertEquals(data, expected_response)
self.assertEquals(handler.requests, ['/bizarre'])
finally:
self.server.stop()
data = self.urlopen("http://localhost:%s/bizarre" % handler.port)
self.assertEquals(data, expected_response)
self.assertEquals(handler.requests, ["/bizarre"])
def test_200_with_parameters(self):
expected_response = b'pycon 2008...'
expected_response = b"pycon 2008..."
handler = self.start_server([(200, [], expected_response)])
try:
f = urllib2.urlopen('http://localhost:%s/bizarre' % handler.port, b'get=with_feeling')
data = f.read()
f.close()
self.assertEquals(data, expected_response)
self.assertEquals(handler.requests, ['/bizarre', b'get=with_feeling'])
finally:
self.server.stop()
data = self.urlopen("http://localhost:%s/bizarre" % handler.port,
b"get=with_feeling")
self.assertEquals(data, expected_response)
self.assertEquals(handler.requests, ["/bizarre", b"get=with_feeling"])
def test_sending_headers(self):
handler = self.start_server([(200, [], b"we don't care")])
try:
req = urllib2.Request("http://localhost:%s/" % handler.port,
headers={'Range': 'bytes=20-39'})
urllib2.urlopen(req)
self.assertEqual(handler.headers_received['Range'], 'bytes=20-39')
finally:
self.server.stop()
handler = self.start_server()
req = urllib.request.Request("http://localhost:%s/" % handler.port,
headers={"Range": "bytes=20-39"})
urllib.request.urlopen(req)
self.assertEqual(handler.headers_received["Range"], "bytes=20-39")
def test_basic(self):
handler = self.start_server([(200, [], b"we don't care")])
handler = self.start_server()
open_url = urllib.request.urlopen("http://localhost:%s" % handler.port)
for attr in ("read", "close", "info", "geturl"):
self.assert_(hasattr(open_url, attr), "object returned from "
"urlopen lacks the %s attribute" % attr)
try:
open_url = urllib2.urlopen("http://localhost:%s" % handler.port)
for attr in ("read", "close", "info", "geturl"):
self.assert_(hasattr(open_url, attr), "object returned from "
"urlopen lacks the %s attribute" % attr)
try:
self.assert_(open_url.read(), "calling 'read' failed")
finally:
open_url.close()
self.assert_(open_url.read(), "calling 'read' failed")
finally:
self.server.stop()
open_url.close()
def test_info(self):
handler = self.start_server([(200, [], b"we don't care")])
handler = self.start_server()
try:
open_url = urllib2.urlopen("http://localhost:%s" % handler.port)
open_url = urllib.request.urlopen(
"http://localhost:%s" % handler.port)
info_obj = open_url.info()
self.assert_(isinstance(info_obj, email.message.Message),
"object returned by 'info' is not an instance of "
@ -452,15 +433,10 @@ class TestUrlopen(unittest.TestCase):
def test_geturl(self):
# Make sure same URL as opened is returned by geturl.
handler = self.start_server([(200, [], b"we don't care")])
try:
open_url = urllib2.urlopen("http://localhost:%s" % handler.port)
url = open_url.geturl()
self.assertEqual(url, "http://localhost:%s" % handler.port)
finally:
self.server.stop()
handler = self.start_server()
open_url = urllib.request.urlopen("http://localhost:%s" % handler.port)
url = open_url.geturl()
self.assertEqual(url, "http://localhost:%s" % handler.port)
def test_bad_address(self):
# Make sure proper exception is raised when connecting to a bogus
@ -472,17 +448,10 @@ class TestUrlopen(unittest.TestCase):
# started failing then. One hopes the .invalid
# domain will be spared to serve its defined
# purpose.
# urllib2.urlopen, "http://www.sadflkjsasadf.com/")
urllib2.urlopen, "http://www.python.invalid./")
urllib.request.urlopen,
"http://www.python.invalid./")
def test_main():
# We will NOT depend on the network resource flag
# (Lib/test/regrtest.py -u network) since all tests here are only
# localhost. However, if this is a bad rationale, then uncomment
# the next line.
#support.requires("network")
support.run_unittest(ProxyAuthTests)
support.run_unittest(TestUrlopen)

View File

@ -4,10 +4,11 @@ import unittest
from test import support
from test.test_urllib2 import sanepathname2url
import socket
import urllib2
import sys
import os
import socket
import sys
import urllib.error
import urllib.request
def _retry_thrice(func, exc, *args, **kwargs):
@ -28,7 +29,8 @@ def _wrap_with_retry_thrice(func, exc):
# Connecting to remote hosts is flaky. Make it more robust by retrying
# the connection several times.
_urlopen_with_retry = _wrap_with_retry_thrice(urllib2.urlopen, urllib2.URLError)
_urlopen_with_retry = _wrap_with_retry_thrice(urllib.request.urlopen,
urllib.error.URLError)
class AuthTests(unittest.TestCase):
@ -78,16 +80,11 @@ class CloseSocketTest(unittest.TestCase):
# calling .close() on urllib2's response objects should close the
# underlying socket
# delve deep into response to fetch socket._socketobject
response = _urlopen_with_retry("http://www.python.org/")
abused_fileobject = response.fp
httpresponse = abused_fileobject.raw
self.assert_(httpresponse.__class__ is http.client.HTTPResponse)
fileobject = httpresponse.fp
self.assert_(not fileobject.closed)
sock = response.fp
self.assert_(not sock.closed)
response.close()
self.assert_(fileobject.closed)
self.assert_(sock.closed)
class OtherNetworkTests(unittest.TestCase):
def setUp(self):
@ -116,8 +113,9 @@ class OtherNetworkTests(unittest.TestCase):
f.write('hi there\n')
f.close()
urls = [
'file:'+sanepathname2url(os.path.abspath(TESTFN)),
('file:///nonsensename/etc/passwd', None, urllib2.URLError),
'file:' + sanepathname2url(os.path.abspath(TESTFN)),
('file:///nonsensename/etc/passwd', None,
urllib.error.URLError),
]
self._test_urls(urls, self._extra_handlers(), retry=True)
finally:
@ -157,9 +155,9 @@ class OtherNetworkTests(unittest.TestCase):
import logging
debug = logging.getLogger("test_urllib2").debug
urlopen = urllib2.build_opener(*handlers).open
urlopen = urllib.request.build_opener(*handlers).open
if retry:
urlopen = _wrap_with_retry_thrice(urlopen, urllib2.URLError)
urlopen = _wrap_with_retry_thrice(urlopen, urllib.error.URLError)
for url in urls:
if isinstance(url, tuple):
@ -186,7 +184,7 @@ class OtherNetworkTests(unittest.TestCase):
def _extra_handlers(self):
handlers = []
cfh = urllib2.CacheFTPHandler()
cfh = urllib.request.CacheFTPHandler()
cfh.setTimeout(1)
handlers.append(cfh)
@ -197,7 +195,7 @@ class TimeoutTest(unittest.TestCase):
def test_http_basic(self):
self.assertTrue(socket.getdefaulttimeout() is None)
u = _urlopen_with_retry("http://www.python.org")
self.assertTrue(u.fp.raw.fp._sock.gettimeout() is None)
self.assertTrue(u.fp._sock.gettimeout() is None)
def test_http_default_timeout(self):
self.assertTrue(socket.getdefaulttimeout() is None)
@ -206,7 +204,7 @@ class TimeoutTest(unittest.TestCase):
u = _urlopen_with_retry("http://www.python.org")
finally:
socket.setdefaulttimeout(None)
self.assertEqual(u.fp.raw.fp._sock.gettimeout(), 60)
self.assertEqual(u.fp._sock.gettimeout(), 60)
def test_http_no_timeout(self):
self.assertTrue(socket.getdefaulttimeout() is None)
@ -215,11 +213,11 @@ class TimeoutTest(unittest.TestCase):
u = _urlopen_with_retry("http://www.python.org", timeout=None)
finally:
socket.setdefaulttimeout(None)
self.assertTrue(u.fp.raw.fp._sock.gettimeout() is None)
self.assertTrue(u.fp._sock.gettimeout() is None)
def test_http_timeout(self):
u = _urlopen_with_retry("http://www.python.org", timeout=120)
self.assertEqual(u.fp.raw.fp._sock.gettimeout(), 120)
self.assertEqual(u.fp._sock.gettimeout(), 120)
FTP_HOST = "ftp://ftp.mirror.nl/pub/mirror/gnu/"

View File

@ -4,7 +4,7 @@ import unittest
from test import support
import socket
import urllib
import urllib.request
import sys
import os
import email.message
@ -36,11 +36,11 @@ class URLTimeoutTest(unittest.TestCase):
socket.setdefaulttimeout(None)
def testURLread(self):
f = _open_with_retry(urllib.urlopen, "http://www.python.org/")
f = _open_with_retry(urllib.request.urlopen, "http://www.python.org/")
x = f.read()
class urlopenNetworkTests(unittest.TestCase):
"""Tests urllib.urlopen using the network.
"""Tests urllib.request.urlopen using the network.
These tests are not exhaustive. Assuming that testing using files does a
good job overall of some of the basic interface features. There are no
@ -55,7 +55,7 @@ class urlopenNetworkTests(unittest.TestCase):
"""
def urlopen(self, *args):
return _open_with_retry(urllib.urlopen, *args)
return _open_with_retry(urllib.request.urlopen, *args)
def test_basic(self):
# Simple test expected to pass.
@ -105,7 +105,7 @@ class urlopenNetworkTests(unittest.TestCase):
def test_getcode(self):
# test getcode() with the fancy opener to get 404 error codes
URL = "http://www.python.org/XXXinvalidXXX"
open_url = urllib.FancyURLopener().open(URL)
open_url = urllib.request.FancyURLopener().open(URL)
try:
code = open_url.getcode()
finally:
@ -114,7 +114,7 @@ class urlopenNetworkTests(unittest.TestCase):
def test_fileno(self):
if (sys.platform in ('win32',) or
not hasattr(os, 'fdopen')):
not hasattr(os, 'fdopen')):
# On Windows, socket handles are not file descriptors; this
# test can't pass on Windows.
return
@ -142,13 +142,14 @@ class urlopenNetworkTests(unittest.TestCase):
# domain will be spared to serve its defined
# purpose.
# urllib.urlopen, "http://www.sadflkjsasadf.com/")
urllib.urlopen, "http://www.python.invalid./")
urllib.request.urlopen,
"http://www.python.invalid./")
class urlretrieveNetworkTests(unittest.TestCase):
"""Tests urllib.urlretrieve using the network."""
"""Tests urllib.request.urlretrieve using the network."""
def urlretrieve(self, *args):
return _open_with_retry(urllib.urlretrieve, *args)
return _open_with_retry(urllib.request.urlretrieve, *args)
def test_basic(self):
# Test basic functionality.

View File

@ -2,7 +2,7 @@
from test import support
import unittest
import urlparse
import urllib.parse
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
@ -10,19 +10,19 @@ RFC2396_BASE = "http://a/b/c/d;p?q"
class UrlParseTestCase(unittest.TestCase):
def checkRoundtrips(self, url, parsed, split):
result = urlparse.urlparse(url)
result = urllib.parse.urlparse(url)
self.assertEqual(result, parsed)
t = (result.scheme, result.netloc, result.path,
result.params, result.query, result.fragment)
self.assertEqual(t, parsed)
# put it back together and it should be the same
result2 = urlparse.urlunparse(result)
result2 = urllib.parse.urlunparse(result)
self.assertEqual(result2, url)
self.assertEqual(result2, result.geturl())
# the result of geturl() is a fixpoint; we can always parse it
# again to get the same result:
result3 = urlparse.urlparse(result.geturl())
result3 = urllib.parse.urlparse(result.geturl())
self.assertEqual(result3.geturl(), result.geturl())
self.assertEqual(result3, result)
self.assertEqual(result3.scheme, result.scheme)
@ -37,17 +37,17 @@ class UrlParseTestCase(unittest.TestCase):
self.assertEqual(result3.port, result.port)
# check the roundtrip using urlsplit() as well
result = urlparse.urlsplit(url)
result = urllib.parse.urlsplit(url)
self.assertEqual(result, split)
t = (result.scheme, result.netloc, result.path,
result.query, result.fragment)
self.assertEqual(t, split)
result2 = urlparse.urlunsplit(result)
result2 = urllib.parse.urlunsplit(result)
self.assertEqual(result2, url)
self.assertEqual(result2, result.geturl())
# check the fixpoint property of re-parsing the result of geturl()
result3 = urlparse.urlsplit(result.geturl())
result3 = urllib.parse.urlsplit(result.geturl())
self.assertEqual(result3.geturl(), result.geturl())
self.assertEqual(result3, result)
self.assertEqual(result3.scheme, result.scheme)
@ -83,7 +83,7 @@ class UrlParseTestCase(unittest.TestCase):
self.checkRoundtrips(url, parsed, split)
def test_http_roundtrips(self):
# urlparse.urlsplit treats 'http:' as an optimized special case,
# urllib.parse.urlsplit treats 'http:' as an optimized special case,
# so we test both 'http:' and 'https:' in all the following.
# Three cheers for white box knowledge!
testcases = [
@ -111,13 +111,13 @@ class UrlParseTestCase(unittest.TestCase):
self.checkRoundtrips(url, parsed, split)
def checkJoin(self, base, relurl, expected):
self.assertEqual(urlparse.urljoin(base, relurl), expected,
self.assertEqual(urllib.parse.urljoin(base, relurl), expected,
(base, relurl, expected))
def test_unparse_parse(self):
for u in ['Python', './Python']:
self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)
self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
def test_RFC1808(self):
# "normal" cases from RFC 1808:
@ -223,11 +223,11 @@ class UrlParseTestCase(unittest.TestCase):
(RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
(RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
]:
self.assertEqual(urlparse.urldefrag(url), (defrag, frag))
self.assertEqual(urllib.parse.urldefrag(url), (defrag, frag))
def test_urlsplit_attributes(self):
url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
p = urlparse.urlsplit(url)
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
self.assertEqual(p.path, "/doc/")
@ -242,7 +242,7 @@ class UrlParseTestCase(unittest.TestCase):
#self.assertEqual(p.geturl(), url)
url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
p = urlparse.urlsplit(url)
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
self.assertEqual(p.path, "/doc/")
@ -259,7 +259,7 @@ class UrlParseTestCase(unittest.TestCase):
# and request email addresses as usernames.
url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
p = urlparse.urlsplit(url)
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
self.assertEqual(p.path, "/doc/")
@ -274,11 +274,11 @@ class UrlParseTestCase(unittest.TestCase):
def test_attributes_bad_port(self):
"""Check handling of non-integer ports."""
p = urlparse.urlsplit("http://www.example.net:foo")
p = urllib.parse.urlsplit("http://www.example.net:foo")
self.assertEqual(p.netloc, "www.example.net:foo")
self.assertRaises(ValueError, lambda: p.port)
p = urlparse.urlparse("http://www.example.net:foo")
p = urllib.parse.urlparse("http://www.example.net:foo")
self.assertEqual(p.netloc, "www.example.net:foo")
self.assertRaises(ValueError, lambda: p.port)
@ -289,7 +289,7 @@ class UrlParseTestCase(unittest.TestCase):
# scheme://netloc syntax, the netloc and related attributes
# should be left empty.
uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
p = urlparse.urlsplit(uri)
p = urllib.parse.urlsplit(uri)
self.assertEqual(p.netloc, "")
self.assertEqual(p.username, None)
self.assertEqual(p.password, None)
@ -297,7 +297,7 @@ class UrlParseTestCase(unittest.TestCase):
self.assertEqual(p.port, None)
self.assertEqual(p.geturl(), uri)
p = urlparse.urlparse(uri)
p = urllib.parse.urlparse(uri)
self.assertEqual(p.netloc, "")
self.assertEqual(p.username, None)
self.assertEqual(p.password, None)
@ -307,7 +307,7 @@ class UrlParseTestCase(unittest.TestCase):
def test_noslash(self):
# Issue 1637: http://foo.com?query is legal
self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"),
self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
('http', 'example.com', '', '', 'blahblah=/foo', ''))
def test_main():

View File

@ -111,8 +111,10 @@ class XMLRPCTestCase(unittest.TestCase):
(int(2**34),))
xmlrpclib.dumps((xmlrpclib.MAXINT, xmlrpclib.MININT))
self.assertRaises(OverflowError, xmlrpclib.dumps, (xmlrpclib.MAXINT+1,))
self.assertRaises(OverflowError, xmlrpclib.dumps, (xmlrpclib.MININT-1,))
self.assertRaises(OverflowError, xmlrpclib.dumps,
(xmlrpclib.MAXINT+1,))
self.assertRaises(OverflowError, xmlrpclib.dumps,
(xmlrpclib.MININT-1,))
def dummy_write(s):
pass
@ -120,9 +122,10 @@ class XMLRPCTestCase(unittest.TestCase):
m = xmlrpclib.Marshaller()
m.dump_int(xmlrpclib.MAXINT, dummy_write)
m.dump_int(xmlrpclib.MININT, dummy_write)
self.assertRaises(OverflowError, m.dump_int, xmlrpclib.MAXINT+1, dummy_write)
self.assertRaises(OverflowError, m.dump_int, xmlrpclib.MININT-1, dummy_write)
self.assertRaises(OverflowError, m.dump_int,
xmlrpclib.MAXINT+1, dummy_write)
self.assertRaises(OverflowError, m.dump_int,
xmlrpclib.MININT-1, dummy_write)
def test_dump_none(self):
value = alist + [None]
@ -132,7 +135,6 @@ class XMLRPCTestCase(unittest.TestCase):
xmlrpclib.loads(strg)[0][0])
self.assertRaises(TypeError, xmlrpclib.dumps, (arg1,))
class HelperTestCase(unittest.TestCase):
def test_escape(self):
self.assertEqual(xmlrpclib.escape("a&b"), "a&amp;b")
@ -160,7 +162,6 @@ class FaultTestCase(unittest.TestCase):
# private methods
self.assertRaises(AttributeError,
xmlrpc.server.resolve_dotted_attribute, str, '__add')
self.assert_(xmlrpc.server.resolve_dotted_attribute(str, 'title'))
class DateTimeTestCase(unittest.TestCase):
@ -170,7 +171,8 @@ class DateTimeTestCase(unittest.TestCase):
def test_time(self):
d = 1181399930.036952
t = xmlrpclib.DateTime(d)
self.assertEqual(str(t), time.strftime("%Y%m%dT%H:%M:%S", time.localtime(d)))
self.assertEqual(str(t),
time.strftime("%Y%m%dT%H:%M:%S", time.localtime(d)))
def test_time_tuple(self):
d = (2007,6,9,10,38,50,5,160,0)
@ -180,7 +182,7 @@ class DateTimeTestCase(unittest.TestCase):
def test_time_struct(self):
d = time.localtime(1181399930.036952)
t = xmlrpclib.DateTime(d)
self.assertEqual(str(t), time.strftime("%Y%m%dT%H:%M:%S", d))
self.assertEqual(str(t), time.strftime("%Y%m%dT%H:%M:%S", d))
def test_datetime_datetime(self):
d = datetime.datetime(2007,1,2,3,4,5)
@ -350,12 +352,12 @@ class SimpleServerTestCase(unittest.TestCase):
self.assertEqual(response.reason, 'Not Found')
def test_introspection1(self):
expected_methods = set(['pow', 'div', 'my_function', 'add',
'system.listMethods', 'system.methodHelp',
'system.methodSignature', 'system.multicall'])
try:
p = xmlrpclib.ServerProxy('http://localhost:%d' % PORT)
meth = p.system.listMethods()
expected_methods = set(['pow', 'div', 'my_function', 'add',
'system.listMethods', 'system.methodHelp',
'system.methodSignature', 'system.multicall'])
self.assertEqual(set(meth), expected_methods)
except (xmlrpclib.ProtocolError, socket.error) as e:
# ignore failures due to non-blocking socket 'unavailable' errors
@ -593,7 +595,8 @@ class CGIHandlerTestCase(unittest.TestCase):
# will respond exception, if so, our goal is achieved ;)
handle = open(support.TESTFN, "r").read()
# start with 44th char so as not to get http header, we just need only xml
# start with 44th char so as not to get http header, we just
# need only xml
self.assertRaises(xmlrpclib.Fault, xmlrpclib.loads, handle[44:])
os.remove("xmldata.txt")

File diff suppressed because it is too large Load Diff

0
Lib/urllib/__init__.py Normal file
View File

59
Lib/urllib/error.py Normal file
View File

@ -0,0 +1,59 @@
"""Exception classes raised by urllib.
The base exception class is URLError, which inherits from IOError. It
doesn't define any behavior of its own, but is the base class for all
exceptions defined in this package.
HTTPError is an exception class that is also a valid HTTP response
instance. It behaves this way because HTTP protocol errors are valid
responses, with a status code, headers, and a body. In some contexts,
an application may want to handle an exception like a regular
response.
"""
import urllib.response
# do these error classes make sense?
# make sure all of the IOError stuff is overridden. we just want to be
# subtypes.
class URLError(IOError):
    """Base exception of the package; carries a ``reason``.

    URLError is a sub-type of IOError but shares none of its
    implementation, which is why both __init__ and __str__ are
    overridden here.  ``args`` is filled in for compatibility with other
    EnvironmentError subclasses, although it does not follow the usual
    (errno, strerror) layout.
    """

    def __init__(self, reason, filename=None):
        self.reason = reason
        self.args = (reason,)
        if filename is None:
            return
        self.filename = filename

    def __str__(self):
        return '<urlopen error %s>' % self.reason
class HTTPError(URLError, urllib.response.addinfourl):
    """An HTTP error that can also be treated as a regular response.

    The status code, message, headers, file object and URL are stored on
    the instance as ``code``, ``msg``, ``hdrs``, ``fp`` and ``filename``.
    """

    __super_init = urllib.response.addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        self.code, self.msg, self.hdrs = code, msg, hdrs
        self.fp = fp
        self.filename = url
        # addinfourl needs fp to be a valid file object, which an
        # HTTPError does not always have.  The simplest workaround in
        # that case is to leave the response base class uninitialized.
        if fp is None:
            return
        self.__super_init(fp, hdrs, url, code)

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)
class ContentTooShortError(URLError):
    """Raised when the downloaded size does not match content-length.

    ``content`` holds whatever the raiser passed in as the second
    argument.
    """

    def __init__(self, message, content):
        super().__init__(message)
        self.content = content

View File

@ -259,6 +259,311 @@ def urldefrag(url):
return url, ''
# Map every two-character hex string, in any mix of upper and lower case
# digits, to the character with that code point (0-255).
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)


def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%' followed by two hex digits (case-insensitive) is replaced
    by the character with that code point.  A '%' that is not followed by
    two hex digits is left in the result unchanged.
    """
    pieces = s.split('%')
    # pieces[0] is the text before the first '%'; every later piece
    # started right after a '%' in the input.
    for i, item in enumerate(pieces):
        if i == 0:
            continue
        try:
            pieces[i] = _hextochr[item[:2]] + item[2:]
        except KeyError:
            # Not a valid escape: put the '%' back untouched.
            pieces[i] = '%' + item
    return "".join(pieces)
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'.

    Like unquote(), but every '+' in the input is first turned into a
    space.
    """
    return unquote(s.replace('+', ' '))
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'abcdefghijklmnopqrstuvwxyz'
'0123456789' '_.-')
_safe_quoters= {}
class Quoter:
def __init__(self, safe):
self.cache = {}
self.safe = safe + always_safe
def __call__(self, c):
try:
return self.cache[c]
except KeyError:
if ord(c) < 256:
res = (c in self.safe) and c or ('%%%02X' % ord(c))
self.cache[c] = res
return res
else:
return "".join(['%%%02X' % i for i in c.encode("utf-8")])
def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Different parts of a URL (path info, query, ...) reserve different
    characters.  RFC 2396 Uniform Resource Identifiers (URI): Generic
    Syntax gives the full reserved set as

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    but each of them is only reserved in some components.

    The default `safe` targets the path section of a URL, so '/' is left
    alone: in a typical path the slashes already present are acting as
    reserved separators and must not be encoded.
    """
    key = (safe, always_safe)
    quoter = _safe_quoters.get(key)
    if quoter is None:
        # First use of this `safe` set: build and remember its Quoter.
        quoter = Quoter(safe)
        _safe_quoters[key] = quoter
    return ''.join(map(quoter, s))
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Keep spaces unencoded through quote(), then swap them for '+'.
    return quote(s, safe + ' ').replace(' ', '+')
def urlencode(query, doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Raises TypeError if query is neither a mapping nor a sequence whose
    first element is a tuple.
    """
    if hasattr(query, "items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError as err:
            # Re-raise with a clearer message, keeping the original
            # traceback.
            raise TypeError("not a valid non-string sequence "
                            "or mapping object"
                            ).with_traceback(err.__traceback__)

    pairs = []
    if not doseq:
        # preserve old behavior: every value is stringified as a whole
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            pairs.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                # Strings are sequences too, but count as one value.
                pairs.append(k + '=' + quote_plus(v))
                continue
            try:
                # is this a sufficient test for sequence-ness?
                len(v)
            except TypeError:
                # not a sequence
                pairs.append(k + '=' + quote_plus(str(v)))
            else:
                # one key=value pair per element of the sequence
                for elt in v:
                    pairs.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(pairs)
# Utilities to parse URLs (most of these return None for missing parts):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
# splitpasswd('user:passwd') -> 'user', 'passwd'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitattr('/path;attr1=value1;attr2=value2;...') ->
# '/path', ['attr1=value1', 'attr2=value2', ...]
# splitvalue('attr=value') --> 'attr', 'value'
# urllib.parse.unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    A str argument is checked to be pure ASCII and returned; anything
    else is returned as is.  Raises UnicodeError for a str containing
    non-ASCII characters.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of toBytes()
    if not isinstance(url, str):
        return url
    try:
        return url.encode("ASCII").decode()
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    The argument is converted with str() and stripped; a surrounding
    '<...>' pair and a leading 'URL:' prefix are then removed if present.
    """
    text = str(url).strip()
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    if text.startswith('URL:'):
        text = text[len('URL:'):].strip()
    return text
# Compiled lazily on the first call to splittype().
_typeprog = None


def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned lower-cased; without a scheme the result is
    (None, url).
    """
    global _typeprog
    if _typeprog is None:
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    scheme = found.group(1)
    # Skip the scheme plus the ':' that follows it.
    return scheme.lower(), url[len(scheme) + 1:]
# Compiled lazily on the first call to splithost().
_hostprog = None


def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Returns (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if found is None:
        return None, url
    return found.group(1, 2)
# Compiled lazily on the first call to splituser().
_userprog = None


def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are passed through unquote().  Returns (None, host) when
    host contains no '@'.  The result is always a 2-tuple.
    """
    global _userprog
    if _userprog is None:
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    match = _userprog.match(host)
    if match:
        # map() is a one-shot iterator in Python 3; build a real tuple so
        # both return paths have the same type and can be indexed.
        return tuple(map(unquote, match.group(1, 2)))
    return None, host
# Compiled lazily on the first call to splitpasswd().
_passwdprog = None


def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; without one the result is
    (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.group(1, 2)
# splitport('host:port') --> 'host', 'port'
# Compiled lazily on the first call to splitport().
_portprog = None


def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string and must consist of digits only;
    otherwise the result is (host, None).
    """
    global _portprog
    if _portprog is None:
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.group(1, 2)
# Compiled lazily on the first call to splitnport().
_nportprog = None


def splitnport(host, defport=-1):
    """Split host and port, returning the port as a number.

    Returns (host, defport) if there is no ':' in host; defport
    defaults to -1.
    Returns (host, int) if a valid number follows the last ':'.
    Returns (host, None) if there is a ':' but no valid number after it.
    """
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    name, digits = found.group(1, 2)
    if not digits:
        # "host:" with nothing after the colon
        return name, None
    try:
        return name, int(digits)
    except ValueError:
        return name, None
# Compiled lazily on the first call to splitquery().
_queryprog = None


def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'; without one the result is
    (url, None).
    """
    global _queryprog
    if _queryprog is None:
        import re
        # Raw string: '\?' is not a valid escape in an ordinary string
        # literal.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match:
        return match.group(1, 2)
    return url, None
# Compiled lazily on the first call to splittag().
_tagprog = None


def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'; without one the result is
    (url, None).
    """
    global _tagprog
    if _tagprog is None:
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    path, semicolon, rest = url.partition(';')
    if not semicolon:
        # No attributes at all.
        return path, []
    return path, rest.split(';')
# Compiled lazily on the first call to splitvalue().
_valueprog = None


def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='; without one the result is
    (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1, 2)
test_input = """
http://a/b/c/d

2295
Lib/urllib/request.py Normal file

File diff suppressed because it is too large Load Diff

83
Lib/urllib/response.py Normal file
View File

@ -0,0 +1,83 @@
"""Response classes used by urllib.
The base class, addbase, defines a minimal file-like interface,
including read() and readline(). The typical response object is an
addinfourl instance, which defines an info() method that returns
headers and a geturl() method that returns the url.
"""
class addbase(object):
    """Base class for addinfo and addclosehook.

    Wraps a file-like object `fp` and re-exports its read(), readline()
    and, when present, readlines() and fileno() as instance attributes.
    Iteration is delegated to `fp` as well.
    """

    # XXX Add a method to expose the timeout on the underlying socket?

    def __init__(self, fp):
        # TODO(jhylton): Is there a better way to delegate using io?
        self.fp = fp
        self.read = self.fp.read
        self.readline = self.fp.readline
        # TODO(jhylton): Make sure an object with readlines() is also iterable
        if hasattr(self.fp, "readlines"):
            self.readlines = self.fp.readlines
        if hasattr(self.fp, "fileno"):
            self.fileno = self.fp.fileno
        else:
            self.fileno = lambda: None

    # iter() and next() look special methods up on the type, not on the
    # instance, so binding fp.__iter__ / fp.__next__ as instance
    # attributes has no effect.  Delegate from real methods instead.
    def __iter__(self):
        return iter(self.fp)

    def __next__(self):
        return next(self.fp)

    def __repr__(self):
        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
                                             id(self), self.fp)

    def close(self):
        """Close the wrapped file and drop every reference to it."""
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
class addclosehook(addbase):
    """Wrap an open file and run a callback when it is closed."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.hookargs = hookargs
        self.closehook = closehook

    def close(self):
        """Close the file, then call the hook once with its arguments."""
        addbase.close(self)
        if not self.closehook:
            return
        self.closehook(*self.hookargs)
        # Forget the hook so that a second close() does not call it again.
        self.closehook = None
        self.hookargs = None
class addinfo(addbase):
    """Wrap an open file and give it an info() method."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
class addinfourl(addbase):
    """Wrap an open file and give it info(), getcode() and geturl()."""

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.code = code
        self.url = url
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def getcode(self):
        """Return the code given to the constructor (None by default)."""
        return self.code

    def geturl(self):
        """Return the url given to the constructor."""
        return self.url

View File

@ -9,8 +9,8 @@
The robots.txt Exclusion Protocol is implemented as specified in
http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
"""
import urlparse
import urllib
import urllib.parse, urllib.request
__all__ = ["RobotFileParser"]
@ -48,24 +48,19 @@ class RobotFileParser:
def set_url(self, url):
"""Sets the URL referring to a robots.txt file."""
self.url = url
self.host, self.path = urlparse.urlparse(url)[1:3]
self.host, self.path = urllib.parse.urlparse(url)[1:3]
def read(self):
"""Reads the robots.txt URL and feeds it to the parser."""
opener = URLopener()
f = opener.open(self.url)
lines = []
line = f.readline()
while line:
lines.append(line.strip())
line = f.readline()
self.errcode = opener.errcode
if self.errcode in (401, 403):
self.disallow_all = True
elif self.errcode >= 400:
self.allow_all = True
elif self.errcode == 200 and lines:
self.parse(lines)
try:
f = urllib.request.urlopen(self.url)
except urllib.error.HTTPError as err:
if err.code in (401, 403):
self.disallow_all = True
elif err.code >= 400:
self.allow_all = True
else:
self.parse(f.read().splitlines())
def _add_entry(self, entry):
if "*" in entry.useragents:
@ -75,15 +70,15 @@ class RobotFileParser:
self.entries.append(entry)
def parse(self, lines):
"""parse the input lines from a robots.txt file.
We allow that a user-agent: line is not preceded by
one or more blank lines."""
"""Parse the input lines from a robots.txt file.
We allow that a user-agent: line is not preceded by
one or more blank lines.
"""
state = 0
linenumber = 0
entry = Entry()
for line in lines:
linenumber = linenumber + 1
if not line:
if state == 1:
entry = Entry()
@ -102,7 +97,7 @@ class RobotFileParser:
line = line.split(':', 1)
if len(line) == 2:
line[0] = line[0].strip().lower()
line[1] = urllib.unquote(line[1].strip())
line[1] = urllib.parse.unquote(line[1].strip())
if line[0] == "user-agent":
if state == 2:
self._add_entry(entry)
@ -128,7 +123,7 @@ class RobotFileParser:
return True
# search for given user agent matches
# the first match counts
url = urllib.quote(urlparse.urlparse(urllib.unquote(url))[2]) or "/"
url = urllib.parse.quote(urllib.parse.urlparse(urllib.parse.unquote(url))[2]) or "/"
for entry in self.entries:
if entry.applies_to(useragent):
return entry.allowance(url)
@ -138,7 +133,6 @@ class RobotFileParser:
# agent not found ==> access granted
return True
def __str__(self):
return ''.join([str(entry) + "\n" for entry in self.entries])
@ -150,7 +144,7 @@ class RuleLine:
if path == '' and not allowance:
# an empty value means allow all
allowance = True
self.path = urllib.quote(path)
self.path = urllib.parse.quote(path)
self.allowance = allowance
def applies_to(self, filename):
@ -195,18 +189,3 @@ class Entry:
if line.applies_to(filename):
return line.allowance
return True
class URLopener(urllib.FancyURLopener):
def __init__(self, *args):
urllib.FancyURLopener.__init__(self, *args)
self.errcode = 200
def prompt_user_passwd(self, host, realm):
## If robots.txt file is accessible only with a password,
## we act as if the file wasn't there.
return None, None
def http_error_default(self, url, fp, errcode, errmsg, headers):
self.errcode = errcode
return urllib.FancyURLopener.http_error_default(self, url, fp, errcode,
errmsg, headers)

File diff suppressed because it is too large Load Diff

View File

@ -11,7 +11,8 @@ module. See also the BaseHTTPServer module docs for other API information.
"""
from http.server import BaseHTTPRequestHandler, HTTPServer
import urllib, sys
import sys
import urllib.parse
from wsgiref.handlers import SimpleHandler
__version__ = "0.1"
@ -93,7 +94,7 @@ class WSGIRequestHandler(BaseHTTPRequestHandler):
else:
path,query = self.path,''
env['PATH_INFO'] = urllib.unquote(path)
env['PATH_INFO'] = urllib.parse.unquote(path)
env['QUERY_STRING'] = query
host = self.address_string()

View File

@ -50,7 +50,7 @@ def guess_scheme(environ):
def application_uri(environ):
"""Return the application's base URI (no PATH_INFO or QUERY_STRING)"""
url = environ['wsgi.url_scheme']+'://'
from urllib import quote
from urllib.parse import quote
if environ.get('HTTP_HOST'):
url += environ['HTTP_HOST']
@ -70,7 +70,7 @@ def application_uri(environ):
def request_uri(environ, include_query=1):
"""Return the full request URI, optionally including the query string"""
url = application_uri(environ)
from urllib import quote
from urllib.parse import quote
path_info = quote(environ.get('PATH_INFO',''))
if not environ.get('SCRIPT_NAME'):
url += path_info[1:]

View File

@ -190,8 +190,8 @@ class DOMBuilder:
options.errorHandler = self.errorHandler
fp = input.byteStream
if fp is None and options.systemId:
import urllib2
fp = urllib2.urlopen(input.systemId)
import urllib.request
fp = urllib.request.urlopen(input.systemId)
return self._parse_bytestream(fp, options)
def parseWithContext(self, input, cnode, action):
@ -223,14 +223,14 @@ class DOMEntityResolver(object):
source.encoding = self._guess_media_encoding(source)
# determine the base URI if we can
import posixpath, urlparse
parts = urlparse.urlparse(systemId)
import posixpath, urllib.parse
parts = urllib.parse.urlparse(systemId)
scheme, netloc, path, params, query, fragment = parts
# XXX should we check the scheme here as well?
if path and not path.endswith("/"):
path = posixpath.dirname(path) + "/"
parts = scheme, netloc, path, params, query, fragment
source.baseURI = urlparse.urlunparse(parts)
source.baseURI = urllib.parse.urlunparse(parts)
return source
@ -242,8 +242,8 @@ class DOMEntityResolver(object):
return self._opener
def _create_opener(self):
import urllib2
return urllib2.build_opener()
import urllib.request
return urllib.request.build_opener()
def _guess_media_encoding(self, source):
info = source.byteStream.info()

View File

@ -3,7 +3,7 @@ A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
import os, urlparse, urllib
import os, urllib.parse, urllib.request
from . import handler
from . import xmlreader
@ -289,8 +289,8 @@ def prepare_input_source(source, base = ""):
source.setSystemId(sysidfilename)
f = open(sysidfilename, "rb")
else:
source.setSystemId(urlparse.urljoin(base, sysid))
f = urllib.urlopen(source.getSystemId())
source.setSystemId(urllib.parse.urljoin(base, sysid))
f = urllib.request.urlopen(source.getSystemId())
source.setByteStream(f)

View File

@ -1160,12 +1160,12 @@ class Transport:
if isinstance(host, tuple):
host, x509 = host
import urllib
auth, host = urllib.splituser(host)
import urllib.parse
auth, host = urllib.parse.splituser(host)
if auth:
import base64
auth = base64.encodestring(urllib.unquote(auth))
auth = base64.encodestring(urllib.parse.unquote(auth))
auth = "".join(auth.split()) # get rid of whitespace
extra_headers = [
("Authorization", "Basic " + auth)
@ -1321,11 +1321,11 @@ class ServerProxy:
# establish a "logical" server connection
# get the url
import urllib
type, uri = urllib.splittype(uri)
import urllib.parse
type, uri = urllib.parse.splittype(uri)
if type not in ("http", "https"):
raise IOError("unsupported XML-RPC protocol")
self.__host, self.__handler = urllib.splithost(uri)
self.__host, self.__handler = urllib.parse.splithost(uri)
if not self.__handler:
self.__handler = "/RPC2"

View File

@ -809,7 +809,7 @@ LIBSUBDIRS= tkinter site-packages test test/output test/data \
email email/mime email/test email/test/data \
html json json/tests http dbm xmlrpc \
sqlite3 sqlite3/test \
logging bsddb bsddb/test csv wsgiref \
logging bsddb bsddb/test csv wsgiref urllib \
lib2to3 lib2to3/fixes lib2to3/pgen2 lib2to3/tests \
ctypes ctypes/test ctypes/macholib idlelib idlelib/Icons \
distutils distutils/command distutils/tests $(XMLLIBSUBDIRS) \

View File

@ -81,6 +81,15 @@ Extension Modules
Library
-------
- A new ``urllib`` package was created. It consists of code from
``urllib``, ``urllib2``, ``urlparse``, and ``robotparser``. The old
modules have all been removed. The new package has five submodules:
``urllib.parse``, ``urllib.request``, ``urllib.response``,
``urllib.error``, and ``urllib.robotparser``. The
``urllib.request.urlopen()`` function uses the URL opener from
``urllib2``. (Note that the unit tests have not been renamed for the
beta, but they will be renamed in the future.)
- rfc822 has been removed in favor of the email package.
- mimetools has been removed in favor of the email package.