mirror of https://github.com/python/cpython
Issue #16423: urllib.request now has support for ``data:`` URLs.
Patch by Mathias Panzenböck.
This commit is contained in:
parent
a833e0d8ae
commit
df204be922
|
@ -121,7 +121,7 @@ The :mod:`urllib.request` module defines the following functions:
|
|||
instances of them or subclasses of them: :class:`ProxyHandler`,
|
||||
:class:`UnknownHandler`, :class:`HTTPHandler`, :class:`HTTPDefaultErrorHandler`,
|
||||
:class:`HTTPRedirectHandler`, :class:`FTPHandler`, :class:`FileHandler`,
|
||||
:class:`HTTPErrorProcessor`.
|
||||
:class:`HTTPErrorProcessor`, :class:`DataHandler`.
|
||||
|
||||
If the Python installation has SSL support (i.e., if the :mod:`ssl` module
|
||||
can be imported), :class:`HTTPSHandler` will also be added.
|
||||
|
@ -346,6 +346,11 @@ The following classes are provided:
|
|||
|
||||
Open local files.
|
||||
|
||||
.. class:: DataHandler()
|
||||
|
||||
Open data URLs.
|
||||
|
||||
.. versionadded:: 3.4
|
||||
|
||||
.. class:: FTPHandler()
|
||||
|
||||
|
@ -972,6 +977,21 @@ FileHandler Objects
|
|||
hostname is given, an :exc:`URLError` is raised.
|
||||
|
||||
|
||||
.. _data-handler-objects:
|
||||
|
||||
DataHandler Objects
|
||||
-------------------
|
||||
|
||||
.. method:: DataHandler.data_open(req)
|
||||
|
||||
Read a data URL. This kind of URL contains the content encoded in the URL
|
||||
itself. The data URL syntax is specified in :rfc:`2397`. This implementation
|
||||
ignores whitespace in base64 encoded data URLs so the URL may be wrapped
|
||||
in whatever source file it comes from. But even though some browsers don't
|
||||
mind missing padding at the end of a base64 encoded data URL, this
|
||||
implementation will raise a :exc:`ValueError` in that case.
|
||||
|
||||
|
||||
.. _ftp-handler-objects:
|
||||
|
||||
FTPHandler Objects
|
||||
|
@ -1374,7 +1394,9 @@ some point in the future.
|
|||
pair: FTP; protocol
|
||||
|
||||
* Currently, only the following protocols are supported: HTTP (versions 0.9 and
|
||||
1.0), FTP, and local files.
|
||||
1.0), FTP, local files, and data URLs.
|
||||
|
||||
.. versionchanged:: 3.4 Added support for data URLs.
|
||||
|
||||
* The caching feature of :func:`urlretrieve` has been disabled until someone
|
||||
finds the time to hack proper processing of Expiration time headers.
|
||||
|
|
|
@ -337,6 +337,79 @@ Content-Type: text/html; charset=iso-8859-1
|
|||
with support.check_warnings(('',DeprecationWarning)):
|
||||
urllib.request.URLopener()
|
||||
|
||||
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        # self.text, UTF-8 encoded and percent-quoted
        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        # self.text, ISO-8859-1 encoded and base64'd; the trailing "="
        # padding is percent-quoted as "%3D"
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,%20iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = self._open(self.text_url)
        self.text_url_base64_resp = self._open(self.text_url_base64)
        self.image_url_resp = self._open(self.image_url)

    def _open(self, url):
        """Open *url* and arrange for the response to be closed afterwards."""
        resp = urllib.request.urlopen(url)
        self.addCleanup(resp.close)
        return resp

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # An empty media type falls back to the default media type
        self.assertEqual(self._open("data:,").info().get_params(),
                         [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decode with whatever charset the response headers advertise
        charset = dict(self.text_url_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        charset = dict(
            self.text_url_base64_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_base64_resp.read().decode(charset),
                         self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:;base64,Cg=')
|
||||
|
||||
class urlretrieve_FileTests(unittest.TestCase):
|
||||
"""Test urllib.urlretrieve() on local files"""
|
||||
|
||||
|
@ -1313,6 +1386,7 @@ def test_main():
|
|||
support.run_unittest(
|
||||
urlopen_FileTests,
|
||||
urlopen_HttpTests,
|
||||
urlopen_DataTests,
|
||||
urlretrieve_FileTests,
|
||||
urlretrieve_HttpTests,
|
||||
ProxyTests,
|
||||
|
|
|
@ -103,7 +103,8 @@ from urllib.error import URLError, HTTPError, ContentTooShortError
|
|||
from urllib.parse import (
|
||||
urlparse, urlsplit, urljoin, unwrap, quote, unquote,
|
||||
splittype, splithost, splitport, splituser, splitpasswd,
|
||||
splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse)
|
||||
splitattr, splitquery, splitvalue, splittag, to_bytes,
|
||||
unquote_to_bytes, urlunparse)
|
||||
from urllib.response import addinfourl, addclosehook
|
||||
|
||||
# check for SSL
|
||||
|
@ -121,7 +122,7 @@ __all__ = [
|
|||
'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
|
||||
'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
|
||||
'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
|
||||
'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
|
||||
'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
|
||||
'UnknownHandler', 'HTTPErrorProcessor',
|
||||
# Functions
|
||||
'urlopen', 'install_opener', 'build_opener',
|
||||
|
@ -535,7 +536,8 @@ def build_opener(*handlers):
|
|||
opener = OpenerDirector()
|
||||
default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
|
||||
HTTPDefaultErrorHandler, HTTPRedirectHandler,
|
||||
FTPHandler, FileHandler, HTTPErrorProcessor]
|
||||
FTPHandler, FileHandler, HTTPErrorProcessor,
|
||||
DataHandler]
|
||||
if hasattr(http.client, "HTTPSConnection"):
|
||||
default_classes.append(HTTPSHandler)
|
||||
skip = set()
|
||||
|
@ -1541,6 +1543,36 @@ class CacheFTPHandler(FTPHandler):
|
|||
self.cache.clear()
|
||||
self.timeout.clear()
|
||||
|
||||
class DataHandler(BaseHandler):
    """Handler that opens ``data:`` URLs as specified in RFC 2397."""

    def data_open(self, req):
        """Return a response whose body is the data embedded in the URL.

        *req* only needs a ``full_url`` attribute holding the data URL; any
        POSTed data is ignored.  The result is an ``addinfourl`` object
        wrapping the decoded bytes, with ``Content-type`` and
        ``Content-length`` headers derived from the URL.

        Raises ValueError when the URL has no "," separating the media type
        from the data, or when base64 data cannot be decoded (for example
        because of missing padding).
        """
        # syntax:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        url = req.full_url

        # Strip the scheme, then split the header part from the payload.
        # The tuple unpacking raises ValueError if a separator is missing.
        _, remainder = url.split(":", 1)
        mediatype, data = remainder.split(",", 1)

        # even base64 encoded data URLs might be quoted so unquote in any case:
        data = unquote_to_bytes(data)
        base64_marker = ";base64"
        if mediatype.endswith(base64_marker):
            data = base64.decodebytes(data)
            mediatype = mediatype[:-len(base64_marker)]

        if not mediatype:
            mediatype = "text/plain;charset=US-ASCII"
        elif mediatype.startswith(";"):
            # RFC 2397 shorthand: "text/plain" may be omitted while
            # parameters such as the charset are still supplied.
            mediatype = "text/plain" + mediatype

        headers = email.message_from_string(
            "Content-type: %s\nContent-length: %d\n" % (mediatype, len(data)))

        return addinfourl(io.BytesIO(data), headers, url)
|
||||
|
||||
|
||||
# Code moved from the old urllib module
|
||||
|
||||
|
|
|
@ -884,6 +884,7 @@ Mike Pall
|
|||
Todd R. Palmer
|
||||
Juan David Ibáñez Palomar
|
||||
Jan Palus
|
||||
Mathias Panzenböck
|
||||
M. Papillon
|
||||
Peter Parente
|
||||
Alexandre Parenteau
|
||||
|
|
|
@ -138,6 +138,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #16423: urllib.request now has support for ``data:`` URLs. Patch by
|
||||
Mathias Panzenböck.
|
||||
|
||||
- Issue #4473: Add a POP3.stls() to switch a clear-text POP3 session into
|
||||
an encrypted POP3 session, on supported servers. Patch by Lorenzo Catucci.
|
||||
|
||||
|
|
Loading…
Reference in New Issue