From 89584c97e44b7cc7ca782e8e4c668b0647147f24 Mon Sep 17 00:00:00 2001 From: Berker Peksag Date: Thu, 25 Jun 2015 23:38:48 +0300 Subject: [PATCH] Issue #23684: Clarify the return value of the scheme attribute of ParseResult and SplitResult objects. Patch by Martin Panter. --- Doc/library/urllib.parse.rst | 16 ++++++++------ Lib/test/test_urlparse.py | 41 ++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index 154a521fa1e..fbbabcadf9d 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -76,13 +76,15 @@ or on combining URL components into a URL string. ParseResult(scheme='', netloc='', path='help/Python.html', params='', query='', fragment='') - If the *scheme* argument is specified, it gives the default addressing - scheme, to be used only if the URL does not specify one. The default value for - this argument is the empty string. + The *scheme* argument gives the default addressing scheme, to be + used only if the URL does not specify one. It should be the same type + (text or bytes) as *urlstring*, except that the default value ``''`` is + always allowed, and is automatically converted to ``b''`` if appropriate. If the *allow_fragments* argument is false, fragment identifiers are not - recognized and parsed as part of the preceding component. The default value - for this argument is :const:`True`. + recognized. Instead, they are parsed as part of the path, parameters + or query component, and :attr:`fragment` is set to the empty string in + the return value. The return value is actually an instance of a subclass of :class:`tuple`. This class has the following additional read-only convenience attributes: @@ -90,7 +92,7 @@ or on combining URL components into a URL string. +------------------+-------+--------------------------+----------------------+ | Attribute | Index | Value | Value if not present | +==================+=======+==========================+======================+ - | :attr:`scheme` | 0 | URL scheme specifier | empty string | + | :attr:`scheme` | 0 | URL scheme specifier | *scheme* parameter | +------------------+-------+--------------------------+----------------------+ | :attr:`netloc` | 1 | Network location part | empty string | +------------------+-------+--------------------------+----------------------+ @@ -206,7 +208,7 @@ or on combining URL components into a URL string. +------------------+-------+-------------------------+----------------------+ | Attribute | Index | Value | Value if not present | +==================+=======+=========================+======================+ - | :attr:`scheme` | 0 | URL scheme specifier | empty string | + | :attr:`scheme` | 0 | URL scheme specifier | *scheme* parameter | +------------------+-------+-------------------------+----------------------+ | :attr:`netloc` | 1 | Network location part | empty string | +------------------+-------+-------------------------+----------------------+ diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index ad9820bf23e..1775ef33536 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -663,6 +663,47 @@ class UrlParseTestCase(unittest.TestCase): self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"), (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'')) + def test_default_scheme(self): + # Exercise the scheme parameter of urlparse() and urlsplit() + for func in (urllib.parse.urlparse, urllib.parse.urlsplit): + with self.subTest(function=func): + result = func("http://example.net/", "ftp") + self.assertEqual(result.scheme, "http") + result = func(b"http://example.net/", b"ftp") + self.assertEqual(result.scheme, b"http") + self.assertEqual(func("path", "ftp").scheme, "ftp") + self.assertEqual(func("path", scheme="ftp").scheme, "ftp") + self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp") + self.assertEqual(func("path").scheme, "") + self.assertEqual(func(b"path").scheme, b"") + self.assertEqual(func(b"path", "").scheme, b"") + + def test_parse_fragments(self): + # Exercise the allow_fragments parameter of urlparse() and urlsplit() + tests = ( + ("http:#frag", "path"), + ("//example.net#frag", "path"), + ("index.html#frag", "path"), + (";a=b#frag", "params"), + ("?a=b#frag", "query"), + ("#frag", "path"), + ) + for url, attr in tests: + for func in (urllib.parse.urlparse, urllib.parse.urlsplit): + if attr == "params" and func is urllib.parse.urlsplit: + attr = "path" + with self.subTest(url=url, function=func): + result = func(url, allow_fragments=False) + self.assertEqual(result.fragment, "") + self.assertTrue(getattr(result, attr).endswith("#frag")) + self.assertEqual(func(url, "", False).fragment, "") + + result = func(url, allow_fragments=True) + self.assertEqual(result.fragment, "frag") + self.assertFalse(getattr(result, attr).endswith("frag")) + self.assertEqual(func(url, "", True).fragment, "frag") + self.assertEqual(func(url).fragment, "frag") + def test_mixed_types_rejected(self): # Several functions that process either strings or ASCII encoded bytes # accept multiple arguments. Check they reject mixed type input