Issues #25232, #24657: Merge two CGI server fixes from 3.4 into 3.5

2015-10-03 06:03:25 +00:00 · 2015-10-03 06:03:25 +00:00 · 56b76d25dd
parent 103d06c81e cb29e8c0e5
commit 56b76d25dd
4 changed files with 48 additions and 9 deletions
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@ -837,13 +837,15 @@ def _url_collapse_path(path):
    The utility of this function is limited to is_cgi method and helps
    preventing some security attacks.
-    Returns: A tuple of (head, tail) where tail is everything after the final /
+    Returns: The reconstituted URL, which will always start with a '/'.
-    and head is everything before it.  Head will always start with a '/' and,
-    if it contains anything else, never have a trailing '/'.
    Raises: IndexError if too many '..' occur within the path.
    """
    # Query component should not be involved.
    path, _, query = path.partition('?')
    path = urllib.parse.unquote(path)
    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
    # path semantics rather than local operating system semantics.
    path_parts = path.split('/')
@ -864,6 +866,9 @@ def _url_collapse_path(path):
    else:
        tail_part = ''
    if query:
        tail_part = '?'.join((tail_part, query))
    splitpath = ('/' + '/'.join(head_parts), tail_part)
    collapsed_path = "/".join(splitpath)
@ -947,7 +952,7 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
        (and the next character is a '/' or the end of the string).
        """
-        collapsed_path = _url_collapse_path(urllib.parse.unquote(self.path))
+        collapsed_path = _url_collapse_path(self.path)
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
@ -984,11 +989,7 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
                break
        # find an explicit query string, if present.
-        i = rest.rfind('?')
+        rest, _, query = rest.partition('?')
-        if i >= 0:
-            rest, query = rest[:i], rest[i+1:]
-        else:
-            query = ''
        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@ -425,6 +425,16 @@ print("%%s, %%s, %%s" %% (form.getfirst("spam"), form.getfirst("eggs"),
                          form.getfirst("bacon")))
 """
 cgi_file4 = """\
 #!%s
 import os
 print("Content-type: text/html")
 print()
 print(os.environ["%s"])
 """
@unittest.skipIf(hasattr(os, 'geteuid') and os.geteuid() == 0,
        "This test can't be run reliably as root (issue #13308).")
@ -446,6 +456,7 @@ class CGIHTTPServerTestCase(BaseTestCase):
        self.file1_path = None
        self.file2_path = None
        self.file3_path = None
        self.file4_path = None
        # The shebang line should be pure ASCII: use symlink if possible.
        # See issue #7668.
@ -484,6 +495,11 @@ class CGIHTTPServerTestCase(BaseTestCase):
            file3.write(cgi_file1 % self.pythonexe)
        os.chmod(self.file3_path, 0o777)
        self.file4_path = os.path.join(self.cgi_dir, 'file4.py')
        with open(self.file4_path, 'w', encoding='utf-8') as file4:
            file4.write(cgi_file4 % (self.pythonexe, 'QUERY_STRING'))
        os.chmod(self.file4_path, 0o777)
        os.chdir(self.parent_dir)
    def tearDown(self):
@ -499,6 +515,8 @@ class CGIHTTPServerTestCase(BaseTestCase):
                os.remove(self.file2_path)
            if self.file3_path:
                os.remove(self.file3_path)
            if self.file4_path:
                os.remove(self.file4_path)
            os.rmdir(self.cgi_child_dir)
            os.rmdir(self.cgi_dir)
            os.rmdir(self.parent_dir)
@ -606,6 +624,19 @@ class CGIHTTPServerTestCase(BaseTestCase):
            (b'Hello World' + self.linesep, 'text/html', HTTPStatus.OK),
            (res.read(), res.getheader('Content-type'), res.status))
    def test_query_with_multiple_question_mark(self):
        res = self.request('/cgi-bin/file4.py?a=b?c=d')
        self.assertEqual(
            (b'a=b?c=d' + self.linesep, 'text/html', 200),
            (res.read(), res.getheader('Content-type'), res.status))
    def test_query_with_continuous_slashes(self):
        res = self.request('/cgi-bin/file4.py?k=aa%2F%2Fbb&//q//p//=//a//b//')
        self.assertEqual(
            (b'k=aa%2F%2Fbb&//q//p//=//a//b//' + self.linesep,
             'text/html', 200),
            (res.read(), res.getheader('Content-type'), res.status))
 class SocketlessRequestHandler(SimpleHTTPRequestHandler):
    def __init__(self):
--- a/Misc/ACKS
+++ b/Misc/ACKS
@ -1580,6 +1580,7 @@ Daniel Wozniak
 Wei Wu
 Heiko Wundram
 Doug Wyatt
 Xiang Zhang
 Robert Xiao
 Florent Xicluna
 Hirokazu Yamamoto
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -34,6 +34,12 @@ Core and Builtins
 Library
 -------
 - Issue #25232: Fix CGIRequestHandler to split the query from the URL at the
  first question mark (?) rather than the last. Patch from Xiang Zhang.
 - Issue #24657: Prevent CGIRequestHandler from collapsing slashes in the
  query part of the URL as if it were a path. Patch from Xiang Zhang.
 - Issue #24483: C implementation of functools.lru_cache() now calculates key's
  hash only once.