3.2 - Issue 10484 - Incorporate improvements to CGI module - Suggested by Glenn Linderman. Refactor code and tests

This commit is contained in:
Senthil Kumaran 2012-04-12 02:34:32 +08:00
parent f6cd9b2d46
commit d70846b1b1
2 changed files with 56 additions and 51 deletions

View File

@ -825,44 +825,47 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
# Utilities for CGIHTTPRequestHandler # Utilities for CGIHTTPRequestHandler
# TODO(gregory.p.smith): Move this into an appropriate library. def _url_collapse_path(path):
def _url_collapse_path_split(path):
""" """
Given a URL path, remove extra '/'s and '.' path elements and collapse Given a URL path, remove extra '/'s and '.' path elements and collapse
any '..' references. any '..' references and returns a colllapsed path.
Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
The utility of this function is limited to is_cgi method and helps
preventing some security attacks.
Returns: A tuple of (head, tail) where tail is everything after the final / Returns: A tuple of (head, tail) where tail is everything after the final /
and head is everything before it. Head will always start with a '/' and, and head is everything before it. Head will always start with a '/' and,
if it contains anything else, never have a trailing '/'. if it contains anything else, never have a trailing '/'.
Raises: IndexError if too many '..' occur within the path. Raises: IndexError if too many '..' occur within the path.
""" """
# Similar to os.path.split(os.path.normpath(path)) but specific to URL # Similar to os.path.split(os.path.normpath(path)) but specific to URL
# path semantics rather than local operating system semantics. # path semantics rather than local operating system semantics.
path_parts = [] path_parts = path.split('/')
for part in path.split('/'): head_parts = []
if part == '.': for part in path_parts[:-1]:
path_parts.append('') if part == '..':
else: head_parts.pop() # IndexError if more '..' than prior parts
path_parts.append(part) elif part and part != '.':
# Filter out blank non trailing parts before consuming the '..'. head_parts.append( part )
path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:]
if path_parts: if path_parts:
tail_part = path_parts.pop() tail_part = path_parts.pop()
if tail_part:
if tail_part == '..':
head_parts.pop()
tail_part = ''
elif tail_part == '.':
tail_part = ''
else: else:
tail_part = '' tail_part = ''
head_parts = []
for part in path_parts: splitpath = ('/' + '/'.join(head_parts), tail_part)
if part == '..': collapsed_path = "/".join(splitpath)
head_parts.pop()
else: return collapsed_path
head_parts.append(part)
if tail_part and tail_part == '..':
head_parts.pop()
tail_part = ''
return ('/' + '/'.join(head_parts), tail_part)
nobody = None nobody = None
@ -943,16 +946,15 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
(and the next character is a '/' or the end of the string). (and the next character is a '/' or the end of the string).
""" """
collapsed_path = _url_collapse_path(self.path)
splitpath = _url_collapse_path_split(self.path) dir_sep = collapsed_path.find('/', 1)
joined_path = '/'.join(splitpath) head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
dir_sep = joined_path.find('/',1)
head, tail = joined_path[:dir_sep], joined_path[dir_sep+1:]
if head in self.cgi_directories: if head in self.cgi_directories:
self.cgi_info = head, tail self.cgi_info = head, tail
return True return True
return False return False
cgi_directories = ['/cgi-bin', '/htbin'] cgi_directories = ['/cgi-bin', '/htbin']
def is_executable(self, path): def is_executable(self, path):

View File

@ -366,41 +366,44 @@ class CGIHTTPServerTestCase(BaseTestCase):
finally: finally:
BaseTestCase.tearDown(self) BaseTestCase.tearDown(self)
def test_url_collapse_path_split(self): def test_url_collapse_path(self):
# verify tail is the last portion and head is the rest on proper urls
test_vectors = { test_vectors = {
'': ('/', ''), '': '//',
'..': IndexError, '..': IndexError,
'/.//..': IndexError, '/.//..': IndexError,
'/': ('/', ''), '/': '//',
'//': ('/', ''), '//': '//',
'/\\': ('/', '\\'), '/\\': '//\\',
'/.//': ('/', ''), '/.//': '//',
'cgi-bin/file1.py': ('/cgi-bin', 'file1.py'), 'cgi-bin/file1.py': '/cgi-bin/file1.py',
'/cgi-bin/file1.py': ('/cgi-bin', 'file1.py'), '/cgi-bin/file1.py': '/cgi-bin/file1.py',
'a': ('/', 'a'), 'a': '//a',
'/a': ('/', 'a'), '/a': '//a',
'//a': ('/', 'a'), '//a': '//a',
'./a': ('/', 'a'), './a': '//a',
'./C:/': ('/C:', ''), './C:/': '/C:/',
'/a/b': ('/a', 'b'), '/a/b': '/a/b',
'/a/b/': ('/a/b', ''), '/a/b/': '/a/b/',
'/a/b/c/..': ('/a/b', ''), '/a/b/.': '/a/b/',
'/a/b/c/../d': ('/a/b', 'd'), '/a/b/c/..': '/a/b/',
'/a/b/c/../d/e/../f': ('/a/b/d', 'f'), '/a/b/c/../d': '/a/b/d',
'/a/b/c/../d/e/../../f': ('/a/b', 'f'), '/a/b/c/../d/e/../f': '/a/b/d/f',
'/a/b/c/../d/e/.././././..//f': ('/a/b', 'f'), '/a/b/c/../d/e/../../f': '/a/b/f',
'/a/b/c/../d/e/.././././..//f': '/a/b/f',
'../a/b/c/../d/e/.././././..//f': IndexError, '../a/b/c/../d/e/.././././..//f': IndexError,
'/a/b/c/../d/e/../../../f': ('/a', 'f'), '/a/b/c/../d/e/../../../f': '/a/f',
'/a/b/c/../d/e/../../../../f': ('/', 'f'), '/a/b/c/../d/e/../../../../f': '//f',
'/a/b/c/../d/e/../../../../../f': IndexError, '/a/b/c/../d/e/../../../../../f': IndexError,
'/a/b/c/../d/e/../../../../f/..': ('/', ''), '/a/b/c/../d/e/../../../../f/..': '//',
'/a/b/c/../d/e/../../../../f/../.': '//',
} }
for path, expected in test_vectors.items(): for path, expected in test_vectors.items():
if isinstance(expected, type) and issubclass(expected, Exception): if isinstance(expected, type) and issubclass(expected, Exception):
self.assertRaises(expected, self.assertRaises(expected,
server._url_collapse_path_split, path) server._url_collapse_path, path)
else: else:
actual = server._url_collapse_path_split(path) actual = server._url_collapse_path(path)
self.assertEqual(expected, actual, self.assertEqual(expected, actual,
msg='path = %r\nGot: %r\nWanted: %r' % msg='path = %r\nGot: %r\nWanted: %r' %
(path, actual, expected)) (path, actual, expected))