From b6d4a8e4de715d7a7f1fa009f5b636643e76e292 Mon Sep 17 00:00:00 2001 From: "Phillip J. Eby" Date: Wed, 3 Nov 2010 22:39:01 +0000 Subject: [PATCH] Implement http://bugs.python.org/issue10155 using And Clover's patch, w/added docs and support for more client-generated CGI variables. (This should complete the WSGI 1.0.1 compliance changes for Python 3.x.) --- Doc/library/wsgiref.rst | 44 ++++++++++++++ Lib/test/test_wsgiref.py | 4 +- Lib/wsgiref/handlers.py | 115 +++++++++++++++++++++++++++++++++-- Lib/wsgiref/simple_server.py | 5 +- Misc/NEWS | 4 ++ 5 files changed, 164 insertions(+), 8 deletions(-) diff --git a/Doc/library/wsgiref.rst b/Doc/library/wsgiref.rst index ceacbacdeaa..385652bd78e 100644 --- a/Doc/library/wsgiref.rst +++ b/Doc/library/wsgiref.rst @@ -456,6 +456,32 @@ input, output, and error streams. environment. +.. class:: IISCGIHandler() + + A specialized alternative to :class:`CGIHandler`, for use when deploying on + Microsoft's IIS web server, without having set the config allowPathInfo + option (IIS>=7) or metabase allowPathInfoForScriptMappings (IIS<7). + + By default, IIS gives a ``PATH_INFO`` that duplicates the ``SCRIPT_NAME`` at + the front, causing problems for WSGI applications that wish to implement + routing. This handler strips any such duplicated path. + + IIS can be configured to pass the correct ``PATH_INFO``, but this causes + another bug where ``PATH_TRANSLATED`` is wrong. Luckily this variable is + rarely used and is not guaranteed by WSGI. On IIS<7, though, the + setting can only be made on a vhost level, affecting all other script + mappings, many of which break when exposed to the ``PATH_TRANSLATED`` bug. + For this reason IIS<7 is almost never deployed with the fix. (Even IIS7 + rarely uses it because there is still no UI for it.) + + There is no way for CGI code to tell whether the option was set, so a + separate handler class is provided. It is used in the same way as + :class:`CGIHandler`, i.e., by calling ``IISCGIHandler().run(app)``, where + ``app`` is the WSGI application object you wish to invoke. + + .. versionadded:: 3.2 + + .. class:: BaseCGIHandler(stdin, stdout, stderr, environ, multithread=True, multiprocess=False) Similar to :class:`CGIHandler`, but instead of using the :mod:`sys` and @@ -696,6 +722,24 @@ input, output, and error streams. version of the response set to the client. It defaults to ``"1.0"``. +.. function:: read_environ() + + Transcode CGI variables from ``os.environ`` to PEP 3333 "bytes in unicode" + strings, returning a new dictionary. This function is used by + :class:`CGIHandler` and :class:`IISCGIHandler` in place of directly using + ``os.environ``, which is not necessarily WSGI-compliant on all platforms + and web servers using Python 3 -- specifically, ones where the OS's + actual environment is Unicode (i.e. Windows), or ones where the environment + is bytes, but the system encoding used by Python to decode it is anything + other than ISO-8859-1 (e.g. Unix systems using UTF-8). + + If you are implementing a CGI-based handler of your own, you probably want + to use this routine instead of just copying values out of ``os.environ`` + directly. + + .. versionadded:: 3.2 + + Examples -------- diff --git a/Lib/test/test_wsgiref.py b/Lib/test/test_wsgiref.py index 49d372d6c66..8051b4a0813 100644 --- a/Lib/test/test_wsgiref.py +++ b/Lib/test/test_wsgiref.py @@ -131,7 +131,7 @@ class IntegrationTests(TestCase): def check_hello(self, out, has_length=True): self.assertEqual(out, ("HTTP/1.0 200 OK\r\n" - "Server: WSGIServer/0.1 Python/"+sys.version.split()[0]+"\r\n" + "Server: WSGIServer/0.2 Python/"+sys.version.split()[0]+"\r\n" "Content-Type: text/plain\r\n" "Date: Mon, 05 Jun 2006 18:49:54 GMT\r\n" + (has_length and "Content-Length: 13\r\n" or "") + @@ -187,7 +187,7 @@ class IntegrationTests(TestCase): ver = sys.version.split()[0].encode('ascii') self.assertEqual( b"HTTP/1.0 200 OK\r\n" - b"Server: WSGIServer/0.1 Python/" + ver + b"\r\n" + b"Server: WSGIServer/0.2 Python/" + ver + b"\r\n" b"Content-Type: text/plain; charset=utf-8\r\n" b"Date: Wed, 24 Dec 2008 13:29:32 GMT\r\n" b"\r\n" diff --git a/Lib/wsgiref/handlers.py b/Lib/wsgiref/handlers.py index 3e112190959..6d6f80ffd7d 100644 --- a/Lib/wsgiref/handlers.py +++ b/Lib/wsgiref/handlers.py @@ -5,7 +5,10 @@ from .headers import Headers import sys, os, time -__all__ = ['BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler'] +__all__ = [ + 'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler', + 'IISCGIHandler', 'read_environ' +] # Weekday and month names for HTTP date/time formatting; always English! _weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] @@ -19,6 +22,74 @@ def format_date_time(timestamp): _weekdayname[wd], day, _monthname[month], year, hh, mm, ss ) +_is_request = { + 'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE', + 'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT', +}.__contains__ + +def _needs_transcode(k): + return _is_request(k) or k.startswith('HTTP_') or k.startswith('SSL_') \ + or (k.startswith('REDIRECT_') and _needs_transcode(k[9:])) + +def read_environ(): + """Read environment, fixing HTTP variables""" + enc = sys.getfilesystemencoding() + esc = 'surrogateescape' + try: + ''.encode('utf-8', esc) + except LookupError: + esc = 'replace' + environ = {} + + # Take the basic environment from native-unicode os.environ. Attempt to + # fix up the variables that come from the HTTP request to compensate for + # the bytes->unicode decoding step that will already have taken place. + for k, v in os.environ.items(): + if _needs_transcode(k): + + # On win32, the os.environ is natively Unicode. Different servers + # decode the request bytes using different encodings. + if sys.platform == 'win32': + software = os.environ.get('SERVER_SOFTWARE', '').lower() + + # On IIS, the HTTP request will be decoded as UTF-8 as long + # as the input is a valid UTF-8 sequence. Otherwise it is + # decoded using the system code page (mbcs), with no way to + # detect this has happened. Because UTF-8 is the more likely + # encoding, and mbcs is inherently unreliable (an mbcs string + # that happens to be valid UTF-8 will not be decoded as mbcs) + # always recreate the original bytes as UTF-8. + if software.startswith('microsoft-iis/'): + v = v.encode('utf-8').decode('iso-8859-1') + + # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1) direct + # to the Unicode environ. No modification needed. + elif software.startswith('apache/'): + pass + + # Python 3's http.server.CGIHTTPRequestHandler decodes + # using the urllib.unquote default of UTF-8, amongst other + # issues. + elif ( + software.startswith('simplehttp/') + and 'python/3' in software + ): + v = v.encode('utf-8').decode('iso-8859-1') + + # For other servers, guess that they have written bytes to + # the environ using stdio byte-oriented interfaces, ending up + # with the system code page. + else: + v = v.encode(enc, 'replace').decode('iso-8859-1') + + # Recover bytes from unicode environ, using surrogate escapes + # where available (Python 3.1+). + else: + v = v.encode(enc, esc).decode('iso-8859-1') + + environ[k] = v + return environ + class BaseHandler: """Manage the invocation of a WSGI application""" @@ -36,7 +107,7 @@ class BaseHandler: # os_environ is used to supply configuration from the OS environment: # by default it's a copy of 'os.environ' as of import time, but you can # override this in e.g. your __init__ method. - os_environ = dict(os.environ.items()) + os_environ= read_environ() # Collaborator classes wsgi_file_wrapper = FileWrapper # set to None to disable @@ -431,6 +502,42 @@ class CGIHandler(BaseCGIHandler): def __init__(self): BaseCGIHandler.__init__( - self, sys.stdin, sys.stdout, sys.stderr, dict(os.environ.items()), - multithread=False, multiprocess=True + self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr, + read_environ(), multithread=False, multiprocess=True + ) + + +class IISCGIHandler(BaseCGIHandler): + """CGI-based invocation with workaround for IIS path bug + + This handler should be used in preference to CGIHandler when deploying on + Microsoft IIS without having set the config allowPathInfo option (IIS>=7) + or metabase allowPathInfoForScriptMappings (IIS<7). + """ + wsgi_run_once = True + os_environ = {} + + # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at + # the front, causing problems for WSGI applications that wish to implement + # routing. This handler strips any such duplicated path. + + # IIS can be configured to pass the correct PATH_INFO, but this causes + # another bug where PATH_TRANSLATED is wrong. Luckily this variable is + # rarely used and is not guaranteed by WSGI. On IIS<7, though, the + # setting can only be made on a vhost level, affecting all other script + # mappings, many of which break when exposed to the PATH_TRANSLATED bug. + # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7 + # rarely uses it because there is still no UI for it.) + + # There is no way for CGI code to tell whether the option was set, so a + # separate handler class is provided. + def __init__(self): + environ= read_environ() + path = environ.get('PATH_INFO', '') + script = environ.get('SCRIPT_NAME', '') + if (path+'/').startswith(script+'/'): + environ['PATH_INFO'] = path[len(script):] + BaseCGIHandler.__init__( + self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr, + environ, multithread=False, multiprocess=True ) diff --git a/Lib/wsgiref/simple_server.py b/Lib/wsgiref/simple_server.py index 550f4d86a92..af82f953c53 100644 --- a/Lib/wsgiref/simple_server.py +++ b/Lib/wsgiref/simple_server.py @@ -15,7 +15,7 @@ import sys import urllib.parse from wsgiref.handlers import SimpleHandler -__version__ = "0.1" +__version__ = "0.2" __all__ = ['WSGIServer', 'WSGIRequestHandler', 'demo_app', 'make_server'] @@ -74,13 +74,14 @@ class WSGIRequestHandler(BaseHTTPRequestHandler): def get_environ(self): env = self.server.base_environ.copy() env['SERVER_PROTOCOL'] = self.request_version + env['SERVER_SOFTWARE'] = self.server_version env['REQUEST_METHOD'] = self.command if '?' in self.path: path,query = self.path.split('?',1) else: path,query = self.path,'' - env['PATH_INFO'] = urllib.parse.unquote(path) + env['PATH_INFO'] = urllib.parse.unquote_to_bytes(path).decode('iso-8859-1') env['QUERY_STRING'] = query host = self.address_string() diff --git a/Misc/NEWS b/Misc/NEWS index ba5ba384e8c..2b813712d96 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -59,6 +59,10 @@ Core and Builtins Library ------- +- Issue #10155: Add IISCGIHandler to wsgiref.handlers to support IIS + CGI environment better, and to correct unicode environment values + for WSGI 1.0.1. + - Issue #10281: nntplib now returns None for absent fields in the OVER/XOVER response, instead of raising an exception.