Implement http://bugs.python.org/issue10155 using And Clover's patch, w/added

docs and support for more client-generated CGI variables.  (This should
complete the WSGI 1.0.1 compliance changes for Python 3.x.)
This commit is contained in:
Phillip J. Eby 2010-11-03 22:39:01 +00:00
parent 3c6830ca8f
commit b6d4a8e4de
5 changed files with 164 additions and 8 deletions

View File

@ -456,6 +456,32 @@ input, output, and error streams.
environment.
.. class:: IISCGIHandler()
A specialized alternative to :class:`CGIHandler`, for use when deploying on
Microsoft's IIS web server, without having set the config allowPathInfo
option (IIS>=7) or metabase allowPathInfoForScriptMappings (IIS<7).
By default, IIS gives a ``PATH_INFO`` that duplicates the ``SCRIPT_NAME`` at
the front, causing problems for WSGI applications that wish to implement
routing. This handler strips any such duplicated path.
IIS can be configured to pass the correct ``PATH_INFO``, but this causes
another bug where ``PATH_TRANSLATED`` is wrong. Luckily this variable is
rarely used and is not guaranteed by WSGI. On IIS<7, though, the
setting can only be made on a vhost level, affecting all other script
mappings, many of which break when exposed to the ``PATH_TRANSLATED`` bug.
For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
rarely uses it because there is still no UI for it.)
There is no way for CGI code to tell whether the option was set, so a
separate handler class is provided. It is used in the same way as
:class:`CGIHandler`, i.e., by calling ``IISCGIHandler().run(app)``, where
``app`` is the WSGI application object you wish to invoke.
.. versionadded:: 3.2
.. class:: BaseCGIHandler(stdin, stdout, stderr, environ, multithread=True, multiprocess=False)
Similar to :class:`CGIHandler`, but instead of using the :mod:`sys` and
@ -696,6 +722,24 @@ input, output, and error streams.
version of the response sent to the client. It defaults to ``"1.0"``.
.. function:: read_environ()
Transcode CGI variables from ``os.environ`` to PEP 3333 "bytes in unicode"
strings, returning a new dictionary. This function is used by
:class:`CGIHandler` and :class:`IISCGIHandler` in place of directly using
``os.environ``, which is not necessarily WSGI-compliant on all platforms
and web servers using Python 3 -- specifically, ones where the OS's
actual environment is Unicode (i.e. Windows), or ones where the environment
is bytes, but the system encoding used by Python to decode it is anything
other than ISO-8859-1 (e.g. Unix systems using UTF-8).
If you are implementing a CGI-based handler of your own, you probably want
to use this routine instead of just copying values out of ``os.environ``
directly.
.. versionadded:: 3.2
Examples
--------

View File

@ -131,7 +131,7 @@ class IntegrationTests(TestCase):
def check_hello(self, out, has_length=True):
self.assertEqual(out,
("HTTP/1.0 200 OK\r\n"
"Server: WSGIServer/0.1 Python/"+sys.version.split()[0]+"\r\n"
"Server: WSGIServer/0.2 Python/"+sys.version.split()[0]+"\r\n"
"Content-Type: text/plain\r\n"
"Date: Mon, 05 Jun 2006 18:49:54 GMT\r\n" +
(has_length and "Content-Length: 13\r\n" or "") +
@ -187,7 +187,7 @@ class IntegrationTests(TestCase):
ver = sys.version.split()[0].encode('ascii')
self.assertEqual(
b"HTTP/1.0 200 OK\r\n"
b"Server: WSGIServer/0.1 Python/" + ver + b"\r\n"
b"Server: WSGIServer/0.2 Python/" + ver + b"\r\n"
b"Content-Type: text/plain; charset=utf-8\r\n"
b"Date: Wed, 24 Dec 2008 13:29:32 GMT\r\n"
b"\r\n"

View File

@ -5,7 +5,10 @@ from .headers import Headers
import sys, os, time
__all__ = ['BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler']
__all__ = [
'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler',
'IISCGIHandler', 'read_environ'
]
# Weekday and month names for HTTP date/time formatting; always English!
_weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
@ -19,6 +22,74 @@ def format_date_time(timestamp):
_weekdayname[wd], day, _monthname[month], year, hh, mm, ss
)
# Membership test for the CGI variables whose values are taken from the
# HTTP request (as opposed to server configuration).
_is_request = {
    'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE',
    'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT',
}.__contains__


def _needs_transcode(k):
    """Return True if environment variable name `k` needs transcoding.

    A name qualifies when it is one of the request variables accepted by
    ``_is_request``, or starts with ``HTTP_`` or ``SSL_``.  Any number of
    leading ``REDIRECT_`` prefixes is ignored, so ``REDIRECT_PATH_INFO`` and
    ``REDIRECT_REDIRECT_HTTP_HOST`` qualify as well.
    """
    prefix = 'REDIRECT_'
    # Skip the prefixes with a moving offset instead of recursing on k[9:],
    # so that a name with a very long prefix chain cannot exhaust the
    # interpreter's recursion limit.
    pos = 0
    while k.startswith(prefix, pos):
        pos += len(prefix)
    name = k[pos:] if pos else k
    return _is_request(name) or name.startswith(('HTTP_', 'SSL_'))
def read_environ():
    """Read environment, fixing HTTP variables

    Returns a new dictionary holding every key of ``os.environ``.  Values of
    variables selected by ``_needs_transcode()`` are re-encoded to bytes and
    decoded as ISO-8859-1; all other values are copied unchanged.
    """
    enc = sys.getfilesystemencoding()
    esc = 'surrogateescape'
    try:
        ''.encode('utf-8', esc)
    except LookupError:
        esc = 'replace'

    # The encoding used to recover the request bytes depends only on the
    # platform and the server software, never on the individual variable,
    # so it is chosen once here rather than for every variable in the loop.
    # `codec` is the argument tuple for str.encode(), or None when the
    # values must be left untouched.
    if sys.platform == 'win32':
        # On win32, the os.environ is natively Unicode. Different servers
        # decode the request bytes using different encodings.
        software = os.environ.get('SERVER_SOFTWARE', '').lower()

        # On IIS, the HTTP request will be decoded as UTF-8 as long
        # as the input is a valid UTF-8 sequence. Otherwise it is
        # decoded using the system code page (mbcs), with no way to
        # detect this has happened. Because UTF-8 is the more likely
        # encoding, and mbcs is inherently unreliable (an mbcs string
        # that happens to be valid UTF-8 will not be decoded as mbcs)
        # always recreate the original bytes as UTF-8.
        if software.startswith('microsoft-iis/'):
            codec = ('utf-8',)

        # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1) direct
        # to the Unicode environ. No modification needed.
        elif software.startswith('apache/'):
            codec = None

        # Python 3's http.server.CGIHTTPRequestHandler decodes
        # using the urllib.unquote default of UTF-8, amongst other
        # issues.
        elif software.startswith('simplehttp/') and 'python/3' in software:
            codec = ('utf-8',)

        # For other servers, guess that they have written bytes to
        # the environ using stdio byte-oriented interfaces, ending up
        # with the system code page.
        else:
            codec = (enc, 'replace')
    else:
        # Recover bytes from unicode environ, using surrogate escapes
        # where available (Python 3.1+).
        codec = (enc, esc)

    environ = {}

    # Take the basic environment from native-unicode os.environ. Attempt to
    # fix up the variables that come from the HTTP request to compensate for
    # the bytes->unicode decoding step that will already have taken place.
    for k, v in os.environ.items():
        if codec is not None and _needs_transcode(k):
            v = v.encode(*codec).decode('iso-8859-1')
        environ[k] = v
    return environ
class BaseHandler:
"""Manage the invocation of a WSGI application"""
@ -36,7 +107,7 @@ class BaseHandler:
# os_environ is used to supply configuration from the OS environment:
# by default it's a copy of 'os.environ' as of import time (obtained via
# read_environ()), but you can override this in e.g. your __init__ method.
os_environ = dict(os.environ.items())
os_environ= read_environ()
# Collaborator classes
wsgi_file_wrapper = FileWrapper # set to None to disable
@ -431,6 +502,42 @@ class CGIHandler(BaseCGIHandler):
def __init__(self):
BaseCGIHandler.__init__(
self, sys.stdin, sys.stdout, sys.stderr, dict(os.environ.items()),
multithread=False, multiprocess=True
self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
read_environ(), multithread=False, multiprocess=True
)
class IISCGIHandler(BaseCGIHandler):
    """CGI handler that works around the IIS ``PATH_INFO`` duplication bug

    Use this class instead of CGIHandler when running under Microsoft IIS
    and the server has not been configured with the allowPathInfo option
    (IIS>=7) or the metabase allowPathInfoForScriptMappings setting (IIS<7).
    """

    wsgi_run_once = True
    os_environ = {}

    # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at
    # the front, causing problems for WSGI applications that wish to implement
    # routing. This handler strips any such duplicated path.

    # IIS can be configured to pass the correct PATH_INFO, but this causes
    # another bug where PATH_TRANSLATED is wrong. Luckily this variable is
    # rarely used and is not guaranteed by WSGI. On IIS<7, though, the
    # setting can only be made on a vhost level, affecting all other script
    # mappings, many of which break when exposed to the PATH_TRANSLATED bug.
    # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
    # rarely uses it because there is still no UI for it.)

    # There is no way for CGI code to tell whether the option was set, so a
    # separate handler class is provided.

    def __init__(self):
        """Build the CGI environment and strip a duplicated SCRIPT_NAME."""
        env = read_environ()
        path_info = env.get('PATH_INFO', '')
        script_name = env.get('SCRIPT_NAME', '')
        # Both sides get a trailing '/' so that only a whole-segment match
        # counts: SCRIPT_NAME '/app' must not match PATH_INFO '/application'.
        duplicated = (path_info + '/').startswith(script_name + '/')
        if duplicated:
            env['PATH_INFO'] = path_info[len(script_name):]
        BaseCGIHandler.__init__(
            self,
            sys.stdin.buffer,
            sys.stdout.buffer,
            sys.stderr,
            env,
            multithread=False,
            multiprocess=True,
        )

View File

@ -15,7 +15,7 @@ import sys
import urllib.parse
from wsgiref.handlers import SimpleHandler
__version__ = "0.1"
__version__ = "0.2"
__all__ = ['WSGIServer', 'WSGIRequestHandler', 'demo_app', 'make_server']
@ -74,13 +74,14 @@ class WSGIRequestHandler(BaseHTTPRequestHandler):
def get_environ(self):
env = self.server.base_environ.copy()
env['SERVER_PROTOCOL'] = self.request_version
env['SERVER_SOFTWARE'] = self.server_version
env['REQUEST_METHOD'] = self.command
if '?' in self.path:
path,query = self.path.split('?',1)
else:
path,query = self.path,''
env['PATH_INFO'] = urllib.parse.unquote(path)
env['PATH_INFO'] = urllib.parse.unquote_to_bytes(path).decode('iso-8859-1')
env['QUERY_STRING'] = query
host = self.address_string()

View File

@ -59,6 +59,10 @@ Core and Builtins
Library
-------
- Issue #10155: Add IISCGIHandler to wsgiref.handlers to support IIS
CGI environment better, and to correct unicode environment values
for WSGI 1.0.1.
- Issue #10281: nntplib now returns None for absent fields in the OVER/XOVER
response, instead of raising an exception.