mirror of https://github.com/python/cpython
579 lines
21 KiB
Python
579 lines
21 KiB
Python
"""HTTP server base class.
|
|
|
|
Note: the class in this module doesn't implement any HTTP request; see
|
|
SimpleHTTPServer for simple implementations of GET, HEAD and POST
|
|
(including CGI scripts). It does, however, optionally implement HTTP/1.1
|
|
persistent connections, as of version 0.3.
|
|
|
|
Contents:
|
|
|
|
- BaseHTTPRequestHandler: HTTP request handler base class
|
|
- test: test function
|
|
|
|
XXX To do:
|
|
|
|
- log requests even later (to capture byte count)
|
|
- log user-agent header and other interesting goodies
|
|
- send error log to separate file
|
|
"""
|
|
|
|
|
|
# See also:
|
|
#
|
|
# HTTP Working Group T. Berners-Lee
|
|
# INTERNET-DRAFT R. T. Fielding
|
|
# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
|
|
# Expires September 8, 1995 March 8, 1995
|
|
#
|
|
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
|
|
#
|
|
# and
|
|
#
|
|
# Network Working Group R. Fielding
|
|
# Request for Comments: 2616 et al
|
|
# Obsoletes: 2068 June 1999
|
|
# Category: Standards Track
|
|
#
|
|
# URL: http://www.faqs.org/rfcs/rfc2616.html
|
|
|
|
# Log files
|
|
# ---------
|
|
#
|
|
# Here's a quote from the NCSA httpd docs about log file format.
|
|
#
|
|
# | The logfile format is as follows. Each line consists of:
|
|
# |
|
|
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
|
|
# |
|
|
# | host: Either the DNS name or the IP number of the remote client
|
|
# | rfc931: Any information returned by identd for this person,
|
|
# | - otherwise.
|
|
# | authuser: If user sent a userid for authentication, the user name,
|
|
# | - otherwise.
|
|
# | DD: Day
|
|
# | Mon: Month (calendar name)
|
|
# | YYYY: Year
|
|
# | hh: hour (24-hour format, the machine's timezone)
|
|
# | mm: minutes
|
|
# | ss: seconds
|
|
# | request: The first line of the HTTP request as sent by the client.
|
|
# | ddd: the status code returned by the server, - if not available.
|
|
# | bbbb: the total number of bytes sent,
|
|
# | *not including the HTTP/1.0 header*, - if not available
|
|
# |
|
|
# | You can determine the name of the file accessed through request.
|
|
#
|
|
# (Actually, the latter is only true if you know the server configuration
|
|
# at the time the request was made!)
|
|
|
|
__version__ = "0.3"
|
|
|
|
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
|
|
|
|
import sys
|
|
import time
|
|
import socket # For gethostbyaddr()
|
|
import mimetools
|
|
import SocketServer
|
|
|
|
# Default error message
|
|
DEFAULT_ERROR_MESSAGE = """\
|
|
<head>
|
|
<title>Error response</title>
|
|
</head>
|
|
<body>
|
|
<h1>Error response</h1>
|
|
<p>Error code %(code)d.
|
|
<p>Message: %(message)s.
|
|
<p>Error code explanation: %(code)s = %(explain)s.
|
|
</body>
|
|
"""
|
|
|
|
def _quote_html(html):
|
|
return html.replace("&", "&").replace("<", "<").replace(">", ">")
|
|
|
|
class HTTPServer(SocketServer.TCPServer):
|
|
|
|
allow_reuse_address = 1 # Seems to make sense in testing environment
|
|
|
|
def server_bind(self):
|
|
"""Override server_bind to store the server name."""
|
|
SocketServer.TCPServer.server_bind(self)
|
|
host, port = self.socket.getsockname()[:2]
|
|
self.server_name = socket.getfqdn(host)
|
|
self.server_port = port
|
|
|
|
|
|
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):
|
|
|
|
"""HTTP request handler base class.
|
|
|
|
The following explanation of HTTP serves to guide you through the
|
|
code as well as to expose any misunderstandings I may have about
|
|
HTTP (so you don't need to read the code to figure out I'm wrong
|
|
:-).
|
|
|
|
HTTP (HyperText Transfer Protocol) is an extensible protocol on
|
|
top of a reliable stream transport (e.g. TCP/IP). The protocol
|
|
recognizes three parts to a request:
|
|
|
|
1. One line identifying the request type and path
|
|
2. An optional set of RFC-822-style headers
|
|
3. An optional data part
|
|
|
|
The headers and data are separated by a blank line.
|
|
|
|
The first line of the request has the form
|
|
|
|
<command> <path> <version>
|
|
|
|
where <command> is a (case-sensitive) keyword such as GET or POST,
|
|
<path> is a string containing path information for the request,
|
|
and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
|
|
<path> is encoded using the URL encoding scheme (using %xx to signify
|
|
the ASCII character with hex code xx).
|
|
|
|
The specification specifies that lines are separated by CRLF but
|
|
for compatibility with the widest range of clients recommends
|
|
servers also handle LF. Similarly, whitespace in the request line
|
|
is treated sensibly (allowing multiple spaces between components
|
|
and allowing trailing whitespace).
|
|
|
|
Similarly, for output, lines ought to be separated by CRLF pairs
|
|
but most clients grok LF characters just fine.
|
|
|
|
If the first line of the request has the form
|
|
|
|
<command> <path>
|
|
|
|
(i.e. <version> is left out) then this is assumed to be an HTTP
|
|
0.9 request; this form has no optional headers and data part and
|
|
the reply consists of just the data.
|
|
|
|
The reply form of the HTTP 1.x protocol again has three parts:
|
|
|
|
1. One line giving the response code
|
|
2. An optional set of RFC-822-style headers
|
|
3. The data
|
|
|
|
Again, the headers and data are separated by a blank line.
|
|
|
|
The response code line has the form
|
|
|
|
<version> <responsecode> <responsestring>
|
|
|
|
where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
|
|
<responsecode> is a 3-digit response code indicating success or
|
|
failure of the request, and <responsestring> is an optional
|
|
human-readable string explaining what the response code means.
|
|
|
|
This server parses the request and the headers, and then calls a
|
|
function specific to the request type (<command>). Specifically,
|
|
a request SPAM will be handled by a method do_SPAM(). If no
|
|
such method exists the server sends an error response to the
|
|
client. If it exists, it is called with no arguments:
|
|
|
|
do_SPAM()
|
|
|
|
Note that the request name is case sensitive (i.e. SPAM and spam
|
|
are different requests).
|
|
|
|
The various request details are stored in instance variables:
|
|
|
|
- client_address is the client IP address in the form (host,
|
|
port);
|
|
|
|
- command, path and version are the broken-down request line;
|
|
|
|
- headers is an instance of mimetools.Message (or a derived
|
|
class) containing the header information;
|
|
|
|
- rfile is a file object open for reading positioned at the
|
|
start of the optional input data part;
|
|
|
|
- wfile is a file object open for writing.
|
|
|
|
IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
|
|
|
|
The first thing to be written must be the response line. Then
|
|
follow 0 or more header lines, then a blank line, and then the
|
|
actual data (if any). The meaning of the header lines depends on
|
|
the command executed by the server; in most cases, when data is
|
|
returned, there should be at least one header line of the form
|
|
|
|
Content-type: <type>/<subtype>
|
|
|
|
where <type> and <subtype> should be registered MIME types,
|
|
e.g. "text/html" or "text/plain".
|
|
|
|
"""
|
|
|
|
# The Python system version, truncated to its first component.
|
|
sys_version = "Python/" + sys.version.split()[0]
|
|
|
|
# The server software version. You may want to override this.
|
|
# The format is multiple whitespace-separated strings,
|
|
# where each string is of the form name[/version].
|
|
server_version = "BaseHTTP/" + __version__
|
|
|
|
def parse_request(self):
|
|
"""Parse a request (internal).
|
|
|
|
The request should be stored in self.raw_requestline; the results
|
|
are in self.command, self.path, self.request_version and
|
|
self.headers.
|
|
|
|
Return True for success, False for failure; on failure, an
|
|
error is sent back.
|
|
|
|
"""
|
|
self.command = None # set in case of error on the first line
|
|
self.request_version = version = "HTTP/0.9" # Default
|
|
self.close_connection = 1
|
|
requestline = str(self.raw_requestline, 'iso-8859-1')
|
|
if requestline[-2:] == '\r\n':
|
|
requestline = requestline[:-2]
|
|
elif requestline[-1:] == '\n':
|
|
requestline = requestline[:-1]
|
|
self.requestline = requestline
|
|
words = requestline.split()
|
|
if len(words) == 3:
|
|
[command, path, version] = words
|
|
if version[:5] != 'HTTP/':
|
|
self.send_error(400, "Bad request version (%r)" % version)
|
|
return False
|
|
try:
|
|
base_version_number = version.split('/', 1)[1]
|
|
version_number = base_version_number.split(".")
|
|
# RFC 2145 section 3.1 says there can be only one "." and
|
|
# - major and minor numbers MUST be treated as
|
|
# separate integers;
|
|
# - HTTP/2.4 is a lower version than HTTP/2.13, which in
|
|
# turn is lower than HTTP/12.3;
|
|
# - Leading zeros MUST be ignored by recipients.
|
|
if len(version_number) != 2:
|
|
raise ValueError
|
|
version_number = int(version_number[0]), int(version_number[1])
|
|
except (ValueError, IndexError):
|
|
self.send_error(400, "Bad request version (%r)" % version)
|
|
return False
|
|
if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
|
|
self.close_connection = 0
|
|
if version_number >= (2, 0):
|
|
self.send_error(505,
|
|
"Invalid HTTP Version (%s)" % base_version_number)
|
|
return False
|
|
elif len(words) == 2:
|
|
[command, path] = words
|
|
self.close_connection = 1
|
|
if command != 'GET':
|
|
self.send_error(400,
|
|
"Bad HTTP/0.9 request type (%r)" % command)
|
|
return False
|
|
elif not words:
|
|
return False
|
|
else:
|
|
self.send_error(400, "Bad request syntax (%r)" % requestline)
|
|
return False
|
|
self.command, self.path, self.request_version = command, path, version
|
|
|
|
# Examine the headers and look for a Connection directive
|
|
self.headers = self.MessageClass(self.rfile, 0)
|
|
|
|
conntype = self.headers.get('Connection', "")
|
|
if conntype.lower() == 'close':
|
|
self.close_connection = 1
|
|
elif (conntype.lower() == 'keep-alive' and
|
|
self.protocol_version >= "HTTP/1.1"):
|
|
self.close_connection = 0
|
|
return True
|
|
|
|
def handle_one_request(self):
|
|
"""Handle a single HTTP request.
|
|
|
|
You normally don't need to override this method; see the class
|
|
__doc__ string for information on how to handle specific HTTP
|
|
commands such as GET and POST.
|
|
|
|
"""
|
|
self.raw_requestline = self.rfile.readline()
|
|
if not self.raw_requestline:
|
|
self.close_connection = 1
|
|
return
|
|
if not self.parse_request(): # An error code has been sent, just exit
|
|
return
|
|
mname = 'do_' + self.command
|
|
if not hasattr(self, mname):
|
|
self.send_error(501, "Unsupported method (%r)" % self.command)
|
|
return
|
|
method = getattr(self, mname)
|
|
method()
|
|
|
|
def handle(self):
|
|
"""Handle multiple requests if necessary."""
|
|
self.close_connection = 1
|
|
|
|
self.handle_one_request()
|
|
while not self.close_connection:
|
|
self.handle_one_request()
|
|
|
|
def send_error(self, code, message=None):
|
|
"""Send and log an error reply.
|
|
|
|
Arguments are the error code, and a detailed message.
|
|
The detailed message defaults to the short entry matching the
|
|
response code.
|
|
|
|
This sends an error response (so it must be called before any
|
|
output has been generated), logs the error, and finally sends
|
|
a piece of HTML explaining the error to the user.
|
|
|
|
"""
|
|
|
|
try:
|
|
shortmsg, longmsg = self.responses[code]
|
|
except KeyError:
|
|
shortmsg, longmsg = '???', '???'
|
|
if message is None:
|
|
message = shortmsg
|
|
explain = longmsg
|
|
self.log_error("code %d, message %s", code, message)
|
|
# using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
|
|
content = (self.error_message_format %
|
|
{'code': code, 'message': _quote_html(message), 'explain': explain})
|
|
self.send_response(code, message)
|
|
self.send_header("Content-Type", "text/html")
|
|
self.send_header('Connection', 'close')
|
|
self.end_headers()
|
|
if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
|
|
self.wfile.write(content)
|
|
|
|
error_message_format = DEFAULT_ERROR_MESSAGE
|
|
|
|
def send_response(self, code, message=None):
|
|
"""Send the response header and log the response code.
|
|
|
|
Also send two standard headers with the server software
|
|
version and the current date.
|
|
|
|
"""
|
|
self.log_request(code)
|
|
if message is None:
|
|
if code in self.responses:
|
|
message = self.responses[code][0]
|
|
else:
|
|
message = ''
|
|
if self.request_version != 'HTTP/0.9':
|
|
self.wfile.write("%s %d %s\r\n" %
|
|
(self.protocol_version, code, message))
|
|
# print (self.protocol_version, code, message)
|
|
self.send_header('Server', self.version_string())
|
|
self.send_header('Date', self.date_time_string())
|
|
|
|
def send_header(self, keyword, value):
|
|
"""Send a MIME header."""
|
|
if self.request_version != 'HTTP/0.9':
|
|
self.wfile.write("%s: %s\r\n" % (keyword, value))
|
|
|
|
if keyword.lower() == 'connection':
|
|
if value.lower() == 'close':
|
|
self.close_connection = 1
|
|
elif value.lower() == 'keep-alive':
|
|
self.close_connection = 0
|
|
|
|
def end_headers(self):
|
|
"""Send the blank line ending the MIME headers."""
|
|
if self.request_version != 'HTTP/0.9':
|
|
self.wfile.write("\r\n")
|
|
|
|
def log_request(self, code='-', size='-'):
|
|
"""Log an accepted request.
|
|
|
|
This is called by send_response().
|
|
|
|
"""
|
|
|
|
self.log_message('"%s" %s %s',
|
|
self.requestline, str(code), str(size))
|
|
|
|
def log_error(self, format, *args):
|
|
"""Log an error.
|
|
|
|
This is called when a request cannot be fulfilled. By
|
|
default it passes the message on to log_message().
|
|
|
|
Arguments are the same as for log_message().
|
|
|
|
XXX This should go to the separate error log.
|
|
|
|
"""
|
|
|
|
self.log_message(format, *args)
|
|
|
|
def log_message(self, format, *args):
|
|
"""Log an arbitrary message.
|
|
|
|
This is used by all other logging functions. Override
|
|
it if you have specific logging wishes.
|
|
|
|
The first argument, FORMAT, is a format string for the
|
|
message to be logged. If the format string contains
|
|
any % escapes requiring parameters, they should be
|
|
specified as subsequent arguments (it's just like
|
|
printf!).
|
|
|
|
The client host and current date/time are prefixed to
|
|
every message.
|
|
|
|
"""
|
|
|
|
sys.stderr.write("%s - - [%s] %s\n" %
|
|
(self.address_string(),
|
|
self.log_date_time_string(),
|
|
format%args))
|
|
|
|
def version_string(self):
|
|
"""Return the server software version string."""
|
|
return self.server_version + ' ' + self.sys_version
|
|
|
|
def date_time_string(self, timestamp=None):
|
|
"""Return the current date and time formatted for a message header."""
|
|
if timestamp is None:
|
|
timestamp = time.time()
|
|
year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
|
|
s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
|
|
self.weekdayname[wd],
|
|
day, self.monthname[month], year,
|
|
hh, mm, ss)
|
|
return s
|
|
|
|
def log_date_time_string(self):
|
|
"""Return the current time formatted for logging."""
|
|
now = time.time()
|
|
year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
|
|
s = "%02d/%3s/%04d %02d:%02d:%02d" % (
|
|
day, self.monthname[month], year, hh, mm, ss)
|
|
return s
|
|
|
|
weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
|
|
|
|
monthname = [None,
|
|
'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
|
|
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
|
|
|
|
def address_string(self):
|
|
"""Return the client address formatted for logging.
|
|
|
|
This version looks up the full hostname using gethostbyaddr(),
|
|
and tries to find a name that contains at least one dot.
|
|
|
|
"""
|
|
|
|
host, port = self.client_address[:2]
|
|
return socket.getfqdn(host)
|
|
|
|
# Essentially static class variables
|
|
|
|
# The version of the HTTP protocol we support.
|
|
# Set this to HTTP/1.1 to enable automatic keepalive
|
|
protocol_version = "HTTP/1.0"
|
|
|
|
# The Message-like class used to parse headers
|
|
MessageClass = mimetools.Message
|
|
|
|
# Table mapping response codes to messages; entries have the
|
|
# form {code: (shortmessage, longmessage)}.
|
|
# See RFC 2616.
|
|
responses = {
|
|
100: ('Continue', 'Request received, please continue'),
|
|
101: ('Switching Protocols',
|
|
'Switching to new protocol; obey Upgrade header'),
|
|
|
|
200: ('OK', 'Request fulfilled, document follows'),
|
|
201: ('Created', 'Document created, URL follows'),
|
|
202: ('Accepted',
|
|
'Request accepted, processing continues off-line'),
|
|
203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
|
|
204: ('No Content', 'Request fulfilled, nothing follows'),
|
|
205: ('Reset Content', 'Clear input form for further input.'),
|
|
206: ('Partial Content', 'Partial content follows.'),
|
|
|
|
300: ('Multiple Choices',
|
|
'Object has several resources -- see URI list'),
|
|
301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
|
|
302: ('Found', 'Object moved temporarily -- see URI list'),
|
|
303: ('See Other', 'Object moved -- see Method and URL list'),
|
|
304: ('Not Modified',
|
|
'Document has not changed since given time'),
|
|
305: ('Use Proxy',
|
|
'You must use proxy specified in Location to access this '
|
|
'resource.'),
|
|
307: ('Temporary Redirect',
|
|
'Object moved temporarily -- see URI list'),
|
|
|
|
400: ('Bad Request',
|
|
'Bad request syntax or unsupported method'),
|
|
401: ('Unauthorized',
|
|
'No permission -- see authorization schemes'),
|
|
402: ('Payment Required',
|
|
'No payment -- see charging schemes'),
|
|
403: ('Forbidden',
|
|
'Request forbidden -- authorization will not help'),
|
|
404: ('Not Found', 'Nothing matches the given URI'),
|
|
405: ('Method Not Allowed',
|
|
'Specified method is invalid for this server.'),
|
|
406: ('Not Acceptable', 'URI not available in preferred format.'),
|
|
407: ('Proxy Authentication Required', 'You must authenticate with '
|
|
'this proxy before proceeding.'),
|
|
408: ('Request Timeout', 'Request timed out; try again later.'),
|
|
409: ('Conflict', 'Request conflict.'),
|
|
410: ('Gone',
|
|
'URI no longer exists and has been permanently removed.'),
|
|
411: ('Length Required', 'Client must specify Content-Length.'),
|
|
412: ('Precondition Failed', 'Precondition in headers is false.'),
|
|
413: ('Request Entity Too Large', 'Entity is too large.'),
|
|
414: ('Request-URI Too Long', 'URI is too long.'),
|
|
415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
|
|
416: ('Requested Range Not Satisfiable',
|
|
'Cannot satisfy request range.'),
|
|
417: ('Expectation Failed',
|
|
'Expect condition could not be satisfied.'),
|
|
|
|
500: ('Internal Server Error', 'Server got itself in trouble'),
|
|
501: ('Not Implemented',
|
|
'Server does not support this operation'),
|
|
502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
|
|
503: ('Service Unavailable',
|
|
'The server cannot process the request due to a high load'),
|
|
504: ('Gateway Timeout',
|
|
'The gateway server did not receive a timely response'),
|
|
505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
|
|
}
|
|
|
|
|
|
def test(HandlerClass = BaseHTTPRequestHandler,
|
|
ServerClass = HTTPServer, protocol="HTTP/1.0"):
|
|
"""Test the HTTP request handler class.
|
|
|
|
This runs an HTTP server on port 8000 (or the first command line
|
|
argument).
|
|
|
|
"""
|
|
|
|
if sys.argv[1:]:
|
|
port = int(sys.argv[1])
|
|
else:
|
|
port = 8000
|
|
server_address = ('', port)
|
|
|
|
HandlerClass.protocol_version = protocol
|
|
httpd = ServerClass(server_address, HandlerClass)
|
|
|
|
sa = httpd.socket.getsockname()
|
|
print("Serving HTTP on", sa[0], "port", sa[1], "...")
|
|
httpd.serve_forever()
|
|
|
|
|
|
if __name__ == '__main__':
|
|
test()
|