Initial revision
This commit is contained in:
parent
40d1ea3b9c
commit
e7e578ffe0
|
@ -0,0 +1,482 @@
|
|||
"""HTTP server base class.
|
||||
|
||||
Note: the class in this module doesn't implement any HTTP request; see
|
||||
SimpleHTTPServer for simple implementations of GET, HEAD and POST
|
||||
(including CGI scripts).
|
||||
|
||||
Contents:
|
||||
|
||||
- BaseHTTPRequestHandler: HTTP request handler base class
|
||||
- test: test function
|
||||
|
||||
XXX To do:
|
||||
|
||||
- send server version
|
||||
- log requests even later (to capture byte count)
|
||||
- log user-agent header and other interesting goodies
|
||||
- send error log to separate file
|
||||
- are request names really case sensitive?
|
||||
|
||||
"""
|
||||
|
||||
|
||||
# See also:
|
||||
#
|
||||
# HTTP Working Group T. Berners-Lee
|
||||
# INTERNET-DRAFT R. T. Fielding
|
||||
# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
|
||||
# Expires September 8, 1995 March 8, 1995
|
||||
#
|
||||
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
|
||||
|
||||
|
||||
# Log files
|
||||
# ---------
|
||||
#
|
||||
# Here's a quote from the NCSA httpd docs about log file format.
|
||||
#
|
||||
# | The logfile format is as follows. Each line consists of:
|
||||
# |
|
||||
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
|
||||
# |
|
||||
# | host: Either the DNS name or the IP number of the remote client
|
||||
# | rfc931: Any information returned by identd for this person,
|
||||
# | - otherwise.
|
||||
# | authuser: If user sent a userid for authentication, the user name,
|
||||
# | - otherwise.
|
||||
# | DD: Day
|
||||
# | Mon: Month (calendar name)
|
||||
# | YYYY: Year
|
||||
# | hh: hour (24-hour format, the machine's timezone)
|
||||
# | mm: minutes
|
||||
# | ss: seconds
|
||||
# | request: The first line of the HTTP request as sent by the client.
|
||||
# | ddd: the status code returned by the server, - if not available.
|
||||
# | bbbb: the total number of bytes sent,
|
||||
# | *not including the HTTP/1.0 header*, - if not available
|
||||
# |
|
||||
# | You can determine the name of the file accessed through request.
|
||||
#
|
||||
# (Actually, the latter is only true if you know the server configuration
|
||||
# at the time the request was made!)
|
||||
|
||||
|
||||
__version__ = "0.2"
|
||||
|
||||
|
||||
import sys
|
||||
import time
|
||||
import socket # For gethostbyaddr()
|
||||
import string
|
||||
import rfc822
|
||||
import mimetools
|
||||
import SocketServer
|
||||
|
||||
# Default error message
|
||||
DEFAULT_ERROR_MESSAGE = """\
|
||||
<head>
|
||||
<title>Error response</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Error response</h1>
|
||||
<p>Error code %(code)d.
|
||||
<p>Message: %(message)s.
|
||||
<p>Error code explanation: %(code)s = %(explain)s.
|
||||
</body>
|
||||
"""
|
||||
|
||||
|
||||
class HTTPServer(SocketServer.TCPServer):

    """TCP server that records its own host name and port after binding.

    Request handlers can read the results from the attributes
    server_name and server_port.

    """

    def server_bind(self):
        """Bind the socket, then store the server name and port.

        The bound address is read back from the socket.  When the
        host part is empty or the wildcard '0.0.0.0', the local host
        name is used instead.  The host is then resolved with
        gethostbyaddr(); if the primary name contains no dot, the
        first alias that does contain one is preferred.

        """
        SocketServer.TCPServer.server_bind(self)
        address, port = self.socket.getsockname()
        if not address or address == '0.0.0.0':
            address = socket.gethostname()
        primary, aliases, _addresses = socket.gethostbyaddr(address)
        chosen = primary
        if '.' not in primary:
            # Look for a fully qualified alias.
            for alias in aliases:
                if '.' in alias:
                    chosen = alias
                    break
        self.server_name = chosen
        self.server_port = port
|
||||
|
||||
|
||||
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):
|
||||
|
||||
"""HTTP request handler base class.
|
||||
|
||||
The following explanation of HTTP serves to guide you through the
|
||||
code as well as to expose any misunderstandings I may have about
|
||||
HTTP (so you don't need to read the code to figure out I'm wrong
|
||||
:-).
|
||||
|
||||
HTTP (HyperText Transfer Protocol) is an extensible protocol on
|
||||
top of a reliable stream transport (e.g. TCP/IP). The protocol
|
||||
recognizes three parts to a request:
|
||||
|
||||
1. One line identifying the request type and path
|
||||
2. An optional set of RFC-822-style headers
|
||||
3. An optional data part
|
||||
|
||||
The headers and data are separated by a blank line.
|
||||
|
||||
The first line of the request has the form
|
||||
|
||||
<command> <path> <version>
|
||||
|
||||
where <command> is a (case-sensitive) keyword such as GET or POST,
|
||||
<path> is a string containing path information for the request,
|
||||
and <version> should be the string "HTTP/1.0". <path> is encoded
|
||||
using the URL encoding scheme (using %xx to signify the ASCII
|
||||
character with hex code xx).
|
||||
|
||||
The protocol is vague about whether lines are separated by LF
|
||||
characters or by CRLF pairs -- for compatibility with the widest
|
||||
range of clients, both should be accepted. Similarly, whitespace
|
||||
in the request line should be treated sensibly (allowing multiple
|
||||
spaces between components and allowing trailing whitespace).
|
||||
|
||||
Similarly, for output, lines ought to be separated by CRLF pairs
|
||||
but most clients grok LF characters just fine.
|
||||
|
||||
If the first line of the request has the form
|
||||
|
||||
<command> <path>
|
||||
|
||||
(i.e. <version> is left out) then this is assumed to be an HTTP
|
||||
0.9 request; this form has no optional headers and data part and
|
||||
the reply consists of just the data.
|
||||
|
||||
The reply form of the HTTP 1.0 protocol again has three parts:
|
||||
|
||||
1. One line giving the response code
|
||||
2. An optional set of RFC-822-style headers
|
||||
3. The data
|
||||
|
||||
Again, the headers and data are separated by a blank line.
|
||||
|
||||
The response code line has the form
|
||||
|
||||
<version> <responsecode> <responsestring>
|
||||
|
||||
where <version> is the protocol version (always "HTTP/1.0"),
|
||||
<responsecode> is a 3-digit response code indicating success or
|
||||
failure of the request, and <responsestring> is an optional
|
||||
human-readable string explaining what the response code means.
|
||||
|
||||
This server parses the request and the headers, and then calls a
|
||||
function specific to the request type (<command>). Specifically,
|
||||
a request SPAM will be handled by a method do_SPAM(). If no
|
||||
such method exists the server sends an error response to the
|
||||
client. If it exists, it is called with no arguments:
|
||||
|
||||
do_SPAM()
|
||||
|
||||
Note that the request name is case sensitive (i.e. SPAM and spam
|
||||
are different requests).
|
||||
|
||||
The various request details are stored in instance variables:
|
||||
|
||||
- client_address is the client IP address in the form (host,
|
||||
port);
|
||||
|
||||
- command, path and version are the broken-down request line;
|
||||
|
||||
- headers is an instance of mimetools.Message (or a derived
|
||||
class) containing the header information;
|
||||
|
||||
- rfile is a file object open for reading positioned at the
|
||||
start of the optional input data part;
|
||||
|
||||
- wfile is a file object open for writing.
|
||||
|
||||
IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
|
||||
|
||||
The first thing to be written must be the response line. Then
|
||||
follow 0 or more header lines, then a blank line, and then the
|
||||
actual data (if any). The meaning of the header lines depends on
|
||||
the command executed by the server; in most cases, when data is
|
||||
returned, there should be at least one header line of the form
|
||||
|
||||
Content-type: <type>/<subtype>
|
||||
|
||||
where <type> and <subtype> should be registered MIME types,
|
||||
e.g. "text/html" or "text/plain".
|
||||
|
||||
"""
|
||||
|
||||
# The Python system version, truncated to its first component.
|
||||
sys_version = "Python/" + string.split(sys.version)[0]
|
||||
|
||||
# The server software version. You may want to override this.
|
||||
# The format is multiple whitespace-separated strings,
|
||||
# where each string is of the form name[/version].
|
||||
server_version = "BaseHTTP/" + __version__
|
||||
|
||||
def handle(self):
    """Handle a single HTTP request.

    You normally don't need to override this method; see the class
    __doc__ string for information on how to handle specific HTTP
    commands such as GET and POST.

    The request line is read from self.rfile and split into command,
    path and (optionally) version.  A malformed line, an unexpected
    version or an unknown command is answered with send_error();
    otherwise the headers are parsed with self.MessageClass and the
    method named 'do_' + command is called without arguments.

    """

    self.raw_requestline = self.rfile.readline()
    # send_response(), send_header() and end_headers() all read
    # self.request_version, so it must exist before the first
    # send_error() below.  Until the request line proves otherwise,
    # answer in the bare HTTP/0.9 style.
    self.request_version = version = "HTTP/0.9"
    requestline = self.raw_requestline
    # Accept both CRLF and bare LF line endings.
    if requestline[-2:] == '\r\n':
        requestline = requestline[:-2]
    elif requestline[-1:] == '\n':
        requestline = requestline[:-1]
    self.requestline = requestline
    words = requestline.split()
    if len(words) == 3:
        command, path, version = words
        if version != self.protocol_version:
            self.send_error(400, "Bad request version (%s)" % repr(version))
            return
    elif len(words) == 2:
        # No version given: an HTTP/0.9 request, which only knows GET.
        command, path = words
        if command != 'GET':
            self.send_error(400,
                            "Bad HTTP/0.9 request type (%s)" % repr(command))
            return
    else:
        # 'command' is not bound here; report the whole line instead.
        self.send_error(400, "Bad request syntax (%s)" % repr(requestline))
        return
    self.command, self.path, self.request_version = command, path, version
    self.headers = self.MessageClass(self.rfile, 0)
    mname = 'do_' + command
    if not hasattr(self, mname):
        self.send_error(501, "Unsupported method (%s)" % repr(command))
        return
    method = getattr(self, mname)
    method()
|
||||
|
||||
def send_error(self, code, message=None):
    """Send and log an error reply.

    Arguments are the error code and an optional detailed message;
    when the message is missing or empty, the short entry for the
    code in self.responses is used ('???' for an unknown code).

    The error is logged, the response line and headers are sent (so
    this must be called before any other output), and finally a
    piece of HTML built from self.error_message_format is written to
    explain the error to the user.

    """
    unknown = ('???', '???')
    try:
        summary, explain = self.responses[code]
    except KeyError:
        summary, explain = unknown
    if not message:
        message = summary
    self.log_error("code %d, message %s", code, message)
    self.send_response(code, message)
    self.end_headers()
    fields = {'code': code,
              'message': message,
              'explain': explain}
    self.wfile.write(self.error_message_format % fields)
|
||||
|
||||
error_message_format = DEFAULT_ERROR_MESSAGE
|
||||
|
||||
def send_response(self, code, message=None):
    """Send the response header and log the response code.

    Arguments are the numeric status code and an optional reason
    phrase.  When no phrase is given, the short message for the code
    in self.responses is used (an empty string for unknown codes).

    The status line is only written for requests newer than
    HTTP/0.9.  Two standard headers follow, carrying the server
    software version and the current date; send_header() applies the
    same HTTP/0.9 check to them.

    """
    self.log_request(code)
    if message is None:
        try:
            # Entry layout is (shortmessage, longmessage); the status
            # line wants the short one, matching send_error().
            message = self.responses[code][0]
        except KeyError:
            message = ''
    if self.request_version != 'HTTP/0.9':
        self.wfile.write("%s %s %s\r\n" %
                         (self.protocol_version, str(code), message))
    self.send_header('Server', self.version_string())
    self.send_header('Date', self.date_time_string())
|
||||
|
||||
def send_header(self, keyword, value):
    """Write one 'keyword: value' MIME header line.

    Nothing is written for HTTP/0.9 requests.

    """
    if self.request_version == 'HTTP/0.9':
        return
    line = "%s: %s\r\n" % (keyword, value)
    self.wfile.write(line)
|
||||
|
||||
def end_headers(self):
    """Write the blank line that terminates the MIME headers.

    Nothing is written for HTTP/0.9 requests.

    """
    if self.request_version == 'HTTP/0.9':
        return
    self.wfile.write("\r\n")
|
||||
|
||||
def log_request(self, code='-', size='-'):
    """Log an accepted request.

    This is called by send_response().  The logged text is the
    quoted request line followed by the status code and the size,
    each of which defaults to '-'.

    """
    fields = (self.requestline, str(code), str(size))
    self.log_message('"%s" %s %s', *fields)
|
||||
|
||||
def log_error(self, *args):
    """Log an error.

    This is called when a request cannot be fulfilled.  By default
    it passes the message on to log_message().

    Arguments are the same as for log_message(): a format string
    followed by the values it consumes.

    XXX This should go to the separate error log.

    """
    # Extended call syntax instead of the apply() builtin.
    self.log_message(*args)
|
||||
|
||||
def log_message(self, format, *args):
    """Log an arbitrary message.

    This is used by all other logging functions.  Override it if
    you have specific logging wishes.

    The first argument, FORMAT, is a format string for the message
    to be logged.  If the format string contains any % escapes
    requiring parameters, they should be specified as subsequent
    arguments (it's just like printf!).

    Each message is written to sys.stderr as one line, prefixed
    with the client host and the current date/time.

    """
    client = self.address_string()
    stamp = self.log_date_time_string()
    text = format % args
    sys.stderr.write("%s - - [%s] %s\n" % (client, stamp, text))
|
||||
|
||||
def version_string(self):
    """Return server_version and sys_version joined by one space."""
    product = self.server_version
    runtime = self.sys_version
    return product + ' ' + runtime
|
||||
|
||||
def date_time_string(self):
    """Return the current GMT date and time formatted for a message header.

    The result looks like 'Wed, 08 Mar 1995 12:34:56 GMT'; day and
    month names come from self.weekdayname and self.monthname.

    """
    stamp = time.gmtime(time.time())
    year, month, day, hh, mm, ss, wd = stamp[:7]
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        self.weekdayname[wd],
        day, self.monthname[month], year,
        hh, mm, ss)
|
||||
|
||||
def log_date_time_string(self):
    """Return the current local time formatted for logging.

    The result looks like '08/Mar/1995 12:34:56'; the month name
    comes from self.monthname.

    """
    stamp = time.localtime(time.time())
    year, month, day, hh, mm, ss = stamp[:6]
    return "%02d/%3s/%04d %02d:%02d:%02d" % (
        day, self.monthname[month], year, hh, mm, ss)
|
||||
|
||||
weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
|
||||
|
||||
monthname = [None,
|
||||
'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
|
||||
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
|
||||
|
||||
def address_string(self):
    """Return the client address formatted for logging.

    This version looks up the full hostname using gethostbyaddr(),
    and tries to find a name that contains at least one dot.  If
    the lookup fails, the raw host part of self.client_address is
    returned; if no name contains a dot, the primary name is
    returned.

    """
    host, _port = self.client_address
    try:
        name, aliases, _addresses = socket.gethostbyaddr(host)
    except socket.error:
        # Reverse lookup failed; log the address as given.
        return host
    # Try the primary name first, then the aliases, without
    # modifying the list handed back by the resolver.
    for candidate in [name] + aliases:
        if '.' in candidate:
            return candidate
    return name
|
||||
|
||||
|
||||
# Essentially static class variables
|
||||
|
||||
# The version of the HTTP protocol we support.
|
||||
# Don't override unless you know what you're doing (hint: incoming
|
||||
# requests are required to have exactly this version string).
|
||||
protocol_version = "HTTP/1.0"
|
||||
|
||||
# The Message-like class used to parse headers
|
||||
MessageClass = mimetools.Message
|
||||
|
||||
# Table mapping response codes to messages; entries have the
|
||||
# form {code: (shortmessage, longmessage)}.
|
||||
# See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html
|
||||
responses = {
    200: ('OK', 'Request fulfilled, document follows'),
    201: ('Created', 'Document created, URL follows'),
    202: ('Accepted',
          'Request accepted, processing continues off-line'),
    203: ('Partial information', 'Request fulfilled from cache'),
    204: ('No response', 'Request fulfilled, nothing follows'),

    301: ('Moved', 'Object moved permanently -- see URI list'),
    302: ('Found', 'Object moved temporarily -- see URI list'),
    303: ('Method', 'Object moved -- see Method and URL list'),
    304: ('Not modified',
          'Document has not changed since given time'),

    400: ('Bad request',
          'Bad request syntax or unsupported method'),
    401: ('Unauthorized',
          'No permission -- see authorization schemes'),
    402: ('Payment required',
          'No payment -- see charging schemes'),
    403: ('Forbidden',
          'Request forbidden -- authorization will not help'),
    404: ('Not found', 'Nothing matches the given URI'),

    # NOTE(review): the 502/503 names follow the table at the URL
    # cited above this dictionary, not later HTTP specifications --
    # confirm before relying on them.
    500: ('Internal error', 'Server got itself in trouble'),
    501: ('Not implemented',
          'Server does not support this operation'),
    502: ('Service temporarily overloaded',
          'The server cannot process the request due to a high load'),
    503: ('Gateway timeout',
          'The gateway server did not receive a timely response'),

    }
|
||||
|
||||
|
||||
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the first command line
    argument).  The server listens on all interfaces and serves
    until interrupted.

    """

    if sys.argv[1:]:
        port = int(sys.argv[1])
    else:
        port = 8000
    server_address = ('', port)

    httpd = ServerClass(server_address, HandlerClass)

    # A single parenthesized string prints the same text whether
    # print is a statement or a function.
    print("Serving HTTP on port %d ..." % port)
    httpd.serve_forever()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test()
|
|
@ -0,0 +1,203 @@
|
|||
"""CGI-savvy HTTP Server.
|
||||
|
||||
This module builds on SimpleHTTPServer by implementing GET and POST
|
||||
requests to cgi-bin scripts.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
__version__ = "0.2"
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import socket
|
||||
import string
|
||||
import urllib
|
||||
import BaseHTTPServer
|
||||
import SimpleHTTPServer
|
||||
|
||||
|
||||
class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
|
||||
|
||||
"""Complete HTTP server with GET, HEAD and POST commands.
|
||||
|
||||
GET and HEAD also support running CGI scripts.
|
||||
|
||||
The POST command is *only* implemented for CGI scripts.
|
||||
|
||||
"""
|
||||
|
||||
def do_POST(self):
    """Serve a POST request.

    POST is only implemented for CGI scripts; any other path is
    answered with a 501 error.

    """
    if not self.is_cgi():
        self.send_error(501, "Can only POST to CGI scripts")
        return
    self.run_cgi()
|
||||
|
||||
def send_head(self):
    """Version of send_head that supports CGI scripts.

    Paths recognized by is_cgi() are handed to run_cgi(); everything
    else falls through to SimpleHTTPRequestHandler.send_head().

    """
    if not self.is_cgi():
        return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)
    return self.run_cgi()
|
||||
|
||||
def is_cgi(self):
    """Test whether self.path corresponds to a CGI script.

    Return 1 if the path requires running a CGI script, 0 if not.
    On success the tuple (dir, rest) is stored in self.cgi_info,
    where dir is the matching CGI directory and rest is whatever
    follows it, without the separating slash (possibly empty).

    The default implementation tests whether the path begins with
    one of the strings in the list self.cgi_directories (and the
    next character is a '/' or the end of the string).

    """
    path = self.path
    for prefix in self.cgi_directories:
        cut = len(prefix)
        head, tail = path[:cut], path[cut:]
        if head != prefix:
            continue
        if tail and tail[0] != '/':
            # e.g. '/cgi-binary' must not match '/cgi-bin'
            continue
        self.cgi_info = head, tail[1:]
        return 1
    return 0
|
||||
|
||||
cgi_directories = ['/cgi-bin', '/htbin']
|
||||
|
||||
def run_cgi(self):
    """Execute a CGI script.

    Uses self.cgi_info (set by is_cgi()) to work out the script
    name, the extra path information and the query string.  If the
    script does not exist, is not a plain file or is not executable,
    an error response is sent and nothing is run.

    Otherwise the 200 status line is sent and the process forks.
    The parent waits for the child and logs a non-zero exit status.
    The child builds the CGI environment, tries to switch to the
    'nobody' user, connects the request streams to stdin and stdout
    and execs the script.  end_headers() is not called here, so the
    header block is left open for the script's own output.

    """
    cgi_dir, rest = self.cgi_info
    # Split off the query string first, then the extra path info.
    i = rest.rfind('?')
    if i >= 0:
        rest, query = rest[:i], rest[i+1:]
    else:
        query = ''
    i = rest.find('/')
    if i >= 0:
        script, rest = rest[:i], rest[i:]
    else:
        script, rest = rest, ''
    scriptname = cgi_dir + '/' + script
    scriptfile = self.translate_path(scriptname)
    # send_error() takes (code, message): the script name has to be
    # formatted into the message, not passed as a third argument.
    if not os.path.exists(scriptfile):
        self.send_error(404, "No such CGI script (%s)" % repr(scriptname))
        return
    if not os.path.isfile(scriptfile):
        self.send_error(403, "CGI script is not a plain file (%s)" %
                        repr(scriptname))
        return
    if not executable(scriptfile):
        self.send_error(403, "CGI script is not executable (%s)" %
                        repr(scriptname))
        return
    nobody = nobody_uid()
    self.send_response(200, "Script output follows")
    self.wfile.flush() # Always flush before forking
    pid = os.fork()
    if pid != 0:
        # Parent
        pid, sts = os.waitpid(pid, 0)
        if sts:
            self.log_error("CGI script exit status x%x", sts)
        return
    # Child
    try:
        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = {}
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        host = self.address_string()
        if host != self.client_address[0]:
            env['REMOTE_HOST'] = host
        env['REMOTE_ADDR'] = self.client_address[0]
        # AUTH_TYPE
        # REMOTE_USER
        # REMOTE_IDENT
        env['CONTENT_TYPE'] = self.headers.type
        length = self.headers.getheader('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        accept = []
        for line in self.headers.getallmatchingheaders('accept'):
            if line[:1] in string.whitespace:
                # Continuation line of a folded Accept header.
                accept.append(line.strip())
            else:
                # Skip the leading "Accept:" (7 characters).
                accept = accept + line[7:].split()
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.getheader('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        # XXX Other HTTP_* headers
        decoded_query = query.replace('+', ' ')
        try:
            os.setuid(nobody)
        except os.error:
            # Best effort: the switch fails unless the server has
            # the privilege to change user ids.
            pass
        os.dup2(self.rfile.fileno(), 0)
        os.dup2(self.wfile.fileno(), 1)
        # Nothing may be printed from here on: fd 1 is now the
        # client connection.
        os.execve(scriptfile,
                  [script, decoded_query],
                  env)
    except:
        # Deliberately broad: the child must never fall back into
        # the server loop, whatever went wrong.
        self.server.handle_error(self.request, self.client_address)
        os._exit(127)
|
||||
|
||||
|
||||
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    The result is computed once and cached in the module global
    'nobody'.  If the password database has no 'nobody' entry, one
    more than the highest uid in the database is used instead.

    """
    global nobody
    if nobody is not None:
        return nobody
    import pwd
    # A missing entry is reported as pwd.error by old interpreters
    # and as KeyError by newer ones; accept either.
    missing = (KeyError, getattr(pwd, 'error', KeyError))
    try:
        nobody = pwd.getpwnam('nobody')[2]
    except missing:
        nobody = 1 + max(map(lambda x: x[2], pwd.getpwall()))
    return nobody
|
||||
|
||||
|
||||
def executable(path):
    """Test for executable file.

    Return 1 if PATH can be stat'ed and at least one of its execute
    permission bits (user, group or other) is set, 0 otherwise.

    """
    try:
        st = os.stat(path)
    except os.error:
        return 0
    # 73 is octal 111: the execute bits for user, group and other.
    # st[0] is the file mode.
    if st[0] & 73:
        return 1
    return 0
|
||||
|
||||
|
||||
def test(HandlerClass = CGIHTTPRequestHandler,
         ServerClass = BaseHTTPServer.HTTPServer):
    """Run a CGI-capable HTTP server via SimpleHTTPServer.test().

    The former '-r' option called MyArchive, which is not defined in
    this module, and could only fail with a NameError; it has been
    removed.

    """
    SimpleHTTPServer.test(HandlerClass, ServerClass)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test()
|
|
@ -0,0 +1,161 @@
|
|||
"""Simple HTTP Server.
|
||||
|
||||
This module builds on BaseHTTPServer by implementing the standard GET
|
||||
and HEAD requests in a fairly straightforward manner.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
__version__ = "0.2"
|
||||
|
||||
|
||||
import os
|
||||
import pwd
|
||||
import sys
|
||||
import time
|
||||
import socket
|
||||
import string
|
||||
import posixpath
|
||||
import SocketServer
|
||||
import BaseHTTPServer
|
||||
|
||||
|
||||
def nobody_uid():
    """Internal routine to get nobody's uid.

    If the password database has no 'nobody' entry, one more than
    the highest uid in the database is returned instead.

    """
    # A missing entry is reported as pwd.error by old interpreters
    # and as KeyError by newer ones; accept either.
    missing = (KeyError, getattr(pwd, 'error', KeyError))
    try:
        uid = pwd.getpwnam('nobody')[2]
    except missing:
        uid = 1 + max(map(lambda x: x[2], pwd.getpwall()))
    return uid

# Looked up once, when the module is imported.
nobody = nobody_uid()
|
||||
|
||||
|
||||
class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
|
||||
"""Simple HTTP request handler with GET and HEAD commands.
|
||||
|
||||
This serves files from the current directory and any of its
|
||||
subdirectories. It assumes that all files are plain text files
|
||||
unless they have the extension ".html" in which case it assumes
|
||||
they are HTML files.
|
||||
|
||||
The GET and HEAD requests are identical except that the HEAD
|
||||
request omits the actual contents of the file.
|
||||
|
||||
"""
|
||||
|
||||
server_version = "SimpleHTTP/" + __version__
|
||||
|
||||
def do_GET(self):
    """Serve a GET request.

    send_head() sends the status line and headers and returns the
    open file, or None when it has already answered the request.
    The file is copied to self.wfile and is closed even if the copy
    fails part-way (for instance when the client goes away).

    """
    f = self.send_head()
    if f:
        try:
            self.copyfile(f, self.wfile)
        finally:
            f.close()
|
||||
|
||||
def do_HEAD(self):
    """Serve a HEAD request.

    The headers are sent by send_head(); the file it returns (if
    any) is closed without being copied.

    """
    body = self.send_head()
    if not body:
        return
    body.close()
|
||||
|
||||
def send_head(self):
    """Common code for GET and HEAD commands.

    This sends the response code and MIME headers.

    Return value is either a file object (which has to be copied
    to the outputfile by the caller unless the command was HEAD,
    and must be closed by the caller under all circumstances), or
    None, in which case the caller has nothing further to do.

    Directories are refused with a 403 error and files that cannot
    be opened with a 404 error.

    """
    path = self.translate_path(self.path)
    if os.path.isdir(path):
        self.send_error(403, "Directory listing not supported")
        return None
    try:
        # Binary mode: the same code path serves non-text files, and
        # text mode would alter their bytes on platforms that
        # translate line endings.
        f = open(path, 'rb')
    except IOError:
        self.send_error(404, "File not found")
        return None
    self.send_response(200)
    self.send_header("Content-type", self.guess_type(path))
    self.end_headers()
    return f
|
||||
|
||||
def translate_path(self, path):
    """Translate a /-separated PATH to the local filename syntax.

    The path is normalized and its non-empty components are joined,
    one by one, onto the current working directory.  Components
    that mean special things to the local file system (e.g. drive
    or directory names) are ignored.  (XXX They should probably be
    diagnosed.)

    """
    path = posixpath.normpath(path)
    result = os.getcwd()
    for word in path.split('/'):
        if not word:
            continue
        # Keep only the final name of each component, dropping any
        # drive prefix or embedded local directory part.
        drive, word = os.path.splitdrive(word)
        head, word = os.path.split(word)
        if word in (os.curdir, os.pardir):
            continue
        result = os.path.join(result, word)
    return result
|
||||
|
||||
def copyfile(self, source, outputfile):
    """Copy all data between two file objects.

    The SOURCE argument is a file object open for reading (or
    anything with a read() method) and the OUTPUTFILE argument is a
    file object open for writing (or anything with a write()
    method).

    The only reason for overriding this would be to change the
    block size or perhaps to replace newlines by CRLF -- note
    however that the default server uses this to copy binary data
    as well.

    """
    chunk_size = 8192
    chunk = source.read(chunk_size)
    while chunk:
        outputfile.write(chunk)
        chunk = source.read(chunk_size)
|
||||
|
||||
def guess_type(self, path):
    """Guess the type of a file.

    Argument is a PATH (a filename).

    Return value is a string of the form type/subtype, usable for a
    MIME Content-type header.

    The default implementation looks the file's extension up in the
    table self.extensions_map, falling back to the entry for the
    empty extension; however it would be permissible (if slow) to
    look inside the data to make a better guess.

    """
    base, ext = posixpath.splitext(path)
    try:
        return self.extensions_map[ext]
    except KeyError:
        # The '' entry is the mandatory default.
        return self.extensions_map['']
|
||||
|
||||
extensions_map = {
|
||||
'': 'text/plain', # Default, *must* be present
|
||||
'.html': 'text/html',
|
||||
}
|
||||
|
||||
|
||||
def test(HandlerClass = SimpleHTTPRequestHandler,
         ServerClass = SocketServer.TCPServer):
    """Run the simple file server by delegating to BaseHTTPServer.test()."""
    BaseHTTPServer.test(HandlerClass, ServerClass)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test()
|
|
@ -0,0 +1,413 @@
|
|||
"""Generic socket server classes.
|
||||
|
||||
This module tries to capture the various aspects of defining a server:
|
||||
|
||||
- address family:
|
||||
- AF_INET: IP (Internet Protocol) sockets (default)
|
||||
- AF_UNIX: Unix domain sockets
|
||||
- others, e.g. AF_DECNET are conceivable (see <socket.h>)
|
||||
- socket type:
|
||||
- SOCK_STREAM (reliable stream, e.g. TCP)
|
||||
- SOCK_DGRAM (datagrams, e.g. UDP)
|
||||
- client address verification before further looking at the request
|
||||
(This is actually a hook for any processing that needs to look
|
||||
at the request before anything else, e.g. logging)
|
||||
- how to handle multiple requests:
|
||||
- synchronous (one request is handled at a time)
|
||||
- forking (each request is handled by a new process)
|
||||
- threading (each request is handled by a new thread)
|
||||
|
||||
The classes in this module favor the server type that is simplest to
|
||||
write: a synchronous TCP/IP server. This is bad class design, but
|
||||
saves some typing. (There's also the issue that a deep class hierarchy
|
||||
slows down method lookups.)
|
||||
|
||||
There are four classes in an inheritance diagram that represent
|
||||
synchronous servers of four types:
|
||||
|
||||
+-----------+ +------------------+
|
||||
| TCPServer |------->| UnixStreamServer |
|
||||
+-----------+ +------------------+
|
||||
|
|
||||
v
|
||||
+-----------+ +--------------------+
|
||||
| UDPServer |------->| UnixDatagramServer |
|
||||
+-----------+ +--------------------+
|
||||
|
||||
(Note that UnixDatagramServer derives from UDPServer, not from
|
||||
UnixStreamServer -- the only difference between an IP and a Unix
|
||||
stream server is the address family, which is simply repeated in both
|
||||
unix server classes.)
|
||||
|
||||
Forking and threading versions of each type of server can be created
|
||||
using the ForkingMixIn and ThreadingMixIn mix-in classes. For
|
||||
instance, a threading UDP server class is created as follows:
|
||||
|
||||
class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass
|
||||
|
||||
(The Mix-in class must come first, since it overrides a method defined
|
||||
in UDPServer!)
|
||||
|
||||
To implement a service, you must derive a class from
|
||||
BaseRequestHandler and redefine its handle() method. You can then run
|
||||
various versions of the service by combining one of the server classes
|
||||
with your request handler class.
|
||||
|
||||
The request handler class must be different for datagram or stream
|
||||
services. This can be hidden by using the mix-in request handler
|
||||
classes StreamRequestHandler or DatagramRequestHandler.
|
||||
|
||||
Of course, you still have to use your head!
|
||||
|
||||
For instance, it makes no sense to use a forking server if the service
|
||||
contains state in memory that can be modified by requests (since the
|
||||
modifications in the child process would never reach the initial state
|
||||
kept in the parent process and passed to each child). In this case,
|
||||
you can use a threading server, but you will probably have to use
|
||||
locks to avoid two requests that come in nearly simultaneously to apply
|
||||
conflicting changes to the server state.
|
||||
|
||||
On the other hand, if you are building e.g. an HTTP server, where all
|
||||
data is stored externally (e.g. in the file system), a synchronous
|
||||
class will essentially render the service "deaf" while one request is
|
||||
being handled -- which may be for a very long time if a client is slow
|
||||
to read all the data it has requested. Here a threading or forking
|
||||
server is appropriate.
|
||||
|
||||
In some cases, it may be appropriate to process part of a request
|
||||
synchronously, but to finish processing in a forked child depending on
|
||||
the request data. This can be implemented by using a synchronous
|
||||
server and doing an explicit fork in the request handler class's
|
||||
handle() method.
|
||||
|
||||
Another approach to handling multiple simultaneous requests in an
|
||||
environment that supports neither threads nor fork (or where these are
|
||||
too expensive or inappropriate for the service) is to maintain an
|
||||
explicit table of partially finished requests and to use select() to
|
||||
decide which request to work on next (or whether to handle a new
|
||||
incoming request). This is particularly important for stream services
|
||||
where each client can potentially be connected for a long time (if
|
||||
threads or subprocesses can't be used).
|
||||
|
||||
Future work:
|
||||
- Standard classes for Sun RPC (which uses either UDP or TCP)
|
||||
- Standard mix-in classes to implement various authentication
|
||||
and encryption schemes
|
||||
- Standard framework for select-based multiplexing
|
||||
|
||||
XXX Open problems:
|
||||
- What to do with out-of-band data?
|
||||
|
||||
"""
|
||||
|
||||
|
||||
__version__ = "0.2"
|
||||
|
||||
|
||||
import socket
|
||||
import sys
|
||||
import os
|
||||
|
||||
|
||||
class TCPServer:
|
||||
|
||||
"""Base class for various socket-based server classes.
|
||||
|
||||
Defaults to synchronous IP stream (i.e., TCP).
|
||||
|
||||
Methods for the caller:
|
||||
|
||||
- __init__(server_address, RequestHandlerClass)
|
||||
- serve_forever()
|
||||
- handle_request() # if you don't use serve_forever()
|
||||
- fileno() -> int # for select()
|
||||
|
||||
Methods that may be overridden:
|
||||
|
||||
- server_bind()
|
||||
- server_activate()
|
||||
- get_request() -> request, client_address
|
||||
- verify_request(request, client_address)
|
||||
- process_request(request, client_address)
|
||||
- handle_error()
|
||||
|
||||
Methods for derived classes:
|
||||
|
||||
- finish_request(request, client_address)
|
||||
|
||||
Class variables that may be overridden by derived classes or
|
||||
instances:
|
||||
|
||||
- address_family
|
||||
- socket_type
|
||||
- request_queue_size (only for stream sockets)
|
||||
|
||||
Instance variables:
|
||||
|
||||
- server_address
|
||||
- RequestHandlerClass
|
||||
- socket
|
||||
|
||||
"""
|
||||
|
||||
address_family = socket.AF_INET
|
||||
|
||||
socket_type = socket.SOCK_STREAM
|
||||
|
||||
request_queue_size = 5
|
||||
|
||||
def __init__(self, server_address, RequestHandlerClass):
|
||||
"""Constructor. May be extended, do not override."""
|
||||
self.server_address = server_address
|
||||
self.RequestHandlerClass = RequestHandlerClass
|
||||
self.socket = socket.socket(self.address_family,
|
||||
self.socket_type)
|
||||
self.server_bind()
|
||||
self.server_activate()
|
||||
|
||||
def server_bind(self):
|
||||
"""Called by constructor to bind the socket.
|
||||
|
||||
May be overridden.
|
||||
|
||||
"""
|
||||
self.socket.bind(self.server_address)
|
||||
|
||||
def server_activate(self):
|
||||
"""Called by constructor to activate the server.
|
||||
|
||||
May be overridden.
|
||||
|
||||
"""
|
||||
self.socket.listen(self.request_queue_size)
|
||||
|
||||
def fileno(self):
|
||||
"""Return socket file number.
|
||||
|
||||
Interface required by select().
|
||||
|
||||
"""
|
||||
return self.socket.fileno()
|
||||
|
||||
def serve_forever(self):
|
||||
"""Handle one request at a time until doomsday."""
|
||||
while 1:
|
||||
self.handle_request()
|
||||
|
||||
# The distinction between handling, getting, processing and
|
||||
# finishing a request is fairly arbitrary. Remember:
|
||||
#
|
||||
# - handle_request() is the top-level call. It calls
|
||||
# get_request(), verify_request() and process_request()
|
||||
# - get_request() is different for stream or datagram sockets
|
||||
# - process_request() is the place that may fork a new process
|
||||
# or create a new thread to finish the request
|
||||
# - finish_request() instantiates the request handler class;
|
||||
# this constructor will handle the request all by itself
|
||||
|
||||
def handle_request(self):
|
||||
"""Handle one request, possibly blocking."""
|
||||
request, client_address = self.get_request()
|
||||
if self.verify_request(request, client_address):
|
||||
try:
|
||||
self.process_request(request, client_address)
|
||||
except:
|
||||
self.handle_error(request, client_address)
|
||||
|
||||
def get_request(self):
|
||||
"""Get the request and client address from the socket.
|
||||
|
||||
May be overridden.
|
||||
|
||||
"""
|
||||
return self.socket.accept()
|
||||
|
||||
def verify_request(self, request, client_address):
|
||||
"""Verify the request. May be overridden.
|
||||
|
||||
Return true if we should proceed with this request.
|
||||
|
||||
"""
|
||||
return 1
|
||||
|
||||
def process_request(self, request, client_address):
|
||||
"""Call finish_request.
|
||||
|
||||
Overridden by ForkingMixIn and ThreadingMixIn.
|
||||
|
||||
"""
|
||||
self.finish_request(request, client_address)
|
||||
|
||||
def finish_request(self, request, client_address):
|
||||
"""Finish one request by instantiating RequestHandlerClass."""
|
||||
self.RequestHandlerClass(request, client_address, self)
|
||||
|
||||
def handle_error(self, request, client_address):
|
||||
"""Handle an error gracefully. May be overridden.
|
||||
|
||||
The default is to print a traceback and continue.
|
||||
|
||||
"""
|
||||
exc, value, tb = sys.exc_type, sys.exc_value, sys.exc_traceback
|
||||
print '-'*40
|
||||
print 'Exception happened during processing of request from',
|
||||
print client_address
|
||||
import traceback
|
||||
traceback.print_exception(exc, value, tb)
|
||||
print '-'*40
|
||||
|
||||
|
||||
class UDPServer(TCPServer):

    """UDP server class.

    Differs from TCPServer in the socket type, in how a request is
    obtained (one datagram per request), and in that the socket is
    never put into listening mode.

    Class variable that may be overridden:

    - max_packet_size: largest datagram size passed to recvfrom()

    """

    socket_type = socket.SOCK_DGRAM

    max_packet_size = 8192

    def get_request(self):
        """Receive one datagram; return (request, client_address).

        The request is the pair (data, socket): the packet contents
        plus the server socket over which a reply can be sent.  This
        is the shape that DatagramRequestHandler.setup() unpacks.

        """
        # max_packet_size is a class variable, so it must be looked
        # up through self; a bare name would be a NameError.
        data, client_address = self.socket.recvfrom(self.max_packet_size)
        return (data, self.socket), client_address

    def server_activate(self):
        """Called by constructor to activate the server.

        Nothing to do: listen() only applies to stream sockets, and
        the inherited implementation would fail on a datagram socket.

        """
        pass
|
||||
|
||||
|
||||
# The Unix-domain variants are only defined on platforms whose socket
# module provides AF_UNIX.
if hasattr(socket, 'AF_UNIX'):

    class UnixStreamServer(TCPServer):

        """Stream server using the Unix address family."""

        address_family = socket.AF_UNIX

    class UnixDatagramServer(UDPServer):

        """Datagram server using the Unix address family."""

        address_family = socket.AF_UNIX
|
||||
|
||||
|
||||
class ForkingMixIn:

    """Mix-in class to handle each request in a new process.

    The process ids of the children forked so far are kept in
    active_children (None until the first fork) so that children which
    have exited can be reaped before the next fork.

    """

    active_children = None

    def collect_children(self):
        """Internal routine to reap children that have exited.

        Never blocks: stops as soon as no exited child is available.

        """
        while self.active_children:
            try:
                # waitpid() returns a (pid, status) pair; with WNOHANG
                # the pid is 0 when no child has exited yet.
                pid, status = os.waitpid(0, os.WNOHANG)
            except os.error:
                # E.g. there are no children left to wait for
                break
            if not pid:
                break
            # Only forget children we actually recorded; waitpid(0)
            # may also report a child forked by other code.
            if pid in self.active_children:
                self.active_children.remove(pid)

    def process_request(self, request, client_address):
        """Fork a new subprocess to process the request.

        The parent records the child's pid and returns at once; the
        child finishes the request and then exits without returning.

        """
        self.collect_children()
        pid = os.fork()
        if pid:
            # Parent process
            if self.active_children is None:
                self.active_children = []
            self.active_children.append(pid)
            return
        else:
            # Child process.
            # This must never return, hence os._exit()!
            try:
                self.finish_request(request, client_address)
                os._exit(0)
            except:
                try:
                    self.handle_error(request,
                                      client_address)
                finally:
                    os._exit(1)
|
||||
|
||||
|
||||
class ThreadingMixIn:

    """Mix-in class that hands each request to a thread of its own."""

    def process_request(self, request, client_address):
        """Run finish_request(request, client_address) in a new thread."""
        # Imported here so the module can be used without thread support
        # as long as this mix-in is never exercised.
        import thread
        worker_args = (request, client_address)
        thread.start_new_thread(self.finish_request, worker_args)
|
||||
|
||||
|
||||
# Ready-made combinations.  The mix-in is listed first so that its
# process_request() takes precedence over the server's own.

class ForkingUDPServer(ForkingMixIn, UDPServer):
    """UDP server that forks a process per request."""

class ForkingTCPServer(ForkingMixIn, TCPServer):
    """TCP server that forks a process per request."""


class ThreadingUDPServer(ThreadingMixIn, UDPServer):
    """UDP server that starts a thread per request."""

class ThreadingTCPServer(ThreadingMixIn, TCPServer):
    """TCP server that starts a thread per request."""
|
||||
|
||||
|
||||
class BaseRequestHandler:

    """Base class for request handler classes.

    One instance is created per request.  The constructor stores its
    arguments as the instance variables request, client_address and
    server, and then calls setup(), handle() and finish() in that
    order.  To implement a specific service, derive a class which
    defines a handle() method.

    Inside handle(), the request is available as self.request, the
    client address as self.client_address, and the server (in case
    per-server information is needed) as self.server.  Because every
    request gets its own instance, handle() is free to define
    arbitrary other instance variables.

    """

    def __init__(self, request, client_address, server):
        self.request = request
        self.client_address = client_address
        self.server = server
        try:
            # The whole request is dealt with right here, in order
            self.setup()
            self.handle()
            self.finish()
        finally:
            # Help garbage collection
            sys.exc_traceback = None

    def setup(self):
        # Hook run before handle(); does nothing by default
        pass

    def __del__(self):
        # Nothing to clean up by default
        pass

    def handle(self):
        # Hook that implements the service; does nothing by default
        pass

    def finish(self):
        # Hook run after handle(); does nothing by default
        pass
|
||||
|
||||
|
||||
# The following two classes make it possible to use the same service
|
||||
# class for stream or datagram servers.
|
||||
# Each class sets up these instance variables:
|
||||
# - rfile: a file object from which the request is read
|
||||
# - wfile: a file object to which the reply is written
|
||||
# When the handle() method returns, wfile is flushed properly
|
||||
|
||||
|
||||
class StreamRequestHandler(BaseRequestHandler):

    """Request handler base that wraps a stream socket in file objects.

    setup() provides self.connection (the request socket), self.rfile
    for reading and self.wfile for writing; finish() flushes wfile.

    """

    def setup(self):
        conn = self.request
        self.connection = conn
        self.rfile = conn.makefile('r')
        # Second argument 0 is the buffer size requested for the writer
        self.wfile = conn.makefile('w', 0)

    def finish(self):
        out = self.wfile
        out.flush()
|
||||
|
||||
|
||||
class DatagramRequestHandler(BaseRequestHandler):

    """Define self.rfile and self.wfile for datagram sockets.

    The request is expected to be a (packet, socket) pair.  setup()
    exposes the packet through self.rfile and collects the reply in
    self.wfile; finish() sends whatever was written to self.wfile back
    to the client as a single datagram.

    """

    def setup(self):
        import StringIO
        self.packet, self.socket = self.request
        self.rfile = StringIO.StringIO(self.packet)
        # The reply buffer must start out empty: seeding it with the
        # incoming packet would leave request bytes at the end of any
        # reply shorter than the request.
        self.wfile = StringIO.StringIO()

    def finish(self):
        # The socket is the server's unconnected datagram socket, so
        # the destination has to be given explicitly.
        self.socket.sendto(self.wfile.getvalue(), self.client_address)
|
|
@ -0,0 +1,365 @@
|
|||
"""Import hook support.
|
||||
|
||||
Consistent use of this module will make it possible to change the
|
||||
different mechanisms involved in loading modules independently.
|
||||
|
||||
While the built-in module imp exports interfaces to the built-in
|
||||
module searching and loading algorithm, and it is possible to replace
|
||||
the built-in function __import__ in order to change the semantics of
|
||||
the import statement, until now it has been difficult to combine the
|
||||
effect of different __import__ hacks, like loading modules from URLs
|
||||
(rimport.py), implementing a hierarchical module namespace (newimp.py)
|
||||
or restricted execution (rexec.py).
|
||||
|
||||
This module defines three new concepts:
|
||||
|
||||
(1) A "file system hooks" class provides an interface to a filesystem.
|
||||
|
||||
One hooks class is defined (Hooks), which uses the interface provided
|
||||
by standard modules os and os.path. It should be used as the base
|
||||
class for other hooks classes.
|
||||
|
||||
(2) A "module loader" class provides an interface to search for a
|
||||
module in a search path and to load it. It defines a method which
|
||||
searches for a module in a single directory; by overriding this method
|
||||
one can redefine the details of the search. If the directory is None,
|
||||
built-in and frozen modules are searched instead.
|
||||
|
||||
Two module loader classes are defined, both implementing the search
|
||||
strategy used by the built-in __import__ function: BasicModuleLoader uses
|
||||
the imp module's find_module interface, while ModuleLoader
|
||||
uses a file system hooks class to interact with the file system. Both
|
||||
use the imp module's load_* interfaces to actually load the module.
|
||||
|
||||
(3) A "module importer" class provides an interface to import a
|
||||
module, as well as interfaces to reload and unload a module. It also
|
||||
provides interfaces to install and uninstall itself instead of the
|
||||
default __import__ and reload (and unload) functions.
|
||||
|
||||
One module importer class is defined (ModuleImporter), which uses a
|
||||
module loader instance passed in (by default ModuleLoader is
|
||||
instantiated).
|
||||
|
||||
The classes defined here should be used as base classes for extended
|
||||
functionality along those lines.
|
||||
|
||||
If a module importer class supports dotted names, its import_module()
|
||||
must return a different value depending on whether it is called on
|
||||
behalf of a "from ... import ..." statement or not. (This is caused
|
||||
by the way the __import__ hook is used by the Python interpreter.) It
|
||||
would also be wise to install a different version of reload().
|
||||
|
||||
XXX Should the imp.load_* functions also be called via the hooks
|
||||
instance?
|
||||
|
||||
"""
|
||||
|
||||
|
||||
import __builtin__
|
||||
import imp
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
from imp import C_EXTENSION, PY_SOURCE, PY_COMPILED
|
||||
BUILTIN_MODULE = 32
|
||||
FROZEN_MODULE = 33
|
||||
|
||||
|
||||
class _Verbose:
|
||||
|
||||
def __init__(self, verbose = 0):
|
||||
self.verbose = verbose
|
||||
|
||||
def get_verbose(self):
|
||||
return self.verbose
|
||||
|
||||
def set_verbose(self, verbose):
|
||||
self.verbose = verbose
|
||||
|
||||
# XXX The following is an experimental interface
|
||||
|
||||
def note(self, *args):
|
||||
if self.verbose:
|
||||
apply(self.message, args)
|
||||
|
||||
def message(self, format, *args):
|
||||
print format%args
|
||||
|
||||
|
||||
class BasicModuleLoader(_Verbose):
|
||||
|
||||
"""Basic module loader.
|
||||
|
||||
This provides the same functionality as built-in import. It
|
||||
doesn't deal with checking sys.modules -- all it provides is
|
||||
find_module() and a load_module(), as well as find_module_in_dir()
|
||||
which searches just one directory, and can be overridden by a
|
||||
derived class to change the module search algorithm when the basic
|
||||
dependency on sys.path is unchanged.
|
||||
|
||||
The interface is a little more convenient than imp's:
|
||||
find_module(name, [path]) returns None or 'stuff', and
|
||||
load_module(name, stuff) loads the module.
|
||||
|
||||
"""
|
||||
|
||||
def find_module(self, name, path = None):
|
||||
if path is None:
|
||||
path = [None] + self.default_path()
|
||||
for dir in path:
|
||||
stuff = self.find_module_in_dir(name, dir)
|
||||
if stuff: return stuff
|
||||
return None
|
||||
|
||||
def default_path(self):
|
||||
return sys.path
|
||||
|
||||
def find_module_in_dir(self, name, dir):
|
||||
if dir is None:
|
||||
return self.find_builtin_module(name)
|
||||
else:
|
||||
try:
|
||||
return imp.find_module(name, [dir])
|
||||
except ImportError:
|
||||
return None
|
||||
|
||||
def find_builtin_module(self, name):
|
||||
if imp.is_builtin(name):
|
||||
return None, '', ('', '', BUILTIN_MODULE)
|
||||
if imp.is_frozen(name):
|
||||
return None, '', ('', '', FROZEN_MODULE)
|
||||
return None
|
||||
|
||||
def load_module(self, name, stuff):
|
||||
file, filename, (suff, mode, type) = stuff
|
||||
if type == BUILTIN_MODULE:
|
||||
return imp.init_builtin(name)
|
||||
if type == FROZEN_MODULE:
|
||||
return imp.init_frozen(name)
|
||||
if type == C_EXTENSION:
|
||||
return imp.load_dynamic(name, filename, file)
|
||||
if type == PY_SOURCE:
|
||||
return imp.load_source(name, filename, file)
|
||||
if type == PY_COMPILED:
|
||||
return imp.load_compiled(name, filename, file)
|
||||
raise ImportError, "Unrecognized module type (%s) for %s" % \
|
||||
(`type`, name)
|
||||
|
||||
|
||||
class Hooks(_Verbose):

    """Hooks into the filesystem and interpreter.

    Every operation is a thin method around the corresponding imp,
    sys, os or os.path facility, so this base class behaves just like
    the native filesystem.  Derive a subclass to redefine the
    filesystem interface, e.g. to merge it with the URL space.

    """

    # imp interface

    def get_suffixes(self):
        return imp.get_suffixes()

    def new_module(self, name):
        return imp.new_module(name)

    def is_builtin(self, name):
        return imp.is_builtin(name)

    def init_builtin(self, name):
        return imp.init_builtin(name)

    def is_frozen(self, name):
        return imp.is_frozen(name)

    def init_frozen(self, name):
        return imp.init_frozen(name)

    def get_frozen_object(self, name):
        return imp.get_frozen_object(name)

    def load_source(self, name, filename, file=None):
        return imp.load_source(name, filename, file)

    def load_compiled(self, name, filename, file=None):
        return imp.load_compiled(name, filename, file)

    def load_dynamic(self, name, filename, file=None):
        return imp.load_dynamic(name, filename, file)

    def add_module(self, name):
        """Return the module registered under name, creating it if new."""
        registry = self.modules_dict()
        if registry.has_key(name):
            return registry[name]
        module = self.new_module(name)
        registry[name] = module
        return module

    # sys interface

    def modules_dict(self):
        return sys.modules

    def default_path(self):
        return sys.path

    # os.path interface: pathname manipulation

    def path_split(self, x):
        return os.path.split(x)

    def path_join(self, x, y):
        return os.path.join(x, y)

    def path_isabs(self, x):
        return os.path.isabs(x)

    # etc.

    # os.path interface: filesystem queries

    def path_exists(self, x):
        return os.path.exists(x)

    def path_isdir(self, x):
        return os.path.isdir(x)

    def path_isfile(self, x):
        return os.path.isfile(x)

    def path_islink(self, x):
        return os.path.islink(x)

    # etc.

    # File and directory access.  Each operation is paired with a
    # class attribute naming the exception a caller should catch.

    def openfile(self, *x):
        return apply(open, x)

    openfile_error = IOError

    def listdir(self, x):
        return os.listdir(x)

    listdir_error = os.error

    # etc.
|
||||
# etc.
|
||||
|
||||
|
||||
class ModuleLoader(BasicModuleLoader):
|
||||
|
||||
"""Default module loader; uses file system hooks.
|
||||
|
||||
By defining suitable hooks, you might be able to load modules from
|
||||
other sources than the file system, e.g. from compressed or
|
||||
encrypted files, tar files or (if you're brave!) URLs.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, hooks = None, verbose = 0):
|
||||
BasicModuleLoader.__init__(self, verbose)
|
||||
self.hooks = hooks or Hooks(verbose)
|
||||
|
||||
def default_path(self):
|
||||
return self.hooks.default_path()
|
||||
|
||||
def modules_dict(self):
|
||||
return self.hooks.modules_dict()
|
||||
|
||||
def get_hooks(self):
|
||||
return self.hooks
|
||||
|
||||
def set_hooks(self, hooks):
|
||||
self.hooks = hooks
|
||||
|
||||
def find_builtin_module(self, name):
|
||||
if self.hooks.is_builtin(name):
|
||||
return None, '', ('', '', BUILTIN_MODULE)
|
||||
if self.hooks.is_frozen(name):
|
||||
return None, '', ('', '', FROZEN_MODULE)
|
||||
return None
|
||||
|
||||
def find_module_in_dir(self, name, dir):
|
||||
if dir is None:
|
||||
return self.find_builtin_module(name)
|
||||
for info in self.hooks.get_suffixes():
|
||||
suff, mode, type = info
|
||||
fullname = self.hooks.path_join(dir, name+suff)
|
||||
try:
|
||||
fp = self.hooks.openfile(fullname, mode)
|
||||
return fp, fullname, info
|
||||
except self.hooks.openfile_error:
|
||||
pass
|
||||
return None
|
||||
|
||||
def load_module(self, name, stuff):
|
||||
file, filename, (suff, mode, type) = stuff
|
||||
if type == BUILTIN_MODULE:
|
||||
return self.hooks.init_builtin(name)
|
||||
if type == FROZEN_MODULE:
|
||||
return self.hooks.init_frozen(name)
|
||||
if type == C_EXTENSION:
|
||||
return self.hooks.load_dynamic(name, filename, file)
|
||||
if type == PY_SOURCE:
|
||||
return self.hooks.load_source(name, filename, file)
|
||||
if type == PY_COMPILED:
|
||||
return self.hooks.load_compiled(name, filename, file)
|
||||
raise ImportError, "Unrecognized module type (%s) for %s" % \
|
||||
(`type`, name)
|
||||
|
||||
|
||||
class FancyModuleLoader(ModuleLoader):
|
||||
|
||||
"""Fancy module loader -- parses and execs the code itself."""
|
||||
|
||||
def load_module(self, name, stuff):
|
||||
file, filename, (suff, mode, type) = stuff
|
||||
if type == FROZEN_MODULE:
|
||||
code = self.hooks.get_frozen_object(name)
|
||||
elif type == PY_COMPILED:
|
||||
file.seek(8)
|
||||
code = marshal.load(file)
|
||||
elif type == PY_SOURCE:
|
||||
data = file.read()
|
||||
code = compile(data, filename, 'exec')
|
||||
else:
|
||||
return ModuleLoader.load_module(self, name, stuff)
|
||||
m = self.hooks.add_module(name)
|
||||
exec code in m.__dict__
|
||||
return m
|
||||
|
||||
|
||||
class ModuleImporter(_Verbose):
|
||||
|
||||
"""Default module importer; uses module loader.
|
||||
|
||||
This provides the same functionality as built-in import, when
|
||||
combined with ModuleLoader.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, loader = None, verbose = 0):
|
||||
_Verbose.__init__(self, verbose)
|
||||
self.loader = loader or ModuleLoader(None, verbose)
|
||||
self.modules = self.loader.modules_dict()
|
||||
|
||||
def get_loader(self):
|
||||
return self.loader
|
||||
|
||||
def set_loader(self, loader):
|
||||
self.loader = loader
|
||||
|
||||
def get_hooks(self):
|
||||
return self.loader.get_hooks()
|
||||
|
||||
def set_hooks(self, hooks):
|
||||
return self.loader.set_hooks(hooks)
|
||||
|
||||
def import_module(self, name, globals={}, locals={}, fromlist=[]):
|
||||
if self.modules.has_key(name):
|
||||
return self.modules[name] # Fast path
|
||||
stuff = self.loader.find_module(name)
|
||||
if not stuff:
|
||||
raise ImportError, "No module named %s" % name
|
||||
return self.loader.load_module(name, stuff)
|
||||
|
||||
def reload(self, module, path = None):
|
||||
stuff = self.loader.find_module(name, path)
|
||||
if not stuff:
|
||||
raise ImportError, "Module %s not found for reload" % name
|
||||
return self.loader.load_module(name, stuff)
|
||||
|
||||
def unload(self, module):
|
||||
del self.modules[module.__name__]
|
||||
# XXX Should this try to clear the module's namespace?
|
||||
|
||||
def install(self):
|
||||
self.save_import_module = __builtin__.__import__
|
||||
self.save_reload = __builtin__.reload
|
||||
if not hasattr(__builtin__, 'unload'):
|
||||
__builtin__.unload = None
|
||||
self.save_unload = __builtin__.unload
|
||||
__builtin__.__import__ = self.import_module
|
||||
__builtin__.reload = self.reload
|
||||
__builtin__.unload = self.unload
|
||||
|
||||
def uninstall(self):
|
||||
__builtin__.__import__ = self.save_import_module
|
||||
__builtin__.reload = self.save_reload
|
||||
__builtin__.unload = self.save_unload
|
||||
if not __builtin__.unload:
|
||||
del __builtin__.unload
|
||||
|
||||
|
||||
# XXX Some experimental hacks -- importing ihooks auto-installs!
|
||||
# XXX (That's supposed to be transparent anyway...)
|
||||
|
||||
default_importer = None
|
||||
current_importer = None
|
||||
|
||||
def install(importer = None):
    """Make an importer the current one and install it.

    The first true value among the argument, default_importer and a
    freshly created ModuleImporter is used.

    """
    global current_importer
    chosen = importer or default_importer
    if not chosen:
        chosen = ModuleImporter()
    current_importer = chosen
    current_importer.install()
|
||||
|
||||
def uninstall():
    """Uninstall the current importer (which stays the current one)."""
    # current_importer is only read here, never rebound
    current_importer.uninstall()
|
||||
|
||||
|
||||
install()
|
|
@ -0,0 +1,390 @@
|
|||
"""New import scheme with package support.
|
||||
|
||||
A Package is a module that can contain other modules. Packages can be
|
||||
nested. Packages introduce dotted names for modules, like P.Q.M, which
|
||||
could correspond to a file P/Q/M.py found somewhere on sys.path. It
|
||||
is possible to import a package itself, though this makes little sense
|
||||
unless the package contains a module called __init__.
|
||||
|
||||
A package has two variables that control the namespace used for
|
||||
packages and modules, both initialized to sensible defaults the first
|
||||
time the package is referenced.
|
||||
|
||||
(1) A package's *module search path*, contained in the per-package
|
||||
variable __path__, defines a list of *directories* where submodules or
|
||||
subpackages of the package are searched. It is initialized to the
|
||||
directory containing the package. Setting this variable to None makes
|
||||
the module search path default to sys.path (this is not quite the same
|
||||
as setting it to sys.path, since the latter won't track later
|
||||
assignments to sys.path).
|
||||
|
||||
(2) A package's *import domain*, contained in the per-package variable
|
||||
__domain__, defines a list of *packages* that are searched (using
|
||||
their respective module search paths) to satisfy imports. It is
|
||||
initialized to the list consisting of the package itself, its parent
|
||||
package, its parent's parent, and so on, ending with the root package
|
||||
(the nameless package containing all top-level packages and modules,
|
||||
whose module search path is None, implying sys.path).
|
||||
|
||||
The default domain implements a search algorithm called "expanding
|
||||
search". An alternative search algorithm called "explicit search"
|
||||
fixes the import search path to contain only the root package,
|
||||
requiring the modules in the package to name all imported modules by
|
||||
their full name. The convention of using '__' to refer to the current
|
||||
package (both as a per-module variable and in module names) can be
|
||||
used by packages using explicit search to refer to modules in the same
|
||||
package; this combination is known as "explicit-relative search".
|
||||
|
||||
The PackageImporter and PackageLoader classes together implement the
|
||||
following policies:
|
||||
|
||||
- There is a root package, whose name is ''. It cannot be imported
|
||||
directly but may be referenced, e.g. by using '__' from a top-level
|
||||
module.
|
||||
|
||||
- In each module or package, the variable '__' contains a reference to
|
||||
the parent package; in the root package, '__' points to itself.
|
||||
|
||||
- In the name for imported modules (e.g. M in "import M" or "from M
|
||||
import ..."), a leading '__' refers to the current package (i.e.
|
||||
the package containing the current module); leading '__.__' and so
|
||||
on refer to the current package's parent, and so on. The use of
|
||||
'__' elsewhere in the module name is not supported.
|
||||
|
||||
- Modules are searched using the "expanding search" algorithm by
|
||||
virtue of the default value for __domain__.
|
||||
|
||||
- If A.B.C is imported, A is searched using __domain__; then
|
||||
subpackage B is searched in A using its __path__, and so on.
|
||||
|
||||
- Built-in modules have priority: even if a file sys.py exists in a
|
||||
package, "import sys" imports the built-in sys module.
|
||||
|
||||
- The same holds for frozen modules, for better or for worse.
|
||||
|
||||
- Submodules and subpackages are not automatically loaded when their
|
||||
parent package is loaded.
|
||||
|
||||
- The construct "from package import *" is illegal. (It can still be
|
||||
used to import names from a module.)
|
||||
|
||||
- When "from package import module1, module2, ..." is used, those
|
||||
modules are explicitly loaded.
|
||||
|
||||
- When a package is loaded, if it has a submodule __init__, that
|
||||
module is loaded. This is the place where required submodules can
|
||||
be loaded, the __path__ variable extended, etc. The __init__ module
|
||||
is loaded even if the package was loaded only in order to create a
|
||||
stub for a sub-package: if "import P.Q.R" is the first reference to
|
||||
P, and P has a submodule __init__, P.__init__ is loaded before P.Q
|
||||
is even searched.
|
||||
|
||||
Caveats:
|
||||
|
||||
- It is possible to import a package that has no __init__ submodule;
|
||||
this is not particularly useful but there may be useful applications
|
||||
for it (e.g. to manipulate its search paths from the outside!).
|
||||
|
||||
- There are no special provisions for os.chdir(). If you plan to use
|
||||
os.chdir() before you have imported all your modules, it is better
|
||||
not to have relative pathnames in sys.path. (This could actually be
|
||||
fixed by changing the implementation of path_join() in the hook to
|
||||
absolutize paths.)
|
||||
|
||||
- Packages and modules are introduced in sys.modules as soon as their
|
||||
loading is started. When the loading is terminated by an exception,
|
||||
the sys.modules entries remain around.
|
||||
|
||||
- There are no special measures to support mutually recursive modules,
|
||||
but it will work under the same conditions where it works in the
|
||||
flat module space system.
|
||||
|
||||
- Sometimes dummy entries (whose value is None) are entered in
|
||||
sys.modules, to indicate that a particular module does not exist --
|
||||
this is done to speed up the expanding search algorithm when a
|
||||
module residing at a higher level is repeatedly imported (Python
|
||||
promises that importing a previously imported module is cheap!)
|
||||
|
||||
- Although dynamically loaded extensions are allowed inside packages,
|
||||
the current implementation (hardcoded in the interpreter) of their
|
||||
initialization may cause problems if an extension invokes the
|
||||
interpreter during its initialization.
|
||||
|
||||
- reload() may find another version of the module only if it occurs on
|
||||
the package search path. Thus, it keeps the connection to the
|
||||
package to which the module belongs, but may find a different file.
|
||||
|
||||
XXX Need to have an explicit name for '', e.g. '__root__'.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
import imp
|
||||
import string
|
||||
import sys
|
||||
import __builtin__
|
||||
|
||||
import ihooks
|
||||
from ihooks import ModuleLoader, ModuleImporter
|
||||
|
||||
|
||||
class PackageLoader(ModuleLoader):

    """A subclass of ModuleLoader with package support.

    find_module_in_dir() also succeeds when the directory contains a
    subdirectory with the given name; load_module() then creates a
    stub module for the package and loads the package's __init__
    module if there is one.

    """

    def find_module_in_dir(self, name, dir):
        """Like the base method, but a subdirectory counts as a package."""
        if dir is not None:
            hooks = self.hooks
            candidate = hooks.path_join(dir, name)
            if hooks.path_isdir(candidate):
                # No file; the filename slot carries the directory
                return None, candidate, ('', '', 'PACKAGE')
        return ModuleLoader.find_module_in_dir(self, name, dir)

    def load_module(self, name, stuff):
        """Load a package or module described by stuff.

        A plain module is entered in sys.modules and given its parent
        reference before the actual loading starts.

        """
        file, filename, info = stuff
        suff, mode, type = info
        if type == 'PACKAGE':
            return self.load_package(name, stuff)
        if sys.modules.has_key(name):
            module = sys.modules[name]
        else:
            module = imp.new_module(name)
            sys.modules[name] = module
        self.set_parent(module)
        if type == imp.C_EXTENSION and '.' in name:
            return self.load_dynamic(name, stuff)
        return ModuleLoader.load_module(self, name, stuff)

    def load_dynamic(self, name, stuff):
        """Load an extension module that has a dotted name.

        The extension is loaded under the last component of its name
        and the result is then registered under the full name.

        """
        file, filename, (suff, mode, type) = stuff
        # Hack around restriction in imp.load_dynamic()
        tail = name[string.rfind(name, '.')+1:]
        # Remember what sys.modules holds under the short name
        saved = None
        if sys.modules.has_key(tail):
            saved = sys.modules[tail]
        sys.modules[tail] = imp.new_module(name)
        try:
            module = imp.load_dynamic(tail, filename, file)
        finally:
            # Put the short name back the way it was
            if saved:
                sys.modules[tail] = saved
            else:
                del sys.modules[tail]
        sys.modules[name] = module
        return module

    def load_package(self, name, stuff):
        """Create (or reuse) the stub for a package and initialize it."""
        file, filename, info = stuff
        if sys.modules.has_key(name):
            package = sys.modules[name]
        else:
            package = imp.new_module(name)
            sys.modules[name] = package
        # The package's module search path starts out as its directory
        package.__path__ = [filename]
        self.init_package(package)
        return package

    def init_package(self, package):
        """Set parent and domain, then load the package's __init__."""
        self.set_parent(package)
        self.set_domain(package)
        self.call_init_module(package)

    def set_parent(self, m):
        """Store the parent package of m as m.__.

        The parent of a top-level module is the root package, which
        is registered in sys.modules under the name ''.

        """
        full = m.__name__
        cut = string.rfind(full, '.')
        if cut >= 0:
            parent = full[:cut]
        else:
            parent = ''
        m.__ = sys.modules[parent]

    def set_domain(self, package):
        """Set package.__domain__ to the default import domain.

        This is the list of names of the package itself, its parent,
        its parent's parent, and so on, ending with '' (the root).

        """
        name = package.__name__
        domain = [name]
        package.__domain__ = domain
        cut = string.rfind(name, '.')
        while cut >= 0:
            name = name[:cut]
            domain.append(name)
            cut = string.rfind(name, '.')
        if name:
            # Close the list with the root package
            domain.append('')

    def call_init_module(self, package):
        """Load the package's __init__ submodule, if it has one.

        The loaded module is stored as package.__init__.

        """
        stuff = self.find_module('__init__', package.__path__)
        if not stuff:
            return
        init_name = package.__name__ + '.__init__'
        package.__init__ = self.load_module(init_name, stuff)
|
||||
|
||||
|
||||
class PackageImporter(ModuleImporter):
|
||||
|
||||
"""Importer that understands packages and '__'."""
|
||||
|
||||
def __init__(self, loader = None, verbose = 0):
|
||||
ModuleImporter.__init__(self,
|
||||
loader or PackageLoader(None, verbose), verbose)
|
||||
|
||||
def import_module(self, name, globals={}, locals={}, fromlist=[]):
|
||||
if globals.has_key('__'):
|
||||
package = globals['__']
|
||||
else:
|
||||
# No calling context, assume in root package
|
||||
package = sys.modules['']
|
||||
if name[:3] in ('__.', '__'):
|
||||
p = package
|
||||
name = name[3:]
|
||||
while name[:3] in ('__.', '__'):
|
||||
p = package.__
|
||||
name = name[3:]
|
||||
if not name:
|
||||
return self.finish(package, p, '', fromlist)
|
||||
if '.' in name:
|
||||
i = string.find(name, '.')
|
||||
name, tail = name[:i], name[i:]
|
||||
else:
|
||||
tail = ''
|
||||
mname = p.__name__ and p.__name__+'.'+name or name
|
||||
m = self.get1(mname)
|
||||
return self.finish(package, m, tail, fromlist)
|
||||
if '.' in name:
|
||||
i = string.find(name, '.')
|
||||
name, tail = name[:i], name[i:]
|
||||
else:
|
||||
tail = ''
|
||||
for pname in package.__domain__:
|
||||
mname = pname and pname+'.'+name or name
|
||||
m = self.get0(mname)
|
||||
if m: break
|
||||
else:
|
||||
raise ImportError, "No such module %s" % name
|
||||
return self.finish(m, m, tail, fromlist)
|
||||
|
||||
def finish(self, module, m, tail, fromlist):
|
||||
# Got ....A; now get ....A.B.C.D
|
||||
yname = m.__name__
|
||||
if tail and sys.modules.has_key(yname + tail): # Fast path
|
||||
yname, tail = yname + tail, ''
|
||||
m = self.get1(yname)
|
||||
while tail:
|
||||
i = string.find(tail, '.', 1)
|
||||
if i > 0:
|
||||
head, tail = tail[:i], tail[i:]
|
||||
else:
|
||||
head, tail = tail, ''
|
||||
yname = yname + head
|
||||
m = self.get1(yname)
|
||||
|
||||
# Got ....A.B.C.D; now finalize things depending on fromlist
|
||||
if not fromlist:
|
||||
return module
|
||||
if '__' in fromlist:
|
||||
raise ImportError, "Can't import __ from anywhere"
|
||||
if not hasattr(m, '__path__'): return m
|
||||
if '*' in fromlist:
|
||||
raise ImportError, "Can't import * from a package"
|
||||
for f in fromlist:
|
||||
if hasattr(m, f): continue
|
||||
fname = yname + '.' + f
|
||||
self.get1(fname)
|
||||
return m
|
||||
|
||||
def get1(self, name):
|
||||
m = self.get(name)
|
||||
if not m:
|
||||
raise ImportError, "No module named %s" % name
|
||||
return m
|
||||
|
||||
def get0(self, name):
|
||||
m = self.get(name)
|
||||
if not m:
|
||||
sys.modules[name] = None
|
||||
return m
|
||||
|
||||
def get(self, name):
|
||||
# Internal routine to get or load a module when its parent exists
|
||||
if sys.modules.has_key(name):
|
||||
return sys.modules[name]
|
||||
if '.' in name:
|
||||
i = string.rfind(name, '.')
|
||||
head, tail = name[:i], name[i+1:]
|
||||
else:
|
||||
head, tail = '', name
|
||||
path = sys.modules[head].__path__
|
||||
stuff = self.loader.find_module(tail, path)
|
||||
if not stuff:
|
||||
return None
|
||||
sys.modules[name] = m = self.loader.load_module(name, stuff)
|
||||
if head:
|
||||
setattr(sys.modules[head], tail, m)
|
||||
return m
|
||||
|
||||
def reload(self, module):
|
||||
name = module.__name__
|
||||
if '.' in name:
|
||||
i = string.rfind(name, '.')
|
||||
head, tail = name[:i], name[i+1:]
|
||||
path = sys.modules[head].__path__
|
||||
else:
|
||||
tail = name
|
||||
path = sys.modules[''].__path__
|
||||
stuff = self.loader.find_module(tail, path)
|
||||
if not stuff:
|
||||
raise ImportError, "No module named %s" % name
|
||||
return self.loader.load_module(name, stuff)
|
||||
|
||||
def unload(self, module):
|
||||
if hasattr(module, '__path__'):
|
||||
raise ImportError, "don't know how to unload packages yet"
|
||||
PackageImporter.unload(self, module)
|
||||
|
||||
def install(self):
|
||||
if not sys.modules.has_key(''):
|
||||
sys.modules[''] = package = imp.new_module('')
|
||||
package.__path__ = None
|
||||
self.loader.init_package(package)
|
||||
for m in sys.modules.values():
|
||||
if not m: continue
|
||||
if not hasattr(m, '__'):
|
||||
self.loader.set_parent(m)
|
||||
ModuleImporter.install(self)
|
||||
|
||||
|
||||
def install(v = 0):
    """Install a fresh PackageImporter through ihooks.

    v is passed to the importer as its verbose flag.
    """
    importer = PackageImporter(None, v)
    ihooks.install(importer)
|
||||
|
||||
def uninstall():
    """Remove the installed import hook by calling ihooks.uninstall()."""
    ihooks.uninstall()
|
||||
|
||||
def ni(v = 0):
    """Short alias for install(); v is passed through unchanged."""
    install(v)
|
||||
|
||||
def no():
    """Short alias for uninstall()."""
    uninstall()
|
||||
|
||||
def test():
|
||||
import pdb
|
||||
try:
|
||||
testproper()
|
||||
except:
|
||||
sys.last_type, sys.last_value, sys.last_traceback = (
|
||||
sys.exc_type, sys.exc_value, sys.exc_traceback)
|
||||
print
|
||||
print sys.last_type, ':', sys.last_value
|
||||
print
|
||||
pdb.pm()
|
||||
|
||||
def testproper():
|
||||
install(1)
|
||||
try:
|
||||
import mactest
|
||||
print dir(mactest)
|
||||
raw_input('OK?')
|
||||
finally:
|
||||
uninstall()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test()
|
|
@ -0,0 +1,390 @@
|
|||
"""New import scheme with package support.
|
||||
|
||||
A Package is a module that can contain other modules. Packages can be
|
||||
nested. Packages introduce dotted names for modules, like P.Q.M, which
|
||||
could correspond to a file P/Q/M.py found somewhere on sys.path. It
|
||||
is possible to import a package itself, though this makes little sense
|
||||
unless the package contains a module called __init__.
|
||||
|
||||
A package has two variables that control the namespace used for
|
||||
packages and modules, both initialized to sensible defaults the first
|
||||
time the package is referenced.
|
||||
|
||||
(1) A package's *module search path*, contained in the per-package
|
||||
variable __path__, defines a list of *directories* where submodules or
|
||||
subpackages of the package are searched. It is initialized to the
|
||||
directory containing the package. Setting this variable to None makes
|
||||
the module search path default to sys.path (this is not quite the same
|
||||
as setting it to sys.path, since the latter won't track later
|
||||
assignments to sys.path).
|
||||
|
||||
(2) A package's *import domain*, contained in the per-package variable
|
||||
__domain__, defines a list of *packages* that are searched (using
|
||||
their respective module search paths) to satisfy imports. It is
|
||||
initialized to the list consisting of the package itself, its parent
|
||||
package, its parent's parent, and so on, ending with the root package
|
||||
(the nameless package containing all top-level packages and modules,
|
||||
whose module search path is None, implying sys.path).
|
||||
|
||||
The default domain implements a search algorithm called "expanding
|
||||
search". An alternative search algorithm called "explicit search"
|
||||
fixes the import search path to contain only the root package,
|
||||
requiring the modules in the package to name all imported modules by
|
||||
their full name. The convention of using '__' to refer to the current
|
||||
package (both as a per-module variable and in module names) can be
|
||||
used by packages using explicit search to refer to modules in the same
|
||||
package; this combination is known as "explicit-relative search".
|
||||
|
||||
The PackageImporter and PackageLoader classes together implement the
|
||||
following policies:
|
||||
|
||||
- There is a root package, whose name is ''. It cannot be imported
|
||||
directly but may be referenced, e.g. by using '__' from a top-level
|
||||
module.
|
||||
|
||||
- In each module or package, the variable '__' contains a reference to
|
||||
the parent package; in the root package, '__' points to itself.
|
||||
|
||||
- In the name for imported modules (e.g. M in "import M" or "from M
|
||||
import ..."), a leading '__' refers to the current package (i.e.
|
||||
the package containing the current module); leading '__.__' and so
|
||||
on refer to the current package's parent, and so on. The use of
|
||||
'__' elsewhere in the module name is not supported.
|
||||
|
||||
- Modules are searched using the "expanding search" algorithm by
|
||||
virtue of the default value for __domain__.
|
||||
|
||||
- If A.B.C is imported, A is searched using __domain__; then
|
||||
subpackage B is searched in A using its __path__, and so on.
|
||||
|
||||
- Built-in modules have priority: even if a file sys.py exists in a
|
||||
package, "import sys" imports the built-in sys module.
|
||||
|
||||
- The same holds for frozen modules, for better or for worse.
|
||||
|
||||
- Submodules and subpackages are not automatically loaded when their
|
||||
parent package is loaded.
|
||||
|
||||
- The construct "from package import *" is illegal. (It can still be
|
||||
used to import names from a module.)
|
||||
|
||||
- When "from package import module1, module2, ..." is used, those
|
||||
modules are explicitly loaded.
|
||||
|
||||
- When a package is loaded, if it has a submodule __init__, that
|
||||
module is loaded. This is the place where required submodules can
|
||||
be loaded, the __path__ variable extended, etc. The __init__ module
|
||||
is loaded even if the package was loaded only in order to create a
|
||||
stub for a sub-package: if "import P.Q.R" is the first reference to
|
||||
P, and P has a submodule __init__, P.__init__ is loaded before P.Q
|
||||
is even searched.
|
||||
|
||||
Caveats:
|
||||
|
||||
- It is possible to import a package that has no __init__ submodule;
|
||||
this is not particularly useful but there may be useful applications
|
||||
for it (e.g. to manipulate its search paths from the outside!).
|
||||
|
||||
- There are no special provisions for os.chdir(). If you plan to use
|
||||
os.chdir() before you have imported all your modules, it is better
|
||||
not to have relative pathnames in sys.path. (This could actually be
|
||||
fixed by changing the implementation of path_join() in the hook to
|
||||
absolutize paths.)
|
||||
|
||||
- Packages and modules are introduced in sys.modules as soon as their
|
||||
loading is started. When the loading is terminated by an exception,
|
||||
the sys.modules entries remain around.
|
||||
|
||||
- There are no special measures to support mutually recursive modules,
|
||||
but it will work under the same conditions where it works in the
|
||||
flat module space system.
|
||||
|
||||
- Sometimes dummy entries (whose value is None) are entered in
|
||||
sys.modules, to indicate that a particular module does not exist --
|
||||
this is done to speed up the expanding search algorithm when a
|
||||
module residing at a higher level is repeatedly imported (Python
|
||||
promises that importing a previously imported module is cheap!)
|
||||
|
||||
- Although dynamically loaded extensions are allowed inside packages,
|
||||
the current implementation (hardcoded in the interpreter) of their
|
||||
initialization may cause problems if an extension invokes the
|
||||
interpreter during its initialization.
|
||||
|
||||
- reload() may find another version of the module only if it occurs on
|
||||
the package search path. Thus, it keeps the connection to the
|
||||
package to which the module belongs, but may find a different file.
|
||||
|
||||
XXX Need to have an explicit name for '', e.g. '__root__'.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
import imp
|
||||
import string
|
||||
import sys
|
||||
import __builtin__
|
||||
|
||||
import ihooks
|
||||
from ihooks import ModuleLoader, ModuleImporter
|
||||
|
||||
|
||||
class PackageLoader(ModuleLoader):

    """A subclass of ModuleLoader with package support.

    find_module_in_dir() will succeed if there's a subdirectory with
    the given name; load_module() will create a stub for a package and
    load its __init__ module if it exists.

    """

    def find_module_in_dir(self, name, dir):
        """Locate `name` in `dir`, treating a subdirectory as a package.

        For a subdirectory the result is (None, dirname, info) where the
        type field of info is the marker string 'PACKAGE'.  When dir is
        None, or no such subdirectory exists, the search is delegated to
        ModuleLoader.find_module_in_dir().
        """
        if dir is not None:
            dirname = self.hooks.path_join(dir, name)
            if self.hooks.path_isdir(dirname):
                return None, dirname, ('', '', 'PACKAGE')
        return ModuleLoader.find_module_in_dir(self, name, dir)

    def load_module(self, name, stuff):
        """Load the module or package described by `stuff` as `name`.

        `stuff` is a (file, filename, info) triple as returned by the
        find routines.  Packages go to load_package(); C extensions with
        a dotted name go to load_dynamic(); everything else is loaded by
        ModuleLoader.load_module().
        """
        file, filename, info = stuff
        suff, mode, type = info
        if type == 'PACKAGE':
            return self.load_package(name, stuff)
        # Enter the module in sys.modules before loading it, and give it
        # its '__' parent reference.
        if sys.modules.has_key(name):
            m = sys.modules[name]
        else:
            sys.modules[name] = m = imp.new_module(name)
        self.set_parent(m)
        if type == imp.C_EXTENSION and '.' in name:
            return self.load_dynamic(name, stuff)
        else:
            return ModuleLoader.load_module(self, name, stuff)

    def load_dynamic(self, name, stuff):
        """Load a C extension that lives inside a package.

        The extension is loaded under the last component of its dotted
        name; whatever sys.modules held under that short name beforehand
        is put back afterwards, and the result is registered under the
        full dotted name.
        """
        file, filename, (suff, mode, type) = stuff
        # Hack around restriction in imp.load_dynamic()
        i = string.rfind(name, '.')
        tail = name[i+1:]
        # Remember separately whether an entry existed: its value may be
        # None (a "no such module" placeholder), which must be restored
        # rather than deleted.
        had_entry = sys.modules.has_key(tail)
        if had_entry:
            save = sys.modules[tail]
        sys.modules[tail] = imp.new_module(name)
        try:
            m = imp.load_dynamic(tail, filename, file)
        finally:
            if had_entry:
                sys.modules[tail] = save
            elif sys.modules.has_key(tail):
                # Guarded so that a missing entry cannot raise KeyError
                # here and hide the real loading error.
                del sys.modules[tail]
        sys.modules[name] = m
        return m

    def load_package(self, name, stuff):
        """Create (or reuse) the package stub for `name` and initialize it.

        The package's __path__ is set to a one-element list holding the
        package directory.
        """
        file, filename, info = stuff
        if sys.modules.has_key(name):
            package = sys.modules[name]
        else:
            sys.modules[name] = package = imp.new_module(name)
        package.__path__ = [filename]
        self.init_package(package)
        return package

    def init_package(self, package):
        """Set the package's parent and domain, then load its __init__."""
        self.set_parent(package)
        self.set_domain(package)
        self.call_init_module(package)

    def set_parent(self, m):
        """Store a reference to m's parent package in m.__.

        The parent is looked up in sys.modules by stripping the last
        component from m.__name__; undotted names get the root package
        '' as parent.
        """
        name = m.__name__
        if '.' in name:
            name = name[:string.rfind(name, '.')]
        else:
            name = ''
        m.__ = sys.modules[name]

    def set_domain(self, package):
        """Set package.__domain__ to the default expanding-search list.

        The list holds the package's own name, then each enclosing
        package name, and ends with '' for the root package.
        """
        name = package.__name__
        package.__domain__ = domain = [name]
        while '.' in name:
            name = name[:string.rfind(name, '.')]
            domain.append(name)
        # The root package's own name is '' and is already in the list.
        if name:
            domain.append('')

    def call_init_module(self, package):
        """Load the package's __init__ submodule, if one can be found.

        The loaded module is stored as package.__init__.
        """
        stuff = self.find_module('__init__', package.__path__)
        if stuff:
            m = self.load_module(package.__name__ + '.__init__', stuff)
            package.__init__ = m
|
||||
|
||||
|
||||
class PackageImporter(ModuleImporter):
|
||||
|
||||
"""Importer that understands packages and '__'."""
|
||||
|
||||
def __init__(self, loader = None, verbose = 0):
|
||||
ModuleImporter.__init__(self,
|
||||
loader or PackageLoader(None, verbose), verbose)
|
||||
|
||||
def import_module(self, name, globals={}, locals={}, fromlist=[]):
|
||||
if globals.has_key('__'):
|
||||
package = globals['__']
|
||||
else:
|
||||
# No calling context, assume in root package
|
||||
package = sys.modules['']
|
||||
if name[:3] in ('__.', '__'):
|
||||
p = package
|
||||
name = name[3:]
|
||||
while name[:3] in ('__.', '__'):
|
||||
p = package.__
|
||||
name = name[3:]
|
||||
if not name:
|
||||
return self.finish(package, p, '', fromlist)
|
||||
if '.' in name:
|
||||
i = string.find(name, '.')
|
||||
name, tail = name[:i], name[i:]
|
||||
else:
|
||||
tail = ''
|
||||
mname = p.__name__ and p.__name__+'.'+name or name
|
||||
m = self.get1(mname)
|
||||
return self.finish(package, m, tail, fromlist)
|
||||
if '.' in name:
|
||||
i = string.find(name, '.')
|
||||
name, tail = name[:i], name[i:]
|
||||
else:
|
||||
tail = ''
|
||||
for pname in package.__domain__:
|
||||
mname = pname and pname+'.'+name or name
|
||||
m = self.get0(mname)
|
||||
if m: break
|
||||
else:
|
||||
raise ImportError, "No such module %s" % name
|
||||
return self.finish(m, m, tail, fromlist)
|
||||
|
||||
def finish(self, module, m, tail, fromlist):
|
||||
# Got ....A; now get ....A.B.C.D
|
||||
yname = m.__name__
|
||||
if tail and sys.modules.has_key(yname + tail): # Fast path
|
||||
yname, tail = yname + tail, ''
|
||||
m = self.get1(yname)
|
||||
while tail:
|
||||
i = string.find(tail, '.', 1)
|
||||
if i > 0:
|
||||
head, tail = tail[:i], tail[i:]
|
||||
else:
|
||||
head, tail = tail, ''
|
||||
yname = yname + head
|
||||
m = self.get1(yname)
|
||||
|
||||
# Got ....A.B.C.D; now finalize things depending on fromlist
|
||||
if not fromlist:
|
||||
return module
|
||||
if '__' in fromlist:
|
||||
raise ImportError, "Can't import __ from anywhere"
|
||||
if not hasattr(m, '__path__'): return m
|
||||
if '*' in fromlist:
|
||||
raise ImportError, "Can't import * from a package"
|
||||
for f in fromlist:
|
||||
if hasattr(m, f): continue
|
||||
fname = yname + '.' + f
|
||||
self.get1(fname)
|
||||
return m
|
||||
|
||||
def get1(self, name):
|
||||
m = self.get(name)
|
||||
if not m:
|
||||
raise ImportError, "No module named %s" % name
|
||||
return m
|
||||
|
||||
def get0(self, name):
|
||||
m = self.get(name)
|
||||
if not m:
|
||||
sys.modules[name] = None
|
||||
return m
|
||||
|
||||
def get(self, name):
|
||||
# Internal routine to get or load a module when its parent exists
|
||||
if sys.modules.has_key(name):
|
||||
return sys.modules[name]
|
||||
if '.' in name:
|
||||
i = string.rfind(name, '.')
|
||||
head, tail = name[:i], name[i+1:]
|
||||
else:
|
||||
head, tail = '', name
|
||||
path = sys.modules[head].__path__
|
||||
stuff = self.loader.find_module(tail, path)
|
||||
if not stuff:
|
||||
return None
|
||||
sys.modules[name] = m = self.loader.load_module(name, stuff)
|
||||
if head:
|
||||
setattr(sys.modules[head], tail, m)
|
||||
return m
|
||||
|
||||
def reload(self, module):
|
||||
name = module.__name__
|
||||
if '.' in name:
|
||||
i = string.rfind(name, '.')
|
||||
head, tail = name[:i], name[i+1:]
|
||||
path = sys.modules[head].__path__
|
||||
else:
|
||||
tail = name
|
||||
path = sys.modules[''].__path__
|
||||
stuff = self.loader.find_module(tail, path)
|
||||
if not stuff:
|
||||
raise ImportError, "No module named %s" % name
|
||||
return self.loader.load_module(name, stuff)
|
||||
|
||||
def unload(self, module):
|
||||
if hasattr(module, '__path__'):
|
||||
raise ImportError, "don't know how to unload packages yet"
|
||||
PackageImporter.unload(self, module)
|
||||
|
||||
def install(self):
|
||||
if not sys.modules.has_key(''):
|
||||
sys.modules[''] = package = imp.new_module('')
|
||||
package.__path__ = None
|
||||
self.loader.init_package(package)
|
||||
for m in sys.modules.values():
|
||||
if not m: continue
|
||||
if not hasattr(m, '__'):
|
||||
self.loader.set_parent(m)
|
||||
ModuleImporter.install(self)
|
||||
|
||||
|
||||
def install(v = 0):
    """Install a fresh PackageImporter through ihooks.

    v is passed to the importer as its verbose flag.
    """
    importer = PackageImporter(None, v)
    ihooks.install(importer)
|
||||
|
||||
def uninstall():
    """Remove the installed import hook by calling ihooks.uninstall()."""
    ihooks.uninstall()
|
||||
|
||||
def ni(v = 0):
    """Short alias for install(); v is passed through unchanged."""
    install(v)
|
||||
|
||||
def no():
    """Short alias for uninstall()."""
    uninstall()
|
||||
|
||||
def test():
|
||||
import pdb
|
||||
try:
|
||||
testproper()
|
||||
except:
|
||||
sys.last_type, sys.last_value, sys.last_traceback = (
|
||||
sys.exc_type, sys.exc_value, sys.exc_traceback)
|
||||
print
|
||||
print sys.last_type, ':', sys.last_value
|
||||
print
|
||||
pdb.pm()
|
||||
|
||||
def testproper():
|
||||
install(1)
|
||||
try:
|
||||
import mactest
|
||||
print dir(mactest)
|
||||
raw_input('OK?')
|
||||
finally:
|
||||
uninstall()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test()
|
Loading…
Reference in New Issue