The usual...
This commit is contained in:
parent
0b095bc092
commit
aad6761cce
|
@ -68,7 +68,6 @@ import sys
|
|||
import time
|
||||
import socket # For gethostbyaddr()
|
||||
import string
|
||||
import rfc822
|
||||
import mimetools
|
||||
import SocketServer
|
||||
|
||||
|
@ -94,12 +93,16 @@ class HTTPServer(SocketServer.TCPServer):
|
|||
host, port = self.socket.getsockname()
|
||||
if not host or host == '0.0.0.0':
|
||||
host = socket.gethostname()
|
||||
hostname, hostnames, hostaddrs = socket.gethostbyaddr(host)
|
||||
if '.' not in hostname:
|
||||
for host in hostnames:
|
||||
if '.' in host:
|
||||
hostname = host
|
||||
break
|
||||
try:
|
||||
hostname, hostnames, hostaddrs = socket.gethostbyaddr(host)
|
||||
except socket.error:
|
||||
hostname = host
|
||||
else:
|
||||
if '.' not in hostname:
|
||||
for host in hostnames:
|
||||
if '.' in host:
|
||||
hostname = host
|
||||
break
|
||||
self.server_name = hostname
|
||||
self.server_port = port
|
||||
|
||||
|
@ -169,7 +172,7 @@ class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):
|
|||
|
||||
This server parses the request and the headers, and then calls a
|
||||
function specific to the request type (<command>). Specifically,
|
||||
a request SPAM will be handled by a method handle_SPAM(). If no
|
||||
a request SPAM will be handled by a method do_SPAM(). If no
|
||||
such method exists the server sends an error response to the
|
||||
client. If it exists, it is called with no arguments:
|
||||
|
||||
|
@ -216,16 +219,17 @@ class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):
|
|||
# where each string is of the form name[/version].
|
||||
server_version = "BaseHTTP/" + __version__
|
||||
|
||||
def handle(self):
|
||||
"""Handle a single HTTP request.
|
||||
def parse_request(self):
|
||||
"""Parse a request (internal).
|
||||
|
||||
You normally don't need to override this method; see the class
|
||||
__doc__ string for information on how to handle specific HTTP
|
||||
commands such as GET and POST.
|
||||
The request should be stored in self.raw_requestline; the results
|
||||
are in self.command, self.path, self.request_version and
|
||||
self.headers.
|
||||
|
||||
Return value is 1 for success, 0 for failure; on failure, an
|
||||
error is sent back.
|
||||
|
||||
"""
|
||||
|
||||
self.raw_requestline = self.rfile.readline()
|
||||
self.request_version = version = "HTTP/0.9" # Default
|
||||
requestline = self.raw_requestline
|
||||
if requestline[-2:] == '\r\n':
|
||||
|
@ -238,21 +242,35 @@ class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):
|
|||
[command, path, version] = words
|
||||
if version[:5] != 'HTTP/':
|
||||
self.send_error(400, "Bad request version (%s)" % `version`)
|
||||
return
|
||||
return 0
|
||||
elif len(words) == 2:
|
||||
[command, path] = words
|
||||
if command != 'GET':
|
||||
self.send_error(400,
|
||||
"Bad HTTP/0.9 request type (%s)" % `command`)
|
||||
return
|
||||
return 0
|
||||
else:
|
||||
self.send_error(400, "Bad request syntax (%s)" % `requestline`)
|
||||
return
|
||||
return 0
|
||||
self.command, self.path, self.request_version = command, path, version
|
||||
self.headers = self.MessageClass(self.rfile, 0)
|
||||
mname = 'do_' + command
|
||||
return 1
|
||||
|
||||
def handle(self):
|
||||
"""Handle a single HTTP request.
|
||||
|
||||
You normally don't need to override this method; see the class
|
||||
__doc__ string for information on how to handle specific HTTP
|
||||
commands such as GET and POST.
|
||||
|
||||
"""
|
||||
|
||||
self.raw_requestline = self.rfile.readline()
|
||||
if not self.parse_request(): # An error code has been sent, just exit
|
||||
return
|
||||
mname = 'do_' + self.command
|
||||
if not hasattr(self, mname):
|
||||
self.send_error(501, "Unsupported method (%s)" % `command`)
|
||||
self.send_error(501, "Unsupported method (%s)" % `self.command`)
|
||||
return
|
||||
method = getattr(self, mname)
|
||||
method()
|
||||
|
|
|
@ -3,6 +3,9 @@
|
|||
This module builds on SimpleHTTPServer by implementing GET and POST
|
||||
requests to cgi-bin scripts.
|
||||
|
||||
If the os.fork() function is not present, this module will not work;
|
||||
SystemError will be raised instead.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
|
@ -10,15 +13,18 @@ __version__ = "0.3"
|
|||
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import socket
|
||||
import string
|
||||
import urllib
|
||||
import BaseHTTPServer
|
||||
import SimpleHTTPServer
|
||||
|
||||
|
||||
try:
|
||||
os.fork
|
||||
except AttributeError:
|
||||
raise SystemError, __name__ + " requires os.fork()"
|
||||
|
||||
|
||||
class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
|
||||
|
||||
"""Complete HTTP server with GET, HEAD and POST commands.
|
||||
|
@ -150,6 +156,9 @@ class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
|
|||
ua = self.headers.getheader('user-agent')
|
||||
if ua:
|
||||
env['HTTP_USER_AGENT'] = ua
|
||||
co = filter(None, self.headers.getheaders('cookie'))
|
||||
if co:
|
||||
env['HTTP_COOKIE'] = string.join(co, ', ')
|
||||
# XXX Other HTTP_* headers
|
||||
decoded_query = string.replace(query, '+', ' ')
|
||||
try:
|
||||
|
@ -177,7 +186,7 @@ def nobody_uid():
|
|||
import pwd
|
||||
try:
|
||||
nobody = pwd.getpwnam('nobody')[2]
|
||||
except pwd.error:
|
||||
except KeyError:
|
||||
nobody = 1 + max(map(lambda x: x[2], pwd.getpwall()))
|
||||
return nobody
|
||||
|
||||
|
|
|
@ -33,11 +33,24 @@ ConfigParser -- responsible for parsing a list of
|
|||
sections()
|
||||
return all the configuration section names, sans DEFAULT
|
||||
|
||||
has_section(section)
|
||||
return whether the given section exists
|
||||
|
||||
options(section)
|
||||
return list of configuration options for the named section
|
||||
|
||||
has_option(section, option)
|
||||
return whether the given section has the given option
|
||||
|
||||
read(filenames)
|
||||
read and parse the list of named configuration files
|
||||
read and parse the list of named configuration files, given by
|
||||
name. A single filename is also allowed. Non-existing files
|
||||
are ignored.
|
||||
|
||||
readfp(fp, filename=None)
|
||||
read and parse one configuration file, given as a file object.
|
||||
The filename defaults to fp.name; it is only used in error
|
||||
messages (if fp has no `name' attribute, the string `<???>' is used).
|
||||
|
||||
get(section, option, raw=0, vars=None)
|
||||
return a string value for the named option. All % interpolations are
|
||||
|
@ -158,6 +171,7 @@ class ConfigParser:
|
|||
return self.__sections.has_key(section)
|
||||
|
||||
def options(self, section):
|
||||
"""Return a list of option names for the given section name."""
|
||||
try:
|
||||
opts = self.__sections[section].copy()
|
||||
except KeyError:
|
||||
|
@ -165,16 +179,49 @@ class ConfigParser:
|
|||
opts.update(self.__defaults)
|
||||
return opts.keys()
|
||||
|
||||
def has_option(self, section, option):
|
||||
"""Return whether the given section has the given option."""
|
||||
try:
|
||||
opts = self.__sections[section]
|
||||
except KeyError:
|
||||
raise NoSectionError(section)
|
||||
return opts.has_key(option)
|
||||
|
||||
def read(self, filenames):
|
||||
"""Read and parse a list of filenames."""
|
||||
"""Read and parse a filename or a list of filenames.
|
||||
|
||||
Files that cannot be opened are silently ignored; this is
|
||||
designed so that you can specify a list of potential
|
||||
configuration file locations (e.g. current directory, user's
|
||||
home directory, systemwide directory), and all existing
|
||||
configuration files in the list will be read. A single
|
||||
filename may also be given.
|
||||
"""
|
||||
if type(filenames) is type(''):
|
||||
filenames = [filenames]
|
||||
for file in filenames:
|
||||
for filename in filenames:
|
||||
try:
|
||||
fp = open(file, 'r')
|
||||
self.__read(fp)
|
||||
fp = open(filename)
|
||||
except IOError:
|
||||
pass
|
||||
continue
|
||||
self.__read(fp, filename)
|
||||
fp.close()
|
||||
|
||||
def readfp(self, fp, filename=None):
|
||||
"""Like read() but the argument must be a file-like object.
|
||||
|
||||
The `fp' argument must have a `readline' method. Optional
|
||||
second argument is the `filename', which if not given, is
|
||||
taken from fp.name. If fp has no `name' attribute, `<???>' is
|
||||
used.
|
||||
|
||||
"""
|
||||
if filename is None:
|
||||
try:
|
||||
filename = fp.name
|
||||
except AttributeError:
|
||||
filename = '<???>'
|
||||
self.__read(fp, filename)
|
||||
|
||||
def get(self, section, option, raw=0, vars=None):
|
||||
"""Get an option value for a given section.
|
||||
|
@ -199,7 +246,7 @@ class ConfigParser:
|
|||
# Update with the entry specific variables
|
||||
if vars:
|
||||
d.update(vars)
|
||||
option = string.lower(option)
|
||||
option = self.optionxform(option)
|
||||
try:
|
||||
rawval = d[option]
|
||||
except KeyError:
|
||||
|
@ -212,7 +259,7 @@ class ConfigParser:
|
|||
depth = 0
|
||||
while depth < 10: # Loop through this until it's done
|
||||
depth = depth + 1
|
||||
if not string.find(value, "%("):
|
||||
if string.find(value, "%(") >= 0:
|
||||
try:
|
||||
value = value % d
|
||||
except KeyError, key:
|
||||
|
@ -236,25 +283,28 @@ class ConfigParser:
|
|||
raise ValueError, 'Not a boolean: %s' % v
|
||||
return val
|
||||
|
||||
def optionxform(self, optionstr):
|
||||
return string.lower(optionstr)
|
||||
|
||||
#
|
||||
# Regular expressions for parsing section headers and options. Note a
|
||||
# slight semantic change from the previous version, because of the use
|
||||
# of \w, _ is allowed in section header names.
|
||||
__SECTCRE = re.compile(
|
||||
SECTCRE = re.compile(
|
||||
r'\[' # [
|
||||
r'(?P<header>[-\w]+)' # `-', `_' or any alphanum
|
||||
r'(?P<header>[-\w_.*,(){}]+)' # a lot of stuff found by IvL
|
||||
r'\]' # ]
|
||||
)
|
||||
__OPTCRE = re.compile(
|
||||
r'(?P<option>[-.\w]+)' # - . _ alphanum
|
||||
r'[ \t]*[:=][ \t]*' # any number of space/tab,
|
||||
OPTCRE = re.compile(
|
||||
r'(?P<option>[-\w_.*,(){}]+)' # a lot of stuff found by IvL
|
||||
r'[ \t]*(?P<vi>[:=])[ \t]*' # any number of space/tab,
|
||||
# followed by separator
|
||||
# (either : or =), followed
|
||||
# by any number of spaces/tabs
|
||||
r'(?P<value>.*)$' # everything up to eol
|
||||
)
|
||||
|
||||
def __read(self, fp):
|
||||
def __read(self, fp, fpname):
|
||||
"""Parse a sectioned setup file.
|
||||
|
||||
The sections in the setup file contain a title line at the top,
|
||||
|
@ -277,7 +327,7 @@ class ConfigParser:
|
|||
if string.strip(line) == '' or line[0] in '#;':
|
||||
continue
|
||||
if string.lower(string.split(line)[0]) == 'rem' \
|
||||
and line[0] == "r": # no leading whitespace
|
||||
and line[0] in "rR": # no leading whitespace
|
||||
continue
|
||||
# continuation line?
|
||||
if line[0] in ' \t' and cursect is not None and optname:
|
||||
|
@ -287,7 +337,7 @@ class ConfigParser:
|
|||
# a section header or option header?
|
||||
else:
|
||||
# is it a section header?
|
||||
mo = self.__SECTCRE.match(line)
|
||||
mo = self.SECTCRE.match(line)
|
||||
if mo:
|
||||
sectname = mo.group('header')
|
||||
if self.__sections.has_key(sectname):
|
||||
|
@ -301,13 +351,19 @@ class ConfigParser:
|
|||
optname = None
|
||||
# no section header in the file?
|
||||
elif cursect is None:
|
||||
raise MissingSectionHeaderError(fp.name, lineno, `line`)
|
||||
raise MissingSectionHeaderError(fpname, lineno, `line`)
|
||||
# an option line?
|
||||
else:
|
||||
mo = self.__OPTCRE.match(line)
|
||||
mo = self.OPTCRE.match(line)
|
||||
if mo:
|
||||
optname, optval = mo.group('option', 'value')
|
||||
optname, vi, optval = mo.group('option', 'vi', 'value')
|
||||
optname = string.lower(optname)
|
||||
if vi in ('=', ':') and ';' in optval:
|
||||
# ';' is a comment delimiter only if it follows
|
||||
# a spacing character
|
||||
pos = string.find(optval, ';')
|
||||
if pos and optval[pos-1] in string.whitespace:
|
||||
optval = optval[:pos]
|
||||
optval = string.strip(optval)
|
||||
# allow empty values
|
||||
if optval == '""':
|
||||
|
@ -319,7 +375,7 @@ class ConfigParser:
|
|||
# raised at the end of the file and will contain a
|
||||
# list of all bogus lines
|
||||
if not e:
|
||||
e = ParsingError(fp.name)
|
||||
e = ParsingError(fpname)
|
||||
e.append(lineno, `line`)
|
||||
# if any parsing errors occurred, raise an exception
|
||||
if e:
|
||||
|
|
|
@ -33,6 +33,8 @@ Exception(*)
|
|||
| |
|
||||
| +-- IOError
|
||||
| +-- OSError(*)
|
||||
| |
|
||||
| +-- WindowsError(*)
|
||||
|
|
||||
+-- EOFError
|
||||
+-- RuntimeError
|
||||
|
@ -40,6 +42,9 @@ Exception(*)
|
|||
| +-- NotImplementedError(*)
|
||||
|
|
||||
+-- NameError
|
||||
| |
|
||||
| +-- UnboundLocalError(*)
|
||||
|
|
||||
+-- AttributeError
|
||||
+-- SyntaxError
|
||||
+-- TypeError
|
||||
|
@ -56,6 +61,9 @@ Exception(*)
|
|||
| +-- FloatingPointError
|
||||
|
|
||||
+-- ValueError
|
||||
| |
|
||||
| +-- UnicodeError(*)
|
||||
|
|
||||
+-- SystemError
|
||||
+-- MemoryError
|
||||
"""
|
||||
|
@ -136,6 +144,10 @@ class OSError(EnvironmentError):
|
|||
"""OS system call failed."""
|
||||
pass
|
||||
|
||||
class WindowsError(OSError):
|
||||
"""MS-Windows OS system call failed."""
|
||||
pass
|
||||
|
||||
class RuntimeError(StandardError):
|
||||
"""Unspecified run-time error."""
|
||||
pass
|
||||
|
@ -208,7 +220,15 @@ class AttributeError(StandardError):
|
|||
pass
|
||||
|
||||
class NameError(StandardError):
|
||||
"""Name not found locally or globally."""
|
||||
"""Name not found globally."""
|
||||
pass
|
||||
|
||||
class UnboundLocalError(NameError):
|
||||
"""Local name referenced but not bound to a value."""
|
||||
pass
|
||||
|
||||
class UnicodeError(ValueError):
|
||||
"""Unicode related error."""
|
||||
pass
|
||||
|
||||
class MemoryError(StandardError):
|
||||
|
|
|
@ -73,11 +73,11 @@ XXX Possible additions:
|
|||
|
||||
"""
|
||||
|
||||
import sys, os
|
||||
import sys, os, stat
|
||||
|
||||
_state = None
|
||||
|
||||
def input(files=(), inplace=0, backup=""):
|
||||
def input(files=None, inplace=0, backup=""):
|
||||
global _state
|
||||
if _state and _state._file:
|
||||
raise RuntimeError, "input() already active"
|
||||
|
@ -123,15 +123,16 @@ def isstdin():
|
|||
|
||||
class FileInput:
|
||||
|
||||
def __init__(self, files=(), inplace=0, backup=""):
|
||||
def __init__(self, files=None, inplace=0, backup=""):
|
||||
if type(files) == type(''):
|
||||
files = (files,)
|
||||
else:
|
||||
files = tuple(files)
|
||||
if files is None:
|
||||
files = sys.argv[1:]
|
||||
if not files:
|
||||
files = tuple(sys.argv[1:])
|
||||
if not files:
|
||||
files = ('-',)
|
||||
files = ('-',)
|
||||
else:
|
||||
files = tuple(files)
|
||||
self._files = files
|
||||
self._inplace = inplace
|
||||
self._backup = backup
|
||||
|
@ -203,10 +204,22 @@ class FileInput:
|
|||
self._filename + (self._backup or ".bak"))
|
||||
try: os.unlink(self._backupfilename)
|
||||
except os.error: pass
|
||||
# The next three lines may raise IOError
|
||||
# The next few lines may raise IOError
|
||||
os.rename(self._filename, self._backupfilename)
|
||||
self._file = open(self._backupfilename, "r")
|
||||
self._output = open(self._filename, "w")
|
||||
try:
|
||||
perm = os.fstat(self._file.fileno())[stat.ST_MODE]
|
||||
except:
|
||||
self._output = open(self._filename, "w")
|
||||
else:
|
||||
fd = os.open(self._filename,
|
||||
os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
|
||||
perm)
|
||||
self._output = os.fdopen(fd, "w")
|
||||
try:
|
||||
os.chmod(self._filename, perm)
|
||||
except:
|
||||
pass
|
||||
self._savestdout = sys.stdout
|
||||
sys.stdout = self._output
|
||||
else:
|
||||
|
|
|
@ -1,3 +1,23 @@
|
|||
"""Generic output formatting.
|
||||
|
||||
Formatter objects transform an abstract flow of formatting events into
|
||||
specific output events on writer objects. Formatters manage several stack
|
||||
structures to allow various properties of a writer object to be changed and
|
||||
restored; writers need not be able to handle relative changes nor any sort
|
||||
of ``change back'' operation. Specific writer properties which may be
|
||||
controlled via formatter objects are horizontal alignment, font, and left
|
||||
margin indentations. A mechanism is provided which supports providing
|
||||
arbitrary, non-exclusive style settings to a writer as well. Additional
|
||||
interfaces facilitate formatting events which are not reversible, such as
|
||||
paragraph separation.
|
||||
|
||||
Writer objects encapsulate device interfaces. Abstract devices, such as
|
||||
file formats, are supported as well as physical devices. The provided
|
||||
implementations all work with abstract devices. The interface makes
|
||||
available mechanisms for setting the properties which formatter objects
|
||||
manage and inserting data into the output.
|
||||
"""
|
||||
|
||||
import string
|
||||
import sys
|
||||
from types import StringType
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Gopher protocol client interface
|
||||
"""Gopher protocol client interface."""
|
||||
|
||||
import string
|
||||
|
||||
|
@ -29,180 +29,180 @@ A_IMAGE = 'I'
|
|||
A_WHOIS = 'w'
|
||||
A_QUERY = 'q'
|
||||
A_GIF = 'g'
|
||||
A_HTML = 'h' # HTML file
|
||||
A_WWW = 'w' # WWW address
|
||||
A_HTML = 'h' # HTML file
|
||||
A_WWW = 'w' # WWW address
|
||||
A_PLUS_IMAGE = ':'
|
||||
A_PLUS_MOVIE = ';'
|
||||
A_PLUS_SOUND = '<'
|
||||
|
||||
|
||||
# Function mapping all file types to strings; unknown types become TYPE='x'
|
||||
_names = dir()
|
||||
_type_to_name_map = {}
|
||||
def type_to_name(gtype):
|
||||
global _type_to_name_map
|
||||
if _type_to_name_map=={}:
|
||||
for name in _names:
|
||||
if name[:2] == 'A_':
|
||||
_type_to_name_map[eval(name)] = name[2:]
|
||||
if _type_to_name_map.has_key(gtype):
|
||||
return _type_to_name_map[gtype]
|
||||
return 'TYPE=' + `gtype`
|
||||
"""Map all file types to strings; unknown types become TYPE='x'."""
|
||||
global _type_to_name_map
|
||||
if _type_to_name_map=={}:
|
||||
for name in _names:
|
||||
if name[:2] == 'A_':
|
||||
_type_to_name_map[eval(name)] = name[2:]
|
||||
if _type_to_name_map.has_key(gtype):
|
||||
return _type_to_name_map[gtype]
|
||||
return 'TYPE=' + `gtype`
|
||||
|
||||
# Names for characters and strings
|
||||
CRLF = '\r\n'
|
||||
TAB = '\t'
|
||||
|
||||
# Send a selector to a given host and port, return a file with the reply
|
||||
def send_selector(selector, host, port = 0):
|
||||
import socket
|
||||
import string
|
||||
if not port:
|
||||
i = string.find(host, ':')
|
||||
if i >= 0:
|
||||
host, port = host[:i], string.atoi(host[i+1:])
|
||||
if not port:
|
||||
port = DEF_PORT
|
||||
elif type(port) == type(''):
|
||||
port = string.atoi(port)
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
s.connect(host, port)
|
||||
s.send(selector + CRLF)
|
||||
s.shutdown(1)
|
||||
return s.makefile('rb')
|
||||
"""Send a selector to a given host and port, return a file with the reply."""
|
||||
import socket
|
||||
import string
|
||||
if not port:
|
||||
i = string.find(host, ':')
|
||||
if i >= 0:
|
||||
host, port = host[:i], string.atoi(host[i+1:])
|
||||
if not port:
|
||||
port = DEF_PORT
|
||||
elif type(port) == type(''):
|
||||
port = string.atoi(port)
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
s.connect((host, port))
|
||||
s.send(selector + CRLF)
|
||||
s.shutdown(1)
|
||||
return s.makefile('rb')
|
||||
|
||||
# Send a selector and a query string
|
||||
def send_query(selector, query, host, port = 0):
|
||||
return send_selector(selector + '\t' + query, host, port)
|
||||
"""Send a selector and a query string."""
|
||||
return send_selector(selector + '\t' + query, host, port)
|
||||
|
||||
# Takes a path as returned by urlparse and returns the appropriate selector
|
||||
def path_to_selector(path):
|
||||
if path=="/":
|
||||
return "/"
|
||||
else:
|
||||
return path[2:] # Cuts initial slash and data type identifier
|
||||
"""Takes a path as returned by urlparse and returns the appropriate selector."""
|
||||
if path=="/":
|
||||
return "/"
|
||||
else:
|
||||
return path[2:] # Cuts initial slash and data type identifier
|
||||
|
||||
# Takes a path as returned by urlparse and maps it to a string
|
||||
# See section 3.4 of RFC 1738 for details
|
||||
def path_to_datatype_name(path):
|
||||
if path=="/":
|
||||
# No way to tell, although "INDEX" is likely
|
||||
return "TYPE='unknown'"
|
||||
else:
|
||||
return type_to_name(path[1])
|
||||
"""Takes a path as returned by urlparse and maps it to a string.
|
||||
See section 3.4 of RFC 1738 for details."""
|
||||
if path=="/":
|
||||
# No way to tell, although "INDEX" is likely
|
||||
return "TYPE='unknown'"
|
||||
else:
|
||||
return type_to_name(path[1])
|
||||
|
||||
# The following functions interpret the data returned by the gopher
|
||||
# server according to the expected type, e.g. textfile or directory
|
||||
|
||||
# Get a directory in the form of a list of entries
|
||||
def get_directory(f):
|
||||
import string
|
||||
list = []
|
||||
while 1:
|
||||
line = f.readline()
|
||||
if not line:
|
||||
print '(Unexpected EOF from server)'
|
||||
break
|
||||
if line[-2:] == CRLF:
|
||||
line = line[:-2]
|
||||
elif line[-1:] in CRLF:
|
||||
line = line[:-1]
|
||||
if line == '.':
|
||||
break
|
||||
if not line:
|
||||
print '(Empty line from server)'
|
||||
continue
|
||||
gtype = line[0]
|
||||
parts = string.splitfields(line[1:], TAB)
|
||||
if len(parts) < 4:
|
||||
print '(Bad line from server:', `line`, ')'
|
||||
continue
|
||||
if len(parts) > 4:
|
||||
if parts[4:] != ['+']:
|
||||
print '(Extra info from server:',
|
||||
print parts[4:], ')'
|
||||
else:
|
||||
parts.append('')
|
||||
parts.insert(0, gtype)
|
||||
list.append(parts)
|
||||
return list
|
||||
"""Get a directory in the form of a list of entries."""
|
||||
import string
|
||||
list = []
|
||||
while 1:
|
||||
line = f.readline()
|
||||
if not line:
|
||||
print '(Unexpected EOF from server)'
|
||||
break
|
||||
if line[-2:] == CRLF:
|
||||
line = line[:-2]
|
||||
elif line[-1:] in CRLF:
|
||||
line = line[:-1]
|
||||
if line == '.':
|
||||
break
|
||||
if not line:
|
||||
print '(Empty line from server)'
|
||||
continue
|
||||
gtype = line[0]
|
||||
parts = string.splitfields(line[1:], TAB)
|
||||
if len(parts) < 4:
|
||||
print '(Bad line from server:', `line`, ')'
|
||||
continue
|
||||
if len(parts) > 4:
|
||||
if parts[4:] != ['+']:
|
||||
print '(Extra info from server:',
|
||||
print parts[4:], ')'
|
||||
else:
|
||||
parts.append('')
|
||||
parts.insert(0, gtype)
|
||||
list.append(parts)
|
||||
return list
|
||||
|
||||
# Get a text file as a list of lines, with trailing CRLF stripped
|
||||
def get_textfile(f):
|
||||
list = []
|
||||
get_alt_textfile(f, list.append)
|
||||
return list
|
||||
"""Get a text file as a list of lines, with trailing CRLF stripped."""
|
||||
list = []
|
||||
get_alt_textfile(f, list.append)
|
||||
return list
|
||||
|
||||
# Get a text file and pass each line to a function, with trailing CRLF stripped
|
||||
def get_alt_textfile(f, func):
|
||||
while 1:
|
||||
line = f.readline()
|
||||
if not line:
|
||||
print '(Unexpected EOF from server)'
|
||||
break
|
||||
if line[-2:] == CRLF:
|
||||
line = line[:-2]
|
||||
elif line[-1:] in CRLF:
|
||||
line = line[:-1]
|
||||
if line == '.':
|
||||
break
|
||||
if line[:2] == '..':
|
||||
line = line[1:]
|
||||
func(line)
|
||||
"""Get a text file and pass each line to a function, with trailing CRLF stripped."""
|
||||
while 1:
|
||||
line = f.readline()
|
||||
if not line:
|
||||
print '(Unexpected EOF from server)'
|
||||
break
|
||||
if line[-2:] == CRLF:
|
||||
line = line[:-2]
|
||||
elif line[-1:] in CRLF:
|
||||
line = line[:-1]
|
||||
if line == '.':
|
||||
break
|
||||
if line[:2] == '..':
|
||||
line = line[1:]
|
||||
func(line)
|
||||
|
||||
# Get a binary file as one solid data block
|
||||
def get_binary(f):
|
||||
data = f.read()
|
||||
return data
|
||||
"""Get a binary file as one solid data block."""
|
||||
data = f.read()
|
||||
return data
|
||||
|
||||
# Get a binary file and pass each block to a function
|
||||
def get_alt_binary(f, func, blocksize):
|
||||
while 1:
|
||||
data = f.read(blocksize)
|
||||
if not data:
|
||||
break
|
||||
func(data)
|
||||
"""Get a binary file and pass each block to a function."""
|
||||
while 1:
|
||||
data = f.read(blocksize)
|
||||
if not data:
|
||||
break
|
||||
func(data)
|
||||
|
||||
# Trivial test program
|
||||
def test():
|
||||
import sys
|
||||
import getopt
|
||||
opts, args = getopt.getopt(sys.argv[1:], '')
|
||||
selector = DEF_SELECTOR
|
||||
type = selector[0]
|
||||
host = DEF_HOST
|
||||
port = DEF_PORT
|
||||
if args:
|
||||
host = args[0]
|
||||
args = args[1:]
|
||||
if args:
|
||||
type = args[0]
|
||||
args = args[1:]
|
||||
if len(type) > 1:
|
||||
type, selector = type[0], type
|
||||
else:
|
||||
selector = ''
|
||||
if args:
|
||||
selector = args[0]
|
||||
args = args[1:]
|
||||
query = ''
|
||||
if args:
|
||||
query = args[0]
|
||||
args = args[1:]
|
||||
if type == A_INDEX:
|
||||
f = send_query(selector, query, host)
|
||||
else:
|
||||
f = send_selector(selector, host)
|
||||
if type == A_TEXT:
|
||||
list = get_textfile(f)
|
||||
for item in list: print item
|
||||
elif type in (A_MENU, A_INDEX):
|
||||
list = get_directory(f)
|
||||
for item in list: print item
|
||||
else:
|
||||
data = get_binary(f)
|
||||
print 'binary data:', len(data), 'bytes:', `data[:100]`[:40]
|
||||
"""Trivial test program."""
|
||||
import sys
|
||||
import getopt
|
||||
opts, args = getopt.getopt(sys.argv[1:], '')
|
||||
selector = DEF_SELECTOR
|
||||
type = selector[0]
|
||||
host = DEF_HOST
|
||||
port = DEF_PORT
|
||||
if args:
|
||||
host = args[0]
|
||||
args = args[1:]
|
||||
if args:
|
||||
type = args[0]
|
||||
args = args[1:]
|
||||
if len(type) > 1:
|
||||
type, selector = type[0], type
|
||||
else:
|
||||
selector = ''
|
||||
if args:
|
||||
selector = args[0]
|
||||
args = args[1:]
|
||||
query = ''
|
||||
if args:
|
||||
query = args[0]
|
||||
args = args[1:]
|
||||
if type == A_INDEX:
|
||||
f = send_query(selector, query, host)
|
||||
else:
|
||||
f = send_selector(selector, host)
|
||||
if type == A_TEXT:
|
||||
list = get_textfile(f)
|
||||
for item in list: print item
|
||||
elif type in (A_MENU, A_INDEX):
|
||||
list = get_directory(f)
|
||||
for item in list: print item
|
||||
else:
|
||||
data = get_binary(f)
|
||||
print 'binary data:', len(data), 'bytes:', `data[:100]`[:40]
|
||||
|
||||
# Run the test when run as script
|
||||
if __name__ == '__main__':
|
||||
test()
|
||||
test()
|
||||
|
|
|
@ -1,105 +1,257 @@
|
|||
# Proposed entity definitions for HTML, taken from
|
||||
# http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_14.html
|
||||
"""HTML character entity references."""
|
||||
|
||||
entitydefs = {
|
||||
'lt': '<',
|
||||
'gt': '>',
|
||||
'amp': '&',
|
||||
'quot': '"',
|
||||
'nbsp': chr(160), # no-break space
|
||||
'iexcl': chr(161), # inverted exclamation mark
|
||||
'cent': chr(162), # cent sign
|
||||
'pound': chr(163), # pound sterling sign
|
||||
'curren': chr(164), # general currency sign
|
||||
'yen': chr(165), # yen sign
|
||||
'brvbar': chr(166), # broken (vertical) bar
|
||||
'sect': chr(167), # section sign
|
||||
'uml': chr(168), # umlaut (dieresis)
|
||||
'copy': chr(169), # copyright sign
|
||||
'ordf': chr(170), # ordinal indicator, feminine
|
||||
'laquo': chr(171), # angle quotation mark, left
|
||||
'not': chr(172), # not sign
|
||||
'shy': chr(173), # soft hyphen
|
||||
'reg': chr(174), # registered sign
|
||||
'macr': chr(175), # macron
|
||||
'deg': chr(176), # degree sign
|
||||
'plusmn': chr(177), # plus-or-minus sign
|
||||
'sup2': chr(178), # superscript two
|
||||
'sup3': chr(179), # superscript three
|
||||
'acute': chr(180), # acute accent
|
||||
'micro': chr(181), # micro sign
|
||||
'para': chr(182), # pilcrow (paragraph sign)
|
||||
'middot': chr(183), # middle dot
|
||||
'cedil': chr(184), # cedilla
|
||||
'sup1': chr(185), # superscript one
|
||||
'ordm': chr(186), # ordinal indicator, masculine
|
||||
'raquo': chr(187), # angle quotation mark, right
|
||||
'frac14': chr(188), # fraction one-quarter
|
||||
'frac12': chr(189), # fraction one-half
|
||||
'frac34': chr(190), # fraction three-quarters
|
||||
'iquest': chr(191), # inverted question mark
|
||||
'Agrave': chr(192), # capital A, grave accent
|
||||
'Aacute': chr(193), # capital A, acute accent
|
||||
'Acirc': chr(194), # capital A, circumflex accent
|
||||
'Atilde': chr(195), # capital A, tilde
|
||||
'Auml': chr(196), # capital A, dieresis or umlaut mark
|
||||
'Aring': chr(197), # capital A, ring
|
||||
'AElig': chr(198), # capital AE diphthong (ligature)
|
||||
'Ccedil': chr(199), # capital C, cedilla
|
||||
'Egrave': chr(200), # capital E, grave accent
|
||||
'Eacute': chr(201), # capital E, acute accent
|
||||
'Ecirc': chr(202), # capital E, circumflex accent
|
||||
'Euml': chr(203), # capital E, dieresis or umlaut mark
|
||||
'Igrave': chr(204), # capital I, grave accent
|
||||
'Iacute': chr(205), # capital I, acute accent
|
||||
'Icirc': chr(206), # capital I, circumflex accent
|
||||
'Iuml': chr(207), # capital I, dieresis or umlaut mark
|
||||
'ETH': chr(208), # capital Eth, Icelandic
|
||||
'Ntilde': chr(209), # capital N, tilde
|
||||
'Ograve': chr(210), # capital O, grave accent
|
||||
'Oacute': chr(211), # capital O, acute accent
|
||||
'Ocirc': chr(212), # capital O, circumflex accent
|
||||
'Otilde': chr(213), # capital O, tilde
|
||||
'Ouml': chr(214), # capital O, dieresis or umlaut mark
|
||||
'times': chr(215), # multiply sign
|
||||
'Oslash': chr(216), # capital O, slash
|
||||
'Ugrave': chr(217), # capital U, grave accent
|
||||
'Uacute': chr(218), # capital U, acute accent
|
||||
'Ucirc': chr(219), # capital U, circumflex accent
|
||||
'Uuml': chr(220), # capital U, dieresis or umlaut mark
|
||||
'Yacute': chr(221), # capital Y, acute accent
|
||||
'THORN': chr(222), # capital THORN, Icelandic
|
||||
'szlig': chr(223), # small sharp s, German (sz ligature)
|
||||
'agrave': chr(224), # small a, grave accent
|
||||
'aacute': chr(225), # small a, acute accent
|
||||
'acirc': chr(226), # small a, circumflex accent
|
||||
'atilde': chr(227), # small a, tilde
|
||||
'auml': chr(228), # small a, dieresis or umlaut mark
|
||||
'aring': chr(229), # small a, ring
|
||||
'aelig': chr(230), # small ae diphthong (ligature)
|
||||
'ccedil': chr(231), # small c, cedilla
|
||||
'egrave': chr(232), # small e, grave accent
|
||||
'eacute': chr(233), # small e, acute accent
|
||||
'ecirc': chr(234), # small e, circumflex accent
|
||||
'euml': chr(235), # small e, dieresis or umlaut mark
|
||||
'igrave': chr(236), # small i, grave accent
|
||||
'iacute': chr(237), # small i, acute accent
|
||||
'icirc': chr(238), # small i, circumflex accent
|
||||
'iuml': chr(239), # small i, dieresis or umlaut mark
|
||||
'eth': chr(240), # small eth, Icelandic
|
||||
'ntilde': chr(241), # small n, tilde
|
||||
'ograve': chr(242), # small o, grave accent
|
||||
'oacute': chr(243), # small o, acute accent
|
||||
'ocirc': chr(244), # small o, circumflex accent
|
||||
'otilde': chr(245), # small o, tilde
|
||||
'ouml': chr(246), # small o, dieresis or umlaut mark
|
||||
'divide': chr(247), # divide sign
|
||||
'oslash': chr(248), # small o, slash
|
||||
'ugrave': chr(249), # small u, grave accent
|
||||
'uacute': chr(250), # small u, acute accent
|
||||
'ucirc': chr(251), # small u, circumflex accent
|
||||
'uuml': chr(252), # small u, dieresis or umlaut mark
|
||||
'yacute': chr(253), # small y, acute accent
|
||||
'thorn': chr(254), # small thorn, Icelandic
|
||||
'yuml': chr(255), # small y, dieresis or umlaut mark
|
||||
'AElig': '\306', # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
|
||||
'Aacute': '\301', # latin capital letter A with acute, U+00C1 ISOlat1
|
||||
'Acirc': '\302', # latin capital letter A with circumflex, U+00C2 ISOlat1
|
||||
'Agrave': '\300', # latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
|
||||
'Alpha': 'Α', # greek capital letter alpha, U+0391
|
||||
'Aring': '\305', # latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
|
||||
'Atilde': '\303', # latin capital letter A with tilde, U+00C3 ISOlat1
|
||||
'Auml': '\304', # latin capital letter A with diaeresis, U+00C4 ISOlat1
|
||||
'Beta': 'Β', # greek capital letter beta, U+0392
|
||||
'Ccedil': '\307', # latin capital letter C with cedilla, U+00C7 ISOlat1
|
||||
'Chi': 'Χ', # greek capital letter chi, U+03A7
|
||||
'Dagger': '‡', # double dagger, U+2021 ISOpub
|
||||
'Delta': 'Δ', # greek capital letter delta, U+0394 ISOgrk3
|
||||
'ETH': '\320', # latin capital letter ETH, U+00D0 ISOlat1
|
||||
'Eacute': '\311', # latin capital letter E with acute, U+00C9 ISOlat1
|
||||
'Ecirc': '\312', # latin capital letter E with circumflex, U+00CA ISOlat1
|
||||
'Egrave': '\310', # latin capital letter E with grave, U+00C8 ISOlat1
|
||||
'Epsilon': 'Ε', # greek capital letter epsilon, U+0395
|
||||
'Eta': 'Η', # greek capital letter eta, U+0397
|
||||
'Euml': '\313', # latin capital letter E with diaeresis, U+00CB ISOlat1
|
||||
'Gamma': 'Γ', # greek capital letter gamma, U+0393 ISOgrk3
|
||||
'Iacute': '\315', # latin capital letter I with acute, U+00CD ISOlat1
|
||||
'Icirc': '\316', # latin capital letter I with circumflex, U+00CE ISOlat1
|
||||
'Igrave': '\314', # latin capital letter I with grave, U+00CC ISOlat1
|
||||
'Iota': 'Ι', # greek capital letter iota, U+0399
|
||||
'Iuml': '\317', # latin capital letter I with diaeresis, U+00CF ISOlat1
|
||||
'Kappa': 'Κ', # greek capital letter kappa, U+039A
|
||||
'Lambda': 'Λ', # greek capital letter lambda, U+039B ISOgrk3
|
||||
'Mu': 'Μ', # greek capital letter mu, U+039C
|
||||
'Ntilde': '\321', # latin capital letter N with tilde, U+00D1 ISOlat1
|
||||
'Nu': 'Ν', # greek capital letter nu, U+039D
|
||||
'OElig': 'Œ', # latin capital ligature OE, U+0152 ISOlat2
|
||||
'Oacute': '\323', # latin capital letter O with acute, U+00D3 ISOlat1
|
||||
'Ocirc': '\324', # latin capital letter O with circumflex, U+00D4 ISOlat1
|
||||
'Ograve': '\322', # latin capital letter O with grave, U+00D2 ISOlat1
|
||||
'Omega': 'Ω', # greek capital letter omega, U+03A9 ISOgrk3
|
||||
'Omicron': 'Ο', # greek capital letter omicron, U+039F
|
||||
'Oslash': '\330', # latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
|
||||
'Otilde': '\325', # latin capital letter O with tilde, U+00D5 ISOlat1
|
||||
'Ouml': '\326', # latin capital letter O with diaeresis, U+00D6 ISOlat1
|
||||
'Phi': 'Φ', # greek capital letter phi, U+03A6 ISOgrk3
|
||||
'Pi': 'Π', # greek capital letter pi, U+03A0 ISOgrk3
|
||||
'Prime': '″', # double prime = seconds = inches, U+2033 ISOtech
|
||||
'Psi': 'Ψ', # greek capital letter psi, U+03A8 ISOgrk3
|
||||
'Rho': 'Ρ', # greek capital letter rho, U+03A1
|
||||
'Scaron': 'Š', # latin capital letter S with caron, U+0160 ISOlat2
|
||||
'Sigma': 'Σ', # greek capital letter sigma, U+03A3 ISOgrk3
|
||||
'THORN': '\336', # latin capital letter THORN, U+00DE ISOlat1
|
||||
'Tau': 'Τ', # greek capital letter tau, U+03A4
|
||||
'Theta': 'Θ', # greek capital letter theta, U+0398 ISOgrk3
|
||||
'Uacute': '\332', # latin capital letter U with acute, U+00DA ISOlat1
|
||||
'Ucirc': '\333', # latin capital letter U with circumflex, U+00DB ISOlat1
|
||||
'Ugrave': '\331', # latin capital letter U with grave, U+00D9 ISOlat1
|
||||
'Upsilon': 'Υ', # greek capital letter upsilon, U+03A5 ISOgrk3
|
||||
'Uuml': '\334', # latin capital letter U with diaeresis, U+00DC ISOlat1
|
||||
'Xi': 'Ξ', # greek capital letter xi, U+039E ISOgrk3
|
||||
'Yacute': '\335', # latin capital letter Y with acute, U+00DD ISOlat1
|
||||
'Yuml': 'Ÿ', # latin capital letter Y with diaeresis, U+0178 ISOlat2
|
||||
'Zeta': 'Ζ', # greek capital letter zeta, U+0396
|
||||
'aacute': '\341', # latin small letter a with acute, U+00E1 ISOlat1
|
||||
'acirc': '\342', # latin small letter a with circumflex, U+00E2 ISOlat1
|
||||
'acute': '\264', # acute accent = spacing acute, U+00B4 ISOdia
|
||||
'aelig': '\346', # latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
|
||||
'agrave': '\340', # latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
|
||||
'alefsym': 'ℵ', # alef symbol = first transfinite cardinal, U+2135 NEW
|
||||
'alpha': 'α', # greek small letter alpha, U+03B1 ISOgrk3
|
||||
'amp': '\46', # ampersand, U+0026 ISOnum
|
||||
'and': '∧', # logical and = wedge, U+2227 ISOtech
|
||||
'ang': '∠', # angle, U+2220 ISOamso
|
||||
'aring': '\345', # latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
|
||||
'asymp': '≈', # almost equal to = asymptotic to, U+2248 ISOamsr
|
||||
'atilde': '\343', # latin small letter a with tilde, U+00E3 ISOlat1
|
||||
'auml': '\344', # latin small letter a with diaeresis, U+00E4 ISOlat1
|
||||
'bdquo': '„', # double low-9 quotation mark, U+201E NEW
|
||||
'beta': 'β', # greek small letter beta, U+03B2 ISOgrk3
|
||||
'brvbar': '\246', # broken bar = broken vertical bar, U+00A6 ISOnum
|
||||
'bull': '•', # bullet = black small circle, U+2022 ISOpub
|
||||
'cap': '∩', # intersection = cap, U+2229 ISOtech
|
||||
'ccedil': '\347', # latin small letter c with cedilla, U+00E7 ISOlat1
|
||||
'cedil': '\270', # cedilla = spacing cedilla, U+00B8 ISOdia
|
||||
'cent': '\242', # cent sign, U+00A2 ISOnum
|
||||
'chi': 'χ', # greek small letter chi, U+03C7 ISOgrk3
|
||||
'circ': 'ˆ', # modifier letter circumflex accent, U+02C6 ISOpub
|
||||
'clubs': '♣', # black club suit = shamrock, U+2663 ISOpub
|
||||
'cong': '≅', # approximately equal to, U+2245 ISOtech
|
||||
'copy': '\251', # copyright sign, U+00A9 ISOnum
|
||||
'crarr': '↵', # downwards arrow with corner leftwards = carriage return, U+21B5 NEW
|
||||
'cup': '∪', # union = cup, U+222A ISOtech
|
||||
'curren': '\244', # currency sign, U+00A4 ISOnum
|
||||
'dArr': '⇓', # downwards double arrow, U+21D3 ISOamsa
|
||||
'dagger': '†', # dagger, U+2020 ISOpub
|
||||
'darr': '↓', # downwards arrow, U+2193 ISOnum
|
||||
'deg': '\260', # degree sign, U+00B0 ISOnum
|
||||
'delta': 'δ', # greek small letter delta, U+03B4 ISOgrk3
|
||||
'diams': '♦', # black diamond suit, U+2666 ISOpub
|
||||
'divide': '\367', # division sign, U+00F7 ISOnum
|
||||
'eacute': '\351', # latin small letter e with acute, U+00E9 ISOlat1
|
||||
'ecirc': '\352', # latin small letter e with circumflex, U+00EA ISOlat1
|
||||
'egrave': '\350', # latin small letter e with grave, U+00E8 ISOlat1
|
||||
'empty': '∅', # empty set = null set = diameter, U+2205 ISOamso
|
||||
'emsp': ' ', # em space, U+2003 ISOpub
|
||||
'ensp': ' ', # en space, U+2002 ISOpub
|
||||
'epsilon': 'ε', # greek small letter epsilon, U+03B5 ISOgrk3
|
||||
'equiv': '≡', # identical to, U+2261 ISOtech
|
||||
'eta': 'η', # greek small letter eta, U+03B7 ISOgrk3
|
||||
'eth': '\360', # latin small letter eth, U+00F0 ISOlat1
|
||||
'euml': '\353', # latin small letter e with diaeresis, U+00EB ISOlat1
|
||||
'euro': '€', # euro sign, U+20AC NEW
|
||||
'exist': '∃', # there exists, U+2203 ISOtech
|
||||
'fnof': 'ƒ', # latin small f with hook = function = florin, U+0192 ISOtech
|
||||
'forall': '∀', # for all, U+2200 ISOtech
|
||||
'frac12': '\275', # vulgar fraction one half = fraction one half, U+00BD ISOnum
|
||||
'frac14': '\274', # vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
|
||||
'frac34': '\276', # vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
|
||||
'frasl': '⁄', # fraction slash, U+2044 NEW
|
||||
'gamma': 'γ', # greek small letter gamma, U+03B3 ISOgrk3
|
||||
'ge': '≥', # greater-than or equal to, U+2265 ISOtech
|
||||
'gt': '\76', # greater-than sign, U+003E ISOnum
|
||||
'hArr': '⇔', # left right double arrow, U+21D4 ISOamsa
|
||||
'harr': '↔', # left right arrow, U+2194 ISOamsa
|
||||
'hearts': '♥', # black heart suit = valentine, U+2665 ISOpub
|
||||
'hellip': '…', # horizontal ellipsis = three dot leader, U+2026 ISOpub
|
||||
'iacute': '\355', # latin small letter i with acute, U+00ED ISOlat1
|
||||
'icirc': '\356', # latin small letter i with circumflex, U+00EE ISOlat1
|
||||
'iexcl': '\241', # inverted exclamation mark, U+00A1 ISOnum
|
||||
'igrave': '\354', # latin small letter i with grave, U+00EC ISOlat1
|
||||
'image': 'ℑ', # blackletter capital I = imaginary part, U+2111 ISOamso
|
||||
'infin': '∞', # infinity, U+221E ISOtech
|
||||
'int': '∫', # integral, U+222B ISOtech
|
||||
'iota': 'ι', # greek small letter iota, U+03B9 ISOgrk3
|
||||
'iquest': '\277', # inverted question mark = turned question mark, U+00BF ISOnum
|
||||
'isin': '∈', # element of, U+2208 ISOtech
|
||||
'iuml': '\357', # latin small letter i with diaeresis, U+00EF ISOlat1
|
||||
'kappa': 'κ', # greek small letter kappa, U+03BA ISOgrk3
|
||||
'lArr': '⇐', # leftwards double arrow, U+21D0 ISOtech
|
||||
'lambda': 'λ', # greek small letter lambda, U+03BB ISOgrk3
|
||||
'lang': '〈', # left-pointing angle bracket = bra, U+2329 ISOtech
|
||||
'laquo': '\253', # left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
|
||||
'larr': '←', # leftwards arrow, U+2190 ISOnum
|
||||
'lceil': '⌈', # left ceiling = apl upstile, U+2308 ISOamsc
|
||||
'ldquo': '“', # left double quotation mark, U+201C ISOnum
|
||||
'le': '≤', # less-than or equal to, U+2264 ISOtech
|
||||
'lfloor': '⌊', # left floor = apl downstile, U+230A ISOamsc
|
||||
'lowast': '∗', # asterisk operator, U+2217 ISOtech
|
||||
'loz': '◊', # lozenge, U+25CA ISOpub
|
||||
'lrm': '‎', # left-to-right mark, U+200E NEW RFC 2070
|
||||
'lsaquo': '‹', # single left-pointing angle quotation mark, U+2039 ISO proposed
|
||||
'lsquo': '‘', # left single quotation mark, U+2018 ISOnum
|
||||
'lt': '\74', # less-than sign, U+003C ISOnum
|
||||
'macr': '\257', # macron = spacing macron = overline = APL overbar, U+00AF ISOdia
|
||||
'mdash': '—', # em dash, U+2014 ISOpub
|
||||
'micro': '\265', # micro sign, U+00B5 ISOnum
|
||||
'middot': '\267', # middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
|
||||
'minus': '−', # minus sign, U+2212 ISOtech
|
||||
'mu': 'μ', # greek small letter mu, U+03BC ISOgrk3
|
||||
'nabla': '∇', # nabla = backward difference, U+2207 ISOtech
|
||||
'nbsp': '\240', # no-break space = non-breaking space, U+00A0 ISOnum
|
||||
'ndash': '–', # en dash, U+2013 ISOpub
|
||||
'ne': '≠', # not equal to, U+2260 ISOtech
|
||||
'ni': '∋', # contains as member, U+220B ISOtech
|
||||
'not': '\254', # not sign, U+00AC ISOnum
|
||||
'notin': '∉', # not an element of, U+2209 ISOtech
|
||||
'nsub': '⊄', # not a subset of, U+2284 ISOamsn
|
||||
'ntilde': '\361', # latin small letter n with tilde, U+00F1 ISOlat1
|
||||
'nu': 'ν', # greek small letter nu, U+03BD ISOgrk3
|
||||
'oacute': '\363', # latin small letter o with acute, U+00F3 ISOlat1
|
||||
'ocirc': '\364', # latin small letter o with circumflex, U+00F4 ISOlat1
|
||||
'oelig': 'œ', # latin small ligature oe, U+0153 ISOlat2
|
||||
'ograve': '\362', # latin small letter o with grave, U+00F2 ISOlat1
|
||||
'oline': '‾', # overline = spacing overscore, U+203E NEW
|
||||
'omega': 'ω', # greek small letter omega, U+03C9 ISOgrk3
|
||||
'omicron': 'ο', # greek small letter omicron, U+03BF NEW
|
||||
'oplus': '⊕', # circled plus = direct sum, U+2295 ISOamsb
|
||||
'or': '∨', # logical or = vee, U+2228 ISOtech
|
||||
'ordf': '\252', # feminine ordinal indicator, U+00AA ISOnum
|
||||
'ordm': '\272', # masculine ordinal indicator, U+00BA ISOnum
|
||||
'oslash': '\370', # latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
|
||||
'otilde': '\365', # latin small letter o with tilde, U+00F5 ISOlat1
|
||||
'otimes': '⊗', # circled times = vector product, U+2297 ISOamsb
|
||||
'ouml': '\366', # latin small letter o with diaeresis, U+00F6 ISOlat1
|
||||
'para': '\266', # pilcrow sign = paragraph sign, U+00B6 ISOnum
|
||||
'part': '∂', # partial differential, U+2202 ISOtech
|
||||
'permil': '‰', # per mille sign, U+2030 ISOtech
|
||||
'perp': '⊥', # up tack = orthogonal to = perpendicular, U+22A5 ISOtech
|
||||
'phi': 'φ', # greek small letter phi, U+03C6 ISOgrk3
|
||||
'pi': 'π', # greek small letter pi, U+03C0 ISOgrk3
|
||||
'piv': 'ϖ', # greek pi symbol, U+03D6 ISOgrk3
|
||||
'plusmn': '\261', # plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
|
||||
'pound': '\243', # pound sign, U+00A3 ISOnum
|
||||
'prime': '′', # prime = minutes = feet, U+2032 ISOtech
|
||||
'prod': '∏', # n-ary product = product sign, U+220F ISOamsb
|
||||
'prop': '∝', # proportional to, U+221D ISOtech
|
||||
'psi': 'ψ', # greek small letter psi, U+03C8 ISOgrk3
|
||||
'quot': '\42', # quotation mark = APL quote, U+0022 ISOnum
|
||||
'rArr': '⇒', # rightwards double arrow, U+21D2 ISOtech
|
||||
'radic': '√', # square root = radical sign, U+221A ISOtech
|
||||
'rang': '〉', # right-pointing angle bracket = ket, U+232A ISOtech
|
||||
'raquo': '\273', # right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
|
||||
'rarr': '→', # rightwards arrow, U+2192 ISOnum
|
||||
'rceil': '⌉', # right ceiling, U+2309 ISOamsc
|
||||
'rdquo': '”', # right double quotation mark, U+201D ISOnum
|
||||
'real': 'ℜ', # blackletter capital R = real part symbol, U+211C ISOamso
|
||||
'reg': '\256', # registered sign = registered trade mark sign, U+00AE ISOnum
|
||||
'rfloor': '⌋', # right floor, U+230B ISOamsc
|
||||
'rho': 'ρ', # greek small letter rho, U+03C1 ISOgrk3
|
||||
'rlm': '‏', # right-to-left mark, U+200F NEW RFC 2070
|
||||
'rsaquo': '›', # single right-pointing angle quotation mark, U+203A ISO proposed
|
||||
'rsquo': '’', # right single quotation mark, U+2019 ISOnum
|
||||
'sbquo': '‚', # single low-9 quotation mark, U+201A NEW
|
||||
'scaron': 'š', # latin small letter s with caron, U+0161 ISOlat2
|
||||
'sdot': '⋅', # dot operator, U+22C5 ISOamsb
|
||||
'sect': '\247', # section sign, U+00A7 ISOnum
|
||||
'shy': '\255', # soft hyphen = discretionary hyphen, U+00AD ISOnum
|
||||
'sigma': 'σ', # greek small letter sigma, U+03C3 ISOgrk3
|
||||
'sigmaf': 'ς', # greek small letter final sigma, U+03C2 ISOgrk3
|
||||
'sim': '∼', # tilde operator = varies with = similar to, U+223C ISOtech
|
||||
'spades': '♠', # black spade suit, U+2660 ISOpub
|
||||
'sub': '⊂', # subset of, U+2282 ISOtech
|
||||
'sube': '⊆', # subset of or equal to, U+2286 ISOtech
|
||||
'sum': '∑', # n-ary summation, U+2211 ISOamsb
|
||||
'sup': '⊃', # superset of, U+2283 ISOtech
|
||||
'sup1': '\271', # superscript one = superscript digit one, U+00B9 ISOnum
|
||||
'sup2': '\262', # superscript two = superscript digit two = squared, U+00B2 ISOnum
|
||||
'sup3': '\263', # superscript three = superscript digit three = cubed, U+00B3 ISOnum
|
||||
'supe': '⊇', # superset of or equal to, U+2287 ISOtech
|
||||
'szlig': '\337', # latin small letter sharp s = ess-zed, U+00DF ISOlat1
|
||||
'tau': 'τ', # greek small letter tau, U+03C4 ISOgrk3
|
||||
'there4': '∴', # therefore, U+2234 ISOtech
|
||||
'theta': 'θ', # greek small letter theta, U+03B8 ISOgrk3
|
||||
'thetasym': 'ϑ', # greek small letter theta symbol, U+03D1 NEW
|
||||
'thinsp': ' ', # thin space, U+2009 ISOpub
|
||||
'thorn': '\376', # latin small letter thorn, U+00FE ISOlat1
|
||||
'tilde': '˜', # small tilde, U+02DC ISOdia
|
||||
'times': '\327', # multiplication sign, U+00D7 ISOnum
|
||||
'trade': '™', # trade mark sign, U+2122 ISOnum
|
||||
'uArr': '⇑', # upwards double arrow, U+21D1 ISOamsa
|
||||
'uacute': '\372', # latin small letter u with acute, U+00FA ISOlat1
|
||||
'uarr': '↑', # upwards arrow, U+2191 ISOnum
|
||||
'ucirc': '\373', # latin small letter u with circumflex, U+00FB ISOlat1
|
||||
'ugrave': '\371', # latin small letter u with grave, U+00F9 ISOlat1
|
||||
'uml': '\250', # diaeresis = spacing diaeresis, U+00A8 ISOdia
|
||||
'upsih': 'ϒ', # greek upsilon with hook symbol, U+03D2 NEW
|
||||
'upsilon': 'υ', # greek small letter upsilon, U+03C5 ISOgrk3
|
||||
'uuml': '\374', # latin small letter u with diaeresis, U+00FC ISOlat1
|
||||
'weierp': '℘', # script capital P = power set = Weierstrass p, U+2118 ISOamso
|
||||
'xi': 'ξ', # greek small letter xi, U+03BE ISOgrk3
|
||||
'yacute': '\375', # latin small letter y with acute, U+00FD ISOlat1
|
||||
'yen': '\245', # yen sign = yuan sign, U+00A5 ISOnum
|
||||
'yuml': '\377', # latin small letter y with diaeresis, U+00FF ISOlat1
|
||||
'zeta': 'ζ', # greek small letter zeta, U+03B6 ISOgrk3
|
||||
'zwj': '‍', # zero width joiner, U+200D NEW RFC 2070
|
||||
'zwnj': '‌', # zero width non-joiner, U+200C NEW RFC 2070
|
||||
|
||||
}
|
||||
|
|
|
@ -1,18 +1,20 @@
|
|||
# Cache lines from files.
|
||||
# This is intended to read lines from modules imported -- hence if a filename
|
||||
# is not found, it will look down the module search path for a file by
|
||||
# that name.
|
||||
"""Cache lines from files.
|
||||
|
||||
This is intended to read lines from modules imported -- hence if a filename
|
||||
is not found, it will look down the module search path for a file by
|
||||
that name.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
from stat import *
|
||||
|
||||
def getline(filename, lineno):
    """Return line 'lineno' (counting from 1) of 'filename'.

    The lines come from getlines(); an empty string is returned when
    'lineno' falls outside the range of available lines."""
    available = getlines(filename)
    if lineno < 1 or lineno > len(available):
        return ''
    return available[lineno - 1]
|
||||
|
||||
|
||||
# The cache
|
||||
|
@ -20,71 +22,71 @@ def getline(filename, lineno):
|
|||
cache = {} # The cache
|
||||
|
||||
|
||||
# Clear the cache entirely
|
||||
|
||||
def clearcache():
    """Forget every cached file.

    The module-level 'cache' name is rebound to a brand-new empty
    dictionary; the previous dictionary object itself is not modified."""
    global cache
    cache = {}
|
||||
|
||||
# Get the lines for a file from the cache.
|
||||
# Update the cache if it doesn't contain an entry for this file already.
|
||||
|
||||
def getlines(filename):
    """Return the list of lines cached for 'filename'.

    When the cache holds no entry for this file yet, the result of
    updatecache(filename) is returned instead."""
    try:
        entry = cache[filename]
    except KeyError:
        return updatecache(filename)
    # Cache entries are (size, mtime, lines, fullname) tuples.
    return entry[2]
|
||||
|
||||
# Discard cache entries that are out of date.
|
||||
# (This is not checked upon each call!)
|
||||
|
||||
def checkcache():
    """Throw away cache entries that no longer match the file on disk.

    An entry is dropped when its file cannot be stat'ed any more, or
    when the recorded size or modification time differs from the
    current one.  (Nothing calls this automatically on lookups.)"""
    # Work on a snapshot so entries can be deleted while looping.
    for filename, entry in list(cache.items()):
        size, mtime, lines, fullname = entry
        try:
            st = os.stat(fullname)
        except os.error:
            outdated = 1
        else:
            outdated = size != st[ST_SIZE] or mtime != st[ST_MTIME]
        if outdated:
            del cache[filename]
|
||||
|
||||
# Update a cache entry and return its list of lines.
|
||||
# If something's wrong, print a message, discard the cache entry,
|
||||
# and return an empty list.
|
||||
|
||||
def updatecache(filename):
    """Update a cache entry and return its list of lines.

    Any existing entry for 'filename' is discarded first.  The file is
    looked up under the given name; if that cannot be stat'ed, its base
    name is tried in each directory on sys.path.  On success the entry
    (size, mtime, lines, fullname) is stored in the cache and the list
    of lines is returned.

    If the name is empty, has the form '<...>', cannot be found, or
    cannot be read, nothing is cached and an empty list is returned.
    No message is printed."""

    # Discard a possibly stale entry, whether or not one exists.
    try:
        del cache[filename]
    except KeyError:
        pass
    if not filename or filename[0] + filename[-1] == '<>':
        return []
    fullname = filename
    try:
        stat = os.stat(fullname)
    except os.error:
        # Try looking through the module search path
        basename = os.path.split(filename)[1]
        for dirname in sys.path:
            fullname = os.path.join(dirname, basename)
            try:
                stat = os.stat(fullname)
                break
            except os.error:
                pass
        else:
            # No luck
            return []
    try:
        fp = open(fullname, 'r')
        try:
            lines = fp.readlines()
        finally:
            # Close the file even when reading fails half-way.
            fp.close()
    except IOError:
        return []
    size, mtime = stat[ST_SIZE], stat[ST_MTIME]
    cache[filename] = size, mtime, lines, fullname
    return lines
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
"""Mac specific module for conversion between pathnames and URLs.
|
||||
Do not import directly, use urllib instead."""
|
||||
"""Macintosh-specific module for conversion between pathnames and URLs.
|
||||
|
||||
Do not import directly; use urllib instead."""
|
||||
|
||||
import string
|
||||
import urllib
|
||||
|
@ -13,6 +14,11 @@ def url2pathname(pathname):
|
|||
tp = urllib.splittype(pathname)[0]
|
||||
if tp and tp <> 'file':
|
||||
raise RuntimeError, 'Cannot convert non-local URL to pathname'
|
||||
# Turn starting /// into /, an empty hostname means current host
|
||||
if pathname[:3] == '///':
|
||||
pathname = pathname[2:]
|
||||
elif pathname[:2] == '//':
|
||||
raise RuntimeError, 'Cannot convert non-local URL to pathname'
|
||||
components = string.split(pathname, '/')
|
||||
# Remove . and embedded ..
|
||||
i = 0
|
||||
|
|
|
@ -0,0 +1,246 @@
|
|||
"""Generic MIME parser.
|
||||
|
||||
Classes:
|
||||
|
||||
MimeParser - Generic MIME parser.
|
||||
|
||||
Exceptions:
|
||||
|
||||
MimeError - Exception raised by MimeParser class.
|
||||
|
||||
XXX To do:
|
||||
|
||||
- Content-transfer-encoding issues
|
||||
- Use Content-length header in rawbody()?
|
||||
- Cache parts instead of reparsing each time
|
||||
- The message strings in exceptions could use some work
|
||||
|
||||
"""
|
||||
|
||||
from types import * # Python types, not MIME types :-)
|
||||
import string
|
||||
import regex
|
||||
import SubFile
|
||||
import mimetools
|
||||
|
||||
|
||||
MimeError = "MimeParser.MimeError" # Exception raised by this class
|
||||
|
||||
|
||||
class MimeParser:
|
||||
|
||||
"""Generic MIME parser.
|
||||
|
||||
This requires a seekable file.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, fp):
|
||||
"""Constructor: store the file pointer and parse the headers."""
|
||||
self._fp = fp
|
||||
self._start = fp.tell()
|
||||
self._headers = h = mimetools.Message(fp)
|
||||
self._bodystart = fp.tell()
|
||||
self._multipart = h.getmaintype() == 'multipart'
|
||||
|
||||
def multipart(self):
|
||||
"""Return whether this is a multipart message."""
|
||||
return self._multipart
|
||||
|
||||
def headers(self):
|
||||
"""Return the headers of the MIME message, as a Message object."""
|
||||
return self._headers
|
||||
|
||||
def rawbody(self):
|
||||
"""Return the raw body of the MIME message, as a file-like object.
|
||||
|
||||
This is a fairly low-level interface -- for a multipart
|
||||
message, you'd have to parse the body yourself, and it doesn't
|
||||
translate the Content-transfer-encoding.
|
||||
|
||||
"""
|
||||
# XXX Use Content-length to set end if it exists?
|
||||
return SubFile.SubFile(self._fp, self._bodystart)
|
||||
|
||||
def body(self):
|
||||
"""Return the body of a 1-part MIME message, as a file-like object.
|
||||
|
||||
This should interpret the Content-transfer-encoding, if any
|
||||
(XXX currently it doesn't).
|
||||
|
||||
"""
|
||||
if self._multipart:
|
||||
raise MimeError, "body() only works for 1-part messages"
|
||||
return self.rawbody()
|
||||
|
||||
_re_content_length = regex.compile('content-length:[ \t]*\([0-9]+\)',
|
||||
regex.casefold)
|
||||
|
||||
def rawparts(self):
|
||||
"""Return the raw body parts of a multipart MIME message.
|
||||
|
||||
This returns a list of SubFile() objects corresponding to the
|
||||
parts. Note that the phantom part before the first separator
|
||||
is returned too, as list item 0. If the final part is not
|
||||
followed by a terminator, it is ignored, and this error is not
|
||||
reported. (XXX: the error should be raised).
|
||||
|
||||
"""
|
||||
if not self._multipart:
|
||||
raise MimeError, "[raw]parts() only works for multipart messages"
|
||||
h = self._headers
|
||||
separator = h.getparam('boundary')
|
||||
if not separator:
|
||||
raise MimeError, "multipart boundary not specified"
|
||||
separator = "--" + separator
|
||||
terminator = separator + "--"
|
||||
ns = len(separator)
|
||||
list = []
|
||||
f = self._fp
|
||||
start = f.tell()
|
||||
clength = -1
|
||||
bodystart = -1
|
||||
inheaders = 0
|
||||
while 1:
|
||||
end = f.tell()
|
||||
line = f.readline()
|
||||
if not line:
|
||||
break
|
||||
if line[:2] != "--" or line[:ns] != separator:
|
||||
if inheaders:
|
||||
re = self._re_content_length
|
||||
if re.match(line) > 0:
|
||||
try:
|
||||
clength = string.atoi(re.group(1))
|
||||
except string.atoi_error:
|
||||
pass
|
||||
if not string.strip(line):
|
||||
inheaders = 0
|
||||
bodystart = f.tell()
|
||||
if clength > 0:
|
||||
# Skip binary data
|
||||
f.read(clength)
|
||||
continue
|
||||
line = string.strip(line)
|
||||
if line == terminator or line == separator:
|
||||
if clength >= 0:
|
||||
# The Content-length header determines the subfile size
|
||||
end = bodystart + clength
|
||||
else:
|
||||
# The final newline is not part of the content
|
||||
end = end-1
|
||||
list.append(SubFile.SubFile(f, start, end))
|
||||
start = f.tell()
|
||||
clength = -1
|
||||
inheaders = 1
|
||||
if line == terminator:
|
||||
break
|
||||
return list
|
||||
|
||||
def parts(self):
    """Return the parsed body parts of a multipart MIME message.

    Each raw part from rawparts(), except the phantom part before
    the first separator (item 0), is wrapped in a MimeParser()
    instance; the list of those instances is returned.

    """
    parsers = []
    for raw in self.rawparts()[1:]:
        parsers.append(MimeParser(raw))
    return parsers
|
||||
|
||||
def getsubpartbyposition(self, indices):
    """Return the nested subpart addressed by a sequence of indices.

    Starting from this parser, each index selects one entry of the
    current part's parts() list.  An empty sequence returns self.

    """
    current = self
    for position in indices:
        children = current.parts()
        current = children[position]
    return current
|
||||
|
||||
def getsubpartbyid(self, id):
    """Return the (sub)part whose Content-ID header equals 'id'.

    This part is checked first; for a multipart message the search
    then descends into each part in order.  Returns the first
    matching parser, or None if no part carries that Content-ID.

    """
    cid = self._headers.getheader('content-id')
    if cid and cid == id:
        return self
    if self._multipart:
        # parts() already returns MimeParser instances, so recurse on
        # each part directly instead of wrapping it in a second parser.
        for part in self.parts():
            hit = part.getsubpartbyid(id)
            if hit:
                return hit
    return None
|
||||
|
||||
def index(self):
    """Return index information about the MIME file.

    The result is a tuple

            (ctype, headers, body)

    'ctype' is the content type reported by the headers' gettype()
    (e.g. `text/plain' or `multipart/mixed'); 'headers' is the
    Message instance holding the message headers (treat it as
    read-only).

    'body' depends on whether the message is multipart:

    - for a non-multipart (atomic) message it is the file-like
      object returned by self.body();

    - for a multipart/* message it is the list of MimeParser()
      objects returned by self.parts().

    """
    headers = self._headers
    if not self._multipart:
        payload = self.body()
    else:
        payload = self.parts()
    return headers.gettype(), headers, payload
|
||||
|
||||
|
||||
def _show(parser, level=0):
|
||||
"""Helper for _test()."""
|
||||
ctype, headers, body = parser.index()
|
||||
print ctype,
|
||||
if type(body) == ListType:
|
||||
nparts = len(body)
|
||||
print "(%d part%s):" % (nparts, nparts != 1 and "s" or "")
|
||||
n = 0
|
||||
for part in body:
|
||||
n = n+1
|
||||
print "%*d." % (4*level+2, n),
|
||||
_show(part, level+1)
|
||||
else:
|
||||
bodylines = body.readlines()
|
||||
print "(%d header lines, %d body lines)" % (
|
||||
len(headers.headers), len(bodylines))
|
||||
for line in headers.headers + ['\n'] + bodylines:
|
||||
if line[-1:] == '\n': line = line[:-1]
|
||||
print " "*level + line
|
||||
|
||||
def _test(args = None):
    """Test program invoked when run as a script.

    When a filename argument is specified, it reads from that file.
    When no arguments are present, it defaults to 'testkp.txt' if it
    exists, else it defaults to stdin.  The filename '-' also means
    stdin.

    """
    # Import unconditionally: an import inside a function binds a
    # *local* name, so importing sys only when args is empty left
    # 'sys' unbound at the sys.stdin reference below whenever the
    # caller passed explicit arguments.
    import os
    import sys
    if not args:
        args = sys.argv[1:]
    if args:
        fn = args[0]
    else:
        fn = 'testkp.txt'
        if not os.path.exists(fn):
            fn = '-'
    if fn == '-':
        fp = sys.stdin
    else:
        fp = open(fn)
    try:
        mp = MimeParser(fp)
        _show(mp)
    finally:
        # Close only files opened here; leave stdin alone.
        if fp is not sys.stdin:
            fp.close()
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
_test()
|
|
@ -1,4 +1,4 @@
|
|||
# Various tools used by MIME-reading or MIME-writing programs.
|
||||
"""Various tools used by MIME-reading or MIME-writing programs."""
|
||||
|
||||
|
||||
import os
|
||||
|
@ -7,10 +7,9 @@ import string
|
|||
import tempfile
|
||||
|
||||
|
||||
# A derived class of rfc822.Message that knows about MIME headers and
|
||||
# contains some hooks for decoding encoded and multipart messages.
|
||||
|
||||
class Message(rfc822.Message):
|
||||
"""A derived class of rfc822.Message that knows about MIME headers and
|
||||
contains some hooks for decoding encoded and multipart messages."""
|
||||
|
||||
def __init__(self, fp, seekable = 1):
|
||||
rfc822.Message.__init__(self, fp, seekable)
|
||||
|
@ -96,17 +95,17 @@ class Message(rfc822.Message):
|
|||
# -----------------
|
||||
|
||||
|
||||
# Return a random string usable as a multipart boundary.
|
||||
# The method used is so that it is *very* unlikely that the same
|
||||
# string of characters will ever occur again in the Universe,
|
||||
# so the caller needn't check the data it is packing for the
|
||||
# occurrence of the boundary.
|
||||
#
|
||||
# The boundary contains dots so you have to quote it in the header.
|
||||
|
||||
_prefix = None
|
||||
|
||||
def choose_boundary():
|
||||
"""Return a random string usable as a multipart boundary.
|
||||
The method used is so that it is *very* unlikely that the same
|
||||
string of characters will ever occur again in the Universe,
|
||||
so the caller needn't check the data it is packing for the
|
||||
occurrence of the boundary.
|
||||
|
||||
The boundary contains dots so you have to quote it in the header."""
|
||||
|
||||
global _prefix
|
||||
import time
|
||||
import random
|
||||
|
@ -131,6 +130,7 @@ def choose_boundary():
|
|||
# Subroutines for decoding some common content-transfer-types
|
||||
|
||||
def decode(input, output, encoding):
|
||||
"""Decode common content-transfer-encodings (base64, quopri, uuencode)."""
|
||||
if encoding == 'base64':
|
||||
import base64
|
||||
return base64.decode(input, output)
|
||||
|
@ -140,6 +140,8 @@ def decode(input, output, encoding):
|
|||
if encoding in ('uuencode', 'x-uuencode', 'uue', 'x-uue'):
|
||||
import uu
|
||||
return uu.decode(input, output)
|
||||
if encoding in ('7bit', '8bit'):
|
||||
output.write(input.read())
|
||||
if decodetab.has_key(encoding):
|
||||
pipethrough(input, decodetab[encoding], output)
|
||||
else:
|
||||
|
@ -147,6 +149,7 @@ def decode(input, output, encoding):
|
|||
'unknown Content-Transfer-Encoding: %s' % encoding
|
||||
|
||||
def encode(input, output, encoding):
|
||||
"""Encode common content-transfer-encodings (base64, quopri, uuencode)."""
|
||||
if encoding == 'base64':
|
||||
import base64
|
||||
return base64.encode(input, output)
|
||||
|
@ -156,6 +159,8 @@ def encode(input, output, encoding):
|
|||
if encoding in ('uuencode', 'x-uuencode', 'uue', 'x-uue'):
|
||||
import uu
|
||||
return uu.encode(input, output)
|
||||
if encoding in ('7bit', '8bit'):
|
||||
output.write(input.read())
|
||||
if encodetab.has_key(encoding):
|
||||
pipethrough(input, encodetab[encoding], output)
|
||||
else:
|
||||
|
|
|
@ -30,8 +30,8 @@ import urllib
|
|||
knownfiles = [
|
||||
"/usr/local/etc/httpd/conf/mime.types",
|
||||
"/usr/local/lib/netscape/mime.types",
|
||||
"/usr/local/etc/httpd/conf/mime.types", # Apache 1.2
|
||||
"/usr/local/etc/mime.types", # Apache 1.3
|
||||
"/usr/local/etc/httpd/conf/mime.types", # Apache 1.2
|
||||
"/usr/local/etc/mime.types", # Apache 1.3
|
||||
]
|
||||
|
||||
inited = 0
|
||||
|
@ -56,24 +56,24 @@ def guess_type(url):
|
|||
init()
|
||||
scheme, url = urllib.splittype(url)
|
||||
if scheme == 'data':
|
||||
# syntax of data URLs:
|
||||
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
|
||||
# mediatype := [ type "/" subtype ] *( ";" parameter )
|
||||
# data := *urlchar
|
||||
# parameter := attribute "=" value
|
||||
# type/subtype defaults to "text/plain"
|
||||
comma = string.find(url, ',')
|
||||
if comma < 0:
|
||||
# bad data URL
|
||||
return None, None
|
||||
semi = string.find(url, ';', 0, comma)
|
||||
if semi >= 0:
|
||||
type = url[:semi]
|
||||
else:
|
||||
type = url[:comma]
|
||||
if '=' in type or '/' not in type:
|
||||
type = 'text/plain'
|
||||
return type, None # never compressed, so encoding is None
|
||||
# syntax of data URLs:
|
||||
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
|
||||
# mediatype := [ type "/" subtype ] *( ";" parameter )
|
||||
# data := *urlchar
|
||||
# parameter := attribute "=" value
|
||||
# type/subtype defaults to "text/plain"
|
||||
comma = string.find(url, ',')
|
||||
if comma < 0:
|
||||
# bad data URL
|
||||
return None, None
|
||||
semi = string.find(url, ';', 0, comma)
|
||||
if semi >= 0:
|
||||
type = url[:semi]
|
||||
else:
|
||||
type = url[:comma]
|
||||
if '=' in type or '/' not in type:
|
||||
type = 'text/plain'
|
||||
return type, None # never compressed, so encoding is None
|
||||
base, ext = posixpath.splitext(url)
|
||||
while suffix_map.has_key(ext):
|
||||
base, ext = posixpath.splitext(base + suffix_map[ext])
|
||||
|
@ -175,6 +175,7 @@ types_map = {
|
|||
'.jpe': 'image/jpeg',
|
||||
'.jpeg': 'image/jpeg',
|
||||
'.jpg': 'image/jpeg',
|
||||
'.js': 'application/x-javascript',
|
||||
'.latex': 'application/x-latex',
|
||||
'.man': 'application/x-troff-man',
|
||||
'.me': 'application/x-troff-me',
|
||||
|
|
|
@ -1,28 +1,31 @@
|
|||
# A class that makes each part of a multipart message "feel" like an
|
||||
# ordinary file, as long as you use fp.readline(). Allows recursive
|
||||
# use, for nested multipart messages. Probably best used together
|
||||
# with module mimetools.
|
||||
#
|
||||
# Suggested use:
|
||||
#
|
||||
# real_fp = open(...)
|
||||
# fp = MultiFile(real_fp)
|
||||
#
|
||||
# "read some lines from fp"
|
||||
# fp.push(separator)
|
||||
# while 1:
|
||||
# "read lines from fp until it returns an empty string" (A)
|
||||
# if not fp.next(): break
|
||||
# fp.pop()
|
||||
# "read remaining lines from fp until it returns an empty string"
|
||||
#
|
||||
# The latter sequence may be used recursively at (A).
|
||||
# It is also allowed to use multiple push()...pop() sequences.
|
||||
#
|
||||
# If seekable is given as 0, the class code will not do the bookkeeping
|
||||
# it normally attempts in order to make seeks relative to the beginning of the
|
||||
# current file part. This may be useful when using MultiFile with a non-
|
||||
# seekable stream object.
|
||||
"""A readline()-style interface to the parts of a multipart message.
|
||||
|
||||
The MultiFile class makes each part of a multipart message "feel" like
|
||||
an ordinary file, as long as you use fp.readline(). Allows recursive
|
||||
use, for nested multipart messages. Probably best used together
|
||||
with module mimetools.
|
||||
|
||||
Suggested use:
|
||||
|
||||
real_fp = open(...)
|
||||
fp = MultiFile(real_fp)
|
||||
|
||||
"read some lines from fp"
|
||||
fp.push(separator)
|
||||
while 1:
|
||||
"read lines from fp until it returns an empty string" (A)
|
||||
if not fp.next(): break
|
||||
fp.pop()
|
||||
"read remaining lines from fp until it returns an empty string"
|
||||
|
||||
The latter sequence may be used recursively at (A).
|
||||
It is also allowed to use multiple push()...pop() sequences.
|
||||
|
||||
If seekable is given as 0, the class code will not do the bookkeeping
|
||||
it normally attempts in order to make seeks relative to the beginning of the
|
||||
current file part. This may be useful when using MultiFile with a non-
|
||||
seekable stream object.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import string
|
||||
|
@ -30,9 +33,9 @@ import string
|
|||
Error = 'multifile.Error'
|
||||
|
||||
class MultiFile:
|
||||
#
|
||||
|
||||
seekable = 0
|
||||
#
|
||||
|
||||
def __init__(self, fp, seekable=1):
|
||||
self.fp = fp
|
||||
self.stack = [] # Grows down
|
||||
|
@ -42,12 +45,12 @@ class MultiFile:
|
|||
self.seekable = 1
|
||||
self.start = self.fp.tell()
|
||||
self.posstack = [] # Grows down
|
||||
#
|
||||
|
||||
def tell(self):
    """Return the current position relative to self.start.

    While self.level is positive the saved self.lastpos is returned
    instead of querying the underlying file.
    """
    if self.level <= 0:
        return self.fp.tell() - self.start
    return self.lastpos
|
||||
#
|
||||
|
||||
def seek(self, pos, whence=0):
|
||||
here = self.tell()
|
||||
if whence:
|
||||
|
@ -64,7 +67,7 @@ class MultiFile:
|
|||
self.fp.seek(pos + self.start)
|
||||
self.level = 0
|
||||
self.last = 0
|
||||
#
|
||||
|
||||
def readline(self):
|
||||
if self.level > 0:
|
||||
return ''
|
||||
|
@ -105,7 +108,7 @@ class MultiFile:
|
|||
if self.level > 1:
|
||||
raise Error,'Missing endmarker in MultiFile.readline()'
|
||||
return ''
|
||||
#
|
||||
|
||||
def readlines(self):
|
||||
list = []
|
||||
while 1:
|
||||
|
@ -113,10 +116,10 @@ class MultiFile:
|
|||
if not line: break
|
||||
list.append(line)
|
||||
return list
|
||||
#
|
||||
|
||||
def read(self): # Note: no size argument -- read until EOF only!
|
||||
return string.joinfields(self.readlines(), '')
|
||||
#
|
||||
|
||||
def next(self):
|
||||
while self.readline(): pass
|
||||
if self.level > 1 or self.last:
|
||||
|
@ -126,7 +129,7 @@ class MultiFile:
|
|||
if self.seekable:
|
||||
self.start = self.fp.tell()
|
||||
return 1
|
||||
#
|
||||
|
||||
def push(self, sep):
|
||||
if self.level > 0:
|
||||
raise Error, 'bad MultiFile.push() call'
|
||||
|
@ -134,7 +137,7 @@ class MultiFile:
|
|||
if self.seekable:
|
||||
self.posstack.insert(0, self.start)
|
||||
self.start = self.fp.tell()
|
||||
#
|
||||
|
||||
def pop(self):
|
||||
if self.stack == []:
|
||||
raise Error, 'bad MultiFile.pop() call'
|
||||
|
@ -149,12 +152,12 @@ class MultiFile:
|
|||
del self.posstack[0]
|
||||
if self.level > 0:
|
||||
self.lastpos = abslastpos - self.start
|
||||
#
|
||||
|
||||
def is_data(self, line):
    """Return true unless the line starts with two dashes."""
    prefix = line[:2]
    return prefix != '--'
|
||||
#
|
||||
|
||||
def section_divider(self, str):
    """Return the given separator string prefixed with two dashes."""
    dashes = "--"
    return dashes + str
|
||||
#
|
||||
|
||||
def end_marker(self, str):
    """Return the given separator string wrapped in two dashes on each side."""
    dashes = "--"
    return dashes + str + dashes
|
||||
|
|
|
@ -1,6 +1,4 @@
|
|||
#
|
||||
# nturl2path convert a NT pathname to a file URL and
|
||||
# vice versa
|
||||
"""Convert a NT pathname to a file URL and vice versa."""
|
||||
|
||||
def url2pathname(url):
|
||||
""" Convert a URL to a DOS path...
|
||||
|
@ -34,7 +32,6 @@ def url2pathname(url):
|
|||
return path
|
||||
|
||||
def pathname2url(p):
|
||||
|
||||
""" Convert a DOS path name to a file url...
|
||||
C:\foo\bar\spam.foo
|
||||
|
||||
|
|
|
@ -1,64 +1,61 @@
|
|||
#
|
||||
# Start of posixfile.py
|
||||
#
|
||||
"""Extended file operations available in POSIX.
|
||||
|
||||
#
|
||||
# Extended file operations
|
||||
#
|
||||
# f = posixfile.open(filename, [mode, [bufsize]])
|
||||
# will create a new posixfile object
|
||||
#
|
||||
# f = posixfile.fileopen(fileobject)
|
||||
# will create a posixfile object from a builtin file object
|
||||
#
|
||||
# f.file()
|
||||
# will return the original builtin file object
|
||||
#
|
||||
# f.dup()
|
||||
# will return a new file object based on a new filedescriptor
|
||||
#
|
||||
# f.dup2(fd)
|
||||
# will return a new file object based on the given filedescriptor
|
||||
#
|
||||
# f.flags(mode)
|
||||
# will turn on the associated flag (merge)
|
||||
# mode can contain the following characters:
|
||||
#
|
||||
# (character representing a flag)
|
||||
# a append only flag
|
||||
# c close on exec flag
|
||||
# n no delay flag
|
||||
# s synchronization flag
|
||||
# (modifiers)
|
||||
# ! turn flags 'off' instead of default 'on'
|
||||
# = copy flags 'as is' instead of default 'merge'
|
||||
# ? return a string in which the characters represent the flags
|
||||
# that are set
|
||||
#
|
||||
# note: - the '!' and '=' modifiers are mutually exclusive.
|
||||
# - the '?' modifier will return the status of the flags after they
|
||||
# have been changed by other characters in the mode string
|
||||
#
|
||||
# f.lock(mode [, len [, start [, whence]]])
|
||||
# will (un)lock a region
|
||||
# mode can contain the following characters:
|
||||
#
|
||||
# (character representing type of lock)
|
||||
# u unlock
|
||||
# r read lock
|
||||
# w write lock
|
||||
# (modifiers)
|
||||
# | wait until the lock can be granted
|
||||
# ? return the first lock conflicting with the requested lock
|
||||
# or 'None' if there is no conflict. The lock returned is in the
|
||||
# format (mode, len, start, whence, pid) where mode is a
|
||||
# character representing the type of lock ('r' or 'w')
|
||||
#
|
||||
# note: - the '?' modifier prevents a region from being locked; it is
|
||||
# query only
|
||||
#
|
||||
f = posixfile.open(filename, [mode, [bufsize]])
|
||||
will create a new posixfile object
|
||||
|
||||
f = posixfile.fileopen(fileobject)
|
||||
will create a posixfile object from a builtin file object
|
||||
|
||||
f.file()
|
||||
will return the original builtin file object
|
||||
|
||||
f.dup()
|
||||
will return a new file object based on a new filedescriptor
|
||||
|
||||
f.dup2(fd)
|
||||
will return a new file object based on the given filedescriptor
|
||||
|
||||
f.flags(mode)
|
||||
will turn on the associated flag (merge)
|
||||
mode can contain the following characters:
|
||||
|
||||
(character representing a flag)
|
||||
a append only flag
|
||||
c close on exec flag
|
||||
n no delay flag
|
||||
s synchronization flag
|
||||
(modifiers)
|
||||
! turn flags 'off' instead of default 'on'
|
||||
= copy flags 'as is' instead of default 'merge'
|
||||
? return a string in which the characters represent the flags
|
||||
that are set
|
||||
|
||||
note: - the '!' and '=' modifiers are mutually exclusive.
|
||||
- the '?' modifier will return the status of the flags after they
|
||||
have been changed by other characters in the mode string
|
||||
|
||||
f.lock(mode [, len [, start [, whence]]])
|
||||
will (un)lock a region
|
||||
mode can contain the following characters:
|
||||
|
||||
(character representing type of lock)
|
||||
u unlock
|
||||
r read lock
|
||||
w write lock
|
||||
(modifiers)
|
||||
| wait until the lock can be granted
|
||||
? return the first lock conflicting with the requested lock
|
||||
or 'None' if there is no conflict. The lock returned is in the
|
||||
format (mode, len, start, whence, pid) where mode is a
|
||||
character representing the type of lock ('r' or 'w')
|
||||
|
||||
note: - the '?' modifier prevents a region from being locked; it is
|
||||
query only
|
||||
"""
|
||||
|
||||
class _posixfile_:
|
||||
"""File wrapper class that provides extra POSIX file routines."""
|
||||
|
||||
states = ['open', 'closed']
|
||||
|
||||
#
|
||||
|
@ -178,6 +175,7 @@ class _posixfile_:
|
|||
# additions for AIX by Vladimir.Marangozov@imag.fr
|
||||
import sys, os
|
||||
if sys.platform in ('netbsd1',
|
||||
'openbsd2',
|
||||
'freebsd2', 'freebsd3',
|
||||
'bsdos2', 'bsdos3', 'bsdos4'):
|
||||
flock = struct.pack('lxxxxlxxxxlhh', \
|
||||
|
@ -193,6 +191,7 @@ class _posixfile_:
|
|||
|
||||
if '?' in how:
|
||||
if sys.platform in ('netbsd1',
|
||||
'openbsd2',
|
||||
'freebsd2', 'freebsd3',
|
||||
'bsdos2', 'bsdos3', 'bsdos4'):
|
||||
l_start, l_len, l_pid, l_type, l_whence = \
|
||||
|
@ -213,13 +212,12 @@ class _posixfile_:
|
|||
else:
|
||||
return 'w', l_len, l_start, l_whence, l_pid
|
||||
|
||||
#
|
||||
# Public routine to obtain a posixfile object
|
||||
#
|
||||
def open(name, mode='r', bufsize=-1):
    """Open the named file and return it as a posixfile object.

    The arguments are passed unchanged to the open() method of a new
    _posixfile_ instance, whose result is returned.
    """
    wrapper = _posixfile_()
    return wrapper.open(name, mode, bufsize)
|
||||
|
||||
def fileopen(file):
    """Return a posixfile object for an existing Python file object.

    The file is handed to the fileopen() method of a new _posixfile_
    instance, whose result is returned.
    """
    wrapper = _posixfile_()
    return wrapper.fileopen(file)
|
||||
|
||||
#
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
# Module 'posixpath' -- common operations on Posix pathnames.
|
||||
# Some of this can actually be useful on non-Posix systems too, e.g.
|
||||
# for manipulation of the pathname component of URLs.
|
||||
# The "os.path" name is an alias for this module on Posix systems;
|
||||
# on other systems (e.g. Mac, Windows), os.path provides the same
|
||||
# operations in a manner specific to that platform, and is an alias
|
||||
# to another module (e.g. macpath, ntpath).
|
||||
"""Common pathname manipulations, Posix version.
|
||||
Instead of importing this module
|
||||
directly, import os and refer to this module as os.path.
|
||||
"""Common operations on Posix pathnames.
|
||||
|
||||
Instead of importing this module directly, import os and refer to
|
||||
this module as os.path. The "os.path" name is an alias for this
|
||||
module on Posix systems; on other systems (e.g. Mac, Windows),
|
||||
os.path provides the same operations in a manner specific to that
|
||||
platform, and is an alias to another module (e.g. macpath, ntpath).
|
||||
|
||||
Some of this can actually be useful on non-Posix systems too, e.g.
|
||||
for manipulation of the pathname component of URLs.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
@ -143,7 +143,7 @@ def getmtime(filename):
|
|||
def getatime(filename):
    """Return the last access time of a file, reported by os.stat()."""
    st = os.stat(filename)
    # The access time lives in the ST_ATIME slot; ST_MTIME is the
    # modification time and must not be returned here.
    return st[stat.ST_ATIME]
|
||||
|
||||
|
||||
# Is a path a symbolic link?
|
||||
|
@ -254,7 +254,7 @@ def ismount(path):
|
|||
# or to impose a different order of visiting.
|
||||
|
||||
def walk(top, func, arg):
|
||||
"""walk(top,func,args) calls func(arg, d, files) for each directory "d"
|
||||
"""walk(top,func,arg) calls func(arg, d, files) for each directory "d"
|
||||
in the tree rooted at "top" (including "top" itself). "files" is a list
|
||||
of all the files and subdirs in directory "d".
|
||||
"""
|
||||
|
@ -263,11 +263,10 @@ of all the files and subdirs in directory "d".
|
|||
except os.error:
|
||||
return
|
||||
func(arg, top, names)
|
||||
exceptions = ('.', '..')
|
||||
for name in names:
|
||||
if name not in exceptions:
|
||||
name = join(top, name)
|
||||
if isdir(name) and not islink(name):
|
||||
st = os.lstat(name)
|
||||
if stat.S_ISDIR(st[stat.ST_MODE]):
|
||||
walk(name, func, arg)
|
||||
|
||||
|
||||
|
@ -369,8 +368,8 @@ def normpath(path):
|
|||
return slashes + string.joinfields(comps, '/')
|
||||
|
||||
|
||||
# Return an absolute path.
|
||||
def abspath(path):
    """Return a normalized, absolute version of a path.

    A path that is not already absolute is first joined onto the
    current working directory; the result is passed through
    normpath().
    """
    if isabs(path):
        return normpath(path)
    return normpath(join(os.getcwd(), path))
|
||||
|
|
|
@ -7,7 +7,7 @@ import imp
|
|||
MAGIC = imp.get_magic()
|
||||
|
||||
def wr_long(f, x):
|
||||
"Internal; write a 32-bit int to a file in little-endian order."
|
||||
"""Internal; write a 32-bit int to a file in little-endian order."""
|
||||
f.write(chr( x & 0xff))
|
||||
f.write(chr((x >> 8) & 0xff))
|
||||
f.write(chr((x >> 16) & 0xff))
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# A multi-producer, multi-consumer queue.
|
||||
"""A multi-producer, multi-consumer queue."""
|
||||
|
||||
# define this exception to be compatible with Python 1.5's class
|
||||
# exceptions, but also when -X option is used.
|
||||
|
@ -15,7 +15,7 @@ except TypeError:
|
|||
Full = 'Queue.Full'
|
||||
|
||||
class Queue:
|
||||
def __init__(self, maxsize):
|
||||
def __init__(self, maxsize=0):
|
||||
"""Initialize a queue object with a given maximum size.
|
||||
|
||||
If maxsize is <= 0, the queue size is infinite.
|
||||
|
|
|
@ -1,5 +1,11 @@
|
|||
# These bits are passed to regex.set_syntax() to choose among
|
||||
# alternative regexp syntaxes.
|
||||
"""Constants for selecting regexp syntaxes for the obsolete regex module.
|
||||
|
||||
This module is only for backward compatibility. "regex" has now
|
||||
been replaced by the new regular expression module, "re".
|
||||
|
||||
These bits are passed to regex.set_syntax() to choose among
|
||||
alternative regexp syntaxes.
|
||||
"""
|
||||
|
||||
# 1 means plain parentheses serve as grouping, and backslash
|
||||
# parentheses are needed for literal searching.
|
||||
|
|
|
@ -0,0 +1,946 @@
|
|||
"""RFC-822 message manipulation class.
|
||||
|
||||
XXX This is only a very rough sketch of a full RFC-822 parser;
|
||||
in particular the tokenizing of addresses does not adhere to all the
|
||||
quoting rules.
|
||||
|
||||
Directions for use:
|
||||
|
||||
To create a Message object: first open a file, e.g.:
|
||||
fp = open(file, 'r')
|
||||
You can use any other legal way of getting an open file object, e.g. use
|
||||
sys.stdin or call os.popen().
|
||||
Then pass the open file object to the Message() constructor:
|
||||
m = Message(fp)
|
||||
|
||||
This class can work with any input object that supports a readline
|
||||
method. If the input object has seek and tell capability, the
|
||||
rewindbody method will work; also illegal lines will be pushed back
|
||||
onto the input stream. If the input object lacks seek but has an
|
||||
`unread' method that can push back a line of input, Message will use
|
||||
that to push back illegal lines. Thus this class can be used to parse
|
||||
messages coming from a buffered stream.
|
||||
|
||||
The optional `seekable' argument is provided as a workaround for
|
||||
certain stdio libraries in which tell() discards buffered data before
|
||||
discovering that the lseek() system call doesn't work. For maximum
|
||||
portability, you should set the seekable argument to zero to prevent
|
||||
that initial tell() call when passing in an unseekable object such as
|
||||
a file object created from a socket object.  If it is 1 on entry --
|
||||
which it is by default -- the tell() method of the open file object is
|
||||
called once; if this raises an exception, seekable is reset to 0. For
|
||||
other nonzero values of seekable, this test is not made.
|
||||
|
||||
To get the text of a particular header there are several methods:
|
||||
str = m.getheader(name)
|
||||
str = m.getrawheader(name)
|
||||
where name is the name of the header, e.g. 'Subject'.
|
||||
The difference is that getheader() strips the leading and trailing
|
||||
whitespace, while getrawheader() doesn't. Both functions retain
|
||||
embedded whitespace (including newlines) exactly as they are
|
||||
specified in the header, and leave the case of the text unchanged.
|
||||
|
||||
For addresses and address lists there are functions
|
||||
realname, mailaddress = m.getaddr(name) and
|
||||
list = m.getaddrlist(name)
|
||||
where the latter returns a list of (realname, mailaddr) tuples.
|
||||
|
||||
There is also a method
|
||||
time = m.getdate(name)
|
||||
which parses a Date-like field and returns a time-compatible tuple,
|
||||
i.e. a tuple such as returned by time.localtime() or accepted by
|
||||
time.mktime().
|
||||
|
||||
See the class definition for lower level access methods.
|
||||
|
||||
There are also some utility functions here.
|
||||
"""
|
||||
# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
|
||||
|
||||
import string
|
||||
import time
|
||||
|
||||
|
||||
_blanklines = ('\r\n', '\n') # Optimization for islast()
|
||||
|
||||
|
||||
class Message:
|
||||
"""Represents a single RFC-822-compliant message."""
|
||||
|
||||
def __init__(self, fp, seekable = 1):
    """Set up the instance and read the headers from fp.

    When seekable is exactly 1, fp.tell() is tried once and any
    exception it raises turns seeking off.  The file positions before
    and after the headers are recorded in startofheaders and
    startofbody while the file is still considered seekable.
    """
    if seekable == 1:
        # Exercise tell() to make sure it works
        # (and then assume seek() works, too)
        try:
            fp.tell()
            seekable = 1
        except:
            # Deliberately broad: any failure means "not seekable".
            seekable = 0
    self.fp = fp
    self.seekable = seekable
    self.startofheaders = self.startofbody = None
    #
    if self.seekable:
        try:
            self.startofheaders = self.fp.tell()
        except IOError:
            self.seekable = 0
    #
    self.readheaders()
    #
    if self.seekable:
        try:
            self.startofbody = self.fp.tell()
        except IOError:
            self.seekable = 0
|
||||
|
||||
def rewindbody(self):
|
||||
"""Rewind the file to the start of the body (if seekable)."""
|
||||
if not self.seekable:
|
||||
raise IOError, "unseekable file"
|
||||
self.fp.seek(self.startofbody)
|
||||
|
||||
def readheaders(self):
    """Read the header lines of the message.

    Lines are read up to the line that islast() accepts as the end
    of the headers; that line is consumed but not stored.  If a line
    that is neither a header, a continuation nor a comment shows up
    first (an error), reading stops and an attempt is made to push
    the line back, either with the file's unread() method or by
    seeking to where the line started.

    self.status ends up as the empty string if all went well,
    otherwise it holds an error message.  self.headers is the list
    of header lines exactly as read, so printing them reproduces the
    header as it appears in the file.  self.dict maps each
    canonicalized header name to its stripped value (the last
    occurrence wins), and leading `From ' lines are collected in
    self.unixfrom.
    """
    self.dict = {}
    self.__gamh_cache = {}
    self.__gh_cache = {}
    self.unixfrom = ''
    self.headers = rawlines = []
    self.status = ''
    headerseen = ""
    firstline = 1
    startofline = unread = tell = None
    # Choose the pushback mechanism: unread() wins over seek/tell.
    if hasattr(self.fp, 'unread'):
        unread = self.fp.unread
    elif self.seekable:
        tell = self.fp.tell
    while 1:
        if tell:
            startofline = tell()
        line = self.fp.readline()
        if not line:
            self.status = 'EOF in headers'
            break
        # Skip unix From name time lines
        if firstline and line[:5] == 'From ':
            self.unixfrom = self.unixfrom + line
            continue
        firstline = 0
        if headerseen and line[0] in ' \t':
            # Continuation line: fold it into the most recent header.
            rawlines.append(line)
            self.__gamh_cache[headerseen].append(line)
            folded = string.lstrip(
                "%s\n %s" % (self.dict[headerseen], string.strip(line)))
            self.dict[headerseen] = folded
            self.__gh_cache[headerseen][-1] = folded
            continue
        if self.iscomment(line):
            # Comments are ignored entirely.
            continue
        if self.islast(line):
            # Note!  No pushback here!  The delimiter line gets eaten.
            break
        headerseen = self.isheader(line)
        if not headerseen:
            # Not a header line; throw it back and stop here.
            if self.dict:
                self.status = 'Non-header line where header expected'
            else:
                self.status = 'No headers'
            # Try to undo the read.
            if unread:
                unread(line)
            elif tell:
                self.fp.seek(startofline)
            else:
                self.status = self.status + '; bad seek'
            break
        # A legal header line: record the raw line and its value.
        rawlines.append(line)
        matching = self.__gamh_cache.get(headerseen)
        if not matching:
            self.__gamh_cache[headerseen] = matching = []
        matching.append(line)
        value = string.strip(line[len(headerseen)+1:])
        self.dict[headerseen] = value
        values = self.__gh_cache.get(headerseen)
        if not values:
            self.__gh_cache[headerseen] = values = []
        values.append(value)
|
||||
|
||||
def isheader(self, line):
|
||||
"""Determine whether a given line is a legal header.
|
||||
|
||||
This method should return the header name, suitably canonicalized.
|
||||
You may override this method in order to use Message parsing
|
||||
on tagged data in RFC822-like formats with special header formats.
|
||||
"""
|
||||
i = string.find(line, ':')
|
||||
if i > 0:
|
||||
return string.lower(line[:i])
|
||||
else:
|
||||
return None
|
||||
|
||||
def islast(self, line):
    """Tell whether a line legally ends the RFC-822 headers.

    The line qualifies when it is one of the blank lines listed in
    _blanklines; a line consisting of \r\n matches too, which is
    convenient for code reading from sockets.  Override this method
    to bend the rules, e.g. to strip trailing whitespace or to
    recognise MH template separators ('--------').
    """
    blank = line in _blanklines
    return blank
|
||||
|
||||
def iscomment(self, line):
    """Tell whether a line should be skipped entirely.

    This base implementation never skips a line: it always returns
    None.  Override it to parse tagged data in RFC822-like formats
    that support embedded comments or free-text data.
    """
    return None
|
||||
|
||||
def getallmatchingheaders(self, name,
|
||||
# speed hack:
|
||||
lower = string.lower):
|
||||
"""Find all header lines matching a given header name.
|
||||
|
||||
Look through the list of headers and find all lines
|
||||
matching a given header name (and their continuation
|
||||
lines). A list of the lines is returned, without
|
||||
interpretation. If the header does not occur, an
|
||||
empty list is returned. If the header occurs multiple
|
||||
times, all occurrences are returned. Case is not
|
||||
important in the header name.
|
||||
"""
|
||||
r = self.__gamh_cache.get(lower(name))
|
||||
if r:
|
||||
return r[:]
|
||||
return []
|
||||
|
||||
def getfirstmatchingheader(self, name,
|
||||
# speed hack:
|
||||
lower = string.lower):
|
||||
"""Get the first header line matching name.
|
||||
|
||||
This is similar to getallmatchingheaders, but it returns
|
||||
only the first matching header (and its continuation
|
||||
lines).
|
||||
"""
|
||||
l = self.__gamh_cache.get(lower(name))
|
||||
if not l:
|
||||
return []
|
||||
r = []
|
||||
for item in l:
|
||||
if r and item[0] not in " \t":
|
||||
break
|
||||
r.append(item)
|
||||
return r
|
||||
|
||||
def getrawheader(self, name):
    """A higher-level interface to getfirstmatchingheader().

    Returns the literal text of the first header named `name' with
    the keyword and its colon removed.  Leading, trailing and embedded
    whitespace (including the line breaks of continuation lines) is
    kept.  Returns None if the header does not occur.
    """
    lines = self.getfirstmatchingheader(name)
    if not lines:
        return None
    # Drop "name:" from the first line; len(name) + 1 covers the colon.
    lines[0] = lines[0][len(name) + 1:]
    return ''.join(lines)
|
||||
|
||||
def getheader(self, name, default=None):
    """Get the header value for a name.

    This is the normal interface: it looks the lowercased name up in
    the header dictionary and returns the value stored there, or
    `default' (None unless given) if there is no such header.  Because
    it uses the dictionary it finds the *last* header of that name.
    """
    return self.dict.get(name.lower(), default)
|
||||
get = getheader
|
||||
|
||||
def getheaders(self, name,
|
||||
# speed hack:
|
||||
lower = string.lower):
|
||||
"""Get all values for a header.
|
||||
|
||||
This returns a list of values for headers given more than once;
|
||||
each value in the result list is stripped in the same way as the
|
||||
result of getheader(). If the header is not given, return an
|
||||
empty list.
|
||||
"""
|
||||
r = self.__gh_cache.get(lower(name))
|
||||
if r:
|
||||
return r[:]
|
||||
return []
|
||||
|
||||
def getaddr(self, name):
    """Get a single address from a header, as a tuple.

    Returns the first entry produced by getaddrlist() for `name', or
    (None, None) when that list is empty.

    An example return value:
    ('Guido van Rossum', 'guido@cwi.nl')
    """
    # New, by Ben Escoto
    addresses = self.getaddrlist(name)
    if not addresses:
        return (None, None)
    return addresses[0]
|
||||
|
||||
def getaddrlist(self, name):
    """Get a list of addresses from a header.

    Retrieves a list of addresses from a header, where each address is
    a tuple as returned by getaddr().  Scans all named headers, so it
    works properly with multiple To: or Cc: headers for example.

    The raw lines returned by getallmatchingheaders() are glued into a
    single comma-separated string, which is then handed to
    AddrlistClass for parsing.
    """
    raw = []
    for h in self.getallmatchingheaders(name):
        if h[0] in ' \t':
            # Continuation line: part of the current header's value.
            raw.append(h)
            continue
        i = h.find(':')
        if i > 0:
            # Only a real "Name: value" line contributes an address
            # (and a separator from the previous header); this avoids
            # reusing a stale or unbound value for a malformed line.
            if raw:
                raw.append(', ')
            raw.append(h[i+1:])
    alladdrs = ''.join(raw)
    a = AddrlistClass(alladdrs)
    return a.getaddrlist()
|
||||
|
||||
def getdate(self, name):
    """Retrieve a date field from a header.

    Looks up the header `name' and passes its value to parsedate(),
    whose result (a tuple compatible with time.mktime()) is returned.
    Returns None if the header is absent.
    """
    try:
        value = self[name]
    except KeyError:
        return None
    else:
        return parsedate(value)
|
||||
|
||||
def getdate_tz(self, name):
    """Retrieve a date field from a header as a 10-tuple.

    Looks up the header `name' and passes its value to parsedate_tz().
    The first 9 elements of the result make up a tuple compatible with
    time.mktime(), and the 10th is the offset of the poster's time
    zone from GMT/UTC.  Returns None if the header is absent.
    """
    try:
        value = self[name]
    except KeyError:
        return None
    else:
        return parsedate_tz(value)
|
||||
|
||||
|
||||
# Access as a dictionary (only finds *last* header of each type):
|
||||
|
||||
def __len__(self):
|
||||
"""Get the number of headers in a message."""
|
||||
return len(self.dict)
|
||||
|
||||
def __getitem__(self, name):
|
||||
"""Get a specific header, as from a dictionary."""
|
||||
return self.dict[string.lower(name)]
|
||||
|
||||
def __setitem__(self, name, value):
|
||||
"""Set the value of a header.
|
||||
|
||||
Note: This is not a perfect inversion of __getitem__, because
|
||||
any changed headers get stuck at the end of the raw-headers list
|
||||
rather than where the altered header was.
|
||||
"""
|
||||
del self[name] # Won't fail if it doesn't exist
|
||||
self.dict[string.lower(name)] = value
|
||||
text = name + ": " + value
|
||||
lines = string.split(text, "\n")
|
||||
for line in lines:
|
||||
self.headers.append(line + "\n")
|
||||
|
||||
def __delitem__(self, name):
|
||||
"""Delete all occurrences of a specific header, if it is present."""
|
||||
name = string.lower(name)
|
||||
if not self.dict.has_key(name):
|
||||
return
|
||||
del self.dict[name]
|
||||
name = name + ':'
|
||||
n = len(name)
|
||||
list = []
|
||||
hit = 0
|
||||
for i in range(len(self.headers)):
|
||||
line = self.headers[i]
|
||||
if string.lower(line[:n]) == name:
|
||||
hit = 1
|
||||
elif line[:1] not in string.whitespace:
|
||||
hit = 0
|
||||
if hit:
|
||||
list.append(i)
|
||||
list.reverse()
|
||||
for i in list:
|
||||
del self.headers[i]
|
||||
|
||||
def has_key(self, name):
    """Determine whether a message contains the named header.

    The test is made against the header dictionary using the
    lowercased name.
    """
    key = name.lower()
    return self.dict.has_key(key)
|
||||
|
||||
def keys(self):
    """Get all of a message's header field names.

    These are the keys of the header dictionary.
    """
    names = self.dict.keys()
    return names
|
||||
|
||||
def values(self):
    """Get all of a message's header field values.

    These are the values of the header dictionary.
    """
    field_values = self.dict.values()
    return field_values
|
||||
|
||||
def items(self):
    """Get all of a message's headers.

    Returns the (name, value) pairs of the header dictionary.
    """
    pairs = self.dict.items()
    return pairs
|
||||
|
||||
def __str__(self):
|
||||
str = ''
|
||||
for hdr in self.headers:
|
||||
str = str + hdr
|
||||
return str
|
||||
|
||||
|
||||
# Utility functions
|
||||
# -----------------
|
||||
|
||||
# XXX Should fix unquote() and quote() to be really conformant.
|
||||
# XXX The inverses of the parse functions may also be useful.
|
||||
|
||||
|
||||
def unquote(str):
    """Remove quotes from a string.

    A string of at least two characters that is wrapped in double
    quotes or in angle brackets is returned without that outer pair;
    anything else is returned unchanged.
    """
    if len(str) > 1:
        ends = (str[0], str[-1])
        if ends == ('"', '"') or ends == ('<', '>'):
            return str[1:-1]
    return str
|
||||
|
||||
|
||||
def quote(str):
    """Add quotes around a string.

    Backslashes are doubled first, then embedded double quotes are
    escaped with a backslash, and the result is wrapped in double
    quotes.
    """
    escaped = str.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    return '"%s"' % escaped
|
||||
|
||||
|
||||
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    The string is parsed with AddrlistClass and the first address
    found is returned; (None, None) is returned when none is found.
    """
    parsed = AddrlistClass(address).getaddrlist()
    if parsed:
        return parsed[0]
    return (None, None)
|
||||
|
||||
|
||||
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC-822 in front of you.

    The parser keeps a cursor (self.pos) into the header text
    (self.field); every get*() method consumes text at the cursor and
    advances it.  Comments met along the way are collected in
    self.commentlist.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        # Any of these characters ends an atom.
        self.atomends = self.specials + self.LWS + self.CR
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; comments are consumed and
        recorded in self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, as
        (realname, mailaddr) tuples.  Parsing stops at the first
        position where getaddress() yields nothing.
        """
        # Iterative rather than recursive, so that a header with very
        # many addresses cannot exhaust the interpreter's recursion limit.
        result = []
        while 1:
            ad = self.getaddress()
            if not ad:
                break
            result.extend(ad)
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: empty if nothing
        could be parsed, one element for a single address, several for
        an RFC-822 group.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Snapshot (not alias) the comments seen so far: the addr-spec
        # branch below rescans from oldpos and would otherwise record
        # comments between oldpos and the '@' twice.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos = self.pos + 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos = self.pos + 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special so the parser keeps moving.
                self.pos = self.pos + 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos = self.pos + 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the
        addrspec.  Returns None if the cursor is not on '<' or if no
        addrspec is found before the closing '>'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos = self.pos + 1
        self.gotonext()
        adlist = None
        while self.pos < len(self.field):
            if expectroute:
                # Discard the domain of a source route ("@domain").
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos = self.pos + 1
                break
            elif self.field[self.pos] == '@':
                self.pos = self.pos + 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos = self.pos + 1
            else:
                adlist = self.getaddrspec()
                # Step over the character after the addrspec
                # (normally the closing '>').
                self.pos = self.pos + 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC-822 addr-spec.

        Returns "local-part@domain", or just the local part when no
        '@' follows it.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos = self.pos + 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos = self.pos + 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC-822 comments
        are allowed within the parsed fragment.

        Returns the text between the delimiters, with backslash
        escapes resolved and nested comments reduced to their text.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos = self.pos + 1
        while self.pos < len(self.field):
            if quote == 1:
                # Character after a backslash is taken literally.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos = self.pos + 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                # getcomment() has already moved past the nested
                # comment; advancing again would skip a character.
                continue
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC-822 domain-literal."""
        return self.getdelimited('[', ']\r', 0)

    def getatom(self):
        """Parse an RFC-822 atom.

        Consumes characters up to (not including) the next character
        listed in self.atomends.
        """
        atomlist = ['']

        while self.pos < len(self.field):
            if self.field[self.pos] in self.atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC-822 phrases.

        A phrase is a sequence of words, which are in turn either
        RFC-822 atoms or quoted-strings.  Phrases are canonicalized
        by squeezing all runs of continuous whitespace into one space.

        Returns the list of words; comments met on the way are added
        to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                plist.append(self.getatom())

        return plist
|
||||
|
||||
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC822 addresses.

    The parsed (realname, mailaddr) tuples are kept in
    self.addresslist; `+' and `-' build new lists from two of them.
    """

    def __init__(self, field):
        """Parse `field'; a false `field' gives an empty list."""
        AddrlistClass.__init__(self, field)
        if not field:
            self.addresslist = []
        else:
            self.addresslist = self.getaddrlist()

    def __len__(self):
        """Return the number of parsed addresses."""
        return len(self.addresslist)

    def __str__(self):
        """Return the addresses in canonical form, comma-separated."""
        dumped = map(dump_address_pair, self.addresslist)
        return ", ".join(dumped)

    def __add__(self, other):
        """Set union: self's addresses plus those of `other' not in self."""
        union = AddressList(None)
        union.addresslist = self.addresslist[:]
        for address in other.addresslist:
            if address not in self.addresslist:
                union.addresslist.append(address)
        return union

    def __sub__(self, other):
        """Set difference: self's addresses that are not in `other'."""
        difference = AddressList(None)
        for address in self.addresslist:
            if address not in other.addresslist:
                difference.addresslist.append(address)
        return difference

    def __getitem__(self, index):
        """Make indexing, slices, and 'in' work."""
        return self.addresslist[index]
|
||||
|
||||
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a non-empty name the result is '"name" <address>'; otherwise
    it is just the address.
    """
    realname = pair[0]
    if not realname:
        return pair[1]
    return '"' + realname + '" <' + pair[1] + '>'
|
||||
|
||||
# Parse a date field
|
||||
|
||||
# Abbreviated names first, full names second; parsedate_tz() maps an
# index above 12 back onto 1..12.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-tuple.

    The first nine items are (year, month, day, hour, minute, second,
    0, 0, 0); the tenth is the zone's offset from UTC in seconds, or
    None when no usable zone was given.  Zone names are looked up in
    _timezones (of the military zones only Z is known); anything else
    is read as a numeric +hhmm/-hhmm offset.

    Returns None if the string cannot be parsed, including when it is
    empty or contains only whitespace.
    """
    data = data.split()
    if not data:
        # Nothing to parse (empty or blank header value).
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3:  # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued onto the time, e.g. "08:49:37+0100".
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            # Keep the sign with the offset.
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('')  # Dummy tz
    if len(data) < 5:
        return None
    [dd, mm, yy, tm, tz] = data[:5]
    if not (dd and mm and yy and tm):
        # e.g. "06--94": splitting on '-' left an empty component.
        return None
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps "Nov 6" rather than "6 Nov".
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        mm = mm - 12
    if dd[-1] == ',':
        dd = dd[:-1]
    if yy.find(':') > 0:
        # Time and year are in the opposite order.
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
    if not yy or yy[0] not in string.digits:
        # Year and zone are in the opposite order.
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tz = tz.upper()
    tzoffset = _timezones.get(tz)
    if tzoffset is None:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        # divmod keeps the result an integer under true division too.
        hours, minutes = divmod(tzoffset, 100)
        tzoffset = tzsign * (hours * 3600 + minutes * 60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
|
||||
|
||||
|
||||
def parsedate(data):
    """Convert a time string to a time tuple.

    This is parsedate_tz() with the time zone offset dropped: when
    that function returns a tuple, its first nine items are returned;
    any other result is passed through unchanged.
    """
    parsed = parsedate_tz(data)
    if type(parsed) != type(()):
        return parsed
    return parsed[:9]
|
||||
|
||||
|
||||
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds.  When it is None the first
    eight fields are handed to time.mktime() with a DST flag of -1;
    otherwise the DST flag is 0 and the result is corrected by the
    offset and by time.timezone.
    """
    fields = data[:8]
    zone = data[9]
    if zone is not None:
        t = time.mktime(fields + (0,))
        return t - zone - time.timezone
    # No zone info, so localtime is better assumption than GMT
    return time.mktime(fields + (-1,))
|
||||
|
||||
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    `timeval' is a timestamp as accepted by time.gmtime(); the current
    time is used when it is None.
    """
    if timeval is None:
        timeval = time.time()
    stamp = time.gmtime(timeval)
    return "%s" % time.strftime('%a, %d %b %Y %H:%M:%S GMT', stamp)
|
||||
|
||||
|
||||
# When used as script, run a small test program.
|
||||
# The first command line argument must be a filename containing one
|
||||
# message in RFC-822 format.
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys, os
|
||||
file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
|
||||
if sys.argv[1:]: file = sys.argv[1]
|
||||
f = open(file, 'r')
|
||||
m = Message(f)
|
||||
print 'From:', m.getaddr('from')
|
||||
print 'To:', m.getaddrlist('to')
|
||||
print 'Subject:', m.getheader('subject')
|
||||
print 'Date:', m.getheader('date')
|
||||
date = m.getdate_tz('date')
|
||||
if date:
|
||||
print 'ParsedDate:', time.asctime(date[:-1]),
|
||||
hhmmss = date[-1]
|
||||
hhmm, ss = divmod(hhmmss, 60)
|
||||
hh, mm = divmod(hhmm, 60)
|
||||
print "%+03d%02d" % (hh, mm),
|
||||
if ss: print ".%02d" % ss,
|
||||
print
|
||||
else:
|
||||
print 'ParsedDate:', None
|
||||
m.rewindbody()
|
||||
n = 0
|
||||
while f.readline():
|
||||
n = n + 1
|
||||
print 'Lines:', n
|
||||
print '-'*70
|
||||
print 'len =', len(m)
|
||||
if m.has_key('Date'): print 'Date =', m['Date']
|
||||
if m.has_key('X-Nonsense'): pass
|
||||
print 'keys =', m.keys()
|
||||
print 'values =', m.values()
|
||||
print 'items =', m.items()
|
|
@ -1,7 +1,6 @@
|
|||
"""Word completion for GNU readline 2.0.
|
||||
|
||||
This requires the latest extension to the readline module (the
|
||||
set_completer() function). When completing a simple identifier, it
|
||||
completes keywords, built-ins and globals in __main__; when completing
|
||||
NAME.NAME..., it evaluates (!) the expression up to the last dot and
|
||||
completes its attributes.
|
||||
|
@ -87,7 +86,8 @@ class Completer:
|
|||
Assuming the text is of the form NAME.NAME....[NAME], and is
|
||||
evaluatable in the globals of __main__, it will be evaluated
|
||||
and its attributes (as revealed by dir()) are used as possible
|
||||
completions.
|
||||
completions. (For class instances, class members are also
|
||||
considered.)
|
||||
|
||||
WARNING: this can still invoke arbitrary C code, if an object
|
||||
with a __getattr__ hook is evaluated.
|
||||
|
@ -98,7 +98,11 @@ class Completer:
|
|||
if not m:
|
||||
return
|
||||
expr, attr = m.group(1, 3)
|
||||
words = dir(eval(expr, __main__.__dict__))
|
||||
object = eval(expr, __main__.__dict__)
|
||||
words = dir(object)
|
||||
if hasattr(object,'__class__'):
|
||||
words.append('__class__')
|
||||
words = words + get_class_members(object.__class__)
|
||||
matches = []
|
||||
n = len(attr)
|
||||
for word in words:
|
||||
|
@ -106,4 +110,11 @@ class Completer:
|
|||
matches.append("%s.%s" % (expr, word))
|
||||
return matches
|
||||
|
||||
def get_class_members(klass):
    """Return the names defined by `klass' and, recursively, its bases.

    The result is dir(klass) followed by the members of every class in
    klass.__bases__ (when that attribute exists); names may therefore
    appear more than once.
    """
    members = dir(klass)
    if not hasattr(klass, '__bases__'):
        return members
    for base in klass.__bases__:
        members = members + get_class_members(base)
    return members
|
||||
|
||||
readline.set_completer(Completer().complete)
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
"""
|
||||
|
||||
Robots.txt file parser class. Accepts a list of lines or robots.txt URL as
|
||||
input, builds a set of rules from that list, then answers questions about
|
||||
fetchability of other URLs.
|
||||
|
||||
"""
|
||||
|
||||
class RobotFileParser:
    """Parse a robots.txt file and answer fetchability questions.

    self.rules maps a user-agent name to the list of compiled patterns
    for its disallowed path prefixes; an empty list means nothing is
    disallowed for that agent.  Set self.debug to a true value to get
    a trace of the parsing and matching on standard output.
    """

    def __init__(self):
        self.rules = {}
        self.debug = 0
        self.url = ''
        self.last_checked = 0

    def _trace(self, message):
        """Print `message' when debugging is switched on."""
        if self.debug:
            print(message)

    def mtime(self):
        """Return the time the rules were last parsed (0 if never)."""
        return self.last_checked

    def modified(self):
        """Record the current time as the time of the last parse."""
        import time
        self.last_checked = time.time()

    def set_url(self, url):
        """Set the URL of the robots.txt file that read() will fetch."""
        self.url = url

    def read(self):
        """Fetch self.url and feed its lines to parse()."""
        import urllib
        stream = urllib.urlopen(self.url)
        try:
            lines = stream.readlines()
        finally:
            # Release the connection even if reading fails.
            stream.close()
        self.parse(lines)

    def parse(self, lines):
        """Parse the input lines from a robots.txt file.

        `lines' is a list of strings, with or without line endings.
        Rules found are added to self.rules, and the parse time is
        recorded via modified().
        """
        import re
        active = []
        for line in lines:
            self._trace('> ' + line.rstrip())
            # blank line terminates current record
            if not line.strip():
                active = []
                continue
            # remove optional comment and strip line
            hash_at = line.find('#')
            if hash_at >= 0:
                # Slice only when a '#' is present: find() returns -1
                # otherwise, which would chop the last character.
                line = line[:hash_at]
            line = line.strip()
            if not line:
                continue
            # Split on the first colon only, so that paths which
            # themselves contain a colon are kept intact.
            fields = line.split(':', 1)
            if len(fields) != 2:
                continue
            name = fields[0].strip().lower()
            value = fields[1].strip()
            if name == 'user-agent':
                # this record applies to this user agent
                self._trace('>> user-agent: %s' % value)
                active.append(value)
                if self.rules.get(value) is None:
                    self.rules[value] = []
            elif name == 'disallow':
                if value:
                    self._trace('>> disallow: %s' % value)
                    # The value is a literal path prefix, not a regular
                    # expression, so escape it before compiling.
                    rule = re.compile(re.escape(value))
                    for agent in active:
                        self.rules[agent].append(rule)
                else:
                    # An empty Disallow allows everything.
                    for agent in active:
                        self._trace('>> allow %s' % agent)
                        self.rules[agent] = []
            else:
                self._trace('>> unknown: %s' % ([name, value],))

        self.modified()

    # returns true if agent is allowed to fetch url
    def can_fetch(self, useragent, url):
        """Using the parsed robots.txt decide if useragent can fetch url.

        The rules recorded for `useragent' are used if there are any,
        else those for '*'.  Returns 0 if the path of `url' starts with
        a disallowed prefix, and 1 otherwise (also when no rules apply).
        """
        import urlparse
        rules = self.rules.get(useragent)
        if rules is None:
            rules = self.rules.get('*')
        if rules is None:
            self._trace('>> allowing %s fetch by %s' % (url, useragent))
            return 1
        path = urlparse.urlparse(url)[2]
        for rule in rules:
            if rule.match(path) is not None:
                self._trace('>> disallowing %s fetch by %s' % (url, useragent))
                return 0
        self._trace('>> allowing %s fetch by %s' % (url, useragent))
        return 1
|
||||
|
||||
def _test():
    """Fetch a live robots.txt and print the verdict for two URLs.

    Needs network access; with debugging switched on the parser also
    prints a trace of what it reads and decides.
    """
    rp = RobotFileParser()
    rp.debug = 1
    rp.set_url('http://www.musi-cal.com/robots.txt')
    rp.read()
    print(rp.rules)
    # Query the same host the rules were fetched from.
    print(rp.can_fetch('*', 'http://www.musi-cal.com/'))
    print(rp.can_fetch('Musi-Cal-Robot',
                       'http://www.musi-cal.com/cgi-bin/event-search?city=San+Francisco'))

if __name__ == "__main__":
    _test()
|
|
@ -10,13 +10,10 @@ __version__ = "0.3"
|
|||
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import socket
|
||||
import string
|
||||
import posixpath
|
||||
import SocketServer
|
||||
import BaseHTTPServer
|
||||
import urllib
|
||||
|
||||
|
||||
class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
|
@ -81,7 +78,7 @@ class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
|||
probably be diagnosed.)
|
||||
|
||||
"""
|
||||
path = posixpath.normpath(path)
|
||||
path = posixpath.normpath(urllib.unquote(path))
|
||||
words = string.splitfields(path, '/')
|
||||
words = filter(None, words)
|
||||
path = os.getcwd()
|
||||
|
|
|
@ -207,7 +207,10 @@ class TCPServer:
|
|||
|
||||
def handle_request(self):
|
||||
"""Handle one request, possibly blocking."""
|
||||
request, client_address = self.get_request()
|
||||
try:
|
||||
request, client_address = self.get_request()
|
||||
except socket.error:
|
||||
return
|
||||
if self.verify_request(request, client_address):
|
||||
try:
|
||||
self.process_request(request, client_address)
|
||||
|
@ -278,11 +281,21 @@ class ForkingMixIn:
|
|||
"""Mix-in class to handle each request in a new process."""
|
||||
|
||||
active_children = None
|
||||
max_children = 40
|
||||
|
||||
def collect_children(self):
|
||||
"""Internal routine to wait for died children."""
|
||||
while self.active_children:
|
||||
pid, status = os.waitpid(0, os.WNOHANG)
|
||||
if len(self.active_children) < self.max_children:
|
||||
options = os.WNOHANG
|
||||
else:
|
||||
# If the maximum number of children are already
|
||||
# running, block while waiting for a child to exit
|
||||
options = 0
|
||||
try:
|
||||
pid, status = os.waitpid(0, options)
|
||||
except os.error:
|
||||
pid = None
|
||||
if not pid: break
|
||||
self.active_children.remove(pid)
|
||||
|
||||
|
@ -300,6 +313,7 @@ class ForkingMixIn:
|
|||
# Child process.
|
||||
# This must never return, hence os._exit()!
|
||||
try:
|
||||
self.socket.close()
|
||||
self.finish_request(request, client_address)
|
||||
os._exit(0)
|
||||
except:
|
||||
|
@ -311,14 +325,14 @@ class ForkingMixIn:
|
|||
|
||||
|
||||
class ThreadingMixIn:
|
||||
|
||||
"""Mix-in class to handle each request in a new thread."""
|
||||
|
||||
def process_request(self, request, client_address):
|
||||
"""Start a new thread to process the request."""
|
||||
import thread
|
||||
thread.start_new_thread(self.finish_request,
|
||||
(request, client_address))
|
||||
import threading
|
||||
t = threading.Thread(target = self.finish_request,
|
||||
args = (request, client_address))
|
||||
t.start()
|
||||
|
||||
|
||||
class ForkingUDPServer(ForkingMixIn, UDPServer): pass
|
||||
|
|
|
@ -0,0 +1,187 @@
|
|||
#
|
||||
# Secret Labs' Regular Expression Engine
|
||||
# $Id$
|
||||
#
|
||||
# convert template to internal format
|
||||
#
|
||||
# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
|
||||
#
|
||||
# This code can only be used for 1.6 alpha testing. All other use
|
||||
# require explicit permission from Secret Labs AB.
|
||||
#
|
||||
# Portions of this engine have been developed in cooperation with
|
||||
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
|
||||
# other compatibility work.
|
||||
#
|
||||
|
||||
# FIXME: <fl> formalize (objectify?) and document the compiler code
|
||||
# format, so that other frontends can use this compiler
|
||||
|
||||
import array, string, sys
|
||||
|
||||
import _sre
|
||||
|
||||
from sre_constants import *
|
||||
|
||||
# find an array type code that matches the engine's code size
|
||||
# Pick the array type code whose item size equals the size of one
# engine code word; WORDSIZE is left bound to that type code.
for WORDSIZE in "BHil":
    if array.array(WORDSIZE, [0]).itemsize == _sre.getcodesize():
        break
else:
    raise RuntimeError("cannot find a useable array type")
|
||||
|
||||
# FIXME: <fl> should move some optimizations from the parser to here!
|
||||
|
||||
class Code:
    """A growable sequence of code words for the matching engine.

    The words are collected in the list self.data; todata() packs them
    into a string using the array type code WORDSIZE.
    """

    def __init__(self):
        self.data = []

    def __len__(self):
        """Return the number of code words stored so far."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the code word at `index'."""
        return self.data[index]

    def __setitem__(self, index, code):
        """Overwrite the code word at `index' (used to patch skips)."""
        self.data[index] = code

    def append(self, code):
        """Add one code word at the end."""
        self.data.append(code)

    def todata(self):
        """Return the code words packed as a string of WORDSIZE items."""
        packed = array.array(WORDSIZE, self.data)
        return packed.tostring()
|
||||
|
||||
def _lower(literal):
|
||||
# return _sre._lower(literal) # FIXME
|
||||
return string.lower(literal)
|
||||
|
||||
def _compile(code, pattern, flags):
    """Append the engine code for `pattern' to `code'.

    `pattern' is a sequence of (op, av) pairs, `code' collects the
    generated code words and `flags' is tested for "s" and "i".
    Variable-length constructs emit a placeholder word that is patched
    with the distance to skip once the construct is complete.

    Raises ValueError for an unsupported operator and SyntaxError when
    asked to repeat an item whose minimum width is zero.
    """
    emit = code.append
    for op, av in pattern:
        if op is ANY:
            if "s" in flags:
                emit(CODES[op])  # any character at all!
            else:
                # anything except character code 10 (newline)
                emit(CODES[NOT_LITERAL])
                emit(10)
        elif op in (SUCCESS, FAILURE):
            emit(CODES[op])
        elif op is AT:
            emit(CODES[op])
            emit(POSITIONS[av])
        elif op is BRANCH:
            emit(CODES[op])
            jumps = []
            for alternative in av[1]:
                skip = len(code)
                emit(0)
                _compile(code, alternative, flags)
                emit(CODES[JUMP])
                # remember where this alternative's jump offset goes
                jumps.append(len(code))
                emit(0)
                code[skip] = len(code) - skip
            emit(0)  # end of branch
            for jump in jumps:
                code[jump] = len(code) - jump
        elif op is CALL:
            emit(CODES[op])
            skip = len(code)
            emit(0)
            _compile(code, av, flags)
            emit(CODES[SUCCESS])
            code[skip] = len(code) - skip
        elif op is CATEGORY:  # not used by current parser
            emit(CODES[op])
            emit(CATEGORIES[av])
        elif op is GROUP:
            if "i" in flags:
                emit(CODES[MAP_IGNORE[op]])
            else:
                emit(CODES[op])
            emit(av)
        elif op is IN:
            if "i" in flags:
                emit(CODES[MAP_IGNORE[op]])
                def fixup(literal):
                    return ord(_lower(literal))
            else:
                emit(CODES[op])
                fixup = ord
            skip = len(code)
            emit(0)
            for set_op, set_av in av:
                emit(CODES[set_op])
                if set_op is NEGATE:
                    pass
                elif set_op is LITERAL:
                    emit(fixup(set_av))
                elif set_op is RANGE:
                    emit(fixup(set_av[0]))
                    emit(fixup(set_av[1]))
                elif set_op is CATEGORY:
                    emit(CATEGORIES[set_av])
                else:
                    raise ValueError("unsupported set operator")
            emit(CODES[FAILURE])
            code[skip] = len(code) - skip
        elif op in (LITERAL, NOT_LITERAL):
            if "i" in flags:
                emit(CODES[MAP_IGNORE[op]])
                emit(ord(_lower(av)))
            else:
                emit(CODES[op])
                emit(ord(av))
        elif op is MARK:
            emit(CODES[op])
            emit(av)
        elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
            lo, hi = av[2].getwidth()
            if lo == 0:
                raise SyntaxError("cannot repeat zero-width items")
            if lo == hi == 1 and op is MAX_REPEAT:
                # the repeated item is always exactly one wide
                emit(CODES[MAX_REPEAT_ONE])
                skip = len(code)
                emit(0)
                emit(av[0])
                emit(av[1])
                _compile(code, av[2], flags)
                emit(CODES[SUCCESS])
                code[skip] = len(code) - skip
            else:
                emit(CODES[op])
                skip = len(code)
                emit(0)
                emit(av[0])
                emit(av[1])
                _compile(code, av[2], flags)
                if op is MIN_REPEAT:
                    emit(CODES[MIN_UNTIL])
                else:
                    # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
                    emit(CODES[MAX_UNTIL])
                code[skip] = len(code) - skip
        elif op is SUBPATTERN:
##          group = av[0]
##          if group:
##              append(CODES[MARK])
##              append((group-1)*2)
            _compile(code, av[1], flags)
##          if group:
##              append(CODES[MARK])
##              append((group-1)*2+1)
        else:
            raise ValueError("unsupported operand type", op)
|
||||
|
||||
def compile(p, flags=()):
    """Compile *p* into a pattern object via _sre.compile().

    p is either a pattern string (plain or unicode), which is first run
    through sre_parse.parse(), or an already parsed pattern.  The
    flags argument is accepted but not consulted here; the flags used
    for code generation are the ones stored on p.pattern.
    """
    # convert pattern list to internal format
    pattern = None
    if type(p) in (type(""), type(u"")):
        import sre_parse
        pattern = p
        p = sre_parse.parse(pattern)
    state = p.pattern
    code = Code()
    _compile(code, p.data, state.flags)
    code.append(CODES[SUCCESS])
    data = code.todata()
    if 0: # debugging
        print("")
        print("-" * 68)
        import sre_disasm
        sre_disasm.disasm(data)
        print("-" * 68)
    return _sre.compile(pattern, data, state.groups-1, state.groupdict)
|
|
@ -0,0 +1,131 @@
|
|||
#
|
||||
# Secret Labs' Regular Expression Engine
|
||||
# $Id$
|
||||
#
|
||||
# various symbols used by the regular expression engine.
|
||||
# run this script to update the _sre include files!
|
||||
#
|
||||
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
|
||||
#
|
||||
# This code can only be used for 1.6 alpha testing. All other use
|
||||
# require explicit permission from Secret Labs AB.
|
||||
#
|
||||
# Portions of this engine have been developed in cooperation with
|
||||
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
|
||||
# other compatibility work.
|
||||
#
|
||||
|
||||
# operators
|
||||
|
||||
FAILURE = "failure"
SUCCESS = "success"

ANY = "any"
ASSERT = "assert"
AT = "at"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
GROUP = "group"
GROUP_IGNORE = "group_ignore"
IN = "in"
IN_IGNORE = "in_ignore"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
MAX_REPEAT_ONE = "max_repeat_one"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range"
REPEAT = "repeat"
SUBPATTERN = "subpattern"

# positions
AT_BEGINNING = "at_beginning"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"

# categories

CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"

def _makecodes(names):
    # Map each name to its position in the list.  Done in a function so
    # the loop temporaries do not end up as module globals, where
    # "from sre_constants import *" would copy them into every importer.
    table = {}
    for index in range(len(names)):
        table[names[index]] = index
    return table

# operator name -> opcode number (the number is the position in this list)
CODES = _makecodes([

    # failure=0 success=1 (just because it looks better that way :-)
    FAILURE, SUCCESS,

    ANY,
    ASSERT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    GROUP, GROUP_IGNORE,
    IN, IN_IGNORE,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_REPEAT, MAX_UNTIL,
    MAX_REPEAT_ONE,
    MIN_REPEAT, MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT

])

# replacement operations for "ignore case" mode
MAP_IGNORE = {
    GROUP: GROUP_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE
}

# position name -> code word emitted after an AT opcode
POSITIONS = {
    AT_BEGINNING: ord("a"),
    AT_BOUNDARY: ord("b"),
    AT_NON_BOUNDARY: ord("B"),
    AT_END: ord("z"),
}

# category name -> code word emitted after a CATEGORY opcode
CATEGORIES = {
    CATEGORY_DIGIT: ord("d"),
    CATEGORY_NOT_DIGIT: ord("D"),
    CATEGORY_SPACE: ord("s"),
    CATEGORY_NOT_SPACE: ord("S"),
    CATEGORY_WORD: ord("w"),
    CATEGORY_NOT_WORD: ord("W"),
}
|
||||
|
||||
if __name__ == "__main__":
    # Regenerate sre_constants.h: one "#define SRE_OP_<NAME> <number>"
    # line per entry in CODES, in increasing opcode order.
    pairs = []
    for name in CODES.keys():
        pairs.append((CODES[name], name))
    pairs.sort()
    f = open("sre_constants.h", "w")
    try:
        f.write("/* generated by sre_constants.py */\n")
        for value, name in pairs:
            f.write("#define SRE_OP_" + name.upper() + " " + str(value) + "\n")
    finally:
        # close the header even if one of the writes fails
        f.close()
    print("done")
|
|
@ -0,0 +1,497 @@
|
|||
#
|
||||
# Secret Labs' Regular Expression Engine
|
||||
# $Id$
|
||||
#
|
||||
# convert re-style regular expression to SRE template. the current
|
||||
# implementation is somewhat incomplete, and not very fast. should
|
||||
# definitely be rewritten before Python 1.6 goes beta.
|
||||
#
|
||||
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
|
||||
#
|
||||
# This code can only be used for 1.6 alpha testing. All other use
|
||||
# require explicit permission from Secret Labs AB.
|
||||
#
|
||||
# Portions of this engine have been developed in cooperation with
|
||||
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
|
||||
# other compatibility work.
|
||||
#
|
||||
|
||||
# FIXME: comments marked with the FIXME tag are open issues. all such
|
||||
# issues should be closed before the final beta.
|
||||
|
||||
import string, sys
|
||||
|
||||
from sre_constants import *
|
||||
|
||||
# characters that start something other than a plain literal
SPECIAL_CHARS = ".\\[{()*+?^$|"
# characters that repeat the previous item
REPEAT_CHARS = "*+?{"

# FIXME: string in tuple tests may explode if char is unicode :-(
DIGITS = tuple(string.digits)

OCTDIGITS = tuple(string.octdigits)
HEXDIGITS = tuple(string.hexdigits)

# escapes that stand for a single literal character
ESCAPES = {
    "\\a": (LITERAL, "\a"),
    "\\b": (LITERAL, "\b"),
    "\\f": (LITERAL, "\f"),
    "\\n": (LITERAL, "\n"),
    "\\r": (LITERAL, "\r"),
    "\\t": (LITERAL, "\t"),
    "\\v": (LITERAL, "\v")
}

# escapes that stand for a position or a character category
CATEGORIES = {
    # positions
    "\\A": (AT, AT_BEGINNING), # start of string
    "\\Z": (AT, AT_END), # end of string
    "\\b": (AT, AT_BOUNDARY),
    "\\B": (AT, AT_NON_BOUNDARY),
    # categories, each wrapped as a one-item set
    "\\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
    "\\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
    "\\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
    "\\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
    "\\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
    "\\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
}
|
||||
|
||||
class Pattern:
    """Parser state shared by all subpatterns of one regular expression.

    Holds the option letters collected so far (flags), the next group
    number to hand out (groups, starting at 1) and the mapping from
    group name to group number (groupdict).
    """
    # FIXME: <fl> rename class, and store flags in here too!

    def __init__(self):
        self.flags = []
        self.groups = 1
        self.groupdict = {}

    def getgroup(self, name=None):
        """Allocate and return the next group number.

        When a non-empty name is given it is recorded in groupdict as
        referring to the new group.
        """
        gid = self.groups
        self.groups = gid + 1
        if name:
            self.groupdict[name] = gid
        return gid

    def setflag(self, flag):
        """Record *flag*, unless it has been recorded already."""
        # the test used to be inverted ("if flag in self.flags"), which
        # meant that no flag could ever be added
        if flag not in self.flags:
            self.flags.append(flag)
|
||||
|
||||
class SubPattern:
    """A subpattern in intermediate form.

    self.data is a list of (operator, argument) pairs, self.pattern is
    the shared state object this subpattern was created with,
    self.flags is a list of flags local to this subpattern and
    self.width caches the result of getwidth().
    """

    def __init__(self, pattern, data=None):
        self.pattern = pattern
        if not data:
            data = []
        self.data = data
        self.flags = []
        self.width = None

    def __repr__(self):
        return repr(self.data)

    def __len__(self):
        return len(self.data)

    def __delitem__(self, index):
        del self.data[index]

    def __getitem__(self, index):
        return self.data[index]

    def __setitem__(self, index, code):
        self.data[index] = code

    def __getslice__(self, start, stop):
        # a slice is a new SubPattern that shares the same state object
        return SubPattern(self.pattern, self.data[start:stop])

    def insert(self, index, code):
        self.data.insert(index, code)

    def append(self, code):
        self.data.append(code)

    def getwidth(self):
        """Return (min, max) width of this subpattern, each capped at sys.maxint.

        The result is computed once and cached in self.width.
        """
        if self.width:
            return self.width
        # accumulate in long integers so that huge repeat counts cannot
        # overflow; the totals are clamped to sys.maxint at the end
        lo = hi = long(0)
        for op, av in self.data:
            if op is BRANCH:
                # a branch is as narrow as its narrowest alternative and
                # as wide as its widest one
                l = sys.maxint
                h = 0
                for alternative in av[1]:
                    i, j = alternative.getwidth()
                    l = min(l, i)
                    h = max(h, j)
                lo = lo + l
                hi = hi + h
            elif op is CALL:
                i, j = av.getwidth()
                lo = lo + i
                hi = hi + j
            elif op is SUBPATTERN:
                i, j = av[1].getwidth()
                lo = lo + i
                hi = hi + j
            elif op in (MIN_REPEAT, MAX_REPEAT):
                # av is (min count, max count, repeated item)
                i, j = av[2].getwidth()
                lo = lo + i * av[0]
                hi = hi + j * av[1]
            elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
                lo = lo + 1
                hi = hi + 1
            elif op == SUCCESS:
                break
        self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
        return self.width

    def set(self, flag):
        """Add *flag* to this subpattern's flags if it is not there yet."""
        if not flag in self.flags:
            self.flags.append(flag)

    def reset(self, flag):
        """Remove *flag* from this subpattern's flags if it is present."""
        if flag in self.flags:
            self.flags.remove(flag)
|
||||
|
||||
class Tokenizer:
    """Split a pattern string into tokens, with one token of lookahead.

    A token is a single character, or a backslash escape: the backslash,
    the character after it and, for "\\x" or a leading digit, the run of
    hexadecimal or decimal digits that follows.  self.next always holds
    the upcoming token, or None once the input is used up.
    """

    def __init__(self, string):
        self.string = list(string)
        self.next = self.__next()

    def __run(self, allowed):
        # return the characters from position 2 onwards, up to the first
        # one that is not in allowed (or the end of the input)
        pending = self.string
        stop = 2
        while stop < len(pending) and str(pending[stop]) in allowed:
            stop = stop + 1
        return "".join(pending[2:stop])

    def __next(self):
        pending = self.string
        if not pending:
            return None
        token = pending[0]
        if token[0] == "\\":
            if len(pending) < 2:
                raise SyntaxError("bogus escape")
            lead = pending[1]
            token = token + lead
            if lead == "x":
                # hexadecimal constant
                token = token + self.__run(HEXDIGITS)
            elif str(lead) in DIGITS:
                # decimal (or octal) number
                # FIXME: if larger than current number of
                # groups, interpret as an octal number
                token = token + self.__run(DIGITS)
        # drop the characters that make up the token
        del pending[0:len(token)]
        return token

    def match(self, char):
        """Consume the next token and return 1 if it equals char, else return 0."""
        if char != self.next:
            return 0
        self.next = self.__next()
        return 1

    def match_set(self, set):
        """Consume the next token and return 1 if it is in set, else return 0."""
        if not self.next or self.next not in set:
            return 0
        self.next = self.__next()
        return 1

    def get(self):
        """Return the next token (None at end of input) and advance."""
        this, self.next = self.next, self.__next()
        return this
|
||||
|
||||
def _fixescape(escape, character_class=0):
    """Convert the escape token *escape* to a (type, value) pair.

    Inside a character class (character_class true) the ESCAPES table
    is consulted before CATEGORIES; elsewhere the order is reversed and
    a purely numeric escape becomes a (GROUP, number) reference.
    "\\x" and digit escapes yield a LITERAL holding one 8-bit character,
    and any other two-character escape yields the escaped character as
    a LITERAL.

    Raises SyntaxError ("bogus escape: ...") when nothing applies.
    """
    if character_class:
        # inside a character class, we'll look in the character
        # escapes dictionary first
        tables = (ESCAPES, CATEGORIES)
    else:
        tables = (CATEGORIES, ESCAPES)
    for table in tables:
        code = table.get(escape)
        if code:
            return code
    if not character_class:
        try:
            group = int(escape[1:])
            # FIXME: only valid if group <= current number of groups
            return GROUP, group
        except ValueError:
            pass
    try:
        if escape[1:2] == "x":
            # keep "escape" itself intact so that the error message
            # below reports the complete token
            hexdigits = escape[2:]
            return LITERAL, chr(int(hexdigits[-2:], 16) & 0xff)
        elif str(escape[1:2]) in DIGITS:
            return LITERAL, chr(int(escape[1:], 8) & 0xff)
        elif len(escape) == 2:
            return LITERAL, escape[1]
    except ValueError:
        pass
    raise SyntaxError("bogus escape: %s" % repr(escape))
|
||||
|
||||
def _branch(subpattern, items):
    """Append to *subpattern* the alternation of the subpatterns in *items*.

    Leading operators shared by every item are removed from the items
    (in place) and appended to subpattern first.  If each item is then
    exactly one LITERAL, a single (IN, [literals]) operator is
    appended; otherwise (BRANCH, (None, items)) is.
    """
    # form a branch operator from a set of items (FIXME: move this
    # optimization to the compiler module!)

    # check if all items share a common prefix
    while 1:
        prefix = None
        shared = 1
        for item in items:
            if not item:
                shared = 0
                break
            if prefix is None:
                prefix = item[0]
            elif item[0] != prefix:
                shared = 0
                break
        if not shared:
            break
        # all subitems start with a common "prefix".
        # move it out of the branch, then check the next one
        for item in items:
            del item[0]
        subpattern.append(prefix)

    # check if the branch can be replaced by a character set
    literals_only = 1
    for item in items:
        if len(item) != 1 or item[0][0] != LITERAL:
            literals_only = 0
            break

    if not literals_only:
        subpattern.append((BRANCH, (None, items)))
        return

    # we can store this as a character set instead of a
    # branch (FIXME: use a range if possible)
    charset = []
    for item in items:
        charset.append(item[0])
    subpattern.append((IN, charset))
|
||||
|
||||
def _parse(source, pattern, flags=()):
    """Parse tokens from *source* into a SubPattern and return it.

    source is the tokenizer to read from and pattern the shared state
    object used to allocate group numbers and record inline flags.
    flags is only handed on to the recursive calls made for group
    contents.  Parsing stops, without consuming it, at a "|" or ")"
    token, or at the end of the input.

    Raises SyntaxError for malformed input.
    """

    # parse regular expression pattern into an operator list.

    subpattern = SubPattern(pattern)

    this = None

    while 1:

        if str(source.next) in ("|", ")"):
            break # end of subpattern
        this = source.get()
        if this is None:
            break # end of pattern

        if this and this[0] not in SPECIAL_CHARS:
            subpattern.append((LITERAL, this))

        elif this == "[":
            # character set
            charset = []
##          if source.match(":"):
##              pass # handle character classes
            if source.match("^"):
                charset.append((NEGATE, None))
            # check remaining characters
            start = charset[:]
            while 1:
                this = source.get()
                # a "]" right after "[" or "[^" is a literal, not the end
                if this == "]" and charset != start:
                    break
                elif this and this[0] == "\\":
                    code1 = _fixescape(this, 1)
                elif this:
                    code1 = LITERAL, this
                else:
                    raise SyntaxError("unexpected end of regular expression")
                if source.match("-"):
                    # potential range
                    this = source.get()
                    if this is None:
                        # pattern ends in the middle of a range; report
                        # it instead of failing on this[0] below
                        raise SyntaxError(
                            "unexpected end of regular expression")
                    if this == "]":
                        charset.append(code1)
                        charset.append((LITERAL, "-"))
                        break
                    else:
                        if this[0] == "\\":
                            code2 = _fixescape(this, 1)
                        else:
                            code2 = LITERAL, this
                        if code1[0] != LITERAL or code2[0] != LITERAL:
                            raise SyntaxError("illegal range")
                        if len(code1[1]) != 1 or len(code2[1]) != 1:
                            raise SyntaxError("illegal range")
                        charset.append((RANGE, (code1[1], code2[1])))
                else:
                    if code1[0] is IN:
                        # unwrap the one-item set a category escape yields
                        code1 = code1[1][0]
                    charset.append(code1)

            # FIXME: <fl> move set optimization to support function
            if len(charset)==1 and charset[0][0] is LITERAL:
                subpattern.append(charset[0]) # optimization
            elif (len(charset)==2 and charset[0][0] is NEGATE
                  and charset[1][0] is LITERAL):
                subpattern.append((NOT_LITERAL, charset[1][1])) # optimization
            else:
                # FIXME: <fl> add charmap optimization
                subpattern.append((IN, charset))

        elif this and this[0] in REPEAT_CHARS:
            # repeat previous item
            if this == "?":
                mincount, maxcount = 0, 1
            elif this == "*":
                mincount, maxcount = 0, sys.maxint
            elif this == "+":
                mincount, maxcount = 1, sys.maxint
            elif this == "{":
                mincount, maxcount = 0, sys.maxint
                lo = hi = ""
                while str(source.next) in DIGITS:
                    lo = lo + source.get()
                if source.match(","):
                    while str(source.next) in DIGITS:
                        hi = hi + source.get()
                else:
                    hi = lo
                if not source.match("}"):
                    raise SyntaxError("bogus range")
                if lo:
                    mincount = int(lo)
                if hi:
                    maxcount = int(hi)
                # FIXME: <fl> check that hi >= lo!
            else:
                raise SyntaxError("not supported")
            # figure out which item to repeat
            # FIXME: should back up to the right mark, right?
            if subpattern:
                index = len(subpattern)-1
                while subpattern[index][0] is MARK:
                    index = index - 1
                item = subpattern[index:index+1]
            else:
                raise SyntaxError("nothing to repeat")
            if source.match("?"):
                subpattern[index] = (MIN_REPEAT, (mincount, maxcount, item))
            else:
                subpattern[index] = (MAX_REPEAT, (mincount, maxcount, item))

        elif this == ".":
            subpattern.append((ANY, None))

        elif this == "(":
            # group: 0 = no contents to parse, 1 = capturing, 2 = anonymous
            group = 1
            name = None
            if source.match("?"):
                group = 0
                # options
                if source.match("P"):
                    # named group: skip forward to end of name
                    if source.match("<"):
                        name = ""
                        while 1:
                            char = source.get()
                            if char is None or char == ">":
                                break
                            name = name + char
                        group = 1
                elif source.match(":"):
                    # non-capturing group
                    group = 2
                elif source.match_set("iI"):
                    pattern.setflag("i")
                elif source.match_set("lL"):
                    pattern.setflag("l")
                elif source.match_set("mM"):
                    pattern.setflag("m")
                elif source.match_set("sS"):
                    pattern.setflag("s")
                elif source.match_set("xX"):
                    pattern.setflag("x")
            if group:
                # parse group contents
                b = []
                if group == 2:
                    # anonymous group
                    group = None
                else:
                    group = pattern.getgroup(name)
                if group:
                    subpattern.append((MARK, (group-1)*2))
                while 1:
                    p = _parse(source, pattern, flags)
                    if source.match(")"):
                        if b:
                            b.append(p)
                            _branch(subpattern, b)
                        else:
                            subpattern.append((SUBPATTERN, (group, p)))
                        break
                    elif source.match("|"):
                        b.append(p)
                    else:
                        raise SyntaxError("group not properly closed")
                if group:
                    subpattern.append((MARK, (group-1)*2+1))
            else:
                # FIXME: should this really be a while loop?
                # skip everything up to and including the closing ")"
                while 1:
                    char = source.get()
                    if char is None or char == ")":
                        break

        elif this == "^":
            subpattern.append((AT, AT_BEGINNING))

        elif this == "$":
            subpattern.append((AT, AT_END))

        elif this and this[0] == "\\":
            code = _fixescape(this)
            subpattern.append(code)

        else:
            raise SyntaxError("parser error")

    return subpattern
|
||||
|
||||
def parse(source, flags=()):
    """Parse the pattern string *source* and return it as a SubPattern.

    Top-level alternatives separated by "|" are collected and combined
    with _branch().  flags is passed on to _parse().

    Raises SyntaxError for an unmatched ")" or any other token left
    over after parsing.
    """
    tokens = Tokenizer(source)
    state = Pattern()
    alternatives = []
    while 1:
        p = _parse(tokens, state, flags)
        tail = tokens.get()
        if tail is None:
            # end of input: fold the collected alternatives, if any
            if alternatives:
                alternatives.append(p)
                p = SubPattern(state)
                _branch(p, alternatives)
            return p
        if tail == ")":
            raise SyntaxError("unbalanced parenthesis")
        if tail != "|":
            raise SyntaxError("bogus characters at end of regular expression")
        alternatives.append(p)
|
||||
|
||||
if __name__ == "__main__":
    # Self test: parse and compile every flag-less pattern from the
    # testpatterns module, then print a summary.
    from pprint import pprint
    from testpatterns import PATTERNS
    import sre_compile
    parsed = failed = compiled = 0
    for pattern, flags in PATTERNS:
        if flags:
            continue
        print("-"*68)
        try:
            p = parse(pattern)
        except SyntaxError:
            print("** %s %s" % (repr(pattern), sys.exc_info()[1]))
            failed = failed + 1
            continue
        print("%s ->" % repr(pattern))
        pprint(p.data)
        try:
            code = sre_compile.compile(p)
            compiled = compiled + 1
        except Exception:
            # a pattern that parses but does not compile still counts
            # as parsed; show the reason instead of hiding it
            print("** compile failed: %s" % (sys.exc_info()[1],))
        parsed = parsed + 1
    print("-"*68)
    # report against the number of patterns tried, not the failure count
    total = parsed + failed
    print("%d of %d patterns successfully parsed" % (parsed, total))
    print("%d of %d patterns successfully compiled" % (compiled, total))
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
# Module 'statcache'
|
||||
#
|
||||
# Maintain a cache of file stats.
|
||||
# There are functions to reset the cache or to selectively remove items.
|
||||
"""Maintain a cache of stat() information on files.
|
||||
|
||||
There are functions to reset the cache or to selectively remove items.
|
||||
"""
|
||||
|
||||
import os
|
||||
from stat import *
|
||||
|
@ -12,42 +12,37 @@ from stat import *
|
|||
cache = {}
|
||||
|
||||
|
||||
# Stat a file, possibly out of the cache.
|
||||
#
|
||||
def stat(path):
    """Return os.stat(path), taking the result from the cache when present.

    A result that had to be fetched is stored in the cache before it is
    returned.
    """
    try:
        return cache[path]
    except KeyError:
        pass
    ret = os.stat(path)
    cache[path] = ret
    return ret
|
||||
|
||||
|
||||
# Reset the cache completely.
|
||||
#
|
||||
def reset():
    """Forget everything by rebinding the module's cache to a new, empty dict."""
    global cache
    cache = {}
|
||||
|
||||
|
||||
# Remove a given item from the cache, if it exists.
|
||||
#
|
||||
def forget(path):
    """Drop the cache entry for path; do nothing if there is none."""
    try:
        del cache[path]
    except KeyError:
        pass
|
||||
|
||||
|
||||
# Remove all pathnames with a given prefix.
|
||||
#
|
||||
def forget_prefix(prefix):
    """Drop every cache entry whose pathname starts with prefix."""
    n = len(prefix)
    # collect the matching keys first, then delete them
    doomed = []
    for path in cache.keys():
        if path[:n] == prefix:
            doomed.append(path)
    for path in doomed:
        del cache[path]
|
||||
|
||||
|
||||
# Forget about a directory and all entries in it, but not about
|
||||
# entries in subdirectories.
|
||||
#
|
||||
def forget_dir(prefix):
|
||||
"""Forget about a directory and all entries in it, but not about
|
||||
entries in subdirectories."""
|
||||
if prefix[-1:] == '/' and prefix <> '/':
|
||||
prefix = prefix[:-1]
|
||||
forget(prefix)
|
||||
|
@ -62,19 +57,17 @@ def forget_dir(prefix):
|
|||
del cache[path]
|
||||
|
||||
|
||||
# Remove all pathnames except with a given prefix.
|
||||
# Normally used with prefix = '/' after a chdir().
|
||||
#
|
||||
def forget_except_prefix(prefix):
    """Drop every cache entry whose pathname does not start with prefix.

    Normally used with prefix = '/' after a chdir().
    """
    n = len(prefix)
    # collect the non-matching keys first, then delete them
    doomed = []
    for path in cache.keys():
        if path[:n] != prefix:
            doomed.append(path)
    for path in doomed:
        del cache[path]
|
||||
|
||||
|
||||
# Check for directory.
|
||||
#
|
||||
def isdir(path):
|
||||
"""Check for directory."""
|
||||
try:
|
||||
st = stat(path)
|
||||
except os.error:
|
||||
|
|
|
@ -1,30 +1,32 @@
|
|||
# class StringIO implements file-like objects that read/write a
|
||||
# string buffer (a.k.a. "memory files").
|
||||
#
|
||||
# This implements (nearly) all stdio methods.
|
||||
#
|
||||
# f = StringIO() # ready for writing
|
||||
# f = StringIO(buf) # ready for reading
|
||||
# f.close() # explicitly release resources held
|
||||
# flag = f.isatty() # always false
|
||||
# pos = f.tell() # get current position
|
||||
# f.seek(pos) # set current position
|
||||
# f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF
|
||||
# buf = f.read() # read until EOF
|
||||
# buf = f.read(n) # read up to n bytes
|
||||
# buf = f.readline() # read until end of line ('\n') or EOF
|
||||
# list = f.readlines()# list of f.readline() results until EOF
|
||||
# f.write(buf) # write at current position
|
||||
# f.writelines(list) # for line in list: f.write(line)
|
||||
# f.getvalue() # return whole file's contents as a string
|
||||
#
|
||||
# Notes:
|
||||
# - Using a real file is often faster (but less convenient).
|
||||
# - fileno() is left unimplemented so that code which uses it triggers
|
||||
# an exception early.
|
||||
# - Seeking far beyond EOF and then writing will insert real null
|
||||
# bytes that occupy space in the buffer.
|
||||
# - There's a simple test set (see end of this file).
|
||||
"""File-like objects that read from or write to a string buffer.
|
||||
|
||||
This implements (nearly) all stdio methods.
|
||||
|
||||
f = StringIO() # ready for writing
|
||||
f = StringIO(buf) # ready for reading
|
||||
f.close() # explicitly release resources held
|
||||
flag = f.isatty() # always false
|
||||
pos = f.tell() # get current position
|
||||
f.seek(pos) # set current position
|
||||
f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF
|
||||
buf = f.read() # read until EOF
|
||||
buf = f.read(n) # read up to n bytes
|
||||
buf = f.readline() # read until end of line ('\n') or EOF
|
||||
list = f.readlines()# list of f.readline() results until EOF
|
||||
f.write(buf) # write at current position
|
||||
f.writelines(list) # for line in list: f.write(line)
|
||||
f.getvalue() # return whole file's contents as a string
|
||||
|
||||
Notes:
|
||||
- Using a real file is often faster (but less convenient).
|
||||
- There's also a much faster implementation in C, called cStringIO, but
|
||||
it's not subclassable.
|
||||
- fileno() is left unimplemented so that code which uses it triggers
|
||||
an exception early.
|
||||
- Seeking far beyond EOF and then writing will insert real null
|
||||
bytes that occupy space in the buffer.
|
||||
- There's a simple test set (see end of this file).
|
||||
"""
|
||||
|
||||
import string
|
||||
|
||||
|
|
|
@ -0,0 +1,431 @@
|
|||
# module 'string' -- A collection of string operations
|
||||
|
||||
# Warning: most of the code you see here isn't normally used nowadays. With
|
||||
# Python 1.6, many of these functions are implemented as methods on the
|
||||
# standard string object. They used to be implemented by a built-in module
|
||||
# called strop, but strop is now obsolete itself.
|
||||
|
||||
"""Common string manipulations.
|
||||
|
||||
Public module variables:
|
||||
|
||||
whitespace -- a string containing all characters considered whitespace
|
||||
lowercase -- a string containing all characters considered lowercase letters
|
||||
uppercase -- a string containing all characters considered uppercase letters
|
||||
letters -- a string containing all characters considered letters
|
||||
digits -- a string containing all characters considered decimal digits
|
||||
hexdigits -- a string containing all characters considered hexadecimal digits
|
||||
octdigits -- a string containing all characters considered octal digits
|
||||
|
||||
"""
|
||||
|
||||
# Character classification strings, in the style of C's <ctype.h>
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'

# Case conversion helper: the 256 characters chr(0)..chr(255), in order
_idmap = ''.join(map(chr, range(256)))

# Old exception names, all kept as aliases for ValueError
index_error = atoi_error = atof_error = atol_error = ValueError
|
||||
|
||||
# convert UPPER CASE letters to lower case
|
||||
def lower(s):
    """lower(s) -> string

    Return s.lower(): a copy of s in which upper case letters have
    been replaced by their lower case counterparts.

    """
    result = s.lower()
    return result
|
||||
|
||||
# Convert lower case letters to UPPER CASE
|
||||
def upper(s):
    """upper(s) -> string

    Return s.upper(): a copy of s in which lower case letters have
    been replaced by their upper case counterparts.

    """
    result = s.upper()
    return result
|
||||
|
||||
# Swap lower case letters and UPPER CASE
|
||||
def swapcase(s):
    """swapcase(s) -> string

    Return s.swapcase(): a copy of s with upper case letters made
    lower case and lower case letters made upper case.

    """
    result = s.swapcase()
    return result
|
||||
|
||||
# Strip leading and trailing tabs and spaces
|
||||
def strip(s):
    """strip(s) -> string

    Return s.strip(): a copy of s without the whitespace at its
    beginning and end.

    """
    result = s.strip()
    return result
|
||||
|
||||
# Strip leading tabs and spaces
|
||||
def lstrip(s):
    """lstrip(s) -> string

    Return s.lstrip(): a copy of s without the whitespace at its
    beginning.

    """
    result = s.lstrip()
    return result
|
||||
|
||||
# Strip trailing tabs and spaces
|
||||
def rstrip(s):
    """rstrip(s) -> string

    Return s.rstrip(): a copy of s without the whitespace at its
    end.

    """
    result = s.rstrip()
    return result
|
||||
|
||||
|
||||
# Split a string into a list of space/tab-separated words
|
||||
# NB: split(s) is NOT the same as splitfields(s, ' ')!
|
||||
def split(s, sep=None, maxsplit=0):
    """split(str [,sep [,maxsplit]]) -> list of strings

    Return the result of s.split(sep, maxsplit): a list of the words
    in s, with sep as the delimiter string.  If sep is not specified,
    any whitespace string is a separator.  If maxsplit is nonzero, it
    limits the number of splits.  Maxsplit defaults to 0.

    (split and splitfields are synonymous)

    """
    words = s.split(sep, maxsplit)
    return words
splitfields = split
|
||||
|
||||
# Join fields with optional separator
|
||||
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return sep.join(words): the words in list glued together with an
    occurrence of sep between each pair of neighbours.  The default
    separator is a single space.

    (joinfields and join are synonymous)

    """
    joined = sep.join(words)
    return joined
joinfields = join
|
||||
|
||||
# for a little bit of speed
|
||||
_apply = apply
|
||||
|
||||
# Find substring, raise exception if not found
|
||||
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Return s.index(sub [,start [,end]]).  This is like find, except
    that ValueError is raised when the substring is not found.

    """
    return s.index(*args)
|
||||
|
||||
# Find last substring, raise exception if not found
|
||||
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Return s.rindex(sub [,start [,end]]).  This is like rfind, except
    that ValueError is raised when the substring is not found.

    """
    return s.rindex(*args)
|
||||
|
||||
# Count non-overlapping occurrences of substring
|
||||
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return s.count(sub[, start[,end]]): how many times the substring
    sub occurs in s[start:end].  Optional arguments start and end
    are interpreted as in slice notation.

    """
    return s.count(*args)
|
||||
|
||||
# Find substring, return -1 if not found
|
||||
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return s.find(sub [,start [,end]]): the lowest index in s where
    the substring sub is found, such that sub is contained within
    s[start:end].  Optional arguments start and end are interpreted
    as in slice notation.

    Return -1 on failure.

    """
    return s.find(*args)
|
||||
|
||||
# Find last substring, return -1 if not found
|
||||
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return s.rfind(sub [,start [,end]]): the highest index in s where
    the substring sub is found, such that sub is contained within
    s[start:end].  Optional arguments start and end are interpreted
    as in slice notation.

    Return -1 on failure.

    """
    return s.rfind(*args)
|
||||
|
||||
# for a bit of speed
|
||||
_float = float
|
||||
_int = int
|
||||
_long = long
|
||||
_StringType = type('')
|
||||
|
||||
# Convert string to float
|
||||
def atof(s):
    """atof(s) -> float

    Convert the string s to the floating point number it represents.

    Raises TypeError when s is not a string.

    """
    # Guard clause: anything whose type is not the plain string type is
    # rejected before conversion is attempted.
    if type(s) != _StringType:
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    return _float(s)
|
||||
|
||||
# Convert string to integer
|
||||
def atoi(*args):
    """atoi(s [,base]) -> int

    Convert the string s to an integer in the given base (default
    10).  s must consist of one or more digits, possibly preceded
    by a sign.  With base 0 the base is chosen from the leading
    characters of s: 0 for octal, 0x or 0X for hexadecimal.  With
    base 16 a preceding 0x or 0X is accepted.

    Raises TypeError when called without arguments or when s is not
    a string.

    """
    # Called with nothing at all: report it as a missing argument.
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    if type(s) != _StringType:
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # A TypeError caused by too many arguments is deliberately left to
    # int() itself: the message differs but the exception type matches.
    return _int(*args)
|
||||
|
||||
|
||||
# Convert string to long integer
|
||||
def atol(*args):
    """atol(s [,base]) -> long

    Convert the string s to a long integer in the given base
    (default 10).  s must consist of one or more digits, possibly
    preceded by a sign.  With base 0 the base is chosen from the
    leading characters of s: 0 for octal, 0x or 0X for hexadecimal.
    With base 16 a preceding 0x or 0X is accepted.  A trailing L or
    l is not accepted, unless base is 0.

    Raises TypeError when called without arguments or when s is not
    a string.

    """
    # Called with nothing at all: report it as a missing argument.
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    if type(s) != _StringType:
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # A TypeError caused by too many arguments is deliberately left to
    # long() itself: the message differs but the exception type matches.
    return _long(*args)
|
||||
|
||||
|
||||
# Left-justify a string
|
||||
def ljust(s, width):
    """ljust(s, width) -> string

    Return s left-justified in a field of the given width, with
    spaces appended as needed.  s is returned unchanged when it is
    already at least width characters long; it is never truncated.

    """
    pad = width - len(s)
    if pad > 0:
        return s + ' ' * pad
    return s
|
||||
|
||||
# Right-justify a string
|
||||
def rjust(s, width):
    """rjust(s, width) -> string

    Return s right-justified in a field of the given width, with
    spaces prepended as needed.  s is returned unchanged when it is
    already at least width characters long; it is never truncated.

    """
    pad = width - len(s)
    if pad > 0:
        return ' ' * pad + s
    return s
|
||||
|
||||
# Center a string
|
||||
def center(s, width):
    """center(s, width) -> string

    Return s centered in a field of the given width, padded with
    spaces on both sides as needed.  s is returned unchanged when it
    is already at least width characters long; it is never truncated.

    """
    pad = width - len(s)
    if pad <= 0:
        return s
    left, odd = divmod(pad, 2)
    if odd and width % 2:
        # The spare space goes on the left when both the padding and the
        # width are odd; this ensures that
        # center(center(s, i), j) = center(s, j)
        left = left + 1
    return ' ' * left + s + ' ' * (pad - left)
|
||||
|
||||
# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
|
||||
# Decadent feature: the argument may be a string or a number
|
||||
# (Use of this is deprecated; it should be a string as with ljust c.s.)
|
||||
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  The string x is never truncated.

    A leading '+' or '-' stays in front of the inserted zeros.  An
    empty string yields a field made up entirely of zeros.  If x is
    not a string it is first converted with repr() (deprecated usage).

    """
    if type(x) == type(''):
        s = x
    else:
        s = repr(x)
    n = len(s)
    if n >= width:
        return s
    sign = ''
    # Slice instead of indexing: s is empty for zfill('', n) with n > 0,
    # and s[0] would raise IndexError there.
    if s[:1] in ('-', '+'):
        sign, s = s[0], s[1:]
    return sign + '0'*(width-n) + s
|
||||
|
||||
# Expand tabs in a string.
|
||||
# Doesn't take non-printing chars into account, but does understand \n.
|
||||
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return a copy of s in which every tab character is replaced by
    as many spaces as are needed to reach the next multiple of
    tabsize (default 8), counted from the start of the current line.
    A newline resets the column count.

    """
    finished = ''    # completed lines, each including its newline
    current = ''     # the line currently being assembled
    for ch in s:
        if ch == '\n':
            finished = finished + current + ch
            current = ''
        elif ch == '\t':
            current = current + ' ' * (tabsize - len(current) % tabsize)
        else:
            current = current + ch
    return finished + current
|
||||
|
||||
# Character translation through look-up table.
|
||||
def translate(s, table, deletions=""):
    """translate(s,table [,deletechars]) -> string

    Return a copy of s from which every character listed in the
    optional deletechars argument has been removed and whose
    remaining characters have been mapped through table, which must
    be a string of length 256.

    """
    # Thin wrapper: the string method does both the deletion and the
    # mapping.
    translated = s.translate(table, deletions)
    return translated
|
||||
|
||||
# Capitalize a string, e.g. "aBc dEf" -> "Abc def".
|
||||
def capitalize(s):
    """capitalize(s) -> string

    Return a copy of s in which only the first character is
    capitalized, e.g. "aBc dEf" -> "Abc def".

    """
    # Thin wrapper around the string method of the same name.
    result = s.capitalize()
    return result
|
||||
|
||||
# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
|
||||
# See also regsub.capwords().
|
||||
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Split s into words with s.split(sep), capitalize every word
    with capitalize, and glue the results back together with join,
    using sep as the separator (a single space when sep is not
    given).  Note that with the default sep this replaces runs of
    whitespace characters by a single space.

    """
    words = s.split(sep)
    capitalized = map(capitalize, words)
    return join(capitalized, sep or ' ')
|
||||
|
||||
# Construct a translation string
|
||||
_idmapL = None
|
||||
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 bytes long)
    suitable for use in string.translate, in which each character
    of frm maps to the character at the same position in to.  The
    strings frm and to must be of the same length, otherwise
    ValueError is raised.

    """
    global _idmapL
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    if not _idmapL:
        # First call: cache a list version of _idmap (presumably the
        # identity mapping -- it is defined elsewhere in this module).
        _idmapL = map(None, _idmap)
    table = _idmapL[:]          # work on a copy, never on the cache
    codes = map(ord, fromstr)
    for pos in range(len(codes)):
        table[codes[pos]] = tostr[pos]
    return joinfields(table, "")
|
||||
|
||||
# Substring replacement (global)
|
||||
def replace(s, old, new, maxsplit=0):
    """replace (str, old, new[, maxsplit]) -> string

    Return a copy of string str with all occurrences of substring
    old replaced by new. If the optional argument maxsplit is
    given, only the first maxsplit occurrences are replaced.
    A maxsplit of 0 (the default) means "no limit".

    """
    # Forward the limit only when one was actually requested.  Passing
    # the default 0 straight through would be read as "replace at most
    # zero occurrences" by a str.replace that takes its count literally,
    # which would contradict the documented replace-everything default.
    if maxsplit:
        return s.replace(old, new, maxsplit)
    return s.replace(old, new)
|
||||
|
||||
|
||||
# XXX: transitional
|
||||
#
|
||||
# If string objects do not have methods, then we need to use the old string.py
|
||||
# library, which uses strop for many more things than just the few outlined
|
||||
# below.
|
||||
try:
|
||||
''.upper
|
||||
except AttributeError:
|
||||
from stringold import *
|
||||
|
||||
# Try importing optional built-in module "strop" -- if it exists,
|
||||
# it redefines some string operations that are 100-1000 times faster.
|
||||
# It also defines values for whitespace, lowercase and uppercase
|
||||
# that match <ctype.h>'s definitions.
|
||||
|
||||
try:
|
||||
from strop import maketrans, lowercase, uppercase, whitespace
|
||||
letters = lowercase + uppercase
|
||||
except ImportError:
|
||||
pass # Use the original versions
|
|
@ -329,6 +329,7 @@ class Telnet:
|
|||
opt = self.rawq_getchar()
|
||||
self.msg('IAC %s %d',
|
||||
c == WILL and 'WILL' or 'WONT', ord(c))
|
||||
self.sock.send(IAC + DONT + opt)
|
||||
else:
|
||||
self.msg('IAC %s not recognized' % `c`)
|
||||
except EOFError: # raised by self.rawq_getchar()
|
||||
|
|
|
@ -1,46 +1,93 @@
|
|||
#! /usr/bin/env python
|
||||
"""Test script for the binascii C module
|
||||
"""Test the binascii C module."""
|
||||
|
||||
Uses the mechanism of the python binhex module
|
||||
Roger E. Masse
|
||||
"""
|
||||
import binhex
|
||||
import tempfile
|
||||
from test_support import verbose
|
||||
import binascii
|
||||
|
||||
def test():
|
||||
# Show module doc string
|
||||
print binascii.__doc__
|
||||
|
||||
try:
|
||||
fname1 = tempfile.mktemp()
|
||||
fname2 = tempfile.mktemp()
|
||||
f = open(fname1, 'w')
|
||||
except:
|
||||
raise ImportError, "Cannot test binascii without a temp file"
|
||||
# Show module exceptions
|
||||
print binascii.Error
|
||||
print binascii.Incomplete
|
||||
|
||||
start = 'Jack is my hero'
|
||||
f.write(start)
|
||||
f.close()
|
||||
|
||||
binhex.binhex(fname1, fname2)
|
||||
if verbose:
|
||||
print 'binhex'
|
||||
# Check presence and display doc strings of all functions
|
||||
funcs = []
|
||||
for suffix in "base64", "hqx", "uu":
|
||||
prefixes = ["a2b_", "b2a_"]
|
||||
if suffix == "hqx":
|
||||
prefixes.extend(["crc_", "rlecode_", "rledecode_"])
|
||||
for prefix in prefixes:
|
||||
name = prefix + suffix
|
||||
funcs.append(getattr(binascii, name))
|
||||
for func in funcs:
|
||||
print "%-15s: %s" % (func.__name__, func.__doc__)
|
||||
|
||||
binhex.hexbin(fname2, fname1)
|
||||
if verbose:
|
||||
print 'hexbin'
|
||||
# Create binary test data
|
||||
testdata = "The quick brown fox jumps over the lazy dog.\r\n"
|
||||
for i in range(256):
|
||||
# Be slow so we don't depend on other modules
|
||||
testdata = testdata + chr(i)
|
||||
testdata = testdata + "\r\nHello world.\n"
|
||||
|
||||
f = open(fname1, 'r')
|
||||
finish = f.readline()
|
||||
# Test base64 with valid data
|
||||
print "base64 test"
|
||||
MAX_BASE64 = 57
|
||||
lines = []
|
||||
for i in range(0, len(testdata), MAX_BASE64):
|
||||
b = testdata[i:i+MAX_BASE64]
|
||||
a = binascii.b2a_base64(b)
|
||||
lines.append(a)
|
||||
print a,
|
||||
res = ""
|
||||
for line in lines:
|
||||
b = binascii.a2b_base64(line)
|
||||
res = res + b
|
||||
assert res == testdata
|
||||
|
||||
if start <> finish:
|
||||
print 'Error: binhex <> hexbin'
|
||||
elif verbose:
|
||||
print 'binhex == hexbin'
|
||||
# Test base64 with random invalid characters sprinkled throughout
|
||||
# (This requires a new version of binascii.)
|
||||
fillers = ""
|
||||
valid = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/"
|
||||
for i in range(256):
|
||||
c = chr(i)
|
||||
if c not in valid:
|
||||
fillers = fillers + c
|
||||
def addnoise(line):
    # Interleave the characters of the module-level fillers string with
    # the characters of line.  The relative order of each input is kept
    # and nothing is dropped: whatever is left of either input when the
    # other runs out is appended at the end.
    noise = fillers
    ratio = len(line) / len(noise)
    mixed = ""
    while line and noise:
        # Take from line while proportionally more of it remains than at
        # the start; otherwise take the next filler character.
        if len(line) / len(noise) > ratio:
            mixed = mixed + line[0]
            line = line[1:]
        else:
            mixed = mixed + noise[0]
            noise = noise[1:]
    return mixed + noise + line
|
||||
res = ""
|
||||
for line in map(addnoise, lines):
|
||||
b = binascii.a2b_base64(line)
|
||||
res = res + b
|
||||
assert res == testdata
|
||||
|
||||
try:
|
||||
import os
|
||||
os.unlink(fname1)
|
||||
os.unlink(fname2)
|
||||
except:
|
||||
pass
|
||||
test()
|
||||
# Test uu
|
||||
print "uu test"
|
||||
MAX_UU = 45
|
||||
lines = []
|
||||
for i in range(0, len(testdata), MAX_UU):
|
||||
b = testdata[i:i+MAX_UU]
|
||||
a = binascii.b2a_uu(b)
|
||||
lines.append(a)
|
||||
print a,
|
||||
res = ""
|
||||
for line in lines:
|
||||
b = binascii.a2b_uu(line)
|
||||
res = res + b
|
||||
assert res == testdata
|
||||
|
||||
# Test crc32()
|
||||
crc = binascii.crc32("Test the CRC-32 of")
|
||||
crc = binascii.crc32(" this string.", crc)
|
||||
if crc != 1571220330:
|
||||
print "binascii.crc32() failed."
|
||||
|
||||
# The hqx test is in test_binhex.py
|
||||
|
|
|
@ -0,0 +1,168 @@
|
|||
from test_support import TestFailed
|
||||
|
||||
class base_set:
    """Container holding exactly one element; defines no membership hooks."""

    def __init__(self, el):
        self.el = el


class set(base_set):
    """One-element container answering ``in`` through __contains__."""

    def __contains__(self, el):
        # Membership is plain equality with the stored element.
        return self.el == el


class seq(base_set):
    """One-element container answering ``in`` through __getitem__."""

    def __getitem__(self, n):
        # Index into a one-item list so that any index other than 0 (or
        # -1) raises IndexError.
        items = [self.el]
        return items[n]
|
||||
|
||||
def check(ok, *args):
    """Raise TestFailed unless ok is true.

    The failure message is the str() of every extra argument, joined
    by single spaces.
    """
    if ok:
        return
    raise TestFailed(" ".join(map(str, args)))
|
||||
|
||||
a = base_set(1)
|
||||
b = set(1)
|
||||
c = seq(1)
|
||||
|
||||
check(1 in b, "1 not in set(1)")
|
||||
check(0 not in b, "0 in set(1)")
|
||||
check(1 in c, "1 not in seq(1)")
|
||||
check(0 not in c, "0 in seq(1)")
|
||||
|
||||
try:
|
||||
1 in a
|
||||
check(0, "in base_set did not raise error")
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
1 not in a
|
||||
check(0, "not in base_set did not raise error")
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
# Test char in string
|
||||
|
||||
check('c' in 'abc', "'c' not in 'abc'")
|
||||
check('d' not in 'abc', "'d' in 'abc'")
|
||||
|
||||
try:
|
||||
'' in 'abc'
|
||||
check(0, "'' in 'abc' did not raise error")
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
'ab' in 'abc'
|
||||
check(0, "'ab' in 'abc' did not raise error")
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
None in 'abc'
|
||||
check(0, "None in 'abc' did not raise error")
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
# Test char in Unicode
|
||||
|
||||
check('c' in u'abc', "'c' not in u'abc'")
|
||||
check('d' not in u'abc', "'d' in u'abc'")
|
||||
|
||||
try:
|
||||
'' in u'abc'
|
||||
check(0, "'' in u'abc' did not raise error")
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
'ab' in u'abc'
|
||||
check(0, "'ab' in u'abc' did not raise error")
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
None in u'abc'
|
||||
check(0, "None in u'abc' did not raise error")
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
# Test Unicode char in Unicode
|
||||
|
||||
check(u'c' in u'abc', "u'c' not in u'abc'")
|
||||
check(u'd' not in u'abc', "u'd' in u'abc'")
|
||||
|
||||
try:
|
||||
u'' in u'abc'
|
||||
check(0, "u'' in u'abc' did not raise error")
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
u'ab' in u'abc'
|
||||
check(0, "u'ab' in u'abc' did not raise error")
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
# Test Unicode char in string
|
||||
|
||||
check(u'c' in 'abc', "u'c' not in 'abc'")
|
||||
check(u'd' not in 'abc', "u'd' in 'abc'")
|
||||
|
||||
try:
|
||||
u'' in 'abc'
|
||||
check(0, "u'' in 'abc' did not raise error")
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
u'ab' in 'abc'
|
||||
check(0, "u'ab' in 'abc' did not raise error")
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
# A collection of tests on builtin sequence types
|
||||
a = range(10)
|
||||
for i in a:
|
||||
check(i in a, "%s not in %s" % (`i`, `a`))
|
||||
check(16 not in a, "16 not in %s" % `a`)
|
||||
check(a not in a, "%s not in %s" % (`a`, `a`))
|
||||
|
||||
a = tuple(a)
|
||||
for i in a:
|
||||
check(i in a, "%s not in %s" % (`i`, `a`))
|
||||
check(16 not in a, "16 not in %s" % `a`)
|
||||
check(a not in a, "%s not in %s" % (`a`, `a`))
|
||||
|
||||
class Deviant1:
    """Behaves strangely when compared

    Comparing an instance with 12 shrinks the shared class-level
    list, which lets the test verify that the contains code copes
    with a list that is modified during the check.
    """

    aList = list(range(15))

    def __cmp__(self, other):
        if other == 12:
            # Drop the tail of the list while it is being searched.
            for doomed in (12, 13, 14):
                self.aList.remove(doomed)
        # Never compares equal to anything.
        return 1
|
||||
|
||||
check(Deviant1() not in Deviant1.aList, "Deviant1 failed")
|
||||
|
||||
class Deviant2:
    """Behaves strangely when compared

    Comparing an instance with 4 raises RuntimeError.  That in turn
    causes the comparison to fail with a TypeError.
    """

    def __cmp__(self, other):
        if other == 4:
            raise RuntimeError("gotcha")
|
||||
|
||||
try:
|
||||
check(Deviant2() not in a, "oops")
|
||||
except TypeError:
|
||||
pass
|
|
@ -79,18 +79,29 @@ def dotest():
|
|||
f.close()
|
||||
try:
|
||||
cPickle.dump(123, f)
|
||||
except IOError:
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
print "dump to closed file should raise IOError"
|
||||
print "dump to closed file should raise ValueError"
|
||||
f = open(fn, "r")
|
||||
f.close()
|
||||
try:
|
||||
cPickle.load(f)
|
||||
except IOError:
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
print "load from closed file should raise IOError"
|
||||
print "load from closed file should raise ValueError"
|
||||
os.remove(fn)
|
||||
|
||||
# Test specific bad cases
|
||||
for i in range(10):
|
||||
try:
|
||||
x = cPickle.loads('garyp')
|
||||
except cPickle.BadPickleGet, y:
|
||||
del y
|
||||
else:
|
||||
print "unexpected success!"
|
||||
break
|
||||
|
||||
|
||||
dotest()
|
||||
|
|
|
@ -0,0 +1,143 @@
|
|||
from UserList import UserList
|
||||
|
||||
def f(*a, **k):
    """Print the positional-argument tuple and the keyword dictionary received."""
    print a, k
|
||||
|
||||
def g(x, *y, **z):
    """Print the required argument, the extra positionals and the keywords."""
    print x, y, z
|
||||
|
||||
def h(j=1, a=2, h=3):
    """Print the three arguments, each of which has a default."""
    print j, a, h
|
||||
|
||||
f()
|
||||
f(1)
|
||||
f(1, 2)
|
||||
f(1, 2, 3)
|
||||
|
||||
f(1, 2, 3, *(4, 5))
|
||||
f(1, 2, 3, *[4, 5])
|
||||
f(1, 2, 3, *UserList([4, 5]))
|
||||
f(1, 2, 3, **{'a':4, 'b':5})
|
||||
f(1, 2, 3, *(4, 5), **{'a':6, 'b':7})
|
||||
f(1, 2, 3, x=4, y=5, *(6, 7), **{'a':8, 'b':9})
|
||||
|
||||
try:
|
||||
g()
|
||||
except TypeError, err:
|
||||
print "TypeError:", err
|
||||
else:
|
||||
print "should raise TypeError: not enough arguments; expected 1, got 0"
|
||||
|
||||
try:
|
||||
g(*())
|
||||
except TypeError, err:
|
||||
print "TypeError:", err
|
||||
else:
|
||||
print "should raise TypeError: not enough arguments; expected 1, got 0"
|
||||
|
||||
try:
|
||||
g(*(), **{})
|
||||
except TypeError, err:
|
||||
print "TypeError:", err
|
||||
else:
|
||||
print "should raise TypeError: not enough arguments; expected 1, got 0"
|
||||
|
||||
g(1)
|
||||
g(1, 2)
|
||||
g(1, 2, 3)
|
||||
g(1, 2, 3, *(4, 5))
|
||||
class Nothing: pass
|
||||
try:
|
||||
g(*Nothing())
|
||||
except AttributeError, attr:
|
||||
pass
|
||||
else:
|
||||
print "should raise AttributeError: __len__"
|
||||
|
||||
class Nothing:
|
||||
def __len__(self):
|
||||
return 5
|
||||
try:
|
||||
g(*Nothing())
|
||||
except AttributeError, attr:
|
||||
pass
|
||||
else:
|
||||
print "should raise AttributeError: __getitem__"
|
||||
|
||||
class Nothing:
    """Sequence-like object that reports length 5 but only yields 0, 1, 2."""

    def __len__(self):
        return 5

    def __getitem__(self, i):
        # Indexes from 3 upwards end the sequence, despite __len__.
        if i >= 3:
            raise IndexError(i)
        return i
|
||||
g(*Nothing())
|
||||
|
||||
# make sure the function call doesn't stomp on the dictionary?
|
||||
d = {'a': 1, 'b': 2, 'c': 3}
|
||||
d2 = d.copy()
|
||||
assert d == d2
|
||||
g(1, d=4, **d)
|
||||
print d
|
||||
print d2
|
||||
assert d == d2, "function call modified dictionary"
|
||||
|
||||
# what about willful misconduct?
|
||||
def saboteur(**kw):
    """Deliberately mutate the received keyword dictionary."""
    # Stores this call's locals() under 'x' in kw; the surrounding script
    # then asserts that the dictionary the caller passed via ** is unchanged.
    kw['x'] = locals()
|
||||
d = {}
|
||||
saboteur(a=1, **d)
|
||||
assert d == {}
|
||||
|
||||
try:
|
||||
g(1, 2, 3, **{'x':4, 'y':5})
|
||||
except TypeError, err:
|
||||
print err
|
||||
else:
|
||||
print "should raise TypeError: keyword parameter redefined"
|
||||
|
||||
try:
|
||||
g(1, 2, 3, a=4, b=5, *(6, 7), **{'a':8, 'b':9})
|
||||
except TypeError, err:
|
||||
print err
|
||||
else:
|
||||
print "should raise TypeError: keyword parameter redefined"
|
||||
|
||||
try:
|
||||
f(**{1:2})
|
||||
except TypeError, err:
|
||||
print err
|
||||
else:
|
||||
print "should raise TypeError: keywords must be strings"
|
||||
|
||||
try:
|
||||
h(**{'e': 2})
|
||||
except TypeError, err:
|
||||
print err
|
||||
else:
|
||||
print "should raise TypeError: unexpected keyword argument: e"
|
||||
|
||||
try:
|
||||
h(*h)
|
||||
except TypeError, err:
|
||||
print err
|
||||
else:
|
||||
print "should raise TypeError: * argument must be a tuple"
|
||||
|
||||
try:
|
||||
h(**h)
|
||||
except TypeError, err:
|
||||
print err
|
||||
else:
|
||||
print "should raise TypeError: ** argument must be a dictionary"
|
||||
|
||||
def f2(*a, **b):
    """Return the received positional tuple and keyword dictionary as a pair."""
    received = (a, b)
    return received
|
||||
|
||||
d = {}
|
||||
for i in range(512):
|
||||
key = 'k%d' % i
|
||||
d[key] = i
|
||||
a, b = f2(1, *(2, 3), **d)
|
||||
print len(a), len(b), b == d
|
|
@ -18,7 +18,8 @@ if verbose:
|
|||
|
||||
if sys.platform in ('netbsd1',
|
||||
'freebsd2', 'freebsd3',
|
||||
'bsdos2', 'bsdos3', 'bsdos4'):
|
||||
'bsdos2', 'bsdos3', 'bsdos4',
|
||||
'openbsd', 'openbsd2'):
|
||||
lockdata = struct.pack('lxxxxlxxxxlhh', 0, 0, 0, FCNTL.F_WRLCK, 0)
|
||||
elif sys.platform in ['aix3', 'aix4']:
|
||||
lockdata = struct.pack('hhlllii', FCNTL.F_WRLCK, 0, 0, 0, 0, 0, 0)
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
"""This test checks for correct fork() behavior.
|
||||
|
||||
We want fork1() semantics -- only the forking thread survives in the
|
||||
child after a fork().
|
||||
|
||||
On some systems (e.g. Solaris without posix threads) we find that all
|
||||
active threads survive in the child after a fork(); this is an error.
|
||||
|
||||
"""
|
||||
|
||||
import os, sys, time, thread
|
||||
|
||||
try:
|
||||
os.fork
|
||||
except AttributeError:
|
||||
raise ImportError, "os.fork not defined -- skipping test_fork1"
|
||||
|
||||
LONGSLEEP = 2
|
||||
|
||||
SHORTSLEEP = 0.5
|
||||
|
||||
NUM_THREADS = 4
|
||||
|
||||
alive = {}
|
||||
|
||||
stop = 0
|
||||
|
||||
def f(id):
    """Thread body: keep recording the current pid under id until told to stop.

    Each pass stores os.getpid() in the module-level alive dictionary
    and then sleeps for SHORTSLEEP seconds.  The loop ends once the
    module-level stop flag is true.
    """
    while 1:
        if stop:
            break
        alive[id] = os.getpid()
        try:
            time.sleep(SHORTSLEEP)
        except IOError:
            # An IOError raised by the sleep is ignored and the loop just
            # carries on (presumably an interrupted sleep -- not confirmed
            # by anything visible here).
            pass
|
||||
|
||||
def main():
    """Start the worker threads, fork, and check that none survive in the child.

    The child waits, then exits with the number of thread ids whose
    recorded pid differs from the snapshot taken before the fork, i.e.
    the number of workers that kept running in the child.  The parent
    asserts that this exit status is 0 and then stops its own workers.
    """
    global stop

    for tid in range(NUM_THREADS):
        thread.start_new(f, (tid,))

    # Give every worker time to record itself.
    time.sleep(LONGSLEEP)

    ids = alive.keys()
    ids.sort()
    assert ids == range(NUM_THREADS)

    prefork_lives = alive.copy()

    cpid = os.fork()

    if cpid != 0:
        # Parent
        spid, status = os.waitpid(cpid, 0)
        assert spid == cpid
        assert status == 0, "cause = %d, exit = %d" % (status&0xff, status>>8)
        # Tell threads to die
        stop = 1
        time.sleep(2*SHORTSLEEP) # Wait for threads to die
    else:
        # Child
        time.sleep(LONGSLEEP)
        survivors = 0
        for key in alive.keys():
            if alive[key] != prefork_lives[key]:
                survivors = survivors+1
        os._exit(survivors)
|
||||
|
||||
main()
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
import gdbm
|
||||
from gdbm import error
|
||||
from test_support import verbose
|
||||
from test_support import verbose, TestFailed
|
||||
|
||||
filename= '/tmp/delete_me'
|
||||
|
||||
|
@ -18,6 +18,12 @@ if verbose:
|
|||
|
||||
g.has_key('a')
|
||||
g.close()
|
||||
try:
|
||||
g['a']
|
||||
except error:
|
||||
pass
|
||||
else:
|
||||
raise TestFailed, "expected gdbm.error accessing closed database"
|
||||
g = gdbm.open(filename, 'r')
|
||||
g.close()
|
||||
g = gdbm.open(filename, 'rw')
|
||||
|
|
|
@ -140,11 +140,17 @@ x = eval('1, 0 or 1')
|
|||
print 'funcdef'
|
||||
### 'def' NAME parameters ':' suite
|
||||
### parameters: '(' [varargslist] ')'
|
||||
### varargslist: (fpdef ['=' test] ',')* '*' NAME
|
||||
### | fpdef ['=' test] (',' fpdef ['=' test])* [',']
|
||||
### varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' ('**'|'*' '*') NAME]
|
||||
### | ('**'|'*' '*') NAME)
|
||||
### | fpdef ['=' test] (',' fpdef ['=' test])* [',']
|
||||
### fpdef: NAME | '(' fplist ')'
|
||||
### fplist: fpdef (',' fpdef)* [',']
|
||||
### arglist: (argument ',')* (argument | *' test [',' '**' test] | '**' test)
|
||||
### argument: [test '='] test # Really [keyword '='] test
|
||||
def f1(): pass
|
||||
f1()
|
||||
f1(*())
|
||||
f1(*(), **{})
|
||||
def f2(one_argument): pass
|
||||
def f3(two, arguments): pass
|
||||
def f4(two, (compound, (argument, list))): pass
|
||||
|
@ -157,16 +163,27 @@ def v3(a, (b, c), *rest): pass
|
|||
def d01(a=1): pass
|
||||
d01()
|
||||
d01(1)
|
||||
d01(*(1,))
|
||||
d01(**{'a':2})
|
||||
def d11(a, b=1): pass
|
||||
d11(1)
|
||||
d11(1, 2)
|
||||
d11(1, **{'b':2})
|
||||
def d21(a, b, c=1): pass
|
||||
d21(1, 2)
|
||||
d21(1, 2, 3)
|
||||
d21(*(1, 2, 3))
|
||||
d21(1, *(2, 3))
|
||||
d21(1, 2, *(3,))
|
||||
d21(1, 2, **{'c':3})
|
||||
def d02(a=1, b=2): pass
|
||||
d02()
|
||||
d02(1)
|
||||
d02(1, 2)
|
||||
d02(*(1, 2))
|
||||
d02(1, *(2,))
|
||||
d02(1, **{'b':2})
|
||||
d02(**{'a': 1, 'b': 2})
|
||||
def d12(a, b=1, c=2): pass
|
||||
d12(1)
|
||||
d12(1, 2)
|
||||
|
@ -179,6 +196,9 @@ def d01v(a=1, *rest): pass
|
|||
d01v()
|
||||
d01v(1)
|
||||
d01v(1, 2)
|
||||
d01v(*(1, 2, 3, 4))
|
||||
d01v(*(1,))
|
||||
d01v(**{'a':2})
|
||||
def d11v(a, b=1, *rest): pass
|
||||
d11v(1)
|
||||
d11v(1, 2)
|
||||
|
@ -187,21 +207,31 @@ def d21v(a, b, c=1, *rest): pass
|
|||
d21v(1, 2)
|
||||
d21v(1, 2, 3)
|
||||
d21v(1, 2, 3, 4)
|
||||
d21v(*(1, 2, 3, 4))
|
||||
d21v(1, 2, **{'c': 3})
|
||||
def d02v(a=1, b=2, *rest): pass
|
||||
d02v()
|
||||
d02v(1)
|
||||
d02v(1, 2)
|
||||
d02v(1, 2, 3)
|
||||
d02v(1, *(2, 3, 4))
|
||||
d02v(**{'a': 1, 'b': 2})
|
||||
def d12v(a, b=1, c=2, *rest): pass
|
||||
d12v(1)
|
||||
d12v(1, 2)
|
||||
d12v(1, 2, 3)
|
||||
d12v(1, 2, 3, 4)
|
||||
d12v(*(1, 2, 3, 4))
|
||||
d12v(1, 2, *(3, 4, 5))
|
||||
d12v(1, *(2,), **{'c': 3})
|
||||
def d22v(a, b, c=1, d=2, *rest): pass
|
||||
d22v(1, 2)
|
||||
d22v(1, 2, 3)
|
||||
d22v(1, 2, 3, 4)
|
||||
d22v(1, 2, 3, 4, 5)
|
||||
d22v(*(1, 2, 3, 4))
|
||||
d22v(1, 2, *(3, 4, 5))
|
||||
d22v(1, *(2, 3), **{'d': 4})
|
||||
|
||||
### stmt: simple_stmt | compound_stmt
|
||||
# Tested below
|
||||
|
@ -455,6 +485,7 @@ v2(1,2,3,4,5,6,7,8,9,0)
|
|||
v3(1,(2,3))
|
||||
v3(1,(2,3),4)
|
||||
v3(1,(2,3),4,5,6,7,8,9,0)
|
||||
print
|
||||
import sys, time
|
||||
c = sys.path[0]
|
||||
x = time.time()
|
||||
|
|
|
@ -77,6 +77,8 @@ def getran2(ndigits):
|
|||
def test_division_2(x, y):
|
||||
q, r = divmod(x, y)
|
||||
q2, r2 = x/y, x%y
|
||||
pab, pba = x*y, y*x
|
||||
check(pab == pba, "multiplication does not commute for", x, y)
|
||||
check(q == q2, "divmod returns different quotient than / for", x, y)
|
||||
check(r == r2, "divmod returns different mod than % for", x, y)
|
||||
check(x == q*y + r, "x != q*y + r after divmod on", x, y)
|
||||
|
@ -159,7 +161,7 @@ def test_bitop_identities(maxdigits=MAXDIGITS):
|
|||
test_bitop_identities_2(x, y)
|
||||
test_bitop_identities_3(x, y, getran((lenx + leny)/2))
|
||||
|
||||
# ------------------------------------------------------ hex oct str atol
|
||||
# ------------------------------------------------- hex oct repr str atol
|
||||
|
||||
def slow_format(x, base):
|
||||
if (x, base) == (0, 8):
|
||||
|
@ -181,12 +183,18 @@ def slow_format(x, base):
|
|||
|
||||
def test_format_1(x):
|
||||
from string import atol
|
||||
for base, mapper in (8, oct), (10, str), (16, hex):
|
||||
for base, mapper in (8, oct), (10, repr), (16, hex):
|
||||
got = mapper(x)
|
||||
expected = slow_format(x, base)
|
||||
check(got == expected, mapper.__name__, "returned",
|
||||
got, "but expected", expected, "for", x)
|
||||
check(atol(got, 0) == x, 'atol("%s", 0) !=' % got, x)
|
||||
# str() has to be checked a little differently since there's no
|
||||
# trailing "L"
|
||||
got = str(x)
|
||||
expected = slow_format(x, 10)[:-1]
|
||||
check(got == expected, mapper.__name__, "returned",
|
||||
got, "but expected", expected, "for", x)
|
||||
|
||||
def test_format(maxdigits=MAXDIGITS):
|
||||
print "long str/hex/oct/atol"
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
|
||||
import mmap
|
||||
import string, os, re, sys
|
||||
|
||||
PAGESIZE = mmap.PAGESIZE
|
||||
|
||||
def test_both():
|
||||
"Test mmap module on Unix systems and Windows"
|
||||
|
||||
# Create an mmap'ed file
|
||||
f = open('foo', 'w+')
|
||||
|
||||
# Write 2 pages worth of data to the file
|
||||
f.write('\0'* PAGESIZE)
|
||||
f.write('foo')
|
||||
f.write('\0'* (PAGESIZE-3) )
|
||||
|
||||
m = mmap.mmap(f.fileno(), 2 * PAGESIZE)
|
||||
f.close()
|
||||
|
||||
# Simple sanity checks
|
||||
print ' Position of foo:', string.find(m, 'foo') / float(PAGESIZE), 'pages'
|
||||
assert string.find(m, 'foo') == PAGESIZE
|
||||
|
||||
print ' Length of file:', len(m) / float(PAGESIZE), 'pages'
|
||||
assert len(m) == 2*PAGESIZE
|
||||
|
||||
print ' Contents of byte 0:', repr(m[0])
|
||||
assert m[0] == '\0'
|
||||
print ' Contents of first 3 bytes:', repr(m[0:3])
|
||||
assert m[0:3] == '\0\0\0'
|
||||
|
||||
# Modify the file's content
|
||||
print "\n Modifying file's content..."
|
||||
m[0] = '3'
|
||||
m[PAGESIZE +3: PAGESIZE +3+3]='bar'
|
||||
|
||||
# Check that the modification worked
|
||||
print ' Contents of byte 0:', repr(m[0])
|
||||
assert m[0] == '3'
|
||||
print ' Contents of first 3 bytes:', repr(m[0:3])
|
||||
assert m[0:3] == '3\0\0'
|
||||
print ' Contents of second page:', m[PAGESIZE-1 : PAGESIZE + 7]
|
||||
assert m[PAGESIZE-1 : PAGESIZE + 7] == '\0foobar\0'
|
||||
|
||||
m.flush()
|
||||
|
||||
# Test doing a regular expression match in an mmap'ed file
|
||||
match=re.search('[A-Za-z]+', m)
|
||||
if match == None:
|
||||
print ' ERROR: regex match on mmap failed!'
|
||||
else:
|
||||
start, end = match.span(0)
|
||||
length = end - start
|
||||
|
||||
print ' Regex match on mmap (page start, length of match):',
|
||||
print start / float(PAGESIZE), length
|
||||
|
||||
assert start == PAGESIZE
|
||||
assert end == PAGESIZE + 6
|
||||
|
||||
m.close()
|
||||
os.unlink("foo")
|
||||
print ' Test passed'
|
||||
|
||||
test_both()
|
||||
|
|
@ -0,0 +1,107 @@
|
|||
# Very simple test - Parse a file and print what happens
|
||||
|
||||
# XXX TypeErrors on calling handlers, or on bad return values from a
|
||||
# handler, are obscure and unhelpful.
|
||||
|
||||
import sys, string
|
||||
import os
|
||||
|
||||
import pyexpat
|
||||
|
||||
class Outputter:
|
||||
def StartElementHandler(self, name, attrs):
|
||||
print 'Start element:\n\t', name, attrs
|
||||
|
||||
def EndElementHandler(self, name):
|
||||
print 'End element:\n\t', name
|
||||
|
||||
def CharacterDataHandler(self, data):
|
||||
data = string.strip(data)
|
||||
if data:
|
||||
print 'Character data:'
|
||||
print '\t', repr(data)
|
||||
|
||||
def ProcessingInstructionHandler(self, target, data):
|
||||
print 'PI:\n\t', target, data
|
||||
|
||||
def StartNamespaceDeclHandler(self, prefix, uri):
|
||||
print 'NS decl:\n\t', prefix, uri
|
||||
|
||||
def EndNamespaceDeclHandler(self, prefix):
|
||||
print 'End of NS decl:\n\t', prefix
|
||||
|
||||
def StartCdataSectionHandler(self):
|
||||
print 'Start of CDATA section'
|
||||
|
||||
def EndCdataSectionHandler(self):
|
||||
print 'End of CDATA section'
|
||||
|
||||
def CommentHandler(self, text):
|
||||
print 'Comment:\n\t', repr(text)
|
||||
|
||||
def NotationDeclHandler(self, *args):
|
||||
name, base, sysid, pubid = args
|
||||
print 'Notation declared:', args
|
||||
|
||||
def UnparsedEntityDeclHandler(self, *args):
|
||||
entityName, base, systemId, publicId, notationName = args
|
||||
print 'Unparsed entity decl:\n\t', args
|
||||
|
||||
def NotStandaloneHandler(self, userData):
|
||||
print 'Not standalone'
|
||||
return 1
|
||||
|
||||
def ExternalEntityRefHandler(self, context, base, sysId, pubId):
|
||||
print 'External entity ref:', context, base, sysId, pubId
|
||||
return 1
|
||||
|
||||
def DefaultHandler(self, userData):
|
||||
pass
|
||||
|
||||
def DefaultHandlerExpand(self, userData):
|
||||
pass
|
||||
|
||||
|
||||
out = Outputter()
|
||||
parser = pyexpat.ParserCreate(namespace_separator='!')
|
||||
for name in ['StartElementHandler', 'EndElementHandler',
|
||||
'CharacterDataHandler', 'ProcessingInstructionHandler',
|
||||
'UnparsedEntityDeclHandler', 'NotationDeclHandler',
|
||||
'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
|
||||
'CommentHandler', 'StartCdataSectionHandler',
|
||||
'EndCdataSectionHandler',
|
||||
'DefaultHandler', 'DefaultHandlerExpand',
|
||||
#'NotStandaloneHandler',
|
||||
'ExternalEntityRefHandler'
|
||||
]:
|
||||
setattr(parser, name, getattr(out, name) )
|
||||
|
||||
data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
|
||||
<?xml-stylesheet href="stylesheet.css"?>
|
||||
<!-- comment data -->
|
||||
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
|
||||
<!ELEMENT root ANY>
|
||||
<!NOTATION notation SYSTEM "notation.jpeg">
|
||||
<!ENTITY acirc "â">
|
||||
<!ENTITY external_entity SYSTEM "entity.file">
|
||||
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
|
||||
%unparsed_entity;
|
||||
]>
|
||||
|
||||
<root>
|
||||
<myns:subelement xmlns:myns="http://www.python.org/namespace">
|
||||
Contents of subelements
|
||||
</myns:subelement>
|
||||
<sub2><![CDATA[contents of CDATA section]]></sub2>
|
||||
&external_entity;
|
||||
</root>
|
||||
"""
|
||||
|
||||
try:
|
||||
parser.Parse(data, 1)
|
||||
except pyexpat.error:
|
||||
print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
|
||||
print '** Line', parser.ErrorLineNumber
|
||||
print '** Column', parser.ErrorColumnNumber
|
||||
print '** Byte', parser.ErrorByteIndex
|
||||
|
|
@ -11,6 +11,7 @@ def test(msg, results):
|
|||
fp.seek(0)
|
||||
m = rfc822.Message(fp)
|
||||
i = 0
|
||||
|
||||
for n, a in m.getaddrlist('to') + m.getaddrlist('cc'):
|
||||
if verbose:
|
||||
print 'name:', repr(n), 'addr:', repr(a)
|
||||
|
@ -28,6 +29,21 @@ def test(msg, results):
|
|||
print ' [no match]'
|
||||
print 'not found:', repr(n), repr(a)
|
||||
|
||||
out = m.getdate('date')
|
||||
if out:
|
||||
if verbose:
|
||||
print 'Date:', m.getheader('date')
|
||||
if out == (1999, 1, 13, 23, 57, 35, 0, 0, 0):
|
||||
if verbose:
|
||||
print ' [matched]'
|
||||
else:
|
||||
if verbose:
|
||||
print ' [no match]'
|
||||
print 'Date conversion failed:', out
|
||||
|
||||
# Note: all test cases must have the same date (in various formats),
|
||||
# or no date!
|
||||
|
||||
test('''Date: Wed, 13 Jan 1999 23:57:35 -0500
|
||||
From: Guido van Rossum <guido@CNRI.Reston.VA.US>
|
||||
To: "Guido van
|
||||
|
@ -40,6 +56,7 @@ test2
|
|||
test('''From: Barry <bwarsaw@python.org
|
||||
To: guido@python.org (Guido: the Barbarian)
|
||||
Subject: nonsense
|
||||
Date: Wednesday, January 13 1999 23:57:35 -0500
|
||||
|
||||
test''', [('Guido: the Barbarian', 'guido@python.org'),
|
||||
])
|
||||
|
@ -47,6 +64,7 @@ test''', [('Guido: the Barbarian', 'guido@python.org'),
|
|||
test('''From: Barry <bwarsaw@python.org
|
||||
To: guido@python.org (Guido: the Barbarian)
|
||||
Cc: "Guido: the Madman" <guido@python.org>
|
||||
Date: 13-Jan-1999 23:57:35 EST
|
||||
|
||||
test''', [('Guido: the Barbarian', 'guido@python.org'),
|
||||
('Guido: the Madman', 'guido@python.org')
|
||||
|
@ -54,6 +72,7 @@ test''', [('Guido: the Barbarian', 'guido@python.org'),
|
|||
|
||||
test('''To: "The monster with
|
||||
the very long name: Guido" <guido@python.org>
|
||||
Date: Wed, 13 Jan 1999 23:57:35 -0500
|
||||
|
||||
test''', [('The monster with\n the very long name: Guido',
|
||||
'guido@python.org')])
|
||||
|
@ -63,6 +82,7 @@ CC: Mike Fletcher <mfletch@vrtelecom.com>,
|
|||
"'string-sig@python.org'" <string-sig@python.org>
|
||||
Cc: fooz@bat.com, bart@toof.com
|
||||
Cc: goit@lip.com
|
||||
Date: Wed, 13 Jan 1999 23:57:35 -0500
|
||||
|
||||
test''', [('Amit J. Patel', 'amitp@Theory.Stanford.EDU'),
|
||||
('Mike Fletcher', 'mfletch@vrtelecom.com'),
|
||||
|
@ -75,8 +95,28 @@ test''', [('Amit J. Patel', 'amitp@Theory.Stanford.EDU'),
|
|||
# This one is just twisted. I don't know what the proper result should be,
|
||||
# but it shouldn't be to infloop, which is what used to happen!
|
||||
test('''To: <[smtp:dd47@mail.xxx.edu]_at_hmhq@hdq-mdm1-imgout.companay.com>
|
||||
Date: Wed, 13 Jan 1999 23:57:35 -0500
|
||||
|
||||
test''', [('', ''),
|
||||
('', 'dd47@mail.xxx.edu'),
|
||||
('', '_at_hmhq@hdq-mdm1-imgout.companay.com')
|
||||
])
|
||||
|
||||
# This exercises the old commas-in-a-full-name bug, which should be doing the
|
||||
# right thing in recent versions of the module.
|
||||
test('''To: "last, first" <userid@foo.net>
|
||||
|
||||
test''', [('last, first', 'userid@foo.net'),
|
||||
])
|
||||
|
||||
test('''To: (Comment stuff) "Quoted name"@somewhere.com
|
||||
|
||||
test''', [('Comment stuff', '"Quoted name"@somewhere.com'),
|
||||
])
|
||||
|
||||
test('''To: :
|
||||
Cc: goit@lip.com
|
||||
Date: Wed, 13 Jan 1999 23:57:35 -0500
|
||||
|
||||
test''', [('', 'goit@lip.com')])
|
||||
|
||||
|
|
|
@ -97,7 +97,7 @@ try:
|
|||
if not canfork or os.fork():
|
||||
# parent is server
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
s.bind(hostname, PORT)
|
||||
s.bind((hostname, PORT))
|
||||
s.listen(1)
|
||||
if verbose:
|
||||
print 'parent accepting'
|
||||
|
@ -133,7 +133,7 @@ try:
|
|||
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
if verbose:
|
||||
print 'child connecting'
|
||||
s.connect(hostname, PORT)
|
||||
s.connect((hostname, PORT))
|
||||
msg = 'socket test'
|
||||
s.send(msg)
|
||||
data = s.recv(1024)
|
||||
|
|
|
@ -130,7 +130,9 @@ if len([1,]) <> 1: raise TestFailed, 'len([1,])'
|
|||
if len([1,2,3,4,5,6]) <> 6: raise TestFailed, 'len([1,2,3,4,5,6])'
|
||||
if [1,2]+[3,4] <> [1,2,3,4]: raise TestFailed, 'list concatenation'
|
||||
if [1,2]*3 <> [1,2,1,2,1,2]: raise TestFailed, 'list repetition *3'
|
||||
if [1,2]*3L <> [1,2,1,2,1,2]: raise TestFailed, 'list repetition *3L'
|
||||
if 0*[1,2,3] <> []: raise TestFailed, 'list repetition 0*'
|
||||
if 0L*[1,2,3] <> []: raise TestFailed, 'list repetition 0L*'
|
||||
if min([1,2]) <> 1 or max([1,2]) <> 2: raise TestFailed, 'min/max list'
|
||||
if 0 in [0,1,2] and 1 in [0,1,2] and 2 in [0,1,2] and 3 not in [0,1,2]: pass
|
||||
else: raise TestFailed, 'in/not in list'
|
||||
|
@ -150,10 +152,17 @@ if a != [1, 1, 2, 3, 4, 5, 5]:
|
|||
|
||||
print '6.5.3a Additional list operations'
|
||||
a = [0,1,2,3,4]
|
||||
a[0L] = 1
|
||||
a[1L] = 2
|
||||
a[2L] = 3
|
||||
if a <> [1,2,3,3,4]: raise TestFailed, 'list item assignment [0L], [1L], [2L]'
|
||||
a[0] = 5
|
||||
a[1] = 6
|
||||
a[2] = 7
|
||||
if a <> [5,6,7,3,4]: raise TestFailed, 'list item assignment [0], [1], [2]'
|
||||
a[-2L] = 88
|
||||
a[-1L] = 99
|
||||
if a <> [5,6,7,88,99]: raise TestFailed, 'list item assignment [-2L], [-1L]'
|
||||
a[-2] = 8
|
||||
a[-1] = 9
|
||||
if a <> [5,6,7,8,9]: raise TestFailed, 'list item assignment [-2], [-1]'
|
||||
|
@ -161,12 +170,21 @@ a[:2] = [0,4]
|
|||
a[-3:] = []
|
||||
a[1:1] = [1,2,3]
|
||||
if a <> [0,1,2,3,4]: raise TestFailed, 'list slice assignment'
|
||||
a[ 1L : 4L] = [7,8,9]
|
||||
if a <> [0,7,8,9,4]: raise TestFailed, 'list slice assignment using long ints'
|
||||
del a[1:4]
|
||||
if a <> [0,4]: raise TestFailed, 'list slice deletion'
|
||||
del a[0]
|
||||
if a <> [4]: raise TestFailed, 'list item deletion [0]'
|
||||
del a[-1]
|
||||
if a <> []: raise TestFailed, 'list item deletion [-1]'
|
||||
a=range(0,5)
|
||||
del a[1L:4L]
|
||||
if a <> [0,4]: raise TestFailed, 'list slice deletion'
|
||||
del a[0L]
|
||||
if a <> [4]: raise TestFailed, 'list item deletion [0]'
|
||||
del a[-1L]
|
||||
if a <> []: raise TestFailed, 'list item deletion [-1]'
|
||||
a.append(0)
|
||||
a.append(1)
|
||||
a.append(2)
|
||||
|
@ -192,6 +210,13 @@ def myComparison(x,y):
|
|||
z = range(12)
|
||||
z.sort(myComparison)
|
||||
|
||||
# Test extreme cases with long ints
|
||||
a = [0,1,2,3,4]
|
||||
if a[ -pow(2,128L): 3 ] != [0,1,2]:
|
||||
raise TestFailed, "list slicing with too-small long integer"
|
||||
if a[ 3: pow(2,145L) ] != [3,4]:
|
||||
raise TestFailed, "list slicing with too-large long integer"
|
||||
|
||||
print '6.6 Mappings == Dictionaries'
|
||||
d = {}
|
||||
if d.keys() <> []: raise TestFailed, '{}.keys()'
|
||||
|
|
|
@ -0,0 +1,401 @@
|
|||
""" Test script for the Unicode implementation.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""
|
||||
from test_support import verbose
|
||||
import sys
|
||||
|
||||
def test(method, input, output, *args):
|
||||
if verbose:
|
||||
print '%s.%s%s =? %s... ' % (repr(input), method, args, output),
|
||||
try:
|
||||
f = getattr(input, method)
|
||||
value = apply(f, args)
|
||||
except:
|
||||
value = sys.exc_type
|
||||
exc = sys.exc_info()[:2]
|
||||
else:
|
||||
exc = None
|
||||
if value != output:
|
||||
if verbose:
|
||||
print 'no'
|
||||
print '*',f, `input`, `output`, `value`
|
||||
if exc:
|
||||
print ' value == %s: %s' % (exc)
|
||||
else:
|
||||
if verbose:
|
||||
print 'yes'
|
||||
|
||||
test('capitalize', u' hello ', u' hello ')
|
||||
test('capitalize', u'hello ', u'Hello ')
|
||||
|
||||
test('title', u' hello ', u' Hello ')
|
||||
test('title', u'hello ', u'Hello ')
|
||||
test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
|
||||
test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
|
||||
test('title', u"getInt", u'Getint')
|
||||
|
||||
test('find', u'abcdefghiabc', 0, u'abc')
|
||||
test('find', u'abcdefghiabc', 9, u'abc', 1)
|
||||
test('find', u'abcdefghiabc', -1, u'def', 4)
|
||||
|
||||
test('rfind', u'abcdefghiabc', 9, u'abc')
|
||||
|
||||
test('lower', u'HeLLo', u'hello')
|
||||
test('lower', u'hello', u'hello')
|
||||
|
||||
test('upper', u'HeLLo', u'HELLO')
|
||||
test('upper', u'HELLO', u'HELLO')
|
||||
|
||||
if 0:
|
||||
transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
|
||||
|
||||
test('maketrans', u'abc', transtable, u'xyz')
|
||||
test('maketrans', u'abc', ValueError, u'xyzq')
|
||||
|
||||
test('split', u'this is the split function',
|
||||
[u'this', u'is', u'the', u'split', u'function'])
|
||||
test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
|
||||
test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
|
||||
test('split', u'a b c d', [u'a', u'b c d'], None, 1)
|
||||
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
|
||||
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
|
||||
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
|
||||
test('split', u'a b c d', [u'a b c d'], None, 0)
|
||||
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
|
||||
test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
|
||||
|
||||
# join now works with any sequence type
|
||||
class Sequence:
    """Minimal sequence wrapper: only len() and indexing, backed by self.seq."""

    def __init__(self):
        self.seq = 'wxyz'

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        # Delegating the lookup also delegates IndexError, which is what
        # terminates iteration over an instance.
        return self.seq[i]
|
||||
|
||||
test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
|
||||
test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
|
||||
test('join', u' ', u'w x y z', Sequence())
|
||||
test('join', u' ', TypeError, 7)
|
||||
|
||||
class BadSeq(Sequence):
|
||||
def __init__(self): self.seq = [7, u'hello', 123L]
|
||||
|
||||
test('join', u' ', TypeError, BadSeq())
|
||||
|
||||
result = u''
|
||||
for i in range(10):
|
||||
if i > 0:
|
||||
result = result + u':'
|
||||
result = result + u'x'*10
|
||||
test('join', u':', result, [u'x' * 10] * 10)
|
||||
test('join', u':', result, (u'x' * 10,) * 10)
|
||||
|
||||
test('strip', u' hello ', u'hello')
|
||||
test('lstrip', u' hello ', u'hello ')
|
||||
test('rstrip', u' hello ', u' hello')
|
||||
test('strip', u'hello', u'hello')
|
||||
|
||||
test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
|
||||
|
||||
if 0:
|
||||
test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')
|
||||
|
||||
table = string.maketrans('a', u'A')
|
||||
test('translate', u'abc', u'Abc', table)
|
||||
test('translate', u'xyz', u'xyz', table)
|
||||
|
||||
test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
|
||||
test('replace', u'one!two!three!', u'onetwothree', '!', '')
|
||||
test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
|
||||
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
|
||||
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
|
||||
test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
|
||||
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
|
||||
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
|
||||
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
|
||||
|
||||
test('startswith', u'hello', 1, u'he')
|
||||
test('startswith', u'hello', 1, u'hello')
|
||||
test('startswith', u'hello', 0, u'hello world')
|
||||
test('startswith', u'hello', 1, u'')
|
||||
test('startswith', u'hello', 0, u'ello')
|
||||
test('startswith', u'hello', 1, u'ello', 1)
|
||||
test('startswith', u'hello', 1, u'o', 4)
|
||||
test('startswith', u'hello', 0, u'o', 5)
|
||||
test('startswith', u'hello', 1, u'', 5)
|
||||
test('startswith', u'hello', 0, u'lo', 6)
|
||||
test('startswith', u'helloworld', 1, u'lowo', 3)
|
||||
test('startswith', u'helloworld', 1, u'lowo', 3, 7)
|
||||
test('startswith', u'helloworld', 0, u'lowo', 3, 6)
|
||||
|
||||
test('endswith', u'hello', 1, u'lo')
|
||||
test('endswith', u'hello', 0, u'he')
|
||||
test('endswith', u'hello', 1, u'')
|
||||
test('endswith', u'hello', 0, u'hello world')
|
||||
test('endswith', u'helloworld', 0, u'worl')
|
||||
test('endswith', u'helloworld', 1, u'worl', 3, 9)
|
||||
test('endswith', u'helloworld', 1, u'world', 3, 12)
|
||||
test('endswith', u'helloworld', 1, u'lowo', 1, 7)
|
||||
test('endswith', u'helloworld', 1, u'lowo', 2, 7)
|
||||
test('endswith', u'helloworld', 1, u'lowo', 3, 7)
|
||||
test('endswith', u'helloworld', 0, u'lowo', 4, 7)
|
||||
test('endswith', u'helloworld', 0, u'lowo', 3, 8)
|
||||
test('endswith', u'ab', 0, u'ab', 0, 1)
|
||||
test('endswith', u'ab', 0, u'ab', 0, 0)
|
||||
|
||||
# A tab pads to the next multiple of the tab size (8 unless given), and the
# column count restarts after every '\r' or '\n'.  Hence "ab" is followed by
# 6 (resp. 2) spaces and "g" by 7 (resp. 3) spaces in the expected strings.
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi')
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab  def\ng   hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab  def\ng   hi', 4)
|
||||
|
||||
if 0:
|
||||
test('capwords', u'abc def ghi', u'Abc Def Ghi')
|
||||
test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
|
||||
test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
|
||||
|
||||
# Comparisons:
|
||||
print 'Testing Unicode comparisons...',
|
||||
assert u'abc' == 'abc'
|
||||
assert 'abc' == u'abc'
|
||||
assert u'abc' == u'abc'
|
||||
assert u'abcd' > 'abc'
|
||||
assert 'abcd' > u'abc'
|
||||
assert u'abcd' > u'abc'
|
||||
assert u'abc' < 'abcd'
|
||||
assert 'abc' < u'abcd'
|
||||
assert u'abc' < u'abcd'
|
||||
print 'done.'
|
||||
|
||||
# Expected strings are padded to the full requested width.  When the
# padding cannot be split evenly, center() puts the extra space on the
# right for these cases (3 + 4 for width 10, 1 + 2 for width 6).
test('ljust', u'abc',  u'abc       ', 10)
test('rjust', u'abc',  u'       abc', 10)
test('center', u'abc', u'   abc    ', 10)
test('ljust', u'abc',  u'abc   ', 6)
test('rjust', u'abc',  u'   abc', 6)
test('center', u'abc', u' abc  ', 6)
# A width smaller than the string leaves it untouched.
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
|
||||
|
||||
test('islower', u'a', 1)
|
||||
test('islower', u'A', 0)
|
||||
test('islower', u'\n', 0)
|
||||
test('islower', u'\u1FFc', 0)
|
||||
test('islower', u'abc', 1)
|
||||
test('islower', u'aBc', 0)
|
||||
test('islower', u'abc\n', 1)
|
||||
|
||||
test('isupper', u'a', 0)
|
||||
test('isupper', u'A', 1)
|
||||
test('isupper', u'\n', 0)
|
||||
test('isupper', u'\u1FFc', 0)
|
||||
test('isupper', u'ABC', 1)
|
||||
test('isupper', u'AbC', 0)
|
||||
test('isupper', u'ABC\n', 1)
|
||||
|
||||
test('istitle', u'a', 0)
|
||||
test('istitle', u'A', 1)
|
||||
test('istitle', u'\n', 0)
|
||||
test('istitle', u'\u1FFc', 1)
|
||||
test('istitle', u'A Titlecased Line', 1)
|
||||
test('istitle', u'A\nTitlecased Line', 1)
|
||||
test('istitle', u'A Titlecased, Line', 1)
|
||||
test('istitle', u'Greek \u1FFcitlecases ...', 1)
|
||||
test('istitle', u'Not a capitalized String', 0)
|
||||
test('istitle', u'Not\ta Titlecase String', 0)
|
||||
test('istitle', u'Not--a Titlecase String', 0)
|
||||
|
||||
test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
|
||||
test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
|
||||
test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
|
||||
test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
|
||||
test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
|
||||
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
|
||||
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1)
|
||||
|
||||
test('translate', u"abababc", u'bbbc', {ord('a'):None})
|
||||
test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
|
||||
test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
|
||||
|
||||
# Contains:
|
||||
print 'Testing Unicode contains method...',
|
||||
assert ('a' in u'abdb') == 1
|
||||
assert ('a' in u'bdab') == 1
|
||||
assert ('a' in u'bdaba') == 1
|
||||
assert ('a' in u'bdba') == 1
|
||||
assert ('a' in u'bdba') == 1
|
||||
assert (u'a' in u'bdba') == 1
|
||||
assert (u'a' in u'bdb') == 0
|
||||
assert (u'a' in 'bdb') == 0
|
||||
assert (u'a' in 'bdba') == 1
|
||||
assert (u'a' in ('a',1,None)) == 1
|
||||
assert (u'a' in (1,None,'a')) == 1
|
||||
assert (u'a' in (1,None,u'a')) == 1
|
||||
assert ('a' in ('a',1,None)) == 1
|
||||
assert ('a' in (1,None,'a')) == 1
|
||||
assert ('a' in (1,None,u'a')) == 1
|
||||
assert ('a' in ('x',1,u'y')) == 0
|
||||
assert ('a' in ('x',1,None)) == 0
|
||||
print 'done.'
|
||||
|
||||
# Formatting:
|
||||
print 'Testing Unicode formatting strings...',
|
||||
assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
|
||||
# %5.2f has a minimum field width of five, so the four-character results
# below (3.00, 3.50, 3.57) carry one pad space in addition to the literal
# space after the comma -- two spaces in total.
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000,  3.00'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000,  3.00'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000,  3.50'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000,  3.57'
|
||||
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
|
||||
assert u"%c" % (u"abc",) == u'a'
|
||||
assert u"%c" % ("abc",) == u'a'
|
||||
assert u"%c" % (34,) == u'"'
|
||||
assert u"%c" % (36,) == u'$'
|
||||
assert u"%r, %r" % (u"abc", "abc") == u"u'abc', 'abc'"
|
||||
assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
|
||||
assert u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"} == u'abc, def'
|
||||
# formatting jobs delegated from the string implementation:
|
||||
assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
|
||||
assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
|
||||
assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
|
||||
assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
|
||||
assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
|
||||
assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
|
||||
assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
|
||||
assert '...%s...' % u"abc" == u'...abc...'
|
||||
print 'done.'
|
||||
|
||||
# Test builtin codecs
|
||||
print 'Testing builtin codecs...',
|
||||
|
||||
assert unicode('hello','ascii') == u'hello'
|
||||
assert unicode('hello','utf-8') == u'hello'
|
||||
assert unicode('hello','utf8') == u'hello'
|
||||
assert unicode('hello','latin-1') == u'hello'
|
||||
|
||||
try:
|
||||
u'Andr\202 x'.encode('ascii')
|
||||
u'Andr\202 x'.encode('ascii','strict')
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception"
|
||||
assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
|
||||
assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"
|
||||
|
||||
try:
|
||||
unicode('Andr\202 x','ascii')
|
||||
unicode('Andr\202 x','ascii','strict')
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
raise AssertionError, "unicode('Andr\202') failed to raise an exception"
|
||||
assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
|
||||
assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
|
||||
|
||||
assert u'hello'.encode('ascii') == 'hello'
|
||||
assert u'hello'.encode('utf-8') == 'hello'
|
||||
assert u'hello'.encode('utf8') == 'hello'
|
||||
assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
|
||||
assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
|
||||
assert u'hello'.encode('latin-1') == 'hello'
|
||||
|
||||
u = u''.join(map(unichr, range(1024)))
|
||||
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
|
||||
'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
|
||||
assert unicode(u.encode(encoding),encoding) == u
|
||||
|
||||
u = u''.join(map(unichr, range(256)))
|
||||
for encoding in (
|
||||
'latin-1',
|
||||
):
|
||||
try:
|
||||
assert unicode(u.encode(encoding),encoding) == u
|
||||
except AssertionError:
|
||||
print '*** codec "%s" failed round-trip' % encoding
|
||||
except ValueError,why:
|
||||
print '*** codec for "%s" failed: %s' % (encoding, why)
|
||||
|
||||
u = u''.join(map(unichr, range(128)))
|
||||
for encoding in (
|
||||
'ascii',
|
||||
):
|
||||
try:
|
||||
assert unicode(u.encode(encoding),encoding) == u
|
||||
except AssertionError:
|
||||
print '*** codec "%s" failed round-trip' % encoding
|
||||
except ValueError,why:
|
||||
print '*** codec for "%s" failed: %s' % (encoding, why)
|
||||
|
||||
print 'done.'
|
||||
|
||||
print 'Testing standard mapping codecs...',
|
||||
|
||||
print '0-127...',
|
||||
s = ''.join(map(chr, range(128)))
|
||||
for encoding in (
|
||||
'cp037', 'cp1026',
|
||||
'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
|
||||
'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
|
||||
'cp863', 'cp865', 'cp866',
|
||||
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
|
||||
'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
|
||||
'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
|
||||
'mac_cyrillic', 'mac_latin2',
|
||||
|
||||
'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
|
||||
'cp1256', 'cp1257', 'cp1258',
|
||||
'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
|
||||
|
||||
'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
|
||||
'cp1006', 'cp875', 'iso8859_8',
|
||||
|
||||
### These have undefined mappings:
|
||||
#'cp424',
|
||||
|
||||
):
|
||||
try:
|
||||
assert unicode(s,encoding).encode(encoding) == s
|
||||
except AssertionError:
|
||||
print '*** codec "%s" failed round-trip' % encoding
|
||||
except ValueError,why:
|
||||
print '*** codec for "%s" failed: %s' % (encoding, why)
|
||||
|
||||
print '128-255...',
|
||||
s = ''.join(map(chr, range(128,256)))
|
||||
for encoding in (
|
||||
'cp037', 'cp1026',
|
||||
'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
|
||||
'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
|
||||
'cp863', 'cp865', 'cp866',
|
||||
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
|
||||
'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
|
||||
'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
|
||||
'mac_cyrillic', 'mac_latin2',
|
||||
|
||||
### These have undefined mappings:
|
||||
#'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
|
||||
#'cp1256', 'cp1257', 'cp1258',
|
||||
#'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
|
||||
#'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
|
||||
|
||||
### These fail the round-trip:
|
||||
#'cp1006', 'cp875', 'iso8859_8',
|
||||
|
||||
):
|
||||
try:
|
||||
assert unicode(s,encoding).encode(encoding) == s
|
||||
except AssertionError:
|
||||
print '*** codec "%s" failed round-trip' % encoding
|
||||
except ValueError,why:
|
||||
print '*** codec for "%s" failed: %s' % (encoding, why)
|
||||
|
||||
print 'done.'
|
||||
|
||||
print 'Testing Unicode string concatenation...',
|
||||
assert (u"abc" u"def") == u"abcdef"
|
||||
assert ("abc" u"def") == u"abcdef"
|
||||
assert (u"abc" "def") == u"abcdef"
|
||||
assert (u"abc" u"def" "ghi") == u"abcdefghi"
|
||||
assert ("abc" "def" u"ghi") == u"abcdefghi"
|
||||
print 'done.'
|
|
@ -0,0 +1,147 @@
|
|||
# Test the Windows-specific winreg module.
|
||||
# The only winreg functions not exercised here are FlushKey, LoadKey and SaveKey.
|
||||
|
||||
from winreg import *
|
||||
import os, sys
|
||||
|
||||
test_key_name = "SOFTWARE\\Python Registry Test Key - Delete Me"
|
||||
|
||||
test_data = [
|
||||
("Int Value", 45, REG_DWORD),
|
||||
("String Val", "A string value", REG_SZ,),
|
||||
(u"Unicode Val", u"A Unicode value", REG_SZ,),
|
||||
("StringExpand", "The path is %path%", REG_EXPAND_SZ),
|
||||
("UnicodeExpand", u"The path is %path%", REG_EXPAND_SZ),
|
||||
("Multi-string", ["Lots", "of", "string", "values"], REG_MULTI_SZ),
|
||||
("Multi-unicode", [u"Lots", u"of", u"unicode", u"values"], REG_MULTI_SZ),
|
||||
("Multi-mixed", [u"Unicode", u"and", "string", "values"],REG_MULTI_SZ),
|
||||
("Raw Data", ("binary"+chr(0)+"data"), REG_BINARY),
|
||||
]
|
||||
|
||||
def WriteTestData(root_key):
|
||||
# Set the default value for this key.
|
||||
SetValue(root_key, test_key_name, REG_SZ, "Default value")
|
||||
key = CreateKey(root_key, test_key_name)
|
||||
# Create a sub-key
|
||||
sub_key = CreateKey(key, "sub_key")
|
||||
# Give the sub-key some named values
|
||||
|
||||
for value_name, value_data, value_type in test_data:
|
||||
SetValueEx(sub_key, value_name, 0, value_type, value_data)
|
||||
|
||||
# Check we wrote as many items as we thought.
|
||||
nkeys, nvalues, since_mod = QueryInfoKey(key)
|
||||
assert nkeys==1, "Not the correct number of sub keys"
|
||||
assert nvalues==1, "Not the correct number of values"
|
||||
nkeys, nvalues, since_mod = QueryInfoKey(sub_key)
|
||||
assert nkeys==0, "Not the correct number of sub keys"
|
||||
assert nvalues==len(test_data), "Not the correct number of values"
|
||||
# Close this key this way...
|
||||
# (but before we do, copy the key as an integer - this allows
|
||||
# us to test that the key really gets closed).
|
||||
int_sub_key = int(sub_key)
|
||||
CloseKey(sub_key)
|
||||
try:
|
||||
QueryInfoKey(int_sub_key)
|
||||
raise RuntimeError, "It appears the CloseKey() function does not close the actual key!"
|
||||
except EnvironmentError:
|
||||
pass
|
||||
# ... and close that key that way :-)
|
||||
int_key = int(key)
|
||||
key.Close()
|
||||
try:
|
||||
QueryInfoKey(int_key)
|
||||
raise RuntimeError, "It appears the key.Close() function does not close the actual key!"
|
||||
except EnvironmentError:
|
||||
pass
|
||||
|
||||
def ReadTestData(root_key):
    """Read the test key below root_key back and check it against test_data.

    Verifies the default value, enumerates and directly queries every
    value stored in "sub_key", and checks that "sub_key" is the only
    sub-key of the test key.
    """
    default = QueryValue(root_key, test_key_name)
    assert default=="Default value", "Registry didnt give back the correct value"

    parent = OpenKey(root_key, test_key_name)
    child = OpenKey(parent, "sub_key")

    # Enumerate values by index until the registry signals an error.
    count = 0
    while 1:
        try:
            entry = EnumValue(child, count)
        except EnvironmentError:
            break
        assert entry in test_data, "didnt read back the correct test data."
        count = count + 1
    assert count==len(test_data), "Didnt read the correct number of items"

    # Every value must also be reachable directly by name.
    for name, payload, kind in test_data:
        got_value, got_kind = QueryValueEx(child, name)
        assert got_value==payload and got_kind == kind, \
               "Could not directly read the value"
    child.Close()

    # The test key has exactly one sub-key: index 0 works, index 1 fails.
    first = EnumKey(parent, 0)
    assert first == "sub_key", "Read subkey value wrong"
    try:
        EnumKey(parent, 1)
    except EnvironmentError:
        pass
    else:
        assert 0, "Was able to get a second key when I only have one!"

    parent.Close()
|
||||
|
||||
def DeleteTestData(root_key):
    """Remove the test key below root_key and confirm that it is gone."""
    parent = OpenKey(root_key, test_key_name, 0, KEY_ALL_ACCESS)
    child = OpenKey(parent, "sub_key", 0, KEY_ALL_ACCESS)

    # Values need not be deleted before their key (sub-keys must be);
    # removing them one by one simply exercises DeleteValue.
    for name, payload, kind in test_data:
        DeleteValue(child, name)

    subkey_count, value_count, modified = QueryInfoKey(child)
    assert subkey_count==0 and value_count==0, "subkey not empty before delete"
    child.Close()
    DeleteKey(parent, "sub_key")

    # Deleting the same sub-key a second time has to fail.
    try:
        DeleteKey(parent, "sub_key")
    except EnvironmentError:
        pass
    else:
        assert 0, "Deleting the key twice succeeded"
    parent.Close()
    DeleteKey(root_key, test_key_name)

    # The key no longer exists, so opening it has to fail as well.  This
    # check catches WindowsError, whereas the checks above catch
    # EnvironmentError.
    try:
        reopened = OpenKey(root_key, test_key_name)
    except WindowsError:
        pass
    else:
        assert 0, "Could open the non-existant key"
|
||||
|
||||
def TestAll(root_key):
    """Run the write, read and delete phases, in that order, on root_key."""
    for phase in (WriteTestData, ReadTestData, DeleteTestData):
        phase(root_key)
|
||||
|
||||
# Test on my local machine.
|
||||
TestAll(HKEY_CURRENT_USER)
|
||||
print "Local registry tests worked"
|
||||
try:
|
||||
remote_name = sys.argv[sys.argv.index("--remote")+1]
|
||||
except (IndexError, ValueError):
|
||||
remote_name = None
|
||||
|
||||
if remote_name is not None:
|
||||
try:
|
||||
remote_key = ConnectRegistry(remote_name, HKEY_CURRENT_USER)
|
||||
except EnvironmentError, exc:
|
||||
print "Could not connect to the remote machine -", exc.strerror
|
||||
remote_key = None
|
||||
if remote_key is not None:
|
||||
TestAll(remote_key)
|
||||
print "Remote registry tests worked"
|
||||
else:
|
||||
print "Remote registry calls can be tested using",
|
||||
print "'test_winreg.py --remote \\\\machine_name'"
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
import zipfile, os
|
||||
|
||||
# Scratch files, created in the current directory and removed again by the
# finally clause below.
srcname = "junk9630.tmp"
zipname = "junk9708.tmp"

try:
    # Build a source file of 1000 numbered lines.
    source = open(srcname, "w")
    for lineno in range(1000):
        source.write("Test of zipfile line %d.\n" % lineno)
    source.close()

    # Store the source file in a new archive under two member names.
    archive = zipfile.ZipFile(zipname, "w")
    for member in (srcname, "another.name"):
        archive.write(srcname, member)
    archive.close()

    # Reopen the archive and read both members back.
    archive = zipfile.ZipFile(zipname, "r")
    for member in ("another.name", srcname):
        archive.read(member)
    archive.close()
finally:
    # Remove whichever scratch files actually got created.
    for leftover in (srcname, zipname):
        if os.path.isfile(leftover):
            os.unlink(leftover)
|
||||
|
|
@ -80,14 +80,14 @@ else:
|
|||
# in order to provide more variations.
|
||||
for sync in [zlib.Z_NO_FLUSH, zlib.Z_SYNC_FLUSH, zlib.Z_FULL_FLUSH]:
|
||||
for level in range(10):
|
||||
obj = zlib.compressobj( level )
|
||||
d = obj.compress( buf[:3000] )
|
||||
d = d + obj.flush( sync )
|
||||
d = d + obj.compress( buf[3000:] )
|
||||
d = d + obj.flush()
|
||||
if zlib.decompress(d) != buf:
|
||||
print "Decompress failed: flush mode=%i, level=%i" % (sync,level)
|
||||
del obj
|
||||
obj = zlib.compressobj( level )
|
||||
d = obj.compress( buf[:3000] )
|
||||
d = d + obj.flush( sync )
|
||||
d = d + obj.compress( buf[3000:] )
|
||||
d = d + obj.flush()
|
||||
if zlib.decompress(d) != buf:
|
||||
print "Decompress failed: flush mode=%i, level=%i" % (sync,level)
|
||||
del obj
|
||||
|
||||
def ignore():
|
||||
"""An empty function with a big string.
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
# threading.py:
|
||||
# Proposed new threading module, emulating a subset of Java's threading model
|
||||
"""Proposed new threading module, emulating a subset of Java's threading model."""
|
||||
|
||||
import sys
|
||||
import time
|
||||
|
@ -238,7 +237,7 @@ def Semaphore(*args, **kwargs):
|
|||
|
||||
class _Semaphore(_Verbose):
|
||||
|
||||
# After Tim Peters' semaphore class, but bnot quite the same (no maximum)
|
||||
# After Tim Peters' semaphore class, but not quite the same (no maximum)
|
||||
|
||||
def __init__(self, value=1, verbose=None):
|
||||
assert value >= 0, "Semaphore initial value must be >= 0"
|
||||
|
@ -506,7 +505,7 @@ class _DummyThread(Thread):
|
|||
|
||||
def __init__(self):
|
||||
Thread.__init__(self, name=_newname("Dummy-%d"))
|
||||
self.__Thread_started = 1
|
||||
self._Thread__started = 1
|
||||
_active_limbo_lock.acquire()
|
||||
_active[_get_ident()] = self
|
||||
_active_limbo_lock.release()
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
import thread
|
||||
# Start an empty thread to initialise the thread machinery (and the global lock!).
|
||||
# The thread finishes immediately, so it has little influence on the test
|
||||
# results by itself; it matters only because it initialises the global lock.
|
||||
thread.start_new_thread(lambda : 1, ())
|
||||
|
||||
import test.pystone
|
||||
test.pystone.main()
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
# Format and print Python stack traces
|
||||
"""Extract, format and print information about Python stack traces."""
|
||||
|
||||
import linecache
|
||||
import string
|
||||
|
@ -10,6 +10,8 @@ def _print(file, str='', terminator='\n'):
|
|||
|
||||
|
||||
def print_list(extracted_list, file=None):
|
||||
"""Print the list of tuples as returned by extract_tb() or
|
||||
extract_stack() as a formatted stack trace to the given file."""
|
||||
if not file:
|
||||
file = sys.stderr
|
||||
for filename, lineno, name, line in extracted_list:
|
||||
|
@ -19,6 +21,12 @@ def print_list(extracted_list, file=None):
|
|||
_print(file, ' %s' % string.strip(line))
|
||||
|
||||
def format_list(extracted_list):
|
||||
"""Given a list of tuples as returned by extract_tb() or
|
||||
extract_stack(), return a list of strings ready for printing.
|
||||
Each string in the resulting list corresponds to the item with
|
||||
the same index in the argument list. Each string ends in a
|
||||
newline; the strings may contain internal newlines as well, for
|
||||
those items whose source text line is not None."""
|
||||
list = []
|
||||
for filename, lineno, name, line in extracted_list:
|
||||
item = ' File "%s", line %d, in %s\n' % (filename,lineno,name)
|
||||
|
@ -29,6 +37,10 @@ def format_list(extracted_list):
|
|||
|
||||
|
||||
def print_tb(tb, limit=None, file=None):
|
||||
"""Print up to 'limit' stack trace entries from the traceback 'tb'.
|
||||
If 'limit' is omitted or None, all entries are printed. If 'file' is
|
||||
omitted or None, the output goes to sys.stderr; otherwise 'file'
|
||||
should be an open file or file-like object with a write() method."""
|
||||
if not file:
|
||||
file = sys.stderr
|
||||
if limit is None:
|
||||
|
@ -49,9 +61,18 @@ def print_tb(tb, limit=None, file=None):
|
|||
n = n+1
|
||||
|
||||
def format_tb(tb, limit = None):
    """A shorthand for 'format_list(extract_tb(tb, limit))'."""
    return format_list(extract_tb(tb, limit))
|
||||
|
||||
def extract_tb(tb, limit = None):
|
||||
"""Return a list of up to 'limit' pre-processed stack trace entries
|
||||
extracted from the traceback object 'traceback'. This is useful for
|
||||
alternate formatting of stack traces. If 'limit' is omitted or None,
|
||||
all entries are extracted. A pre-processed stack trace entry is a
|
||||
quadruple (filename, line number, function name, text) representing
|
||||
the information that is usually printed for a stack trace. The text
|
||||
is a string with leading and trailing whitespace stripped; if the
|
||||
source is not available it is None."""
|
||||
if limit is None:
|
||||
if hasattr(sys, 'tracebacklimit'):
|
||||
limit = sys.tracebacklimit
|
||||
|
@ -73,10 +94,18 @@ def extract_tb(tb, limit = None):
|
|||
|
||||
|
||||
def print_exception(etype, value, tb, limit=None, file=None):
|
||||
"""Print exception information and up to 'limit' stack trace entries
|
||||
from the traceback 'tb' to 'file'. This differs from print_tb() in
|
||||
the following ways: (1) if traceback is not None, it prints a header
|
||||
"Traceback (most recent call last):"; (2) it prints the exception type and
|
||||
value after the stack trace; (3) if type is SyntaxError and value has
|
||||
the appropriate format, it prints the line where the syntax error
|
||||
occurred with a caret on the next line indicating the approximate
|
||||
position of the error."""
|
||||
if not file:
|
||||
file = sys.stderr
|
||||
if tb:
|
||||
_print(file, 'Traceback (innermost last):')
|
||||
_print(file, 'Traceback (most recent call last):')
|
||||
print_tb(tb, limit, file)
|
||||
lines = format_exception_only(etype, value)
|
||||
for line in lines[:-1]:
|
||||
|
@ -84,8 +113,14 @@ def print_exception(etype, value, tb, limit=None, file=None):
|
|||
_print(file, lines[-1], '')
|
||||
|
||||
def format_exception(etype, value, tb, limit = None):
|
||||
"""Format a stack trace and the exception information. The arguments
|
||||
have the same meaning as the corresponding arguments to
|
||||
print_exception(). The return value is a list of strings, each
|
||||
ending in a newline and some containing internal newlines. When
|
||||
these lines are concatenated and printed, exactly the same text is
|
||||
printed as does print_exception()."""
|
||||
if tb:
|
||||
list = ['Traceback (innermost last):\n']
|
||||
list = ['Traceback (most recent call last):\n']
|
||||
list = list + format_tb(tb, limit)
|
||||
else:
|
||||
list = []
|
||||
|
@ -93,6 +128,14 @@ def format_exception(etype, value, tb, limit = None):
|
|||
return list
|
||||
|
||||
def format_exception_only(etype, value):
|
||||
"""Format the exception part of a traceback. The arguments are the
|
||||
exception type and value such as given by sys.last_type and
|
||||
sys.last_value. The return value is a list of strings, each ending
|
||||
in a newline. Normally, the list contains a single string;
|
||||
however, for SyntaxError exceptions, it contains several lines that
|
||||
(when printed) display detailed information about where the syntax
|
||||
error occurred. The message indicating which exception occurred is
|
||||
always the last string in the list."""
|
||||
list = []
|
||||
if type(etype) == types.ClassType:
|
||||
stype = etype.__name__
|
||||
|
@ -128,6 +171,10 @@ def format_exception_only(etype, value):
|
|||
|
||||
|
||||
def print_exc(limit=None, file=None):
|
||||
"""This is a shorthand for 'print_exception(sys.exc_type,
|
||||
sys.exc_value, sys.exc_traceback, limit, file)'.
|
||||
(In fact, it uses sys.exc_info() to retrieve the same information
|
||||
in a thread-safe way.)"""
|
||||
if not file:
|
||||
file = sys.stderr
|
||||
try:
|
||||
|
@ -137,6 +184,8 @@ def print_exc(limit=None, file=None):
|
|||
etype = value = tb = None
|
||||
|
||||
def print_last(limit=None, file=None):
|
||||
"""This is a shorthand for 'print_exception(sys.last_type,
|
||||
sys.last_value, sys.last_traceback, limit, file)'."""
|
||||
if not file:
|
||||
file = sys.stderr
|
||||
print_exception(sys.last_type, sys.last_value, sys.last_traceback,
|
||||
|
@ -144,6 +193,10 @@ def print_last(limit=None, file=None):
|
|||
|
||||
|
||||
def print_stack(f=None, limit=None, file=None):
|
||||
"""This function prints a stack trace from its invocation point.
|
||||
The optional 'f' argument can be used to specify an alternate stack
|
||||
frame at which to start. The optional 'limit' and 'file' arguments
|
||||
have the same meaning as for print_exception()."""
|
||||
if f is None:
|
||||
try:
|
||||
raise ZeroDivisionError
|
||||
|
@ -152,6 +205,7 @@ def print_stack(f=None, limit=None, file=None):
|
|||
print_list(extract_stack(f, limit), file)
|
||||
|
||||
def format_stack(f=None, limit=None):
|
||||
"""A shorthand for 'format_list(extract_stack(f, limit))'."""
|
||||
if f is None:
|
||||
try:
|
||||
raise ZeroDivisionError
|
||||
|
@ -160,6 +214,12 @@ def format_stack(f=None, limit=None):
|
|||
return format_list(extract_stack(f, limit))
|
||||
|
||||
def extract_stack(f=None, limit = None):
|
||||
"""Extract the raw traceback from the current stack frame. The
|
||||
return value has the same format as for extract_tb(). The optional
|
||||
'f' and 'limit' arguments have the same meaning as for print_stack().
|
||||
Each item in the list is a quadruple (filename, line number,
|
||||
function name, text), and the entries are in order from oldest
|
||||
to newest stack frame."""
|
||||
if f is None:
|
||||
try:
|
||||
raise ZeroDivisionError
|
||||
|
@ -184,13 +244,14 @@ def extract_stack(f=None, limit = None):
|
|||
list.reverse()
|
||||
return list
|
||||
|
||||
# Calculate the correct line number of the traceback given in tb (even
|
||||
# with -O on).
|
||||
# Coded by Marc-Andre Lemburg from the example of PyCode_Addr2Line()
|
||||
# in compile.c.
|
||||
# Revised version by Jim Hugunin to work with JPython too.
|
||||
|
||||
def tb_lineno(tb):
|
||||
"""Calculate the correct line number of the traceback given in tb
|
||||
(even with -O on)."""
|
||||
|
||||
# Coded by Marc-Andre Lemburg from the example of PyCode_Addr2Line()
|
||||
# in compile.c.
|
||||
# Revised version by Jim Hugunin to work with JPython too.
|
||||
|
||||
c = tb.tb_frame.f_code
|
||||
if not hasattr(c, 'co_lnotab'):
|
||||
return tb.tb_lineno
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# A more or less complete user-defined wrapper around dictionary objects
|
||||
"""A more or less complete user-defined wrapper around dictionary objects."""
|
||||
|
||||
class UserDict:
|
||||
def __init__(self, dict=None):
|
||||
|
|
|
@ -1,13 +1,16 @@
|
|||
# A more or less complete user-defined wrapper around list objects
|
||||
"""A more or less complete user-defined wrapper around list objects."""
|
||||
|
||||
class UserList:
|
||||
def __init__(self, list=None):
|
||||
def __init__(self, initlist=None):
|
||||
self.data = []
|
||||
if list is not None:
|
||||
if type(list) == type(self.data):
|
||||
self.data[:] = list
|
||||
if initlist is not None:
|
||||
# XXX should this accept an arbitrary sequence?
|
||||
if type(initlist) == type(self.data):
|
||||
self.data[:] = initlist
|
||||
elif isinstance(initlist, UserList):
|
||||
self.data[:] = initlist.data[:]
|
||||
else:
|
||||
self.data[:] = list.data[:]
|
||||
self.data = list(initlist)
|
||||
def __repr__(self): return repr(self.data)
|
||||
def __cmp__(self, other):
|
||||
if isinstance(other, UserList):
|
||||
|
|
|
@ -0,0 +1,158 @@
|
|||
#!/usr/bin/env python
|
||||
## vim:ts=4:et:nowrap
|
||||
"""A user-defined wrapper around string objects
|
||||
|
||||
Note: string objects have grown methods in Python 1.6
|
||||
This module requires Python 1.6 or later.
|
||||
"""
|
||||
from types import StringType, UnicodeType
|
||||
import sys
|
||||
|
||||
class UserString:
|
||||
def __init__(self, seq):
|
||||
if isinstance(seq, StringType) or isinstance(seq, UnicodeType):
|
||||
self.data = seq
|
||||
elif isinstance(seq, UserString):
|
||||
self.data = seq.data[:]
|
||||
else:
|
||||
self.data = str(seq)
|
||||
def __str__(self): return str(self.data)
|
||||
def __repr__(self): return repr(self.data)
|
||||
def __int__(self): return int(self.data)
|
||||
def __long__(self): return long(self.data)
|
||||
def __float__(self): return float(self.data)
|
||||
def __complex__(self): return complex(self.data)
|
||||
def __hash__(self): return hash(self.data)
|
||||
|
||||
def __cmp__(self, string):
|
||||
if isinstance(string, UserString):
|
||||
return cmp(self.data, string.data)
|
||||
else:
|
||||
return cmp(self.data, string)
|
||||
def __contains__(self, char):
|
||||
return char in self.data
|
||||
|
||||
def __len__(self): return len(self.data)
|
||||
def __getitem__(self, index): return self.__class__(self.data[index])
|
||||
def __getslice__(self, start, end):
|
||||
start = max(start, 0); end = max(end, 0)
|
||||
return self.__class__(self.data[start:end])
|
||||
|
||||
def __add__(self, other):
|
||||
if isinstance(other, UserString):
|
||||
return self.__class__(self.data + other.data)
|
||||
elif isinstance(other, StringType) or isinstance(other, UnicodeType):
|
||||
return self.__class__(self.data + other)
|
||||
else:
|
||||
return self.__class__(self.data + str(other))
|
||||
def __radd__(self, other):
|
||||
if isinstance(other, StringType) or isinstance(other, UnicodeType):
|
||||
return self.__class__(other + self.data)
|
||||
else:
|
||||
return self.__class__(str(other) + self.data)
|
||||
def __mul__(self, n):
|
||||
return self.__class__(self.data*n)
|
||||
__rmul__ = __mul__
|
||||
|
||||
# the following methods are defined in alphabetical order:
|
||||
def capitalize(self): return self.__class__(self.data.capitalize())
|
||||
def center(self, width): return self.__class__(self.data.center(width))
|
||||
def count(self, sub, start=0, end=sys.maxint):
|
||||
return self.data.count(sub, start, end)
|
||||
def encode(self, encoding=None, errors=None): # XXX improve this?
|
||||
if encoding:
|
||||
if errors:
|
||||
return self.__class__(self.data.encode(encoding, errors))
|
||||
else:
|
||||
return self.__class__(self.data.encode(encoding))
|
||||
else:
|
||||
return self.__class__(self.data.encode())
|
||||
def endswith(self, suffix, start=0, end=sys.maxint):
|
||||
return self.data.endswith(suffix, start, end)
|
||||
def expandtabs(self, tabsize=8):
|
||||
return self.__class__(self.data.expandtabs(tabsize))
|
||||
def find(self, sub, start=0, end=sys.maxint):
|
||||
return self.data.find(sub, start, end)
|
||||
def index(self, sub, start=0, end=sys.maxint):
|
||||
return self.data.index(sub, start, end)
|
||||
def isdecimal(self): return self.data.isdecimal()
|
||||
def isdigit(self): return self.data.isdigit()
|
||||
def islower(self): return self.data.islower()
|
||||
def isnumeric(self): return self.data.isnumeric()
|
||||
def isspace(self): return self.data.isspace()
|
||||
def istitle(self): return self.data.istitle()
|
||||
def isupper(self): return self.data.isupper()
|
||||
def join(self, seq): return self.data.join(seq)
|
||||
def ljust(self, width): return self.__class__(self.data.ljust(width))
|
||||
def lower(self): return self.__class__(self.data.lower())
|
||||
def lstrip(self): return self.__class__(self.data.lstrip())
|
||||
def replace(self, old, new, maxsplit=-1):
|
||||
return self.__class__(self.data.replace(old, new, maxsplit))
|
||||
def rfind(self, sub, start=0, end=sys.maxint):
|
||||
return self.data.rfind(sub, start, end)
|
||||
def rindex(self, sub, start=0, end=sys.maxint):
|
||||
return self.data.rindex(sub, start, end)
|
||||
def rjust(self, width): return self.__class__(self.data.rjust(width))
|
||||
def rstrip(self): return self.__class__(self.data.rstrip())
|
||||
def split(self, sep=None, maxsplit=-1):
|
||||
return self.data.split(sep, maxsplit)
|
||||
def splitlines(self, keepends=0): return self.data.splitlines(keepends)
|
||||
def startswith(self, prefix, start=0, end=sys.maxint):
|
||||
return self.data.startswith(prefix, start, end)
|
||||
def strip(self): return self.__class__(self.data.strip())
|
||||
def swapcase(self): return self.__class__(self.data.swapcase())
|
||||
def title(self): return self.__class__(self.data.title())
|
||||
def translate(self, table, deletechars=""):
|
||||
return self.__class__(self.data.translate(table, deletechars))
|
||||
def upper(self): return self.__class__(self.data.upper())
|
||||
|
||||
class MutableString(UserString):
    """mutable string objects

    Python strings are immutable objects.  This has the advantage, that
    strings may be used as dictionary keys.  If this property isn't needed
    and you insist on changing string values in place instead, you may cheat
    and use MutableString.

    But the purpose of this class is an educational one: to prevent
    people from inventing their own mutable string class derived
    from UserString and then forget thereby to remove (override) the
    __hash__ method inherited from UserString.  This would lead to
    errors that would be very hard to track down.

    A faster and better solution is to rewrite your program using lists."""

    def __init__(self, string=""):
        # Reuse the base class conversion so that self.data is always a
        # real string, even when built from a UserString or a non-string.
        UserString.__init__(self, string)

    def __hash__(self):
        # Deliberately unhashable: the value can change after creation.
        raise TypeError("unhashable type (it is mutable)")

    def _rawstring(self, sub):
        """Return sub as a plain string suitable for splicing into self.data."""
        if isinstance(sub, UserString):
            return sub.data
        if isinstance(sub, StringType) or isinstance(sub, UnicodeType):
            return sub
        return str(sub)

    def _position(self, index):
        """Return index as a non-negative offset into self.data.

        Negative indices count from the end, as they do for reading with
        __getitem__.  Raises IndexError when the index is out of range."""
        if index < 0:
            index = index + len(self.data)
        if index < 0 or index >= len(self.data):
            raise IndexError
        return index

    def __setitem__(self, index, sub):
        """Replace the character at index with sub (which may be of any length)."""
        index = self._position(index)
        self.data = self.data[:index] + self._rawstring(sub) + self.data[index+1:]

    def __delitem__(self, index):
        """Remove the character at index."""
        index = self._position(index)
        self.data = self.data[:index] + self.data[index+1:]

    def __setslice__(self, start, end, sub):
        """Replace self.data[start:end] with sub; negative bounds are clamped to 0."""
        start = max(start, 0); end = max(end, 0)
        self.data = self.data[:start] + self._rawstring(sub) + self.data[end:]

    def __delslice__(self, start, end):
        """Remove self.data[start:end]; negative bounds are clamped to 0."""
        start = max(start, 0); end = max(end, 0)
        self.data = self.data[:start] + self.data[end:]

    def immutable(self):
        """Return a UserString holding the current value."""
        return UserString(self.data)
|
||||
|
||||
if __name__ == "__main__":
    # When called as a script, run the regression test named after this
    # file (test_<script name>) from the 'test' directory next to it.
    import os
    called_in_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
    called_as, py = os.path.splitext(os.path.basename(sys.argv[0]))
    sys.path.append(os.path.join(called_in_dir, 'test'))
    if '-q' in sys.argv:
        # -q switches off the verbose flag of the test support module
        import test_support
        test_support.verbose = 0
    __import__('test_' + called_as.lower())
|
Loading…
Reference in New Issue