The usual...

Guido van Rossum 2000-05-08 17:31:04 +00:00
parent 0b095bc092
commit aad6761cce
56 changed files with 5040 additions and 656 deletions

View File

@ -68,7 +68,6 @@ import sys
import time
import socket # For gethostbyaddr()
import string
import rfc822
import mimetools
import SocketServer
@ -94,12 +93,16 @@ class HTTPServer(SocketServer.TCPServer):
host, port = self.socket.getsockname()
if not host or host == '0.0.0.0':
host = socket.gethostname()
hostname, hostnames, hostaddrs = socket.gethostbyaddr(host)
if '.' not in hostname:
for host in hostnames:
if '.' in host:
hostname = host
break
try:
hostname, hostnames, hostaddrs = socket.gethostbyaddr(host)
except socket.error:
hostname = host
else:
if '.' not in hostname:
for host in hostnames:
if '.' in host:
hostname = host
break
self.server_name = hostname
self.server_port = port
@ -169,7 +172,7 @@ class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):
This server parses the request and the headers, and then calls a
function specific to the request type (<command>). Specifically,
a request SPAM will be handled by a method handle_SPAM(). If no
a request SPAM will be handled by a method do_SPAM(). If no
such method exists the server sends an error response to the
client. If it exists, it is called with no arguments:
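To make the dispatch convention concrete, here is a minimal sketch of a handler subclass (the port and response body are invented for illustration):

import BaseHTTPServer

class HelloHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    def do_GET(self):
        # handle() finds this method via getattr(self, 'do_' + command)
        self.send_response(200)
        self.send_header('Content-type', 'text/plain')
        self.end_headers()
        self.wfile.write('hello\n')

if __name__ == '__main__':
    server = BaseHTTPServer.HTTPServer(('', 8000), HelloHandler)
    server.serve_forever()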
@ -216,16 +219,17 @@ class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):
# where each string is of the form name[/version].
server_version = "BaseHTTP/" + __version__
def handle(self):
"""Handle a single HTTP request.
def parse_request(self):
"""Parse a request (internal).
You normally don't need to override this method; see the class
__doc__ string for information on how to handle specific HTTP
commands such as GET and POST.
The request should be stored in self.raw_requestline; the results
are in self.command, self.path, self.request_version and
self.headers.
Return value is 1 for success, 0 for failure; on failure, an
error is sent back.
"""
self.raw_requestline = self.rfile.readline()
self.request_version = version = "HTTP/0.9" # Default
requestline = self.raw_requestline
if requestline[-2:] == '\r\n':
@ -238,21 +242,35 @@ class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):
[command, path, version] = words
if version[:5] != 'HTTP/':
self.send_error(400, "Bad request version (%s)" % `version`)
return
return 0
elif len(words) == 2:
[command, path] = words
if command != 'GET':
self.send_error(400,
"Bad HTTP/0.9 request type (%s)" % `command`)
return
return 0
else:
self.send_error(400, "Bad request syntax (%s)" % `requestline`)
return
return 0
self.command, self.path, self.request_version = command, path, version
self.headers = self.MessageClass(self.rfile, 0)
mname = 'do_' + command
return 1
def handle(self):
"""Handle a single HTTP request.
You normally don't need to override this method; see the class
__doc__ string for information on how to handle specific HTTP
commands such as GET and POST.
"""
self.raw_requestline = self.rfile.readline()
if not self.parse_request(): # An error code has been sent, just exit
return
mname = 'do_' + self.command
if not hasattr(self, mname):
self.send_error(501, "Unsupported method (%s)" % `command`)
self.send_error(501, "Unsupported method (%s)" % `self.command`)
return
method = getattr(self, mname)
method()

View File

@ -3,6 +3,9 @@
This module builds on SimpleHTTPServer by implementing GET and POST
requests to cgi-bin scripts.
If the os.fork() function is not present, this module will not work;
SystemError will be raised instead.
"""
@ -10,15 +13,18 @@ __version__ = "0.3"
import os
import sys
import time
import socket
import string
import urllib
import BaseHTTPServer
import SimpleHTTPServer
try:
os.fork
except AttributeError:
raise SystemError, __name__ + " requires os.fork()"
class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
"""Complete HTTP server with GET, HEAD and POST commands.
@ -150,6 +156,9 @@ class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
ua = self.headers.getheader('user-agent')
if ua:
env['HTTP_USER_AGENT'] = ua
co = filter(None, self.headers.getheaders('cookie'))
if co:
env['HTTP_COOKIE'] = string.join(co, ', ')
# XXX Other HTTP_* headers
decoded_query = string.replace(query, '+', ' ')
try:
@ -177,7 +186,7 @@ def nobody_uid():
import pwd
try:
nobody = pwd.getpwnam('nobody')[2]
except pwd.error:
except KeyError:
nobody = 1 + max(map(lambda x: x[2], pwd.getpwall()))
return nobody

View File

@ -33,11 +33,24 @@ ConfigParser -- responsible for parsing a list of
sections()
return all the configuration section names, sans DEFAULT
has_section(section)
return whether the given section exists
options(section)
return list of configuration options for the named section
has_option(section, option)
return whether the given section has the given option
read(filenames)
read and parse the list of named configuration files
read and parse the list of named configuration files, given by
name. A single filename is also allowed. Non-existing files
are ignored.
readfp(fp, filename=None)
read and parse one configuration file, given as a file object.
The filename defaults to fp.name; it is only used in error
messages (if fp has no `name' attribute, the string `<???>' is used).
get(section, option, raw=0, vars=None)
return a string value for the named option. All % interpolations are
@ -158,6 +171,7 @@ class ConfigParser:
return self.__sections.has_key(section)
def options(self, section):
"""Return a list of option names for the given section name."""
try:
opts = self.__sections[section].copy()
except KeyError:
@ -165,16 +179,49 @@ class ConfigParser:
opts.update(self.__defaults)
return opts.keys()
def has_option(self, section, option):
"""Return whether the given section has the given option."""
try:
opts = self.__sections[section]
except KeyError:
raise NoSectionError(section)
return opts.has_key(option)
def read(self, filenames):
"""Read and parse a list of filenames."""
"""Read and parse a filename or a list of filenames.
Files that cannot be opened are silently ignored; this is
designed so that you can specify a list of potential
configuration file locations (e.g. current directory, user's
home directory, systemwide directory), and all existing
configuration files in the list will be read. A single
filename may also be given.
"""
if type(filenames) is type(''):
filenames = [filenames]
for file in filenames:
for filename in filenames:
try:
fp = open(file, 'r')
self.__read(fp)
fp = open(filename)
except IOError:
pass
continue
self.__read(fp, filename)
fp.close()
def readfp(self, fp, filename=None):
"""Like read() but the argument must be a file-like object.
The `fp' argument must have a `readline' method. Optional
second argument is the `filename', which if not given, is
taken from fp.name. If fp has no `name' attribute, `<???>' is
used.
"""
if filename is None:
try:
filename = fp.name
except AttributeError:
filename = '<???>'
self.__read(fp, filename)
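A short usage sketch of the new reading interfaces (section and option names are invented):

import ConfigParser
import StringIO

sample = "[server]\nport = 8080 ; comment stripped after whitespace\n"
cp = ConfigParser.ConfigParser()
cp.readfp(StringIO.StringIO(sample), '<sample>')
print cp.sections()                # ['server']
print cp.get('server', 'port')     # '8080'
cp.read(['/etc/no-such.ini'])      # missing files are silently skipped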
def get(self, section, option, raw=0, vars=None):
"""Get an option value for a given section.
@ -199,7 +246,7 @@ class ConfigParser:
# Update with the entry specific variables
if vars:
d.update(vars)
option = string.lower(option)
option = self.optionxform(option)
try:
rawval = d[option]
except KeyError:
@ -212,7 +259,7 @@ class ConfigParser:
depth = 0
while depth < 10: # Loop through this until it's done
depth = depth + 1
if not string.find(value, "%("):
if string.find(value, "%(") >= 0:
try:
value = value % d
except KeyError, key:
@ -236,25 +283,28 @@ class ConfigParser:
raise ValueError, 'Not a boolean: %s' % v
return val
def optionxform(self, optionstr):
return string.lower(optionstr)
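optionxform() is now an overridable hook; a sketch of a subclass that passes option names through untouched (note that in this version __read() still lowercases names while parsing, so this mainly affects how get() transforms the name you ask for):

class RawNameParser(ConfigParser):
    def optionxform(self, optionstr):
        # keep the option name exactly as the caller wrote it
        return optionstr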
#
# Regular expressions for parsing section headers and options. Note a
# slight semantic change from the previous version, because of the use
# of \w, _ is allowed in section header names.
__SECTCRE = re.compile(
SECTCRE = re.compile(
r'\[' # [
r'(?P<header>[-\w]+)' # `-', `_' or any alphanum
r'(?P<header>[-\w_.*,(){}]+)' # a lot of stuff found by IvL
r'\]' # ]
)
__OPTCRE = re.compile(
r'(?P<option>[-.\w]+)' # - . _ alphanum
r'[ \t]*[:=][ \t]*' # any number of space/tab,
OPTCRE = re.compile(
r'(?P<option>[-\w_.*,(){}]+)' # a lot of stuff found by IvL
r'[ \t]*(?P<vi>[:=])[ \t]*' # any number of space/tab,
# followed by separator
# (either : or =), followed
# by any # space/tab
r'(?P<value>.*)$' # everything up to eol
)
def __read(self, fp):
def __read(self, fp, fpname):
"""Parse a sectioned setup file.
The sections in the setup file contain a title line at the top,
@ -277,7 +327,7 @@ class ConfigParser:
if string.strip(line) == '' or line[0] in '#;':
continue
if string.lower(string.split(line)[0]) == 'rem' \
and line[0] == "r": # no leading whitespace
and line[0] in "rR": # no leading whitespace
continue
# continuation line?
if line[0] in ' \t' and cursect is not None and optname:
@ -287,7 +337,7 @@ class ConfigParser:
# a section header or option header?
else:
# is it a section header?
mo = self.__SECTCRE.match(line)
mo = self.SECTCRE.match(line)
if mo:
sectname = mo.group('header')
if self.__sections.has_key(sectname):
@ -301,13 +351,19 @@ class ConfigParser:
optname = None
# no section header in the file?
elif cursect is None:
raise MissingSectionHeaderError(fp.name, lineno, `line`)
raise MissingSectionHeaderError(fpname, lineno, `line`)
# an option line?
else:
mo = self.__OPTCRE.match(line)
mo = self.OPTCRE.match(line)
if mo:
optname, optval = mo.group('option', 'value')
optname, vi, optval = mo.group('option', 'vi', 'value')
optname = string.lower(optname)
if vi in ('=', ':') and ';' in optval:
# ';' is a comment delimiter only if it follows
# a spacing character
pos = string.find(optval, ';')
if pos and optval[pos-1] in string.whitespace:
optval = optval[:pos]
optval = string.strip(optval)
# allow empty values
if optval == '""':
@ -319,7 +375,7 @@ class ConfigParser:
# raised at the end of the file and will contain a
# list of all bogus lines
if not e:
e = ParsingError(fp.name)
e = ParsingError(fpname)
e.append(lineno, `line`)
# if any parsing errors occurred, raise an exception
if e:

View File

@ -33,6 +33,8 @@ Exception(*)
| |
| +-- IOError
| +-- OSError(*)
| |
| +-- WindowsError(*)
|
+-- EOFError
+-- RuntimeError
@ -40,6 +42,9 @@ Exception(*)
| +-- NotImplementedError(*)
|
+-- NameError
| |
| +-- UnboundLocalError(*)
|
+-- AttributeError
+-- SyntaxError
+-- TypeError
@ -56,6 +61,9 @@ Exception(*)
| +-- FloatingPointError
|
+-- ValueError
| |
| +-- UnicodeError(*)
|
+-- SystemError
+-- MemoryError
"""
@ -136,6 +144,10 @@ class OSError(EnvironmentError):
"""OS system call failed."""
pass
class WindowsError(OSError):
"""MS-Windows OS system call failed."""
pass
class RuntimeError(StandardError):
"""Unspecified run-time error."""
pass
@ -208,7 +220,15 @@ class AttributeError(StandardError):
pass
class NameError(StandardError):
"""Name not found locally or globally."""
"""Name not found globally."""
pass
class UnboundLocalError(NameError):
"""Local name referenced but not bound to a value."""
pass
class UnicodeError(ValueError):
"""Unicode related error."""
pass
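The effect of the new subclasses can be seen directly; a small sketch:

def f():
    print spam        # 'spam' is local (assigned below), so this
    spam = 1          # raises UnboundLocalError, not plain NameError

try:
    f()
except NameError:
    # UnboundLocalError subclasses NameError, so it is caught here
    print 'caught'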
class MemoryError(StandardError):

View File

@ -73,11 +73,11 @@ XXX Possible additions:
"""
import sys, os
import sys, os, stat
_state = None
def input(files=(), inplace=0, backup=""):
def input(files=None, inplace=0, backup=""):
global _state
if _state and _state._file:
raise RuntimeError, "input() already active"
@ -123,15 +123,16 @@ def isstdin():
class FileInput:
def __init__(self, files=(), inplace=0, backup=""):
def __init__(self, files=None, inplace=0, backup=""):
if type(files) == type(''):
files = (files,)
else:
files = tuple(files)
if files is None:
files = sys.argv[1:]
if not files:
files = tuple(sys.argv[1:])
if not files:
files = ('-',)
files = ('-',)
else:
files = tuple(files)
self._files = files
self._inplace = inplace
self._backup = backup
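With the new default, a bare input() call falls back to sys.argv[1:], then stdin; a usage sketch:

import fileinput

# Echo all input lines, tagged with their origin
for line in fileinput.input():
    print '%s:%d: %s' % (fileinput.filename(), fileinput.lineno(), line[:-1])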
@ -203,10 +204,22 @@ class FileInput:
self._filename + (self._backup or ".bak"))
try: os.unlink(self._backupfilename)
except os.error: pass
# The next three lines may raise IOError
# The next few lines may raise IOError
os.rename(self._filename, self._backupfilename)
self._file = open(self._backupfilename, "r")
self._output = open(self._filename, "w")
try:
perm = os.fstat(self._file.fileno())[stat.ST_MODE]
except:
self._output = open(self._filename, "w")
else:
fd = os.open(self._filename,
os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
perm)
self._output = os.fdopen(fd, "w")
try:
os.chmod(self._filename, perm)
except:
pass
self._savestdout = sys.stdout
sys.stdout = self._output
else:
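The fstat/fdopen dance above preserves the original file's permission bits when filtering in place; a sketch of the in-place mode it supports (the filename is invented):

import fileinput, string

# Rewrites data.txt in place, keeping the original as data.txt.bak;
# whatever is printed becomes the new file contents.
for line in fileinput.input('data.txt', inplace=1, backup='.bak'):
    # each input line ends in a newline; print adds it back
    print string.upper(line[:-1])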

View File

@ -1,3 +1,23 @@
"""Generic output formatting.
Formatter objects transform an abstract flow of formatting events into
specific output events on writer objects. Formatters manage several stack
structures to allow various properties of a writer object to be changed and
restored; writers need not be able to handle relative changes nor any sort
of ``change back'' operation. Specific writer properties which may be
controlled via formatter objects are horizontal alignment, font, and left
margin indentations. A mechanism is provided which supports providing
arbitrary, non-exclusive style settings to a writer as well. Additional
interfaces facilitate formatting events which are not reversible, such as
paragraph separation.
Writer objects encapsulate device interfaces. Abstract devices, such as
file formats, are supported as well as physical devices. The provided
implementations all work with abstract devices. The interface makes
available mechanisms for setting the properties which formatter objects
manage and inserting data into the output.
"""
import string
import sys
from types import StringType

View File

@ -1,4 +1,4 @@
# Gopher protocol client interface
"""Gopher protocol client interface."""
import string
@ -29,180 +29,180 @@ A_IMAGE = 'I'
A_WHOIS = 'w'
A_QUERY = 'q'
A_GIF = 'g'
A_HTML = 'h' # HTML file
A_WWW = 'w' # WWW address
A_HTML = 'h' # HTML file
A_WWW = 'w' # WWW address
A_PLUS_IMAGE = ':'
A_PLUS_MOVIE = ';'
A_PLUS_SOUND = '<'
# Function mapping all file types to strings; unknown types become TYPE='x'
_names = dir()
_type_to_name_map = {}
def type_to_name(gtype):
global _type_to_name_map
if _type_to_name_map=={}:
for name in _names:
if name[:2] == 'A_':
_type_to_name_map[eval(name)] = name[2:]
if _type_to_name_map.has_key(gtype):
return _type_to_name_map[gtype]
return 'TYPE=' + `gtype`
"""Map all file types to strings; unknown types become TYPE='x'."""
global _type_to_name_map
if _type_to_name_map=={}:
for name in _names:
if name[:2] == 'A_':
_type_to_name_map[eval(name)] = name[2:]
if _type_to_name_map.has_key(gtype):
return _type_to_name_map[gtype]
return 'TYPE=' + `gtype`
# Names for characters and strings
CRLF = '\r\n'
TAB = '\t'
# Send a selector to a given host and port, return a file with the reply
def send_selector(selector, host, port = 0):
import socket
import string
if not port:
i = string.find(host, ':')
if i >= 0:
host, port = host[:i], string.atoi(host[i+1:])
if not port:
port = DEF_PORT
elif type(port) == type(''):
port = string.atoi(port)
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect(host, port)
s.send(selector + CRLF)
s.shutdown(1)
return s.makefile('rb')
"""Send a selector to a given host and port, return a file with the reply."""
import socket
import string
if not port:
i = string.find(host, ':')
if i >= 0:
host, port = host[:i], string.atoi(host[i+1:])
if not port:
port = DEF_PORT
elif type(port) == type(''):
port = string.atoi(port)
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect((host, port))
s.send(selector + CRLF)
s.shutdown(1)
return s.makefile('rb')
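A usage sketch combining send_selector() with the directory parser below (the host is a placeholder, not from this diff):

import gopherlib

# Fetch a server's root directory and list its entries
f = gopherlib.send_selector('', 'gopher.example.com')
for entry in gopherlib.get_directory(f):
    print entry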
# Send a selector and a query string
def send_query(selector, query, host, port = 0):
return send_selector(selector + '\t' + query, host, port)
"""Send a selector and a query string."""
return send_selector(selector + '\t' + query, host, port)
# Takes a path as returned by urlparse and returns the appropriate selector
def path_to_selector(path):
if path=="/":
return "/"
else:
return path[2:] # Cuts initial slash and data type identifier
"""Takes a path as returned by urlparse and returns the appropriate selector."""
if path=="/":
return "/"
else:
return path[2:] # Cuts initial slash and data type identifier
# Takes a path as returned by urlparse and maps it to a string
# See section 3.4 of RFC 1738 for details
def path_to_datatype_name(path):
if path=="/":
# No way to tell, although "INDEX" is likely
return "TYPE='unknown'"
else:
return type_to_name(path[1])
"""Takes a path as returned by urlparse and maps it to a string.
See section 3.4 of RFC 1738 for details."""
if path=="/":
# No way to tell, although "INDEX" is likely
return "TYPE='unknown'"
else:
return type_to_name(path[1])
# The following functions interpret the data returned by the gopher
# server according to the expected type, e.g. textfile or directory
# Get a directory in the form of a list of entries
def get_directory(f):
import string
list = []
while 1:
line = f.readline()
if not line:
print '(Unexpected EOF from server)'
break
if line[-2:] == CRLF:
line = line[:-2]
elif line[-1:] in CRLF:
line = line[:-1]
if line == '.':
break
if not line:
print '(Empty line from server)'
continue
gtype = line[0]
parts = string.splitfields(line[1:], TAB)
if len(parts) < 4:
print '(Bad line from server:', `line`, ')'
continue
if len(parts) > 4:
if parts[4:] != ['+']:
print '(Extra info from server:',
print parts[4:], ')'
else:
parts.append('')
parts.insert(0, gtype)
list.append(parts)
return list
"""Get a directory in the form of a list of entries."""
import string
list = []
while 1:
line = f.readline()
if not line:
print '(Unexpected EOF from server)'
break
if line[-2:] == CRLF:
line = line[:-2]
elif line[-1:] in CRLF:
line = line[:-1]
if line == '.':
break
if not line:
print '(Empty line from server)'
continue
gtype = line[0]
parts = string.splitfields(line[1:], TAB)
if len(parts) < 4:
print '(Bad line from server:', `line`, ')'
continue
if len(parts) > 4:
if parts[4:] != ['+']:
print '(Extra info from server:',
print parts[4:], ')'
else:
parts.append('')
parts.insert(0, gtype)
list.append(parts)
return list
# Get a text file as a list of lines, with trailing CRLF stripped
def get_textfile(f):
list = []
get_alt_textfile(f, list.append)
return list
"""Get a text file as a list of lines, with trailing CRLF stripped."""
list = []
get_alt_textfile(f, list.append)
return list
# Get a text file and pass each line to a function, with trailing CRLF stripped
def get_alt_textfile(f, func):
while 1:
line = f.readline()
if not line:
print '(Unexpected EOF from server)'
break
if line[-2:] == CRLF:
line = line[:-2]
elif line[-1:] in CRLF:
line = line[:-1]
if line == '.':
break
if line[:2] == '..':
line = line[1:]
func(line)
"""Get a text file and pass each line to a function, with trailing CRLF stripped."""
while 1:
line = f.readline()
if not line:
print '(Unexpected EOF from server)'
break
if line[-2:] == CRLF:
line = line[:-2]
elif line[-1:] in CRLF:
line = line[:-1]
if line == '.':
break
if line[:2] == '..':
line = line[1:]
func(line)
# Get a binary file as one solid data block
def get_binary(f):
data = f.read()
return data
"""Get a binary file as one solid data block."""
data = f.read()
return data
# Get a binary file and pass each block to a function
def get_alt_binary(f, func, blocksize):
while 1:
data = f.read(blocksize)
if not data:
break
func(data)
"""Get a binary file and pass each block to a function."""
while 1:
data = f.read(blocksize)
if not data:
break
func(data)
# Trivial test program
def test():
import sys
import getopt
opts, args = getopt.getopt(sys.argv[1:], '')
selector = DEF_SELECTOR
type = selector[0]
host = DEF_HOST
port = DEF_PORT
if args:
host = args[0]
args = args[1:]
if args:
type = args[0]
args = args[1:]
if len(type) > 1:
type, selector = type[0], type
else:
selector = ''
if args:
selector = args[0]
args = args[1:]
query = ''
if args:
query = args[0]
args = args[1:]
if type == A_INDEX:
f = send_query(selector, query, host)
else:
f = send_selector(selector, host)
if type == A_TEXT:
list = get_textfile(f)
for item in list: print item
elif type in (A_MENU, A_INDEX):
list = get_directory(f)
for item in list: print item
else:
data = get_binary(f)
print 'binary data:', len(data), 'bytes:', `data[:100]`[:40]
"""Trivial test program."""
import sys
import getopt
opts, args = getopt.getopt(sys.argv[1:], '')
selector = DEF_SELECTOR
type = selector[0]
host = DEF_HOST
port = DEF_PORT
if args:
host = args[0]
args = args[1:]
if args:
type = args[0]
args = args[1:]
if len(type) > 1:
type, selector = type[0], type
else:
selector = ''
if args:
selector = args[0]
args = args[1:]
query = ''
if args:
query = args[0]
args = args[1:]
if type == A_INDEX:
f = send_query(selector, query, host)
else:
f = send_selector(selector, host)
if type == A_TEXT:
list = get_textfile(f)
for item in list: print item
elif type in (A_MENU, A_INDEX):
list = get_directory(f)
for item in list: print item
else:
data = get_binary(f)
print 'binary data:', len(data), 'bytes:', `data[:100]`[:40]
# Run the test when run as script
if __name__ == '__main__':
test()
test()

View File

@ -1,105 +1,257 @@
# Proposed entity definitions for HTML, taken from
# http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_14.html
"""HTML character entity references."""
entitydefs = {
'lt': '<',
'gt': '>',
'amp': '&',
'quot': '"',
'nbsp': chr(160), # no-break space
'iexcl': chr(161), # inverted exclamation mark
'cent': chr(162), # cent sign
'pound': chr(163), # pound sterling sign
'curren': chr(164), # general currency sign
'yen': chr(165), # yen sign
'brvbar': chr(166), # broken (vertical) bar
'sect': chr(167), # section sign
'uml': chr(168), # umlaut (dieresis)
'copy': chr(169), # copyright sign
'ordf': chr(170), # ordinal indicator, feminine
'laquo': chr(171), # angle quotation mark, left
'not': chr(172), # not sign
'shy': chr(173), # soft hyphen
'reg': chr(174), # registered sign
'macr': chr(175), # macron
'deg': chr(176), # degree sign
'plusmn': chr(177), # plus-or-minus sign
'sup2': chr(178), # superscript two
'sup3': chr(179), # superscript three
'acute': chr(180), # acute accent
'micro': chr(181), # micro sign
'para': chr(182), # pilcrow (paragraph sign)
'middot': chr(183), # middle dot
'cedil': chr(184), # cedilla
'sup1': chr(185), # superscript one
'ordm': chr(186), # ordinal indicator, masculine
'raquo': chr(187), # angle quotation mark, right
'frac14': chr(188), # fraction one-quarter
'frac12': chr(189), # fraction one-half
'frac34': chr(190), # fraction three-quarters
'iquest': chr(191), # inverted question mark
'Agrave': chr(192), # capital A, grave accent
'Aacute': chr(193), # capital A, acute accent
'Acirc': chr(194), # capital A, circumflex accent
'Atilde': chr(195), # capital A, tilde
'Auml': chr(196), # capital A, dieresis or umlaut mark
'Aring': chr(197), # capital A, ring
'AElig': chr(198), # capital AE diphthong (ligature)
'Ccedil': chr(199), # capital C, cedilla
'Egrave': chr(200), # capital E, grave accent
'Eacute': chr(201), # capital E, acute accent
'Ecirc': chr(202), # capital E, circumflex accent
'Euml': chr(203), # capital E, dieresis or umlaut mark
'Igrave': chr(204), # capital I, grave accent
'Iacute': chr(205), # capital I, acute accent
'Icirc': chr(206), # capital I, circumflex accent
'Iuml': chr(207), # capital I, dieresis or umlaut mark
'ETH': chr(208), # capital Eth, Icelandic
'Ntilde': chr(209), # capital N, tilde
'Ograve': chr(210), # capital O, grave accent
'Oacute': chr(211), # capital O, acute accent
'Ocirc': chr(212), # capital O, circumflex accent
'Otilde': chr(213), # capital O, tilde
'Ouml': chr(214), # capital O, dieresis or umlaut mark
'times': chr(215), # multiply sign
'Oslash': chr(216), # capital O, slash
'Ugrave': chr(217), # capital U, grave accent
'Uacute': chr(218), # capital U, acute accent
'Ucirc': chr(219), # capital U, circumflex accent
'Uuml': chr(220), # capital U, dieresis or umlaut mark
'Yacute': chr(221), # capital Y, acute accent
'THORN': chr(222), # capital THORN, Icelandic
'szlig': chr(223), # small sharp s, German (sz ligature)
'agrave': chr(224), # small a, grave accent
'aacute': chr(225), # small a, acute accent
'acirc': chr(226), # small a, circumflex accent
'atilde': chr(227), # small a, tilde
'auml': chr(228), # small a, dieresis or umlaut mark
'aring': chr(229), # small a, ring
'aelig': chr(230), # small ae diphthong (ligature)
'ccedil': chr(231), # small c, cedilla
'egrave': chr(232), # small e, grave accent
'eacute': chr(233), # small e, acute accent
'ecirc': chr(234), # small e, circumflex accent
'euml': chr(235), # small e, dieresis or umlaut mark
'igrave': chr(236), # small i, grave accent
'iacute': chr(237), # small i, acute accent
'icirc': chr(238), # small i, circumflex accent
'iuml': chr(239), # small i, dieresis or umlaut mark
'eth': chr(240), # small eth, Icelandic
'ntilde': chr(241), # small n, tilde
'ograve': chr(242), # small o, grave accent
'oacute': chr(243), # small o, acute accent
'ocirc': chr(244), # small o, circumflex accent
'otilde': chr(245), # small o, tilde
'ouml': chr(246), # small o, dieresis or umlaut mark
'divide': chr(247), # divide sign
'oslash': chr(248), # small o, slash
'ugrave': chr(249), # small u, grave accent
'uacute': chr(250), # small u, acute accent
'ucirc': chr(251), # small u, circumflex accent
'uuml': chr(252), # small u, dieresis or umlaut mark
'yacute': chr(253), # small y, acute accent
'thorn': chr(254), # small thorn, Icelandic
'yuml': chr(255), # small y, dieresis or umlaut mark
'AElig': '\306', # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
'Aacute': '\301', # latin capital letter A with acute, U+00C1 ISOlat1
'Acirc': '\302', # latin capital letter A with circumflex, U+00C2 ISOlat1
'Agrave': '\300', # latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
'Alpha': '&#913;', # greek capital letter alpha, U+0391
'Aring': '\305', # latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
'Atilde': '\303', # latin capital letter A with tilde, U+00C3 ISOlat1
'Auml': '\304', # latin capital letter A with diaeresis, U+00C4 ISOlat1
'Beta': '&#914;', # greek capital letter beta, U+0392
'Ccedil': '\307', # latin capital letter C with cedilla, U+00C7 ISOlat1
'Chi': '&#935;', # greek capital letter chi, U+03A7
'Dagger': '&#8225;', # double dagger, U+2021 ISOpub
'Delta': '&#916;', # greek capital letter delta, U+0394 ISOgrk3
'ETH': '\320', # latin capital letter ETH, U+00D0 ISOlat1
'Eacute': '\311', # latin capital letter E with acute, U+00C9 ISOlat1
'Ecirc': '\312', # latin capital letter E with circumflex, U+00CA ISOlat1
'Egrave': '\310', # latin capital letter E with grave, U+00C8 ISOlat1
'Epsilon': '&#917;', # greek capital letter epsilon, U+0395
'Eta': '&#919;', # greek capital letter eta, U+0397
'Euml': '\313', # latin capital letter E with diaeresis, U+00CB ISOlat1
'Gamma': '&#915;', # greek capital letter gamma, U+0393 ISOgrk3
'Iacute': '\315', # latin capital letter I with acute, U+00CD ISOlat1
'Icirc': '\316', # latin capital letter I with circumflex, U+00CE ISOlat1
'Igrave': '\314', # latin capital letter I with grave, U+00CC ISOlat1
'Iota': '&#921;', # greek capital letter iota, U+0399
'Iuml': '\317', # latin capital letter I with diaeresis, U+00CF ISOlat1
'Kappa': '&#922;', # greek capital letter kappa, U+039A
'Lambda': '&#923;', # greek capital letter lambda, U+039B ISOgrk3
'Mu': '&#924;', # greek capital letter mu, U+039C
'Ntilde': '\321', # latin capital letter N with tilde, U+00D1 ISOlat1
'Nu': '&#925;', # greek capital letter nu, U+039D
'OElig': '&#338;', # latin capital ligature OE, U+0152 ISOlat2
'Oacute': '\323', # latin capital letter O with acute, U+00D3 ISOlat1
'Ocirc': '\324', # latin capital letter O with circumflex, U+00D4 ISOlat1
'Ograve': '\322', # latin capital letter O with grave, U+00D2 ISOlat1
'Omega': '&#937;', # greek capital letter omega, U+03A9 ISOgrk3
'Omicron': '&#927;', # greek capital letter omicron, U+039F
'Oslash': '\330', # latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
'Otilde': '\325', # latin capital letter O with tilde, U+00D5 ISOlat1
'Ouml': '\326', # latin capital letter O with diaeresis, U+00D6 ISOlat1
'Phi': '&#934;', # greek capital letter phi, U+03A6 ISOgrk3
'Pi': '&#928;', # greek capital letter pi, U+03A0 ISOgrk3
'Prime': '&#8243;', # double prime = seconds = inches, U+2033 ISOtech
'Psi': '&#936;', # greek capital letter psi, U+03A8 ISOgrk3
'Rho': '&#929;', # greek capital letter rho, U+03A1
'Scaron': '&#352;', # latin capital letter S with caron, U+0160 ISOlat2
'Sigma': '&#931;', # greek capital letter sigma, U+03A3 ISOgrk3
'THORN': '\336', # latin capital letter THORN, U+00DE ISOlat1
'Tau': '&#932;', # greek capital letter tau, U+03A4
'Theta': '&#920;', # greek capital letter theta, U+0398 ISOgrk3
'Uacute': '\332', # latin capital letter U with acute, U+00DA ISOlat1
'Ucirc': '\333', # latin capital letter U with circumflex, U+00DB ISOlat1
'Ugrave': '\331', # latin capital letter U with grave, U+00D9 ISOlat1
'Upsilon': '&#933;', # greek capital letter upsilon, U+03A5 ISOgrk3
'Uuml': '\334', # latin capital letter U with diaeresis, U+00DC ISOlat1
'Xi': '&#926;', # greek capital letter xi, U+039E ISOgrk3
'Yacute': '\335', # latin capital letter Y with acute, U+00DD ISOlat1
'Yuml': '&#376;', # latin capital letter Y with diaeresis, U+0178 ISOlat2
'Zeta': '&#918;', # greek capital letter zeta, U+0396
'aacute': '\341', # latin small letter a with acute, U+00E1 ISOlat1
'acirc': '\342', # latin small letter a with circumflex, U+00E2 ISOlat1
'acute': '\264', # acute accent = spacing acute, U+00B4 ISOdia
'aelig': '\346', # latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
'agrave': '\340', # latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
'alefsym': '&#8501;', # alef symbol = first transfinite cardinal, U+2135 NEW
'alpha': '&#945;', # greek small letter alpha, U+03B1 ISOgrk3
'amp': '\46', # ampersand, U+0026 ISOnum
'and': '&#8743;', # logical and = wedge, U+2227 ISOtech
'ang': '&#8736;', # angle, U+2220 ISOamso
'aring': '\345', # latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
'asymp': '&#8776;', # almost equal to = asymptotic to, U+2248 ISOamsr
'atilde': '\343', # latin small letter a with tilde, U+00E3 ISOlat1
'auml': '\344', # latin small letter a with diaeresis, U+00E4 ISOlat1
'bdquo': '&#8222;', # double low-9 quotation mark, U+201E NEW
'beta': '&#946;', # greek small letter beta, U+03B2 ISOgrk3
'brvbar': '\246', # broken bar = broken vertical bar, U+00A6 ISOnum
'bull': '&#8226;', # bullet = black small circle, U+2022 ISOpub
'cap': '&#8745;', # intersection = cap, U+2229 ISOtech
'ccedil': '\347', # latin small letter c with cedilla, U+00E7 ISOlat1
'cedil': '\270', # cedilla = spacing cedilla, U+00B8 ISOdia
'cent': '\242', # cent sign, U+00A2 ISOnum
'chi': '&#967;', # greek small letter chi, U+03C7 ISOgrk3
'circ': '&#710;', # modifier letter circumflex accent, U+02C6 ISOpub
'clubs': '&#9827;', # black club suit = shamrock, U+2663 ISOpub
'cong': '&#8773;', # approximately equal to, U+2245 ISOtech
'copy': '\251', # copyright sign, U+00A9 ISOnum
'crarr': '&#8629;', # downwards arrow with corner leftwards = carriage return, U+21B5 NEW
'cup': '&#8746;', # union = cup, U+222A ISOtech
'curren': '\244', # currency sign, U+00A4 ISOnum
'dArr': '&#8659;', # downwards double arrow, U+21D3 ISOamsa
'dagger': '&#8224;', # dagger, U+2020 ISOpub
'darr': '&#8595;', # downwards arrow, U+2193 ISOnum
'deg': '\260', # degree sign, U+00B0 ISOnum
'delta': '&#948;', # greek small letter delta, U+03B4 ISOgrk3
'diams': '&#9830;', # black diamond suit, U+2666 ISOpub
'divide': '\367', # division sign, U+00F7 ISOnum
'eacute': '\351', # latin small letter e with acute, U+00E9 ISOlat1
'ecirc': '\352', # latin small letter e with circumflex, U+00EA ISOlat1
'egrave': '\350', # latin small letter e with grave, U+00E8 ISOlat1
'empty': '&#8709;', # empty set = null set = diameter, U+2205 ISOamso
'emsp': '&#8195;', # em space, U+2003 ISOpub
'ensp': '&#8194;', # en space, U+2002 ISOpub
'epsilon': '&#949;', # greek small letter epsilon, U+03B5 ISOgrk3
'equiv': '&#8801;', # identical to, U+2261 ISOtech
'eta': '&#951;', # greek small letter eta, U+03B7 ISOgrk3
'eth': '\360', # latin small letter eth, U+00F0 ISOlat1
'euml': '\353', # latin small letter e with diaeresis, U+00EB ISOlat1
'euro': '&#8364;', # euro sign, U+20AC NEW
'exist': '&#8707;', # there exists, U+2203 ISOtech
'fnof': '&#402;', # latin small f with hook = function = florin, U+0192 ISOtech
'forall': '&#8704;', # for all, U+2200 ISOtech
'frac12': '\275', # vulgar fraction one half = fraction one half, U+00BD ISOnum
'frac14': '\274', # vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
'frac34': '\276', # vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
'frasl': '&#8260;', # fraction slash, U+2044 NEW
'gamma': '&#947;', # greek small letter gamma, U+03B3 ISOgrk3
'ge': '&#8805;', # greater-than or equal to, U+2265 ISOtech
'gt': '\76', # greater-than sign, U+003E ISOnum
'hArr': '&#8660;', # left right double arrow, U+21D4 ISOamsa
'harr': '&#8596;', # left right arrow, U+2194 ISOamsa
'hearts': '&#9829;', # black heart suit = valentine, U+2665 ISOpub
'hellip': '&#8230;', # horizontal ellipsis = three dot leader, U+2026 ISOpub
'iacute': '\355', # latin small letter i with acute, U+00ED ISOlat1
'icirc': '\356', # latin small letter i with circumflex, U+00EE ISOlat1
'iexcl': '\241', # inverted exclamation mark, U+00A1 ISOnum
'igrave': '\354', # latin small letter i with grave, U+00EC ISOlat1
'image': '&#8465;', # blackletter capital I = imaginary part, U+2111 ISOamso
'infin': '&#8734;', # infinity, U+221E ISOtech
'int': '&#8747;', # integral, U+222B ISOtech
'iota': '&#953;', # greek small letter iota, U+03B9 ISOgrk3
'iquest': '\277', # inverted question mark = turned question mark, U+00BF ISOnum
'isin': '&#8712;', # element of, U+2208 ISOtech
'iuml': '\357', # latin small letter i with diaeresis, U+00EF ISOlat1
'kappa': '&#954;', # greek small letter kappa, U+03BA ISOgrk3
'lArr': '&#8656;', # leftwards double arrow, U+21D0 ISOtech
'lambda': '&#955;', # greek small letter lambda, U+03BB ISOgrk3
'lang': '&#9001;', # left-pointing angle bracket = bra, U+2329 ISOtech
'laquo': '\253', # left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
'larr': '&#8592;', # leftwards arrow, U+2190 ISOnum
'lceil': '&#8968;', # left ceiling = apl upstile, U+2308 ISOamsc
'ldquo': '&#8220;', # left double quotation mark, U+201C ISOnum
'le': '&#8804;', # less-than or equal to, U+2264 ISOtech
'lfloor': '&#8970;', # left floor = apl downstile, U+230A ISOamsc
'lowast': '&#8727;', # asterisk operator, U+2217 ISOtech
'loz': '&#9674;', # lozenge, U+25CA ISOpub
'lrm': '&#8206;', # left-to-right mark, U+200E NEW RFC 2070
'lsaquo': '&#8249;', # single left-pointing angle quotation mark, U+2039 ISO proposed
'lsquo': '&#8216;', # left single quotation mark, U+2018 ISOnum
'lt': '\74', # less-than sign, U+003C ISOnum
'macr': '\257', # macron = spacing macron = overline = APL overbar, U+00AF ISOdia
'mdash': '&#8212;', # em dash, U+2014 ISOpub
'micro': '\265', # micro sign, U+00B5 ISOnum
'middot': '\267', # middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
'minus': '&#8722;', # minus sign, U+2212 ISOtech
'mu': '&#956;', # greek small letter mu, U+03BC ISOgrk3
'nabla': '&#8711;', # nabla = backward difference, U+2207 ISOtech
'nbsp': '\240', # no-break space = non-breaking space, U+00A0 ISOnum
'ndash': '&#8211;', # en dash, U+2013 ISOpub
'ne': '&#8800;', # not equal to, U+2260 ISOtech
'ni': '&#8715;', # contains as member, U+220B ISOtech
'not': '\254', # not sign, U+00AC ISOnum
'notin': '&#8713;', # not an element of, U+2209 ISOtech
'nsub': '&#8836;', # not a subset of, U+2284 ISOamsn
'ntilde': '\361', # latin small letter n with tilde, U+00F1 ISOlat1
'nu': '&#957;', # greek small letter nu, U+03BD ISOgrk3
'oacute': '\363', # latin small letter o with acute, U+00F3 ISOlat1
'ocirc': '\364', # latin small letter o with circumflex, U+00F4 ISOlat1
'oelig': '&#339;', # latin small ligature oe, U+0153 ISOlat2
'ograve': '\362', # latin small letter o with grave, U+00F2 ISOlat1
'oline': '&#8254;', # overline = spacing overscore, U+203E NEW
'omega': '&#969;', # greek small letter omega, U+03C9 ISOgrk3
'omicron': '&#959;', # greek small letter omicron, U+03BF NEW
'oplus': '&#8853;', # circled plus = direct sum, U+2295 ISOamsb
'or': '&#8744;', # logical or = vee, U+2228 ISOtech
'ordf': '\252', # feminine ordinal indicator, U+00AA ISOnum
'ordm': '\272', # masculine ordinal indicator, U+00BA ISOnum
'oslash': '\370', # latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
'otilde': '\365', # latin small letter o with tilde, U+00F5 ISOlat1
'otimes': '&#8855;', # circled times = vector product, U+2297 ISOamsb
'ouml': '\366', # latin small letter o with diaeresis, U+00F6 ISOlat1
'para': '\266', # pilcrow sign = paragraph sign, U+00B6 ISOnum
'part': '&#8706;', # partial differential, U+2202 ISOtech
'permil': '&#8240;', # per mille sign, U+2030 ISOtech
'perp': '&#8869;', # up tack = orthogonal to = perpendicular, U+22A5 ISOtech
'phi': '&#966;', # greek small letter phi, U+03C6 ISOgrk3
'pi': '&#960;', # greek small letter pi, U+03C0 ISOgrk3
'piv': '&#982;', # greek pi symbol, U+03D6 ISOgrk3
'plusmn': '\261', # plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
'pound': '\243', # pound sign, U+00A3 ISOnum
'prime': '&#8242;', # prime = minutes = feet, U+2032 ISOtech
'prod': '&#8719;', # n-ary product = product sign, U+220F ISOamsb
'prop': '&#8733;', # proportional to, U+221D ISOtech
'psi': '&#968;', # greek small letter psi, U+03C8 ISOgrk3
'quot': '\42', # quotation mark = APL quote, U+0022 ISOnum
'rArr': '&#8658;', # rightwards double arrow, U+21D2 ISOtech
'radic': '&#8730;', # square root = radical sign, U+221A ISOtech
'rang': '&#9002;', # right-pointing angle bracket = ket, U+232A ISOtech
'raquo': '\273', # right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
'rarr': '&#8594;', # rightwards arrow, U+2192 ISOnum
'rceil': '&#8969;', # right ceiling, U+2309 ISOamsc
'rdquo': '&#8221;', # right double quotation mark, U+201D ISOnum
'real': '&#8476;', # blackletter capital R = real part symbol, U+211C ISOamso
'reg': '\256', # registered sign = registered trade mark sign, U+00AE ISOnum
'rfloor': '&#8971;', # right floor, U+230B ISOamsc
'rho': '&#961;', # greek small letter rho, U+03C1 ISOgrk3
'rlm': '&#8207;', # right-to-left mark, U+200F NEW RFC 2070
'rsaquo': '&#8250;', # single right-pointing angle quotation mark, U+203A ISO proposed
'rsquo': '&#8217;', # right single quotation mark, U+2019 ISOnum
'sbquo': '&#8218;', # single low-9 quotation mark, U+201A NEW
'scaron': '&#353;', # latin small letter s with caron, U+0161 ISOlat2
'sdot': '&#8901;', # dot operator, U+22C5 ISOamsb
'sect': '\247', # section sign, U+00A7 ISOnum
'shy': '\255', # soft hyphen = discretionary hyphen, U+00AD ISOnum
'sigma': '&#963;', # greek small letter sigma, U+03C3 ISOgrk3
'sigmaf': '&#962;', # greek small letter final sigma, U+03C2 ISOgrk3
'sim': '&#8764;', # tilde operator = varies with = similar to, U+223C ISOtech
'spades': '&#9824;', # black spade suit, U+2660 ISOpub
'sub': '&#8834;', # subset of, U+2282 ISOtech
'sube': '&#8838;', # subset of or equal to, U+2286 ISOtech
'sum': '&#8721;', # n-ary summation, U+2211 ISOamsb
'sup': '&#8835;', # superset of, U+2283 ISOtech
'sup1': '\271', # superscript one = superscript digit one, U+00B9 ISOnum
'sup2': '\262', # superscript two = superscript digit two = squared, U+00B2 ISOnum
'sup3': '\263', # superscript three = superscript digit three = cubed, U+00B3 ISOnum
'supe': '&#8839;', # superset of or equal to, U+2287 ISOtech
'szlig': '\337', # latin small letter sharp s = ess-zed, U+00DF ISOlat1
'tau': '&#964;', # greek small letter tau, U+03C4 ISOgrk3
'there4': '&#8756;', # therefore, U+2234 ISOtech
'theta': '&#952;', # greek small letter theta, U+03B8 ISOgrk3
'thetasym': '&#977;', # greek small letter theta symbol, U+03D1 NEW
'thinsp': '&#8201;', # thin space, U+2009 ISOpub
'thorn': '\376', # latin small letter thorn with, U+00FE ISOlat1
'tilde': '&#732;', # small tilde, U+02DC ISOdia
'times': '\327', # multiplication sign, U+00D7 ISOnum
'trade': '&#8482;', # trade mark sign, U+2122 ISOnum
'uArr': '&#8657;', # upwards double arrow, U+21D1 ISOamsa
'uacute': '\372', # latin small letter u with acute, U+00FA ISOlat1
'uarr': '&#8593;', # upwards arrow, U+2191 ISOnum
'ucirc': '\373', # latin small letter u with circumflex, U+00FB ISOlat1
'ugrave': '\371', # latin small letter u with grave, U+00F9 ISOlat1
'uml': '\250', # diaeresis = spacing diaeresis, U+00A8 ISOdia
'upsih': '&#978;', # greek upsilon with hook symbol, U+03D2 NEW
'upsilon': '&#965;', # greek small letter upsilon, U+03C5 ISOgrk3
'uuml': '\374', # latin small letter u with diaeresis, U+00FC ISOlat1
'weierp': '&#8472;', # script capital P = power set = Weierstrass p, U+2118 ISOamso
'xi': '&#958;', # greek small letter xi, U+03BE ISOgrk3
'yacute': '\375', # latin small letter y with acute, U+00FD ISOlat1
'yen': '\245', # yen sign = yuan sign, U+00A5 ISOnum
'yuml': '\377', # latin small letter y with diaeresis, U+00FF ISOlat1
'zeta': '&#950;', # greek small letter zeta, U+03B6 ISOgrk3
'zwj': '&#8205;', # zero width joiner, U+200D NEW RFC 2070
'zwnj': '&#8204;', # zero width non-joiner, U+200C NEW RFC 2070
}
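A sketch of how the table is typically consumed, expanding known entity references in a string (the regex and helper are ad hoc, for illustration only):

import re
from htmlentitydefs import entitydefs

def expand_entities(s):
    # Replace &name; with its definition; leave unknown names alone
    def repl(m):
        return entitydefs.get(m.group(1), m.group(0))
    return re.sub('&([a-zA-Z]+);', repl, s)

print expand_entities('fish &amp; chips &copy; 2000')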

View File

@ -1,18 +1,20 @@
# Cache lines from files.
# This is intended to read lines from modules imported -- hence if a filename
# is not found, it will look down the module search path for a file by
# that name.
"""Cache lines from files.
This is intended to read lines from modules imported -- hence if a filename
is not found, it will look down the module search path for a file by
that name.
"""
import sys
import os
from stat import *
def getline(filename, lineno):
lines = getlines(filename)
if 1 <= lineno <= len(lines):
return lines[lineno-1]
else:
return ''
lines = getlines(filename)
if 1 <= lineno <= len(lines):
return lines[lineno-1]
else:
return ''
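A usage sketch; getline() never raises for a bad file or line number, it just returns an empty string:

import linecache

# Looks for linecache.py along sys.path, like an import would
print linecache.getline('linecache.py', 1)
print `linecache.getline('no-such-file.py', 1)`   # ''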
# The cache
@ -20,71 +22,71 @@ def getline(filename, lineno):
cache = {} # The cache
# Clear the cache entirely
def clearcache():
global cache
cache = {}
"""Clear the cache entirely."""
global cache
cache = {}
# Get the lines for a file from the cache.
# Update the cache if it doesn't contain an entry for this file already.
def getlines(filename):
if cache.has_key(filename):
return cache[filename][2]
else:
return updatecache(filename)
"""Get the lines for a file from the cache.
Update the cache if it doesn't contain an entry for this file already."""
if cache.has_key(filename):
return cache[filename][2]
else:
return updatecache(filename)
# Discard cache entries that are out of date.
# (This is not checked upon each call!)
def checkcache():
for filename in cache.keys():
size, mtime, lines, fullname = cache[filename]
try:
stat = os.stat(fullname)
except os.error:
del cache[filename]
continue
if size <> stat[ST_SIZE] or mtime <> stat[ST_MTIME]:
del cache[filename]
"""Discard cache entries that are out of date.
(This is not checked upon each call!)"""
for filename in cache.keys():
size, mtime, lines, fullname = cache[filename]
try:
stat = os.stat(fullname)
except os.error:
del cache[filename]
continue
if size <> stat[ST_SIZE] or mtime <> stat[ST_MTIME]:
del cache[filename]
# Update a cache entry and return its list of lines.
# If something's wrong, print a message, discard the cache entry,
# and return an empty list.
def updatecache(filename):
if cache.has_key(filename):
del cache[filename]
if not filename or filename[0] + filename[-1] == '<>':
return []
fullname = filename
try:
stat = os.stat(fullname)
except os.error, msg:
# Try looking through the module search path
basename = os.path.split(filename)[1]
for dirname in sys.path:
fullname = os.path.join(dirname, basename)
try:
stat = os.stat(fullname)
break
except os.error:
pass
else:
# No luck
## print '*** Cannot stat', filename, ':', msg
return []
try:
fp = open(fullname, 'r')
lines = fp.readlines()
fp.close()
except IOError, msg:
## print '*** Cannot open', fullname, ':', msg
return []
size, mtime = stat[ST_SIZE], stat[ST_MTIME]
cache[filename] = size, mtime, lines, fullname
return lines
"""Update a cache entry and return its list of lines.
If something's wrong, print a message, discard the cache entry,
and return an empty list."""
if cache.has_key(filename):
del cache[filename]
if not filename or filename[0] + filename[-1] == '<>':
return []
fullname = filename
try:
stat = os.stat(fullname)
except os.error, msg:
# Try looking through the module search path
basename = os.path.split(filename)[1]
for dirname in sys.path:
fullname = os.path.join(dirname, basename)
try:
stat = os.stat(fullname)
break
except os.error:
pass
else:
# No luck
## print '*** Cannot stat', filename, ':', msg
return []
try:
fp = open(fullname, 'r')
lines = fp.readlines()
fp.close()
except IOError, msg:
## print '*** Cannot open', fullname, ':', msg
return []
size, mtime = stat[ST_SIZE], stat[ST_MTIME]
cache[filename] = size, mtime, lines, fullname
return lines

View File

@ -1,5 +1,6 @@
"""Mac specific module for conversion between pathnames and URLs.
Do not import directly, use urllib instead."""
"""Macintosh-specific module for conversion between pathnames and URLs.
Do not import directly; use urllib instead."""
import string
import urllib
@ -13,6 +14,11 @@ def url2pathname(pathname):
tp = urllib.splittype(pathname)[0]
if tp and tp <> 'file':
raise RuntimeError, 'Cannot convert non-local URL to pathname'
# Turn starting /// into /, an empty hostname means current host
if pathname[:3] == '///':
pathname = pathname[2:]
elif pathname[:2] == '//':
raise RuntimeError, 'Cannot convert non-local URL to pathname'
components = string.split(pathname, '/')
# Remove . and embedded ..
i = 0

Lib/dos-8x3/mimepars.py Normal file
View File

@ -0,0 +1,246 @@
"""Generic MIME parser.
Classes:
MimeParser - Generic MIME parser.
Exceptions:
MimeError - Exception raised by MimeParser class.
XXX To do:
- Content-transfer-encoding issues
- Use Content-length header in rawbody()?
- Cache parts instead of reparsing each time
- The message strings in exceptions could use some work
"""
from types import * # Python types, not MIME types :-)
import string
import regex
import SubFile
import mimetools
MimeError = "MimeParser.MimeError" # Exception raised by this class
class MimeParser:
"""Generic MIME parser.
This requires a seekable file.
"""
def __init__(self, fp):
"""Constructor: store the file pointer and parse the headers."""
self._fp = fp
self._start = fp.tell()
self._headers = h = mimetools.Message(fp)
self._bodystart = fp.tell()
self._multipart = h.getmaintype() == 'multipart'
def multipart(self):
"""Return whether this is a multipart message."""
return self._multipart
def headers(self):
"""Return the headers of the MIME message, as a Message object."""
return self._headers
def rawbody(self):
"""Return the raw body of the MIME message, as a file-like object.
This is a fairly low-level interface -- for a multipart
message, you'd have to parse the body yourself, and it doesn't
translate the Content-transfer-encoding.
"""
# XXX Use Content-length to set end if it exists?
return SubFile.SubFile(self._fp, self._bodystart)
def body(self):
"""Return the body of a 1-part MIME message, as a file-like object.
This should interpret the Content-transfer-encoding, if any
(XXX currently it doesn't).
"""
if self._multipart:
raise MimeError, "body() only works for 1-part messages"
return self.rawbody()
_re_content_length = regex.compile('content-length:[ \t]*\([0-9]+\)',
regex.casefold)
def rawparts(self):
"""Return the raw body parts of a multipart MIME message.
This returns a list of SubFile() objects corresponding to the
parts. Note that the phantom part before the first separator
is returned too, as list item 0. If the final part is not
followed by a terminator, it is ignored, and this error is not
reported. (XXX: the error should be raised).
"""
if not self._multipart:
raise MimeError, "[raw]parts() only works for multipart messages"
h = self._headers
separator = h.getparam('boundary')
if not separator:
raise MimeError, "multipart boundary not specified"
separator = "--" + separator
terminator = separator + "--"
ns = len(separator)
list = []
f = self._fp
start = f.tell()
clength = -1
bodystart = -1
inheaders = 0
while 1:
end = f.tell()
line = f.readline()
if not line:
break
if line[:2] != "--" or line[:ns] != separator:
if inheaders:
re = self._re_content_length
if re.match(line) > 0:
try:
clength = string.atoi(re.group(1))
except string.atoi_error:
pass
if not string.strip(line):
inheaders = 0
bodystart = f.tell()
if clength > 0:
# Skip binary data
f.read(clength)
continue
line = string.strip(line)
if line == terminator or line == separator:
if clength >= 0:
# The Content-length header determines the subfile size
end = bodystart + clength
else:
# The final newline is not part of the content
end = end-1
list.append(SubFile.SubFile(f, start, end))
start = f.tell()
clength = -1
inheaders = 1
if line == terminator:
break
return list
def parts(self):
"""Return the parsed body parts of a multipart MIME message.
This returns a list of MimeParser() instances corresponding to
the parts. The phantom part before the first separator is not
included.
"""
return map(MimeParser, self.rawparts()[1:])
def getsubpartbyposition(self, indices):
part = self
for i in indices:
part = part.parts()[i]
return part
def getsubpartbyid(self, id):
h = self._headers
cid = h.getheader('content-id')
if cid and cid == id:
return self
if self._multipart:
for part in self.parts():
parser = MimeParser(part)
hit = parser.getsubpartbyid(id)
if hit:
return hit
return None
def index(self):
"""Return an index of the MIME file.
This parses the entire file and returns index information
about it, in the form of a tuple
(ctype, headers, body)
where 'ctype' is the content type string of the message
(e.g. `text/plain' or `multipart/mixed') and 'headers' is a
Message instance containing the message headers (which should
be treated as read-only).
The 'body' item depends on the content type:
- If it is an atomic message (anything except for content type
multipart/*), it is the file-like object returned by
self.body().
- For a content type of multipart/*, it is the list of
MimeParser() objects returned by self.parts().
"""
if self._multipart:
body = self.parts()
else:
body = self.body()
return self._headers.gettype(), self._headers, body
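A sketch of driving the parser directly (mail.txt is a placeholder; the file must be seekable, per the class docstring):

fp = open('mail.txt')
parser = MimeParser(fp)
ctype, headers, body = parser.index()
print ctype
if parser.multipart():
    print len(body), 'part(s)'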
def _show(parser, level=0):
"""Helper for _test()."""
ctype, headers, body = parser.index()
print ctype,
if type(body) == ListType:
nparts = len(body)
print "(%d part%s):" % (nparts, nparts != 1 and "s" or "")
n = 0
for part in body:
n = n+1
print "%*d." % (4*level+2, n),
_show(part, level+1)
else:
bodylines = body.readlines()
print "(%d header lines, %d body lines)" % (
len(headers.headers), len(bodylines))
for line in headers.headers + ['\n'] + bodylines:
if line[-1:] == '\n': line = line[:-1]
print " "*level + line
def _test(args = None):
"""Test program invoked when run as a script.
When a filename argument is specified, it reads from that file.
When no arguments are present, it defaults to 'testkp.txt' if it
exists, else it defaults to stdin.
"""
if not args:
import sys
args = sys.argv[1:]
if args:
fn = args[0]
else:
import os
fn = 'testkp.txt'
if not os.path.exists(fn):
fn = '-'
if fn == '-':
fp = sys.stdin
else:
fp = open(fn)
mp = MimeParser(fp)
_show(mp)
if __name__ == '__main__':
import sys
_test()

View File

@ -1,4 +1,4 @@
# Various tools used by MIME-reading or MIME-writing programs.
"""Various tools used by MIME-reading or MIME-writing programs."""
import os
@ -7,10 +7,9 @@ import string
import tempfile
# A derived class of rfc822.Message that knows about MIME headers and
# contains some hooks for decoding encoded and multipart messages.
class Message(rfc822.Message):
"""A derived class of rfc822.Message that knows about MIME headers and
contains some hooks for decoding encoded and multipart messages."""
def __init__(self, fp, seekable = 1):
rfc822.Message.__init__(self, fp, seekable)
@ -96,17 +95,17 @@ class Message(rfc822.Message):
# -----------------
# Return a random string usable as a multipart boundary.
# The method used is so that it is *very* unlikely that the same
# string of characters will ever occur again in the Universe,
# so the caller needn't check the data it is packing for the
# occurrence of the boundary.
#
# The boundary contains dots so you have to quote it in the header.
_prefix = None
def choose_boundary():
"""Return a random string usable as a multipart boundary.
The method used is so that it is *very* unlikely that the same
string of characters will ever occur again in the Universe,
so the caller needn't check the data it is packing for the
occurrence of the boundary.
The boundary contains dots so you have to quote it in the header."""
global _prefix
import time
import random
@ -131,6 +130,7 @@ def choose_boundary():
# Subroutines for decoding some common content-transfer-types
def decode(input, output, encoding):
"""Decode common content-transfer-encodings (base64, quopri, uuencode)."""
if encoding == 'base64':
import base64
return base64.decode(input, output)
@ -140,6 +140,8 @@ def decode(input, output, encoding):
if encoding in ('uuencode', 'x-uuencode', 'uue', 'x-uue'):
import uu
return uu.decode(input, output)
if encoding in ('7bit', '8bit'):
output.write(input.read())
if decodetab.has_key(encoding):
pipethrough(input, decodetab[encoding], output)
else:
@ -147,6 +149,7 @@ def decode(input, output, encoding):
'unknown Content-Transfer-Encoding: %s' % encoding
def encode(input, output, encoding):
"""Encode common content-transfer-encodings (base64, quopri, uuencode)."""
if encoding == 'base64':
import base64
return base64.encode(input, output)
@ -156,6 +159,8 @@ def encode(input, output, encoding):
if encoding in ('uuencode', 'x-uuencode', 'uue', 'x-uue'):
import uu
return uu.encode(input, output)
if encoding in ('7bit', '8bit'):
output.write(input.read())
if encodetab.has_key(encoding):
pipethrough(input, encodetab[encoding], output)
else:

View File

@ -30,8 +30,8 @@ import urllib
knownfiles = [
"/usr/local/etc/httpd/conf/mime.types",
"/usr/local/lib/netscape/mime.types",
"/usr/local/etc/httpd/conf/mime.types", # Apache 1.2
"/usr/local/etc/mime.types", # Apache 1.3
"/usr/local/etc/httpd/conf/mime.types", # Apache 1.2
"/usr/local/etc/mime.types", # Apache 1.3
]
inited = 0
@ -56,24 +56,24 @@ def guess_type(url):
init()
scheme, url = urllib.splittype(url)
if scheme == 'data':
# syntax of data URLs:
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
# mediatype := [ type "/" subtype ] *( ";" parameter )
# data := *urlchar
# parameter := attribute "=" value
# type/subtype defaults to "text/plain"
comma = string.find(url, ',')
if comma < 0:
# bad data URL
return None, None
semi = string.find(url, ';', 0, comma)
if semi >= 0:
type = url[:semi]
else:
type = url[:comma]
if '=' in type or '/' not in type:
type = 'text/plain'
return type, None # never compressed, so encoding is None
# syntax of data URLs:
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
# mediatype := [ type "/" subtype ] *( ";" parameter )
# data := *urlchar
# parameter := attribute "=" value
# type/subtype defaults to "text/plain"
comma = string.find(url, ',')
if comma < 0:
# bad data URL
return None, None
semi = string.find(url, ';', 0, comma)
if semi >= 0:
type = url[:semi]
else:
type = url[:comma]
if '=' in type or '/' not in type:
type = 'text/plain'
return type, None # never compressed, so encoding is None
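For instance, following the rules spelled out in the comments above:

import mimetypes

print mimetypes.guess_type('data:,Hello')              # ('text/plain', None)
print mimetypes.guess_type('data:text/html;base64,')   # ('text/html', None)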
base, ext = posixpath.splitext(url)
while suffix_map.has_key(ext):
base, ext = posixpath.splitext(base + suffix_map[ext])
@ -175,6 +175,7 @@ types_map = {
'.jpe': 'image/jpeg',
'.jpeg': 'image/jpeg',
'.jpg': 'image/jpeg',
'.js': 'application/x-javascript',
'.latex': 'application/x-latex',
'.man': 'application/x-troff-man',
'.me': 'application/x-troff-me',

View File

@ -1,28 +1,31 @@
# A class that makes each part of a multipart message "feel" like an
# ordinary file, as long as you use fp.readline(). Allows recursive
# use, for nested multipart messages. Probably best used together
# with module mimetools.
#
# Suggested use:
#
# real_fp = open(...)
# fp = MultiFile(real_fp)
#
# "read some lines from fp"
# fp.push(separator)
# while 1:
# "read lines from fp until it returns an empty string" (A)
# if not fp.next(): break
# fp.pop()
# "read remaining lines from fp until it returns an empty string"
#
# The latter sequence may be used recursively at (A).
# It is also allowed to use multiple push()...pop() sequences.
#
# If seekable is given as 0, the class code will not do the bookkeeping
# it normally attempts in order to make seeks relative to the beginning of the
# current file part. This may be useful when using MultiFile with a non-
# seekable stream object.
"""A readline()-style interface to the parts of a multipart message.
The MultiFile class makes each part of a multipart message "feel" like
an ordinary file, as long as you use fp.readline(). Allows recursive
use, for nested multipart messages. Probably best used together
with module mimetools.
Suggested use:
real_fp = open(...)
fp = MultiFile(real_fp)
"read some lines from fp"
fp.push(separator)
while 1:
"read lines from fp until it returns an empty string" (A)
if not fp.next(): break
fp.pop()
"read remaining lines from fp until it returns an empty string"
The latter sequence may be used recursively at (A).
It is also allowed to use multiple push()...pop() sequences.
If seekable is given as 0, the class code will not do the bookkeeping
it normally attempts in order to make seeks relative to the beginning of the
current file part. This may be useful when using MultiFile with a non-
seekable stream object.
"""
import sys
import string
@ -30,9 +33,9 @@ import string
Error = 'multifile.Error'
class MultiFile:
#
seekable = 0
#
def __init__(self, fp, seekable=1):
self.fp = fp
self.stack = [] # Grows down
@ -42,12 +45,12 @@ class MultiFile:
self.seekable = 1
self.start = self.fp.tell()
self.posstack = [] # Grows down
#
def tell(self):
if self.level > 0:
return self.lastpos
return self.fp.tell() - self.start
#
def seek(self, pos, whence=0):
here = self.tell()
if whence:
@ -64,7 +67,7 @@ class MultiFile:
self.fp.seek(pos + self.start)
self.level = 0
self.last = 0
#
def readline(self):
if self.level > 0:
return ''
@ -105,7 +108,7 @@ class MultiFile:
if self.level > 1:
raise Error,'Missing endmarker in MultiFile.readline()'
return ''
#
def readlines(self):
list = []
while 1:
@ -113,10 +116,10 @@ class MultiFile:
if not line: break
list.append(line)
return list
#
def read(self): # Note: no size argument -- read until EOF only!
return string.joinfields(self.readlines(), '')
#
def next(self):
while self.readline(): pass
if self.level > 1 or self.last:
@ -126,7 +129,7 @@ class MultiFile:
if self.seekable:
self.start = self.fp.tell()
return 1
#
def push(self, sep):
if self.level > 0:
raise Error, 'bad MultiFile.push() call'
@ -134,7 +137,7 @@ class MultiFile:
if self.seekable:
self.posstack.insert(0, self.start)
self.start = self.fp.tell()
#
def pop(self):
if self.stack == []:
raise Error, 'bad MultiFile.pop() call'
@ -149,12 +152,12 @@ class MultiFile:
del self.posstack[0]
if self.level > 0:
self.lastpos = abslastpos - self.start
#
def is_data(self, line):
return line[:2] <> '--'
#
def section_divider(self, str):
return "--" + str
#
def end_marker(self, str):
return "--" + str + "--"

View File

@ -1,6 +1,4 @@
#
# nturl2path: convert an NT pathname to a file URL and
# vice versa
"""Convert an NT pathname to a file URL and vice versa."""
def url2pathname(url):
""" Convert a URL to a DOS path...
@ -34,7 +32,6 @@ def url2pathname(url):
return path
def pathname2url(p):
""" Convert a DOS path name to a file url...
C:\foo\bar\spam.foo

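A quick round trip, assuming the C| drive-letter convention this version of the module uses in URLs:

    import nturl2path

    url = nturl2path.pathname2url(r'C:\foo\bar\spam.foo')
    print url                             # ///C|/foo/bar/spam.foo
    print nturl2path.url2pathname(url)    # C:\foo\bar\spam.foo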
View File

@ -1,64 +1,61 @@
#
# Start of posixfile.py
#
"""Extended file operations available in POSIX.
#
# Extended file operations
#
# f = posixfile.open(filename, [mode, [bufsize]])
# will create a new posixfile object
#
# f = posixfile.fileopen(fileobject)
# will create a posixfile object from a builtin file object
#
# f.file()
# will return the original builtin file object
#
# f.dup()
# will return a new file object based on a new filedescriptor
#
# f.dup2(fd)
# will return a new file object based on the given filedescriptor
#
# f.flags(mode)
# will turn on the associated flag (merge)
# mode can contain the following characters:
#
# (character representing a flag)
# a append only flag
# c close on exec flag
# n no delay flag
# s synchronization flag
# (modifiers)
# ! turn flags 'off' instead of default 'on'
# = copy flags 'as is' instead of default 'merge'
# ? return a string in which the characters represent the flags
# that are set
#
# note: - the '!' and '=' modifiers are mutually exclusive.
# - the '?' modifier will return the status of the flags after they
# have been changed by other characters in the mode string
#
# f.lock(mode [, len [, start [, whence]]])
# will (un)lock a region
# mode can contain the following characters:
#
# (character representing type of lock)
# u unlock
# r read lock
# w write lock
# (modifiers)
# | wait until the lock can be granted
# ? return the first lock conflicting with the requested lock
# or 'None' if there is no conflict. The lock returned is in the
# format (mode, len, start, whence, pid) where mode is a
# character representing the type of lock ('r' or 'w')
#
# note: - the '?' modifier prevents a region from being locked; it is
# query only
#
f = posixfile.open(filename, [mode, [bufsize]])
will create a new posixfile object
f = posixfile.fileopen(fileobject)
will create a posixfile object from a builtin file object
f.file()
will return the original builtin file object
f.dup()
will return a new file object based on a new filedescriptor
f.dup2(fd)
will return a new file object based on the given filedescriptor
f.flags(mode)
will turn on the associated flag (merge)
mode can contain the following characters:
(character representing a flag)
a append only flag
c close on exec flag
n no delay flag
s synchronization flag
(modifiers)
! turn flags 'off' instead of default 'on'
= copy flags 'as is' instead of default 'merge'
? return a string in which the characters represent the flags
that are set
note: - the '!' and '=' modifiers are mutually exclusive.
- the '?' modifier will return the status of the flags after they
have been changed by other characters in the mode string
f.lock(mode [, len [, start [, whence]]])
will (un)lock a region
mode can contain the following characters:
(character representing type of lock)
u unlock
r read lock
w write lock
(modifiers)
| wait until the lock can be granted
? return the first lock conflicting with the requested lock
or 'None' if there is no conflict. The lock returned is in the
format (mode, len, start, whence, pid) where mode is a
character representing the type of lock ('r' or 'w')
note: - the '?' modifier prevents a region from being locked; it is
query only
"""
class _posixfile_:
"""File wrapper class that provides extra POSIX file routines."""
states = ['open', 'closed']
#
@ -178,6 +175,7 @@ class _posixfile_:
# additions for AIX by Vladimir.Marangozov@imag.fr
import sys, os
if sys.platform in ('netbsd1',
'openbsd2',
'freebsd2', 'freebsd3',
'bsdos2', 'bsdos3', 'bsdos4'):
flock = struct.pack('lxxxxlxxxxlhh', \
@ -193,6 +191,7 @@ class _posixfile_:
if '?' in how:
if sys.platform in ('netbsd1',
'openbsd2',
'freebsd2', 'freebsd3',
'bsdos2', 'bsdos3', 'bsdos4'):
l_start, l_len, l_pid, l_type, l_whence = \
@ -213,13 +212,12 @@ class _posixfile_:
else:
return 'w', l_len, l_start, l_whence, l_pid
#
# Public routine to obtain a posixfile object
#
def open(name, mode='r', bufsize=-1):
"""Public routine to open a file as a posixfile object."""
return _posixfile_().open(name, mode, bufsize)
def fileopen(file):
"""Public routine to get a posixfile object from a Python file object."""
return _posixfile_().fileopen(file)
#
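Putting the flags and lock mini-languages together, a minimal locking sketch (POSIX only; /tmp/spam.lock is a made-up path):

    import posixfile

    f = posixfile.open('/tmp/spam.lock', 'w')
    f.lock('w|')          # write lock; '|' = wait until it can be granted
    f.write('locked\n')
    print f.flags('?')    # query which flag characters are currently set
    f.lock('u')           # unlock the region
    f.file().close()      # close via the underlying builtin file object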

View File

@ -1,13 +1,13 @@
# Module 'posixpath' -- common operations on Posix pathnames.
# Some of this can actually be useful on non-Posix systems too, e.g.
# for manipulation of the pathname component of URLs.
# The "os.path" name is an alias for this module on Posix systems;
# on other systems (e.g. Mac, Windows), os.path provides the same
# operations in a manner specific to that platform, and is an alias
# to another module (e.g. macpath, ntpath).
"""Common pathname manipulations, Posix version.
Instead of importing this module
directly, import os and refer to this module as os.path.
"""Common operations on Posix pathnames.
Instead of importing this module directly, import os and refer to
this module as os.path. The "os.path" name is an alias for this
module on Posix systems; on other systems (e.g. Mac, Windows),
os.path provides the same operations in a manner specific to that
platform, and is an alias to another module (e.g. macpath, ntpath).
Some of this can actually be useful on non-Posix systems too, e.g.
for manipulation of the pathname component of URLs.
"""
import os
@ -143,7 +143,7 @@ def getmtime(filename):
def getatime(filename):
"""Return the last access time of a file, reported by os.stat()."""
st = os.stat(filename)
return st[stat.ST_MTIME]
return st[stat.ST_ATIME]
# Is a path a symbolic link?
@ -254,7 +254,7 @@ def ismount(path):
# or to impose a different order of visiting.
def walk(top, func, arg):
"""walk(top,func,args) calls func(arg, d, files) for each directory "d"
"""walk(top,func,arg) calls func(arg, d, files) for each directory "d"
in the tree rooted at "top" (including "top" itself). "files" is a list
of all the files and subdirs in directory "d".
"""
@ -263,11 +263,10 @@ of all the files and subdirs in directory "d".
except os.error:
return
func(arg, top, names)
exceptions = ('.', '..')
for name in names:
if name not in exceptions:
name = join(top, name)
if isdir(name) and not islink(name):
st = os.lstat(name)
if stat.S_ISDIR(st[stat.ST_MODE]):
walk(name, func, arg)
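A minimal walk() sketch matching the corrected docstring (prints a summary per directory; /tmp is just an example root):

    import posixpath

    def visit(arg, dirname, names):
        print dirname, 'contains', len(names), 'names'

    posixpath.walk('/tmp', visit, None)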
@ -369,8 +368,8 @@ def normpath(path):
return slashes + string.joinfields(comps, '/')
# Return an absolute path.
def abspath(path):
"""Return an absolute path."""
if not isabs(path):
path = join(os.getcwd(), path)
return normpath(path)

View File

@ -7,7 +7,7 @@ import imp
MAGIC = imp.get_magic()
def wr_long(f, x):
"Internal; write a 32-bit int to a file in little-endian order."
"""Internal; write a 32-bit int to a file in little-endian order."""
f.write(chr( x & 0xff))
f.write(chr((x >> 8) & 0xff))
f.write(chr((x >> 16) & 0xff))

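The little-endian byte order wr_long() produces can be checked against an in-memory file (a sketch; wr_long is internal, as its docstring says):

    import py_compile
    from StringIO import StringIO

    f = StringIO()
    py_compile.wr_long(f, 0x12345678)
    print repr(f.getvalue())   # bytes 0x78 0x56 0x34 0x12: lowest byte first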
View File

@ -1,4 +1,4 @@
# A multi-producer, multi-consumer queue.
"""A multi-producer, multi-consumer queue."""
# define this exception to be compatible with Python 1.5's class
# exceptions, but also when -X option is used.
@ -15,7 +15,7 @@ except TypeError:
Full = 'Queue.Full'
class Queue:
def __init__(self, maxsize):
def __init__(self, maxsize=0):
"""Initialize a queue object with a given maximum size.
If maxsize is <= 0, the queue size is infinite.

View File

@ -1,5 +1,11 @@
# These bits are passed to regex.set_syntax() to choose among
# alternative regexp syntaxes.
"""Constants for selecting regexp syntaxes for the obsolete regex module.
This module is only for backward compatibility. "regex" has now
been replaced by the new regular expression module, "re".
These bits are passed to regex.set_syntax() to choose among
alternative regexp syntaxes.
"""
# 1 means plain parentheses serve as grouping, and backslash
# parentheses are needed for literal searching.

946
Lib/dos-8x3/rfc822-n.py Normal file
View File

@ -0,0 +1,946 @@
"""RFC-822 message manipulation class.
XXX This is only a very rough sketch of a full RFC-822 parser;
in particular the tokenizing of addresses does not adhere to all the
quoting rules.
Directions for use:
To create a Message object: first open a file, e.g.:
fp = open(file, 'r')
You can use any other legal way of getting an open file object, e.g. use
sys.stdin or call os.popen().
Then pass the open file object to the Message() constructor:
m = Message(fp)
This class can work with any input object that supports a readline
method. If the input object has seek and tell capability, the
rewindbody method will work; also illegal lines will be pushed back
onto the input stream. If the input object lacks seek but has an
`unread' method that can push back a line of input, Message will use
that to push back illegal lines. Thus this class can be used to parse
messages coming from a buffered stream.
The optional `seekable' argument is provided as a workaround for
certain stdio libraries in which tell() discards buffered data before
discovering that the lseek() system call doesn't work. For maximum
portability, you should set the seekable argument to zero to prevent
that initial tell() when passing in an unseekable object such as
a file object created from a socket object. If it is 1 on entry --
which it is by default -- the tell() method of the open file object is
called once; if this raises an exception, seekable is reset to 0. For
other nonzero values of seekable, this test is not made.
To get the text of a particular header there are several methods:
str = m.getheader(name)
str = m.getrawheader(name)
where name is the name of the header, e.g. 'Subject'.
The difference is that getheader() strips the leading and trailing
whitespace, while getrawheader() doesn't. Both functions retain
embedded whitespace (including newlines) exactly as they are
specified in the header, and leave the case of the text unchanged.
For addresses and address lists there are functions
realname, mailaddress = m.getaddr(name) and
list = m.getaddrlist(name)
where the latter returns a list of (realname, mailaddr) tuples.
There is also a method
time = m.getdate(name)
which parses a Date-like field and returns a time-compatible tuple,
i.e. a tuple such as returned by time.localtime() or accepted by
time.mktime().
See the class definition for lower level access methods.
There are also some utility functions here.
"""
# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
import string
import time
_blanklines = ('\r\n', '\n') # Optimization for islast()
class Message:
"""Represents a single RFC-822-compliant message."""
def __init__(self, fp, seekable = 1):
"""Initialize the class instance and read the headers."""
if seekable == 1:
# Exercise tell() to make sure it works
# (and then assume seek() works, too)
try:
fp.tell()
except:
seekable = 0
else:
seekable = 1
self.fp = fp
self.seekable = seekable
self.startofheaders = None
self.startofbody = None
#
if self.seekable:
try:
self.startofheaders = self.fp.tell()
except IOError:
self.seekable = 0
#
self.readheaders()
#
if self.seekable:
try:
self.startofbody = self.fp.tell()
except IOError:
self.seekable = 0
def rewindbody(self):
"""Rewind the file to the start of the body (if seekable)."""
if not self.seekable:
raise IOError, "unseekable file"
self.fp.seek(self.startofbody)
def readheaders(self):
"""Read header lines.
Read header lines up to the entirely blank line that
terminates them. The (normally blank) line that ends the
headers is skipped, but not included in the returned list.
If a non-header line ends the headers, (which is an error),
an attempt is made to backspace over it; it is never
included in the returned list.
The variable self.status is set to the empty string if all
went well, otherwise it is an error message.
The variable self.headers is a completely uninterpreted list
of lines contained in the header (so printing them will
reproduce the header exactly as it appears in the file).
"""
self.dict = {}
self.__gamh_cache = {}
self.__gh_cache = {}
self.unixfrom = ''
self.headers = list = []
self.status = ''
headerseen = ""
firstline = 1
startofline = unread = tell = None
if hasattr(self.fp, 'unread'):
unread = self.fp.unread
elif self.seekable:
tell = self.fp.tell
while 1:
if tell:
startofline = tell()
line = self.fp.readline()
if not line:
self.status = 'EOF in headers'
break
# Skip unix From name time lines
if firstline and line[:5] == 'From ':
self.unixfrom = self.unixfrom + line
continue
firstline = 0
if headerseen and line[0] in ' \t':
# It's a continuation line.
list.append(line)
self.__gamh_cache[headerseen].append(line)
x = string.lstrip(
"%s\n %s" % (self.dict[headerseen], string.strip(line)))
self.dict[headerseen] = x
self.__gh_cache[headerseen][-1] = x
continue
elif self.iscomment(line):
# It's a comment. Ignore it.
continue
elif self.islast(line):
# Note! No pushback here! The delimiter line gets eaten.
break
headerseen = self.isheader(line)
if headerseen:
# It's a legal header line, save it.
list.append(line)
l = self.__gamh_cache.get(headerseen)
if not l:
self.__gamh_cache[headerseen] = l = []
l.append(line)
x = string.strip(line[len(headerseen)+1:])
self.dict[headerseen] = x
l = self.__gh_cache.get(headerseen)
if not l:
self.__gh_cache[headerseen] = l = []
l.append(x)
continue
else:
# It's not a header line; throw it back and stop here.
if not self.dict:
self.status = 'No headers'
else:
self.status = 'Non-header line where header expected'
# Try to undo the read.
if unread:
unread(line)
elif tell:
self.fp.seek(startofline)
else:
self.status = self.status + '; bad seek'
break
def isheader(self, line):
"""Determine whether a given line is a legal header.
This method should return the header name, suitably canonicalized.
You may override this method in order to use Message parsing
on tagged data in RFC822-like formats with special header formats.
"""
i = string.find(line, ':')
if i > 0:
return string.lower(line[:i])
else:
return None
def islast(self, line):
"""Determine whether a line is a legal end of RFC-822 headers.
You may override this method if your application wants
to bend the rules, e.g. to strip trailing whitespace,
or to recognise MH template separators ('--------').
For convenience (e.g. for code reading from sockets) a
line consisting of \r\n also matches.
"""
return line in _blanklines
def iscomment(self, line):
"""Determine whether a line should be skipped entirely.
You may override this method in order to use Message parsing
on tagged data in RFC822-like formats that support embedded
comments or free-text data.
"""
return None
def getallmatchingheaders(self, name,
# speed hack:
lower = string.lower):
"""Find all header lines matching a given header name.
Look through the list of headers and find all lines
matching a given header name (and their continuation
lines). A list of the lines is returned, without
interpretation. If the header does not occur, an
empty list is returned. If the header occurs multiple
times, all occurrences are returned. Case is not
important in the header name.
"""
r = self.__gamh_cache.get(lower(name))
if r:
return r[:]
return []
def getfirstmatchingheader(self, name,
# speed hack:
lower = string.lower):
"""Get the first header line matching name.
This is similar to getallmatchingheaders, but it returns
only the first matching header (and its continuation
lines).
"""
l = self.__gamh_cache.get(lower(name))
if not l:
return []
r = []
for item in l:
if r and item[0] not in " \t":
break
r.append(item)
return r
def getrawheader(self, name):
"""A higher-level interface to getfirstmatchingheader().
Return a string containing the literal text of the
header but with the keyword stripped. All leading,
trailing and embedded whitespace is kept in the
string, however.
Return None if the header does not occur.
"""
list = self.getfirstmatchingheader(name)
if not list:
return None
list[0] = list[0][len(name) + 1:]
return string.joinfields(list, '')
def getheader(self, name, default=None):
"""Get the header value for a name.
This is the normal interface: it returns a stripped
version of the header value for a given header name,
or None if it doesn't exist. This uses the dictionary
version which finds the *last* such header.
"""
try:
return self.dict[string.lower(name)]
except KeyError:
return default
get = getheader
def getheaders(self, name,
# speed hack:
lower = string.lower):
"""Get all values for a header.
This returns a list of values for headers given more than once;
each value in the result list is stripped in the same way as the
result of getheader(). If the header is not given, return an
empty list.
"""
r = self.__gh_cache.get(lower(name))
if r:
return r[:]
return []
def getaddr(self, name):
"""Get a single address from a header, as a tuple.
An example return value:
('Guido van Rossum', 'guido@cwi.nl')
"""
# New, by Ben Escoto
alist = self.getaddrlist(name)
if alist:
return alist[0]
else:
return (None, None)
def getaddrlist(self, name):
"""Get a list of addresses from a header.
Retrieves a list of addresses from a header, where each address is a
tuple as returned by getaddr(). Scans all named headers, so it works
properly with multiple To: or Cc: headers for example.
"""
raw = []
for h in self.getallmatchingheaders(name):
if h[0] in ' \t':
raw.append(h)
else:
if raw:
raw.append(', ')
i = string.find(h, ':')
if i > 0:
addr = h[i+1:]
raw.append(addr)
alladdrs = string.join(raw, '')
a = AddrlistClass(alladdrs)
return a.getaddrlist()
def getdate(self, name):
"""Retrieve a date field from a header.
Retrieves a date field from the named header, returning
a tuple compatible with time.mktime().
"""
try:
data = self[name]
except KeyError:
return None
return parsedate(data)
def getdate_tz(self, name):
"""Retrieve a date field from a header as a 10-tuple.
The first 9 elements make up a tuple compatible with
time.mktime(), and the 10th is the offset of the poster's
time zone from GMT/UTC.
"""
try:
data = self[name]
except KeyError:
return None
return parsedate_tz(data)
# Access as a dictionary (only finds *last* header of each type):
def __len__(self):
"""Get the number of headers in a message."""
return len(self.dict)
def __getitem__(self, name):
"""Get a specific header, as from a dictionary."""
return self.dict[string.lower(name)]
def __setitem__(self, name, value):
"""Set the value of a header.
Note: This is not a perfect inversion of __getitem__, because
any changed headers get stuck at the end of the raw-headers list
rather than where the altered header was.
"""
del self[name] # Won't fail if it doesn't exist
self.dict[string.lower(name)] = value
text = name + ": " + value
lines = string.split(text, "\n")
for line in lines:
self.headers.append(line + "\n")
def __delitem__(self, name):
"""Delete all occurrences of a specific header, if it is present."""
name = string.lower(name)
if not self.dict.has_key(name):
return
del self.dict[name]
name = name + ':'
n = len(name)
list = []
hit = 0
for i in range(len(self.headers)):
line = self.headers[i]
if string.lower(line[:n]) == name:
hit = 1
elif line[:1] not in string.whitespace:
hit = 0
if hit:
list.append(i)
list.reverse()
for i in list:
del self.headers[i]
def has_key(self, name):
"""Determine whether a message contains the named header."""
return self.dict.has_key(string.lower(name))
def keys(self):
"""Get all of a message's header field names."""
return self.dict.keys()
def values(self):
"""Get all of a message's header field values."""
return self.dict.values()
def items(self):
"""Get all of a message's headers.
Returns a list of name, value tuples.
"""
return self.dict.items()
def __str__(self):
str = ''
for hdr in self.headers:
str = str + hdr
return str
# Utility functions
# -----------------
# XXX Should fix unquote() and quote() to be really conformant.
# XXX The inverses of the parse functions may also be useful.
def unquote(str):
"""Remove quotes from a string."""
if len(str) > 1:
if str[0] == '"' and str[-1:] == '"':
return str[1:-1]
if str[0] == '<' and str[-1:] == '>':
return str[1:-1]
return str
def quote(str):
"""Add quotes around a string."""
return '"%s"' % string.join(
string.split(
string.join(
string.split(str, '\\'),
'\\\\'),
'"'),
'\\"')
def parseaddr(address):
"""Parse an address into a (realname, mailaddr) tuple."""
a = AddrlistClass(address)
list = a.getaddrlist()
if not list:
return (None, None)
else:
return list[0]
class AddrlistClass:
"""Address parser class by Ben Escoto.
To understand what this class does, it helps to have a copy of
RFC-822 in front of you.
Note: this class interface is deprecated and may be removed in the future.
Use rfc822.AddressList instead.
"""
def __init__(self, field):
"""Initialize a new instance.
`field' is an unparsed address header field, containing
one or more addresses.
"""
self.specials = '()<>@,:;.\"[]'
self.pos = 0
self.LWS = ' \t'
self.CR = '\r\n'
self.atomends = self.specials + self.LWS + self.CR
self.field = field
self.commentlist = []
def gotonext(self):
"""Parse up to the start of the next address."""
while self.pos < len(self.field):
if self.field[self.pos] in self.LWS + '\n\r':
self.pos = self.pos + 1
elif self.field[self.pos] == '(':
self.commentlist.append(self.getcomment())
else: break
def getaddrlist(self):
"""Parse all addresses.
Returns a list containing all of the addresses.
"""
ad = self.getaddress()
if ad:
return ad + self.getaddrlist()
else: return []
def getaddress(self):
"""Parse the next address."""
self.commentlist = []
self.gotonext()
oldpos = self.pos
oldcl = self.commentlist
plist = self.getphraselist()
self.gotonext()
returnlist = []
if self.pos >= len(self.field):
# Bad email address technically, no domain.
if plist:
returnlist = [(string.join(self.commentlist), plist[0])]
elif self.field[self.pos] in '.@':
# email address is just an addrspec
# this isn't very efficient since we start over
self.pos = oldpos
self.commentlist = oldcl
addrspec = self.getaddrspec()
returnlist = [(string.join(self.commentlist), addrspec)]
elif self.field[self.pos] == ':':
# address is a group
returnlist = []
fieldlen = len(self.field)
self.pos = self.pos + 1
while self.pos < len(self.field):
self.gotonext()
if self.pos < fieldlen and self.field[self.pos] == ';':
self.pos = self.pos + 1
break
returnlist = returnlist + self.getaddress()
elif self.field[self.pos] == '<':
# Address is a phrase then a route addr
routeaddr = self.getrouteaddr()
if self.commentlist:
returnlist = [(string.join(plist) + ' (' + \
string.join(self.commentlist) + ')', routeaddr)]
else: returnlist = [(string.join(plist), routeaddr)]
else:
if plist:
returnlist = [(string.join(self.commentlist), plist[0])]
elif self.field[self.pos] in self.specials:
self.pos = self.pos + 1
self.gotonext()
if self.pos < len(self.field) and self.field[self.pos] == ',':
self.pos = self.pos + 1
return returnlist
def getrouteaddr(self):
"""Parse a route address (Return-path value).
This method just skips all the route stuff and returns the addrspec.
"""
if self.field[self.pos] != '<':
return
expectroute = 0
self.pos = self.pos + 1
self.gotonext()
adlist = None
while self.pos < len(self.field):
if expectroute:
self.getdomain()
expectroute = 0
elif self.field[self.pos] == '>':
self.pos = self.pos + 1
break
elif self.field[self.pos] == '@':
self.pos = self.pos + 1
expectroute = 1
elif self.field[self.pos] == ':':
self.pos = self.pos + 1
expectaddrspec = 1
else:
adlist = self.getaddrspec()
self.pos = self.pos + 1
break
self.gotonext()
return adlist
def getaddrspec(self):
"""Parse an RFC-822 addr-spec."""
aslist = []
self.gotonext()
while self.pos < len(self.field):
if self.field[self.pos] == '.':
aslist.append('.')
self.pos = self.pos + 1
elif self.field[self.pos] == '"':
aslist.append('"%s"' % self.getquote())
elif self.field[self.pos] in self.atomends:
break
else: aslist.append(self.getatom())
self.gotonext()
if self.pos >= len(self.field) or self.field[self.pos] != '@':
return string.join(aslist, '')
aslist.append('@')
self.pos = self.pos + 1
self.gotonext()
return string.join(aslist, '') + self.getdomain()
def getdomain(self):
"""Get the complete domain name from an address."""
sdlist = []
while self.pos < len(self.field):
if self.field[self.pos] in self.LWS:
self.pos = self.pos + 1
elif self.field[self.pos] == '(':
self.commentlist.append(self.getcomment())
elif self.field[self.pos] == '[':
sdlist.append(self.getdomainliteral())
elif self.field[self.pos] == '.':
self.pos = self.pos + 1
sdlist.append('.')
elif self.field[self.pos] in self.atomends:
break
else: sdlist.append(self.getatom())
return string.join(sdlist, '')
def getdelimited(self, beginchar, endchars, allowcomments = 1):
"""Parse a header fragment delimited by special characters.
`beginchar' is the start character for the fragment.
If self is not looking at an instance of `beginchar' then
getdelimited returns the empty string.
`endchars' is a sequence of allowable end-delimiting characters.
Parsing stops when one of these is encountered.
If `allowcomments' is non-zero, embedded RFC-822 comments
are allowed within the parsed fragment.
"""
if self.field[self.pos] != beginchar:
return ''
slist = ['']
quote = 0
self.pos = self.pos + 1
while self.pos < len(self.field):
if quote == 1:
slist.append(self.field[self.pos])
quote = 0
elif self.field[self.pos] in endchars:
self.pos = self.pos + 1
break
elif allowcomments and self.field[self.pos] == '(':
slist.append(self.getcomment())
elif self.field[self.pos] == '\\':
quote = 1
else:
slist.append(self.field[self.pos])
self.pos = self.pos + 1
return string.join(slist, '')
def getquote(self):
"""Get a quote-delimited fragment from self's field."""
return self.getdelimited('"', '"\r', 0)
def getcomment(self):
"""Get a parenthesis-delimited fragment from self's field."""
return self.getdelimited('(', ')\r', 1)
def getdomainliteral(self):
"""Parse an RFC-822 domain-literal."""
return self.getdelimited('[', ']\r', 0)
def getatom(self):
"""Parse an RFC-822 atom."""
atomlist = ['']
while self.pos < len(self.field):
if self.field[self.pos] in self.atomends:
break
else: atomlist.append(self.field[self.pos])
self.pos = self.pos + 1
return string.join(atomlist, '')
def getphraselist(self):
"""Parse a sequence of RFC-822 phrases.
A phrase is a sequence of words, which are in turn either
RFC-822 atoms or quoted-strings. Phrases are canonicalized
by squeezing all runs of continuous whitespace into one space.
"""
plist = []
while self.pos < len(self.field):
if self.field[self.pos] in self.LWS:
self.pos = self.pos + 1
elif self.field[self.pos] == '"':
plist.append(self.getquote())
elif self.field[self.pos] == '(':
self.commentlist.append(self.getcomment())
elif self.field[self.pos] in self.atomends:
break
else: plist.append(self.getatom())
return plist
class AddressList(AddrlistClass):
"""An AddressList encapsulates a list of parsed RFC822 addresses."""
def __init__(self, field):
AddrlistClass.__init__(self, field)
if field:
self.addresslist = self.getaddrlist()
else:
self.addresslist = []
def __len__(self):
return len(self.addresslist)
def __str__(self):
return string.joinfields(map(dump_address_pair, self.addresslist),", ")
def __add__(self, other):
# Set union
newaddr = AddressList(None)
newaddr.addresslist = self.addresslist[:]
for x in other.addresslist:
if not x in self.addresslist:
newaddr.addresslist.append(x)
return newaddr
def __sub__(self, other):
# Set difference
newaddr = AddressList(None)
for x in self.addresslist:
if not x in other.addresslist:
newaddr.addresslist.append(x)
return newaddr
def __getitem__(self, index):
# Make indexing, slices, and 'in' work
return self.addresslist[index]
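The set arithmetic above makes address de-duplication straightforward; a small sketch (addresses invented):

    import rfc822

    a = rfc822.AddressList('guido@cwi.nl, tim@example.org')
    b = rfc822.AddressList('tim@example.org')
    print len(a - b)    # 1
    print str(a - b)    # guido@cwi.nl
    print len(a + b)    # 2 -- union, with duplicates folded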
def dump_address_pair(pair):
"""Dump a (name, address) pair in a canonicalized form."""
if pair[0]:
return '"' + pair[0] + '" <' + pair[1] + '>'
else:
return pair[1]
# Parse a date field
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
'aug', 'sep', 'oct', 'nov', 'dec',
'january', 'february', 'march', 'april', 'may', 'june', 'july',
'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
# The timezone table does not include the military time zones defined
# in RFC822, other than Z. According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones. RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.
_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
'EST': -500, 'EDT': -400, # Eastern
'CST': -600, 'CDT': -500, # Central
'MST': -700, 'MDT': -600, # Mountain
'PST': -800, 'PDT': -700 # Pacific
}
def parsedate_tz(data):
"""Convert a date string to a time tuple.
Of the military timezones only Z is recognized; see the note above the timezone table.
"""
data = string.split(data)
if data[0][-1] in (',', '.') or string.lower(data[0]) in _daynames:
# There's a dayname here. Skip it
del data[0]
if len(data) == 3: # RFC 850 date, deprecated
stuff = string.split(data[0], '-')
if len(stuff) == 3:
data = stuff + data[1:]
if len(data) == 4:
s = data[3]
i = string.find(s, '+')
if i > 0:
data[3:] = [s[:i], s[i+1:]]
else:
data.append('') # Dummy tz
if len(data) < 5:
return None
data = data[:5]
[dd, mm, yy, tm, tz] = data
mm = string.lower(mm)
if not mm in _monthnames:
dd, mm = mm, string.lower(dd)
if not mm in _monthnames:
return None
mm = _monthnames.index(mm)+1
if mm > 12: mm = mm - 12
if dd[-1] == ',':
dd = dd[:-1]
i = string.find(yy, ':')
if i > 0:
yy, tm = tm, yy
if yy[-1] == ',':
yy = yy[:-1]
if yy[0] not in string.digits:
yy, tz = tz, yy
if tm[-1] == ',':
tm = tm[:-1]
tm = string.splitfields(tm, ':')
if len(tm) == 2:
[thh, tmm] = tm
tss = '0'
elif len(tm) == 3:
[thh, tmm, tss] = tm
else:
return None
try:
yy = string.atoi(yy)
dd = string.atoi(dd)
thh = string.atoi(thh)
tmm = string.atoi(tmm)
tss = string.atoi(tss)
except string.atoi_error:
return None
tzoffset=None
tz=string.upper(tz)
if _timezones.has_key(tz):
tzoffset=_timezones[tz]
else:
try:
tzoffset=string.atoi(tz)
except string.atoi_error:
pass
# Convert a timezone offset into seconds ; -0500 -> -18000
if tzoffset:
if tzoffset < 0:
tzsign = -1
tzoffset = -tzoffset
else:
tzsign = 1
tzoffset = tzsign * ( (tzoffset/100)*3600 + (tzoffset % 100)*60)
tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
return tuple
def parsedate(data):
"""Convert a time string to a time tuple."""
t=parsedate_tz(data)
if type(t)==type( () ):
return t[:9]
else: return t
def mktime_tz(data):
"""Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
if data[9] is None:
# No zone info, so localtime is better assumption than GMT
return time.mktime(data[:8] + (-1,))
else:
t = time.mktime(data[:8] + (0,))
return t - data[9] - time.timezone
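A sketch of the parse/convert pipeline, using the RFC 822 example date quoted in formatdate() below:

    import rfc822

    t = rfc822.parsedate_tz('Sun, 06 Nov 1994 08:49:37 GMT')
    print t                    # (1994, 11, 6, 8, 49, 37, 0, 0, 0, 0)
    print rfc822.mktime_tz(t)  # seconds since the epoch, in UTC
    print rfc822.parsedate('Sun, 06 Nov 1994 08:49:37 GMT')[:6]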
def formatdate(timeval=None):
"""Returns time format preferred for Internet standards.
Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
"""
if timeval is None:
timeval = time.time()
return "%s" % time.strftime('%a, %d %b %Y %H:%M:%S GMT',
time.gmtime(timeval))
# When used as script, run a small test program.
# The first command line argument must be a filename containing one
# message in RFC-822 format.
if __name__ == '__main__':
import sys, os
file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
if sys.argv[1:]: file = sys.argv[1]
f = open(file, 'r')
m = Message(f)
print 'From:', m.getaddr('from')
print 'To:', m.getaddrlist('to')
print 'Subject:', m.getheader('subject')
print 'Date:', m.getheader('date')
date = m.getdate_tz('date')
if date:
print 'ParsedDate:', time.asctime(date[:-1]),
hhmmss = date[-1]
hhmm, ss = divmod(hhmmss, 60)
hh, mm = divmod(hhmm, 60)
print "%+03d%02d" % (hh, mm),
if ss: print ".%02d" % ss,
print
else:
print 'ParsedDate:', None
m.rewindbody()
n = 0
while f.readline():
n = n + 1
print 'Lines:', n
print '-'*70
print 'len =', len(m)
if m.has_key('Date'): print 'Date =', m['Date']
if m.has_key('X-Nonsense'): pass
print 'keys =', m.keys()
print 'values =', m.values()
print 'items =', m.items()

View File

@ -1,7 +1,6 @@
"""Word completion for GNU readline 2.0.
This requires the latest extension to the readline module (the
set_completer() function). When completing a simple identifier, it
completes keywords, built-ins and globals in __main__; when completing
NAME.NAME..., it evaluates (!) the expression up to the last dot and
completes its attributes.
@ -87,7 +86,8 @@ class Completer:
Assuming the text is of the form NAME.NAME....[NAME], and is
evaluatable in the globals of __main__, it will be evaluated
and its attributes (as revealed by dir()) are used as possible
completions.
completions. (For class instances, class members are also
considered.)
WARNING: this can still invoke arbitrary C code, if an object
with a __getattr__ hook is evaluated.
@ -98,7 +98,11 @@ class Completer:
if not m:
return
expr, attr = m.group(1, 3)
words = dir(eval(expr, __main__.__dict__))
object = eval(expr, __main__.__dict__)
words = dir(object)
if hasattr(object,'__class__'):
words.append('__class__')
words = words + get_class_members(object.__class__)
matches = []
n = len(attr)
for word in words:
@ -106,4 +110,11 @@ class Completer:
matches.append("%s.%s" % (expr, word))
return matches
def get_class_members(klass):
ret = dir(klass)
if hasattr(klass,'__bases__'):
for base in klass.__bases__:
ret = ret + get_class_members(base)
return ret
readline.set_completer(Completer().complete)
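Typical interactive use is just to import the module, which installs the completer as shown on the last line above, and bind it to the Tab key:

    import readline
    import rlcompleter           # importing it calls readline.set_completer()
    readline.parse_and_bind('tab: complete')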

97
Lib/dos-8x3/robotpar.py Normal file
View File

@ -0,0 +1,97 @@
"""
Robots.txt file parser class. Accepts as input a list of lines or a
robots.txt URL, builds a set of rules from that input, then answers
questions about the fetchability of other URLs.
"""
class RobotFileParser:
def __init__(self):
self.rules = {}
self.debug = 0
self.url = ''
self.last_checked = 0
def mtime(self):
return self.last_checked
def modified(self):
import time
self.last_checked = time.time()
def set_url(self, url):
self.url = url
def read(self):
import urllib
self.parse(urllib.urlopen(self.url).readlines())
def parse(self, lines):
"""parse the input lines from a robot.txt file"""
import string, re
active = []
for line in lines:
if self.debug: print '>', line,
# blank line terminates current record
if not line[:-1]:
active = []
continue
# remove optional comment and strip line
line = string.strip(line[:string.find(line, '#')])
if not line:
continue
line = re.split(' *: *', line)
if len(line) == 2:
line[0] = string.lower(line[0])
if line[0] == 'user-agent':
# this record applies to this user agent
if self.debug: print '>> user-agent:', line[1]
active.append(line[1])
if not self.rules.has_key(line[1]):
self.rules[line[1]] = []
elif line[0] == 'disallow':
if line[1]:
if self.debug: print '>> disallow:', line[1]
for agent in active:
self.rules[agent].append(re.compile(line[1]))
else:
pass
for agent in active:
if self.debug: print '>> allow', agent
self.rules[agent] = []
else:
if self.debug: print '>> unknown:', line
self.modified()
# returns true if agent is allowed to fetch url
def can_fetch(self, useragent, url):
"""using the parsed robots.txt decide if useragent can fetch url"""
import urlparse
ag = useragent
if not self.rules.has_key(ag): ag = '*'
if not self.rules.has_key(ag):
if self.debug: print '>> allowing', url, 'fetch by', useragent
return 1
path = urlparse.urlparse(url)[2]
for rule in self.rules[ag]:
if rule.match(path) is not None:
if self.debug: print '>> disallowing', url, 'fetch by', useragent
return 0
if self.debug: print '>> allowing', url, 'fetch by', useragent
return 1
def _test():
rp = RobotFileParser()
rp.debug = 1
rp.set_url('http://www.musi-cal.com/robots.txt')
rp.read()
print rp.rules
print rp.can_fetch('*', 'http://www.musi-cal.com/')
print rp.can_fetch('Musi-Cal-Robot',
'http://www.musi-cal.com/cgi-bin/event-search?city=San+Francisco')
if __name__ == "__main__":
_test()

View File

@ -10,13 +10,10 @@ __version__ = "0.3"
import os
import sys
import time
import socket
import string
import posixpath
import SocketServer
import BaseHTTPServer
import urllib
class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
@ -81,7 +78,7 @@ class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
probably be diagnosed.)
"""
path = posixpath.normpath(path)
path = posixpath.normpath(urllib.unquote(path))
words = string.splitfields(path, '/')
words = filter(None, words)
path = os.getcwd()

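The added urllib.unquote() means request paths containing %-escapes (e.g. %20 for a space) now map to the intended file names before normalization; roughly:

    import urllib, posixpath

    raw = '/spam%20dir/eggs.txt'
    print posixpath.normpath(urllib.unquote(raw))  # /spam dir/eggs.txt
    print posixpath.normpath(raw)                  # /spam%20dir/eggs.txt -- no such name on disk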
View File

@ -207,7 +207,10 @@ class TCPServer:
def handle_request(self):
"""Handle one request, possibly blocking."""
request, client_address = self.get_request()
try:
request, client_address = self.get_request()
except socket.error:
return
if self.verify_request(request, client_address):
try:
self.process_request(request, client_address)
@ -278,11 +281,21 @@ class ForkingMixIn:
"""Mix-in class to handle each request in a new process."""
active_children = None
max_children = 40
def collect_children(self):
"""Internal routine to wait for died children."""
while self.active_children:
pid, status = os.waitpid(0, os.WNOHANG)
if len(self.active_children) < self.max_children:
options = os.WNOHANG
else:
# If the maximum number of children are already
# running, block while waiting for a child to exit
options = 0
try:
pid, status = os.waitpid(0, options)
except os.error:
pid = None
if not pid: break
self.active_children.remove(pid)
@ -300,6 +313,7 @@ class ForkingMixIn:
# Child process.
# This must never return, hence os._exit()!
try:
self.socket.close()
self.finish_request(request, client_address)
os._exit(0)
except:
@ -311,14 +325,14 @@ class ForkingMixIn:
class ThreadingMixIn:
"""Mix-in class to handle each request in a new thread."""
def process_request(self, request, client_address):
"""Start a new thread to process the request."""
import thread
thread.start_new_thread(self.finish_request,
(request, client_address))
import threading
t = threading.Thread(target = self.finish_request,
args = (request, client_address))
t.start()
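With the mix-in now using the threading module, a threaded server is still assembled the same way; a sketch (port 8000 and the trivial handler are invented for the example):

    import SocketServer, BaseHTTPServer

    class Handler(BaseHTTPServer.BaseHTTPRequestHandler):
        def do_GET(self):
            self.send_response(200)
            self.end_headers()
            self.wfile.write('hello\n')

    class ThreadingHTTPServer(SocketServer.ThreadingMixIn,
                              BaseHTTPServer.HTTPServer):
        pass

    # ThreadingHTTPServer(('', 8000), Handler).serve_forever()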
class ForkingUDPServer(ForkingMixIn, UDPServer): pass

187
Lib/dos-8x3/sre_comp.py Normal file
View File

@ -0,0 +1,187 @@
#
# Secret Labs' Regular Expression Engine
# $Id$
#
# convert template to internal format
#
# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
#
# This code can only be used for 1.6 alpha testing. All other use
# require explicit permission from Secret Labs AB.
#
# Portions of this engine have been developed in cooperation with
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
# other compatibility work.
#
# FIXME: <fl> formalize (objectify?) and document the compiler code
# format, so that other frontends can use this compiler
import array, string, sys
import _sre
from sre_constants import *
# find an array type code that matches the engine's code size
for WORDSIZE in "BHil":
if len(array.array(WORDSIZE, [0]).tostring()) == _sre.getcodesize():
break
else:
raise RuntimeError, "cannot find a usable array type"
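The probe simply compares each array typecode's item size against the engine's code word size; the candidate sizes can be inspected directly (values are platform-dependent):

    import array

    for typecode in "BHil":
        print typecode, len(array.array(typecode, [0]).tostring())
    # typically: B 1, H 2, i 4, l 4 (l is 8 on some 64-bit platforms)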
# FIXME: <fl> should move some optimizations from the parser to here!
class Code:
def __init__(self):
self.data = []
def __len__(self):
return len(self.data)
def __getitem__(self, index):
return self.data[index]
def __setitem__(self, index, code):
self.data[index] = code
def append(self, code):
self.data.append(code)
def todata(self):
# print self.data
return array.array(WORDSIZE, self.data).tostring()
def _lower(literal):
# return _sre._lower(literal) # FIXME
return string.lower(literal)
def _compile(code, pattern, flags):
append = code.append
for op, av in pattern:
if op is ANY:
if "s" in flags:
append(CODES[op]) # any character at all!
else:
append(CODES[NOT_LITERAL])
append(10)
elif op in (SUCCESS, FAILURE):
append(CODES[op])
elif op is AT:
append(CODES[op])
append(POSITIONS[av])
elif op is BRANCH:
append(CODES[op])
tail = []
for av in av[1]:
skip = len(code); append(0)
_compile(code, av, flags)
append(CODES[JUMP])
tail.append(len(code)); append(0)
code[skip] = len(code) - skip
append(0) # end of branch
for tailpos in tail:
code[tailpos] = len(code) - tailpos
elif op is CALL:
append(CODES[op])
skip = len(code); append(0)
_compile(code, av, flags)
append(CODES[SUCCESS])
code[skip] = len(code) - skip
elif op is CATEGORY: # not used by current parser
append(CODES[op])
append(CATEGORIES[av])
elif op is GROUP:
if "i" in flags:
append(CODES[MAP_IGNORE[op]])
else:
append(CODES[op])
append(av)
elif op is IN:
if "i" in flags:
append(CODES[MAP_IGNORE[op]])
def fixup(literal):
return ord(_lower(literal))
else:
append(CODES[op])
fixup = ord
skip = len(code); append(0)
for op, av in av:
append(CODES[op])
if op is NEGATE:
pass
elif op is LITERAL:
append(fixup(av))
elif op is RANGE:
append(fixup(av[0]))
append(fixup(av[1]))
elif op is CATEGORY:
append(CATEGORIES[av])
else:
raise ValueError, "unsupported set operator"
append(CODES[FAILURE])
code[skip] = len(code) - skip
elif op in (LITERAL, NOT_LITERAL):
if "i" in flags:
append(CODES[MAP_IGNORE[op]])
append(ord(_lower(av)))
else:
append(CODES[op])
append(ord(av))
elif op is MARK:
append(CODES[op])
append(av)
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
lo, hi = av[2].getwidth()
if lo == 0:
raise SyntaxError, "cannot repeat zero-width items"
if lo == hi == 1 and op is MAX_REPEAT:
append(CODES[MAX_REPEAT_ONE])
skip = len(code); append(0)
append(av[0])
append(av[1])
_compile(code, av[2], flags)
append(CODES[SUCCESS])
code[skip] = len(code) - skip
else:
append(CODES[op])
skip = len(code); append(0)
append(av[0])
append(av[1])
_compile(code, av[2], flags)
if op is MIN_REPEAT:
append(CODES[MIN_UNTIL])
else:
# FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
append(CODES[MAX_UNTIL])
code[skip] = len(code) - skip
elif op is SUBPATTERN:
## group = av[0]
## if group:
## append(CODES[MARK])
## append((group-1)*2)
_compile(code, av[1], flags)
## if group:
## append(CODES[MARK])
## append((group-1)*2+1)
else:
raise ValueError, ("unsupported operand type", op)
def compile(p, flags=()):
# convert pattern list to internal format
if type(p) in (type(""), type(u"")):
import sre_parse
pattern = p
p = sre_parse.parse(p)
else:
pattern = None
# print p.getwidth()
# print p
code = Code()
_compile(code, p.data, p.pattern.flags)
code.append(CODES[SUCCESS])
# print list(code.data)
data = code.todata()
if 0: # debugging
print
print "-" * 68
import sre_disasm
sre_disasm.disasm(data)
print "-" * 68
# print len(data), p.pattern.groups, len(p.pattern.groupdict)
return _sre.compile(pattern, data, p.pattern.groups-1, p.pattern.groupdict)

131
Lib/dos-8x3/sre_cons.py Normal file
View File

@ -0,0 +1,131 @@
#
# Secret Labs' Regular Expression Engine
# $Id$
#
# various symbols used by the regular expression engine.
# run this script to update the _sre include files!
#
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
#
# This code can only be used for 1.6 alpha testing. All other use
# require explicit permission from Secret Labs AB.
#
# Portions of this engine have been developed in cooperation with
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
# other compatibility work.
#
# operators
FAILURE = "failure"
SUCCESS = "success"
ANY = "any"
ASSERT = "assert"
AT = "at"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
GROUP = "group"
GROUP_IGNORE = "group_ignore"
IN = "in"
IN_IGNORE = "in_ignore"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
MAX_REPEAT_ONE = "max_repeat_one"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range"
REPEAT = "repeat"
SUBPATTERN = "subpattern"
# positions
AT_BEGINNING = "at_beginning"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
# categories
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
CODES = [
# failure=0 success=1 (just because it looks better that way :-)
FAILURE, SUCCESS,
ANY,
ASSERT,
AT,
BRANCH,
CALL,
CATEGORY,
GROUP, GROUP_IGNORE,
IN, IN_IGNORE,
JUMP,
LITERAL, LITERAL_IGNORE,
MARK,
MAX_REPEAT, MAX_UNTIL,
MAX_REPEAT_ONE,
MIN_REPEAT, MIN_UNTIL,
NOT_LITERAL, NOT_LITERAL_IGNORE,
NEGATE,
RANGE,
REPEAT
]
# convert to dictionary
c = {}
i = 0
for code in CODES:
c[code] = i
i = i + 1
CODES = c
# replacement operations for "ignore case" mode
MAP_IGNORE = {
GROUP: GROUP_IGNORE,
IN: IN_IGNORE,
LITERAL: LITERAL_IGNORE,
NOT_LITERAL: NOT_LITERAL_IGNORE
}
POSITIONS = {
AT_BEGINNING: ord("a"),
AT_BOUNDARY: ord("b"),
AT_NON_BOUNDARY: ord("B"),
AT_END: ord("z"),
}
CATEGORIES = {
CATEGORY_DIGIT: ord("d"),
CATEGORY_NOT_DIGIT: ord("D"),
CATEGORY_SPACE: ord("s"),
CATEGORY_NOT_SPACE: ord("S"),
CATEGORY_WORD: ord("w"),
CATEGORY_NOT_WORD: ord("W"),
}
if __name__ == "__main__":
import string
items = CODES.items()
items.sort(lambda a, b: cmp(a[1], b[1]))
f = open("sre_constants.h", "w")
f.write("/* generated by sre_constants.py */\n")
for k, v in items:
f.write("#define SRE_OP_" + string.upper(k) + " " + str(v) + "\n")
f.close()
print "done"

497
Lib/dos-8x3/sre_pars.py Normal file
View File

@ -0,0 +1,497 @@
#
# Secret Labs' Regular Expression Engine
# $Id$
#
# convert re-style regular expression to SRE template. the current
# implementation is somewhat incomplete, and not very fast. should
# definitely be rewritten before Python 1.6 goes beta.
#
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
#
# This code can only be used for 1.6 alpha testing. All other use
# require explicit permission from Secret Labs AB.
#
# Portions of this engine have been developed in cooperation with
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
# other compatibility work.
#
# FIXME: comments marked with the FIXME tag are open issues. all such
# issues should be closed before the final beta.
import string, sys
from sre_constants import *
SPECIAL_CHARS = ".\\[{()*+?^$|"
REPEAT_CHARS = "*+?{"
# FIXME: string in tuple tests may explode if char is unicode :-(
DIGITS = tuple(string.digits)
OCTDIGITS = tuple("01234567")
HEXDIGITS = tuple("0123456789abcdefABCDEF")
ESCAPES = {
"\\a": (LITERAL, chr(7)),
"\\b": (LITERAL, chr(8)),
"\\f": (LITERAL, chr(12)),
"\\n": (LITERAL, chr(10)),
"\\r": (LITERAL, chr(13)),
"\\t": (LITERAL, chr(9)),
"\\v": (LITERAL, chr(11))
}
CATEGORIES = {
"\\A": (AT, AT_BEGINNING), # start of string
"\\b": (AT, AT_BOUNDARY),
"\\B": (AT, AT_NON_BOUNDARY),
"\\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
"\\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
"\\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
"\\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
"\\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
"\\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
"\\Z": (AT, AT_END), # end of string
}
class Pattern:
# FIXME: <fl> rename class, and store flags in here too!
def __init__(self):
self.flags = []
self.groups = 1
self.groupdict = {}
def getgroup(self, name=None):
gid = self.groups
self.groups = gid + 1
if name:
self.groupdict[name] = gid
return gid
def setflag(self, flag):
if flag not in self.flags:
self.flags.append(flag)
class SubPattern:
# a subpattern, in intermediate form
def __init__(self, pattern, data=None):
self.pattern = pattern
if not data:
data = []
self.data = data
self.flags = []
self.width = None
def __repr__(self):
return repr(self.data)
def __len__(self):
return len(self.data)
def __delitem__(self, index):
del self.data[index]
def __getitem__(self, index):
return self.data[index]
def __setitem__(self, index, code):
self.data[index] = code
def __getslice__(self, start, stop):
return SubPattern(self.pattern, self.data[start:stop])
def insert(self, index, code):
self.data.insert(index, code)
def append(self, code):
self.data.append(code)
def getwidth(self):
# determine the width (min, max) for this subpattern
if self.width:
return self.width
lo = hi = 0L
for op, av in self.data:
if op is BRANCH:
l = sys.maxint
h = 0
for av in av[1]:
i, j = av.getwidth()
l = min(l, i)
h = max(h, j)
lo = lo + l
hi = hi + h
elif op is CALL:
i, j = av.getwidth()
lo = lo + i
hi = hi + j
elif op is SUBPATTERN:
i, j = av[1].getwidth()
lo = lo + i
hi = hi + j
elif op in (MIN_REPEAT, MAX_REPEAT):
i, j = av[2].getwidth()
lo = lo + i * av[0]
hi = hi + j * av[1]
elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
lo = lo + 1
hi = hi + 1
elif op == SUCCESS:
break
self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
return self.width
def set(self, flag):
if not flag in self.flags:
self.flags.append(flag)
def reset(self, flag):
if flag in self.flags:
self.flags.remove(flag)
class Tokenizer:
def __init__(self, string):
self.string = list(string)
self.next = self.__next()
def __next(self):
if not self.string:
return None
char = self.string[0]
if char[0] == "\\":
try:
c = self.string[1]
except IndexError:
raise SyntaxError, "bogus escape"
char = char + c
try:
if c == "x":
# hexadecimal constant
for i in xrange(2, sys.maxint):
c = self.string[i]
if str(c) not in HEXDIGITS:
break
char = char + c
elif str(c) in DIGITS:
# decimal (or octal) number
for i in xrange(2, sys.maxint):
c = self.string[i]
# FIXME: if larger than current number of
# groups, interpret as an octal number
if str(c) not in DIGITS:
break
char = char + c
except IndexError:
pass # use what we've got this far
del self.string[0:len(char)]
return char
def match(self, char):
if char == self.next:
self.next = self.__next()
return 1
return 0
def match_set(self, set):
if self.next and self.next in set:
self.next = self.__next()
return 1
return 0
def get(self):
this = self.next
self.next = self.__next()
return this
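The tokenizer groups a backslash with its escape (including multi-character forms such as \x41) into a single token; a sketch, assuming the module is importable as sre_parse:

    from sre_parse import Tokenizer

    t = Tokenizer(r"a\d+")
    while 1:
        token = t.get()
        if token is None:
            break
        print repr(token)    # 'a', then '\\d', then '+'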
def _fixescape(escape, character_class=0):
# convert escape to (type, value)
if character_class:
# inside a character class, we'll look in the character
# escapes dictionary first
code = ESCAPES.get(escape)
if code:
return code
code = CATEGORIES.get(escape)
else:
code = CATEGORIES.get(escape)
if code:
return code
code = ESCAPES.get(escape)
if code:
return code
if not character_class:
try:
group = int(escape[1:])
# FIXME: only valid if group <= current number of groups
return GROUP, group
except ValueError:
pass
try:
if escape[1:2] == "x":
escape = escape[2:]
return LITERAL, chr(int(escape[-2:], 16) & 0xff)
elif str(escape[1:2]) in DIGITS:
return LITERAL, chr(int(escape[1:], 8) & 0xff)
elif len(escape) == 2:
return LITERAL, escape[1]
except ValueError:
pass
raise SyntaxError, "bogus escape: %s" % repr(escape)
def _branch(subpattern, items):
# form a branch operator from a set of items (FIXME: move this
# optimization to the compiler module!)
# check if all items share a common prefix
while 1:
prefix = None
for item in items:
if not item:
break
if prefix is None:
prefix = item[0]
elif item[0] != prefix:
break
else:
# all subitems start with a common "prefix".
# move it out of the branch
for item in items:
del item[0]
subpattern.append(prefix)
continue # check next one
break
# check if the branch can be replaced by a character set
for item in items:
if len(item) != 1 or item[0][0] != LITERAL:
break
else:
# we can store this as a character set instead of a
# branch (FIXME: use a range if possible)
set = []
for item in items:
set.append(item[0])
subpattern.append((IN, set))
return
subpattern.append((BRANCH, (None, items)))
def _parse(source, pattern, flags=()):
# parse regular expression pattern into an operator list.
subpattern = SubPattern(pattern)
this = None
while 1:
if str(source.next) in ("|", ")"):
break # end of subpattern
this = source.get()
if this is None:
break # end of pattern
if this and this[0] not in SPECIAL_CHARS:
subpattern.append((LITERAL, this))
elif this == "[":
# character set
set = []
## if source.match(":"):
## pass # handle character classes
if source.match("^"):
set.append((NEGATE, None))
# check remaining characters
start = set[:]
while 1:
this = source.get()
if this == "]" and set != start:
break
elif this and this[0] == "\\":
code1 = _fixescape(this, 1)
elif this:
code1 = LITERAL, this
else:
raise SyntaxError, "unexpected end of regular expression"
if source.match("-"):
# potential range
this = source.get()
if this == "]":
set.append(code1)
set.append((LITERAL, "-"))
break
else:
if this[0] == "\\":
code2 = _fixescape(this, 1)
else:
code2 = LITERAL, this
if code1[0] != LITERAL or code2[0] != LITERAL:
raise SyntaxError, "illegal range"
if len(code1[1]) != 1 or len(code2[1]) != 1:
raise SyntaxError, "illegal range"
set.append((RANGE, (code1[1], code2[1])))
else:
if code1[0] is IN:
code1 = code1[1][0]
set.append(code1)
# FIXME: <fl> move set optimization to support function
if len(set)==1 and set[0][0] is LITERAL:
subpattern.append(set[0]) # optimization
elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
subpattern.append((NOT_LITERAL, set[1][1])) # optimization
else:
# FIXME: <fl> add charmap optimization
subpattern.append((IN, set))
elif this and this[0] in REPEAT_CHARS:
# repeat previous item
if this == "?":
min, max = 0, 1
elif this == "*":
min, max = 0, sys.maxint
elif this == "+":
min, max = 1, sys.maxint
elif this == "{":
min, max = 0, sys.maxint
lo = hi = ""
while str(source.next) in DIGITS:
lo = lo + source.get()
if source.match(","):
while str(source.next) in DIGITS:
hi = hi + source.get()
else:
hi = lo
if not source.match("}"):
raise SyntaxError, "bogus range"
if lo:
min = int(lo)
if hi:
max = int(hi)
# FIXME: <fl> check that hi >= lo!
else:
raise SyntaxError, "not supported"
# figure out which item to repeat
# FIXME: should back up to the right mark, right?
if subpattern:
index = len(subpattern)-1
while subpattern[index][0] is MARK:
index = index - 1
item = subpattern[index:index+1]
else:
raise SyntaxError, "nothing to repeat"
if source.match("?"):
subpattern[index] = (MIN_REPEAT, (min, max, item))
else:
subpattern[index] = (MAX_REPEAT, (min, max, item))
elif this == ".":
subpattern.append((ANY, None))
elif this == "(":
group = 1
name = None
if source.match("?"):
group = 0
# options
if source.match("P"):
# named group: skip forward to end of name
if source.match("<"):
name = ""
while 1:
char = source.get()
if char is None or char == ">":
break
name = name + char
group = 1
elif source.match(":"):
# non-capturing group
group = 2
elif source.match_set("iI"):
pattern.setflag("i")
elif source.match_set("lL"):
pattern.setflag("l")
elif source.match_set("mM"):
pattern.setflag("m")
elif source.match_set("sS"):
pattern.setflag("s")
elif source.match_set("xX"):
pattern.setflag("x")
if group:
# parse group contents
b = []
if group == 2:
# anonymous group
group = None
else:
group = pattern.getgroup(name)
if group:
subpattern.append((MARK, (group-1)*2))
while 1:
p = _parse(source, pattern, flags)
if source.match(")"):
if b:
b.append(p)
_branch(subpattern, b)
else:
subpattern.append((SUBPATTERN, (group, p)))
break
elif source.match("|"):
b.append(p)
else:
raise SyntaxError, "group not properly closed"
if group:
subpattern.append((MARK, (group-1)*2+1))
else:
# FIXME: should this really be a while loop?
while 1:
char = source.get()
if char is None or char == ")":
break
elif this == "^":
subpattern.append((AT, AT_BEGINNING))
elif this == "$":
subpattern.append((AT, AT_END))
elif this and this[0] == "\\":
code = _fixescape(this)
subpattern.append(code)
else:
raise SyntaxError, "parser error"
return subpattern
def parse(source, flags=()):
s = Tokenizer(source)
g = Pattern()
b = []
while 1:
p = _parse(s, g, flags)
tail = s.get()
if tail == "|":
b.append(p)
elif tail == ")":
raise SyntaxError, "unbalanced parenthesis"
elif tail is None:
if b:
b.append(p)
p = SubPattern(g)
_branch(p, b)
break
else:
raise SyntaxError, "bogus characters at end of regular expression"
return p
if __name__ == "__main__":
from pprint import pprint
from testpatterns import PATTERNS
a = b = c = 0
for pattern, flags in PATTERNS:
if flags:
continue
print "-"*68
try:
p = parse(pattern)
print repr(pattern), "->"
pprint(p.data)
import sre_compile
try:
code = sre_compile.compile(p)
c = c + 1
except:
pass
a = a + 1
except SyntaxError, v:
print "**", repr(pattern), v
b = b + 1
print "-"*68
print a, "of", b, "patterns successfully parsed"
print c, "of", b, "patterns successfully compiled"

View File

@ -1,7 +1,7 @@
# Module 'statcache'
#
# Maintain a cache of file stats.
# There are functions to reset the cache or to selectively remove items.
"""Maintain a cache of stat() information on files.
There are functions to reset the cache or to selectively remove items.
"""
import os
from stat import *
@ -12,42 +12,37 @@ from stat import *
cache = {}
# Stat a file, possibly out of the cache.
#
def stat(path):
"""Stat a file, possibly out of the cache."""
if cache.has_key(path):
return cache[path]
cache[path] = ret = os.stat(path)
return ret
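# A short usage sketch for the cache above (hedged: entries persist until
# explicitly forgotten, so a stale result is possible if the file changes):
#
# import statcache
# st = statcache.stat('/etc/passwd')   # first call hits os.stat()
# st = statcache.stat('/etc/passwd')   # second call is served from the cache
# statcache.forget('/etc/passwd')      # drop the cached entry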
# Reset the cache completely.
#
def reset():
"""Reset the cache completely."""
global cache
cache = {}
# Remove a given item from the cache, if it exists.
#
def forget(path):
"""Remove a given item from the cache, if it exists."""
if cache.has_key(path):
del cache[path]
# Remove all pathnames with a given prefix.
#
def forget_prefix(prefix):
"""Remove all pathnames with a given prefix."""
n = len(prefix)
for path in cache.keys():
if path[:n] == prefix:
del cache[path]
# Forget about a directory and all entries in it, but not about
# entries in subdirectories.
#
def forget_dir(prefix):
"""Forget about a directory and all entries in it, but not about
entries in subdirectories."""
if prefix[-1:] == '/' and prefix <> '/':
prefix = prefix[:-1]
forget(prefix)
@ -62,19 +57,17 @@ def forget_dir(prefix):
del cache[path]
# Remove all pathnames except with a given prefix.
# Normally used with prefix = '/' after a chdir().
#
def forget_except_prefix(prefix):
"""Remove all pathnames except with a given prefix.
Normally used with prefix = '/' after a chdir()."""
n = len(prefix)
for path in cache.keys():
if path[:n] <> prefix:
del cache[path]
# Check for directory.
#
def isdir(path):
"""Check for directory."""
try:
st = stat(path)
except os.error:

View File

@ -1,30 +1,32 @@
# class StringIO implements file-like objects that read/write a
# string buffer (a.k.a. "memory files").
#
# This implements (nearly) all stdio methods.
#
# f = StringIO() # ready for writing
# f = StringIO(buf) # ready for reading
# f.close() # explicitly release resources held
# flag = f.isatty() # always false
# pos = f.tell() # get current position
# f.seek(pos) # set current position
# f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF
# buf = f.read() # read until EOF
# buf = f.read(n) # read up to n bytes
# buf = f.readline() # read until end of line ('\n') or EOF
# list = f.readlines()# list of f.readline() results until EOF
# f.write(buf) # write at current position
# f.writelines(list) # for line in list: f.write(line)
# f.getvalue() # return whole file's contents as a string
#
# Notes:
# - Using a real file is often faster (but less convenient).
# - fileno() is left unimplemented so that code which uses it triggers
# an exception early.
# - Seeking far beyond EOF and then writing will insert real null
# bytes that occupy space in the buffer.
# - There's a simple test set (see end of this file).
"""File-like objects that read from or write to a string buffer.
This implements (nearly) all stdio methods.
f = StringIO() # ready for writing
f = StringIO(buf) # ready for reading
f.close() # explicitly release resources held
flag = f.isatty() # always false
pos = f.tell() # get current position
f.seek(pos) # set current position
f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF
buf = f.read() # read until EOF
buf = f.read(n) # read up to n bytes
buf = f.readline() # read until end of line ('\n') or EOF
list = f.readlines()# list of f.readline() results until EOF
f.write(buf) # write at current position
f.writelines(list) # for line in list: f.write(line)
f.getvalue() # return whole file's contents as a string
Notes:
- Using a real file is often faster (but less convenient).
- There's also a much faster implementation in C, called cStringIO, but
it's not subclassable.
- fileno() is left unimplemented so that code which uses it triggers
an exception early.
- Seeking far beyond EOF and then writing will insert real null
bytes that occupy space in the buffer.
- There's a simple test set (see end of this file).
"""
import string
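# A small sketch of the seek-past-EOF note above, assuming the StringIO
# class defined later in this file:
#
# f = StringIO()
# f.seek(5)
# f.write('x')
# f.getvalue()   # five real null bytes, then 'x'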

431
Lib/dos-8x3/stringol.py Normal file
View File

@ -0,0 +1,431 @@
# module 'string' -- A collection of string operations
# Warning: most of the code you see here isn't normally used nowadays. With
# Python 1.6, many of these functions are implemented as methods on the
# standard string object. They used to be implemented by a built-in module
# called strop, but strop is now obsolete itself.
"""Common string manipulations.
Public module variables:
whitespace -- a string containing all characters considered whitespace
lowercase -- a string containing all characters considered lowercase letters
uppercase -- a string containing all characters considered uppercase letters
letters -- a string containing all characters considered letters
digits -- a string containing all characters considered decimal digits
hexdigits -- a string containing all characters considered hexadecimal digits
octdigits -- a string containing all characters considered octal digits
"""
# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Case conversion helpers
_idmap = ''
for i in range(256): _idmap = _idmap + chr(i)
del i
# Backward compatible names for exceptions
index_error = ValueError
atoi_error = ValueError
atof_error = ValueError
atol_error = ValueError
# convert UPPER CASE letters to lower case
def lower(s):
"""lower(s) -> string
Return a copy of the string s converted to lowercase.
"""
return s.lower()
# Convert lower case letters to UPPER CASE
def upper(s):
"""upper(s) -> string
Return a copy of the string s converted to uppercase.
"""
return s.upper()
# Swap lower case letters and UPPER CASE
def swapcase(s):
"""swapcase(s) -> string
Return a copy of the string s with upper case characters
converted to lowercase and vice versa.
"""
return s.swapcase()
# Strip leading and trailing tabs and spaces
def strip(s):
"""strip(s) -> string
Return a copy of the string s with leading and trailing
whitespace removed.
"""
return s.strip()
# Strip leading tabs and spaces
def lstrip(s):
"""lstrip(s) -> string
Return a copy of the string s with leading whitespace removed.
"""
return s.lstrip()
# Strip trailing tabs and spaces
def rstrip(s):
"""rstrip(s) -> string
Return a copy of the string s with trailing whitespace
removed.
"""
return s.rstrip()
# Split a string into a list of space/tab-separated words
# NB: split(s) is NOT the same as splitfields(s, ' ')!
def split(s, sep=None, maxsplit=0):
"""split(str [,sep [,maxsplit]]) -> list of strings
Return a list of the words in the string s, using sep as the
delimiter string. If maxsplit is nonzero, splits into at most
maxsplit words. If sep is not specified, any whitespace string
is a separator. Maxsplit defaults to 0 (no limit).
(split and splitfields are synonymous)
"""
return s.split(sep, maxsplit)
splitfields = split
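# The NB above is about the separator argument rather than the two names
# (which are aliases). Per the docstring, whitespace splitting collapses
# runs, while an explicit ' ' separator keeps empty fields:
#
# split('a  b')            -> ['a', 'b']       (runs of whitespace collapse)
# splitfields('a  b', ' ') -> ['a', '', 'b']   (every single space is a boundary)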
# Join fields with optional separator
def join(words, sep = ' '):
"""join(list [,sep]) -> string
Return a string composed of the words in list, with
intervening occurrences of sep. The default separator is a
single space.
(joinfields and join are synonymous)
"""
return sep.join(words)
joinfields = join
# for a little bit of speed
_apply = apply
# Find substring, raise exception if not found
def index(s, *args):
"""index(s, sub [,start [,end]]) -> int
Like find but raises ValueError when the substring is not found.
"""
return _apply(s.index, args)
# Find last substring, raise exception if not found
def rindex(s, *args):
"""rindex(s, sub [,start [,end]]) -> int
Like rfind but raises ValueError when the substring is not found.
"""
return _apply(s.rindex, args)
# Count non-overlapping occurrences of substring
def count(s, *args):
"""count(s, sub[, start[,end]]) -> int
Return the number of occurrences of substring sub in string
s[start:end]. Optional arguments start and end are
interpreted as in slice notation.
"""
return _apply(s.count, args)
# Find substring, return -1 if not found
def find(s, *args):
"""find(s, sub [,start [,end]]) -> in
Return the lowest index in s where substring sub is found,
such that sub is contained within s[start:end]. Optional
arguments start and end are interpreted as in slice notation.
Return -1 on failure.
"""
return _apply(s.find, args)
# Find last substring, return -1 if not found
def rfind(s, *args):
"""rfind(s, sub [,start [,end]]) -> int
Return the highest index in s where substring sub is found,
such that sub is contained within s[start:end]. Optional
arguments start and end are interpreted as in slice notation.
Return -1 on failure.
"""
return _apply(s.rfind, args)
# for a bit of speed
_float = float
_int = int
_long = long
_StringType = type('')
# Convert string to float
def atof(s):
"""atof(s) -> float
Return the floating point number represented by the string s.
"""
if type(s) == _StringType:
return _float(s)
else:
raise TypeError('argument 1: expected string, %s found' %
type(s).__name__)
# Convert string to integer
def atoi(*args):
"""atoi(s [,base]) -> int
Return the integer represented by the string s in the given
base, which defaults to 10. The string s must consist of one
or more digits, possibly preceded by a sign. If base is 0, it
is chosen from the leading characters of s, 0 for octal, 0x or
0X for hexadecimal. If base is 16, a preceding 0x or 0X is
accepted.
"""
try:
s = args[0]
except IndexError:
raise TypeError('function requires at least 1 argument: %d given' %
len(args))
# Don't catch type error resulting from too many arguments to int(). The
# error message isn't compatible but the error type is, and this function
# is complicated enough already.
if type(s) == _StringType:
return _apply(_int, args)
else:
raise TypeError('argument 1: expected string, %s found' %
type(s).__name__)
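# A few examples of the base-0 behavior described above (these simply
# delegate to the built-in int()):
#
# atoi('255')      == 255    # default base is 10
# atoi('0x1f', 0)  == 31     # base 0: leading 0x selects hexadecimal
# atoi('017', 0)   == 15     # base 0: leading 0 selects octal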
# Convert string to long integer
def atol(*args):
"""atol(s [,base]) -> long
Return the long integer represented by the string s in the
given base, which defaults to 10. The string s must consist
of one or more digits, possibly preceded by a sign. If base
is 0, it is chosen from the leading characters of s, 0 for
octal, 0x or 0X for hexadecimal. If base is 16, a preceding
0x or 0X is accepted. A trailing L or l is not accepted,
unless base is 0.
"""
try:
s = args[0]
except IndexError:
raise TypeError('function requires at least 1 argument: %d given' %
len(args))
# Don't catch type error resulting from too many arguments to long(). The
# error message isn't compatible but the error type is, and this function
# is complicated enough already.
if type(s) == _StringType:
return _apply(_long, args)
else:
raise TypeError('argument 1: expected string, %s found' %
type(s).__name__)
# Left-justify a string
def ljust(s, width):
"""ljust(s, width) -> string
Return a left-justified version of s, in a field of the
specified width, padded with spaces as needed. The string is
never truncated.
"""
n = width - len(s)
if n <= 0: return s
return s + ' '*n
# Right-justify a string
def rjust(s, width):
"""rjust(s, width) -> string
Return a right-justified version of s, in a field of the
specified width, padded with spaces as needed. The string is
never truncated.
"""
n = width - len(s)
if n <= 0: return s
return ' '*n + s
# Center a string
def center(s, width):
"""center(s, width) -> string
Return a centered version of s, in a field of the specified
width, padded with spaces as needed. The string is never
truncated.
"""
n = width - len(s)
if n <= 0: return s
half = n/2
if n%2 and width%2:
# This ensures that center(center(s, i), j) = center(s, j)
half = half+1
return ' '*half + s + ' '*(n-half)
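# A quick check of the invariant mentioned in the comment above (one
# concrete instance, not a proof):
#
# assert center(center('a', 5), 10) == center('a', 10)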
# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
"""zfill(x, width) -> string
Pad a numeric string x with zeros on the left, to fill a field
of the specified width. The string x is never truncated.
"""
if type(x) == type(''): s = x
else: s = `x`
n = len(s)
if n >= width: return s
sign = ''
if s[0] in ('-', '+'):
sign, s = s[0], s[1:]
return sign + '0'*(width-n) + s
# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
"""expandtabs(s [,tabsize]) -> string
Return a copy of the string s with all tab characters replaced
by the appropriate number of spaces, depending on the current
column, and the tabsize (default 8).
"""
res = line = ''
for c in s:
if c == '\t':
c = ' '*(tabsize - len(line) % tabsize)
line = line + c
if c == '\n':
res = res + line
line = ''
return res + line
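# For instance, with the default tab size of 8 a tab advances to the next
# multiple of 8, and a newline resets the column:
#
# assert expandtabs('a\tb\nc\td') == 'a       b\nc       d'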
# Character translation through look-up table.
def translate(s, table, deletions=""):
"""translate(s,table [,deletechars]) -> string
Return a copy of the string s, where all characters occurring
in the optional argument deletechars are removed, and the
remaining characters have been mapped through the given
translation table, which must be a string of length 256.
"""
return s.translate(table, deletions)
# Capitalize a string, e.g. "aBc dEf" -> "Abc def".
def capitalize(s):
"""capitalize(s) -> string
Return a copy of the string s with only its first character
capitalized.
"""
return s.capitalize()
# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
# See also regsub.capwords().
def capwords(s, sep=None):
"""capwords(s, [sep]) -> string
Split the argument into words using split, capitalize each
word using capitalize, and join the capitalized words using
join. Note that this replaces runs of whitespace characters by
a single space.
"""
return join(map(capitalize, s.split(sep)), sep or ' ')
# Construct a translation string
_idmapL = None
def maketrans(fromstr, tostr):
"""maketrans(frm, to) -> string
Return a translation table (a string 256 bytes long)
suitable for use in string.translate. The strings frm and to
must be of the same length.
"""
if len(fromstr) != len(tostr):
raise ValueError, "maketrans arguments must have same length"
global _idmapL
if not _idmapL:
_idmapL = map(None, _idmap)
L = _idmapL[:]
fromstr = map(ord, fromstr)
for i in range(len(fromstr)):
L[fromstr[i]] = tostr[i]
return joinfields(L, "")
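# A small sketch combining maketrans() with translate() above:
#
# table = maketrans('abc', 'xyz')
# translate('aabbcc', table)        == 'xxyyzz'
# translate('aabbcc', table, 'b')   == 'xxzz'   # 'b' is deleted before mapping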
# Substring replacement (global)
def replace(s, old, new, maxsplit=0):
"""replace (str, old, new[, maxsplit]) -> string
Return a copy of string str with all occurrences of substring
old replaced by new. If the optional argument maxsplit is
given, only the first maxsplit occurrences are replaced.
"""
return s.replace(old, new, maxsplit)
# XXX: transitional
#
# If string objects do not have methods, then we need to use the old string.py
# library, which uses strop for many more things than just the few outlined
# below.
try:
''.upper
except AttributeError:
from stringold import *
# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.
try:
from strop import maketrans, lowercase, uppercase, whitespace
letters = lowercase + uppercase
except ImportError:
pass # Use the original versions

View File

@ -329,6 +329,7 @@ class Telnet:
opt = self.rawq_getchar()
self.msg('IAC %s %d',
c == WILL and 'WILL' or 'WONT', ord(c))
self.sock.send(IAC + DONT + opt)
else:
self.msg('IAC %s not recognized' % `c`)
except EOFError: # raised by self.rawq_getchar()

View File

@ -1,46 +1,93 @@
#! /usr/bin/env python
"""Test script for the binascii C module
"""Test the binascii C module."""
Uses the mechanism of the python binhex module
Roger E. Masse
"""
import binhex
import tempfile
from test_support import verbose
import binascii
def test():
# Show module doc string
print binascii.__doc__
try:
fname1 = tempfile.mktemp()
fname2 = tempfile.mktemp()
f = open(fname1, 'w')
except:
raise ImportError, "Cannot test binascii without a temp file"
# Show module exceptions
print binascii.Error
print binascii.Incomplete
start = 'Jack is my hero'
f.write(start)
f.close()
binhex.binhex(fname1, fname2)
if verbose:
print 'binhex'
# Check presence and display doc strings of all functions
funcs = []
for suffix in "base64", "hqx", "uu":
prefixes = ["a2b_", "b2a_"]
if suffix == "hqx":
prefixes.extend(["crc_", "rlecode_", "rledecode_"])
for prefix in prefixes:
name = prefix + suffix
funcs.append(getattr(binascii, name))
for func in funcs:
print "%-15s: %s" % (func.__name__, func.__doc__)
binhex.hexbin(fname2, fname1)
if verbose:
print 'hexbin'
# Create binary test data
testdata = "The quick brown fox jumps over the lazy dog.\r\n"
for i in range(256):
# Be slow so we don't depend on other modules
testdata = testdata + chr(i)
testdata = testdata + "\r\nHello world.\n"
f = open(fname1, 'r')
finish = f.readline()
# Test base64 with valid data
print "base64 test"
MAX_BASE64 = 57
lines = []
for i in range(0, len(testdata), MAX_BASE64):
b = testdata[i:i+MAX_BASE64]
a = binascii.b2a_base64(b)
lines.append(a)
print a,
res = ""
for line in lines:
b = binascii.a2b_base64(line)
res = res + b
assert res == testdata
if start <> finish:
print 'Error: binhex <> hexbin'
elif verbose:
print 'binhex == hexbin'
# Test base64 with random invalid characters sprinkled throughout
# (This requires a new version of binascii.)
fillers = ""
valid = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/"
for i in range(256):
c = chr(i)
if c not in valid:
fillers = fillers + c
def addnoise(line):
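# Interleave all of the invalid filler characters into the line, pacing
# the insertions so valid and invalid characters stay evenly mixed.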
noise = fillers
ratio = len(line) / len(noise)
res = ""
while line and noise:
if len(line) / len(noise) > ratio:
c, line = line[0], line[1:]
else:
c, noise = noise[0], noise[1:]
res = res + c
return res + noise + line
res = ""
for line in map(addnoise, lines):
b = binascii.a2b_base64(line)
res = res + b
assert res == testdata
try:
import os
os.unlink(fname1)
os.unlink(fname2)
except:
pass
test()
# Test uu
print "uu test"
MAX_UU = 45
lines = []
for i in range(0, len(testdata), MAX_UU):
b = testdata[i:i+MAX_UU]
a = binascii.b2a_uu(b)
lines.append(a)
print a,
res = ""
for line in lines:
b = binascii.a2b_uu(line)
res = res + b
assert res == testdata
# Test crc32()
crc = binascii.crc32("Test the CRC-32 of")
crc = binascii.crc32(" this string.", crc)
if crc != 1571220330:
print "binascii.crc32() failed."
# The hqx test is in test_binhex.py

168
Lib/dos-8x3/test_con.py Normal file
View File

@ -0,0 +1,168 @@
from test_support import TestFailed
class base_set:
def __init__(self, el):
self.el = el
class set(base_set):
def __contains__(self, el):
return self.el == el
class seq(base_set):
def __getitem__(self, n):
return [self.el][n]
def check(ok, *args):
if not ok:
raise TestFailed, " ".join(map(str, args))
a = base_set(1)
b = set(1)
c = seq(1)
check(1 in b, "1 not in set(1)")
check(0 not in b, "0 in set(1)")
check(1 in c, "1 not in seq(1)")
check(0 not in c, "0 in seq(1)")
try:
1 in a
check(0, "in base_set did not raise error")
except AttributeError:
pass
try:
1 not in a
check(0, "not in base_set did not raise error")
except AttributeError:
pass
# Test char in string
check('c' in 'abc', "'c' not in 'abc'")
check('d' not in 'abc', "'d' in 'abc'")
try:
'' in 'abc'
check(0, "'' in 'abc' did not raise error")
except TypeError:
pass
try:
'ab' in 'abc'
check(0, "'ab' in 'abc' did not raise error")
except TypeError:
pass
try:
None in 'abc'
check(0, "None in 'abc' did not raise error")
except TypeError:
pass
# Test char in Unicode
check('c' in u'abc', "'c' not in u'abc'")
check('d' not in u'abc', "'d' in u'abc'")
try:
'' in u'abc'
check(0, "'' in u'abc' did not raise error")
except TypeError:
pass
try:
'ab' in u'abc'
check(0, "'ab' in u'abc' did not raise error")
except TypeError:
pass
try:
None in u'abc'
check(0, "None in u'abc' did not raise error")
except TypeError:
pass
# Test Unicode char in Unicode
check(u'c' in u'abc', "u'c' not in u'abc'")
check(u'd' not in u'abc', "u'd' in u'abc'")
try:
u'' in u'abc'
check(0, "u'' in u'abc' did not raise error")
except TypeError:
pass
try:
u'ab' in u'abc'
check(0, "u'ab' in u'abc' did not raise error")
except TypeError:
pass
# Test Unicode char in string
check(u'c' in 'abc', "u'c' not in 'abc'")
check(u'd' not in 'abc', "u'd' in 'abc'")
try:
u'' in 'abc'
check(0, "u'' in 'abc' did not raise error")
except TypeError:
pass
try:
u'ab' in 'abc'
check(0, "u'ab' in 'abc' did not raise error")
except TypeError:
pass
# A collection of tests on builtin sequence types
a = range(10)
for i in a:
check(i in a, "%s not in %s" % (`i`, `a`))
check(16 not in a, "16 not in %s" % `a`)
check(a not in a, "%s not in %s" % (`a`, `a`))
a = tuple(a)
for i in a:
check(i in a, "%s not in %s" % (`i`, `a`))
check(16 not in a, "16 not in %s" % `a`)
check(a not in a, "%s not in %s" % (`a`, `a`))
class Deviant1:
"""Behaves strangely when compared
This class is designed to make sure that the contains code
works when the list is modified during the check.
"""
aList = range(15)
def __cmp__(self, other):
if other == 12:
self.aList.remove(12)
self.aList.remove(13)
self.aList.remove(14)
return 1
check(Deviant1() not in Deviant1.aList, "Deviant1 failed")
class Deviant2:
"""Behaves strangely when compared
This class raises an exception during comparison. That in
turn causes the comparison to fail with a TypeError.
"""
def __cmp__(self, other):
if other == 4:
raise RuntimeError, "gotcha"
try:
check(Deviant2() not in a, "oops")
except TypeError:
pass

View File

@ -79,18 +79,29 @@ def dotest():
f.close()
try:
cPickle.dump(123, f)
except IOError:
except ValueError:
pass
else:
print "dump to closed file should raise IOError"
print "dump to closed file should raise ValueError"
f = open(fn, "r")
f.close()
try:
cPickle.load(f)
except IOError:
except ValueError:
pass
else:
print "load from closed file should raise IOError"
print "load from closed file should raise ValueError"
os.remove(fn)
# Test specific bad cases
for i in range(10):
try:
x = cPickle.loads('garyp')
except cPickle.BadPickleGet, y:
del y
else:
print "unexpected success!"
break
dotest()

143
Lib/dos-8x3/test_ext.py Normal file
View File

@ -0,0 +1,143 @@
from UserList import UserList
def f(*a, **k):
print a, k
def g(x, *y, **z):
print x, y, z
def h(j=1, a=2, h=3):
print j, a, h
f()
f(1)
f(1, 2)
f(1, 2, 3)
f(1, 2, 3, *(4, 5))
f(1, 2, 3, *[4, 5])
f(1, 2, 3, *UserList([4, 5]))
f(1, 2, 3, **{'a':4, 'b':5})
f(1, 2, 3, *(4, 5), **{'a':6, 'b':7})
f(1, 2, 3, x=4, y=5, *(6, 7), **{'a':8, 'b':9})
try:
g()
except TypeError, err:
print "TypeError:", err
else:
print "should raise TypeError: not enough arguments; expected 1, got 0"
try:
g(*())
except TypeError, err:
print "TypeError:", err
else:
print "should raise TypeError: not enough arguments; expected 1, got 0"
try:
g(*(), **{})
except TypeError, err:
print "TypeError:", err
else:
print "should raise TypeError: not enough arguments; expected 1, got 0"
g(1)
g(1, 2)
g(1, 2, 3)
g(1, 2, 3, *(4, 5))
class Nothing: pass
try:
g(*Nothing())
except AttributeError, attr:
pass
else:
print "should raise AttributeError: __len__"
class Nothing:
def __len__(self):
return 5
try:
g(*Nothing())
except AttributeError, attr:
pass
else:
print "should raise AttributeError: __getitem__"
class Nothing:
def __len__(self):
return 5
def __getitem__(self, i):
if i < 3:
return i
else:
raise IndexError, i
g(*Nothing())
# make sure the function call doesn't stomp on the dictionary?
d = {'a': 1, 'b': 2, 'c': 3}
d2 = d.copy()
assert d == d2
g(1, d=4, **d)
print d
print d2
assert d == d2, "function call modified dictionary"
# what about willful misconduct?
def saboteur(**kw):
kw['x'] = locals()
d = {}
saboteur(a=1, **d)
assert d == {}
try:
g(1, 2, 3, **{'x':4, 'y':5})
except TypeError, err:
print err
else:
print "should raise TypeError: keyword parameter redefined"
try:
g(1, 2, 3, a=4, b=5, *(6, 7), **{'a':8, 'b':9})
except TypeError, err:
print err
else:
print "should raise TypeError: keyword parameter redefined"
try:
f(**{1:2})
except TypeError, err:
print err
else:
print "should raise TypeError: keywords must be strings"
try:
h(**{'e': 2})
except TypeError, err:
print err
else:
print "should raise TypeError: unexpected keyword argument: e"
try:
h(*h)
except TypeError, err:
print err
else:
print "should raise TypeError: * argument must be a tuple"
try:
h(**h)
except TypeError, err:
print err
else:
print "should raise TypeError: ** argument must be a dictionary"
def f2(*a, **b):
return a, b
d = {}
for i in range(512):
key = 'k%d' % i
d[key] = i
a, b = f2(1, *(2, 3), **d)
print len(a), len(b), b == d

View File

@ -18,7 +18,8 @@ if verbose:
if sys.platform in ('netbsd1',
'freebsd2', 'freebsd3',
'bsdos2', 'bsdos3', 'bsdos4'):
'bsdos2', 'bsdos3', 'bsdos4',
'openbsd', 'openbsd2'):
lockdata = struct.pack('lxxxxlxxxxlhh', 0, 0, 0, FCNTL.F_WRLCK, 0)
elif sys.platform in ['aix3', 'aix4']:
lockdata = struct.pack('hhlllii', FCNTL.F_WRLCK, 0, 0, 0, 0, 0, 0)

68
Lib/dos-8x3/test_for.py Normal file
View File

@ -0,0 +1,68 @@
"""This test checks for correct fork() behavior.
We want fork1() semantics -- only the forking thread survives in the
child after a fork().
On some systems (e.g. Solaris without posix threads) we find that all
active threads survive in the child after a fork(); this is an error.
"""
import os, sys, time, thread
try:
os.fork
except AttributeError:
raise ImportError, "os.fork not defined -- skipping test_fork1"
LONGSLEEP = 2
SHORTSLEEP = 0.5
NUM_THREADS = 4
alive = {}
stop = 0
def f(id):
while not stop:
alive[id] = os.getpid()
try:
time.sleep(SHORTSLEEP)
except IOError:
pass
def main():
for i in range(NUM_THREADS):
thread.start_new(f, (i,))
time.sleep(LONGSLEEP)
a = alive.keys()
a.sort()
assert a == range(NUM_THREADS)
prefork_lives = alive.copy()
cpid = os.fork()
if cpid == 0:
# Child
time.sleep(LONGSLEEP)
n = 0
for key in alive.keys():
if alive[key] != prefork_lives[key]:
n = n+1
os._exit(n)
else:
# Parent
spid, status = os.waitpid(cpid, 0)
assert spid == cpid
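# waitpid() status word: the low byte holds the signal that killed the
# child (0 for a normal exit), the high byte holds the exit code.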
assert status == 0, "cause = %d, exit = %d" % (status&0xff, status>>8)
global stop
# Tell threads to die
stop = 1
time.sleep(2*SHORTSLEEP) # Wait for threads to die
main()

View File

@ -5,7 +5,7 @@
import gdbm
from gdbm import error
from test_support import verbose
from test_support import verbose, TestFailed
filename= '/tmp/delete_me'
@ -18,6 +18,12 @@ if verbose:
g.has_key('a')
g.close()
try:
g['a']
except error:
pass
else:
raise TestFailed, "expected gdbm.error accessing closed database"
g = gdbm.open(filename, 'r')
g.close()
g = gdbm.open(filename, 'rw')

View File

@ -140,11 +140,17 @@ x = eval('1, 0 or 1')
print 'funcdef'
### 'def' NAME parameters ':' suite
### parameters: '(' [varargslist] ')'
### varargslist: (fpdef ['=' test] ',')* '*' NAME
### | fpdef ['=' test] (',' fpdef ['=' test])* [',']
### varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' ('**'|'*' '*') NAME]
### | ('**'|'*' '*') NAME)
### | fpdef ['=' test] (',' fpdef ['=' test])* [',']
### fpdef: NAME | '(' fplist ')'
### fplist: fpdef (',' fpdef)* [',']
### arglist: (argument ',')* (argument | '*' test [',' '**' test] | '**' test)
### argument: [test '='] test # Really [keyword '='] test
def f1(): pass
f1()
f1(*())
f1(*(), **{})
def f2(one_argument): pass
def f3(two, arguments): pass
def f4(two, (compound, (argument, list))): pass
@ -157,16 +163,27 @@ def v3(a, (b, c), *rest): pass
def d01(a=1): pass
d01()
d01(1)
d01(*(1,))
d01(**{'a':2})
def d11(a, b=1): pass
d11(1)
d11(1, 2)
d11(1, **{'b':2})
def d21(a, b, c=1): pass
d21(1, 2)
d21(1, 2, 3)
d21(*(1, 2, 3))
d21(1, *(2, 3))
d21(1, 2, *(3,))
d21(1, 2, **{'c':3})
def d02(a=1, b=2): pass
d02()
d02(1)
d02(1, 2)
d02(*(1, 2))
d02(1, *(2,))
d02(1, **{'b':2})
d02(**{'a': 1, 'b': 2})
def d12(a, b=1, c=2): pass
d12(1)
d12(1, 2)
@ -179,6 +196,9 @@ def d01v(a=1, *rest): pass
d01v()
d01v(1)
d01v(1, 2)
d01v(*(1, 2, 3, 4))
d01v(*(1,))
d01v(**{'a':2})
def d11v(a, b=1, *rest): pass
d11v(1)
d11v(1, 2)
@ -187,21 +207,31 @@ def d21v(a, b, c=1, *rest): pass
d21v(1, 2)
d21v(1, 2, 3)
d21v(1, 2, 3, 4)
d21v(*(1, 2, 3, 4))
d21v(1, 2, **{'c': 3})
def d02v(a=1, b=2, *rest): pass
d02v()
d02v(1)
d02v(1, 2)
d02v(1, 2, 3)
d02v(1, *(2, 3, 4))
d02v(**{'a': 1, 'b': 2})
def d12v(a, b=1, c=2, *rest): pass
d12v(1)
d12v(1, 2)
d12v(1, 2, 3)
d12v(1, 2, 3, 4)
d12v(*(1, 2, 3, 4))
d12v(1, 2, *(3, 4, 5))
d12v(1, *(2,), **{'c': 3})
def d22v(a, b, c=1, d=2, *rest): pass
d22v(1, 2)
d22v(1, 2, 3)
d22v(1, 2, 3, 4)
d22v(1, 2, 3, 4, 5)
d22v(*(1, 2, 3, 4))
d22v(1, 2, *(3, 4, 5))
d22v(1, *(2, 3), **{'d': 4})
### stmt: simple_stmt | compound_stmt
# Tested below
@ -455,6 +485,7 @@ v2(1,2,3,4,5,6,7,8,9,0)
v3(1,(2,3))
v3(1,(2,3),4)
v3(1,(2,3),4,5,6,7,8,9,0)
print
import sys, time
c = sys.path[0]
x = time.time()

View File

@ -77,6 +77,8 @@ def getran2(ndigits):
def test_division_2(x, y):
q, r = divmod(x, y)
q2, r2 = x/y, x%y
pab, pba = x*y, y*x
check(pab == pba, "multiplication does not commute for", x, y)
check(q == q2, "divmod returns different quotient than / for", x, y)
check(r == r2, "divmod returns different mod than % for", x, y)
check(x == q*y + r, "x != q*y + r after divmod on", x, y)
@ -159,7 +161,7 @@ def test_bitop_identities(maxdigits=MAXDIGITS):
test_bitop_identities_2(x, y)
test_bitop_identities_3(x, y, getran((lenx + leny)/2))
# ------------------------------------------------------ hex oct str atol
# ------------------------------------------------- hex oct repr str atol
def slow_format(x, base):
if (x, base) == (0, 8):
@ -181,12 +183,18 @@ def slow_format(x, base):
def test_format_1(x):
from string import atol
for base, mapper in (8, oct), (10, str), (16, hex):
for base, mapper in (8, oct), (10, repr), (16, hex):
got = mapper(x)
expected = slow_format(x, base)
check(got == expected, mapper.__name__, "returned",
got, "but expected", expected, "for", x)
check(atol(got, 0) == x, 'atol("%s", 0) !=' % got, x)
# str() has to be checked a little differently since there's no
# trailing "L"
got = str(x)
expected = slow_format(x, 10)[:-1]
check(got == expected, mapper.__name__, "returned",
got, "but expected", expected, "for", x)
def test_format(maxdigits=MAXDIGITS):
print "long str/hex/oct/atol"

67
Lib/dos-8x3/test_mma.py Normal file
View File

@ -0,0 +1,67 @@
import mmap
import string, os, re, sys
PAGESIZE = mmap.PAGESIZE
def test_both():
"Test mmap module on Unix systems and Windows"
# Create an mmap'ed file
f = open('foo', 'w+')
# Write 2 pages worth of data to the file
f.write('\0'* PAGESIZE)
f.write('foo')
f.write('\0'* (PAGESIZE-3) )
m = mmap.mmap(f.fileno(), 2 * PAGESIZE)
f.close()
# Simple sanity checks
print ' Position of foo:', string.find(m, 'foo') / float(PAGESIZE), 'pages'
assert string.find(m, 'foo') == PAGESIZE
print ' Length of file:', len(m) / float(PAGESIZE), 'pages'
assert len(m) == 2*PAGESIZE
print ' Contents of byte 0:', repr(m[0])
assert m[0] == '\0'
print ' Contents of first 3 bytes:', repr(m[0:3])
assert m[0:3] == '\0\0\0'
# Modify the file's content
print "\n Modifying file's content..."
m[0] = '3'
m[PAGESIZE+3 : PAGESIZE+3+3] = 'bar'
# Check that the modification worked
print ' Contents of byte 0:', repr(m[0])
assert m[0] == '3'
print ' Contents of first 3 bytes:', repr(m[0:3])
assert m[0:3] == '3\0\0'
print ' Contents of second page:', m[PAGESIZE-1 : PAGESIZE + 7]
assert m[PAGESIZE-1 : PAGESIZE + 7] == '\0foobar\0'
m.flush()
# Test doing a regular expression match in an mmap'ed file
match=re.search('[A-Za-z]+', m)
if match is None:
print ' ERROR: regex match on mmap failed!'
else:
start, end = match.span(0)
length = end - start
print ' Regex match on mmap (page start, length of match):',
print start / float(PAGESIZE), length
assert start == PAGESIZE
assert end == PAGESIZE + 6
m.close()
os.unlink("foo")
print ' Test passed'
test_both()

107
Lib/dos-8x3/test_pye.py Normal file
View File

@ -0,0 +1,107 @@
# Very simple test - Parse a file and print what happens
# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
import sys, string
import os
import pyexpat
class Outputter:
def StartElementHandler(self, name, attrs):
print 'Start element:\n\t', name, attrs
def EndElementHandler(self, name):
print 'End element:\n\t', name
def CharacterDataHandler(self, data):
data = string.strip(data)
if data:
print 'Character data:'
print '\t', repr(data)
def ProcessingInstructionHandler(self, target, data):
print 'PI:\n\t', target, data
def StartNamespaceDeclHandler(self, prefix, uri):
print 'NS decl:\n\t', prefix, uri
def EndNamespaceDeclHandler(self, prefix):
print 'End of NS decl:\n\t', prefix
def StartCdataSectionHandler(self):
print 'Start of CDATA section'
def EndCdataSectionHandler(self):
print 'End of CDATA section'
def CommentHandler(self, text):
print 'Comment:\n\t', repr(text)
def NotationDeclHandler(self, *args):
name, base, sysid, pubid = args
print 'Notation declared:', args
def UnparsedEntityDeclHandler(self, *args):
entityName, base, systemId, publicId, notationName = args
print 'Unparsed entity decl:\n\t', args
def NotStandaloneHandler(self, userData):
print 'Not standalone'
return 1
def ExternalEntityRefHandler(self, context, base, sysId, pubId):
print 'External entity ref:', context, base, sysId, pubId
return 1
def DefaultHandler(self, userData):
pass
def DefaultHandlerExpand(self, userData):
pass
out = Outputter()
parser = pyexpat.ParserCreate(namespace_separator='!')
for name in ['StartElementHandler', 'EndElementHandler',
'CharacterDataHandler', 'ProcessingInstructionHandler',
'UnparsedEntityDeclHandler', 'NotationDeclHandler',
'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
'CommentHandler', 'StartCdataSectionHandler',
'EndCdataSectionHandler',
'DefaultHandler', 'DefaultHandlerExpand',
#'NotStandaloneHandler',
'ExternalEntityRefHandler'
]:
setattr(parser, name, getattr(out, name) )
data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>
<root>
<myns:subelement xmlns:myns="http://www.python.org/namespace">
Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
</root>
"""
try:
parser.Parse(data, 1)
except pyexpat.error:
print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
print '** Line', parser.ErrorLineNumber
print '** Column', parser.ErrorColumnNumber
print '** Byte', parser.ErrorByteIndex

View File

@ -11,6 +11,7 @@ def test(msg, results):
fp.seek(0)
m = rfc822.Message(fp)
i = 0
for n, a in m.getaddrlist('to') + m.getaddrlist('cc'):
if verbose:
print 'name:', repr(n), 'addr:', repr(a)
@ -28,6 +29,21 @@ def test(msg, results):
print ' [no match]'
print 'not found:', repr(n), repr(a)
out = m.getdate('date')
if out:
if verbose:
print 'Date:', m.getheader('date')
if out == (1999, 1, 13, 23, 57, 35, 0, 0, 0):
if verbose:
print ' [matched]'
else:
if verbose:
print ' [no match]'
print 'Date conversion failed:', out
# Note: all test cases must have the same date (in various formats),
# or no date!
test('''Date: Wed, 13 Jan 1999 23:57:35 -0500
From: Guido van Rossum <guido@CNRI.Reston.VA.US>
To: "Guido van
@ -40,6 +56,7 @@ test2
test('''From: Barry <bwarsaw@python.org
To: guido@python.org (Guido: the Barbarian)
Subject: nonsense
Date: Wednesday, January 13 1999 23:57:35 -0500
test''', [('Guido: the Barbarian', 'guido@python.org'),
])
@ -47,6 +64,7 @@ test''', [('Guido: the Barbarian', 'guido@python.org'),
test('''From: Barry <bwarsaw@python.org
To: guido@python.org (Guido: the Barbarian)
Cc: "Guido: the Madman" <guido@python.org>
Date: 13-Jan-1999 23:57:35 EST
test''', [('Guido: the Barbarian', 'guido@python.org'),
('Guido: the Madman', 'guido@python.org')
@ -54,6 +72,7 @@ test''', [('Guido: the Barbarian', 'guido@python.org'),
test('''To: "The monster with
the very long name: Guido" <guido@python.org>
Date: Wed, 13 Jan 1999 23:57:35 -0500
test''', [('The monster with\n the very long name: Guido',
'guido@python.org')])
@ -63,6 +82,7 @@ CC: Mike Fletcher <mfletch@vrtelecom.com>,
"'string-sig@python.org'" <string-sig@python.org>
Cc: fooz@bat.com, bart@toof.com
Cc: goit@lip.com
Date: Wed, 13 Jan 1999 23:57:35 -0500
test''', [('Amit J. Patel', 'amitp@Theory.Stanford.EDU'),
('Mike Fletcher', 'mfletch@vrtelecom.com'),
@ -75,8 +95,28 @@ test''', [('Amit J. Patel', 'amitp@Theory.Stanford.EDU'),
# This one is just twisted. I don't know what the proper result should be,
# but it shouldn't be to infloop, which is what used to happen!
test('''To: <[smtp:dd47@mail.xxx.edu]_at_hmhq@hdq-mdm1-imgout.companay.com>
Date: Wed, 13 Jan 1999 23:57:35 -0500
test''', [('', ''),
('', 'dd47@mail.xxx.edu'),
('', '_at_hmhq@hdq-mdm1-imgout.companay.com')
])
# This exercises the old commas-in-a-full-name bug, which should be doing the
# right thing in recent versions of the module.
test('''To: "last, first" <userid@foo.net>
test''', [('last, first', 'userid@foo.net'),
])
test('''To: (Comment stuff) "Quoted name"@somewhere.com
test''', [('Comment stuff', '"Quoted name"@somewhere.com'),
])
test('''To: :
Cc: goit@lip.com
Date: Wed, 13 Jan 1999 23:57:35 -0500
test''', [('', 'goit@lip.com')])

View File

@ -97,7 +97,7 @@ try:
if not canfork or os.fork():
# parent is server
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind(hostname, PORT)
s.bind((hostname, PORT))
s.listen(1)
if verbose:
print 'parent accepting'
@ -133,7 +133,7 @@ try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
if verbose:
print 'child connecting'
s.connect(hostname, PORT)
s.connect((hostname, PORT))
msg = 'socket test'
s.send(msg)
data = s.recv(1024)

View File

@ -130,7 +130,9 @@ if len([1,]) <> 1: raise TestFailed, 'len([1,])'
if len([1,2,3,4,5,6]) <> 6: raise TestFailed, 'len([1,2,3,4,5,6])'
if [1,2]+[3,4] <> [1,2,3,4]: raise TestFailed, 'list concatenation'
if [1,2]*3 <> [1,2,1,2,1,2]: raise TestFailed, 'list repetition *3'
if [1,2]*3L <> [1,2,1,2,1,2]: raise TestFailed, 'list repetition *3L'
if 0*[1,2,3] <> []: raise TestFailed, 'list repetition 0*'
if 0L*[1,2,3] <> []: raise TestFailed, 'list repetition 0L*'
if min([1,2]) <> 1 or max([1,2]) <> 2: raise TestFailed, 'min/max list'
if 0 in [0,1,2] and 1 in [0,1,2] and 2 in [0,1,2] and 3 not in [0,1,2]: pass
else: raise TestFailed, 'in/not in list'
@ -150,10 +152,17 @@ if a != [1, 1, 2, 3, 4, 5, 5]:
print '6.5.3a Additional list operations'
a = [0,1,2,3,4]
a[0L] = 1
a[1L] = 2
a[2L] = 3
if a <> [1,2,3,3,4]: raise TestFailed, 'list item assignment [0L], [1L], [2L]'
a[0] = 5
a[1] = 6
a[2] = 7
if a <> [5,6,7,3,4]: raise TestFailed, 'list item assignment [0], [1], [2]'
a[-2L] = 88
a[-1L] = 99
if a <> [5,6,7,88,99]: raise TestFailed, 'list item assignment [-2L], [-1L]'
a[-2] = 8
a[-1] = 9
if a <> [5,6,7,8,9]: raise TestFailed, 'list item assignment [-2], [-1]'
@ -161,12 +170,21 @@ a[:2] = [0,4]
a[-3:] = []
a[1:1] = [1,2,3]
if a <> [0,1,2,3,4]: raise TestFailed, 'list slice assignment'
a[ 1L : 4L] = [7,8,9]
if a <> [0,7,8,9,4]: raise TestFailed, 'list slice assignment using long ints'
del a[1:4]
if a <> [0,4]: raise TestFailed, 'list slice deletion'
del a[0]
if a <> [4]: raise TestFailed, 'list item deletion [0]'
del a[-1]
if a <> []: raise TestFailed, 'list item deletion [-1]'
a=range(0,5)
del a[1L:4L]
if a <> [0,4]: raise TestFailed, 'list slice deletion'
del a[0L]
if a <> [4]: raise TestFailed, 'list item deletion [0]'
del a[-1L]
if a <> []: raise TestFailed, 'list item deletion [-1]'
a.append(0)
a.append(1)
a.append(2)
@ -192,6 +210,13 @@ def myComparison(x,y):
z = range(12)
z.sort(myComparison)
# Test extreme cases with long ints
a = [0,1,2,3,4]
if a[ -pow(2,128L): 3 ] != [0,1,2]:
raise TestFailed, "list slicing with too-small long integer"
if a[ 3: pow(2,145L) ] != [3,4]:
raise TestFailed, "list slicing with too-large long integer"
print '6.6 Mappings == Dictionaries'
d = {}
if d.keys() <> []: raise TestFailed, '{}.keys()'

401
Lib/dos-8x3/test_uni.py Normal file
View File

@ -0,0 +1,401 @@
""" Test script for the Unicode implementation.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""
from test_support import verbose
import sys
def test(method, input, output, *args):
if verbose:
print '%s.%s%s =? %s... ' % (repr(input), method, args, output),
try:
f = getattr(input, method)
value = apply(f, args)
except:
value = sys.exc_type
exc = sys.exc_info()[:2]
else:
exc = None
if value != output:
if verbose:
print 'no'
print '*',f, `input`, `output`, `value`
if exc:
print ' value == %s: %s' % (exc)
else:
if verbose:
print 'yes'
test('capitalize', u' hello ', u' hello ')
test('capitalize', u'hello ', u'Hello ')
test('title', u' hello ', u' Hello ')
test('title', u'hello ', u'Hello ')
test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
test('title', u"getInt", u'Getint')
test('find', u'abcdefghiabc', 0, u'abc')
test('find', u'abcdefghiabc', 9, u'abc', 1)
test('find', u'abcdefghiabc', -1, u'def', 4)
test('rfind', u'abcdefghiabc', 9, u'abc')
test('lower', u'HeLLo', u'hello')
test('lower', u'hello', u'hello')
test('upper', u'HeLLo', u'HELLO')
test('upper', u'HELLO', u'HELLO')
if 0:
transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
test('maketrans', u'abc', transtable, u'xyz')
test('maketrans', u'abc', ValueError, u'xyzq')
test('split', u'this is the split function',
[u'this', u'is', u'the', u'split', u'function'])
test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
test('split', u'a b c d', [u'a', u'b c d'], None, 1)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
test('split', u'a b c d', [u'a b c d'], None, 0)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
# join now works with any sequence type
class Sequence:
def __init__(self): self.seq = 'wxyz'
def __len__(self): return len(self.seq)
def __getitem__(self, i): return self.seq[i]
test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
test('join', u' ', u'w x y z', Sequence())
test('join', u' ', TypeError, 7)
class BadSeq(Sequence):
def __init__(self): self.seq = [7, u'hello', 123L]
test('join', u' ', TypeError, BadSeq())
result = u''
for i in range(10):
if i > 0:
result = result + u':'
result = result + u'x'*10
test('join', u':', result, [u'x' * 10] * 10)
test('join', u':', result, (u'x' * 10,) * 10)
test('strip', u' hello ', u'hello')
test('lstrip', u' hello ', u'hello ')
test('rstrip', u' hello ', u' hello')
test('strip', u'hello', u'hello')
test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
if 0:
test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')
table = string.maketrans('a', u'A')
test('translate', u'abc', u'Abc', table)
test('translate', u'xyz', u'xyz', table)
test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
test('replace', u'one!two!three!', u'onetwothree', '!', '')
test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
test('startswith', u'hello', 1, u'he')
test('startswith', u'hello', 1, u'hello')
test('startswith', u'hello', 0, u'hello world')
test('startswith', u'hello', 1, u'')
test('startswith', u'hello', 0, u'ello')
test('startswith', u'hello', 1, u'ello', 1)
test('startswith', u'hello', 1, u'o', 4)
test('startswith', u'hello', 0, u'o', 5)
test('startswith', u'hello', 1, u'', 5)
test('startswith', u'hello', 0, u'lo', 6)
test('startswith', u'helloworld', 1, u'lowo', 3)
test('startswith', u'helloworld', 1, u'lowo', 3, 7)
test('startswith', u'helloworld', 0, u'lowo', 3, 6)
test('endswith', u'hello', 1, u'lo')
test('endswith', u'hello', 0, u'he')
test('endswith', u'hello', 1, u'')
test('endswith', u'hello', 0, u'hello world')
test('endswith', u'helloworld', 0, u'worl')
test('endswith', u'helloworld', 1, u'worl', 3, 9)
test('endswith', u'helloworld', 1, u'world', 3, 12)
test('endswith', u'helloworld', 1, u'lowo', 1, 7)
test('endswith', u'helloworld', 1, u'lowo', 2, 7)
test('endswith', u'helloworld', 1, u'lowo', 3, 7)
test('endswith', u'helloworld', 0, u'lowo', 4, 7)
test('endswith', u'helloworld', 0, u'lowo', 3, 8)
test('endswith', u'ab', 0, u'ab', 0, 1)
test('endswith', u'ab', 0, u'ab', 0, 0)
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi')
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab def\ng hi', 4)
if 0:
test('capwords', u'abc def ghi', u'Abc Def Ghi')
test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
# Comparisons:
print 'Testing Unicode comparisons...',
assert u'abc' == 'abc'
assert 'abc' == u'abc'
assert u'abc' == u'abc'
assert u'abcd' > 'abc'
assert 'abcd' > u'abc'
assert u'abcd' > u'abc'
assert u'abc' < 'abcd'
assert 'abc' < u'abcd'
assert u'abc' < u'abcd'
print 'done.'
test('ljust', u'abc', u'abc ', 10)
test('rjust', u'abc', u' abc', 10)
test('center', u'abc', u' abc ', 10)
test('ljust', u'abc', u'abc ', 6)
test('rjust', u'abc', u' abc', 6)
test('center', u'abc', u' abc ', 6)
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
test('islower', u'a', 1)
test('islower', u'A', 0)
test('islower', u'\n', 0)
test('islower', u'\u1FFc', 0)
test('islower', u'abc', 1)
test('islower', u'aBc', 0)
test('islower', u'abc\n', 1)
test('isupper', u'a', 0)
test('isupper', u'A', 1)
test('isupper', u'\n', 0)
test('isupper', u'\u1FFc', 0)
test('isupper', u'ABC', 1)
test('isupper', u'AbC', 0)
test('isupper', u'ABC\n', 1)
test('istitle', u'a', 0)
test('istitle', u'A', 1)
test('istitle', u'\n', 0)
test('istitle', u'\u1FFc', 1)
test('istitle', u'A Titlecased Line', 1)
test('istitle', u'A\nTitlecased Line', 1)
test('istitle', u'A Titlecased, Line', 1)
test('istitle', u'Greek \u1FFcitlecases ...', 1)
test('istitle', u'Not a capitalized String', 0)
test('istitle', u'Not\ta Titlecase String', 0)
test('istitle', u'Not--a Titlecase String', 0)
test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1)
test('translate', u"abababc", u'bbbc', {ord('a'):None})
test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
# Contains:
print 'Testing Unicode contains method...',
assert ('a' in u'abdb') == 1
assert ('a' in u'bdab') == 1
assert ('a' in u'bdaba') == 1
assert ('a' in u'bdba') == 1
assert ('a' in u'bdba') == 1
assert (u'a' in u'bdba') == 1
assert (u'a' in u'bdb') == 0
assert (u'a' in 'bdb') == 0
assert (u'a' in 'bdba') == 1
assert (u'a' in ('a',1,None)) == 1
assert (u'a' in (1,None,'a')) == 1
assert (u'a' in (1,None,u'a')) == 1
assert ('a' in ('a',1,None)) == 1
assert ('a' in (1,None,'a')) == 1
assert ('a' in (1,None,u'a')) == 1
assert ('a' in ('x',1,u'y')) == 0
assert ('a' in ('x',1,None)) == 0
print 'done.'
# Formatting:
print 'Testing Unicode formatting strings...',
assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
assert u"%c" % (u"abc",) == u'a'
assert u"%c" % ("abc",) == u'a'
assert u"%c" % (34,) == u'"'
assert u"%c" % (36,) == u'$'
assert u"%r, %r" % (u"abc", "abc") == u"u'abc', 'abc'"
assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
assert u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"} == u'abc, def'
# formatting jobs delegated from the string implementation:
assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
assert '...%s...' % u"abc" == u'...abc...'
print 'done.'
# Test builtin codecs
print 'Testing builtin codecs...',
assert unicode('hello','ascii') == u'hello'
assert unicode('hello','utf-8') == u'hello'
assert unicode('hello','utf8') == u'hello'
assert unicode('hello','latin-1') == u'hello'
try:
u'Andr\202 x'.encode('ascii')
u'Andr\202 x'.encode('ascii','strict')
except ValueError:
pass
else:
raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception"
assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"
try:
unicode('Andr\202 x','ascii')
unicode('Andr\202 x','ascii','strict')
except ValueError:
pass
else:
raise AssertionError, "unicode('Andr\202') failed to raise an exception"
assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
assert u'hello'.encode('ascii') == 'hello'
assert u'hello'.encode('utf-8') == 'hello'
assert u'hello'.encode('utf8') == 'hello'
assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
assert u'hello'.encode('latin-1') == 'hello'
u = u''.join(map(unichr, range(1024)))
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
assert unicode(u.encode(encoding),encoding) == u
u = u''.join(map(unichr, range(256)))
for encoding in (
'latin-1',
):
try:
assert unicode(u.encode(encoding),encoding) == u
except AssertionError:
print '*** codec "%s" failed round-trip' % encoding
except ValueError,why:
print '*** codec for "%s" failed: %s' % (encoding, why)
u = u''.join(map(unichr, range(128)))
for encoding in (
'ascii',
):
try:
assert unicode(u.encode(encoding),encoding) == u
except AssertionError:
print '*** codec "%s" failed round-trip' % encoding
except ValueError,why:
print '*** codec for "%s" failed: %s' % (encoding, why)
print 'done.'
print 'Testing standard mapping codecs...',
print '0-127...',
s = ''.join(map(chr, range(128)))
for encoding in (
'cp037', 'cp1026',
'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
'cp863', 'cp865', 'cp866',
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
'mac_cyrillic', 'mac_latin2',
'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
'cp1256', 'cp1257', 'cp1258',
'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
'cp1006', 'cp875', 'iso8859_8',
### These have undefined mappings:
#'cp424',
):
try:
assert unicode(s,encoding).encode(encoding) == s
except AssertionError:
print '*** codec "%s" failed round-trip' % encoding
except ValueError,why:
print '*** codec for "%s" failed: %s' % (encoding, why)
print '128-255...',
s = ''.join(map(chr, range(128,256)))
for encoding in (
'cp037', 'cp1026',
'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
'cp863', 'cp865', 'cp866',
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
'mac_cyrillic', 'mac_latin2',
### These have undefined mappings:
#'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
#'cp1256', 'cp1257', 'cp1258',
#'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
#'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
### These fail the round-trip:
#'cp1006', 'cp875', 'iso8859_8',
):
try:
assert unicode(s,encoding).encode(encoding) == s
except AssertionError:
print '*** codec "%s" failed round-trip' % encoding
except ValueError,why:
print '*** codec for "%s" failed: %s' % (encoding, why)
print 'done.'
print 'Testing Unicode string concatenation...',
assert (u"abc" u"def") == u"abcdef"
assert ("abc" u"def") == u"abcdef"
assert (u"abc" "def") == u"abcdef"
assert (u"abc" u"def" "ghi") == u"abcdefghi"
assert ("abc" "def" u"ghi") == u"abcdefghi"
print 'done.'

147
Lib/dos-8x3/test_win.py Normal file
View File

@ -0,0 +1,147 @@
# Test the Windows-specific winreg module.
# Only winreg functions not hit here: FlushKey, LoadKey and SaveKey
from winreg import *
import os, sys
test_key_name = "SOFTWARE\\Python Registry Test Key - Delete Me"
test_data = [
("Int Value", 45, REG_DWORD),
("String Val", "A string value", REG_SZ,),
(u"Unicode Val", u"A Unicode value", REG_SZ,),
("StringExpand", "The path is %path%", REG_EXPAND_SZ),
("UnicodeExpand", u"The path is %path%", REG_EXPAND_SZ),
("Multi-string", ["Lots", "of", "string", "values"], REG_MULTI_SZ),
("Multi-unicode", [u"Lots", u"of", u"unicode", u"values"], REG_MULTI_SZ),
("Multi-mixed", [u"Unicode", u"and", "string", "values"],REG_MULTI_SZ),
("Raw Data", ("binary"+chr(0)+"data"), REG_BINARY),
]
def WriteTestData(root_key):
# Set the default value for this key.
SetValue(root_key, test_key_name, REG_SZ, "Default value")
key = CreateKey(root_key, test_key_name)
# Create a sub-key
sub_key = CreateKey(key, "sub_key")
# Give the sub-key some named values
for value_name, value_data, value_type in test_data:
SetValueEx(sub_key, value_name, 0, value_type, value_data)
# Check we wrote as many items as we thought.
nkeys, nvalues, since_mod = QueryInfoKey(key)
assert nkeys==1, "Not the correct number of sub keys"
assert nvalues==1, "Not the correct number of values"
nkeys, nvalues, since_mod = QueryInfoKey(sub_key)
assert nkeys==0, "Not the correct number of sub keys"
assert nvalues==len(test_data), "Not the correct number of values"
# Close this key this way...
# (but before we do, copy the key as an integer - this allows
# us to test that the key really gets closed).
int_sub_key = int(sub_key)
CloseKey(sub_key)
try:
QueryInfoKey(int_sub_key)
raise RuntimeError, "It appears the CloseKey() function does not close the actual key!"
except EnvironmentError:
pass
# ... and close that key that way :-)
int_key = int(key)
key.Close()
try:
QueryInfoKey(int_key)
raise RuntimeError, "It appears the key.Close() function does not close the actual key!"
except EnvironmentError:
pass
def ReadTestData(root_key):
# Check we can get default value for this key.
val = QueryValue(root_key, test_key_name)
assert val=="Default value", "Registry didnt give back the correct value"
key = OpenKey(root_key, test_key_name)
# Read the sub-keys
sub_key = OpenKey(key, "sub_key")
# Check I can enumerate over the values.
index = 0
while 1:
try:
data = EnumValue(sub_key, index)
except EnvironmentError:
break
assert data in test_data, "didn't read back the correct test data."
index = index + 1
assert index==len(test_data), "Didn't read the correct number of items"
# Check I can directly access each item
for value_name, value_data, value_type in test_data:
read_val, read_typ = QueryValueEx(sub_key, value_name)
assert read_val==value_data and read_typ == value_type, \
"Could not directly read the value"
sub_key.Close()
# Enumerate our main key.
read_val = EnumKey(key, 0)
assert read_val == "sub_key", "Read subkey value wrong"
try:
EnumKey(key, 1)
assert 0, "Was able to get a second key when I only have one!"
except EnvironmentError:
pass
key.Close()
def DeleteTestData(root_key):
key = OpenKey(root_key, test_key_name, 0, KEY_ALL_ACCESS)
sub_key = OpenKey(key, "sub_key", 0, KEY_ALL_ACCESS)
# It is not necessary to delete the values before deleting
# the key (although subkeys must not exist). We delete them
# manually just to prove we can :-)
for value_name, value_data, value_type in test_data:
DeleteValue(sub_key, value_name)
nkeys, nvalues, since_mod = QueryInfoKey(sub_key)
assert nkeys==0 and nvalues==0, "subkey not empty before delete"
sub_key.Close()
DeleteKey(key, "sub_key")
try:
# Shouldn't be able to delete it twice!
DeleteKey(key, "sub_key")
assert 0, "Deleting the key twice succeeded"
except EnvironmentError:
pass
key.Close()
DeleteKey(root_key, test_key_name)
# Opening should now fail!
try:
key = OpenKey(root_key, test_key_name)
assert 0, "Could open the non-existant key"
except WindowsError: # Use this error name this time
pass
def TestAll(root_key):
WriteTestData(root_key)
ReadTestData(root_key)
DeleteTestData(root_key)
# Test on my local machine.
TestAll(HKEY_CURRENT_USER)
print "Local registry tests worked"
try:
remote_name = sys.argv[sys.argv.index("--remote")+1]
except (IndexError, ValueError):
remote_name = None
if remote_name is not None:
try:
remote_key = ConnectRegistry(remote_name, HKEY_CURRENT_USER)
except EnvironmentError, exc:
print "Could not connect to the remote machine -", exc.strerror
remote_key = None
if remote_key is not None:
TestAll(remote_key)
print "Remote registry tests worked"
else:
print "Remote registry calls can be tested using",
print "'test_winreg.py --remote \\\\machine_name'"

26
Lib/dos-8x3/test_zip.py Normal file
View File

@ -0,0 +1,26 @@
import zipfile, os
srcname = "junk9630.tmp"
zipname = "junk9708.tmp"
try:
fp = open(srcname, "w") # Make a source file with some lines
for i in range(0, 1000):
fp.write("Test of zipfile line %d.\n" % i)
fp.close()
zip = zipfile.ZipFile(zipname, "w") # Create the ZIP archive
zip.write(srcname, srcname)
zip.write(srcname, "another.name")
zip.close()
zip = zipfile.ZipFile(zipname, "r") # Read the ZIP archive
zip.read("another.name")
zip.read(srcname)
zip.close()
finally:
if os.path.isfile(srcname): # Remove temporary files
os.unlink(srcname)
if os.path.isfile(zipname):
os.unlink(zipname)
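Reading an archive back outside a test harness follows the same shape; a minimal sketch, assuming ZipFile.namelist() from the same zipfile module (the archive name reuses the temporary name above purely for illustration -- the test deletes its files on exit):

import zipfile

zip = zipfile.ZipFile("junk9708.tmp", "r")
for name in zip.namelist():            # stored member names
    print name, len(zip.read(name)), "bytes"
zip.close()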

View File

@ -80,14 +80,14 @@ else:
# in order to provide more variations.
for sync in [zlib.Z_NO_FLUSH, zlib.Z_SYNC_FLUSH, zlib.Z_FULL_FLUSH]:
for level in range(10):
obj = zlib.compressobj( level )
d = obj.compress( buf[:3000] )
d = d + obj.flush( sync )
d = d + obj.compress( buf[3000:] )
d = d + obj.flush()
if zlib.decompress(d) != buf:
print "Decompress failed: flush mode=%i, level=%i" % (sync,level)
del obj
obj = zlib.compressobj( level )
d = obj.compress( buf[:3000] )
d = d + obj.flush( sync )
d = d + obj.compress( buf[3000:] )
d = d + obj.flush()
if zlib.decompress(d) != buf:
print "Decompress failed: flush mode=%i, level=%i" % (sync,level)
del obj
def ignore():
"""An empty function with a big string.

View File

@ -1,5 +1,4 @@
# threading.py:
# Proposed new threading module, emulating a subset of Java's threading model
"""Proposed new threading module, emulating a subset of Java's threading model."""
import sys
import time
@ -238,7 +237,7 @@ def Semaphore(*args, **kwargs):
class _Semaphore(_Verbose):
# After Tim Peters' semaphore class, but bnot quite the same (no maximum)
# After Tim Peters' semaphore class, but not quite the same (no maximum)
def __init__(self, value=1, verbose=None):
assert value >= 0, "Semaphore initial value must be >= 0"
@ -506,7 +505,7 @@ class _DummyThread(Thread):
def __init__(self):
Thread.__init__(self, name=_newname("Dummy-%d"))
self.__Thread_started = 1
self._Thread__started = 1
_active_limbo_lock.acquire()
_active[_get_ident()] = self
_active_limbo_lock.release()
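The one-line fix in this hunk is a name-mangling correction: a double-underscore attribute assigned inside class Thread is stored as _Thread__started, so code outside Thread's class body has to spell the mangled name out. A small illustration (class names here are stand-ins, not the real module):

class Thread:
    def __init__(self):
        self.__started = 0         # stored as self._Thread__started

class DummyThread(Thread):
    def __init__(self):
        Thread.__init__(self)
        # self.__started = 1 here would mangle to _DummyThread__started
        # and create a new attribute, leaving Thread's flag untouched:
        self._Thread__started = 1

assert DummyThread()._Thread__started == 1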

9
Lib/dos-8x3/threadst.py Normal file
View File

@ -0,0 +1,9 @@
import thread
# Start an empty thread to initialise the thread mechanics (and the global lock!)
# This thread finishes immediately, so by itself it has little effect on the
# test results; it matters because it initialises the global lock
thread.start_new_thread(lambda : 1, ())
import test.pystone
test.pystone.main()

View File

@ -1,4 +1,4 @@
# Format and print Python stack traces
"""Extract, format and print information about Python stack traces."""
import linecache
import string
@ -10,6 +10,8 @@ def _print(file, str='', terminator='\n'):
def print_list(extracted_list, file=None):
"""Print the list of tuples as returned by extract_tb() or
extract_stack() as a formatted stack trace to the given file."""
if not file:
file = sys.stderr
for filename, lineno, name, line in extracted_list:
@ -19,6 +21,12 @@ def print_list(extracted_list, file=None):
_print(file, ' %s' % string.strip(line))
def format_list(extracted_list):
"""Given a list of tuples as returned by extract_tb() or
extract_stack(), return a list of strings ready for printing.
Each string in the resulting list corresponds to the item with
the same index in the argument list. Each string ends in a
newline; the strings may contain internal newlines as well, for
those items whose source text line is not None."""
list = []
for filename, lineno, name, line in extracted_list:
item = ' File "%s", line %d, in %s\n' % (filename,lineno,name)
@ -29,6 +37,10 @@ def format_list(extracted_list):
def print_tb(tb, limit=None, file=None):
"""Print up to 'limit' stack trace entries from the traceback 'tb'.
If 'limit' is omitted or None, all entries are printed. If 'file' is
omitted or None, the output goes to sys.stderr; otherwise 'file'
should be an open file or file-like object with a write() method."""
if not file:
file = sys.stderr
if limit is None:
@ -49,9 +61,18 @@ def print_tb(tb, limit=None, file=None):
n = n+1
def format_tb(tb, limit = None):
"""A shorthand for 'format_list(extract_stack(f, limit))."""
return format_list(extract_tb(tb, limit))
def extract_tb(tb, limit = None):
"""Return a list of up to 'limit' pre-processed stack trace entries
extracted from the traceback object 'tb'. This is useful for
alternate formatting of stack traces. If 'limit' is omitted or None,
all entries are extracted. A pre-processed stack trace entry is a
quadruple (filename, line number, function name, text) representing
the information that is usually printed for a stack trace. The text
is a string with leading and trailing whitespace stripped; if the
source is not available it is None."""
if limit is None:
if hasattr(sys, 'tracebacklimit'):
limit = sys.tracebacklimit
@ -73,10 +94,18 @@ def extract_tb(tb, limit = None):
def print_exception(etype, value, tb, limit=None, file=None):
"""Print exception information and up to 'limit' stack trace entries
from the traceback 'tb' to 'file'. This differs from print_tb() in
the following ways: (1) if traceback is not None, it prints a header
"Traceback (most recent call last):"; (2) it prints the exception type and
value after the stack trace; (3) if type is SyntaxError and value has
the appropriate format, it prints the line where the syntax error
occurred with a caret on the next line indicating the approximate
position of the error."""
if not file:
file = sys.stderr
if tb:
_print(file, 'Traceback (innermost last):')
_print(file, 'Traceback (most recent call last):')
print_tb(tb, limit, file)
lines = format_exception_only(etype, value)
for line in lines[:-1]:
@ -84,8 +113,14 @@ def print_exception(etype, value, tb, limit=None, file=None):
_print(file, lines[-1], '')
def format_exception(etype, value, tb, limit = None):
"""Format a stack trace and the exception information. The arguments
have the same meaning as the corresponding arguments to
print_exception(). The return value is a list of strings, each
ending in a newline and some containing internal newlines. When
these lines are concatenated and printed, exactly the same text is
printed as print_exception() would print."""
if tb:
list = ['Traceback (innermost last):\n']
list = ['Traceback (most recent call last):\n']
list = list + format_tb(tb, limit)
else:
list = []
@ -93,6 +128,14 @@ def format_exception(etype, value, tb, limit = None):
return list
def format_exception_only(etype, value):
"""Format the exception part of a traceback. The arguments are the
exception type and value such as given by sys.last_type and
sys.last_value. The return value is a list of strings, each ending
in a newline. Normally, the list contains a single string;
however, for SyntaxError exceptions, it contains several lines that
(when printed) display detailed information about where the syntax
error occurred. The message indicating which exception occurred is
always the last string in the list."""
list = []
if type(etype) == types.ClassType:
stype = etype.__name__
@ -128,6 +171,10 @@ def format_exception_only(etype, value):
def print_exc(limit=None, file=None):
"""This is a shorthand for 'print_exception(sys.exc_type,
sys.exc_value, sys.exc_traceback, limit, file)'.
(In fact, it uses sys.exc_info() to retrieve the same information
in a thread-safe way.)"""
if not file:
file = sys.stderr
try:
@ -137,6 +184,8 @@ def print_exc(limit=None, file=None):
etype = value = tb = None
def print_last(limit=None, file=None):
"""This is a shorthand for 'print_exception(sys.last_type,
sys.last_value, sys.last_traceback, limit, file)'."""
if not file:
file = sys.stderr
print_exception(sys.last_type, sys.last_value, sys.last_traceback,
@ -144,6 +193,10 @@ def print_last(limit=None, file=None):
def print_stack(f=None, limit=None, file=None):
"""This function prints a stack trace from its invocation point.
The optional 'f' argument can be used to specify an alternate stack
frame at which to start. The optional 'limit' and 'file' arguments
have the same meaning as for print_exception()."""
if f is None:
try:
raise ZeroDivisionError
@ -152,6 +205,7 @@ def print_stack(f=None, limit=None, file=None):
print_list(extract_stack(f, limit), file)
def format_stack(f=None, limit=None):
"""A shorthand for 'format_list(extract_stack(f, limit))'."""
if f is None:
try:
raise ZeroDivisionError
@ -160,6 +214,12 @@ def format_stack(f=None, limit=None):
return format_list(extract_stack(f, limit))
def extract_stack(f=None, limit = None):
"""Extract the raw traceback from the current stack frame. The
return value has the same format as for extract_tb(). The optional
'f' and 'limit' arguments have the same meaning as for print_stack().
Each item in the list is a quadruple (filename, line number,
function name, text), and the entries are in order from oldest
to newest stack frame."""
if f is None:
try:
raise ZeroDivisionError
@ -184,13 +244,14 @@ def extract_stack(f=None, limit = None):
list.reverse()
return list
# Calculate the correct line number of the traceback given in tb (even
# with -O on).
# Coded by Marc-Andre Lemburg from the example of PyCode_Addr2Line()
# in compile.c.
# Revised version by Jim Hugunin to work with JPython too.
def tb_lineno(tb):
"""Calculate the correct line number of the traceback given in tb
(even with -O on)."""
# Coded by Marc-Andre Lemburg from the example of PyCode_Addr2Line()
# in compile.c.
# Revised version by Jim Hugunin to work with JPython too.
c = tb.tb_frame.f_code
if not hasattr(c, 'co_lnotab'):
return tb.tb_lineno
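The docstrings added above all trade in the same quadruples. A minimal sketch of the typical use, formatting a caught exception with only functions shown in this diff:

import sys, traceback

try:
    1 / 0
except:
    etype, value, tb = sys.exc_info()
    # extract_tb() yields (filename, line number, function name, text)
    for filename, lineno, name, line in traceback.extract_tb(tb):
        print '%s:%d in %s: %s' % (filename, lineno, name, line)
    for chunk in traceback.format_exception_only(etype, value):
        print chunk,
    etype = value = tb = None      # avoid keeping the frames alive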

View File

@ -1,4 +1,4 @@
# A more or less complete user-defined wrapper around dictionary objects
"""A more or less complete user-defined wrapper around dictionary objects."""
class UserDict:
def __init__(self, dict=None):

View File

@ -1,13 +1,16 @@
# A more or less complete user-defined wrapper around list objects
"""A more or less complete user-defined wrapper around list objects."""
class UserList:
def __init__(self, list=None):
def __init__(self, initlist=None):
self.data = []
if list is not None:
if type(list) == type(self.data):
self.data[:] = list
if initlist is not None:
# XXX should this accept an arbitrary sequence?
if type(initlist) == type(self.data):
self.data[:] = initlist
elif isinstance(initlist, UserList):
self.data[:] = initlist.data[:]
else:
self.data[:] = list.data[:]
self.data = list(initlist)
def __repr__(self): return repr(self.data)
def __cmp__(self, other):
if isinstance(other, UserList):
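The rewritten constructor accepts a real list, another UserList, or, in the fallback branch, anything list() accepts. A quick sketch, assuming the module is importable as UserList as in the standard library:

from UserList import UserList

a = UserList([1, 2, 3])     # a real list is copied into self.data
b = UserList(a)             # another UserList: its data is copied
c = UserList((4, 5))        # fallback: list() is applied to the argument
assert b.data == [1, 2, 3] and c.data == [4, 5]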

158
Lib/dos-8x3/userstri.py Normal file
View File

@ -0,0 +1,158 @@
#!/usr/bin/env python
## vim:ts=4:et:nowrap
"""A user-defined wrapper around string objects
Note: string objects have grown methods in Python 1.6.
This module requires Python 1.6 or later.
"""
from types import StringType, UnicodeType
import sys
class UserString:
def __init__(self, seq):
if isinstance(seq, StringType) or isinstance(seq, UnicodeType):
self.data = seq
elif isinstance(seq, UserString):
self.data = seq.data[:]
else:
self.data = str(seq)
def __str__(self): return str(self.data)
def __repr__(self): return repr(self.data)
def __int__(self): return int(self.data)
def __long__(self): return long(self.data)
def __float__(self): return float(self.data)
def __complex__(self): return complex(self.data)
def __hash__(self): return hash(self.data)
def __cmp__(self, string):
if isinstance(string, UserString):
return cmp(self.data, string.data)
else:
return cmp(self.data, string)
def __contains__(self, char):
return char in self.data
def __len__(self): return len(self.data)
def __getitem__(self, index): return self.__class__(self.data[index])
def __getslice__(self, start, end):
start = max(start, 0); end = max(end, 0)
return self.__class__(self.data[start:end])
def __add__(self, other):
if isinstance(other, UserString):
return self.__class__(self.data + other.data)
elif isinstance(other, StringType) or isinstance(other, UnicodeType):
return self.__class__(self.data + other)
else:
return self.__class__(self.data + str(other))
def __radd__(self, other):
if isinstance(other, StringType) or isinstance(other, UnicodeType):
return self.__class__(other + self.data)
else:
return self.__class__(str(other) + self.data)
def __mul__(self, n):
return self.__class__(self.data*n)
__rmul__ = __mul__
# the following methods are defined in alphabetical order:
def capitalize(self): return self.__class__(self.data.capitalize())
def center(self, width): return self.__class__(self.data.center(width))
def count(self, sub, start=0, end=sys.maxint):
return self.data.count(sub, start, end)
def encode(self, encoding=None, errors=None): # XXX improve this?
if encoding:
if errors:
return self.__class__(self.data.encode(encoding, errors))
else:
return self.__class__(self.data.encode(encoding))
else:
return self.__class__(self.data.encode())
def endswith(self, suffix, start=0, end=sys.maxint):
return self.data.endswith(suffix, start, end)
def expandtabs(self, tabsize=8):
return self.__class__(self.data.expandtabs(tabsize))
def find(self, sub, start=0, end=sys.maxint):
return self.data.find(sub, start, end)
def index(self, sub, start=0, end=sys.maxint):
return self.data.index(sub, start, end)
def isdecimal(self): return self.data.isdecimal()
def isdigit(self): return self.data.isdigit()
def islower(self): return self.data.islower()
def isnumeric(self): return self.data.isnumeric()
def isspace(self): return self.data.isspace()
def istitle(self): return self.data.istitle()
def isupper(self): return self.data.isupper()
def join(self, seq): return self.data.join(seq)
def ljust(self, width): return self.__class__(self.data.ljust(width))
def lower(self): return self.__class__(self.data.lower())
def lstrip(self): return self.__class__(self.data.lstrip())
def replace(self, old, new, maxsplit=-1):
return self.__class__(self.data.replace(old, new, maxsplit))
def rfind(self, sub, start=0, end=sys.maxint):
return self.data.rfind(sub, start, end)
def rindex(self, sub, start=0, end=sys.maxint):
return self.data.rindex(sub, start, end)
def rjust(self, width): return self.__class__(self.data.rjust(width))
def rstrip(self): return self.__class__(self.data.rstrip())
def split(self, sep=None, maxsplit=-1):
return self.data.split(sep, maxsplit)
def splitlines(self, keepends=0): return self.data.splitlines(keepends)
def startswith(self, prefix, start=0, end=sys.maxint):
return self.data.startswith(prefix, start, end)
def strip(self): return self.__class__(self.data.strip())
def swapcase(self): return self.__class__(self.data.swapcase())
def title(self): return self.__class__(self.data.title())
def translate(self, table, deletechars=""):
return self.__class__(self.data.translate(table, deletechars))
def upper(self): return self.__class__(self.data.upper())
class MutableString(UserString):
"""mutable string objects
Python strings are immutable objects. This has the advantage that
strings may be used as dictionary keys. If this property isn't needed
and you insist on changing string values in place instead, you may cheat
and use MutableString.
But the purpose of this class is an educational one: to prevent
people from inventing their own mutable string class derived
from UserString and then forgetting to remove (override) the
__hash__ method inherited from UserString. This would lead to
errors that would be very hard to track down.
A faster and better solution is to rewrite your program using lists."""
def __init__(self, string=""):
self.data = string
def __hash__(self):
raise TypeError, "unhashable type (it is mutable)"
def __setitem__(self, index, sub):
if index < 0 or index >= len(self.data): raise IndexError
self.data = self.data[:index] + sub + self.data[index+1:]
def __delitem__(self, index):
if index < 0 or index >= len(self.data): raise IndexError
self.data = self.data[:index] + self.data[index+1:]
def __setslice__(self, start, end, sub):
start = max(start, 0); end = max(end, 0)
if isinstance(sub, UserString):
self.data = self.data[:start]+sub.data+self.data[end:]
elif isinstance(sub, StringType) or isinstance(sub, UnicodeType):
self.data = self.data[:start]+sub+self.data[end:]
else:
self.data = self.data[:start]+str(sub)+self.data[end:]
def __delslice__(self, start, end):
start = max(start, 0); end = max(end, 0)
self.data = self.data[:start] + self.data[end:]
def immutable(self):
return UserString(self.data)
if __name__ == "__main__":
# execute the regression test to stdout, if called as a script:
import os
called_in_dir, called_as = os.path.split(sys.argv[0])
called_in_dir = os.path.abspath(called_in_dir)
called_as, py = os.path.splitext(called_as)
sys.path.append(os.path.join(called_in_dir, 'test'))
if '-q' in sys.argv:
import test_support
test_support.verbose = 0
__import__('test_' + called_as.lower())
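A short sketch of what the wrapper buys you, and of the __hash__ trap the MutableString docstring warns about (assuming the module is importable as UserString, as in the standard library):

from UserString import UserString, MutableString

s = UserString("hello")
assert s.upper() == "HELLO"    # methods return wrappers; __cmp__ compares data
d = {s: 1}                     # the immutable wrapper is hashable
m = MutableString("hello")
m[0] = "H"                     # in-place item assignment
assert str(m) == "Hello"
try:
    d[m] = 2                   # the mutable variant refuses to hash
except TypeError:
    pass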