Doc strings and reformatting with 4 spaces by Mitch Chapman.

Untabified and minor tweaks by me.
1997-12-10 16:17:39 +00:00 · 1997-12-10 16:17:39 +00:00 · 9ab94c18d8
parent 24e62e2c7c
commit 9ab94c18d8
1 changed files with 660 additions and 608 deletions
--- a/Lib/rfc822.py
+++ b/Lib/rfc822.py
@ -1,42 +1,42 @@
-# RFC-822 message manipulation class.
-#
-# XXX This is only a very rough sketch of a full RFC-822 parser;
-# in particular the tokenizing of addresses does not adhere to all the
-# quoting rules.
-#
-# Directions for use:
-#
-# To create a Message object: first open a file, e.g.:
-#   fp = open(file, 'r')
-# (or use any other legal way of getting an open file object, e.g. use
-# sys.stdin or call os.popen()).
-# Then pass the open file object to the Message() constructor:
-#   m = Message(fp)
-#
-# To get the text of a particular header there are several methods:
-#   str = m.getheader(name)
-#   str = m.getrawheader(name)
-# where name is the name of the header, e.g. 'Subject'.
-# The difference is that getheader() strips the leading and trailing
-# whitespace, while getrawheader() doesn't.  Both functions retain
-# embedded whitespace (including newlines) exactly as they are
-# specified in the header, and leave the case of the text unchanged.
-#
-# For addresses and address lists there are functions
-#   realname, mailaddress = m.getaddr(name) and
-#   list = m.getaddrlist(name)
-# where the latter returns a list of (realname, mailaddr) tuples.
-#
-# There is also a method
-#   time = m.getdate(name)
-# which parses a Date-like field and returns a time-compatible tuple,
-# i.e. a tuple such as returned by time.localtime() or accepted by
-# time.mktime().
-#
-# See the class definition for lower level access methods.
-#
-# There are also some utility functions here.
+"""RFC-822 message manipulation class.

+XXX This is only a very rough sketch of a full RFC-822 parser;
+in particular the tokenizing of addresses does not adhere to all the
+quoting rules.
+
+Directions for use:
+
+To create a Message object: first open a file, e.g.:
+  fp = open(file, 'r')
+(or use any other legal way of getting an open file object, e.g. use
+sys.stdin or call os.popen()).
+Then pass the open file object to the Message() constructor:
+  m = Message(fp)
+
+To get the text of a particular header there are several methods:
+  str = m.getheader(name)
+  str = m.getrawheader(name)
+where name is the name of the header, e.g. 'Subject'.
+The difference is that getheader() strips the leading and trailing
+whitespace, while getrawheader() doesn't.  Both functions retain
+embedded whitespace (including newlines) exactly as they are
+specified in the header, and leave the case of the text unchanged.
+
+For addresses and address lists there are functions
+  realname, mailaddress = m.getaddr(name) and
+  list = m.getaddrlist(name)
+where the latter returns a list of (realname, mailaddr) tuples.
+
+There is also a method
+  time = m.getdate(name)
+which parses a Date-like field and returns a time-compatible tuple,
+i.e. a tuple such as returned by time.localtime() or accepted by
+time.mktime().
+
+See the class definition for lower level access methods.
+
+There are also some utility functions here.
+"""

 import re
 import string
@ -47,10 +47,10 @@ _blanklines = ('\r\n', '\n')		# Optimization for islast()


 class Message:
-
-	# Initialize the class instance and read the headers.
+    """Represents a single RFC-822-compliant message."""
    
    def __init__(self, fp, seekable = 1):
+        """Initialize the class instance and read the headers."""
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
@ -70,29 +70,28 @@ class Message:
            except IOError:
                self.seekable = 0
    
-
-	# Rewind the file to the start of the body (if seekable).
-
    def rewindbody(self):
+        """Rewind the file to the start of the body (if seekable)."""
        if not self.seekable:
            raise IOError, "unseekable file"
        self.fp.seek(self.startofbody)
    
-
-	# Read header lines up to the entirely blank line that
-	# terminates them.  The (normally blank) line that ends the
-	# headers is skipped, but not included in the returned list.
-	# If a non-header line ends the headers, (which is an error),
-	# an attempt is made to backspace over it; it is never
-	# included in the returned list.
-	#
-	# The variable self.status is set to the empty string if all
-	# went well, otherwise it is an error message.
-	# The variable self.headers is a completely uninterpreted list
-	# of lines contained in the header (so printing them will
-	# reproduce the header exactly as it appears in the file).
-
    def readheaders(self):
+        """Read header lines.
+        
+        Read header lines up to the entirely blank line that
+        terminates them.  The (normally blank) line that ends the
+        headers is skipped, but not included in the returned list.
+        If a non-header line ends the headers, (which is an error),
+        an attempt is made to backspace over it; it is never
+        included in the returned list.
+        
+        The variable self.status is set to the empty string if all
+        went well, otherwise it is an error message.
+        The variable self.headers is a completely uninterpreted list
+        of lines contained in the header (so printing them will
+        reproduce the header exactly as it appears in the file).
+        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = list = []
@ -138,26 +137,28 @@ class Message:
                                                self.status + '; bad seek'
                break
    
-
-	# Method to determine whether a line is a legal end of
-	# RFC-822 headers.  You may override this method if your
-	# application wants to bend the rules, e.g. to strip trailing
-	# whitespace, or to recognise MH template separators
-	# ('--------').  For convenience (e.g. for code reading from
-	# sockets) a line consisting of \r\n also matches.
-
    def islast(self, line):
+        """Determine whether a line is a legal end of RFC-822 headers.
+        
+        You may override this method if your application wants
+        to bend the rules, e.g. to strip trailing whitespace,
+        or to recognise MH template separators ('--------').
+        For convenience (e.g. for code reading from sockets) a
+        line consisting of \r\n also matches.                
+        """
        return line in _blanklines
    
-
-	# Look through the list of headers and find all lines matching
-	# a given header name (and their continuation lines).
-	# A list of the lines is returned, without interpretation.
-	# If the header does not occur, an empty list is returned.
-	# If the header occurs multiple times, all occurrences are
-	# returned.  Case is not important in the header name.
-
    def getallmatchingheaders(self, name):
+        """Find all header lines matching a given header name.
+        
+        Look through the list of headers and find all lines
+        matching a given header name (and their continuation
+        lines).  A list of the lines is returned, without
+        interpretation.  If the header does not occur, an
+        empty list is returned.  If the header occurs multiple
+        times, all occurrences are returned.  Case is not
+        important in the header name.
+        """
        name = string.lower(name) + ':'
        n = len(name)
        list = []
@ -171,11 +172,13 @@ class Message:
                list.append(line)
        return list
    
-
-	# Similar, but return only the first matching header (and its
-	# continuation lines).
-
    def getfirstmatchingheader(self, name):
+        """Get the first header line matching name.
+        
+        This is similar to getallmatchingheaders, but it returns
+        only the first matching header (and its continuation
+        lines).
+        """
        name = string.lower(name) + ':'
        n = len(name)
        list = []
@ -190,37 +193,41 @@ class Message:
                list.append(line)
        return list
    
-
-	# A higher-level interface to getfirstmatchingheader().
-	# Return a string containing the literal text of the header
-	# but with the keyword stripped.  All leading, trailing and
-	# embedded whitespace is kept in the string, however.
-	# Return None if the header does not occur.
-
    def getrawheader(self, name):
+        """A higher-level interface to getfirstmatchingheader().
+        
+        Return a string containing the literal text of the
+        header but with the keyword stripped.  All leading,
+        trailing and embedded whitespace is kept in the
+        string, however.
+        Return None if the header does not occur.
+        """
+        
        list = self.getfirstmatchingheader(name)
        if not list:
            return None
        list[0] = list[0][len(name) + 1:]
        return string.joinfields(list, '')
    
-
-	# The normal interface: return a stripped version of the
-	# header value with a name, or None if it doesn't exist.  This
-	# uses the dictionary version which finds the *last* such
-	# header.
-
    def getheader(self, name):
+        """Get the header value for a name.
+        
+        This is the normal interface: it returns a stripped
+        version of the header value for a given header name,
+        or None if it doesn't exist.  This uses the dictionary
+        version which finds the *last* such header.
+        """
        try:
            return self.dict[string.lower(name)]
        except KeyError:
            return None
    
-
-	# Retrieve a single address from a header as a tuple, e.g.
-	# ('Guido van Rossum', 'guido@cwi.nl').
-
    def getaddr(self, name):
+        """Get a single address from a header, as a tuple.
+        
+        An example return value:
+        ('Guido van Rossum', 'guido@cwi.nl')
+        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
@ -228,10 +235,12 @@ class Message:
        else:
            return (None, None)
    
-	# Retrieve a list of addresses from a header, where each
-	# address is a tuple as returned by getaddr().
-
    def getaddrlist(self, name):
+        """Get a list of addresses from a header.
+        
+        Retrieves a list of addresses from a header, where each
+        address is a tuple as returned by getaddr().
+        """
        # New, by Ben Escoto
        try:
            data = self[name]
@ -240,22 +249,25 @@ class Message:
        a = AddrlistClass(data)
        return a.getaddrlist()
    
-	# Retrieve a date field from a header as a tuple compatible
-	# with time.mktime().
-
    def getdate(self, name):
+        """Retrieve a date field from a header.
+        
+        Retrieves a date field from the named header, returning
+        a tuple compatible with time.mktime().
+        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)
    
-	# Retrieve a date field from a header as a 10-tuple.  
-	# The first 9 elements make up a tuple compatible
-	# with time.mktime(), and the 10th is the offset
-	# of the poster's time zone from GMT/UTC.
-
    def getdate_tz(self, name):
+        """Retrieve a date field from a header as a 10-tuple.
+        
+        The first 9 elements make up a tuple compatible with
+        time.mktime(), and the 10th is the offset of the poster's
+        time zone from GMT/UTC.
+        """
        try:
            data = self[name]
        except KeyError:
@ -266,21 +278,30 @@ class Message:
    # Access as a dictionary (only finds *last* header of each type):
    
    def __len__(self):
+        """Get the number of headers in a message."""
        return len(self.dict)
    
    def __getitem__(self, name):
+        """Get a specific header, as from a dictionary."""
        return self.dict[string.lower(name)]
    
    def has_key(self, name):
+        """Determine whether a message contains the named header."""
        return self.dict.has_key(string.lower(name))
    
    def keys(self):
+        """Get all of a message's header field names."""
        return self.dict.keys()
    
    def values(self):
+        """Get all of a message's header field values."""
        return self.dict.values()
    
    def items(self):
+        """Get all of a message's headers.
+        
+        Returns a list of name, value tuples.
+        """
        return self.dict.items()


@ -292,9 +313,8 @@ class Message:
 # XXX The inverses of the parse functions may also be useful.


-# Remove quotes from a string.
-
 def unquote(str):
+    """Remove quotes from a string."""
    if len(str) > 1:
        if str[0] == '"' and str[-1:] == '"':
            return str[1:-1]
@ -303,9 +323,8 @@ def unquote(str):
    return str


-# Add quotes around a string.
-
 def quote(str):
+    """Add quotes around a string."""
    return '"%s"' % string.join(
    string.split(
    string.join(
@ -315,9 +334,8 @@ def quote(str):
    '\\"')


-# External interface to parse an address
-
 def parseaddr(address):
+    """Parse an address into a (realname, mailaddr) tuple."""
    a = AddrlistClass(address)
    list = a.getaddrlist()
    if not list:
@ -326,12 +344,19 @@ def parseaddr(address):
        return list[0]


-# Address parser class by Ben Escoto
-
 class AddrlistClass:
+    """Address parser class by Ben Escoto.
+    
+    To understand what this class does, it helps to have a copy of
+    RFC-822 in front of you.
+    """
    
    def __init__(self, field):
+        """Initialize a new instance.
        
+        `field' is an unparsed address header field, containing
+        one or more addresses.
+        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
@ -341,9 +366,8 @@ class AddrlistClass:
        self.field = field
        self.commentlist = []
    
-
    def gotonext(self):
-
+        """Parse up to the start of the next address."""
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
@ -352,13 +376,17 @@ class AddrlistClass:
            else: break
    
    def getaddrlist(self):
+        """Parse all addresses.
        
+        Returns a list containing all of the addresses.
+        """
        ad = self.getaddress()
        if ad:
            return ad + self.getaddrlist()
        else: return []
    
    def getaddress(self):
+        """Parse the next address."""
        self.commentlist = []
        self.gotonext()
        
@ -412,9 +440,11 @@ class AddrlistClass:
            self.pos = self.pos + 1
        return returnlist
    
-
    def getrouteaddr(self):
-	# This just skips all the route stuff and returns the addrspec
+        """Parse a route address (Return-path value).
+        
+        This method just skips all the route stuff and returns the addrspec.
+        """
        if self.field[self.pos] != '<':
            return
        
@ -442,9 +472,8 @@ class AddrlistClass:
        
        return adlist
    
-
    def getaddrspec(self):
-
+        """Parse an RFC-822 addr-spec."""
        aslist = []
        
        self.gotonext()
@ -467,9 +496,8 @@ class AddrlistClass:
        self.gotonext()
        return string.join(aslist, '') + self.getdomain()
    
-
    def getdomain(self):
-
+        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
@ -487,9 +515,19 @@ class AddrlistClass:
        
        return string.join(sdlist, '')
    
-
    def getdelimited(self, beginchar, endchars, allowcomments = 1):
+        """Parse a header fragment delimited by special characters.
        
+        `beginchar' is the start character for the fragment.
+        If self is not looking at an instance of `beginchar' then
+        getdelimited returns the empty string.
+        
+        `endchars' is a sequence of allowable end-delimiting characters.
+        Parsing stops when one of these is encountered.
+        
+        If `allowcomments' is non-zero, embedded RFC-822 comments
+        are allowed within the parsed fragment.
+        """
        if self.field[self.pos] != beginchar:
            return ''
        
@ -514,17 +552,19 @@ class AddrlistClass:
        return string.join(slist, '')
    
    def getquote(self):
+        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)
    
    def getcomment(self):
+        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)
    
    def getdomainliteral(self):
+        """Parse an RFC-822 domain-literal."""
        return self.getdelimited('[', ']\r', 0)
    
-
    def getatom(self):
-
+        """Parse an RFC-822 atom."""
        atomlist = ['']
        
        while self.pos < len(self.field):
@ -535,9 +575,12 @@ class AddrlistClass:
        
        return string.join(atomlist, '')
    
-
    def getphraselist(self):
+        """Parse a sequence of RFC-822 phrases.
        
+        A phrase is a sequence of words, which are in turn either
+        RFC-822 atoms or quoted-strings.
+        """
        plist = []
        
        while self.pos < len(self.field):
@ -574,7 +617,12 @@ _timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
 	      'PST':-800, 'PDT':-700     # Pacific
 	      }    

+
 def parsedate_tz(data):
+    """Convert a date string to a time tuple.
+    
+    Accounts for military timezones.
+    """
    data = string.split(data)
    if data[0][-1] == ',' or data[0] in _daynames:
        # There's a dayname here. Skip it
@ -630,12 +678,15 @@ def parsedate_tz(data):
    tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
    return tuple

+
 def parsedate(data):
+    """Convert a time string to a time tuple."""
    t=parsedate_tz(data)
    if type(t)==type( () ):
        return t[:9]
    else: return t    

+
 def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.
    
@ -648,6 +699,7 @@ def mktime_tz(data):
    t = time.mktime(data[:8] + (0,))
    return t + data[9] - time.timezone

+
 # When used as script, run a small test program.
 # The first command line argument must be a filename containing one
 # message in RFC-822 format.