UserString class from Peter Funk <pf@artcom-gmbh.de>.

2000-04-03 03:51:50 +00:00 · 2000-04-03 03:51:50 +00:00 · a22b576d05
parent e3ba931aa4
commit a22b576d05
4 changed files with 435 additions and 0 deletions
--- a/Doc/lib/libuserdict.tex
+++ b/Doc/lib/libuserdict.tex
@ -63,3 +63,52 @@ provide the following attribute:
 A real Python list object used to store the contents of the
 \class{UserList} class.
 \end{memberdesc}
+
+
+\section{\module{UserString} ---
+         Class wrapper for string objects}
+
+\declaremodule{standard}{UserString}
+\modulesynopsis{Class wrapper for string objects.}
+\moduleauthor{Peter Funk}{pf@artcom-gmbh.de}
+\sectionauthor{Peter Funk}{pf@artcom-gmbh.de}
+
+This module defines a class that acts as a wrapper around
+string objects.  It is a useful base class for
+your own string-like classes, which can inherit from
+it and override existing methods or add new ones.  In this way one
+can add new behaviours to strings.
+
+The \module{UserString} module defines the \class{UserString} class:
+
+\begin{classdesc}{UserString}{\optional{sequence}}
+Return a class instance that simulates a string or a Unicode string object.
+The instance's content is kept in a regular string or Unicode string
+object, which is accessible via the
+\member{data} attribute of \class{UserString} instances.  The instance's
+contents are initially set to a copy of \var{sequence}.
+\var{sequence} can be either a regular Python string or Unicode string,
+an instance of \class{UserString} (or a subclass), or an arbitrary
+sequence which can be converted into a string using the built-in
+\function{str()} function.
+\end{classdesc}
+
+In addition to supporting the methods and operations of string  or
+unicode objects (see section \ref{typesseq}), \class{UserString} instances
+provide the following attribute:
+
+\begin{memberdesc}{data}
+A real Python string or unicode object used to store the content of the
+\class{UserString} class.
+\end{memberdesc}
+
+\begin{classdesc}{MutableString}{\optional{sequence}}
+This class is derived from the \class{UserString} above and redefines
+strings to be \emph{mutable}.  Mutable strings can't be used as
+dictionary keys, because dictionaries require \emph{immutable} objects as
+keys.  The main intention of this class is to serve as an educational
+example of inheritance and of the necessity to remove (override) the
+\function{__hash__} method in order to trap attempts to use a
+mutable object as a dictionary key, which would otherwise be very
+error-prone and hard to track down.
+\end{classdesc}
--- a/Lib/UserString.py
+++ b/Lib/UserString.py
@ -0,0 +1,158 @@
+#!/usr/bin/env python
+## vim:ts=4:et:nowrap
+"""A user-defined wrapper around string objects
+
+Note: string objects have grown methods in Python 1.6.
+This module requires Python 1.6 or later.
+"""
+from types import StringType, UnicodeType
+import sys
+
+class UserString:
+    def __init__(self, seq):
+        if isinstance(seq, StringType) or isinstance(seq, UnicodeType):
+            self.data = seq
+        elif isinstance(seq, UserString):
+            self.data = seq.data[:]
+        else: 
+            self.data = str(seq)
+    def __str__(self): return str(self.data)
+    def __repr__(self): return repr(self.data)
+    def __int__(self): return int(self.data)
+    def __long__(self): return long(self.data)
+    def __float__(self): return float(self.data)
+    def __complex__(self): return complex(self.data)
+    def __hash__(self): return hash(self.data)
+
+    def __cmp__(self, string):
+        if isinstance(string, UserString):
+            return cmp(self.data, string.data)
+        else:
+            return cmp(self.data, string)
+    def __contains__(self, char):
+        return char in self.data
+
+    def __len__(self): return len(self.data)
+    def __getitem__(self, index): return self.__class__(self.data[index])
+    def __getslice__(self, start, end):
+        start = max(start, 0); end = max(end, 0)
+        return self.__class__(self.data[start:end])
+
+    def __add__(self, other):
+        if isinstance(other, UserString):
+            return self.__class__(self.data + other.data)
+        elif isinstance(other, StringType) or isinstance(other, UnicodeType):
+            return self.__class__(self.data + other)
+        else:
+            return self.__class__(self.data + str(other))
+    def __radd__(self, other):
+        if isinstance(other, StringType) or isinstance(other, UnicodeType):
+            return self.__class__(other + self.data)
+        else:
+            return self.__class__(str(other) + self.data)
+    def __mul__(self, n):
+        return self.__class__(self.data*n)
+    __rmul__ = __mul__
+
+    # the following methods are defined in alphabetical order:
+    def capitalize(self): return self.__class__(self.data.capitalize())
+    def center(self, width): return self.__class__(self.data.center(width))
+    def count(self, sub, start=0, end=sys.maxint):
+        return self.data.count(sub, start, end)
+    def encode(self, encoding=None, errors=None): # XXX improve this?
+        if encoding:
+            if errors:
+                return self.__class__(self.data.encode(encoding, errors))
+            else:
+                return self.__class__(self.data.encode(encoding))
+        else: 
+            return self.__class__(self.data.encode())
+    def endswith(self, suffix, start=0, end=sys.maxint):
+        return self.data.endswith(suffix, start, end)
+    def expandtabs(self, tabsize=8): 
+        return self.__class__(self.data.expandtabs(tabsize))
+    def find(self, sub, start=0, end=sys.maxint): 
+        return self.data.find(sub, start, end)
+    def index(self, sub, start=0, end=sys.maxint): 
+        return self.data.index(sub, start, end)
+    def isdecimal(self): return self.data.isdecimal()
+    def isdigit(self): return self.data.isdigit()
+    def islower(self): return self.data.islower()
+    def isnumeric(self): return self.data.isnumeric()
+    def isspace(self): return self.data.isspace()
+    def istitle(self): return self.data.istitle()
+    def isupper(self): return self.data.isupper()
+    def join(self, seq): return self.data.join(seq)
+    def ljust(self, width): return self.__class__(self.data.ljust(width))
+    def lower(self): return self.__class__(self.data.lower())
+    def lstrip(self): return self.__class__(self.data.lstrip())
+    def replace(self, old, new, maxsplit=-1): 
+        return self.__class__(self.data.replace(old, new, maxsplit))
+    def rfind(self, sub, start=0, end=sys.maxint): 
+        return self.data.rfind(sub, start, end)
+    def rindex(self, sub, start=0, end=sys.maxint): 
+        return self.data.rindex(sub, start, end)
+    def rjust(self, width): return self.__class__(self.data.rjust(width))
+    def rstrip(self): return self.__class__(self.data.rstrip())
+    def split(self, sep=None, maxsplit=-1): 
+        return self.data.split(sep, maxsplit)
+    def splitlines(self, maxsplit=-1): return self.data.splitlines(maxsplit)
+    def startswith(self, prefix, start=0, end=sys.maxint): 
+        return self.data.startswith(prefix, start, end)
+    def strip(self): return self.__class__(self.data.strip())
+    def swapcase(self): return self.__class__(self.data.swapcase())
+    def title(self): return self.__class__(self.data.title())
+    def translate(self, table, deletechars=""): 
+        return self.__class__(self.data.translate(table, deletechars))
+    def upper(self): return self.__class__(self.data.upper())
+
+class MutableString(UserString):
+    """mutable string objects
+
+    Python strings are immutable objects.  This has the advantage that
+    strings may be used as dictionary keys.  If this property isn't needed
+    and you insist on changing string values in place instead, you may cheat
+    and use MutableString.
+
+    But the purpose of this class is an educational one: to prevent
+    people from inventing their own mutable string class derived
+    from UserString and then forgetting to remove (override) the
+    __hash__ method inherited from UserString.  This would lead to
+    errors that would be very hard to track down.
+
+    A faster and better solution is to rewrite your program using lists."""
+    def __init__(self, string=""):
+        self.data = string
+    def __hash__(self): 
+        raise TypeError, "unhashable type (it is mutable)"
+    def __setitem__(self, index, sub):
+        if index < 0 or index >= len(self.data): raise IndexError
+        self.data = self.data[:index] + sub + self.data[index+1:]
+    def __delitem__(self, index):
+        if index < 0 or index >= len(self.data): raise IndexError
+        self.data = self.data[:index] + self.data[index+1:]
+    def __setslice__(self, start, end, sub):
+        start = max(start, 0); end = max(end, 0)
+        if isinstance(sub, UserString):
+            self.data = self.data[:start]+sub.data+self.data[end:]
+        elif isinstance(sub, StringType) or isinstance(sub, UnicodeType):
+            self.data = self.data[:start]+sub+self.data[end:]
+        else:
+            self.data =  self.data[:start]+str(sub)+self.data[end:]
+    def __delslice__(self, start, end):
+        start = max(start, 0); end = max(end, 0)
+        self.data = self.data[:start] + self.data[end:]
+    def immutable(self):
+        return UserString(self.data)
+    
+if __name__ == "__main__":
+    # execute the regression test to stdout, if called as a script:
+    import os
+    called_in_dir, called_as = os.path.split(sys.argv[0])
+    called_in_dir = os.path.abspath(called_in_dir)
+    called_as, py = os.path.splitext(called_as)
+    sys.path.append(os.path.join(called_in_dir, 'test'))
+    if '-q' in sys.argv:
+        import test_support
+        test_support.verbose = 0
+    __import__('test_' + called_as.lower())
--- a/Lib/test/output/test_userstring
+++ b/Lib/test/output/test_userstring
@ -0,0 +1 @@
+test_userstring
--- a/Lib/test/test_userstring.py
+++ b/Lib/test/test_userstring.py
@ -0,0 +1,227 @@
+#!/usr/bin/env python
+import sys, string
+from test_support import verbose
+# UserString is a wrapper around the native builtin string type.
+# UserString instances should behave similarly to builtin string objects.
+# The test cases were in part derived from 'test_string.py'.
+from UserString import UserString
+
+if __name__ == "__main__":
+    verbose = 0
+
+tested_methods = {}
+
+def test(methodname, input, *args):
+    global tested_methods
+    tested_methods[methodname] = 1
+    if verbose:
+        print '%s.%s(%s) ' % (input, methodname, args),
+    u = UserString(input)
+    objects = [input, u, UserString(u)]
+    res = [""] * 3
+    for i in range(3):
+        object = objects[i]
+        try:
+            f = getattr(object, methodname)
+            res[i] = apply(f, args)
+        except:
+            res[i] = sys.exc_type
+    if res[0] != res[1]:
+        if verbose:
+            print 'no'
+        print `input`, f, `res[0]`, "<>", `res[1]`
+    else:
+        if verbose:
+            print 'yes'
+    if res[1] != res[2]:
+        if verbose:
+            print 'no'
+        print `input`, f, `res[1]`, "<>", `res[2]`
+    else:
+        if verbose:
+            print 'yes'
+
+test('capitalize', ' hello ')
+test('capitalize', 'hello ')
+
+test('center', 'foo', 0)
+test('center', 'foo', 3)
+test('center', 'foo', 16)
+
+test('ljust', 'foo', 0)
+test('ljust', 'foo', 3)
+test('ljust', 'foo', 16)
+
+test('rjust', 'foo', 0)
+test('rjust', 'foo', 3)
+test('rjust', 'foo', 16)
+
+test('count', 'abcabcabc', 'abc')
+test('count', 'abcabcabc', 'abc', 1)
+test('count', 'abcabcabc', 'abc', -1)
+test('count', 'abcabcabc', 'abc', 7)
+test('count', 'abcabcabc', 'abc', 0, 3)
+test('count', 'abcabcabc', 'abc', 0, 333)
+
+test('find', 'abcdefghiabc', 'abc')
+test('find', 'abcdefghiabc', 'abc', 1)
+test('find', 'abcdefghiabc', 'def', 4)
+test('rfind', 'abcdefghiabc', 'abc')
+
+test('index', 'abcabcabc', 'abc')
+test('index', 'abcabcabc', 'abc', 1)
+test('index', 'abcabcabc', 'abc', -1)
+test('index', 'abcabcabc', 'abc', 7)
+test('index', 'abcabcabc', 'abc', 0, 3)
+test('index', 'abcabcabc', 'abc', 0, 333)
+
+test('rindex', 'abcabcabc', 'abc')
+test('rindex', 'abcabcabc', 'abc', 1)
+test('rindex', 'abcabcabc', 'abc', -1)
+test('rindex', 'abcabcabc', 'abc', 7)
+test('rindex', 'abcabcabc', 'abc', 0, 3)
+test('rindex', 'abcabcabc', 'abc', 0, 333)
+
+
+test('lower', 'HeLLo')
+test('lower', 'hello')
+test('upper', 'HeLLo')
+test('upper', 'HELLO')
+
+test('title', ' hello ')
+test('title', 'hello ')
+test('title', "fOrMaT thIs aS titLe String")
+test('title', "fOrMaT,thIs-aS*titLe;String")
+test('title', "getInt")
+
+test('expandtabs', 'abc\rab\tdef\ng\thi')
+test('expandtabs', 'abc\rab\tdef\ng\thi', 8)
+test('expandtabs', 'abc\rab\tdef\ng\thi', 4)
+test('expandtabs', 'abc\r\nab\tdef\ng\thi', 4)
+
+test('islower', 'a')
+test('islower', 'A')
+test('islower', '\n')
+test('islower', 'abc')
+test('islower', 'aBc')
+test('islower', 'abc\n')
+
+test('isupper', 'a')
+test('isupper', 'A')
+test('isupper', '\n')
+test('isupper', 'ABC')
+test('isupper', 'AbC')
+test('isupper', 'ABC\n')
+
+test('isdigit', '  0123456789')
+test('isdigit', '56789')
+test('isdigit', '567.89')
+test('isdigit', '0123456789abc')
+
+test('isspace', '')
+test('isspace', ' ')
+test('isspace', ' \t')
+test('isspace', ' \t\f\n')
+
+test('istitle', 'a')
+test('istitle', 'A')
+test('istitle', '\n')
+test('istitle', 'A Titlecased Line')
+test('istitle', 'A\nTitlecased Line')
+test('istitle', 'A Titlecased, Line')
+test('istitle', 'Not a capitalized String')
+test('istitle', 'Not\ta Titlecase String')
+test('istitle', 'Not--a Titlecase String')
+
+test('splitlines', "abc\ndef\n\rghi")
+test('splitlines', "abc\ndef\n\r\nghi")
+test('splitlines', "abc\ndef\r\nghi")
+test('splitlines', "abc\ndef\r\nghi\n")
+test('splitlines', "abc\ndef\r\nghi\n\r")
+test('splitlines', "\nabc\ndef\r\nghi\n\r")
+test('splitlines', "\nabc\ndef\r\nghi\n\r")
+test('splitlines', "\nabc\ndef\r\nghi\n\r")
+
+test('split', 'this is the split function')
+test('split', 'a|b|c|d', '|')
+test('split', 'a|b|c|d', '|', 2)
+test('split', 'a b c d', None, 1)
+test('split', 'a b c d', None, 2)
+test('split', 'a b c d', None, 3)
+test('split', 'a b c d', None, 4)
+test('split', 'a b c d', None, 0)
+test('split', 'a  b  c  d', None, 2)
+test('split', 'a b c d ')
+
+# join now works with any sequence type
+class Sequence:
+    def __init__(self): self.seq = 'wxyz'
+    def __len__(self): return len(self.seq)
+    def __getitem__(self, i): return self.seq[i]
+
+test('join', '', ('a', 'b', 'c', 'd'))
+test('join', '', Sequence())
+test('join', '', 7)
+
+class BadSeq(Sequence):
+    def __init__(self): self.seq = [7, 'hello', 123L]
+
+test('join', '', BadSeq())
+
+test('strip', '   hello   ')
+test('lstrip', '   hello   ')
+test('rstrip', '   hello   ')
+test('strip', 'hello')
+
+test('swapcase', 'HeLLo cOmpUteRs')
+transtable = string.maketrans("abc", "xyz")
+test('translate', 'xyzabcdef', transtable, 'def')
+
+transtable = string.maketrans('a', 'A')
+test('translate', 'abc', transtable)
+test('translate', 'xyz', transtable)
+
+test('replace', 'one!two!three!', '!', '@', 1)
+test('replace', 'one!two!three!', '!', '')
+test('replace', 'one!two!three!', '!', '@', 2)
+test('replace', 'one!two!three!', '!', '@', 3)
+test('replace', 'one!two!three!', '!', '@', 4)
+test('replace', 'one!two!three!', '!', '@', 0)
+test('replace', 'one!two!three!', '!', '@')
+test('replace', 'one!two!three!', 'x', '@')
+test('replace', 'one!two!three!', 'x', '@', 2)
+
+test('startswith', 'hello', 'he')
+test('startswith', 'hello', 'hello')
+test('startswith', 'hello', 'hello world')
+test('startswith', 'hello', '')
+test('startswith', 'hello', 'ello')
+test('startswith', 'hello', 'ello', 1)
+test('startswith', 'hello', 'o', 4)
+test('startswith', 'hello', 'o', 5)
+test('startswith', 'hello', '', 5)
+test('startswith', 'hello', 'lo', 6)
+test('startswith', 'helloworld', 'lowo', 3)
+test('startswith', 'helloworld', 'lowo', 3, 7)
+test('startswith', 'helloworld', 'lowo', 3, 6)
+
+test('endswith', 'hello', 'lo')
+test('endswith', 'hello', 'he')
+test('endswith', 'hello', '')
+test('endswith', 'hello', 'hello world')
+test('endswith', 'helloworld', 'worl')
+test('endswith', 'helloworld', 'worl', 3, 9)
+test('endswith', 'helloworld', 'world', 3, 12)
+test('endswith', 'helloworld', 'lowo', 1, 7)
+test('endswith', 'helloworld', 'lowo', 2, 7)
+test('endswith', 'helloworld', 'lowo', 3, 7)
+test('endswith', 'helloworld', 'lowo', 4, 7)
+test('endswith', 'helloworld', 'lowo', 3, 8)
+test('endswith', 'ab', 'ab', 0, 1)
+test('endswith', 'ab', 'ab', 0, 0)
+
+# TODO: test cases for: int, long, float, complex, +, * and cmp
+s = ""
+for builtin_method in dir(s):
+    if not tested_methods.has_key(builtin_method):
+        print "no regression test case for method '"+builtin_method+"'"