""" Test script for the Unicode implementation. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. """ from test_support import verbose import sys def test(method, input, output, *args): if verbose: print '%s.%s%s =? %s... ' % (repr(input), method, args, output), try: f = getattr(input, method) value = apply(f, args) except: value = sys.exc_type exc = sys.exc_info() else: exc = None if value != output: if verbose: print 'no' print '*',f, `input`, `output`, `value` if exc: print ' value == %s: %s' % (exc[:2]) else: if verbose: print 'yes' test('capitalize', u' hello ', u' hello ') test('capitalize', u'hello ', u'Hello ') test('title', u' hello ', u' Hello ') test('title', u'hello ', u'Hello ') test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String') test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String') test('title', u"getInt", u'Getint') test('find', u'abcdefghiabc', 0, u'abc') test('find', u'abcdefghiabc', 9, u'abc', 1) test('find', u'abcdefghiabc', -1, u'def', 4) test('rfind', u'abcdefghiabc', 9, u'abc') test('lower', u'HeLLo', u'hello') test('lower', u'hello', u'hello') test('upper', u'HeLLo', u'HELLO') test('upper', u'HELLO', u'HELLO') if 0: transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' test('maketrans', u'abc', transtable, u'xyz') test('maketrans', u'abc', ValueError, u'xyzq') test('split', u'this is the split function', [u'this', u'is', u'the', u'split', u'function']) test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|') test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2) test('split', u'a b c d', [u'a', u'b c d'], None, 1) test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2) test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3) test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4) test('split', u'a b c d', [u'a b c d'], None, 0) test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2) test('split', u'a b c d ', [u'a', u'b', u'c', u'd']) # join now works with any sequence type class Sequence: def __init__(self): self.seq = 'wxyz' def __len__(self): return len(self.seq) def __getitem__(self, i): return self.seq[i] test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd']) test('join', u'', u'abcd', (u'a', u'b', u'c', u'd')) test('join', u' ', u'w x y z', Sequence()) test('join', u' ', TypeError, 7) class BadSeq(Sequence): def __init__(self): self.seq = [7, u'hello', 123L] test('join', u' ', TypeError, BadSeq()) result = u'' for i in range(10): if i > 0: result = result + u':' result = result + u'x'*10 test('join', u':', result, [u'x' * 10] * 10) test('join', u':', result, (u'x' * 10,) * 10) test('strip', u' hello ', u'hello') test('lstrip', u' hello ', u'hello ') test('rstrip', u' hello ', u' hello') test('strip', u'hello', u'hello') test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS') if 0: test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def') table = string.maketrans('a', u'A') test('translate', u'abc', u'Abc', table) test('translate', u'xyz', u'xyz', table) test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1) test('replace', u'one!two!three!', u'onetwothree', '!', '') test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2) test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3) test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4) test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0) test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@') test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@') test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2) test('startswith', u'hello', 1, u'he') test('startswith', u'hello', 1, u'hello') test('startswith', u'hello', 0, u'hello world') test('startswith', u'hello', 1, u'') test('startswith', u'hello', 0, u'ello') test('startswith', u'hello', 1, u'ello', 1) test('startswith', u'hello', 1, u'o', 4) test('startswith', u'hello', 0, u'o', 5) test('startswith', u'hello', 1, u'', 5) test('startswith', u'hello', 0, u'lo', 6) test('startswith', u'helloworld', 1, u'lowo', 3) test('startswith', u'helloworld', 1, u'lowo', 3, 7) test('startswith', u'helloworld', 0, u'lowo', 3, 6) test('endswith', u'hello', 1, u'lo') test('endswith', u'hello', 0, u'he') test('endswith', u'hello', 1, u'') test('endswith', u'hello', 0, u'hello world') test('endswith', u'helloworld', 0, u'worl') test('endswith', u'helloworld', 1, u'worl', 3, 9) test('endswith', u'helloworld', 1, u'world', 3, 12) test('endswith', u'helloworld', 1, u'lowo', 1, 7) test('endswith', u'helloworld', 1, u'lowo', 2, 7) test('endswith', u'helloworld', 1, u'lowo', 3, 7) test('endswith', u'helloworld', 0, u'lowo', 4, 7) test('endswith', u'helloworld', 0, u'lowo', 3, 8) test('endswith', u'ab', 0, u'ab', 0, 1) test('endswith', u'ab', 0, u'ab', 0, 0) test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi') test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 8) test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 4) test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab def\ng hi', 4) if 0: test('capwords', u'abc def ghi', u'Abc Def Ghi') test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi') test('capwords', u'abc\t def \nghi', u'Abc Def Ghi') # Comparisons: print 'Testing Unicode comparisons...', assert u'abc' == 'abc' assert 'abc' == u'abc' assert u'abc' == u'abc' assert u'abcd' > 'abc' assert 'abcd' > u'abc' assert u'abcd' > u'abc' assert u'abc' < 'abcd' assert 'abc' < u'abcd' assert u'abc' < u'abcd' print 'done.' test('ljust', u'abc', u'abc ', 10) test('rjust', u'abc', u' abc', 10) test('center', u'abc', u' abc ', 10) test('ljust', u'abc', u'abc ', 6) test('rjust', u'abc', u' abc', 6) test('center', u'abc', u' abc ', 6) test('ljust', u'abc', u'abc', 2) test('rjust', u'abc', u'abc', 2) test('center', u'abc', u'abc', 2) test('islower', u'a', 1) test('islower', u'A', 0) test('islower', u'\n', 0) test('islower', u'\u1FFc', 0) test('islower', u'abc', 1) test('islower', u'aBc', 0) test('islower', u'abc\n', 1) test('isupper', u'a', 0) test('isupper', u'A', 1) test('isupper', u'\n', 0) test('isupper', u'\u1FFc', 0) test('isupper', u'ABC', 1) test('isupper', u'AbC', 0) test('isupper', u'ABC\n', 1) test('istitle', u'a', 0) test('istitle', u'A', 1) test('istitle', u'\n', 0) test('istitle', u'\u1FFc', 1) test('istitle', u'A Titlecased Line', 1) test('istitle', u'A\nTitlecased Line', 1) test('istitle', u'A Titlecased, Line', 1) test('istitle', u'Greek \u1FFcitlecases ...', 1) test('istitle', u'Not a capitalized String', 0) test('istitle', u'Not\ta Titlecase String', 0) test('istitle', u'Not--a Titlecase String', 0) test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi']) test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi']) test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi']) test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi']) test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u'']) test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u'']) test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc\012def\015\012ghi\012\015'], 1) test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def\015\012ghi\012\015'], 2) test('translate', u"abababc", u'bbbc', {ord('a'):None}) test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')}) test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}) # Contains: print 'Testing Unicode contains method...', assert ('a' in 'abdb') == 1 assert ('a' in 'bdab') == 1 assert ('a' in 'bdaba') == 1 assert ('a' in 'bdba') == 1 assert ('a' in u'bdba') == 1 assert (u'a' in u'bdba') == 1 assert (u'a' in u'bdb') == 0 assert (u'a' in 'bdb') == 0 assert (u'a' in 'bdba') == 1 print 'done.' # Formatting: print 'Testing Unicode formatting strings...', assert u"%s, %s" % (u"abc", "abc") == u'abc, abc' assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00' assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00' assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50' assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57' assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57' assert u"%c" % (u"abc",) == u'a' assert u"%c" % ("abc",) == u'a' assert u"%c" % (34,) == u'"' assert u"%c" % (36,) == u'$' assert u"%r, %r" % (u"abc", "abc") == u"u'abc', 'abc'" assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def' assert u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"} == u'abc, def' print 'done.' # Test Unicode database APIs try: import unicodedata except ImportError: pass else: print 'Testing unicodedata module...', assert unicodedata.digit(u'A',None) is None assert unicodedata.digit(u'9') == 9 assert unicodedata.digit(u'\u215b',None) is None assert unicodedata.digit(u'\u2468') == 9 assert unicodedata.numeric(u'A',None) is None assert unicodedata.numeric(u'9') == 9 assert unicodedata.numeric(u'\u215b') == 0.125 assert unicodedata.numeric(u'\u2468') == 9.0 assert unicodedata.decimal(u'A',None) is None assert unicodedata.decimal(u'9') == 9 assert unicodedata.decimal(u'\u215b',None) is None assert unicodedata.decimal(u'\u2468',None) is None assert unicodedata.category(u'\uFFFE') == 'Cn' assert unicodedata.category(u'a') == 'Ll' assert unicodedata.category(u'A') == 'Lu' assert unicodedata.bidirectional(u'\uFFFE') == '' assert unicodedata.bidirectional(u' ') == 'WS' assert unicodedata.bidirectional(u'A') == 'L' assert unicodedata.decomposition(u'\uFFFE') == '' assert unicodedata.decomposition(u'\u00bc') == ' 0031 2044 0034' assert unicodedata.mirrored(u'\uFFFE') == 0 assert unicodedata.mirrored(u'a') == 0 assert unicodedata.mirrored(u'\u2201') == 1 assert unicodedata.combining(u'\uFFFE') == 0 assert unicodedata.combining(u'a') == 0 assert unicodedata.combining(u'\u20e1') == 230 print 'done.' # Test builtin codecs print 'Testing builtin codecs...', assert unicode('hello','ascii') == u'hello' assert unicode('hello','utf-8') == u'hello' assert unicode('hello','utf8') == u'hello' assert unicode('hello','latin-1') == u'hello' assert u'hello'.encode('ascii') == 'hello' assert u'hello'.encode('utf-8') == 'hello' assert u'hello'.encode('utf8') == 'hello' assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000' assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o' assert u'hello'.encode('latin-1') == 'hello' u = u''.join(map(unichr, range(1024))) for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'): assert unicode(u.encode(encoding),encoding) == u u = u''.join(map(unichr, range(256))) for encoding in ('latin-1',): assert unicode(u.encode(encoding),encoding) == u u = u''.join(map(unichr, range(128))) for encoding in ('ascii',): assert unicode(u.encode(encoding),encoding) == u print 'done.'