From 67e83886d9143dec4476dc56d1206c55baa45850 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Walter=20D=C3=B6rwald?= Date: Sat, 5 May 2007 12:26:27 +0000 Subject: [PATCH] test_unicode.py passes again (except for problems with the idna codec). --- Lib/test/string_tests.py | 19 +---- Lib/test/test_unicode.py | 168 +++++++++++++++------------------------ 2 files changed, 68 insertions(+), 119 deletions(-) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 116bcf007c8..f169fde0a42 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -642,17 +642,6 @@ class CommonTest(BaseTest): self.checkequal('xyzzyhello', 'xyzzyhelloxyzzy', 'rstrip', 'xyz') self.checkequal('hello', 'hello', 'strip', 'xyz') - # strip/lstrip/rstrip with unicode arg - if test_support.have_unicode: - self.checkequal(str('hello', 'ascii'), 'xyzzyhelloxyzzy', - 'strip', str('xyz', 'ascii')) - self.checkequal(str('helloxyzzy', 'ascii'), 'xyzzyhelloxyzzy', - 'lstrip', str('xyz', 'ascii')) - self.checkequal(str('xyzzyhello', 'ascii'), 'xyzzyhelloxyzzy', - 'rstrip', str('xyz', 'ascii')) - self.checkequal(str('hello', 'ascii'), 'hello', - 'strip', str('xyz', 'ascii')) - self.checkraises(TypeError, 'hello', 'strip', 42, 42) self.checkraises(TypeError, 'hello', 'lstrip', 42, 42) self.checkraises(TypeError, 'hello', 'rstrip', 42, 42) @@ -956,12 +945,8 @@ class MixinStrUnicodeUserStringTest: self.checkequal('w x y z', ' ', 'join', Sequence()) self.checkequal('abc', 'a', 'join', ('abc',)) self.checkequal('z', 'a', 'join', UserList(['z'])) - if test_support.have_unicode: - self.checkequal(str('a.b.c'), str('.'), 'join', ['a', 'b', 'c']) - self.checkequal(str('a.b.c'), '.', 'join', [str('a'), 'b', 'c']) - self.checkequal(str('a.b.c'), '.', 'join', ['a', str('b'), 'c']) - self.checkequal(str('a.b.c'), '.', 'join', ['a', 'b', str('c')]) - self.checkraises(TypeError, '.', 'join', ['a', str('b'), 3]) + self.checkequal('a.b.c', '.', 'join', ['a', 'b', 'c']) + self.checkraises(TypeError, '.', 
'join', ['a', 'b', 3]) for i in [5, 25, 125]: self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join', ['a' * i] * i) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index ccfa92207e3..8f842f3601b 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -62,21 +62,21 @@ class UnicodeTest( def test_repr(self): if not sys.platform.startswith('java'): # Test basic sanity of repr() - self.assertEqual(repr('abc'), "u'abc'") - self.assertEqual(repr('ab\\c'), "u'ab\\\\c'") - self.assertEqual(repr('ab\\'), "u'ab\\\\'") - self.assertEqual(repr('\\c'), "u'\\\\c'") - self.assertEqual(repr('\\'), "u'\\\\'") - self.assertEqual(repr('\n'), "u'\\n'") - self.assertEqual(repr('\r'), "u'\\r'") - self.assertEqual(repr('\t'), "u'\\t'") - self.assertEqual(repr('\b'), "u'\\x08'") - self.assertEqual(repr("'\""), """u'\\'"'""") - self.assertEqual(repr("'\""), """u'\\'"'""") - self.assertEqual(repr("'"), '''u"'"''') - self.assertEqual(repr('"'), """u'"'""") + self.assertEqual(repr('abc'), "'abc'") + self.assertEqual(repr('ab\\c'), "'ab\\\\c'") + self.assertEqual(repr('ab\\'), "'ab\\\\'") + self.assertEqual(repr('\\c'), "'\\\\c'") + self.assertEqual(repr('\\'), "'\\\\'") + self.assertEqual(repr('\n'), "'\\n'") + self.assertEqual(repr('\r'), "'\\r'") + self.assertEqual(repr('\t'), "'\\t'") + self.assertEqual(repr('\b'), "'\\x08'") + self.assertEqual(repr("'\""), """'\\'"'""") + self.assertEqual(repr("'\""), """'\\'"'""") + self.assertEqual(repr("'"), '''"'"''') + self.assertEqual(repr('"'), """'"'""") latin1repr = ( - "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r" + "'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r" "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a" "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI" "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f" @@ -165,7 +165,7 @@ class UnicodeTest( self.checkequalnofix('xyyx', 'xzx', 'translate', {ord('z'):'yy'}) 
self.assertRaises(TypeError, 'hello'.translate) - self.assertRaises(TypeError, 'abababc'.translate, {ord('a'):''}) + self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz') def test_split(self): string_tests.CommonTest.test_split(self) @@ -187,10 +187,6 @@ class UnicodeTest( self.checkequalnofix('abcd', '', 'join', ('a', 'b', 'c', 'd')) self.checkequalnofix('w x y z', ' ', 'join', string_tests.Sequence('wxyz')) - def test_strip(self): - string_tests.CommonTest.test_strip(self) - self.assertRaises(UnicodeError, "hello".strip, "\xff") - def test_replace(self): string_tests.CommonTest.test_replace(self) @@ -323,58 +319,27 @@ class UnicodeTest( self.assert_('a' in 'bdab') self.assert_('a' in 'bdaba') self.assert_('a' in 'bdba') - self.assert_('a' in 'bdba') - self.assert_('a' in 'bdba') - self.assert_('a' not in 'bdb') self.assert_('a' not in 'bdb') self.assert_('a' in 'bdba') self.assert_('a' in ('a',1,None)) self.assert_('a' in (1,None,'a')) - self.assert_('a' in (1,None,'a')) self.assert_('a' in ('a',1,None)) self.assert_('a' in (1,None,'a')) - self.assert_('a' in (1,None,'a')) self.assert_('a' not in ('x',1,'y')) self.assert_('a' not in ('x',1,None)) self.assert_('abcd' not in 'abcxxxx') self.assert_('ab' in 'abcd') self.assert_('ab' in 'abc') - self.assert_('ab' in 'abc') self.assert_('ab' in (1,None,'ab')) self.assert_('' in 'abc') - self.assert_('' in 'abc') - - # If the following fails either - # the contains operator does not propagate UnicodeErrors or - # someone has changed the default encoding - self.assertRaises(UnicodeError, 'g\xe2teau'.__contains__, '\xe2') - - self.assert_('' in '') - self.assert_('' in '') self.assert_('' in '') self.assert_('' in 'abc') - self.assert_('' in 'abc') - self.assert_('' in 'abc') - self.assert_('\0' not in 'abc') - self.assert_('\0' not in 'abc') self.assert_('\0' not in 'abc') self.assert_('\0' in '\0abc') - self.assert_('\0' in '\0abc') - self.assert_('\0' in '\0abc') - self.assert_('\0' in 'abc\0') - 
self.assert_('\0' in 'abc\0') self.assert_('\0' in 'abc\0') self.assert_('a' in '\0abc') - self.assert_('a' in '\0abc') - self.assert_('a' in '\0abc') - self.assert_('asdf' in 'asdf') - self.assert_('asdf' in 'asdf') self.assert_('asdf' in 'asdf') self.assert_('asdf' not in 'asd') - self.assert_('asdf' not in 'asd') - self.assert_('asdf' not in 'asd') - self.assert_('asdf' not in '') - self.assert_('asdf' not in '') self.assert_('asdf' not in '') self.assertRaises(TypeError, "abc".__contains__) @@ -389,7 +354,7 @@ class UnicodeTest( self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 3.57), 'abc, abc, -1, -2.000000, 3.57') self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 1003.57), 'abc, abc, -1, -2.000000, 1003.57') if not sys.platform.startswith('java'): - self.assertEqual("%r, %r" % ("abc", "abc"), "u'abc', 'abc'") + self.assertEqual("%r, %r" % (b"abc", "abc"), "b'abc', 'abc'") self.assertEqual("%(x)s, %(y)s" % {'x':"abc", 'y':"def"}, 'abc, def') self.assertEqual("%(x)s, %(\xfc)s" % {'x':"abc", '\xfc':"def"}, 'abc, def') @@ -495,7 +460,7 @@ class UnicodeTest( ) self.assertEqual( - str('strings are decoded to unicode', 'utf-8', 'strict'), + str(b'strings are decoded to unicode', 'utf-8', 'strict'), 'strings are decoded to unicode' ) @@ -513,38 +478,38 @@ class UnicodeTest( def test_codecs_utf7(self): utfTests = [ - ('A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example - ('Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example - ('\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example - ('Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example - ('+', '+-'), - ('+-', '+--'), - ('+?', '+-?'), - ('\?', '+AFw?'), - ('+?', '+-?'), - (r'\\?', '+AFwAXA?'), - (r'\\\?', '+AFwAXABc?'), - (r'++--', '+-+---') + ('A\u2262\u0391.', b'A+ImIDkQ.'), # RFC2152 example + ('Hi Mom -\u263a-!', b'Hi Mom -+Jjo--!'), # RFC2152 example + ('\u65E5\u672C\u8A9E', b'+ZeVnLIqe-'), # RFC2152 example + ('Item 3 is \u00a31.', b'Item 3 is +AKM-1.'), # RFC2152 
example + ('+', b'+-'), + ('+-', b'+--'), + ('+?', b'+-?'), + ('\?', b'+AFw?'), + ('+?', b'+-?'), + (r'\\?', b'+AFwAXA?'), + (r'\\\?', b'+AFwAXABc?'), + (r'++--', b'+-+---') ] for (x, y) in utfTests: self.assertEqual(x.encode('utf-7'), y) # surrogates not supported - self.assertRaises(UnicodeError, str, '+3ADYAA-', 'utf-7') + self.assertRaises(UnicodeError, str, b'+3ADYAA-', 'utf-7') - self.assertEqual(str('+3ADYAA-', 'utf-7', 'replace'), '\ufffd') + self.assertEqual(str(b'+3ADYAA-', 'utf-7', 'replace'), '\ufffd') def test_codecs_utf8(self): - self.assertEqual(''.encode('utf-8'), '') - self.assertEqual('\u20ac'.encode('utf-8'), '\xe2\x82\xac') - self.assertEqual('\ud800\udc02'.encode('utf-8'), '\xf0\x90\x80\x82') - self.assertEqual('\ud84d\udc56'.encode('utf-8'), '\xf0\xa3\x91\x96') - self.assertEqual('\ud800'.encode('utf-8'), '\xed\xa0\x80') - self.assertEqual('\udc00'.encode('utf-8'), '\xed\xb0\x80') + self.assertEqual(''.encode('utf-8'), b'') + self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac') + self.assertEqual('\ud800\udc02'.encode('utf-8'), b'\xf0\x90\x80\x82') + self.assertEqual('\ud84d\udc56'.encode('utf-8'), b'\xf0\xa3\x91\x96') + self.assertEqual('\ud800'.encode('utf-8'), b'\xed\xa0\x80') + self.assertEqual('\udc00'.encode('utf-8'), b'\xed\xb0\x80') self.assertEqual( ('\ud800\udc02'*1000).encode('utf-8'), - '\xf0\x90\x80\x82'*1000 + b'\xf0\x90\x80\x82'*1000 ) self.assertEqual( '\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' @@ -553,22 +518,22 @@ class UnicodeTest( '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das' ' Nunstuck git und'.encode('utf-8'), - '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81' - '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3' - '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe' - '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83' - '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8' - 
'\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81' - '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81' - '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3' - '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf' - '\xe3\x80\x8cWenn ist das Nunstuck git und' + b'\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81' + b'\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3' + b'\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe' + b'\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83' + b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8' + b'\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81' + b'\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81' + b'\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3' + b'\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf' + b'\xe3\x80\x8cWenn ist das Nunstuck git und' ) # UTF-8 specific decoding tests - self.assertEqual(str('\xf0\xa3\x91\x96', 'utf-8'), '\U00023456' ) - self.assertEqual(str('\xf0\x90\x80\x82', 'utf-8'), '\U00010002' ) - self.assertEqual(str('\xe2\x82\xac', 'utf-8'), '\u20ac' ) + self.assertEqual(str(b'\xf0\xa3\x91\x96', 'utf-8'), '\U00023456' ) + self.assertEqual(str(b'\xf0\x90\x80\x82', 'utf-8'), '\U00010002' ) + self.assertEqual(str(b'\xe2\x82\xac', 'utf-8'), '\u20ac' ) # Other possible utf-8 test cases: # * strict decoding testing for all of the @@ -582,14 +547,14 @@ class UnicodeTest( # Error handling (encoding) self.assertRaises(UnicodeError, 'Andr\202 x'.encode, 'ascii') self.assertRaises(UnicodeError, 'Andr\202 x'.encode, 'ascii','strict') - self.assertEqual('Andr\202 x'.encode('ascii','ignore'), "Andr x") - self.assertEqual('Andr\202 x'.encode('ascii','replace'), "Andr? x") + self.assertEqual('Andr\202 x'.encode('ascii','ignore'), b"Andr x") + self.assertEqual('Andr\202 x'.encode('ascii','replace'), b"Andr? 
x") # Error handling (decoding) - self.assertRaises(UnicodeError, str, 'Andr\202 x', 'ascii') - self.assertRaises(UnicodeError, str, 'Andr\202 x', 'ascii','strict') - self.assertEqual(str('Andr\202 x','ascii','ignore'), "Andr x") - self.assertEqual(str('Andr\202 x','ascii','replace'), 'Andr\uFFFD x') + self.assertRaises(UnicodeError, str, b'Andr\202 x', 'ascii') + self.assertRaises(UnicodeError, str, b'Andr\202 x', 'ascii', 'strict') + self.assertEqual(str(b'Andr\202 x', 'ascii', 'ignore'), "Andr x") + self.assertEqual(str(b'Andr\202 x', 'ascii', 'replace'), 'Andr\uFFFD x') # Error handling (unknown character names) self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), "xx") @@ -618,13 +583,13 @@ class UnicodeTest( def test_codecs(self): # Encoding - self.assertEqual('hello'.encode('ascii'), 'hello') - self.assertEqual('hello'.encode('utf-7'), 'hello') - self.assertEqual('hello'.encode('utf-8'), 'hello') - self.assertEqual('hello'.encode('utf8'), 'hello') - self.assertEqual('hello'.encode('utf-16-le'), 'h\000e\000l\000l\000o\000') - self.assertEqual('hello'.encode('utf-16-be'), '\000h\000e\000l\000l\000o') - self.assertEqual('hello'.encode('latin-1'), 'hello') + self.assertEqual('hello'.encode('ascii'), b'hello') + self.assertEqual('hello'.encode('utf-7'), b'hello') + self.assertEqual('hello'.encode('utf-8'), b'hello') + self.assertEqual('hello'.encode('utf8'), b'hello') + self.assertEqual('hello'.encode('utf-16-le'), b'h\000e\000l\000l\000o\000') + self.assertEqual('hello'.encode('utf-16-be'), b'\000h\000e\000l\000l\000o') + self.assertEqual('hello'.encode('latin-1'), b'hello') # Roundtrip safety for BMP (just the first 1024 chars) for c in xrange(1024): @@ -663,7 +628,7 @@ class UnicodeTest( def test_codecs_charmap(self): # 0-127 - s = ''.join(map(chr, xrange(128))) + s = bytes(xrange(128)) for encoding in ( 'cp037', 'cp1026', 'cp437', 'cp500', 'cp737', 'cp775', 'cp850', @@ -691,7 +656,7 @@ class UnicodeTest( self.assertEqual(str(s, 
encoding).encode(encoding), s) # 128-255 - s = ''.join(map(chr, xrange(128, 256))) + s = bytes(xrange(128, 256)) for encoding in ( 'cp037', 'cp1026', 'cp437', 'cp500', 'cp737', 'cp775', 'cp850', @@ -805,7 +770,6 @@ class UnicodeTest( self.assertEqual(str(Foo6("bar")), "foou") self.assertEqual(str(Foo7("bar")), "foou") self.assertEqual(str(Foo8("foo")), "foofoo") - self.assertEqual(str(Foo9("foo")), "string") self.assertEqual(str(Foo9("foo")), "not unicode") def test_unicode_repr(self):