test_unicode.py passes again (except for problems

with the idna codec).
This commit is contained in:
Walter Dörwald 2007-05-05 12:26:27 +00:00
parent acaa5a16d6
commit 67e83886d9
2 changed files with 68 additions and 119 deletions

View File

@ -642,17 +642,6 @@ class CommonTest(BaseTest):
self.checkequal('xyzzyhello', 'xyzzyhelloxyzzy', 'rstrip', 'xyz')
self.checkequal('hello', 'hello', 'strip', 'xyz')
# strip/lstrip/rstrip with unicode arg
if test_support.have_unicode:
self.checkequal(str('hello', 'ascii'), 'xyzzyhelloxyzzy',
'strip', str('xyz', 'ascii'))
self.checkequal(str('helloxyzzy', 'ascii'), 'xyzzyhelloxyzzy',
'lstrip', str('xyz', 'ascii'))
self.checkequal(str('xyzzyhello', 'ascii'), 'xyzzyhelloxyzzy',
'rstrip', str('xyz', 'ascii'))
self.checkequal(str('hello', 'ascii'), 'hello',
'strip', str('xyz', 'ascii'))
self.checkraises(TypeError, 'hello', 'strip', 42, 42)
self.checkraises(TypeError, 'hello', 'lstrip', 42, 42)
self.checkraises(TypeError, 'hello', 'rstrip', 42, 42)
@ -956,12 +945,8 @@ class MixinStrUnicodeUserStringTest:
self.checkequal('w x y z', ' ', 'join', Sequence())
self.checkequal('abc', 'a', 'join', ('abc',))
self.checkequal('z', 'a', 'join', UserList(['z']))
if test_support.have_unicode:
self.checkequal(str('a.b.c'), str('.'), 'join', ['a', 'b', 'c'])
self.checkequal(str('a.b.c'), '.', 'join', [str('a'), 'b', 'c'])
self.checkequal(str('a.b.c'), '.', 'join', ['a', str('b'), 'c'])
self.checkequal(str('a.b.c'), '.', 'join', ['a', 'b', str('c')])
self.checkraises(TypeError, '.', 'join', ['a', str('b'), 3])
self.checkequal('a.b.c', '.', 'join', ['a', 'b', 'c'])
self.checkraises(TypeError, '.', 'join', ['a', 'b', 3])
for i in [5, 25, 125]:
self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join',
['a' * i] * i)

View File

@ -62,21 +62,21 @@ class UnicodeTest(
def test_repr(self):
if not sys.platform.startswith('java'):
# Test basic sanity of repr()
self.assertEqual(repr('abc'), "u'abc'")
self.assertEqual(repr('ab\\c'), "u'ab\\\\c'")
self.assertEqual(repr('ab\\'), "u'ab\\\\'")
self.assertEqual(repr('\\c'), "u'\\\\c'")
self.assertEqual(repr('\\'), "u'\\\\'")
self.assertEqual(repr('\n'), "u'\\n'")
self.assertEqual(repr('\r'), "u'\\r'")
self.assertEqual(repr('\t'), "u'\\t'")
self.assertEqual(repr('\b'), "u'\\x08'")
self.assertEqual(repr("'\""), """u'\\'"'""")
self.assertEqual(repr("'\""), """u'\\'"'""")
self.assertEqual(repr("'"), '''u"'"''')
self.assertEqual(repr('"'), """u'"'""")
self.assertEqual(repr('abc'), "'abc'")
self.assertEqual(repr('ab\\c'), "'ab\\\\c'")
self.assertEqual(repr('ab\\'), "'ab\\\\'")
self.assertEqual(repr('\\c'), "'\\\\c'")
self.assertEqual(repr('\\'), "'\\\\'")
self.assertEqual(repr('\n'), "'\\n'")
self.assertEqual(repr('\r'), "'\\r'")
self.assertEqual(repr('\t'), "'\\t'")
self.assertEqual(repr('\b'), "'\\x08'")
self.assertEqual(repr("'\""), """'\\'"'""")
self.assertEqual(repr("'\""), """'\\'"'""")
self.assertEqual(repr("'"), '''"'"''')
self.assertEqual(repr('"'), """'"'""")
latin1repr = (
"u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
"'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
"\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
"\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
"JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
@ -165,7 +165,7 @@ class UnicodeTest(
self.checkequalnofix('xyyx', 'xzx', 'translate', {ord('z'):'yy'})
self.assertRaises(TypeError, 'hello'.translate)
self.assertRaises(TypeError, 'abababc'.translate, {ord('a'):''})
self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz')
def test_split(self):
string_tests.CommonTest.test_split(self)
@ -187,10 +187,6 @@ class UnicodeTest(
self.checkequalnofix('abcd', '', 'join', ('a', 'b', 'c', 'd'))
self.checkequalnofix('w x y z', ' ', 'join', string_tests.Sequence('wxyz'))
def test_strip(self):
string_tests.CommonTest.test_strip(self)
self.assertRaises(UnicodeError, "hello".strip, "\xff")
def test_replace(self):
string_tests.CommonTest.test_replace(self)
@ -323,58 +319,27 @@ class UnicodeTest(
self.assert_('a' in 'bdab')
self.assert_('a' in 'bdaba')
self.assert_('a' in 'bdba')
self.assert_('a' in 'bdba')
self.assert_('a' in 'bdba')
self.assert_('a' not in 'bdb')
self.assert_('a' not in 'bdb')
self.assert_('a' in 'bdba')
self.assert_('a' in ('a',1,None))
self.assert_('a' in (1,None,'a'))
self.assert_('a' in (1,None,'a'))
self.assert_('a' in ('a',1,None))
self.assert_('a' in (1,None,'a'))
self.assert_('a' in (1,None,'a'))
self.assert_('a' not in ('x',1,'y'))
self.assert_('a' not in ('x',1,None))
self.assert_('abcd' not in 'abcxxxx')
self.assert_('ab' in 'abcd')
self.assert_('ab' in 'abc')
self.assert_('ab' in 'abc')
self.assert_('ab' in (1,None,'ab'))
self.assert_('' in 'abc')
self.assert_('' in 'abc')
# If the following fails either
# the contains operator does not propagate UnicodeErrors or
# someone has changed the default encoding
self.assertRaises(UnicodeError, 'g\xe2teau'.__contains__, '\xe2')
self.assert_('' in '')
self.assert_('' in '')
self.assert_('' in '')
self.assert_('' in 'abc')
self.assert_('' in 'abc')
self.assert_('' in 'abc')
self.assert_('\0' not in 'abc')
self.assert_('\0' not in 'abc')
self.assert_('\0' not in 'abc')
self.assert_('\0' in '\0abc')
self.assert_('\0' in '\0abc')
self.assert_('\0' in '\0abc')
self.assert_('\0' in 'abc\0')
self.assert_('\0' in 'abc\0')
self.assert_('\0' in 'abc\0')
self.assert_('a' in '\0abc')
self.assert_('a' in '\0abc')
self.assert_('a' in '\0abc')
self.assert_('asdf' in 'asdf')
self.assert_('asdf' in 'asdf')
self.assert_('asdf' in 'asdf')
self.assert_('asdf' not in 'asd')
self.assert_('asdf' not in 'asd')
self.assert_('asdf' not in 'asd')
self.assert_('asdf' not in '')
self.assert_('asdf' not in '')
self.assert_('asdf' not in '')
self.assertRaises(TypeError, "abc".__contains__)
@ -389,7 +354,7 @@ class UnicodeTest(
self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 3.57), 'abc, abc, -1, -2.000000, 3.57')
self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 1003.57), 'abc, abc, -1, -2.000000, 1003.57')
if not sys.platform.startswith('java'):
self.assertEqual("%r, %r" % ("abc", "abc"), "u'abc', 'abc'")
self.assertEqual("%r, %r" % (b"abc", "abc"), "b'abc', 'abc'")
self.assertEqual("%(x)s, %(y)s" % {'x':"abc", 'y':"def"}, 'abc, def')
self.assertEqual("%(x)s, %(\xfc)s" % {'x':"abc", '\xfc':"def"}, 'abc, def')
@ -495,7 +460,7 @@ class UnicodeTest(
)
self.assertEqual(
str('strings are decoded to unicode', 'utf-8', 'strict'),
str(b'strings are decoded to unicode', 'utf-8', 'strict'),
'strings are decoded to unicode'
)
@ -513,38 +478,38 @@ class UnicodeTest(
def test_codecs_utf7(self):
utfTests = [
('A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
('Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
('\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
('Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
('+', '+-'),
('+-', '+--'),
('+?', '+-?'),
('\?', '+AFw?'),
('+?', '+-?'),
(r'\\?', '+AFwAXA?'),
(r'\\\?', '+AFwAXABc?'),
(r'++--', '+-+---')
('A\u2262\u0391.', b'A+ImIDkQ.'), # RFC2152 example
('Hi Mom -\u263a-!', b'Hi Mom -+Jjo--!'), # RFC2152 example
('\u65E5\u672C\u8A9E', b'+ZeVnLIqe-'), # RFC2152 example
('Item 3 is \u00a31.', b'Item 3 is +AKM-1.'), # RFC2152 example
('+', b'+-'),
('+-', b'+--'),
('+?', b'+-?'),
('\?', b'+AFw?'),
('+?', b'+-?'),
(r'\\?', b'+AFwAXA?'),
(r'\\\?', b'+AFwAXABc?'),
(r'++--', b'+-+---')
]
for (x, y) in utfTests:
self.assertEqual(x.encode('utf-7'), y)
# surrogates not supported
self.assertRaises(UnicodeError, str, '+3ADYAA-', 'utf-7')
self.assertRaises(UnicodeError, str, b'+3ADYAA-', 'utf-7')
self.assertEqual(str('+3ADYAA-', 'utf-7', 'replace'), '\ufffd')
self.assertEqual(str(b'+3ADYAA-', 'utf-7', 'replace'), '\ufffd')
def test_codecs_utf8(self):
self.assertEqual(''.encode('utf-8'), '')
self.assertEqual('\u20ac'.encode('utf-8'), '\xe2\x82\xac')
self.assertEqual('\ud800\udc02'.encode('utf-8'), '\xf0\x90\x80\x82')
self.assertEqual('\ud84d\udc56'.encode('utf-8'), '\xf0\xa3\x91\x96')
self.assertEqual('\ud800'.encode('utf-8'), '\xed\xa0\x80')
self.assertEqual('\udc00'.encode('utf-8'), '\xed\xb0\x80')
self.assertEqual(''.encode('utf-8'), b'')
self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
self.assertEqual('\ud800\udc02'.encode('utf-8'), b'\xf0\x90\x80\x82')
self.assertEqual('\ud84d\udc56'.encode('utf-8'), b'\xf0\xa3\x91\x96')
self.assertEqual('\ud800'.encode('utf-8'), b'\xed\xa0\x80')
self.assertEqual('\udc00'.encode('utf-8'), b'\xed\xb0\x80')
self.assertEqual(
('\ud800\udc02'*1000).encode('utf-8'),
'\xf0\x90\x80\x82'*1000
b'\xf0\x90\x80\x82'*1000
)
self.assertEqual(
'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
@ -553,22 +518,22 @@ class UnicodeTest(
'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
' Nunstuck git und'.encode('utf-8'),
'\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
'\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
'\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
'\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
'\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
'\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
'\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
'\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
'\xe3\x80\x8cWenn ist das Nunstuck git und'
b'\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
b'\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
b'\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
b'\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
b'\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
b'\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
b'\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
b'\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
b'\xe3\x80\x8cWenn ist das Nunstuck git und'
)
# UTF-8 specific decoding tests
self.assertEqual(str('\xf0\xa3\x91\x96', 'utf-8'), '\U00023456' )
self.assertEqual(str('\xf0\x90\x80\x82', 'utf-8'), '\U00010002' )
self.assertEqual(str('\xe2\x82\xac', 'utf-8'), '\u20ac' )
self.assertEqual(str(b'\xf0\xa3\x91\x96', 'utf-8'), '\U00023456' )
self.assertEqual(str(b'\xf0\x90\x80\x82', 'utf-8'), '\U00010002' )
self.assertEqual(str(b'\xe2\x82\xac', 'utf-8'), '\u20ac' )
# Other possible utf-8 test cases:
# * strict decoding testing for all of the
@ -582,14 +547,14 @@ class UnicodeTest(
# Error handling (encoding)
self.assertRaises(UnicodeError, 'Andr\202 x'.encode, 'ascii')
self.assertRaises(UnicodeError, 'Andr\202 x'.encode, 'ascii','strict')
self.assertEqual('Andr\202 x'.encode('ascii','ignore'), "Andr x")
self.assertEqual('Andr\202 x'.encode('ascii','replace'), "Andr? x")
self.assertEqual('Andr\202 x'.encode('ascii','ignore'), b"Andr x")
self.assertEqual('Andr\202 x'.encode('ascii','replace'), b"Andr? x")
# Error handling (decoding)
self.assertRaises(UnicodeError, str, 'Andr\202 x', 'ascii')
self.assertRaises(UnicodeError, str, 'Andr\202 x', 'ascii','strict')
self.assertEqual(str('Andr\202 x','ascii','ignore'), "Andr x")
self.assertEqual(str('Andr\202 x','ascii','replace'), 'Andr\uFFFD x')
self.assertRaises(UnicodeError, str, b'Andr\202 x', 'ascii')
self.assertRaises(UnicodeError, str, b'Andr\202 x', 'ascii', 'strict')
self.assertEqual(str(b'Andr\202 x', 'ascii', 'ignore'), "Andr x")
self.assertEqual(str(b'Andr\202 x', 'ascii', 'replace'), 'Andr\uFFFD x')
# Error handling (unknown character names)
self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), "xx")
@ -618,13 +583,13 @@ class UnicodeTest(
def test_codecs(self):
# Encoding
self.assertEqual('hello'.encode('ascii'), 'hello')
self.assertEqual('hello'.encode('utf-7'), 'hello')
self.assertEqual('hello'.encode('utf-8'), 'hello')
self.assertEqual('hello'.encode('utf8'), 'hello')
self.assertEqual('hello'.encode('utf-16-le'), 'h\000e\000l\000l\000o\000')
self.assertEqual('hello'.encode('utf-16-be'), '\000h\000e\000l\000l\000o')
self.assertEqual('hello'.encode('latin-1'), 'hello')
self.assertEqual('hello'.encode('ascii'), b'hello')
self.assertEqual('hello'.encode('utf-7'), b'hello')
self.assertEqual('hello'.encode('utf-8'), b'hello')
self.assertEqual('hello'.encode('utf8'), b'hello')
self.assertEqual('hello'.encode('utf-16-le'), b'h\000e\000l\000l\000o\000')
self.assertEqual('hello'.encode('utf-16-be'), b'\000h\000e\000l\000l\000o')
self.assertEqual('hello'.encode('latin-1'), b'hello')
# Roundtrip safety for BMP (just the first 1024 chars)
for c in xrange(1024):
@ -663,7 +628,7 @@ class UnicodeTest(
def test_codecs_charmap(self):
# 0-127
s = ''.join(map(chr, xrange(128)))
s = bytes(xrange(128))
for encoding in (
'cp037', 'cp1026',
'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
@ -691,7 +656,7 @@ class UnicodeTest(
self.assertEqual(str(s, encoding).encode(encoding), s)
# 128-255
s = ''.join(map(chr, xrange(128, 256)))
s = bytes(xrange(128, 256))
for encoding in (
'cp037', 'cp1026',
'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
@ -805,7 +770,6 @@ class UnicodeTest(
self.assertEqual(str(Foo6("bar")), "foou")
self.assertEqual(str(Foo7("bar")), "foou")
self.assertEqual(str(Foo8("foo")), "foofoo")
self.assertEqual(str(Foo9("foo")), "string")
self.assertEqual(str(Foo9("foo")), "not unicode")
def test_unicode_repr(self):