# Patch for Lib/test/test_codecs.py: adds generic round-trip tests that run
# over a list of codec names.
#
# What the first hunk adds:
#   * all_unicode_encodings -- codec names exercised with the unicode string
#     u"abc123".  "mbcs" is appended only when the codecs module has an
#     mbcs_encode attribute; "bz2_codec" / "zlib_codec" are appended only
#     when the bz2 / zlib modules can be imported.
#   * all_string_encodings -- codec names exercised with the str "abc123".
#   * broken_unicode_with_streams -- codec names for which the stream
#     reader/writer part of the test is skipped.
#   * BasicUnicodeTest.test_basics -- for each name, encodes and decodes via
#     codecs.getencoder()/getdecoder() and checks the round trip; the
#     consumed-size check is skipped for "unicode_internal".  For names not
#     in broken_unicode_with_streams it also pushes the text through
#     codecs.getwriter()/getreader() one character at a time.
#   * BasicStrTest.test_basics -- the same encoder/decoder round trip for the
#     str-only codecs (no stream check).
#
# The second hunk registers both new test cases in test_main().
#
# NOTE(review): Queue is used but not defined in this patch -- presumably a
# helper defined elsewhere in test_codecs.py; confirm.
# NOTE(review): line breaks and indentation below were reconstructed from
# the hunk headers and Python syntax; verify whitespace against upstream
# before applying.
# NOTE(review): the second hunk header declares 7 old / 9 new lines but
# fewer are present here -- the trailing context looks truncated; confirm.
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 36c40401770..99ed82d63b3 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -549,6 +549,182 @@ class StreamReaderTest(unittest.TestCase):
         f = self.reader(self.stream)
         self.assertEquals(f.readlines(), [u'\ud55c\n', u'\uae00'])
 
+all_unicode_encodings = [
+    "ascii",
+    "base64_codec",
+    "big5",
+    "big5hkscs",
+    "charmap",
+    "cp037",
+    "cp1006",
+    "cp1026",
+    "cp1140",
+    "cp1250",
+    "cp1251",
+    "cp1252",
+    "cp1253",
+    "cp1254",
+    "cp1255",
+    "cp1256",
+    "cp1257",
+    "cp1258",
+    "cp424",
+    "cp437",
+    "cp500",
+    "cp737",
+    "cp775",
+    "cp850",
+    "cp852",
+    "cp855",
+    "cp856",
+    "cp857",
+    "cp860",
+    "cp861",
+    "cp862",
+    "cp863",
+    "cp864",
+    "cp865",
+    "cp866",
+    "cp869",
+    "cp874",
+    "cp875",
+    "cp932",
+    "cp949",
+    "cp950",
+    "euc_jis_2004",
+    "euc_jisx0213",
+    "euc_jp",
+    "euc_kr",
+    "gb18030",
+    "gb2312",
+    "gbk",
+    "hex_codec",
+    "hp_roman8",
+    "hz",
+    "idna",
+    "iso2022_jp",
+    "iso2022_jp_1",
+    "iso2022_jp_2",
+    "iso2022_jp_2004",
+    "iso2022_jp_3",
+    "iso2022_jp_ext",
+    "iso2022_kr",
+    "iso8859_1",
+    "iso8859_10",
+    "iso8859_11",
+    "iso8859_13",
+    "iso8859_14",
+    "iso8859_15",
+    "iso8859_16",
+    "iso8859_2",
+    "iso8859_3",
+    "iso8859_4",
+    "iso8859_5",
+    "iso8859_6",
+    "iso8859_7",
+    "iso8859_8",
+    "iso8859_9",
+    "johab",
+    "koi8_r",
+    "koi8_u",
+    "latin_1",
+    "mac_cyrillic",
+    "mac_greek",
+    "mac_iceland",
+    "mac_latin2",
+    "mac_roman",
+    "mac_turkish",
+    "palmos",
+    "ptcp154",
+    "punycode",
+    "raw_unicode_escape",
+    "rot_13",
+    "shift_jis",
+    "shift_jis_2004",
+    "shift_jisx0213",
+    "tis_620",
+    "unicode_escape",
+    "unicode_internal",
+    "utf_16",
+    "utf_16_be",
+    "utf_16_le",
+    "utf_7",
+    "utf_8",
+]
+
+if hasattr(codecs, "mbcs_encode"):
+    all_unicode_encodings.append("mbcs")
+
+# The following encodings work only with str, not unicode
+all_string_encodings = [
+    "quopri_codec",
+    "string_escape",
+    "uu_codec",
+]
+
+# The following encoding is not tested, because it's not supposed
+# to work:
+# "undefined"
+
+# The following encodings don't work in stateful mode
+broken_unicode_with_streams = [
+    "base64_codec",
+    "hex_codec",
+    "punycode",
+    "unicode_internal"
+]
+
+try:
+    import bz2
+except ImportError:
+    pass
+else:
+    all_unicode_encodings.append("bz2_codec")
+    broken_unicode_with_streams.append("bz2_codec")
+
+try:
+    import zlib
+except ImportError:
+    pass
+else:
+    all_unicode_encodings.append("zlib_codec")
+    broken_unicode_with_streams.append("zlib_codec")
+
+class BasicUnicodeTest(unittest.TestCase):
+    def test_basics(self):
+        s = u"abc123" # all codecs should be able to encode these
+        for encoding in all_unicode_encodings:
+            (bytes, size) = codecs.getencoder(encoding)(s)
+            if encoding != "unicode_internal":
+                self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding))
+            (chars, size) = codecs.getdecoder(encoding)(bytes)
+            self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
+
+            if encoding not in broken_unicode_with_streams:
+                # check stream reader/writer
+                q = Queue()
+                writer = codecs.getwriter(encoding)(q)
+                encodedresult = ""
+                for c in s:
+                    writer.write(c)
+                    encodedresult += q.read()
+                q = Queue()
+                reader = codecs.getreader(encoding)(q)
+                decodedresult = u""
+                for c in encodedresult:
+                    q.write(c)
+                    decodedresult += reader.read()
+                self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
+
+class BasicStrTest(unittest.TestCase):
+    def test_basics(self):
+        s = "abc123"
+        for encoding in all_string_encodings:
+            (bytes, size) = codecs.getencoder(encoding)(s)
+            self.assertEqual(size, len(s))
+            (chars, size) = codecs.getdecoder(encoding)(bytes)
+            self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
+
 def test_main():
     test_support.run_unittest(
         UTF16Test,
@@ -561,7 +737,9 @@ def test_main():
         NameprepTest,
         CodecTest,
         CodecsModuleTest,
-        StreamReaderTest
+        StreamReaderTest,
+        BasicUnicodeTest,
+        BasicStrTest
     )