Make all the multibyte codec tests pass.
Changes to io.py, necessary to make this work: - Redid io.StringIO as a TextIOWrapper on top of a BytesIO instance. - Got rid of _MemoryIOMixin, folding it into BytesIO instead. - The read() functions that take -1 to mean "everything" now also take None. - Added readline() support to BufferedIOBase. :-(
This commit is contained in:
parent
f4cfc8f6bb
commit
024da5c257
129
Lib/io.py
129
Lib/io.py
|
@ -415,8 +415,8 @@ class BufferedIOBase(IOBase):
|
||||||
def read(self, n: int = -1) -> bytes:
|
def read(self, n: int = -1) -> bytes:
|
||||||
"""read(n: int = -1) -> bytes. Read and return up to n bytes.
|
"""read(n: int = -1) -> bytes. Read and return up to n bytes.
|
||||||
|
|
||||||
If the argument is omitted, or negative, reads and returns all
|
If the argument is omitted, None, or negative, reads and
|
||||||
data until EOF.
|
returns all data until EOF.
|
||||||
|
|
||||||
If the argument is positive, and the underlying raw stream is
|
If the argument is positive, and the underlying raw stream is
|
||||||
not 'interactive', multiple raw reads may be issued to satisfy
|
not 'interactive', multiple raw reads may be issued to satisfy
|
||||||
|
@ -450,6 +450,20 @@ class BufferedIOBase(IOBase):
|
||||||
b[:n] = data
|
b[:n] = data
|
||||||
return n
|
return n
|
||||||
|
|
||||||
|
def readline(self, sizehint: int = -1) -> bytes:
|
||||||
|
"""For backwards compatibility, a (slow) readline()."""
|
||||||
|
if sizehint is None:
|
||||||
|
sizehint = -1
|
||||||
|
res = b""
|
||||||
|
while sizehint < 0 or len(res) < sizehint:
|
||||||
|
b = self.read(1)
|
||||||
|
if not b:
|
||||||
|
break
|
||||||
|
res += b
|
||||||
|
if b == b"\n":
|
||||||
|
break
|
||||||
|
return res
|
||||||
|
|
||||||
def write(self, b: bytes) -> int:
|
def write(self, b: bytes) -> int:
|
||||||
"""write(b: bytes) -> int. Write the given buffer to the IO stream.
|
"""write(b: bytes) -> int. Write the given buffer to the IO stream.
|
||||||
|
|
||||||
|
@ -518,19 +532,25 @@ class _BufferedIOMixin(BufferedIOBase):
|
||||||
return self.raw.isatty()
|
return self.raw.isatty()
|
||||||
|
|
||||||
|
|
||||||
class _MemoryIOMixin(BufferedIOBase):
|
class BytesIO(BufferedIOBase):
|
||||||
|
|
||||||
# XXX docstring
|
"""Buffered I/O implementation using an in-memory bytes buffer."""
|
||||||
|
|
||||||
def __init__(self, buffer):
|
# XXX More docs
|
||||||
|
|
||||||
|
def __init__(self, initial_bytes=None):
|
||||||
|
buffer = b""
|
||||||
|
if initial_bytes is not None:
|
||||||
|
buffer += initial_bytes
|
||||||
self._buffer = buffer
|
self._buffer = buffer
|
||||||
self._pos = 0
|
self._pos = 0
|
||||||
|
|
||||||
def getvalue(self):
|
def getvalue(self):
|
||||||
return self._buffer
|
return self._buffer
|
||||||
|
|
||||||
def read(self, n=-1):
|
def read(self, n=None):
|
||||||
assert n is not None
|
if n is None:
|
||||||
|
n = -1
|
||||||
if n < 0:
|
if n < 0:
|
||||||
n = len(self._buffer)
|
n = len(self._buffer)
|
||||||
newpos = min(len(self._buffer), self._pos + n)
|
newpos = min(len(self._buffer), self._pos + n)
|
||||||
|
@ -538,6 +558,9 @@ class _MemoryIOMixin(BufferedIOBase):
|
||||||
self._pos = newpos
|
self._pos = newpos
|
||||||
return b
|
return b
|
||||||
|
|
||||||
|
def read1(self, n):
|
||||||
|
return self.read(n)
|
||||||
|
|
||||||
def write(self, b):
|
def write(self, b):
|
||||||
n = len(b)
|
n = len(b)
|
||||||
newpos = self._pos + n
|
newpos = self._pos + n
|
||||||
|
@ -575,65 +598,6 @@ class _MemoryIOMixin(BufferedIOBase):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
class BytesIO(_MemoryIOMixin):
|
|
||||||
|
|
||||||
"""Buffered I/O implementation using a bytes buffer, like StringIO."""
|
|
||||||
|
|
||||||
# XXX More docs
|
|
||||||
|
|
||||||
def __init__(self, initial_bytes=None):
|
|
||||||
buffer = b""
|
|
||||||
if initial_bytes is not None:
|
|
||||||
buffer += initial_bytes
|
|
||||||
_MemoryIOMixin.__init__(self, buffer)
|
|
||||||
|
|
||||||
|
|
||||||
# XXX This should inherit from TextIOBase
|
|
||||||
class StringIO(_MemoryIOMixin):
|
|
||||||
|
|
||||||
"""Buffered I/O implementation using a string buffer, like StringIO."""
|
|
||||||
|
|
||||||
# XXX More docs
|
|
||||||
|
|
||||||
# Reuses the same code as BytesIO, but encode strings on the way in
|
|
||||||
# and decode them on the way out.
|
|
||||||
|
|
||||||
charsize = len("!".encode("unicode-internal"))
|
|
||||||
|
|
||||||
def __init__(self, initial_string=None):
|
|
||||||
if initial_string is not None:
|
|
||||||
buffer = initial_string.encode("unicode-internal")
|
|
||||||
else:
|
|
||||||
buffer = b""
|
|
||||||
_MemoryIOMixin.__init__(self, buffer)
|
|
||||||
|
|
||||||
def getvalue(self):
|
|
||||||
return self._buffer.encode("unicode-internal")
|
|
||||||
|
|
||||||
def read(self, n=-1):
|
|
||||||
return super(StringIO, self).read(n*self.charsize) \
|
|
||||||
.decode("unicode-internal")
|
|
||||||
|
|
||||||
def write(self, s):
|
|
||||||
return super(StringIO, self).write(s.encode("unicode-internal")) \
|
|
||||||
//self.charsize
|
|
||||||
|
|
||||||
def seek(self, pos, whence=0):
|
|
||||||
return super(StringIO, self).seek(self.charsize*pos, whence) \
|
|
||||||
//self.charsize
|
|
||||||
|
|
||||||
def tell(self):
|
|
||||||
return super(StringIO, self).tell()//self.charsize
|
|
||||||
|
|
||||||
def truncate(self, pos=None):
|
|
||||||
if pos is not None:
|
|
||||||
pos *= self.charsize
|
|
||||||
return super(StringIO, self).truncate(pos)//self.charsize
|
|
||||||
|
|
||||||
def readinto(self, b: bytes) -> int:
|
|
||||||
self._unsupported("readinto")
|
|
||||||
|
|
||||||
|
|
||||||
class BufferedReader(_BufferedIOMixin):
|
class BufferedReader(_BufferedIOMixin):
|
||||||
|
|
||||||
"""Buffer for a readable sequential RawIO object."""
|
"""Buffer for a readable sequential RawIO object."""
|
||||||
|
@ -646,7 +610,7 @@ class BufferedReader(_BufferedIOMixin):
|
||||||
self._read_buf = b""
|
self._read_buf = b""
|
||||||
self.buffer_size = buffer_size
|
self.buffer_size = buffer_size
|
||||||
|
|
||||||
def read(self, n=-1):
|
def read(self, n=None):
|
||||||
"""Read n bytes.
|
"""Read n bytes.
|
||||||
|
|
||||||
Returns exactly n bytes of data unless the underlying raw IO
|
Returns exactly n bytes of data unless the underlying raw IO
|
||||||
|
@ -654,7 +618,8 @@ class BufferedReader(_BufferedIOMixin):
|
||||||
mode. If n is negative, read until EOF or until read() would
|
mode. If n is negative, read until EOF or until read() would
|
||||||
block.
|
block.
|
||||||
"""
|
"""
|
||||||
assert n is not None
|
if n is None:
|
||||||
|
n = -1
|
||||||
nodata_val = b""
|
nodata_val = b""
|
||||||
while n < 0 or len(self._read_buf) < n:
|
while n < 0 or len(self._read_buf) < n:
|
||||||
to_read = max(self.buffer_size,
|
to_read = max(self.buffer_size,
|
||||||
|
@ -801,7 +766,9 @@ class BufferedRWPair(BufferedIOBase):
|
||||||
self.reader = BufferedReader(reader, buffer_size)
|
self.reader = BufferedReader(reader, buffer_size)
|
||||||
self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)
|
self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)
|
||||||
|
|
||||||
def read(self, n=-1):
|
def read(self, n=None):
|
||||||
|
if n is None:
|
||||||
|
n = -1
|
||||||
return self.reader.read(n)
|
return self.reader.read(n)
|
||||||
|
|
||||||
def readinto(self, b):
|
def readinto(self, b):
|
||||||
|
@ -861,7 +828,9 @@ class BufferedRandom(BufferedWriter, BufferedReader):
|
||||||
else:
|
else:
|
||||||
return self.raw.tell() - len(self._read_buf)
|
return self.raw.tell() - len(self._read_buf)
|
||||||
|
|
||||||
def read(self, n=-1):
|
def read(self, n=None):
|
||||||
|
if n is None:
|
||||||
|
n = -1
|
||||||
self.flush()
|
self.flush()
|
||||||
return BufferedReader.read(self, n)
|
return BufferedReader.read(self, n)
|
||||||
|
|
||||||
|
@ -1129,7 +1098,9 @@ class TextIOWrapper(TextIOBase):
|
||||||
except UnicodeEncodeError:
|
except UnicodeEncodeError:
|
||||||
return u
|
return u
|
||||||
|
|
||||||
def read(self, n: int = -1):
|
def read(self, n=None):
|
||||||
|
if n is None:
|
||||||
|
n = -1
|
||||||
decoder = self._decoder or self._get_decoder()
|
decoder = self._decoder or self._get_decoder()
|
||||||
res = self._pending
|
res = self._pending
|
||||||
if n < 0:
|
if n < 0:
|
||||||
|
@ -1146,7 +1117,7 @@ class TextIOWrapper(TextIOBase):
|
||||||
self._pending = res[n:]
|
self._pending = res[n:]
|
||||||
return self._simplify(res[:n])
|
return self._simplify(res[:n])
|
||||||
|
|
||||||
def __next__(self) -> str:
|
def __next__(self):
|
||||||
self._telling = False
|
self._telling = False
|
||||||
line = self.readline()
|
line = self.readline()
|
||||||
if not line:
|
if not line:
|
||||||
|
@ -1218,3 +1189,17 @@ class TextIOWrapper(TextIOBase):
|
||||||
return self._simplify(line[:endpos] + "\n")
|
return self._simplify(line[:endpos] + "\n")
|
||||||
else:
|
else:
|
||||||
return self._simplify(line[:nextpos])
|
return self._simplify(line[:nextpos])
|
||||||
|
|
||||||
|
|
||||||
|
class StringIO(TextIOWrapper):
|
||||||
|
|
||||||
|
# XXX This is really slow, but fully functional
|
||||||
|
|
||||||
|
def __init__(self, initial_value=""):
|
||||||
|
super(StringIO, self).__init__(BytesIO(), "utf-8")
|
||||||
|
if initial_value:
|
||||||
|
self.write(initial_value)
|
||||||
|
self.seek(0)
|
||||||
|
|
||||||
|
def getvalue(self):
|
||||||
|
return self.buffer.getvalue().decode("utf-8")
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -13,12 +13,12 @@ class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||||
tstring = test_multibytecodec_support.load_teststring('gb2312')
|
tstring = test_multibytecodec_support.load_teststring('gb2312')
|
||||||
codectests = (
|
codectests = (
|
||||||
# invalid bytes
|
# invalid bytes
|
||||||
("abc\x81\x81\xc1\xc4", "strict", None),
|
(b"abc\x81\x81\xc1\xc4", "strict", None),
|
||||||
("abc\xc8", "strict", None),
|
(b"abc\xc8", "strict", None),
|
||||||
("abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
(b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
||||||
("abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
|
(b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
|
||||||
("abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"),
|
(b"abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"),
|
||||||
("\xc1\x64", "strict", None),
|
(b"\xc1\x64", "strict", None),
|
||||||
)
|
)
|
||||||
|
|
||||||
class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
|
class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||||
|
@ -26,12 +26,12 @@ class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||||
tstring = test_multibytecodec_support.load_teststring('gbk')
|
tstring = test_multibytecodec_support.load_teststring('gbk')
|
||||||
codectests = (
|
codectests = (
|
||||||
# invalid bytes
|
# invalid bytes
|
||||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||||
("abc\xc8", "strict", None),
|
(b"abc\xc8", "strict", None),
|
||||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
||||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
|
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
|
||||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
|
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
|
||||||
("\x83\x34\x83\x31", "strict", None),
|
(b"\x83\x34\x83\x31", "strict", None),
|
||||||
("\u30fb", "strict", None),
|
("\u30fb", "strict", None),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -40,13 +40,13 @@ class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||||
tstring = test_multibytecodec_support.load_teststring('gb18030')
|
tstring = test_multibytecodec_support.load_teststring('gb18030')
|
||||||
codectests = (
|
codectests = (
|
||||||
# invalid bytes
|
# invalid bytes
|
||||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||||
("abc\xc8", "strict", None),
|
(b"abc\xc8", "strict", None),
|
||||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
||||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
|
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
|
||||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
|
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
|
||||||
("abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
(b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
||||||
("\u30fb", "strict", "\x819\xa79"),
|
("\u30fb", "strict", b"\x819\xa79"),
|
||||||
)
|
)
|
||||||
has_iso10646 = True
|
has_iso10646 = True
|
||||||
|
|
||||||
|
|
|
@ -13,11 +13,11 @@ class Test_Big5HKSCS(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||||
tstring = test_multibytecodec_support.load_teststring('big5hkscs')
|
tstring = test_multibytecodec_support.load_teststring('big5hkscs')
|
||||||
codectests = (
|
codectests = (
|
||||||
# invalid bytes
|
# invalid bytes
|
||||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||||
("abc\xc8", "strict", None),
|
(b"abc\xc8", "strict", None),
|
||||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
|
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
|
||||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
|
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
|
||||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
|
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
|
|
|
@ -13,14 +13,14 @@ class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||||
tstring = test_multibytecodec_support.load_teststring('shift_jis')
|
tstring = test_multibytecodec_support.load_teststring('shift_jis')
|
||||||
codectests = (
|
codectests = (
|
||||||
# invalid bytes
|
# invalid bytes
|
||||||
("abc\x81\x00\x81\x00\x82\x84", "strict", None),
|
(b"abc\x81\x00\x81\x00\x82\x84", "strict", None),
|
||||||
("abc\xf8", "strict", None),
|
(b"abc\xf8", "strict", None),
|
||||||
("abc\x81\x00\x82\x84", "replace", "abc\ufffd\uff44"),
|
(b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\uff44"),
|
||||||
("abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
|
(b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
|
||||||
("abc\x81\x00\x82\x84", "ignore", "abc\uff44"),
|
(b"abc\x81\x00\x82\x84", "ignore", "abc\uff44"),
|
||||||
# sjis vs cp932
|
# sjis vs cp932
|
||||||
("\\\x7e", "replace", "\\\x7e"),
|
(b"\\\x7e", "replace", "\\\x7e"),
|
||||||
("\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
|
(b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
|
||||||
)
|
)
|
||||||
|
|
||||||
class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
|
class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
|
||||||
|
@ -29,28 +29,28 @@ class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
|
||||||
tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
|
tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
|
||||||
codectests = (
|
codectests = (
|
||||||
# invalid bytes
|
# invalid bytes
|
||||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||||
("abc\xc8", "strict", None),
|
(b"abc\xc8", "strict", None),
|
||||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
|
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
|
||||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
|
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
|
||||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
|
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
|
||||||
("abc\x8f\x83\x83", "replace", "abc\ufffd"),
|
(b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
|
||||||
("\xc1\x64", "strict", None),
|
(b"\xc1\x64", "strict", None),
|
||||||
("\xa1\xc0", "strict", "\uff3c"),
|
(b"\xa1\xc0", "strict", "\uff3c"),
|
||||||
)
|
)
|
||||||
xmlcharnametest = (
|
xmlcharnametest = (
|
||||||
"\xab\u211c\xbb = \u2329\u1234\u232a",
|
"\xab\u211c\xbb = \u2329\u1234\u232a",
|
||||||
"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
|
b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
|
||||||
)
|
)
|
||||||
|
|
||||||
eucjp_commontests = (
|
eucjp_commontests = (
|
||||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||||
("abc\xc8", "strict", None),
|
(b"abc\xc8", "strict", None),
|
||||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
|
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
|
||||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
|
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
|
||||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
|
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
|
||||||
("abc\x8f\x83\x83", "replace", "abc\ufffd"),
|
(b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
|
||||||
("\xc1\x64", "strict", None),
|
(b"\xc1\x64", "strict", None),
|
||||||
)
|
)
|
||||||
|
|
||||||
class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
|
class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
|
||||||
|
@ -58,25 +58,25 @@ class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
|
||||||
encoding = 'euc_jp'
|
encoding = 'euc_jp'
|
||||||
tstring = test_multibytecodec_support.load_teststring('euc_jp')
|
tstring = test_multibytecodec_support.load_teststring('euc_jp')
|
||||||
codectests = eucjp_commontests + (
|
codectests = eucjp_commontests + (
|
||||||
("\xa1\xc0\\", "strict", "\uff3c\\"),
|
(b"\xa1\xc0\\", "strict", "\uff3c\\"),
|
||||||
("\xa5", "strict", "\x5c"),
|
("\xa5", "strict", b"\x5c"),
|
||||||
("\u203e", "strict", "\x7e"),
|
("\u203e", "strict", b"\x7e"),
|
||||||
)
|
)
|
||||||
|
|
||||||
shiftjis_commonenctests = (
|
shiftjis_commonenctests = (
|
||||||
("abc\x80\x80\x82\x84", "strict", None),
|
(b"abc\x80\x80\x82\x84", "strict", None),
|
||||||
("abc\xf8", "strict", None),
|
(b"abc\xf8", "strict", None),
|
||||||
("abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
|
(b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
|
||||||
("abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
|
(b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
|
||||||
("abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
|
(b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
|
||||||
)
|
)
|
||||||
|
|
||||||
class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
|
class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||||
encoding = 'shift_jis'
|
encoding = 'shift_jis'
|
||||||
tstring = test_multibytecodec_support.load_teststring('shift_jis')
|
tstring = test_multibytecodec_support.load_teststring('shift_jis')
|
||||||
codectests = shiftjis_commonenctests + (
|
codectests = shiftjis_commonenctests + (
|
||||||
("\\\x7e", "strict", "\\\x7e"),
|
(b"\\\x7e", "strict", "\\\x7e"),
|
||||||
("\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
|
(b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
|
||||||
)
|
)
|
||||||
|
|
||||||
class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
|
class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||||
|
@ -84,18 +84,18 @@ class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||||
tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
|
tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
|
||||||
codectests = (
|
codectests = (
|
||||||
# invalid bytes
|
# invalid bytes
|
||||||
("abc\x80\x80\x82\x84", "strict", None),
|
(b"abc\x80\x80\x82\x84", "strict", None),
|
||||||
("abc\xf8", "strict", None),
|
(b"abc\xf8", "strict", None),
|
||||||
("abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
|
(b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
|
||||||
("abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
|
(b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
|
||||||
("abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
|
(b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
|
||||||
# sjis vs cp932
|
# sjis vs cp932
|
||||||
("\\\x7e", "replace", "\xa5\u203e"),
|
(b"\\\x7e", "replace", "\xa5\u203e"),
|
||||||
("\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
|
(b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
|
||||||
)
|
)
|
||||||
xmlcharnametest = (
|
xmlcharnametest = (
|
||||||
"\xab\u211c\xbb = \u2329\u1234\u232a",
|
"\xab\u211c\xbb = \u2329\u1234\u232a",
|
||||||
"\x85G&real;\x85Q = &lang;&#4660;&rang;"
|
b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
|
|
|
@ -13,11 +13,11 @@ class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||||
tstring = test_multibytecodec_support.load_teststring('cp949')
|
tstring = test_multibytecodec_support.load_teststring('cp949')
|
||||||
codectests = (
|
codectests = (
|
||||||
# invalid bytes
|
# invalid bytes
|
||||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||||
("abc\xc8", "strict", None),
|
(b"abc\xc8", "strict", None),
|
||||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
|
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
|
||||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
|
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
|
||||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
|
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
|
||||||
)
|
)
|
||||||
|
|
||||||
class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
|
class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||||
|
@ -25,11 +25,11 @@ class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||||
tstring = test_multibytecodec_support.load_teststring('euc_kr')
|
tstring = test_multibytecodec_support.load_teststring('euc_kr')
|
||||||
codectests = (
|
codectests = (
|
||||||
# invalid bytes
|
# invalid bytes
|
||||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||||
("abc\xc8", "strict", None),
|
(b"abc\xc8", "strict", None),
|
||||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
|
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
|
||||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
|
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
|
||||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
|
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
|
||||||
)
|
)
|
||||||
|
|
||||||
class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
|
class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||||
|
@ -37,11 +37,11 @@ class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||||
tstring = test_multibytecodec_support.load_teststring('johab')
|
tstring = test_multibytecodec_support.load_teststring('johab')
|
||||||
codectests = (
|
codectests = (
|
||||||
# invalid bytes
|
# invalid bytes
|
||||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||||
("abc\xc8", "strict", None),
|
(b"abc\xc8", "strict", None),
|
||||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ucd27"),
|
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ucd27"),
|
||||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ucd27\ufffd"),
|
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ucd27\ufffd"),
|
||||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\ucd27"),
|
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\ucd27"),
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
|
|
|
@ -13,11 +13,11 @@ class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||||
tstring = test_multibytecodec_support.load_teststring('big5')
|
tstring = test_multibytecodec_support.load_teststring('big5')
|
||||||
codectests = (
|
codectests = (
|
||||||
# invalid bytes
|
# invalid bytes
|
||||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||||
("abc\xc8", "strict", None),
|
(b"abc\xc8", "strict", None),
|
||||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
|
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
|
||||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
|
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
|
||||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
|
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
from test import test_support
|
from test import test_support
|
||||||
from test import test_multibytecodec_support
|
from test import test_multibytecodec_support
|
||||||
from test.test_support import TESTFN
|
from test.test_support import TESTFN
|
||||||
import unittest, StringIO, codecs, sys, os
|
import unittest, io, codecs, sys, os
|
||||||
|
|
||||||
ALL_CJKENCODINGS = [
|
ALL_CJKENCODINGS = [
|
||||||
# _codecs_cn
|
# _codecs_cn
|
||||||
|
@ -30,13 +30,13 @@ class Test_MultibyteCodec(unittest.TestCase):
|
||||||
|
|
||||||
def test_nullcoding(self):
|
def test_nullcoding(self):
|
||||||
for enc in ALL_CJKENCODINGS:
|
for enc in ALL_CJKENCODINGS:
|
||||||
self.assertEqual(''.decode(enc), '')
|
self.assertEqual(b''.decode(enc), '')
|
||||||
self.assertEqual(str('', enc), '')
|
self.assertEqual(str(b'', enc), '')
|
||||||
self.assertEqual(''.encode(enc), '')
|
self.assertEqual(''.encode(enc), b'')
|
||||||
|
|
||||||
def test_str_decode(self):
|
def test_str_decode(self):
|
||||||
for enc in ALL_CJKENCODINGS:
|
for enc in ALL_CJKENCODINGS:
|
||||||
self.assertEqual('abcd'.encode(enc), 'abcd')
|
self.assertEqual('abcd'.encode(enc), b'abcd')
|
||||||
|
|
||||||
def test_errorcallback_longindex(self):
|
def test_errorcallback_longindex(self):
|
||||||
dec = codecs.getdecoder('euc-kr')
|
dec = codecs.getdecoder('euc-kr')
|
||||||
|
@ -48,7 +48,7 @@ class Test_MultibyteCodec(unittest.TestCase):
|
||||||
def test_codingspec(self):
|
def test_codingspec(self):
|
||||||
try:
|
try:
|
||||||
for enc in ALL_CJKENCODINGS:
|
for enc in ALL_CJKENCODINGS:
|
||||||
print('# coding:', enc, file=open(TESTFN, 'w'))
|
print('# coding:', enc, file=io.open(TESTFN, 'w'))
|
||||||
execfile(TESTFN)
|
execfile(TESTFN)
|
||||||
finally:
|
finally:
|
||||||
os.unlink(TESTFN)
|
os.unlink(TESTFN)
|
||||||
|
@ -59,13 +59,13 @@ class Test_IncrementalEncoder(unittest.TestCase):
|
||||||
# cp949 encoder isn't stateful at all.
|
# cp949 encoder isn't stateful at all.
|
||||||
encoder = codecs.getincrementalencoder('cp949')()
|
encoder = codecs.getincrementalencoder('cp949')()
|
||||||
self.assertEqual(encoder.encode('\ud30c\uc774\uc36c \ub9c8\uc744'),
|
self.assertEqual(encoder.encode('\ud30c\uc774\uc36c \ub9c8\uc744'),
|
||||||
'\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb')
|
b'\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb')
|
||||||
self.assertEqual(encoder.reset(), None)
|
self.assertEqual(encoder.reset(), None)
|
||||||
self.assertEqual(encoder.encode('\u2606\u223c\u2606', True),
|
self.assertEqual(encoder.encode('\u2606\u223c\u2606', True),
|
||||||
'\xa1\xd9\xa1\xad\xa1\xd9')
|
b'\xa1\xd9\xa1\xad\xa1\xd9')
|
||||||
self.assertEqual(encoder.reset(), None)
|
self.assertEqual(encoder.reset(), None)
|
||||||
self.assertEqual(encoder.encode('', True), '')
|
self.assertEqual(encoder.encode('', True), b'')
|
||||||
self.assertEqual(encoder.encode('', False), '')
|
self.assertEqual(encoder.encode('', False), b'')
|
||||||
self.assertEqual(encoder.reset(), None)
|
self.assertEqual(encoder.reset(), None)
|
||||||
|
|
||||||
def test_stateful(self):
|
def test_stateful(self):
|
||||||
|
@ -75,29 +75,29 @@ class Test_IncrementalEncoder(unittest.TestCase):
|
||||||
# U+0300 => ABDC
|
# U+0300 => ABDC
|
||||||
|
|
||||||
encoder = codecs.getincrementalencoder('jisx0213')()
|
encoder = codecs.getincrementalencoder('jisx0213')()
|
||||||
self.assertEqual(encoder.encode('\u00e6\u0300'), '\xab\xc4')
|
self.assertEqual(encoder.encode('\u00e6\u0300'), b'\xab\xc4')
|
||||||
self.assertEqual(encoder.encode('\u00e6'), '')
|
self.assertEqual(encoder.encode('\u00e6'), b'')
|
||||||
self.assertEqual(encoder.encode('\u0300'), '\xab\xc4')
|
self.assertEqual(encoder.encode('\u0300'), b'\xab\xc4')
|
||||||
self.assertEqual(encoder.encode('\u00e6', True), '\xa9\xdc')
|
self.assertEqual(encoder.encode('\u00e6', True), b'\xa9\xdc')
|
||||||
|
|
||||||
self.assertEqual(encoder.reset(), None)
|
self.assertEqual(encoder.reset(), None)
|
||||||
self.assertEqual(encoder.encode('\u0300'), '\xab\xdc')
|
self.assertEqual(encoder.encode('\u0300'), b'\xab\xdc')
|
||||||
|
|
||||||
self.assertEqual(encoder.encode('\u00e6'), '')
|
self.assertEqual(encoder.encode('\u00e6'), b'')
|
||||||
self.assertEqual(encoder.encode('', True), '\xa9\xdc')
|
self.assertEqual(encoder.encode('', True), b'\xa9\xdc')
|
||||||
self.assertEqual(encoder.encode('', True), '')
|
self.assertEqual(encoder.encode('', True), b'')
|
||||||
|
|
||||||
def test_stateful_keep_buffer(self):
|
def test_stateful_keep_buffer(self):
|
||||||
encoder = codecs.getincrementalencoder('jisx0213')()
|
encoder = codecs.getincrementalencoder('jisx0213')()
|
||||||
self.assertEqual(encoder.encode('\u00e6'), '')
|
self.assertEqual(encoder.encode('\u00e6'), b'')
|
||||||
self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
|
self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
|
||||||
self.assertEqual(encoder.encode('\u0300\u00e6'), '\xab\xc4')
|
self.assertEqual(encoder.encode('\u0300\u00e6'), b'\xab\xc4')
|
||||||
self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
|
self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
|
||||||
self.assertEqual(encoder.reset(), None)
|
self.assertEqual(encoder.reset(), None)
|
||||||
self.assertEqual(encoder.encode('\u0300'), '\xab\xdc')
|
self.assertEqual(encoder.encode('\u0300'), b'\xab\xdc')
|
||||||
self.assertEqual(encoder.encode('\u00e6'), '')
|
self.assertEqual(encoder.encode('\u00e6'), b'')
|
||||||
self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
|
self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
|
||||||
self.assertEqual(encoder.encode('', True), '\xa9\xdc')
|
self.assertEqual(encoder.encode('', True), b'\xa9\xdc')
|
||||||
|
|
||||||
|
|
||||||
class Test_IncrementalDecoder(unittest.TestCase):
|
class Test_IncrementalDecoder(unittest.TestCase):
|
||||||
|
@ -105,21 +105,21 @@ class Test_IncrementalDecoder(unittest.TestCase):
|
||||||
def test_dbcs(self):
|
def test_dbcs(self):
|
||||||
# cp949 decoder is simple with only 1 or 2 bytes sequences.
|
# cp949 decoder is simple with only 1 or 2 bytes sequences.
|
||||||
decoder = codecs.getincrementaldecoder('cp949')()
|
decoder = codecs.getincrementaldecoder('cp949')()
|
||||||
self.assertEqual(decoder.decode('\xc6\xc4\xc0\xcc\xbd'),
|
self.assertEqual(decoder.decode(b'\xc6\xc4\xc0\xcc\xbd'),
|
||||||
'\ud30c\uc774')
|
'\ud30c\uc774')
|
||||||
self.assertEqual(decoder.decode('\xe3 \xb8\xb6\xc0\xbb'),
|
self.assertEqual(decoder.decode(b'\xe3 \xb8\xb6\xc0\xbb'),
|
||||||
'\uc36c \ub9c8\uc744')
|
'\uc36c \ub9c8\uc744')
|
||||||
self.assertEqual(decoder.decode(''), '')
|
self.assertEqual(decoder.decode(b''), '')
|
||||||
|
|
||||||
def test_dbcs_keep_buffer(self):
|
def test_dbcs_keep_buffer(self):
|
||||||
decoder = codecs.getincrementaldecoder('cp949')()
|
decoder = codecs.getincrementaldecoder('cp949')()
|
||||||
self.assertEqual(decoder.decode('\xc6\xc4\xc0'), '\ud30c')
|
self.assertEqual(decoder.decode(b'\xc6\xc4\xc0'), '\ud30c')
|
||||||
self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
|
self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
|
||||||
self.assertEqual(decoder.decode('\xcc'), '\uc774')
|
self.assertEqual(decoder.decode(b'\xcc'), '\uc774')
|
||||||
|
|
||||||
self.assertEqual(decoder.decode('\xc6\xc4\xc0'), '\ud30c')
|
self.assertEqual(decoder.decode(b'\xc6\xc4\xc0'), '\ud30c')
|
||||||
self.assertRaises(UnicodeDecodeError, decoder.decode, '\xcc\xbd', True)
|
self.assertRaises(UnicodeDecodeError, decoder.decode, '\xcc\xbd', True)
|
||||||
self.assertEqual(decoder.decode('\xcc'), '\uc774')
|
self.assertEqual(decoder.decode(b'\xcc'), '\uc774')
|
||||||
|
|
||||||
def test_iso2022(self):
|
def test_iso2022(self):
|
||||||
decoder = codecs.getincrementaldecoder('iso2022-jp')()
|
decoder = codecs.getincrementaldecoder('iso2022-jp')()
|
||||||
|
@ -140,61 +140,61 @@ class Test_IncrementalDecoder(unittest.TestCase):
|
||||||
class Test_StreamWriter(unittest.TestCase):
|
class Test_StreamWriter(unittest.TestCase):
|
||||||
if len('\U00012345') == 2: # UCS2
|
if len('\U00012345') == 2: # UCS2
|
||||||
def test_gb18030(self):
|
def test_gb18030(self):
|
||||||
s= StringIO.StringIO()
|
s= io.BytesIO()
|
||||||
c = codecs.getwriter('gb18030')(s)
|
c = codecs.getwriter('gb18030')(s)
|
||||||
c.write('123')
|
c.write('123')
|
||||||
self.assertEqual(s.getvalue(), '123')
|
self.assertEqual(s.getvalue(), b'123')
|
||||||
c.write('\U00012345')
|
c.write('\U00012345')
|
||||||
self.assertEqual(s.getvalue(), '123\x907\x959')
|
self.assertEqual(s.getvalue(), b'123\x907\x959')
|
||||||
c.write('\U00012345'[0])
|
c.write('\U00012345'[0])
|
||||||
self.assertEqual(s.getvalue(), '123\x907\x959')
|
self.assertEqual(s.getvalue(), b'123\x907\x959')
|
||||||
c.write('\U00012345'[1] + '\U00012345' + '\uac00\u00ac')
|
c.write('\U00012345'[1] + '\U00012345' + '\uac00\u00ac')
|
||||||
self.assertEqual(s.getvalue(),
|
self.assertEqual(s.getvalue(),
|
||||||
'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
|
b'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
|
||||||
c.write('\U00012345'[0])
|
c.write('\U00012345'[0])
|
||||||
self.assertEqual(s.getvalue(),
|
self.assertEqual(s.getvalue(),
|
||||||
'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
|
b'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
|
||||||
self.assertRaises(UnicodeError, c.reset)
|
self.assertRaises(UnicodeError, c.reset)
|
||||||
self.assertEqual(s.getvalue(),
|
self.assertEqual(s.getvalue(),
|
||||||
'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
|
b'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
|
||||||
|
|
||||||
def test_utf_8(self):
|
def test_utf_8(self):
|
||||||
s= StringIO.StringIO()
|
s= io.BytesIO()
|
||||||
c = codecs.getwriter('utf-8')(s)
|
c = codecs.getwriter('utf-8')(s)
|
||||||
c.write('123')
|
c.write('123')
|
||||||
self.assertEqual(s.getvalue(), '123')
|
self.assertEqual(s.getvalue(), b'123')
|
||||||
c.write('\U00012345')
|
c.write('\U00012345')
|
||||||
self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
|
self.assertEqual(s.getvalue(), b'123\xf0\x92\x8d\x85')
|
||||||
|
|
||||||
# Python utf-8 codec can't buffer surrogate pairs yet.
|
# Python utf-8 codec can't buffer surrogate pairs yet.
|
||||||
if 0:
|
if 0:
|
||||||
c.write('\U00012345'[0])
|
c.write('\U00012345'[0])
|
||||||
self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
|
self.assertEqual(s.getvalue(), b'123\xf0\x92\x8d\x85')
|
||||||
c.write('\U00012345'[1] + '\U00012345' + '\uac00\u00ac')
|
c.write('\U00012345'[1] + '\U00012345' + '\uac00\u00ac')
|
||||||
self.assertEqual(s.getvalue(),
|
self.assertEqual(s.getvalue(),
|
||||||
'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
|
b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
|
||||||
'\xea\xb0\x80\xc2\xac')
|
b'\xea\xb0\x80\xc2\xac')
|
||||||
c.write('\U00012345'[0])
|
c.write('\U00012345'[0])
|
||||||
self.assertEqual(s.getvalue(),
|
self.assertEqual(s.getvalue(),
|
||||||
'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
|
b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
|
||||||
'\xea\xb0\x80\xc2\xac')
|
b'\xea\xb0\x80\xc2\xac')
|
||||||
c.reset()
|
c.reset()
|
||||||
self.assertEqual(s.getvalue(),
|
self.assertEqual(s.getvalue(),
|
||||||
'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
|
b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
|
||||||
'\xea\xb0\x80\xc2\xac\xed\xa0\x88')
|
b'\xea\xb0\x80\xc2\xac\xed\xa0\x88')
|
||||||
c.write('\U00012345'[1])
|
c.write('\U00012345'[1])
|
||||||
self.assertEqual(s.getvalue(),
|
self.assertEqual(s.getvalue(),
|
||||||
'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
|
b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
|
||||||
'\xea\xb0\x80\xc2\xac\xed\xa0\x88\xed\xbd\x85')
|
b'\xea\xb0\x80\xc2\xac\xed\xa0\x88\xed\xbd\x85')
|
||||||
|
|
||||||
else: # UCS4
|
else: # UCS4
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def test_streamwriter_strwrite(self):
|
def test_streamwriter_strwrite(self):
|
||||||
s = StringIO.StringIO()
|
s = io.BytesIO()
|
||||||
wr = codecs.getwriter('gb18030')(s)
|
wr = codecs.getwriter('gb18030')(s)
|
||||||
wr.write('abcd')
|
wr.write('abcd')
|
||||||
self.assertEqual(s.getvalue(), 'abcd')
|
self.assertEqual(s.getvalue(), b'abcd')
|
||||||
|
|
||||||
class Test_ISO2022(unittest.TestCase):
|
class Test_ISO2022(unittest.TestCase):
|
||||||
def test_g2(self):
|
def test_g2(self):
|
||||||
|
@ -203,10 +203,10 @@ class Test_ISO2022(unittest.TestCase):
|
||||||
self.assertEqual(iso2022jp2.decode('iso2022-jp-2'), uni)
|
self.assertEqual(iso2022jp2.decode('iso2022-jp-2'), uni)
|
||||||
|
|
||||||
def test_iso2022_jp_g0(self):
|
def test_iso2022_jp_g0(self):
|
||||||
self.failIf('\x0e' in '\N{SOFT HYPHEN}'.encode('iso-2022-jp-2'))
|
self.failIf(b'\x0e' in '\N{SOFT HYPHEN}'.encode('iso-2022-jp-2'))
|
||||||
for encoding in ('iso-2022-jp-2004', 'iso-2022-jp-3'):
|
for encoding in ('iso-2022-jp-2004', 'iso-2022-jp-3'):
|
||||||
e = '\u3406'.encode(encoding)
|
e = '\u3406'.encode(encoding)
|
||||||
self.failIf(filter(lambda x: x >= '\x80', e))
|
self.failIf(any(x > 0x80 for x in e))
|
||||||
|
|
||||||
def test_bug1572832(self):
|
def test_bug1572832(self):
|
||||||
if sys.maxunicode >= 0x10000:
|
if sys.maxunicode >= 0x10000:
|
||||||
|
|
|
@ -7,12 +7,12 @@
|
||||||
import sys, codecs, os.path
|
import sys, codecs, os.path
|
||||||
import unittest
|
import unittest
|
||||||
from test import test_support
|
from test import test_support
|
||||||
from StringIO import StringIO
|
from io import BytesIO
|
||||||
|
|
||||||
class TestBase:
|
class TestBase:
|
||||||
encoding = '' # codec name
|
encoding = '' # codec name
|
||||||
codec = None # codec tuple (with 4 elements)
|
codec = None # codec tuple (with 4 elements)
|
||||||
tstring = '' # string to test StreamReader
|
tstring = None # must set. 2 strings to test StreamReader
|
||||||
|
|
||||||
codectests = None # must set. codec test tuple
|
codectests = None # must set. codec test tuple
|
||||||
roundtriptest = 1 # set if roundtrip is possible with unicode
|
roundtriptest = 1 # set if roundtrip is possible with unicode
|
||||||
|
@ -31,7 +31,7 @@ class TestBase:
|
||||||
self.incrementaldecoder = self.codec.incrementaldecoder
|
self.incrementaldecoder = self.codec.incrementaldecoder
|
||||||
|
|
||||||
def test_chunkcoding(self):
|
def test_chunkcoding(self):
|
||||||
for native, utf8 in zip(*[StringIO(f).readlines()
|
for native, utf8 in zip(*[map(bytes, str8(f).splitlines(1))
|
||||||
for f in self.tstring]):
|
for f in self.tstring]):
|
||||||
u = self.decode(native)[0]
|
u = self.decode(native)[0]
|
||||||
self.assertEqual(u, utf8.decode('utf-8'))
|
self.assertEqual(u, utf8.decode('utf-8'))
|
||||||
|
@ -40,7 +40,7 @@ class TestBase:
|
||||||
|
|
||||||
def test_errorhandle(self):
|
def test_errorhandle(self):
|
||||||
for source, scheme, expected in self.codectests:
|
for source, scheme, expected in self.codectests:
|
||||||
if type(source) == type(''):
|
if isinstance(source, bytes):
|
||||||
func = self.decode
|
func = self.decode
|
||||||
else:
|
else:
|
||||||
func = self.encode
|
func = self.encode
|
||||||
|
@ -57,7 +57,7 @@ class TestBase:
|
||||||
s = "\u0b13\u0b23\u0b60 nd eggs"
|
s = "\u0b13\u0b23\u0b60 nd eggs"
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self.encode(s, "xmlcharrefreplace")[0],
|
self.encode(s, "xmlcharrefreplace")[0],
|
||||||
"ଓଣୠ nd eggs"
|
b"ଓଣୠ nd eggs"
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_customreplace_encode(self):
|
def test_customreplace_encode(self):
|
||||||
|
@ -83,7 +83,7 @@ class TestBase:
|
||||||
sin, sout = self.xmlcharnametest
|
sin, sout = self.xmlcharnametest
|
||||||
else:
|
else:
|
||||||
sin = "\xab\u211c\xbb = \u2329\u1234\u232a"
|
sin = "\xab\u211c\xbb = \u2329\u1234\u232a"
|
||||||
sout = "«ℜ» = ⟨ሴ⟩"
|
sout = b"«ℜ» = ⟨ሴ⟩"
|
||||||
self.assertEqual(self.encode(sin,
|
self.assertEqual(self.encode(sin,
|
||||||
"test.xmlcharnamereplace")[0], sout)
|
"test.xmlcharnamereplace")[0], sout)
|
||||||
|
|
||||||
|
@ -92,7 +92,7 @@ class TestBase:
|
||||||
return (ret, exc.end)
|
return (ret, exc.end)
|
||||||
codecs.register_error("test.cjktest", myreplace)
|
codecs.register_error("test.cjktest", myreplace)
|
||||||
|
|
||||||
for ret in ([1, 2, 3], [], None, object(), 'string', ''):
|
for ret in ([1, 2, 3], [], None, object(), b'string', b''):
|
||||||
self.assertRaises(TypeError, self.encode, self.unmappedunicode,
|
self.assertRaises(TypeError, self.encode, self.unmappedunicode,
|
||||||
'test.cjktest')
|
'test.cjktest')
|
||||||
|
|
||||||
|
@ -101,7 +101,7 @@ class TestBase:
|
||||||
return ('x', int(exc.end))
|
return ('x', int(exc.end))
|
||||||
codecs.register_error("test.cjktest", myreplace)
|
codecs.register_error("test.cjktest", myreplace)
|
||||||
self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
|
self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
|
||||||
'test.cjktest'), ('abcdxefgh', 9))
|
'test.cjktest'), (b'abcdxefgh', 9))
|
||||||
|
|
||||||
def myreplace(exc):
|
def myreplace(exc):
|
||||||
return ('x', sys.maxint + 1)
|
return ('x', sys.maxint + 1)
|
||||||
|
@ -127,14 +127,14 @@ class TestBase:
|
||||||
codecs.register_error("test.cjktest", myreplace)
|
codecs.register_error("test.cjktest", myreplace)
|
||||||
self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
|
self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
|
||||||
'test.cjktest'),
|
'test.cjktest'),
|
||||||
('abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9))
|
(b'abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9))
|
||||||
|
|
||||||
def test_callback_forward_index(self):
|
def test_callback_forward_index(self):
|
||||||
def myreplace(exc):
|
def myreplace(exc):
|
||||||
return ('REPLACED', exc.end + 2)
|
return ('REPLACED', exc.end + 2)
|
||||||
codecs.register_error("test.cjktest", myreplace)
|
codecs.register_error("test.cjktest", myreplace)
|
||||||
self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
|
self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
|
||||||
'test.cjktest'), ('abcdREPLACEDgh', 9))
|
'test.cjktest'), (b'abcdREPLACEDgh', 9))
|
||||||
|
|
||||||
def test_callback_index_outofbound(self):
|
def test_callback_index_outofbound(self):
|
||||||
def myreplace(exc):
|
def myreplace(exc):
|
||||||
|
@ -147,8 +147,8 @@ class TestBase:
|
||||||
UTF8Reader = codecs.getreader('utf-8')
|
UTF8Reader = codecs.getreader('utf-8')
|
||||||
for sizehint in [None] + list(range(1, 33)) + \
|
for sizehint in [None] + list(range(1, 33)) + \
|
||||||
[64, 128, 256, 512, 1024]:
|
[64, 128, 256, 512, 1024]:
|
||||||
istream = UTF8Reader(StringIO(self.tstring[1]))
|
istream = UTF8Reader(BytesIO(self.tstring[1]))
|
||||||
ostream = StringIO()
|
ostream = BytesIO()
|
||||||
encoder = self.incrementalencoder()
|
encoder = self.incrementalencoder()
|
||||||
while 1:
|
while 1:
|
||||||
if sizehint is not None:
|
if sizehint is not None:
|
||||||
|
@ -167,8 +167,8 @@ class TestBase:
|
||||||
UTF8Writer = codecs.getwriter('utf-8')
|
UTF8Writer = codecs.getwriter('utf-8')
|
||||||
for sizehint in [None, -1] + list(range(1, 33)) + \
|
for sizehint in [None, -1] + list(range(1, 33)) + \
|
||||||
[64, 128, 256, 512, 1024]:
|
[64, 128, 256, 512, 1024]:
|
||||||
istream = StringIO(self.tstring[0])
|
istream = BytesIO(self.tstring[0])
|
||||||
ostream = UTF8Writer(StringIO())
|
ostream = UTF8Writer(BytesIO())
|
||||||
decoder = self.incrementaldecoder()
|
decoder = self.incrementaldecoder()
|
||||||
while 1:
|
while 1:
|
||||||
data = istream.read(sizehint)
|
data = istream.read(sizehint)
|
||||||
|
@ -187,26 +187,26 @@ class TestBase:
|
||||||
self.assertRaises(UnicodeEncodeError, e.encode, inv, True)
|
self.assertRaises(UnicodeEncodeError, e.encode, inv, True)
|
||||||
|
|
||||||
e.errors = 'ignore'
|
e.errors = 'ignore'
|
||||||
self.assertEqual(e.encode(inv, True), '')
|
self.assertEqual(e.encode(inv, True), b'')
|
||||||
|
|
||||||
e.reset()
|
e.reset()
|
||||||
def tempreplace(exc):
|
def tempreplace(exc):
|
||||||
return ('called', exc.end)
|
return ('called', exc.end)
|
||||||
codecs.register_error('test.incremental_error_callback', tempreplace)
|
codecs.register_error('test.incremental_error_callback', tempreplace)
|
||||||
e.errors = 'test.incremental_error_callback'
|
e.errors = 'test.incremental_error_callback'
|
||||||
self.assertEqual(e.encode(inv, True), 'called')
|
self.assertEqual(e.encode(inv, True), b'called')
|
||||||
|
|
||||||
# again
|
# again
|
||||||
e.errors = 'ignore'
|
e.errors = 'ignore'
|
||||||
self.assertEqual(e.encode(inv, True), '')
|
self.assertEqual(e.encode(inv, True), b'')
|
||||||
|
|
||||||
def test_streamreader(self):
|
def test_streamreader(self):
|
||||||
UTF8Writer = codecs.getwriter('utf-8')
|
UTF8Writer = codecs.getwriter('utf-8')
|
||||||
for name in ["read", "readline", "readlines"]:
|
for name in ["read", "readline", "readlines"]:
|
||||||
for sizehint in [None, -1] + list(range(1, 33)) + \
|
for sizehint in [None, -1] + list(range(1, 33)) + \
|
||||||
[64, 128, 256, 512, 1024]:
|
[64, 128, 256, 512, 1024]:
|
||||||
istream = self.reader(StringIO(self.tstring[0]))
|
istream = self.reader(BytesIO(self.tstring[0]))
|
||||||
ostream = UTF8Writer(StringIO())
|
ostream = UTF8Writer(BytesIO())
|
||||||
func = getattr(istream, name)
|
func = getattr(istream, name)
|
||||||
while 1:
|
while 1:
|
||||||
data = func(sizehint)
|
data = func(sizehint)
|
||||||
|
@ -225,8 +225,8 @@ class TestBase:
|
||||||
for name in readfuncs:
|
for name in readfuncs:
|
||||||
for sizehint in [None] + list(range(1, 33)) + \
|
for sizehint in [None] + list(range(1, 33)) + \
|
||||||
[64, 128, 256, 512, 1024]:
|
[64, 128, 256, 512, 1024]:
|
||||||
istream = UTF8Reader(StringIO(self.tstring[1]))
|
istream = UTF8Reader(BytesIO(self.tstring[1]))
|
||||||
ostream = self.writer(StringIO())
|
ostream = self.writer(BytesIO())
|
||||||
func = getattr(istream, name)
|
func = getattr(istream, name)
|
||||||
while 1:
|
while 1:
|
||||||
if sizehint is not None:
|
if sizehint is not None:
|
||||||
|
|
|
@ -138,6 +138,11 @@ codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
|
||||||
{
|
{
|
||||||
PyObject *cb;
|
PyObject *cb;
|
||||||
|
|
||||||
|
if (PyUnicode_Check(value)) {
|
||||||
|
value = _PyUnicode_AsDefaultEncodedString(value, NULL);
|
||||||
|
if (value == NULL)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
if (!PyString_Check(value)) {
|
if (!PyString_Check(value)) {
|
||||||
PyErr_SetString(PyExc_TypeError, "errors must be a string");
|
PyErr_SetString(PyExc_TypeError, "errors must be a string");
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -322,11 +327,11 @@ multibytecodec_encerror(MultibyteCodec *codec,
|
||||||
goto errorexit;
|
goto errorexit;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(PyString_Check(retstr));
|
assert(PyBytes_Check(retstr));
|
||||||
retstrsize = PyString_GET_SIZE(retstr);
|
retstrsize = PyBytes_GET_SIZE(retstr);
|
||||||
REQUIRE_ENCODEBUFFER(buf, retstrsize);
|
REQUIRE_ENCODEBUFFER(buf, retstrsize);
|
||||||
|
|
||||||
memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize);
|
memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
|
||||||
buf->outbuf += retstrsize;
|
buf->outbuf += retstrsize;
|
||||||
|
|
||||||
newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
|
newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
|
||||||
|
@ -1224,10 +1229,18 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
|
||||||
if (cres == NULL)
|
if (cres == NULL)
|
||||||
goto errorexit;
|
goto errorexit;
|
||||||
|
|
||||||
|
if (PyString_Check(cres)) {
|
||||||
|
PyObject *cres2 = PyBytes_FromObject(cres);
|
||||||
|
if (cres2 == NULL)
|
||||||
|
return NULL;
|
||||||
|
Py_DECREF(cres);
|
||||||
|
cres = cres2;
|
||||||
|
}
|
||||||
|
|
||||||
if (!PyBytes_Check(cres)) {
|
if (!PyBytes_Check(cres)) {
|
||||||
PyErr_Format(PyExc_TypeError,
|
PyErr_Format(PyExc_TypeError,
|
||||||
"stream function returned a "
|
"stream function returned a "
|
||||||
"non-string object (%.100s)",
|
"non-bytes object (%.100s)",
|
||||||
cres->ob_type->tp_name);
|
cres->ob_type->tp_name);
|
||||||
goto errorexit;
|
goto errorexit;
|
||||||
}
|
}
|
||||||
|
@ -1596,8 +1609,8 @@ mbstreamwriter_reset(MultibyteStreamWriterObject *self)
|
||||||
if (pwrt == NULL)
|
if (pwrt == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
assert(PyString_Check(pwrt));
|
assert(PyBytes_Check(pwrt));
|
||||||
if (PyString_Size(pwrt) > 0) {
|
if (PyBytes_Size(pwrt) > 0) {
|
||||||
PyObject *wr;
|
PyObject *wr;
|
||||||
wr = PyObject_CallMethod(self->stream, "write", "O", pwrt);
|
wr = PyObject_CallMethod(self->stream, "write", "O", pwrt);
|
||||||
if (wr == NULL) {
|
if (wr == NULL) {
|
||||||
|
|
Loading…
Reference in New Issue