Make all the multibyte codec tests pass.
Changes to io.py, necessary to make this work: - Redid io.StringIO as a TextIOWrapper on top of a BytesIO instance. - Got rid of _MemoryIOMixin, folding it into BytesIO instead. - The read() functions that take -1 to mean "everything" now also take None. - Added readline() support to BufferedIOBase. :-(
This commit is contained in:
parent
f4cfc8f6bb
commit
024da5c257
129
Lib/io.py
129
Lib/io.py
|
@ -415,8 +415,8 @@ class BufferedIOBase(IOBase):
|
|||
def read(self, n: int = -1) -> bytes:
|
||||
"""read(n: int = -1) -> bytes. Read and return up to n bytes.
|
||||
|
||||
If the argument is omitted, or negative, reads and returns all
|
||||
data until EOF.
|
||||
If the argument is omitted, None, or negative, reads and
|
||||
returns all data until EOF.
|
||||
|
||||
If the argument is positive, and the underlying raw stream is
|
||||
not 'interactive', multiple raw reads may be issued to satisfy
|
||||
|
@ -450,6 +450,20 @@ class BufferedIOBase(IOBase):
|
|||
b[:n] = data
|
||||
return n
|
||||
|
||||
def readline(self, sizehint: int = -1) -> bytes:
    """For backwards compatibility, a (slow) readline().

    Reads from self.read(1) one byte at a time until a newline byte has
    been consumed, read() returns nothing, or sizehint bytes have been
    collected.

    Args:
        sizehint: Maximum number of bytes to return.  None or a negative
            value means "no limit".

    Returns:
        The bytes read, including the terminating newline byte when one
        was found; an empty bytes object when nothing could be read.
    """
    if sizehint is None:
        sizehint = -1
    # Collect the pieces and join once at the end; appending to a bytes
    # object inside the loop re-copies the whole line for every byte read.
    chunks = []
    nread = 0
    while sizehint < 0 or nread < sizehint:
        b = self.read(1)
        if not b:
            break
        chunks.append(b)
        nread += len(b)
        if b == b"\n":
            break
    return b"".join(chunks)
|
||||
|
||||
def write(self, b: bytes) -> int:
|
||||
"""write(b: bytes) -> int. Write the given buffer to the IO stream.
|
||||
|
||||
|
@ -518,19 +532,25 @@ class _BufferedIOMixin(BufferedIOBase):
|
|||
return self.raw.isatty()
|
||||
|
||||
|
||||
class _MemoryIOMixin(BufferedIOBase):
|
||||
class BytesIO(BufferedIOBase):
|
||||
|
||||
# XXX docstring
|
||||
"""Buffered I/O implementation using an in-memory bytes buffer."""
|
||||
|
||||
def __init__(self, buffer):
|
||||
# XXX More docs
|
||||
|
||||
def __init__(self, initial_bytes=None):
|
||||
buffer = b""
|
||||
if initial_bytes is not None:
|
||||
buffer += initial_bytes
|
||||
self._buffer = buffer
|
||||
self._pos = 0
|
||||
|
||||
def getvalue(self):
|
||||
return self._buffer
|
||||
|
||||
def read(self, n=-1):
|
||||
assert n is not None
|
||||
def read(self, n=None):
|
||||
if n is None:
|
||||
n = -1
|
||||
if n < 0:
|
||||
n = len(self._buffer)
|
||||
newpos = min(len(self._buffer), self._pos + n)
|
||||
|
@ -538,6 +558,9 @@ class _MemoryIOMixin(BufferedIOBase):
|
|||
self._pos = newpos
|
||||
return b
|
||||
|
||||
def read1(self, n=None):
    """Read and return up to n bytes by delegating to self.read(n).

    n defaults to None so read1() can be called the same way as read();
    the value is forwarded to read() unchanged.
    """
    return self.read(n)
|
||||
|
||||
def write(self, b):
|
||||
n = len(b)
|
||||
newpos = self._pos + n
|
||||
|
@ -575,65 +598,6 @@ class _MemoryIOMixin(BufferedIOBase):
|
|||
return True
|
||||
|
||||
|
||||
class BytesIO(_MemoryIOMixin):
|
||||
|
||||
"""Buffered I/O implementation using a bytes buffer, like StringIO."""
|
||||
|
||||
# XXX More docs
|
||||
|
||||
def __init__(self, initial_bytes=None):
|
||||
buffer = b""
|
||||
if initial_bytes is not None:
|
||||
buffer += initial_bytes
|
||||
_MemoryIOMixin.__init__(self, buffer)
|
||||
|
||||
|
||||
# XXX This should inherit from TextIOBase
|
||||
class StringIO(_MemoryIOMixin):
|
||||
|
||||
"""Buffered I/O implementation using a string buffer, like StringIO."""
|
||||
|
||||
# XXX More docs
|
||||
|
||||
# Reuses the same code as BytesIO, but encode strings on the way in
|
||||
# and decode them on the way out.
|
||||
|
||||
charsize = len("!".encode("unicode-internal"))
|
||||
|
||||
def __init__(self, initial_string=None):
|
||||
if initial_string is not None:
|
||||
buffer = initial_string.encode("unicode-internal")
|
||||
else:
|
||||
buffer = b""
|
||||
_MemoryIOMixin.__init__(self, buffer)
|
||||
|
||||
def getvalue(self):
|
||||
return self._buffer.encode("unicode-internal")
|
||||
|
||||
def read(self, n=-1):
|
||||
return super(StringIO, self).read(n*self.charsize) \
|
||||
.decode("unicode-internal")
|
||||
|
||||
def write(self, s):
|
||||
return super(StringIO, self).write(s.encode("unicode-internal")) \
|
||||
//self.charsize
|
||||
|
||||
def seek(self, pos, whence=0):
|
||||
return super(StringIO, self).seek(self.charsize*pos, whence) \
|
||||
//self.charsize
|
||||
|
||||
def tell(self):
|
||||
return super(StringIO, self).tell()//self.charsize
|
||||
|
||||
def truncate(self, pos=None):
|
||||
if pos is not None:
|
||||
pos *= self.charsize
|
||||
return super(StringIO, self).truncate(pos)//self.charsize
|
||||
|
||||
def readinto(self, b: bytes) -> int:
|
||||
self._unsupported("readinto")
|
||||
|
||||
|
||||
class BufferedReader(_BufferedIOMixin):
|
||||
|
||||
"""Buffer for a readable sequential RawIO object."""
|
||||
|
@ -646,7 +610,7 @@ class BufferedReader(_BufferedIOMixin):
|
|||
self._read_buf = b""
|
||||
self.buffer_size = buffer_size
|
||||
|
||||
def read(self, n=-1):
|
||||
def read(self, n=None):
|
||||
"""Read n bytes.
|
||||
|
||||
Returns exactly n bytes of data unless the underlying raw IO
|
||||
|
@ -654,7 +618,8 @@ class BufferedReader(_BufferedIOMixin):
|
|||
mode. If n is negative, read until EOF or until read() would
|
||||
block.
|
||||
"""
|
||||
assert n is not None
|
||||
if n is None:
|
||||
n = -1
|
||||
nodata_val = b""
|
||||
while n < 0 or len(self._read_buf) < n:
|
||||
to_read = max(self.buffer_size,
|
||||
|
@ -801,7 +766,9 @@ class BufferedRWPair(BufferedIOBase):
|
|||
self.reader = BufferedReader(reader, buffer_size)
|
||||
self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)
|
||||
|
||||
def read(self, n=-1):
|
||||
def read(self, n=None):
|
||||
if n is None:
|
||||
n = -1
|
||||
return self.reader.read(n)
|
||||
|
||||
def readinto(self, b):
|
||||
|
@ -861,7 +828,9 @@ class BufferedRandom(BufferedWriter, BufferedReader):
|
|||
else:
|
||||
return self.raw.tell() - len(self._read_buf)
|
||||
|
||||
def read(self, n=-1):
|
||||
def read(self, n=None):
|
||||
if n is None:
|
||||
n = -1
|
||||
self.flush()
|
||||
return BufferedReader.read(self, n)
|
||||
|
||||
|
@ -1129,7 +1098,9 @@ class TextIOWrapper(TextIOBase):
|
|||
except UnicodeEncodeError:
|
||||
return u
|
||||
|
||||
def read(self, n: int = -1):
|
||||
def read(self, n=None):
|
||||
if n is None:
|
||||
n = -1
|
||||
decoder = self._decoder or self._get_decoder()
|
||||
res = self._pending
|
||||
if n < 0:
|
||||
|
@ -1146,7 +1117,7 @@ class TextIOWrapper(TextIOBase):
|
|||
self._pending = res[n:]
|
||||
return self._simplify(res[:n])
|
||||
|
||||
def __next__(self) -> str:
|
||||
def __next__(self):
|
||||
self._telling = False
|
||||
line = self.readline()
|
||||
if not line:
|
||||
|
@ -1218,3 +1189,17 @@ class TextIOWrapper(TextIOBase):
|
|||
return self._simplify(line[:endpos] + "\n")
|
||||
else:
|
||||
return self._simplify(line[:nextpos])
|
||||
|
||||
|
||||
class StringIO(TextIOWrapper):
    """Text stream kept in memory.

    Implemented as a TextIOWrapper around a private BytesIO instance; the
    text is stored in that buffer encoded as UTF-8.
    """

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value=""):
        """Create the stream, optionally pre-filled with initial_value.

        When initial_value is non-empty it is written to the stream and the
        position is moved back to 0 so it can be read back immediately.
        """
        super(StringIO, self).__init__(BytesIO(), "utf-8")
        if initial_value:
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Return the whole contents of the underlying buffer as a str."""
        # Push anything the text layer may still be holding down to the
        # BytesIO first; otherwise recently written text could be missing
        # from the returned value.
        self.flush()
        return self.buffer.getvalue().decode("utf-8")
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -13,12 +13,12 @@ class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
tstring = test_multibytecodec_support.load_teststring('gb2312')
|
||||
codectests = (
|
||||
# invalid bytes
|
||||
("abc\x81\x81\xc1\xc4", "strict", None),
|
||||
("abc\xc8", "strict", None),
|
||||
("abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
||||
("abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
|
||||
("abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"),
|
||||
("\xc1\x64", "strict", None),
|
||||
(b"abc\x81\x81\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
||||
(b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
|
||||
(b"abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"),
|
||||
(b"\xc1\x64", "strict", None),
|
||||
)
|
||||
|
||||
class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||
|
@ -26,12 +26,12 @@ class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
tstring = test_multibytecodec_support.load_teststring('gbk')
|
||||
codectests = (
|
||||
# invalid bytes
|
||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
||||
("abc\xc8", "strict", None),
|
||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
|
||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
|
||||
("\x83\x34\x83\x31", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
|
||||
(b"\x83\x34\x83\x31", "strict", None),
|
||||
("\u30fb", "strict", None),
|
||||
)
|
||||
|
||||
|
@ -40,13 +40,13 @@ class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
tstring = test_multibytecodec_support.load_teststring('gb18030')
|
||||
codectests = (
|
||||
# invalid bytes
|
||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
||||
("abc\xc8", "strict", None),
|
||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
|
||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
|
||||
("abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
||||
("\u30fb", "strict", "\x819\xa79"),
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
|
||||
(b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
||||
("\u30fb", "strict", b"\x819\xa79"),
|
||||
)
|
||||
has_iso10646 = True
|
||||
|
||||
|
|
|
@ -13,11 +13,11 @@ class Test_Big5HKSCS(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
tstring = test_multibytecodec_support.load_teststring('big5hkscs')
|
||||
codectests = (
|
||||
# invalid bytes
|
||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
||||
("abc\xc8", "strict", None),
|
||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
|
||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
|
||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
|
||||
)
|
||||
|
||||
def test_main():
|
||||
|
|
|
@ -13,14 +13,14 @@ class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
tstring = test_multibytecodec_support.load_teststring('shift_jis')
|
||||
codectests = (
|
||||
# invalid bytes
|
||||
("abc\x81\x00\x81\x00\x82\x84", "strict", None),
|
||||
("abc\xf8", "strict", None),
|
||||
("abc\x81\x00\x82\x84", "replace", "abc\ufffd\uff44"),
|
||||
("abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
|
||||
("abc\x81\x00\x82\x84", "ignore", "abc\uff44"),
|
||||
(b"abc\x81\x00\x81\x00\x82\x84", "strict", None),
|
||||
(b"abc\xf8", "strict", None),
|
||||
(b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\uff44"),
|
||||
(b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
|
||||
(b"abc\x81\x00\x82\x84", "ignore", "abc\uff44"),
|
||||
# sjis vs cp932
|
||||
("\\\x7e", "replace", "\\\x7e"),
|
||||
("\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
|
||||
(b"\\\x7e", "replace", "\\\x7e"),
|
||||
(b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
|
||||
)
|
||||
|
||||
class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
|
||||
|
@ -29,28 +29,28 @@ class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
|
|||
tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
|
||||
codectests = (
|
||||
# invalid bytes
|
||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
||||
("abc\xc8", "strict", None),
|
||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
|
||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
|
||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
|
||||
("abc\x8f\x83\x83", "replace", "abc\ufffd"),
|
||||
("\xc1\x64", "strict", None),
|
||||
("\xa1\xc0", "strict", "\uff3c"),
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
|
||||
(b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
|
||||
(b"\xc1\x64", "strict", None),
|
||||
(b"\xa1\xc0", "strict", "\uff3c"),
|
||||
)
|
||||
# (input text, expected bytes) for the "test.xmlcharnamereplace" error
# handler.  The expected value is a bytes literal, so it must stay pure
# ASCII plus escapes: characters the codec cannot encode show up as XML
# entity / character references, not as the literal characters.
xmlcharnametest = (
    "\xab\u211c\xbb = \u2329\u1234\u232a",
    b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
)
|
||||
|
||||
eucjp_commontests = (
|
||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
||||
("abc\xc8", "strict", None),
|
||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
|
||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
|
||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
|
||||
("abc\x8f\x83\x83", "replace", "abc\ufffd"),
|
||||
("\xc1\x64", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
|
||||
(b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
|
||||
(b"\xc1\x64", "strict", None),
|
||||
)
|
||||
|
||||
class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
|
||||
|
@ -58,25 +58,25 @@ class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
|
|||
encoding = 'euc_jp'
|
||||
tstring = test_multibytecodec_support.load_teststring('euc_jp')
|
||||
codectests = eucjp_commontests + (
|
||||
("\xa1\xc0\\", "strict", "\uff3c\\"),
|
||||
("\xa5", "strict", "\x5c"),
|
||||
("\u203e", "strict", "\x7e"),
|
||||
(b"\xa1\xc0\\", "strict", "\uff3c\\"),
|
||||
("\xa5", "strict", b"\x5c"),
|
||||
("\u203e", "strict", b"\x7e"),
|
||||
)
|
||||
|
||||
shiftjis_commonenctests = (
|
||||
("abc\x80\x80\x82\x84", "strict", None),
|
||||
("abc\xf8", "strict", None),
|
||||
("abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
|
||||
("abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
|
||||
("abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
|
||||
(b"abc\x80\x80\x82\x84", "strict", None),
|
||||
(b"abc\xf8", "strict", None),
|
||||
(b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
|
||||
(b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
|
||||
(b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
|
||||
)
|
||||
|
||||
class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||
encoding = 'shift_jis'
|
||||
tstring = test_multibytecodec_support.load_teststring('shift_jis')
|
||||
codectests = shiftjis_commonenctests + (
|
||||
("\\\x7e", "strict", "\\\x7e"),
|
||||
("\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
|
||||
(b"\\\x7e", "strict", "\\\x7e"),
|
||||
(b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
|
||||
)
|
||||
|
||||
class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||
|
@ -84,18 +84,18 @@ class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
|
||||
codectests = (
|
||||
# invalid bytes
|
||||
("abc\x80\x80\x82\x84", "strict", None),
|
||||
("abc\xf8", "strict", None),
|
||||
("abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
|
||||
("abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
|
||||
("abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
|
||||
(b"abc\x80\x80\x82\x84", "strict", None),
|
||||
(b"abc\xf8", "strict", None),
|
||||
(b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
|
||||
(b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
|
||||
(b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
|
||||
# sjis vs cp932
|
||||
("\\\x7e", "replace", "\xa5\u203e"),
|
||||
("\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
|
||||
(b"\\\x7e", "replace", "\xa5\u203e"),
|
||||
(b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
|
||||
)
|
||||
# (input text, expected bytes) for the "test.xmlcharnamereplace" error
# handler.  The expected value is a bytes literal, so it must stay pure
# ASCII plus escapes: characters the codec cannot encode show up as XML
# entity / character references, not as the literal characters.
xmlcharnametest = (
    "\xab\u211c\xbb = \u2329\u1234\u232a",
    b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
)
|
||||
|
||||
def test_main():
|
||||
|
|
|
@ -13,11 +13,11 @@ class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
tstring = test_multibytecodec_support.load_teststring('cp949')
|
||||
codectests = (
|
||||
# invalid bytes
|
||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
||||
("abc\xc8", "strict", None),
|
||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
|
||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
|
||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
|
||||
)
|
||||
|
||||
class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||
|
@ -25,11 +25,11 @@ class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
tstring = test_multibytecodec_support.load_teststring('euc_kr')
|
||||
codectests = (
|
||||
# invalid bytes
|
||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
||||
("abc\xc8", "strict", None),
|
||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
|
||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
|
||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
|
||||
)
|
||||
|
||||
class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||
|
@ -37,11 +37,11 @@ class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
tstring = test_multibytecodec_support.load_teststring('johab')
|
||||
codectests = (
|
||||
# invalid bytes
|
||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
||||
("abc\xc8", "strict", None),
|
||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ucd27"),
|
||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ucd27\ufffd"),
|
||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\ucd27"),
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ucd27"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ucd27\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\ucd27"),
|
||||
)
|
||||
|
||||
def test_main():
|
||||
|
|
|
@ -13,11 +13,11 @@ class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
tstring = test_multibytecodec_support.load_teststring('big5')
|
||||
codectests = (
|
||||
# invalid bytes
|
||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
||||
("abc\xc8", "strict", None),
|
||||
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
|
||||
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
|
||||
("abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
|
||||
)
|
||||
|
||||
def test_main():
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
from test import test_support
|
||||
from test import test_multibytecodec_support
|
||||
from test.test_support import TESTFN
|
||||
import unittest, StringIO, codecs, sys, os
|
||||
import unittest, io, codecs, sys, os
|
||||
|
||||
ALL_CJKENCODINGS = [
|
||||
# _codecs_cn
|
||||
|
@ -30,13 +30,13 @@ class Test_MultibyteCodec(unittest.TestCase):
|
|||
|
||||
def test_nullcoding(self):
|
||||
for enc in ALL_CJKENCODINGS:
|
||||
self.assertEqual(''.decode(enc), '')
|
||||
self.assertEqual(str('', enc), '')
|
||||
self.assertEqual(''.encode(enc), '')
|
||||
self.assertEqual(b''.decode(enc), '')
|
||||
self.assertEqual(str(b'', enc), '')
|
||||
self.assertEqual(''.encode(enc), b'')
|
||||
|
||||
def test_str_decode(self):
|
||||
for enc in ALL_CJKENCODINGS:
|
||||
self.assertEqual('abcd'.encode(enc), 'abcd')
|
||||
self.assertEqual('abcd'.encode(enc), b'abcd')
|
||||
|
||||
def test_errorcallback_longindex(self):
|
||||
dec = codecs.getdecoder('euc-kr')
|
||||
|
@ -48,7 +48,7 @@ class Test_MultibyteCodec(unittest.TestCase):
|
|||
def test_codingspec(self):
    """Execute a file carrying a '# coding: <enc>' line for each CJK encoding."""
    try:
        for enc in ALL_CJKENCODINGS:
            # Close (and thereby flush) the file before executing it;
            # handing an anonymous open() result to print() leaves the
            # write pending and leaks the file object.
            with io.open(TESTFN, 'w') as f:
                print('# coding:', enc, file=f)
            execfile(TESTFN)
    finally:
        os.unlink(TESTFN)
|
||||
|
@ -59,13 +59,13 @@ class Test_IncrementalEncoder(unittest.TestCase):
|
|||
# cp949 encoder isn't stateful at all.
|
||||
encoder = codecs.getincrementalencoder('cp949')()
|
||||
self.assertEqual(encoder.encode('\ud30c\uc774\uc36c \ub9c8\uc744'),
|
||||
'\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb')
|
||||
b'\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb')
|
||||
self.assertEqual(encoder.reset(), None)
|
||||
self.assertEqual(encoder.encode('\u2606\u223c\u2606', True),
|
||||
'\xa1\xd9\xa1\xad\xa1\xd9')
|
||||
b'\xa1\xd9\xa1\xad\xa1\xd9')
|
||||
self.assertEqual(encoder.reset(), None)
|
||||
self.assertEqual(encoder.encode('', True), '')
|
||||
self.assertEqual(encoder.encode('', False), '')
|
||||
self.assertEqual(encoder.encode('', True), b'')
|
||||
self.assertEqual(encoder.encode('', False), b'')
|
||||
self.assertEqual(encoder.reset(), None)
|
||||
|
||||
def test_stateful(self):
|
||||
|
@ -75,29 +75,29 @@ class Test_IncrementalEncoder(unittest.TestCase):
|
|||
# U+0300 => ABDC
|
||||
|
||||
encoder = codecs.getincrementalencoder('jisx0213')()
|
||||
self.assertEqual(encoder.encode('\u00e6\u0300'), '\xab\xc4')
|
||||
self.assertEqual(encoder.encode('\u00e6'), '')
|
||||
self.assertEqual(encoder.encode('\u0300'), '\xab\xc4')
|
||||
self.assertEqual(encoder.encode('\u00e6', True), '\xa9\xdc')
|
||||
self.assertEqual(encoder.encode('\u00e6\u0300'), b'\xab\xc4')
|
||||
self.assertEqual(encoder.encode('\u00e6'), b'')
|
||||
self.assertEqual(encoder.encode('\u0300'), b'\xab\xc4')
|
||||
self.assertEqual(encoder.encode('\u00e6', True), b'\xa9\xdc')
|
||||
|
||||
self.assertEqual(encoder.reset(), None)
|
||||
self.assertEqual(encoder.encode('\u0300'), '\xab\xdc')
|
||||
self.assertEqual(encoder.encode('\u0300'), b'\xab\xdc')
|
||||
|
||||
self.assertEqual(encoder.encode('\u00e6'), '')
|
||||
self.assertEqual(encoder.encode('', True), '\xa9\xdc')
|
||||
self.assertEqual(encoder.encode('', True), '')
|
||||
self.assertEqual(encoder.encode('\u00e6'), b'')
|
||||
self.assertEqual(encoder.encode('', True), b'\xa9\xdc')
|
||||
self.assertEqual(encoder.encode('', True), b'')
|
||||
|
||||
def test_stateful_keep_buffer(self):
|
||||
encoder = codecs.getincrementalencoder('jisx0213')()
|
||||
self.assertEqual(encoder.encode('\u00e6'), '')
|
||||
self.assertEqual(encoder.encode('\u00e6'), b'')
|
||||
self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
|
||||
self.assertEqual(encoder.encode('\u0300\u00e6'), '\xab\xc4')
|
||||
self.assertEqual(encoder.encode('\u0300\u00e6'), b'\xab\xc4')
|
||||
self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
|
||||
self.assertEqual(encoder.reset(), None)
|
||||
self.assertEqual(encoder.encode('\u0300'), '\xab\xdc')
|
||||
self.assertEqual(encoder.encode('\u00e6'), '')
|
||||
self.assertEqual(encoder.encode('\u0300'), b'\xab\xdc')
|
||||
self.assertEqual(encoder.encode('\u00e6'), b'')
|
||||
self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
|
||||
self.assertEqual(encoder.encode('', True), '\xa9\xdc')
|
||||
self.assertEqual(encoder.encode('', True), b'\xa9\xdc')
|
||||
|
||||
|
||||
class Test_IncrementalDecoder(unittest.TestCase):
|
||||
|
@ -105,21 +105,21 @@ class Test_IncrementalDecoder(unittest.TestCase):
|
|||
def test_dbcs(self):
|
||||
# cp949 decoder is simple with only 1 or 2 bytes sequences.
|
||||
decoder = codecs.getincrementaldecoder('cp949')()
|
||||
self.assertEqual(decoder.decode('\xc6\xc4\xc0\xcc\xbd'),
|
||||
self.assertEqual(decoder.decode(b'\xc6\xc4\xc0\xcc\xbd'),
|
||||
'\ud30c\uc774')
|
||||
self.assertEqual(decoder.decode('\xe3 \xb8\xb6\xc0\xbb'),
|
||||
self.assertEqual(decoder.decode(b'\xe3 \xb8\xb6\xc0\xbb'),
|
||||
'\uc36c \ub9c8\uc744')
|
||||
self.assertEqual(decoder.decode(''), '')
|
||||
self.assertEqual(decoder.decode(b''), '')
|
||||
|
||||
def test_dbcs_keep_buffer(self):
|
||||
decoder = codecs.getincrementaldecoder('cp949')()
|
||||
self.assertEqual(decoder.decode('\xc6\xc4\xc0'), '\ud30c')
|
||||
self.assertEqual(decoder.decode(b'\xc6\xc4\xc0'), '\ud30c')
|
||||
self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
|
||||
self.assertEqual(decoder.decode('\xcc'), '\uc774')
|
||||
self.assertEqual(decoder.decode(b'\xcc'), '\uc774')
|
||||
|
||||
self.assertEqual(decoder.decode('\xc6\xc4\xc0'), '\ud30c')
|
||||
self.assertEqual(decoder.decode(b'\xc6\xc4\xc0'), '\ud30c')
|
||||
self.assertRaises(UnicodeDecodeError, decoder.decode, '\xcc\xbd', True)
|
||||
self.assertEqual(decoder.decode('\xcc'), '\uc774')
|
||||
self.assertEqual(decoder.decode(b'\xcc'), '\uc774')
|
||||
|
||||
def test_iso2022(self):
|
||||
decoder = codecs.getincrementaldecoder('iso2022-jp')()
|
||||
|
@ -140,61 +140,61 @@ class Test_IncrementalDecoder(unittest.TestCase):
|
|||
class Test_StreamWriter(unittest.TestCase):
|
||||
if len('\U00012345') == 2: # UCS2
|
||||
def test_gb18030(self):
|
||||
s= StringIO.StringIO()
|
||||
s= io.BytesIO()
|
||||
c = codecs.getwriter('gb18030')(s)
|
||||
c.write('123')
|
||||
self.assertEqual(s.getvalue(), '123')
|
||||
self.assertEqual(s.getvalue(), b'123')
|
||||
c.write('\U00012345')
|
||||
self.assertEqual(s.getvalue(), '123\x907\x959')
|
||||
self.assertEqual(s.getvalue(), b'123\x907\x959')
|
||||
c.write('\U00012345'[0])
|
||||
self.assertEqual(s.getvalue(), '123\x907\x959')
|
||||
self.assertEqual(s.getvalue(), b'123\x907\x959')
|
||||
c.write('\U00012345'[1] + '\U00012345' + '\uac00\u00ac')
|
||||
self.assertEqual(s.getvalue(),
|
||||
'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
|
||||
b'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
|
||||
c.write('\U00012345'[0])
|
||||
self.assertEqual(s.getvalue(),
|
||||
'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
|
||||
b'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
|
||||
self.assertRaises(UnicodeError, c.reset)
|
||||
self.assertEqual(s.getvalue(),
|
||||
'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
|
||||
b'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
|
||||
|
||||
def test_utf_8(self):
|
||||
s= StringIO.StringIO()
|
||||
s= io.BytesIO()
|
||||
c = codecs.getwriter('utf-8')(s)
|
||||
c.write('123')
|
||||
self.assertEqual(s.getvalue(), '123')
|
||||
self.assertEqual(s.getvalue(), b'123')
|
||||
c.write('\U00012345')
|
||||
self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
|
||||
self.assertEqual(s.getvalue(), b'123\xf0\x92\x8d\x85')
|
||||
|
||||
# Python utf-8 codec can't buffer surrogate pairs yet.
|
||||
if 0:
|
||||
c.write('\U00012345'[0])
|
||||
self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
|
||||
self.assertEqual(s.getvalue(), b'123\xf0\x92\x8d\x85')
|
||||
c.write('\U00012345'[1] + '\U00012345' + '\uac00\u00ac')
|
||||
self.assertEqual(s.getvalue(),
|
||||
'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
|
||||
'\xea\xb0\x80\xc2\xac')
|
||||
b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
|
||||
b'\xea\xb0\x80\xc2\xac')
|
||||
c.write('\U00012345'[0])
|
||||
self.assertEqual(s.getvalue(),
|
||||
'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
|
||||
'\xea\xb0\x80\xc2\xac')
|
||||
b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
|
||||
b'\xea\xb0\x80\xc2\xac')
|
||||
c.reset()
|
||||
self.assertEqual(s.getvalue(),
|
||||
'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
|
||||
'\xea\xb0\x80\xc2\xac\xed\xa0\x88')
|
||||
b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
|
||||
b'\xea\xb0\x80\xc2\xac\xed\xa0\x88')
|
||||
c.write('\U00012345'[1])
|
||||
self.assertEqual(s.getvalue(),
|
||||
'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
|
||||
'\xea\xb0\x80\xc2\xac\xed\xa0\x88\xed\xbd\x85')
|
||||
b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
|
||||
b'\xea\xb0\x80\xc2\xac\xed\xa0\x88\xed\xbd\x85')
|
||||
|
||||
else: # UCS4
|
||||
pass
|
||||
|
||||
def test_streamwriter_strwrite(self):
|
||||
s = StringIO.StringIO()
|
||||
s = io.BytesIO()
|
||||
wr = codecs.getwriter('gb18030')(s)
|
||||
wr.write('abcd')
|
||||
self.assertEqual(s.getvalue(), 'abcd')
|
||||
self.assertEqual(s.getvalue(), b'abcd')
|
||||
|
||||
class Test_ISO2022(unittest.TestCase):
|
||||
def test_g2(self):
|
||||
|
@ -203,10 +203,10 @@ class Test_ISO2022(unittest.TestCase):
|
|||
self.assertEqual(iso2022jp2.decode('iso2022-jp-2'), uni)
|
||||
|
||||
def test_iso2022_jp_g0(self):
|
||||
self.failIf('\x0e' in '\N{SOFT HYPHEN}'.encode('iso-2022-jp-2'))
|
||||
self.failIf(b'\x0e' in '\N{SOFT HYPHEN}'.encode('iso-2022-jp-2'))
|
||||
for encoding in ('iso-2022-jp-2004', 'iso-2022-jp-3'):
|
||||
e = '\u3406'.encode(encoding)
|
||||
self.failIf(filter(lambda x: x >= '\x80', e))
|
||||
self.failIf(any(x > 0x80 for x in e))
|
||||
|
||||
def test_bug1572832(self):
|
||||
if sys.maxunicode >= 0x10000:
|
||||
|
|
|
@ -7,12 +7,12 @@
|
|||
import sys, codecs, os.path
|
||||
import unittest
|
||||
from test import test_support
|
||||
from StringIO import StringIO
|
||||
from io import BytesIO
|
||||
|
||||
class TestBase:
|
||||
encoding = '' # codec name
|
||||
codec = None # codec tuple (with 4 elements)
|
||||
tstring = '' # string to test StreamReader
|
||||
tstring = None # must set. 2 strings to test StreamReader
|
||||
|
||||
codectests = None # must set. codec test tuple
|
||||
roundtriptest = 1 # set if roundtrip is possible with unicode
|
||||
|
@ -31,7 +31,7 @@ class TestBase:
|
|||
self.incrementaldecoder = self.codec.incrementaldecoder
|
||||
|
||||
def test_chunkcoding(self):
|
||||
for native, utf8 in zip(*[StringIO(f).readlines()
|
||||
for native, utf8 in zip(*[map(bytes, str8(f).splitlines(1))
|
||||
for f in self.tstring]):
|
||||
u = self.decode(native)[0]
|
||||
self.assertEqual(u, utf8.decode('utf-8'))
|
||||
|
@ -40,7 +40,7 @@ class TestBase:
|
|||
|
||||
def test_errorhandle(self):
|
||||
for source, scheme, expected in self.codectests:
|
||||
if type(source) == type(''):
|
||||
if isinstance(source, bytes):
|
||||
func = self.decode
|
||||
else:
|
||||
func = self.encode
|
||||
|
@ -57,7 +57,7 @@ class TestBase:
|
|||
s = "\u0b13\u0b23\u0b60 nd eggs"
|
||||
self.assertEqual(
|
||||
self.encode(s, "xmlcharrefreplace")[0],
|
||||
"ଓଣୠ nd eggs"
|
||||
b"ଓଣୠ nd eggs"
|
||||
)
|
||||
|
||||
def test_customreplace_encode(self):
|
||||
|
@ -83,7 +83,7 @@ class TestBase:
|
|||
sin, sout = self.xmlcharnametest
|
||||
else:
|
||||
sin = "\xab\u211c\xbb = \u2329\u1234\u232a"
|
||||
sout = "«ℜ» = ⟨ሴ⟩"
|
||||
sout = b"«ℜ» = ⟨ሴ⟩"
|
||||
self.assertEqual(self.encode(sin,
|
||||
"test.xmlcharnamereplace")[0], sout)
|
||||
|
||||
|
@ -92,7 +92,7 @@ class TestBase:
|
|||
return (ret, exc.end)
|
||||
codecs.register_error("test.cjktest", myreplace)
|
||||
|
||||
for ret in ([1, 2, 3], [], None, object(), 'string', ''):
|
||||
for ret in ([1, 2, 3], [], None, object(), b'string', b''):
|
||||
self.assertRaises(TypeError, self.encode, self.unmappedunicode,
|
||||
'test.cjktest')
|
||||
|
||||
|
@ -101,7 +101,7 @@ class TestBase:
|
|||
return ('x', int(exc.end))
|
||||
codecs.register_error("test.cjktest", myreplace)
|
||||
self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
|
||||
'test.cjktest'), ('abcdxefgh', 9))
|
||||
'test.cjktest'), (b'abcdxefgh', 9))
|
||||
|
||||
def myreplace(exc):
|
||||
return ('x', sys.maxint + 1)
|
||||
|
@ -127,14 +127,14 @@ class TestBase:
|
|||
codecs.register_error("test.cjktest", myreplace)
|
||||
self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
|
||||
'test.cjktest'),
|
||||
('abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9))
|
||||
(b'abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9))
|
||||
|
||||
def test_callback_forward_index(self):
|
||||
def myreplace(exc):
|
||||
return ('REPLACED', exc.end + 2)
|
||||
codecs.register_error("test.cjktest", myreplace)
|
||||
self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
|
||||
'test.cjktest'), ('abcdREPLACEDgh', 9))
|
||||
'test.cjktest'), (b'abcdREPLACEDgh', 9))
|
||||
|
||||
def test_callback_index_outofbound(self):
|
||||
def myreplace(exc):
|
||||
|
@ -147,8 +147,8 @@ class TestBase:
|
|||
UTF8Reader = codecs.getreader('utf-8')
|
||||
for sizehint in [None] + list(range(1, 33)) + \
|
||||
[64, 128, 256, 512, 1024]:
|
||||
istream = UTF8Reader(StringIO(self.tstring[1]))
|
||||
ostream = StringIO()
|
||||
istream = UTF8Reader(BytesIO(self.tstring[1]))
|
||||
ostream = BytesIO()
|
||||
encoder = self.incrementalencoder()
|
||||
while 1:
|
||||
if sizehint is not None:
|
||||
|
@ -167,8 +167,8 @@ class TestBase:
|
|||
UTF8Writer = codecs.getwriter('utf-8')
|
||||
for sizehint in [None, -1] + list(range(1, 33)) + \
|
||||
[64, 128, 256, 512, 1024]:
|
||||
istream = StringIO(self.tstring[0])
|
||||
ostream = UTF8Writer(StringIO())
|
||||
istream = BytesIO(self.tstring[0])
|
||||
ostream = UTF8Writer(BytesIO())
|
||||
decoder = self.incrementaldecoder()
|
||||
while 1:
|
||||
data = istream.read(sizehint)
|
||||
|
@ -187,26 +187,26 @@ class TestBase:
|
|||
self.assertRaises(UnicodeEncodeError, e.encode, inv, True)
|
||||
|
||||
e.errors = 'ignore'
|
||||
self.assertEqual(e.encode(inv, True), '')
|
||||
self.assertEqual(e.encode(inv, True), b'')
|
||||
|
||||
e.reset()
|
||||
def tempreplace(exc):
|
||||
return ('called', exc.end)
|
||||
codecs.register_error('test.incremental_error_callback', tempreplace)
|
||||
e.errors = 'test.incremental_error_callback'
|
||||
self.assertEqual(e.encode(inv, True), 'called')
|
||||
self.assertEqual(e.encode(inv, True), b'called')
|
||||
|
||||
# again
|
||||
e.errors = 'ignore'
|
||||
self.assertEqual(e.encode(inv, True), '')
|
||||
self.assertEqual(e.encode(inv, True), b'')
|
||||
|
||||
def test_streamreader(self):
|
||||
UTF8Writer = codecs.getwriter('utf-8')
|
||||
for name in ["read", "readline", "readlines"]:
|
||||
for sizehint in [None, -1] + list(range(1, 33)) + \
|
||||
[64, 128, 256, 512, 1024]:
|
||||
istream = self.reader(StringIO(self.tstring[0]))
|
||||
ostream = UTF8Writer(StringIO())
|
||||
istream = self.reader(BytesIO(self.tstring[0]))
|
||||
ostream = UTF8Writer(BytesIO())
|
||||
func = getattr(istream, name)
|
||||
while 1:
|
||||
data = func(sizehint)
|
||||
|
@ -225,8 +225,8 @@ class TestBase:
|
|||
for name in readfuncs:
|
||||
for sizehint in [None] + list(range(1, 33)) + \
|
||||
[64, 128, 256, 512, 1024]:
|
||||
istream = UTF8Reader(StringIO(self.tstring[1]))
|
||||
ostream = self.writer(StringIO())
|
||||
istream = UTF8Reader(BytesIO(self.tstring[1]))
|
||||
ostream = self.writer(BytesIO())
|
||||
func = getattr(istream, name)
|
||||
while 1:
|
||||
if sizehint is not None:
|
||||
|
|
|
@ -138,6 +138,11 @@ codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
|
|||
{
|
||||
PyObject *cb;
|
||||
|
||||
if (PyUnicode_Check(value)) {
|
||||
value = _PyUnicode_AsDefaultEncodedString(value, NULL);
|
||||
if (value == NULL)
|
||||
return -1;
|
||||
}
|
||||
if (!PyString_Check(value)) {
|
||||
PyErr_SetString(PyExc_TypeError, "errors must be a string");
|
||||
return -1;
|
||||
|
@ -322,11 +327,11 @@ multibytecodec_encerror(MultibyteCodec *codec,
|
|||
goto errorexit;
|
||||
}
|
||||
|
||||
assert(PyString_Check(retstr));
|
||||
retstrsize = PyString_GET_SIZE(retstr);
|
||||
assert(PyBytes_Check(retstr));
|
||||
retstrsize = PyBytes_GET_SIZE(retstr);
|
||||
REQUIRE_ENCODEBUFFER(buf, retstrsize);
|
||||
|
||||
memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize);
|
||||
memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
|
||||
buf->outbuf += retstrsize;
|
||||
|
||||
newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
|
||||
|
@ -1224,10 +1229,18 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
|
|||
if (cres == NULL)
|
||||
goto errorexit;
|
||||
|
||||
if (PyString_Check(cres)) {
|
||||
PyObject *cres2 = PyBytes_FromObject(cres);
|
||||
if (cres2 == NULL)
|
||||
return NULL;
|
||||
Py_DECREF(cres);
|
||||
cres = cres2;
|
||||
}
|
||||
|
||||
if (!PyBytes_Check(cres)) {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"stream function returned a "
|
||||
"non-string object (%.100s)",
|
||||
"non-bytes object (%.100s)",
|
||||
cres->ob_type->tp_name);
|
||||
goto errorexit;
|
||||
}
|
||||
|
@ -1596,8 +1609,8 @@ mbstreamwriter_reset(MultibyteStreamWriterObject *self)
|
|||
if (pwrt == NULL)
|
||||
return NULL;
|
||||
|
||||
assert(PyString_Check(pwrt));
|
||||
if (PyString_Size(pwrt) > 0) {
|
||||
assert(PyBytes_Check(pwrt));
|
||||
if (PyBytes_Size(pwrt) > 0) {
|
||||
PyObject *wr;
|
||||
wr = PyObject_CallMethod(self->stream, "write", "O", pwrt);
|
||||
if (wr == NULL) {
|
||||
|
|
Loading…
Reference in New Issue