mirror of https://github.com/python/cpython
Issue1395: Universal mode used to duplicate newlines when using read(1).
"Universal newline" is now an incremental decoder wrapping the initial one, with its own additional buffer (if '\r' is seen at the end of the input). A decoder allows the tell() function to record the state of the translation. This also simplifies the readline() process. Now test_netrc passes on Windows, as well as many new tests in test_io.py.
This commit is contained in:
parent
74c29c71b1
commit
1ff9910f59
205
Lib/io.py
205
Lib/io.py
|
@ -1041,6 +1041,84 @@ class TextIOBase(IOBase):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
|
||||||
|
"""Codec used when reading a file in universal newlines mode.
|
||||||
|
It wraps another incremental decoder, translating \\r\\n and \\r into \\n.
|
||||||
|
It also records the types of newlines encountered.
|
||||||
|
When used with translate=False, it ensures that the newline sequence is
|
||||||
|
returned in one piece.
|
||||||
|
"""
|
||||||
|
def __init__(self, decoder, translate, errors='strict'):
|
||||||
|
codecs.IncrementalDecoder.__init__(self, errors=errors)
|
||||||
|
self.buffer = b''
|
||||||
|
self.translate = translate
|
||||||
|
self.decoder = decoder
|
||||||
|
self.seennl = 0
|
||||||
|
|
||||||
|
def decode(self, input, final=False):
|
||||||
|
# decode input (with the eventual \r from a previous pass)
|
||||||
|
if self.buffer:
|
||||||
|
input = self.buffer + input
|
||||||
|
|
||||||
|
output = self.decoder.decode(input, final=final)
|
||||||
|
|
||||||
|
# retain last \r even when not translating data:
|
||||||
|
# then readline() is sure to get \r\n in one pass
|
||||||
|
if output.endswith("\r") and not final:
|
||||||
|
output = output[:-1]
|
||||||
|
self.buffer = b'\r'
|
||||||
|
else:
|
||||||
|
self.buffer = b''
|
||||||
|
|
||||||
|
# Record which newlines are read
|
||||||
|
crlf = output.count('\r\n')
|
||||||
|
cr = output.count('\r') - crlf
|
||||||
|
lf = output.count('\n') - crlf
|
||||||
|
self.seennl |= (lf and self._LF) | (cr and self._CR) \
|
||||||
|
| (crlf and self._CRLF)
|
||||||
|
|
||||||
|
if self.translate:
|
||||||
|
if crlf:
|
||||||
|
output = output.replace("\r\n", "\n")
|
||||||
|
if cr:
|
||||||
|
output = output.replace("\r", "\n")
|
||||||
|
|
||||||
|
return output
|
||||||
|
|
||||||
|
def getstate(self):
|
||||||
|
buf, flag = self.decoder.getstate()
|
||||||
|
return buf + self.buffer, flag
|
||||||
|
|
||||||
|
def setstate(self, state):
|
||||||
|
buf, flag = state
|
||||||
|
if buf.endswith(b'\r'):
|
||||||
|
self.buffer = b'\r'
|
||||||
|
buf = buf[:-1]
|
||||||
|
else:
|
||||||
|
self.buffer = b''
|
||||||
|
self.decoder.setstate((buf, flag))
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
self.buffer = b''
|
||||||
|
self.decoder.reset()
|
||||||
|
|
||||||
|
_LF = 1
|
||||||
|
_CR = 2
|
||||||
|
_CRLF = 4
|
||||||
|
|
||||||
|
@property
|
||||||
|
def newlines(self):
|
||||||
|
return (None,
|
||||||
|
"\n",
|
||||||
|
"\r",
|
||||||
|
("\r", "\n"),
|
||||||
|
"\r\n",
|
||||||
|
("\n", "\r\n"),
|
||||||
|
("\r", "\r\n"),
|
||||||
|
("\r", "\n", "\r\n")
|
||||||
|
)[self.seennl]
|
||||||
|
|
||||||
|
|
||||||
class TextIOWrapper(TextIOBase):
|
class TextIOWrapper(TextIOBase):
|
||||||
|
|
||||||
"""Buffered text stream.
|
"""Buffered text stream.
|
||||||
|
@ -1077,7 +1155,6 @@ class TextIOWrapper(TextIOBase):
|
||||||
self._readnl = newline
|
self._readnl = newline
|
||||||
self._writetranslate = newline != ''
|
self._writetranslate = newline != ''
|
||||||
self._writenl = newline or os.linesep
|
self._writenl = newline or os.linesep
|
||||||
self._seennl = 0
|
|
||||||
self._decoder = None
|
self._decoder = None
|
||||||
self._pending = ""
|
self._pending = ""
|
||||||
self._snapshot = None
|
self._snapshot = None
|
||||||
|
@ -1124,6 +1201,7 @@ class TextIOWrapper(TextIOBase):
|
||||||
if not isinstance(s, str):
|
if not isinstance(s, str):
|
||||||
raise TypeError("can't write %s to text stream" %
|
raise TypeError("can't write %s to text stream" %
|
||||||
s.__class__.__name__)
|
s.__class__.__name__)
|
||||||
|
length = len(s)
|
||||||
haslf = "\n" in s
|
haslf = "\n" in s
|
||||||
if haslf and self._writetranslate and self._writenl != "\n":
|
if haslf and self._writetranslate and self._writenl != "\n":
|
||||||
s = s.replace("\n", self._writenl)
|
s = s.replace("\n", self._writenl)
|
||||||
|
@ -1132,15 +1210,20 @@ class TextIOWrapper(TextIOBase):
|
||||||
self.buffer.write(b)
|
self.buffer.write(b)
|
||||||
if haslf and self.isatty():
|
if haslf and self.isatty():
|
||||||
self.flush()
|
self.flush()
|
||||||
self._snapshot = self._decoder = None
|
self._snapshot = None
|
||||||
return len(s)
|
if self._decoder:
|
||||||
|
self._decoder.reset()
|
||||||
|
return length
|
||||||
|
|
||||||
def _get_decoder(self):
|
def _get_decoder(self):
|
||||||
make_decoder = codecs.getincrementaldecoder(self._encoding)
|
make_decoder = codecs.getincrementaldecoder(self._encoding)
|
||||||
if make_decoder is None:
|
if make_decoder is None:
|
||||||
raise IOError("Can't find an incremental decoder for encoding %s" %
|
raise IOError("Can't find an incremental decoder for encoding %s" %
|
||||||
self._encoding)
|
self._encoding)
|
||||||
decoder = self._decoder = make_decoder() # XXX: errors
|
decoder = make_decoder() # XXX: errors
|
||||||
|
if self._readuniversal:
|
||||||
|
decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
|
||||||
|
self._decoder = decoder
|
||||||
return decoder
|
return decoder
|
||||||
|
|
||||||
def _read_chunk(self):
|
def _read_chunk(self):
|
||||||
|
@ -1220,7 +1303,8 @@ class TextIOWrapper(TextIOBase):
|
||||||
pos = self.buffer.seek(0, 2)
|
pos = self.buffer.seek(0, 2)
|
||||||
self._snapshot = None
|
self._snapshot = None
|
||||||
self._pending = ""
|
self._pending = ""
|
||||||
self._decoder = None
|
if self._decoder:
|
||||||
|
self._decoder.reset()
|
||||||
return pos
|
return pos
|
||||||
if whence != 0:
|
if whence != 0:
|
||||||
raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
|
raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
|
||||||
|
@ -1234,7 +1318,8 @@ class TextIOWrapper(TextIOBase):
|
||||||
self.buffer.seek(pos)
|
self.buffer.seek(pos)
|
||||||
self._snapshot = None
|
self._snapshot = None
|
||||||
self._pending = ""
|
self._pending = ""
|
||||||
self._decoder = None
|
if self._decoder:
|
||||||
|
self._decoder.reset()
|
||||||
return pos
|
return pos
|
||||||
decoder = self._decoder or self._get_decoder()
|
decoder = self._decoder or self._get_decoder()
|
||||||
decoder.set_state(("", ds))
|
decoder.set_state(("", ds))
|
||||||
|
@ -1253,7 +1338,7 @@ class TextIOWrapper(TextIOBase):
|
||||||
res += decoder.decode(self.buffer.read(), True)
|
res += decoder.decode(self.buffer.read(), True)
|
||||||
self._pending = ""
|
self._pending = ""
|
||||||
self._snapshot = None
|
self._snapshot = None
|
||||||
return self._replacenl(res)
|
return res
|
||||||
else:
|
else:
|
||||||
while len(res) < n:
|
while len(res) < n:
|
||||||
readahead, pending = self._read_chunk()
|
readahead, pending = self._read_chunk()
|
||||||
|
@ -1261,7 +1346,7 @@ class TextIOWrapper(TextIOBase):
|
||||||
if not readahead:
|
if not readahead:
|
||||||
break
|
break
|
||||||
self._pending = res[n:]
|
self._pending = res[n:]
|
||||||
return self._replacenl(res[:n])
|
return res[:n]
|
||||||
|
|
||||||
def __next__(self):
|
def __next__(self):
|
||||||
self._telling = False
|
self._telling = False
|
||||||
|
@ -1285,62 +1370,55 @@ class TextIOWrapper(TextIOBase):
|
||||||
|
|
||||||
line = self._pending
|
line = self._pending
|
||||||
start = 0
|
start = 0
|
||||||
cr_eof = False
|
|
||||||
decoder = self._decoder or self._get_decoder()
|
decoder = self._decoder or self._get_decoder()
|
||||||
|
|
||||||
pos = endpos = None
|
pos = endpos = None
|
||||||
ending = None
|
|
||||||
while True:
|
while True:
|
||||||
if self._readuniversal:
|
if self._readtranslate:
|
||||||
|
# Newlines are already translated, only search for \n
|
||||||
|
pos = line.find('\n', start)
|
||||||
|
if pos >= 0:
|
||||||
|
endpos = pos + 1
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
start = len(line)
|
||||||
|
|
||||||
|
elif self._readuniversal:
|
||||||
# Universal newline search. Find any of \r, \r\n, \n
|
# Universal newline search. Find any of \r, \r\n, \n
|
||||||
|
# The decoder ensures that \r\n are not split in two pieces
|
||||||
|
|
||||||
# In C we'd look for these in parallel of course.
|
# In C we'd look for these in parallel of course.
|
||||||
nlpos = line.find("\n", start)
|
nlpos = line.find("\n", start)
|
||||||
crpos = line.find("\r", start)
|
crpos = line.find("\r", start)
|
||||||
if crpos == -1:
|
if crpos == -1:
|
||||||
if nlpos == -1:
|
if nlpos == -1:
|
||||||
|
# Nothing found
|
||||||
start = len(line)
|
start = len(line)
|
||||||
else:
|
else:
|
||||||
# Found \n
|
# Found \n
|
||||||
pos = nlpos
|
endpos = nlpos + 1
|
||||||
endpos = pos + 1
|
|
||||||
ending = self._LF
|
|
||||||
break
|
break
|
||||||
elif nlpos == -1:
|
elif nlpos == -1:
|
||||||
if crpos == len(line) - 1:
|
# Found lone \r
|
||||||
# Found \r at end of buffer, must keep reading
|
endpos = crpos + 1
|
||||||
start = crpos
|
break
|
||||||
cr_eof = True
|
|
||||||
else:
|
|
||||||
# Found lone \r
|
|
||||||
ending = self._CR
|
|
||||||
pos = crpos
|
|
||||||
endpos = pos + 1
|
|
||||||
break
|
|
||||||
elif nlpos < crpos:
|
elif nlpos < crpos:
|
||||||
# Found \n
|
# Found \n
|
||||||
pos = nlpos
|
endpos = nlpos + 1
|
||||||
endpos = pos + 1
|
|
||||||
ending = self._LF
|
|
||||||
break
|
break
|
||||||
elif nlpos == crpos + 1:
|
elif nlpos == crpos + 1:
|
||||||
# Found \r\n
|
# Found \r\n
|
||||||
ending = self._CRLF
|
endpos = crpos + 2
|
||||||
pos = crpos
|
|
||||||
endpos = pos + 2
|
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
# Found \r
|
# Found \r
|
||||||
pos = crpos
|
endpos = crpos + 1
|
||||||
endpos = pos + 1
|
|
||||||
ending = self._CR
|
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
# non-universal
|
# non-universal
|
||||||
pos = line.find(self._readnl)
|
pos = line.find(self._readnl)
|
||||||
if pos >= 0:
|
if pos >= 0:
|
||||||
endpos = pos+len(self._readnl)
|
endpos = pos + len(self._readnl)
|
||||||
ending = self._nlflag(self._readnl)
|
|
||||||
break
|
break
|
||||||
|
|
||||||
# No line ending seen yet - get more data
|
# No line ending seen yet - get more data
|
||||||
|
@ -1356,65 +1434,14 @@ class TextIOWrapper(TextIOBase):
|
||||||
# end of file
|
# end of file
|
||||||
self._pending = ''
|
self._pending = ''
|
||||||
self._snapshot = None
|
self._snapshot = None
|
||||||
if cr_eof:
|
return line
|
||||||
self._seennl |= self._CR
|
|
||||||
return line[:-1] + '\n'
|
|
||||||
else:
|
|
||||||
return line
|
|
||||||
|
|
||||||
self._pending = line[endpos:]
|
self._pending = line[endpos:]
|
||||||
if self._readtranslate:
|
return line[:endpos]
|
||||||
self._seennl |= ending
|
|
||||||
if ending != self._LF:
|
|
||||||
return line[:pos] + '\n'
|
|
||||||
else:
|
|
||||||
return line[:endpos]
|
|
||||||
else:
|
|
||||||
return line[:endpos]
|
|
||||||
|
|
||||||
def _replacenl(self, data):
|
|
||||||
# Replace newlines in data as needed and record that they have
|
|
||||||
# been seen.
|
|
||||||
if not self._readtranslate:
|
|
||||||
return data
|
|
||||||
if self._readuniversal:
|
|
||||||
crlf = data.count('\r\n')
|
|
||||||
cr = data.count('\r') - crlf
|
|
||||||
lf = data.count('\n') - crlf
|
|
||||||
self._seennl |= (lf and self._LF) | (cr and self._CR) \
|
|
||||||
| (crlf and self._CRLF)
|
|
||||||
if crlf:
|
|
||||||
data = data.replace("\r\n", "\n")
|
|
||||||
if cr:
|
|
||||||
data = data.replace("\r", "\n")
|
|
||||||
elif self._readnl == '\n':
|
|
||||||
# Only need to detect if \n was seen.
|
|
||||||
if data.count('\n'):
|
|
||||||
self._seennl |= self._LF
|
|
||||||
else:
|
|
||||||
newdata = data.replace(self._readnl, '\n')
|
|
||||||
if newdata is not data:
|
|
||||||
self._seennl |= self._nlflag(self._readnl)
|
|
||||||
data = newdata
|
|
||||||
return data
|
|
||||||
|
|
||||||
_LF = 1
|
|
||||||
_CR = 2
|
|
||||||
_CRLF = 4
|
|
||||||
@property
|
@property
|
||||||
def newlines(self):
|
def newlines(self):
|
||||||
return (None,
|
return self._decoder.newlines if self._decoder else None
|
||||||
"\n",
|
|
||||||
"\r",
|
|
||||||
("\r", "\n"),
|
|
||||||
"\r\n",
|
|
||||||
("\n", "\r\n"),
|
|
||||||
("\r", "\r\n"),
|
|
||||||
("\r", "\n", "\r\n")
|
|
||||||
)[self._seennl]
|
|
||||||
|
|
||||||
def _nlflag(self, nlstr):
|
|
||||||
return [None, "\n", "\r", None, "\r\n"].index(nlstr)
|
|
||||||
|
|
||||||
class StringIO(TextIOWrapper):
|
class StringIO(TextIOWrapper):
|
||||||
|
|
||||||
|
|
|
@ -489,6 +489,10 @@ class BufferedRandomTest(unittest.TestCase):
|
||||||
|
|
||||||
class TextIOWrapperTest(unittest.TestCase):
|
class TextIOWrapperTest(unittest.TestCase):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
self.testdata = b"AAA\r\nBBB\rCCC\r\nDDD\nEEE\r\n"
|
||||||
|
self.normalized = b"AAA\nBBB\nCCC\nDDD\nEEE\n".decode("ascii")
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
test_support.unlink(test_support.TESTFN)
|
test_support.unlink(test_support.TESTFN)
|
||||||
|
|
||||||
|
@ -496,14 +500,14 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||||
testdata = b"AAA\nBBB\nCCC\rDDD\rEEE\r\nFFF\r\nGGG"
|
testdata = b"AAA\nBBB\nCCC\rDDD\rEEE\r\nFFF\r\nGGG"
|
||||||
normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n")
|
normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n")
|
||||||
for newline, expected in [
|
for newline, expected in [
|
||||||
(None, normalized.decode("ASCII").splitlines(True)),
|
(None, normalized.decode("ascii").splitlines(True)),
|
||||||
("", testdata.decode("ASCII").splitlines(True)),
|
("", testdata.decode("ascii").splitlines(True)),
|
||||||
("\n", ["AAA\n", "BBB\n", "CCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]),
|
("\n", ["AAA\n", "BBB\n", "CCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]),
|
||||||
("\r\n", ["AAA\nBBB\nCCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]),
|
("\r\n", ["AAA\nBBB\nCCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]),
|
||||||
("\r", ["AAA\nBBB\nCCC\r", "DDD\r", "EEE\r", "\nFFF\r", "\nGGG"]),
|
("\r", ["AAA\nBBB\nCCC\r", "DDD\r", "EEE\r", "\nFFF\r", "\nGGG"]),
|
||||||
]:
|
]:
|
||||||
buf = io.BytesIO(testdata)
|
buf = io.BytesIO(testdata)
|
||||||
txt = io.TextIOWrapper(buf, encoding="ASCII", newline=newline)
|
txt = io.TextIOWrapper(buf, encoding="ascii", newline=newline)
|
||||||
self.assertEquals(txt.readlines(), expected)
|
self.assertEquals(txt.readlines(), expected)
|
||||||
txt.seek(0)
|
txt.seek(0)
|
||||||
self.assertEquals(txt.read(), "".join(expected))
|
self.assertEquals(txt.read(), "".join(expected))
|
||||||
|
@ -518,7 +522,7 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||||
tests = [(None, testdict[os.linesep])] + sorted(testdict.items())
|
tests = [(None, testdict[os.linesep])] + sorted(testdict.items())
|
||||||
for newline, expected in tests:
|
for newline, expected in tests:
|
||||||
buf = io.BytesIO()
|
buf = io.BytesIO()
|
||||||
txt = io.TextIOWrapper(buf, encoding="ASCII", newline=newline)
|
txt = io.TextIOWrapper(buf, encoding="ascii", newline=newline)
|
||||||
txt.write("AAA\nB")
|
txt.write("AAA\nB")
|
||||||
txt.write("BB\nCCC\n")
|
txt.write("BB\nCCC\n")
|
||||||
txt.write("X\rY\r\nZ")
|
txt.write("X\rY\r\nZ")
|
||||||
|
@ -568,14 +572,14 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||||
testdata = b"AAA\nBBB\nCCC\rDDD\rEEE\r\nFFF\r\nGGG"
|
testdata = b"AAA\nBBB\nCCC\rDDD\rEEE\r\nFFF\r\nGGG"
|
||||||
normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n")
|
normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n")
|
||||||
for newline, expected in [
|
for newline, expected in [
|
||||||
(None, normalized.decode("ASCII").splitlines(True)),
|
(None, normalized.decode("ascii").splitlines(True)),
|
||||||
("", testdata.decode("ASCII").splitlines(True)),
|
("", testdata.decode("ascii").splitlines(True)),
|
||||||
("\n", ["AAA\n", "BBB\n", "CCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]),
|
("\n", ["AAA\n", "BBB\n", "CCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]),
|
||||||
("\r\n", ["AAA\nBBB\nCCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]),
|
("\r\n", ["AAA\nBBB\nCCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]),
|
||||||
("\r", ["AAA\nBBB\nCCC\r", "DDD\r", "EEE\r", "\nFFF\r", "\nGGG"]),
|
("\r", ["AAA\nBBB\nCCC\r", "DDD\r", "EEE\r", "\nFFF\r", "\nGGG"]),
|
||||||
]:
|
]:
|
||||||
buf = io.BytesIO(testdata)
|
buf = io.BytesIO(testdata)
|
||||||
txt = io.TextIOWrapper(buf, encoding="ASCII", newline=newline)
|
txt = io.TextIOWrapper(buf, encoding="ascii", newline=newline)
|
||||||
self.assertEquals(txt.readlines(), expected)
|
self.assertEquals(txt.readlines(), expected)
|
||||||
txt.seek(0)
|
txt.seek(0)
|
||||||
self.assertEquals(txt.read(), "".join(expected))
|
self.assertEquals(txt.read(), "".join(expected))
|
||||||
|
@ -600,7 +604,7 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||||
("\r\n", "\r\n", data_crlf),
|
("\r\n", "\r\n", data_crlf),
|
||||||
]:
|
]:
|
||||||
buf = io.BytesIO()
|
buf = io.BytesIO()
|
||||||
txt = io.TextIOWrapper(buf, encoding="ASCII", newline=newline)
|
txt = io.TextIOWrapper(buf, encoding="ascii", newline=newline)
|
||||||
txt.write(data)
|
txt.write(data)
|
||||||
txt.close()
|
txt.close()
|
||||||
self.assertEquals(buf.getvalue(), expected)
|
self.assertEquals(buf.getvalue(), expected)
|
||||||
|
@ -745,6 +749,114 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||||
print("Reading using readline(): %6.3f seconds" % (t3-t2))
|
print("Reading using readline(): %6.3f seconds" % (t3-t2))
|
||||||
print("Using readline()+tell(): %6.3f seconds" % (t4-t3))
|
print("Using readline()+tell(): %6.3f seconds" % (t4-t3))
|
||||||
|
|
||||||
|
def testReadOneByOne(self):
|
||||||
|
txt = io.TextIOWrapper(io.BytesIO(b"AA\r\nBB"))
|
||||||
|
reads = ""
|
||||||
|
while True:
|
||||||
|
c = txt.read(1)
|
||||||
|
if not c:
|
||||||
|
break
|
||||||
|
reads += c
|
||||||
|
self.assertEquals(reads, "AA\nBB")
|
||||||
|
|
||||||
|
# read in amounts equal to TextIOWrapper._CHUNK_SIZE which is 128.
|
||||||
|
def testReadByChunk(self):
|
||||||
|
# make sure "\r\n" straddles 128 char boundary.
|
||||||
|
txt = io.TextIOWrapper(io.BytesIO(b"A" * 127 + b"\r\nB"))
|
||||||
|
reads = ""
|
||||||
|
while True:
|
||||||
|
c = txt.read(128)
|
||||||
|
if not c:
|
||||||
|
break
|
||||||
|
reads += c
|
||||||
|
self.assertEquals(reads, "A"*127+"\nB")
|
||||||
|
|
||||||
|
def test_issue1395_1(self):
|
||||||
|
txt = io.TextIOWrapper(io.BytesIO(self.testdata), encoding="ascii")
|
||||||
|
|
||||||
|
# read one char at a time
|
||||||
|
reads = ""
|
||||||
|
while True:
|
||||||
|
c = txt.read(1)
|
||||||
|
if not c:
|
||||||
|
break
|
||||||
|
reads += c
|
||||||
|
self.assertEquals(reads, self.normalized)
|
||||||
|
|
||||||
|
def test_issue1395_2(self):
|
||||||
|
txt = io.TextIOWrapper(io.BytesIO(self.testdata), encoding="ascii")
|
||||||
|
txt._CHUNK_SIZE = 4
|
||||||
|
|
||||||
|
reads = ""
|
||||||
|
while True:
|
||||||
|
c = txt.read(4)
|
||||||
|
if not c:
|
||||||
|
break
|
||||||
|
reads += c
|
||||||
|
self.assertEquals(reads, self.normalized)
|
||||||
|
|
||||||
|
def test_issue1395_3(self):
|
||||||
|
txt = io.TextIOWrapper(io.BytesIO(self.testdata), encoding="ascii")
|
||||||
|
txt._CHUNK_SIZE = 4
|
||||||
|
|
||||||
|
reads = txt.read(4)
|
||||||
|
reads += txt.read(4)
|
||||||
|
reads += txt.readline()
|
||||||
|
reads += txt.readline()
|
||||||
|
reads += txt.readline()
|
||||||
|
self.assertEquals(reads, self.normalized)
|
||||||
|
|
||||||
|
def test_issue1395_4(self):
|
||||||
|
txt = io.TextIOWrapper(io.BytesIO(self.testdata), encoding="ascii")
|
||||||
|
txt._CHUNK_SIZE = 4
|
||||||
|
|
||||||
|
reads = txt.read(4)
|
||||||
|
reads += txt.read()
|
||||||
|
self.assertEquals(reads, self.normalized)
|
||||||
|
|
||||||
|
def test_issue1395_5(self):
|
||||||
|
txt = io.TextIOWrapper(io.BytesIO(self.testdata), encoding="ascii")
|
||||||
|
txt._CHUNK_SIZE = 4
|
||||||
|
|
||||||
|
reads = txt.read(4)
|
||||||
|
pos = txt.tell()
|
||||||
|
txt.seek(0)
|
||||||
|
txt.seek(pos)
|
||||||
|
self.assertEquals(txt.read(4), "BBB\n")
|
||||||
|
|
||||||
|
def test_newline_decoder(self):
|
||||||
|
import codecs
|
||||||
|
decoder = codecs.getincrementaldecoder("utf-8")()
|
||||||
|
decoder = io.IncrementalNewlineDecoder(decoder, translate=True)
|
||||||
|
|
||||||
|
self.assertEquals(decoder.decode(b'\xe8\xa2\x88'), "\u8888")
|
||||||
|
|
||||||
|
self.assertEquals(decoder.decode(b'\xe8'), "")
|
||||||
|
self.assertEquals(decoder.decode(b'\xa2'), "")
|
||||||
|
self.assertEquals(decoder.decode(b'\x88'), "\u8888")
|
||||||
|
|
||||||
|
self.assertEquals(decoder.decode(b'\xe8'), "")
|
||||||
|
self.assertRaises(UnicodeDecodeError, decoder.decode, b'', final=True)
|
||||||
|
|
||||||
|
decoder.setstate((b'', 0))
|
||||||
|
self.assertEquals(decoder.decode(b'\n'), "\n")
|
||||||
|
self.assertEquals(decoder.decode(b'\r'), "")
|
||||||
|
self.assertEquals(decoder.decode(b'', final=True), "\n")
|
||||||
|
self.assertEquals(decoder.decode(b'\r', final=True), "\n")
|
||||||
|
|
||||||
|
self.assertEquals(decoder.decode(b'\r'), "")
|
||||||
|
self.assertEquals(decoder.decode(b'a'), "\na")
|
||||||
|
|
||||||
|
self.assertEquals(decoder.decode(b'\r\r\n'), "\n\n")
|
||||||
|
self.assertEquals(decoder.decode(b'\r'), "")
|
||||||
|
self.assertEquals(decoder.decode(b'\r'), "\n")
|
||||||
|
self.assertEquals(decoder.decode(b'\na'), "\na")
|
||||||
|
|
||||||
|
self.assertEquals(decoder.decode(b'\xe8\xa2\x88\r\n'), "\u8888\n")
|
||||||
|
self.assertEquals(decoder.decode(b'\xe8\xa2\x88'), "\u8888")
|
||||||
|
self.assertEquals(decoder.decode(b'\n'), "\n")
|
||||||
|
self.assertEquals(decoder.decode(b'\xe8\xa2\x88\r'), "\u8888")
|
||||||
|
self.assertEquals(decoder.decode(b'\n'), "\n")
|
||||||
|
|
||||||
# XXX Tests for open()
|
# XXX Tests for open()
|
||||||
|
|
||||||
|
|
|
@ -867,7 +867,7 @@ static PyGetSetDef fileio_getsetlist[] = {
|
||||||
|
|
||||||
PyTypeObject PyFileIO_Type = {
|
PyTypeObject PyFileIO_Type = {
|
||||||
PyVarObject_HEAD_INIT(&PyType_Type, 0)
|
PyVarObject_HEAD_INIT(&PyType_Type, 0)
|
||||||
"FileIO",
|
"_FileIO",
|
||||||
sizeof(PyFileIOObject),
|
sizeof(PyFileIOObject),
|
||||||
0,
|
0,
|
||||||
(destructor)fileio_dealloc, /* tp_dealloc */
|
(destructor)fileio_dealloc, /* tp_dealloc */
|
||||||
|
|
Loading…
Reference in New Issue