Revert r61508: it caused test_mailbox to fail on all platforms.
This commit is contained in:
parent
ada8c3b046
commit
e2b070558c
146
Lib/io.py
146
Lib/io.py
|
@ -1180,14 +1180,14 @@ class TextIOWrapper(TextIOBase):
|
||||||
self._encoder = None
|
self._encoder = None
|
||||||
self._decoder = None
|
self._decoder = None
|
||||||
self._decoded_text = "" # buffer for text produced by decoder
|
self._decoded_text = "" # buffer for text produced by decoder
|
||||||
self._decoded_text_offset = 0 # offset to text returned by read()
|
|
||||||
self._snapshot = None # info for reconstructing decoder state
|
self._snapshot = None # info for reconstructing decoder state
|
||||||
self._seekable = self._telling = self.buffer.seekable()
|
self._seekable = self._telling = self.buffer.seekable()
|
||||||
|
|
||||||
# A word about _snapshot. This attribute is either None, or a tuple
|
# A word about _snapshot. This attribute is either None, or a tuple
|
||||||
# (decoder_state, next_input) where decoder_state is the second
|
# (decoder_state, input_chunk, decoded_chars) where decoder_state is
|
||||||
# (integer) item of the decoder state, and next_input is the chunk
|
# the second (integer) item of the decoder state, input_chunk is the
|
||||||
# of bytes that comes after the snapshot point in the input.
|
# chunk of bytes that was read, and decoded_chars is the number of
|
||||||
|
# characters rendered by the decoder after feeding it those bytes.
|
||||||
# We use this to reconstruct intermediate decoder states in tell().
|
# We use this to reconstruct intermediate decoder states in tell().
|
||||||
|
|
||||||
# Naming convention:
|
# Naming convention:
|
||||||
|
@ -1271,10 +1271,10 @@ class TextIOWrapper(TextIOBase):
|
||||||
"""
|
"""
|
||||||
Read and decode the next chunk of data from the BufferedReader.
|
Read and decode the next chunk of data from the BufferedReader.
|
||||||
|
|
||||||
The return value is True unless EOF was reached. The decoded string
|
Return a tuple of two elements: all the bytes that were read, and
|
||||||
is placed in self._decoded_text (replacing its previous value).
|
the decoded string produced by the decoder. (The entire input
|
||||||
(The entire input chunk is sent to the decoder, though some of it
|
chunk is sent to the decoder, but some of it may remain buffered
|
||||||
may remain buffered in the decoder, yet to be converted.)
|
in the decoder, yet to be converted.)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if self._decoder is None:
|
if self._decoder is None:
|
||||||
|
@ -1283,9 +1283,8 @@ class TextIOWrapper(TextIOBase):
|
||||||
# No one should call tell(), so don't bother taking a snapshot.
|
# No one should call tell(), so don't bother taking a snapshot.
|
||||||
input_chunk = self.buffer.read1(self._CHUNK_SIZE)
|
input_chunk = self.buffer.read1(self._CHUNK_SIZE)
|
||||||
eof = not input_chunk
|
eof = not input_chunk
|
||||||
self._decoded_text = self._decoder.decode(input_chunk, eof)
|
decoded = self._decoder.decode(input_chunk, eof)
|
||||||
self._decoded_text_offset = 0
|
return (input_chunk, decoded)
|
||||||
return not eof
|
|
||||||
|
|
||||||
# The cookie returned by tell() cannot include the contents of
|
# The cookie returned by tell() cannot include the contents of
|
||||||
# the decoder's buffer, so we need to snapshot a point in the
|
# the decoder's buffer, so we need to snapshot a point in the
|
||||||
|
@ -1299,15 +1298,16 @@ class TextIOWrapper(TextIOBase):
|
||||||
|
|
||||||
input_chunk = self.buffer.read1(self._CHUNK_SIZE)
|
input_chunk = self.buffer.read1(self._CHUNK_SIZE)
|
||||||
eof = not input_chunk
|
eof = not input_chunk
|
||||||
self._decoded_text = self._decoder.decode(input_chunk, eof)
|
decoded = self._decoder.decode(input_chunk, eof)
|
||||||
self._decoded_text_offset = 0
|
|
||||||
|
|
||||||
# At the snapshot point, len(dec_buffer) bytes ago, the next input
|
# At the snapshot point len(dec_buffer) bytes ago, the next input
|
||||||
# to be passed to the decoder is dec_buffer + input_chunk.
|
# to be passed to the decoder is dec_buffer + input_chunk. Save
|
||||||
self._snapshot = (dec_flags, dec_buffer + input_chunk)
|
# len(decoded) so that later, tell() can figure out how much
|
||||||
return not eof
|
# decoded data has been used up by TextIOWrapper.read().
|
||||||
|
self._snapshot = (dec_flags, dec_buffer + input_chunk, len(decoded))
|
||||||
|
return (input_chunk, decoded)
|
||||||
|
|
||||||
def _pack_cookie(self, position, dec_flags=0,
|
def _encode_tell_cookie(self, position, dec_flags=0,
|
||||||
feed_bytes=0, need_eof=0, skip_chars=0):
|
feed_bytes=0, need_eof=0, skip_chars=0):
|
||||||
# The meaning of a tell() cookie is: seek to position, set the
|
# The meaning of a tell() cookie is: seek to position, set the
|
||||||
# decoder flags to dec_flags, read feed_bytes bytes, feed them
|
# decoder flags to dec_flags, read feed_bytes bytes, feed them
|
||||||
|
@ -1317,7 +1317,7 @@ class TextIOWrapper(TextIOBase):
|
||||||
return (position | (dec_flags<<64) | (feed_bytes<<128) |
|
return (position | (dec_flags<<64) | (feed_bytes<<128) |
|
||||||
(skip_chars<<192) | bool(need_eof)<<256)
|
(skip_chars<<192) | bool(need_eof)<<256)
|
||||||
|
|
||||||
def _unpack_cookie(self, bigint):
|
def _decode_tell_cookie(self, bigint):
|
||||||
rest, position = divmod(bigint, 1<<64)
|
rest, position = divmod(bigint, 1<<64)
|
||||||
rest, dec_flags = divmod(rest, 1<<64)
|
rest, dec_flags = divmod(rest, 1<<64)
|
||||||
rest, feed_bytes = divmod(rest, 1<<64)
|
rest, feed_bytes = divmod(rest, 1<<64)
|
||||||
|
@ -1339,14 +1339,14 @@ class TextIOWrapper(TextIOBase):
|
||||||
return position
|
return position
|
||||||
|
|
||||||
# Skip backward to the snapshot point (see _read_chunk).
|
# Skip backward to the snapshot point (see _read_chunk).
|
||||||
dec_flags, next_input = self._snapshot
|
dec_flags, next_input, decoded_chars = self._snapshot
|
||||||
position -= len(next_input)
|
position -= len(next_input)
|
||||||
|
|
||||||
# How many decoded characters have been returned since the snapshot?
|
# How many decoded characters have been consumed since the snapshot?
|
||||||
skip_chars = self._decoded_text_offset
|
skip_chars = decoded_chars - len(self._decoded_text)
|
||||||
if skip_chars == 0:
|
if skip_chars == 0:
|
||||||
# We haven't moved from the snapshot point.
|
# We haven't moved from the snapshot point.
|
||||||
return self._pack_cookie(position, dec_flags)
|
return self._encode_tell_cookie(position, dec_flags)
|
||||||
|
|
||||||
# Walk the decoder forward, one byte at a time, to find the minimum
|
# Walk the decoder forward, one byte at a time, to find the minimum
|
||||||
# input necessary to give us the decoded characters we need to skip.
|
# input necessary to give us the decoded characters we need to skip.
|
||||||
|
@ -1373,8 +1373,8 @@ class TextIOWrapper(TextIOBase):
|
||||||
if decoded_chars >= skip_chars:
|
if decoded_chars >= skip_chars:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
# We didn't get enough decoded data; signal EOF to get more.
|
# We didn't get enough decoded data; send EOF to get more.
|
||||||
decoded = decoder.decode(b"", final=True)
|
decoded = decoder.decode(b"", True)
|
||||||
decoded_chars += len(decoded)
|
decoded_chars += len(decoded)
|
||||||
need_eof = 1
|
need_eof = 1
|
||||||
if decoded_chars < skip_chars:
|
if decoded_chars < skip_chars:
|
||||||
|
@ -1385,7 +1385,7 @@ class TextIOWrapper(TextIOBase):
|
||||||
position += safe_fed_bytes
|
position += safe_fed_bytes
|
||||||
fed_bytes -= safe_fed_bytes
|
fed_bytes -= safe_fed_bytes
|
||||||
skip_chars -= safe_decoded_chars
|
skip_chars -= safe_decoded_chars
|
||||||
return self._pack_cookie(
|
return self._encode_tell_cookie(
|
||||||
position, dec_flags, fed_bytes, need_eof, skip_chars)
|
position, dec_flags, fed_bytes, need_eof, skip_chars)
|
||||||
finally:
|
finally:
|
||||||
decoder.setstate(saved_state)
|
decoder.setstate(saved_state)
|
||||||
|
@ -1405,7 +1405,8 @@ class TextIOWrapper(TextIOBase):
|
||||||
raise IOError("can't do nonzero end-relative seeks")
|
raise IOError("can't do nonzero end-relative seeks")
|
||||||
self.flush()
|
self.flush()
|
||||||
position = self.buffer.seek(0, 2)
|
position = self.buffer.seek(0, 2)
|
||||||
self._clear_decoded_text()
|
self._decoded_text = ""
|
||||||
|
self._snapshot = None
|
||||||
if self._decoder:
|
if self._decoder:
|
||||||
self._decoder.reset()
|
self._decoder.reset()
|
||||||
return position
|
return position
|
||||||
|
@ -1418,70 +1419,48 @@ class TextIOWrapper(TextIOBase):
|
||||||
|
|
||||||
# Seek back to the snapshot point.
|
# Seek back to the snapshot point.
|
||||||
position, dec_flags, feed_bytes, need_eof, skip_chars = \
|
position, dec_flags, feed_bytes, need_eof, skip_chars = \
|
||||||
self._unpack_cookie(cookie)
|
self._decode_tell_cookie(cookie)
|
||||||
self.buffer.seek(position)
|
self.buffer.seek(position)
|
||||||
self._clear_decoded_text()
|
self._decoded_text = ""
|
||||||
|
self._snapshot = None
|
||||||
|
|
||||||
if self._decoder or dec_flags or feed_bytes or need_eof:
|
if self._decoder or dec_flags or feed_bytes or need_eof:
|
||||||
# Restore the decoder flags to their values from the snapshot.
|
# Restore the decoder flags to their values from the snapshot.
|
||||||
self._decoder = self._decoder or self._get_decoder()
|
self._decoder = self._decoder or self._get_decoder()
|
||||||
self._decoder.setstate((b"", dec_flags))
|
self._decoder.setstate((b"", dec_flags))
|
||||||
self._snapshot = (dec_flags, b'')
|
|
||||||
|
|
||||||
if feed_bytes or need_eof:
|
if feed_bytes or need_eof:
|
||||||
# Feed feed_bytes bytes to the decoder.
|
# Feed feed_bytes bytes to the decoder.
|
||||||
input_chunk = self.buffer.read(feed_bytes)
|
input_chunk = self.buffer.read(feed_bytes)
|
||||||
self._decoded_text = self._decoder.decode(input_chunk, need_eof)
|
decoded = self._decoder.decode(input_chunk, need_eof)
|
||||||
if len(self._decoded_text) < skip_chars:
|
if len(decoded) < skip_chars:
|
||||||
raise IOError("can't restore logical file position")
|
raise IOError("can't restore logical file position")
|
||||||
|
|
||||||
# Skip skip_chars of the decoded characters.
|
# Skip skip_chars of the decoded characters.
|
||||||
self._decoded_text_offset = skip_chars
|
self._decoded_text = decoded[skip_chars:]
|
||||||
|
|
||||||
# Restore the snapshot.
|
# Restore the snapshot.
|
||||||
self._snapshot = (dec_flags, input_chunk)
|
self._snapshot = (dec_flags, input_chunk, len(decoded))
|
||||||
return cookie
|
return cookie
|
||||||
|
|
||||||
def _clear_decoded_text(self):
|
|
||||||
"""Reset the _decoded_text buffer."""
|
|
||||||
self._decoded_text = ''
|
|
||||||
self._decoded_text_offset = 0
|
|
||||||
self._snapshot = None
|
|
||||||
|
|
||||||
def _emit_decoded_text(self, n=None):
|
|
||||||
"""Advance into the _decoded_text buffer."""
|
|
||||||
offset = self._decoded_text_offset
|
|
||||||
if n is None:
|
|
||||||
text = self._decoded_text[offset:]
|
|
||||||
else:
|
|
||||||
text = self._decoded_text[offset:offset + n]
|
|
||||||
self._decoded_text_offset += len(text)
|
|
||||||
return text
|
|
||||||
|
|
||||||
def _unemit_decoded_text(self, n):
|
|
||||||
"""Rewind the _decoded_text buffer."""
|
|
||||||
if self._decoded_text_offset < n:
|
|
||||||
raise AssertionError("unemit out of bounds")
|
|
||||||
self._decoded_text_offset -= n
|
|
||||||
|
|
||||||
def read(self, n=None):
|
def read(self, n=None):
|
||||||
if n is None:
|
if n is None:
|
||||||
n = -1
|
n = -1
|
||||||
decoder = self._decoder or self._get_decoder()
|
decoder = self._decoder or self._get_decoder()
|
||||||
|
result = self._decoded_text
|
||||||
if n < 0:
|
if n < 0:
|
||||||
# Read everything.
|
result += decoder.decode(self.buffer.read(), True)
|
||||||
result = (self._emit_decoded_text() +
|
self._decoded_text = ""
|
||||||
decoder.decode(self.buffer.read(), final=True))
|
self._snapshot = None
|
||||||
self._clear_decoded_text()
|
|
||||||
return result
|
return result
|
||||||
else:
|
else:
|
||||||
# Keep reading chunks until we have n characters to return.
|
while len(result) < n:
|
||||||
eof = False
|
input_chunk, decoded = self._read_chunk()
|
||||||
result = self._emit_decoded_text(n)
|
result += decoded
|
||||||
while len(result) < n and not eof:
|
if not input_chunk:
|
||||||
eof = not self._read_chunk()
|
break
|
||||||
result += self._emit_decoded_text(n - len(result))
|
self._decoded_text = result[n:]
|
||||||
return result
|
return result[:n]
|
||||||
|
|
||||||
def __next__(self):
|
def __next__(self):
|
||||||
self._telling = False
|
self._telling = False
|
||||||
|
@ -1495,20 +1474,21 @@ class TextIOWrapper(TextIOBase):
|
||||||
def readline(self, limit=None):
|
def readline(self, limit=None):
|
||||||
if limit is None:
|
if limit is None:
|
||||||
limit = -1
|
limit = -1
|
||||||
|
if limit >= 0:
|
||||||
|
# XXX Hack to support limit argument, for backwards compatibility
|
||||||
|
line = self.readline()
|
||||||
|
if len(line) <= limit:
|
||||||
|
return line
|
||||||
|
line, self._decoded_text = \
|
||||||
|
line[:limit], line[limit:] + self._decoded_text
|
||||||
|
return line
|
||||||
|
|
||||||
# Grab all the decoded text (we will rewind any extra bits later).
|
line = self._decoded_text
|
||||||
line = self._emit_decoded_text()
|
|
||||||
|
|
||||||
start = 0
|
start = 0
|
||||||
decoder = self._decoder or self._get_decoder()
|
decoder = self._decoder or self._get_decoder()
|
||||||
|
|
||||||
pos = endpos = None
|
pos = endpos = None
|
||||||
while True:
|
while True:
|
||||||
if limit >= 0 and len(line) >= limit:
|
|
||||||
# Length limit has been reached.
|
|
||||||
endpos = limit
|
|
||||||
break
|
|
||||||
|
|
||||||
if self._readtranslate:
|
if self._readtranslate:
|
||||||
# Newlines are already translated, only search for \n
|
# Newlines are already translated, only search for \n
|
||||||
pos = line.find('\n', start)
|
pos = line.find('\n', start)
|
||||||
|
@ -1558,18 +1538,20 @@ class TextIOWrapper(TextIOBase):
|
||||||
|
|
||||||
# No line ending seen yet - get more data
|
# No line ending seen yet - get more data
|
||||||
more_line = ''
|
more_line = ''
|
||||||
while self._read_chunk():
|
while True:
|
||||||
if self._decoded_text:
|
readahead, pending = self._read_chunk()
|
||||||
|
more_line = pending
|
||||||
|
if more_line or not readahead:
|
||||||
break
|
break
|
||||||
if self._decoded_text:
|
if more_line:
|
||||||
line += self._emit_decoded_text()
|
line += more_line
|
||||||
else:
|
else:
|
||||||
# end of file
|
# end of file
|
||||||
self._clear_decoded_text()
|
self._decoded_text = ''
|
||||||
|
self._snapshot = None
|
||||||
return line
|
return line
|
||||||
|
|
||||||
# Rewind _decoded_text to just after the line ending we found.
|
self._decoded_text = line[endpos:]
|
||||||
self._unemit_decoded_text(len(line) - endpos)
|
|
||||||
return line[:endpos]
|
return line[:endpos]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
|
@ -590,9 +590,7 @@ class StatefulIncrementalDecoderTest(unittest.TestCase):
|
||||||
# I=0, O=3
|
# I=0, O=3
|
||||||
(b'i.o3.x.xyz.toolong.', False, 'x--.xyz.too.'),
|
(b'i.o3.x.xyz.toolong.', False, 'x--.xyz.too.'),
|
||||||
# I=6, O=3
|
# I=6, O=3
|
||||||
(b'i.o3.i6.abcdefghijklmnop', True, 'abc.ghi.mno.'),
|
(b'i.o3.i6.abcdefghijklmnop', True, 'abc.ghi.mno.')
|
||||||
# I=5, O=8 with newlines
|
|
||||||
(b'i.o8.i5.abc\ndef\nghy\nz', True, 'abc\nd---.ef\ngh---.y\nz-----.')
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def testDecoder(self):
|
def testDecoder(self):
|
||||||
|
@ -892,8 +890,8 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||||
return codecs.CodecInfo(
|
return codecs.CodecInfo(
|
||||||
name='test_decoder', encode=None, decode=None,
|
name='test_decoder', encode=None, decode=None,
|
||||||
incrementalencoder=None,
|
incrementalencoder=None,
|
||||||
incrementaldecoder=StatefulIncrementalDecoder,
|
streamreader=None, streamwriter=None,
|
||||||
streamreader=None, streamwriter=None)
|
incrementaldecoder=StatefulIncrementalDecoder)
|
||||||
|
|
||||||
def testSeekAndTellWithData(data, min_pos=0):
|
def testSeekAndTellWithData(data, min_pos=0):
|
||||||
"""Tell/seek to various points within a data stream and ensure
|
"""Tell/seek to various points within a data stream and ensure
|
||||||
|
@ -905,42 +903,16 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||||
decoded = f.read()
|
decoded = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
# Use read() to move to various positions in the input;
|
for i in range(min_pos, len(decoded) + 1): # seek positions
|
||||||
# then tell, read some more data, and seek back.
|
for j in [1, 5, len(decoded) - i]: # read lengths
|
||||||
for i in range(min_pos, len(decoded) + 1): # to read before tell
|
|
||||||
for j in [1, 5, len(decoded)]: # to read after tell
|
|
||||||
f = io.open(test_support.TESTFN, encoding='test_decoder')
|
f = io.open(test_support.TESTFN, encoding='test_decoder')
|
||||||
self.assertEquals(f.read(i), decoded[:i])
|
self.assertEquals(f.read(i), decoded[:i])
|
||||||
cookie = f.tell()
|
cookie = f.tell()
|
||||||
self.assertEquals(f.read(j), decoded[i:i + j])
|
self.assertEquals(f.read(j), decoded[i:i + j])
|
||||||
f.seek(cookie)
|
f.seek(cookie)
|
||||||
self.assertEquals(f.tell(), cookie)
|
|
||||||
self.assertEquals(f.read(), decoded[i:])
|
self.assertEquals(f.read(), decoded[i:])
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
lines = len(decoded.split('\n'))
|
|
||||||
|
|
||||||
# Use readline() to move to various positions in the input;
|
|
||||||
# then tell, read some more data, and seek back.
|
|
||||||
for limit in [-1, 4, 128]: # 'limit' argument for readline()
|
|
||||||
for j in [1, 5, len(decoded)]: # to read after tell()
|
|
||||||
f = io.open(test_support.TESTFN, encoding='test_decoder')
|
|
||||||
text = ''
|
|
||||||
for k in range(lines): # repeatedly call readline()
|
|
||||||
line = f.readline(limit=limit)
|
|
||||||
if limit >= 0:
|
|
||||||
self.assert_(len(line) <= limit)
|
|
||||||
text += line
|
|
||||||
i = len(text)
|
|
||||||
self.assertEquals(text, decoded[:i])
|
|
||||||
cookie = f.tell()
|
|
||||||
self.assertEquals(f.read(j), decoded[i:i + j])
|
|
||||||
f.seek(cookie)
|
|
||||||
self.assertEquals(f.tell(), cookie)
|
|
||||||
self.assertEquals(f.read(), decoded[i:])
|
|
||||||
f.seek(cookie)
|
|
||||||
f.close()
|
|
||||||
|
|
||||||
# Register a special incremental decoder for testing.
|
# Register a special incremental decoder for testing.
|
||||||
codecs.register(lookupTestDecoder)
|
codecs.register(lookupTestDecoder)
|
||||||
self.codecEnabled = 1
|
self.codecEnabled = 1
|
||||||
|
|
Loading…
Reference in New Issue