Revert r61508: it caused test_mailbox to fail on all platforms.

This commit is contained in:
Neal Norwitz 2008-03-18 19:52:05 +00:00
parent ada8c3b046
commit e2b070558c
2 changed files with 69 additions and 115 deletions

146
Lib/io.py
View File

@ -1180,14 +1180,14 @@ class TextIOWrapper(TextIOBase):
self._encoder = None self._encoder = None
self._decoder = None self._decoder = None
self._decoded_text = "" # buffer for text produced by decoder self._decoded_text = "" # buffer for text produced by decoder
self._decoded_text_offset = 0 # offset to text returned by read()
self._snapshot = None # info for reconstructing decoder state self._snapshot = None # info for reconstructing decoder state
self._seekable = self._telling = self.buffer.seekable() self._seekable = self._telling = self.buffer.seekable()
# A word about _snapshot. This attribute is either None, or a tuple # A word about _snapshot. This attribute is either None, or a tuple
# (decoder_state, next_input) where decoder_state is the second # (decoder_state, input_chunk, decoded_chars) where decoder_state is
# (integer) item of the decoder state, and next_input is the chunk # the second (integer) item of the decoder state, input_chunk is the
# of bytes that comes after the snapshot point in the input. # chunk of bytes that was read, and decoded_chars is the number of
# characters rendered by the decoder after feeding it those bytes.
# We use this to reconstruct intermediate decoder states in tell(). # We use this to reconstruct intermediate decoder states in tell().
# Naming convention: # Naming convention:
@ -1271,10 +1271,10 @@ class TextIOWrapper(TextIOBase):
""" """
Read and decode the next chunk of data from the BufferedReader. Read and decode the next chunk of data from the BufferedReader.
The return value is True unless EOF was reached. The decoded string Return a tuple of two elements: all the bytes that were read, and
is placed in self._decoded_text (replacing its previous value). the decoded string produced by the decoder. (The entire input
(The entire input chunk is sent to the decoder, though some of it chunk is sent to the decoder, but some of it may remain buffered
may remain buffered in the decoder, yet to be converted.) in the decoder, yet to be converted.)
""" """
if self._decoder is None: if self._decoder is None:
@ -1283,9 +1283,8 @@ class TextIOWrapper(TextIOBase):
# No one should call tell(), so don't bother taking a snapshot. # No one should call tell(), so don't bother taking a snapshot.
input_chunk = self.buffer.read1(self._CHUNK_SIZE) input_chunk = self.buffer.read1(self._CHUNK_SIZE)
eof = not input_chunk eof = not input_chunk
self._decoded_text = self._decoder.decode(input_chunk, eof) decoded = self._decoder.decode(input_chunk, eof)
self._decoded_text_offset = 0 return (input_chunk, decoded)
return not eof
# The cookie returned by tell() cannot include the contents of # The cookie returned by tell() cannot include the contents of
# the decoder's buffer, so we need to snapshot a point in the # the decoder's buffer, so we need to snapshot a point in the
@ -1299,15 +1298,16 @@ class TextIOWrapper(TextIOBase):
input_chunk = self.buffer.read1(self._CHUNK_SIZE) input_chunk = self.buffer.read1(self._CHUNK_SIZE)
eof = not input_chunk eof = not input_chunk
self._decoded_text = self._decoder.decode(input_chunk, eof) decoded = self._decoder.decode(input_chunk, eof)
self._decoded_text_offset = 0
# At the snapshot point, len(dec_buffer) bytes ago, the next input # At the snapshot point len(dec_buffer) bytes ago, the next input
# to be passed to the decoder is dec_buffer + input_chunk. # to be passed to the decoder is dec_buffer + input_chunk. Save
self._snapshot = (dec_flags, dec_buffer + input_chunk) # len(decoded) so that later, tell() can figure out how much
return not eof # decoded data has been used up by TextIOWrapper.read().
self._snapshot = (dec_flags, dec_buffer + input_chunk, len(decoded))
return (input_chunk, decoded)
def _pack_cookie(self, position, dec_flags=0, def _encode_tell_cookie(self, position, dec_flags=0,
feed_bytes=0, need_eof=0, skip_chars=0): feed_bytes=0, need_eof=0, skip_chars=0):
# The meaning of a tell() cookie is: seek to position, set the # The meaning of a tell() cookie is: seek to position, set the
# decoder flags to dec_flags, read feed_bytes bytes, feed them # decoder flags to dec_flags, read feed_bytes bytes, feed them
@ -1317,7 +1317,7 @@ class TextIOWrapper(TextIOBase):
return (position | (dec_flags<<64) | (feed_bytes<<128) | return (position | (dec_flags<<64) | (feed_bytes<<128) |
(skip_chars<<192) | bool(need_eof)<<256) (skip_chars<<192) | bool(need_eof)<<256)
def _unpack_cookie(self, bigint): def _decode_tell_cookie(self, bigint):
rest, position = divmod(bigint, 1<<64) rest, position = divmod(bigint, 1<<64)
rest, dec_flags = divmod(rest, 1<<64) rest, dec_flags = divmod(rest, 1<<64)
rest, feed_bytes = divmod(rest, 1<<64) rest, feed_bytes = divmod(rest, 1<<64)
@ -1339,14 +1339,14 @@ class TextIOWrapper(TextIOBase):
return position return position
# Skip backward to the snapshot point (see _read_chunk). # Skip backward to the snapshot point (see _read_chunk).
dec_flags, next_input = self._snapshot dec_flags, next_input, decoded_chars = self._snapshot
position -= len(next_input) position -= len(next_input)
# How many decoded characters have been returned since the snapshot? # How many decoded characters have been consumed since the snapshot?
skip_chars = self._decoded_text_offset skip_chars = decoded_chars - len(self._decoded_text)
if skip_chars == 0: if skip_chars == 0:
# We haven't moved from the snapshot point. # We haven't moved from the snapshot point.
return self._pack_cookie(position, dec_flags) return self._encode_tell_cookie(position, dec_flags)
# Walk the decoder forward, one byte at a time, to find the minimum # Walk the decoder forward, one byte at a time, to find the minimum
# input necessary to give us the decoded characters we need to skip. # input necessary to give us the decoded characters we need to skip.
@ -1373,8 +1373,8 @@ class TextIOWrapper(TextIOBase):
if decoded_chars >= skip_chars: if decoded_chars >= skip_chars:
break break
else: else:
# We didn't get enough decoded data; signal EOF to get more. # We didn't get enough decoded data; send EOF to get more.
decoded = decoder.decode(b"", final=True) decoded = decoder.decode(b"", True)
decoded_chars += len(decoded) decoded_chars += len(decoded)
need_eof = 1 need_eof = 1
if decoded_chars < skip_chars: if decoded_chars < skip_chars:
@ -1385,7 +1385,7 @@ class TextIOWrapper(TextIOBase):
position += safe_fed_bytes position += safe_fed_bytes
fed_bytes -= safe_fed_bytes fed_bytes -= safe_fed_bytes
skip_chars -= safe_decoded_chars skip_chars -= safe_decoded_chars
return self._pack_cookie( return self._encode_tell_cookie(
position, dec_flags, fed_bytes, need_eof, skip_chars) position, dec_flags, fed_bytes, need_eof, skip_chars)
finally: finally:
decoder.setstate(saved_state) decoder.setstate(saved_state)
@ -1405,7 +1405,8 @@ class TextIOWrapper(TextIOBase):
raise IOError("can't do nonzero end-relative seeks") raise IOError("can't do nonzero end-relative seeks")
self.flush() self.flush()
position = self.buffer.seek(0, 2) position = self.buffer.seek(0, 2)
self._clear_decoded_text() self._decoded_text = ""
self._snapshot = None
if self._decoder: if self._decoder:
self._decoder.reset() self._decoder.reset()
return position return position
@ -1418,70 +1419,48 @@ class TextIOWrapper(TextIOBase):
# Seek back to the snapshot point. # Seek back to the snapshot point.
position, dec_flags, feed_bytes, need_eof, skip_chars = \ position, dec_flags, feed_bytes, need_eof, skip_chars = \
self._unpack_cookie(cookie) self._decode_tell_cookie(cookie)
self.buffer.seek(position) self.buffer.seek(position)
self._clear_decoded_text() self._decoded_text = ""
self._snapshot = None
if self._decoder or dec_flags or feed_bytes or need_eof: if self._decoder or dec_flags or feed_bytes or need_eof:
# Restore the decoder flags to their values from the snapshot. # Restore the decoder flags to their values from the snapshot.
self._decoder = self._decoder or self._get_decoder() self._decoder = self._decoder or self._get_decoder()
self._decoder.setstate((b"", dec_flags)) self._decoder.setstate((b"", dec_flags))
self._snapshot = (dec_flags, b'')
if feed_bytes or need_eof: if feed_bytes or need_eof:
# Feed feed_bytes bytes to the decoder. # Feed feed_bytes bytes to the decoder.
input_chunk = self.buffer.read(feed_bytes) input_chunk = self.buffer.read(feed_bytes)
self._decoded_text = self._decoder.decode(input_chunk, need_eof) decoded = self._decoder.decode(input_chunk, need_eof)
if len(self._decoded_text) < skip_chars: if len(decoded) < skip_chars:
raise IOError("can't restore logical file position") raise IOError("can't restore logical file position")
# Skip skip_chars of the decoded characters. # Skip skip_chars of the decoded characters.
self._decoded_text_offset = skip_chars self._decoded_text = decoded[skip_chars:]
# Restore the snapshot. # Restore the snapshot.
self._snapshot = (dec_flags, input_chunk) self._snapshot = (dec_flags, input_chunk, len(decoded))
return cookie return cookie
def _clear_decoded_text(self):
"""Reset the _decoded_text buffer."""
self._decoded_text = ''
self._decoded_text_offset = 0
self._snapshot = None
def _emit_decoded_text(self, n=None):
"""Advance into the _decoded_text buffer."""
offset = self._decoded_text_offset
if n is None:
text = self._decoded_text[offset:]
else:
text = self._decoded_text[offset:offset + n]
self._decoded_text_offset += len(text)
return text
def _unemit_decoded_text(self, n):
"""Rewind the _decoded_text buffer."""
if self._decoded_text_offset < n:
raise AssertionError("unemit out of bounds")
self._decoded_text_offset -= n
def read(self, n=None): def read(self, n=None):
if n is None: if n is None:
n = -1 n = -1
decoder = self._decoder or self._get_decoder() decoder = self._decoder or self._get_decoder()
result = self._decoded_text
if n < 0: if n < 0:
# Read everything. result += decoder.decode(self.buffer.read(), True)
result = (self._emit_decoded_text() + self._decoded_text = ""
decoder.decode(self.buffer.read(), final=True)) self._snapshot = None
self._clear_decoded_text()
return result return result
else: else:
# Keep reading chunks until we have n characters to return. while len(result) < n:
eof = False input_chunk, decoded = self._read_chunk()
result = self._emit_decoded_text(n) result += decoded
while len(result) < n and not eof: if not input_chunk:
eof = not self._read_chunk() break
result += self._emit_decoded_text(n - len(result)) self._decoded_text = result[n:]
return result return result[:n]
def __next__(self): def __next__(self):
self._telling = False self._telling = False
@ -1495,20 +1474,21 @@ class TextIOWrapper(TextIOBase):
def readline(self, limit=None): def readline(self, limit=None):
if limit is None: if limit is None:
limit = -1 limit = -1
if limit >= 0:
# XXX Hack to support limit argument, for backwards compatibility
line = self.readline()
if len(line) <= limit:
return line
line, self._decoded_text = \
line[:limit], line[limit:] + self._decoded_text
return line
# Grab all the decoded text (we will rewind any extra bits later). line = self._decoded_text
line = self._emit_decoded_text()
start = 0 start = 0
decoder = self._decoder or self._get_decoder() decoder = self._decoder or self._get_decoder()
pos = endpos = None pos = endpos = None
while True: while True:
if limit >= 0 and len(line) >= limit:
# Length limit has been reached.
endpos = limit
break
if self._readtranslate: if self._readtranslate:
# Newlines are already translated, only search for \n # Newlines are already translated, only search for \n
pos = line.find('\n', start) pos = line.find('\n', start)
@ -1558,18 +1538,20 @@ class TextIOWrapper(TextIOBase):
# No line ending seen yet - get more data # No line ending seen yet - get more data
more_line = '' more_line = ''
while self._read_chunk(): while True:
if self._decoded_text: readahead, pending = self._read_chunk()
more_line = pending
if more_line or not readahead:
break break
if self._decoded_text: if more_line:
line += self._emit_decoded_text() line += more_line
else: else:
# end of file # end of file
self._clear_decoded_text() self._decoded_text = ''
self._snapshot = None
return line return line
# Rewind _decoded_text to just after the line ending we found. self._decoded_text = line[endpos:]
self._unemit_decoded_text(len(line) - endpos)
return line[:endpos] return line[:endpos]
@property @property

View File

@ -590,9 +590,7 @@ class StatefulIncrementalDecoderTest(unittest.TestCase):
# I=0, O=3 # I=0, O=3
(b'i.o3.x.xyz.toolong.', False, 'x--.xyz.too.'), (b'i.o3.x.xyz.toolong.', False, 'x--.xyz.too.'),
# I=6, O=3 # I=6, O=3
(b'i.o3.i6.abcdefghijklmnop', True, 'abc.ghi.mno.'), (b'i.o3.i6.abcdefghijklmnop', True, 'abc.ghi.mno.')
# I=5, O=8 with newlines
(b'i.o8.i5.abc\ndef\nghy\nz', True, 'abc\nd---.ef\ngh---.y\nz-----.')
] ]
def testDecoder(self): def testDecoder(self):
@ -892,8 +890,8 @@ class TextIOWrapperTest(unittest.TestCase):
return codecs.CodecInfo( return codecs.CodecInfo(
name='test_decoder', encode=None, decode=None, name='test_decoder', encode=None, decode=None,
incrementalencoder=None, incrementalencoder=None,
incrementaldecoder=StatefulIncrementalDecoder, streamreader=None, streamwriter=None,
streamreader=None, streamwriter=None) incrementaldecoder=StatefulIncrementalDecoder)
def testSeekAndTellWithData(data, min_pos=0): def testSeekAndTellWithData(data, min_pos=0):
"""Tell/seek to various points within a data stream and ensure """Tell/seek to various points within a data stream and ensure
@ -905,42 +903,16 @@ class TextIOWrapperTest(unittest.TestCase):
decoded = f.read() decoded = f.read()
f.close() f.close()
# Use read() to move to various positions in the input; for i in range(min_pos, len(decoded) + 1): # seek positions
# then tell, read some more data, and seek back. for j in [1, 5, len(decoded) - i]: # read lengths
for i in range(min_pos, len(decoded) + 1): # to read before tell
for j in [1, 5, len(decoded)]: # to read after tell
f = io.open(test_support.TESTFN, encoding='test_decoder') f = io.open(test_support.TESTFN, encoding='test_decoder')
self.assertEquals(f.read(i), decoded[:i]) self.assertEquals(f.read(i), decoded[:i])
cookie = f.tell() cookie = f.tell()
self.assertEquals(f.read(j), decoded[i:i + j]) self.assertEquals(f.read(j), decoded[i:i + j])
f.seek(cookie) f.seek(cookie)
self.assertEquals(f.tell(), cookie)
self.assertEquals(f.read(), decoded[i:]) self.assertEquals(f.read(), decoded[i:])
f.close() f.close()
lines = len(decoded.split('\n'))
# Use readline() to move to various positions in the input;
# then tell, read some more data, and seek back.
for limit in [-1, 4, 128]: # 'limit' argument for readline()
for j in [1, 5, len(decoded)]: # to read after tell()
f = io.open(test_support.TESTFN, encoding='test_decoder')
text = ''
for k in range(lines): # repeatedly call readline()
line = f.readline(limit=limit)
if limit >= 0:
self.assert_(len(line) <= limit)
text += line
i = len(text)
self.assertEquals(text, decoded[:i])
cookie = f.tell()
self.assertEquals(f.read(j), decoded[i:i + j])
f.seek(cookie)
self.assertEquals(f.tell(), cookie)
self.assertEquals(f.read(), decoded[i:])
f.seek(cookie)
f.close()
# Register a special incremental decoder for testing. # Register a special incremental decoder for testing.
codecs.register(lookupTestDecoder) codecs.register(lookupTestDecoder)
self.codecEnabled = 1 self.codecEnabled = 1