Issue #22982: Improve BOM handling when seeking to multiple positions of a writable text file.

This commit is contained in:
Antoine Pitrou 2015-04-13 20:01:21 +02:00
parent 20d31b5182
commit 85e3ee749c
4 changed files with 52 additions and 15 deletions

View File

@ -1865,6 +1865,19 @@ class TextIOWrapper(TextIOBase):
return buffer return buffer
def seek(self, cookie, whence=0): def seek(self, cookie, whence=0):
def _reset_encoder(position):
    """Bring the encoder in line with the stream position after a seek.

    This is merely useful for proper BOM handling.  When no encoder
    can be obtained (LookupError), nothing is done.  Otherwise the
    encoder is fully reset if `position` is 0, and is given state 0
    for any other position.
    """
    try:
        enc = self._encoder or self._get_encoder()
    except LookupError:
        # Sometimes the encoder doesn't exist; there is nothing to reset.
        return
    if position != 0:
        # Not at the start of the stream.
        enc.setstate(0)
        return
    # At the very start of the stream: back to the initial state.
    enc.reset()
if self.closed: if self.closed:
raise ValueError("tell on closed file") raise ValueError("tell on closed file")
if not self._seekable: if not self._seekable:
@ -1885,6 +1898,7 @@ class TextIOWrapper(TextIOBase):
self._snapshot = None self._snapshot = None
if self._decoder: if self._decoder:
self._decoder.reset() self._decoder.reset()
_reset_encoder(position)
return position return position
if whence != 0: if whence != 0:
raise ValueError("unsupported whence (%r)" % (whence,)) raise ValueError("unsupported whence (%r)" % (whence,))
@ -1922,17 +1936,7 @@ class TextIOWrapper(TextIOBase):
raise OSError("can't restore logical file position") raise OSError("can't restore logical file position")
self._decoded_chars_used = chars_to_skip self._decoded_chars_used = chars_to_skip
# Finally, reset the encoder (merely useful for proper BOM handling) _reset_encoder(cookie)
try:
encoder = self._encoder or self._get_encoder()
except LookupError:
# Sometimes the encoder doesn't exist
pass
else:
if cookie != 0:
encoder.setstate(0)
else:
encoder.reset()
return cookie return cookie
def read(self, size=None): def read(self, size=None):

View File

@ -2669,6 +2669,19 @@ class TextIOWrapperTest(unittest.TestCase):
with self.open(filename, 'rb') as f: with self.open(filename, 'rb') as f:
self.assertEqual(f.read(), 'bbbzzz'.encode(charset)) self.assertEqual(f.read(), 'bbbzzz'.encode(charset))
def test_seek_append_bom(self):
    # Same test, but first seek to the start and then to the end
    path = support.TESTFN
    for encoding in ('utf-8-sig', 'utf-16', 'utf-32'):
        # Create the file with some initial content.
        with self.open(path, 'w', encoding=encoding) as writer:
            writer.write('aaa')
        # Reopen for appending, seek to the start, then to the end,
        # and only then write more text.
        with self.open(path, 'a', encoding=encoding) as appender:
            appender.seek(0)
            appender.seek(0, self.SEEK_END)
            appender.write('xxx')
        # The raw bytes must match encoding the whole text in one go.
        with self.open(path, 'rb') as raw:
            actual = raw.read()
            self.assertEqual(actual, 'aaaxxx'.encode(encoding))
def test_errors_property(self): def test_errors_property(self):
with self.open(support.TESTFN, "w") as f: with self.open(support.TESTFN, "w") as f:
self.assertEqual(f.errors, "strict") self.assertEqual(f.errors, "strict")

View File

@ -29,6 +29,9 @@ Core and Builtins
Library Library
------- -------
- Issue #22982: Improve BOM handling when seeking to multiple positions of
a writable text file.
- Issue #23865: close() methods in multiple modules now are idempotent and more - Issue #23865: close() methods in multiple modules now are idempotent and more
robust at shutdown. If they need to release multiple resources, these are released robust at shutdown. If they need to release multiple resources, these are released
even if errors occur. even if errors occur.

View File

@ -2042,11 +2042,10 @@ _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
} }
static int static int
_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie) _textiowrapper_encoder_reset(textio *self, int start_of_stream)
{ {
PyObject *res; PyObject *res;
/* Same as _textiowrapper_decoder_setstate() above. */ if (start_of_stream) {
if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL); res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
self->encoding_start_of_stream = 1; self->encoding_start_of_stream = 1;
} }
@ -2061,6 +2060,14 @@ _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
return 0; return 0;
} }
/* Set the encoder state from a seek cookie.
 *
 * Uses the same start-of-stream test as _textiowrapper_decoder_setstate()
 * above: the cookie counts as "start of stream" only when both its
 * start_pos and its dec_flags are zero.  The work itself is delegated to
 * _textiowrapper_encoder_reset(), whose return value is passed through
 * unchanged.
 */
static int
_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
{
    int at_stream_start = (cookie->start_pos == 0 && cookie->dec_flags == 0);

    return _textiowrapper_encoder_reset(self, at_stream_start);
}
static PyObject * static PyObject *
textiowrapper_seek(textio *self, PyObject *args) textiowrapper_seek(textio *self, PyObject *args)
{ {
@ -2128,7 +2135,17 @@ textiowrapper_seek(textio *self, PyObject *args)
} }
res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2); res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Py_XDECREF(cookieObj); Py_CLEAR(cookieObj);
if (res == NULL)
goto fail;
if (self->encoder) {
/* If seek() == 0, we are at the start of stream, otherwise not */
cmp = PyObject_RichCompareBool(res, _PyIO_zero, Py_EQ);
if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
Py_DECREF(res);
goto fail;
}
}
return res; return res;
} }
else if (whence != 0) { else if (whence != 0) {