Issue #22982: Improve BOM handling when seeking to multiple positions of a writable text file.
This commit is contained in:
parent
20d31b5182
commit
85e3ee749c
26
Lib/_pyio.py
26
Lib/_pyio.py
|
@ -1865,6 +1865,19 @@ class TextIOWrapper(TextIOBase):
|
||||||
return buffer
|
return buffer
|
||||||
|
|
||||||
def seek(self, cookie, whence=0):
|
def seek(self, cookie, whence=0):
|
||||||
|
def _reset_encoder(position):
|
||||||
|
"""Reset the encoder (merely useful for proper BOM handling)"""
|
||||||
|
try:
|
||||||
|
encoder = self._encoder or self._get_encoder()
|
||||||
|
except LookupError:
|
||||||
|
# Sometimes the encoder doesn't exist
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
if position != 0:
|
||||||
|
encoder.setstate(0)
|
||||||
|
else:
|
||||||
|
encoder.reset()
|
||||||
|
|
||||||
if self.closed:
|
if self.closed:
|
||||||
raise ValueError("tell on closed file")
|
raise ValueError("tell on closed file")
|
||||||
if not self._seekable:
|
if not self._seekable:
|
||||||
|
@ -1885,6 +1898,7 @@ class TextIOWrapper(TextIOBase):
|
||||||
self._snapshot = None
|
self._snapshot = None
|
||||||
if self._decoder:
|
if self._decoder:
|
||||||
self._decoder.reset()
|
self._decoder.reset()
|
||||||
|
_reset_encoder(position)
|
||||||
return position
|
return position
|
||||||
if whence != 0:
|
if whence != 0:
|
||||||
raise ValueError("unsupported whence (%r)" % (whence,))
|
raise ValueError("unsupported whence (%r)" % (whence,))
|
||||||
|
@ -1922,17 +1936,7 @@ class TextIOWrapper(TextIOBase):
|
||||||
raise OSError("can't restore logical file position")
|
raise OSError("can't restore logical file position")
|
||||||
self._decoded_chars_used = chars_to_skip
|
self._decoded_chars_used = chars_to_skip
|
||||||
|
|
||||||
# Finally, reset the encoder (merely useful for proper BOM handling)
|
_reset_encoder(cookie)
|
||||||
try:
|
|
||||||
encoder = self._encoder or self._get_encoder()
|
|
||||||
except LookupError:
|
|
||||||
# Sometimes the encoder doesn't exist
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
if cookie != 0:
|
|
||||||
encoder.setstate(0)
|
|
||||||
else:
|
|
||||||
encoder.reset()
|
|
||||||
return cookie
|
return cookie
|
||||||
|
|
||||||
def read(self, size=None):
|
def read(self, size=None):
|
||||||
|
|
|
@ -2669,6 +2669,19 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||||
with self.open(filename, 'rb') as f:
|
with self.open(filename, 'rb') as f:
|
||||||
self.assertEqual(f.read(), 'bbbzzz'.encode(charset))
|
self.assertEqual(f.read(), 'bbbzzz'.encode(charset))
|
||||||
|
|
||||||
|
def test_seek_append_bom(self):
|
||||||
|
# Same test, but first seek to the start and then to the end
|
||||||
|
filename = support.TESTFN
|
||||||
|
for charset in ('utf-8-sig', 'utf-16', 'utf-32'):
|
||||||
|
with self.open(filename, 'w', encoding=charset) as f:
|
||||||
|
f.write('aaa')
|
||||||
|
with self.open(filename, 'a', encoding=charset) as f:
|
||||||
|
f.seek(0)
|
||||||
|
f.seek(0, self.SEEK_END)
|
||||||
|
f.write('xxx')
|
||||||
|
with self.open(filename, 'rb') as f:
|
||||||
|
self.assertEqual(f.read(), 'aaaxxx'.encode(charset))
|
||||||
|
|
||||||
def test_errors_property(self):
|
def test_errors_property(self):
|
||||||
with self.open(support.TESTFN, "w") as f:
|
with self.open(support.TESTFN, "w") as f:
|
||||||
self.assertEqual(f.errors, "strict")
|
self.assertEqual(f.errors, "strict")
|
||||||
|
|
|
@ -29,6 +29,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #22982: Improve BOM handling when seeking to multiple positions of
|
||||||
|
a writable text file.
|
||||||
|
|
||||||
- Issue #23865: close() methods in multiple modules now are idempotent and more
|
- Issue #23865: close() methods in multiple modules now are idempotent and more
|
||||||
robust at shutdown. If multiple resources need to be released, they are released
|
robust at shutdown. If multiple resources need to be released, they are released
|
||||||
even if errors occur.
|
even if errors occur.
|
||||||
|
|
|
@ -2042,11 +2042,10 @@ _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
|
_textiowrapper_encoder_reset(textio *self, int start_of_stream)
|
||||||
{
|
{
|
||||||
PyObject *res;
|
PyObject *res;
|
||||||
/* Same as _textiowrapper_decoder_setstate() above. */
|
if (start_of_stream) {
|
||||||
if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
|
|
||||||
res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
|
res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
|
||||||
self->encoding_start_of_stream = 1;
|
self->encoding_start_of_stream = 1;
|
||||||
}
|
}
|
||||||
|
@ -2061,6 +2060,14 @@ _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
|
||||||
|
{
|
||||||
|
/* Same as _textiowrapper_decoder_setstate() above. */
|
||||||
|
return _textiowrapper_encoder_reset(
|
||||||
|
self, cookie->start_pos == 0 && cookie->dec_flags == 0);
|
||||||
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
textiowrapper_seek(textio *self, PyObject *args)
|
textiowrapper_seek(textio *self, PyObject *args)
|
||||||
{
|
{
|
||||||
|
@ -2128,7 +2135,17 @@ textiowrapper_seek(textio *self, PyObject *args)
|
||||||
}
|
}
|
||||||
|
|
||||||
res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
|
res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
|
||||||
Py_XDECREF(cookieObj);
|
Py_CLEAR(cookieObj);
|
||||||
|
if (res == NULL)
|
||||||
|
goto fail;
|
||||||
|
if (self->encoder) {
|
||||||
|
/* If seek() == 0, we are at the start of stream, otherwise not */
|
||||||
|
cmp = PyObject_RichCompareBool(res, _PyIO_zero, Py_EQ);
|
||||||
|
if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
|
||||||
|
Py_DECREF(res);
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
else if (whence != 0) {
|
else if (whence != 0) {
|
||||||
|
|
Loading…
Reference in New Issue