Merged revisions 81471-81472 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ........ r81471 | victor.stinner | 2010-05-22 15:37:56 +0200 (sam., 22 mai 2010) | 7 lines Issue #6268: More bugfixes about BOM, UTF-16 and UTF-32 * Fix seek() method of codecs.open(), don't write the BOM twice after seek(0) * Fix reset() method of codecs, UTF-16, UTF-32 and StreamWriter classes * test_codecs: use "w+" mode instead of "wt+". "t" mode is not supported by Solaris or Windows, but does it really exist? I found it in the issue. ........ r81472 | victor.stinner | 2010-05-22 15:44:25 +0200 (sam., 22 mai 2010) | 4 lines Fix my last commit (r81471) about codecs Remember: don't touch the code just before a commit ........
This commit is contained in:
parent
fff532bef3
commit
a92ad7ee2c
|
@ -374,6 +374,11 @@ class StreamWriter(Codec):
|
|||
"""
|
||||
pass
|
||||
|
||||
def seek(self, offset, whence=0):
|
||||
self.stream.seek(offset, whence)
|
||||
if whence == 0 and offset == 0:
|
||||
self.reset()
|
||||
|
||||
def __getattr__(self, name,
|
||||
getattr=getattr):
|
||||
|
||||
|
@ -606,8 +611,8 @@ class StreamReader(Codec):
|
|||
|
||||
Resets the codec buffers used for keeping state.
|
||||
"""
|
||||
self.reset()
|
||||
self.stream.seek(offset, whence)
|
||||
self.reset()
|
||||
|
||||
def __next__(self):
|
||||
|
||||
|
@ -700,8 +705,10 @@ class StreamReaderWriter:
|
|||
self.writer.reset()
|
||||
|
||||
def seek(self, offset, whence=0):
|
||||
self.reader.seek(offset, whence)
|
||||
self.writer.seek(offset, whence)
|
||||
self.stream.seek(offset, whence)
|
||||
self.reader.reset()
|
||||
if whence == 0 and offset == 0:
|
||||
self.writer.reset()
|
||||
|
||||
def __getattr__(self, name,
|
||||
getattr=getattr):
|
||||
|
|
|
@ -103,17 +103,23 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
|
|||
|
||||
class StreamWriter(codecs.StreamWriter):
|
||||
def __init__(self, stream, errors='strict'):
|
||||
self.bom_written = False
|
||||
codecs.StreamWriter.__init__(self, stream, errors)
|
||||
self.encoder = None
|
||||
|
||||
def reset(self):
|
||||
codecs.StreamWriter.reset(self)
|
||||
self.encoder = None
|
||||
|
||||
def encode(self, input, errors='strict'):
|
||||
self.bom_written = True
|
||||
result = codecs.utf_16_encode(input, errors)
|
||||
if sys.byteorder == 'little':
|
||||
self.encode = codecs.utf_16_le_encode
|
||||
if self.encoder is None:
|
||||
result = codecs.utf_16_encode(input, errors)
|
||||
if sys.byteorder == 'little':
|
||||
self.encoder = codecs.utf_16_le_encode
|
||||
else:
|
||||
self.encoder = codecs.utf_16_be_encode
|
||||
return result
|
||||
else:
|
||||
self.encode = codecs.utf_16_be_encode
|
||||
return result
|
||||
return self.encoder(input, errors)
|
||||
|
||||
class StreamReader(codecs.StreamReader):
|
||||
|
||||
|
|
|
@ -98,17 +98,23 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
|
|||
|
||||
class StreamWriter(codecs.StreamWriter):
|
||||
def __init__(self, stream, errors='strict'):
|
||||
self.bom_written = False
|
||||
self.encoder = None
|
||||
codecs.StreamWriter.__init__(self, stream, errors)
|
||||
|
||||
def reset(self):
|
||||
codecs.StreamWriter.reset(self)
|
||||
self.encoder = None
|
||||
|
||||
def encode(self, input, errors='strict'):
|
||||
self.bom_written = True
|
||||
result = codecs.utf_32_encode(input, errors)
|
||||
if sys.byteorder == 'little':
|
||||
self.encode = codecs.utf_32_le_encode
|
||||
if self.encoder is None:
|
||||
result = codecs.utf_32_encode(input, errors)
|
||||
if sys.byteorder == 'little':
|
||||
self.encoder = codecs.utf_32_le_encode
|
||||
else:
|
||||
self.encoder = codecs.utf_32_be_encode
|
||||
return result
|
||||
else:
|
||||
self.encode = codecs.utf_32_be_encode
|
||||
return result
|
||||
return self.encoder(input, errors)
|
||||
|
||||
class StreamReader(codecs.StreamReader):
|
||||
|
||||
|
|
|
@ -1604,8 +1604,8 @@ class BomTest(unittest.TestCase):
|
|||
"utf-32-le",
|
||||
"utf-32-be")
|
||||
for encoding in tests:
|
||||
with codecs.open('foo', 'w+', encoding=encoding) as f:
|
||||
# Check if the BOM is written only once
|
||||
# Check if the BOM is written only once
|
||||
with codecs.open(support.TESTFN, 'w+', encoding=encoding) as f:
|
||||
f.write(data)
|
||||
f.write(data)
|
||||
f.seek(0)
|
||||
|
@ -1613,6 +1613,42 @@ class BomTest(unittest.TestCase):
|
|||
f.seek(0)
|
||||
self.assertEquals(f.read(), data * 2)
|
||||
|
||||
# Check that the BOM is written after a seek(0)
|
||||
with codecs.open(support.TESTFN, 'w+', encoding=encoding) as f:
|
||||
f.write(data[0])
|
||||
self.assertNotEquals(f.tell(), 0)
|
||||
f.seek(0)
|
||||
f.write(data)
|
||||
f.seek(0)
|
||||
self.assertEquals(f.read(), data)
|
||||
|
||||
# (StreamWriter) Check that the BOM is written after a seek(0)
|
||||
with codecs.open(support.TESTFN, 'w+', encoding=encoding) as f:
|
||||
f.writer.write(data[0])
|
||||
self.assertNotEquals(f.writer.tell(), 0)
|
||||
f.writer.seek(0)
|
||||
f.writer.write(data)
|
||||
f.seek(0)
|
||||
self.assertEquals(f.read(), data)
|
||||
|
||||
# Check that the BOM is not written after a seek() at a position
|
||||
# different than the start
|
||||
with codecs.open(support.TESTFN, 'w+', encoding=encoding) as f:
|
||||
f.write(data)
|
||||
f.seek(f.tell())
|
||||
f.write(data)
|
||||
f.seek(0)
|
||||
self.assertEquals(f.read(), data * 2)
|
||||
|
||||
# (StreamWriter) Check that the BOM is not written after a seek()
|
||||
# at a position different than the start
|
||||
with codecs.open(support.TESTFN, 'w+', encoding=encoding) as f:
|
||||
f.writer.write(data)
|
||||
f.writer.seek(f.writer.tell())
|
||||
f.writer.write(data)
|
||||
f.seek(0)
|
||||
self.assertEquals(f.read(), data * 2)
|
||||
|
||||
|
||||
def test_main():
|
||||
support.run_unittest(
|
||||
|
|
|
@ -393,8 +393,9 @@ C-API
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #6268: Fix seek() method of codecs.open(), don't read the BOM twice
|
||||
after seek(0)
|
||||
- Issue #6268: Fix seek() method of codecs.open(), don't read or write the BOM
|
||||
twice after seek(0). Fix also reset() method of codecs, UTF-16, UTF-32 and
|
||||
StreamWriter classes.
|
||||
|
||||
- Issue #3798: sys.exit(message) writes the message to sys.stderr file, instead
|
||||
of the C file stderr, to use stderr encoding and error handler
|
||||
|
|
Loading…
Reference in New Issue