mirror of https://github.com/python/cpython
This patch by Martin v. Loewis changes the UTF-16 codec to only
write a BOM at the start of the stream and also to only read it as a BOM at the start of a stream. Any BOM code point read or written after that point is treated as an ordinary ZWNBSP (zero-width no-break space) character. This is in sync with the Unicode specifications. Note that UTF-16 files will now *have* to start with a BOM in order to be readable by the codec.
This commit is contained in:
parent
8c78d3a5d1
commit
92b550cdd8
|
@ -6,7 +6,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
|
|||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""
|
||||
import codecs
|
||||
import codecs, sys
|
||||
|
||||
### Codec APIs
|
||||
|
||||
|
@ -18,10 +18,40 @@ class Codec(codecs.Codec):
|
|||
decode = codecs.utf_16_decode
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer for UTF-16 that changes encoder after the first write.

    The first call to write() goes through whichever ``encode`` is in
    effect at that time.  Once that call has completed, ``encode`` is
    rebound on the instance to the encoder matching the platform's native
    byte order (``sys.byteorder``), and ``bom_written`` records that the
    switch has happened so it is done only once per writer.
    """

    def __init__(self, stream, errors='strict'):
        # Flag is 0 until the first write() has gone through.
        self.bom_written = 0
        codecs.StreamWriter.__init__(self, stream, errors)

    def write(self, data):
        """Encode *data*, write it to the stream, and return the base
        class's result."""
        outcome = codecs.StreamWriter.write(self, data)
        if self.bom_written:
            # The encoder was already switched by an earlier write.
            return outcome

        # First successful write: pin the encoder to the native byte order
        # for every write that follows.
        self.bom_written = 1
        if sys.byteorder != 'little':
            self.encode = codecs.utf_16_be_encode
        else:
            self.encode = codecs.utf_16_le_encode
        return outcome
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader for UTF-16 that requires a leading byte order mark.

    On the first read() the first two bytes of the underlying stream are
    consumed and compared against ``codecs.BOM_BE`` and ``codecs.BOM_LE``.
    The matching byte-order-specific decoder is then bound to
    ``self.decode`` for the rest of the stream.  ``bom_read`` records that
    this detection has been done.
    """

    def __init__(self, stream, errors='strict'):
        # Flag is 0 until the signature has been read and accepted.
        self.bom_read = 0
        codecs.StreamReader.__init__(self, stream, errors)

    def read(self, size=-1):
        """Read and decode data from the stream.

        size -- passed on to codecs.StreamReader.read(); on the first call
                it is reduced to account for the two signature bytes.

        Raises UnicodeError if the first two bytes of the stream are
        neither codecs.BOM_BE nor codecs.BOM_LE.
        """
        if not self.bom_read:
            signature = self.stream.read(2)
            if signature == codecs.BOM_BE:
                self.decode = codecs.utf_16_be_decode
            elif signature == codecs.BOM_LE:
                self.decode = codecs.utf_16_le_decode
            else:
                # Call form of raise: same behaviour as the old
                # "raise Class, value" statement, but also valid syntax
                # on Python 3.
                raise UnicodeError("UTF-16 stream does not start with BOM")
            # The two signature bytes count against the caller's request.
            # A negative size is left untouched and passed on as given.
            if size > 2:
                size -= 2
            elif size >= 0:
                size = 0
            # Only reached when a valid signature was found; after a
            # failure the next read() attempts detection again.
            self.bom_read = 1
        return codecs.StreamReader.read(self, size)
|
||||
|
||||
### encodings module API
|
||||
|
||||
|
|
Loading…
Reference in New Issue