cpython/Lib/encodings/utf_32.py

"""
Python 'utf-32' Codec
"""
import codecs, sys

### Codec APIs

# Module-level encode entry point: bound directly to the built-in
# codecs.utf_32_encode, so calls go straight to it without a Python wrapper.
encode = codecs.utf_32_encode

def decode(input, errors='strict'):
    """Decode a complete UTF-32 byte sequence in one call.

    Delegates to ``codecs.utf_32_decode`` and returns whatever it returns
    (a ``(text, bytes_consumed)`` pair).  *errors* selects the error
    handling scheme and is forwarded unchanged.
    """
    # One-shot decoding: the input is always treated as the final chunk.
    final = True
    return codecs.utf_32_decode(input, errors, final)

class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-32 encoder.

    ``self.encoder`` is ``None`` until the first chunk has been encoded
    with ``codecs.utf_32_encode``.  After that it holds the little- or
    big-endian encode function matching ``sys.byteorder`` and every later
    chunk is encoded through it.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # No byte-order specific encoder has been chosen yet.
        self.encoder = None

    def encode(self, input, final=False):
        """Encode *input* and return the encoded bytes.

        *final* is accepted for interface compatibility and is not used.
        """
        if self.encoder is not None:
            # Byte order already fixed: use the order-specific encoder.
            return self.encoder(input, self.errors)[0]

        # First chunk: go through the generic encoder, then lock in the
        # platform's native byte order for all subsequent chunks.
        encoded = codecs.utf_32_encode(input, self.errors)[0]
        self.encoder = (codecs.utf_32_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_32_be_encode)
        return encoded

    def reset(self):
        """Return to the initial state (no encoder selected)."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return the encoder state as an integer.

        0: stream is in natural order for this platform
        2: endianness hasn't been determined yet
        (output is never written in unnatural order)
        """
        return 0 if self.encoder is not None else 2

    def setstate(self, state):
        """Restore a state previously returned by ``getstate()``.

        Any truthy *state* means "endianness not determined yet"; a falsy
        one selects the encoder for the platform's native byte order.
        """
        if not state:
            self.encoder = (codecs.utf_32_le_encode
                            if sys.byteorder == 'little'
                            else codecs.utf_32_be_encode)
        else:
            self.encoder = None

class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-32 decoder that detects byte order from the BOM.

    ``self.decoder`` is ``None`` until ``codecs.utf_32_ex_decode`` has
    reported a byte order.  After that it holds the little- or big-endian
    decode function used for the rest of the stream.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # Byte order has not been determined yet.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode *input* and return ``(output, bytes_consumed)``.

        Raises UnicodeError if at least four bytes were consumed without
        a byte order having been detected.
        """
        if self.decoder is None:
            (output, consumed, byteorder) = \
                codecs.utf_32_ex_decode(input, errors, 0, final)
            if byteorder == -1:
                self.decoder = codecs.utf_32_le_decode
            elif byteorder == 1:
                self.decoder = codecs.utf_32_be_decode
            elif consumed >= 4:
                raise UnicodeError("UTF-32 stream does not start with BOM")
            return (output, consumed)
        # Honour the ``errors`` argument on this path too, exactly as the
        # BOM-detection path above does, instead of silently substituting
        # self.errors.  The base class passes self.errors here, so calls
        # made through decode() behave as before.
        return self.decoder(input, errors, final)

    def reset(self):
        """Drop buffered input and forget the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return ``(buffered_bytes, byte_order_flag)``."""
        # additional state info from the base class must be None here,
        # as it isn't passed along to the caller
        state = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        # additional state info we pass to the caller:
        # 0: stream is in natural order for this platform
        # 1: stream is in unnatural order
        # 2: endianness hasn't been determined yet
        if self.decoder is None:
            return (state, 2)
        # The flag is 1 exactly when the platform byte order and the
        # selected decoder disagree.
        addstate = int((sys.byteorder == "big") !=
                       (self.decoder is codecs.utf_32_be_decode))
        return (state, addstate)

    def setstate(self, state):
        """Restore a ``(buffered_bytes, byte_order_flag)`` state."""
        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        state = state[1]
        if state == 0:
            # Natural order: decoder matching the platform byte order.
            self.decoder = (codecs.utf_32_be_decode
                            if sys.byteorder == "big"
                            else codecs.utf_32_le_decode)
        elif state == 1:
            # Unnatural order: decoder opposite to the platform byte order.
            self.decoder = (codecs.utf_32_le_decode
                            if sys.byteorder == "big"
                            else codecs.utf_32_be_decode)
        else:
            self.decoder = None

class StreamWriter(codecs.StreamWriter):
    """UTF-32 stream writer.

    ``self.encoder`` is ``None`` until the first ``encode()`` call, which
    uses ``codecs.utf_32_encode``.  Later calls use the little- or
    big-endian encode function matching ``sys.byteorder``.
    """

    def __init__(self, stream, errors='strict'):
        # No byte-order specific encoder has been chosen yet.
        self.encoder = None
        codecs.StreamWriter.__init__(self, stream, errors)

    def reset(self):
        """Reset the writer so the next encode() starts from scratch."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode *input* and return the underlying encoder's result tuple."""
        if self.encoder is not None:
            return self.encoder(input, errors)

        # First call: encode with the generic encoder, then remember the
        # native-order encoder for every following call.
        encoded = codecs.utf_32_encode(input, errors)
        self.encoder = (codecs.utf_32_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_32_be_encode)
        return encoded

class StreamReader(codecs.StreamReader):
    """UTF-32 stream reader that picks its decoder from the BOM.

    Once a byte order has been detected, ``decode`` is shadowed on the
    instance by the matching little- or big-endian decode function;
    ``reset()`` removes that instance attribute again.
    """

    def reset(self):
        """Reset the reader and discard any per-instance decoder."""
        codecs.StreamReader.reset(self)
        try:
            # Removes the instance-level override installed by decode().
            del self.decode
        except AttributeError:
            # Nothing was installed yet; there is nothing to remove.
            pass

    def decode(self, input, errors='strict'):
        """Decode *input*, detecting the byte order on the way.

        Returns ``(text, bytes_consumed)``.  Raises UnicodeError if at
        least four bytes were consumed without a byte order having been
        detected.
        """
        text, consumed, byteorder = codecs.utf_32_ex_decode(
            input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_32_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_32_be_decode
        elif consumed >= 4:
            raise UnicodeError("UTF-32 stream does not start with BOM")
        return (text, consumed)

### encodings module API

def getregentry():
    """Return the ``codecs.CodecInfo`` record for the 'utf-32' codec.

    The record bundles this module's stateless encode/decode functions
    with its incremental and stream codec classes.
    """
    registration = dict(
        name='utf-32',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
    return codecs.CodecInfo(**registration)
Apply SF patch #1775604: This adds three new codecs (utf-32, utf-32-le and ut-32-be). On narrow builds the codecs combine surrogate pairs in the unicode object into one codepoint on encoding and create surrogate pairs for codepoints outside the BMP on decoding. Lone surrogates are passed through unchanged in all cases. Backport to the trunk will follow. 2007-08-16 18:55:45 -03:00			`"""`
			`Python 'utf-32' Codec`
			`"""`
			`import codecs, sys`

			`### Codec APIs`

			`encode = codecs.utf_32_encode`

			`def decode(input, errors='strict'):`
			`return codecs.utf_32_decode(input, errors, True)`

			`class IncrementalEncoder(codecs.IncrementalEncoder):`
			`def __init__(self, errors='strict'):`
			`codecs.IncrementalEncoder.__init__(self, errors)`
			`self.encoder = None`

			`def encode(self, input, final=False):`
			`if self.encoder is None:`
			`result = codecs.utf_32_encode(input, self.errors)[0]`
			`if sys.byteorder == 'little':`
			`self.encoder = codecs.utf_32_le_encode`
			`else:`
			`self.encoder = codecs.utf_32_be_encode`
			`return result`
			`return self.encoder(input, self.errors)[0]`

			`def reset(self):`
			`codecs.IncrementalEncoder.reset(self)`
			`self.encoder = None`

			`def getstate(self):`
			`# state info we return to the caller:`
			`# 0: stream is in natural order for this platform`
			`# 2: endianness hasn't been determined yet`
			`# (we're never writing in unnatural order)`
			`return (2 if self.encoder is None else 0)`

			`def setstate(self, state):`
			`if state:`
			`self.encoder = None`
			`else:`
			`if sys.byteorder == 'little':`
			`self.encoder = codecs.utf_32_le_encode`
			`else:`
			`self.encoder = codecs.utf_32_be_encode`

			`class IncrementalDecoder(codecs.BufferedIncrementalDecoder):`
			`def __init__(self, errors='strict'):`
			`codecs.BufferedIncrementalDecoder.__init__(self, errors)`
			`self.decoder = None`

			`def _buffer_decode(self, input, errors, final):`
			`if self.decoder is None:`
			`(output, consumed, byteorder) = \`
			`codecs.utf_32_ex_decode(input, errors, 0, final)`
			`if byteorder == -1:`
			`self.decoder = codecs.utf_32_le_decode`
			`elif byteorder == 1:`
			`self.decoder = codecs.utf_32_be_decode`
			`elif consumed >= 4:`
			`raise UnicodeError("UTF-32 stream does not start with BOM")`
			`return (output, consumed)`
			`return self.decoder(input, self.errors, final)`

			`def reset(self):`
			`codecs.BufferedIncrementalDecoder.reset(self)`
			`self.decoder = None`

			`def getstate(self):`
Issue #27076: Doc, comment and tests spelling fixes Most fixes to Doc/ and Lib/ directories by Ville Skyttä. 2016-05-26 02:35:26 -03:00			`# additional state info from the base class must be None here,`
Apply SF patch #1775604: This adds three new codecs (utf-32, utf-32-le and ut-32-be). On narrow builds the codecs combine surrogate pairs in the unicode object into one codepoint on encoding and create surrogate pairs for codepoints outside the BMP on decoding. Lone surrogates are passed through unchanged in all cases. Backport to the trunk will follow. 2007-08-16 18:55:45 -03:00			`# as it isn't passed along to the caller`
			`state = codecs.BufferedIncrementalDecoder.getstate(self)[0]`
			`# additional state info we pass to the caller:`
			`# 0: stream is in natural order for this platform`
			`# 1: stream is in unnatural order`
			`# 2: endianness hasn't been determined yet`
			`if self.decoder is None:`
			`return (state, 2)`
			`addstate = int((sys.byteorder == "big") !=`
			`(self.decoder is codecs.utf_32_be_decode))`
			`return (state, addstate)`

			`def setstate(self, state):`
			`# state[1] will be ignored by BufferedIncrementalDecoder.setstate()`
			`codecs.BufferedIncrementalDecoder.setstate(self, state)`
			`state = state[1]`
			`if state == 0:`
			`self.decoder = (codecs.utf_32_be_decode`
			`if sys.byteorder == "big"`
			`else codecs.utf_32_le_decode)`
			`elif state == 1:`
			`self.decoder = (codecs.utf_32_le_decode`
			`if sys.byteorder == "big"`
			`else codecs.utf_32_be_decode)`
			`else:`
			`self.decoder = None`

			`class StreamWriter(codecs.StreamWriter):`
			`def __init__(self, stream, errors='strict'):`
Merged revisions 81471-81472 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r81471 \| victor.stinner \| 2010-05-22 15:37:56 +0200 (sam., 22 mai 2010) \| 7 lines Issue #6268: More bugfixes about BOM, UTF-16 and UTF-32 * Fix seek() method of codecs.open(), don't write the BOM twice after seek(0) * Fix reset() method of codecs, UTF-16, UTF-32 and StreamWriter classes * test_codecs: use "w+" mode instead of "wt+". "t" mode is not supported by Solaris or Windows, but does it really exist? I found it the in the issue. ........ r81472 \| victor.stinner \| 2010-05-22 15:44:25 +0200 (sam., 22 mai 2010) \| 4 lines Fix my last commit (r81471) about codecs Rememder: don't touch the code just before a commit ........ 2010-05-22 13:59:09 -03:00			`self.encoder = None`
Apply SF patch #1775604: This adds three new codecs (utf-32, utf-32-le and ut-32-be). On narrow builds the codecs combine surrogate pairs in the unicode object into one codepoint on encoding and create surrogate pairs for codepoints outside the BMP on decoding. Lone surrogates are passed through unchanged in all cases. Backport to the trunk will follow. 2007-08-16 18:55:45 -03:00			`codecs.StreamWriter.__init__(self, stream, errors)`

Merged revisions 81471-81472 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r81471 \| victor.stinner \| 2010-05-22 15:37:56 +0200 (sam., 22 mai 2010) \| 7 lines Issue #6268: More bugfixes about BOM, UTF-16 and UTF-32 * Fix seek() method of codecs.open(), don't write the BOM twice after seek(0) * Fix reset() method of codecs, UTF-16, UTF-32 and StreamWriter classes * test_codecs: use "w+" mode instead of "wt+". "t" mode is not supported by Solaris or Windows, but does it really exist? I found it the in the issue. ........ r81472 \| victor.stinner \| 2010-05-22 15:44:25 +0200 (sam., 22 mai 2010) \| 4 lines Fix my last commit (r81471) about codecs Rememder: don't touch the code just before a commit ........ 2010-05-22 13:59:09 -03:00			`def reset(self):`
			`codecs.StreamWriter.reset(self)`
			`self.encoder = None`

Apply SF patch #1775604: This adds three new codecs (utf-32, utf-32-le and ut-32-be). On narrow builds the codecs combine surrogate pairs in the unicode object into one codepoint on encoding and create surrogate pairs for codepoints outside the BMP on decoding. Lone surrogates are passed through unchanged in all cases. Backport to the trunk will follow. 2007-08-16 18:55:45 -03:00			`def encode(self, input, errors='strict'):`
Merged revisions 81471-81472 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r81471 \| victor.stinner \| 2010-05-22 15:37:56 +0200 (sam., 22 mai 2010) \| 7 lines Issue #6268: More bugfixes about BOM, UTF-16 and UTF-32 * Fix seek() method of codecs.open(), don't write the BOM twice after seek(0) * Fix reset() method of codecs, UTF-16, UTF-32 and StreamWriter classes * test_codecs: use "w+" mode instead of "wt+". "t" mode is not supported by Solaris or Windows, but does it really exist? I found it the in the issue. ........ r81472 \| victor.stinner \| 2010-05-22 15:44:25 +0200 (sam., 22 mai 2010) \| 4 lines Fix my last commit (r81471) about codecs Rememder: don't touch the code just before a commit ........ 2010-05-22 13:59:09 -03:00			`if self.encoder is None:`
			`result = codecs.utf_32_encode(input, errors)`
			`if sys.byteorder == 'little':`
			`self.encoder = codecs.utf_32_le_encode`
			`else:`
			`self.encoder = codecs.utf_32_be_encode`
			`return result`
Apply SF patch #1775604: This adds three new codecs (utf-32, utf-32-le and ut-32-be). On narrow builds the codecs combine surrogate pairs in the unicode object into one codepoint on encoding and create surrogate pairs for codepoints outside the BMP on decoding. Lone surrogates are passed through unchanged in all cases. Backport to the trunk will follow. 2007-08-16 18:55:45 -03:00			`else:`
Merged revisions 81471-81472 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r81471 \| victor.stinner \| 2010-05-22 15:37:56 +0200 (sam., 22 mai 2010) \| 7 lines Issue #6268: More bugfixes about BOM, UTF-16 and UTF-32 * Fix seek() method of codecs.open(), don't write the BOM twice after seek(0) * Fix reset() method of codecs, UTF-16, UTF-32 and StreamWriter classes * test_codecs: use "w+" mode instead of "wt+". "t" mode is not supported by Solaris or Windows, but does it really exist? I found it the in the issue. ........ r81472 \| victor.stinner \| 2010-05-22 15:44:25 +0200 (sam., 22 mai 2010) \| 4 lines Fix my last commit (r81471) about codecs Rememder: don't touch the code just before a commit ........ 2010-05-22 13:59:09 -03:00			`return self.encoder(input, errors)`
Apply SF patch #1775604: This adds three new codecs (utf-32, utf-32-le and ut-32-be). On narrow builds the codecs combine surrogate pairs in the unicode object into one codepoint on encoding and create surrogate pairs for codepoints outside the BMP on decoding. Lone surrogates are passed through unchanged in all cases. Backport to the trunk will follow. 2007-08-16 18:55:45 -03:00
			`class StreamReader(codecs.StreamReader):`

			`def reset(self):`
			`codecs.StreamReader.reset(self)`
			`try:`
			`del self.decode`
			`except AttributeError:`
			`pass`

			`def decode(self, input, errors='strict'):`
			`(object, consumed, byteorder) = \`
			`codecs.utf_32_ex_decode(input, errors, 0, False)`
			`if byteorder == -1:`
			`self.decode = codecs.utf_32_le_decode`
			`elif byteorder == 1:`
Fix stupid typo in Lib/encodings/utf_32.py which led to failing tests on big endian machines. Update documentation: UTF-32 codecs will be in 2.6. 2007-08-17 13:23:21 -03:00			`self.decode = codecs.utf_32_be_decode`
Apply SF patch #1775604: This adds three new codecs (utf-32, utf-32-le and ut-32-be). On narrow builds the codecs combine surrogate pairs in the unicode object into one codepoint on encoding and create surrogate pairs for codepoints outside the BMP on decoding. Lone surrogates are passed through unchanged in all cases. Backport to the trunk will follow. 2007-08-16 18:55:45 -03:00			`elif consumed>=4:`
More raise statement normalization. 2007-08-30 15:18:27 -03:00			`raise UnicodeError("UTF-32 stream does not start with BOM")`
Apply SF patch #1775604: This adds three new codecs (utf-32, utf-32-le and ut-32-be). On narrow builds the codecs combine surrogate pairs in the unicode object into one codepoint on encoding and create surrogate pairs for codepoints outside the BMP on decoding. Lone surrogates are passed through unchanged in all cases. Backport to the trunk will follow. 2007-08-16 18:55:45 -03:00			`return (object, consumed)`

			`### encodings module API`

			`def getregentry():`
			`return codecs.CodecInfo(`
			`name='utf-32',`
			`encode=encode,`
			`decode=decode,`
			`incrementalencoder=IncrementalEncoder,`
			`incrementaldecoder=IncrementalDecoder,`
			`streamreader=StreamReader,`
			`streamwriter=StreamWriter,`
			`)`