Close #7475: Restore binary & text transform codecs
The codecs themselves were restored in Python 3.2; this commit completes the restoration by adding back the convenience aliases. These aliases were originally left out due to confusing errors when attempting to use them with the text encoding specific convenience methods. Python 3.4 includes several improvements to those errors, thus permitting the aliases to be restored as well.
This commit is contained in:
parent
12820c0d5d
commit
9c1aed8f94
|
@ -1188,6 +1188,9 @@ common use case for codecs, the underlying codec infrastructure supports
|
|||
arbitrary data transforms rather than just text encodings). For asymmetric
|
||||
codecs, the stated purpose describes the encoding direction.
|
||||
|
||||
Text Encodings
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
The following codecs provide :class:`str` to :class:`bytes` encoding and
|
||||
:term:`bytes-like object` to :class:`str` decoding, similar to the Unicode text
|
||||
encodings.
|
||||
|
@ -1234,62 +1237,83 @@ encodings.
|
|||
| | | .. deprecated:: 3.3 |
|
||||
+--------------------+---------+---------------------------+
|
||||
|
||||
The following codecs provide :term:`bytes-like object` to :class:`bytes`
|
||||
mappings.
|
||||
.. _binary-transforms:
|
||||
|
||||
Binary Transforms
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
The following codecs provide binary transforms: :term:`bytes-like object`
|
||||
to :class:`bytes` mappings.
|
||||
|
||||
|
||||
.. tabularcolumns:: |l|L|L|
|
||||
.. tabularcolumns:: |l|L|L|L|
|
||||
|
||||
+----------------------+------------------------------+------------------------------+
|
||||
| Codec | Purpose | Encoder / decoder |
|
||||
+======================+==============================+==============================+
|
||||
| base64_codec [#b64]_ | Convert operand to MIME | :meth:`base64.b64encode` / |
|
||||
| | base64 (the result always | :meth:`base64.b64decode` |
|
||||
| | includes a trailing | |
|
||||
| | ``'\n'``) | |
|
||||
| | | |
|
||||
| | .. versionchanged:: 3.4 | |
|
||||
| | accepts any | |
|
||||
| | :term:`bytes-like object` | |
|
||||
| | as input for encoding and | |
|
||||
| | decoding | |
|
||||
+----------------------+------------------------------+------------------------------+
|
||||
| bz2_codec | Compress the operand | :meth:`bz2.compress` / |
|
||||
| | using bz2 | :meth:`bz2.decompress` |
|
||||
+----------------------+------------------------------+------------------------------+
|
||||
| hex_codec | Convert operand to | :meth:`base64.b16encode` / |
|
||||
| | hexadecimal | :meth:`base64.b16decode` |
|
||||
| | representation, with two | |
|
||||
| | digits per byte | |
|
||||
+----------------------+------------------------------+------------------------------+
|
||||
| quopri_codec | Convert operand to MIME | :meth:`quopri.encodestring` /|
|
||||
| | quoted printable | :meth:`quopri.decodestring` |
|
||||
+----------------------+------------------------------+------------------------------+
|
||||
| uu_codec | Convert the operand using | :meth:`uu.encode` / |
|
||||
| | uuencode | :meth:`uu.decode` |
|
||||
+----------------------+------------------------------+------------------------------+
|
||||
| zlib_codec | Compress the operand | :meth:`zlib.compress` / |
|
||||
| | using gzip | :meth:`zlib.decompress` |
|
||||
+----------------------+------------------------------+------------------------------+
|
||||
+----------------------+------------------+------------------------------+------------------------------+
|
||||
| Codec | Aliases | Purpose | Encoder / decoder |
|
||||
+======================+==================+==============================+==============================+
|
||||
| base64_codec [#b64]_ | base64, base_64 | Convert operand to MIME | :meth:`base64.b64encode` / |
|
||||
| | | base64 (the result always | :meth:`base64.b64decode` |
|
||||
| | | includes a trailing | |
|
||||
| | | ``'\n'``) | |
|
||||
| | | | |
|
||||
| | | .. versionchanged:: 3.4 | |
|
||||
| | | accepts any | |
|
||||
| | | :term:`bytes-like object` | |
|
||||
| | | as input for encoding and | |
|
||||
| | | decoding | |
|
||||
+----------------------+------------------+------------------------------+------------------------------+
|
||||
| bz2_codec | bz2 | Compress the operand | :meth:`bz2.compress` / |
|
||||
| | | using bz2 | :meth:`bz2.decompress` |
|
||||
+----------------------+------------------+------------------------------+------------------------------+
|
||||
| hex_codec | hex | Convert operand to | :meth:`base64.b16encode` / |
|
||||
| | | hexadecimal | :meth:`base64.b16decode` |
|
||||
| | | representation, with two | |
|
||||
| | | digits per byte | |
|
||||
+----------------------+------------------+------------------------------+------------------------------+
|
||||
| quopri_codec | quopri, | Convert operand to MIME | :meth:`quopri.encodestring` /|
|
||||
| | quotedprintable, | quoted printable | :meth:`quopri.decodestring` |
|
||||
| | quoted_printable | | |
|
||||
+----------------------+------------------+------------------------------+------------------------------+
|
||||
| uu_codec | uu | Convert the operand using | :meth:`uu.encode` / |
|
||||
| | | uuencode | :meth:`uu.decode` |
|
||||
+----------------------+------------------+------------------------------+------------------------------+
|
||||
| zlib_codec | zip, zlib | Compress the operand | :meth:`zlib.compress` / |
|
||||
| | | using gzip | :meth:`zlib.decompress` |
|
||||
+----------------------+------------------+------------------------------+------------------------------+
|
||||
|
||||
.. [#b64] In addition to :term:`bytes-like objects <bytes-like object>`,
|
||||
``'base64_codec'`` also accepts ASCII-only instances of :class:`str` for
|
||||
decoding.
|
||||
|
||||
.. versionadded:: 3.2
|
||||
Restoration of the binary transforms.
|
||||
|
||||
The following codecs provide :class:`str` to :class:`str` mappings.
|
||||
.. versionchanged:: 3.4
|
||||
Restoration of the aliases for the binary transforms.
|
||||
|
||||
.. tabularcolumns:: |l|L|
|
||||
|
||||
+--------------------+---------------------------+
|
||||
| Codec | Purpose |
|
||||
+====================+===========================+
|
||||
| rot_13 | Returns the Caesar-cypher |
|
||||
| | encryption of the operand |
|
||||
+--------------------+---------------------------+
|
||||
.. _text-transforms:
|
||||
|
||||
Text Transforms
|
||||
^^^^^^^^^^^^^^^
|
||||
|
||||
The following codec provides a text transform: a :class:`str` to :class:`str`
|
||||
mapping.
|
||||
|
||||
.. tabularcolumns:: |l|l|L|
|
||||
|
||||
+--------------------+---------+---------------------------+
|
||||
| Codec | Aliases | Purpose |
|
||||
+====================+=========+===========================+
|
||||
| rot_13 | rot13 | Returns the Caesar-cypher |
|
||||
| | | encryption of the operand |
|
||||
+--------------------+---------+---------------------------+
|
||||
|
||||
.. versionadded:: 3.2
|
||||
bytes-to-bytes and str-to-str codecs.
|
||||
Restoration of the ``rot_13`` text transform.
|
||||
|
||||
.. versionchanged:: 3.4
|
||||
Restoration of the ``rot13`` alias.
|
||||
|
||||
|
||||
:mod:`encodings.idna` --- Internationalized Domain Names in Applications
|
||||
|
|
|
@ -103,7 +103,8 @@ New expected features for Python implementations:
|
|||
* :ref:`PEP 446: Make newly created file descriptors non-inheritable <pep-446>`.
|
||||
* command line option for :ref:`isolated mode <using-on-misc-options>`,
|
||||
(:issue:`16499`).
|
||||
* improvements to handling of non-Unicode codecs
|
||||
* :ref:`improvements <codec-handling-improvements>` in the handling of
|
||||
codecs that are not text encodings
|
||||
|
||||
Significantly Improved Library Modules:
|
||||
|
||||
|
@ -173,8 +174,10 @@ PEP 446: Make newly created file descriptors non-inheritable
|
|||
PEP written and implemented by Victor Stinner.
|
||||
|
||||
|
||||
Improvements to handling of non-Unicode codecs
|
||||
==============================================
|
||||
.. _codec-handling-improvements:
|
||||
|
||||
Improvements to codec handling
|
||||
==============================
|
||||
|
||||
Since it was first introduced, the :mod:`codecs` module has always been
|
||||
intended to operate as a type-neutral dynamic encoding and decoding
|
||||
|
@ -186,7 +189,7 @@ fact.
|
|||
As a key step in clarifying the situation, the :meth:`codecs.encode` and
|
||||
:meth:`codecs.decode` convenience functions are now properly documented in
|
||||
Python 2.7, 3.3 and 3.4. These functions have existed in the :mod:`codecs`
|
||||
module and have been covered by the regression test suite since Python 2.4,
|
||||
module (and have been covered by the regression test suite) since Python 2.4,
|
||||
but were previously only discoverable through runtime introspection.
|
||||
|
||||
Unlike the convenience methods on :class:`str`, :class:`bytes` and
|
||||
|
@ -199,43 +202,58 @@ In Python 3.4, the interpreter is able to identify the known non-text
|
|||
encodings provided in the standard library and direct users towards these
|
||||
general purpose convenience functions when appropriate::
|
||||
|
||||
>>> import codecs
|
||||
|
||||
>>> b"abcdef".decode("hex_codec")
|
||||
>>> b"abcdef".decode("hex")
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
LookupError: 'hex_codec' is not a text encoding; use codecs.decode() to handle arbitrary codecs
|
||||
LookupError: 'hex' is not a text encoding; use codecs.decode() to handle arbitrary codecs
|
||||
|
||||
>>> "hello".encode("rot_13")
|
||||
>>> "hello".encode("rot13")
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
LookupError: 'rot_13' is not a text encoding; use codecs.encode() to handle arbitrary codecs
|
||||
LookupError: 'rot13' is not a text encoding; use codecs.encode() to handle arbitrary codecs
|
||||
|
||||
In a related change, whenever it is feasible without breaking backwards
|
||||
compatibility, exceptions raised during encoding and decoding operations
|
||||
will be wrapped in a chained exception of the same type that mentions the
|
||||
name of the codec responsible for producing the error::
|
||||
|
||||
>>> codecs.decode(b"abcdefgh", "hex_codec")
|
||||
>>> import codecs
|
||||
|
||||
>>> codecs.decode(b"abcdefgh", "hex")
|
||||
binascii.Error: Non-hexadecimal digit found
|
||||
|
||||
The above exception was the direct cause of the following exception:
|
||||
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
binascii.Error: decoding with 'hex_codec' codec failed (Error: Non-hexadecimal digit found)
|
||||
binascii.Error: decoding with 'hex' codec failed (Error: Non-hexadecimal digit found)
|
||||
|
||||
>>> codecs.encode("hello", "bz2_codec")
|
||||
>>> codecs.encode("hello", "bz2")
|
||||
TypeError: 'str' does not support the buffer interface
|
||||
|
||||
The above exception was the direct cause of the following exception:
|
||||
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
TypeError: encoding with 'bz2_codec' codec failed (TypeError: 'str' does not support the buffer interface)
|
||||
TypeError: encoding with 'bz2' codec failed (TypeError: 'str' does not support the buffer interface)
|
||||
|
||||
(Contributed by Nick Coghlan in :issue:`17827`, :issue:`17828` and
|
||||
:issue:`19619`)
|
||||
Finally, as the examples above show, these improvements have permitted
|
||||
the restoration of the convenience aliases for the non-Unicode codecs that
|
||||
were themselves restored in Python 3.2. This means that converting binary data
|
||||
to and from its hexadecimal representation (for example) can now be written
|
||||
as::
|
||||
|
||||
>>> from codecs import encode, decode
|
||||
>>> encode(b"hello", "hex")
|
||||
b'68656c6c6f'
|
||||
>>> decode(b"68656c6c6f", "hex")
|
||||
b'hello'
|
||||
|
||||
The binary and text transforms provided in the standard library are detailed
|
||||
in :ref:`binary-transforms` and :ref:`text-transforms`.
|
||||
|
||||
(Contributed by Nick Coghlan in :issue:`7475`, :issue:`17827`,
|
||||
:issue:`17828` and :issue:`19619`)
|
||||
|
||||
.. _pep-451:
|
||||
|
||||
|
|
|
@ -33,9 +33,9 @@ aliases = {
|
|||
'us' : 'ascii',
|
||||
'us_ascii' : 'ascii',
|
||||
|
||||
## base64_codec codec
|
||||
#'base64' : 'base64_codec',
|
||||
#'base_64' : 'base64_codec',
|
||||
# base64_codec codec
|
||||
'base64' : 'base64_codec',
|
||||
'base_64' : 'base64_codec',
|
||||
|
||||
# big5 codec
|
||||
'big5_tw' : 'big5',
|
||||
|
@ -45,8 +45,8 @@ aliases = {
|
|||
'big5_hkscs' : 'big5hkscs',
|
||||
'hkscs' : 'big5hkscs',
|
||||
|
||||
## bz2_codec codec
|
||||
#'bz2' : 'bz2_codec',
|
||||
# bz2_codec codec
|
||||
'bz2' : 'bz2_codec',
|
||||
|
||||
# cp037 codec
|
||||
'037' : 'cp037',
|
||||
|
@ -248,8 +248,8 @@ aliases = {
|
|||
'cp936' : 'gbk',
|
||||
'ms936' : 'gbk',
|
||||
|
||||
## hex_codec codec
|
||||
#'hex' : 'hex_codec',
|
||||
# hex_codec codec
|
||||
'hex' : 'hex_codec',
|
||||
|
||||
# hp_roman8 codec
|
||||
'roman8' : 'hp_roman8',
|
||||
|
@ -450,13 +450,13 @@ aliases = {
|
|||
'cp154' : 'ptcp154',
|
||||
'cyrillic_asian' : 'ptcp154',
|
||||
|
||||
## quopri_codec codec
|
||||
#'quopri' : 'quopri_codec',
|
||||
#'quoted_printable' : 'quopri_codec',
|
||||
#'quotedprintable' : 'quopri_codec',
|
||||
# quopri_codec codec
|
||||
'quopri' : 'quopri_codec',
|
||||
'quoted_printable' : 'quopri_codec',
|
||||
'quotedprintable' : 'quopri_codec',
|
||||
|
||||
## rot_13 codec
|
||||
#'rot13' : 'rot_13',
|
||||
# rot_13 codec
|
||||
'rot13' : 'rot_13',
|
||||
|
||||
# shift_jis codec
|
||||
'csshiftjis' : 'shift_jis',
|
||||
|
@ -518,12 +518,12 @@ aliases = {
|
|||
'utf8_ucs2' : 'utf_8',
|
||||
'utf8_ucs4' : 'utf_8',
|
||||
|
||||
## uu_codec codec
|
||||
#'uu' : 'uu_codec',
|
||||
# uu_codec codec
|
||||
'uu' : 'uu_codec',
|
||||
|
||||
## zlib_codec codec
|
||||
#'zip' : 'zlib_codec',
|
||||
#'zlib' : 'zlib_codec',
|
||||
# zlib_codec codec
|
||||
'zip' : 'zlib_codec',
|
||||
'zlib' : 'zlib_codec',
|
||||
|
||||
# temporary mac CJK aliases, will be replaced by proper codecs in 3.1
|
||||
'x_mac_japanese' : 'shift_jis',
|
||||
|
|
|
@ -2320,18 +2320,29 @@ bytes_transform_encodings = [
|
|||
"quopri_codec",
|
||||
"hex_codec",
|
||||
]
|
||||
|
||||
transform_aliases = {
|
||||
"base64_codec": ["base64", "base_64"],
|
||||
"uu_codec": ["uu"],
|
||||
"quopri_codec": ["quopri", "quoted_printable", "quotedprintable"],
|
||||
"hex_codec": ["hex"],
|
||||
"rot_13": ["rot13"],
|
||||
}
|
||||
|
||||
try:
|
||||
import zlib
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
bytes_transform_encodings.append("zlib_codec")
|
||||
transform_aliases["zlib_codec"] = ["zip", "zlib"]
|
||||
try:
|
||||
import bz2
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
bytes_transform_encodings.append("bz2_codec")
|
||||
transform_aliases["bz2_codec"] = ["bz2"]
|
||||
|
||||
class TransformCodecTest(unittest.TestCase):
|
||||
|
||||
|
@ -2445,6 +2456,15 @@ class TransformCodecTest(unittest.TestCase):
|
|||
# Unfortunately, the bz2 module throws OSError, which the codec
|
||||
# machinery currently can't wrap :(
|
||||
|
||||
# Ensure codec aliases from http://bugs.python.org/issue7475 work
|
||||
def test_aliases(self):
|
||||
for codec_name, aliases in transform_aliases.items():
|
||||
expected_name = codecs.lookup(codec_name).name
|
||||
for alias in aliases:
|
||||
with self.subTest(alias=alias):
|
||||
info = codecs.lookup(alias)
|
||||
self.assertEqual(info.name, expected_name)
|
||||
|
||||
|
||||
# The codec system tries to wrap exceptions in order to ensure the error
|
||||
# mentions the operation being performed and the codec involved. We
|
||||
|
|
Loading…
Reference in New Issue