Rip out all codecs that can't work in a unicode/bytes world:

base64, uu, zlib, rot_13, hex, quopri, bz2, string_escape.

However codecs.escape_encode() and codecs.escape_decode()
still exist, as they are used for pickling str8 objects
(so those two functions can go, when the str8 type is removed).
This commit is contained in:
Walter Dörwald 2007-06-12 16:40:17 +00:00
parent 6e8fcae38f
commit 42748a8d6d
11 changed files with 6 additions and 730 deletions

View File

@ -1214,22 +1214,6 @@ listed as operand type in the table.
\begin{tableiv}{l|l|l|l}{textrm}{Codec}{Aliases}{Operand type}{Purpose}
\lineiv{base64_codec}
{base64, base-64}
{byte string}
{Convert operand to MIME base64}
\lineiv{bz2_codec}
{bz2}
{byte string}
{Compress the operand using bz2}
\lineiv{hex_codec}
{hex}
{byte string}
{Convert operand to hexadecimal representation, with two
digits per byte}
\lineiv{idna}
{}
{Unicode string}
@ -1251,34 +1235,18 @@ listed as operand type in the table.
{Unicode string}
{Implements \rfc{3492}}
\lineiv{quopri_codec}
{quopri, quoted-printable, quotedprintable}
{byte string}
{Convert operand to MIME quoted printable}
\lineiv{raw_unicode_escape}
{}
{Unicode string}
{Produce a string that is suitable as raw Unicode literal in
Python source code}
\lineiv{rot_13}
{rot13}
{Unicode string}
{Returns the Caesar-cypher encryption of the operand}
\lineiv{string_escape}
{}
{byte string}
{Produce a string that is suitable as string literal in
Python source code}
\lineiv{undefined}
{}
{any}
{Raise an exception for all conversions. Can be used as the
system encoding if no automatic coercion between byte and
Unicode strings is desired.}
Unicode strings is desired.}
\lineiv{unicode_escape}
{}
@ -1290,17 +1258,6 @@ listed as operand type in the table.
{}
{Unicode string}
{Return the internal representation of the operand}
\lineiv{uu_codec}
{uu}
{byte string}
{Convert the operand using uuencode}
\lineiv{zlib_codec}
{zip, zlib}
{byte string}
{Compress the operand using gzip}
\end{tableiv}
\versionadded[The \code{idna} and \code{punycode} encodings]{2.3}

View File

@ -1,79 +0,0 @@
""" Python 'base64_codec' Codec - base64 content transfer encoding
Unlike most of the other codecs which target Unicode, this codec
will return Python string objects for both encode and decode.
Written by Marc-Andre Lemburg (mal@lemburg.com).
"""
import codecs, base64
### Codec APIs
def base64_encode(input,errors='strict'):
""" Encodes the object input and returns a tuple (output
object, length consumed).
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
output = bytes(base64.encodestring(input))
return (output, len(input))
def base64_decode(input,errors='strict'):
""" Decodes the object input and returns a tuple (output
object, length consumed).
input must be an object which provides the bf_getreadbuf
buffer slot. Python strings, buffer objects and memory
mapped files are examples of objects providing this slot.
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
output = base64.decodestring(input)
return (output, len(input))
class Codec(codecs.Codec):
def encode(self, input,errors='strict'):
return base64_encode(input,errors)
def decode(self, input,errors='strict'):
return base64_decode(input,errors)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
assert self.errors == 'strict'
return base64.encodestring(input)
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
assert self.errors == 'strict'
return base64.decodestring(input)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='base64',
encode=base64_encode,
decode=base64_decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)

View File

@ -1,79 +0,0 @@
""" Python 'hex_codec' Codec - 2-digit hex content transfer encoding
Unlike most of the other codecs which target Unicode, this codec
will return Python string objects for both encode and decode.
Written by Marc-Andre Lemburg (mal@lemburg.com).
"""
import codecs, binascii
### Codec APIs
def hex_encode(input,errors='strict'):
""" Encodes the object input and returns a tuple (output
object, length consumed).
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
output = binascii.b2a_hex(input)
return (output, len(input))
def hex_decode(input,errors='strict'):
""" Decodes the object input and returns a tuple (output
object, length consumed).
input must be an object which provides the bf_getreadbuf
buffer slot. Python strings, buffer objects and memory
mapped files are examples of objects providing this slot.
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
output = binascii.a2b_hex(input)
return (output, len(input))
class Codec(codecs.Codec):
def encode(self, input,errors='strict'):
return hex_encode(input,errors)
def decode(self, input,errors='strict'):
return hex_decode(input,errors)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
assert self.errors == 'strict'
return binascii.b2a_hex(input)
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
assert self.errors == 'strict'
return binascii.a2b_hex(input)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='hex',
encode=hex_encode,
decode=hex_decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)

View File

@ -1,74 +0,0 @@
"""Codec for quoted-printable encoding.
Like base64 and rot13, this returns Python strings, not Unicode.
"""
import codecs, quopri
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
def quopri_encode(input, errors='strict'):
"""Encode the input, returning a tuple (output object, length consumed).
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
f = StringIO(input)
g = StringIO()
quopri.encode(f, g, 1)
output = g.getvalue()
return (output, len(input))
def quopri_decode(input, errors='strict'):
"""Decode the input, returning a tuple (output object, length consumed).
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
f = StringIO(input)
g = StringIO()
quopri.decode(f, g)
output = g.getvalue()
return (output, len(input))
class Codec(codecs.Codec):
def encode(self, input,errors='strict'):
return quopri_encode(input,errors)
def decode(self, input,errors='strict'):
return quopri_decode(input,errors)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return quopri_encode(input, self.errors)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return quopri_decode(input, self.errors)[0]
class StreamWriter(Codec, codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
# encodings module API
def getregentry():
return codecs.CodecInfo(
name='quopri',
encode=quopri_encode,
decode=quopri_decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)

View File

@ -1,118 +0,0 @@
#!/usr/bin/env python
""" Python Character Mapping Codec for ROT13.
See http://ucsub.colorado.edu/~kominek/rot13/ for details.
Written by Marc-Andre Lemburg (mal@lemburg.com).
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_map)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='rot-13',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0041: 0x004e,
0x0042: 0x004f,
0x0043: 0x0050,
0x0044: 0x0051,
0x0045: 0x0052,
0x0046: 0x0053,
0x0047: 0x0054,
0x0048: 0x0055,
0x0049: 0x0056,
0x004a: 0x0057,
0x004b: 0x0058,
0x004c: 0x0059,
0x004d: 0x005a,
0x004e: 0x0041,
0x004f: 0x0042,
0x0050: 0x0043,
0x0051: 0x0044,
0x0052: 0x0045,
0x0053: 0x0046,
0x0054: 0x0047,
0x0055: 0x0048,
0x0056: 0x0049,
0x0057: 0x004a,
0x0058: 0x004b,
0x0059: 0x004c,
0x005a: 0x004d,
0x0061: 0x006e,
0x0062: 0x006f,
0x0063: 0x0070,
0x0064: 0x0071,
0x0065: 0x0072,
0x0066: 0x0073,
0x0067: 0x0074,
0x0068: 0x0075,
0x0069: 0x0076,
0x006a: 0x0077,
0x006b: 0x0078,
0x006c: 0x0079,
0x006d: 0x007a,
0x006e: 0x0061,
0x006f: 0x0062,
0x0070: 0x0063,
0x0071: 0x0064,
0x0072: 0x0065,
0x0073: 0x0066,
0x0074: 0x0067,
0x0075: 0x0068,
0x0076: 0x0069,
0x0077: 0x006a,
0x0078: 0x006b,
0x0079: 0x006c,
0x007a: 0x006d,
})
### Encoding Map
encoding_map = codecs.make_encoding_map(decoding_map)
### Filter API
def rot13(infile, outfile):
outfile.write(infile.read().encode('rot-13'))
if __name__ == '__main__':
import sys
rot13(sys.stdin, sys.stdout)

View File

@ -1,38 +0,0 @@
# -*- coding: iso-8859-1 -*-
""" Python 'escape' Codec
Written by Martin v. Löwis (martin@v.loewis.de).
"""
import codecs
class Codec(codecs.Codec):
encode = codecs.escape_encode
decode = codecs.escape_decode
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.escape_encode(input, self.errors)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.escape_decode(input, self.errors)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
def getregentry():
return codecs.CodecInfo(
name='string-escape',
encode=Codec.encode,
decode=Codec.decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)

View File

@ -1,128 +0,0 @@
""" Python 'uu_codec' Codec - UU content transfer encoding
Unlike most of the other codecs which target Unicode, this codec
will return Python string objects for both encode and decode.
Written by Marc-Andre Lemburg (mal@lemburg.com). Some details were
adapted from uu.py which was written by Lance Ellinghouse and
modified by Jack Jansen and Fredrik Lundh.
"""
import codecs, binascii
### Codec APIs
def uu_encode(input,errors='strict',filename='<data>',mode=0666):
""" Encodes the object input and returns a tuple (output
object, length consumed).
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
from cStringIO import StringIO
from binascii import b2a_uu
infile = StringIO(input)
outfile = StringIO()
read = infile.read
write = outfile.write
# Encode
write('begin %o %s\n' % (mode & 0777, filename))
chunk = read(45)
while chunk:
write(b2a_uu(chunk))
chunk = read(45)
write(' \nend\n')
return (outfile.getvalue(), len(input))
def uu_decode(input,errors='strict'):
""" Decodes the object input and returns a tuple (output
object, length consumed).
input must be an object which provides the bf_getreadbuf
buffer slot. Python strings, buffer objects and memory
mapped files are examples of objects providing this slot.
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
Note: filename and file mode information in the input data is
ignored.
"""
assert errors == 'strict'
from cStringIO import StringIO
from binascii import a2b_uu
infile = StringIO(input)
outfile = StringIO()
readline = infile.readline
write = outfile.write
# Find start of encoded data
while 1:
s = readline()
if not s:
raise ValueError, 'Missing "begin" line in input data'
if s[:5] == 'begin':
break
# Decode
while 1:
s = readline()
if not s or \
s == 'end\n':
break
try:
data = a2b_uu(s)
except binascii.Error as v:
# Workaround for broken uuencoders by /Fredrik Lundh
nbytes = (((ord(s[0])-32) & 63) * 4 + 5) / 3
data = a2b_uu(s[:nbytes])
#sys.stderr.write("Warning: %s\n" % str(v))
write(data)
if not s:
raise ValueError, 'Truncated input data'
return (outfile.getvalue(), len(input))
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return uu_encode(input,errors)
def decode(self,input,errors='strict'):
return uu_decode(input,errors)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return uu_encode(input, self.errors)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return uu_decode(input, self.errors)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='uu',
encode=uu_encode,
decode=uu_decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -1,102 +0,0 @@
""" Python 'zlib_codec' Codec - zlib compression encoding
Unlike most of the other codecs which target Unicode, this codec
will return Python string objects for both encode and decode.
Written by Marc-Andre Lemburg (mal@lemburg.com).
"""
import codecs
import zlib # this codec needs the optional zlib module !
### Codec APIs
def zlib_encode(input,errors='strict'):
""" Encodes the object input and returns a tuple (output
object, length consumed).
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
output = zlib.compress(input)
return (output, len(input))
def zlib_decode(input,errors='strict'):
""" Decodes the object input and returns a tuple (output
object, length consumed).
input must be an object which provides the bf_getreadbuf
buffer slot. Python strings, buffer objects and memory
mapped files are examples of objects providing this slot.
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
output = zlib.decompress(input)
return (output, len(input))
class Codec(codecs.Codec):
def encode(self, input, errors='strict'):
return zlib_encode(input, errors)
def decode(self, input, errors='strict'):
return zlib_decode(input, errors)
class IncrementalEncoder(codecs.IncrementalEncoder):
def __init__(self, errors='strict'):
assert errors == 'strict'
self.errors = errors
self.compressobj = zlib.compressobj()
def encode(self, input, final=False):
if final:
c = self.compressobj.compress(input)
return c + self.compressobj.flush()
else:
return self.compressobj.compress(input)
def reset(self):
self.compressobj = zlib.compressobj()
class IncrementalDecoder(codecs.IncrementalDecoder):
def __init__(self, errors='strict'):
assert errors == 'strict'
self.errors = errors
self.decompressobj = zlib.decompressobj()
def decode(self, input, final=False):
if final:
c = self.decompressobj.decompress(input)
return c + self.decompressobj.flush()
else:
return self.decompressobj.decompress(input)
def reset(self):
self.decompressobj = zlib.decompressobj()
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='zlib',
encode=zlib_encode,
decode=zlib_decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -34,6 +34,7 @@ import sys
import struct
import re
import io
import codecs
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
"Unpickler", "dump", "dumps", "load", "loads"]
@ -929,7 +930,7 @@ class Unpickler:
break
else:
raise ValueError, "insecure string pickle"
self.append(str8(rep.decode("string-escape")))
self.append(str8(codecs.escape_decode(rep)[0]))
dispatch[STRING[0]] = load_string
def load_binstring(self):

View File

@ -10,6 +10,8 @@ dis(pickle, out=None, memo=None, indentlevel=4)
Print a symbolic disassembly of a pickle.
'''
import codecs
__all__ = ['dis',
'genops',
]
@ -318,10 +320,8 @@ def read_stringnl(f, decode=True, stripquotes=True):
else:
raise ValueError("no string quotes around %r" % data)
# I'm not sure when 'string_escape' was added to the std codecs; it's
# crazy not to use it if it's there.
if decode:
data = data.decode('string_escape')
data = codecs.escape_decode(data)[0]
return data
stringnl = ArgumentDescriptor(

View File

@ -486,10 +486,6 @@ class UTF8SigTest(ReadTest):
self.check_state_handling_decode(self.encoding,
u, u.encode(self.encoding))
class EscapeDecodeTest(unittest.TestCase):
def test_empty(self):
self.assertEquals(codecs.escape_decode(""), ("", 0))
class RecodingTest(unittest.TestCase):
def test_recoding(self):
f = io.BytesIO()
@ -986,25 +982,8 @@ class EncodedFileTest(unittest.TestCase):
ef.write(b'\xc3\xbc')
self.assertEquals(f.getvalue(), b'\xfc')
class Str2StrTest(unittest.TestCase):
def test_read(self):
sin = "\x80".encode("base64_codec")
reader = codecs.getreader("base64_codec")(io.BytesIO(sin))
sout = reader.read()
self.assertEqual(sout, "\x80")
self.assert_(isinstance(sout, str))
def test_readline(self):
sin = "\x80".encode("base64_codec")
reader = codecs.getreader("base64_codec")(io.BytesIO(sin))
sout = reader.readline()
self.assertEqual(sout, "\x80")
self.assert_(isinstance(sout, str))
all_unicode_encodings = [
"ascii",
"base64_codec",
"big5",
"big5hkscs",
"charmap",
@ -1051,7 +1030,6 @@ all_unicode_encodings = [
"gb18030",
"gb2312",
"gbk",
"hex_codec",
"hp_roman8",
"hz",
"idna",
@ -1091,7 +1069,6 @@ all_unicode_encodings = [
"ptcp154",
"punycode",
"raw_unicode_escape",
"rot_13",
"shift_jis",
"shift_jis_2004",
"shift_jisx0213",
@ -1108,53 +1085,24 @@ all_unicode_encodings = [
if hasattr(codecs, "mbcs_encode"):
all_unicode_encodings.append("mbcs")
# The following encodings work only with str8, not str
all_string_encodings = [
"quopri_codec",
"string_escape",
"uu_codec",
]
# The following encoding is not tested, because it's not supposed
# to work:
# "undefined"
# The following encodings don't work in stateful mode
broken_unicode_with_streams = [
"base64_codec",
"hex_codec",
"punycode",
"unicode_internal"
]
broken_incremental_coders = broken_unicode_with_streams + [
"idna",
"zlib_codec",
"bz2_codec",
]
# The following encodings only support "strict" mode
only_strict_mode = [
"idna",
"zlib_codec",
"bz2_codec",
]
try:
import bz2
except ImportError:
pass
else:
all_unicode_encodings.append("bz2_codec")
broken_unicode_with_streams.append("bz2_codec")
try:
import zlib
except ImportError:
pass
else:
all_unicode_encodings.append("zlib_codec")
broken_unicode_with_streams.append("zlib_codec")
class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
def test_basics(self):
s = "abc123" # all codecs should be able to encode these
@ -1287,15 +1235,6 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
self.check_state_handling_decode(encoding, u, u.encode(encoding))
self.check_state_handling_encode(encoding, u, u.encode(encoding))
class BasicStrTest(unittest.TestCase):
def test_basics(self):
s = str8("abc123")
for encoding in all_string_encodings:
(encoded, size) = codecs.getencoder(encoding)(s)
self.assertEqual(size, len(s))
(chars, size) = codecs.getdecoder(encoding)(encoded)
self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
class CharmapTest(unittest.TestCase):
def test_decode_with_string_map(self):
self.assertEquals(
@ -1354,7 +1293,6 @@ def test_main():
UTF16ExTest,
ReadBufferTest,
CharBufferTest,
EscapeDecodeTest,
RecodingTest,
PunycodeTest,
UnicodeInternalTest,
@ -1363,9 +1301,7 @@ def test_main():
CodecsModuleTest,
StreamReaderTest,
EncodedFileTest,
Str2StrTest,
BasicUnicodeTest,
BasicStrTest,
CharmapTest,
WithStmtTest,
)