#7475: add (un)transform method to bytes/bytearray and str, add back codecs that can be used with them from Python 2.

This commit is contained in:
Georg Brandl 2010-12-02 18:06:51 +00:00
parent de0ab5eab3
commit 02524629f3
17 changed files with 900 additions and 29 deletions

View File

@ -1165,6 +1165,46 @@ particular, the following variants typically exist:
| | | operand |
+--------------------+---------+---------------------------+
The following codecs provide bytes-to-bytes mappings. They can be used with
:meth:`bytes.transform` and :meth:`bytes.untransform`.
+--------------------+---------------------------+---------------------------+
| Codec | Aliases | Purpose |
+====================+===========================+===========================+
| base64_codec | base64, base-64 | Convert operand to MIME |
| | | base64 |
+--------------------+---------------------------+---------------------------+
| bz2_codec | bz2 | Compress the operand |
| | | using bz2 |
+--------------------+---------------------------+---------------------------+
| hex_codec | hex | Convert operand to |
| | | hexadecimal |
| | | representation, with two |
| | | digits per byte |
+--------------------+---------------------------+---------------------------+
| quopri_codec | quopri, quoted-printable, | Convert operand to MIME |
| | quotedprintable | quoted printable |
+--------------------+---------------------------+---------------------------+
| uu_codec | uu | Convert the operand using |
| | | uuencode |
+--------------------+---------------------------+---------------------------+
| zlib_codec | zip, zlib | Compress the operand |
| | | using zlib |
+--------------------+---------------------------+---------------------------+
The following codecs provide string-to-string mappings. They can be used with
:meth:`str.transform` and :meth:`str.untransform`.
+--------------------+---------------------------+---------------------------+
| Codec | Aliases | Purpose |
+====================+===========================+===========================+
| rot_13 | rot13 | Returns the Caesar-cypher |
| | | encryption of the operand |
+--------------------+---------------------------+---------------------------+
.. versionadded:: 3.2
bytes-to-bytes and string-to-string codecs.
:mod:`encodings.idna` --- Internationalized Domain Names in Applications
------------------------------------------------------------------------

View File

@ -1352,6 +1352,19 @@ functions based on regular expressions.
"They're Bill's Friends."
.. method:: str.transform(encoding, errors='strict')
Return an encoded version of the string. In contrast to :meth:`encode`, this
method works with codecs that provide string-to-string mappings, and not
string-to-bytes mappings. :meth:`transform` therefore returns a string
object.
The codecs that can be used with this method are listed in
:ref:`standard-encodings`.
.. versionadded:: 3.2
.. method:: str.translate(map)
Return a copy of *s* where all characters have been mapped through the
@ -1369,6 +1382,14 @@ functions based on regular expressions.
example).
.. method:: str.untransform(encoding, errors='strict')
Return a decoded version of the string. This provides the reverse operation
of :meth:`transform`.
.. versionadded:: 3.2
.. method:: str.upper()
Return a copy of the string converted to uppercase.
@ -1800,6 +1821,20 @@ The bytes and bytearray types have an additional class method:
The maketrans and translate methods differ in semantics from the versions
available on strings:
.. method:: bytes.transform(encoding, errors='strict')
bytearray.transform(encoding, errors='strict')
Return an encoded version of the bytes object. In contrast to
:meth:`str.encode`, this method works with codecs that provide bytes-to-bytes
mappings, and not string-to-bytes mappings. :meth:`transform` therefore
returns a bytes or bytearray object.
The codecs that can be used with this method are listed in
:ref:`standard-encodings`.
.. versionadded:: 3.2
.. method:: bytes.translate(table[, delete])
bytearray.translate(table[, delete])
@ -1817,6 +1852,15 @@ available on strings:
b'rd ths shrt txt'
.. method:: bytes.untransform(encoding, errors='strict')
bytearray.untransform(encoding, errors='strict')
Return a decoded version of the bytes object. This provides the reverse
operation of :meth:`transform`.
.. versionadded:: 3.2
.. staticmethod:: bytes.maketrans(from, to)
bytearray.maketrans(from, to)

View File

@ -396,6 +396,8 @@ class StreamWriter(Codec):
class StreamReader(Codec):
charbuffertype = str
def __init__(self, stream, errors='strict'):
""" Creates a StreamReader instance.
@ -417,9 +419,8 @@ class StreamReader(Codec):
self.stream = stream
self.errors = errors
self.bytebuffer = b""
# For str->str decoding this will stay a str
# For str->unicode decoding the first read will promote it to unicode
self.charbuffer = ""
self._empty_charbuffer = self.charbuffertype()
self.charbuffer = self._empty_charbuffer
self.linebuffer = None
def decode(self, input, errors='strict'):
@ -455,7 +456,7 @@ class StreamReader(Codec):
"""
# If we have lines cached, first merge them back into characters
if self.linebuffer:
self.charbuffer = "".join(self.linebuffer)
self.charbuffer = self._empty_charbuffer.join(self.linebuffer)
self.linebuffer = None
# read until we get the required number of characters (if available)
@ -498,7 +499,7 @@ class StreamReader(Codec):
if chars < 0:
# Return everything we've got
result = self.charbuffer
self.charbuffer = ""
self.charbuffer = self._empty_charbuffer
else:
# Return the first chars characters
result = self.charbuffer[:chars]
@ -529,7 +530,7 @@ class StreamReader(Codec):
return line
readsize = size or 72
line = ""
line = self._empty_charbuffer
# If size is given, we call read() only once
while True:
data = self.read(readsize, firstline=True)
@ -537,7 +538,8 @@ class StreamReader(Codec):
# If we're at a "\r" read one extra character (which might
# be a "\n") to get a proper line ending. If the stream is
# temporarily exhausted we return the wrong line ending.
if data.endswith("\r"):
if (isinstance(data, str) and data.endswith("\r")) or \
(isinstance(data, bytes) and data.endswith(b"\r")):
data += self.read(size=1, chars=1)
line += data
@ -563,7 +565,8 @@ class StreamReader(Codec):
line0withoutend = lines[0].splitlines(False)[0]
if line0withend != line0withoutend: # We really have a line end
# Put the rest back together and keep it until the next call
self.charbuffer = "".join(lines[1:]) + self.charbuffer
self.charbuffer = self._empty_charbuffer.join(lines[1:]) + \
self.charbuffer
if keepends:
line = line0withend
else:
@ -574,7 +577,7 @@ class StreamReader(Codec):
if line and not keepends:
line = line.splitlines(False)[0]
break
if readsize<8000:
if readsize < 8000:
readsize *= 2
return line
@ -603,7 +606,7 @@ class StreamReader(Codec):
"""
self.bytebuffer = b""
self.charbuffer = ""
self.charbuffer = self._empty_charbuffer
self.linebuffer = None
def seek(self, offset, whence=0):

View File

@ -33,9 +33,9 @@ aliases = {
'us' : 'ascii',
'us_ascii' : 'ascii',
## base64_codec codec
#'base64' : 'base64_codec',
#'base_64' : 'base64_codec',
# base64_codec codec
'base64' : 'base64_codec',
'base_64' : 'base64_codec',
# big5 codec
'big5_tw' : 'big5',
@ -45,8 +45,8 @@ aliases = {
'big5_hkscs' : 'big5hkscs',
'hkscs' : 'big5hkscs',
## bz2_codec codec
#'bz2' : 'bz2_codec',
# bz2_codec codec
'bz2' : 'bz2_codec',
# cp037 codec
'037' : 'cp037',
@ -248,8 +248,8 @@ aliases = {
'cp936' : 'gbk',
'ms936' : 'gbk',
## hex_codec codec
#'hex' : 'hex_codec',
# hex_codec codec
'hex' : 'hex_codec',
# hp_roman8 codec
'roman8' : 'hp_roman8',
@ -450,13 +450,13 @@ aliases = {
'cp154' : 'ptcp154',
'cyrillic_asian' : 'ptcp154',
## quopri_codec codec
#'quopri' : 'quopri_codec',
#'quoted_printable' : 'quopri_codec',
#'quotedprintable' : 'quopri_codec',
# quopri_codec codec
'quopri' : 'quopri_codec',
'quoted_printable' : 'quopri_codec',
'quotedprintable' : 'quopri_codec',
## rot_13 codec
#'rot13' : 'rot_13',
# rot_13 codec
'rot13' : 'rot_13',
# shift_jis codec
'csshiftjis' : 'shift_jis',
@ -518,12 +518,12 @@ aliases = {
'utf8_ucs2' : 'utf_8',
'utf8_ucs4' : 'utf_8',
## uu_codec codec
#'uu' : 'uu_codec',
# uu_codec codec
'uu' : 'uu_codec',
## zlib_codec codec
#'zip' : 'zlib_codec',
#'zlib' : 'zlib_codec',
# zlib_codec codec
'zip' : 'zlib_codec',
'zlib' : 'zlib_codec',
# temporary mac CJK aliases, will be replaced by proper codecs in 3.1
'x_mac_japanese' : 'shift_jis',

View File

@ -0,0 +1,55 @@
"""Python 'base64_codec' Codec - base64 content transfer encoding.
This codec de/encodes from bytes to bytes and is therefore usable with
bytes.transform() and bytes.untransform().
Written by Marc-Andre Lemburg (mal@lemburg.com).
"""
import codecs
import base64
### Codec APIs
def base64_encode(input, errors='strict'):
    """Encode *input* (a bytes-like object) to MIME base64.

    Only the 'strict' error handler is supported.

    Returns a ``(encoded_bytes, consumed_length)`` tuple, where the
    consumed length is ``len(input)``.
    """
    assert errors == 'strict'
    # encodebytes() is the supported spelling; the old encodestring()
    # alias is deprecated and absent from newer Python releases.
    return (base64.encodebytes(input), len(input))
def base64_decode(input, errors='strict'):
    """Decode MIME base64 *input* back to the original bytes.

    Only the 'strict' error handler is supported.

    Returns a ``(decoded_bytes, consumed_length)`` tuple, where the
    consumed length is ``len(input)``.
    """
    assert errors == 'strict'
    # decodebytes() is the supported spelling; the old decodestring()
    # alias is deprecated and absent from newer Python releases.
    return (base64.decodebytes(input), len(input))
class Codec(codecs.Codec):
    """Codec object that forwards to the module-level base64 functions."""

    def encode(self, input, errors='strict'):
        """Return ``(base64_bytes, len(input))`` for *input*."""
        result = base64_encode(input, errors)
        return result

    def decode(self, input, errors='strict'):
        """Return ``(decoded_bytes, len(input))`` for base64 *input*."""
        result = base64_decode(input, errors)
        return result
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental base64 encoder.

    Each call encodes its *input* independently of earlier calls; no
    state is carried between chunks.
    """

    def encode(self, input, final=False):
        """Return the base64 encoding of *input* as bytes."""
        assert self.errors == 'strict'
        # encodebytes() replaces the deprecated encodestring() alias.
        return base64.encodebytes(input)
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental base64 decoder.

    Each call decodes its *input* independently of earlier calls; no
    state is carried between chunks.
    """

    def decode(self, input, final=False):
        """Return the bytes obtained by base64-decoding *input*."""
        assert self.errors == 'strict'
        # decodebytes() replaces the deprecated decodestring() alias.
        return base64.decodebytes(input)
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer for the base64 codec."""

    # This codec works on bytes, so bytes is declared as the buffer type.
    charbuffertype = bytes
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader for the base64 codec."""

    # Decoded data is bytes, so bytes is declared as the buffer type.
    charbuffertype = bytes
### encodings module API
def getregentry():
    """Build the CodecInfo record describing the 'base64' codec."""
    entry = codecs.CodecInfo(
        name='base64',
        encode=base64_encode,
        decode=base64_decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
    return entry

View File

@ -0,0 +1,77 @@
"""Python 'bz2_codec' Codec - bz2 compression encoding.
This codec de/encodes from bytes to bytes and is therefore usable with
bytes.transform() and bytes.untransform().
Adapted by Raymond Hettinger from zlib_codec.py which was written
by Marc-Andre Lemburg (mal@lemburg.com).
"""
import codecs
import bz2 # this codec needs the optional bz2 module !
### Codec APIs
def bz2_encode(input, errors='strict'):
    """Compress *input* with bz2.

    Only the 'strict' error handler is supported.  Returns a
    ``(compressed_bytes, len(input))`` tuple.
    """
    assert errors == 'strict'
    compressed = bz2.compress(input)
    consumed = len(input)
    return (compressed, consumed)
def bz2_decode(input, errors='strict'):
    """Decompress bz2-compressed *input*.

    Only the 'strict' error handler is supported.  Returns a
    ``(decompressed_bytes, len(input))`` tuple.
    """
    assert errors == 'strict'
    plain = bz2.decompress(input)
    consumed = len(input)
    return (plain, consumed)
class Codec(codecs.Codec):
    """Codec object that forwards to the module-level bz2 functions."""

    def encode(self, input, errors='strict'):
        """Return ``(compressed_bytes, len(input))`` for *input*."""
        result = bz2_encode(input, errors)
        return result

    def decode(self, input, errors='strict'):
        """Return ``(decompressed_bytes, len(input))`` for *input*."""
        result = bz2_decode(input, errors)
        return result
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental bz2 compressor built on bz2.BZ2Compressor."""

    def __init__(self, errors='strict'):
        """Create the encoder; only the 'strict' handler is accepted."""
        assert errors == 'strict'
        self.errors = errors
        self.compressobj = bz2.BZ2Compressor()

    def encode(self, input, final=False):
        """Feed *input* to the compressor and return whatever it emits.

        When *final* is true the compressor is flushed and the remaining
        output is appended.
        """
        emitted = self.compressobj.compress(input)
        if not final:
            return emitted
        return emitted + self.compressobj.flush()

    def reset(self):
        """Discard the current compressor and start with a fresh one."""
        self.compressobj = bz2.BZ2Compressor()
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental bz2 decompressor built on bz2.BZ2Decompressor."""

    def __init__(self, errors='strict'):
        """Create the decoder; only the 'strict' handler is accepted."""
        assert errors == 'strict'
        self.errors = errors
        self.decompressobj = bz2.BZ2Decompressor()

    def decode(self, input, final=False):
        """Return the bytes produced by decompressing *input*.

        If the decompressor raises EOFError, an empty bytes object is
        returned instead of propagating the error.
        """
        try:
            return self.decompressobj.decompress(input)
        except EOFError:
            # This is a bytes-to-bytes codec, so the "nothing more"
            # result must be bytes, not str.
            return b''

    def reset(self):
        """Discard the current decompressor and start with a fresh one."""
        self.decompressobj = bz2.BZ2Decompressor()
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer for the bz2 codec."""

    # This codec works on bytes, so bytes is declared as the buffer type.
    charbuffertype = bytes
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader for the bz2 codec."""

    # Decoded data is bytes, so bytes is declared as the buffer type.
    charbuffertype = bytes
### encodings module API
def getregentry():
    """Build the CodecInfo record describing the 'bz2' codec."""
    entry = codecs.CodecInfo(
        name="bz2",
        encode=bz2_encode,
        decode=bz2_decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
    return entry

View File

@ -0,0 +1,55 @@
"""Python 'hex_codec' Codec - 2-digit hex content transfer encoding.
This codec de/encodes from bytes to bytes and is therefore usable with
bytes.transform() and bytes.untransform().
Written by Marc-Andre Lemburg (mal@lemburg.com).
"""
import codecs
import binascii
### Codec APIs
def hex_encode(input, errors='strict'):
    """Convert *input* to its two-hex-digits-per-byte representation.

    Only the 'strict' error handler is supported.  Returns a
    ``(hex_bytes, len(input))`` tuple.
    """
    assert errors == 'strict'
    hexed = binascii.hexlify(input)
    return (hexed, len(input))
def hex_decode(input, errors='strict'):
    """Convert hexadecimal *input* back to the bytes it represents.

    Only the 'strict' error handler is supported.  Returns a
    ``(decoded_bytes, len(input))`` tuple.
    """
    assert errors == 'strict'
    raw = binascii.unhexlify(input)
    return (raw, len(input))
class Codec(codecs.Codec):
    """Codec object that forwards to the module-level hex functions."""

    def encode(self, input, errors='strict'):
        """Return ``(hex_bytes, len(input))`` for *input*."""
        result = hex_encode(input, errors)
        return result

    def decode(self, input, errors='strict'):
        """Return ``(decoded_bytes, len(input))`` for hex *input*."""
        result = hex_decode(input, errors)
        return result
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental hex encoder; every chunk is converted on its own."""

    def encode(self, input, final=False):
        """Return the hexadecimal representation of *input* as bytes."""
        assert self.errors == 'strict'
        hexed = binascii.hexlify(input)
        return hexed
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental hex decoder; every chunk is converted on its own."""

    def decode(self, input, final=False):
        """Return the bytes represented by hexadecimal *input*."""
        assert self.errors == 'strict'
        raw = binascii.unhexlify(input)
        return raw
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer for the hex codec."""

    # This codec works on bytes, so bytes is declared as the buffer type.
    charbuffertype = bytes
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader for the hex codec."""

    # Decoded data is bytes, so bytes is declared as the buffer type.
    charbuffertype = bytes
### encodings module API
def getregentry():
    """Build the CodecInfo record describing the 'hex' codec."""
    entry = codecs.CodecInfo(
        name='hex',
        encode=hex_encode,
        decode=hex_decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
    return entry

View File

@ -0,0 +1,56 @@
"""Codec for quoted-printable encoding.
This codec de/encodes from bytes to bytes and is therefore usable with
bytes.transform() and bytes.untransform().
"""
import codecs
import quopri
from io import BytesIO
def quopri_encode(input, errors='strict'):
    """Encode *input* as quoted-printable, with tab/space quoting enabled.

    Only the 'strict' error handler is supported.  Returns a
    ``(encoded_bytes, len(input))`` tuple.
    """
    assert errors == 'strict'
    source = BytesIO(input)
    sink = BytesIO()
    # Third positional argument of quopri.encode() is *quotetabs*.
    quopri.encode(source, sink, 1)
    encoded = sink.getvalue()
    return (encoded, len(input))
def quopri_decode(input, errors='strict'):
    """Decode quoted-printable *input*.

    Only the 'strict' error handler is supported.  Returns a
    ``(decoded_bytes, len(input))`` tuple.
    """
    assert errors == 'strict'
    source = BytesIO(input)
    sink = BytesIO()
    quopri.decode(source, sink)
    decoded = sink.getvalue()
    return (decoded, len(input))
class Codec(codecs.Codec):
    """Codec object that forwards to the module-level quopri functions."""

    def encode(self, input, errors='strict'):
        """Return ``(encoded_bytes, len(input))`` for *input*."""
        result = quopri_encode(input, errors)
        return result

    def decode(self, input, errors='strict'):
        """Return ``(decoded_bytes, len(input))`` for *input*."""
        result = quopri_decode(input, errors)
        return result
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental quoted-printable encoder; chunks are encoded one by one."""

    def encode(self, input, final=False):
        """Return only the encoded bytes from quopri_encode()."""
        encoded, _consumed = quopri_encode(input, self.errors)
        return encoded
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental quoted-printable decoder; chunks are decoded one by one."""

    def decode(self, input, final=False):
        """Return only the decoded bytes from quopri_decode()."""
        decoded, _consumed = quopri_decode(input, self.errors)
        return decoded
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer for the quoted-printable codec."""

    # This codec works on bytes, so bytes is declared as the buffer type.
    charbuffertype = bytes
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader for the quoted-printable codec."""

    # Decoded data is bytes, so bytes is declared as the buffer type.
    charbuffertype = bytes
# encodings module API
def getregentry():
    """Build the CodecInfo record describing the 'quopri' codec."""
    entry = codecs.CodecInfo(
        name='quopri',
        encode=quopri_encode,
        decode=quopri_decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
    return entry

113
Lib/encodings/rot_13.py Executable file
View File

@ -0,0 +1,113 @@
#!/usr/bin/env python
""" Python Character Mapping Codec for ROT13.
This codec de/encodes from str to str and is therefore usable with
str.transform() and str.untransform().
Written by Marc-Andre Lemburg (mal@lemburg.com).
"""
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """ROT13 codec: encoding and decoding apply the same translation."""

    def encode(self, input, errors='strict'):
        """Return ``(translated_text, len(input))`` using rot13_map."""
        translated = input.translate(rot13_map)
        return (translated, len(input))

    def decode(self, input, errors='strict'):
        """Return ``(translated_text, len(input))`` using rot13_map."""
        translated = input.translate(rot13_map)
        return (translated, len(input))
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental ROT13 encoder; each chunk is translated on its own."""

    def encode(self, input, final=False):
        """Return *input* translated through rot13_map."""
        translated = input.translate(rot13_map)
        return translated
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental ROT13 decoder; each chunk is translated on its own."""

    def decode(self, input, final=False):
        """Return *input* translated through rot13_map."""
        translated = input.translate(rot13_map)
        return translated
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer for the ROT13 codec; adds nothing to its bases."""
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader for the ROT13 codec; adds nothing to its bases."""
### encodings module API
def getregentry():
    """Build the CodecInfo record describing the 'rot-13' codec."""
    # The stateless entry points are bound methods of throwaway instances.
    encoder = Codec().encode
    decoder = Codec().decode
    return codecs.CodecInfo(
        name='rot-13',
        encode=encoder,
        decode=decoder,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
### Map
# Translation table for code points 0-255: identity everywhere except the
# ASCII letters, which are rotated 13 places within their own case.
rot13_map = codecs.make_identity_dict(range(256))
rot13_map.update({
    first + position: first + (position + 13) % 26
    for first in (0x0041, 0x0061)      # 'A' and 'a'
    for position in range(26)
})
### Filter API
def rot13(infile, outfile):
    """Read all text from *infile* and write its ROT13 form to *outfile*.

    ROT13 maps str to str, so it cannot be reached through str.encode(),
    which only accepts codecs producing bytes.  codecs.encode() runs any
    registered codec regardless of its output type.
    """
    outfile.write(codecs.encode(infile.read(), 'rot-13'))


if __name__ == '__main__':
    # Script usage: filter standard input to standard output.
    import sys
    rot13(sys.stdin, sys.stdout)

99
Lib/encodings/uu_codec.py Normal file
View File

@ -0,0 +1,99 @@
"""Python 'uu_codec' Codec - UU content transfer encoding.
This codec de/encodes from bytes to bytes and is therefore usable with
bytes.transform() and bytes.untransform().
Written by Marc-Andre Lemburg (mal@lemburg.com). Some details were
adapted from uu.py which was written by Lance Ellinghouse and
modified by Jack Jansen and Fredrik Lundh.
"""
import codecs
import binascii
from io import BytesIO
### Codec APIs
def uu_encode(input, errors='strict', filename='<data>', mode=0o666):
    """Uuencode *input* (a bytes-like object).

    Only the 'strict' error handler is supported.  *filename* and the
    low nine bits of *mode* are written into the ``begin`` header line.

    Returns a ``(encoded_bytes, len(input))`` tuple.
    """
    assert errors == 'strict'
    infile = BytesIO(input)
    outfile = BytesIO()
    read = infile.read
    write = outfile.write

    # A raw CR or LF inside the file name would terminate the header line
    # early and let the rest of the name be read as encoded data, so line
    # breaks are written in escaped form.
    filename = filename.replace('\n', '\\n').replace('\r', '\\r')

    # Encode: header, then one output line per 45-byte chunk, then trailer.
    write(('begin %o %s\n' % (mode & 0o777, filename)).encode('ascii'))
    chunk = read(45)
    while chunk:
        write(binascii.b2a_uu(chunk))
        chunk = read(45)
    write(b' \nend\n')

    return (outfile.getvalue(), len(input))
def uu_decode(input, errors='strict'):
    """Decode uuencoded *input* (a bytes-like object).

    Only the 'strict' error handler is supported.  Everything before the
    first line starting with ``begin`` is skipped; decoding stops at the
    ``end`` line.

    Returns a ``(decoded_bytes, len(input))`` tuple.

    Raises ValueError if no ``begin`` line is found or if the data runs
    out before an ``end`` line is seen.
    """
    assert errors == 'strict'
    infile = BytesIO(input)
    outfile = BytesIO()
    readline = infile.readline
    write = outfile.write

    # Find start of encoded data
    while True:
        s = readline()
        if not s:
            raise ValueError('Missing "begin" line in input data')
        if s[:5] == b'begin':
            break

    # Decode
    while True:
        s = readline()
        if not s or s == b'end\n':
            break
        try:
            data = binascii.a2b_uu(s)
        except binascii.Error:
            # Workaround for broken uuencoders by /Fredrik Lundh:
            # keep only as many characters as the length byte announces.
            # Indexing bytes yields an int already, and the slice bound
            # must be an int, hence no ord() and floor division.
            nbytes = (((s[0] - 32) & 63) * 4 + 5) // 3
            data = binascii.a2b_uu(s[:nbytes])
        write(data)
    if not s:
        raise ValueError('Truncated input data')

    return (outfile.getvalue(), len(input))
class Codec(codecs.Codec):
    """Codec object that forwards to the module-level uu functions."""

    def encode(self, input, errors='strict'):
        """Return ``(uuencoded_bytes, len(input))`` for *input*."""
        result = uu_encode(input, errors)
        return result

    def decode(self, input, errors='strict'):
        """Return ``(decoded_bytes, len(input))`` for uuencoded *input*."""
        result = uu_decode(input, errors)
        return result
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental uu encoder; each chunk becomes a complete uu document."""

    def encode(self, input, final=False):
        """Return only the encoded bytes from uu_encode()."""
        encoded, _consumed = uu_encode(input, self.errors)
        return encoded
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental uu decoder; each chunk is decoded as a whole document."""

    def decode(self, input, final=False):
        """Return only the decoded bytes from uu_decode()."""
        decoded, _consumed = uu_decode(input, self.errors)
        return decoded
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer for the uu codec."""

    # This codec works on bytes, so bytes is declared as the buffer type.
    charbuffertype = bytes
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader for the uu codec."""

    # Decoded data is bytes, so bytes is declared as the buffer type.
    charbuffertype = bytes
### encodings module API
def getregentry():
    """Build the CodecInfo record describing the 'uu' codec."""
    entry = codecs.CodecInfo(
        name='uu',
        encode=uu_encode,
        decode=uu_decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
    return entry

View File

@ -0,0 +1,77 @@
"""Python 'zlib_codec' Codec - zlib compression encoding.
This codec de/encodes from bytes to bytes and is therefore usable with
bytes.transform() and bytes.untransform().
Written by Marc-Andre Lemburg (mal@lemburg.com).
"""
import codecs
import zlib # this codec needs the optional zlib module !
### Codec APIs
def zlib_encode(input, errors='strict'):
    """Compress *input* with zlib.

    Only the 'strict' error handler is supported.  Returns a
    ``(compressed_bytes, len(input))`` tuple.
    """
    assert errors == 'strict'
    compressed = zlib.compress(input)
    consumed = len(input)
    return (compressed, consumed)
def zlib_decode(input, errors='strict'):
    """Decompress zlib-compressed *input*.

    Only the 'strict' error handler is supported.  Returns a
    ``(decompressed_bytes, len(input))`` tuple.
    """
    assert errors == 'strict'
    plain = zlib.decompress(input)
    consumed = len(input)
    return (plain, consumed)
class Codec(codecs.Codec):
    """Codec object that forwards to the module-level zlib functions."""

    def encode(self, input, errors='strict'):
        """Return ``(compressed_bytes, len(input))`` for *input*."""
        result = zlib_encode(input, errors)
        return result

    def decode(self, input, errors='strict'):
        """Return ``(decompressed_bytes, len(input))`` for *input*."""
        result = zlib_decode(input, errors)
        return result
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental zlib compressor built on zlib.compressobj()."""

    def __init__(self, errors='strict'):
        """Create the encoder; only the 'strict' handler is accepted."""
        assert errors == 'strict'
        self.errors = errors
        self.compressobj = zlib.compressobj()

    def encode(self, input, final=False):
        """Feed *input* to the compressor and return whatever it emits.

        When *final* is true the compressor is flushed and the remaining
        output is appended.
        """
        emitted = self.compressobj.compress(input)
        if not final:
            return emitted
        return emitted + self.compressobj.flush()

    def reset(self):
        """Discard the current compressor and start with a fresh one."""
        self.compressobj = zlib.compressobj()
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental zlib decompressor built on zlib.decompressobj()."""

    def __init__(self, errors='strict'):
        """Create the decoder; only the 'strict' handler is accepted."""
        assert errors == 'strict'
        self.errors = errors
        self.decompressobj = zlib.decompressobj()

    def decode(self, input, final=False):
        """Feed *input* to the decompressor and return whatever it emits.

        When *final* is true the decompressor is flushed and the
        remaining output is appended.
        """
        emitted = self.decompressobj.decompress(input)
        if not final:
            return emitted
        return emitted + self.decompressobj.flush()

    def reset(self):
        """Discard the current decompressor and start with a fresh one."""
        self.decompressobj = zlib.decompressobj()
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer for the zlib codec."""

    # This codec works on bytes, so bytes is declared as the buffer type.
    charbuffertype = bytes
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader for the zlib codec."""

    # Decoded data is bytes, so bytes is declared as the buffer type.
    charbuffertype = bytes
### encodings module API
def getregentry():
    """Build the CodecInfo record describing the 'zlib' codec."""
    entry = codecs.CodecInfo(
        name='zlib',
        encode=zlib_encode,
        decode=zlib_decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
    return entry

View File

@ -207,6 +207,11 @@ class BaseBytesTest(unittest.TestCase):
self.assertEqual(b.decode(errors="ignore", encoding="utf8"),
"Hello world\n")
def test_transform(self):
    # Every byte value must survive a base64 transform/untransform cycle.
    original = self.type2test(range(256))
    encoded = original.transform("base64")
    roundtripped = encoded.untransform("base64")
    self.assertEqual(roundtripped, original)
def test_from_int(self):
b = self.type2test(0)
self.assertEqual(b, self.type2test())

View File

@ -1659,6 +1659,67 @@ class BomTest(unittest.TestCase):
self.assertEqual(f.read(), data * 2)
# Names of the bytes-to-bytes codecs exercised by TransformCodecTest.
bytes_transform_encodings = [
    "base64_codec",
    "uu_codec",
    "quopri_codec",
    "hex_codec",
]
# The compression codecs are only added when their backing module can be
# imported in this build.
try:
    import zlib
except ImportError:
    pass
else:
    bytes_transform_encodings.append("zlib_codec")
try:
    import bz2
except ImportError:
    pass
else:
    bytes_transform_encodings.append("bz2_codec")
class TransformCodecTest(unittest.TestCase):
    """Round-trip checks for the codecs in bytes_transform_encodings."""

    def test_basics(self):
        binput = bytes(range(256))
        ainput = bytearray(binput)
        for encoding in bytes_transform_encodings:
            # generic codecs interface
            encoded, consumed = codecs.getencoder(encoding)(binput)
            self.assertEqual(consumed, len(binput))
            decoded, consumed = codecs.getdecoder(encoding)(encoded)
            self.assertEqual(consumed, len(encoded))
            self.assertEqual(decoded, binput)

            # transform interface: the result type follows the receiver
            boutput = binput.transform(encoding)
            aoutput = ainput.transform(encoding)
            self.assertEqual(boutput, aoutput)
            self.assertIsInstance(boutput, bytes)
            self.assertIsInstance(aoutput, bytearray)

            bback = boutput.untransform(encoding)
            aback = aoutput.untransform(encoding)
            self.assertEqual(bback, aback)
            self.assertEqual(bback, binput)
            self.assertIsInstance(bback, bytes)
            self.assertIsInstance(aback, bytearray)

    def test_read(self):
        for encoding in bytes_transform_encodings:
            encoded = b"\x80".transform(encoding)
            stream = io.BytesIO(encoded)
            reader = codecs.getreader(encoding)(stream)
            decoded = reader.read()
            self.assertEqual(decoded, b"\x80")

    def test_readline(self):
        # NOTE(review): uu_codec and zlib_codec are excluded here without a
        # stated reason; presumably readline() does not suit them.
        skipped = ('uu_codec', 'zlib_codec')
        for encoding in bytes_transform_encodings:
            if encoding in skipped:
                continue
            encoded = b"\x80".transform(encoding)
            stream = io.BytesIO(encoded)
            reader = codecs.getreader(encoding)(stream)
            decoded = reader.readline()
            self.assertEqual(decoded, b"\x80")
def test_main():
support.run_unittest(
UTF32Test,
@ -1686,6 +1747,7 @@ def test_main():
TypesTest,
SurrogateEscapeTest,
BomTest,
TransformCodecTest,
)

View File

@ -10,6 +10,10 @@ What's New in Python 3.2 Beta 1?
Core and Builtins
-----------------
- Issue #7475: Added transform() and untransform() methods to both bytes
and string types. They can be used to access those codecs providing
bytes-to-bytes and string-to-string mappings.
- Issue #8685: Speed up set difference ``a - b`` when source set ``a`` is
much larger than operand ``b``. Patch by Andrew Bennetts.

View File

@ -2488,6 +2488,75 @@ bytearray_decode(PyObject *self, PyObject *args, PyObject *kwargs)
return PyUnicode_FromEncodedObject(self, encoding, errors);
}
PyDoc_STRVAR(transform__doc__,
"B.transform(encoding, errors='strict') -> bytearray\n\
\n\
Transform B using the codec registered for encoding. errors may be given\n\
to set a different error handling scheme.");

/* bytearray.transform(): run self through the named codec's encoder and
   return the outcome as a new bytearray.  A TypeError is set when the
   encoder's result is not a bytes object. */
static PyObject *
bytearray_transform(PyObject *self, PyObject *args, PyObject *kwargs)
{
    static char *kwlist[] = {"encoding", "errors", 0};
    const char *encoding = NULL;
    const char *errors = NULL;
    PyObject *encoded;
    PyObject *result = NULL;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:transform",
                                     kwlist, &encoding, &errors))
        return NULL;

    encoded = PyCodec_Encode(self, encoding, errors);
    if (encoded == NULL)
        return NULL;

    if (PyBytes_Check(encoded)) {
        /* Copy the bytes payload into a fresh bytearray. */
        result = PyByteArray_FromStringAndSize(PyBytes_AS_STRING(encoded),
                                               PyBytes_GET_SIZE(encoded));
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "encoder did not return a bytes object (type=%.400s)",
                     Py_TYPE(encoded)->tp_name);
    }
    /* The intermediate object is released on both paths. */
    Py_DECREF(encoded);
    return result;
}
PyDoc_STRVAR(untransform__doc__,
"B.untransform(encoding, errors='strict') -> bytearray\n\
\n\
Reverse-transform B using the codec registered for encoding. errors may\n\
be given to set a different error handling scheme.");

/* bytearray.untransform(): run self through the named codec's decoder and
   return the outcome as a new bytearray.  A TypeError is set when the
   decoder's result is not a bytes object. */
static PyObject *
bytearray_untransform(PyObject *self, PyObject *args, PyObject *kwargs)
{
    static char *kwlist[] = {"encoding", "errors", 0};
    const char *encoding = NULL;
    const char *errors = NULL;
    PyObject *decoded;
    PyObject *result = NULL;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:untransform",
                                     kwlist, &encoding, &errors))
        return NULL;

    decoded = PyCodec_Decode(self, encoding, errors);
    if (decoded == NULL)
        return NULL;

    if (PyBytes_Check(decoded)) {
        /* Copy the bytes payload into a fresh bytearray. */
        result = PyByteArray_FromStringAndSize(PyBytes_AS_STRING(decoded),
                                               PyBytes_GET_SIZE(decoded));
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "decoder did not return a bytes object (type=%.400s)",
                     Py_TYPE(decoded)->tp_name);
    }
    /* The intermediate object is released on both paths. */
    Py_DECREF(decoded);
    return result;
}
PyDoc_STRVAR(alloc_doc,
"B.__alloc__() -> int\n\
\n\
@ -2782,8 +2851,12 @@ bytearray_methods[] = {
{"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
_Py_swapcase__doc__},
{"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
{"transform", (PyCFunction)bytearray_transform, METH_VARARGS | METH_KEYWORDS,
transform__doc__},
{"translate", (PyCFunction)bytearray_translate, METH_VARARGS,
translate__doc__},
{"untransform", (PyCFunction)bytearray_untransform, METH_VARARGS | METH_KEYWORDS,
untransform__doc__},
{"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
{"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
{NULL}

View File

@ -2312,6 +2312,68 @@ bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
return PyUnicode_FromEncodedObject(self, encoding, errors);
}
PyDoc_STRVAR(transform__doc__,
"B.transform(encoding, errors='strict') -> bytes\n\
\n\
Transform B using the codec registered for encoding. errors may be given\n\
to set a different error handling scheme.");

/* bytes.transform(): run self through the named codec's encoder and hand
   back the encoder's result.  A TypeError is set when that result is not
   a bytes object. */
static PyObject *
bytes_transform(PyObject *self, PyObject *args, PyObject *kwargs)
{
    static char *kwlist[] = {"encoding", "errors", 0};
    const char *encoding = NULL;
    const char *errors = NULL;
    PyObject *encoded;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:transform",
                                     kwlist, &encoding, &errors))
        return NULL;

    encoded = PyCodec_Encode(self, encoding, errors);
    if (encoded == NULL)
        return NULL;
    if (PyBytes_Check(encoded))
        return encoded;

    /* Wrong result type: report it, then drop the object. */
    PyErr_Format(PyExc_TypeError,
                 "encoder did not return a bytes object (type=%.400s)",
                 Py_TYPE(encoded)->tp_name);
    Py_DECREF(encoded);
    return NULL;
}
PyDoc_STRVAR(untransform__doc__,
"B.untransform(encoding, errors='strict') -> bytes\n\
\n\
Reverse-transform B using the codec registered for encoding. errors may\n\
be given to set a different error handling scheme.");

/* bytes.untransform(): run self through the named codec's decoder and hand
   back the decoder's result.  A TypeError is set when that result is not
   a bytes object. */
static PyObject *
bytes_untransform(PyObject *self, PyObject *args, PyObject *kwargs)
{
    static char *kwlist[] = {"encoding", "errors", 0};
    const char *encoding = NULL;
    const char *errors = NULL;
    PyObject *decoded;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:untransform",
                                     kwlist, &encoding, &errors))
        return NULL;

    decoded = PyCodec_Decode(self, encoding, errors);
    if (decoded == NULL)
        return NULL;
    if (PyBytes_Check(decoded))
        return decoded;

    /* Wrong result type: report it, then drop the object. */
    PyErr_Format(PyExc_TypeError,
                 "decoder did not return a bytes object (type=%.400s)",
                 Py_TYPE(decoded)->tp_name);
    Py_DECREF(decoded);
    return NULL;
}
PyDoc_STRVAR(splitlines__doc__,
"B.splitlines([keepends]) -> list of lines\n\
@ -2475,8 +2537,10 @@ bytes_methods[] = {
{"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
_Py_swapcase__doc__},
{"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
{"transform", (PyCFunction)bytes_transform, METH_VARARGS | METH_KEYWORDS, transform__doc__},
{"translate", (PyCFunction)bytes_translate, METH_VARARGS,
translate__doc__},
{"untransform", (PyCFunction)bytes_untransform, METH_VARARGS | METH_KEYWORDS, untransform__doc__},
{"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
{"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
{"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,

View File

@ -7432,6 +7432,7 @@ unicode_encode(PyUnicodeObject *self, PyObject *args, PyObject *kwargs)
v = PyUnicode_AsEncodedString((PyObject *)self, encoding, errors);
if (v == NULL)
goto onError;
/* XXX this check is redundant */
if (!PyBytes_Check(v)) {
PyErr_Format(PyExc_TypeError,
"encoder did not return a bytes object "
@ -7446,6 +7447,44 @@ unicode_encode(PyUnicodeObject *self, PyObject *args, PyObject *kwargs)
return NULL;
}
PyDoc_STRVAR(transform__doc__,
"S.transform(encoding, errors='strict') -> str\n\
\n\
Transform S using the codec registered for encoding. errors may be given\n\
to set a different error handling scheme.");

/* str.transform(): parse (encoding, errors) and delegate to
   PyUnicode_AsEncodedUnicode(), returning its result unchanged. */
static PyObject *
unicode_transform(PyUnicodeObject *self, PyObject *args, PyObject *kwargs)
{
    static char *kwlist[] = {"encoding", "errors", 0};
    char *encoding = NULL;
    char *errors = NULL;
    PyObject *result;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:transform",
                                     kwlist, &encoding, &errors))
        return NULL;

    result = PyUnicode_AsEncodedUnicode((PyObject *)self, encoding, errors);
    return result;
}
PyDoc_STRVAR(untransform__doc__,
"S.untransform(encoding, errors='strict') -> str\n\
\n\
Reverse-transform S using the codec registered for encoding. errors may be\n\
given to set a different error handling scheme.");

/* str.untransform(): parse (encoding, errors) and delegate to
   PyUnicode_AsDecodedUnicode(), returning its result unchanged. */
static PyObject *
unicode_untransform(PyUnicodeObject *self, PyObject *args, PyObject *kwargs)
{
    static char *kwlist[] = {"encoding", "errors", 0};
    char *encoding = NULL;
    char *errors = NULL;
    PyObject *result;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:untransform",
                                     kwlist, &encoding, &errors))
        return NULL;

    result = PyUnicode_AsDecodedUnicode((PyObject *)self, encoding, errors);
    return result;
}
PyDoc_STRVAR(expandtabs__doc__,
"S.expandtabs([tabsize]) -> str\n\
\n\
@ -9091,7 +9130,8 @@ static PyMethodDef unicode_methods[] = {
/* Order is according to common usage: often used methods should
appear first, since lookup is done sequentially. */
{"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
{"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS,
encode__doc__},
{"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__},
{"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__},
{"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
@ -9136,6 +9176,10 @@ static PyMethodDef unicode_methods[] = {
{"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
{"maketrans", (PyCFunction) unicode_maketrans,
METH_VARARGS | METH_STATIC, maketrans__doc__},
{"transform", (PyCFunction) unicode_transform, METH_VARARGS | METH_KEYWORDS,
transform__doc__},
{"untransform", (PyCFunction) unicode_untransform, METH_VARARGS | METH_KEYWORDS,
untransform__doc__},
{"__sizeof__", (PyCFunction) unicode__sizeof__, METH_NOARGS, sizeof__doc__},
#if 0
{"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},