From 9a80c5dbc497211267062d39c3fb4e2d5a8e702d Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <python@rcn.com>
Date: Tue, 23 Sep 2003 20:21:01 +0000
Subject: [PATCH] Added codec for bz2 compression.

---
 Doc/lib/libcodecs.tex      |  5 +++
 Lib/encodings/aliases.py   |  3 ++
 Lib/encodings/bz2_codec.py | 64 ++++++++++++++++++++++++++++++++++++++
 Misc/NEWS                  |  3 ++
 4 files changed, 75 insertions(+)
 create mode 100644 Lib/encodings/bz2_codec.py

diff --git a/Doc/lib/libcodecs.tex b/Doc/lib/libcodecs.tex
index 284164333cf..c67bcccfb12 100644
--- a/Doc/lib/libcodecs.tex
+++ b/Doc/lib/libcodecs.tex
@@ -804,6 +804,11 @@ listed as operand type in the table.
          {byte string}
          {Convert operand to MIME base64}
 
+\lineiv{bz2_codec}
+         {bz2}
+         {byte string}
+         {Compress the operand using bz2}
+
 \lineiv{hex_codec}
          {hex}
          {byte string}
diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py
index 6620c2e4f46..b304f156386 100644
--- a/Lib/encodings/aliases.py
+++ b/Lib/encodings/aliases.py
@@ -41,6 +41,9 @@ aliases = {
     'base64'             : 'base64_codec',
     'base_64'            : 'base64_codec',
 
+    # bz2_codec codec
+    'bz2'                : 'bz2_codec',
+
     # cp037 codec
     'csibm037'           : 'cp037',
     'ebcdic_cp_ca'       : 'cp037',
diff --git a/Lib/encodings/bz2_codec.py b/Lib/encodings/bz2_codec.py
new file mode 100644
index 00000000000..3d1f75e6d45
--- /dev/null
+++ b/Lib/encodings/bz2_codec.py
@@ -0,0 +1,64 @@
+""" Python 'bz2_codec' Codec - bz2 compression encoding
+
+    Unlike most of the other codecs which target Unicode, this codec
+    will return Python string objects for both encode and decode.
+
+    Adapted by Raymond Hettinger from zlib_codec which was written
+    by Marc-Andre Lemburg (mal@lemburg.com).
+
+"""
+import codecs
+import bz2 # this codec needs the optional bz2 module !
+
+### Codec APIs
+
+def bz2_encode(input,errors='strict'):
+
+    """ Encodes the object input and returns a tuple (output
+        object, length consumed).
+
+        The output is bz2.compress(input) and the length consumed
+        is always len(input).  errors must be 'strict' (the default);
+        any other value fails the assertion below.
+
+    """
+    assert errors == 'strict'
+    output = bz2.compress(input)
+    return (output, len(input))
+
+def bz2_decode(input,errors='strict'):
+
+    """ Decodes the object input and returns a tuple (output
+        object, length consumed).
+
+        input must be an object which provides the bf_getreadbuf
+        buffer slot. Python strings, buffer objects and memory
+        mapped files are examples of objects providing this slot.
+
+        The output is bz2.decompress(input) and the length consumed
+        is always len(input).  errors must be 'strict' (the default);
+        any other value fails the assertion below.
+
+    """
+    assert errors == 'strict'
+    output = bz2.decompress(input)
+    return (output, len(input))
+
+class Codec(codecs.Codec):
+    """ Codec whose methods delegate to bz2_encode and bz2_decode. """
+    def encode(self, input, errors='strict'):
+        return bz2_encode(input, errors)
+    def decode(self, input, errors='strict'):
+        return bz2_decode(input, errors)
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    pass  # no overrides; Codec is listed first in the bases
+
+class StreamReader(Codec,codecs.StreamReader):
+    pass  # no overrides; Codec is listed first in the bases
+
+### encodings module API
+
+def getregentry():
+    """ Returns (bz2_encode, bz2_decode, StreamReader, StreamWriter). """
+    return (bz2_encode,bz2_decode,StreamReader,StreamWriter)
diff --git a/Misc/NEWS b/Misc/NEWS
index 2c9c9c873a9..9b06c700de6 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -46,6 +46,9 @@ Extension modules
 Library
 -------
 
+- encodings.bz2_codec was added for access to bz2 compression
+  using "a long string".encode('bz2')
+
 - Various improvements to unittest.py, realigned with PyUnit CVS.
 
 - dircache now passes exceptions to the caller, instead of returning