merge heads

This commit is contained in:
Benjamin Peterson 2011-12-10 17:55:31 -05:00
commit 8bbe788deb
7 changed files with 153 additions and 48 deletions

View File

@ -13,12 +13,12 @@
-------------- --------------
The :mod:`tarfile` module makes it possible to read and write tar The :mod:`tarfile` module makes it possible to read and write tar
archives, including those using gzip or bz2 compression. archives, including those using gzip, bz2 and lzma compression.
(:file:`.zip` files can be read and written using the :mod:`zipfile` module.) (:file:`.zip` files can be read and written using the :mod:`zipfile` module.)
Some facts and figures: Some facts and figures:
* reads and writes :mod:`gzip` and :mod:`bz2` compressed archives. * reads and writes :mod:`gzip`, :mod:`bz2` and :mod:`lzma` compressed archives.
* read/write support for the POSIX.1-1988 (ustar) format. * read/write support for the POSIX.1-1988 (ustar) format.
@ -55,6 +55,8 @@ Some facts and figures:
+------------------+---------------------------------------------+ +------------------+---------------------------------------------+
| ``'r:bz2'`` | Open for reading with bzip2 compression. | | ``'r:bz2'`` | Open for reading with bzip2 compression. |
+------------------+---------------------------------------------+ +------------------+---------------------------------------------+
| ``'r:xz'`` | Open for reading with lzma compression. |
+------------------+---------------------------------------------+
| ``'a' or 'a:'`` | Open for appending with no compression. The | | ``'a' or 'a:'`` | Open for appending with no compression. The |
| | file is created if it does not exist. | | | file is created if it does not exist. |
+------------------+---------------------------------------------+ +------------------+---------------------------------------------+
@ -64,11 +66,13 @@ Some facts and figures:
+------------------+---------------------------------------------+ +------------------+---------------------------------------------+
| ``'w:bz2'`` | Open for bzip2 compressed writing. | | ``'w:bz2'`` | Open for bzip2 compressed writing. |
+------------------+---------------------------------------------+ +------------------+---------------------------------------------+
| ``'w:xz'`` | Open for lzma compressed writing. |
+------------------+---------------------------------------------+
Note that ``'a:gz'`` or ``'a:bz2'`` is not possible. If *mode* is not suitable Note that ``'a:gz'``, ``'a:bz2'`` or ``'a:xz'`` is not possible. If *mode*
to open a certain (compressed) file for reading, :exc:`ReadError` is raised. Use is not suitable to open a certain (compressed) file for reading,
*mode* ``'r'`` to avoid this. If a compression method is not supported, :exc:`ReadError` is raised. Use *mode* ``'r'`` to avoid this. If a
:exc:`CompressionError` is raised. compression method is not supported, :exc:`CompressionError` is raised.
If *fileobj* is specified, it is used as an alternative to a :term:`file object` If *fileobj* is specified, it is used as an alternative to a :term:`file object`
opened in binary mode for *name*. It is supposed to be at position 0. opened in binary mode for *name*. It is supposed to be at position 0.
@ -99,6 +103,9 @@ Some facts and figures:
| ``'r|bz2'`` | Open a bzip2 compressed *stream* for | | ``'r|bz2'`` | Open a bzip2 compressed *stream* for |
| | reading. | | | reading. |
+-------------+--------------------------------------------+ +-------------+--------------------------------------------+
| ``'r|xz'`` | Open an lzma compressed *stream* for |
| | reading. |
+-------------+--------------------------------------------+
| ``'w|'`` | Open an uncompressed *stream* for writing. | | ``'w|'`` | Open an uncompressed *stream* for writing. |
+-------------+--------------------------------------------+ +-------------+--------------------------------------------+
| ``'w|gz'`` | Open a gzip compressed *stream* for | | ``'w|gz'`` | Open a gzip compressed *stream* for |
@ -107,6 +114,9 @@ Some facts and figures:
| ``'w|bz2'`` | Open a bzip2 compressed *stream* for | | ``'w|bz2'`` | Open a bzip2 compressed *stream* for |
| | writing. | | | writing. |
+-------------+--------------------------------------------+ +-------------+--------------------------------------------+
| ``'w|xz'`` | Open an lzma compressed *stream* for |
| | writing. |
+-------------+--------------------------------------------+
.. class:: TarFile .. class:: TarFile

View File

@ -359,8 +359,7 @@ class FTP:
conn.close() conn.close()
raise raise
else: else:
sock = self.makeport() with self.makeport() as sock:
try:
if rest is not None: if rest is not None:
self.sendcmd("REST %s" % rest) self.sendcmd("REST %s" % rest)
resp = self.sendcmd(cmd) resp = self.sendcmd(cmd)
@ -372,8 +371,6 @@ class FTP:
conn, sockaddr = sock.accept() conn, sockaddr = sock.accept()
if self.timeout is not _GLOBAL_DEFAULT_TIMEOUT: if self.timeout is not _GLOBAL_DEFAULT_TIMEOUT:
conn.settimeout(self.timeout) conn.settimeout(self.timeout)
finally:
sock.close()
if resp[:3] == '150': if resp[:3] == '150':
# this is conditional in case we received a 125 # this is conditional in case we received a 125
size = parse150(resp) size = parse150(resp)
@ -753,8 +750,7 @@ else:
def retrbinary(self, cmd, callback, blocksize=8192, rest=None): def retrbinary(self, cmd, callback, blocksize=8192, rest=None):
self.voidcmd('TYPE I') self.voidcmd('TYPE I')
conn = self.transfercmd(cmd, rest) with self.transfercmd(cmd, rest) as conn:
try:
while 1: while 1:
data = conn.recv(blocksize) data = conn.recv(blocksize)
if not data: if not data:
@ -763,8 +759,6 @@ else:
# shutdown ssl layer # shutdown ssl layer
if isinstance(conn, ssl.SSLSocket): if isinstance(conn, ssl.SSLSocket):
conn.unwrap() conn.unwrap()
finally:
conn.close()
return self.voidresp() return self.voidresp()
def retrlines(self, cmd, callback = None): def retrlines(self, cmd, callback = None):
@ -772,7 +766,7 @@ else:
resp = self.sendcmd('TYPE A') resp = self.sendcmd('TYPE A')
conn = self.transfercmd(cmd) conn = self.transfercmd(cmd)
fp = conn.makefile('r', encoding=self.encoding) fp = conn.makefile('r', encoding=self.encoding)
try: with fp, conn:
while 1: while 1:
line = fp.readline() line = fp.readline()
if self.debugging > 2: print('*retr*', repr(line)) if self.debugging > 2: print('*retr*', repr(line))
@ -786,15 +780,11 @@ else:
# shutdown ssl layer # shutdown ssl layer
if isinstance(conn, ssl.SSLSocket): if isinstance(conn, ssl.SSLSocket):
conn.unwrap() conn.unwrap()
finally:
fp.close()
conn.close()
return self.voidresp() return self.voidresp()
def storbinary(self, cmd, fp, blocksize=8192, callback=None, rest=None): def storbinary(self, cmd, fp, blocksize=8192, callback=None, rest=None):
self.voidcmd('TYPE I') self.voidcmd('TYPE I')
conn = self.transfercmd(cmd, rest) with self.transfercmd(cmd, rest) as conn:
try:
while 1: while 1:
buf = fp.read(blocksize) buf = fp.read(blocksize)
if not buf: break if not buf: break
@ -803,14 +793,11 @@ else:
# shutdown ssl layer # shutdown ssl layer
if isinstance(conn, ssl.SSLSocket): if isinstance(conn, ssl.SSLSocket):
conn.unwrap() conn.unwrap()
finally:
conn.close()
return self.voidresp() return self.voidresp()
def storlines(self, cmd, fp, callback=None): def storlines(self, cmd, fp, callback=None):
self.voidcmd('TYPE A') self.voidcmd('TYPE A')
conn = self.transfercmd(cmd) with self.transfercmd(cmd) as conn:
try:
while 1: while 1:
buf = fp.readline() buf = fp.readline()
if not buf: break if not buf: break
@ -822,8 +809,6 @@ else:
# shutdown ssl layer # shutdown ssl layer
if isinstance(conn, ssl.SSLSocket): if isinstance(conn, ssl.SSLSocket):
conn.unwrap() conn.unwrap()
finally:
conn.close()
return self.voidresp() return self.voidresp()
def abort(self): def abort(self):

View File

@ -420,10 +420,11 @@ class _Stream:
self.crc = zlib.crc32(b"") self.crc = zlib.crc32(b"")
if mode == "r": if mode == "r":
self._init_read_gz() self._init_read_gz()
self.exception = zlib.error
else: else:
self._init_write_gz() self._init_write_gz()
if comptype == "bz2": elif comptype == "bz2":
try: try:
import bz2 import bz2
except ImportError: except ImportError:
@ -431,8 +432,25 @@ class _Stream:
if mode == "r": if mode == "r":
self.dbuf = b"" self.dbuf = b""
self.cmp = bz2.BZ2Decompressor() self.cmp = bz2.BZ2Decompressor()
self.exception = IOError
else: else:
self.cmp = bz2.BZ2Compressor() self.cmp = bz2.BZ2Compressor()
elif comptype == "xz":
try:
import lzma
except ImportError:
raise CompressionError("lzma module is not available")
if mode == "r":
self.dbuf = b""
self.cmp = lzma.LZMADecompressor()
self.exception = lzma.LZMAError
else:
self.cmp = lzma.LZMACompressor()
elif comptype != "tar":
raise CompressionError("unknown compression type %r" % comptype)
except: except:
if not self._extfileobj: if not self._extfileobj:
self.fileobj.close() self.fileobj.close()
@ -584,7 +602,7 @@ class _Stream:
break break
try: try:
buf = self.cmp.decompress(buf) buf = self.cmp.decompress(buf)
except IOError: except self.exception:
raise ReadError("invalid compressed data") raise ReadError("invalid compressed data")
self.dbuf += buf self.dbuf += buf
c += len(buf) c += len(buf)
@ -622,11 +640,14 @@ class _StreamProxy(object):
return self.buf return self.buf
def getcomptype(self): def getcomptype(self):
if self.buf.startswith(b"\037\213\010"): if self.buf.startswith(b"\x1f\x8b\x08"):
return "gz" return "gz"
if self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY": elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
return "bz2" return "bz2"
return "tar" elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
return "xz"
else:
return "tar"
def close(self): def close(self):
self.fileobj.close() self.fileobj.close()
@ -1651,18 +1672,22 @@ class TarFile(object):
'r:' open for reading exclusively uncompressed 'r:' open for reading exclusively uncompressed
'r:gz' open for reading with gzip compression 'r:gz' open for reading with gzip compression
'r:bz2' open for reading with bzip2 compression 'r:bz2' open for reading with bzip2 compression
'r:xz' open for reading with lzma compression
'a' or 'a:' open for appending, creating the file if necessary 'a' or 'a:' open for appending, creating the file if necessary
'w' or 'w:' open for writing without compression 'w' or 'w:' open for writing without compression
'w:gz' open for writing with gzip compression 'w:gz' open for writing with gzip compression
'w:bz2' open for writing with bzip2 compression 'w:bz2' open for writing with bzip2 compression
'w:xz' open for writing with lzma compression
'r|*' open a stream of tar blocks with transparent compression 'r|*' open a stream of tar blocks with transparent compression
'r|' open an uncompressed stream of tar blocks for reading 'r|' open an uncompressed stream of tar blocks for reading
'r|gz' open a gzip compressed stream of tar blocks 'r|gz' open a gzip compressed stream of tar blocks
'r|bz2' open a bzip2 compressed stream of tar blocks 'r|bz2' open a bzip2 compressed stream of tar blocks
'r|xz' open an lzma compressed stream of tar blocks
'w|' open an uncompressed stream for writing 'w|' open an uncompressed stream for writing
'w|gz' open a gzip compressed stream for writing 'w|gz' open a gzip compressed stream for writing
'w|bz2' open a bzip2 compressed stream for writing 'w|bz2' open a bzip2 compressed stream for writing
'w|xz' open an lzma compressed stream for writing
""" """
if not name and not fileobj: if not name and not fileobj:
@ -1780,11 +1805,40 @@ class TarFile(object):
t._extfileobj = False t._extfileobj = False
return t return t
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=9, **kwargs):
    """Open lzma compressed tar archive name for reading or writing.

    Appending is not allowed.

    name    -- path of the archive; handed to LZMAFile only when no
               fileobj is given, and always passed on to taropen().
    mode    -- "r" or "w"; any other value raises ValueError.
    fileobj -- optional file object to read from / write to instead of
               opening name.
    preset  -- lzma compression preset used for writing; it is dropped
               in read mode.
    kwargs  -- forwarded unchanged to taropen().

    Raises CompressionError if the lzma module cannot be imported, and
    ReadError if LZMAError or EOFError occurs while taropen() processes
    the archive.  Any other exception from taropen() propagates after
    the LZMAFile wrapper has been closed.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import lzma
    except ImportError:
        raise CompressionError("lzma module is not available")

    if mode == "r":
        # LZMAFile complains about a preset argument in read mode.
        preset = None

    # LZMAFile accepts either a path or an already-open file object as
    # its first argument; there is no separate ``fileobj`` keyword.
    source = name if fileobj is None else fileobj
    fileobj = lzma.LZMAFile(source, mode, preset=preset)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (lzma.LZMAError, EOFError):
        fileobj.close()
        raise ReadError("not an lzma file")
    except BaseException:
        # Do not leak the LZMAFile wrapper on unrelated failures.
        fileobj.close()
        raise
    # The LZMAFile was created here, so the TarFile owns (and closes) it.
    t._extfileobj = False
    return t
# All *open() methods are registered here. # All *open() methods are registered here.
OPEN_METH = { OPEN_METH = {
"tar": "taropen", # uncompressed tar "tar": "taropen", # uncompressed tar
"gz": "gzopen", # gzip compressed tar "gz": "gzopen", # gzip compressed tar
"bz2": "bz2open" # bzip2 compressed tar "bz2": "bz2open", # bzip2 compressed tar
"xz": "xzopen" # lzma compressed tar
} }
#-------------------------------------------------------------------------- #--------------------------------------------------------------------------

View File

@ -47,7 +47,7 @@ class MinidomTest(unittest.TestCase):
def checkWholeText(self, node, s): def checkWholeText(self, node, s):
t = node.wholeText t = node.wholeText
self.confirm(t == s, "looking for %s, found %s" % (repr(s), repr(t))) self.confirm(t == s, "looking for %r, found %r" % (s, t))
def testParseFromFile(self): def testParseFromFile(self):
with open(tstfile) as file: with open(tstfile) as file:

View File

@ -21,6 +21,10 @@ try:
import bz2 import bz2
except ImportError: except ImportError:
bz2 = None bz2 = None
try:
import lzma
except ImportError:
lzma = None
def md5sum(data): def md5sum(data):
return md5(data).hexdigest() return md5(data).hexdigest()
@ -29,6 +33,7 @@ TEMPDIR = os.path.abspath(support.TESTFN) + "-tardir"
tarname = support.findfile("testtar.tar") tarname = support.findfile("testtar.tar")
gzipname = os.path.join(TEMPDIR, "testtar.tar.gz") gzipname = os.path.join(TEMPDIR, "testtar.tar.gz")
bz2name = os.path.join(TEMPDIR, "testtar.tar.bz2") bz2name = os.path.join(TEMPDIR, "testtar.tar.bz2")
xzname = os.path.join(TEMPDIR, "testtar.tar.xz")
tmpname = os.path.join(TEMPDIR, "tmp.tar") tmpname = os.path.join(TEMPDIR, "tmp.tar")
md5_regtype = "65f477c818ad9e15f7feab0c6d37742f" md5_regtype = "65f477c818ad9e15f7feab0c6d37742f"
@ -201,13 +206,15 @@ class CommonReadTest(ReadTest):
_open = gzip.GzipFile _open = gzip.GzipFile
elif self.mode.endswith(":bz2"): elif self.mode.endswith(":bz2"):
_open = bz2.BZ2File _open = bz2.BZ2File
elif self.mode.endswith(":xz"):
_open = lzma.LZMAFile
else: else:
_open = open _open = io.FileIO
for char in (b'\0', b'a'): for char in (b'\0', b'a'):
# Test if EOFHeaderError ('\0') and InvalidHeaderError ('a') # Test if EOFHeaderError ('\0') and InvalidHeaderError ('a')
# are ignored correctly. # are ignored correctly.
with _open(tmpname, "wb") as fobj: with _open(tmpname, "w") as fobj:
fobj.write(char * 1024) fobj.write(char * 1024)
fobj.write(tarfile.TarInfo("foo").tobuf()) fobj.write(tarfile.TarInfo("foo").tobuf())
@ -222,9 +229,10 @@ class CommonReadTest(ReadTest):
class MiscReadTest(CommonReadTest): class MiscReadTest(CommonReadTest):
def test_no_name_argument(self): def test_no_name_argument(self):
if self.mode.endswith("bz2"): if self.mode.endswith(("bz2", "xz")):
# BZ2File has no name attribute. # BZ2File and LZMAFile have no name attribute.
return self.skipTest("no name attribute")
with open(self.tarname, "rb") as fobj: with open(self.tarname, "rb") as fobj:
tar = tarfile.open(fileobj=fobj, mode=self.mode) tar = tarfile.open(fileobj=fobj, mode=self.mode)
self.assertEqual(tar.name, os.path.abspath(fobj.name)) self.assertEqual(tar.name, os.path.abspath(fobj.name))
@ -265,10 +273,12 @@ class MiscReadTest(CommonReadTest):
_open = gzip.GzipFile _open = gzip.GzipFile
elif self.mode.endswith(":bz2"): elif self.mode.endswith(":bz2"):
_open = bz2.BZ2File _open = bz2.BZ2File
elif self.mode.endswith(":xz"):
_open = lzma.LZMAFile
else: else:
_open = open _open = io.FileIO
fobj = _open(self.tarname, "rb")
try: with _open(self.tarname) as fobj:
fobj.seek(offset) fobj.seek(offset)
# Test if the tarfile starts with the second member. # Test if the tarfile starts with the second member.
@ -281,8 +291,6 @@ class MiscReadTest(CommonReadTest):
self.assertEqual(tar.extractfile(t).read(), data, self.assertEqual(tar.extractfile(t).read(), data,
"seek back did not work") "seek back did not work")
tar.close() tar.close()
finally:
fobj.close()
def test_fail_comp(self): def test_fail_comp(self):
# For Gzip and Bz2 Tests: fail with a ReadError on an uncompressed file. # For Gzip and Bz2 Tests: fail with a ReadError on an uncompressed file.
@ -526,6 +534,18 @@ class DetectReadTest(unittest.TestCase):
testfunc(bz2name, "r|*") testfunc(bz2name, "r|*")
testfunc(bz2name, "r|bz2") testfunc(bz2name, "r|bz2")
if lzma:
self.assertRaises(tarfile.ReadError, tarfile.open, tarname, mode="r:xz")
self.assertRaises(tarfile.ReadError, tarfile.open, tarname, mode="r|xz")
self.assertRaises(tarfile.ReadError, tarfile.open, xzname, mode="r:")
self.assertRaises(tarfile.ReadError, tarfile.open, xzname, mode="r|")
testfunc(xzname, "r")
testfunc(xzname, "r:*")
testfunc(xzname, "r:xz")
testfunc(xzname, "r|*")
testfunc(xzname, "r|xz")
def test_detect_file(self): def test_detect_file(self):
self._test_modes(self._testfunc_file) self._test_modes(self._testfunc_file)
@ -1096,6 +1116,9 @@ class StreamWriteTest(WriteTestBase):
data = dec.decompress(data) data = dec.decompress(data)
self.assertTrue(len(dec.unused_data) == 0, self.assertTrue(len(dec.unused_data) == 0,
"found trailing data") "found trailing data")
elif self.mode.endswith("xz"):
with lzma.LZMAFile(tmpname) as fobj:
data = fobj.read()
else: else:
with open(tmpname, "rb") as fobj: with open(tmpname, "rb") as fobj:
data = fobj.read() data = fobj.read()
@ -1510,6 +1533,12 @@ class AppendTest(unittest.TestCase):
self._create_testtar("w:bz2") self._create_testtar("w:bz2")
self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, "a") self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, "a")
def test_append_lzma(self):
    # Opening an lzma-compressed archive in append mode must raise
    # ReadError; the test is skipped when lzma could not be imported.
    if lzma is None:
        self.skipTest("lzma module not available")
    self._create_testtar("w:xz")
    with self.assertRaises(tarfile.ReadError):
        tarfile.open(tmpname, "a")
# Append mode is supposed to fail if the tarfile to append to # Append mode is supposed to fail if the tarfile to append to
# does not end with a zero block. # does not end with a zero block.
def _test_error(self, data): def _test_error(self, data):
@ -1788,6 +1817,21 @@ class Bz2PartialReadTest(unittest.TestCase):
self._test_partial_input("r:bz2") self._test_partial_input("r:bz2")
# Re-runs the inherited MiscReadTest cases against the xz test archive
# (xzname), opened in "r:xz" mode.
class LzmaMiscReadTest(MiscReadTest):
tarname = xzname
mode = "r:xz"
# Re-runs the inherited UstarReadTest cases against the xz test archive
# (xzname), opened in "r:xz" mode.
class LzmaUstarReadTest(UstarReadTest):
tarname = xzname
mode = "r:xz"
# Re-runs the inherited StreamReadTest cases against the xz test archive
# (xzname), opened in stream mode "r|xz".
class LzmaStreamReadTest(StreamReadTest):
tarname = xzname
mode = "r|xz"
# Re-runs the inherited WriteTest cases with mode "w:xz".
class LzmaWriteTest(WriteTest):
mode = "w:xz"
# Re-runs the inherited StreamWriteTest cases with stream mode "w|xz".
class LzmaStreamWriteTest(StreamWriteTest):
mode = "w|xz"
def test_main(): def test_main():
support.unlink(TEMPDIR) support.unlink(TEMPDIR)
os.makedirs(TEMPDIR) os.makedirs(TEMPDIR)
@ -1850,6 +1894,20 @@ def test_main():
Bz2PartialReadTest, Bz2PartialReadTest,
] ]
if lzma:
# Create testtar.tar.xz and add lzma-specific tests.
support.unlink(xzname)
with lzma.LZMAFile(xzname, "w") as tar:
tar.write(data)
tests += [
LzmaMiscReadTest,
LzmaUstarReadTest,
LzmaStreamReadTest,
LzmaWriteTest,
LzmaStreamWriteTest,
]
try: try:
support.run_unittest(*tests) support.run_unittest(*tests)
finally: finally:

View File

@ -1,6 +1,5 @@
import xml.sax import xml.sax
import xml.sax.handler import xml.sax.handler
import types
START_ELEMENT = "START_ELEMENT" START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT" END_ELEMENT = "END_ELEMENT"
@ -334,10 +333,7 @@ def parse(stream_or_string, parser=None, bufsize=None):
return DOMEventStream(stream, parser, bufsize) return DOMEventStream(stream, parser, bufsize)
def parseString(string, parser=None): def parseString(string, parser=None):
try: from io import StringIO
from io import StringIO
except ImportError:
from io import StringIO
bufsize = len(string) bufsize = len(string)
buf = StringIO(string) buf = StringIO(string)

View File

@ -408,6 +408,8 @@ Library
- Alias resource.error to OSError ala PEP 3151. - Alias resource.error to OSError ala PEP 3151.
- Issue #5689: Add support for lzma compression to the tarfile module.
- Issue #13248: Turn 3.2's PendingDeprecationWarning into 3.3's - Issue #13248: Turn 3.2's PendingDeprecationWarning into 3.3's
DeprecationWarning. It covers 'cgi.escape', 'importlib.abc.PyLoader', DeprecationWarning. It covers 'cgi.escape', 'importlib.abc.PyLoader',
'importlib.abc.PyPycLoader', 'nntplib.NNTP.xgtitle', 'nntplib.NNTP.xpath', 'importlib.abc.PyPycLoader', 'nntplib.NNTP.xgtitle', 'nntplib.NNTP.xpath',