Issue #4272: Add an optional argument to the GzipFile constructor to override the timestamp in the gzip stream.

This commit is contained in:
Antoine Pitrou 2009-01-04 21:29:23 +00:00
parent 514d483a7d
commit f0d2c3f730
4 changed files with 94 additions and 5 deletions

View File

@ -24,7 +24,7 @@ For other archive formats, see the :mod:`bz2`, :mod:`zipfile`, and
The module defines the following items:
.. class:: GzipFile([filename[, mode[, compresslevel[, fileobj]]]])
.. class:: GzipFile([filename[, mode[, compresslevel[, fileobj[, mtime]]]]])
Constructor for the :class:`GzipFile` class, which simulates most of the methods
of a file object, with the exception of the :meth:`readinto` and
@ -52,6 +52,15 @@ The module defines the following items:
level of compression; ``1`` is fastest and produces the least compression, and
``9`` is slowest and produces the most compression. The default is ``9``.
The *mtime* argument is an optional numeric timestamp to be written to
the stream when compressing. All :program:`gzip` compressed streams are
required to contain a timestamp. If omitted or ``None``, the current
time is used. This module ignores the timestamp when decompressing;
however, some programs, such as :program:`gunzip`\ , make use of it.
The format of the timestamp is the same as that of the return value of
``time.time()`` and of the ``st_mtime`` member of the object returned
by ``os.stat()``.
Calling a :class:`GzipFile` object's :meth:`close` method does not close
*fileobj*, since you might wish to append more material after the compressed
data. This also allows you to pass a :class:`StringIO` object opened for

View File

@ -42,7 +42,7 @@ class GzipFile:
max_read_chunk = 10 * 1024 * 1024 # 10Mb
def __init__(self, filename=None, mode=None,
compresslevel=9, fileobj=None):
compresslevel=9, fileobj=None, mtime=None):
"""Constructor for the GzipFile class.
At least one of fileobj and filename must be given a
@ -69,6 +69,15 @@ class GzipFile:
level of compression; 1 is fastest and produces the least compression,
and 9 is slowest and produces the most compression. The default is 9.
The mtime argument is an optional numeric timestamp to be written
to the stream when compressing. All gzip compressed streams
are required to contain a timestamp. If omitted or None, the
current time is used. This module ignores the timestamp when
decompressing; however, some programs, such as gunzip, make use
of it. The format of the timestamp is the same as that of the
return value of time.time() and of the st_mtime member of the
object returned by os.stat().
"""
# guarantee the file is opened in binary mode on platforms
@ -107,6 +116,7 @@ class GzipFile:
self.fileobj = fileobj
self.offset = 0
self.mtime = mtime
if self.mode == WRITE:
self._write_gzip_header()
@ -140,7 +150,10 @@ class GzipFile:
if fname:
flags = FNAME
self.fileobj.write(chr(flags))
write32u(self.fileobj, long(time.time()))
mtime = self.mtime
if mtime is None:
mtime = time.time()
write32u(self.fileobj, long(mtime))
self.fileobj.write('\002')
self.fileobj.write('\377')
if fname:
@ -158,10 +171,10 @@ class GzipFile:
if method != 8:
raise IOError, 'Unknown compression method'
flag = ord( self.fileobj.read(1) )
# modtime = self.fileobj.read(4)
self.mtime = read32(self.fileobj)
# extraflag = self.fileobj.read(1)
# os = self.fileobj.read(1)
self.fileobj.read(6)
self.fileobj.read(2)
if flag & FEXTRA:
# Read & discard the extra field, if present

View File

@ -6,6 +6,7 @@ import unittest
from test import test_support
import os
import gzip
import struct
data1 = """ int length=DEFAULTALLOC, err = Z_OK;
@ -160,6 +161,67 @@ class TestGzip(unittest.TestCase):
self.assertEqual(f.name, self.filename)
f.close()
def test_mtime(self):
    """Check that an explicit mtime survives a write/read round trip.

    Writes data1 to self.filename with a fixed mtime, reads it back
    through GzipFile, and verifies both the payload and the ``mtime``
    attribute of the reader after the stream has been read.
    """
    mtime = 123456789

    fWrite = gzip.GzipFile(self.filename, 'w', mtime=mtime)
    try:
        fWrite.write(data1)
    finally:
        # Always release the handle, even if the write fails, so the
        # file is not left open for later cleanup.
        fWrite.close()

    fRead = gzip.GzipFile(self.filename)
    try:
        dataRead = fRead.read()
        self.assertEqual(dataRead, data1)
        self.assertTrue(hasattr(fRead, 'mtime'))
        self.assertEqual(fRead.mtime, mtime)
    finally:
        # A failing assertion must not leak the open file object.
        fRead.close()
def test_metadata(self):
    """Verify the raw header and trailer bytes of a written gzip file.

    Writes data1 to self.filename with a fixed mtime, then reopens the
    file as plain binary and checks each header field, the stored file
    name, and the 8-byte trailer (CRC32 followed by ISIZE).
    """
    mtime = 123456789

    fWrite = gzip.GzipFile(self.filename, 'w', mtime=mtime)
    try:
        fWrite.write(data1)
    finally:
        # Always release the handle, even if the write fails.
        fWrite.close()

    fRead = open(self.filename, 'rb')
    try:
        # see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html

        idBytes = fRead.read(2)
        self.assertEqual(idBytes, '\x1f\x8b') # gzip ID

        cmByte = fRead.read(1)
        self.assertEqual(cmByte, '\x08') # deflate

        flagsByte = fRead.read(1)
        self.assertEqual(flagsByte, '\x08') # only the FNAME flag is set

        # The writer emits the timestamp as an unsigned 32-bit value,
        # so compare against the unsigned little-endian encoding.
        mtimeBytes = fRead.read(4)
        self.assertEqual(mtimeBytes, struct.pack('<I', mtime)) # little-endian

        xflByte = fRead.read(1)
        self.assertEqual(xflByte, '\x02') # maximum compression

        osByte = fRead.read(1)
        self.assertEqual(osByte, '\xff') # OS "unknown" (OS-independent)

        # Since the FNAME flag is set, the zero-terminated filename follows.
        # RFC 1952 specifies that this is the name of the input file, if any.
        # However, the gzip module defaults to storing the name of the output
        # file in this field.
        nameBytes = fRead.read(len(self.filename) + 1)
        self.assertEqual(nameBytes, self.filename + '\x00')

        # Since no other flags were set, the header ends here.
        # Rather than process the compressed data, let's seek to the trailer.
        fRead.seek(os.stat(self.filename).st_size - 8)

        crc32Bytes = fRead.read(4) # CRC32 of uncompressed data [data1]
        self.assertEqual(crc32Bytes, '\xaf\xd7d\x83')

        # ISIZE is the uncompressed length modulo 2**32, i.e. unsigned.
        isizeBytes = fRead.read(4)
        self.assertEqual(isizeBytes, struct.pack('<I', len(data1)))
    finally:
        # A failing assertion must not leak the open file object.
        fRead.close()
def test_main(verbose=None):
    """Run the TestGzip test case through test_support's runner.

    The *verbose* argument is accepted but not used by this function.
    """
    test_support.run_unittest(TestGzip)

View File

@ -108,6 +108,11 @@ Core and Builtins
Library
-------
- Issue #4272: Add an optional argument to the GzipFile constructor to override
the timestamp in the gzip stream. The default value remains the current time.
The information can be used by e.g. gunzip when decompressing. Patch by
Jacques Frechet.
- Restore Python 2.3 compatibility for decimal.py.
- Issue #1702551: distutils sdist was not excluding VCS directories under