bpo-39389: gzip: fix compression level metadata (GH-18077)
As described in RFC 1952, section 2.3.1, the XFL (eXtra FLags) byte of a
gzip member header should indicate whether the DEFLATE algorithm was
tuned for speed or compression ratio. Prior to this patch, archives
emitted by the `gzip` module always indicated maximum compression.
(cherry picked from commit eab3b3f1c6)
Co-authored-by: William Chargin <wchargin@gmail.com>
This commit is contained in:
parent
060ad2fc15
commit
ab0d8e356e
12
Lib/gzip.py
12
Lib/gzip.py
|
@ -201,7 +201,7 @@ class GzipFile(_compression.BaseStream):
|
|||
self.fileobj = fileobj
|
||||
|
||||
if self.mode == WRITE:
|
||||
self._write_gzip_header()
|
||||
self._write_gzip_header(compresslevel)
|
||||
|
||||
@property
|
||||
def filename(self):
|
||||
|
@ -228,7 +228,7 @@ class GzipFile(_compression.BaseStream):
|
|||
self.bufsize = 0
|
||||
self.offset = 0 # Current file offset for seek(), tell(), etc
|
||||
|
||||
def _write_gzip_header(self):
|
||||
def _write_gzip_header(self, compresslevel):
|
||||
self.fileobj.write(b'\037\213') # magic header
|
||||
self.fileobj.write(b'\010') # compression method
|
||||
try:
|
||||
|
@ -249,7 +249,13 @@ class GzipFile(_compression.BaseStream):
|
|||
if mtime is None:
|
||||
mtime = time.time()
|
||||
write32u(self.fileobj, int(mtime))
|
||||
self.fileobj.write(b'\002')
|
||||
if compresslevel == _COMPRESS_LEVEL_BEST:
|
||||
xfl = b'\002'
|
||||
elif compresslevel == _COMPRESS_LEVEL_FAST:
|
||||
xfl = b'\004'
|
||||
else:
|
||||
xfl = b'\000'
|
||||
self.fileobj.write(xfl)
|
||||
self.fileobj.write(b'\377')
|
||||
if fname:
|
||||
self.fileobj.write(fname + b'\000')
|
||||
|
|
|
@ -358,6 +358,26 @@ class TestGzip(BaseTest):
|
|||
isizeBytes = fRead.read(4)
|
||||
self.assertEqual(isizeBytes, struct.pack('<i', len(data1)))
|
||||
|
||||
def test_compresslevel_metadata(self):
|
||||
# see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html
|
||||
# specifically, discussion of XFL in section 2.3.1
|
||||
cases = [
|
||||
('fast', 1, b'\x04'),
|
||||
('best', 9, b'\x02'),
|
||||
('tradeoff', 6, b'\x00'),
|
||||
]
|
||||
xflOffset = 8
|
||||
|
||||
for (name, level, expectedXflByte) in cases:
|
||||
with self.subTest(name):
|
||||
fWrite = gzip.GzipFile(self.filename, 'w', compresslevel=level)
|
||||
with fWrite:
|
||||
fWrite.write(data1)
|
||||
with open(self.filename, 'rb') as fRead:
|
||||
fRead.seek(xflOffset)
|
||||
xflByte = fRead.read(1)
|
||||
self.assertEqual(xflByte, expectedXflByte)
|
||||
|
||||
def test_with_open(self):
|
||||
# GzipFile supports the context management protocol
|
||||
with gzip.GzipFile(self.filename, "wb") as f:
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Write accurate compression level metadata in :mod:`gzip` archives, rather
|
||||
than always signaling maximum compression.
|
Loading…
Reference in New Issue