bpo-39389: gzip: fix compression level metadata (GH-18077)
As described in RFC 1952, section 2.3.1, the XFL (eXtra FLags) byte of a gzip member header should indicate whether the DEFLATE algorithm was tuned for speed or compression ratio. Prior to this patch, archives emitted by the `gzip` module always indicated maximum compression.
This commit is contained in:
parent
85ead4fc62
commit
eab3b3f1c6
12
Lib/gzip.py
12
Lib/gzip.py
|
@@ -209,7 +209,7 @@ class GzipFile(_compression.BaseStream):
|
||||||
self.fileobj = fileobj
|
self.fileobj = fileobj
|
||||||
|
|
||||||
if self.mode == WRITE:
|
if self.mode == WRITE:
|
||||||
self._write_gzip_header()
|
self._write_gzip_header(compresslevel)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def filename(self):
|
def filename(self):
|
||||||
|
@@ -236,7 +236,7 @@ class GzipFile(_compression.BaseStream):
|
||||||
self.bufsize = 0
|
self.bufsize = 0
|
||||||
self.offset = 0 # Current file offset for seek(), tell(), etc
|
self.offset = 0 # Current file offset for seek(), tell(), etc
|
||||||
|
|
||||||
def _write_gzip_header(self):
|
def _write_gzip_header(self, compresslevel):
|
||||||
self.fileobj.write(b'\037\213') # magic header
|
self.fileobj.write(b'\037\213') # magic header
|
||||||
self.fileobj.write(b'\010') # compression method
|
self.fileobj.write(b'\010') # compression method
|
||||||
try:
|
try:
|
||||||
|
@@ -257,7 +257,13 @@ class GzipFile(_compression.BaseStream):
|
||||||
if mtime is None:
|
if mtime is None:
|
||||||
mtime = time.time()
|
mtime = time.time()
|
||||||
write32u(self.fileobj, int(mtime))
|
write32u(self.fileobj, int(mtime))
|
||||||
self.fileobj.write(b'\002')
|
if compresslevel == _COMPRESS_LEVEL_BEST:
|
||||||
|
xfl = b'\002'
|
||||||
|
elif compresslevel == _COMPRESS_LEVEL_FAST:
|
||||||
|
xfl = b'\004'
|
||||||
|
else:
|
||||||
|
xfl = b'\000'
|
||||||
|
self.fileobj.write(xfl)
|
||||||
self.fileobj.write(b'\377')
|
self.fileobj.write(b'\377')
|
||||||
if fname:
|
if fname:
|
||||||
self.fileobj.write(fname + b'\000')
|
self.fileobj.write(fname + b'\000')
|
||||||
|
|
|
@@ -358,6 +358,26 @@ class TestGzip(BaseTest):
|
||||||
isizeBytes = fRead.read(4)
|
isizeBytes = fRead.read(4)
|
||||||
self.assertEqual(isizeBytes, struct.pack('<i', len(data1)))
|
self.assertEqual(isizeBytes, struct.pack('<i', len(data1)))
|
||||||
|
|
||||||
|
def test_compresslevel_metadata(self):
|
||||||
|
# see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html
|
||||||
|
# specifically, discussion of XFL in section 2.3.1
|
||||||
|
cases = [
|
||||||
|
('fast', 1, b'\x04'),
|
||||||
|
('best', 9, b'\x02'),
|
||||||
|
('tradeoff', 6, b'\x00'),
|
||||||
|
]
|
||||||
|
xflOffset = 8
|
||||||
|
|
||||||
|
for (name, level, expectedXflByte) in cases:
|
||||||
|
with self.subTest(name):
|
||||||
|
fWrite = gzip.GzipFile(self.filename, 'w', compresslevel=level)
|
||||||
|
with fWrite:
|
||||||
|
fWrite.write(data1)
|
||||||
|
with open(self.filename, 'rb') as fRead:
|
||||||
|
fRead.seek(xflOffset)
|
||||||
|
xflByte = fRead.read(1)
|
||||||
|
self.assertEqual(xflByte, expectedXflByte)
|
||||||
|
|
||||||
def test_with_open(self):
|
def test_with_open(self):
|
||||||
# GzipFile supports the context management protocol
|
# GzipFile supports the context management protocol
|
||||||
with gzip.GzipFile(self.filename, "wb") as f:
|
with gzip.GzipFile(self.filename, "wb") as f:
|
||||||
|
|
|
@@ -0,0 +1,2 @@
|
||||||
|
Write accurate compression level metadata in :mod:`gzip` archives, rather
|
||||||
|
than always signaling maximum compression.
|
Loading…
Reference in New Issue