cpython/Demo/zlib/minigzip.py

135 lines
4.3 KiB
Python
Executable File

#!/usr/bin/env python3
# Demo program for zlib; it compresses or decompresses files, but *doesn't*
# delete the original. This doesn't support all of gzip's options.
#
# The 'gzip' module in the standard library provides a more complete
# implementation of gzip-format files.
import zlib, sys, os
# Bit masks for the flag byte of the gzip header.  compress() writes FNAME
# as the flag byte; decompress() tests FEXTRA, FNAME, FCOMMENT and FHCRC to
# decide which optional header fields it has to skip.
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
def write32(output, value):
    """Write the low 32 bits of *value* to *output*, least significant
    byte first.

    output -- binary file object; receives four one-byte writes.
    value  -- integer; bits above the low 32 are ignored.
    """
    for shift in (0, 8, 16, 24):
        # Shifting right by 8*k is the same as floor-dividing by 256 k times.
        low_byte = (value >> shift) & 255
        output.write(bytes([low_byte]))
def read32(input):
    """Read four bytes from *input* and return them as an unsigned 32-bit
    integer, least significant byte first.

    input -- binary file object positioned at the value to read.

    Raises EOFError if fewer than four bytes are left, instead of the
    confusing TypeError that ord(b'') would produce on a truncated file.
    """
    data = input.read(4)
    if len(data) != 4:
        raise EOFError('expected 4 bytes for a 32-bit value, got %d'
                       % len(data))
    return int.from_bytes(data, 'little')
def compress(filename, input, output):
    """Compress *input* and write it to *output* as a gzip stream.

    filename -- path of the file being compressed.  It is stat'ed for the
                modification time, and its base name is stored in the
                header.
    input    -- binary file object the uncompressed data is read from.
    output   -- binary file object the gzip stream is written to.
    """
    # Stat first so that a failure here does not leave a partial header
    # behind in the output.
    statval = os.stat(filename)
    output.write(b'\037\213\010')        # Write the header, ...
    output.write(bytes([FNAME]))         # ... flag byte ...
    write32(output, statval[8])          # ... modification time ...
    output.write(b'\002')                # ... slowest compression alg. ...
    output.write(b'\377')                # ... OS (=unknown) ...
    # ... original filename.  The gzip format wants the name with any
    # directory components removed, so only the base name is stored.
    bfilename = os.fsencode(os.path.basename(filename))
    output.write(bfilename + b'\000')

    crcval = zlib.crc32(b'')
    length = 0
    # Negative wbits asks zlib for a raw deflate stream; the gzip header
    # and trailer are written by hand here.
    compobj = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
                               zlib.DEF_MEM_LEVEL, 0)
    while True:
        data = input.read(1024)
        if not data:
            break
        length += len(data)
        crcval = zlib.crc32(data, crcval)
        output.write(compobj.compress(data))
    output.write(compobj.flush())
    write32(output, crcval)              # ... the CRC ...
    # ... and the size, counted from the bytes actually compressed (not the
    # stat result, which can disagree with the stream) and reduced to the
    # 32 bits the trailer has room for.
    write32(output, length & 0xFFFFFFFF)
def decompress(input, output):
    """Decompress the gzip stream in *input* and write the data to *output*.

    input  -- seekable binary file object positioned at the start of the
              gzip file (the trailer is re-read with a seek from the end).
    output -- binary file object that receives the decompressed data.

    If the header is not a complete, recognised gzip header, a message is
    printed and the program exits with status 1.  A CRC or length mismatch
    in the trailer is only reported with a message.
    """
    def read_header(count):
        # Read exactly *count* header bytes.  A short read means the file
        # ends inside the header; bail out instead of looping forever or
        # failing with a TypeError further down.
        data = input.read(count)
        if len(data) != count:
            print('Truncated gzip header')
            sys.exit(1)
        return data

    def skip_cstring():
        # Read and discard a null-terminated string.
        while read_header(1) != b'\0':
            pass

    magic = input.read(2)
    if magic != b'\037\213':
        print('Not a gzipped file')
        sys.exit(1)
    if read_header(1)[0] != 8:
        print('Unknown compression method')
        sys.exit(1)
    flag = read_header(1)[0]
    read_header(4 + 1 + 1)              # Discard modification time,
                                        # extra flags, and OS byte.
    if flag & FEXTRA:
        # Read & discard the extra field, if present.  Its length is a
        # 16-bit value stored least significant byte first.
        xlen = int.from_bytes(read_header(2), 'little')
        read_header(xlen)
    if flag & FNAME:
        skip_cstring()                  # Discard the original filename
    if flag & FCOMMENT:
        skip_cstring()                  # Discard the comment
    if flag & FHCRC:
        read_header(2)                  # Discard the 16-bit header CRC

    # Negative wbits: the data is a raw deflate stream, since the gzip
    # header has already been consumed above.
    decompobj = zlib.decompressobj(-zlib.MAX_WBITS)
    crcval = zlib.crc32(b'')
    length = 0
    while True:
        data = input.read(1024)
        if not data:
            break
        decompdata = decompobj.decompress(data)
        output.write(decompdata)
        length += len(decompdata)
        crcval = zlib.crc32(decompdata, crcval)
    decompdata = decompobj.flush()
    output.write(decompdata)
    length += len(decompdata)
    crcval = zlib.crc32(decompdata, crcval)

    # We've read to the end of the file, so we have to rewind in order
    # to reread the 8 bytes containing the CRC and the file size.  The
    # decompressor is smart and knows when to stop, so feeding it
    # extra data is harmless.
    input.seek(-8, 2)
    crc32 = read32(input)
    isize = read32(input)
    if crc32 != crcval:
        print('CRC check failed.')
    # The stored size only has room for 32 bits, so compare against the
    # produced length reduced the same way.
    if isize != (length & 0xFFFFFFFF):
        print('Incorrect length of data produced')
def main():
    """Compress or decompress the single file named on the command line.

    A name ending in '.gz' is decompressed into the name without that
    suffix; any other name is compressed into the name plus '.gz'.  The
    original file is left in place.

    Exits with status 2 after printing a usage message when the argument
    count is wrong.
    """
    if len(sys.argv) != 2:
        print('Usage: minigzip.py <filename>')
        print(' The file will be compressed or decompressed.')
        # Being called with the wrong arguments is an error, so do not
        # report success to the caller.
        sys.exit(2)

    filename = sys.argv[1]
    compressing = not filename.endswith('.gz')
    if compressing:
        outputname = filename + '.gz'
    else:
        outputname = filename[:-3]

    # The with-statement closes both files even when compress() or
    # decompress() raises or calls sys.exit().
    with open(filename, 'rb') as infile, open(outputname, 'wb') as outfile:
        if compressing:
            compress(filename, infile, outfile)
        else:
            decompress(infile, outfile)
# Run the demo only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()