"""Functions that read and write gzipped files. The user of the file doesn't have to worry about the compression, but random access is not allowed.""" # based on Andrew Kuchling's minigzip.py distributed with the zlib module import struct, sys, time, os import zlib import builtins import io import _compression __all__ = ["BadGzipFile", "GzipFile", "open", "compress", "decompress"] FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16 READ, WRITE = 1, 2 _COMPRESS_LEVEL_FAST = 1 _COMPRESS_LEVEL_TRADEOFF = 6 _COMPRESS_LEVEL_BEST = 9 READ_BUFFER_SIZE = 128 * 1024 _WRITE_BUFFER_SIZE = 4 * io.DEFAULT_BUFFER_SIZE def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST, encoding=None, errors=None, newline=None): """Open a gzip-compressed file in binary or text mode. The filename argument can be an actual filename (a str or bytes object), or an existing file object to read from or write to. The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for binary mode, or "rt", "wt", "xt" or "at" for text mode. The default mode is "rb", and the default compresslevel is 9. For binary mode, this function is equivalent to the GzipFile constructor: GzipFile(filename, mode, compresslevel). In this case, the encoding, errors and newline arguments must not be provided. For text mode, a GzipFile object is created, and wrapped in an io.TextIOWrapper instance with the specified encoding, error handling behavior, and line ending(s). """ if "t" in mode: if "b" in mode: raise ValueError("Invalid mode: %r" % (mode,)) else: if encoding is not None: raise ValueError("Argument 'encoding' not supported in binary mode") if errors is not None: raise ValueError("Argument 'errors' not supported in binary mode") if newline is not None: raise ValueError("Argument 'newline' not supported in binary mode") gz_mode = mode.replace("t", "") if isinstance(filename, (str, bytes, os.PathLike)): binary_file = GzipFile(filename, gz_mode, compresslevel) elif hasattr(filename, "read") or hasattr(filename, "write"): binary_file = GzipFile(None, gz_mode, compresslevel, filename) else: raise TypeError("filename must be a str or bytes object, or a file") if "t" in mode: encoding = io.text_encoding(encoding) return io.TextIOWrapper(binary_file, encoding, errors, newline) else: return binary_file def write32u(output, value): # The L format writes the bit pattern correctly whether signed # or unsigned. output.write(struct.pack("' def _init_write(self, filename): self.name = filename self.crc = zlib.crc32(b"") self.size = 0 self.writebuf = [] self.bufsize = 0 self.offset = 0 # Current file offset for seek(), tell(), etc def tell(self): self._check_not_closed() self._buffer.flush() return super().tell() def _write_gzip_header(self, compresslevel): self.fileobj.write(b'\037\213') # magic header self.fileobj.write(b'\010') # compression method try: # RFC 1952 requires the FNAME field to be Latin-1. Do not # include filenames that cannot be represented that way. fname = os.path.basename(self.name) if not isinstance(fname, bytes): fname = fname.encode('latin-1') if fname.endswith(b'.gz'): fname = fname[:-3] except UnicodeEncodeError: fname = b'' flags = 0 if fname: flags = FNAME self.fileobj.write(chr(flags).encode('latin-1')) mtime = self._write_mtime if mtime is None: mtime = time.time() write32u(self.fileobj, int(mtime)) if compresslevel == _COMPRESS_LEVEL_BEST: xfl = b'\002' elif compresslevel == _COMPRESS_LEVEL_FAST: xfl = b'\004' else: xfl = b'\000' self.fileobj.write(xfl) self.fileobj.write(b'\377') if fname: self.fileobj.write(fname + b'\000') def write(self,data): self._check_not_closed() if self.mode != WRITE: import errno raise OSError(errno.EBADF, "write() on read-only GzipFile object") if self.fileobj is None: raise ValueError("write() on closed GzipFile object") return self._buffer.write(data) def _write_raw(self, data): # Called by our self._buffer underlying WriteBufferStream. if isinstance(data, (bytes, bytearray)): length = len(data) else: # accept any data that supports the buffer protocol data = memoryview(data) length = data.nbytes if length > 0: self.fileobj.write(self.compress.compress(data)) self.size += length self.crc = zlib.crc32(data, self.crc) self.offset += length return length def read(self, size=-1): self._check_not_closed() if self.mode != READ: import errno raise OSError(errno.EBADF, "read() on write-only GzipFile object") return self._buffer.read(size) def read1(self, size=-1): """Implements BufferedIOBase.read1() Reads up to a buffer's worth of data if size is negative.""" self._check_not_closed() if self.mode != READ: import errno raise OSError(errno.EBADF, "read1() on write-only GzipFile object") if size < 0: size = io.DEFAULT_BUFFER_SIZE return self._buffer.read1(size) def peek(self, n): self._check_not_closed() if self.mode != READ: import errno raise OSError(errno.EBADF, "peek() on write-only GzipFile object") return self._buffer.peek(n) @property def closed(self): return self.fileobj is None def close(self): fileobj = self.fileobj if fileobj is None: return try: if self.mode == WRITE: self._buffer.flush() fileobj.write(self.compress.flush()) write32u(fileobj, self.crc) # self.size may exceed 2 GiB, or even 4 GiB write32u(fileobj, self.size & 0xffffffff) elif self.mode == READ: self._buffer.close() finally: self.fileobj = None myfileobj = self.myfileobj if myfileobj: self.myfileobj = None myfileobj.close() def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH): self._check_not_closed() if self.mode == WRITE: # Ensure the compressor's buffer is flushed self._buffer.flush() self.fileobj.flush() def fileno(self): """Invoke the underlying file object's fileno() method. This will raise AttributeError if the underlying file object doesn't support fileno(). """ return self.fileobj.fileno() def rewind(self): '''Return the uncompressed stream file position indicator to the beginning of the file''' if self.mode != READ: raise OSError("Can't rewind in write mode") self._buffer.seek(0) def readable(self): return self.mode == READ def writable(self): return self.mode == WRITE def seekable(self): return True def seek(self, offset, whence=io.SEEK_SET): if self.mode == WRITE: if whence != io.SEEK_SET: if whence == io.SEEK_CUR: offset = self.offset + offset else: raise ValueError('Seek from end not supported') if offset < self.offset: raise OSError('Negative seek in write mode') count = offset - self.offset chunk = b'\0' * self._buffer_size for i in range(count // self._buffer_size): self.write(chunk) self.write(b'\0' * (count % self._buffer_size)) elif self.mode == READ: self._check_not_closed() return self._buffer.seek(offset, whence) return self.offset def readline(self, size=-1): self._check_not_closed() return self._buffer.readline(size) def _read_exact(fp, n): '''Read exactly *n* bytes from `fp` This method is required because fp may be unbuffered, i.e. return short reads. ''' data = fp.read(n) while len(data) < n: b = fp.read(n - len(data)) if not b: raise EOFError("Compressed file ended before the " "end-of-stream marker was reached") data += b return data def _read_gzip_header(fp): '''Read a gzip header from `fp` and progress to the end of the header. Returns last mtime if header was present or None otherwise. ''' magic = fp.read(2) if magic == b'': return None if magic != b'\037\213': raise BadGzipFile('Not a gzipped file (%r)' % magic) (method, flag, last_mtime) = struct.unpack(" bytes: """ Write a simple gzip header with no extra fields. :param compresslevel: Compresslevel used to determine the xfl bytes. :param mtime: The mtime (must support conversion to a 32-bit integer). :return: A bytes object representing the gzip header. """ if mtime is None: mtime = time.time() if compresslevel == _COMPRESS_LEVEL_BEST: xfl = 2 elif compresslevel == _COMPRESS_LEVEL_FAST: xfl = 4 else: xfl = 0 # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra # fields added to header), mtime, xfl and os (255 for unknown OS). return struct.pack("