Fix gzip.py: use bytes where 8-bit strings were formerly used.

(The filename is written in UTF-8-encoded form, which is probably
not correct.)

Fix the test.
This commit is contained in:
Walter Dörwald 2007-06-06 16:43:59 +00:00
parent 3a77c7ab16
commit 5b1284d0b7
2 changed files with 31 additions and 29 deletions

View File

@ -104,7 +104,7 @@ class GzipFile:
self.mode = READ self.mode = READ
# Set flag indicating start of a new member # Set flag indicating start of a new member
self._new_member = True self._new_member = True
self.extrabuf = "" self.extrabuf = b""
self.extrasize = 0 self.extrasize = 0
self.name = filename self.name = filename
# Starts small, scales exponentially # Starts small, scales exponentially
@ -147,20 +147,21 @@ class GzipFile:
self.bufsize = 0 self.bufsize = 0
def _write_gzip_header(self): def _write_gzip_header(self):
self.fileobj.write('\037\213') # magic header self.fileobj.write(b'\037\213') # magic header
self.fileobj.write('\010') # compression method self.fileobj.write(b'\010') # compression method
fname = self.name fname = self.name
if fname.endswith(".gz"): if fname.endswith(".gz"):
fname = fname[:-3] fname = fname[:-3]
flags = 0 flags = 0
if fname: if fname:
flags = FNAME flags = FNAME
self.fileobj.write(chr(flags)) self.fileobj.write(chr(flags).encode('latin-1'))
write32u(self.fileobj, int(time.time())) write32u(self.fileobj, int(time.time()))
self.fileobj.write('\002') self.fileobj.write(b'\002')
self.fileobj.write('\377') self.fileobj.write(b'\377')
if fname: if fname:
self.fileobj.write(fname + '\000') # XXX: Ist utf-8 the correct encoding?
self.fileobj.write(fname.encode('utf-8') + b'\000')
def _init_read(self): def _init_read(self):
self.crc = zlib.crc32("") self.crc = zlib.crc32("")
@ -168,7 +169,7 @@ class GzipFile:
def _read_gzip_header(self): def _read_gzip_header(self):
magic = self.fileobj.read(2) magic = self.fileobj.read(2)
if magic != '\037\213': if magic != b'\037\213':
raise IOError, 'Not a gzipped file' raise IOError, 'Not a gzipped file'
method = ord( self.fileobj.read(1) ) method = ord( self.fileobj.read(1) )
if method != 8: if method != 8:
@ -188,13 +189,13 @@ class GzipFile:
# Read and discard a null-terminated string containing the filename # Read and discard a null-terminated string containing the filename
while True: while True:
s = self.fileobj.read(1) s = self.fileobj.read(1)
if not s or s=='\000': if not s or s==b'\000':
break break
if flag & FCOMMENT: if flag & FCOMMENT:
# Read and discard a null-terminated string containing a comment # Read and discard a null-terminated string containing a comment
while True: while True:
s = self.fileobj.read(1) s = self.fileobj.read(1)
if not s or s=='\000': if not s or s==b'\000':
break break
if flag & FHCRC: if flag & FHCRC:
self.fileobj.read(2) # Read & discard the 16-bit header CRC self.fileobj.read(2) # Read & discard the 16-bit header CRC
@ -219,7 +220,7 @@ class GzipFile:
raise IOError(errno.EBADF, "read() on write-only GzipFile object") raise IOError(errno.EBADF, "read() on write-only GzipFile object")
if self.extrasize <= 0 and self.fileobj is None: if self.extrasize <= 0 and self.fileobj is None:
return '' return b''
readsize = 1024 readsize = 1024
if size < 0: # get the whole thing if size < 0: # get the whole thing
@ -278,7 +279,7 @@ class GzipFile:
# If the EOF has been reached, flush the decompression object # If the EOF has been reached, flush the decompression object
# and mark this object as finished. # and mark this object as finished.
if buf == "": if buf == b"":
uncompress = self.decompress.flush() uncompress = self.decompress.flush()
self._read_eof() self._read_eof()
self._add_read_data( uncompress ) self._add_read_data( uncompress )
@ -287,7 +288,7 @@ class GzipFile:
uncompress = self.decompress.decompress(buf) uncompress = self.decompress.decompress(buf)
self._add_read_data( uncompress ) self._add_read_data( uncompress )
if self.decompress.unused_data != "": if self.decompress.unused_data != b"":
# Ending case: we've come to the end of a member in the file, # Ending case: we've come to the end of a member in the file,
# so seek back to the start of the unused data, finish up # so seek back to the start of the unused data, finish up
# this member, and read a new gzip header. # this member, and read a new gzip header.
@ -375,7 +376,7 @@ class GzipFile:
raise IOError("Can't rewind in write mode") raise IOError("Can't rewind in write mode")
self.fileobj.seek(0) self.fileobj.seek(0)
self._new_member = True self._new_member = True
self.extrabuf = "" self.extrabuf = b""
self.extrasize = 0 self.extrasize = 0
self.offset = 0 self.offset = 0
@ -389,9 +390,10 @@ class GzipFile:
if offset < self.offset: if offset < self.offset:
raise IOError('Negative seek in write mode') raise IOError('Negative seek in write mode')
count = offset - self.offset count = offset - self.offset
chunk = bytes(1024)
for i in range(count // 1024): for i in range(count // 1024):
self.write(1024 * '\0') self.write(chunk)
self.write((count % 1024) * '\0') self.write(bytes(count % 1024))
elif self.mode == READ: elif self.mode == READ:
if offset < self.offset: if offset < self.offset:
# for negative seek, rewind and do positive seek # for negative seek, rewind and do positive seek
@ -410,7 +412,7 @@ class GzipFile:
bufs = [] bufs = []
while size != 0: while size != 0:
c = self.read(readsize) c = self.read(readsize)
i = c.find('\n') i = c.find(b'\n')
# We set i=size to break out of the loop under two # We set i=size to break out of the loop under two
# conditions: 1) there's no newline, and the chunk is # conditions: 1) there's no newline, and the chunk is
@ -419,7 +421,7 @@ class GzipFile:
if (size <= i) or (i == -1 and len(c) > size): if (size <= i) or (i == -1 and len(c) > size):
i = size - 1 i = size - 1
if i >= 0 or c == '': if i >= 0 or c == b'':
bufs.append(c[:i + 1]) # Add portion of last chunk bufs.append(c[:i + 1]) # Add portion of last chunk
self._unread(c[i + 1:]) # Push back rest of chunk self._unread(c[i + 1:]) # Push back rest of chunk
break break
@ -430,7 +432,7 @@ class GzipFile:
readsize = min(size, readsize * 2) readsize = min(size, readsize * 2)
if readsize > self.min_readsize: if readsize > self.min_readsize:
self.min_readsize = min(readsize, self.min_readsize * 2, 512) self.min_readsize = min(readsize, self.min_readsize * 2, 512)
return ''.join(bufs) # Return resulting line return b''.join(bufs) # Return resulting line
def readlines(self, sizehint=0): def readlines(self, sizehint=0):
# Negative numbers result in reading all the lines # Negative numbers result in reading all the lines
@ -439,7 +441,7 @@ class GzipFile:
L = [] L = []
while sizehint > 0: while sizehint > 0:
line = self.readline() line = self.readline()
if line == "": if line == b"":
break break
L.append(line) L.append(line)
sizehint = sizehint - len(line) sizehint = sizehint - len(line)

View File

@ -8,14 +8,14 @@ import sys, os
import gzip import gzip
data1 = """ int length=DEFAULTALLOC, err = Z_OK; data1 = b""" int length=DEFAULTALLOC, err = Z_OK;
PyObject *RetVal; PyObject *RetVal;
int flushmode = Z_FINISH; int flushmode = Z_FINISH;
unsigned long start_total_out; unsigned long start_total_out;
""" """
data2 = """/* zlibmodule.c -- gzip-compatible data compression */ data2 = b"""/* zlibmodule.c -- gzip-compatible data compression */
/* See http://www.gzip.org/zlib/ /* See http://www.gzip.org/zlib/
/* See http://www.winimage.com/zLibDll for Windows */ /* See http://www.winimage.com/zLibDll for Windows */
""" """
@ -63,22 +63,22 @@ class TestGzip(unittest.TestCase):
# many, many members. Create such a file and verify that reading it # many, many members. Create such a file and verify that reading it
# works. # works.
f = gzip.open(self.filename, 'wb', 9) f = gzip.open(self.filename, 'wb', 9)
f.write('a') f.write(b'a')
f.close() f.close()
for i in range(0,200): for i in range(0, 200):
f = gzip.open(self.filename, "ab", 9) # append f = gzip.open(self.filename, "ab", 9) # append
f.write('a') f.write(b'a')
f.close() f.close()
# Try reading the file # Try reading the file
zgfile = gzip.open(self.filename, "rb") zgfile = gzip.open(self.filename, "rb")
contents = "" contents = b""
while 1: while 1:
ztxt = zgfile.read(8192) ztxt = zgfile.read(8192)
contents += ztxt contents += ztxt
if not ztxt: break if not ztxt: break
zgfile.close() zgfile.close()
self.assertEquals(contents, 'a'*201) self.assertEquals(contents, b'a'*201)
def test_readline(self): def test_readline(self):
@ -89,7 +89,7 @@ class TestGzip(unittest.TestCase):
line_length = 0 line_length = 0
while 1: while 1:
L = f.readline(line_length) L = f.readline(line_length)
if L == "" and line_length != 0: break if not L and line_length != 0: break
self.assert_(len(L) <= line_length) self.assert_(len(L) <= line_length)
line_length = (line_length + 1) % 50 line_length = (line_length + 1) % 50
f.close() f.close()
@ -144,7 +144,7 @@ class TestGzip(unittest.TestCase):
f = gzip.GzipFile(self.filename, 'w') f = gzip.GzipFile(self.filename, 'w')
for pos in range(0, 256, 16): for pos in range(0, 256, 16):
f.seek(pos) f.seek(pos)
f.write('GZ\n') f.write(b'GZ\n')
f.close() f.close()
def test_mode(self): def test_mode(self):