From aedb92e59c2f4c3c33fbb33d5dc4afefe344620c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Gust=C3=A4bel?= Date: Sat, 23 Dec 2006 16:51:47 +0000 Subject: [PATCH] Patch #1230446: tarfile.py: fix ExFileObject so that read() and tell() work correctly together with readline(). (backport from rev. 53153) --- Lib/tarfile.py | 285 +++++++++++++++++++++++---------------- Lib/test/test_tarfile.py | 12 +- Misc/NEWS | 3 + 3 files changed, 180 insertions(+), 120 deletions(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 1b8f1408a79..cffde457f14 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -622,64 +622,158 @@ class _BZ2Proxy(object): #------------------------ # Extraction file object #------------------------ -class ExFileObject(object): - """File-like object for reading an archive member. - Is returned by TarFile.extractfile(). Support for - sparse files included. +class _FileInFile(object): + """A thin wrapper around an existing file object that + provides a part of its data as an individual file + object. """ - def __init__(self, tarfile, tarinfo): - self.fileobj = tarfile.fileobj - self.name = tarinfo.name - self.mode = "r" - self.closed = False - self.offset = tarinfo.offset_data - self.size = tarinfo.size - self.pos = 0L - self.linebuffer = "" - if tarinfo.issparse(): - self.sparse = tarinfo.sparse - self.read = self._readsparse - else: - self.read = self._readnormal + def __init__(self, fileobj, offset, size, sparse=None): + self.fileobj = fileobj + self.offset = offset + self.size = size + self.sparse = sparse + self.position = 0 - def __read(self, size): - """Overloadable read method. + def tell(self): + """Return the current file position. """ + return self.position + + def seek(self, position): + """Seek to a position in the file. + """ + self.position = position + + def read(self, size=None): + """Read data from the file. + """ + if size is None: + size = self.size - self.position + else: + size = min(size, self.size - self.position) + + if self.sparse is None: + return self.readnormal(size) + else: + return self.readsparse(size) + + def readnormal(self, size): + """Read operation for regular files. + """ + self.fileobj.seek(self.offset + self.position) + self.position += size return self.fileobj.read(size) - def readline(self, size=-1): - """Read a line with approx. size. If size is negative, - read a whole line. readline() and read() must not - be mixed up (!). + def readsparse(self, size): + """Read operation for sparse files. """ - if size < 0: - size = sys.maxint + data = [] + while size > 0: + buf = self.readsparsesection(size) + if not buf: + break + size -= len(buf) + data.append(buf) + return "".join(data) - nl = self.linebuffer.find("\n") - if nl >= 0: - nl = min(nl, size) + def readsparsesection(self, size): + """Read a single section of a sparse file. + """ + section = self.sparse.find(self.position) + + if section is None: + return "" + + size = min(size, section.offset + section.size - self.position) + + if isinstance(section, _data): + realpos = section.realpos + self.position - section.offset + self.fileobj.seek(self.offset + realpos) + self.position += size + return self.fileobj.read(size) else: - size -= len(self.linebuffer) - while (nl < 0 and size > 0): - buf = self.read(min(size, 100)) - if not buf: + self.position += size + return NUL * size +#class _FileInFile + + +class ExFileObject(object): + """File-like object for reading an archive member. + Is returned by TarFile.extractfile(). + """ + blocksize = 1024 + + def __init__(self, tarfile, tarinfo): + self.fileobj = _FileInFile(tarfile.fileobj, + tarinfo.offset_data, + tarinfo.size, + getattr(tarinfo, "sparse", None)) + self.name = tarinfo.name + self.mode = "r" + self.closed = False + self.size = tarinfo.size + + self.position = 0 + self.buffer = "" + + def read(self, size=None): + """Read at most size bytes from the file. If size is not + present or None, read all data until EOF is reached. + """ + if self.closed: + raise ValueError("I/O operation on closed file") + + buf = "" + if self.buffer: + if size is None: + buf = self.buffer + self.buffer = "" + else: + buf = self.buffer[:size] + self.buffer = self.buffer[size:] + + if size is None: + buf += self.fileobj.read() + else: + buf += self.fileobj.read(size - len(buf)) + + self.position += len(buf) + return buf + + def readline(self, size=-1): + """Read one entire line from the file. If size is present + and non-negative, return a string with at most that + size, which may be an incomplete line. + """ + if self.closed: + raise ValueError("I/O operation on closed file") + + if "\n" in self.buffer: + pos = self.buffer.find("\n") + 1 + else: + buffers = [self.buffer] + while True: + buf = self.fileobj.read(self.blocksize) + buffers.append(buf) + if not buf or "\n" in buf: + self.buffer = "".join(buffers) + pos = self.buffer.find("\n") + 1 + if pos == 0: + # no newline found. + pos = len(self.buffer) break - self.linebuffer += buf - size -= len(buf) - nl = self.linebuffer.find("\n") - if nl == -1: - s = self.linebuffer - self.linebuffer = "" - return s - buf = self.linebuffer[:nl] - self.linebuffer = self.linebuffer[nl + 1:] - while buf[-1:] == "\r": - buf = buf[:-1] - return buf + "\n" + + if size != -1: + pos = min(size, pos) + + buf = self.buffer[:pos] + self.buffer = self.buffer[pos:] + self.position += len(buf) + return buf def readlines(self): - """Return a list with all (following) lines. + """Return a list with all remaining lines. """ result = [] while True: @@ -688,74 +782,34 @@ class ExFileObject(object): result.append(line) return result - def _readnormal(self, size=None): - """Read operation for regular files. - """ - if self.closed: - raise ValueError("file is closed") - self.fileobj.seek(self.offset + self.pos) - bytesleft = self.size - self.pos - if size is None: - bytestoread = bytesleft - else: - bytestoread = min(size, bytesleft) - self.pos += bytestoread - return self.__read(bytestoread) - - def _readsparse(self, size=None): - """Read operation for sparse files. - """ - if self.closed: - raise ValueError("file is closed") - - if size is None: - size = self.size - self.pos - - data = [] - while size > 0: - buf = self._readsparsesection(size) - if not buf: - break - size -= len(buf) - data.append(buf) - return "".join(data) - - def _readsparsesection(self, size): - """Read a single section of a sparse file. - """ - section = self.sparse.find(self.pos) - - if section is None: - return "" - - toread = min(size, section.offset + section.size - self.pos) - if isinstance(section, _data): - realpos = section.realpos + self.pos - section.offset - self.pos += toread - self.fileobj.seek(self.offset + realpos) - return self.__read(toread) - else: - self.pos += toread - return NUL * toread - def tell(self): """Return the current file position. """ - return self.pos + if self.closed: + raise ValueError("I/O operation on closed file") - def seek(self, pos, whence=0): + return self.position + + def seek(self, pos, whence=os.SEEK_SET): """Seek to a position in the file. """ - self.linebuffer = "" - if whence == 0: - self.pos = min(max(pos, 0), self.size) - if whence == 1: + if self.closed: + raise ValueError("I/O operation on closed file") + + if whence == os.SEEK_SET: + self.position = min(max(pos, 0), self.size) + elif whence == os.SEEK_CUR: if pos < 0: - self.pos = max(self.pos + pos, 0) + self.position = max(self.position + pos, 0) else: - self.pos = min(self.pos + pos, self.size) - if whence == 2: - self.pos = max(min(self.size + pos, self.size), 0) + self.position = min(self.position + pos, self.size) + elif whence == os.SEEK_END: + self.position = max(min(self.size + pos, self.size), 0) + else: + raise ValueError("Invalid argument") + + self.buffer = "" + self.fileobj.seek(self.position) def close(self): """Close the file object. @@ -763,20 +817,13 @@ class ExFileObject(object): self.closed = True def __iter__(self): - """Get an iterator over the file object. + """Get an iterator over the file's lines. """ - if self.closed: - raise ValueError("I/O operation on closed file") - return self - - def next(self): - """Get the next item from the file iterator. - """ - result = self.readline() - if not result: - raise StopIteration - return result - + while True: + line = self.readline() + if not line: + break + yield line #class ExFileObject #------------------ diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 2685d67819c..f229fa566dc 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -110,7 +110,7 @@ class ReadTest(BaseTest): """Test seek() method of _FileObject, incl. random reading. """ if self.sep != "|": - filename = "0-REGTYPE" + filename = "0-REGTYPE-TEXT" self.tar.extract(filename, dirname()) f = open(os.path.join(dirname(), filename), "rb") data = f.read() @@ -149,6 +149,16 @@ class ReadTest(BaseTest): s2 = fobj.readlines() self.assert_(s1 == s2, "readlines() after seek failed") + fobj.seek(0) + self.assert_(len(fobj.readline()) == fobj.tell(), + "tell() after readline() failed") + fobj.seek(512) + self.assert_(len(fobj.readline()) + 512 == fobj.tell(), + "tell() after seek() and readline() failed") + fobj.seek(0) + line = fobj.readline() + self.assert_(fobj.read() == data[len(line):], + "read() after readline() failed") fobj.close() def test_old_dirtype(self): diff --git a/Misc/NEWS b/Misc/NEWS index 897346b5485..f8fc7a75fe2 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -620,6 +620,9 @@ Core and builtins Library ------- +- Patch #1230446: tarfile.py: fix ExFileObject so that read() and tell() + work correctly together with readline(). + - Correction of patch #1455898: In the mbcs decoder, set final=False for stream decoder, but final=True for the decode function.