Issue #27199: In tarfile, expose copyfileobj bufsize to improve throughput.
Patch by Jason Fried.
This commit is contained in:
parent
f5781958af
commit
04bedfa3ce
|
@ -228,21 +228,21 @@ def calc_chksums(buf):
|
||||||
signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
|
signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
|
||||||
return unsigned_chksum, signed_chksum
|
return unsigned_chksum, signed_chksum
|
||||||
|
|
||||||
def copyfileobj(src, dst, length=None, exception=OSError):
|
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
|
||||||
"""Copy length bytes from fileobj src to fileobj dst.
|
"""Copy length bytes from fileobj src to fileobj dst.
|
||||||
If length is None, copy the entire content.
|
If length is None, copy the entire content.
|
||||||
"""
|
"""
|
||||||
|
bufsize = bufsize or 16 * 1024
|
||||||
if length == 0:
|
if length == 0:
|
||||||
return
|
return
|
||||||
if length is None:
|
if length is None:
|
||||||
shutil.copyfileobj(src, dst)
|
shutil.copyfileobj(src, dst, bufsize)
|
||||||
return
|
return
|
||||||
|
|
||||||
BUFSIZE = 16 * 1024
|
blocks, remainder = divmod(length, bufsize)
|
||||||
blocks, remainder = divmod(length, BUFSIZE)
|
|
||||||
for b in range(blocks):
|
for b in range(blocks):
|
||||||
buf = src.read(BUFSIZE)
|
buf = src.read(bufsize)
|
||||||
if len(buf) < BUFSIZE:
|
if len(buf) < bufsize:
|
||||||
raise exception("unexpected end of data")
|
raise exception("unexpected end of data")
|
||||||
dst.write(buf)
|
dst.write(buf)
|
||||||
|
|
||||||
|
@ -1403,7 +1403,8 @@ class TarFile(object):
|
||||||
|
|
||||||
def __init__(self, name=None, mode="r", fileobj=None, format=None,
|
def __init__(self, name=None, mode="r", fileobj=None, format=None,
|
||||||
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
|
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
|
||||||
errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
|
errors="surrogateescape", pax_headers=None, debug=None,
|
||||||
|
errorlevel=None, copybufsize=None):
|
||||||
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
|
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
|
||||||
read from an existing archive, 'a' to append data to an existing
|
read from an existing archive, 'a' to append data to an existing
|
||||||
file or 'w' to create a new file overwriting an existing one. `mode'
|
file or 'w' to create a new file overwriting an existing one. `mode'
|
||||||
|
@ -1459,6 +1460,7 @@ class TarFile(object):
|
||||||
self.errorlevel = errorlevel
|
self.errorlevel = errorlevel
|
||||||
|
|
||||||
# Init datastructures.
|
# Init datastructures.
|
||||||
|
self.copybufsize = copybufsize
|
||||||
self.closed = False
|
self.closed = False
|
||||||
self.members = [] # list of members as TarInfo objects
|
self.members = [] # list of members as TarInfo objects
|
||||||
self._loaded = False # flag if all members have been read
|
self._loaded = False # flag if all members have been read
|
||||||
|
@ -1558,7 +1560,7 @@ class TarFile(object):
|
||||||
saved_pos = fileobj.tell()
|
saved_pos = fileobj.tell()
|
||||||
try:
|
try:
|
||||||
return func(name, "r", fileobj, **kwargs)
|
return func(name, "r", fileobj, **kwargs)
|
||||||
except (ReadError, CompressionError) as e:
|
except (ReadError, CompressionError):
|
||||||
if fileobj is not None:
|
if fileobj is not None:
|
||||||
fileobj.seek(saved_pos)
|
fileobj.seek(saved_pos)
|
||||||
continue
|
continue
|
||||||
|
@ -1963,10 +1965,10 @@ class TarFile(object):
|
||||||
buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
|
buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
|
||||||
self.fileobj.write(buf)
|
self.fileobj.write(buf)
|
||||||
self.offset += len(buf)
|
self.offset += len(buf)
|
||||||
|
bufsize=self.copybufsize
|
||||||
# If there's data to follow, append it.
|
# If there's data to follow, append it.
|
||||||
if fileobj is not None:
|
if fileobj is not None:
|
||||||
copyfileobj(fileobj, self.fileobj, tarinfo.size)
|
copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
|
||||||
blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
|
blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
|
||||||
if remainder > 0:
|
if remainder > 0:
|
||||||
self.fileobj.write(NUL * (BLOCKSIZE - remainder))
|
self.fileobj.write(NUL * (BLOCKSIZE - remainder))
|
||||||
|
@ -2148,15 +2150,16 @@ class TarFile(object):
|
||||||
"""
|
"""
|
||||||
source = self.fileobj
|
source = self.fileobj
|
||||||
source.seek(tarinfo.offset_data)
|
source.seek(tarinfo.offset_data)
|
||||||
|
bufsize = self.copybufsize
|
||||||
with bltn_open(targetpath, "wb") as target:
|
with bltn_open(targetpath, "wb") as target:
|
||||||
if tarinfo.sparse is not None:
|
if tarinfo.sparse is not None:
|
||||||
for offset, size in tarinfo.sparse:
|
for offset, size in tarinfo.sparse:
|
||||||
target.seek(offset)
|
target.seek(offset)
|
||||||
copyfileobj(source, target, size, ReadError)
|
copyfileobj(source, target, size, ReadError, bufsize)
|
||||||
target.seek(tarinfo.size)
|
target.seek(tarinfo.size)
|
||||||
target.truncate()
|
target.truncate()
|
||||||
else:
|
else:
|
||||||
copyfileobj(source, target, tarinfo.size, ReadError)
|
copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
|
||||||
|
|
||||||
def makeunknown(self, tarinfo, targetpath):
|
def makeunknown(self, tarinfo, targetpath):
|
||||||
"""Make a file from a TarInfo object with an unknown type
|
"""Make a file from a TarInfo object with an unknown type
|
||||||
|
@ -2235,7 +2238,7 @@ class TarFile(object):
|
||||||
os.lchown(targetpath, u, g)
|
os.lchown(targetpath, u, g)
|
||||||
else:
|
else:
|
||||||
os.chown(targetpath, u, g)
|
os.chown(targetpath, u, g)
|
||||||
except OSError as e:
|
except OSError:
|
||||||
raise ExtractError("could not change owner")
|
raise ExtractError("could not change owner")
|
||||||
|
|
||||||
def chmod(self, tarinfo, targetpath):
|
def chmod(self, tarinfo, targetpath):
|
||||||
|
@ -2244,7 +2247,7 @@ class TarFile(object):
|
||||||
if hasattr(os, 'chmod'):
|
if hasattr(os, 'chmod'):
|
||||||
try:
|
try:
|
||||||
os.chmod(targetpath, tarinfo.mode)
|
os.chmod(targetpath, tarinfo.mode)
|
||||||
except OSError as e:
|
except OSError:
|
||||||
raise ExtractError("could not change mode")
|
raise ExtractError("could not change mode")
|
||||||
|
|
||||||
def utime(self, tarinfo, targetpath):
|
def utime(self, tarinfo, targetpath):
|
||||||
|
@ -2254,7 +2257,7 @@ class TarFile(object):
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
|
os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
|
||||||
except OSError as e:
|
except OSError:
|
||||||
raise ExtractError("could not change modification time")
|
raise ExtractError("could not change modification time")
|
||||||
|
|
||||||
#--------------------------------------------------------------------------
|
#--------------------------------------------------------------------------
|
||||||
|
|
|
@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #27199: In tarfile, expose copyfileobj bufsize to improve throughput.
|
||||||
|
Patch by Jason Fried.
|
||||||
|
|
||||||
- Issue #27948: In f-strings, only allow backslashes inside the braces
|
- Issue #27948: In f-strings, only allow backslashes inside the braces
|
||||||
(where the expressions are). This is a breaking change from the 3.6
|
(where the expressions are). This is a breaking change from the 3.6
|
||||||
alpha releases, where backslashes are allowed anywhere in an
|
alpha releases, where backslashes are allowed anywhere in an
|
||||||
|
|
Loading…
Reference in New Issue