Issue 23193: Add numeric_owner to tarfile.TarFile.extract() and tarfile.TarFile.extractall().

This commit is contained in:
Eric V. Smith 2015-04-15 10:27:58 -04:00
parent 28edf12cd4
commit 7a80389ce5
6 changed files with 200 additions and 33 deletions

View File

@ -367,7 +367,7 @@ be finalized; only the internally used file object will be closed. See the
available.
.. method:: TarFile.extractall(path=".", members=None)
.. method:: TarFile.extractall(path=".", members=None, *, numeric_owner=False)
Extract all members from the archive to the current working directory or
directory *path*. If optional *members* is given, it must be a subset of the
@ -377,6 +377,10 @@ be finalized; only the internally used file object will be closed. See the
reset each time a file is created in it. And, if a directory's permissions do
not allow writing, extracting files to it will fail.
If *numeric_owner* is :const:`True`, the uid and gid numbers from the tarfile
are used to set the owner/group for the extracted files. Otherwise, the named
values from the tarfile are used.
.. warning::
Never extract archives from untrusted sources without prior inspection.
@ -384,8 +388,11 @@ be finalized; only the internally used file object will be closed. See the
that have absolute filenames starting with ``"/"`` or filenames with two
dots ``".."``.
.. versionchanged:: 3.5
Added the *numeric_only* parameter.
.. method:: TarFile.extract(member, path="", set_attrs=True)
.. method:: TarFile.extract(member, path="", set_attrs=True, *, numeric_owner=False)
Extract a member from the archive to the current working directory, using its
full name. Its file information is extracted as accurately as possible. *member*
@ -393,6 +400,10 @@ be finalized; only the internally used file object will be closed. See the
directory using *path*. File attributes (owner, mtime, mode) are set unless
*set_attrs* is false.
If *numeric_owner* is :const:`True`, the uid and gid numbers from the tarfile
are used to set the owner/group for the extracted files. Otherwise, the named
values from the tarfile are used.
.. note::
The :meth:`extract` method does not take care of several extraction issues.
@ -405,6 +416,9 @@ be finalized; only the internally used file object will be closed. See the
.. versionchanged:: 3.2
Added the *set_attrs* parameter.
.. versionchanged:: 3.5
Added the *numeric_only* parameter.
.. method:: TarFile.extractfile(member)
Extract a member from the archive as a file object. *member* may be a filename
@ -827,4 +841,3 @@ In case of :const:`PAX_FORMAT` archives, *encoding* is generally not needed
because all the metadata is stored using *UTF-8*. *encoding* is only used in
the rare cases when binary pax headers are decoded or when strings with
surrogate characters are stored.

View File

@ -479,19 +479,6 @@ socket
:meth:`socket.socket.send`.
(Contributed by Giampaolo Rodola' in :issue:`17552`.)
sysconfig
---------
* The user scripts directory on Windows is now versioned.
(Contributed by Paul Moore in :issue:`23437`.)
tarfile
-------
* The :func:`tarfile.open` function now supports ``'x'`` (exclusive creation)
mode. (Contributed by Berker Peksag in :issue:`21717`.)
subprocess
----------
@ -500,6 +487,25 @@ subprocess
API than :func:`~subprocess.call`, :func:`~subprocess.check_call` and
:func:`~subprocess.check_output`.
sysconfig
---------
* The user scripts directory on Windows is now versioned.
(Contributed by Paul Moore in :issue:`23437`.)
tarfile
-------
* The :func:`tarfile.open` function now supports ``'x'`` (exclusive creation)
mode. (Contributed by Berker Peksag in :issue:`21717`.)
* The :meth:`~tarfile.TarFile.extractall` and :meth:`~tarfile.TarFile.extract`
methods now take a keyword parameter *numeric_only*. If set to ``True``,
the extracted files and directories will be owned by the numeric uid and gid
from the tarfile. If set to ``False`` (the default, and the behavior in
versions prior to 3.5), they will be owned bythe named user and group in the
tarfile. (Contributed by Michael Vogt and Eric Smith in :issue:`23193`.)
time
----

View File

@ -1972,12 +1972,13 @@ class TarFile(object):
self.members.append(tarinfo)
def extractall(self, path=".", members=None):
def extractall(self, path=".", members=None, *, numeric_owner=False):
"""Extract all members from the archive to the current working
directory and set owner, modification time and permissions on
directories afterwards. `path' specifies a different directory
to extract to. `members' is optional and must be a subset of the
list returned by getmembers().
list returned by getmembers(). If `numeric_owner` is True, only
the numbers for user/group names are used and not the names.
"""
directories = []
@ -1991,7 +1992,8 @@ class TarFile(object):
tarinfo = copy.copy(tarinfo)
tarinfo.mode = 0o700
# Do not set_attrs directories, as we will do that further down
self.extract(tarinfo, path, set_attrs=not tarinfo.isdir())
self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
numeric_owner=numeric_owner)
# Reverse sort directories.
directories.sort(key=lambda a: a.name)
@ -2001,7 +2003,7 @@ class TarFile(object):
for tarinfo in directories:
dirpath = os.path.join(path, tarinfo.name)
try:
self.chown(tarinfo, dirpath)
self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
self.utime(tarinfo, dirpath)
self.chmod(tarinfo, dirpath)
except ExtractError as e:
@ -2010,12 +2012,14 @@ class TarFile(object):
else:
self._dbg(1, "tarfile: %s" % e)
def extract(self, member, path="", set_attrs=True):
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
"""Extract a member from the archive to the current working directory,
using its full name. Its file information is extracted as accurately
as possible. `member' may be a filename or a TarInfo object. You can
specify a different directory using `path'. File attributes (owner,
mtime, mode) are set unless `set_attrs' is False.
mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
is True, only the numbers for user/group names are used and not
the names.
"""
self._check("r")
@ -2030,7 +2034,8 @@ class TarFile(object):
try:
self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
set_attrs=set_attrs)
set_attrs=set_attrs,
numeric_owner=numeric_owner)
except OSError as e:
if self.errorlevel > 0:
raise
@ -2076,7 +2081,8 @@ class TarFile(object):
# blkdev, etc.), return None instead of a file object.
return None
def _extract_member(self, tarinfo, targetpath, set_attrs=True):
def _extract_member(self, tarinfo, targetpath, set_attrs=True,
numeric_owner=False):
"""Extract the TarInfo object tarinfo to a physical
file called targetpath.
"""
@ -2114,7 +2120,7 @@ class TarFile(object):
self.makefile(tarinfo, targetpath)
if set_attrs:
self.chown(tarinfo, targetpath)
self.chown(tarinfo, targetpath, numeric_owner)
if not tarinfo.issym():
self.chmod(tarinfo, targetpath)
self.utime(tarinfo, targetpath)
@ -2203,11 +2209,16 @@ class TarFile(object):
except KeyError:
raise ExtractError("unable to resolve link inside archive")
def chown(self, tarinfo, targetpath):
"""Set owner of targetpath according to tarinfo.
def chown(self, tarinfo, targetpath, numeric_owner):
"""Set owner of targetpath according to tarinfo. If numeric_owner
is True, use .gid/.uid instead of .gname/.uname.
"""
if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
# We have to be root to do so.
if numeric_owner:
g = tarinfo.gid
u = tarinfo.uid
else:
try:
g = grp.getgrnam(tarinfo.gname)[2]
except KeyError:

View File

@ -2,8 +2,10 @@ import sys
import os
import io
from hashlib import md5
from contextlib import contextmanager
import unittest
import unittest.mock
import tarfile
from test import support, script_helper
@ -2264,6 +2266,136 @@ class Bz2PartialReadTest(Bz2Test, unittest.TestCase):
self._test_partial_input("r:bz2")
def root_is_uid_gid_0():
try:
import pwd, grp
except ImportError:
return False
if pwd.getpwuid(0)[0] != 'root':
return False
if grp.getgrgid(0)[0] != 'root':
return False
return True
class NumericOwnerTest(unittest.TestCase):
# mock the following:
# os.chown: so we can test what's being called
# os.chmod: so the modes are not actually changed. if they are, we can't
# delete the files/directories
# os.geteuid: so we can lie and say we're root (uid = 0)
@staticmethod
def _make_test_archive(filename_1, dirname_1, filename_2):
# the file contents to write
fobj = io.BytesIO(b"content")
# create a tar file with a file, a directory, and a file within that
# directory. Assign various .uid/.gid values to them
items = [(filename_1, 99, 98, tarfile.REGTYPE, fobj),
(dirname_1, 77, 76, tarfile.DIRTYPE, None),
(filename_2, 88, 87, tarfile.REGTYPE, fobj),
]
with tarfile.open(tmpname, 'w') as tarfl:
for name, uid, gid, typ, contents in items:
t = tarfile.TarInfo(name)
t.uid = uid
t.gid = gid
t.uname = 'root'
t.gname = 'root'
t.type = typ
tarfl.addfile(t, contents)
# return the full pathname to the tar file
return tmpname
@staticmethod
@contextmanager
def _setup_test(mock_geteuid):
mock_geteuid.return_value = 0 # lie and say we're root
fname = 'numeric-owner-testfile'
dirname = 'dir'
# the names we want stored in the tarfile
filename_1 = fname
dirname_1 = dirname
filename_2 = os.path.join(dirname, fname)
# create the tarfile with the contents we're after
tar_filename = NumericOwnerTest._make_test_archive(filename_1,
dirname_1,
filename_2)
# open the tarfile for reading. yield it and the names of the items
# we stored into the file
with tarfile.open(tar_filename) as tarfl:
yield tarfl, filename_1, dirname_1, filename_2
@unittest.mock.patch('os.chown')
@unittest.mock.patch('os.chmod')
@unittest.mock.patch('os.geteuid')
def test_extract_with_numeric_owner(self, mock_geteuid, mock_chmod,
mock_chown):
with self._setup_test(mock_geteuid) as (tarfl, filename_1, _,
filename_2):
tarfl.extract(filename_1, TEMPDIR, numeric_owner=True)
tarfl.extract(filename_2 , TEMPDIR, numeric_owner=True)
# convert to filesystem paths
f_filename_1 = os.path.join(TEMPDIR, filename_1)
f_filename_2 = os.path.join(TEMPDIR, filename_2)
mock_chown.assert_has_calls([unittest.mock.call(f_filename_1, 99, 98),
unittest.mock.call(f_filename_2, 88, 87),
],
any_order=True)
@unittest.mock.patch('os.chown')
@unittest.mock.patch('os.chmod')
@unittest.mock.patch('os.geteuid')
def test_extractall_with_numeric_owner(self, mock_geteuid, mock_chmod,
mock_chown):
with self._setup_test(mock_geteuid) as (tarfl, filename_1, dirname_1,
filename_2):
tarfl.extractall(TEMPDIR, numeric_owner=True)
# convert to filesystem paths
f_filename_1 = os.path.join(TEMPDIR, filename_1)
f_dirname_1 = os.path.join(TEMPDIR, dirname_1)
f_filename_2 = os.path.join(TEMPDIR, filename_2)
mock_chown.assert_has_calls([unittest.mock.call(f_filename_1, 99, 98),
unittest.mock.call(f_dirname_1, 77, 76),
unittest.mock.call(f_filename_2, 88, 87),
],
any_order=True)
# this test requires that uid=0 and gid=0 really be named 'root'. that's
# because the uname and gname in the test file are 'root', and extract()
# will look them up using pwd and grp to find their uid and gid, which we
# test here to be 0.
@unittest.skipUnless(root_is_uid_gid_0(),
'uid=0,gid=0 must be named "root"')
@unittest.mock.patch('os.chown')
@unittest.mock.patch('os.chmod')
@unittest.mock.patch('os.geteuid')
def test_extract_without_numeric_owner(self, mock_geteuid, mock_chmod,
mock_chown):
with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _):
tarfl.extract(filename_1, TEMPDIR, numeric_owner=False)
# convert to filesystem paths
f_filename_1 = os.path.join(TEMPDIR, filename_1)
mock_chown.assert_called_with(f_filename_1, 0, 0)
@unittest.mock.patch('os.geteuid')
def test_keyword_only(self, mock_geteuid):
with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _):
self.assertRaises(TypeError,
tarfl.extract, filename_1, TEMPDIR, False, True)
def setUpModule():
support.unlink(TEMPDIR)
os.makedirs(TEMPDIR)

View File

@ -1458,6 +1458,7 @@ Norman Vine
Pauli Virtanen
Frank Visser
Johannes Vogel
Michael Vogt
Radu Voicilas
Alex Volkov
Martijn Vries

View File

@ -32,6 +32,10 @@ Core and Builtins
Library
-------
- Issue #23193: Add a numeric_owner parameter to
tarfile.TarFile.extract and tarfile.TarFile.extractall. Patch by
Michael Vogt and Eric Smith.
- Issue #23342: Add a subprocess.run() function than returns a CalledProcess
instance for a more consistent API than the existing call* functions.