mirror of https://github.com/python/cpython
Issue 23193: Add numeric_owner to tarfile.TarFile.extract() and tarfile.TarFile.extractall().
This commit is contained in:
parent
28edf12cd4
commit
7a80389ce5
|
@ -367,7 +367,7 @@ be finalized; only the internally used file object will be closed. See the
|
|||
available.
|
||||
|
||||
|
||||
.. method:: TarFile.extractall(path=".", members=None)
|
||||
.. method:: TarFile.extractall(path=".", members=None, *, numeric_owner=False)
|
||||
|
||||
Extract all members from the archive to the current working directory or
|
||||
directory *path*. If optional *members* is given, it must be a subset of the
|
||||
|
@ -377,6 +377,10 @@ be finalized; only the internally used file object will be closed. See the
|
|||
reset each time a file is created in it. And, if a directory's permissions do
|
||||
not allow writing, extracting files to it will fail.
|
||||
|
||||
If *numeric_owner* is :const:`True`, the uid and gid numbers from the tarfile
|
||||
are used to set the owner/group for the extracted files. Otherwise, the named
|
||||
values from the tarfile are used.
|
||||
|
||||
.. warning::
|
||||
|
||||
Never extract archives from untrusted sources without prior inspection.
|
||||
|
@ -384,8 +388,11 @@ be finalized; only the internally used file object will be closed. See the
|
|||
that have absolute filenames starting with ``"/"`` or filenames with two
|
||||
dots ``".."``.
|
||||
|
||||
.. versionchanged:: 3.5
|
||||
Added the *numeric_only* parameter.
|
||||
|
||||
.. method:: TarFile.extract(member, path="", set_attrs=True)
|
||||
|
||||
.. method:: TarFile.extract(member, path="", set_attrs=True, *, numeric_owner=False)
|
||||
|
||||
Extract a member from the archive to the current working directory, using its
|
||||
full name. Its file information is extracted as accurately as possible. *member*
|
||||
|
@ -393,6 +400,10 @@ be finalized; only the internally used file object will be closed. See the
|
|||
directory using *path*. File attributes (owner, mtime, mode) are set unless
|
||||
*set_attrs* is false.
|
||||
|
||||
If *numeric_owner* is :const:`True`, the uid and gid numbers from the tarfile
|
||||
are used to set the owner/group for the extracted files. Otherwise, the named
|
||||
values from the tarfile are used.
|
||||
|
||||
.. note::
|
||||
|
||||
The :meth:`extract` method does not take care of several extraction issues.
|
||||
|
@ -405,6 +416,9 @@ be finalized; only the internally used file object will be closed. See the
|
|||
.. versionchanged:: 3.2
|
||||
Added the *set_attrs* parameter.
|
||||
|
||||
.. versionchanged:: 3.5
|
||||
Added the *numeric_only* parameter.
|
||||
|
||||
.. method:: TarFile.extractfile(member)
|
||||
|
||||
Extract a member from the archive as a file object. *member* may be a filename
|
||||
|
@ -827,4 +841,3 @@ In case of :const:`PAX_FORMAT` archives, *encoding* is generally not needed
|
|||
because all the metadata is stored using *UTF-8*. *encoding* is only used in
|
||||
the rare cases when binary pax headers are decoded or when strings with
|
||||
surrogate characters are stored.
|
||||
|
||||
|
|
|
@ -479,19 +479,6 @@ socket
|
|||
:meth:`socket.socket.send`.
|
||||
(Contributed by Giampaolo Rodola' in :issue:`17552`.)
|
||||
|
||||
sysconfig
|
||||
---------
|
||||
|
||||
* The user scripts directory on Windows is now versioned.
|
||||
(Contributed by Paul Moore in :issue:`23437`.)
|
||||
|
||||
|
||||
tarfile
|
||||
-------
|
||||
|
||||
* The :func:`tarfile.open` function now supports ``'x'`` (exclusive creation)
|
||||
mode. (Contributed by Berker Peksag in :issue:`21717`.)
|
||||
|
||||
subprocess
|
||||
----------
|
||||
|
||||
|
@ -500,6 +487,25 @@ subprocess
|
|||
API than :func:`~subprocess.call`, :func:`~subprocess.check_call` and
|
||||
:func:`~subprocess.check_output`.
|
||||
|
||||
sysconfig
|
||||
---------
|
||||
|
||||
* The user scripts directory on Windows is now versioned.
|
||||
(Contributed by Paul Moore in :issue:`23437`.)
|
||||
|
||||
tarfile
|
||||
-------
|
||||
|
||||
* The :func:`tarfile.open` function now supports ``'x'`` (exclusive creation)
|
||||
mode. (Contributed by Berker Peksag in :issue:`21717`.)
|
||||
|
||||
* The :meth:`~tarfile.TarFile.extractall` and :meth:`~tarfile.TarFile.extract`
|
||||
methods now take a keyword parameter *numeric_only*. If set to ``True``,
|
||||
the extracted files and directories will be owned by the numeric uid and gid
|
||||
from the tarfile. If set to ``False`` (the default, and the behavior in
|
||||
versions prior to 3.5), they will be owned bythe named user and group in the
|
||||
tarfile. (Contributed by Michael Vogt and Eric Smith in :issue:`23193`.)
|
||||
|
||||
time
|
||||
----
|
||||
|
||||
|
|
|
@ -1972,12 +1972,13 @@ class TarFile(object):
|
|||
|
||||
self.members.append(tarinfo)
|
||||
|
||||
def extractall(self, path=".", members=None):
|
||||
def extractall(self, path=".", members=None, *, numeric_owner=False):
|
||||
"""Extract all members from the archive to the current working
|
||||
directory and set owner, modification time and permissions on
|
||||
directories afterwards. `path' specifies a different directory
|
||||
to extract to. `members' is optional and must be a subset of the
|
||||
list returned by getmembers().
|
||||
list returned by getmembers(). If `numeric_owner` is True, only
|
||||
the numbers for user/group names are used and not the names.
|
||||
"""
|
||||
directories = []
|
||||
|
||||
|
@ -1991,7 +1992,8 @@ class TarFile(object):
|
|||
tarinfo = copy.copy(tarinfo)
|
||||
tarinfo.mode = 0o700
|
||||
# Do not set_attrs directories, as we will do that further down
|
||||
self.extract(tarinfo, path, set_attrs=not tarinfo.isdir())
|
||||
self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
|
||||
numeric_owner=numeric_owner)
|
||||
|
||||
# Reverse sort directories.
|
||||
directories.sort(key=lambda a: a.name)
|
||||
|
@ -2001,7 +2003,7 @@ class TarFile(object):
|
|||
for tarinfo in directories:
|
||||
dirpath = os.path.join(path, tarinfo.name)
|
||||
try:
|
||||
self.chown(tarinfo, dirpath)
|
||||
self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
|
||||
self.utime(tarinfo, dirpath)
|
||||
self.chmod(tarinfo, dirpath)
|
||||
except ExtractError as e:
|
||||
|
@ -2010,12 +2012,14 @@ class TarFile(object):
|
|||
else:
|
||||
self._dbg(1, "tarfile: %s" % e)
|
||||
|
||||
def extract(self, member, path="", set_attrs=True):
|
||||
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
|
||||
"""Extract a member from the archive to the current working directory,
|
||||
using its full name. Its file information is extracted as accurately
|
||||
as possible. `member' may be a filename or a TarInfo object. You can
|
||||
specify a different directory using `path'. File attributes (owner,
|
||||
mtime, mode) are set unless `set_attrs' is False.
|
||||
mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
|
||||
is True, only the numbers for user/group names are used and not
|
||||
the names.
|
||||
"""
|
||||
self._check("r")
|
||||
|
||||
|
@ -2030,7 +2034,8 @@ class TarFile(object):
|
|||
|
||||
try:
|
||||
self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
|
||||
set_attrs=set_attrs)
|
||||
set_attrs=set_attrs,
|
||||
numeric_owner=numeric_owner)
|
||||
except OSError as e:
|
||||
if self.errorlevel > 0:
|
||||
raise
|
||||
|
@ -2076,7 +2081,8 @@ class TarFile(object):
|
|||
# blkdev, etc.), return None instead of a file object.
|
||||
return None
|
||||
|
||||
def _extract_member(self, tarinfo, targetpath, set_attrs=True):
|
||||
def _extract_member(self, tarinfo, targetpath, set_attrs=True,
|
||||
numeric_owner=False):
|
||||
"""Extract the TarInfo object tarinfo to a physical
|
||||
file called targetpath.
|
||||
"""
|
||||
|
@ -2114,7 +2120,7 @@ class TarFile(object):
|
|||
self.makefile(tarinfo, targetpath)
|
||||
|
||||
if set_attrs:
|
||||
self.chown(tarinfo, targetpath)
|
||||
self.chown(tarinfo, targetpath, numeric_owner)
|
||||
if not tarinfo.issym():
|
||||
self.chmod(tarinfo, targetpath)
|
||||
self.utime(tarinfo, targetpath)
|
||||
|
@ -2203,19 +2209,24 @@ class TarFile(object):
|
|||
except KeyError:
|
||||
raise ExtractError("unable to resolve link inside archive")
|
||||
|
||||
def chown(self, tarinfo, targetpath):
|
||||
"""Set owner of targetpath according to tarinfo.
|
||||
def chown(self, tarinfo, targetpath, numeric_owner):
|
||||
"""Set owner of targetpath according to tarinfo. If numeric_owner
|
||||
is True, use .gid/.uid instead of .gname/.uname.
|
||||
"""
|
||||
if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
|
||||
# We have to be root to do so.
|
||||
try:
|
||||
g = grp.getgrnam(tarinfo.gname)[2]
|
||||
except KeyError:
|
||||
if numeric_owner:
|
||||
g = tarinfo.gid
|
||||
try:
|
||||
u = pwd.getpwnam(tarinfo.uname)[2]
|
||||
except KeyError:
|
||||
u = tarinfo.uid
|
||||
else:
|
||||
try:
|
||||
g = grp.getgrnam(tarinfo.gname)[2]
|
||||
except KeyError:
|
||||
g = tarinfo.gid
|
||||
try:
|
||||
u = pwd.getpwnam(tarinfo.uname)[2]
|
||||
except KeyError:
|
||||
u = tarinfo.uid
|
||||
try:
|
||||
if tarinfo.issym() and hasattr(os, "lchown"):
|
||||
os.lchown(targetpath, u, g)
|
||||
|
|
|
@ -2,8 +2,10 @@ import sys
|
|||
import os
|
||||
import io
|
||||
from hashlib import md5
|
||||
from contextlib import contextmanager
|
||||
|
||||
import unittest
|
||||
import unittest.mock
|
||||
import tarfile
|
||||
|
||||
from test import support, script_helper
|
||||
|
@ -2264,6 +2266,136 @@ class Bz2PartialReadTest(Bz2Test, unittest.TestCase):
|
|||
self._test_partial_input("r:bz2")
|
||||
|
||||
|
||||
def root_is_uid_gid_0():
|
||||
try:
|
||||
import pwd, grp
|
||||
except ImportError:
|
||||
return False
|
||||
if pwd.getpwuid(0)[0] != 'root':
|
||||
return False
|
||||
if grp.getgrgid(0)[0] != 'root':
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
class NumericOwnerTest(unittest.TestCase):
|
||||
# mock the following:
|
||||
# os.chown: so we can test what's being called
|
||||
# os.chmod: so the modes are not actually changed. if they are, we can't
|
||||
# delete the files/directories
|
||||
# os.geteuid: so we can lie and say we're root (uid = 0)
|
||||
|
||||
@staticmethod
|
||||
def _make_test_archive(filename_1, dirname_1, filename_2):
|
||||
# the file contents to write
|
||||
fobj = io.BytesIO(b"content")
|
||||
|
||||
# create a tar file with a file, a directory, and a file within that
|
||||
# directory. Assign various .uid/.gid values to them
|
||||
items = [(filename_1, 99, 98, tarfile.REGTYPE, fobj),
|
||||
(dirname_1, 77, 76, tarfile.DIRTYPE, None),
|
||||
(filename_2, 88, 87, tarfile.REGTYPE, fobj),
|
||||
]
|
||||
with tarfile.open(tmpname, 'w') as tarfl:
|
||||
for name, uid, gid, typ, contents in items:
|
||||
t = tarfile.TarInfo(name)
|
||||
t.uid = uid
|
||||
t.gid = gid
|
||||
t.uname = 'root'
|
||||
t.gname = 'root'
|
||||
t.type = typ
|
||||
tarfl.addfile(t, contents)
|
||||
|
||||
# return the full pathname to the tar file
|
||||
return tmpname
|
||||
|
||||
@staticmethod
|
||||
@contextmanager
|
||||
def _setup_test(mock_geteuid):
|
||||
mock_geteuid.return_value = 0 # lie and say we're root
|
||||
fname = 'numeric-owner-testfile'
|
||||
dirname = 'dir'
|
||||
|
||||
# the names we want stored in the tarfile
|
||||
filename_1 = fname
|
||||
dirname_1 = dirname
|
||||
filename_2 = os.path.join(dirname, fname)
|
||||
|
||||
# create the tarfile with the contents we're after
|
||||
tar_filename = NumericOwnerTest._make_test_archive(filename_1,
|
||||
dirname_1,
|
||||
filename_2)
|
||||
|
||||
# open the tarfile for reading. yield it and the names of the items
|
||||
# we stored into the file
|
||||
with tarfile.open(tar_filename) as tarfl:
|
||||
yield tarfl, filename_1, dirname_1, filename_2
|
||||
|
||||
@unittest.mock.patch('os.chown')
|
||||
@unittest.mock.patch('os.chmod')
|
||||
@unittest.mock.patch('os.geteuid')
|
||||
def test_extract_with_numeric_owner(self, mock_geteuid, mock_chmod,
|
||||
mock_chown):
|
||||
with self._setup_test(mock_geteuid) as (tarfl, filename_1, _,
|
||||
filename_2):
|
||||
tarfl.extract(filename_1, TEMPDIR, numeric_owner=True)
|
||||
tarfl.extract(filename_2 , TEMPDIR, numeric_owner=True)
|
||||
|
||||
# convert to filesystem paths
|
||||
f_filename_1 = os.path.join(TEMPDIR, filename_1)
|
||||
f_filename_2 = os.path.join(TEMPDIR, filename_2)
|
||||
|
||||
mock_chown.assert_has_calls([unittest.mock.call(f_filename_1, 99, 98),
|
||||
unittest.mock.call(f_filename_2, 88, 87),
|
||||
],
|
||||
any_order=True)
|
||||
|
||||
@unittest.mock.patch('os.chown')
|
||||
@unittest.mock.patch('os.chmod')
|
||||
@unittest.mock.patch('os.geteuid')
|
||||
def test_extractall_with_numeric_owner(self, mock_geteuid, mock_chmod,
|
||||
mock_chown):
|
||||
with self._setup_test(mock_geteuid) as (tarfl, filename_1, dirname_1,
|
||||
filename_2):
|
||||
tarfl.extractall(TEMPDIR, numeric_owner=True)
|
||||
|
||||
# convert to filesystem paths
|
||||
f_filename_1 = os.path.join(TEMPDIR, filename_1)
|
||||
f_dirname_1 = os.path.join(TEMPDIR, dirname_1)
|
||||
f_filename_2 = os.path.join(TEMPDIR, filename_2)
|
||||
|
||||
mock_chown.assert_has_calls([unittest.mock.call(f_filename_1, 99, 98),
|
||||
unittest.mock.call(f_dirname_1, 77, 76),
|
||||
unittest.mock.call(f_filename_2, 88, 87),
|
||||
],
|
||||
any_order=True)
|
||||
|
||||
# this test requires that uid=0 and gid=0 really be named 'root'. that's
|
||||
# because the uname and gname in the test file are 'root', and extract()
|
||||
# will look them up using pwd and grp to find their uid and gid, which we
|
||||
# test here to be 0.
|
||||
@unittest.skipUnless(root_is_uid_gid_0(),
|
||||
'uid=0,gid=0 must be named "root"')
|
||||
@unittest.mock.patch('os.chown')
|
||||
@unittest.mock.patch('os.chmod')
|
||||
@unittest.mock.patch('os.geteuid')
|
||||
def test_extract_without_numeric_owner(self, mock_geteuid, mock_chmod,
|
||||
mock_chown):
|
||||
with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _):
|
||||
tarfl.extract(filename_1, TEMPDIR, numeric_owner=False)
|
||||
|
||||
# convert to filesystem paths
|
||||
f_filename_1 = os.path.join(TEMPDIR, filename_1)
|
||||
|
||||
mock_chown.assert_called_with(f_filename_1, 0, 0)
|
||||
|
||||
@unittest.mock.patch('os.geteuid')
|
||||
def test_keyword_only(self, mock_geteuid):
|
||||
with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _):
|
||||
self.assertRaises(TypeError,
|
||||
tarfl.extract, filename_1, TEMPDIR, False, True)
|
||||
|
||||
|
||||
def setUpModule():
|
||||
support.unlink(TEMPDIR)
|
||||
os.makedirs(TEMPDIR)
|
||||
|
|
|
@ -1458,6 +1458,7 @@ Norman Vine
|
|||
Pauli Virtanen
|
||||
Frank Visser
|
||||
Johannes Vogel
|
||||
Michael Vogt
|
||||
Radu Voicilas
|
||||
Alex Volkov
|
||||
Martijn Vries
|
||||
|
|
|
@ -32,6 +32,10 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #23193: Add a numeric_owner parameter to
|
||||
tarfile.TarFile.extract and tarfile.TarFile.extractall. Patch by
|
||||
Michael Vogt and Eric Smith.
|
||||
|
||||
- Issue #23342: Add a subprocess.run() function than returns a CalledProcess
|
||||
instance for a more consistent API than the existing call* functions.
|
||||
|
||||
|
|
Loading…
Reference in New Issue