mirror of https://github.com/python/cpython
gh-121999: Change default tarfile filter to 'data' (GH-122002)
Co-authored-by: Tomas R <tomas.roun8@gmail.com> Co-authored-by: Scott Odle <scott@sjodle.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Petr Viktorin <encukou@gmail.com>
This commit is contained in:
parent
bc94cf7e25
commit
dcafb362f7
|
@ -706,11 +706,9 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
|
||||||
|
|
||||||
The keyword-only *filter* argument is passed to the underlying unpacking
|
The keyword-only *filter* argument is passed to the underlying unpacking
|
||||||
function. For zip files, *filter* is not accepted.
|
function. For zip files, *filter* is not accepted.
|
||||||
For tar files, it is recommended to set it to ``'data'``,
|
For tar files, it is recommended to use ``'data'`` (default since Python
|
||||||
unless using features specific to tar and UNIX-like filesystems.
|
3.14), unless using features specific to tar and UNIX-like filesystems.
|
||||||
(See :ref:`tarfile-extraction-filter` for details.)
|
(See :ref:`tarfile-extraction-filter` for details.)
|
||||||
The ``'data'`` filter will become the default for tar files
|
|
||||||
in Python 3.14.
|
|
||||||
|
|
||||||
.. audit-event:: shutil.unpack_archive filename,extract_dir,format shutil.unpack_archive
|
.. audit-event:: shutil.unpack_archive filename,extract_dir,format shutil.unpack_archive
|
||||||
|
|
||||||
|
@ -721,6 +719,12 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
|
||||||
the *extract_dir* argument, e.g. members that have absolute filenames
|
the *extract_dir* argument, e.g. members that have absolute filenames
|
||||||
starting with "/" or filenames with two dots "..".
|
starting with "/" or filenames with two dots "..".
|
||||||
|
|
||||||
|
Since Python 3.14, the defaults for both built-in formats (zip and tar
|
||||||
|
files) will prevent the most dangerous of such security issues,
|
||||||
|
but will not prevent *all* unintended behavior.
|
||||||
|
Read the :ref:`tarfile-further-verification`
|
||||||
|
section for tar-specific details.
|
||||||
|
|
||||||
.. versionchanged:: 3.7
|
.. versionchanged:: 3.7
|
||||||
Accepts a :term:`path-like object` for *filename* and *extract_dir*.
|
Accepts a :term:`path-like object` for *filename* and *extract_dir*.
|
||||||
|
|
||||||
|
|
|
@ -40,9 +40,12 @@ Some facts and figures:
|
||||||
Archives are extracted using a :ref:`filter <tarfile-extraction-filter>`,
|
Archives are extracted using a :ref:`filter <tarfile-extraction-filter>`,
|
||||||
which makes it possible to either limit surprising/dangerous features,
|
which makes it possible to either limit surprising/dangerous features,
|
||||||
or to acknowledge that they are expected and the archive is fully trusted.
|
or to acknowledge that they are expected and the archive is fully trusted.
|
||||||
By default, archives are fully trusted, but this default is deprecated
|
|
||||||
and slated to change in Python 3.14.
|
|
||||||
|
|
||||||
|
.. versionchanged:: 3.14
|
||||||
|
Set the default extraction filter to :func:`data <data_filter>`,
|
||||||
|
which disallows some dangerous features such as links to absolute paths
|
||||||
|
or paths outside of the destination. Previously, the filter strategy
|
||||||
|
was equivalent to :func:`fully_trusted <fully_trusted_filter>`.
|
||||||
|
|
||||||
.. function:: open(name=None, mode='r', fileobj=None, bufsize=10240, **kwargs)
|
.. function:: open(name=None, mode='r', fileobj=None, bufsize=10240, **kwargs)
|
||||||
|
|
||||||
|
@ -495,18 +498,18 @@ be finalized; only the internally used file object will be closed. See the
|
||||||
The *filter* argument specifies how ``members`` are modified or rejected
|
The *filter* argument specifies how ``members`` are modified or rejected
|
||||||
before extraction.
|
before extraction.
|
||||||
See :ref:`tarfile-extraction-filter` for details.
|
See :ref:`tarfile-extraction-filter` for details.
|
||||||
It is recommended to set this explicitly depending on which *tar* features
|
It is recommended to set this explicitly only if specific *tar* features
|
||||||
you need to support.
|
are required, or as ``filter='data'`` to support Python versions with a less
|
||||||
|
secure default (3.13 and lower).
|
||||||
|
|
||||||
.. warning::
|
.. warning::
|
||||||
|
|
||||||
Never extract archives from untrusted sources without prior inspection.
|
Never extract archives from untrusted sources without prior inspection.
|
||||||
It is possible that files are created outside of *path*, e.g. members
|
|
||||||
that have absolute filenames starting with ``"/"`` or filenames with two
|
|
||||||
dots ``".."``.
|
|
||||||
|
|
||||||
Set ``filter='data'`` to prevent the most dangerous security issues,
|
Since Python 3.14, the default (:func:`data <data_filter>`) will prevent
|
||||||
and read the :ref:`tarfile-extraction-filter` section for details.
|
the most dangerous security issues.
|
||||||
|
However, it will not prevent *all* unintended or insecure behavior.
|
||||||
|
Read the :ref:`tarfile-extraction-filter` section for details.
|
||||||
|
|
||||||
.. versionchanged:: 3.5
|
.. versionchanged:: 3.5
|
||||||
Added the *numeric_owner* parameter.
|
Added the *numeric_owner* parameter.
|
||||||
|
@ -517,6 +520,9 @@ be finalized; only the internally used file object will be closed. See the
|
||||||
.. versionchanged:: 3.12
|
.. versionchanged:: 3.12
|
||||||
Added the *filter* parameter.
|
Added the *filter* parameter.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.14
|
||||||
|
The *filter* parameter now defaults to ``'data'``.
|
||||||
|
|
||||||
|
|
||||||
.. method:: TarFile.extract(member, path="", set_attrs=True, *, numeric_owner=False, filter=None)
|
.. method:: TarFile.extract(member, path="", set_attrs=True, *, numeric_owner=False, filter=None)
|
||||||
|
|
||||||
|
@ -536,10 +542,8 @@ be finalized; only the internally used file object will be closed. See the
|
||||||
|
|
||||||
.. warning::
|
.. warning::
|
||||||
|
|
||||||
See the warning for :meth:`extractall`.
|
Never extract archives from untrusted sources without prior inspection.
|
||||||
|
See the warning for :meth:`extractall` for details.
|
||||||
Set ``filter='data'`` to prevent the most dangerous security issues,
|
|
||||||
and read the :ref:`tarfile-extraction-filter` section for details.
|
|
||||||
|
|
||||||
.. versionchanged:: 3.2
|
.. versionchanged:: 3.2
|
||||||
Added the *set_attrs* parameter.
|
Added the *set_attrs* parameter.
|
||||||
|
@ -602,14 +606,8 @@ be finalized; only the internally used file object will be closed. See the
|
||||||
String names are not allowed for this attribute, unlike the *filter*
|
String names are not allowed for this attribute, unlike the *filter*
|
||||||
argument to :meth:`~TarFile.extract`.
|
argument to :meth:`~TarFile.extract`.
|
||||||
|
|
||||||
If ``extraction_filter`` is ``None`` (the default),
|
If ``extraction_filter`` is ``None`` (the default), extraction methods
|
||||||
calling an extraction method without a *filter* argument will raise a
|
will use the :func:`data <data_filter>` filter by default.
|
||||||
``DeprecationWarning``,
|
|
||||||
and fall back to the :func:`fully_trusted <fully_trusted_filter>` filter,
|
|
||||||
whose dangerous behavior matches previous versions of Python.
|
|
||||||
|
|
||||||
In Python 3.14+, leaving ``extraction_filter=None`` will cause
|
|
||||||
extraction methods to use the :func:`data <data_filter>` filter by default.
|
|
||||||
|
|
||||||
The attribute may be set on instances or overridden in subclasses.
|
The attribute may be set on instances or overridden in subclasses.
|
||||||
It also is possible to set it on the ``TarFile`` class itself to set a
|
It also is possible to set it on the ``TarFile`` class itself to set a
|
||||||
|
@ -619,6 +617,14 @@ be finalized; only the internally used file object will be closed. See the
|
||||||
To set a global default this way, a filter function needs to be wrapped in
|
To set a global default this way, a filter function needs to be wrapped in
|
||||||
:func:`staticmethod()` to prevent injection of a ``self`` argument.
|
:func:`staticmethod()` to prevent injection of a ``self`` argument.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.14
|
||||||
|
|
||||||
|
The default filter is set to :func:`data <data_filter>`,
|
||||||
|
which disallows some dangerous features such as links to absolute paths
|
||||||
|
or paths outside of the destination.
|
||||||
|
Previously, the default was equivalent to
|
||||||
|
:func:`fully_trusted <fully_trusted_filter>`.
|
||||||
|
|
||||||
.. method:: TarFile.add(name, arcname=None, recursive=True, *, filter=None)
|
.. method:: TarFile.add(name, arcname=None, recursive=True, *, filter=None)
|
||||||
|
|
||||||
Add the file *name* to the archive. *name* may be any type of file
|
Add the file *name* to the archive. *name* may be any type of file
|
||||||
|
@ -969,6 +975,12 @@ In most cases, the full functionality is not needed.
|
||||||
Therefore, *tarfile* supports extraction filters: a mechanism to limit
|
Therefore, *tarfile* supports extraction filters: a mechanism to limit
|
||||||
functionality, and thus mitigate some of the security issues.
|
functionality, and thus mitigate some of the security issues.
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
|
||||||
|
None of the available filters blocks *all* dangerous archive features.
|
||||||
|
Never extract archives from untrusted sources without prior inspection.
|
||||||
|
See also :ref:`tarfile-further-verification`.
|
||||||
|
|
||||||
.. seealso::
|
.. seealso::
|
||||||
|
|
||||||
:pep:`706`
|
:pep:`706`
|
||||||
|
@ -992,12 +1004,13 @@ can be:
|
||||||
|
|
||||||
* ``None`` (default): Use :attr:`TarFile.extraction_filter`.
|
* ``None`` (default): Use :attr:`TarFile.extraction_filter`.
|
||||||
|
|
||||||
If that is also ``None`` (the default), raise a ``DeprecationWarning``,
|
If that is also ``None`` (the default), the ``'data'`` filter will be used.
|
||||||
and fall back to the ``'fully_trusted'`` filter, whose dangerous behavior
|
|
||||||
matches previous versions of Python.
|
|
||||||
|
|
||||||
In Python 3.14, the ``'data'`` filter will become the default instead.
|
.. versionchanged:: 3.14
|
||||||
It's possible to switch earlier; see :attr:`TarFile.extraction_filter`.
|
|
||||||
|
The default filter is set to :func:`data <data_filter>`.
|
||||||
|
Previously, the default was equivalent to
|
||||||
|
:func:`fully_trusted <fully_trusted_filter>`.
|
||||||
|
|
||||||
* A callable which will be called for each extracted member with a
|
* A callable which will be called for each extracted member with a
|
||||||
:ref:`TarInfo <tarinfo-objects>` describing the member and the destination
|
:ref:`TarInfo <tarinfo-objects>` describing the member and the destination
|
||||||
|
@ -1080,6 +1093,9 @@ reused in custom filters:
|
||||||
|
|
||||||
Return the modified ``TarInfo`` member.
|
Return the modified ``TarInfo`` member.
|
||||||
|
|
||||||
|
Note that this filter does not block *all* dangerous archive features.
|
||||||
|
See :ref:`tarfile-further-verification` for details.
|
||||||
|
|
||||||
|
|
||||||
.. _tarfile-extraction-refuse:
|
.. _tarfile-extraction-refuse:
|
||||||
|
|
||||||
|
@ -1093,6 +1109,8 @@ With ``errorlevel=0`` the error will be logged and the member will be skipped,
|
||||||
but extraction will continue.
|
but extraction will continue.
|
||||||
|
|
||||||
|
|
||||||
|
.. _tarfile-further-verification:
|
||||||
|
|
||||||
Hints for further verification
|
Hints for further verification
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
@ -1110,9 +1128,10 @@ Here is an incomplete list of things to consider:
|
||||||
disk, memory and CPU usage.
|
disk, memory and CPU usage.
|
||||||
* Check filenames against an allow-list of characters
|
* Check filenames against an allow-list of characters
|
||||||
(to filter out control characters, confusables, foreign path separators,
|
(to filter out control characters, confusables, foreign path separators,
|
||||||
etc.).
|
and so on).
|
||||||
* Check that filenames have expected extensions (discouraging files that
|
* Check that filenames have expected extensions (discouraging files that
|
||||||
execute when you “click on them”, or extension-less files like Windows special device names).
|
execute when you “click on them”, or extension-less files like Windows
|
||||||
|
special device names).
|
||||||
* Limit the number of extracted files, total size of extracted data,
|
* Limit the number of extracted files, total size of extracted data,
|
||||||
filename length (including symlink length), and size of individual files.
|
filename length (including symlink length), and size of individual files.
|
||||||
* Check for files that would be shadowed on case-insensitive filesystems.
|
* Check for files that would be shadowed on case-insensitive filesystems.
|
||||||
|
|
|
@ -2248,13 +2248,7 @@ class TarFile(object):
|
||||||
if filter is None:
|
if filter is None:
|
||||||
filter = self.extraction_filter
|
filter = self.extraction_filter
|
||||||
if filter is None:
|
if filter is None:
|
||||||
import warnings
|
return data_filter
|
||||||
warnings.warn(
|
|
||||||
'Python 3.14 will, by default, filter extracted tar '
|
|
||||||
+ 'archives and reject files or modify their metadata. '
|
|
||||||
+ 'Use the filter argument to control this behavior.',
|
|
||||||
DeprecationWarning, stacklevel=3)
|
|
||||||
return fully_trusted_filter
|
|
||||||
if isinstance(filter, str):
|
if isinstance(filter, str):
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
'String names are not supported for '
|
'String names are not supported for '
|
||||||
|
|
|
@ -2145,9 +2145,6 @@ class TestArchives(BaseTest, unittest.TestCase):
|
||||||
def check_unpack_tarball(self, format):
|
def check_unpack_tarball(self, format):
|
||||||
self.check_unpack_archive(format, filter='fully_trusted')
|
self.check_unpack_archive(format, filter='fully_trusted')
|
||||||
self.check_unpack_archive(format, filter='data')
|
self.check_unpack_archive(format, filter='data')
|
||||||
with warnings_helper.check_warnings(
|
|
||||||
('Python 3.14', DeprecationWarning)):
|
|
||||||
self.check_unpack_archive(format)
|
|
||||||
|
|
||||||
def test_unpack_archive_tar(self):
|
def test_unpack_archive_tar(self):
|
||||||
self.check_unpack_tarball('tar')
|
self.check_unpack_tarball('tar')
|
||||||
|
|
|
@ -722,6 +722,24 @@ class MiscReadTestBase(CommonReadTest):
|
||||||
tar.close()
|
tar.close()
|
||||||
os_helper.rmtree(DIR)
|
os_helper.rmtree(DIR)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def test_extractall_default_filter():
|
||||||
|
# Test that the default filter is now "data", and the other filter types are not used.
|
||||||
|
DIR = pathlib.Path(TEMPDIR) / "extractall_default_filter"
|
||||||
|
with (
|
||||||
|
os_helper.temp_dir(DIR),
|
||||||
|
tarfile.open(tarname, encoding="iso8859-1") as tar,
|
||||||
|
unittest.mock.patch("tarfile.data_filter", wraps=tarfile.data_filter) as mock_data_filter,
|
||||||
|
unittest.mock.patch("tarfile.tar_filter", wraps=tarfile.tar_filter) as mock_tar_filter,
|
||||||
|
unittest.mock.patch("tarfile.fully_trusted_filter", wraps=tarfile.fully_trusted_filter) as mock_ft_filter
|
||||||
|
):
|
||||||
|
directories = [t for t in tar if t.isdir()]
|
||||||
|
tar.extractall(DIR, directories)
|
||||||
|
|
||||||
|
mock_data_filter.assert_called()
|
||||||
|
mock_ft_filter.assert_not_called()
|
||||||
|
mock_tar_filter.assert_not_called()
|
||||||
|
|
||||||
@os_helper.skip_unless_working_chmod
|
@os_helper.skip_unless_working_chmod
|
||||||
def test_extract_directory(self):
|
def test_extract_directory(self):
|
||||||
dirtype = "ustar/dirtype"
|
dirtype = "ustar/dirtype"
|
||||||
|
@ -738,31 +756,6 @@ class MiscReadTestBase(CommonReadTest):
|
||||||
finally:
|
finally:
|
||||||
os_helper.rmtree(DIR)
|
os_helper.rmtree(DIR)
|
||||||
|
|
||||||
def test_deprecation_if_no_filter_passed_to_extractall(self):
|
|
||||||
DIR = pathlib.Path(TEMPDIR) / "extractall"
|
|
||||||
with (
|
|
||||||
os_helper.temp_dir(DIR),
|
|
||||||
tarfile.open(tarname, encoding="iso8859-1") as tar
|
|
||||||
):
|
|
||||||
directories = [t for t in tar if t.isdir()]
|
|
||||||
with self.assertWarnsRegex(DeprecationWarning, "Use the filter argument") as cm:
|
|
||||||
tar.extractall(DIR, directories)
|
|
||||||
# check that the stacklevel of the deprecation warning is correct:
|
|
||||||
self.assertEqual(cm.filename, __file__)
|
|
||||||
|
|
||||||
def test_deprecation_if_no_filter_passed_to_extract(self):
|
|
||||||
dirtype = "ustar/dirtype"
|
|
||||||
DIR = pathlib.Path(TEMPDIR) / "extractall"
|
|
||||||
with (
|
|
||||||
os_helper.temp_dir(DIR),
|
|
||||||
tarfile.open(tarname, encoding="iso8859-1") as tar
|
|
||||||
):
|
|
||||||
tarinfo = tar.getmember(dirtype)
|
|
||||||
with self.assertWarnsRegex(DeprecationWarning, "Use the filter argument") as cm:
|
|
||||||
tar.extract(tarinfo, path=DIR)
|
|
||||||
# check that the stacklevel of the deprecation warning is correct:
|
|
||||||
self.assertEqual(cm.filename, __file__)
|
|
||||||
|
|
||||||
def test_extractall_pathlike_dir(self):
|
def test_extractall_pathlike_dir(self):
|
||||||
DIR = os.path.join(TEMPDIR, "extractall")
|
DIR = os.path.join(TEMPDIR, "extractall")
|
||||||
with os_helper.temp_dir(DIR), \
|
with os_helper.temp_dir(DIR), \
|
||||||
|
@ -4011,15 +4004,6 @@ class TestExtractionFilters(unittest.TestCase):
|
||||||
self.assertIs(filtered.name, tarinfo.name)
|
self.assertIs(filtered.name, tarinfo.name)
|
||||||
self.assertIs(filtered.type, tarinfo.type)
|
self.assertIs(filtered.type, tarinfo.type)
|
||||||
|
|
||||||
def test_default_filter_warns(self):
|
|
||||||
"""Ensure the default filter warns"""
|
|
||||||
with ArchiveMaker() as arc:
|
|
||||||
arc.add('foo')
|
|
||||||
with warnings_helper.check_warnings(
|
|
||||||
('Python 3.14', DeprecationWarning)):
|
|
||||||
with self.check_context(arc.open(), None):
|
|
||||||
self.expect_file('foo')
|
|
||||||
|
|
||||||
def test_change_default_filter_on_instance(self):
|
def test_change_default_filter_on_instance(self):
|
||||||
tar = tarfile.TarFile(tarname, 'r')
|
tar = tarfile.TarFile(tarname, 'r')
|
||||||
def strict_filter(tarinfo, path):
|
def strict_filter(tarinfo, path):
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
The default extraction filter for the :mod:`tarfile` module is now
|
||||||
|
set to :func:`'data' <tarfile.data_filter>`.
|
Loading…
Reference in New Issue