Issue #8390: tarfile uses surrogateescape as the default error handler
(instead of replace in read mode or strict in write mode)
This commit is contained in:
parent
aac786e586
commit
de629d46f2
|
@ -218,7 +218,7 @@ be finalized; only the internally used file object will be closed. See the
|
||||||
.. versionadded:: 3.2
|
.. versionadded:: 3.2
|
||||||
Added support for the context manager protocol.
|
Added support for the context manager protocol.
|
||||||
|
|
||||||
.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors=None, pax_headers=None, debug=0, errorlevel=0)
|
.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors='surrogateescape', pax_headers=None, debug=0, errorlevel=0)
|
||||||
|
|
||||||
All following arguments are optional and can be accessed as instance attributes
|
All following arguments are optional and can be accessed as instance attributes
|
||||||
as well.
|
as well.
|
||||||
|
@ -267,6 +267,9 @@ be finalized; only the internally used file object will be closed. See the
|
||||||
to be handled. The default settings will work for most users.
|
to be handled. The default settings will work for most users.
|
||||||
See section :ref:`tar-unicode` for in-depth information.
|
See section :ref:`tar-unicode` for in-depth information.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.2
|
||||||
|
Use ``'surrogateescape'`` as the default for the *errors* argument.
|
||||||
|
|
||||||
The *pax_headers* argument is an optional dictionary of strings which
|
The *pax_headers* argument is an optional dictionary of strings which
|
||||||
will be added as a pax global header if *format* is :const:`PAX_FORMAT`.
|
will be added as a pax global header if *format* is :const:`PAX_FORMAT`.
|
||||||
|
|
||||||
|
@ -449,11 +452,14 @@ It does *not* contain the file's data itself.
|
||||||
a :class:`TarInfo` object.
|
a :class:`TarInfo` object.
|
||||||
|
|
||||||
|
|
||||||
.. method:: TarInfo.tobuf(format=DEFAULT_FORMAT, encoding=ENCODING, errors='strict')
|
.. method:: TarInfo.tobuf(format=DEFAULT_FORMAT, encoding=ENCODING, errors='surrogateescape')
|
||||||
|
|
||||||
Create a string buffer from a :class:`TarInfo` object. For information on the
|
Create a string buffer from a :class:`TarInfo` object. For information on the
|
||||||
arguments see the constructor of the :class:`TarFile` class.
|
arguments see the constructor of the :class:`TarFile` class.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.2
|
||||||
|
Use ``'surrogateescape'`` as the default for the *errors* argument.
|
||||||
|
|
||||||
|
|
||||||
A ``TarInfo`` object has the following public data attributes:
|
A ``TarInfo`` object has the following public data attributes:
|
||||||
|
|
||||||
|
@ -701,11 +707,10 @@ metadata must be either decoded or encoded. If *encoding* is not set
|
||||||
appropriately, this conversion may fail.
|
appropriately, this conversion may fail.
|
||||||
|
|
||||||
The *errors* argument defines how characters are treated that cannot be
|
The *errors* argument defines how characters are treated that cannot be
|
||||||
converted. Possible values are listed in section :ref:`codec-base-classes`. In
|
converted. Possible values are listed in section :ref:`codec-base-classes`.
|
||||||
read mode the default scheme is ``'replace'``. This avoids unexpected
|
The default scheme is ``'surrogateescape'`` which Python also uses for its
|
||||||
:exc:`UnicodeError` exceptions and guarantees that an archive can always be
|
file system calls, see :ref:`os-filenames`.
|
||||||
read. In write mode the default value for *errors* is ``'strict'``. This
|
|
||||||
ensures that name information is not altered unnoticed.
|
|
||||||
|
|
||||||
In case of writing :const:`PAX_FORMAT` archives, *encoding* is ignored because
|
In case of writing :const:`PAX_FORMAT` archives, *encoding* is ignored because
|
||||||
non-ASCII metadata is stored using *UTF-8*.
|
non-ASCII metadata is stored using *UTF-8*. Storing surrogate characters is not
|
||||||
|
possible and will raise a :exc:`UnicodeEncodeError`.
|
||||||
|
|
|
@ -978,7 +978,7 @@ class TarInfo(object):
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
|
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
|
||||||
"""Return a tar header as a string of 512 byte blocks.
|
"""Return a tar header as a string of 512 byte blocks.
|
||||||
"""
|
"""
|
||||||
info = self.get_info()
|
info = self.get_info()
|
||||||
|
@ -1490,7 +1490,7 @@ class TarFile(object):
|
||||||
|
|
||||||
def __init__(self, name=None, mode="r", fileobj=None, format=None,
|
def __init__(self, name=None, mode="r", fileobj=None, format=None,
|
||||||
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
|
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
|
||||||
errors=None, pax_headers=None, debug=None, errorlevel=None):
|
errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
|
||||||
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
|
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
|
||||||
read from an existing archive, 'a' to append data to an existing
|
read from an existing archive, 'a' to append data to an existing
|
||||||
file or 'w' to create a new file overwriting an existing one. `mode'
|
file or 'w' to create a new file overwriting an existing one. `mode'
|
||||||
|
@ -1531,13 +1531,7 @@ class TarFile(object):
|
||||||
self.ignore_zeros = ignore_zeros
|
self.ignore_zeros = ignore_zeros
|
||||||
if encoding is not None:
|
if encoding is not None:
|
||||||
self.encoding = encoding
|
self.encoding = encoding
|
||||||
|
self.errors = errors
|
||||||
if errors is not None:
|
|
||||||
self.errors = errors
|
|
||||||
elif mode == "r":
|
|
||||||
self.errors = "replace"
|
|
||||||
else:
|
|
||||||
self.errors = "strict"
|
|
||||||
|
|
||||||
if pax_headers is not None and self.format == PAX_FORMAT:
|
if pax_headers is not None and self.format == PAX_FORMAT:
|
||||||
self.pax_headers = pax_headers
|
self.pax_headers = pax_headers
|
||||||
|
|
|
@ -1118,8 +1118,8 @@ class UstarUnicodeTest(unittest.TestCase):
|
||||||
if self.format != tarfile.PAX_FORMAT:
|
if self.format != tarfile.PAX_FORMAT:
|
||||||
tar = tarfile.open(tmpname, encoding="ascii")
|
tar = tarfile.open(tmpname, encoding="ascii")
|
||||||
t = tar.getmember("foo")
|
t = tar.getmember("foo")
|
||||||
self.assertEqual(t.uname, "\ufffd\ufffd\ufffd")
|
self.assertEqual(t.uname, "\udce4\udcf6\udcfc")
|
||||||
self.assertEqual(t.gname, "\ufffd\ufffd\ufffd")
|
self.assertEqual(t.gname, "\udce4\udcf6\udcfc")
|
||||||
|
|
||||||
|
|
||||||
class GNUUnicodeTest(UstarUnicodeTest):
|
class GNUUnicodeTest(UstarUnicodeTest):
|
||||||
|
|
|
@ -348,6 +348,9 @@ C-API
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #8390: tarfile uses surrogateescape as the default error handler
|
||||||
|
(instead of replace in read mode or strict in write mode)
|
||||||
|
|
||||||
- Issue #7755: Use an unencumbered audio file for tests.
|
- Issue #7755: Use an unencumbered audio file for tests.
|
||||||
|
|
||||||
- Issue #8621: uuid.uuid4() returned the same sequence of values in the
|
- Issue #8621: uuid.uuid4() returned the same sequence of values in the
|
||||||
|
|
Loading…
Reference in New Issue