bpo-18819: tarfile: only set device fields for device files (GH-18080)

The GNU docs describe the `devmajor` and `devminor` fields of the tar
header struct only in the context of character and block special files,
suggesting that in other cases they are not populated. Typical utilities
behave accordingly; this patch teaches `tarfile` to do the same.
This commit is contained in:
William Chargin 2020-02-12 11:56:02 -08:00 committed by GitHub
parent 4fac7ed43e
commit 674935b8ca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 60 additions and 2 deletions

View File

@ -930,6 +930,14 @@ class TarInfo(object):
"""Return a header block. info is a dictionary with file """Return a header block. info is a dictionary with file
information, format must be one of the *_FORMAT constants. information, format must be one of the *_FORMAT constants.
""" """
has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)
if has_device_fields:
devmajor = itn(info.get("devmajor", 0), 8, format)
devminor = itn(info.get("devminor", 0), 8, format)
else:
devmajor = stn("", 8, encoding, errors)
devminor = stn("", 8, encoding, errors)
parts = [ parts = [
stn(info.get("name", ""), 100, encoding, errors), stn(info.get("name", ""), 100, encoding, errors),
itn(info.get("mode", 0) & 0o7777, 8, format), itn(info.get("mode", 0) & 0o7777, 8, format),
@ -943,8 +951,8 @@ class TarInfo(object):
info.get("magic", POSIX_MAGIC), info.get("magic", POSIX_MAGIC),
stn(info.get("uname", ""), 32, encoding, errors), stn(info.get("uname", ""), 32, encoding, errors),
stn(info.get("gname", ""), 32, encoding, errors), stn(info.get("gname", ""), 32, encoding, errors),
itn(info.get("devmajor", 0), 8, format), devmajor,
itn(info.get("devminor", 0), 8, format), devminor,
stn(info.get("prefix", ""), 155, encoding, errors) stn(info.get("prefix", ""), 155, encoding, errors)
] ]

View File

@ -1549,6 +1549,52 @@ class GNUWriteTest(unittest.TestCase):
("longlnk/" * 127) + "longlink_") ("longlnk/" * 127) + "longlink_")
class DeviceHeaderTest(WriteTestBase, unittest.TestCase):
    """Verify that devmajor/devminor header bytes are emitted only for devices."""

    prefix = "w:"

    def test_headers_written_only_for_device_files(self):
        # Regression test for bpo-18819.
        #
        # All archive I/O goes through the module-level `tmpname`, so no
        # dedicated scratch directory is needed (creating one would only add
        # a way to fail with FileExistsError after an interrupted run).
        input_blk = tarfile.TarInfo(name="my_block_device")
        input_reg = tarfile.TarInfo(name="my_regular_file")
        input_blk.type = tarfile.BLKTYPE
        input_reg.type = tarfile.REGTYPE

        with tarfile.open(tmpname, self.mode) as tar:
            tar.addfile(input_blk)
            tar.addfile(input_reg)

        # devmajor and devminor should be *interpreted* as 0 in both...
        with tarfile.open(tmpname, "r") as tar:
            output_blk = tar.getmember("my_block_device")
            output_reg = tar.getmember("my_regular_file")
        self.assertEqual(output_blk.devmajor, 0)
        self.assertEqual(output_blk.devminor, 0)
        self.assertEqual(output_reg.devmajor, 0)
        self.assertEqual(output_reg.devminor, 0)

        # ...but the fields should not actually be set on regular files:
        with open(tmpname, "rb") as infile:
            buf = infile.read()
        # Each member's header is the span from its offset to its data offset.
        buf_blk = buf[output_blk.offset:output_blk.offset_data]
        buf_reg = buf[output_reg.offset:output_reg.offset_data]

        # See `struct posix_header` in the GNU docs for byte offsets:
        # <https://www.gnu.org/software/tar/manual/html_node/Standard.html>
        # devmajor and devminor are two adjacent 8-byte fields.
        field_size = 8
        devmajor_offset = 329
        device_headers = slice(devmajor_offset,
                               devmajor_offset + 2 * field_size)

        # Device files carry NUL-terminated octal zeros in both fields;
        # non-device files leave both fields entirely NUL-filled.
        self.assertEqual(buf_blk[device_headers], b"0000000\0" * 2)
        self.assertEqual(buf_reg[device_headers], b"\0" * (2 * field_size))
class CreateTest(WriteTestBase, unittest.TestCase): class CreateTest(WriteTestBase, unittest.TestCase):
prefix = "x:" prefix = "x:"

View File

@ -286,6 +286,7 @@ Brad Chapman
Greg Chapman Greg Chapman
Mitch Chapman Mitch Chapman
Matt Chaput Matt Chaput
William Chargin
Yogesh Chaudhari Yogesh Chaudhari
David Chaum David Chaum
Nicolas Chauvat Nicolas Chauvat

View File

@ -0,0 +1,3 @@
Omit ``devmajor`` and ``devminor`` fields for non-device files in
:mod:`tarfile` archives, enabling bit-for-bit compatibility with GNU
``tar(1)``.