mirror of https://github.com/python/cpython
gh-86094: Add support for Unicode Path Extra Field in ZipFile (gh-102566)
This commit is contained in:
parent
a28d4edb23
commit
8f70b16e33
|
@ -1616,6 +1616,33 @@ class OtherTests(unittest.TestCase):
|
|||
self.assertEqual(zf.filelist[0].filename, "foo.txt")
|
||||
self.assertEqual(zf.filelist[1].filename, "\xf6.txt")
|
||||
|
||||
@requires_zlib()
def test_read_zipfile_containing_unicode_path_extra_field(self):
    # Write one member carrying a Unicode Path extra field (0x7075), then
    # reopen the archive and check the member name that is reported.
    import zlib

    # Non-ASCII member name and its UTF-8 encoded form.
    filename = '이름.txt'
    filename_encoded = filename.encode('utf-8')

    # Extra field payload: one version byte, the little-endian CRC-32 of
    # the encoded name, then the encoded name itself.
    payload = b''.join((
        b'\x01',
        struct.pack('<L', zlib.crc32(filename_encoded)),
        filename_encoded,
    ))

    # Full extra field record: 2-byte tag, 2-byte little-endian payload
    # size, payload.
    zip_info = zipfile.ZipInfo(filename)
    zip_info.extra = (
        b'\x75\x70' + len(payload).to_bytes(2, 'little') + payload
    )

    # Add the member to a fresh archive.
    with zipfile.ZipFile(TESTFN, mode='w') as zf:
        zf.writestr(zip_info, b'Hello World!')

    # Read the archive back and inspect the first member's name.
    with zipfile.ZipFile(TESTFN, "r") as zf:
        self.assertEqual(zf.filelist[0].filename, "이름.txt")
|
||||
|
||||
def test_read_after_write_unicode_filenames(self):
|
||||
with zipfile.ZipFile(TESTFN2, 'w') as zipfp:
|
||||
zipfp.writestr('приклад', b'sample')
|
||||
|
|
|
@ -338,6 +338,22 @@ def _EndRecData(fpin):
|
|||
# Unable to find a valid end of central directory structure
|
||||
return None
|
||||
|
||||
def _sanitize_filename(filename):
    """Return *filename* truncated at its first null character and with
    the platform's directory separator replaced by a forward slash."""

    # Null bytes in file names are used as tricks by viruses in archives,
    # so everything from the first one onwards is discarded.
    cut = filename.find("\x00")
    if cut != -1:
        filename = filename[:cut]

    # The ZIP format specification requires forward slashes as the
    # directory separator.  Nothing needs translating when the platform
    # already uses "/" or the name holds no native separator.
    native_sep = os.sep
    if native_sep == "/" or native_sep not in filename:
        return filename
    return filename.replace(native_sep, "/")
|
||||
|
||||
|
||||
class ZipInfo (object):
|
||||
"""Class with attributes describing each file in the ZIP archive."""
|
||||
|
@ -368,16 +384,9 @@ class ZipInfo (object):
|
|||
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
|
||||
self.orig_filename = filename # Original file name in archive
|
||||
|
||||
# Terminate the file name at the first null byte. Null bytes in file
|
||||
# names are used as tricks by viruses in archives.
|
||||
null_byte = filename.find(chr(0))
|
||||
if null_byte >= 0:
|
||||
filename = filename[0:null_byte]
|
||||
# This is used to ensure paths in generated ZIP files always use
|
||||
# forward slashes as the directory separator, as required by the
|
||||
# ZIP format specification.
|
||||
if os.sep != "/" and os.sep in filename:
|
||||
filename = filename.replace(os.sep, "/")
|
||||
# Terminate the file name at the first null byte and
|
||||
# ensure paths always use forward slashes as the directory separator.
|
||||
filename = _sanitize_filename(filename)
|
||||
|
||||
self.filename = filename # Normalized file name
|
||||
self.date_time = date_time # year, month, day, hour, min, sec
|
||||
|
@ -482,7 +491,7 @@ class ZipInfo (object):
|
|||
except UnicodeEncodeError:
|
||||
return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME
|
||||
|
||||
def _decodeExtra(self):
|
||||
def _decodeExtra(self, filename_crc):
|
||||
# Try to decode the extra field.
|
||||
extra = self.extra
|
||||
unpack = struct.unpack
|
||||
|
@ -508,6 +517,21 @@ class ZipInfo (object):
|
|||
except struct.error:
|
||||
raise BadZipFile(f"Corrupt zip64 extra field. "
|
||||
f"{field} not found.") from None
|
||||
elif tp == 0x7075:
|
||||
data = extra[4:ln+4]
|
||||
# Unicode Path Extra Field
|
||||
try:
|
||||
up_version, up_name_crc = unpack('<BL', data[:5])
|
||||
if up_version == 1 and up_name_crc == filename_crc:
|
||||
up_unicode_name = data[5:].decode('utf-8')
|
||||
if up_unicode_name:
|
||||
self.filename = _sanitize_filename(up_unicode_name)
|
||||
else:
|
||||
warnings.warn("Empty unicode path extra field (0x7075)", stacklevel=2)
|
||||
except struct.error as e:
|
||||
raise BadZipFile("Corrupt unicode path extra field (0x7075)") from e
|
||||
except UnicodeDecodeError as e:
|
||||
raise BadZipFile('Corrupt unicode path extra field (0x7075): invalid utf-8 bytes') from e
|
||||
|
||||
extra = extra[ln+4:]
|
||||
|
||||
|
@ -1409,6 +1433,7 @@ class ZipFile:
|
|||
if self.debug > 2:
|
||||
print(centdir)
|
||||
filename = fp.read(centdir[_CD_FILENAME_LENGTH])
|
||||
orig_filename_crc = crc32(filename)
|
||||
flags = centdir[_CD_FLAG_BITS]
|
||||
if flags & _MASK_UTF_FILENAME:
|
||||
# UTF-8 file names extension
|
||||
|
@ -1432,8 +1457,7 @@ class ZipFile:
|
|||
x._raw_time = t
|
||||
x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
|
||||
t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
|
||||
|
||||
x._decodeExtra()
|
||||
x._decodeExtra(orig_filename_crc)
|
||||
x.header_offset = x.header_offset + concat
|
||||
self.filelist.append(x)
|
||||
self.NameToInfo[x.filename] = x
|
||||
|
|
|
@ -627,6 +627,7 @@ Julian Gindi
|
|||
Yannick Gingras
|
||||
Neil Girdhar
|
||||
Matt Giuca
|
||||
Andrea Giudiceandrea
|
||||
Franz Glasner
|
||||
Wim Glenn
|
||||
Michael Goderbauer
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Add support for Unicode Path Extra Field in ZipFile. Patch by Yeojin Kim
|
||||
and Andrea Giudiceandrea.
|
Loading…
Reference in New Issue