mirror of https://github.com/python/cpython
372 lines
14 KiB
TeX
372 lines
14 KiB
TeX
\section{\module{zipfile} ---
|
|
Work with ZIP archives}
|
|
|
|
\declaremodule{standard}{zipfile}
|
|
\modulesynopsis{Read and write ZIP-format archive files.}
|
|
\moduleauthor{James C. Ahlstrom}{jim@interet.com}
|
|
\sectionauthor{James C. Ahlstrom}{jim@interet.com}
|
|
% LaTeX markup by Fred L. Drake, Jr. <fdrake@acm.org>
|
|
|
|
\versionadded{1.6}
|
|
|
|
The ZIP file format is a common archive and compression standard.
|
|
This module provides tools to create, read, write, append, and list a
|
|
ZIP file. Any advanced use of this module will require an
|
|
understanding of the format, as defined in
|
|
\citetitle[http://www.pkware.com/business_and_developers/developer/appnote/]
|
|
{PKZIP Application Note}.
|
|
|
|
This module does not currently handle ZIP files which have appended
|
|
comments, or multi-disk ZIP files. It can handle ZIP files that use
|
|
the ZIP64 extensions (that is, ZIP files that are more than 4 GByte in
|
|
size). It supports decryption of encrypted files in ZIP archives, but
|
|
it cannot currently create an encrypted file.
|
|
|
|
The available attributes of this module are:
|
|
|
|
\begin{excdesc}{BadZipfile}
|
|
The error raised for bad ZIP files (old name: \code{zipfile.error}).
|
|
\end{excdesc}
|
|
|
|
\begin{excdesc}{LargeZipFile}
|
|
The error raised when a ZIP file would require ZIP64 functionality that
|
|
has not been enabled.
|
|
\end{excdesc}
|
|
|
|
\begin{classdesc*}{ZipFile}
|
|
The class for reading and writing ZIP files. See
|
|
``\citetitle{ZipFile Objects}'' (section \ref{zipfile-objects}) for
|
|
constructor details.
|
|
\end{classdesc*}
|
|
|
|
\begin{classdesc*}{PyZipFile}
|
|
Class for creating ZIP archives containing Python libraries.
|
|
\end{classdesc*}
|
|
|
|
\begin{classdesc}{ZipInfo}{\optional{filename\optional{, date_time}}}
|
|
Class used to represent information about a member of an archive.
|
|
Instances of this class are returned by the \method{getinfo()} and
|
|
\method{infolist()} methods of \class{ZipFile} objects. Most users
|
|
of the \module{zipfile} module will not need to create these, but
|
|
only use those created by this module.
|
|
\var{filename} should be the full name of the archive member, and
|
|
\var{date_time} should be a tuple containing six fields which
|
|
describe the time of the last modification to the file; the fields
|
|
are described in section \ref{zipinfo-objects}, ``ZipInfo Objects.''
|
|
\end{classdesc}
|
|
|
|
\begin{funcdesc}{is_zipfile}{filename}
|
|
Returns \code{True} if \var{filename} is a valid ZIP file based on its magic
|
|
number, otherwise returns \code{False}. This module does not currently
|
|
handle ZIP files which have appended comments.
|
|
\end{funcdesc}
|
|
|
|
\begin{datadesc}{ZIP_STORED}
|
|
The numeric constant for an uncompressed archive member.
|
|
\end{datadesc}
|
|
|
|
\begin{datadesc}{ZIP_DEFLATED}
|
|
The numeric constant for the usual ZIP compression method. This
|
|
requires the \refmodule{zlib} module. No other compression methods are
|
|
currently supported.
|
|
\end{datadesc}
|
|
|
|
|
|
\begin{seealso}
|
|
\seetitle[http://www.pkware.com/business_and_developers/developer/appnote/]
|
|
{PKZIP Application Note}{Documentation on the ZIP file format by
|
|
Phil Katz, the creator of the format and algorithms used.}
|
|
|
|
\seetitle[http://www.info-zip.org/pub/infozip/]{Info-ZIP Home Page}{
|
|
Information about the Info-ZIP project's ZIP archive
|
|
programs and development libraries.}
|
|
\end{seealso}
|
|
|
|
|
|
\subsection{ZipFile Objects \label{zipfile-objects}}
|
|
|
|
\begin{classdesc}{ZipFile}{file\optional{, mode\optional{, compression\optional{, allowZip64}}}}
|
|
Open a ZIP file, where \var{file} can be either a path to a file
|
|
(a string) or a file-like object. The \var{mode} parameter
|
|
should be \code{'r'} to read an existing file, \code{'w'} to
|
|
truncate and write a new file, or \code{'a'} to append to an
|
|
existing file. If \var{mode} is \code{'a'} and \var{file}
|
|
refers to an existing ZIP file, then additional files are added to
|
|
it. If \var{file} does not refer to a ZIP file, then a new ZIP
|
|
archive is appended to the file. This is meant for adding a ZIP
|
|
archive to another file, such as \file{python.exe}. Using
|
|
|
|
\begin{verbatim}
|
|
cat myzip.zip >> python.exe
|
|
\end{verbatim}
|
|
|
|
also works, and at least \program{WinZip} can read such files.
|
|
If \var{mode} is \code{'a'} and the file does not exist at all,
|
|
it is created.
|
|
\var{compression} is the ZIP compression method to use when writing
|
|
the archive, and should be \constant{ZIP_STORED} or
|
|
\constant{ZIP_DEFLATED}; unrecognized values will cause
|
|
\exception{RuntimeError} to be raised. If \constant{ZIP_DEFLATED}
|
|
is specified but the \refmodule{zlib} module is not available,
|
|
\exception{RuntimeError} is also raised. The default is
|
|
\constant{ZIP_STORED}.
|
|
If \var{allowZip64} is \code{True}, \module{zipfile} will create ZIP files that use
|
|
the ZIP64 extensions when the ZIP file is larger than 2 GB. If it is
|
|
false (the default) \module{zipfile} will raise an exception when the
|
|
ZIP file would require ZIP64 extensions. ZIP64 extensions are disabled by
|
|
default because the default \program{zip} and \program{unzip} commands on
|
|
\UNIX{} (the Info-ZIP utilities) don't support these extensions.
|
|
|
|
\versionchanged[If \var{mode} is \code{'a'} and the file does not exist, it is created]{2.6}
|
|
\end{classdesc}
|
|
|
|
\begin{methoddesc}{close}{}
|
|
Close the archive file. You must call \method{close()} before
|
|
exiting your program or essential records will not be written.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{getinfo}{name}
|
|
Return a \class{ZipInfo} object with information about the archive
|
|
member \var{name}. Calling \method{getinfo()} for a name not currently
|
|
contained in the archive will raise a \exception{KeyError}.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{infolist}{}
|
|
Return a list containing a \class{ZipInfo} object for each member of
|
|
the archive. The objects are in the same order as their entries in
|
|
the actual ZIP file on disk if an existing archive was opened.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{namelist}{}
|
|
Return a list of archive members by name.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{open}{name\optional{, mode\optional{, pwd}}}
|
|
Extract a member from the archive as a file-like object (ZipExtFile).
|
|
\var{name} is the name of the file in the archive. The \var{mode}
|
|
parameter, if included, must be one of the following: \code{'r'} (the
|
|
default), \code{'U'}, or \code{'rU'}. Choosing \code{'U'} or
|
|
\code{'rU'} will enable universal newline support in the read-only
|
|
object. \var{pwd} is the password used for encrypted files. Calling
|
|
\method{open()} on a closed ZipFile will raise a
|
|
\exception{RuntimeError}.
|
|
\begin{notice}
|
|
The file-like object is read-only and provides the following methods:
|
|
\method{read()}, \method{readline()}, \method{readlines()},
|
|
\method{__iter__()}, \method{next()}.
|
|
\end{notice}
|
|
\begin{notice}
|
|
If the ZipFile was created by passing in a file-like object as the
|
|
first argument to the constructor, then the object returned by
|
|
\method{open()} shares the ZipFile's file pointer. Under these
|
|
circumstances, the object returned by \method{open()} should not
|
|
be used after any additional operations are performed on the
|
|
ZipFile object. If the ZipFile was created by passing in a string
|
|
(the filename) as the first argument to the constructor, then
|
|
\method{open()} will create a new file object that will be held
|
|
by the ZipExtFile, allowing it to operate independently of the
|
|
ZipFile.
|
|
\end{notice}
|
|
|
|
\versionadded{2.6}
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{printdir}{}
|
|
Print a table of contents for the archive to \code{sys.stdout}.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{setpassword}{pwd}
|
|
Set \var{pwd} as default password to extract encrypted files.
|
|
\versionadded{2.6}
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{read}{name\optional{, pwd}}
|
|
Return the bytes of the file \var{name} in the archive. The archive must be
|
|
open for read or append. \var{pwd} is the password used for encrypted
|
|
files and, if specified, it will override the default password set with
|
|
\method{setpassword()}. Calling \method{read()} on a closed ZipFile
|
|
will raise a \exception{RuntimeError}.
|
|
|
|
\versionchanged[\var{pwd} was added]{2.6}
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{testzip}{}
|
|
Read all the files in the archive and check their CRCs and file
|
|
headers. Return the name of the first bad file, or else return \code{None}.
|
|
Calling \method{testzip()} on a closed ZipFile will raise a
|
|
\exception{RuntimeError}.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{write}{filename\optional{, arcname\optional{,
|
|
compress_type}}}
|
|
Write the file named \var{filename} to the archive, giving it the
|
|
archive name \var{arcname} (by default, this will be the same as
|
|
\var{filename}, but without a drive letter and with leading path
|
|
separators removed). If given, \var{compress_type} overrides the
|
|
value given for the \var{compression} parameter to the constructor
|
|
for the new entry. The archive must be open with mode \code{'w'}
|
|
or \code{'a'} -- calling \method{write()} on a ZipFile created with
|
|
mode \code{'r'} will raise a \exception{RuntimeError}. Calling
|
|
\method{write()} on a closed ZipFile will raise a
|
|
\exception{RuntimeError}.
|
|
|
|
\note{There is no official file name encoding for ZIP files.
|
|
If you have Unicode file names, please convert them to byte strings
|
|
in your desired encoding before passing them to \method{write()}.
|
|
WinZip interprets all file names as encoded in CP437, also known
|
|
as DOS Latin.}
|
|
|
|
\note{Archive names should be relative to the archive root, that is,
|
|
they should not start with a path separator.}
|
|
|
|
\note{If \code{arcname} (or \code{filename}, if \code{arcname} is
|
|
not given) contains a null byte, the name of the file in the archive will
|
|
be truncated at the null byte.}
|
|
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{writestr}{zinfo_or_arcname, bytes}
|
|
Write the string \var{bytes} to the archive; \var{zinfo_or_arcname}
|
|
is either the file name it will be given in the archive, or a
|
|
\class{ZipInfo} instance. If it's an instance, at least the
|
|
filename, date, and time must be given. If it's a name, the date
|
|
and time are set to the current date and time. The archive must be
|
|
opened with mode \code{'w'} or \code{'a'} -- calling
|
|
\method{writestr()} on a ZipFile created with mode \code{'r'}
|
|
will raise a \exception{RuntimeError}. Calling \method{writestr()}
|
|
on a closed ZipFile will raise a \exception{RuntimeError}.
|
|
\end{methoddesc}
|
|
|
|
|
|
The following data attribute is also available:
|
|
|
|
\begin{memberdesc}{debug}
|
|
The level of debug output to use. This may be set from \code{0}
|
|
(the default, no output) to \code{3} (the most output). Debugging
|
|
information is written to \code{sys.stdout}.
|
|
\end{memberdesc}
|
|
|
|
|
|
\subsection{PyZipFile Objects \label{pyzipfile-objects}}
|
|
|
|
The \class{PyZipFile} constructor takes the same parameters as the
|
|
\class{ZipFile} constructor. Instances have one method in addition to
|
|
those of \class{ZipFile} objects.
|
|
|
|
\begin{methoddesc}[PyZipFile]{writepy}{pathname\optional{, basename}}
|
|
Search for files \file{*.py} and add the corresponding file to the
|
|
archive. The corresponding file is a \file{*.pyo} file if
|
|
available, else a \file{*.pyc} file, compiling if necessary. If the
|
|
pathname is a file, the filename must end with \file{.py}, and just
|
|
the corresponding \file{*.py[co]} file is added at the top level
|
|
(no path information). If the pathname is a file that does not end with
|
|
\file{.py}, a \exception{RuntimeError} will be raised. If it is a
|
|
directory, and the directory is not a package directory, then all the
|
|
files \file{*.py[co]} are added at the top level. If the directory is
|
|
a package directory, then all \file{*.py[co]} are added under the package
|
|
name as a file path, and if any subdirectories are package directories, all
|
|
of these are added recursively. \var{basename} is intended for
|
|
internal use only. The \method{writepy()} method makes archives
|
|
with file names like this:
|
|
|
|
\begin{verbatim}
|
|
string.pyc # Top level name
|
|
test/__init__.pyc # Package directory
|
|
test/testall.pyc # Module test.testall
|
|
test/bogus/__init__.pyc # Subpackage directory
|
|
test/bogus/myfile.pyc # Submodule test.bogus.myfile
|
|
\end{verbatim}
|
|
\end{methoddesc}
|
|
|
|
|
|
\subsection{ZipInfo Objects \label{zipinfo-objects}}
|
|
|
|
Instances of the \class{ZipInfo} class are returned by the
|
|
\method{getinfo()} and \method{infolist()} methods of
|
|
\class{ZipFile} objects. Each object stores information about a
|
|
single member of the ZIP archive.
|
|
|
|
Instances have the following attributes:
|
|
|
|
\begin{memberdesc}[ZipInfo]{filename}
|
|
Name of the file in the archive.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}[ZipInfo]{date_time}
|
|
The time and date of the last modification to the archive
|
|
member. This is a tuple of six values:
|
|
|
|
\begin{tableii}{c|l}{code}{Index}{Value}
|
|
\lineii{0}{Year}
|
|
\lineii{1}{Month (one-based)}
|
|
\lineii{2}{Day of month (one-based)}
|
|
\lineii{3}{Hours (zero-based)}
|
|
\lineii{4}{Minutes (zero-based)}
|
|
\lineii{5}{Seconds (zero-based)}
|
|
\end{tableii}
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}[ZipInfo]{compress_type}
|
|
Type of compression for the archive member.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}[ZipInfo]{comment}
|
|
Comment for the individual archive member.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}[ZipInfo]{extra}
|
|
Expansion field data. The
|
|
\citetitle[http://www.pkware.com/business_and_developers/developer/appnote/]
|
|
{PKZIP Application Note} contains some comments on the internal
|
|
structure of the data contained in this string.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}[ZipInfo]{create_system}
|
|
System which created the ZIP archive.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}[ZipInfo]{create_version}
|
|
PKZIP version which created the ZIP archive.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}[ZipInfo]{extract_version}
|
|
PKZIP version needed to extract the archive.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}[ZipInfo]{reserved}
|
|
Must be zero.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}[ZipInfo]{flag_bits}
|
|
ZIP flag bits.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}[ZipInfo]{volume}
|
|
Volume number of file header.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}[ZipInfo]{internal_attr}
|
|
Internal attributes.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}[ZipInfo]{external_attr}
|
|
External file attributes.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}[ZipInfo]{header_offset}
|
|
Byte offset to the file header.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}[ZipInfo]{CRC}
|
|
CRC-32 of the uncompressed file.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}[ZipInfo]{compress_size}
|
|
Size of the compressed data.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}[ZipInfo]{file_size}
|
|
Size of the uncompressed file.
|
|
\end{memberdesc}
|