mirror of https://github.com/python/cpython
506 lines
18 KiB
TeX
506 lines
18 KiB
TeX
\section{\module{tarfile} --- Read and write tar archive files}
|
|
|
|
\declaremodule{standard}{tarfile}
|
|
\modulesynopsis{Read and write tar-format archive files.}
|
|
\versionadded{2.3}
|
|
|
|
\moduleauthor{Lars Gust\"abel}{lars@gustaebel.de}
|
|
\sectionauthor{Lars Gust\"abel}{lars@gustaebel.de}
|
|
|
|
The \module{tarfile} module makes it possible to read and create tar archives.
|
|
Some facts and figures:
|
|
|
|
\begin{itemize}
|
|
\item reads and writes \module{gzip} and \module{bzip2} compressed archives.
|
|
\item creates \POSIX{} 1003.1-1990 compliant or GNU tar compatible archives.
|
|
\item reads GNU tar extensions \emph{longname}, \emph{longlink} and
|
|
\emph{sparse}.
|
|
\item stores pathnames of unlimited length using GNU tar extensions.
|
|
\item handles directories, regular files, hardlinks, symbolic links, fifos,
|
|
character devices and block devices and is able to acquire and
|
|
restore file information like timestamp, access permissions and owner.
|
|
\item can handle tape devices.
|
|
\end{itemize}
|
|
|
|
\begin{funcdesc}{open}{\optional{name\optional{, mode
|
|
\optional{, fileobj\optional{, bufsize}}}}}
|
|
Return a \class{TarFile} object for the pathname \var{name}.
|
|
For detailed information on \class{TarFile} objects,
|
|
see \citetitle{TarFile Objects} (section \ref{tarfile-objects}).
|
|
|
|
\var{mode} has to be a string of the form \code{'filemode[:compression]'},
|
|
it defaults to \code{'r'}. Here is a full list of mode combinations:
|
|
|
|
\begin{tableii}{c|l}{code}{mode}{action}
|
|
\lineii{'r' or 'r:*'}{Open for reading with transparent compression (recommended).}
|
|
\lineii{'r:'}{Open for reading exclusively without compression.}
|
|
\lineii{'r:gz'}{Open for reading with gzip compression.}
|
|
\lineii{'r:bz2'}{Open for reading with bzip2 compression.}
|
|
\lineii{'a' or 'a:'}{Open for appending with no compression. The file
|
|
is created if it does not exist.}
|
|
\lineii{'w' or 'w:'}{Open for uncompressed writing.}
|
|
\lineii{'w:gz'}{Open for gzip compressed writing.}
|
|
\lineii{'w:bz2'}{Open for bzip2 compressed writing.}
|
|
\end{tableii}
|
|
|
|
Note that \code{'a:gz'} or \code{'a:bz2'} is not possible.
|
|
If \var{mode} is not suitable to open a certain (compressed) file for
|
|
reading, \exception{ReadError} is raised. Use \var{mode} \code{'r'} to
|
|
avoid this. If a compression method is not supported,
|
|
\exception{CompressionError} is raised.
|
|
|
|
If \var{fileobj} is specified, it is used as an alternative to a file
|
|
object opened for \var{name}. It is supposed to be at position 0.
|
|
|
|
For special purposes, there is a second format for \var{mode}:
|
|
\code{'filemode|[compression]'}. \function{open()} will return a
|
|
\class{TarFile} object that processes its data as a stream of
|
|
blocks. No random seeking will be done on the file. If given,
|
|
\var{fileobj} may be any object that has a \method{read()} or
|
|
\method{write()} method (depending on the \var{mode}).
|
|
\var{bufsize} specifies the blocksize and defaults to \code{20 *
|
|
512} bytes. Use this variant in combination with
|
|
e.g. \code{sys.stdin}, a socket file object or a tape device.
|
|
However, such a \class{TarFile} object is limited in that it does
|
|
not allow to be accessed randomly, see ``Examples''
|
|
(section~\ref{tar-examples}). The currently possible modes:
|
|
|
|
\begin{tableii}{c|l}{code}{Mode}{Action}
|
|
\lineii{'r|*'}{Open a \emph{stream} of tar blocks for reading with transparent compression.}
|
|
\lineii{'r|'}{Open a \emph{stream} of uncompressed tar blocks for reading.}
|
|
\lineii{'r|gz'}{Open a gzip compressed \emph{stream} for reading.}
|
|
\lineii{'r|bz2'}{Open a bzip2 compressed \emph{stream} for reading.}
|
|
\lineii{'w|'}{Open an uncompressed \emph{stream} for writing.}
|
|
\lineii{'w|gz'}{Open an gzip compressed \emph{stream} for writing.}
|
|
\lineii{'w|bz2'}{Open an bzip2 compressed \emph{stream} for writing.}
|
|
\end{tableii}
|
|
\end{funcdesc}
|
|
|
|
\begin{classdesc*}{TarFile}
|
|
Class for reading and writing tar archives. Do not use this
|
|
class directly, better use \function{open()} instead.
|
|
See ``TarFile Objects'' (section~\ref{tarfile-objects}).
|
|
\end{classdesc*}
|
|
|
|
\begin{funcdesc}{is_tarfile}{name}
|
|
Return \constant{True} if \var{name} is a tar archive file, that
|
|
the \module{tarfile} module can read.
|
|
\end{funcdesc}
|
|
|
|
\begin{classdesc}{TarFileCompat}{filename\optional{, mode\optional{,
|
|
compression}}}
|
|
Class for limited access to tar archives with a
|
|
\refmodule{zipfile}-like interface. Please consult the
|
|
documentation of the \refmodule{zipfile} module for more details.
|
|
\var{compression} must be one of the following constants:
|
|
\begin{datadesc}{TAR_PLAIN}
|
|
Constant for an uncompressed tar archive.
|
|
\end{datadesc}
|
|
\begin{datadesc}{TAR_GZIPPED}
|
|
Constant for a \refmodule{gzip} compressed tar archive.
|
|
\end{datadesc}
|
|
\end{classdesc}
|
|
|
|
\begin{excdesc}{TarError}
|
|
Base class for all \module{tarfile} exceptions.
|
|
\end{excdesc}
|
|
|
|
\begin{excdesc}{ReadError}
|
|
Is raised when a tar archive is opened, that either cannot be handled by
|
|
the \module{tarfile} module or is somehow invalid.
|
|
\end{excdesc}
|
|
|
|
\begin{excdesc}{CompressionError}
|
|
Is raised when a compression method is not supported or when the data
|
|
cannot be decoded properly.
|
|
\end{excdesc}
|
|
|
|
\begin{excdesc}{StreamError}
|
|
Is raised for the limitations that are typical for stream-like
|
|
\class{TarFile} objects.
|
|
\end{excdesc}
|
|
|
|
\begin{excdesc}{ExtractError}
|
|
Is raised for \emph{non-fatal} errors when using \method{extract()}, but
|
|
only if \member{TarFile.errorlevel}\code{ == 2}.
|
|
\end{excdesc}
|
|
|
|
\begin{excdesc}{HeaderError}
|
|
Is raised by \method{frombuf()} if the buffer it gets is invalid.
|
|
\versionadded{2.6}
|
|
\end{excdesc}
|
|
|
|
\begin{seealso}
|
|
\seemodule{zipfile}{Documentation of the \refmodule{zipfile}
|
|
standard module.}
|
|
|
|
\seetitle[http://www.gnu.org/software/tar/manual/html_node/tar_134.html\#SEC134]
|
|
{GNU tar manual, Basic Tar Format}{Documentation for tar archive files,
|
|
including GNU tar extensions.}
|
|
\end{seealso}
|
|
|
|
%-----------------
|
|
% TarFile Objects
|
|
%-----------------
|
|
|
|
\subsection{TarFile Objects \label{tarfile-objects}}
|
|
|
|
The \class{TarFile} object provides an interface to a tar archive. A tar
|
|
archive is a sequence of blocks. An archive member (a stored file) is made up
|
|
of a header block followed by data blocks. It is possible, to store a file in a
|
|
tar archive several times. Each archive member is represented by a
|
|
\class{TarInfo} object, see \citetitle{TarInfo Objects} (section
|
|
\ref{tarinfo-objects}) for details.
|
|
|
|
\begin{classdesc}{TarFile}{\optional{name
|
|
\optional{, mode\optional{, fileobj}}}}
|
|
Open an \emph{(uncompressed)} tar archive \var{name}.
|
|
\var{mode} is either \code{'r'} to read from an existing archive,
|
|
\code{'a'} to append data to an existing file or \code{'w'} to create a new
|
|
file overwriting an existing one. \var{mode} defaults to \code{'r'}.
|
|
|
|
If \var{fileobj} is given, it is used for reading or writing data.
|
|
If it can be determined, \var{mode} is overridden by \var{fileobj}'s mode.
|
|
\var{fileobj} will be used from position 0.
|
|
\begin{notice}
|
|
\var{fileobj} is not closed, when \class{TarFile} is closed.
|
|
\end{notice}
|
|
\end{classdesc}
|
|
|
|
\begin{methoddesc}{open}{...}
|
|
Alternative constructor. The \function{open()} function on module level is
|
|
actually a shortcut to this classmethod. See section~\ref{module-tarfile}
|
|
for details.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{getmember}{name}
|
|
Return a \class{TarInfo} object for member \var{name}. If \var{name} can
|
|
not be found in the archive, \exception{KeyError} is raised.
|
|
\begin{notice}
|
|
If a member occurs more than once in the archive, its last
|
|
occurrence is assumed to be the most up-to-date version.
|
|
\end{notice}
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{getmembers}{}
|
|
Return the members of the archive as a list of \class{TarInfo} objects.
|
|
The list has the same order as the members in the archive.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{getnames}{}
|
|
Return the members as a list of their names. It has the same order as
|
|
the list returned by \method{getmembers()}.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{list}{verbose=True}
|
|
Print a table of contents to \code{sys.stdout}. If \var{verbose} is
|
|
\constant{False}, only the names of the members are printed. If it is
|
|
\constant{True}, output similar to that of \program{ls -l} is produced.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{next}{}
|
|
Return the next member of the archive as a \class{TarInfo} object, when
|
|
\class{TarFile} is opened for reading. Return \code{None} if there is no
|
|
more available.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{extractall}{\optional{path\optional{, members}}}
|
|
Extract all members from the archive to the current working directory
|
|
or directory \var{path}. If optional \var{members} is given, it must be
|
|
a subset of the list returned by \method{getmembers()}.
|
|
Directory informations like owner, modification time and permissions are
|
|
set after all members have been extracted. This is done to work around two
|
|
problems: A directory's modification time is reset each time a file is
|
|
created in it. And, if a directory's permissions do not allow writing,
|
|
extracting files to it will fail.
|
|
\versionadded{2.5}
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{extract}{member\optional{, path}}
|
|
Extract a member from the archive to the current working directory,
|
|
using its full name. Its file information is extracted as accurately as
|
|
possible.
|
|
\var{member} may be a filename or a \class{TarInfo} object.
|
|
You can specify a different directory using \var{path}.
|
|
\begin{notice}
|
|
Because the \method{extract()} method allows random access to a tar
|
|
archive there are some issues you must take care of yourself. See the
|
|
description for \method{extractall()} above.
|
|
\end{notice}
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{extractfile}{member}
|
|
Extract a member from the archive as a file object.
|
|
\var{member} may be a filename or a \class{TarInfo} object.
|
|
If \var{member} is a regular file, a file-like object is returned.
|
|
If \var{member} is a link, a file-like object is constructed from the
|
|
link's target.
|
|
If \var{member} is none of the above, \code{None} is returned.
|
|
\begin{notice}
|
|
The file-like object is read-only and provides the following methods:
|
|
\method{read()}, \method{readline()}, \method{readlines()},
|
|
\method{seek()}, \method{tell()}.
|
|
\end{notice}
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{add}{name\optional{, arcname\optional{, recursive}}}
|
|
Add the file \var{name} to the archive. \var{name} may be any type
|
|
of file (directory, fifo, symbolic link, etc.).
|
|
If given, \var{arcname} specifies an alternative name for the file in the
|
|
archive. Directories are added recursively by default.
|
|
This can be avoided by setting \var{recursive} to \constant{False};
|
|
the default is \constant{True}.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{addfile}{tarinfo\optional{, fileobj}}
|
|
Add the \class{TarInfo} object \var{tarinfo} to the archive.
|
|
If \var{fileobj} is given, \code{\var{tarinfo}.size} bytes are read
|
|
from it and added to the archive. You can create \class{TarInfo} objects
|
|
using \method{gettarinfo()}.
|
|
\begin{notice}
|
|
On Windows platforms, \var{fileobj} should always be opened with mode
|
|
\code{'rb'} to avoid irritation about the file size.
|
|
\end{notice}
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{gettarinfo}{\optional{name\optional{,
|
|
arcname\optional{, fileobj}}}}
|
|
Create a \class{TarInfo} object for either the file \var{name} or
|
|
the file object \var{fileobj} (using \function{os.fstat()} on its
|
|
file descriptor). You can modify some of the \class{TarInfo}'s
|
|
attributes before you add it using \method{addfile()}. If given,
|
|
\var{arcname} specifies an alternative name for the file in the
|
|
archive.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{close}{}
|
|
Close the \class{TarFile}. In write mode, two finishing zero
|
|
blocks are appended to the archive.
|
|
\end{methoddesc}
|
|
|
|
\begin{memberdesc}{posix}
|
|
If true, create a \POSIX{} 1003.1-1990 compliant archive. GNU
|
|
extensions are not used, because they are not part of the \POSIX{}
|
|
standard. This limits the length of filenames to at most 256,
|
|
link names to 100 characters and the maximum file size to 8
|
|
gigabytes. A \exception{ValueError} is raised if a file exceeds
|
|
this limit. If false, create a GNU tar compatible archive. It
|
|
will not be \POSIX{} compliant, but can store files without any
|
|
of the above restrictions.
|
|
\versionchanged[\var{posix} defaults to \constant{False}]{2.4}
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}{dereference}
|
|
If false, add symbolic and hard links to archive. If true, add the
|
|
content of the target files to the archive. This has no effect on
|
|
systems that do not support symbolic links.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}{ignore_zeros}
|
|
If false, treat an empty block as the end of the archive. If true,
|
|
skip empty (and invalid) blocks and try to get as many members as
|
|
possible. This is only useful for concatenated or damaged
|
|
archives.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}{debug=0}
|
|
To be set from \code{0} (no debug messages; the default) up to
|
|
\code{3} (all debug messages). The messages are written to
|
|
\code{sys.stderr}.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}{errorlevel}
|
|
If \code{0} (the default), all errors are ignored when using
|
|
\method{extract()}. Nevertheless, they appear as error messages
|
|
in the debug output, when debugging is enabled. If \code{1}, all
|
|
\emph{fatal} errors are raised as \exception{OSError} or
|
|
\exception{IOError} exceptions. If \code{2}, all \emph{non-fatal}
|
|
errors are raised as \exception{TarError} exceptions as well.
|
|
\end{memberdesc}
|
|
|
|
%-----------------
|
|
% TarInfo Objects
|
|
%-----------------
|
|
|
|
\subsection{TarInfo Objects \label{tarinfo-objects}}
|
|
|
|
A \class{TarInfo} object represents one member in a
|
|
\class{TarFile}. Aside from storing all required attributes of a file
|
|
(like file type, size, time, permissions, owner etc.), it provides
|
|
some useful methods to determine its type. It does \emph{not} contain
|
|
the file's data itself.
|
|
|
|
\class{TarInfo} objects are returned by \class{TarFile}'s methods
|
|
\method{getmember()}, \method{getmembers()} and \method{gettarinfo()}.
|
|
|
|
\begin{classdesc}{TarInfo}{\optional{name}}
|
|
Create a \class{TarInfo} object.
|
|
\end{classdesc}
|
|
|
|
\begin{methoddesc}{frombuf}{}
|
|
Create and return a \class{TarInfo} object from a string buffer.
|
|
\versionadded[Raises \exception{HeaderError} if the buffer is
|
|
invalid.]{2.6}
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{tobuf}{posix}
|
|
Create a string buffer from a \class{TarInfo} object.
|
|
See \class{TarFile}'s \member{posix} attribute for information
|
|
on the \var{posix} argument. It defaults to \constant{False}.
|
|
|
|
\versionadded[The \var{posix} parameter]{2.5}
|
|
\end{methoddesc}
|
|
|
|
A \code{TarInfo} object has the following public data attributes:
|
|
|
|
\begin{memberdesc}{name}
|
|
Name of the archive member.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}{size}
|
|
Size in bytes.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}{mtime}
|
|
Time of last modification.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}{mode}
|
|
Permission bits.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}{type}
|
|
File type. \var{type} is usually one of these constants:
|
|
\constant{REGTYPE}, \constant{AREGTYPE}, \constant{LNKTYPE},
|
|
\constant{SYMTYPE}, \constant{DIRTYPE}, \constant{FIFOTYPE},
|
|
\constant{CONTTYPE}, \constant{CHRTYPE}, \constant{BLKTYPE},
|
|
\constant{GNUTYPE_SPARSE}. To determine the type of a
|
|
\class{TarInfo} object more conveniently, use the \code{is_*()}
|
|
methods below.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}{linkname}
|
|
Name of the target file name, which is only present in
|
|
\class{TarInfo} objects of type \constant{LNKTYPE} and
|
|
\constant{SYMTYPE}.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}{uid}
|
|
User ID of the user who originally stored this member.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}{gid}
|
|
Group ID of the user who originally stored this member.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}{uname}
|
|
User name.
|
|
\end{memberdesc}
|
|
|
|
\begin{memberdesc}{gname}
|
|
Group name.
|
|
\end{memberdesc}
|
|
|
|
A \class{TarInfo} object also provides some convenient query methods:
|
|
|
|
\begin{methoddesc}{isfile}{}
|
|
Return \constant{True} if the \class{Tarinfo} object is a regular
|
|
file.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{isreg}{}
|
|
Same as \method{isfile()}.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{isdir}{}
|
|
Return \constant{True} if it is a directory.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{issym}{}
|
|
Return \constant{True} if it is a symbolic link.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{islnk}{}
|
|
Return \constant{True} if it is a hard link.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{ischr}{}
|
|
Return \constant{True} if it is a character device.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{isblk}{}
|
|
Return \constant{True} if it is a block device.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{isfifo}{}
|
|
Return \constant{True} if it is a FIFO.
|
|
\end{methoddesc}
|
|
|
|
\begin{methoddesc}{isdev}{}
|
|
Return \constant{True} if it is one of character device, block
|
|
device or FIFO.
|
|
\end{methoddesc}
|
|
|
|
%------------------------
|
|
% Examples
|
|
%------------------------
|
|
|
|
\subsection{Examples \label{tar-examples}}
|
|
|
|
How to extract an entire tar archive to the current working directory:
|
|
\begin{verbatim}
|
|
import tarfile
|
|
tar = tarfile.open("sample.tar.gz")
|
|
tar.extractall()
|
|
tar.close()
|
|
\end{verbatim}
|
|
|
|
How to create an uncompressed tar archive from a list of filenames:
|
|
\begin{verbatim}
|
|
import tarfile
|
|
tar = tarfile.open("sample.tar", "w")
|
|
for name in ["foo", "bar", "quux"]:
|
|
tar.add(name)
|
|
tar.close()
|
|
\end{verbatim}
|
|
|
|
How to read a gzip compressed tar archive and display some member information:
|
|
\begin{verbatim}
|
|
import tarfile
|
|
tar = tarfile.open("sample.tar.gz", "r:gz")
|
|
for tarinfo in tar:
|
|
print tarinfo.name, "is", tarinfo.size, "bytes in size and is",
|
|
if tarinfo.isreg():
|
|
print "a regular file."
|
|
elif tarinfo.isdir():
|
|
print "a directory."
|
|
else:
|
|
print "something else."
|
|
tar.close()
|
|
\end{verbatim}
|
|
|
|
How to create a tar archive with faked information:
|
|
\begin{verbatim}
|
|
import tarfile
|
|
tar = tarfile.open("sample.tar.gz", "w:gz")
|
|
for name in namelist:
|
|
tarinfo = tar.gettarinfo(name, "fakeproj-1.0/" + name)
|
|
tarinfo.uid = 123
|
|
tarinfo.gid = 456
|
|
tarinfo.uname = "johndoe"
|
|
tarinfo.gname = "fake"
|
|
tar.addfile(tarinfo, file(name))
|
|
tar.close()
|
|
\end{verbatim}
|
|
|
|
The \emph{only} way to extract an uncompressed tar stream from
|
|
\code{sys.stdin}:
|
|
\begin{verbatim}
|
|
import sys
|
|
import tarfile
|
|
tar = tarfile.open(mode="r|", fileobj=sys.stdin)
|
|
for tarinfo in tar:
|
|
tar.extract(tarinfo)
|
|
tar.close()
|
|
\end{verbatim}
|