diff --git a/Doc/lib/lib.tex b/Doc/lib/lib.tex index 33bb9c5be2a..2750984c0f1 100644 --- a/Doc/lib/lib.tex +++ b/Doc/lib/lib.tex @@ -165,6 +165,7 @@ and how to embed it in other applications. \input{libbsddb} \input{libzlib} \input{libgzip} +\input{libzipfile} \input{librlcompleter} \input{libunix} % UNIX Specific Services @@ -230,6 +231,7 @@ and how to embed it in other applications. \input{libmhlib} \input{libmimify} \input{libnetrc} +\input{librobotparser} \input{librestricted} \input{librexec} diff --git a/Doc/lib/librobotparser.tex b/Doc/lib/librobotparser.tex new file mode 100644 index 00000000000..bf35fac8cd4 --- /dev/null +++ b/Doc/lib/librobotparser.tex @@ -0,0 +1,68 @@ +\section{\module{robotparser} --- + Parser for \filenq{robots.txt}} + +\declaremodule{standard}{robotparser} +\modulesynopsis{Accepts as input a list of lines or URL that refers to a + robots.txt file, parses the file, then builds a + set of rules from that list and answers questions + about fetchability of other URLs.} +\sectionauthor{Skip Montanaro}{skip@mojam.com} + +\index{WWW} +\index{World-Wide Web} +\index{URL} +\index{robots.txt} + +This module provides a single class, \class{RobotFileParser}, which answers +questions about whether or not a particular user agent can fetch a URL on +the web site that published the \file{robots.txt} file. For more details on +the structure of \file{robots.txt} files, see +\url{http://info.webcrawler.com/mak/projects/robots/norobots.html}. + +\begin{classdesc}{RobotFileParser}{} + +This class provides a set of methods to read, parse and answer questions +about a single \file{robots.txt} file. + +\begin{methoddesc}{set_url}{url} +Sets the URL referring to a \file{robots.txt} file. +\end{methoddesc} + +\begin{methoddesc}{read}{} +Reads the \file{robots.txt} URL and feeds it to the parser. +\end{methoddesc} + +\begin{methoddesc}{parse}{lines} +Parses the \var{lines} argument. 
+\end{methoddesc} + +\begin{methoddesc}{can_fetch}{useragent, url} +Returns true if the \var{useragent} is allowed to fetch the \var{url} +according to the rules contained in the parsed \file{robots.txt} file. +\end{methoddesc} + +\begin{methoddesc}{mtime}{} +Returns the time the \file{robots.txt} file was last fetched. This is +useful for long-running web spiders that need to check for new +\file{robots.txt} files periodically. +\end{methoddesc} + +\begin{methoddesc}{modified}{} +Sets the time the \file{robots.txt} file was last fetched to the current +time. +\end{methoddesc} + +\end{classdesc} + +The following example demonstrates basic use of the \class{RobotFileParser} class. + +\begin{verbatim} +>>> import robotparser +>>> rp = robotparser.RobotFileParser() +>>> rp.set_url("http://www.musi-cal.com/robots.txt") +>>> rp.read() +>>> rp.can_fetch("*", "http://www.musi-cal.com/cgi-bin/search?city=San+Francisco") +0 +>>> rp.can_fetch("*", "http://www.musi-cal.com/") +1 +\end{verbatim} diff --git a/Doc/lib/libzipfile.tex b/Doc/lib/libzipfile.tex new file mode 100644 index 00000000000..0b1d43d8965 --- /dev/null +++ b/Doc/lib/libzipfile.tex @@ -0,0 +1,169 @@ +\section{\module{zipfile} --- + Work with ZIP archives} + +\modulesynopsis{Read and write ZIP-format archive files.} +\moduleauthor{James C. Ahlstrom}{jim@interet.com} +\sectionauthor{James C. Ahlstrom}{jim@interet.com} +% LaTeX markup by Fred L. Drake, Jr. + +The ZIP file format is a common archive and compression standard. +This module provides tools to create, read, write, append, and list a +ZIP file. + +The available attributes of this module are: + +\begin{excdesc}{error} + The error raised for bad ZIP files. +\end{excdesc} + +\begin{datadesc}{_debug} + Level of printing, defaults to \code{1}. +\end{datadesc} + +\begin{classdesc}{ZipFile}{...} + The class for reading and writing ZIP files. See + ``\citetitle{ZipFile Objects}'' (section~\ref{zipfile-objects}) for + constructor details. 
+\end{classdesc} + +\begin{funcdesc}{is_zipfile}{path} + Returns true if \var{path} is a valid ZIP file based on its magic + number, otherwise returns false. This module does not currently + handle ZIP files which have appended comments. +\end{funcdesc} + +\begin{funcdesc}{zip2date}{zdate} + Return \code{(\var{year}, \var{month}, \var{day})} for a ZIP date + code. +\end{funcdesc} + +\begin{funcdesc}{zip2time}{ztime} + Return \code{(\var{hour}, \var{minute}, \var{second})} for a ZIP + time code. +\end{funcdesc} + +\begin{funcdesc}{date2zip}{year, month, day} + Return a ZIP date code. +\end{funcdesc} + +\begin{funcdesc}{time2zip}{hour, minute, second} + Return a ZIP time code. +\end{funcdesc} + +\begin{datadesc}{ZIP_STORED} + The numeric constant (\code{0}) for an uncompressed archive member. +\end{datadesc} + +\begin{datadesc}{ZIP_DEFLATED} + The numeric constant for the usual ZIP compression method. This + requires the \module{zlib} module. No other compression methods are + currently supported. +\end{datadesc} + + +\begin{seealso} + \seetext{XXX point to ZIP format definition} + \seetext{XXX point to Info-ZIP home page; mention WiZ} +\end{seealso} + + +\subsection{ZipFile Objects \label{zipfile-objects}} + +\begin{classdesc}{ZipFile}{filename\optional{, mode\optional{, compression}}} + Open a ZIP file named \var{filename}. The \var{mode} parameter + should be \code{'r'} to read an existing file, \code{'w'} to + truncate and write a new file, or \code{'a'} to append to an + existing file. If \var{mode} is \code{'a'} and \var{filename} + refers to an existing ZIP file, then additional files are added to + it. If \var{filename} does not refer to a ZIP file, then a new ZIP + archive is appended to the file. This is meant for adding a ZIP + archive to another file, such as \file{python.exe}. Using +\begin{verbatim} +cat myzip.zip >> python.exe +\end{verbatim} + also works, and at least \program{WinZip} can read such files. 
+ \var{compression} is the ZIP compression method to use when writing + the archive, and should be \constant{ZIP_STORED} or + \constant{ZIP_DEFLATED}; unrecognized values will cause + \exception{ValueError} to be raised. The default is + \constant{ZIP_STORED}. +\end{classdesc} + +XXX explain the ``extra'' string for the ZIP format + +\begin{memberdesc}{TOC} + A read-only dictionary whose keys are the names in the archive, and + whose values are tuples as follows: + +\begin{tableii}{c|l}{code}{Index}{Meaning} + \lineii{0}{File data seek offset} + \lineii{1}{ZIP file ``extra'' data as a string} + \lineii{2}{ZIP file bit flags} + \lineii{3}{ZIP file compression type} + \lineii{4}{File modification time in DOS format} + \lineii{5}{File modification date in DOS format} + \lineii{6}{The CRC-32 of the uncompressed data} + \lineii{7}{The compressed size of the file} + \lineii{8}{The uncompressed size of the file} +\end{tableii} +\end{memberdesc} + +The class \class{ZipFile} has these methods: + +\begin{methoddesc}{listdir}{} + Return a list of names in the archive. Equivalent to + \code{\var{zipfile}.TOC.keys()}. +\end{methoddesc} + +\begin{methoddesc}{printdir}{} + Print a table of contents for the archive to stdout. +\end{methoddesc} + +\begin{methoddesc}{read}{name} + Return the bytes of the file \var{name} in the archive. The archive must be + open for read or append. +\end{methoddesc} + +\begin{methoddesc}{writestr}{bytes, arcname, year, month, day, hour, + minute, second\optional{, extra}} + Write the string \var{bytes} and the other data to the archive, and + give the archive member the name \var{arcname}. \var{extra} is the + ZIP extra data string. The archive must be opened with mode + \code{'w'} or \code{'a'}. +\end{methoddesc} + +\begin{methoddesc}{write}{filename, arcname\optional{, extra}} + Write the file named \var{filename} to the archive, giving it the + archive name \var{arcname}. \var{extra} is the ZIP extra data + string. 
The archive must be open with mode \code{'w'} or + \code{'a'}. +\end{methoddesc} + +\begin{methoddesc}{writepy}{pathname\optional{, basename}} + Search for files \file{*.py} and add the corresponding file to the + archive. The corresponding file is a \file{*.pyo} file if + available, else a \file{*.pyc} file, compiling if necessary. If the + pathname is a file, the filename must end with \file{.py}, and just + the (corresponding \file{*.py[oc]}) file is added at the top level + (no path information). If it is a directory, and the directory is + not a package directory, then all the files \file{*.py[oc]} are + added at the top level. If the directory is a package directory, + then all \file{*.py[oc]} are added under the package name as a file + path, and if any subdirectories are package directories, all of + these are added recursively. \var{basename} is intended for + internal use only. The \method{writepy()} method makes archives + with file names like this: + +\begin{verbatim} + string.pyc # Top level name + test/__init__.pyc # Package directory + test/testall.pyc # Module test.testall + test/bogus/__init__.pyc # Subpackage directory + test/bogus/myfile.pyc # Submodule test.bogus.myfile +\end{verbatim} +\end{methoddesc} + +\begin{methoddesc}{close}{} + Close the archive file. You must call \method{close()} before + exiting your program or essential records will not be written. +\end{methoddesc}