mirror of https://github.com/python/cpython
Patch #963318: Add support for client-side cookie management.
This commit is contained in:
parent
0a6d0ff8d9
commit
2a6ba9097e
|
@ -233,6 +233,7 @@ and how to embed it in other applications.
|
|||
\input{libbasehttp}
|
||||
\input{libsimplehttp}
|
||||
\input{libcgihttp}
|
||||
\input{libcookielib}
|
||||
\input{libcookie}
|
||||
\input{libxmlrpclib}
|
||||
\input{libsimplexmlrpc}
|
||||
|
|
|
@ -68,6 +68,10 @@ you should not use the \class{SerialCookie} class.
|
|||
|
||||
|
||||
\begin{seealso}
|
||||
\seemodule{cookielib}{HTTP cookie handling for web
|
||||
\emph{clients}. The \module{cookielib} and \module{Cookie}
|
||||
modules do not depend on each other.}
|
||||
|
||||
\seerfc{2109}{HTTP State Management Mechanism}{This is the state
|
||||
management specification implemented by this module.}
|
||||
\end{seealso}
|
||||
|
|
|
@ -0,0 +1,679 @@
|
|||
\section{\module{cookielib} ---
|
||||
Cookie handling for HTTP clients}
|
||||
|
||||
\declaremodule{standard}{cookielib}
|
||||
\moduleauthor{John J. Lee}{jjl@pobox.com}
|
||||
\sectionauthor{John J. Lee}{jjl@pobox.com}
|
||||
|
||||
\modulesynopsis{Cookie handling for HTTP clients}
|
||||
|
||||
The \module{cookielib} module defines classes for automatic handling
|
||||
of HTTP cookies. It is useful for accessing web sites that require
|
||||
small pieces of data -- \dfn{cookies} -- to be set on the client
|
||||
machine by an HTTP response from a web server, and then returned to
|
||||
the server in later HTTP requests.
|
||||
|
||||
Both the regular Netscape cookie protocol and the protocol defined by
|
||||
\rfc{2965} are handled. RFC 2965 handling is switched off by default.
|
||||
\rfc{2109} cookies are parsed as Netscape cookies and subsequently
|
||||
treated as RFC 2965 cookies. Note that the great majority of cookies
|
||||
on the Internet are Netscape cookies. \module{cookielib} attempts to
|
||||
follow the de-facto Netscape cookie protocol (which differs
|
||||
substantially from that set out in the original Netscape
|
||||
specification), including taking note of the \code{max-age} and
|
||||
\code{port} cookie-attributes introduced with RFC 2109. \note{The
|
||||
various named parameters found in \mailheader{Set-Cookie} and
|
||||
\mailheader{Set-Cookie2} headers (eg. \code{domain} and
|
||||
\code{expires}) are conventionally referred to as \dfn{attributes}.
|
||||
To distinguish them from Python attributes, the documentation for this
|
||||
module uses the term \dfn{cookie-attribute} instead}.
|
||||
|
||||
|
||||
The module defines the following exception:
|
||||
|
||||
\begin{excdesc}{LoadError}
|
||||
Instances of \class{FileCookieJar} raise this exception on failure to
|
||||
load cookies from a file.
|
||||
\end{excdesc}
|
||||
|
||||
|
||||
The following classes are provided:
|
||||
|
||||
\begin{classdesc}{CookieJar}{policy=\constant{None}}
|
||||
\var{policy} is an object implementing the \class{CookiePolicy}
|
||||
interface.
|
||||
|
||||
The \class{CookieJar} class stores HTTP cookies. It extracts cookies
|
||||
from HTTP responses, and returns them in HTTP requests.
|
||||
\class{CookieJar} instances automatically expire contained cookies
|
||||
when necessary. Subclasses are also responsible for storing and
|
||||
retrieving cookies from a file or database.
|
||||
\end{classdesc}
|
||||
|
||||
\begin{classdesc}{FileCookieJar}{filename, delayload=\constant{None},
|
||||
policy=\constant{None}}
|
||||
\var{policy} is an object implementing the \class{CookiePolicy}
|
||||
interface. For the other arguments, see the documentation for the
|
||||
corresponding attributes.
|
||||
|
||||
A \class{CookieJar} which can load cookies from, and perhaps save
|
||||
cookies to, a file on disk. Cookies are \strong{NOT} loaded from the
|
||||
named file until either the \method{load()} or \method{revert()}
|
||||
method is called. Subclasses of this class are documented in section
|
||||
\ref{file-cookie-jar-classes}.
|
||||
\end{classdesc}
|
||||
|
||||
\begin{classdesc}{CookiePolicy}{}
|
||||
This class is responsible for deciding whether each cookie should be
|
||||
accepted from / returned to the server.
|
||||
\end{classdesc}
|
||||
|
||||
\begin{classdesc}{DefaultCookiePolicy}{
|
||||
blocked_domains=\constant{None},
|
||||
allowed_domains=\constant{None},
|
||||
netscape=\constant{True}, rfc2965=\constant{False},
|
||||
hide_cookie2=\constant{False},
|
||||
strict_domain=\constant{False},
|
||||
strict_rfc2965_unverifiable=\constant{True},
|
||||
strict_ns_unverifiable=\constant{False},
|
||||
strict_ns_domain=\constant{DefaultCookiePolicy.DomainLiberal},
|
||||
strict_ns_set_initial_dollar=\constant{False},
|
||||
strict_ns_set_path=\constant{False}
|
||||
}
|
||||
|
||||
Constructor arguments should be passed as keyword arguments only.
|
||||
\var{blocked_domains} is a sequence of domain names that we never
|
||||
accept cookies from, nor return cookies to.  If \var{allowed_domains} is
|
||||
not \constant{None}, this is a sequence of the only domains for which
|
||||
we accept and return cookies. For all other arguments, see the
|
||||
documentation for \class{CookiePolicy} and \class{DefaultCookiePolicy}
|
||||
objects.
|
||||
|
||||
\class{DefaultCookiePolicy} implements the standard accept / reject
|
||||
rules for Netscape and RFC 2965 cookies. RFC 2109 cookies
|
||||
(ie. cookies received in a \mailheader{Set-Cookie} header with a
|
||||
version cookie-attribute of 1) are treated according to the RFC 2965
|
||||
rules. \class{DefaultCookiePolicy} also provides some parameters to
|
||||
allow some fine-tuning of policy.
|
||||
\end{classdesc}
|
||||
|
||||
\begin{classdesc}{Cookie}{}
|
||||
This class represents Netscape, RFC 2109 and RFC 2965 cookies. It is
|
||||
not expected that users of \module{cookielib} construct their own
|
||||
\class{Cookie} instances. Instead, if necessary, call
|
||||
\method{make_cookies()} on a \class{CookieJar} instance.
|
||||
\end{classdesc}
|
||||
|
||||
\begin{seealso}
|
||||
|
||||
\seemodule{urllib2}{URL opening with automatic cookie handling.}
|
||||
|
||||
\seemodule{Cookie}{HTTP cookie classes, principally useful for
|
||||
server-side code. The \module{cookielib} and \module{Cookie} modules
|
||||
do not depend on each other.}
|
||||
|
||||
\seeurl{http://wwwsearch.sf.net/ClientCookie/}{Extensions to this
|
||||
module, including a class for reading Microsoft Internet Explorer
|
||||
cookies on Windows.}
|
||||
|
||||
\seeurl{http://www.netscape.com/newsref/std/cookie_spec.html}{The
|
||||
specification of the original Netscape cookie protocol. Though this
|
||||
is still the dominant protocol, the 'Netscape cookie protocol'
|
||||
implemented by all the major browsers (and \module{cookielib}) only
|
||||
bears a passing resemblance to the one sketched out in
|
||||
\code{cookie_spec.html}.}
|
||||
|
||||
\seerfc{2109}{HTTP State Management Mechanism}{Obsoleted by RFC 2965.
|
||||
Uses \mailheader{Set-Cookie} with version=1.}
|
||||
|
||||
\seerfc{2965}{HTTP State Management Mechanism}{The Netscape protocol
|
||||
with the bugs fixed. Uses \mailheader{Set-Cookie2} in place of
|
||||
\mailheader{Set-Cookie}. Not widely used.}
|
||||
|
||||
\seeurl{http://kristol.org/cookie/errata.html}{Unfinished errata to
|
||||
RFC 2965.}
|
||||
|
||||
\seerfc{2964}{Use of HTTP State Management}{}
|
||||
|
||||
\end{seealso}
|
||||
|
||||
|
||||
\subsection{CookieJar and FileCookieJar Objects \label{cookie-jar-objects}}
|
||||
|
||||
\class{CookieJar} objects support the iterator protocol.
|
||||
|
||||
\class{CookieJar} has the following methods:
|
||||
|
||||
\begin{methoddesc}[CookieJar]{add_cookie_header}{request}
|
||||
Add correct \mailheader{Cookie} header to \var{request}.
|
||||
|
||||
If the CookiePolicy allows (ie. the \class{CookiePolicy} instance's
|
||||
\member{rfc2965} and \member{hide_cookie2} attributes are true and
|
||||
false respectively), the \mailheader{Cookie2} header is also added
|
||||
when appropriate.
|
||||
|
||||
The \var{request} object (usually a \class{urllib2.Request} instance)
|
||||
must support the methods \method{get_full_url()}, \method{get_host()},
|
||||
\method{get_type()}, \method{unverifiable()},
|
||||
\method{get_origin_req_host()}, \method{has_header()},
|
||||
\method{get_header()}, \method{header_items()}, and
|
||||
\method{add_unredirected_header()}, as documented by \module{urllib2}.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[CookieJar]{extract_cookies}{response, request}
|
||||
Extract cookies from HTTP \var{response} and store them in the
|
||||
\class{CookieJar}, where allowed by policy.
|
||||
|
||||
The \class{CookieJar} will look for allowable \mailheader{Set-Cookie}
|
||||
and \mailheader{Set-Cookie2} headers in the \var{response} argument,
|
||||
and store cookies as appropriate (subject to the
|
||||
\method{CookiePolicy.set_ok()} method's approval).
|
||||
|
||||
The \var{response} object (usually the result of a call to
|
||||
\method{urllib2.urlopen()}, or similar) should support an
|
||||
\method{info()} method, which returns an object with a
|
||||
\method{getallmatchingheaders()} method (usually a
|
||||
\class{mimetools.Message} instance).
|
||||
|
||||
The \var{request} object (usually a \class{urllib2.Request} instance)
|
||||
must support the methods \method{get_full_url()}, \method{get_host()},
|
||||
\method{unverifiable()}, and \method{get_origin_req_host()}, as
|
||||
documented by \module{urllib2}. The request is used to set default
|
||||
values for cookie-attributes as well as for checking that the cookie
|
||||
is allowed to be set.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[CookieJar]{set_policy}{policy}
|
||||
Set the \class{CookiePolicy} instance to be used.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[CookieJar]{make_cookies}{response, request}
|
||||
Return sequence of \class{Cookie} objects extracted from
|
||||
\var{response} object.
|
||||
|
||||
See the documentation for \method{extract_cookies} for the interfaces
|
||||
required of the \var{response} and \var{request} arguments.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[CookieJar]{set_cookie_if_ok}{cookie, request}
|
||||
Set a \class{Cookie} if policy says it's OK to do so.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[CookieJar]{set_cookie}{cookie}
|
||||
Set a \class{Cookie}, without checking with policy to see whether or
|
||||
not it should be set.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[CookieJar]{clear}{\optional{domain\optional{,
|
||||
path\optional{, name}}}}
|
||||
Clear some cookies.
|
||||
|
||||
If invoked without arguments, clear all cookies. If given a single
|
||||
argument, only cookies belonging to that \var{domain} will be removed.
|
||||
If given two arguments, cookies belonging to the specified
|
||||
\var{domain} and URL \var{path} are removed. If given three
|
||||
arguments, then the cookie with the specified \var{domain}, \var{path}
|
||||
and \var{name} is removed.
|
||||
|
||||
Raises \exception{KeyError} if no matching cookie exists.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[CookieJar]{clear_session_cookies}{}
|
||||
Discard all session cookies.
|
||||
|
||||
Discards all contained cookies that have a true \member{discard}
|
||||
attribute (usually because they had either no \code{max-age} or
|
||||
\code{expires} cookie-attribute, or an explicit \code{discard}
|
||||
cookie-attribute). For interactive browsers, the end of a session
|
||||
usually corresponds to closing the browser window.
|
||||
|
||||
Note that the \method{save()} method won't save session cookies
|
||||
anyway, unless you ask otherwise by passing a true
|
||||
\var{ignore_discard} argument.
|
||||
\end{methoddesc}
|
||||
|
||||
\class{FileCookieJar} implements the following additional methods:
|
||||
|
||||
\begin{methoddesc}[FileCookieJar]{save}{filename=\constant{None},
|
||||
ignore_discard=\constant{False}, ignore_expires=\constant{False}}
|
||||
Save cookies to a file.
|
||||
|
||||
This base class raises \exception{NotImplementedError}. Subclasses may
|
||||
leave this method unimplemented.
|
||||
|
||||
\var{filename} is the name of file in which to save cookies. If
|
||||
\var{filename} is not specified, \member{self.filename} is used (whose
|
||||
default is the value passed to the constructor, if any); if
|
||||
\member{self.filename} is \constant{None}, \exception{ValueError} is
|
||||
raised.
|
||||
|
||||
\var{ignore_discard}: save even cookies set to be discarded.
|
||||
\var{ignore_expires}: save even cookies that have expired.
|
||||
|
||||
The file is overwritten if it already exists, thus wiping all the
|
||||
cookies it contains. Saved cookies can be restored later using the
|
||||
\method{load()} or \method{revert()} methods.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[FileCookieJar]{load}{filename=\constant{None},
|
||||
ignore_discard=\constant{False}, ignore_expires=\constant{False}}
|
||||
Load cookies from a file.
|
||||
|
||||
Old cookies are kept unless overwritten by newly loaded ones.
|
||||
|
||||
Arguments are as for \method{save()}.
|
||||
|
||||
The named file must be in the format understood by the class, or
|
||||
\exception{LoadError} will be raised.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[FileCookieJar]{revert}{filename=\constant{None},
|
||||
ignore_discard=\constant{False}, ignore_expires=\constant{False}}
|
||||
Clear all cookies and reload cookies from a saved file.
|
||||
|
||||
Raises \exception{cookielib.LoadError} or \exception{IOError} if
|
||||
reversion is not successful; the object's state will not be altered if
|
||||
this happens.
|
||||
\end{methoddesc}
|
||||
|
||||
\class{FileCookieJar} instances have the following public attributes:
|
||||
|
||||
\begin{memberdesc}{filename}
|
||||
Filename of default file in which to keep cookies.
|
||||
\end{memberdesc}
|
||||
|
||||
\begin{memberdesc}{delayload}
|
||||
If true, load cookies lazily from disk. This is only a hint, since
|
||||
this only affects performance, not behaviour (unless the cookies on
|
||||
disk are changing). A \class{CookieJar} object may ignore it. None
|
||||
of the \class{FileCookieJar} classes included in the standard library
|
||||
lazily loads cookies.
|
||||
\end{memberdesc}
|
||||
|
||||
|
||||
\subsection{FileCookieJar subclasses and co-operation with web browsers
|
||||
\label{file-cookie-jar-classes}}
|
||||
|
||||
The following \class{CookieJar} subclasses are provided for reading
|
||||
and writing cookie files. Further \class{CookieJar} subclasses, including one
|
||||
that reads Microsoft Internet Explorer cookies, are available at
|
||||
\url{http://wwwsearch.sf.net/ClientCookie/}.
|
||||
|
||||
\begin{classdesc}{MozillaCookieJar}{filename, delayload=\constant{None},
|
||||
policy=\constant{None}}
|
||||
A \class{FileCookieJar} that can load from and save cookies to disk in
|
||||
the Mozilla \code{cookies.txt} file format (which is also used by the
|
||||
lynx and Netscape browsers). \note{This loses information about RFC
|
||||
2965 cookies, and also about newer or non-standard cookie-attributes
|
||||
such as \code{port}.}
|
||||
|
||||
\warning{Back up your cookies before saving if you have cookies whose
|
||||
loss / corruption would be inconvenient (there are some subtleties
|
||||
which may lead to slight changes in the file over a load / save
|
||||
round-trip).}
|
||||
|
||||
Also note that cookies saved while Mozilla is running will get
|
||||
clobbered by Mozilla.
|
||||
\end{classdesc}
|
||||
|
||||
\begin{classdesc}{LWPCookieJar}{filename, delayload=\constant{None},
|
||||
policy=\constant{None}}
|
||||
A \class{FileCookieJar} that can load from and save cookies to disk in
|
||||
format compatible with the libwww-perl library's \code{Set-Cookie3}
|
||||
file format. This is convenient if you want to store cookies in a
|
||||
human-readable file.
|
||||
\end{classdesc}
|
||||
|
||||
|
||||
\subsection{CookiePolicy Objects \label{cookie-policy-objects}}
|
||||
|
||||
Objects implementing the \class{CookiePolicy} interface have the
|
||||
following methods:
|
||||
|
||||
\begin{methoddesc}[CookiePolicy]{set_ok}{cookie, request}
|
||||
Return boolean value indicating whether cookie should be accepted from server.
|
||||
|
||||
\var{cookie} is a \class{cookielib.Cookie} instance. \var{request} is
|
||||
an object implementing the interface defined by the documentation for
|
||||
\method{CookieJar.extract_cookies()}.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[CookiePolicy]{return_ok}{cookie, request}
|
||||
Return boolean value indicating whether cookie should be returned to server.
|
||||
|
||||
\var{cookie} is a \class{cookielib.Cookie} instance. \var{request} is
|
||||
an object implementing the interface defined by the documentation for
|
||||
\method{CookieJar.add_cookie_header()}.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[CookiePolicy]{domain_return_ok}{domain, request}
|
||||
Return false if cookies should not be returned, given cookie domain.
|
||||
|
||||
This method is an optimization. It removes the need for checking
|
||||
every cookie with a particular domain (which might involve reading
|
||||
many files). The default implementations of
|
||||
\method{domain_return_ok()} and \method{path_return_ok()}
|
||||
(\samp{return True}) leave all the work to \method{return_ok()}.
|
||||
|
||||
If \method{domain_return_ok()} returns true for the cookie domain,
|
||||
\method{path_return_ok()} is called for the cookie path. Otherwise,
|
||||
\method{path_return_ok()} and \method{return_ok()} are never called
|
||||
for that cookie domain. If \method{path_return_ok()} returns true,
|
||||
\method{return_ok()} is called with the \class{Cookie} object itself
|
||||
for a full check. Otherwise, \method{return_ok()} is never called for
|
||||
that cookie path.
|
||||
|
||||
Note that \method{domain_return_ok()} is called for every
|
||||
\emph{cookie} domain, not just for the \emph{request} domain. For
|
||||
example, the function might be called with both \code{".example.com"}
|
||||
and \code{"www.example.com"} if the request domain is
|
||||
\code{"www.example.com"}. The same goes for
|
||||
\method{path_return_ok()}.
|
||||
|
||||
The \var{request} argument is as documented for \method{return_ok()}.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[CookiePolicy]{path_return_ok}{path, request}
|
||||
Return false if cookies should not be returned, given cookie path.
|
||||
|
||||
See the documentation for \method{domain_return_ok()}.
|
||||
\end{methoddesc}
|
||||
|
||||
|
||||
In addition to implementing the methods above, implementations of the
|
||||
\class{CookiePolicy} interface must also supply the following
|
||||
attributes, indicating which protocols should be used, and how. All
|
||||
of these attributes may be assigned to.
|
||||
|
||||
\begin{memberdesc}{netscape}
|
||||
Implement Netscape protocol.
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}{rfc2965}
|
||||
Implement RFC 2965 protocol.
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}{hide_cookie2}
|
||||
Don't add Cookie2 header to requests (the presence of this header
|
||||
indicates to the server that we understand RFC 2965 cookies).
|
||||
\end{memberdesc}
|
||||
|
||||
The most useful way to define a \class{CookiePolicy} class is by
|
||||
subclassing from \class{DefaultCookiePolicy} and overriding some or
|
||||
all of the methods above. \class{CookiePolicy} itself may be used as
|
||||
a 'null policy' to allow setting and receiving any and all cookies.
|
||||
|
||||
|
||||
\subsection{DefaultCookiePolicy Objects \label{default-cookie-policy-objects}}
|
||||
|
||||
Implements the standard rules for accepting and returning cookies.
|
||||
|
||||
Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is
|
||||
switched off by default.
|
||||
|
||||
The easiest way to provide your own policy is to override this class
|
||||
and call its methods in your overridden implementations before adding
|
||||
your own additional checks:
|
||||
|
||||
\begin{verbatim}
|
||||
import cookielib
|
||||
class MyCookiePolicy(cookielib.DefaultCookiePolicy):
|
||||
    def set_ok(self, cookie, request):
|
||||
        if not cookielib.DefaultCookiePolicy.set_ok(self, cookie, request):
|
||||
            return False
|
||||
        if i_dont_want_to_store_this_cookie(cookie):
|
||||
            return False
|
||||
        return True
|
||||
\end{verbatim}
|
||||
|
||||
In addition to the features required to implement the
|
||||
\class{CookiePolicy} interface, this class allows you to block and
|
||||
allow domains from setting and receiving cookies. There are also some
|
||||
strictness switches that allow you to tighten up the rather loose
|
||||
Netscape protocol rules a little bit (at the cost of blocking some
|
||||
benign cookies).
|
||||
|
||||
A domain blacklist and whitelist are provided (both off by default).
|
||||
Only domains not in the blacklist and present in the whitelist (if the
|
||||
whitelist is active) participate in cookie setting and returning. Use
|
||||
the \var{blocked_domains} constructor argument, and
|
||||
\method{blocked_domains()} and \method{set_blocked_domains()} methods
|
||||
(and the corresponding argument and methods for
|
||||
\var{allowed_domains}). If you set a whitelist, you can turn it off
|
||||
again by setting it to \constant{None}.
|
||||
|
||||
Domains in block or allow lists that do not start with a dot must be
|
||||
equal. For example, \code{"example.com"} matches a blacklist entry of
|
||||
\code{"example.com"}, but \code{"www.example.com"} does not. Domains
|
||||
that do start with a dot are matched by more specific domains too.
|
||||
For example, both \code{"www.example.com"} and
|
||||
\code{"www.coyote.example.com"} match \code{".example.com"} (but
|
||||
\code{"example.com"} itself does not). IP addresses are an exception,
|
||||
and must match exactly. For example, if blocked_domains contains
|
||||
\code{"192.168.1.2"} and \code{".168.1.2"}, 192.168.1.2 is blocked,
|
||||
but 193.168.1.2 is not.
|
||||
|
||||
\class{DefaultCookiePolicy} implements the following additional
|
||||
methods:
|
||||
|
||||
\begin{methoddesc}[DefaultCookiePolicy]{blocked_domains}{}
|
||||
Return the sequence of blocked domains (as a tuple).
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[DefaultCookiePolicy]{set_blocked_domains}
|
||||
{blocked_domains}
|
||||
Set the sequence of blocked domains.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[DefaultCookiePolicy]{is_blocked}{domain}
|
||||
Return whether \var{domain} is on the blacklist for setting or
|
||||
receiving cookies.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[DefaultCookiePolicy]{allowed_domains}{}
|
||||
Return \constant{None}, or the sequence of allowed domains (as a tuple).
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[DefaultCookiePolicy]{set_allowed_domains}
|
||||
{allowed_domains}
|
||||
Set the sequence of allowed domains, or \constant{None}.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[DefaultCookiePolicy]{is_not_allowed}{domain}
|
||||
Return whether \var{domain} is not on the whitelist for setting or
|
||||
receiving cookies.
|
||||
\end{methoddesc}
|
||||
|
||||
\class{DefaultCookiePolicy} instances have the following attributes,
|
||||
which are all initialised from the constructor arguments of the same
|
||||
name, and which may all be assigned to.
|
||||
|
||||
General strictness switches:
|
||||
|
||||
\begin{memberdesc}{strict_domain}
|
||||
Don't allow sites to set two-component domains with country-code
|
||||
top-level domains like \code{.co.uk}, \code{.gov.uk},
|
||||
\code{.co.nz}, etc. This is far from perfect and isn't guaranteed to
|
||||
work!
|
||||
\end{memberdesc}
|
||||
|
||||
RFC 2965 protocol strictness switches:
|
||||
|
||||
\begin{memberdesc}{strict_rfc2965_unverifiable}
|
||||
Follow RFC 2965 rules on unverifiable transactions (usually, an
|
||||
unverifiable transaction is one resulting from a redirect or a request
|
||||
for an image hosted on another site). If this is false, cookies are
|
||||
\emph{never} blocked on the basis of verifiability.
|
||||
\end{memberdesc}
|
||||
|
||||
Netscape protocol strictness switches:
|
||||
|
||||
\begin{memberdesc}{strict_ns_unverifiable}
|
||||
Apply RFC 2965 rules on unverifiable transactions even to Netscape
|
||||
cookies.
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}{strict_ns_domain}
|
||||
Flags indicating how strict to be with domain-matching rules for
|
||||
Netscape cookies. See below for acceptable values.
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}{strict_ns_set_initial_dollar}
|
||||
Ignore cookies in Set-Cookie: headers that have names starting with
|
||||
\code{'\$'}.
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}{strict_ns_set_path}
|
||||
Don't allow setting cookies whose path doesn't path-match request URI.
|
||||
\end{memberdesc}
|
||||
|
||||
\member{strict_ns_domain} is a collection of flags. Its value is
|
||||
constructed by or-ing together the flags listed below (for example,
|
||||
\code{DomainStrictNoDots|DomainStrictNonDomain} means both flags are
|
||||
set).
|
||||
|
||||
\begin{memberdesc}{DomainStrictNoDots}
|
||||
When setting cookies, the 'host prefix' must not contain a dot
|
||||
(eg. \code{www.foo.bar.com} can't set a cookie for \code{.bar.com},
|
||||
because \code{www.foo} contains a dot).
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}{DomainStrictNonDomain}
|
||||
Cookies that did not explicitly specify a \code{domain}
|
||||
cookie-attribute can only be returned to a domain that string-compares
|
||||
equal to the domain that set the cookie (eg. \code{spam.example.com}
|
||||
won't be returned cookies from \code{example.com} that had no
|
||||
\code{domain} cookie-attribute).
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}{DomainRFC2965Match}
|
||||
When setting cookies, require a full RFC 2965 domain-match.
|
||||
\end{memberdesc}
|
||||
|
||||
The following attributes are provided for convenience, and are the
|
||||
most useful combinations of the above flags:
|
||||
|
||||
\begin{memberdesc}{DomainLiberal}
|
||||
Equivalent to 0 (ie. all of the above Netscape domain strictness flags
|
||||
switched off).
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}{DomainStrict}
|
||||
Equivalent to \code{DomainStrictNoDots|DomainStrictNonDomain}.
|
||||
\end{memberdesc}
|
||||
|
||||
|
||||
\subsection{Cookie Objects \label{cookie-objects}}
|
||||
|
||||
\class{Cookie} instances have Python attributes roughly corresponding
|
||||
to the standard cookie-attributes specified in the various cookie
|
||||
standards. The correspondence is not one-to-one, because there are
|
||||
complicated rules for assigning default values, and because the
|
||||
\code{max-age} and \code{expires} cookie-attributes contain equivalent
|
||||
information.
|
||||
|
||||
Assignment to these attributes should not be necessary other than in
|
||||
rare circumstances in a \class{CookiePolicy} method. The class does
|
||||
not enforce internal consistency, so you should know what you're
|
||||
doing if you do that.
|
||||
|
||||
\begin{memberdesc}[Cookie]{version}
|
||||
Integer or \constant{None}. Netscape cookies have version 0. RFC
|
||||
2965 and RFC 2109 cookies have version 1.
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}[Cookie]{name}
|
||||
Cookie name (a string), or \constant{None}.
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}[Cookie]{value}
|
||||
Cookie value (a string).
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}[Cookie]{port}
|
||||
String representing a port or a set of ports (eg. '80', or '80,8080'),
|
||||
or \constant{None}.
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}[Cookie]{path}
|
||||
Cookie path (a string, eg. '/acme/rocket_launchers').
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}[Cookie]{secure}
|
||||
True if cookie should only be returned over a secure connection.
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}[Cookie]{expires}
|
||||
Integer expiry date in seconds since epoch, or \constant{None}. See
|
||||
also the \method{is_expired()} method.
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}[Cookie]{discard}
|
||||
True if this is a session cookie.
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}[Cookie]{comment}
|
||||
String comment from the server explaining the function of this cookie,
|
||||
or \constant{None}.
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}[Cookie]{comment_url}
|
||||
URL linking to a comment from the server explaining the function of
|
||||
this cookie, or \constant{None}.
|
||||
\end{memberdesc}
|
||||
|
||||
\begin{memberdesc}[Cookie]{port_specified}
|
||||
True if a port or set of ports was explicitly specified by the server
|
||||
(in the \mailheader{Set-Cookie} / \mailheader{Set-Cookie2} header).
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}[Cookie]{domain_specified}
|
||||
True if a domain was explicitly specified by the server.
|
||||
\end{memberdesc}
|
||||
\begin{memberdesc}[Cookie]{domain_initial_dot}
|
||||
True if the domain explicitly specified by the server began with a
|
||||
dot ('.').
|
||||
\end{memberdesc}
|
||||
|
||||
Cookies may have additional non-standard cookie-attributes. These may
|
||||
be accessed using the following methods:
|
||||
|
||||
\begin{methoddesc}[Cookie]{has_nonstandard_attr}{name}
|
||||
Return true if cookie has the named cookie-attribute.
|
||||
\end{methoddesc}
|
||||
\begin{methoddesc}[Cookie]{get_nonstandard_attr}{name, default=\constant{None}}
|
||||
If cookie has the named cookie-attribute, return its value.
|
||||
Otherwise, return \var{default}.
|
||||
\end{methoddesc}
|
||||
\begin{methoddesc}[Cookie]{set_nonstandard_attr}{name, value}
|
||||
Set the value of the named cookie-attribute.
|
||||
\end{methoddesc}
|
||||
|
||||
The \class{Cookie} class also defines the following method:
|
||||
|
||||
\begin{methoddesc}[Cookie]{is_expired}{\optional{now=\constant{None}}}
|
||||
True if cookie has passed the time at which the server requested it
|
||||
should expire. If \var{now} is given (in seconds since the epoch),
|
||||
return whether the cookie has expired at the specified time.
|
||||
\end{methoddesc}
|
||||
|
||||
|
||||
\subsection{Examples \label{cookielib-examples}}
|
||||
|
||||
The first example shows the most common usage of \module{cookielib}:
|
||||
|
||||
\begin{verbatim}
|
||||
import cookielib, urllib2
|
||||
cj = cookielib.CookieJar()
|
||||
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
|
||||
r = opener.open("http://example.com/")
|
||||
\end{verbatim}
|
||||
|
||||
This example illustrates how to open a URL using your Netscape,
|
||||
Mozilla, or lynx cookies (assumes \UNIX{} convention for location of
|
||||
the cookies file):
|
||||
|
||||
\begin{verbatim}
|
||||
import os, cookielib, urllib2
|
||||
cj = cookielib.MozillaCookieJar()
|
||||
cj.load(os.path.join(os.environ["HOME"], ".netscape/cookies.txt"))
|
||||
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
|
||||
r = opener.open("http://example.com/")
|
||||
\end{verbatim}
|
||||
|
||||
The next example illustrates the use of \class{DefaultCookiePolicy}.
|
||||
Turn on RFC 2965 cookies, be more strict about domains when setting
|
||||
and returning Netscape cookies, and block some domains from setting
|
||||
cookies or having them returned:
|
||||
|
||||
\begin{verbatim}
|
||||
import urllib2
|
||||
from cookielib import CookieJar, DefaultCookiePolicy as Policy
|
||||
policy = Policy(rfc2965=True, strict_ns_domain=Policy.DomainStrict,
|
||||
blocked_domains=["ads.net", ".ads.net"])
|
||||
cj = CookieJar(policy)
|
||||
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
|
||||
r = opener.open("http://example.com/")
|
||||
\end{verbatim}
|
|
@ -10,14 +10,13 @@
|
|||
|
||||
The \module{urllib2} module defines functions and classes which help
|
||||
in opening URLs (mostly HTTP) in a complex world --- basic and digest
|
||||
authentication, redirections and more.
|
||||
authentication, redirections, cookies and more.
|
||||
|
||||
The \module{urllib2} module defines the following functions:
|
||||
|
||||
\begin{funcdesc}{urlopen}{url\optional{, data}}
|
||||
Open the URL \var{url}, which can be either a string or a \class{Request}
|
||||
object (currently the code checks that it really is a \class{Request}
|
||||
instance, or an instance of a subclass of \class{Request}).
|
||||
object.
|
||||
|
||||
\var{data} should be a string, which specifies additional data to
|
||||
send to the server. In HTTP requests, which are the only ones that
|
||||
|
@ -87,13 +86,32 @@ Gopher handler.
|
|||
|
||||
The following classes are provided:
|
||||
|
||||
\begin{classdesc}{Request}{url\optional{, data\optional{, headers}}}
|
||||
\begin{classdesc}{Request}{url\optional{, data}\optional{, headers}
|
||||
\optional{, origin_req_host}\optional{, unverifiable}}
|
||||
This class is an abstraction of a URL request.
|
||||
|
||||
\var{url} should be a string which is a valid URL. For a description
|
||||
of \var{data} see the \method{add_data()} description.
|
||||
\var{headers} should be a dictionary, and will be treated as if
|
||||
\method{add_header()} was called with each key and value as arguments.
|
||||
|
||||
The final two arguments are only of interest for correct handling of
|
||||
third-party HTTP cookies:
|
||||
|
||||
\var{origin_req_host} should be the request-host of the origin
|
||||
transaction, as defined by \rfc{2965}. It defaults to
|
||||
\code{cookielib.request_host(self)}. This is the host name or IP
|
||||
address of the original request that was initiated by the user. For
|
||||
example, if the request is for an image in an HTML document, this
|
||||
should be the request-host of the request for the page containing the
|
||||
image.
|
||||
|
||||
\var{unverifiable} should indicate whether the request is
|
||||
unverifiable, as defined by RFC 2965. It defaults to False. An
|
||||
unverifiable request is one whose URL the user did not have the option
|
||||
to approve. For example, if the request is for an image in an HTML
|
||||
document, and the user had no option to approve the automatic fetching
|
||||
of the image, this should be true.
|
||||
\end{classdesc}
|
||||
|
||||
\begin{classdesc}{OpenerDirector}{}
|
||||
|
@ -116,6 +134,10 @@ responses are turned into \exception{HTTPError} exceptions.
|
|||
A class to handle redirections.
|
||||
\end{classdesc}
|
||||
|
||||
\begin{classdesc}{HTTPCookieProcessor}{\optional{cookiejar}}
|
||||
A class to handle HTTP Cookies.
|
||||
\end{classdesc}
|
||||
|
||||
\begin{classdesc}{ProxyHandler}{\optional{proxies}}
|
||||
Cause requests to go through a proxy.
|
||||
If \var{proxies} is given, it must be a dictionary mapping
|
||||
|
@ -217,10 +239,10 @@ The following methods describe all of \class{Request}'s public interface,
|
|||
and so all must be overridden in subclasses.
|
||||
|
||||
\begin{methoddesc}[Request]{add_data}{data}
|
||||
Set the \class{Request} data to \var{data}. This is ignored
|
||||
by all handlers except HTTP handlers --- and there it should be an
|
||||
\mimetype{application/x-www-form-encoded} buffer, and will change the
|
||||
request to be \code{POST} rather than \code{GET}.
|
||||
Set the \class{Request} data to \var{data}. This is ignored by all
|
||||
handlers except HTTP handlers --- and there it should be a byte
|
||||
string, and will change the request to be \code{POST} rather than
|
||||
\code{GET}.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[Request]{get_method}{}
|
||||
|
@ -282,6 +304,17 @@ and \var{type} will replace those of the instance, and the instance's
|
|||
selector will be the original URL given in the constructor.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[Request]{get_origin_req_host}{}
|
||||
Return the request-host of the origin transaction, as defined by
|
||||
\rfc{2965}. See the documentation for the \class{Request}
|
||||
constructor.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[Request]{is_unverifiable}{}
|
||||
Return whether the request is unverifiable, as defined by RFC 2965.
|
||||
See the documentation for the \class{Request} constructor.
|
||||
\end{methoddesc}
|
||||
|
||||
|
||||
\subsection{OpenerDirector Objects \label{opener-director-objects}}
|
||||
|
||||
|
@ -289,14 +322,18 @@ selector will be the original URL given in the constructor.
|
|||
|
||||
\begin{methoddesc}[OpenerDirector]{add_handler}{handler}
|
||||
\var{handler} should be an instance of \class{BaseHandler}. The
|
||||
following methods are searched, and added to the possible chains.
|
||||
following methods are searched, and added to the possible chains (note
|
||||
that HTTP errors are a special case).
|
||||
|
||||
\begin{itemize}
|
||||
\item \method{\var{protocol}_open()} ---
|
||||
signal that the handler knows how to open \var{protocol} URLs.
|
||||
\item \method{\var{protocol}_error_\var{type}()} ---
|
||||
signal that the handler knows how to handle \var{type} errors from
|
||||
\var{protocol}.
|
||||
\item \method{http_error_\var{type}()} ---
|
||||
signal that the handler knows how to handle HTTP errors with HTTP
|
||||
error code \var{type}.
|
||||
\item \method{\var{protocol}_error()} ---
|
||||
signal that the handler knows how to handle errors from
|
||||
(non-\code{http}) \var{protocol}.
|
||||
\item \method{\var{protocol}_request()} ---
|
||||
signal that the handler knows how to pre-process \var{protocol}
|
||||
requests.
|
||||
|
@ -306,26 +343,17 @@ following methods are searched, and added to the possible chains.
|
|||
\end{itemize}
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[OpenerDirector]{close}{}
|
||||
Explicitly break cycles, and delete all the handlers.
|
||||
Because the \class{OpenerDirector} needs to know the registered handlers,
|
||||
and a handler needs to know who the \class{OpenerDirector} who called
|
||||
it is, there is a reference cycle. Even though recent versions of Python
|
||||
have cycle-collection, it is sometimes preferable to explicitly break
|
||||
the cycles.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[OpenerDirector]{open}{url\optional{, data}}
|
||||
Open the given \var{url} (which can be a request object or a string),
|
||||
optionally passing the given \var{data}.
|
||||
Arguments, return values and exceptions raised are the same as those
|
||||
of \function{urlopen()} (which simply calls the \method{open()} method
|
||||
on the default installed \class{OpenerDirector}).
|
||||
on the currently installed global \class{OpenerDirector}).
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[OpenerDirector]{error}{proto\optional{,
|
||||
arg\optional{, \moreargs}}}
|
||||
Handle an error in a given protocol. This will call the registered
|
||||
Handle an error of the given protocol. This will call the registered
|
||||
error handlers for the given protocol with the given arguments (which
|
||||
are protocol specific). The HTTP protocol is a special case which
|
||||
uses the HTTP response code to determine the specific error handler;
|
||||
|
@ -335,6 +363,45 @@ Return values and exceptions raised are the same as those
|
|||
of \function{urlopen()}.
|
||||
\end{methoddesc}
|
||||
|
||||
OpenerDirector objects open URLs in three stages:
|
||||
|
||||
\begin{enumerate}
|
||||
\item Every handler with a method named like
|
||||
\method{\var{protocol}_request()} has that method called to
|
||||
pre-process the request.
|
||||
|
||||
The order in which these methods are called is determined by
|
||||
sorting the handler instances by the \member{.processor_order}
|
||||
attribute.
|
||||
|
||||
\item Handlers with a method named like
|
||||
\method{\var{protocol}_open()} are called to handle the request.
|
||||
This stage ends when a handler either returns a
|
||||
non-\constant{None} value (i.e., a response), or raises an exception
|
||||
(usually \exception{URLError}). Exceptions are allowed to propagate.
|
||||
|
||||
In fact, the above algorithm is first tried for methods named
|
||||
\method{default_open}. If all such methods return
|
||||
\constant{None}, the algorithm is repeated for methods named like
|
||||
\method{\var{protocol}_open()}. If all such methods return
|
||||
\constant{None}, the algorithm is repeated for methods named
|
||||
\method{unknown_open()}.
|
||||
|
||||
Note that the implementation of these methods may involve calls of
|
||||
the parent \class{OpenerDirector} instance's \method{.open()} and
|
||||
\method{.error()} methods.
|
||||
|
||||
The order in which these methods are called is determined by
|
||||
sorting the handler instances.
|
||||
|
||||
\item Every handler with a method named like
|
||||
\method{\var{protocol}_response()} has that method called to
|
||||
post-process the response.
|
||||
|
||||
The order in which these methods are called is determined by
|
||||
sorting the handler instances by the \member{.processor_order}
|
||||
attribute.
|
||||
\end{enumerate}
|
||||
|
||||
\subsection{BaseHandler Objects \label{base-handler-objects}}
|
||||
|
||||
|
@ -351,7 +418,11 @@ Remove any parents.
|
|||
\end{methoddesc}
|
||||
|
||||
The following members and methods should only be used by classes
|
||||
derived from \class{BaseHandler}:
|
||||
derived from \class{BaseHandler}. \note{The convention has been
|
||||
adopted that subclasses defining \method{\var{protocol}_request()} or
|
||||
\method{\var{protocol}_response()} methods are named
|
||||
\class{*Processor}; all others are named \class{*Handler}.}
|
||||
|
||||
|
||||
\begin{memberdesc}[BaseHandler]{parent}
|
||||
A valid \class{OpenerDirector}, which can be used to open using a
|
||||
|
@ -423,6 +494,29 @@ Arguments, return values and exceptions raised should be the same as
|
|||
for \method{http_error_default()}.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddescni}[BaseHandler]{\var{protocol}_request}{req}
|
||||
This method is \emph{not} defined in \class{BaseHandler}, but
|
||||
subclasses should define it if they want to pre-process requests of
|
||||
the given protocol.
|
||||
|
||||
This method, if defined, will be called by the parent
|
||||
\class{OpenerDirector}. \var{req} will be a \class{Request} object.
|
||||
The return value should be a \class{Request} object.
|
||||
\end{methoddescni}
|
||||
|
||||
\begin{methoddescni}[BaseHandler]{\var{protocol}_response}{req, response}
|
||||
This method is \emph{not} defined in \class{BaseHandler}, but
|
||||
subclasses should define it if they want to post-process responses of
|
||||
the given protocol.
|
||||
|
||||
This method, if defined, will be called by the parent
|
||||
\class{OpenerDirector}. \var{req} will be a \class{Request} object.
|
||||
\var{response} will be an object implementing the same interface as
|
||||
the return value of \function{urlopen()}. The return value should
|
||||
implement the same interface as the return value of
|
||||
\function{urlopen()}.
|
||||
\end{methoddescni}
|
||||
|
||||
\subsection{HTTPRedirectHandler Objects \label{http-redirect-handler}}
|
||||
|
||||
\note{Some HTTP redirections require action from this module's client
|
||||
|
@ -434,12 +528,12 @@ for \method{http_error_default()}.
|
|||
fp, code, msg, hdrs}
|
||||
Return a \class{Request} or \code{None} in response to a redirect.
|
||||
This is called by the default implementations of the
|
||||
\method{http_error_30*()} methods when a redirection is received
|
||||
from the server. If a redirection should take place, return a new
|
||||
\method{http_error_30*()} methods when a redirection is received from
|
||||
the server. If a redirection should take place, return a new
|
||||
\class{Request} to allow \method{http_error_30*()} to perform the
|
||||
redirect. Otherwise, raise \exception{HTTPError} if no other
|
||||
\class{Handler} should try to handle this URL, or return \code{None}
|
||||
if you can't but another \class{Handler} might.
|
||||
redirect. Otherwise, raise \exception{HTTPError} if no other handler
|
||||
should try to handle this URL, or return \code{None} if you can't but
|
||||
another handler might.
|
||||
|
||||
\begin{notice}
|
||||
The default implementation of this method does not strictly
|
||||
|
@ -478,6 +572,15 @@ The same as \method{http_error_301()}, but called for the
|
|||
\end{methoddesc}
|
||||
|
||||
|
||||
\subsection{HTTPCookieProcessor Objects \label{http-cookie-processor}}
|
||||
|
||||
\class{HTTPCookieProcessor} instances have one attribute:
|
||||
|
||||
\begin{memberdesc}{cookiejar}
|
||||
The \class{cookielib.CookieJar} in which cookies are stored.
|
||||
\end{memberdesc}
|
||||
|
||||
|
||||
\subsection{ProxyHandler Objects \label{proxy-handler}}
|
||||
|
||||
\begin{methoddescni}[ProxyHandler]{\var{protocol}_open}{request}
|
||||
|
|
|
@ -569,6 +569,25 @@ For example:
|
|||
%======================================================================
|
||||
% whole new modules get described in \subsections here
|
||||
|
||||
\subsection{cookielib}
|
||||
|
||||
The \module{cookielib} library supports client-side handling for HTTP
|
||||
cookies, just as the \module{Cookie} module provides server-side cookie
|
||||
support in CGI scripts. This library manages cookies in a way similar
|
||||
to web browsers. Cookies are stored in cookie jars; the library
|
||||
transparently stores cookies offered by the web server in the cookie
|
||||
jar, and fetches the cookie from the jar when connecting to the
|
||||
server. Similar to web browsers, policy objects control whether
|
||||
cookies are accepted or not.
|
||||
|
||||
In order to store cookies across sessions, two implementations of
|
||||
cookie jars are provided: one that stores cookies in the Netscape
|
||||
format, so applications can use the Mozilla or Lynx cookie jars, and
|
||||
one that stores cookies in the same format as the Perl libwww library.
|
||||
|
||||
\module{urllib2} has been changed to interact with \module{cookielib}:
|
||||
\class{HTTPCookieProcessor} manages a cookie jar that is used when
|
||||
accessing URLs.
|
||||
|
||||
% ======================================================================
|
||||
\section{Build and C API Changes}
|
||||
|
|
|
@ -0,0 +1,171 @@
|
|||
"""Load / save to libwww-perl (LWP) format files.
|
||||
|
||||
Actually, the format is slightly extended from that used by LWP's
|
||||
(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information
|
||||
not recorded by LWP.
|
||||
|
||||
It uses the version string "2.0", though really there isn't an LWP Cookies
|
||||
2.0 format. This indicates that there is extra information in here
|
||||
(domain_dot and port_spec) while still being compatible with
|
||||
libwww-perl, I hope.
|
||||
|
||||
"""
|
||||
|
||||
import time, re, logging
|
||||
from cookielib import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
|
||||
MISSING_FILENAME_TEXT, join_header_words, split_header_words, \
|
||||
iso2time, time2isoz
|
||||
|
||||
def lwp_cookie_str(cookie):
    """Return string representation of Cookie in the LWP cookie file format.

    Actually, the format is extended a bit -- see module docstring.

    The cookie's name/value pair comes first, followed by path and domain,
    then the optional standard attributes that are set on the cookie, then
    every entry of cookie._rest in sorted key order, and finally the
    version.  The resulting list of pairs is handed to join_header_words()
    to build the string.

    """
    h = [(cookie.name, cookie.value),
         ("path", cookie.path),
         ("domain", cookie.domain)]
    if cookie.port is not None:
        h.append(("port", cookie.port))
    # Flag-style attributes carry no value; they are recorded as (name, None).
    if cookie.path_specified:
        h.append(("path_spec", None))
    if cookie.port_specified:
        h.append(("port_spec", None))
    if cookie.domain_initial_dot:
        h.append(("domain_dot", None))
    if cookie.secure:
        h.append(("secure", None))
    if cookie.expires:
        h.append(("expires", time2isoz(float(cookie.expires))))
    if cookie.discard:
        h.append(("discard", None))
    if cookie.comment:
        h.append(("comment", cookie.comment))
    if cookie.comment_url:
        h.append(("commenturl", cookie.comment_url))

    # Emit non-standard attributes in sorted order so output is deterministic.
    for k in sorted(cookie._rest.keys()):
        h.append((k, str(cookie._rest[k])))

    h.append(("version", str(cookie.version)))

    return join_header_words([h])
|
||||
|
||||
class LWPCookieJar(FileCookieJar):
    """
    The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
    "Set-Cookie3" is the format used by the libwww-perl library, not known
    to be compatible with any browser, but which is easy to read and
    doesn't lose information about RFC 2965 cookies.

    Additional methods

    as_lwp_str(ignore_discard=True, ignore_expires=True)

    """

    def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
        """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers.

        ignore_discard and ignore_expires: see docstring for FileCookieJar.save

        Every emitted line is newline-terminated; an empty jar yields "".

        """
        now = time.time()
        r = []
        for cookie in self:
            if not ignore_discard and cookie.discard:
                continue
            if not ignore_expires and cookie.is_expired(now):
                continue
            r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
        # The trailing empty element makes join() terminate the last line.
        return "\n".join(r+[""])

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the cookies to filename in "Set-Cookie3" format.

        filename defaults to self.filename; ValueError is raised if neither
        is set.  ignore_discard and ignore_expires are passed through to
        as_lwp_str().

        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename, "w")
        try:
            # There really isn't an LWP Cookies 2.0 format, but this indicates
            # that there is extra information in here (domain_dot and
            # port_spec) while still being compatible with libwww-perl, I hope.
            f.write("#LWP-Cookies-2.0\n")
            f.write(self.as_lwp_str(ignore_discard, ignore_expires))
        finally:
            f.close()

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Load "Set-Cookie3" lines from the open file object f into the jar.

        filename is only used in error messages.  IOError is raised if the
        first line does not match self.magic_re.  Any other failure while
        parsing is passed to reraise_unmasked_exceptions() and then reported
        as an IOError.  Lines not starting with "Set-Cookie3:" are ignored.

        """
        magic = f.readline()
        if not re.search(self.magic_re, magic):
            msg = "%s does not seem to contain cookies" % filename
            raise IOError(msg)

        now = time.time()

        header = "Set-Cookie3:"
        boolean_attrs = ("port_spec", "path_spec", "domain_dot",
                         "secure", "discard")
        value_attrs = ("version",
                       "port", "path", "domain",
                       "expires",
                       "comment", "commenturl")

        try:
            while 1:
                line = f.readline()
                if line == "":
                    break
                if not line.startswith(header):
                    continue
                line = line[len(header):].strip()

                for data in split_header_words([line]):
                    name, value = data[0]
                    # name and value are an exception here, since a plain "foo"
                    # (with no "=", unlike "bar=foo") means a cookie with no
                    # name and value "foo".  With all other cookie-attributes,
                    # the situation is reversed: "foo" means an attribute named
                    # "foo" with no value!
                    if value is None:
                        name, value = value, name
                    standard = {}
                    rest = {}
                    # Flags default to False unless present on the line.
                    for k in boolean_attrs:
                        standard[k] = False
                    for k, v in data[1:]:
                        if k is not None:
                            lc = k.lower()
                        else:
                            lc = None
                        # don't lose case distinction for unknown fields
                        if (lc in value_attrs) or (lc in boolean_attrs):
                            k = lc
                        if k in boolean_attrs:
                            if v is None:
                                v = True
                            standard[k] = v
                        elif k in value_attrs:
                            standard[k] = v
                        else:
                            rest[k] = v

                    h = standard.get
                    expires = h("expires")
                    discard = h("discard")
                    if expires is not None:
                        expires = iso2time(expires)
                    # A cookie without a usable expiry is treated as a
                    # discard (session) cookie.
                    if expires is None:
                        discard = True
                    domain = h("domain")
                    domain_specified = domain.startswith(".")
                    c = Cookie(h("version"), name, value,
                               h("port"), h("port_spec"),
                               domain, domain_specified, h("domain_dot"),
                               h("path"), h("path_spec"),
                               h("secure"),
                               expires,
                               discard,
                               h("comment"),
                               h("commenturl"),
                               rest)
                    if not ignore_discard and c.discard:
                        continue
                    if not ignore_expires and c.is_expired(now):
                        continue
                    self.set_cookie(c)
        except:
            reraise_unmasked_exceptions((IOError,))
            raise IOError("invalid Set-Cookie3 format file %s" % filename)
|
|
@ -0,0 +1,145 @@
|
|||
"""Mozilla / Netscape cookie loading / saving."""
|
||||
|
||||
import re, time, logging
|
||||
|
||||
from cookielib import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
|
||||
MISSING_FILENAME_TEXT
|
||||
|
||||
class MozillaCookieJar(FileCookieJar):
    """

    WARNING: you may want to backup your browser's cookies file if you use
    this class to save cookies. I *think* it works, but there have been
    bugs in the past!

    This class differs from CookieJar only in the format it uses to save and
    load cookies to and from a file. This class uses the Mozilla/Netscape
    `cookies.txt' format. lynx uses this file format, too.

    Don't expect cookies saved while the browser is running to be noticed by
    the browser (in fact, Mozilla on unix will overwrite your saved cookies if
    you change them on disk while it's running; on Windows, you probably can't
    save at all while the browser is running).

    Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
    Netscape cookies on saving.

    In particular, the cookie version and port number information is lost,
    together with information about whether or not Path, Port and Discard were
    specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
    domain as set in the HTTP header started with a dot (yes, I'm aware some
    domains in Netscape files start with a dot and some don't -- trust me, you
    really don't want to know any more about this).

    Note that though Mozilla and Netscape use the same format, they use
    slightly different headers. The class saves cookies using the Netscape
    header by default (Mozilla can cope with that).

    """
    magic_re = "#( Netscape)? HTTP Cookie File"
    header = """\
# Netscape HTTP Cookie File
# http://www.netscape.com/newsref/std/cookie_spec.html
# This is a generated file! Do not edit.

"""

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Load tab-separated cookies.txt records from the open file f.

        filename is only used in error messages.  IOError is raised if the
        first line does not match self.magic_re.  Any other failure while
        parsing is passed to reraise_unmasked_exceptions() and then reported
        as an IOError.  Comment lines, "$" lines and blank lines are skipped.

        """
        now = time.time()

        magic = f.readline()
        if not re.search(self.magic_re, magic):
            f.close()
            raise IOError(
                "%s does not look like a Netscape format cookies file" %
                filename)

        try:
            while 1:
                line = f.readline()
                if line == "":
                    break

                # last field may be absent, so keep any trailing tab
                if line.endswith("\n"):
                    line = line[:-1]

                # skip comments and blank lines XXX what is $ for?
                stripped = line.strip()
                if (stripped.startswith("#") or
                    stripped.startswith("$") or
                    stripped == ""):
                    continue

                domain, domain_specified, path, secure, expires, name, value = \
                    line.split("\t")
                secure = (secure == "TRUE")
                domain_specified = (domain_specified == "TRUE")
                # cookies.txt stores a valueless cookie as an empty name plus
                # a value; swap so the jar holds it as name-with-no-value.
                if name == "":
                    name = value
                    value = None

                initial_dot = domain.startswith(".")
                # Explicit check rather than assert, so that inconsistent
                # input is still rejected when running under -O.
                if domain_specified != initial_dot:
                    raise ValueError(
                        "domain flag does not match leading dot of domain")

                discard = False
                if expires == "":
                    expires = None
                    discard = True

                # assume path_specified is false
                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           {})
                if not ignore_discard and c.discard:
                    continue
                if not ignore_expires and c.is_expired(now):
                    continue
                self.set_cookie(c)

        except:
            reraise_unmasked_exceptions((IOError,))
            raise IOError("invalid Netscape format file %s: %s" %
                          (filename, line))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the cookies to filename in cookies.txt format.

        filename defaults to self.filename; ValueError is raised if neither
        is set.  Cookies marked discard are skipped unless ignore_discard is
        true, and expired cookies are skipped unless ignore_expires is true.

        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename, "w")
        try:
            f.write(self.header)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = "TRUE"
                else:
                    secure = "FALSE"
                if cookie.domain.startswith("."):
                    initial_dot = "TRUE"
                else:
                    initial_dot = "FALSE"
                if cookie.expires is not None:
                    expires = str(cookie.expires)
                else:
                    expires = ""
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas cookielib regards it as a
                    # cookie with no value.
                    name = ""
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    "\t".join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) +
                    "\n")
        finally:
            f.close()
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -54,6 +54,10 @@ class MockFile:
|
|||
def readline(self, count=None): pass
|
||||
def close(self): pass
|
||||
|
||||
class MockHeaders(dict):
|
||||
def getheaders(self, name):
|
||||
return self.values()
|
||||
|
||||
class MockResponse(StringIO.StringIO):
|
||||
def __init__(self, code, msg, headers, data, url=None):
|
||||
StringIO.StringIO.__init__(self, data)
|
||||
|
@ -63,6 +67,12 @@ class MockResponse(StringIO.StringIO):
|
|||
def geturl(self):
|
||||
return self.url
|
||||
|
||||
class MockCookieJar:
|
||||
def add_cookie_header(self, request):
|
||||
self.ach_req = request
|
||||
def extract_cookies(self, response, request):
|
||||
self.ec_req, self.ec_r = request, response
|
||||
|
||||
class FakeMethod:
|
||||
def __init__(self, meth_name, action, handle):
|
||||
self.meth_name = meth_name
|
||||
|
@ -474,7 +484,7 @@ class HandlerTests(unittest.TestCase):
|
|||
for data in "", None: # POST, GET
|
||||
req = Request("http://example.com/", data)
|
||||
r = MockResponse(200, "OK", {}, "")
|
||||
newreq = h.do_request(req)
|
||||
newreq = h.do_request_(req)
|
||||
if data is None: # GET
|
||||
self.assert_("Content-length" not in req.unredirected_hdrs)
|
||||
self.assert_("Content-type" not in req.unredirected_hdrs)
|
||||
|
@ -491,7 +501,7 @@ class HandlerTests(unittest.TestCase):
|
|||
req.add_unredirected_header("Content-type", "bar")
|
||||
req.add_unredirected_header("Host", "baz")
|
||||
req.add_unredirected_header("Spam", "foo")
|
||||
newreq = h.do_request(req)
|
||||
newreq = h.do_request_(req)
|
||||
self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
|
||||
self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
|
||||
self.assertEqual(req.unredirected_hdrs["Host"], "baz")
|
||||
|
@ -514,6 +524,21 @@ class HandlerTests(unittest.TestCase):
|
|||
self.assertEqual(o.proto, "http") # o.error called
|
||||
self.assertEqual(o.args, (req, r, 201, "Created", {}))
|
||||
|
||||
def test_cookies(self):
|
||||
cj = MockCookieJar()
|
||||
h = urllib2.HTTPCookieProcessor(cj)
|
||||
o = h.parent = MockOpener()
|
||||
|
||||
req = Request("http://example.com/")
|
||||
r = MockResponse(200, "OK", {}, "")
|
||||
newreq = h.http_request(req)
|
||||
self.assert_(cj.ach_req is req is newreq)
|
||||
self.assertEquals(req.get_origin_req_host(), "example.com")
|
||||
self.assert_(not req.is_unverifiable())
|
||||
newr = h.http_response(req, r)
|
||||
self.assert_(cj.ec_req is req)
|
||||
self.assert_(cj.ec_r is r is newr)
|
||||
|
||||
def test_redirect(self):
|
||||
from_url = "http://example.com/a.html"
|
||||
to_url = "http://example.com/b.html"
|
||||
|
@ -528,7 +553,8 @@ class HandlerTests(unittest.TestCase):
|
|||
req.add_header("Nonsense", "viking=withhold")
|
||||
req.add_unredirected_header("Spam", "spam")
|
||||
try:
|
||||
method(req, MockFile(), code, "Blah", {"location": to_url})
|
||||
method(req, MockFile(), code, "Blah",
|
||||
MockHeaders({"location": to_url}))
|
||||
except urllib2.HTTPError:
|
||||
# 307 in response to POST requires user OK
|
||||
self.assert_(code == 307 and data is not None)
|
||||
|
@ -544,38 +570,65 @@ class HandlerTests(unittest.TestCase):
|
|||
|
||||
# loop detection
|
||||
req = Request(from_url)
|
||||
req.origin_req_host = "example.com"
|
||||
def redirect(h, req, code, url=to_url):
|
||||
method = getattr(h, "http_error_%s" % code)
|
||||
method(req, MockFile(), code, "Blah", {"location": url})
|
||||
def redirect(h, req, url=to_url):
|
||||
h.http_error_302(req, MockFile(), 302, "Blah",
|
||||
MockHeaders({"location": url}))
|
||||
# Note that the *original* request shares the same record of
|
||||
# redirections with the sub-requests caused by the redirections.
|
||||
# once
|
||||
redirect(h, req, 302)
|
||||
# twice: loop detected
|
||||
self.assertRaises(urllib2.HTTPError, redirect, h, req, 302)
|
||||
# and again
|
||||
self.assertRaises(urllib2.HTTPError, redirect, h, req, 302)
|
||||
# but this is a different redirect code, so OK...
|
||||
redirect(h, req, 301)
|
||||
self.assertRaises(urllib2.HTTPError, redirect, h, req, 301)
|
||||
# order doesn't matter
|
||||
redirect(h, req, 303)
|
||||
redirect(h, req, 307)
|
||||
self.assertRaises(urllib2.HTTPError, redirect, h, req, 303)
|
||||
|
||||
# detect endless non-repeating chain of redirects
|
||||
req = Request(from_url)
|
||||
req.origin_req_host = "example.com"
|
||||
# detect infinite loop redirect of a URL to itself
|
||||
req = Request(from_url, origin_req_host="example.com")
|
||||
count = 0
|
||||
try:
|
||||
while 1:
|
||||
redirect(h, req, 302, "http://example.com/%d" % count)
|
||||
redirect(h, req, "http://example.com/")
|
||||
count = count + 1
|
||||
except urllib2.HTTPError:
|
||||
# don't stop until max_repeats, because cookies may introduce state
|
||||
self.assertEqual(count, urllib2.HTTPRedirectHandler.max_repeats)
|
||||
|
||||
# detect endless non-repeating chain of redirects
|
||||
req = Request(from_url, origin_req_host="example.com")
|
||||
count = 0
|
||||
try:
|
||||
while 1:
|
||||
redirect(h, req, "http://example.com/%d" % count)
|
||||
count = count + 1
|
||||
except urllib2.HTTPError:
|
||||
self.assertEqual(count,
|
||||
urllib2.HTTPRedirectHandler.max_redirections)
|
||||
|
||||
def test_cookie_redirect(self):
|
||||
class MockHTTPHandler(urllib2.HTTPHandler):
|
||||
def __init__(self): self._count = 0
|
||||
def http_open(self, req):
|
||||
import mimetools
|
||||
from StringIO import StringIO
|
||||
if self._count == 0:
|
||||
self._count = self._count + 1
|
||||
msg = mimetools.Message(
|
||||
StringIO("Location: http://www.cracker.com/\r\n\r\n"))
|
||||
return self.parent.error(
|
||||
"http", req, MockFile(), 302, "Found", msg)
|
||||
else:
|
||||
self.req = req
|
||||
msg = mimetools.Message(StringIO("\r\n\r\n"))
|
||||
return MockResponse(200, "OK", msg, "", req.get_full_url())
|
||||
# cookies shouldn't leak into redirected requests
|
||||
from cookielib import CookieJar
|
||||
from urllib2 import build_opener, HTTPHandler, HTTPError, \
|
||||
HTTPCookieProcessor
|
||||
|
||||
from test_cookielib import interact_netscape
|
||||
|
||||
cj = CookieJar()
|
||||
interact_netscape(cj, "http://www.example.com/", "spam=eggs")
|
||||
hh = MockHTTPHandler()
|
||||
cp = HTTPCookieProcessor(cj)
|
||||
o = build_opener(hh, cp)
|
||||
o.open("http://www.example.com/")
|
||||
self.assert_(not hh.req.has_header("Cookie"))
|
||||
|
||||
|
||||
class MiscTests(unittest.TestCase):
|
||||
|
||||
|
|
|
@ -106,6 +106,7 @@ import sys
|
|||
import time
|
||||
import urlparse
|
||||
import bisect
|
||||
import cookielib
|
||||
|
||||
try:
|
||||
from cStringIO import StringIO
|
||||
|
@ -176,7 +177,8 @@ class GopherError(URLError):
|
|||
|
||||
class Request:
|
||||
|
||||
def __init__(self, url, data=None, headers={}):
|
||||
def __init__(self, url, data=None, headers={},
|
||||
origin_req_host=None, unverifiable=False):
|
||||
# unwrap('<URL:type://host/path>') --> 'type://host/path'
|
||||
self.__original = unwrap(url)
|
||||
self.type = None
|
||||
|
@ -188,6 +190,10 @@ class Request:
|
|||
for key, value in headers.items():
|
||||
self.add_header(key, value)
|
||||
self.unredirected_hdrs = {}
|
||||
if origin_req_host is None:
|
||||
origin_req_host = cookielib.request_host(self)
|
||||
self.origin_req_host = origin_req_host
|
||||
self.unverifiable = unverifiable
|
||||
|
||||
def __getattr__(self, attr):
|
||||
# XXX this is a fallback mechanism to guard against these
|
||||
|
@ -242,6 +248,12 @@ class Request:
|
|||
self.host, self.type = host, type
|
||||
self.__r_host = self.__original
|
||||
|
||||
def get_origin_req_host(self):
    """Return the value stored in this request's origin_req_host attribute."""
    origin_host = self.origin_req_host
    return origin_host
|
||||
|
||||
def is_unverifiable(self):
    """Return the value stored in this request's unverifiable attribute."""
    flag = self.unverifiable
    return flag
|
||||
|
||||
def add_header(self, key, val):
|
||||
# useful for something like authentication
|
||||
self.headers[key.capitalize()] = val
|
||||
|
@ -254,6 +266,15 @@ class Request:
|
|||
return bool(header_name in self.headers or
|
||||
header_name in self.unredirected_hdrs)
|
||||
|
||||
def get_header(self, header_name, default=None):
    """Look up header_name, preferring self.headers over unredirected_hdrs.

    Returns the value from self.headers when present; otherwise the value
    from self.unredirected_hdrs; otherwise default.
    """
    # The fallback is computed first, exactly as the nested .get() did.
    fallback = self.unredirected_hdrs.get(header_name, default)
    return self.headers.get(header_name, fallback)
|
||||
|
||||
def header_items(self):
    """Return the items of the merged header mapping.

    Starts from a copy of self.unredirected_hdrs and overlays self.headers,
    so on a key collision the value from self.headers wins.  Neither source
    dictionary is modified.
    """
    merged = self.unredirected_hdrs.copy()
    merged.update(self.headers)
    return merged.items()
|
||||
|
||||
class OpenerDirector:
|
||||
def __init__(self):
|
||||
|
@ -460,7 +481,11 @@ class HTTPDefaultErrorHandler(BaseHandler):
|
|||
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
|
||||
|
||||
class HTTPRedirectHandler(BaseHandler):
|
||||
# maximum number of redirections before assuming we're in a loop
|
||||
# maximum number of redirections to any single URL
|
||||
# this is needed because of the state that cookies introduce
|
||||
max_repeats = 4
|
||||
# maximum total number of redirections (regardless of URL) before
|
||||
# assuming we're in a loop
|
||||
max_redirections = 10
|
||||
|
||||
def redirect_request(self, req, fp, code, msg, headers, newurl):
|
||||
|
@ -481,7 +506,10 @@ class HTTPRedirectHandler(BaseHandler):
|
|||
# from the user (of urllib2, in this case). In practice,
|
||||
# essentially all clients do redirect in this case, so we
|
||||
# do the same.
|
||||
return Request(newurl, headers=req.headers)
|
||||
return Request(newurl,
|
||||
headers=req.headers,
|
||||
origin_req_host=req.get_origin_req_host(),
|
||||
unverifiable=True)
|
||||
else:
|
||||
raise HTTPError(req.get_full_url(), code, msg, headers, fp)
|
||||
|
||||
|
@ -490,10 +518,12 @@ class HTTPRedirectHandler(BaseHandler):
|
|||
# have already seen. Do this by adding a handler-specific
|
||||
# attribute to the Request object.
|
||||
def http_error_302(self, req, fp, code, msg, headers):
|
||||
# Some servers (incorrectly) return multiple Location headers
|
||||
# (so probably same goes for URI). Use first header.
|
||||
if 'location' in headers:
|
||||
newurl = headers['location']
|
||||
newurl = headers.getheaders('location')[0]
|
||||
elif 'uri' in headers:
|
||||
newurl = headers['uri']
|
||||
newurl = headers.getheaders('uri')[0]
|
||||
else:
|
||||
return
|
||||
newurl = urlparse.urljoin(req.get_full_url(), newurl)
|
||||
|
@ -506,20 +536,16 @@ class HTTPRedirectHandler(BaseHandler):
|
|||
return
|
||||
|
||||
# loop detection
|
||||
# .redirect_dict has a key (url, code) if url was previously
|
||||
# visited as a result of a redirection with that code. The
|
||||
# code is needed in addition to the URL because visiting a URL
|
||||
# twice isn't necessarily a loop: there is more than one way
|
||||
# to redirect (301, 302, 303, 307, refresh).
|
||||
key = (newurl, code)
|
||||
# .redirect_dict has a key url if url was previously visited.
|
||||
if hasattr(req, 'redirect_dict'):
|
||||
visited = new.redirect_dict = req.redirect_dict
|
||||
if key in visited or len(visited) >= self.max_redirections:
|
||||
if (visited.get(newurl, 0) >= self.max_repeats or
|
||||
len(visited) >= self.max_redirections):
|
||||
raise HTTPError(req.get_full_url(), code,
|
||||
self.inf_msg + msg, headers, fp)
|
||||
else:
|
||||
visited = new.redirect_dict = req.redirect_dict = {}
|
||||
visited[key] = None
|
||||
visited[newurl] = visited.get(newurl, 0) + 1
|
||||
|
||||
# Don't close the fp until we are sure that we won't use it
|
||||
# with HTTPError.
|
||||
|
@ -912,7 +938,7 @@ class AbstractHTTPHandler(BaseHandler):
|
|||
def set_http_debuglevel(self, level):
|
||||
self._debuglevel = level
|
||||
|
||||
def do_request(self, request):
|
||||
def do_request_(self, request):
|
||||
host = request.get_host()
|
||||
if not host:
|
||||
raise URLError('no host given')
|
||||
|
@ -987,7 +1013,7 @@ class HTTPHandler(AbstractHTTPHandler):
|
|||
def http_open(self, req):
|
||||
return self.do_open(httplib.HTTPConnection, req)
|
||||
|
||||
http_request = AbstractHTTPHandler.do_request
|
||||
http_request = AbstractHTTPHandler.do_request_
|
||||
|
||||
if hasattr(httplib, 'HTTPS'):
|
||||
class HTTPSHandler(AbstractHTTPHandler):
|
||||
|
@ -995,7 +1021,24 @@ if hasattr(httplib, 'HTTPS'):
|
|||
def https_open(self, req):
|
||||
return self.do_open(httplib.HTTPSConnection, req)
|
||||
|
||||
https_request = AbstractHTTPHandler.do_request
|
||||
https_request = AbstractHTTPHandler.do_request_
|
||||
|
||||
class HTTPCookieProcessor(BaseHandler):
    """Handler that runs requests and responses through a cookie jar.

    Each outgoing request is passed to the jar's add_cookie_header(), and
    each incoming response is passed to the jar's extract_cookies().  The
    same two hooks are registered for both http and https.
    """

    def __init__(self, cookiejar=None):
        """Store cookiejar; if it is None, create a new cookielib.CookieJar."""
        if cookiejar is None:
            # This module imports cookielib itself, not the CookieJar name,
            # so a bare CookieJar() here would raise NameError.
            cookiejar = cookielib.CookieJar()
        self.cookiejar = cookiejar

    def http_request(self, request):
        """Let the jar add its cookie header to request; return request."""
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar extract cookies from response; return response."""
        self.cookiejar.extract_cookies(response, request)
        return response

    # HTTPS traffic gets the same cookie treatment as plain HTTP.
    https_request = http_request
    https_response = http_response
|
||||
|
||||
class UnknownHandler(BaseHandler):
|
||||
def unknown_open(self, req):
|
||||
|
|
|
@ -311,6 +311,10 @@ Extension modules
|
|||
Library
|
||||
-------
|
||||
|
||||
- Added a new module: cookielib. Automatic cookie handling for HTTP
|
||||
clients. Also, support for cookielib has been added to urllib2, so
|
||||
urllib2.urlopen() can transparently handle cookies.
|
||||
|
||||
- stringprep.py now uses built-in set() instead of sets.Set().
|
||||
|
||||
- Bug #876278: Unbounded recursion in modulefinder
|
||||
|
|
Loading…
Reference in New Issue