Fullscreen support, UI fixes, reset improved
[smpy-maemo.git] / mechanize / _clientcookie.py
blobe8f0f67d4a181b2b603f4fdb633eaa23e700d09c
1 """HTTP cookie handling for web clients.
3 This module originally developed from my port of Gisle Aas' Perl module
4 HTTP::Cookies, from the libwww-perl library.
6 Docstrings, comments and debug strings in this code refer to the
7 attributes of the HTTP cookie system as cookie-attributes, to distinguish
8 them clearly from Python attributes.
                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar
22 Comments to John J Lee <jjl@pobox.com>.
25 Copyright 2002-2006 John J Lee <jjl@pobox.com>
26 Copyright 1997-1999 Gisle Aas (original libwww-perl code)
27 Copyright 2002-2003 Johnny Lee (original MSIE Perl code)
29 This code is free software; you can redistribute it and/or modify it
30 under the terms of the BSD or ZPL 2.1 licenses (see the file
31 COPYING.txt included with the distribution).
33 """
35 import sys, re, copy, time, struct, urllib, types, logging
36 try:
37 import threading
38 _threading = threading; del threading
39 except ImportError:
40 import dummy_threading
41 _threading = dummy_threading; del dummy_threading
42 import httplib # only for the default HTTP port
44 MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
45 "instance initialised with one)")
46 DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
48 from _headersutil import split_header_words, parse_ns_headers
49 from _util import isstringlike
50 import _rfc3986
52 debug = logging.getLogger("mechanize.cookies").debug
def reraise_unmasked_exceptions(unmasked=()):
    """Re-raise the exception currently being handled, or swallow it.

    Intended to be called from inside a catch-all ``except:`` clause: the
    bare ``raise`` statements below re-raise whatever exception is being
    handled at the time of the call.

    unmasked: tuple of extra exception classes that must always propagate,
     in addition to KeyboardInterrupt, SystemExit and MemoryError

    If mechanize.USE_BARE_EXCEPT is false, every exception is re-raised.
    Otherwise, exceptions that are not "unmasked" are swallowed and reported
    through warnings.warn together with their formatted traceback.

    """
    # There are a few catch-all except: statements in this module, for
    # catching input that's bad in unexpected ways.
    # This function re-raises some exceptions we don't want to trap.
    import mechanize, warnings
    if not mechanize.USE_BARE_EXCEPT:
        raise
    unmasked = unmasked + (KeyboardInterrupt, SystemExit, MemoryError)
    etype = sys.exc_info()[0]
    if issubclass(etype, unmasked):
        raise
    # swallowed an exception
    import traceback, StringIO
    # capture the traceback text so it can be embedded in the warning
    f = StringIO.StringIO()
    traceback.print_exc(None, f)
    msg = f.getvalue()
    warnings.warn("mechanize bug!\n%s" % msg, stacklevel=2)
# Matches text ending in a dot followed by digits (e.g. "192.168.1.2").
IPV4_RE = re.compile(r"\.\d+$")
def is_HDN(text):
    """Return True if text is a host domain name.

    The empty string, anything that looks like an IPv4 address, and names
    with a leading or trailing dot are not host domain names.

    """
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    if text == "":
        return False
    if IPV4_RE.search(text):
        return False
    return not (text.startswith(".") or text.endswith("."))
def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    # "A has the form NB" means A *ends* with B and the prefix N is
    # non-empty.  Merely finding B somewhere inside A is not enough:
    # a.acme.com.evil.org must not domain-match .acme.com.
    has_form_nb = A.endswith(B) and len(A) > len(B)
    return (
        has_form_nb and
        B.startswith(".") and
        is_HDN(B[1:])
        )
def liberal_is_HDN(text):
    """Return True if text is sort-of-like a host domain name.

    For accepting/blocking domains.  Anything that does not look like an
    IPv4 address is accepted.

    """
    looks_like_ip = IPV4_RE.search(text) is not None
    return not looks_like_ip
def user_domain_match(A, B):
    """Return True if domain A matches user-supplied pattern B.

    For blocking/accepting domains.

    A and B may be host domain names or IP addresses.  The comparison is
    case-insensitive.  A pattern starting with a dot matches any name that
    ends with it; every other pattern (and any IP address) must compare
    equal.

    """
    A = A.lower()
    B = B.lower()
    both_are_names = liberal_is_HDN(A) and liberal_is_HDN(B)
    if both_are_names and B.startswith("."):
        # dotted pattern: suffix match
        return A.endswith(B)
    # IP addresses and undotted patterns need an exact match
    return A == B
# Trailing ":<digits>" port suffix of a host string.
cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    authority = _rfc3986.urlsplit(request.get_full_url())[1]
    if authority is None:
        # URL carries no host part: fall back on the Host header
        authority = request.get_header("Host", "")

    # remove port, if present
    return cut_port_re.sub("", authority, 1).lower()
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.  A request-host
    that contains no dot and is not an IP address gets ".local" appended
    to form the effective name.

    """
    req_host = request_host(request)
    is_bare_name = "." not in req_host and not IPV4_RE.search(req_host)
    if is_bare_name:
        return req_host, req_host + ".local"
    return req_host, req_host
def request_path(request):
    """request-URI, as defined by RFC 2965.

    The path is escaped with escape_path and re-joined with the query and
    fragment; the result always starts with "/".

    """
    pieces = _rfc3986.urlsplit(request.get_full_url())
    path, query, frag = pieces[2:]
    escaped = escape_path(path)
    req_path = _rfc3986.urlunsplit((None, None, escaped, query, frag))
    if req_path.startswith("/"):
        return req_path
    return "/"+req_path
def request_port(request):
    """Return the request port as a string, or None if it is not numeric.

    The port is whatever follows the first ':' in request.get_host(); when
    there is no ':', DEFAULT_HTTP_PORT is returned.

    """
    host = request.get_host()
    colon = host.find(':')
    if colon < 0:
        return DEFAULT_HTTP_PORT
    port = host[colon+1:]
    try:
        int(port)
    except ValueError:
        debug("nonnumeric port: '%s'", port)
        return None
    return port
# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
# A %-escape: percent sign followed by two hex digits (captured).
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return the %-escape matched by ESCAPED_CHAR_RE with uppercase hex."""
    hex_digits = match.group(1)
    return "%" + hex_digits.upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, types.UnicodeType):
        path = path.encode("utf-8")
    quoted = urllib.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    first_dot = h.find(".")
    if first_dot < 0:
        # no dot at all: H cannot have the form A.B
        return h
    # b is everything after the first label (A is h[:first_dot])
    b = h[first_dot+1:]
    if is_HDN(h) and (b.find(".") >= 0 or b == "local"):
        return "."+b
    return h
def is_third_party(request):
    """Return True if request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    host = request_host(request)
    # the origin request's request-host was stuffed into request by
    # _urllib2_support.AbstractHTTPHandler
    origin_reach = reach(request.origin_req_host)
    return not domain_match(host, origin_reach)
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    version: integer;
    name: string;
    value: string (may be None);
    port: string; None indicates no attribute was supplied (eg. "Port", rather
     than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list
     string (eg. "80,8080")
    port_specified: boolean; true if a value was supplied with the Port
     cookie-attribute
    domain: string;
    domain_specified: boolean; true if Domain was explicitly set
    domain_initial_dot: boolean; true if Domain as set in HTTP header by server
     started with a dot (yes, this really is necessary!)
    path: string;
    path_specified: boolean; true if Path was explicitly set
    secure:  boolean; true if should only be returned over secure connection
    expires: integer; seconds since epoch (RFC 2965 cookies should calculate
     this value from the Max-Age attribute)
    discard: boolean, true if this is a session cookie; (if no expires value,
     this should be true)
    comment: string;
    comment_url: string;
    rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not
     Set-Cookie2:) header, but had a version cookie-attribute of 1
    rest: mapping of other cookie-attributes

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than "Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie-attributes, normalising a few of them.

        version and expires are converted with int() unless None; domain is
        lowercased; rest is shallow-copied.

        Raises ValueError if port is None while port_specified is True.

        """
        if version is not None:
            version = int(version)
        if expires is not None:
            expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        # private copy, so later changes to the caller's mapping don't leak in
        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return true if the non-standard cookie-attribute name is present."""
        # "in" rather than the deprecated dict.has_key()
        return name in self._rest
    def get_nonstandard_attr(self, name, default=None):
        """Return the non-standard cookie-attribute name, or default."""
        return self._rest.get(name, default)
    def set_nonstandard_attr(self, name, value):
        """Set the non-standard cookie-attribute name to value."""
        self._rest[name] = value
    def nonstandard_attr_keys(self):
        """Return the names of all non-standard cookie-attributes."""
        return self._rest.keys()

    def is_expired(self, now=None):
        """Return true if the cookie has an expiry time that is <= now.

        now: seconds since epoch; defaults to the current time.  Cookies
         whose expires is None never expire.

        """
        if now is None:
            now = time.time()
        return (self.expires is not None) and (self.expires <= now)

    def __str__(self):
        if self.port is None:
            p = ""
        else:
            p = ":"+self.port
        limit = self.domain + p + self.path
        if self.value is not None:
            namevalue = "%s=%s" % (self.name, self.value)
        else:
            namevalue = self.name
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        args = []
        for name in ["version", "name", "value",
                     "port", "port_specified",
                     "domain", "domain_specified", "domain_initial_dot",
                     "path", "path_specified",
                     "secure", "expires", "discard", "comment", "comment_url",
                     ]:
            attr = getattr(self, name)
            args.append("%s=%s" % (name, repr(attr)))
        # rest is stored privately as _rest, so it is handled separately
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(args)
class CookiePolicy:
    """Defines which cookies get accepted from and returned to server.

    May also modify cookies.

    The subclass DefaultCookiePolicy defines the standard rules for Netscape
    and RFC 2965 cookies -- override that if you want a customised policy.

    As well as implementing set_ok and return_ok, implementations of this
    interface must also supply the following attributes, indicating which
    protocols should be used, and how.  These can be read and set at any time,
    though whether that makes complete sense from the protocol point of view is
    doubtful.

    Public attributes:

    netscape: implement netscape protocol
    rfc2965: implement RFC 2965 protocol
    rfc2109_as_netscape:
       WARNING: This argument will change or go away if is not accepted into
                the Python standard library in this form!
     If true, treat RFC 2109 cookies as though they were Netscape cookies.  The
     default is for this attribute to be None, which means treat 2109 cookies
     as RFC 2965 cookies unless RFC 2965 handling is switched off (which it is,
     by default), and as Netscape cookies otherwise.
    hide_cookie2: don't add Cookie2 header to requests (the presence of
     this header indicates to the server that we understand RFC 2965
     cookies)

    """

    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        cookie: mechanize.Cookie object
        request: object implementing the interface defined by
         CookieJar.extract_cookies.__doc__

        Subclasses must override this; the base implementation raises
        NotImplementedError.

        """
        raise NotImplementedError

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        cookie: mechanize.Cookie object
        request: object implementing the interface defined by
         CookieJar.add_cookie_header.__doc__

        Subclasses must override this; the base implementation raises
        NotImplementedError.

        """
        raise NotImplementedError

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This is here as an optimization, to remove the need for checking every
        cookie with a particular domain (which may involve reading many files).
        The default implementations of domain_return_ok and path_return_ok
        (return True) leave all the work to return_ok.

        If domain_return_ok returns true for the cookie domain, path_return_ok
        is called for the cookie path.  Otherwise, path_return_ok and return_ok
        are never called for that cookie domain.  If path_return_ok returns
        true, return_ok is called with the Cookie object itself for a full
        check.  Otherwise, return_ok is never called for that cookie path.

        Note that domain_return_ok is called for every *cookie* domain, not
        just for the *request* domain.  For example, the function might be
        called with both ".acme.com" and "www.acme.com" if the request domain is
        "www.acme.com".  The same goes for path_return_ok.

        For argument documentation, see the docstring for return_ok.

        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        See the docstring for domain_return_ok.

        """
        return True
class DefaultCookiePolicy(CookiePolicy):
    """Implements the standard rules for accepting and returning cookies.

    Both RFC 2965 and Netscape cookies are covered.  RFC 2965 handling is
    switched off by default.

    The easiest way to provide your own policy is to override this class and
    call its methods in your overridden implementations before adding your own
    additional checks.

    import mechanize
    class MyCookiePolicy(mechanize.DefaultCookiePolicy):
        def set_ok(self, cookie, request):
            if not mechanize.DefaultCookiePolicy.set_ok(
                self, cookie, request):
                return False
            if i_dont_want_to_store_this_cookie():
                return False
            return True

    In addition to the features required to implement the CookiePolicy
    interface, this class allows you to block and allow domains from setting
    and receiving cookies.  There are also some strictness switches that allow
    you to tighten up the rather loose Netscape protocol rules a little bit (at
    the cost of blocking some benign cookies).

    A domain blacklist and whitelist is provided (both off by default).  Only
    domains not in the blacklist and present in the whitelist (if the whitelist
    is active) participate in cookie setting and returning.  Use the
    blocked_domains constructor argument, and blocked_domains and
    set_blocked_domains methods (and the corresponding argument and methods for
    allowed_domains).  If you set a whitelist, you can turn it off again by
    setting it to None.

    Domains in block or allow lists that do not start with a dot must
    string-compare equal.  For example, "acme.com" matches a blacklist entry of
    "acme.com", but "www.acme.com" does not.  Domains that do start with a dot
    are matched by more specific domains too.  For example, both "www.acme.com"
    and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does
    not).  IP addresses are an exception, and must match exactly.  For example,
    if blocked_domains contains "192.168.1.2" and ".168.1.2" 192.168.1.2 is
    blocked, but 193.168.1.2 is not.

    Additional Public Attributes:

    General strictness switches

    strict_domain: don't allow sites to set two-component domains with
     country-code top-level domains like .co.uk, .gov.uk, .co.nz. etc.
     This is far from perfect and isn't guaranteed to work!

    RFC 2965 protocol strictness switches

    strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable
     transactions (usually, an unverifiable transaction is one resulting from
     a redirect or an image hosted on another site); if this is false, cookies
     are NEVER blocked on the basis of verifiability

    Netscape protocol strictness switches

    strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions
     even to Netscape cookies
    strict_ns_domain: flags indicating how strict to be with domain-matching
     rules for Netscape cookies:
      DomainStrictNoDots: when setting cookies, host prefix must not contain a
       dot (eg. www.foo.bar.com can't set a cookie for .bar.com, because
       www.foo contains a dot)
      DomainStrictNonDomain: cookies that did not explicitly specify a Domain
       cookie-attribute can only be returned to a domain that string-compares
       equal to the domain that set the cookie (eg. rockets.acme.com won't
       be returned cookies from acme.com that had no Domain cookie-attribute)
      DomainRFC2965Match: when setting cookies, require a full RFC 2965
       domain-match
      DomainLiberal and DomainStrict are the most useful combinations of the
       above flags, for convenience
    strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that
     have names starting with '$'
    strict_ns_set_path: don't allow setting cookies whose path doesn't
     path-match request URI

    """

    # bit flags for strict_ns_domain
    DomainStrictNoDots = 1
    DomainStrictNonDomain = 2
    DomainRFC2965Match = 4

    DomainLiberal = 0
    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain

    def __init__(self,
                 blocked_domains=None, allowed_domains=None,
                 netscape=True, rfc2965=False,
                 # WARNING: this argument will change or go away if is not
                 # accepted into the Python standard library in this form!
                 # default, ie. treat 2109 as netscape iff not rfc2965
                 rfc2109_as_netscape=None,
                 hide_cookie2=False,
                 strict_domain=False,
                 strict_rfc2965_unverifiable=True,
                 strict_ns_unverifiable=False,
                 strict_ns_domain=DomainLiberal,
                 strict_ns_set_initial_dollar=False,
                 strict_ns_set_path=False,
                 ):
        """
        Constructor arguments should be used as keyword arguments only.

        blocked_domains: sequence of domain names that we never accept cookies
         from, nor return cookies to
        allowed_domains: if not None, this is a sequence of the only domains
         for which we accept and return cookies

        For other arguments, see CookiePolicy.__doc__ and
        DefaultCookiePolicy.__doc__.

        """
        self.netscape = netscape
        self.rfc2965 = rfc2965
        self.rfc2109_as_netscape = rfc2109_as_netscape
        self.hide_cookie2 = hide_cookie2
        self.strict_domain = strict_domain
        self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
        self.strict_ns_unverifiable = strict_ns_unverifiable
        self.strict_ns_domain = strict_ns_domain
        self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
        self.strict_ns_set_path = strict_ns_set_path

        # both lists are stored as tuples; the allow-list may also be None,
        # meaning "no allow-list is active"
        if blocked_domains is not None:
            self._blocked_domains = tuple(blocked_domains)
        else:
            self._blocked_domains = ()

        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def blocked_domains(self):
        """Return the sequence of blocked domains (as a tuple)."""
        return self._blocked_domains
    def set_blocked_domains(self, blocked_domains):
        """Set the sequence of blocked domains."""
        self._blocked_domains = tuple(blocked_domains)

    def is_blocked(self, domain):
        """Return True if domain matches any entry of the block-list."""
        for blocked_domain in self._blocked_domains:
            if user_domain_match(domain, blocked_domain):
                return True
        return False

    def allowed_domains(self):
        """Return None, or the sequence of allowed domains (as a tuple)."""
        return self._allowed_domains
    def set_allowed_domains(self, allowed_domains):
        """Set the sequence of allowed domains, or None."""
        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def is_not_allowed(self, domain):
        """Return True if an allow-list is active and domain is not on it."""
        if self._allowed_domains is None:
            return False
        for allowed_domain in self._allowed_domains:
            if user_domain_match(domain, allowed_domain):
                return False
        return True

    def set_ok(self, cookie, request):
        """
        If you override set_ok, be sure to call this method.  If it returns
        false, so should your subclass (assuming your subclass wants to be more
        strict about which cookies to accept).

        """
        debug(" - checking cookie %s", cookie)

        assert cookie.name is not None

        # run every set_ok_<check> method in turn; first failure rejects
        for n in "version", "verifiability", "name", "path", "domain", "port":
            fn_name = "set_ok_"+n
            fn = getattr(self, fn_name)
            if not fn(cookie, request):
                return False

        return True

    def set_ok_version(self, cookie, request):
        """Reject cookies with no version or a switched-off protocol."""
        if cookie.version is None:
            # Version is always set to 0 by parse_ns_headers if it's a Netscape
            # cookie, so this must be an invalid RFC 2965 cookie.
            debug(" Set-Cookie2 without version attribute (%s)", cookie)
            return False
        if cookie.version > 0 and not self.rfc2965:
            debug(" RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            debug(" Netscape cookies are switched off")
            return False
        return True

    def set_ok_verifiability(self, cookie, request):
        """Reject third-party cookies on unverifiable requests, if strict."""
        if request.unverifiable and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                debug(" third-party RFC 2965 cookie during "
                      "unverifiable transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                debug(" third-party Netscape cookie during "
                      "unverifiable transaction")
                return False
        return True

    def set_ok_name(self, cookie, request):
        """Reject '$'-prefixed Netscape cookie names, if strict."""
        # Try and stop servers setting V0 cookies designed to hack other
        # servers that know both V0 and V1 protocols.
        if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
            cookie.name.startswith("$")):
            debug(" illegal name (starts with '$'): '%s'", cookie.name)
            return False
        return True

    def set_ok_path(self, cookie, request):
        """Reject explicit paths that are not a prefix of the request path.

        Always enforced for RFC 2965 cookies; for Netscape cookies only when
        strict_ns_set_path is set.

        """
        if cookie.path_specified:
            req_path = request_path(request)
            if ((cookie.version > 0 or
                 (cookie.version == 0 and self.strict_ns_set_path)) and
                not req_path.startswith(cookie.path)):
                debug(" path attribute %s is not a prefix of request "
                      "path %s", cookie.path, req_path)
                return False
        return True

    def set_ok_countrycode_domain(self, cookie, request):
        """Return False if explicit cookie domain is not acceptable.

        Called by set_ok_domain, for convenience of overriding by
        subclasses.

        """
        if cookie.domain_specified and self.strict_domain:
            domain = cookie.domain
            # since domain was specified, we know that:
            assert domain.startswith(".")
            if domain.count(".") == 2:
                # domain like .foo.bar
                i = domain.rfind(".")
                tld = domain[i+1:]
                sld = domain[1:i]
                if (sld.lower() in [
                    "co", "ac",
                    "com", "edu", "org", "net", "gov", "mil", "int",
                    "aero", "biz", "cat", "coop", "info", "jobs", "mobi",
                    "museum", "name", "pro", "travel",
                    ] and
                    len(tld) == 2):
                    # domain like .co.uk
                    return False
        return True

    def set_ok_domain(self, cookie, request):
        """Check block/allow lists and the domain-matching rules for setting."""
        if self.is_blocked(cookie.domain):
            debug(" domain %s is in user block-list", cookie.domain)
            return False
        if self.is_not_allowed(cookie.domain):
            debug(" domain %s is not in user allow-list", cookie.domain)
            return False
        if not self.set_ok_countrycode_domain(cookie, request):
            debug(" country-code second level domain %s", cookie.domain)
            return False
        if cookie.domain_specified:
            req_host, erhn = eff_request_host(request)
            domain = cookie.domain
            if domain.startswith("."):
                undotted_domain = domain[1:]
            else:
                undotted_domain = domain
            embedded_dots = (undotted_domain.find(".") >= 0)
            if not embedded_dots and domain != ".local":
                debug(" non-local domain %s contains no embedded dot",
                      domain)
                return False
            if cookie.version == 0:
                if (not erhn.endswith(domain) and
                    (not erhn.startswith(".") and
                     not ("."+erhn).endswith(domain))):
                    debug(" effective request-host %s (even with added "
                          "initial dot) does not end end with %s",
                          erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainRFC2965Match)):
                if not domain_match(erhn, domain):
                    debug(" effective request-host %s does not domain-match "
                          "%s", erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainStrictNoDots)):
                # the part of the request-host in front of the cookie domain
                host_prefix = req_host[:-len(domain)]
                if (host_prefix.find(".") >= 0 and
                    not IPV4_RE.search(req_host)):
                    debug(" host prefix %s for domain %s contains a dot",
                          host_prefix, domain)
                    return False
        return True

    def set_ok_port(self, cookie, request):
        """Require the request port to be in an explicitly given port list."""
        if cookie.port_specified:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            else:
                req_port = str(req_port)
            for p in cookie.port.split(","):
                try:
                    int(p)
                except ValueError:
                    debug(" bad port %s (not numeric)", p)
                    return False
                if p == req_port:
                    break
            else:
                # loop finished without finding the request port
                debug(" request port (%s) not found in %s",
                      req_port, cookie.port)
                return False
        return True

    def return_ok(self, cookie, request):
        """
        If you override return_ok, be sure to call this method.  If it returns
        false, so should your subclass (assuming your subclass wants to be more
        strict about which cookies to return).

        """
        # Path has already been checked by path_return_ok, and domain blocking
        # done by domain_return_ok.
        debug(" - checking cookie %s", cookie)

        # run every return_ok_<check> method in turn; first failure rejects
        for n in "version", "verifiability", "secure", "expires", "port", "domain":
            fn_name = "return_ok_"+n
            fn = getattr(self, fn_name)
            if not fn(cookie, request):
                return False
        return True

    def return_ok_version(self, cookie, request):
        """Refuse cookies whose protocol is switched off."""
        if cookie.version > 0 and not self.rfc2965:
            debug(" RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            debug(" Netscape cookies are switched off")
            return False
        return True

    def return_ok_verifiability(self, cookie, request):
        """Refuse third-party cookies on unverifiable requests, if strict."""
        if request.unverifiable and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                debug(" third-party RFC 2965 cookie during unverifiable "
                      "transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                debug(" third-party Netscape cookie during unverifiable "
                      "transaction")
                return False
        return True

    def return_ok_secure(self, cookie, request):
        """Refuse secure cookies on requests whose type is not "https"."""
        if cookie.secure and request.get_type() != "https":
            debug(" secure cookie with non-secure request")
            return False
        return True

    def return_ok_expires(self, cookie, request):
        """Refuse cookies that have expired as of self._now."""
        # NOTE(review): self._now is not assigned anywhere in this class;
        # presumably the owning CookieJar sets it before calling -- confirm.
        if cookie.is_expired(self._now):
            debug(" cookie expired")
            return False
        return True

    def return_ok_port(self, cookie, request):
        """Require the request port to be in the cookie's port list, if any."""
        if cookie.port:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            for p in cookie.port.split(","):
                if p == req_port:
                    break
            else:
                debug(" request port %s does not match cookie port %s",
                      req_port, cookie.port)
                return False
        return True

    def return_ok_domain(self, cookie, request):
        """Check that the request host is entitled to receive the cookie."""
        req_host, erhn = eff_request_host(request)
        domain = cookie.domain

        # Compare Netscape cookie domains on a label boundary: without the
        # leading dot, a cookie for "example.com" would also be returned to
        # "badexample.com".
        if domain and not domain.startswith("."):
            dotdomain = "."+domain
        else:
            dotdomain = domain

        # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
        if (cookie.version == 0 and
            (self.strict_ns_domain & self.DomainStrictNonDomain) and
            not cookie.domain_specified and domain != erhn):
            debug(" cookie with unspecified domain does not string-compare "
                  "equal to request domain")
            return False

        if cookie.version > 0 and not domain_match(erhn, domain):
            debug(" effective request-host name %s does not domain-match "
                  "RFC 2965 cookie domain %s", erhn, domain)
            return False
        if cookie.version == 0 and not ("."+erhn).endswith(dotdomain):
            debug(" request-host %s does not match Netscape cookie domain "
                  "%s", req_host, domain)
            return False
        return True

    def domain_return_ok(self, domain, request):
        """Quick pre-check of a cookie domain against the request host.

        Returns False if no cookie with this domain could be returned for
        request, or if the domain is blocked or not allowed by the user.

        """
        # Liberal check of domain.  This is here as an optimization to avoid
        # having to load lots of MSIE cookie files unless necessary.

        # Munge req_host and erhn to always start with a dot, so as to err on
        # the side of letting cookies through.
        dotted_req_host, dotted_erhn = eff_request_host(request)
        if not dotted_req_host.startswith("."):
            dotted_req_host = "."+dotted_req_host
        if not dotted_erhn.startswith("."):
            dotted_erhn = "."+dotted_erhn
        # Dot-prefix the cookie domain too, so the suffix comparison happens
        # on a label boundary ("example.com" must not match
        # "badexample.com").
        if domain and not domain.startswith("."):
            dotdomain = "."+domain
        else:
            dotdomain = domain
        if not (dotted_req_host.endswith(dotdomain) or
                dotted_erhn.endswith(dotdomain)):
            #debug(" request domain %s does not match cookie domain %s",
            #      req_host, domain)
            return False

        if self.is_blocked(domain):
            debug(" domain %s is in user block-list", domain)
            return False
        if self.is_not_allowed(domain):
            debug(" domain %s is not in user allow-list", domain)
            return False

        return True

    def path_return_ok(self, path, request):
        """Return False unless the request path starts with the cookie path."""
        debug("- checking cookie path=%s", path)
        req_path = request_path(request)
        if not req_path.startswith(path):
            debug(" %s does not path-match %s", req_path, path)
            return False
        return True
def vals_sorted_by_key(adict):
    """Return a list of the values of adict, ordered by their sorted keys."""
    # Copy the keys into a real list before sorting, so this also works when
    # keys() returns something that has no sort() method.
    keys = list(adict.keys())
    keys.sort()
    # build a concrete list: callers index it and take its len()
    return [adict[key] for key in keys]
class MappingIterator:
    """Iterates over nested mapping, depth-first, in sorted order by key."""
    def __init__(self, mapping):
        # LIFO stack of (values, index of next value, mapping they came from)
        self._s = [(vals_sorted_by_key(mapping), 0, None)]

    def __iter__(self): return self

    def next(self):
        """Return the next non-mapping value; raise StopIteration at the end."""
        stack = self._s
        while stack:
            values, index, parent = stack.pop()
            if index >= len(values):
                # this level is exhausted: resume the enclosing one
                continue
            current = values[index]
            # remember where to carry on at this level
            stack.append((values, index + 1, parent))
            try:
                current.items
            except AttributeError:
                # non-mapping
                return current
            # mapping: descend into it
            stack.append((vals_sorted_by_key(current), 0, current))
        raise StopIteration()
class Absent:
    """Sentinel used as the second argument to dict.get.

    It distinguishes an absent dict key from one that is present with a
    None value.

    """
class CookieJar:
    """Collection of HTTP cookies.

    You may not need to know about this class: try mechanize.urlopen().

    The major methods are extract_cookies and add_cookie_header; these are all
    you are likely to need.

    CookieJar supports the iterator protocol:

    for cookie in cookiejar:
        # do something with cookie

    Methods:

    add_cookie_header(request)
    extract_cookies(response, request)
    make_cookies(response, request)
    set_cookie_if_ok(cookie, request)
    set_cookie(cookie)
    set_policy(policy)
    clear_session_cookies()
    clear_expired_cookies()
    clear(domain=None, path=None, name=None)

    Constructor arguments:

    policy: CookiePolicy object; a DefaultCookiePolicy instance is created
     when omitted.  It is stored on the _policy attribute and can be replaced
     with set_policy().

    Cookies are held in the _cookies attribute, a nested dictionary indexed
    by domain, then path, then cookie name.

    """

    non_word_re = re.compile(r"\W")
    quote_re = re.compile(r"([\"\\])")
    strict_domain_re = re.compile(r"\.?[^.]*")
    domain_re = re.compile(r"[^.]*")
    dots_re = re.compile(r"^\.+")

    def __init__(self, policy=None):
        """
        See CookieJar.__doc__ for argument documentation.

        """
        if policy is None:
            policy = DefaultCookiePolicy()
        self._policy = policy

        self._cookies = {}

        # for __getitem__ iteration in pre-2.2 Pythons
        self._prev_getitem_index = 0

    def set_policy(self, policy):
        """Replace the policy object consulted by this jar."""
        self._policy = policy

    def _cookies_for_domain(self, domain, request):
        """Return list of cookies stored under domain that may be returned.

        The policy's domain_return_ok, path_return_ok and return_ok methods
        are consulted in that order, from coarsest to finest check.

        """
        if not self._policy.domain_return_ok(domain, request):
            return []
        debug("Checking %s for cookies to return", domain)
        cookies = []
        cookies_by_path = self._cookies[domain]
        for path in cookies_by_path.keys():
            if not self._policy.path_return_ok(path, request):
                continue
            for cookie in cookies_by_path[path].values():
                if not self._policy.return_ok(cookie, request):
                    debug(" not returning cookie")
                    continue
                debug(" it's a match")
                cookies.append(cookie)
        return cookies

    def _cookies_for_request(self, request):
        """Return a list of cookies to be returned to server."""
        cookies = []
        for domain in self._cookies.keys():
            cookies.extend(self._cookies_for_domain(domain, request))
        return cookies

    def _cookie_attrs(self, cookies):
        """Return a list of cookie-attributes to be returned to server.

        like ['foo="bar"; $Path="/"', ...]

        The $Version attribute is also added when appropriate (currently only
        once per request).

        Note that the cookies list passed in is sorted in place.

        """
        # add cookies in order of most specific (ie. longest) path first
        def decreasing_size(a, b): return cmp(len(b.path), len(a.path))
        cookies.sort(decreasing_size)

        version_set = False

        attrs = []
        for cookie in cookies:
            # set version of Cookie header
            # XXX
            # What should it be if multiple matching Set-Cookie headers have
            #  different versions themselves?
            # Answer: there is no answer; was supposed to be settled by
            #  RFC 2965 errata, but that may never appear...
            version = cookie.version
            if not version_set:
                version_set = True
                if version > 0:
                    attrs.append("$Version=%s" % version)

            # quote cookie value if necessary
            # (not for Netscape protocol, which already has any quotes
            #  intact, due to the poorly-specified Netscape Cookie: syntax)
            if ((cookie.value is not None) and
                self.non_word_re.search(cookie.value) and version > 0):
                value = self.quote_re.sub(r"\\\1", cookie.value)
            else:
                value = cookie.value

            # add cookie-attributes to be returned in Cookie header
            if cookie.value is None:
                attrs.append(cookie.name)
            else:
                attrs.append("%s=%s" % (cookie.name, value))
            if version > 0:
                if cookie.path_specified:
                    attrs.append('$Path="%s"' % cookie.path)
                if cookie.domain.startswith("."):
                    domain = cookie.domain
                    # the stored domain always carries a leading dot when the
                    # server specified one; send it back as it was received
                    if (not cookie.domain_initial_dot and
                        domain.startswith(".")):
                        domain = domain[1:]
                    attrs.append('$Domain="%s"' % domain)
                if cookie.port is not None:
                    p = "$Port"
                    if cookie.port_specified:
                        p = p + ('="%s"' % cookie.port)
                    attrs.append(p)

        return attrs

    def add_cookie_header(self, request):
        """Add correct Cookie: header to request (urllib2.Request object).

        The Cookie2 header is also added unless policy.hide_cookie2 is true.

        The request object (usually a urllib2.Request instance) must support
        the methods get_full_url, get_host, get_type, has_header, get_header,
        header_items and add_unredirected_header, as documented by urllib2, and
        the port attribute (the port number).  Actually,
        RequestUpgradeProcessor will automatically upgrade your Request object
        to one with has_header, get_header, header_items and
        add_unredirected_header, if it lacks those methods, for compatibility
        with pre-2.4 versions of urllib2.

        An existing Cookie or Cookie2 header on the request is never
        overwritten.  Expired cookies are cleared from the jar afterwards.

        """
        debug("add_cookie_header")
        # the policy and the jar share one notion of "now" for this call
        self._policy._now = self._now = int(time.time())

        cookies = self._cookies_for_request(request)

        attrs = self._cookie_attrs(cookies)
        if attrs:
            if not request.has_header("Cookie"):
                request.add_unredirected_header("Cookie", "; ".join(attrs))

        # if necessary, advertise that we know RFC 2965
        if self._policy.rfc2965 and not self._policy.hide_cookie2:
            for cookie in cookies:
                if cookie.version != 1 and not request.has_header("Cookie2"):
                    request.add_unredirected_header("Cookie2", '$Version="1"')
                    break

        self.clear_expired_cookies()

    def _normalized_cookie_tuples(self, attrs_set):
        """Return list of tuples containing normalised cookie information.

        attrs_set is the list of lists of key,value pairs extracted from
        the Set-Cookie or Set-Cookie2 headers.

        Tuples are name, value, standard, rest, where name and value are the
        cookie name and value, standard is a dictionary containing the standard
        cookie-attributes (discard, secure, version, expires or max-age,
        domain, path and port) and rest is a dictionary containing the rest of
        the cookie-attributes.

        A cookie with a missing domain value, a missing or non-numeric
        max-age, or a missing value for any other standard attribute that
        requires one is dropped; the remaining cookies are still returned.

        """
        cookie_tuples = []

        boolean_attrs = "discard", "secure"
        value_attrs = ("version",
                       "expires", "max-age",
                       "domain", "path", "port",
                       "comment", "commenturl")

        for cookie_attrs in attrs_set:
            name, value = cookie_attrs[0]

            # Build dictionary of standard cookie-attributes (standard) and
            # dictionary of other cookie-attributes (rest).

            # Note: expiry time is normalised to seconds since epoch.  V0
            # cookies should have the Expires cookie-attribute, and V1 cookies
            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
            # accept either (but prefer Max-Age).
            max_age_set = False

            bad_cookie = False

            standard = {}
            rest = {}
            for k, v in cookie_attrs[1:]:
                lc = k.lower()
                # don't lose case distinction for unknown fields
                if lc in value_attrs or lc in boolean_attrs:
                    k = lc
                if k in boolean_attrs and v is None:
                    # boolean cookie-attribute is present, but has no value
                    # (like "discard", rather than "port=80")
                    v = True
                if k in standard:
                    # only first value is significant
                    continue
                if k == "domain":
                    if v is None:
                        debug(" missing value for domain attribute")
                        bad_cookie = True
                        break
                    # RFC 2965 section 3.3.3
                    v = v.lower()
                if k == "expires":
                    if max_age_set:
                        # Prefer max-age to expires (like Mozilla)
                        continue
                    if v is None:
                        debug(" missing or invalid value for expires "
                              "attribute: treating as session cookie")
                        continue
                if k == "max-age":
                    max_age_set = True
                    try:
                        v = int(v)
                    except (TypeError, ValueError):
                        # TypeError: attribute present without a value
                        # (v is None); ValueError: non-numeric value.  Only
                        # this cookie is rejected, not the whole header set.
                        debug(" missing or invalid (non-numeric) value for "
                              "max-age attribute")
                        bad_cookie = True
                        break
                    # convert RFC 2965 Max-Age to seconds since epoch
                    # XXX Strictly you're supposed to follow RFC 2616
                    #   age-calculation rules.  Remember that zero Max-Age is
                    #   a request to discard (old and new) cookie, though.
                    k = "expires"
                    v = self._now + v
                if (k in value_attrs) or (k in boolean_attrs):
                    if (v is None and
                        k not in ["port", "comment", "commenturl"]):
                        debug(" missing value for %s attribute" % k)
                        bad_cookie = True
                        break
                    standard[k] = v
                else:
                    rest[k] = v

            if bad_cookie:
                continue

            cookie_tuples.append((name, value, standard, rest))

        return cookie_tuples

    def _cookie_from_cookie_tuple(self, tup, request):
        """Return a Cookie built from a normalised cookie tuple, or None.

        tup is a (name, value, standard, rest) tuple as produced by
        _normalized_cookie_tuples; request supplies the defaults for path,
        domain and port.  None is returned when the expiry time is not in the
        future; in that case any stored cookie with the same domain, path and
        name is removed from the jar.

        """
        # standard is dict of standard cookie-attributes, rest is dict of the
        # rest of them
        name, value, standard, rest = tup

        domain = standard.get("domain", Absent)
        path = standard.get("path", Absent)
        port = standard.get("port", Absent)
        expires = standard.get("expires", Absent)

        # set the easy defaults
        version = standard.get("version", None)
        if version is not None: version = int(version)
        secure = standard.get("secure", False)
        # (discard is also set if expires is Absent)
        discard = standard.get("discard", False)
        comment = standard.get("comment", None)
        comment_url = standard.get("commenturl", None)

        # set default path
        if path is not Absent and path != "":
            path_specified = True
            path = escape_path(path)
        else:
            path_specified = False
            path = request_path(request)
            i = path.rfind("/")
            if i != -1:
                if version == 0:
                    # Netscape spec parts company from reality here
                    path = path[:i]
                else:
                    path = path[:i+1]
            if len(path) == 0: path = "/"

        # set default domain
        domain_specified = domain is not Absent
        # but first we have to remember whether it starts with a dot
        domain_initial_dot = False
        if domain_specified:
            domain_initial_dot = bool(domain.startswith("."))
        if domain is Absent:
            req_host, erhn = eff_request_host(request)
            domain = erhn
        elif not domain.startswith("."):
            domain = "."+domain

        # set default port
        port_specified = False
        if port is not Absent:
            if port is None:
                # Port attr present, but has no value: default to request port.
                # Cookie should then only be sent back on that port.
                port = request_port(request)
            else:
                port_specified = True
                port = re.sub(r"\s+", "", port)
        else:
            # No port attr present.  Cookie can be sent back on any port.
            port = None

        # set default expires and discard
        if expires is Absent:
            expires = None
            discard = True
        elif expires <= self._now:
            # Expiry date in past is request to delete cookie.  This can't be
            # in DefaultCookiePolicy, because can't delete cookies there.
            try:
                self.clear(domain, path, name)
            except KeyError:
                pass
            debug("Expiring cookie, domain='%s', path='%s', name='%s'",
                  domain, path, name)
            return None

        return Cookie(version,
                      name, value,
                      port, port_specified,
                      domain, domain_specified, domain_initial_dot,
                      path, path_specified,
                      secure,
                      expires,
                      discard,
                      comment,
                      comment_url,
                      rest)

    def _cookies_from_attrs_set(self, attrs_set, request):
        """Return list of Cookie objects built from parsed header attributes."""
        cookies = []
        for tup in self._normalized_cookie_tuples(attrs_set):
            cookie = self._cookie_from_cookie_tuple(tup, request)
            if cookie: cookies.append(cookie)
        return cookies

    def _process_rfc2109_cookies(self, cookies):
        """Flag version 1 cookies as RFC 2109, optionally downgrading them.

        When policy.rfc2109_as_netscape is None, the downgrade to version 0
        happens exactly when policy.rfc2965 is false.

        """
        if self._policy.rfc2109_as_netscape is None:
            rfc2109_as_netscape = not self._policy.rfc2965
        else:
            rfc2109_as_netscape = self._policy.rfc2109_as_netscape
        for cookie in cookies:
            if cookie.version == 1:
                cookie.rfc2109 = True
                if rfc2109_as_netscape:
                    # treat 2109 cookies as Netscape cookies rather than
                    # as RFC2965 cookies
                    cookie.version = 0

    def make_cookies(self, response, request):
        """Return sequence of Cookie objects extracted from response object.

        See extract_cookies.__doc__ for the interfaces required of the
        response and request arguments.

        """
        # get cookie-attributes for RFC 2965 and Netscape protocols
        headers = response.info()
        rfc2965_hdrs = headers.getheaders("Set-Cookie2")
        ns_hdrs = headers.getheaders("Set-Cookie")

        rfc2965 = self._policy.rfc2965
        netscape = self._policy.netscape

        if ((not rfc2965_hdrs and not ns_hdrs) or
            (not ns_hdrs and not rfc2965) or
            (not rfc2965_hdrs and not netscape) or
            (not netscape and not rfc2965)):
            return []  # no relevant cookie headers: quick exit

        # The catch-all handlers below are deliberate: headers may be bad in
        # unexpected ways; reraise_unmasked_exceptions decides what escapes.
        try:
            cookies = self._cookies_from_attrs_set(
                split_header_words(rfc2965_hdrs), request)
        except:
            reraise_unmasked_exceptions()
            cookies = []

        if ns_hdrs and netscape:
            try:
                # RFC 2109 and Netscape cookies
                ns_cookies = self._cookies_from_attrs_set(
                    parse_ns_headers(ns_hdrs), request)
            except:
                reraise_unmasked_exceptions()
                ns_cookies = []
            self._process_rfc2109_cookies(ns_cookies)

            # Look for Netscape cookies (from Set-Cookie headers) that match
            # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
            # For each match, keep the RFC 2965 cookie and ignore the Netscape
            # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
            # bundled in with the Netscape cookies for this purpose, which is
            # reasonable behaviour.
            if rfc2965:
                lookup = {}
                for cookie in cookies:
                    lookup[(cookie.domain, cookie.path, cookie.name)] = None

                ns_cookies = [
                    ns_cookie for ns_cookie in ns_cookies
                    if (ns_cookie.domain, ns_cookie.path,
                        ns_cookie.name) not in lookup]

            if ns_cookies:
                cookies.extend(ns_cookies)

        return cookies

    def set_cookie_if_ok(self, cookie, request):
        """Set a cookie if policy says it's OK to do so.

        cookie: mechanize.Cookie instance
        request: see extract_cookies.__doc__ for the required interface

        """
        self._policy._now = self._now = int(time.time())

        if self._policy.set_ok(cookie, request):
            self.set_cookie(cookie)

    def set_cookie(self, cookie):
        """Set a cookie, without checking whether or not it should be set.

        cookie: mechanize.Cookie instance

        Any cookie already stored under the same domain, path and name is
        replaced.

        """
        by_path = self._cookies.setdefault(cookie.domain, {})
        by_name = by_path.setdefault(cookie.path, {})
        by_name[cookie.name] = cookie

    def extract_cookies(self, response, request):
        """Extract cookies from response, where allowable given the request.

        Look for allowable Set-Cookie: and Set-Cookie2: headers in the response
        object passed as argument.  Any of these headers that are found are
        used to update the state of the object (subject to the policy.set_ok
        method's approval).

        The response object (usually the result of a call to
        mechanize.urlopen, or similar) should support an info method, which
        returns a mimetools.Message object (in fact, the 'mimetools.Message
        object' may be any object that provides a getheaders method).

        The request object (usually a urllib2.Request instance) must support
        the methods get_full_url and get_host, as documented by urllib2, and
        the port attribute (the port number).  The request is used to set
        default values for cookie-attributes as well as for checking that the
        cookie is OK to be set.

        """
        debug("extract_cookies: %s", response.info())
        self._policy._now = self._now = int(time.time())

        for cookie in self.make_cookies(response, request):
            if self._policy.set_ok(cookie, request):
                debug(" setting cookie: %s", cookie)
                self.set_cookie(cookie)

    def clear(self, domain=None, path=None, name=None):
        """Clear some cookies.

        Invoking this method without arguments will clear all cookies.  If
        given a single argument, only cookies belonging to that domain will be
        removed.  If given two arguments, cookies belonging to the specified
        path within that domain are removed.  If given three arguments, then
        the cookie with the specified name, path and domain is removed.

        Raises KeyError if no matching cookie exists.  Raises ValueError if
        name is given without domain and path, or path without domain.

        """
        if name is not None:
            if (domain is None) or (path is None):
                raise ValueError(
                    "domain and path must be given to remove a cookie by name")
            del self._cookies[domain][path][name]
        elif path is not None:
            if domain is None:
                raise ValueError(
                    "domain must be given to remove cookies by path")
            del self._cookies[domain][path]
        elif domain is not None:
            del self._cookies[domain]
        else:
            self._cookies = {}

    def clear_session_cookies(self):
        """Discard all session cookies.

        Discards all cookies held by object which had either no Max-Age or
        Expires cookie-attribute or an explicit Discard cookie-attribute, or
        which otherwise have ended up with a true discard attribute.  For
        interactive browsers, the end of a session usually corresponds to
        closing the browser window.

        Note that the save method won't save session cookies anyway, unless you
        ask otherwise by passing a true ignore_discard argument.

        """
        for cookie in self:
            if cookie.discard:
                self.clear(cookie.domain, cookie.path, cookie.name)

    def clear_expired_cookies(self):
        """Discard all expired cookies.

        You probably don't need to call this method: expired cookies are never
        sent back to the server (provided you're using DefaultCookiePolicy),
        this method is called by CookieJar itself every so often, and the save
        method won't save expired cookies anyway (unless you ask otherwise by
        passing a true ignore_expires argument).

        """
        now = time.time()
        for cookie in self:
            if cookie.is_expired(now):
                self.clear(cookie.domain, cookie.path, cookie.name)

    def __getitem__(self, i):
        # Emulates iteration through indexing (for pre-2.2 Pythons): index 0
        # (re)starts a traversal and every later index must follow on directly
        # from the previous one.
        if i == 0:
            self._getitem_iterator = self.__iter__()
        elif self._prev_getitem_index != i-1: raise IndexError(
            "CookieJar.__getitem__ only supports sequential iteration")
        self._prev_getitem_index = i
        try:
            return self._getitem_iterator.next()
        except StopIteration:
            raise IndexError()

    def __iter__(self):
        return MappingIterator(self._cookies)

    def __len__(self):
        """Return number of contained cookies."""
        i = 0
        for cookie in self: i = i + 1
        return i

    def __repr__(self):
        r = [repr(cookie) for cookie in self]
        return "<%s[%s]>" % (self.__class__, ", ".join(r))

    def __str__(self):
        r = [str(cookie) for cookie in self]
        return "<%s[%s]>" % (self.__class__, ", ".join(r))
class LoadError(Exception):
    """Raised when a cookie file is not in the format the jar understands."""
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    Additional methods

    save(filename=None, ignore_discard=False, ignore_expires=False)
    load(filename=None, ignore_discard=False, ignore_expires=False)
    revert(filename=None, ignore_discard=False, ignore_expires=False)

    Additional public attributes

    filename: filename for loading and saving cookies

    Additional public readable attributes

    delayload: request that cookies are lazily loaded from disk; this is only
     a hint since this only affects performance, not behaviour (unless the
     cookies on disk are changing); a CookieJar object may ignore it (in fact,
     only MSIECookieJar lazily loads cookies at the moment)

    save always raises NotImplementedError here, and load hands the open file
    to self._really_load, which this class does not define.

    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        See FileCookieJar.__doc__ for argument documentation.

        Cookies are NOT loaded from the named file until either the load or
        revert method is called.

        """
        CookieJar.__init__(self, policy)
        if not (filename is None or isstringlike(filename)):
            raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        filename: name of file in which to save cookies
        ignore_discard: save even cookies set to be discarded
        ignore_expires: save even cookies that have expired

        The file is overwritten if it already exists, thus wiping all its
        cookies.  Saved cookies can be restored later using the load or revert
        methods.  If filename is not specified, self.filename is used; if
        self.filename is None, ValueError is raised.

        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Old cookies are kept unless overwritten by newly loaded ones.

        Arguments are as for .save().

        If filename is not specified, self.filename is used; if self.filename
        is None, ValueError is raised.  The named file must be in the format
        understood by the class, or LoadError will be raised.  This format will
        be identical to that written by the save method, unless the load format
        is not sufficiently well understood (as is the case for MSIECookieJar).

        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename)
        try:
            self._really_load(f, filename, ignore_discard, ignore_expires)
        finally:
            # close the file whether or not parsing succeeded
            f.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.

        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)

        # keep a private copy so a failed load can be rolled back
        saved_cookies = copy.deepcopy(self._cookies)
        self._cookies = {}
        try:
            self.load(filename, ignore_discard, ignore_expires)
        except (LoadError, IOError):
            self._cookies = saved_cookies
            raise