1 """HTTP cookie handling for web clients.
3 This module has (now fairly distant) origins in Gisle Aas' Perl module
4 HTTP::Cookies, from the libwww-perl library.
6 Docstrings, comments and debug strings in this code refer to the
7 attributes of the HTTP cookie system as cookie-attributes, to distinguish
8 them clearly from Python attributes.
10 Class diagram (note that the classes which do not derive from
11 FileCookieJar are not distributed with the Python standard library, but
12 are available from http://wwwsearch.sf.net/):
18 MozillaCookieJar | LWPCookieJar \ \
22 | / MSIEDBCookieJar BSDDBCookieJar
28 import sys
, re
, urlparse
, copy
, time
, urllib
, logging
30 import threading
as _threading
32 import dummy_threading
as _threading
33 import httplib
# only for the default HTTP port
34 from calendar
import timegm
36 debug
= logging
.getLogger("cookielib").debug
38 DEFAULT_HTTP_PORT
= str(httplib
.HTTP_PORT
)
39 MISSING_FILENAME_TEXT
= ("a filename was not supplied (nor was the CookieJar "
40 "instance initialised with one)")
42 def reraise_unmasked_exceptions(unmasked
=()):
43 # There are a few catch-all except: statements in this module, for
44 # catching input that's bad in unexpected ways.
45 # This function re-raises some exceptions we don't want to trap.
46 unmasked
= unmasked
+ (KeyboardInterrupt, SystemExit, MemoryError)
47 etype
= sys
.exc_info()[0]
48 if issubclass(etype
, unmasked
):
50 # swallowed an exception
51 import warnings
, traceback
, StringIO
52 f
= StringIO
.StringIO()
53 traceback
.print_exc(None, f
)
55 warnings
.warn("cookielib bug!\n%s" % msg
, stacklevel
=2)
58 # Date/time conversion
59 # -----------------------------------------------------------------------------
63 year
, month
, mday
, hour
, min, sec
= tt
[:6]
64 if ((year
>= EPOCH_YEAR
) and (1 <= month
<= 12) and (1 <= mday
<= 31) and
65 (0 <= hour
<= 24) and (0 <= min <= 59) and (0 <= sec
<= 61)):
70 DAYS
= ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
71 MONTHS
= ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
72 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
74 for month
in MONTHS
: MONTHS_LOWER
.append(month
.lower())
def time2isoz(t=None):
    """Format a time in seconds since the epoch as an ISO-like UTC string.

    t is the time in seconds since the epoch; when it is omitted (None) the
    current time is used.

    The result has the form "YYYY-MM-DD hh:mm:ssZ" and always represents
    Universal Time (UTC, aka GMT), for example:

    1994-11-24 08:49:37Z

    """
    if t is None:
        t = time.time()
    # Break the timestamp down in UTC and read the fields by name.
    utc = time.gmtime(t)
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
        utc.tm_year, utc.tm_mon, utc.tm_mday,
        utc.tm_hour, utc.tm_min, utc.tm_sec)
def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    if t is None:
        t = time.time()
    year, mon, mday, hour, minute, sec, wday = time.gmtime(t)[:7]
    # Weekday and month names come from the module-level DAYS / MONTHS
    # tables (DAYS starts at "Mon", matching tm_wday == 0; MONTHS is
    # 0-based while tm_mon is 1-based).
    # The weekday abbreviation is followed by a comma so that the output
    # matches the documented "Wed, DD-Mon-YYYY HH:MM:SS GMT" layout.
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[wday], mday, MONTHS[mon - 1], year, hour, minute, sec)
110 UTC_ZONES
= {"GMT": None, "UTC": None, "UT": None, "Z": None}
112 TIMEZONE_RE
= re
.compile(r
"^([-+])?(\d\d?):?(\d\d)?$")
113 def offset_from_tz_string(tz
):
118 m
= TIMEZONE_RE
.search(tz
)
120 offset
= 3600 * int(m
.group(2))
122 offset
= offset
+ 60 * int(m
.group(3))
123 if m
.group(1) == '-':
127 def _str2time(day
, mon
, yr
, hr
, min, sec
, tz
):
128 # translate month name to number
129 # month numbers start with 1 (January)
131 mon
= MONTHS_LOWER
.index(mon
.lower())+1
133 # maybe it's already a number
143 # make sure clock elements are defined
144 if hr
is None: hr
= 0
145 if min is None: min = 0
146 if sec
is None: sec
= 0
155 # find "obvious" year
156 cur_yr
= time
.localtime(time
.time())[0]
162 if m
> 0: yr
= yr
+ 100
165 # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
166 t
= _timegm((yr
, mon
, day
, hr
, min, sec
, tz
))
169 # adjust time using timezone string, to get absolute time since epoch
173 offset
= offset_from_tz_string(tz
)
180 STRICT_DATE_RE
= re
.compile(
181 r
"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
182 "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
183 WEEKDAY_RE
= re
.compile(
184 r
"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re
.I
)
185 LOOSE_HTTP_DATE_RE
= re
.compile(
193 (?:\s+|:) # separator before clock
194 (\d\d?):(\d\d) # hour:min
195 (?::(\d\d))? # optional seconds
198 ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
200 (?:\(\w+\))? # ASCII representation of timezone in parens.
203 """Returns time in seconds since epoch of time represented by a string.
205 Return value is an integer.
207 None is returned if the format of str is unrecognized, the time is outside
208 the representable range, or the timezone string is not recognized. If the
209 string contains no timezone, UTC is assumed.
211 The timezone in the string may be numerical (like "-0800" or "+0100") or a
212 string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the
213 timezone strings equivalent to UTC (zero offset) are known to the function.
215 The function loosely parses the following formats:
217 Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
218 Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
219 Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
220 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
221 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
222 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday)
224 The parser ignores leading and trailing whitespace. The time may be
227 If the year is given with only 2 digits, the function will select the
228 century that makes the year closest to the current date.
231 # fast exit for strictly conforming string
232 m
= STRICT_DATE_RE
.search(text
)
235 mon
= MONTHS_LOWER
.index(g
[1].lower()) + 1
236 tt
= (int(g
[2]), mon
, int(g
[0]),
237 int(g
[3]), int(g
[4]), float(g
[5]))
240 # No, we need some messy parsing...
244 text
= WEEKDAY_RE
.sub("", text
, 1) # Useless weekday
246 # tz is time zone specifier string
247 day
, mon
, yr
, hr
, min, sec
, tz
= [None]*7
250 m
= LOOSE_HTTP_DATE_RE
.search(text
)
252 day
, mon
, yr
, hr
, min, sec
, tz
= m
.groups()
254 return None # bad format
256 return _str2time(day
, mon
, yr
, hr
, min, sec
, tz
)
258 ISO_DATE_RE
= re
.compile(
262 (\d\d?) # numerical month
266 (?:\s+|[-:Tt]) # separator before clock
267 (\d\d?):?(\d\d) # hour:min
268 (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
271 ([-+]?\d\d?:?(:?\d\d)?
272 |Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
276 As for http2time, but parses the ISO 8601 formats:
278 1994-02-03 14:15:29 -0100 -- ISO 8601 format
279 1994-02-03 14:15:29 -- zone is optional
280 1994-02-03 -- only date
281 1994-02-03T14:15:29 -- Use T as separator
282 19940203T141529Z -- ISO 8601 compact format
283 19940203 -- only date
289 # tz is time zone specifier string
290 day
, mon
, yr
, hr
, min, sec
, tz
= [None]*7
293 m
= ISO_DATE_RE
.search(text
)
295 # XXX there's an extra bit of the timezone I'm ignoring here: is
296 # this the right thing to do?
297 yr
, mon
, day
, hr
, min, sec
, tz
, _
= m
.groups()
299 return None # bad format
301 return _str2time(day
, mon
, yr
, hr
, min, sec
, tz
)
305 # -----------------------------------------------------------------------------
def unmatched(match):
    """Return the subject string of a match with the matched span removed.

    match is a regular-expression match object; everything before the start
    and after the end of the overall match (group 0) is concatenated and
    returned.
    """
    begin, finish = match.span()
    subject = match.string
    return subject[:begin] + subject[finish:]
312 HEADER_TOKEN_RE
= re
.compile(r
"^\s*([^=\s;,]+)")
313 HEADER_QUOTED_VALUE_RE
= re
.compile(r
"^\s*=\s*\"([^
\"\\]*(?
:\\.[^
\"\\]*)*)\"")
314 HEADER_VALUE_RE = re.compile(r"^\s
*=\s
*([^\s
;,]*)")
315 HEADER_ESCAPE_RE = re.compile(r"\\(.)")
316 def split_header_words(header_values):
317 r"""Parse header values into a list of lists containing key,value pairs.
319 The function knows how to deal with ",", ";" and "=" as well as quoted
320 values after "=". A list of space separated tokens are parsed as if they
321 were separated by ";".
323 If the header_values passed as argument contains multiple values, then they
324 are treated as if they were a single value separated by comma ",".
326 This means that this function is useful for parsing header fields that
327 follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
328 the requirement for tokens).
331 header = (token | parameter) *( [";"] (token | parameter))
333 token = 1*<any CHAR except CTLs or separators>
334 separators = "(" | ")" | "<" | ">" | "@"
335 | "," | ";" | ":" | "\" |
<">
336 | "/" | "[" | "]" | "?
" | "="
337 | "{" | "}" | SP | HT
339 quoted-string = ( <"> *(qdtext | quoted
-pair
) <"> )
340 qdtext = <any TEXT except <">>
341 quoted
-pair
= "\" CHAR
343 parameter = attribute "=" value
345 value = token | quoted-string
347 Each header is represented by a list of key/value pairs. The value for a
348 simple token (not part of a parameter) is None. Syntactically incorrect
349 headers will not necessarily be parsed as you would want.
351 This is easier to describe with some examples:
353 >>> split_header_words(['foo="bar
"; port="80,81"; discard, bar=baz'])
354 [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
355 >>> split_header_words(['text/html; charset="iso
-8859-1"'])
356 [[('text/html', None), ('charset', 'iso-8859-1')]]
357 >>> split_header_words([r'Basic realm="\"foo
\bar
\""'])
358 [[('Basic', None), ('realm', '"foobar
"')]]
361 assert not isinstance(header_values, basestring)
363 for text in header_values:
367 m = HEADER_TOKEN_RE.search(text)
371 m = HEADER_QUOTED_VALUE_RE.search(text)
375 value = HEADER_ESCAPE_RE.sub(r"\
1", value)
377 m = HEADER_VALUE_RE.search(text)
378 if m: # unquoted value
381 value = value.rstrip()
383 # no value, a lone token
385 pairs.append((name, value))
386 elif text.lstrip().startswith(","):
387 # concatenated headers, as per RFC 2616 section 4.2
388 text = text.lstrip()[1:]
389 if pairs: result.append(pairs)
393 non_junk, nr_junk_chars = re.subn("^
[=\s
;]*", "", text)
394 assert nr_junk_chars > 0, (
395 "split_header_words bug
: '%s', '%s', %s" %
396 (orig_text, text, pairs))
398 if pairs: result.append(pairs)
401 HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
402 def join_header_words(lists):
403 """Do the inverse (almost) of the conversion done by split_header_words.
405 Takes a list of lists of (key, value) pairs and produces a single header
406 value. Attribute values are quoted if needed.
408 >>> join_header_words([[("text
/plain
", None), ("charset
", "iso
-8859/1")]])
409 'text/plain; charset="iso
-8859/1"'
410 >>> join_header_words([[("text
/plain
", None)], [("charset
", "iso
-8859/1")]])
411 'text/plain, charset="iso
-8859/1"'
419 if not re.search(r"^\w
+$
", v):
420 v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\
1", v) # escape " and \
424 if attr
: headers
.append("; ".join(attr
))
425 return ", ".join(headers
)
427 def parse_ns_headers(ns_headers
):
428 """Ad-hoc parser for Netscape protocol cookie-attributes.
430 The old Netscape cookie format for Set-Cookie can for instance contain
431 an unquoted "," in the expires field, so we have to use this ad-hoc
432 parser instead of split_header_words.
434 XXX This may not make the best possible effort to parse all the crap
435 that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient
436 parser is probably better, so could do worse than following that if
437 this ever gives any trouble.
439 Currently, this is also used for parsing RFC 2109 cookies.
442 known_attrs
= ("expires", "domain", "path", "secure",
443 # RFC 2109 attrs (may turn up in Netscape cookies, too)
447 for ns_header
in ns_headers
:
450 for ii
, param
in enumerate(re
.split(r
";\s*", ns_header
)):
451 param
= param
.rstrip()
452 if param
== "": continue
456 k
, v
= re
.split(r
"\s*=\s*", param
, 1)
460 if lc
in known_attrs
:
463 # This is an RFC 2109 cookie.
466 # convert expires date to seconds since epoch
467 if v
.startswith('"'): v
= v
[1:]
468 if v
.endswith('"'): v
= v
[:-1]
469 v
= http2time(v
) # None if invalid
474 pairs
.append(("version", "0"))
480 IPV4_RE
= re
.compile(r
"\.\d+$")
482 """Return True if text is a host domain name."""
484 # This may well be wrong. Which RFC is HDN defined in, if any (for
485 # the purposes of RFC 2965)?
486 # For the current implementation, what about IPv6? Remember to look
487 # at other uses of IPV4_RE also, if change this.
488 if IPV4_RE
.search(text
):
492 if text
[0] == "." or text
[-1] == ".":
496 def domain_match(A
, B
):
497 """Return True if domain A domain-matches domain B, according to RFC 2965.
499 A and B may be host domain names or IP addresses.
503 Host names can be specified either as an IP address or a HDN string.
504 Sometimes we compare one host name with another. (Such comparisons SHALL
505 be case-insensitive.) Host A's name domain-matches host B's if
507 * their host name strings string-compare equal; or
509 * A is a HDN string and has the form NB, where N is a non-empty
510 name string, B has the form .B', and B' is a HDN string. (So,
511 x.y.com domain-matches .Y.com but not Y.com.)
513 Note that domain-match is not a commutative operation: a.b.c.com
514 domain-matches .c.com, but not the reverse.
517 # Note that, if A or B are IP addresses, the only relevant part of the
518 # definition of the domain-match algorithm is the direct string-compare.
526 if i
== -1 or i
== 0:
527 # A does not have form NB, or N is the empty string
529 if not B
.startswith("."):
531 if not is_HDN(B
[1:]):
535 def liberal_is_HDN(text
):
    """Return True if text is sort-of-like a host domain name.
538 For accepting/blocking domains.
541 if IPV4_RE
.search(text
):
545 def user_domain_match(A
, B
):
546 """For blocking/accepting domains.
548 A and B may be host domain names or IP addresses.
553 if not (liberal_is_HDN(A
) and liberal_is_HDN(B
)):
558 initial_dot
= B
.startswith(".")
559 if initial_dot
and A
.endswith(B
):
561 if not initial_dot
and A
== B
:
565 cut_port_re
= re
.compile(r
":\d+$")
566 def request_host(request
):
567 """Return request-host, as defined by RFC 2965.
569 Variation from RFC: returned value is lowercased, for convenient
573 url
= request
.get_full_url()
574 host
= urlparse
.urlparse(url
)[1]
576 host
= request
.get_header("Host", "")
578 # remove port, if present
579 host
= cut_port_re
.sub("", host
, 1)
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    Both values follow RFC 2965, except that they are lowercased (the
    request-host comes from request_host()).  The effective name is the
    request-host itself, unless the host contains no dot and does not match
    IPV4_RE, in which case ".local" is appended.

    """
    req_host = request_host(request)
    has_no_dot = "." not in req_host
    if has_no_dot and not IPV4_RE.search(req_host):
        # Dotless, non-numeric host: qualify it with the ".local" suffix.
        return req_host, req_host + ".local"
    return req_host, req_host
593 def request_path(request
):
594 """request-URI, as defined by RFC 2965."""
595 url
= request
.get_full_url()
596 #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url)
597 #req_path = escape_path("".join(urlparse.urlparse(url)[2:]))
598 path
, parameters
, query
, frag
= urlparse
.urlparse(url
)[2:]
600 path
= "%s;%s" % (path
, parameters
)
601 path
= escape_path(path
)
602 req_path
= urlparse
.urlunparse(("", "", path
, "", query
, frag
))
603 if not req_path
.startswith("/"):
604 # fix bad RFC 2396 absoluteURI
605 req_path
= "/"+req_path
608 def request_port(request
):
609 host
= request
.get_host()
616 debug("nonnumeric port: '%s'", port
)
619 port
= DEFAULT_HTTP_PORT
622 # Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
623 # need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
624 HTTP_PATH_SAFE
= "%/;:@&=+$,!~*'()"
625 ESCAPED_CHAR_RE
= re
.compile(r
"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return "%" followed by the uppercased text of the match's group 1.

    Intended as a substitution callback for a pattern whose first group
    captures the two hex digits of a %-escape.
    """
    hex_digits = match.group(1)
    return "%" + hex_digits.upper()
628 def escape_path(path
):
629 """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
630 # There's no knowing what character encoding was used to create URLs
631 # containing %-escapes, but since we have to pick one to escape invalid
632 # path characters, we pick UTF-8, as recommended in the HTML 4.0
634 # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
635 # And here, kind of: draft-fielding-uri-rfc2396bis-03
636 # (And in draft IRI specification: draft-duerst-iri-05)
637 # (And here, for new URI schemes: RFC 2718)
638 if isinstance(path
, unicode):
639 path
= path
.encode("utf-8")
640 path
= urllib
.quote(path
, HTTP_PATH_SAFE
)
641 path
= ESCAPED_CHAR_RE
.sub(uppercase_escaped_char
, path
)
645 """Return reach of host h, as defined by RFC 2965, section 1.
647 The reach R of a host name H is defined as follows:
651 - H is the host domain name of a host; and,
653 - H has the form A.B; and
655 - A has no embedded (that is, interior) dots; and
657 - B has at least one embedded dot, or B is the string "local".
658 then the reach of H is .B.
660 * Otherwise, the reach of H is H.
662 >>> reach("www.acme.com")
664 >>> reach("acme.com")
666 >>> reach("acme.local")
672 #a = h[:i] # this line is only here to show what a is
675 if is_HDN(h
) and (i
>= 0 or b
== "local"):
679 def is_third_party(request
):
682 RFC 2965, section 3.3.6:
684 An unverifiable transaction is to a third-party host if its request-
685 host U does not domain-match the reach R of the request-host O in the
689 req_host
= request_host(request
)
690 if not domain_match(req_host
, reach(request
.get_origin_req_host())):
699 This class represents both Netscape and RFC 2965 cookies.
701 This is deliberately a very simple class. It just holds attributes. It's
702 possible to construct Cookie instances that don't comply with the cookie
703 standards. CookieJar.make_cookies is the factory function for Cookie
704 objects -- it deals with cookie parsing, supplying defaults, and
705 normalising to the representation used in this class. CookiePolicy is
706 responsible for checking them to see whether they should be accepted from
707 and returned to the server.
    Note that the port may be present in the headers, but unspecified ("Port"
    rather than "Port=80", for example); if this is the case, port is None.
714 def __init__(self
, version
, name
, value
,
715 port
, port_specified
,
716 domain
, domain_specified
, domain_initial_dot
,
717 path
, path_specified
,
727 if version
is not None: version
= int(version
)
728 if expires
is not None: expires
= int(expires
)
729 if port
is None and port_specified
is True:
730 raise ValueError("if port is None, port_specified must be false")
732 self
.version
= version
736 self
.port_specified
= port_specified
737 # normalise case, as per RFC 2965 section 3.3.3
738 self
.domain
= domain
.lower()
739 self
.domain_specified
= domain_specified
740 # Sigh. We need to know whether the domain given in the
741 # cookie-attribute had an initial dot, in order to follow RFC 2965
742 # (as clarified in draft errata). Needed for the returned $Domain
744 self
.domain_initial_dot
= domain_initial_dot
746 self
.path_specified
= path_specified
748 self
.expires
= expires
749 self
.discard
= discard
750 self
.comment
= comment
751 self
.comment_url
= comment_url
752 self
.rfc2109
= rfc2109
754 self
._rest
= copy
.copy(rest
)
def has_nonstandard_attr(self, name):
    """Return whether name is a key of this cookie's non-standard attributes.

    The non-standard attributes are the mapping kept in self._rest.
    """
    extras = self._rest
    return name in extras
def get_nonstandard_attr(self, name, default=None):
    """Return the non-standard attribute stored under name.

    The lookup is done in the self._rest mapping; default is returned when
    name is not present.
    """
    extras = self._rest
    return extras.get(name, default)
def set_nonstandard_attr(self, name, value):
    """Store value under name in this cookie's non-standard attributes.

    The self._rest mapping is updated in place; an existing entry for name
    is overwritten.
    """
    extras = self._rest
    extras[name] = value
763 def is_expired(self
, now
=None):
764 if now
is None: now
= time
.time()
765 if (self
.expires
is not None) and (self
.expires
<= now
):
770 if self
.port
is None: p
= ""
771 else: p
= ":"+self
.port
772 limit
= self
.domain
+ p
+ self
.path
773 if self
.value
is not None:
774 namevalue
= "%s=%s" % (self
.name
, self
.value
)
776 namevalue
= self
.name
777 return "<Cookie %s for %s>" % (namevalue
, limit
)
781 for name
in ("version", "name", "value",
782 "port", "port_specified",
783 "domain", "domain_specified", "domain_initial_dot",
784 "path", "path_specified",
785 "secure", "expires", "discard", "comment", "comment_url",
787 attr
= getattr(self
, name
)
788 args
.append("%s=%s" % (name
, repr(attr
)))
789 args
.append("rest=%s" % repr(self
._rest
))
790 args
.append("rfc2109=%s" % repr(self
.rfc2109
))
791 return "Cookie(%s)" % ", ".join(args
)
795 """Defines which cookies get accepted from and returned to server.
797 May also modify cookies, though this is probably a bad idea.
799 The subclass DefaultCookiePolicy defines the standard rules for Netscape
800 and RFC 2965 cookies -- override that if you want a customised policy.
def set_ok(self, cookie, request):
    """Decide whether a cookie received from the server should be accepted.

    Implementations return true if (and only if) the cookie should be
    accepted.  Pre-expired cookies currently never reach this method, since
    the CookieJar class deletes them itself.

    This base implementation is abstract: it always raises
    NotImplementedError.

    """
    raise NotImplementedError()
def return_ok(self, cookie, request):
    """Decide whether a stored cookie should be sent back to the server.

    Implementations return true if (and only if) the cookie should be
    returned.  This base implementation is abstract: it always raises
    NotImplementedError.
    """
    raise NotImplementedError()
816 def domain_return_ok(self
, domain
, request
):
817 """Return false if cookies should not be returned, given cookie domain.
821 def path_return_ok(self
, path
, request
):
822 """Return false if cookies should not be returned, given cookie path.
827 class DefaultCookiePolicy(CookiePolicy
):
828 """Implements the standard rules for accepting and returning cookies."""
830 DomainStrictNoDots
= 1
831 DomainStrictNonDomain
= 2
832 DomainRFC2965Match
= 4
835 DomainStrict
= DomainStrictNoDots|DomainStrictNonDomain
838 blocked_domains
=None, allowed_domains
=None,
839 netscape
=True, rfc2965
=False,
840 rfc2109_as_netscape
=None,
843 strict_rfc2965_unverifiable
=True,
844 strict_ns_unverifiable
=False,
845 strict_ns_domain
=DomainLiberal
,
846 strict_ns_set_initial_dollar
=False,
847 strict_ns_set_path
=False,
849 """Constructor arguments should be passed as keyword arguments only."""
850 self
.netscape
= netscape
851 self
.rfc2965
= rfc2965
852 self
.rfc2109_as_netscape
= rfc2109_as_netscape
853 self
.hide_cookie2
= hide_cookie2
854 self
.strict_domain
= strict_domain
855 self
.strict_rfc2965_unverifiable
= strict_rfc2965_unverifiable
856 self
.strict_ns_unverifiable
= strict_ns_unverifiable
857 self
.strict_ns_domain
= strict_ns_domain
858 self
.strict_ns_set_initial_dollar
= strict_ns_set_initial_dollar
859 self
.strict_ns_set_path
= strict_ns_set_path
861 if blocked_domains
is not None:
862 self
._blocked
_domains
= tuple(blocked_domains
)
864 self
._blocked
_domains
= ()
866 if allowed_domains
is not None:
867 allowed_domains
= tuple(allowed_domains
)
868 self
._allowed
_domains
= allowed_domains
def blocked_domains(self):
    """Return the currently blocked domains (stored as a tuple)."""
    blocked = self._blocked_domains
    return blocked
def set_blocked_domains(self, blocked_domains):
    """Replace the blocked domains with the items of blocked_domains.

    blocked_domains may be any iterable; it is snapshotted into a tuple.
    """
    snapshot = tuple(blocked_domains)
    self._blocked_domains = snapshot
877 def is_blocked(self
, domain
):
878 for blocked_domain
in self
._blocked
_domains
:
879 if user_domain_match(domain
, blocked_domain
):
def allowed_domains(self):
    """Return the allowed domains: either None or a tuple of domains."""
    allowed = self._allowed_domains
    return allowed
def set_allowed_domains(self, allowed_domains):
    """Replace the allowed domains.

    allowed_domains is either None, which is stored as-is, or an iterable
    of domains, which is snapshotted into a tuple.
    """
    if allowed_domains is None:
        self._allowed_domains = None
    else:
        self._allowed_domains = tuple(allowed_domains)
892 def is_not_allowed(self
, domain
):
893 if self
._allowed
_domains
is None:
895 for allowed_domain
in self
._allowed
_domains
:
896 if user_domain_match(domain
, allowed_domain
):
900 def set_ok(self
, cookie
, request
):
902 If you override .set_ok(), be sure to call this method. If it returns
903 false, so should your subclass (assuming your subclass wants to be more
904 strict about which cookies to accept).
907 debug(" - checking cookie %s=%s", cookie
.name
, cookie
.value
)
909 assert cookie
.name
is not None
911 for n
in "version", "verifiability", "name", "path", "domain", "port":
912 fn_name
= "set_ok_"+n
913 fn
= getattr(self
, fn_name
)
914 if not fn(cookie
, request
):
919 def set_ok_version(self
, cookie
, request
):
920 if cookie
.version
is None:
921 # Version is always set to 0 by parse_ns_headers if it's a Netscape
922 # cookie, so this must be an invalid RFC 2965 cookie.
923 debug(" Set-Cookie2 without version attribute (%s=%s)",
924 cookie
.name
, cookie
.value
)
926 if cookie
.version
> 0 and not self
.rfc2965
:
927 debug(" RFC 2965 cookies are switched off")
929 elif cookie
.version
== 0 and not self
.netscape
:
930 debug(" Netscape cookies are switched off")
934 def set_ok_verifiability(self
, cookie
, request
):
935 if request
.is_unverifiable() and is_third_party(request
):
936 if cookie
.version
> 0 and self
.strict_rfc2965_unverifiable
:
937 debug(" third-party RFC 2965 cookie during "
938 "unverifiable transaction")
940 elif cookie
.version
== 0 and self
.strict_ns_unverifiable
:
941 debug(" third-party Netscape cookie during "
942 "unverifiable transaction")
946 def set_ok_name(self
, cookie
, request
):
947 # Try and stop servers setting V0 cookies designed to hack other
948 # servers that know both V0 and V1 protocols.
949 if (cookie
.version
== 0 and self
.strict_ns_set_initial_dollar
and
950 cookie
.name
.startswith("$")):
951 debug(" illegal name (starts with '$'): '%s'", cookie
.name
)
955 def set_ok_path(self
, cookie
, request
):
956 if cookie
.path_specified
:
957 req_path
= request_path(request
)
958 if ((cookie
.version
> 0 or
959 (cookie
.version
== 0 and self
.strict_ns_set_path
)) and
960 not req_path
.startswith(cookie
.path
)):
961 debug(" path attribute %s is not a prefix of request "
962 "path %s", cookie
.path
, req_path
)
966 def set_ok_domain(self
, cookie
, request
):
967 if self
.is_blocked(cookie
.domain
):
968 debug(" domain %s is in user block-list", cookie
.domain
)
970 if self
.is_not_allowed(cookie
.domain
):
971 debug(" domain %s is not in user allow-list", cookie
.domain
)
973 if cookie
.domain_specified
:
974 req_host
, erhn
= eff_request_host(request
)
975 domain
= cookie
.domain
976 if self
.strict_domain
and (domain
.count(".") >= 2):
977 i
= domain
.rfind(".")
978 j
= domain
.rfind(".", 0, i
)
979 if j
== 0: # domain like .foo.bar
984 "com", "edu", "org", "net", "gov", "mil", "int") and
987 debug(" country-code second level domain %s", domain
)
989 if domain
.startswith("."):
990 undotted_domain
= domain
[1:]
992 undotted_domain
= domain
993 embedded_dots
= (undotted_domain
.find(".") >= 0)
994 if not embedded_dots
and domain
!= ".local":
995 debug(" non-local domain %s contains no embedded dot",
998 if cookie
.version
== 0:
999 if (not erhn
.endswith(domain
) and
1000 (not erhn
.startswith(".") and
1001 not ("."+erhn
).endswith(domain
))):
1002 debug(" effective request-host %s (even with added "
1003 "initial dot) does not end end with %s",
1006 if (cookie
.version
> 0 or
1007 (self
.strict_ns_domain
& self
.DomainRFC2965Match
)):
1008 if not domain_match(erhn
, domain
):
1009 debug(" effective request-host %s does not domain-match "
1012 if (cookie
.version
> 0 or
1013 (self
.strict_ns_domain
& self
.DomainStrictNoDots
)):
1014 host_prefix
= req_host
[:-len(domain
)]
1015 if (host_prefix
.find(".") >= 0 and
1016 not IPV4_RE
.search(req_host
)):
1017 debug(" host prefix %s for domain %s contains a dot",
1018 host_prefix
, domain
)
1022 def set_ok_port(self
, cookie
, request
):
1023 if cookie
.port_specified
:
1024 req_port
= request_port(request
)
1025 if req_port
is None:
1028 req_port
= str(req_port
)
1029 for p
in cookie
.port
.split(","):
1033 debug(" bad port %s (not numeric)", p
)
1038 debug(" request port (%s) not found in %s",
1039 req_port
, cookie
.port
)
1043 def return_ok(self
, cookie
, request
):
1045 If you override .return_ok(), be sure to call this method. If it
1046 returns false, so should your subclass (assuming your subclass wants to
1047 be more strict about which cookies to return).
1050 # Path has already been checked by .path_return_ok(), and domain
1051 # blocking done by .domain_return_ok().
1052 debug(" - checking cookie %s=%s", cookie
.name
, cookie
.value
)
1054 for n
in "version", "verifiability", "secure", "expires", "port", "domain":
1055 fn_name
= "return_ok_"+n
1056 fn
= getattr(self
, fn_name
)
1057 if not fn(cookie
, request
):
1061 def return_ok_version(self
, cookie
, request
):
1062 if cookie
.version
> 0 and not self
.rfc2965
:
1063 debug(" RFC 2965 cookies are switched off")
1065 elif cookie
.version
== 0 and not self
.netscape
:
1066 debug(" Netscape cookies are switched off")
1070 def return_ok_verifiability(self
, cookie
, request
):
1071 if request
.is_unverifiable() and is_third_party(request
):
1072 if cookie
.version
> 0 and self
.strict_rfc2965_unverifiable
:
1073 debug(" third-party RFC 2965 cookie during unverifiable "
1076 elif cookie
.version
== 0 and self
.strict_ns_unverifiable
:
1077 debug(" third-party Netscape cookie during unverifiable "
1082 def return_ok_secure(self
, cookie
, request
):
1083 if cookie
.secure
and request
.get_type() != "https":
1084 debug(" secure cookie with non-secure request")
1088 def return_ok_expires(self
, cookie
, request
):
1089 if cookie
.is_expired(self
._now
):
1090 debug(" cookie expired")
1094 def return_ok_port(self
, cookie
, request
):
1096 req_port
= request_port(request
)
1097 if req_port
is None:
1099 for p
in cookie
.port
.split(","):
1103 debug(" request port %s does not match cookie port %s",
1104 req_port
, cookie
.port
)
1108 def return_ok_domain(self
, cookie
, request
):
1109 req_host
, erhn
= eff_request_host(request
)
1110 domain
= cookie
.domain
1112 # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
1113 if (cookie
.version
== 0 and
1114 (self
.strict_ns_domain
& self
.DomainStrictNonDomain
) and
1115 not cookie
.domain_specified
and domain
!= erhn
):
1116 debug(" cookie with unspecified domain does not string-compare "
1117 "equal to request domain")
1120 if cookie
.version
> 0 and not domain_match(erhn
, domain
):
1121 debug(" effective request-host name %s does not domain-match "
1122 "RFC 2965 cookie domain %s", erhn
, domain
)
1124 if cookie
.version
== 0 and not ("."+erhn
).endswith(domain
):
1125 debug(" request-host %s does not match Netscape cookie domain "
1126 "%s", req_host
, domain
)
1130 def domain_return_ok(self
, domain
, request
):
        # Liberal check of the domain.  This is here as an optimization to avoid
        # having to load lots of MSIE cookie files unless necessary.
1133 req_host
, erhn
= eff_request_host(request
)
1134 if not req_host
.startswith("."):
1135 req_host
= "."+req_host
1136 if not erhn
.startswith("."):
1138 if not (req_host
.endswith(domain
) or erhn
.endswith(domain
)):
1139 #debug(" request domain %s does not match cookie domain %s",
1143 if self
.is_blocked(domain
):
1144 debug(" domain %s is in user block-list", domain
)
1146 if self
.is_not_allowed(domain
):
1147 debug(" domain %s is not in user allow-list", domain
)
1152 def path_return_ok(self
, path
, request
):
1153 debug("- checking cookie path=%s", path
)
1154 req_path
= request_path(request
)
1155 if not req_path
.startswith(path
):
1156 debug(" %s does not path-match %s", req_path
, path
)
1161 def vals_sorted_by_key(adict
):
1164 return map(adict
.get
, keys
)
1166 def deepvalues(mapping
):
1167 """Iterates over nested mapping, depth-first, in sorted order by key."""
1168 values
= vals_sorted_by_key(mapping
)
1173 except AttributeError:
1177 for subobj
in deepvalues(obj
):
1183 # Used as second parameter to dict.get() method, to distinguish absent
1184 # dict key from one with a None value.
1188 """Collection of HTTP cookies.
1190 You may not need to know about this class: try
1191 urllib2.build_opener(HTTPCookieProcessor).open(url).
# Matches a single non-word character; _cookie_attrs() searches cookie values
# with it when deciding whether to escape them.
non_word_re = re.compile(r"\W")
# Captures a double quote or a backslash; _cookie_attrs() uses it to prefix
# those characters with a backslash.
quote_re = re.compile(r"([\"\\])")
# NOTE(review): the three domain patterns below are not referenced in this
# part of the file -- confirm where (or whether) they are still used.
strict_domain_re = re.compile(r"\.?[^.]*")
domain_re = re.compile(r"[^.]*")
dots_re = re.compile(r"^\.+")

# Pattern source, left uncompiled.  Presumably the magic first line of an
# LWP cookies file -- verify against the LWP loader.
magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
def __init__(self, policy=None):
    """Create an empty jar.

    policy -- cookie policy object; a new DefaultCookiePolicy is created
        when it is None

    """
    if policy is None:
        policy = DefaultCookiePolicy()
    # Nested store, indexed as _cookies[domain][path][name].
    self._cookies = {}
    self._cookies_lock = _threading.RLock()
    self._policy = policy
def set_policy(self, policy):
    """Replace the policy object consulted by this jar with `policy`."""
    self._policy = policy
def _cookies_for_domain(self, domain, request):
    """Return the cookies stored under domain that the policy lets through.

    The policy is consulted at three levels: the domain as a whole, each
    stored path, and finally each individual cookie.

    """
    if not self._policy.domain_return_ok(domain, request):
        return []
    debug("Checking %s for cookies to return", domain)
    matches = []
    by_path = self._cookies[domain]
    for path in by_path.keys():
        if self._policy.path_return_ok(path, request):
            for cookie in by_path[path].values():
                if self._policy.return_ok(cookie, request):
                    debug(" it's a match")
                    matches.append(cookie)
                else:
                    debug(" not returning cookie")
    return matches
1232 def _cookies_for_request(self
, request
):
1233 """Return a list of cookies to be returned to server."""
1235 for domain
in self
._cookies
.keys():
1236 cookies
.extend(self
._cookies
_for
_domain
(domain
, request
))
1239 def _cookie_attrs(self
, cookies
):
1240 """Return a list of cookie-attributes to be returned to server.
1242 like ['foo="bar"; $Path="/"', ...]
1244 The $Version attribute is also added when appropriate (currently only
1248 # add cookies in order of most specific (ie. longest) path first
1249 def decreasing_size(a
, b
): return cmp(len(b
.path
), len(a
.path
))
1250 cookies
.sort(decreasing_size
)
1255 for cookie
in cookies
:
1256 # set version of Cookie header
1258 # What should it be if multiple matching Set-Cookie headers have
1259 # different versions themselves?
1260 # Answer: there is no answer; was supposed to be settled by
1261 # RFC 2965 errata, but that may never appear...
1262 version
= cookie
.version
1266 attrs
.append("$Version=%s" % version
)
1268 # quote cookie value if necessary
1269 # (not for Netscape protocol, which already has any quotes
1270 # intact, due to the poorly-specified Netscape Cookie: syntax)
1271 if ((cookie
.value
is not None) and
1272 self
.non_word_re
.search(cookie
.value
) and version
> 0):
1273 value
= self
.quote_re
.sub(r
"\\\1", cookie
.value
)
1275 value
= cookie
.value
1277 # add cookie-attributes to be returned in Cookie header
1278 if cookie
.value
is None:
1279 attrs
.append(cookie
.name
)
1281 attrs
.append("%s=%s" % (cookie
.name
, value
))
1283 if cookie
.path_specified
:
1284 attrs
.append('$Path="%s"' % cookie
.path
)
1285 if cookie
.domain
.startswith("."):
1286 domain
= cookie
.domain
1287 if (not cookie
.domain_initial_dot
and
1288 domain
.startswith(".")):
1290 attrs
.append('$Domain="%s"' % domain
)
1291 if cookie
.port
is not None:
1293 if cookie
.port_specified
:
1294 p
= p
+ ('="%s"' % cookie
.port
)
def add_cookie_header(self, request):
    """Add correct Cookie: header to request (urllib2.Request object).

    The Cookie2 header is also added unless policy.hide_cookie2 is true.

    Headers already present on the request are left alone.  Expired cookies
    are cleared from the jar afterwards.

    """
    debug("add_cookie_header")
    self._cookies_lock.acquire()
    # try/finally: an exception from the policy or the request object must
    # not leave the jar locked.
    try:
        self._policy._now = self._now = int(time.time())

        cookies = self._cookies_for_request(request)

        attrs = self._cookie_attrs(cookies)
        if attrs:
            if not request.has_header("Cookie"):
                request.add_unredirected_header(
                    "Cookie", "; ".join(attrs))

        # if necessary, advertise that we know RFC 2965
        if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
            not request.has_header("Cookie2")):
            for cookie in cookies:
                if cookie.version != 1:
                    request.add_unredirected_header("Cookie2", '$Version="1"')
                    break
    finally:
        self._cookies_lock.release()

    self.clear_expired_cookies()
def _normalized_cookie_tuples(self, attrs_set):
    """Return list of tuples containing normalised cookie information.

    attrs_set is the list of lists of key,value pairs extracted from
    the Set-Cookie or Set-Cookie2 headers.

    Tuples are name, value, standard, rest, where name and value are the
    cookie name and value, standard is a dictionary containing the standard
    cookie-attributes (discard, secure, version, expires or max-age,
    domain, path and port) and rest is a dictionary containing the rest of
    the cookie-attributes.

    A cookie with an unusable domain, max-age or other required value is
    dropped from the result.

    """
    cookie_tuples = []

    boolean_attrs = "discard", "secure"
    value_attrs = ("version",
                   "expires", "max-age",
                   "domain", "path", "port",
                   "comment", "commenturl")

    for cookie_attrs in attrs_set:
        name, value = cookie_attrs[0]

        # Build dictionary of standard cookie-attributes (standard) and
        # dictionary of other cookie-attributes (rest).

        # Note: expiry time is normalised to seconds since epoch. V0
        # cookies should have the Expires cookie-attribute, and V1 cookies
        # should have Max-Age, but since V1 includes RFC 2109 cookies (and
        # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
        # accept either (but prefer Max-Age).
        max_age_set = False

        bad_cookie = False

        standard = {}
        rest = {}
        for k, v in cookie_attrs[1:]:
            lc = k.lower()
            # don't lose case distinction for unknown fields
            if lc in value_attrs or lc in boolean_attrs:
                k = lc
            if k in boolean_attrs and v is None:
                # boolean cookie-attribute is present, but has no value
                # (like "discard", rather than "port=80")
                v = True
            if k in standard:
                # only first value is significant
                continue
            if k == "domain":
                if v is None:
                    debug(" missing value for domain attribute")
                    bad_cookie = True
                    break
                # RFC 2965 section 3.3.3
                v = v.lower()
            if k == "expires":
                if max_age_set:
                    # Prefer max-age to expires (like Mozilla)
                    continue
                if v is None:
                    debug(" missing or invalid value for expires "
                          "attribute: treating as session cookie")
                    continue
            if k == "max-age":
                max_age_set = True
                try:
                    v = int(v)
                except (ValueError, TypeError):
                    # TypeError covers a Max-Age with no value at all
                    # (v is None), which int() rejects differently from a
                    # non-numeric string.
                    debug(" missing or invalid (non-numeric) value for "
                          "max-age attribute")
                    bad_cookie = True
                    break
                # convert RFC 2965 Max-Age to seconds since epoch
                # XXX Strictly you're supposed to follow RFC 2616
                # age-calculation rules. Remember that zero Max-Age is a
                # request to discard (old and new) cookie, though.
                k = "expires"
                v = self._now + v
            if (k in value_attrs) or (k in boolean_attrs):
                if (v is None and
                    k not in ("port", "comment", "commenturl")):
                    debug(" missing value for %s attribute", k)
                    bad_cookie = True
                    break
                standard[k] = v
            else:
                rest[k] = v

        if bad_cookie:
            continue

        cookie_tuples.append((name, value, standard, rest))

    return cookie_tuples
def _cookie_from_cookie_tuple(self, tup, request):
    """Build a Cookie from one normalised cookie tuple, or return None.

    tup is (name, value, standard, rest): standard is dict of standard
    cookie-attributes, rest is dict of the rest of them.  Attributes absent
    from standard are defaulted, where needed from request.

    Returns None, creating no cookie, when the version attribute is not an
    integer, or when the expiry time is not later than self._now.  In the
    latter case any stored cookie with the same domain, path and name is
    removed from the jar.

    """
    name, value, standard, rest = tup

    domain = standard.get("domain", Absent)
    path = standard.get("path", Absent)
    port = standard.get("port", Absent)
    expires = standard.get("expires", Absent)

    # set the easy defaults
    version = standard.get("version", None)
    if version is not None:
        try:
            version = int(version)
        except ValueError:
            # invalid version: ignore this cookie rather than letting the
            # error abort every cookie parsed from the same headers
            return None
    secure = standard.get("secure", False)
    # (discard is also set if expires is Absent)
    discard = standard.get("discard", False)
    comment = standard.get("comment", None)
    comment_url = standard.get("commenturl", None)

    # set default path
    if path is not Absent and path != "":
        path_specified = True
        path = escape_path(path)
    else:
        path_specified = False
        path = request_path(request)
        i = path.rfind("/")
        if i != -1:
            if version == 0:
                # Netscape spec parts company from reality here
                path = path[:i]
            else:
                path = path[:i+1]
        if len(path) == 0: path = "/"

    # set default domain
    domain_specified = domain is not Absent
    # but first we have to remember whether it starts with a dot
    domain_initial_dot = False
    if domain_specified:
        domain_initial_dot = bool(domain.startswith("."))
    if domain is Absent:
        req_host, erhn = eff_request_host(request)
        domain = erhn
    elif not domain.startswith("."):
        domain = "."+domain

    # set default port
    port_specified = False
    if port is not Absent:
        if port is None:
            # Port attr present, but has no value: default to request port.
            # Cookie should then only be sent back on that port.
            port = request_port(request)
        else:
            port_specified = True
            port = re.sub(r"\s+", "", port)
    else:
        # No port attr present. Cookie can be sent back on any port.
        port = None

    # set default expires and discard
    if expires is Absent:
        expires = None
        discard = True
    elif expires <= self._now:
        # Expiry date in past is request to delete cookie. This can't be
        # in DefaultCookiePolicy, because can't delete cookies there.
        try:
            self.clear(domain, path, name)
        except KeyError:
            # no such cookie stored: nothing to delete
            pass
        debug("Expiring cookie, domain='%s', path='%s', name='%s'",
              domain, path, name)
        return None

    return Cookie(version,
                  name, value,
                  port, port_specified,
                  domain, domain_specified, domain_initial_dot,
                  path, path_specified,
                  secure,
                  expires,
                  discard,
                  comment,
                  comment_url,
                  rest)
1515 def _cookies_from_attrs_set(self
, attrs_set
, request
):
1516 cookie_tuples
= self
._normalized
_cookie
_tuples
(attrs_set
)
1519 for tup
in cookie_tuples
:
1520 cookie
= self
._cookie
_from
_cookie
_tuple
(tup
, request
)
1521 if cookie
: cookies
.append(cookie
)
1524 def _process_rfc2109_cookies(self
, cookies
):
1525 rfc2109_as_ns
= getattr(self
._policy
, 'rfc2109_as_netscape', None)
1526 if rfc2109_as_ns
is None:
1527 rfc2109_as_ns
= not self
._policy
.rfc2965
1528 for cookie
in cookies
:
1529 if cookie
.version
== 1:
1530 cookie
.rfc2109
= True
1532 # treat 2109 cookies as Netscape cookies rather than
1533 # as RFC2965 cookies
def make_cookies(self, response, request):
    """Return sequence of Cookie objects extracted from response object."""
    # get cookie-attributes for RFC 2965 and Netscape protocols
    headers = response.info()
    rfc2965_hdrs = headers.getheaders("Set-Cookie2")
    ns_hdrs = headers.getheaders("Set-Cookie")

    rfc2965 = self._policy.rfc2965
    netscape = self._policy.netscape

    # Some header must be present whose protocol the policy accepts.
    relevant = ((rfc2965_hdrs or ns_hdrs) and
                (ns_hdrs or rfc2965) and
                (rfc2965_hdrs or netscape) and
                (netscape or rfc2965))
    if not relevant:
        return []  # no relevant cookie headers: quick exit

    # The catch-all handlers are deliberate: reraise_unmasked_exceptions()
    # decides which exceptions propagate and reports the rest.
    try:
        cookies = self._cookies_from_attrs_set(
            split_header_words(rfc2965_hdrs), request)
    except:
        reraise_unmasked_exceptions()
        cookies = []

    if not (ns_hdrs and netscape):
        return cookies

    try:
        # RFC 2109 and Netscape cookies
        ns_cookies = self._cookies_from_attrs_set(
            parse_ns_headers(ns_hdrs), request)
    except:
        reraise_unmasked_exceptions()
        ns_cookies = []
    self._process_rfc2109_cookies(ns_cookies)

    # Look for Netscape cookies (from Set-Cookie headers) that match
    # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
    # For each match, keep the RFC 2965 cookie and ignore the Netscape
    # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
    # bundled in with the Netscape cookies for this purpose, which is
    # reasonable behaviour.
    if rfc2965:
        taken = set([(cookie.domain, cookie.path, cookie.name)
                     for cookie in cookies])
        ns_cookies = [ns_cookie for ns_cookie in ns_cookies
                      if (ns_cookie.domain, ns_cookie.path,
                          ns_cookie.name) not in taken]

    if ns_cookies:
        cookies.extend(ns_cookies)

    return cookies
def set_cookie_if_ok(self, cookie, request):
    """Set a cookie if policy says it's OK to do so."""
    self._cookies_lock.acquire()
    # try/finally: the lock must be released even when the policy check or
    # set_cookie() raises.
    try:
        self._policy._now = self._now = int(time.time())

        if self._policy.set_ok(cookie, request):
            self.set_cookie(cookie)
    finally:
        self._cookies_lock.release()
def set_cookie(self, cookie):
    """Set a cookie, without checking whether or not it should be set."""
    by_domain = self._cookies
    self._cookies_lock.acquire()
    try:
        # Create the domain and path levels on demand, then file the cookie
        # under its name, replacing any cookie already stored there.
        by_path = by_domain.setdefault(cookie.domain, {})
        by_name = by_path.setdefault(cookie.path, {})
        by_name[cookie.name] = cookie
    finally:
        self._cookies_lock.release()
def extract_cookies(self, response, request):
    """Extract cookies from response, where allowable given the request."""
    debug("extract_cookies: %s", response.info())
    self._cookies_lock.acquire()
    # try/finally: the lock must be released even when header parsing or
    # the policy raises.
    try:
        self._policy._now = self._now = int(time.time())

        for cookie in self.make_cookies(response, request):
            if self._policy.set_ok(cookie, request):
                debug(" setting cookie: %s", cookie)
                self.set_cookie(cookie)
    finally:
        self._cookies_lock.release()
def clear(self, domain=None, path=None, name=None):
    """Clear some cookies.

    Invoking this method without arguments will clear all cookies. If
    given a single argument, only cookies belonging to that domain will be
    removed. If given two arguments, cookies belonging to the specified
    path within that domain are removed. If given three arguments, then
    the cookie with the specified name, path and domain is removed.

    Raises KeyError if no matching cookie exists.

    """
    # Most specific request first: by name, then by path, then by domain.
    if name is not None:
        if domain is None or path is None:
            raise ValueError(
                "domain and path must be given to remove a cookie by name")
        del self._cookies[domain][path][name]
        return
    if path is not None:
        if domain is None:
            raise ValueError(
                "domain must be given to remove cookies by path")
        del self._cookies[domain][path]
        return
    if domain is None:
        self._cookies = {}
    else:
        del self._cookies[domain]
def clear_session_cookies(self):
    """Discard all session cookies.

    Note that the .save() method won't save session cookies anyway, unless
    you ask otherwise by passing a true ignore_discard argument.

    """
    self._cookies_lock.acquire()
    # try/finally: an exception while clearing must not leave the jar
    # locked.
    try:
        for cookie in self:
            if cookie.discard:
                self.clear(cookie.domain, cookie.path, cookie.name)
    finally:
        self._cookies_lock.release()
def clear_expired_cookies(self):
    """Discard all expired cookies.

    You probably don't need to call this method: expired cookies are never
    sent back to the server (provided you're using DefaultCookiePolicy),
    this method is called by CookieJar itself every so often, and the
    .save() method won't save expired cookies anyway (unless you ask
    otherwise by passing a true ignore_expires argument).

    """
    self._cookies_lock.acquire()
    # try/finally: an exception while clearing must not leave the jar
    # locked.
    try:
        now = time.time()
        for cookie in self:
            if cookie.is_expired(now):
                self.clear(cookie.domain, cookie.path, cookie.name)
    finally:
        self._cookies_lock.release()
def __iter__(self):
    """Return an iterator over the stored cookies, built by deepvalues()."""
    return deepvalues(self._cookies)
1686 """Return number of contained cookies."""
1688 for cookie
in self
: i
= i
+ 1
1693 for cookie
in self
: r
.append(repr(cookie
))
1694 return "<%s[%s]>" % (self
.__class
__, ", ".join(r
))
1698 for cookie
in self
: r
.append(str(cookie
))
1699 return "<%s[%s]>" % (self
.__class
__, ", ".join(r
))
# derives from IOError for backwards-compatibility with Python 2.4.0
class LoadError(IOError):
    """Failure to load cookies; see FileCookieJar.load() and .revert()."""
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file."""

    def __init__(self, filename=None, delayload=False, policy=None):
        """Create the jar without reading any file.

        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        filename -- default file name used by load() and revert(); must be
            string-like if given, otherwise ValueError is raised
        delayload -- stored on the instance as a bool
        policy -- handed on to CookieJar.__init__()

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            # Duck-typed check: anything that can be concatenated with a
            # string is accepted.  Catch Exception rather than everything so
            # that an interrupt is not reported as a bad filename.
            try:
                filename + ""
            except Exception:
                raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file."""
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        filename defaults to self.filename; ValueError is raised if neither
        is set.  The parsing itself is delegated to self._really_load(),
        which is not defined in this class -- presumably subclasses supply
        it.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename)
        try:
            self._really_load(f, filename, ignore_discard, ignore_expires)
        finally:
            f.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        self._cookies_lock.acquire()
        # try/finally: a failed load re-raises, and the lock has to be
        # released on that path too.
        try:
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except (LoadError, IOError):
                self._cookies = old_state
                raise
        finally:
            self._cookies_lock.release()
1763 from _LWPCookieJar
import LWPCookieJar
, lwp_cookie_str
1764 from _MozillaCookieJar
import MozillaCookieJar