1 """HTTP cookie handling for web clients.
3 This module has (now fairly distant) origins in Gisle Aas' Perl module
4 HTTP::Cookies, from the libwww-perl library.
6 Docstrings, comments and debug strings in this code refer to the
7 attributes of the HTTP cookie system as cookie-attributes, to distinguish
8 them clearly from Python attributes.
10 Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
11 distributed with the Python standard library, but are available from
12 http://wwwsearch.sf.net/):
18 MozillaCookieJar | LWPCookieJar \ \
22 | / MSIEDBCookieJar BSDDBCookieJar
28 __all__
= ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
29 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
31 import re
, urlparse
, copy
, time
, urllib
33 import threading
as _threading
35 import dummy_threading
as _threading
36 import httplib
# only for the default HTTP port
37 from calendar
import timegm
39 debug
= False # set to True to enable debugging via the logging module
48 logger
= logging
.getLogger("cookielib")
49 return logger
.debug(*args
)
52 DEFAULT_HTTP_PORT
= str(httplib
.HTTP_PORT
)
53 MISSING_FILENAME_TEXT
= ("a filename was not supplied (nor was the CookieJar "
54 "instance initialised with one)")
56 def _warn_unhandled_exception():
57 # There are a few catch-all except: statements in this module, for
58 # catching input that's bad in unexpected ways. Warn if any
59 # exceptions are caught there.
60 import warnings
, traceback
, StringIO
61 f
= StringIO
.StringIO()
62 traceback
.print_exc(None, f
)
64 warnings
.warn("cookielib bug!\n%s" % msg
, stacklevel
=2)
67 # Date/time conversion
68 # -----------------------------------------------------------------------------
72 year
, month
, mday
, hour
, min, sec
= tt
[:6]
73 if ((year
>= EPOCH_YEAR
) and (1 <= month
<= 12) and (1 <= mday
<= 31) and
74 (0 <= hour
<= 24) and (0 <= min <= 59) and (0 <= sec
<= 61)):
# Weekday and month abbreviations used when formatting and parsing dates.
# DAYS is indexed by the weekday field of time.gmtime() (Monday == 0);
# MONTHS is indexed by (month number - 1).
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased month abbreviations, for case-insensitive month-name lookup.
MONTHS_LOWER = [month.lower() for month in MONTHS]

def time2isoz(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
    representing Universal Time (UTC, aka GMT).  An example of this format is:

    1994-11-24 08:49:37Z

    """
    if t is None:
        t = time.time()
    year, mon, mday, hour, minute, sec = time.gmtime(t)[:6]
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
        year, mon, mday, hour, minute, sec)

def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    if t is None:
        t = time.time()
    year, mon, mday, hour, minute, sec, wday = time.gmtime(t)[:7]
    # The weekday abbreviation is followed by a comma, as the documented
    # format above requires.
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[wday], mday, MONTHS[mon-1], year, hour, minute, sec)
119 UTC_ZONES
= {"GMT": None, "UTC": None, "UT": None, "Z": None}
121 TIMEZONE_RE
= re
.compile(r
"^([-+])?(\d\d?):?(\d\d)?$")
122 def offset_from_tz_string(tz
):
127 m
= TIMEZONE_RE
.search(tz
)
129 offset
= 3600 * int(m
.group(2))
131 offset
= offset
+ 60 * int(m
.group(3))
132 if m
.group(1) == '-':
136 def _str2time(day
, mon
, yr
, hr
, min, sec
, tz
):
137 # translate month name to number
138 # month numbers start with 1 (January)
140 mon
= MONTHS_LOWER
.index(mon
.lower())+1
142 # maybe it's already a number
152 # make sure clock elements are defined
153 if hr
is None: hr
= 0
154 if min is None: min = 0
155 if sec
is None: sec
= 0
164 # find "obvious" year
165 cur_yr
= time
.localtime(time
.time())[0]
171 if m
> 0: yr
= yr
+ 100
174 # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
175 t
= _timegm((yr
, mon
, day
, hr
, min, sec
, tz
))
178 # adjust time using timezone string, to get absolute time since epoch
182 offset
= offset_from_tz_string(tz
)
189 STRICT_DATE_RE
= re
.compile(
190 r
"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
191 "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
192 WEEKDAY_RE
= re
.compile(
193 r
"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re
.I
)
194 LOOSE_HTTP_DATE_RE
= re
.compile(
202 (?:\s+|:) # separator before clock
203 (\d\d?):(\d\d) # hour:min
204 (?::(\d\d))? # optional seconds
207 ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
209 (?:\(\w+\))? # ASCII representation of timezone in parens.
212 """Returns time in seconds since epoch of time represented by a string.
214 Return value is an integer.
216 None is returned if the format of str is unrecognized, the time is outside
217 the representable range, or the timezone string is not recognized. If the
218 string contains no timezone, UTC is assumed.
220 The timezone in the string may be numerical (like "-0800" or "+0100") or a
221 string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the
222 timezone strings equivalent to UTC (zero offset) are known to the function.
224 The function loosely parses the following formats:
226 Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
227 Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
228 Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
229 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
230 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
231 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday)
233 The parser ignores leading and trailing whitespace. The time may be
236 If the year is given with only 2 digits, the function will select the
237 century that makes the year closest to the current date.
240 # fast exit for strictly conforming string
241 m
= STRICT_DATE_RE
.search(text
)
244 mon
= MONTHS_LOWER
.index(g
[1].lower()) + 1
245 tt
= (int(g
[2]), mon
, int(g
[0]),
246 int(g
[3]), int(g
[4]), float(g
[5]))
249 # No, we need some messy parsing...
253 text
= WEEKDAY_RE
.sub("", text
, 1) # Useless weekday
255 # tz is time zone specifier string
256 day
, mon
, yr
, hr
, min, sec
, tz
= [None]*7
259 m
= LOOSE_HTTP_DATE_RE
.search(text
)
261 day
, mon
, yr
, hr
, min, sec
, tz
= m
.groups()
263 return None # bad format
265 return _str2time(day
, mon
, yr
, hr
, min, sec
, tz
)
267 ISO_DATE_RE
= re
.compile(
271 (\d\d?) # numerical month
275 (?:\s+|[-:Tt]) # separator before clock
276 (\d\d?):?(\d\d) # hour:min
277 (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
280 ([-+]?\d\d?:?(:?\d\d)?
281 |Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
285 As for http2time, but parses the ISO 8601 formats:
287 1994-02-03 14:15:29 -0100 -- ISO 8601 format
288 1994-02-03 14:15:29 -- zone is optional
289 1994-02-03 -- only date
290 1994-02-03T14:15:29 -- Use T as separator
291 19940203T141529Z -- ISO 8601 compact format
292 19940203 -- only date
298 # tz is time zone specifier string
299 day
, mon
, yr
, hr
, min, sec
, tz
= [None]*7
302 m
= ISO_DATE_RE
.search(text
)
304 # XXX there's an extra bit of the timezone I'm ignoring here: is
305 # this the right thing to do?
306 yr
, mon
, day
, hr
, min, sec
, tz
, _
= m
.groups()
308 return None # bad format
310 return _str2time(day
, mon
, yr
, hr
, min, sec
, tz
)
314 # -----------------------------------------------------------------------------
def unmatched(match):
    """Return the searched string with the matched span cut out.

    match is a regular-expression match object; the text before the start
    of the overall match (group 0) and the text after its end are joined
    and returned.

    """
    subject = match.string
    before = subject[:match.start(0)]
    after = subject[match.end(0):]
    return before + after
321 HEADER_TOKEN_RE
= re
.compile(r
"^\s*([^=\s;,]+)")
322 HEADER_QUOTED_VALUE_RE
= re
.compile(r
"^\s*=\s*\"([^
\"\\]*(?
:\\.[^
\"\\]*)*)\"")
323 HEADER_VALUE_RE = re.compile(r"^\s
*=\s
*([^\s
;,]*)")
324 HEADER_ESCAPE_RE = re.compile(r"\\(.)")
325 def split_header_words(header_values):
326 r"""Parse header values into a list of lists containing key,value pairs.
328 The function knows how to deal with ",", ";" and "=" as well as quoted
329 values after "=". A list of space separated tokens are parsed as if they
330 were separated by ";".
332 If the header_values passed as argument contains multiple values, then they
333 are treated as if they were a single value separated by comma ",".
335 This means that this function is useful for parsing header fields that
336 follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
337 the requirement for tokens).
340 header = (token | parameter) *( [";"] (token | parameter))
342 token = 1*<any CHAR except CTLs or separators>
343 separators = "(" | ")" | "<" | ">" | "@"
344 | "," | ";" | ":" | "\" |
<">
345 | "/" | "[" | "]" | "?
" | "="
346 | "{" | "}" | SP | HT
348 quoted-string = ( <"> *(qdtext | quoted
-pair
) <"> )
349 qdtext = <any TEXT except <">>
350 quoted
-pair
= "\" CHAR
352 parameter = attribute "=" value
354 value = token | quoted-string
356 Each header is represented by a list of key/value pairs. The value for a
357 simple token (not part of a parameter) is None. Syntactically incorrect
358 headers will not necessarily be parsed as you would want.
360 This is easier to describe with some examples:
362 >>> split_header_words(['foo="bar
"; port="80,81"; discard, bar=baz'])
363 [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
364 >>> split_header_words(['text/html; charset="iso
-8859-1"'])
365 [[('text/html', None), ('charset', 'iso-8859-1')]]
366 >>> split_header_words([r'Basic realm="\"foo
\bar
\""'])
367 [[('Basic', None), ('realm', '"foobar
"')]]
370 assert not isinstance(header_values, basestring)
372 for text in header_values:
376 m = HEADER_TOKEN_RE.search(text)
380 m = HEADER_QUOTED_VALUE_RE.search(text)
384 value = HEADER_ESCAPE_RE.sub(r"\
1", value)
386 m = HEADER_VALUE_RE.search(text)
387 if m: # unquoted value
390 value = value.rstrip()
392 # no value, a lone token
394 pairs.append((name, value))
395 elif text.lstrip().startswith(","):
396 # concatenated headers, as per RFC 2616 section 4.2
397 text = text.lstrip()[1:]
398 if pairs: result.append(pairs)
402 non_junk, nr_junk_chars = re.subn("^
[=\s
;]*", "", text)
403 assert nr_junk_chars > 0, (
404 "split_header_words bug
: '%s', '%s', %s" %
405 (orig_text, text, pairs))
407 if pairs: result.append(pairs)
410 HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
411 def join_header_words(lists):
412 """Do the inverse (almost) of the conversion done by split_header_words.
414 Takes a list of lists of (key, value) pairs and produces a single header
415 value. Attribute values are quoted if needed.
417 >>> join_header_words([[("text
/plain
", None), ("charset
", "iso
-8859/1")]])
418 'text/plain; charset="iso
-8859/1"'
419 >>> join_header_words([[("text
/plain
", None)], [("charset
", "iso
-8859/1")]])
420 'text/plain, charset="iso
-8859/1"'
428 if not re.search(r"^\w
+$
", v):
429 v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\
1", v) # escape " and \
433 if attr
: headers
.append("; ".join(attr
))
434 return ", ".join(headers
)
436 def parse_ns_headers(ns_headers
):
437 """Ad-hoc parser for Netscape protocol cookie-attributes.
439 The old Netscape cookie format for Set-Cookie can for instance contain
440 an unquoted "," in the expires field, so we have to use this ad-hoc
441 parser instead of split_header_words.
443 XXX This may not make the best possible effort to parse all the crap
444 that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient
445 parser is probably better, so could do worse than following that if
446 this ever gives any trouble.
448 Currently, this is also used for parsing RFC 2109 cookies.
451 known_attrs
= ("expires", "domain", "path", "secure",
452 # RFC 2109 attrs (may turn up in Netscape cookies, too)
456 for ns_header
in ns_headers
:
459 for ii
, param
in enumerate(re
.split(r
";\s*", ns_header
)):
460 param
= param
.rstrip()
461 if param
== "": continue
465 k
, v
= re
.split(r
"\s*=\s*", param
, 1)
469 if lc
in known_attrs
:
472 # This is an RFC 2109 cookie.
475 # convert expires date to seconds since epoch
476 if v
.startswith('"'): v
= v
[1:]
477 if v
.endswith('"'): v
= v
[:-1]
478 v
= http2time(v
) # None if invalid
483 pairs
.append(("version", "0"))
489 IPV4_RE
= re
.compile(r
"\.\d+$")
491 """Return True if text is a host domain name."""
493 # This may well be wrong. Which RFC is HDN defined in, if any (for
494 # the purposes of RFC 2965)?
495 # For the current implementation, what about IPv6? Remember to look
496 # at other uses of IPV4_RE also, if change this.
497 if IPV4_RE
.search(text
):
501 if text
[0] == "." or text
[-1] == ".":
505 def domain_match(A
, B
):
506 """Return True if domain A domain-matches domain B, according to RFC 2965.
508 A and B may be host domain names or IP addresses.
512 Host names can be specified either as an IP address or a HDN string.
513 Sometimes we compare one host name with another. (Such comparisons SHALL
514 be case-insensitive.) Host A's name domain-matches host B's if
516 * their host name strings string-compare equal; or
518 * A is a HDN string and has the form NB, where N is a non-empty
519 name string, B has the form .B', and B' is a HDN string. (So,
520 x.y.com domain-matches .Y.com but not Y.com.)
522 Note that domain-match is not a commutative operation: a.b.c.com
523 domain-matches .c.com, but not the reverse.
526 # Note that, if A or B are IP addresses, the only relevant part of the
527 # definition of the domain-match algorithm is the direct string-compare.
535 if i
== -1 or i
== 0:
536 # A does not have form NB, or N is the empty string
538 if not B
.startswith("."):
540 if not is_HDN(B
[1:]):
544 def liberal_is_HDN(text
):
545 """Return True if text is a sort-of-like a host domain name.
547 For accepting/blocking domains.
550 if IPV4_RE
.search(text
):
554 def user_domain_match(A
, B
):
555 """For blocking/accepting domains.
557 A and B may be host domain names or IP addresses.
562 if not (liberal_is_HDN(A
) and liberal_is_HDN(B
)):
567 initial_dot
= B
.startswith(".")
568 if initial_dot
and A
.endswith(B
):
570 if not initial_dot
and A
== B
:
574 cut_port_re
= re
.compile(r
":\d+$")
575 def request_host(request
):
576 """Return request-host, as defined by RFC 2965.
578 Variation from RFC: returned value is lowercased, for convenient
582 url
= request
.get_full_url()
583 host
= urlparse
.urlparse(url
)[1]
585 host
= request
.get_header("Host", "")
587 # remove port, if present
588 host
= cut_port_re
.sub("", host
, 1)
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.

    The effective name is the request-host with ".local" appended when the
    request-host contains no dot and does not match IPV4_RE; otherwise the
    two elements of the tuple are the same string.

    """
    req_host = request_host(request)
    if "." not in req_host and not IPV4_RE.search(req_host):
        return req_host, req_host + ".local"
    return req_host, req_host
602 def request_path(request
):
603 """request-URI, as defined by RFC 2965."""
604 url
= request
.get_full_url()
605 #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url)
606 #req_path = escape_path("".join(urlparse.urlparse(url)[2:]))
607 path
, parameters
, query
, frag
= urlparse
.urlparse(url
)[2:]
609 path
= "%s;%s" % (path
, parameters
)
610 path
= escape_path(path
)
611 req_path
= urlparse
.urlunparse(("", "", path
, "", query
, frag
))
612 if not req_path
.startswith("/"):
613 # fix bad RFC 2396 absoluteURI
614 req_path
= "/"+req_path
617 def request_port(request
):
618 host
= request
.get_host()
625 _debug("nonnumeric port: '%s'", port
)
628 port
= DEFAULT_HTTP_PORT
631 # Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
632 # need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
633 HTTP_PATH_SAFE
= "%/;:@&=+$,!~*'()"
634 ESCAPED_CHAR_RE
= re
.compile(r
"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return "%" followed by the upper-cased text of the match's group 1.

    Suitable as the replacement callable for a regex whose first group
    captures the two hex digits of a %-escape.

    """
    hex_digits = match.group(1)
    return "%%%s" % (hex_digits.upper(),)
637 def escape_path(path
):
638 """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
639 # There's no knowing what character encoding was used to create URLs
640 # containing %-escapes, but since we have to pick one to escape invalid
641 # path characters, we pick UTF-8, as recommended in the HTML 4.0
643 # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
644 # And here, kind of: draft-fielding-uri-rfc2396bis-03
645 # (And in draft IRI specification: draft-duerst-iri-05)
646 # (And here, for new URI schemes: RFC 2718)
647 if isinstance(path
, unicode):
648 path
= path
.encode("utf-8")
649 path
= urllib
.quote(path
, HTTP_PATH_SAFE
)
650 path
= ESCAPED_CHAR_RE
.sub(uppercase_escaped_char
, path
)
654 """Return reach of host h, as defined by RFC 2965, section 1.
656 The reach R of a host name H is defined as follows:
660 - H is the host domain name of a host; and,
662 - H has the form A.B; and
664 - A has no embedded (that is, interior) dots; and
666 - B has at least one embedded dot, or B is the string "local".
667 then the reach of H is .B.
669 * Otherwise, the reach of H is H.
671 >>> reach("www.acme.com")
673 >>> reach("acme.com")
675 >>> reach("acme.local")
681 #a = h[:i] # this line is only here to show what a is
684 if is_HDN(h
) and (i
>= 0 or b
== "local"):
688 def is_third_party(request
):
691 RFC 2965, section 3.3.6:
693 An unverifiable transaction is to a third-party host if its request-
694 host U does not domain-match the reach R of the request-host O in the
698 req_host
= request_host(request
)
699 if not domain_match(req_host
, reach(request
.get_origin_req_host())):
708 This class represents both Netscape and RFC 2965 cookies.
710 This is deliberately a very simple class. It just holds attributes. It's
711 possible to construct Cookie instances that don't comply with the cookie
712 standards. CookieJar.make_cookies is the factory function for Cookie
713 objects -- it deals with cookie parsing, supplying defaults, and
714 normalising to the representation used in this class. CookiePolicy is
715 responsible for checking them to see whether they should be accepted from
716 and returned to the server.
718 Note that the port may be present in the headers, but unspecified ("Port"
719 rather than "Port=80", for example); if this is the case, port is None.
723 def __init__(self
, version
, name
, value
,
724 port
, port_specified
,
725 domain
, domain_specified
, domain_initial_dot
,
726 path
, path_specified
,
736 if version
is not None: version
= int(version
)
737 if expires
is not None: expires
= int(expires
)
738 if port
is None and port_specified
is True:
739 raise ValueError("if port is None, port_specified must be false")
741 self
.version
= version
745 self
.port_specified
= port_specified
746 # normalise case, as per RFC 2965 section 3.3.3
747 self
.domain
= domain
.lower()
748 self
.domain_specified
= domain_specified
749 # Sigh. We need to know whether the domain given in the
750 # cookie-attribute had an initial dot, in order to follow RFC 2965
751 # (as clarified in draft errata). Needed for the returned $Domain
753 self
.domain_initial_dot
= domain_initial_dot
755 self
.path_specified
= path_specified
757 self
.expires
= expires
758 self
.discard
= discard
759 self
.comment
= comment
760 self
.comment_url
= comment_url
761 self
.rfc2109
= rfc2109
763 self
._rest
= copy
.copy(rest
)
def has_nonstandard_attr(self, name):
    """Return True if name is a key of this cookie's _rest mapping."""
    extras = self._rest
    return name in extras
def get_nonstandard_attr(self, name, default=None):
    """Return the value stored under name in this cookie's _rest mapping.

    default is returned when name is not present.

    """
    extras = self._rest
    return extras.get(name, default)
def set_nonstandard_attr(self, name, value):
    """Store value under name in this cookie's _rest mapping.

    Any value already stored under name is replaced.

    """
    extras = self._rest
    extras[name] = value
772 def is_expired(self
, now
=None):
773 if now
is None: now
= time
.time()
774 if (self
.expires
is not None) and (self
.expires
<= now
):
779 if self
.port
is None: p
= ""
780 else: p
= ":"+self
.port
781 limit
= self
.domain
+ p
+ self
.path
782 if self
.value
is not None:
783 namevalue
= "%s=%s" % (self
.name
, self
.value
)
785 namevalue
= self
.name
786 return "<Cookie %s for %s>" % (namevalue
, limit
)
790 for name
in ("version", "name", "value",
791 "port", "port_specified",
792 "domain", "domain_specified", "domain_initial_dot",
793 "path", "path_specified",
794 "secure", "expires", "discard", "comment", "comment_url",
796 attr
= getattr(self
, name
)
797 args
.append("%s=%s" % (name
, repr(attr
)))
798 args
.append("rest=%s" % repr(self
._rest
))
799 args
.append("rfc2109=%s" % repr(self
.rfc2109
))
800 return "Cookie(%s)" % ", ".join(args
)
804 """Defines which cookies get accepted from and returned to server.
806 May also modify cookies, though this is probably a bad idea.
808 The subclass DefaultCookiePolicy defines the standard rules for Netscape
809 and RFC 2965 cookies -- override that if you want a customised policy.
def set_ok(self, cookie, request):
    """Return true if (and only if) cookie should be accepted from server.

    Currently, pre-expired cookies never get this far -- the CookieJar
    class deletes such cookies itself.

    This version always raises NotImplementedError; a concrete policy is
    expected to supply the real decision.

    """
    raise NotImplementedError
def return_ok(self, cookie, request):
    """Return true if (and only if) cookie should be returned to server.

    This version always raises NotImplementedError; a concrete policy is
    expected to supply the real decision.

    """
    raise NotImplementedError
825 def domain_return_ok(self
, domain
, request
):
826 """Return false if cookies should not be returned, given cookie domain.
830 def path_return_ok(self
, path
, request
):
831 """Return false if cookies should not be returned, given cookie path.
836 class DefaultCookiePolicy(CookiePolicy
):
837 """Implements the standard rules for accepting and returning cookies."""
839 DomainStrictNoDots
= 1
840 DomainStrictNonDomain
= 2
841 DomainRFC2965Match
= 4
844 DomainStrict
= DomainStrictNoDots|DomainStrictNonDomain
847 blocked_domains
=None, allowed_domains
=None,
848 netscape
=True, rfc2965
=False,
849 rfc2109_as_netscape
=None,
852 strict_rfc2965_unverifiable
=True,
853 strict_ns_unverifiable
=False,
854 strict_ns_domain
=DomainLiberal
,
855 strict_ns_set_initial_dollar
=False,
856 strict_ns_set_path
=False,
858 """Constructor arguments should be passed as keyword arguments only."""
859 self
.netscape
= netscape
860 self
.rfc2965
= rfc2965
861 self
.rfc2109_as_netscape
= rfc2109_as_netscape
862 self
.hide_cookie2
= hide_cookie2
863 self
.strict_domain
= strict_domain
864 self
.strict_rfc2965_unverifiable
= strict_rfc2965_unverifiable
865 self
.strict_ns_unverifiable
= strict_ns_unverifiable
866 self
.strict_ns_domain
= strict_ns_domain
867 self
.strict_ns_set_initial_dollar
= strict_ns_set_initial_dollar
868 self
.strict_ns_set_path
= strict_ns_set_path
870 if blocked_domains
is not None:
871 self
._blocked
_domains
= tuple(blocked_domains
)
873 self
._blocked
_domains
= ()
875 if allowed_domains
is not None:
876 allowed_domains
= tuple(allowed_domains
)
877 self
._allowed
_domains
= allowed_domains
def blocked_domains(self):
    """Return the sequence of blocked domains (as a tuple).

    The stored _blocked_domains object itself is returned, not a copy.

    """
    blocked = self._blocked_domains
    return blocked
def set_blocked_domains(self, blocked_domains):
    """Set the sequence of blocked domains.

    blocked_domains may be any iterable; it is stored as a tuple.

    """
    frozen = tuple(blocked_domains)
    self._blocked_domains = frozen
886 def is_blocked(self
, domain
):
887 for blocked_domain
in self
._blocked
_domains
:
888 if user_domain_match(domain
, blocked_domain
):
def allowed_domains(self):
    """Return None, or the sequence of allowed domains (as a tuple).

    The stored _allowed_domains object itself is returned, not a copy.

    """
    allowed = self._allowed_domains
    return allowed
def set_allowed_domains(self, allowed_domains):
    """Set the sequence of allowed domains, or None.

    None is stored unchanged; any other iterable is stored as a tuple.

    """
    if allowed_domains is None:
        self._allowed_domains = None
    else:
        self._allowed_domains = tuple(allowed_domains)
901 def is_not_allowed(self
, domain
):
902 if self
._allowed
_domains
is None:
904 for allowed_domain
in self
._allowed
_domains
:
905 if user_domain_match(domain
, allowed_domain
):
909 def set_ok(self
, cookie
, request
):
911 If you override .set_ok(), be sure to call this method. If it returns
912 false, so should your subclass (assuming your subclass wants to be more
913 strict about which cookies to accept).
916 _debug(" - checking cookie %s=%s", cookie
.name
, cookie
.value
)
918 assert cookie
.name
is not None
920 for n
in "version", "verifiability", "name", "path", "domain", "port":
921 fn_name
= "set_ok_"+n
922 fn
= getattr(self
, fn_name
)
923 if not fn(cookie
, request
):
928 def set_ok_version(self
, cookie
, request
):
929 if cookie
.version
is None:
930 # Version is always set to 0 by parse_ns_headers if it's a Netscape
931 # cookie, so this must be an invalid RFC 2965 cookie.
932 _debug(" Set-Cookie2 without version attribute (%s=%s)",
933 cookie
.name
, cookie
.value
)
935 if cookie
.version
> 0 and not self
.rfc2965
:
936 _debug(" RFC 2965 cookies are switched off")
938 elif cookie
.version
== 0 and not self
.netscape
:
939 _debug(" Netscape cookies are switched off")
943 def set_ok_verifiability(self
, cookie
, request
):
944 if request
.is_unverifiable() and is_third_party(request
):
945 if cookie
.version
> 0 and self
.strict_rfc2965_unverifiable
:
946 _debug(" third-party RFC 2965 cookie during "
947 "unverifiable transaction")
949 elif cookie
.version
== 0 and self
.strict_ns_unverifiable
:
950 _debug(" third-party Netscape cookie during "
951 "unverifiable transaction")
955 def set_ok_name(self
, cookie
, request
):
956 # Try and stop servers setting V0 cookies designed to hack other
957 # servers that know both V0 and V1 protocols.
958 if (cookie
.version
== 0 and self
.strict_ns_set_initial_dollar
and
959 cookie
.name
.startswith("$")):
960 _debug(" illegal name (starts with '$'): '%s'", cookie
.name
)
964 def set_ok_path(self
, cookie
, request
):
965 if cookie
.path_specified
:
966 req_path
= request_path(request
)
967 if ((cookie
.version
> 0 or
968 (cookie
.version
== 0 and self
.strict_ns_set_path
)) and
969 not req_path
.startswith(cookie
.path
)):
970 _debug(" path attribute %s is not a prefix of request "
971 "path %s", cookie
.path
, req_path
)
975 def set_ok_domain(self
, cookie
, request
):
976 if self
.is_blocked(cookie
.domain
):
977 _debug(" domain %s is in user block-list", cookie
.domain
)
979 if self
.is_not_allowed(cookie
.domain
):
980 _debug(" domain %s is not in user allow-list", cookie
.domain
)
982 if cookie
.domain_specified
:
983 req_host
, erhn
= eff_request_host(request
)
984 domain
= cookie
.domain
985 if self
.strict_domain
and (domain
.count(".") >= 2):
986 # XXX This should probably be compared with the Konqueror
987 # (kcookiejar.cpp) and Mozilla implementations, but it's a
989 i
= domain
.rfind(".")
990 j
= domain
.rfind(".", 0, i
)
991 if j
== 0: # domain like .foo.bar
994 if sld
.lower() in ("co", "ac", "com", "edu", "org", "net",
995 "gov", "mil", "int", "aero", "biz", "cat", "coop",
996 "info", "jobs", "mobi", "museum", "name", "pro",
997 "travel", "eu") and len(tld
) == 2:
999 _debug(" country-code second level domain %s", domain
)
1001 if domain
.startswith("."):
1002 undotted_domain
= domain
[1:]
1004 undotted_domain
= domain
1005 embedded_dots
= (undotted_domain
.find(".") >= 0)
1006 if not embedded_dots
and domain
!= ".local":
1007 _debug(" non-local domain %s contains no embedded dot",
1010 if cookie
.version
== 0:
1011 if (not erhn
.endswith(domain
) and
1012 (not erhn
.startswith(".") and
1013 not ("."+erhn
).endswith(domain
))):
1014 _debug(" effective request-host %s (even with added "
1015 "initial dot) does not end end with %s",
1018 if (cookie
.version
> 0 or
1019 (self
.strict_ns_domain
& self
.DomainRFC2965Match
)):
1020 if not domain_match(erhn
, domain
):
1021 _debug(" effective request-host %s does not domain-match "
1024 if (cookie
.version
> 0 or
1025 (self
.strict_ns_domain
& self
.DomainStrictNoDots
)):
1026 host_prefix
= req_host
[:-len(domain
)]
1027 if (host_prefix
.find(".") >= 0 and
1028 not IPV4_RE
.search(req_host
)):
1029 _debug(" host prefix %s for domain %s contains a dot",
1030 host_prefix
, domain
)
1034 def set_ok_port(self
, cookie
, request
):
1035 if cookie
.port_specified
:
1036 req_port
= request_port(request
)
1037 if req_port
is None:
1040 req_port
= str(req_port
)
1041 for p
in cookie
.port
.split(","):
1045 _debug(" bad port %s (not numeric)", p
)
1050 _debug(" request port (%s) not found in %s",
1051 req_port
, cookie
.port
)
1055 def return_ok(self
, cookie
, request
):
1057 If you override .return_ok(), be sure to call this method. If it
1058 returns false, so should your subclass (assuming your subclass wants to
1059 be more strict about which cookies to return).
1062 # Path has already been checked by .path_return_ok(), and domain
1063 # blocking done by .domain_return_ok().
1064 _debug(" - checking cookie %s=%s", cookie
.name
, cookie
.value
)
1066 for n
in "version", "verifiability", "secure", "expires", "port", "domain":
1067 fn_name
= "return_ok_"+n
1068 fn
= getattr(self
, fn_name
)
1069 if not fn(cookie
, request
):
1073 def return_ok_version(self
, cookie
, request
):
1074 if cookie
.version
> 0 and not self
.rfc2965
:
1075 _debug(" RFC 2965 cookies are switched off")
1077 elif cookie
.version
== 0 and not self
.netscape
:
1078 _debug(" Netscape cookies are switched off")
1082 def return_ok_verifiability(self
, cookie
, request
):
1083 if request
.is_unverifiable() and is_third_party(request
):
1084 if cookie
.version
> 0 and self
.strict_rfc2965_unverifiable
:
1085 _debug(" third-party RFC 2965 cookie during unverifiable "
1088 elif cookie
.version
== 0 and self
.strict_ns_unverifiable
:
1089 _debug(" third-party Netscape cookie during unverifiable "
1094 def return_ok_secure(self
, cookie
, request
):
1095 if cookie
.secure
and request
.get_type() != "https":
1096 _debug(" secure cookie with non-secure request")
1100 def return_ok_expires(self
, cookie
, request
):
1101 if cookie
.is_expired(self
._now
):
1102 _debug(" cookie expired")
1106 def return_ok_port(self
, cookie
, request
):
1108 req_port
= request_port(request
)
1109 if req_port
is None:
1111 for p
in cookie
.port
.split(","):
1115 _debug(" request port %s does not match cookie port %s",
1116 req_port
, cookie
.port
)
1120 def return_ok_domain(self
, cookie
, request
):
1121 req_host
, erhn
= eff_request_host(request
)
1122 domain
= cookie
.domain
1124 # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
1125 if (cookie
.version
== 0 and
1126 (self
.strict_ns_domain
& self
.DomainStrictNonDomain
) and
1127 not cookie
.domain_specified
and domain
!= erhn
):
1128 _debug(" cookie with unspecified domain does not string-compare "
1129 "equal to request domain")
1132 if cookie
.version
> 0 and not domain_match(erhn
, domain
):
1133 _debug(" effective request-host name %s does not domain-match "
1134 "RFC 2965 cookie domain %s", erhn
, domain
)
1136 if cookie
.version
== 0 and not ("."+erhn
).endswith(domain
):
1137 _debug(" request-host %s does not match Netscape cookie domain "
1138 "%s", req_host
, domain
)
1142 def domain_return_ok(self
, domain
, request
):
1143 # Liberal check of the domain. This is here as an optimization to avoid
1144 # having to load lots of MSIE cookie files unless necessary.
1145 req_host
, erhn
= eff_request_host(request
)
1146 if not req_host
.startswith("."):
1147 req_host
= "."+req_host
1148 if not erhn
.startswith("."):
1150 if not (req_host
.endswith(domain
) or erhn
.endswith(domain
)):
1151 #_debug(" request domain %s does not match cookie domain %s",
1155 if self
.is_blocked(domain
):
1156 _debug(" domain %s is in user block-list", domain
)
1158 if self
.is_not_allowed(domain
):
1159 _debug(" domain %s is not in user allow-list", domain
)
1164 def path_return_ok(self
, path
, request
):
1165 _debug("- checking cookie path=%s", path
)
1166 req_path
= request_path(request
)
1167 if not req_path
.startswith(path
):
1168 _debug(" %s does not path-match %s", req_path
, path
)
1173 def vals_sorted_by_key(adict
):
1176 return map(adict
.get
, keys
)
1178 def deepvalues(mapping
):
1179 """Iterates over nested mapping, depth-first, in sorted order by key."""
1180 values
= vals_sorted_by_key(mapping
)
1185 except AttributeError:
1189 for subobj
in deepvalues(obj
):
1195 # Used as second parameter to dict.get() method, to distinguish absent
1196 # dict key from one with a None value.
1200 """Collection of HTTP cookies.
1202 You may not need to know about this class: try
1203 urllib2.build_opener(HTTPCookieProcessor).open(url).
1207 non_word_re
= re
.compile(r
"\W")
1208 quote_re
= re
.compile(r
"([\"\\])")
1209 strict_domain_re = re.compile(r"\
.?
[^
.]*")
1210 domain_re = re.compile(r"[^
.]*")
1211 dots_re = re.compile(r"^\
.+")
1213 magic_re = r"^\
#LWP-Cookies-(\d+\.\d+)"
def __init__(self, policy=None):
    """Create an empty jar.

    policy: object deciding which cookies are accepted and returned; a
        DefaultCookiePolicy is created when None is given.
    """
    if policy is None:
        policy = DefaultCookiePolicy()

    # Cookie store; set_cookie() fills it as domain -> path -> name -> Cookie.
    self._cookies = {}
    # Re-entrant, because locked methods call other locked methods.
    self._cookies_lock = _threading.RLock()
    self._policy = policy
def set_policy(self, policy):
    """Replace the policy object used for later cookie decisions."""
    self._policy = policy
def _cookies_for_domain(self, domain, request):
    """Return the cookies stored under `domain` that policy allows sending.

    The domain is checked first, then each stored path, and finally each
    individual cookie, so cheap rejections happen before expensive ones.
    """
    if not self._policy.domain_return_ok(domain, request):
        return []
    _debug("Checking %s for cookies to return", domain)

    matched = []
    for path, by_name in self._cookies[domain].items():
        if self._policy.path_return_ok(path, request):
            for cookie in by_name.values():
                if self._policy.return_ok(cookie, request):
                    _debug(" it's a match")
                    matched.append(cookie)
                else:
                    _debug(" not returning cookie")
    return matched
1244 def _cookies_for_request(self
, request
):
1245 """Return a list of cookies to be returned to server."""
1247 for domain
in self
._cookies
.keys():
1248 cookies
.extend(self
._cookies
_for
_domain
(domain
, request
))
1251 def _cookie_attrs(self
, cookies
):
1252 """Return a list of cookie-attributes to be returned to server.
1254 like ['foo="bar"; $Path="/"', ...]
1256 The $Version attribute is also added when appropriate (currently only
1260 # add cookies in order of most specific (ie. longest) path first
1261 def decreasing_size(a
, b
): return cmp(len(b
.path
), len(a
.path
))
1262 cookies
.sort(decreasing_size
)
1267 for cookie
in cookies
:
1268 # set version of Cookie header
1270 # What should it be if multiple matching Set-Cookie headers have
1271 # different versions themselves?
1272 # Answer: there is no answer; was supposed to be settled by
1273 # RFC 2965 errata, but that may never appear...
1274 version
= cookie
.version
1278 attrs
.append("$Version=%s" % version
)
1280 # quote cookie value if necessary
1281 # (not for Netscape protocol, which already has any quotes
1282 # intact, due to the poorly-specified Netscape Cookie: syntax)
1283 if ((cookie
.value
is not None) and
1284 self
.non_word_re
.search(cookie
.value
) and version
> 0):
1285 value
= self
.quote_re
.sub(r
"\\\1", cookie
.value
)
1287 value
= cookie
.value
1289 # add cookie-attributes to be returned in Cookie header
1290 if cookie
.value
is None:
1291 attrs
.append(cookie
.name
)
1293 attrs
.append("%s=%s" % (cookie
.name
, value
))
1295 if cookie
.path_specified
:
1296 attrs
.append('$Path="%s"' % cookie
.path
)
1297 if cookie
.domain
.startswith("."):
1298 domain
= cookie
.domain
1299 if (not cookie
.domain_initial_dot
and
1300 domain
.startswith(".")):
1302 attrs
.append('$Domain="%s"' % domain
)
1303 if cookie
.port
is not None:
1305 if cookie
.port_specified
:
1306 p
= p
+ ('="%s"' % cookie
.port
)
def add_cookie_header(self, request):
    """Add correct Cookie: header to request (urllib2.Request object).

    The Cookie2 header is also added unless policy.hide_cookie2 is true.
    Existing Cookie / Cookie2 headers on the request are never overwritten.
    Expired cookies are purged from the jar afterwards.
    """
    _debug("add_cookie_header")
    lock = self._cookies_lock
    lock.acquire()
    try:
        # The policy and the jar must agree on what "now" is.
        now = int(time.time())
        self._policy._now = now
        self._now = now

        cookies = self._cookies_for_request(request)
        attrs = self._cookie_attrs(cookies)
        if attrs and not request.has_header("Cookie"):
            request.add_unredirected_header("Cookie", "; ".join(attrs))

        # if necessary, advertise that we know RFC 2965
        policy = self._policy
        if (policy.rfc2965 and not policy.hide_cookie2 and
                not request.has_header("Cookie2")):
            for cookie in cookies:
                if cookie.version != 1:
                    request.add_unredirected_header(
                        "Cookie2", '$Version="1"')
                    break
    finally:
        lock.release()

    self.clear_expired_cookies()
1344 def _normalized_cookie_tuples(self
, attrs_set
):
1345 """Return list of tuples containing normalised cookie information.
1347 attrs_set is the list of lists of key,value pairs extracted from
1348 the Set-Cookie or Set-Cookie2 headers.
1350 Tuples are name, value, standard, rest, where name and value are the
1351 cookie name and value, standard is a dictionary containing the standard
1352 cookie-attributes (discard, secure, version, expires or max-age,
1353 domain, path and port) and rest is a dictionary containing the rest of
1354 the cookie-attributes.
1359 boolean_attrs
= "discard", "secure"
1360 value_attrs
= ("version",
1361 "expires", "max-age",
1362 "domain", "path", "port",
1363 "comment", "commenturl")
1365 for cookie_attrs
in attrs_set
:
1366 name
, value
= cookie_attrs
[0]
1368 # Build dictionary of standard cookie-attributes (standard) and
1369 # dictionary of other cookie-attributes (rest).
1371 # Note: expiry time is normalised to seconds since epoch. V0
1372 # cookies should have the Expires cookie-attribute, and V1 cookies
1373 # should have Max-Age, but since V1 includes RFC 2109 cookies (and
1374 # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
1375 # accept either (but prefer Max-Age).
1382 for k
, v
in cookie_attrs
[1:]:
1384 # don't lose case distinction for unknown fields
1385 if lc
in value_attrs
or lc
in boolean_attrs
:
1387 if k
in boolean_attrs
and v
is None:
1388 # boolean cookie-attribute is present, but has no value
1389 # (like "discard", rather than "port=80")
1392 # only first value is significant
1396 _debug(" missing value for domain attribute")
1399 # RFC 2965 section 3.3.3
1403 # Prefer max-age to expires (like Mozilla)
1406 _debug(" missing or invalid value for expires "
1407 "attribute: treating as session cookie")
1414 _debug(" missing or invalid (non-numeric) value for "
1415 "max-age attribute")
1418 # convert RFC 2965 Max-Age to seconds since epoch
1419 # XXX Strictly you're supposed to follow RFC 2616
1420 # age-calculation rules. Remember that zero Max-Age is a
1421 # is a request to discard (old and new) cookie, though.
1424 if (k
in value_attrs
) or (k
in boolean_attrs
):
1426 k
not in ("port", "comment", "commenturl")):
1427 _debug(" missing value for %s attribute" % k
)
1437 cookie_tuples
.append((name
, value
, standard
, rest
))
1439 return cookie_tuples
1441 def _cookie_from_cookie_tuple(self
, tup
, request
):
1442 # standard is dict of standard cookie-attributes, rest is dict of the
1444 name
, value
, standard
, rest
= tup
1446 domain
= standard
.get("domain", Absent
)
1447 path
= standard
.get("path", Absent
)
1448 port
= standard
.get("port", Absent
)
1449 expires
= standard
.get("expires", Absent
)
1451 # set the easy defaults
1452 version
= standard
.get("version", None)
1453 if version
is not None: version
= int(version
)
1454 secure
= standard
.get("secure", False)
1455 # (discard is also set if expires is Absent)
1456 discard
= standard
.get("discard", False)
1457 comment
= standard
.get("comment", None)
1458 comment_url
= standard
.get("commenturl", None)
1461 if path
is not Absent
and path
!= "":
1462 path_specified
= True
1463 path
= escape_path(path
)
1465 path_specified
= False
1466 path
= request_path(request
)
1470 # Netscape spec parts company from reality here
1474 if len(path
) == 0: path
= "/"
1476 # set default domain
1477 domain_specified
= domain
is not Absent
1478 # but first we have to remember whether it starts with a dot
1479 domain_initial_dot
= False
1480 if domain_specified
:
1481 domain_initial_dot
= bool(domain
.startswith("."))
1482 if domain
is Absent
:
1483 req_host
, erhn
= eff_request_host(request
)
1485 elif not domain
.startswith("."):
1489 port_specified
= False
1490 if port
is not Absent
:
1492 # Port attr present, but has no value: default to request port.
1493 # Cookie should then only be sent back on that port.
1494 port
= request_port(request
)
1496 port_specified
= True
1497 port
= re
.sub(r
"\s+", "", port
)
1499 # No port attr present. Cookie can be sent back on any port.
1502 # set default expires and discard
1503 if expires
is Absent
:
1506 elif expires
<= self
._now
:
1507 # Expiry date in past is request to delete cookie. This can't be
1508 # in DefaultCookiePolicy, because can't delete cookies there.
1510 self
.clear(domain
, path
, name
)
1513 _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
1517 return Cookie(version
,
1519 port
, port_specified
,
1520 domain
, domain_specified
, domain_initial_dot
,
1521 path
, path_specified
,
1529 def _cookies_from_attrs_set(self
, attrs_set
, request
):
1530 cookie_tuples
= self
._normalized
_cookie
_tuples
(attrs_set
)
1533 for tup
in cookie_tuples
:
1534 cookie
= self
._cookie
_from
_cookie
_tuple
(tup
, request
)
1535 if cookie
: cookies
.append(cookie
)
1538 def _process_rfc2109_cookies(self
, cookies
):
1539 rfc2109_as_ns
= getattr(self
._policy
, 'rfc2109_as_netscape', None)
1540 if rfc2109_as_ns
is None:
1541 rfc2109_as_ns
= not self
._policy
.rfc2965
1542 for cookie
in cookies
:
1543 if cookie
.version
== 1:
1544 cookie
.rfc2109
= True
1546 # treat 2109 cookies as Netscape cookies rather than
1547 # as RFC2965 cookies
def make_cookies(self, response, request):
    """Return sequence of Cookie objects extracted from response object."""
    # get cookie-attributes for RFC 2965 and Netscape protocols
    headers = response.info()
    rfc2965_hdrs = headers.getheaders("Set-Cookie2")
    ns_hdrs = headers.getheaders("Set-Cookie")

    rfc2965 = self._policy.rfc2965
    netscape = self._policy.netscape

    # Carry on only if at least one header kind is both present and
    # enabled by the policy.
    if not ((rfc2965_hdrs and rfc2965) or (ns_hdrs and netscape)):
        return []  # no relevant cookie headers: quick exit

    try:
        cookies = self._cookies_from_attrs_set(
            split_header_words(rfc2965_hdrs), request)
    except Exception:
        _warn_unhandled_exception()
        cookies = []

    if not (ns_hdrs and netscape):
        return cookies

    try:
        # RFC 2109 and Netscape cookies
        ns_cookies = self._cookies_from_attrs_set(
            parse_ns_headers(ns_hdrs), request)
    except Exception:
        _warn_unhandled_exception()
        ns_cookies = []
    self._process_rfc2109_cookies(ns_cookies)

    # Look for Netscape cookies (from Set-Cookie headers) that match
    # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
    # For each match, keep the RFC 2965 cookie and ignore the Netscape
    # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
    # bundled in with the Netscape cookies for this purpose, which is
    # reasonable behaviour.
    if rfc2965:
        seen = {}
        for cookie in cookies:
            seen[(cookie.domain, cookie.path, cookie.name)] = None
        ns_cookies = [
            ns_cookie for ns_cookie in ns_cookies
            if (ns_cookie.domain, ns_cookie.path, ns_cookie.name) not in seen]

    if ns_cookies:
        cookies.extend(ns_cookies)

    return cookies
def set_cookie_if_ok(self, cookie, request):
    """Set a cookie if policy says it's OK to do so."""
    lock = self._cookies_lock
    lock.acquire()
    try:
        # Refresh the shared notion of "now" before consulting the policy.
        now = int(time.time())
        self._policy._now = now
        self._now = now

        allowed = self._policy.set_ok(cookie, request)
        if allowed:
            self.set_cookie(cookie)
    finally:
        lock.release()
def set_cookie(self, cookie):
    """Set a cookie, without checking whether or not it should be set."""
    store = self._cookies
    self._cookies_lock.acquire()
    try:
        # Storage is nested: domain -> path -> name -> Cookie.  A cookie
        # with the same domain, path and name replaces the existing one.
        by_path = store.setdefault(cookie.domain, {})
        by_name = by_path.setdefault(cookie.path, {})
        by_name[cookie.name] = cookie
    finally:
        self._cookies_lock.release()
def extract_cookies(self, response, request):
    """Extract cookies from response, where allowable given the request."""
    _debug("extract_cookies: %s", response.info())
    lock = self._cookies_lock
    lock.acquire()
    try:
        # Refresh the shared notion of "now" before parsing and checking.
        now = int(time.time())
        self._policy._now = now
        self._now = now

        for cookie in self.make_cookies(response, request):
            if not self._policy.set_ok(cookie, request):
                continue
            _debug(" setting cookie: %s", cookie)
            self.set_cookie(cookie)
    finally:
        lock.release()
def clear(self, domain=None, path=None, name=None):
    """Clear some cookies.

    Invoking this method without arguments will clear all cookies. If
    given a single argument, only cookies belonging to that domain will be
    removed. If given two arguments, cookies belonging to the specified
    path within that domain are removed. If given three arguments, then
    the cookie with the specified name, path and domain is removed.

    Raises KeyError if no matching cookie exists, and ValueError if a more
    specific argument is given without the less specific ones it needs.
    """
    # Handle the most specific request first.
    if name is not None:
        if domain is None or path is None:
            raise ValueError(
                "domain and path must be given to remove a cookie by name")
        del self._cookies[domain][path][name]
        return

    if path is not None:
        if domain is None:
            raise ValueError(
                "domain must be given to remove cookies by path")
        del self._cookies[domain][path]
        return

    if domain is not None:
        del self._cookies[domain]
        return

    self._cookies = {}
def clear_session_cookies(self):
    """Discard all session cookies.

    Note that the .save() method won't save session cookies anyway, unless
    you ask otherwise by passing a true ignore_discard argument.
    """
    self._cookies_lock.acquire()
    try:
        for cookie in self:
            if not cookie.discard:
                continue
            self.clear(cookie.domain, cookie.path, cookie.name)
    finally:
        self._cookies_lock.release()
def clear_expired_cookies(self):
    """Discard all expired cookies.

    You probably don't need to call this method: expired cookies are never
    sent back to the server (provided you're using DefaultCookiePolicy),
    this method is called by CookieJar itself every so often, and the
    .save() method won't save expired cookies anyway (unless you ask
    otherwise by passing a true ignore_expires argument).
    """
    self._cookies_lock.acquire()
    try:
        # One timestamp for the whole sweep.
        now = time.time()
        for cookie in self:
            if not cookie.is_expired(now):
                continue
            self.clear(cookie.domain, cookie.path, cookie.name)
    finally:
        self._cookies_lock.release()
def __iter__(self):
    """Iterate over every stored cookie, in sorted order by key."""
    store = self._cookies
    return deepvalues(store)
1709 """Return number of contained cookies."""
1711 for cookie
in self
: i
= i
+ 1
1716 for cookie
in self
: r
.append(repr(cookie
))
1717 return "<%s[%s]>" % (self
.__class
__, ", ".join(r
))
1721 for cookie
in self
: r
.append(str(cookie
))
1722 return "<%s[%s]>" % (self
.__class
__, ", ".join(r
))
# derives from IOError for backwards-compatibility with Python 2.4.0
class LoadError(IOError):
    """Error raised when a cookie file cannot be loaded."""
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    This base class implements neither save() nor the _really_load() hook
    that load() calls; subclasses are expected to provide both.
    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        filename: default file used by load() and revert(); must be
            string-like when given.
        delayload: stored (as a bool) on the instance; not used by this
            class itself.
        policy: passed through to CookieJar.__init__().

        Raises ValueError if filename is given but is not string-like.
        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            # Duck-type check: anything that concatenates with a string
            # is accepted.
            try:
                filename+""
            except Exception:
                # Not a bare except: KeyboardInterrupt and SystemExit must
                # not be turned into a ValueError.
                raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file."""
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Falls back to self.filename when filename is None; raises
        ValueError if neither is set.
        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename)
        try:
            self._really_load(f, filename, ignore_discard, ignore_expires)
        finally:
            f.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.
        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        self._cookies_lock.acquire()
        try:
            # Keep a deep copy so a failed load can be rolled back.
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except (LoadError, IOError):
                self._cookies = old_state
                raise
        finally:
            self._cookies_lock.release()
1788 from _LWPCookieJar
import LWPCookieJar
, lwp_cookie_str
1789 from _MozillaCookieJar
import MozillaCookieJar