1 """Parse, manipulate and render cookies in a convenient way.
3 Copyright (c) 2011-2014, Sasha Hart.
5 Permission is hereby granted, free of charge, to any person obtaining a copy of
6 this software and associated documentation files (the "Software"), to deal in
7 the Software without restriction, including without limitation the rights to
8 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9 of the Software, and to permit persons to whom the Software is furnished to do
10 so, subject to the following conditions:
12 The above copyright notice and this permission notice shall be included in all
13 copies or substantial portions of the Software.
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 from unicodedata
import normalize
29 if sys
.version_info
>= (3, 0, 0): # pragma: no cover
30 from urllib
.parse
import (
31 quote
as _default_quote
, unquote
as _default_unquote
)
34 else: # pragma: no cover
36 quote
as _default_quote
, unquote
as _default_unquote
)
def _total_seconds(td):
    """Return the length of a timedelta-like object in seconds.

    Wrapper to work around lack of .total_seconds() method in Python 3.1.
    If `td` has a total_seconds() method it is used directly; otherwise the
    value is computed from the days, seconds and microseconds attributes.
    """
    if hasattr(td, "total_seconds"):
        return td.total_seconds()
    # There are 1,000,000 microseconds in a second.
    return td.days * 3600 * 24 + td.seconds + td.microseconds / 1000000.0
# See test_encoding_assumptions for how these magical safe= parameters were
# figured out. The differences are because of what cookie-octet may contain
# vs. the more liberal spec for extension-av.
def default_cookie_quote(item):
    "URL-quote item using the safe= set chosen for cookie values"
    return _default_quote(item, safe='!#$%&\'()*+/:<=>?@[]^`{|}~')


def default_extension_quote(item):
    "URL-quote item using the safe= set chosen for extension-av"
    return _default_quote(item, safe=' !"#$%&\'()*+,/:<=>?@[\\]^`{|}~')


# Decoding needs no customization: use the imported unquote as it is.
default_unquote = _default_unquote
def _report_invalid_cookie(data):
    "How this module logs a bad cookie when exception suppressed"
    # Logged at ERROR level; data is formatted with %r by the logging call.
    logging.error("invalid Cookie: %r", data)
def _report_unknown_attribute(name):
    "How this module logs an unknown attribute when exception suppressed"
    # Logged at ERROR level; name is formatted with %r by the logging call.
    logging.error("unknown Cookie attribute: %r", name)
def _report_invalid_attribute(name, value, reason):
    "How this module logs a bad attribute when exception suppressed"
    # Note the argument order: the reason comes first in the message, then
    # the name=value pair, both formatted with %r.
    logging.error("invalid Cookie attribute (%s): %r=%r", reason, name, value)
class CookieError(Exception):
    """Base class for this module's exceptions, so you can catch them all if
    you want to.
    """
    def __init__(self):
        # Takes no arguments; subclasses store their details as attributes.
        Exception.__init__(self)
class InvalidCookieError(CookieError):
    """Raised when attempting to parse or construct a cookie which is
    syntactically invalid (in any way that has possibly serious implications).

    The offending input is kept in .data and an optional explanation in
    .message; str() of the exception shows both.
    """
    def __init__(self, data=None, message=""):
        CookieError.__init__(self)
        self.data = data
        self.message = message

    def __str__(self):
        return '%r %r' % (self.message, self.data)
class InvalidCookieAttributeError(CookieError):
    """Raised when setting an invalid attribute on a Cookie.

    The attribute name, the rejected value and an optional reason are kept
    in .name, .value and .reason.
    """
    def __init__(self, name, value, reason=None):
        CookieError.__init__(self)
        self.name = name
        self.value = value
        self.reason = reason

    def __str__(self):
        prefix = ("%s: " % self.reason) if self.reason else ""
        # Without a name there is only the raw value to show.
        if self.name is None:
            return '%s%r' % (prefix, self.value)
        return '%s%r = %r' % (prefix, self.name, self.value)
110 class Definitions(object):
111 """Namespace to hold definitions used in cookie parsing (mostly pieces of
114 These are separated out for individual testing against examples and RFC
115 grammar, and kept here to avoid cluttering other namespaces.
117 # Most of the following are set down or cited in RFC 6265 4.1.1
119 # This is the grammar's 'cookie-name' defined as 'token' per RFC 2616 2.2.
120 COOKIE_NAME
= r
"!#$%&'*+\-.0-9A-Z^_`a-z|~"
122 # 'cookie-octet' - as used twice in definition of 'cookie-value'
123 COOKIE_OCTET
= r
"\x21\x23-\x2B\--\x3A\x3C-\x5B\]-\x7E"
125 # extension-av - also happens to be a superset of cookie-av and path-value
126 EXTENSION_AV
= """ !"#$%&\\\\'()*+,\-./0-9:<=>?@A-Z[\\]^_`a-z{|}~"""
128 # This is for the first pass parse on a Set-Cookie: response header. It
129 # includes cookie-value, cookie-pair, set-cookie-string, cookie-av.
130 # extension-av is used to extract the chunk containing variable-length,
131 # unordered attributes. The second pass then uses ATTR to break out each
132 # attribute and extract it appropriately.
    # As compared with the RFC production grammar, it is much more liberal with
134 # space characters, in order not to break on data made by barbarians.
135 SET_COOKIE_HEADER
= """(?x) # Verbose mode
136 ^(?:Set-Cookie:[ ]*)?
140 # Accept anything in quotes - this is not RFC 6265, but might ease
141 # working with older code that half-heartedly works with 2965. Accept
142 # spaces inside tokens up front, so we can deal with that error one
143 # cookie at a time, after this first pass.
144 (?P<value>(?:"{value}*")|(?:[{cookie_octet} ]*))
147 # Extract everything up to the end in one chunk, which will be broken
148 # down in the second pass. Don't match if there's any unexpected
149 # garbage at the end (hence the \Z; $ matches before newline).
150 (?P<attrs>(?:;[ ]*[{cookie_av}]+)*)
151 """.format(name
=COOKIE_NAME
, cookie_av
=EXTENSION_AV
+ ";",
152 cookie_octet
=COOKIE_OCTET
, value
="[^;]")
154 # Now we specify the individual patterns for the attribute extraction pass
155 # of Set-Cookie parsing (mapping to *-av in the RFC grammar). Things which
156 # don't match any of these but are in extension-av are simply ignored;
157 # anything else should be rejected in the first pass (SET_COOKIE_HEADER).
159 # Max-Age attribute. These are digits, they are expressed this way
160 # because that is how they are expressed in the RFC.
161 MAX_AGE_AV
= "Max-Age=(?P<max_age>[\x30-\x39]+)"
163 # Domain attribute; a label is one part of the domain
164 LABEL
= '{let_dig}(?:(?:{let_dig_hyp}+)?{let_dig})?'.format(
165 let_dig
="[A-Za-z0-9]", let_dig_hyp
="[0-9A-Za-z\-]")
166 DOMAIN
= "\.?(?:{label}\.)*(?:{label})".format(label
=LABEL
)
167 # Parse initial period though it's wrong, as RFC 6265 4.1.2.3
168 DOMAIN_AV
= "Domain=(?P<domain>{domain})".format(domain
=DOMAIN
)
170 # Path attribute. We don't take special care with quotes because
171 # they are hardly used, they don't allow invalid characters per RFC 6265,
172 # and " is a valid character to occur in a path value anyway.
173 PATH_AV
= 'Path=(?P<path>[%s]+)' % EXTENSION_AV
175 # Expires attribute. This gets big because of date parsing, which needs to
176 # support a large range of formats, so it's broken down into pieces.
178 # Generate a mapping of months to use in render/parse, to avoid
179 # localizations which might be produced by strftime (e.g. %a -> Mayo)
180 month_list
= ["January", "February", "March", "April", "May", "June",
181 "July", "August", "September", "October", "November",
183 month_abbr_list
= [item
[:3] for item
in month_list
]
185 for index
, name
in enumerate(month_list
):
187 month_numbers
[name
[:3]] = index
+ 1
188 month_numbers
[name
] = index
+ 1
189 # Use the same list to create regexps for months.
190 MONTH_SHORT
= "(?:" + "|".join(item
[:3] for item
in month_list
) + ")"
191 MONTH_LONG
= "(?:" + "|".join(item
for item
in month_list
) + ")"
193 # Same drill with weekdays, for the same reason.
194 weekday_list
= ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
195 "Saturday", "Sunday"]
196 weekday_abbr_list
= [item
[:3] for item
in weekday_list
]
197 WEEKDAY_SHORT
= "(?:" + "|".join(item
[:3] for item
in weekday_list
) + ")"
198 WEEKDAY_LONG
= "(?:" + "|".join(item
for item
in weekday_list
) + ")"
200 # This regexp tries to exclude obvious nonsense in the first pass.
201 DAY_OF_MONTH
= "(?:[0 ]?[1-9]|[12][0-9]|[3][01])(?!\d)"
203 # Here is the overall date format; ~99% of cases fold into one generalized
204 # syntax like RFC 1123, and many of the rest use asctime-like formats.
205 # (see test_date_formats for a full exegesis)
206 DATE
= """(?ix) # Case-insensitive mode, verbose mode
208 (?P<weekday>(?:{wdy}|{weekday}),[ ])?
211 (?P<month>{mon}|{month})
213 # This does not support 3-digit years, which are rare and don't
214 # seem to have one canonical interpretation.
215 (?P<year>(?:\d{{2}}|\d{{4}}))
218 (?P<hour>(?:[ 0][0-9]|[01][0-9]|2[0-3]))
219 :(?P<minute>(?:0[0-9]|[1-5][0-9]))
220 (?::(?P<second>\d{{2}}))?
223 # Support asctime format, e.g. 'Sun Nov 6 08:49:37 1994'
224 (?P<weekday2>{wdy})[ ]
226 (?P<day2>[ ]\d|\d\d)[ ]
229 (?::(?P<second2>\d\d)?)[ ]
231 (?:[ ]GMT)? # GMT (Amazon)
234 DATE
= DATE
.format(wdy
=WEEKDAY_SHORT
, weekday
=WEEKDAY_LONG
,
235 day
=DAY_OF_MONTH
, mon
=MONTH_SHORT
, month
=MONTH_LONG
)
237 EXPIRES_AV
= "Expires=(?P<expires>%s)" % DATE
239 # Now we're ready to define a regexp which can match any number of attrs
240 # in the variable portion of the Set-Cookie header (like the unnamed latter
241 # part of set-cookie-string in the grammar). Each regexp of any complexity
242 # is split out for testing by itself.
243 ATTR
= """(?ix) # Case-insensitive mode, verbose mode
244 # Always start with start or semicolon and any number of spaces
246 # Big disjunction of attribute patterns (*_AV), with named capture
247 # groups to extract everything in one pass. Anything unrecognized
248 # goes in the 'unrecognized' capture group for reporting.
253 |(?P<secure>Secure=?)
254 |(?P<httponly>HttpOnly=?)
255 |Version=(?P<version>[{stuff}]+)
256 |Comment=(?P<comment>[{stuff}]+)
257 |(?P<unrecognized>[{stuff}]+)
259 # End with any number of spaces not matched by the preceding (up to the
260 # next semicolon) - but do not capture these.
262 """.format(expires
=EXPIRES_AV
, max_age
=MAX_AGE_AV
, domain
=DOMAIN_AV
,
263 path
=PATH_AV
, stuff
=EXTENSION_AV
)
265 # For request data ("Cookie: ") parsing, with finditer cf. RFC 6265 4.2.1
266 COOKIE
= """(?x) # Verbose mode
267 (?: # Either something close to valid...
269 # Match starts at start of string, or at separator.
270 # Split on comma for the sake of legacy code (RFC 2109/2965),
271 # and since it only breaks when invalid commas are put in values.
272 # see http://bugs.python.org/issue1210326
275 # 1 or more valid token characters making up the name (captured)
276 # with colon added to accommodate users of some old Java apps, etc.
283 # While 6265 provides only for cookie-octet, this allows just about
284 # anything in quotes (like in RFC 2616); people stuck on RFC
285 # 2109/2965 will expect it to work this way. The non-quoted token
286 # allows interior spaces ('\x20'), which is not valid. In both
287 # cases, the decision of whether to allow these is downstream.
293 [{value}][{value} ]*[{value}]+
297 # ... Or something way off-spec - extract to report and move on
301 # Trailing spaces after value
303 # Must end with ; or be at end of string (don't consume this though,
304 # so use the lookahead assertion ?=
306 """.format(name
=COOKIE_NAME
, value
=COOKIE_OCTET
)
308 # Precompile externally useful definitions into re objects.
309 COOKIE_NAME_RE
= re
.compile("^([%s:]+)\Z" % COOKIE_NAME
)
310 COOKIE_RE
= re
.compile(COOKIE
)
311 SET_COOKIE_HEADER_RE
= re
.compile(SET_COOKIE_HEADER
)
312 ATTR_RE
= re
.compile(ATTR
)
313 DATE_RE
= re
.compile(DATE
)
314 DOMAIN_RE
= re
.compile(DOMAIN
)
315 PATH_RE
= re
.compile('^([%s]+)\Z' % EXTENSION_AV
)
316 EOL
= re
.compile("(?:\r\n|\n)")
def strip_spaces_and_quotes(value):
    """Remove surrounding whitespace and/or a single pair of dquotes, and
    return '' for empty (or None) input.

    Used to prepare cookie values, path, and domain attributes in a way which
    tolerates simple formatting mistakes and standards variations.
    """
    value = value.strip() if value else ""
    # Only strip quotes when there is a matched pair; a lone '"' is kept.
    if value and len(value) > 1 and (value[0] == value[-1] == '"'):
        value = value[1:-1]
    # Anything empty is reported as the empty string.
    return value if value else ""
def parse_string(data, unquote=default_unquote):
    """Decode URL-encoded strings to UTF-8 containing the escaped chars.

    None is passed through unchanged. `unquote` is the function used to
    reverse the URL encoding; if it returns bytes, they are decoded as UTF-8.
    """
    if data is None:
        return None

    # We'll soon need to unquote to recover our UTF-8 data.
    # In Python 2, unquote crashes on chars beyond ASCII. So encode functions
    # had better not include anything beyond ASCII in data.
    # In Python 3, unquote crashes on bytes objects, requiring conversion to
    # str objects (unicode) using decode().
    # But in Python 2, the same decode causes unquote to butcher the data.
    # So in that case, just leave the bytes.
    if isinstance(data, bytes):
        if sys.version_info >= (3, 0, 0):  # pragma: no cover
            data = data.decode('ascii')
    # Recover URL encoded data
    unquoted = unquote(data)
    # Without this step, Python 2 may have good URL decoded *bytes*,
    # which will therefore not normalize as unicode and not compare to
    # the original.
    if isinstance(unquoted, bytes):
        unquoted = unquoted.decode('utf-8')
    return unquoted
360 def parse_date(value
):
361 """Parse an RFC 1123 or asctime-like format date string to produce
362 a Python datetime object (without a timezone).
364 # Do the regex magic; also enforces 2 or 4 digit years
365 match
= Definitions
.DATE_RE
.match(value
) if value
else None
368 # We're going to extract and prepare captured data in 'data'.
370 captured
= match
.groupdict()
371 fields
= ['year', 'month', 'day', 'hour', 'minute', 'second']
372 # If we matched on the RFC 1123 family format
375 data
[field
] = captured
[field
]
376 # If we matched on the asctime format, use year2 etc.
379 data
[field
] = captured
[field
+ "2"]
381 # Interpret lame 2-digit years - base the cutoff on UNIX epoch, in case
382 # someone sets a '70' cookie meaning 'distant past'. This won't break for
383 # 58 years and people who use 2-digit years are asking for it anyway.
390 # Clamp to [1900, 9999]: strftime has min 1900, datetime has max 9999
391 data
['year'] = max(1900, min(year
, 9999))
392 # Other things which are numbers should convert to integer
393 for field
in ['day', 'hour', 'minute', 'second']:
394 if data
[field
] is None:
396 data
[field
] = int(data
[field
])
397 # Look up the number datetime needs for the named month
398 data
['month'] = Definitions
.month_numbers
[data
['month'].lower()]
399 return datetime
.datetime(**data
)
def parse_domain(value):
    """Parse and validate an incoming Domain attribute value.

    Surrounding spaces and quotes are stripped first. An empty result is
    returned without validation; anything else must pass valid_domain or
    AssertionError is raised.
    """
    value = strip_spaces_and_quotes(value)
    # Raise explicitly rather than using an assert statement, so that the
    # check is not silently removed when Python runs with -O.
    if value and not valid_domain(value):
        raise AssertionError("invalid domain: %r" % (value,))
    return value
def parse_path(value):
    """Parse and validate an incoming Path attribute value.

    Surrounding spaces and quotes are stripped first. The result must pass
    valid_path or AssertionError is raised.
    """
    value = strip_spaces_and_quotes(value)
    # Raise explicitly rather than using an assert statement, so that the
    # check is not silently removed when Python runs with -O.
    if not valid_path(value):
        raise AssertionError("invalid path: %r" % (value,))
    return value
def parse_value(value, allow_spaces=True, unquote=default_unquote):
    """Process a cookie value.

    None is passed through. Otherwise surrounding spaces/quotes are stripped
    and the result is URL-decoded with `unquote`. When allow_spaces is false,
    a decoded value containing a space raises AssertionError.
    """
    if value is None:
        return None
    value = strip_spaces_and_quotes(value)
    value = parse_string(value, unquote=unquote)
    # Raise explicitly rather than using an assert statement, so that the
    # check is not silently removed when Python runs with -O.
    if not allow_spaces and ' ' in value:
        raise AssertionError("space in cookie value: %r" % (value,))
    return value
def valid_name(name):
    """Validate a cookie name string.

    Bytes are decoded as ASCII first (raising UnicodeDecodeError if that is
    not possible). Returns True or False.
    """
    if isinstance(name, bytes):
        name = name.decode('ascii')
    if not Definitions.COOKIE_NAME_RE.match(name):
        return False
    # This module doesn't support $identifiers, which are part of an obsolete
    # and highly complex standard which is never used.
    # (name cannot be empty here: the pattern requires at least one char.)
    if name[0] == "$":
        return False
    return True
def valid_value(value, quote=default_cookie_quote, unquote=default_unquote):
    """Validate a cookie value string.

    This is generic across quote/unquote functions because it directly verifies
    the encoding round-trip using the specified quote/unquote functions.
    So if you use different quote/unquote functions, use something like this
    as a replacement for valid_value::

        my_valid_value = lambda value: valid_value(value, quote=my_quote,
                                                          unquote=my_unquote)

    Returns True if the value survives the round trip, False otherwise
    (including when value is None).
    """
    if value is None:
        return False

    # Put the value through a round trip with the given quote and unquote
    # functions, so we will know whether data will get lost or not in the event
    # that we don't complain.
    encoded = encode_cookie_value(value, quote=quote)
    decoded = parse_string(encoded, unquote=unquote)

    # If the original string made the round trip, this is a valid value for the
    # given quote and unquote functions. Since the round trip can generate
    # different unicode forms, normalize before comparing, so we can ignore
    # trivial inequalities. Bytes cannot be normalized and are compared as-is.
    decoded_normalized = (normalize("NFKD", decoded)
                          if not isinstance(decoded, bytes) else decoded)
    value_normalized = (normalize("NFKD", value)
                        if not isinstance(value, bytes) else value)
    if decoded_normalized == value_normalized:
        return True
    return False
def valid_date(date):
    """Validate an expires datetime object.

    Returns True for naive datetime-like objects and for timezone-aware ones
    whose UTC offset is (nearly) zero, False for everything else.
    """
    # We want something that acts like a datetime. In particular,
    # strings indicate a failure to parse down to an object and ints are
    # nonstandard and ambiguous at best.
    if not hasattr(date, 'tzinfo'):
        return False
    if date.tzinfo is None:
        return True
    offset = date.utcoffset()
    # A tzinfo whose utcoffset() yields None leaves the datetime naive.
    if offset is None:
        return True
    # Relevant RFCs define UTC as 'close enough' to GMT, and the maximum
    # difference between UTC and GMT is often stated to be less than a second.
    # Compare the magnitude, so that zones west of GMT (negative offsets) are
    # rejected just like zones east of it.
    return abs(_total_seconds(offset)) < 1.1
def valid_domain(domain):
    """Validate a cookie domain ASCII string.

    Returns True only if the whole string is a domain; raises a UnicodeError
    subclass if the string cannot be stored as ASCII.
    """
    # Using encoding on domain would confuse browsers into not sending cookies.
    # Generate a Unicode error up front if it can't store as ASCII.
    domain.encode('ascii')
    # Domains starting with periods are not RFC-valid, but this is very common
    # in existing cookies, so they should still parse with DOMAIN_AV.
    match = Definitions.DOMAIN_RE.match(domain)
    # DOMAIN_RE is not anchored at the end, so a bare match() would accept any
    # string which merely starts with a domain (e.g. one followed by ';' or a
    # newline). Require the match to consume the entire string.
    return bool(match) and match.end() == len(domain)
def valid_path(value):
    """Validate a cookie path ASCII string.

    Returns True or False; raises a UnicodeError subclass if the path cannot
    be stored as ASCII.
    """
    # Generate a Unicode error if path can't store as ASCII.
    value.encode("ascii")
    # Cookies without leading slash will likely be ignored, raise ASAP.
    if not (value and value[0] == "/"):
        return False
    if not Definitions.PATH_RE.match(value):
        return False
    return True
def valid_max_age(number):
    """Validate a cookie Max-Age.

    Accepts non-negative whole numbers, or strings which convert to one.
    Returns True or False.
    """
    if isinstance(number, basestring):
        try:
            number = long(number)
        except (ValueError, TypeError):
            return False
    try:
        if number >= 0 and number % 1 == 0:
            return True
    except TypeError:
        # Things which cannot even be compared with a number (e.g. None on
        # Python 3) are simply not valid, rather than a crash.
        return False
    return False
def encode_cookie_value(data, quote=default_cookie_quote):
    """URL-encode strings to make them safe for a cookie value.

    By default this uses urllib quoting, as used in many other cookie
    implementations and in other Python code, instead of an ad hoc escaping
    mechanism which includes backslashes (these also being illegal chars in RFC
    6265).

    None is passed through unchanged; otherwise the result of `quote` is
    returned as-is.
    """
    if data is None:
        return None

    # encode() to ASCII bytes so quote won't crash on non-ASCII.
    # but doing that to bytes objects is nonsense.
    # On Python 2 encode crashes if s is bytes containing non-ASCII.
    # On Python 3 encode crashes on all byte objects.
    if not isinstance(data, bytes):
        data = data.encode("utf-8")

    # URL encode data so it is safe for cookie value
    quoted = quote(data)

    # Don't force to bytes, so that downstream can use proper string API rather
    # than crippled bytes, and to encourage encoding to be done just once.
    return quoted
552 def encode_extension_av(data
, quote
=default_extension_quote
):
553 """URL-encode strings to make them safe for an extension-av
554 (extension attribute value): <any CHAR except CTLs or ";">
def render_date(date):
    """Render a date (e.g. an Expires value) per RFCs 6265/2616/1123.

    Don't give this localized (timezone-aware) datetimes. If you use them,
    convert them to GMT before passing them to this. There are too many
    conversion corner cases to handle this universally.

    Returns None for a false `date`; raises AssertionError if the date does
    not pass valid_date.
    """
    if not date:
        return None
    # Raise explicitly rather than using an assert statement, so that the
    # check is not silently removed when Python runs with -O.
    if not valid_date(date):
        raise AssertionError("invalid date: %r" % (date,))
    # Avoid %a and %b, which can change with locale, breaking compliance
    weekday = Definitions.weekday_abbr_list[date.weekday()]
    month = Definitions.month_abbr_list[date.month - 1]
    # strftime leaves the {day}/{month} placeholders alone; format() then
    # fills them with the locale-independent names.
    return date.strftime("{day}, %d {month} %Y %H:%M:%S GMT"
                         ).format(day=weekday, month=month)
578 def render_domain(domain
):
586 def _parse_request(header_data
, ignore_bad_cookies
=False):
587 """Turn one or more lines of 'Cookie:' header data into a dict mapping
588 cookie names to cookie values (raw strings).
591 for line
in Definitions
.EOL
.split(header_data
.strip()):
592 matches
= Definitions
.COOKIE_RE
.finditer(line
)
593 matches
= [item
for item
in matches
]
594 for match
in matches
:
595 invalid
= match
.group('invalid')
597 if not ignore_bad_cookies
:
598 raise InvalidCookieError(data
=invalid
)
599 _report_invalid_cookie(invalid
)
601 name
= match
.group('name')
602 values
= cookies_dict
.get(name
)
603 value
= match
.group('value').strip('"')
607 cookies_dict
[name
] = [value
]
609 if not ignore_bad_cookies
:
610 raise InvalidCookieError(data
=line
)
611 _report_invalid_cookie(line
)
615 def parse_one_response(line
, ignore_bad_cookies
=False,
616 ignore_bad_attributes
=True):
617 """Turn one 'Set-Cookie:' line into a dict mapping attribute names to
618 attribute values (raw strings).
621 # Basic validation, extract name/value/attrs-chunk
622 match
= Definitions
.SET_COOKIE_HEADER_RE
.match(line
)
624 if not ignore_bad_cookies
:
625 raise InvalidCookieError(data
=line
)
626 _report_invalid_cookie(line
)
629 'name': match
.group('name'),
630 'value': match
.group('value')})
631 # Extract individual attrs from the attrs chunk
632 for match
in Definitions
.ATTR_RE
.finditer(match
.group('attrs')):
633 captured
= dict((k
, v
) for (k
, v
) in match
.groupdict().items() if v
)
634 unrecognized
= captured
.get('unrecognized', None)
636 if not ignore_bad_attributes
:
637 raise InvalidCookieAttributeError(None, unrecognized
,
639 _report_unknown_attribute(unrecognized
)
642 for key
in ('secure', 'httponly'):
643 if captured
.get(key
):
645 # ignore subcomponents of expires - they're still there to avoid doing
647 timekeys
= ('weekday', 'month', 'day', 'hour', 'minute', 'second',
649 if 'year' in captured
:
652 elif 'year2' in captured
:
654 del captured
[key
+ "2"]
655 cookie_dict
.update(captured
)
659 def _parse_response(header_data
, ignore_bad_cookies
=False,
660 ignore_bad_attributes
=True):
661 """Turn one or more lines of 'Set-Cookie:' header data into a list of dicts
662 mapping attribute names to attribute values (as plain strings).
665 for line
in Definitions
.EOL
.split(header_data
.strip()):
668 cookie_dict
= parse_one_response(
669 line
, ignore_bad_cookies
=ignore_bad_cookies
,
670 ignore_bad_attributes
=ignore_bad_attributes
)
673 cookie_dicts
.append(cookie_dict
)
675 if not ignore_bad_cookies
:
676 raise InvalidCookieError(data
=header_data
)
677 _report_invalid_cookie(header_data
)
681 class Cookie(object):
682 """Provide a simple interface for creating, modifying, and rendering
683 individual HTTP cookies.
685 Cookie attributes are represented as normal Python object attributes.
686 Parsing, rendering and validation are reconfigurable per-attribute. The
687 default behavior is intended to comply with RFC 6265, URL-encoding illegal
688 characters where necessary. For example: the default behavior for the
689 Expires attribute is to parse strings as datetimes using parse_date,
690 validate that any set value is a datetime, and render the attribute per the
691 preferred date format in RFC 1123.
693 def __init__(self
, name
, value
, **kwargs
):
694 # If we don't have or can't set a name value, we don't want to return
695 # junk, so we must break control flow. And we don't want to use
696 # InvalidCookieAttributeError, because users may want to catch that to
697 # suppress all complaining about funky attributes.
700 except InvalidCookieAttributeError
:
701 raise InvalidCookieError(message
="invalid name for new Cookie",
706 except InvalidCookieAttributeError
:
707 raise InvalidCookieError(message
="invalid value for new Cookie",
710 self
._set
_attributes
(kwargs
, ignore_bad_attributes
=False)
712 def _set_attributes(self
, attrs
, ignore_bad_attributes
=False):
713 for attr_name
, attr_value
in attrs
.items():
714 if not attr_name
in self
.attribute_names
:
715 if not ignore_bad_attributes
:
716 raise InvalidCookieAttributeError(
717 attr_name
, attr_value
,
718 "unknown cookie attribute '%s'" % attr_name
)
719 _report_unknown_attribute(attr_name
)
722 setattr(self
, attr_name
, attr_value
)
723 except InvalidCookieAttributeError
as error
:
724 if not ignore_bad_attributes
:
726 _report_invalid_attribute(attr_name
, attr_value
, error
.reason
)
730 def from_dict(cls
, cookie_dict
, ignore_bad_attributes
=True):
731 """Construct an instance from a dict of strings to parse.
733 The main difference between this and Cookie(name, value, **kwargs) is
734 that the values in the argument to this method are parsed.
736 If ignore_bad_attributes=True (default), values which did not parse
737 are set to '' in order to avoid passing bad data.
739 name
= cookie_dict
.get('name', None)
741 raise InvalidCookieError("Cookie must have name")
742 raw_value
= cookie_dict
.get('value', '')
743 # Absence or failure of parser here is fatal; errors in present name
744 # and value should be found by Cookie.__init__.
745 value
= cls
.attribute_parsers
['value'](raw_value
)
746 cookie
= cls(name
, value
)
748 # Parse values from serialized formats into objects
750 for key
, value
in cookie_dict
.items():
751 # Don't want to pass name/value to _set_attributes
752 if key
in ('name', 'value'):
754 parser
= cls
.attribute_parsers
.get(key
)
756 # Don't let totally unknown attributes pass silently
757 if not ignore_bad_attributes
:
758 raise InvalidCookieAttributeError(
759 key
, value
, "unknown cookie attribute '%s'" % key
)
760 _report_unknown_attribute(key
)
763 parsed_value
= parser(value
)
764 except Exception as e
:
765 reason
= "did not parse with %r: %r" % (parser
, e
)
766 if not ignore_bad_attributes
:
767 raise InvalidCookieAttributeError(
769 _report_invalid_attribute(key
, value
, reason
)
771 parsed
[key
] = parsed_value
773 # Set the parsed objects (does object validation automatically)
774 cookie
._set
_attributes
(parsed
, ignore_bad_attributes
)
778 def from_string(cls
, line
, ignore_bad_cookies
=False,
779 ignore_bad_attributes
=True):
780 "Construct a Cookie object from a line of Set-Cookie header data."
781 cookie_dict
= parse_one_response(
782 line
, ignore_bad_cookies
=ignore_bad_cookies
,
783 ignore_bad_attributes
=ignore_bad_attributes
)
786 return cls
.from_dict(
787 cookie_dict
, ignore_bad_attributes
=ignore_bad_attributes
)
790 this_dict
= {'name': self
.name
, 'value': self
.value
}
791 this_dict
.update(self
.attributes())
794 def validate(self
, name
, value
):
795 """Validate a cookie attribute with an appropriate validator.
797 The value comes in already parsed (for example, an expires value
798 should be a datetime). Called automatically when an attribute
801 validator
= self
.attribute_validators
.get(name
, None)
803 return True if validator(value
) else False
806 def __setattr__(self
, name
, value
):
807 """Attributes mentioned in attribute_names get validated using
808 functions in attribute_validators, raising an exception on failure.
809 Others get left alone.
811 if name
in self
.attribute_names
or name
in ("name", "value"):
812 if name
== 'name' and not value
:
813 raise InvalidCookieError(message
="Cookies must have names")
814 # Ignore None values indicating unset attr. Other invalids should
815 # raise error so users of __setattr__ can learn.
816 if value
is not None:
817 if not self
.validate(name
, value
):
818 raise InvalidCookieAttributeError(
819 name
, value
, "did not validate with " +
820 repr(self
.attribute_validators
.get(name
)))
821 object.__setattr
__(self
, name
, value
)
823 def __getattr__(self
, name
):
824 """Provide for acting like everything in attribute_names is
825 automatically set to None, rather than having to do so explicitly and
828 if name
in self
.attribute_names
:
830 raise AttributeError(name
)
832 def attributes(self
):
833 """Export this cookie's attributes as a dict of encoded values.
835 This is an important part of the code for rendering attributes, e.g.
839 # Only look for attributes registered in attribute_names.
840 for python_attr_name
, cookie_attr_name
in self
.attribute_names
.items():
841 value
= getattr(self
, python_attr_name
)
842 renderer
= self
.attribute_renderers
.get(python_attr_name
, None)
844 value
= renderer(value
)
845 # If renderer returns None, or it's just natively none, then the
846 # value is suppressed entirely - does not appear in any rendering.
849 dictionary
[cookie_attr_name
] = value
852 def render_request(self
):
853 """Render as a string formatted for HTTP request headers
854 (simple 'Cookie: ' style).
856 # Use whatever renderers are defined for name and value.
857 name
, value
= self
.name
, self
.value
858 renderer
= self
.attribute_renderers
.get('name', None)
860 name
= renderer(name
)
861 renderer
= self
.attribute_renderers
.get('value', None)
863 value
= renderer(value
)
864 return ''.join((name
, "=", value
))
866 def render_response(self
):
867 """Render as a string formatted for HTTP response headers
868 (detailed 'Set-Cookie: ' style).
870 # Use whatever renderers are defined for name and value.
871 # (.attributes() is responsible for all other rendering.)
872 name
, value
= self
.name
, self
.value
873 renderer
= self
.attribute_renderers
.get('name', None)
875 name
= renderer(name
)
876 renderer
= self
.attribute_renderers
.get('value', None)
878 value
= renderer(value
)
880 ['{0}={1}'.format(name
, value
)] +
881 [key
if isinstance(val
, bool) else '='.join((key
, val
))
882 for key
, val
in self
.attributes().items()]
885 def __eq__(self
, other
):
886 attrs
= ['name', 'value'] + list(self
.attribute_names
.keys())
888 mine
= getattr(self
, attr
, None)
889 his
= getattr(other
, attr
, None)
890 if isinstance(mine
, bytes
):
891 mine
= mine
.decode('utf-8')
892 if isinstance(his
, bytes
):
893 his
= his
.decode('utf-8')
895 if mine
and mine
[0] == '.':
897 if his
and his
[0] == '.':
903 def __ne__(self
, other
):
904 return not self
.__eq
__(other
)
906 # Add a name and its proper rendering to this dict to register an attribute
907 # as exportable. The key is the name of the Cookie object attribute in
908 # Python, and it is mapped to the name you want in the output.
909 # 'name' and 'value' should not be here.
911 'expires': 'Expires',
912 'max_age': 'Max-Age',
915 'comment': 'Comment',
916 'version': 'Version',
918 'httponly': 'HttpOnly',
921 # Register single-parameter functions in this dictionary to have them
922 # used for encoding outgoing values (e.g. as RFC compliant strings,
923 # as base64, encrypted stuff, etc.)
924 # These are called by the property generated by cookie_attribute().
925 # Usually it would be wise not to define a renderer for name, but it is
926 # supported in case there is ever a real need.
927 attribute_renderers
= {
928 'value': encode_cookie_value
,
929 'domain': render_domain
,
930 'expires': render_date
,
931 'max_age': lambda item
: str(item
) if item
is not None else None,
932 'secure': lambda item
: True if item
else False,
933 'httponly': lambda item
: True if item
else False,
934 'comment': encode_extension_av
,
935 'version': lambda item
: (str(item
) if isinstance(item
, int)
936 else encode_extension_av(item
)),
939 # Register single-parameter functions in this dictionary to have them used
940 # for decoding incoming values for use in the Python API (e.g. into nice
941 # objects, numbers, unicode strings, etc.)
942 # These are called by the property generated by cookie_attribute().
943 attribute_parsers
= {
944 'value': parse_value
,
945 'expires': parse_date
,
946 'domain': parse_domain
,
948 'max_age': lambda item
: long(strip_spaces_and_quotes(item
)),
949 'comment': parse_string
,
950 'version': lambda item
: int(strip_spaces_and_quotes(item
)),
951 'secure': lambda item
: True if item
else False,
952 'httponly': lambda item
: True if item
else False,
955 # Register single-parameter functions which return a true value for
956 # acceptable values, and a false value for unacceptable ones. An
    # attribute's validator is run after it is parsed or when it is directly
    # set, and InvalidCookieAttributeError is raised if validation fails (and
    # the validator doesn't raise a different exception prior)
960 attribute_validators
= {
962 'value': valid_value
,
963 'expires': valid_date
,
964 'domain': valid_domain
,
966 'max_age': valid_max_age
,
967 'comment': valid_value
,
968 'version': lambda number
: re
.match("^\d+\Z", str(number
)),
969 'secure': lambda item
: item
is True or item
is False,
970 'httponly': lambda item
: item
is True or item
is False,
975 """Represent a set of cookies indexed by name.
977 This class bundles together a set of Cookie objects and provides
978 a convenient interface to them, for parsing and producing cookie headers.
979 In basic operation it acts just like a dict of Cookie objects, but it adds
980 additional convenience methods for the usual cookie tasks: add cookie
981 objects by their names, create new cookie objects under specified names,
982 parse HTTP request or response data into new cookie objects automatically
983 stored in the dict, and render the set in formats suitable for HTTP request
# Cookie class used when the constructor is not given a `_cookie_class`
# keyword argument.
DEFAULT_COOKIE_CLASS = Cookie
def __init__(self, *args, **kwargs):
    """Create the collection and populate it through add().

    :arg args: existing cookie objects, forwarded to add().

    :arg kwargs: name=value pairs, forwarded to add(). The reserved keyword
    `_cookie_class` selects the class used to build cookies from name=value
    pairs (default: DEFAULT_COOKIE_CLASS); it is consumed here and is not
    stored as a cookie.
    """
    # Every cookie ever added, including those whose name was already taken.
    self.all_cookies = []
    # pop() rather than get(): the option must not be forwarded to add(),
    # which would otherwise turn it into a cookie named "_cookie_class".
    self.cookie_class = kwargs.pop(
        "_cookie_class", self.DEFAULT_COOKIE_CLASS)
    self.add(*args, **kwargs)
def add(self, *args, **kwargs):
    """Add Cookie objects by their names, or create new ones under
    specified names.

    Any unnamed arguments are interpreted as existing cookies, and are
    added under the value in their .name attribute. With keyword arguments,
    the key is interpreted as the cookie name and the value as the
    UNENCODED value stored in the cookie.

    Every cookie is recorded in all_cookies, but only the first cookie seen
    for a given name is reachable through the dict interface; later ones
    are available through get_all().
    """
    for cookie in args:
        self.all_cookies.append(cookie)
        # First cookie registered under a name wins.
        if cookie.name not in self:
            self[cookie.name] = cookie
    for key, value in kwargs.items():
        cookie = self.cookie_class(key, value)
        self.all_cookies.append(cookie)
        if key not in self:
            self[key] = cookie
def get_all(self, key):
    """Return a list of every cookie in all_cookies whose .name equals
    `key`, in all_cookies order (an empty list if none match).
    """
    matches = []
    for candidate in self.all_cookies:
        if candidate.name == key:
            matches.append(candidate)
    return matches
def parse_request(self, header_data, ignore_bad_cookies=False):
    """Parse 'Cookie' header data into Cookie objects, and add them to
    this Cookies object.

    :arg header_data: string containing only 'Cookie:' request headers or
    header values (as in CGI/WSGI HTTP_COOKIE); if more than one, they must
    be separated by CRLF (\\r\\n).

    :arg ignore_bad_cookies: if set, will log each syntactically invalid
    cookie (at the granularity of semicolon-delimited blocks) rather than
    raising an exception at the first bad cookie.

    :returns: this Cookies instance, now also containing the Cookie objects
    parsed from header_data.

    If you want to parse 'Set-Cookie:' response headers, please use
    parse_response instead. parse_request will happily turn 'expires=frob'
    into a separate cookie without complaining, according to the grammar.
    """
    cookies_dict = _parse_request(
        header_data, ignore_bad_cookies=ignore_bad_cookies)
    cookie_objects = []
    for name, values in cookies_dict.items():
        for value in values:
            # Use from_dict to check name and parse value
            cookie_dict = {'name': name, 'value': value}
            try:
                cookie = self.cookie_class.from_dict(cookie_dict)
            except InvalidCookieError:
                # A bad cookie is either fatal or dropped from the result.
                if not ignore_bad_cookies:
                    raise
            else:
                cookie_objects.append(cookie)
    try:
        self.add(*cookie_objects)
    except InvalidCookieError:
        if not ignore_bad_cookies:
            raise
        _report_invalid_cookie(header_data)
    return self
def parse_response(self, header_data, ignore_bad_cookies=False,
                   ignore_bad_attributes=True):
    """Parse 'Set-Cookie' header data into Cookie objects, and add them to
    this Cookies object.

    :arg header_data: string containing only 'Set-Cookie:' response headers
    or their corresponding header values; if more than one, they must be
    separated by CRLF (\\r\\n).

    :arg ignore_bad_cookies: if set, will log each syntactically invalid
    cookie rather than raising an exception at the first bad cookie. (This
    includes cookies which have noncompliant characters in the attribute
    section.)

    :arg ignore_bad_attributes: defaults to True, which means to log but
    not raise an error when a particular attribute is unrecognized. (This
    does not necessarily mean that the attribute is invalid, although that
    would often be the case.) If unset, then an error will be raised at the
    first semicolon-delimited block which has an unknown attribute.

    :returns: this Cookies instance, now also containing the Cookie objects
    parsed from header_data, each with recognized attributes populated.

    If you want to parse 'Cookie:' headers (i.e., data like what's sent
    with an HTTP request, which has only name=value pairs and no
    attributes), then please use parse_request instead. Such lines often
    contain multiple name=value pairs, and parse_response will throw away
    the pairs after the first one, which will probably generate errors or
    confusing behavior. (Since there's no perfect way to automatically
    determine which kind of parsing to do, you have to tell it manually by
    choosing correctly between parse_request and parse_response.)
    """
    cookie_dicts = _parse_response(
        header_data,
        ignore_bad_cookies=ignore_bad_cookies,
        ignore_bad_attributes=ignore_bad_attributes)
    # Build every cookie first so nothing is added if one of them fails.
    cookie_objects = [self.cookie_class.from_dict(cookie_dict)
                      for cookie_dict in cookie_dicts]
    self.add(*cookie_objects)
    return self
@classmethod
def from_request(cls, header_data, ignore_bad_cookies=False):
    """Construct a Cookies object from request header data.

    A new, empty instance of `cls` is created and filled through its
    parse_request() method, to which both arguments are passed unchanged.

    :returns: the newly created instance.
    """
    cookies = cls()
    cookies.parse_request(
        header_data, ignore_bad_cookies=ignore_bad_cookies)
    return cookies
@classmethod
def from_response(cls, header_data, ignore_bad_cookies=False,
                  ignore_bad_attributes=True):
    """Construct a Cookies object from response header data.

    A new, empty instance of `cls` is created and filled through its
    parse_response() method, to which all arguments are passed unchanged.

    :returns: the newly created instance.
    """
    cookies = cls()
    cookies.parse_response(
        header_data,
        ignore_bad_cookies=ignore_bad_cookies,
        ignore_bad_attributes=ignore_bad_attributes)
    return cookies
def render_request(self, sort=True):
    """Render the dict's Cookie objects into a string formatted for HTTP
    request headers (simple 'Cookie: ' style).

    :arg sort: when true (the default), the rendered cookies are sorted
    before being joined; otherwise they appear in the dict's iteration
    order.

    :returns: the rendered cookies joined with "; " (an empty string when
    there are no cookies).
    """
    rendered = [cookie.render_request() for cookie in self.values()]
    if sort:
        rendered.sort()
    return "; ".join(rendered)
def render_response(self, sort=True):
    """Render the dict's Cookie objects into a list of strings formatted
    for HTTP response headers (detailed 'Set-Cookie: ' style).

    The list is sorted unless `sort` is false, in which case it follows
    the dict's iteration order.
    """
    lines = []
    for cookie in self.values():
        lines.append(cookie.render_response())
    if sort:
        lines.sort()
    return lines
def __repr__(self):
    """Return a debugging representation of the form
    Cookies(name=value_repr, ...), listing each stored name with the repr
    of its cookie's .value.
    """
    pairs = ("%s=%r" % (name, cookie.value)
             for (name, cookie) in self.items())
    return "Cookies(%s)" % ', '.join(pairs)
def __eq__(self, other):
    """Test if a Cookies object is globally 'equal' to another one by
    seeing if it looks like a dict such that d[k] == self[k]. This depends
    on each Cookie object reporting its equality correctly.

    :returns: False if `other` has no keys() method, if either side has a
    key the other lacks, or if any pair of values differs; True otherwise.
    Exceptions raised while comparing are propagated to the caller.
    """
    if not hasattr(other, "keys"):
        return False
    # Walk the union of both key sets. No sorting: the order is irrelevant
    # to the result, and sorting would raise on unorderable keys.
    for key in set(self.keys()) | set(other.keys()):
        if key not in self or key not in other:
            return False
        if self[key] != other[key]:
            return False
    return True
def __ne__(self, other):
    """Return the logical negation of self.__eq__(other)."""
    is_equal = self.__eq__(other)
    return not is_equal