third_party/python/cookies/cookies.py

   1 """Parse, manipulate and render cookies in a convenient way.
   2
   3 Copyright (c) 2011-2014, Sasha Hart.
   4
   5 Permission is hereby granted, free of charge, to any person obtaining a copy of
   6 this software and associated documentation files (the "Software"), to deal in
   7 the Software without restriction, including without limitation the rights to
   8 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   9 of the Software, and to permit persons to whom the Software is furnished to do
  10 so, subject to the following conditions:
  11
  12 The above copyright notice and this permission notice shall be included in all
  13 copies or substantial portions of the Software.
  14
  15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21 SOFTWARE.
  22 """
  23 __version__ = "2.2.1"
  24 import re
  25 import datetime
  26 import logging
  27 import sys
  28 from unicodedata import normalize
  29 if sys.version_info >= (3, 0, 0):  # pragma: no cover
  30     from urllib.parse import (
  31         quote as _default_quote, unquote as _default_unquote)
  32     basestring = str
  33     long = int
  34 else:  # pragma: no cover
  35     from urllib import (
  36         quote as _default_quote, unquote as _default_unquote)
  37
  38
  39 def _total_seconds(td):
  40     """Wrapper to work around lack of .total_seconds() method in Python 3.1.
  41     """
  42     if hasattr(td, "total_seconds"):
  43         return td.total_seconds()
  44     return td.days * 3600 * 24 + td.seconds + td.microseconds / 100000.0
  45
  46 # see test_encoding_assumptions for how these magical safe= parms were figured
  47 # out. the differences are because of what cookie-octet may contain
  48 # vs the more liberal spec for extension-av
  49 default_cookie_quote = lambda item: _default_quote(
  50     item, safe='!#$%&\'()*+/:<=>?@[]^`{|}~')
  51
  52 default_extension_quote = lambda item: _default_quote(
  53     item, safe=' !"#$%&\'()*+,/:<=>?@[\\]^`{|}~')
  54
  55 default_unquote = _default_unquote
  56
  57
def _report_invalid_cookie(data):
    """How this module logs a bad cookie when exception suppressed.

    Emits an ERROR-level record through the ``logging`` module's top-level
    ``error`` function; `data` (the offending text) is formatted with %r.
    """
    logging.error("invalid Cookie: %r", data)
  61
  62
def _report_unknown_attribute(name):
    """How this module logs an unknown attribute when exception suppressed.

    Emits an ERROR-level record through the ``logging`` module's top-level
    ``error`` function; `name` (the attribute name) is formatted with %r.
    """
    logging.error("unknown Cookie attribute: %r", name)
  66
  67
def _report_invalid_attribute(name, value, reason):
    """How this module logs a bad attribute when exception suppressed.

    Emits an ERROR-level record through the ``logging`` module's top-level
    ``error`` function. `reason` is formatted with %s, while the attribute
    `name` and `value` are formatted with %r.
    """
    logging.error("invalid Cookie attribute (%s): %r=%r", reason, name, value)
  71
  72
  73 class CookieError(Exception):
  74     """Base class for this module's exceptions, so you can catch them all if
  75     you want to.
  76     """
  77     def __init__(self):
  78         Exception.__init__(self)
  79
  80
  81 class InvalidCookieError(CookieError):
  82     """Raised when attempting to parse or construct a cookie which is
  83     syntactically invalid (in any way that has possibly serious implications).
  84     """
  85     def __init__(self, data=None, message=""):
  86         CookieError.__init__(self)
  87         self.data = data
  88         self.message = message
  89
  90     def __str__(self):
  91         return '%r %r' % (self.message, self.data)
  92
  93
  94 class InvalidCookieAttributeError(CookieError):
  95     """Raised when setting an invalid attribute on a Cookie.
  96     """
  97     def __init__(self, name, value, reason=None):
  98         CookieError.__init__(self)
  99         self.name = name
 100         self.value = value
 101         self.reason = reason
 102
 103     def __str__(self):
 104         prefix = ("%s: " % self.reason) if self.reason else ""
 105         if self.name is None:
 106             return '%s%r' % (prefix, self.value)
 107         return '%s%r = %r' % (prefix, self.name, self.value)
 108
 109
 110 class Definitions(object):
 111     """Namespace to hold definitions used in cookie parsing (mostly pieces of
 112     regex).
 113
 114     These are separated out for individual testing against examples and RFC
 115     grammar, and kept here to avoid cluttering other namespaces.
 116     """
 117     # Most of the following are set down or cited in RFC 6265 4.1.1
 118
 119     # This is the grammar's 'cookie-name' defined as 'token' per RFC 2616 2.2.
 120     COOKIE_NAME = r"!#$%&'*+\-.0-9A-Z^_`a-z|~"
 121
 122     # 'cookie-octet' - as used twice in definition of 'cookie-value'
 123     COOKIE_OCTET = r"\x21\x23-\x2B\--\x3A\x3C-\x5B\]-\x7E"
 124
 125     # extension-av - also happens to be a superset of cookie-av and path-value
 126     EXTENSION_AV = """ !"#$%&\\\\'()*+,\-./0-9:<=>?@A-Z[\\]^_`a-z{|}~"""
 127
 128     # This is for the first pass parse on a Set-Cookie: response header. It
 129     # includes cookie-value, cookie-pair, set-cookie-string, cookie-av.
 130     # extension-av is used to extract the chunk containing variable-length,
 131     # unordered attributes. The second pass then uses ATTR to break out each
 132     # attribute and extract it appropriately.
 133     # As compared with the RFC production grammar, it is must more liberal with
 134     # space characters, in order not to break on data made by barbarians.
 135     SET_COOKIE_HEADER = """(?x) # Verbose mode
 136         ^(?:Set-Cookie:[ ]*)?
 137         (?P<name>[{name}:]+)
 138         [ ]*=[ ]*
 139
 140         # Accept anything in quotes - this is not RFC 6265, but might ease
 141         # working with older code that half-heartedly works with 2965. Accept
 142         # spaces inside tokens up front, so we can deal with that error one
 143         # cookie at a time, after this first pass.
 144         (?P<value>(?:"{value}*")|(?:[{cookie_octet} ]*))
 145         [ ]*
 146
 147         # Extract everything up to the end in one chunk, which will be broken
 148         # down in the second pass. Don't match if there's any unexpected
 149         # garbage at the end (hence the \Z; $ matches before newline).
 150         (?P<attrs>(?:;[ ]*[{cookie_av}]+)*)
 151         """.format(name=COOKIE_NAME, cookie_av=EXTENSION_AV + ";",
 152                    cookie_octet=COOKIE_OCTET, value="[^;]")
 153
 154     # Now we specify the individual patterns for the attribute extraction pass
 155     # of Set-Cookie parsing (mapping to *-av in the RFC grammar). Things which
 156     # don't match any of these but are in extension-av are simply ignored;
 157     # anything else should be rejected in the first pass (SET_COOKIE_HEADER).
 158
 159     # Max-Age attribute. These are digits, they are expressed this way
 160     # because that is how they are expressed in the RFC.
 161     MAX_AGE_AV = "Max-Age=(?P<max_age>[\x30-\x39]+)"
 162
 163     # Domain attribute; a label is one part of the domain
 164     LABEL = '{let_dig}(?:(?:{let_dig_hyp}+)?{let_dig})?'.format(
 165             let_dig="[A-Za-z0-9]", let_dig_hyp="[0-9A-Za-z\-]")
 166     DOMAIN = "\.?(?:{label}\.)*(?:{label})".format(label=LABEL)
 167     # Parse initial period though it's wrong, as RFC 6265 4.1.2.3
 168     DOMAIN_AV = "Domain=(?P<domain>{domain})".format(domain=DOMAIN)
 169
 170     # Path attribute. We don't take special care with quotes because
 171     # they are hardly used, they don't allow invalid characters per RFC 6265,
 172     # and " is a valid character to occur in a path value anyway.
 173     PATH_AV = 'Path=(?P<path>[%s]+)' % EXTENSION_AV
 174
 175     # Expires attribute. This gets big because of date parsing, which needs to
 176     # support a large range of formats, so it's broken down into pieces.
 177
 178     # Generate a mapping of months to use in render/parse, to avoid
 179     # localizations which might be produced by strftime (e.g. %a -> Mayo)
 180     month_list = ["January", "February", "March", "April", "May", "June",
 181                   "July", "August", "September", "October", "November",
 182                   "December"]
 183     month_abbr_list = [item[:3] for item in month_list]
 184     month_numbers = {}
 185     for index, name in enumerate(month_list):
 186         name = name.lower()
 187         month_numbers[name[:3]] = index + 1
 188         month_numbers[name] = index + 1
 189     # Use the same list to create regexps for months.
 190     MONTH_SHORT = "(?:" + "|".join(item[:3] for item in month_list) + ")"
 191     MONTH_LONG = "(?:" + "|".join(item for item in month_list) + ")"
 192
 193     # Same drill with weekdays, for the same reason.
 194     weekday_list = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
 195                     "Saturday", "Sunday"]
 196     weekday_abbr_list = [item[:3] for item in weekday_list]
 197     WEEKDAY_SHORT = "(?:" + "|".join(item[:3] for item in weekday_list) + ")"
 198     WEEKDAY_LONG = "(?:" + "|".join(item for item in weekday_list) + ")"
 199
 200     # This regexp tries to exclude obvious nonsense in the first pass.
 201     DAY_OF_MONTH = "(?:[0 ]?[1-9]|[12][0-9]|[3][01])(?!\d)"
 202
 203     # Here is the overall date format; ~99% of cases fold into one generalized
 204     # syntax like RFC 1123, and many of the rest use asctime-like formats.
 205     # (see test_date_formats for a full exegesis)
 206     DATE = """(?ix) # Case-insensitive mode, verbose mode
 207         (?:
 208             (?P<weekday>(?:{wdy}|{weekday}),[ ])?
 209             (?P<day>{day})
 210             [ \-]
 211             (?P<month>{mon}|{month})
 212             [ \-]
 213             # This does not support 3-digit years, which are rare and don't
 214             # seem to have one canonical interpretation.
 215             (?P<year>(?:\d{{2}}|\d{{4}}))
 216             [ ]
 217             # HH:MM[:SS] GMT
 218             (?P<hour>(?:[ 0][0-9]|[01][0-9]|2[0-3]))
 219             :(?P<minute>(?:0[0-9]|[1-5][0-9]))
 220             (?::(?P<second>\d{{2}}))?
 221             [ ]GMT
 222         |
 223             # Support asctime format, e.g. 'Sun Nov  6 08:49:37 1994'
 224             (?P<weekday2>{wdy})[ ]
 225             (?P<month2>{mon})[ ]
 226             (?P<day2>[ ]\d|\d\d)[ ]
 227             (?P<hour2>\d\d):
 228             (?P<minute2>\d\d)
 229             (?::(?P<second2>\d\d)?)[ ]
 230             (?P<year2>\d\d\d\d)
 231             (?:[ ]GMT)?  # GMT (Amazon)
 232         )
 233     """
 234     DATE = DATE.format(wdy=WEEKDAY_SHORT, weekday=WEEKDAY_LONG,
 235                        day=DAY_OF_MONTH, mon=MONTH_SHORT, month=MONTH_LONG)
 236
 237     EXPIRES_AV = "Expires=(?P<expires>%s)" % DATE
 238
 239     # Now we're ready to define a regexp which can match any number of attrs
 240     # in the variable portion of the Set-Cookie header (like the unnamed latter
 241     # part of set-cookie-string in the grammar). Each regexp of any complexity
 242     # is split out for testing by itself.
 243     ATTR = """(?ix)  # Case-insensitive mode, verbose mode
 244         # Always start with start or semicolon and any number of spaces
 245         (?:^|;)[ ]*(?:
 246             # Big disjunction of attribute patterns (*_AV), with named capture
 247             # groups to extract everything in one pass. Anything unrecognized
 248             # goes in the 'unrecognized' capture group for reporting.
 249             {expires}
 250             |{max_age}
 251             |{domain}
 252             |{path}
 253             |(?P<secure>Secure=?)
 254             |(?P<httponly>HttpOnly=?)
 255             |Version=(?P<version>[{stuff}]+)
 256             |Comment=(?P<comment>[{stuff}]+)
 257             |(?P<unrecognized>[{stuff}]+)
 258         )
 259         # End with any number of spaces not matched by the preceding (up to the
 260         # next semicolon) - but do not capture these.
 261         [ ]*
 262     """.format(expires=EXPIRES_AV, max_age=MAX_AGE_AV, domain=DOMAIN_AV,
 263                path=PATH_AV, stuff=EXTENSION_AV)
 264
 265     # For request data ("Cookie: ") parsing, with finditer cf. RFC 6265 4.2.1
 266     COOKIE = """(?x) # Verbose mode
 267         (?: # Either something close to valid...
 268
 269             # Match starts at start of string, or at separator.
 270             # Split on comma for the sake of legacy code (RFC 2109/2965),
 271             # and since it only breaks when invalid commas are put in values.
 272             # see http://bugs.python.org/issue1210326
 273             (?:^Cookie:|^|;|,)
 274
 275             # 1 or more valid token characters making up the name (captured)
 276             # with colon added to accommodate users of some old Java apps, etc.
 277             [ ]*
 278             (?P<name>[{name}:]+)
 279             [ ]*
 280             =
 281             [ ]*
 282
 283             # While 6265 provides only for cookie-octet, this allows just about
 284             # anything in quotes (like in RFC 2616); people stuck on RFC
 285             # 2109/2965 will expect it to work this way. The non-quoted token
 286             # allows interior spaces ('\x20'), which is not valid. In both
 287             # cases, the decision of whether to allow these is downstream.
 288             (?P<value>
 289                 ["][^\00-\31"]*["]
 290                 |
 291                 [{value}]
 292                 |
 293                 [{value}][{value} ]*[{value}]+
 294                 |
 295                 )
 296
 297         # ... Or something way off-spec - extract to report and move on
 298         |
 299             (?P<invalid>[^;]+)
 300         )
 301         # Trailing spaces after value
 302         [ ]*
 303         # Must end with ; or be at end of string (don't consume this though,
 304         # so use the lookahead assertion ?=
 305         (?=;|\Z)
 306     """.format(name=COOKIE_NAME, value=COOKIE_OCTET)
 307
 308     # Precompile externally useful definitions into re objects.
 309     COOKIE_NAME_RE = re.compile("^([%s:]+)\Z" % COOKIE_NAME)
 310     COOKIE_RE = re.compile(COOKIE)
 311     SET_COOKIE_HEADER_RE = re.compile(SET_COOKIE_HEADER)
 312     ATTR_RE = re.compile(ATTR)
 313     DATE_RE = re.compile(DATE)
 314     DOMAIN_RE = re.compile(DOMAIN)
 315     PATH_RE = re.compile('^([%s]+)\Z' % EXTENSION_AV)
 316     EOL = re.compile("(?:\r\n|\n)")
 317
 318
 319 def strip_spaces_and_quotes(value):
 320     """Remove invalid whitespace and/or single pair of dquotes and return None
 321     for empty strings.
 322
 323     Used to prepare cookie values, path, and domain attributes in a way which
 324     tolerates simple formatting mistakes and standards variations.
 325     """
 326     value = value.strip() if value else ""
 327     if value and len(value) > 1 and (value[0] == value[-1] == '"'):
 328         value = value[1:-1]
 329     if not value:
 330         value = ""
 331     return value
 332
 333
 334 def parse_string(data, unquote=default_unquote):
 335     """Decode URL-encoded strings to UTF-8 containing the escaped chars.
 336     """
 337     if data is None:
 338         return None
 339
 340     # We'll soon need to unquote to recover our UTF-8 data.
 341     # In Python 2, unquote crashes on chars beyond ASCII. So encode functions
 342     # had better not include anything beyond ASCII in data.
 343     # In Python 3, unquote crashes on bytes objects, requiring conversion to
 344     # str objects (unicode) using decode().
 345     # But in Python 2, the same decode causes unquote to butcher the data.
 346     # So in that case, just leave the bytes.
 347     if isinstance(data, bytes):
 348         if sys.version_info > (3, 0, 0):  # pragma: no cover
 349             data = data.decode('ascii')
 350     # Recover URL encoded data
 351     unquoted = unquote(data)
 352     # Without this step, Python 2 may have good URL decoded *bytes*,
 353     # which will therefore not normalize as unicode and not compare to
 354     # the original.
 355     if isinstance(unquoted, bytes):
 356         unquoted = unquoted.decode('utf-8')
 357     return unquoted
 358
 359
 360 def parse_date(value):
 361     """Parse an RFC 1123 or asctime-like format date string to produce
 362     a Python datetime object (without a timezone).
 363     """
 364     # Do the regex magic; also enforces 2 or 4 digit years
 365     match = Definitions.DATE_RE.match(value) if value else None
 366     if not match:
 367         return None
 368     # We're going to extract and prepare captured data in 'data'.
 369     data = {}
 370     captured = match.groupdict()
 371     fields = ['year', 'month', 'day', 'hour', 'minute', 'second']
 372     # If we matched on the RFC 1123 family format
 373     if captured['year']:
 374         for field in fields:
 375             data[field] = captured[field]
 376     # If we matched on the asctime format, use year2 etc.
 377     else:
 378         for field in fields:
 379             data[field] = captured[field + "2"]
 380     year = data['year']
 381     # Interpret lame 2-digit years - base the cutoff on UNIX epoch, in case
 382     # someone sets a '70' cookie meaning 'distant past'. This won't break for
 383     # 58 years and people who use 2-digit years are asking for it anyway.
 384     if len(year) == 2:
 385         if int(year) < 70:
 386             year = "20" + year
 387         else:
 388             year = "19" + year
 389     year = int(year)
 390     # Clamp to [1900, 9999]: strftime has min 1900, datetime has max 9999
 391     data['year'] = max(1900, min(year, 9999))
 392     # Other things which are numbers should convert to integer
 393     for field in ['day', 'hour', 'minute', 'second']:
 394         if data[field] is None:
 395             data[field] = 0
 396         data[field] = int(data[field])
 397     # Look up the number datetime needs for the named month
 398     data['month'] = Definitions.month_numbers[data['month'].lower()]
 399     return datetime.datetime(**data)
 400
 401
 402 def parse_domain(value):
 403     """Parse and validate an incoming Domain attribute value.
 404     """
 405     value = strip_spaces_and_quotes(value)
 406     if value:
 407         assert valid_domain(value)
 408     return value
 409
 410
 411 def parse_path(value):
 412     """Parse and validate an incoming Path attribute value.
 413     """
 414     value = strip_spaces_and_quotes(value)
 415     assert valid_path(value)
 416     return value
 417
 418
 419 def parse_value(value, allow_spaces=True, unquote=default_unquote):
 420     "Process a cookie value"
 421     if value is None:
 422         return None
 423     value = strip_spaces_and_quotes(value)
 424     value = parse_string(value, unquote=unquote)
 425     if not allow_spaces:
 426         assert ' ' not in value
 427     return value
 428
 429
 430 def valid_name(name):
 431     "Validate a cookie name string"
 432     if isinstance(name, bytes):
 433         name = name.decode('ascii')
 434     if not Definitions.COOKIE_NAME_RE.match(name):
 435         return False
 436     # This module doesn't support $identifiers, which are part of an obsolete
 437     # and highly complex standard which is never used.
 438     if name[0] == "$":
 439         return False
 440     return True
 441
 442
 443 def valid_value(value, quote=default_cookie_quote, unquote=default_unquote):
 444     """Validate a cookie value string.
 445
 446     This is generic across quote/unquote functions because it directly verifies
 447     the encoding round-trip using the specified quote/unquote functions.
 448     So if you use different quote/unquote functions, use something like this
 449     as a replacement for valid_value::
 450
 451         my_valid_value = lambda value: valid_value(value, quote=my_quote,
 452                                                           unquote=my_unquote)
 453     """
 454     if value is None:
 455         return False
 456
 457     # Put the value through a round trip with the given quote and unquote
 458     # functions, so we will know whether data will get lost or not in the event
 459     # that we don't complain.
 460     encoded = encode_cookie_value(value, quote=quote)
 461     decoded = parse_string(encoded, unquote=unquote)
 462
 463     # If the original string made the round trip, this is a valid value for the
 464     # given quote and unquote functions. Since the round trip can generate
 465     # different unicode forms, normalize before comparing, so we can ignore
 466     # trivial inequalities.
 467     decoded_normalized = (normalize("NFKD", decoded)
 468                           if not isinstance(decoded, bytes) else decoded)
 469     value_normalized = (normalize("NFKD", value)
 470                         if not isinstance(value, bytes) else value)
 471     if decoded_normalized == value_normalized:
 472         return True
 473     return False
 474
 475
 476 def valid_date(date):
 477     "Validate an expires datetime object"
 478     # We want something that acts like a datetime. In particular,
 479     # strings indicate a failure to parse down to an object and ints are
 480     # nonstandard and ambiguous at best.
 481     if not hasattr(date, 'tzinfo'):
 482         return False
 483     # Relevant RFCs define UTC as 'close enough' to GMT, and the maximum
 484     # difference between UTC and GMT is often stated to be less than a second.
 485     if date.tzinfo is None or _total_seconds(date.utcoffset()) < 1.1:
 486         return True
 487     return False
 488
 489
 490 def valid_domain(domain):
 491     "Validate a cookie domain ASCII string"
 492     # Using encoding on domain would confuse browsers into not sending cookies.
 493     # Generate UnicodeDecodeError up front if it can't store as ASCII.
 494     domain.encode('ascii')
 495     # Domains starting with periods are not RFC-valid, but this is very common
 496     # in existing cookies, so they should still parse with DOMAIN_AV.
 497     if Definitions.DOMAIN_RE.match(domain):
 498         return True
 499     return False
 500
 501
 502 def valid_path(value):
 503     "Validate a cookie path ASCII string"
 504     # Generate UnicodeDecodeError if path can't store as ASCII.
 505     value.encode("ascii")
 506     # Cookies without leading slash will likely be ignored, raise ASAP.
 507     if not (value and value[0] == "/"):
 508         return False
 509     if not Definitions.PATH_RE.match(value):
 510         return False
 511     return True
 512
 513
 514 def valid_max_age(number):
 515     "Validate a cookie Max-Age"
 516     if isinstance(number, basestring):
 517         try:
 518             number = long(number)
 519         except (ValueError, TypeError):
 520             return False
 521     if number >= 0 and number % 1 == 0:
 522         return True
 523     return False
 524
 525
 526 def encode_cookie_value(data, quote=default_cookie_quote):
 527     """URL-encode strings to make them safe for a cookie value.
 528
 529     By default this uses urllib quoting, as used in many other cookie
 530     implementations and in other Python code, instead of an ad hoc escaping
 531     mechanism which includes backslashes (these also being illegal chars in RFC
 532     6265).
 533     """
 534     if data is None:
 535         return None
 536
 537     # encode() to ASCII bytes so quote won't crash on non-ASCII.
 538     # but doing that to bytes objects is nonsense.
 539     # On Python 2 encode crashes if s is bytes containing non-ASCII.
 540     # On Python 3 encode crashes on all byte objects.
 541     if not isinstance(data, bytes):
 542         data = data.encode("utf-8")
 543
 544     # URL encode data so it is safe for cookie value
 545     quoted = quote(data)
 546
 547     # Don't force to bytes, so that downstream can use proper string API rather
 548     # than crippled bytes, and to encourage encoding to be done just once.
 549     return quoted
 550
 551
 552 def encode_extension_av(data, quote=default_extension_quote):
 553     """URL-encode strings to make them safe for an extension-av
 554     (extension attribute value): <any CHAR except CTLs or ";">
 555     """
 556     if not data:
 557         return ''
 558     return quote(data)
 559
 560
 561 def render_date(date):
 562     """Render a date (e.g. an Expires value) per RFCs 6265/2616/1123.
 563
 564     Don't give this localized (timezone-aware) datetimes. If you use them,
 565     convert them to GMT before passing them to this. There are too many
 566     conversion corner cases to handle this universally.
 567     """
 568     if not date:
 569         return None
 570     assert valid_date(date)
 571     # Avoid %a and %b, which can change with locale, breaking compliance
 572     weekday = Definitions.weekday_abbr_list[date.weekday()]
 573     month = Definitions.month_abbr_list[date.month - 1]
 574     return date.strftime("{day}, %d {month} %Y %H:%M:%S GMT"
 575                          ).format(day=weekday, month=month)
 576
 577
 578 def render_domain(domain):
 579     if not domain:
 580         return None
 581     if domain[0] == '.':
 582         return domain[1:]
 583     return domain
 584
 585
 586 def _parse_request(header_data, ignore_bad_cookies=False):
 587     """Turn one or more lines of 'Cookie:' header data into a dict mapping
 588     cookie names to cookie values (raw strings).
 589     """
 590     cookies_dict = {}
 591     for line in Definitions.EOL.split(header_data.strip()):
 592         matches = Definitions.COOKIE_RE.finditer(line)
 593         matches = [item for item in matches]
 594         for match in matches:
 595             invalid = match.group('invalid')
 596             if invalid:
 597                 if not ignore_bad_cookies:
 598                     raise InvalidCookieError(data=invalid)
 599                 _report_invalid_cookie(invalid)
 600                 continue
 601             name = match.group('name')
 602             values = cookies_dict.get(name)
 603             value = match.group('value').strip('"')
 604             if values:
 605                 values.append(value)
 606             else:
 607                 cookies_dict[name] = [value]
 608         if not matches:
 609             if not ignore_bad_cookies:
 610                 raise InvalidCookieError(data=line)
 611             _report_invalid_cookie(line)
 612     return cookies_dict
 613
 614
 615 def parse_one_response(line, ignore_bad_cookies=False,
 616                        ignore_bad_attributes=True):
 617     """Turn one 'Set-Cookie:' line into a dict mapping attribute names to
 618     attribute values (raw strings).
 619     """
 620     cookie_dict = {}
 621     # Basic validation, extract name/value/attrs-chunk
 622     match = Definitions.SET_COOKIE_HEADER_RE.match(line)
 623     if not match:
 624         if not ignore_bad_cookies:
 625             raise InvalidCookieError(data=line)
 626         _report_invalid_cookie(line)
 627         return None
 628     cookie_dict.update({
 629         'name': match.group('name'),
 630         'value': match.group('value')})
 631     # Extract individual attrs from the attrs chunk
 632     for match in Definitions.ATTR_RE.finditer(match.group('attrs')):
 633         captured = dict((k, v) for (k, v) in match.groupdict().items() if v)
 634         unrecognized = captured.get('unrecognized', None)
 635         if unrecognized:
 636             if not ignore_bad_attributes:
 637                 raise InvalidCookieAttributeError(None, unrecognized,
 638                                                   "unrecognized")
 639             _report_unknown_attribute(unrecognized)
 640             continue
 641         # for unary flags
 642         for key in ('secure', 'httponly'):
 643             if captured.get(key):
 644                 captured[key] = True
 645         # ignore subcomponents of expires - they're still there to avoid doing
 646         # two passes
 647         timekeys = ('weekday', 'month', 'day', 'hour', 'minute', 'second',
 648                     'year')
 649         if 'year' in captured:
 650             for key in timekeys:
 651                 del captured[key]
 652         elif 'year2' in captured:
 653             for key in timekeys:
 654                 del captured[key + "2"]
 655         cookie_dict.update(captured)
 656     return cookie_dict
 657
 658
 659 def _parse_response(header_data, ignore_bad_cookies=False,
 660                     ignore_bad_attributes=True):
 661     """Turn one or more lines of 'Set-Cookie:' header data into a list of dicts
 662     mapping attribute names to attribute values (as plain strings).
 663     """
 664     cookie_dicts = []
 665     for line in Definitions.EOL.split(header_data.strip()):
 666         if not line:
 667             break
 668         cookie_dict = parse_one_response(
 669             line, ignore_bad_cookies=ignore_bad_cookies,
 670             ignore_bad_attributes=ignore_bad_attributes)
 671         if not cookie_dict:
 672             continue
 673         cookie_dicts.append(cookie_dict)
 674     if not cookie_dicts:
 675         if not ignore_bad_cookies:
 676             raise InvalidCookieError(data=header_data)
 677         _report_invalid_cookie(header_data)
 678     return cookie_dicts
 679
 680
 681 class Cookie(object):
 682     """Provide a simple interface for creating, modifying, and rendering
 683     individual HTTP cookies.
 684
 685     Cookie attributes are represented as normal Python object attributes.
 686     Parsing, rendering and validation are reconfigurable per-attribute. The
 687     default behavior is intended to comply with RFC 6265, URL-encoding illegal
 688     characters where necessary. For example: the default behavior for the
 689     Expires attribute is to parse strings as datetimes using parse_date,
 690     validate that any set value is a datetime, and render the attribute per the
 691     preferred date format in RFC 1123.
 692     """
 693     def __init__(self, name, value, **kwargs):
 694         # If we don't have or can't set a name value, we don't want to return
 695         # junk, so we must break control flow. And we don't want to use
 696         # InvalidCookieAttributeError, because users may want to catch that to
 697         # suppress all complaining about funky attributes.
 698         try:
 699             self.name = name
 700         except InvalidCookieAttributeError:
 701             raise InvalidCookieError(message="invalid name for new Cookie",
 702                                      data=name)
 703         value = value or ''
 704         try:
 705             self.value = value
 706         except InvalidCookieAttributeError:
 707             raise InvalidCookieError(message="invalid value for new Cookie",
 708                                      data=value)
 709         if kwargs:
 710             self._set_attributes(kwargs, ignore_bad_attributes=False)
 711
 712     def _set_attributes(self, attrs, ignore_bad_attributes=False):
 713         for attr_name, attr_value in attrs.items():
 714             if not attr_name in self.attribute_names:
 715                 if not ignore_bad_attributes:
 716                     raise InvalidCookieAttributeError(
 717                         attr_name, attr_value,
 718                         "unknown cookie attribute '%s'" % attr_name)
 719                 _report_unknown_attribute(attr_name)
 720
 721             try:
 722                 setattr(self, attr_name, attr_value)
 723             except InvalidCookieAttributeError as error:
 724                 if not ignore_bad_attributes:
 725                     raise
 726                 _report_invalid_attribute(attr_name, attr_value, error.reason)
 727                 continue
 728
 729     @classmethod
 730     def from_dict(cls, cookie_dict, ignore_bad_attributes=True):
 731         """Construct an instance from a dict of strings to parse.
 732
 733         The main difference between this and Cookie(name, value, **kwargs) is
 734         that the values in the argument to this method are parsed.
 735
 736         If ignore_bad_attributes=True (default), values which did not parse
 737         are set to '' in order to avoid passing bad data.
 738         """
 739         name = cookie_dict.get('name', None)
 740         if not name:
 741             raise InvalidCookieError("Cookie must have name")
 742         raw_value = cookie_dict.get('value', '')
 743         # Absence or failure of parser here is fatal; errors in present name
 744         # and value should be found by Cookie.__init__.
 745         value = cls.attribute_parsers['value'](raw_value)
 746         cookie = cls(name, value)
 747
 748         # Parse values from serialized formats into objects
 749         parsed = {}
 750         for key, value in cookie_dict.items():
 751             # Don't want to pass name/value to _set_attributes
 752             if key in ('name', 'value'):
 753                 continue
 754             parser = cls.attribute_parsers.get(key)
 755             if not parser:
 756                 # Don't let totally unknown attributes pass silently
 757                 if not ignore_bad_attributes:
 758                     raise InvalidCookieAttributeError(
 759                         key, value, "unknown cookie attribute '%s'" % key)
 760                 _report_unknown_attribute(key)
 761                 continue
 762             try:
 763                 parsed_value = parser(value)
 764             except Exception as e:
 765                 reason = "did not parse with %r: %r" % (parser, e)
 766                 if not ignore_bad_attributes:
 767                     raise InvalidCookieAttributeError(
 768                         key, value, reason)
 769                 _report_invalid_attribute(key, value, reason)
 770                 parsed_value = ''
 771             parsed[key] = parsed_value
 772
 773         # Set the parsed objects (does object validation automatically)
 774         cookie._set_attributes(parsed, ignore_bad_attributes)
 775         return cookie
 776
 777     @classmethod
 778     def from_string(cls, line, ignore_bad_cookies=False,
 779                     ignore_bad_attributes=True):
 780         "Construct a Cookie object from a line of Set-Cookie header data."
 781         cookie_dict = parse_one_response(
 782             line, ignore_bad_cookies=ignore_bad_cookies,
 783             ignore_bad_attributes=ignore_bad_attributes)
 784         if not cookie_dict:
 785             return None
 786         return cls.from_dict(
 787             cookie_dict, ignore_bad_attributes=ignore_bad_attributes)
 788
 789     def to_dict(self):
 790         this_dict = {'name': self.name, 'value': self.value}
 791         this_dict.update(self.attributes())
 792         return this_dict
 793
 794     def validate(self, name, value):
 795         """Validate a cookie attribute with an appropriate validator.
 796
 797         The value comes in already parsed (for example, an expires value
 798         should be a datetime). Called automatically when an attribute
 799         value is set.
 800         """
 801         validator = self.attribute_validators.get(name, None)
 802         if validator:
 803             return True if validator(value) else False
 804         return True
 805
 806     def __setattr__(self, name, value):
 807         """Attributes mentioned in attribute_names get validated using
 808         functions in attribute_validators, raising an exception on failure.
 809         Others get left alone.
 810         """
 811         if name in self.attribute_names or name in ("name", "value"):
 812             if name == 'name' and not value:
 813                 raise InvalidCookieError(message="Cookies must have names")
 814             # Ignore None values indicating unset attr. Other invalids should
 815             # raise error so users of __setattr__ can learn.
 816             if value is not None:
 817                 if not self.validate(name, value):
 818                     raise InvalidCookieAttributeError(
 819                         name, value, "did not validate with " +
 820                         repr(self.attribute_validators.get(name)))
 821         object.__setattr__(self, name, value)
 822
 823     def __getattr__(self, name):
 824         """Provide for acting like everything in attribute_names is
 825         automatically set to None, rather than having to do so explicitly and
 826         only at import time.
 827         """
 828         if name in self.attribute_names:
 829             return None
 830         raise AttributeError(name)
 831
 832     def attributes(self):
 833         """Export this cookie's attributes as a dict of encoded values.
 834
 835         This is an important part of the code for rendering attributes, e.g.
 836         render_response().
 837         """
 838         dictionary = {}
 839         # Only look for attributes registered in attribute_names.
 840         for python_attr_name, cookie_attr_name in self.attribute_names.items():
 841             value = getattr(self, python_attr_name)
 842             renderer = self.attribute_renderers.get(python_attr_name, None)
 843             if renderer:
 844                 value = renderer(value)
 845             # If renderer returns None, or it's just natively none, then the
 846             # value is suppressed entirely - does not appear in any rendering.
 847             if not value:
 848                 continue
 849             dictionary[cookie_attr_name] = value
 850         return dictionary
 851
 852     def render_request(self):
 853         """Render as a string formatted for HTTP request headers
 854         (simple 'Cookie: ' style).
 855         """
 856         # Use whatever renderers are defined for name and value.
 857         name, value = self.name, self.value
 858         renderer = self.attribute_renderers.get('name', None)
 859         if renderer:
 860             name = renderer(name)
 861         renderer = self.attribute_renderers.get('value', None)
 862         if renderer:
 863             value = renderer(value)
 864         return ''.join((name, "=", value))
 865
 866     def render_response(self):
 867         """Render as a string formatted for HTTP response headers
 868         (detailed 'Set-Cookie: ' style).
 869         """
 870         # Use whatever renderers are defined for name and value.
 871         # (.attributes() is responsible for all other rendering.)
 872         name, value = self.name, self.value
 873         renderer = self.attribute_renderers.get('name', None)
 874         if renderer:
 875             name = renderer(name)
 876         renderer = self.attribute_renderers.get('value', None)
 877         if renderer:
 878             value = renderer(value)
 879         return '; '.join(
 880             ['{0}={1}'.format(name, value)] +
 881             [key if isinstance(val, bool) else '='.join((key, val))
 882              for key, val in self.attributes().items()]
 883         )
 884
 885     def __eq__(self, other):
 886         attrs = ['name', 'value'] + list(self.attribute_names.keys())
 887         for attr in attrs:
 888             mine = getattr(self, attr, None)
 889             his = getattr(other, attr, None)
 890             if isinstance(mine, bytes):
 891                 mine = mine.decode('utf-8')
 892             if isinstance(his, bytes):
 893                 his = his.decode('utf-8')
 894             if attr == 'domain':
 895                 if mine and mine[0] == '.':
 896                     mine = mine[1:]
 897                 if his and his[0] == '.':
 898                     his = his[1:]
 899             if mine != his:
 900                 return False
 901         return True
 902
 903     def __ne__(self, other):
 904         return not self.__eq__(other)
 905
 906     # Add a name and its proper rendering to this dict to register an attribute
 907     # as exportable. The key is the name of the Cookie object attribute in
 908     # Python, and it is mapped to the name you want in the output.
 909     # 'name' and 'value' should not be here.
 910     attribute_names = {
 911         'expires':  'Expires',
 912         'max_age':  'Max-Age',
 913         'domain':   'Domain',
 914         'path':     'Path',
 915         'comment':  'Comment',
 916         'version':  'Version',
 917         'secure':   'Secure',
 918         'httponly': 'HttpOnly',
 919     }
 920
 921     # Register single-parameter functions in this dictionary to have them
 922     # used for encoding outgoing values (e.g. as RFC compliant strings,
 923     # as base64, encrypted stuff, etc.)
 924     # These are called by the property generated by cookie_attribute().
 925     # Usually it would be wise not to define a renderer for name, but it is
 926     # supported in case there is ever a real need.
 927     attribute_renderers = {
 928         'value':    encode_cookie_value,
 929         'domain':   render_domain,
 930         'expires':  render_date,
 931         'max_age':  lambda item: str(item) if item is not None else None,
 932         'secure':   lambda item: True if item else False,
 933         'httponly': lambda item: True if item else False,
 934         'comment':  encode_extension_av,
 935         'version':  lambda item: (str(item) if isinstance(item, int)
 936                                   else encode_extension_av(item)),
 937     }
 938
 939     # Register single-parameter functions in this dictionary to have them used
 940     # for decoding incoming values for use in the Python API (e.g. into nice
 941     # objects, numbers, unicode strings, etc.)
 942     # These are called by the property generated by cookie_attribute().
 943     attribute_parsers = {
 944         'value':    parse_value,
 945         'expires':  parse_date,
 946         'domain':   parse_domain,
 947         'path':     parse_path,
 948         'max_age':  lambda item: long(strip_spaces_and_quotes(item)),
 949         'comment':  parse_string,
 950         'version':  lambda item: int(strip_spaces_and_quotes(item)),
 951         'secure':   lambda item: True if item else False,
 952         'httponly': lambda item: True if item else False,
 953     }
 954
 955     # Register single-parameter functions which return a true value for
 956     # acceptable values, and a false value for unacceptable ones. An
 957     # attribute's validator is run after it is parsed or when it is directly
 958     # set, and InvalidCookieAttribute is raised if validation fails (and the
 959     # validator doesn't raise a different exception prior)
 960     attribute_validators = {
 961         'name':     valid_name,
 962         'value':    valid_value,
 963         'expires':  valid_date,
 964         'domain':   valid_domain,
 965         'path':     valid_path,
 966         'max_age':  valid_max_age,
 967         'comment':  valid_value,
 968         'version':  lambda number: re.match("^\d+\Z", str(number)),
 969         'secure':   lambda item: item is True or item is False,
 970         'httponly': lambda item: item is True or item is False,
 971     }
 972
 973
 974 class Cookies(dict):
 975     """Represent a set of cookies indexed by name.
 976
    This class bundles together a set of Cookie objects and provides
    a convenient interface to them for parsing and producing cookie headers.
 979     In basic operation it acts just like a dict of Cookie objects, but it adds
 980     additional convenience methods for the usual cookie tasks: add cookie
 981     objects by their names, create new cookie objects under specified names,
 982     parse HTTP request or response data into new cookie objects automatically
 983     stored in the dict, and render the set in formats suitable for HTTP request
 984     or response headers.
 985     """
    # Class used to build new cookies unless the constructor is given a
    # _cookie_class keyword argument.
    DEFAULT_COOKIE_CLASS = Cookie
 987
 988     def __init__(self, *args, **kwargs):
 989         dict.__init__(self)
 990         self.all_cookies = []
 991         self.cookie_class = kwargs.get(
 992             "_cookie_class", self.DEFAULT_COOKIE_CLASS)
 993         self.add(*args, **kwargs)
 994
 995     def add(self, *args, **kwargs):
 996         """Add Cookie objects by their names, or create new ones under
 997         specified names.
 998
 999         Any unnamed arguments are interpreted as existing cookies, and
1000         are added under the value in their .name attribute. With keyword
1001         arguments, the key is interpreted as the cookie name and the
1002         value as the UNENCODED value stored in the cookie.
1003         """
1004         # Only the first one is accessible through the main interface,
1005         # others accessible through get_all (all_cookies).
1006         for cookie in args:
1007             self.all_cookies.append(cookie)
1008             if cookie.name in self:
1009                 continue
1010             self[cookie.name] = cookie
1011         for key, value in kwargs.items():
1012             cookie = self.cookie_class(key, value)
1013             self.all_cookies.append(cookie)
1014             if key in self:
1015                 continue
1016             self[key] = cookie
1017
1018     def get_all(self, key):
1019         return [cookie for cookie in self.all_cookies
1020                 if cookie.name == key]
1021
1022     def parse_request(self, header_data, ignore_bad_cookies=False):
1023         """Parse 'Cookie' header data into Cookie objects, and add them to
1024         this Cookies object.
1025
1026         :arg header_data: string containing only 'Cookie:' request headers or
1027         header values (as in CGI/WSGI HTTP_COOKIE); if more than one, they must
1028         be separated by CRLF (\\r\\n).
1029
1030         :arg ignore_bad_cookies: if set, will log each syntactically invalid
1031         cookie (at the granularity of semicolon-delimited blocks) rather than
1032         raising an exception at the first bad cookie.
1033
1034         :returns: a Cookies instance containing Cookie objects parsed from
1035         header_data.
1036
1037         .. note::
1038         If you want to parse 'Set-Cookie:' response headers, please use
1039         parse_response instead. parse_request will happily turn 'expires=frob'
1040         into a separate cookie without complaining, according to the grammar.
1041         """
1042         cookies_dict = _parse_request(
1043             header_data, ignore_bad_cookies=ignore_bad_cookies)
1044         cookie_objects = []
1045         for name, values in cookies_dict.items():
1046             for value in values:
1047                 # Use from_dict to check name and parse value
1048                 cookie_dict = {'name': name, 'value': value}
1049                 try:
1050                     cookie = self.cookie_class.from_dict(cookie_dict)
1051                 except InvalidCookieError:
1052                     if not ignore_bad_cookies:
1053                         raise
1054                 else:
1055                     cookie_objects.append(cookie)
1056         try:
1057             self.add(*cookie_objects)
1058         except InvalidCookieError:
1059             if not ignore_bad_cookies:
1060                 raise
1061             _report_invalid_cookie(header_data)
1062         return self
1063
1064     def parse_response(self, header_data, ignore_bad_cookies=False,
1065                        ignore_bad_attributes=True):
1066         """Parse 'Set-Cookie' header data into Cookie objects, and add them to
1067         this Cookies object.
1068
1069         :arg header_data: string containing only 'Set-Cookie:' request headers
1070         or their corresponding header values; if more than one, they must be
1071         separated by CRLF (\\r\\n).
1072
1073         :arg ignore_bad_cookies: if set, will log each syntactically invalid
1074         cookie rather than raising an exception at the first bad cookie. (This
1075         includes cookies which have noncompliant characters in the attribute
1076         section).
1077
1078         :arg ignore_bad_attributes: defaults to True, which means to log but
1079         not raise an error when a particular attribute is unrecognized. (This
1080         does not necessarily mean that the attribute is invalid, although that
1081         would often be the case.) if unset, then an error will be raised at the
1082         first semicolon-delimited block which has an unknown attribute.
1083
1084         :returns: a Cookies instance containing Cookie objects parsed from
1085         header_data, each with recognized attributes populated.
1086
1087         .. note::
1088         If you want to parse 'Cookie:' headers (i.e., data like what's sent
1089         with an HTTP request, which has only name=value pairs and no
1090         attributes), then please use parse_request instead. Such lines often
1091         contain multiple name=value pairs, and parse_response will throw away
1092         the pairs after the first one, which will probably generate errors or
1093         confusing behavior. (Since there's no perfect way to automatically
1094         determine which kind of parsing to do, you have to tell it manually by
1095         choosing correctly from parse_request between part_response.)
1096         """
1097         cookie_dicts = _parse_response(
1098             header_data,
1099             ignore_bad_cookies=ignore_bad_cookies,
1100             ignore_bad_attributes=ignore_bad_attributes)
1101         cookie_objects = []
1102         for cookie_dict in cookie_dicts:
1103             cookie = self.cookie_class.from_dict(cookie_dict)
1104             cookie_objects.append(cookie)
1105         self.add(*cookie_objects)
1106         return self
1107
1108     @classmethod
1109     def from_request(cls, header_data, ignore_bad_cookies=False):
1110         "Construct a Cookies object from request header data."
1111         cookies = cls()
1112         cookies.parse_request(
1113             header_data, ignore_bad_cookies=ignore_bad_cookies)
1114         return cookies
1115
1116     @classmethod
1117     def from_response(cls, header_data, ignore_bad_cookies=False,
1118                       ignore_bad_attributes=True):
1119         "Construct a Cookies object from response header data."
1120         cookies = cls()
1121         cookies.parse_response(
1122             header_data,
1123             ignore_bad_cookies=ignore_bad_cookies,
1124             ignore_bad_attributes=ignore_bad_attributes)
1125         return cookies
1126
1127     def render_request(self, sort=True):
1128         """Render the dict's Cookie objects into a string formatted for HTTP
1129         request headers (simple 'Cookie: ' style).
1130         """
1131         if not sort:
1132             return ("; ".join(
1133                 cookie.render_request() for cookie in self.values()))
1134         return ("; ".join(sorted(
1135             cookie.render_request() for cookie in self.values())))
1136
1137     def render_response(self, sort=True):
1138         """Render the dict's Cookie objects into list of strings formatted for
1139         HTTP response headers (detailed 'Set-Cookie: ' style).
1140         """
1141         rendered = [cookie.render_response() for cookie in self.values()]
1142         return rendered if not sort else sorted(rendered)
1143
1144     def __repr__(self):
1145         return "Cookies(%s)" % ', '.join("%s=%r" % (name, cookie.value) for
1146                                          (name, cookie) in self.items())
1147
1148     def __eq__(self, other):
1149         """Test if a Cookies object is globally 'equal' to another one by
1150         seeing if it looks like a dict such that d[k] == self[k]. This depends
1151         on each Cookie object reporting its equality correctly.
1152         """
1153         if not hasattr(other, "keys"):
1154             return False
1155         try:
1156             keys = sorted(set(self.keys()) | set(other.keys()))
1157             for key in keys:
1158                 if not key in self:
1159                     return False
1160                 if not key in other:
1161                     return False
1162                 if self[key] != other[key]:
1163                     return False
1164         except (TypeError, KeyError):
1165             raise
1166         return True
1167
1168     def __ne__(self, other):
1169         return not self.__eq__(other)