mygpo/utils.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of my.gpodder.org.
   4 #
   5 # my.gpodder.org is free software: you can redistribute it and/or modify it
   6 # under the terms of the GNU Affero General Public License as published by
   7 # the Free Software Foundation, either version 3 of the License, or (at your
   8 # option) any later version.
   9 #
  10 # my.gpodder.org is distributed in the hope that it will be useful, but
  11 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
  13 # License for more details.
  14 #
  15 # You should have received a copy of the GNU Affero General Public License
  16 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
  17 #
  18
  19 import functools
  20 import types
  21 import subprocess
  22 import os
  23 import operator
  24 import sys
  25 import re
  26 import collections
  27 from datetime import datetime, timedelta, date
  28 import time
  29 import hashlib
  30 import urlparse
  31 import urllib
  32 import urllib2
  33 import zlib
  34
  35 from django.conf import settings
  36
  37 from mygpo.core.json import json
  38
  39
  40 def daterange(from_date, to_date=None, leap=timedelta(days=1)):
  41     """
  42     >>> from_d = datetime(2010, 01, 01)
  43     >>> to_d = datetime(2010, 01, 05)
  44     >>> list(daterange(from_d, to_d))
  45     [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
  46     """
  47
  48     if to_date is None:
  49         if isinstance(from_date, datetime):
  50             to_date = datetime.now()
  51         else:
  52             to_date = date.today()
  53
  54     while from_date <= to_date:
  55         yield from_date
  56         from_date = from_date + leap
  57     return
  58
  59 def format_time(value):
  60     """Format an offset (in seconds) to a string
  61
  62     The offset should be an integer or float value.
  63
  64     >>> format_time(0)
  65     '00:00'
  66     >>> format_time(20)
  67     '00:20'
  68     >>> format_time(3600)
  69     '01:00:00'
  70     >>> format_time(10921)
  71     '03:02:01'
  72     """
  73     try:
  74         dt = datetime.utcfromtimestamp(value)
  75     except ValueError:
  76         return ''
  77
  78     if dt.hour == 0:
  79         return dt.strftime('%M:%S')
  80     else:
  81         return dt.strftime('%H:%M:%S')
  82
  83 def parse_time(value):
  84     """
  85     >>> parse_time(10)
  86     10
  87
  88     >>> parse_time('05:10') #5*60+10
  89     310
  90
  91     >>> parse_time('1:05:10') #60*60+5*60+10
  92     3910
  93     """
  94     if value is None:
  95         raise ValueError('None value in parse_time')
  96
  97     if isinstance(value, int):
  98         # Don't need to parse already-converted time value
  99         return value
 100
 101     if value == '':
 102         raise ValueError('Empty valueing in parse_time')
 103
 104     for format in ('%H:%M:%S', '%M:%S'):
 105         try:
 106             t = time.strptime(value, format)
 107             return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
 108         except ValueError, e:
 109             continue
 110
 111     return int(value)
 112
 113
 114 def parse_bool(val):
 115     """
 116     >>> parse_bool('True')
 117     True
 118
 119     >>> parse_bool('true')
 120     True
 121
 122     >>> parse_bool('')
 123     False
 124     """
 125     if isinstance(val, bool):
 126         return val
 127     if val.lower() == 'true':
 128         return True
 129     return False
 130
 131
 132 def iterate_together(lists, key=lambda x: x, reverse=False):
 133     """
 134     takes ordered, possibly sparse, lists with similar items
 135     (some items have a corresponding item in the other lists, some don't).
 136
 137     It then yield tuples of corresponding items, where one element is None is
 138     there is no corresponding entry in one of the lists.
 139
 140     Tuples where both elements are None are skipped.
 141
 142     The results of the key method are used for the comparisons.
 143
 144     If reverse is True, the lists are expected to be sorted in reverse order
 145     and the results will also be sorted reverse
 146
 147     >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
 148     [(1, 1), (2, None), (None, 3)]
 149
 150     >>> list(iterate_together([[], []]))
 151     []
 152
 153     >>> list(iterate_together([range(1, 3), range(3, 5)]))
 154     [(1, None), (2, None), (None, 3), (None, 4)]
 155
 156     >>> list(iterate_together([range(1, 3), []]))
 157     [(1, None), (2, None)]
 158
 159     >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
 160     [(1, None), (3, 3)]
 161     """
 162
 163     Next = collections.namedtuple('Next', 'item more')
 164     min_ = min if not reverse else max
 165     lt_  = operator.lt if not reverse else operator.gt
 166
 167     lists = [iter(l) for l in lists]
 168
 169     def _take(it):
 170         try:
 171             i = it.next()
 172             while i is None:
 173                 i = it.next()
 174             return Next(i, True)
 175         except StopIteration:
 176             return Next(None, False)
 177
 178     def new_res():
 179         return [None]*len(lists)
 180
 181     # take first bunch of items
 182     items = [_take(l) for l in lists]
 183
 184     while any(i.item is not None or i.more for i in items):
 185
 186         res = new_res()
 187
 188         for n, item in enumerate(items):
 189
 190             if item.item is None:
 191                 continue
 192
 193             if all(x is None for x in res):
 194                 res[n] = item.item
 195                 continue
 196
 197             min_v = min_(filter(lambda x: x is not None, res), key=key)
 198
 199             if key(item.item) == key(min_v):
 200                 res[n] = item.item
 201
 202             elif lt_(key(item.item), key(min_v)):
 203                 res = new_res()
 204                 res[n] = item.item
 205
 206         for n, x in enumerate(res):
 207             if x is not None:
 208                 items[n] = _take(lists[n])
 209
 210         yield tuple(res)
 211
 212
 213 def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
 214
 215     factor = float(val)/max_val if max_val > 0 else 0
 216
 217     # progress as percentage
 218     percentage_str = '{val:.2%}'.format(val=factor)
 219
 220     # progress bar filled with #s
 221     factor = min(int(factor*max_width), max_width)
 222     progress_str = '#' * factor + ' ' * (max_width-factor)
 223
 224     #insert percentage into bar
 225     percentage_start = int((max_width-len(percentage_str))/2)
 226     progress_str = progress_str[:percentage_start] + \
 227                    percentage_str + \
 228                    progress_str[percentage_start+len(percentage_str):]
 229
 230     print >> stream, '\r',
 231     print >> stream, '[ %s ] %s / %s | %s' % (
 232         progress_str,
 233         val,
 234         max_val,
 235         status_str),
 236     stream.flush()
 237
 238
 239 def set_cmp(list, simplify):
 240     """
 241     Builds a set out of a list but uses the results of simplify to determine equality between items
 242     """
 243     simpl = lambda x: (simplify(x), x)
 244     lst = dict(map(simpl, list))
 245     return lst.values()
 246
 247
 248 def first(it):
 249     """
 250     returns the first not-None object or None if the iterator is exhausted
 251     """
 252     for x in it:
 253         if x is not None:
 254             return x
 255     return None
 256
 257
 258 def intersect(a, b):
 259     return list(set(a) & set(b))
 260
 261
 262
 263 def remove_control_chars(s):
 264     all_chars = (unichr(i) for i in xrange(0x110000))
 265     control_chars = ''.join(map(unichr, range(0,32) + range(127,160)))
 266     control_char_re = re.compile('[%s]' % re.escape(control_chars))
 267
 268     return control_char_re.sub('', s)
 269
 270
 271 def unzip(a):
 272     return tuple(map(list,zip(*a)))
 273
 274
 275 def parse_range(s, min, max, default=None):
 276     """
 277     Parses the string and returns its value. If the value is outside the given
 278     range, its closest number within the range is returned
 279
 280     >>> parse_range('5', 0, 10)
 281     5
 282
 283     >>> parse_range('0', 5, 10)
 284     5
 285
 286     >>> parse_range('15',0, 10)
 287     10
 288
 289     >>> parse_range('x', 0, 20)
 290     10
 291
 292     >>> parse_range('x', 0, 20, 20)
 293     20
 294     """
 295     try:
 296         val = int(s)
 297         if val < min:
 298             return min
 299         if val > max:
 300             return max
 301         return val
 302
 303     except (ValueError, TypeError):
 304         return default if default is not None else (max-min)/2
 305
 306
 307
 308 def flatten(l):
 309     return [item for sublist in l for item in sublist]
 310
 311
 312 def linearize(key, iterators, reverse=False):
 313     """
 314     Linearizes a number of iterators, sorted by some comparison function
 315     """
 316
 317     iters = [iter(i) for i in iterators]
 318     vals = []
 319     for i in iters:
 320         try:
 321             v = i.next()
 322             vals. append( (v, i) )
 323         except StopIteration:
 324             continue
 325
 326     while vals:
 327         vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
 328         val, it = vals.pop(0)
 329         yield val
 330         try:
 331             next_val = it.next()
 332             vals.append( (next_val, it) )
 333         except StopIteration:
 334             pass
 335
 336
 337 def skip_pairs(iterator, cmp=cmp):
 338     """ Skips pairs of equal items
 339
 340     >>> list(skip_pairs([]))
 341     []
 342
 343     >>> list(skip_pairs([1]))
 344     [1]
 345
 346     >>> list(skip_pairs([1, 2, 3]))
 347     [1, 2, 3]
 348
 349     >>> list(skip_pairs([1, 1]))
 350     []
 351
 352     >>> list(skip_pairs([1, 2, 2]))
 353     [1]
 354
 355     >>> list(skip_pairs([1, 2, 2, 3]))
 356     [1, 3]
 357
 358     >>> list(skip_pairs([1, 2, 2, 2]))
 359     [1, 2]
 360
 361     >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
 362     [1, 3]
 363     """
 364
 365     iterator = iter(iterator)
 366     next = iterator.next()
 367
 368     while True:
 369         item = next
 370         try:
 371             next = iterator.next()
 372         except StopIteration as e:
 373             yield item
 374             raise e
 375
 376         if cmp(item, next) == 0:
 377             next = iterator.next()
 378         else:
 379             yield item
 380
 381
 382 def get_timestamp(datetime_obj):
 383     """ Returns the timestamp as an int for the given datetime object
 384
 385     >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
 386     1302168606
 387
 388     >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
 389     0
 390     """
 391     return int(time.mktime(datetime_obj.timetuple()))
 392
 393
 394
 395 re_url = re.compile('^https?://')
 396
 397 def is_url(string):
 398     """ Returns true if a string looks like an URL
 399
 400     >>> is_url('http://example.com/some-path/file.xml')
 401     True
 402
 403     >>> is_url('something else')
 404     False
 405     """
 406
 407     return bool(re_url.match(string))
 408
 409
 410
 411 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
 412 # this does not increase asymptotical complexity
 413 # but can still waste more time than it saves.
 414 def shortest_of(strings):
 415     return min(strings, key=len)
 416
 417 def longest_substr(strings):
 418     """
 419     Returns the longest common substring of the given strings
 420     """
 421
 422     substr = ""
 423     if not strings:
 424         return substr
 425     reference = shortest_of(strings)
 426     length = len(reference)
 427     #find a suitable slice i:j
 428     for i in xrange(length):
 429         #only consider strings long at least len(substr) + 1
 430         for j in xrange(i + len(substr) + 1, length):
 431             candidate = reference[i:j]
 432             if all(candidate in text for text in strings):
 433                 substr = candidate
 434     return substr
 435
 436
 437
 438 def additional_value(it, gen_val, val_changed=lambda _: True):
 439     """ Provides an additional value to the elements, calculated when needed
 440
 441     For the elements from the iterator, some additional value can be computed
 442     by gen_val (which might be an expensive computation).
 443
 444     If the elements in the iterator are ordered so that some subsequent
 445     elements would generate the same additional value, val_changed can be
 446     provided, which receives the next element from the iterator and the
 447     previous additional value. If the element would generate the same
 448     additional value (val_changed returns False), its computation is skipped.
 449
 450     >>> # get the next full hundred higher than x
 451     >>> # this will probably be an expensive calculation
 452     >>> next_hundred = lambda x: x + 100-(x % 100)
 453
 454     >>> # returns True if h is not the value that next_hundred(x) would provide
 455     >>> # this should be a relatively cheap calculation, compared to the above
 456     >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
 457
 458     >>> xs = [0, 50, 100, 101, 199, 200, 201]
 459     >>> list(additional_value(xs, next_hundred, diff_hundred))
 460     [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
 461     """
 462
 463     _none = object()
 464     current = _none
 465
 466     for x in it:
 467         if current is _none or val_changed(x, current):
 468             current = gen_val(x)
 469
 470         yield (x, current)
 471
 472
 473 def file_hash(f, h=hashlib.md5, block_size=2**20):
 474     """ returns the hash of the contents of a file """
 475     f_hash = h()
 476     for chunk in iter(lambda: f.read(block_size), ''):
 477         f_hash.update(chunk)
 478     return f_hash
 479
 480
 481
 482 def split_list(l, prop):
 483     """ split elements that satisfy a property, and those that don't """
 484     match   = filter(prop, l)
 485     nomatch = [x for x in l if x not in match]
 486     return match, nomatch
 487
 488
 489 def sorted_chain(links, key, reverse=False):
 490     """ Takes a list of iters can iterates over sorted elements
 491
 492     Each elment of links should be a tuple of (sort_key, iterator). The
 493     elements of each iterator should be sorted already. sort_key should
 494     indicate the key of the first element and needs to be comparable to the
 495     result of key(elem).
 496
 497     The function returns an iterator over the globally sorted element that
 498     ensures that as little iterators as possible are evaluated.  When
 499     evaluating """
 500
 501     # mixed_list initially contains all placeholders; later evaluated
 502     # elements (from the iterators) are mixed in
 503     mixed_list = [(k, link, True) for k, link in links]
 504
 505     while mixed_list:
 506         _, item, expand = mixed_list.pop(0)
 507
 508         # found an element (from an earlier expansion), yield it
 509         if not expand:
 510             yield item
 511             continue
 512
 513         # found an iter that needs to be expanded.
 514         # The iterator is fully consumed
 515         new_items = [(key(i), i, False) for i in item]
 516
 517         # sort links (placeholders) and elements together
 518         mixed_list = sorted(mixed_list + new_items, key=lambda (k, _v, _e): k,
 519                 reverse=reverse)
 520
 521
 522 def url_add_authentication(url, username, password):
 523     """
 524     Adds authentication data (username, password) to a given
 525     URL in order to construct an authenticated URL.
 526
 527     >>> url_add_authentication('https://host.com/', '', None)
 528     'https://host.com/'
 529     >>> url_add_authentication('http://example.org/', None, None)
 530     'http://example.org/'
 531     >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
 532     'telnet://foo:bar@host.com/'
 533     >>> url_add_authentication('ftp://example.org', 'billy', None)
 534     'ftp://billy@example.org'
 535     >>> url_add_authentication('ftp://example.org', 'billy', '')
 536     'ftp://billy:@example.org'
 537     >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
 538     'http://aa:bc@localhost/x'
 539     >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
 540     'http://i%2Fo:P@ss:@blubb.lan/u.html'
 541     >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
 542     'http://c:d@x.org/'
 543     >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
 544     'http://P@x:i%2F@cx.lan'
 545     >>> url_add_authentication('http://x.org/', 'a b', 'c d')
 546     'http://a%20b:c%20d@x.org/'
 547     """
 548     if username is None or username == '':
 549         return url
 550
 551     # Relaxations of the strict quoting rules (bug 1521):
 552     # 1. Accept '@' in username and password
 553     # 2. Acecpt ':' in password only
 554     username = urllib.quote(username, safe='@')
 555
 556     if password is not None:
 557         password = urllib.quote(password, safe='@:')
 558         auth_string = ':'.join((username, password))
 559     else:
 560         auth_string = username
 561
 562     url = url_strip_authentication(url)
 563
 564     url_parts = list(urlparse.urlsplit(url))
 565     # url_parts[1] is the HOST part of the URL
 566     url_parts[1] = '@'.join((auth_string, url_parts[1]))
 567
 568     return urlparse.urlunsplit(url_parts)
 569
 570
 571 def urlopen(url, headers=None, data=None):
 572     """
 573     An URL opener with the User-agent set to gPodder (with version)
 574     """
 575     username, password = username_password_from_url(url)
 576     if username is not None or password is not None:
 577         url = url_strip_authentication(url)
 578         password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
 579         password_mgr.add_password(None, url, username, password)
 580         handler = urllib2.HTTPBasicAuthHandler(password_mgr)
 581         opener = urllib2.build_opener(handler)
 582     else:
 583         opener = urllib2.build_opener()
 584
 585     if headers is None:
 586         headers = {}
 587     else:
 588         headers = dict(headers)
 589
 590     headers.update({'User-agent': settings.USER_AGENT})
 591     request = urllib2.Request(url, data=data, headers=headers)
 592     return opener.open(request)
 593
 594
 595
 596 def username_password_from_url(url):
 597     r"""
 598     Returns a tuple (username,password) containing authentication
 599     data from the specified URL or (None,None) if no authentication
 600     data can be found in the URL.
 601
 602     See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)
 603
 604     >>> username_password_from_url('https://@host.com/')
 605     ('', None)
 606     >>> username_password_from_url('telnet://host.com/')
 607     (None, None)
 608     >>> username_password_from_url('ftp://foo:@host.com/')
 609     ('foo', '')
 610     >>> username_password_from_url('http://a:b@host.com/')
 611     ('a', 'b')
 612     >>> username_password_from_url(1)
 613     Traceback (most recent call last):
 614       ...
 615     ValueError: URL has to be a string or unicode object.
 616     >>> username_password_from_url(None)
 617     Traceback (most recent call last):
 618       ...
 619     ValueError: URL has to be a string or unicode object.
 620     >>> username_password_from_url('http://a@b:c@host.com/')
 621     ('a@b', 'c')
 622     >>> username_password_from_url('ftp://a:b:c@host.com/')
 623     ('a', 'b:c')
 624     >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
 625     ('i/o', 'P@ss:')
 626     >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
 627     ('\xc3\xb6sterreich', None)
 628     >>> username_password_from_url('http://w%20x:y%20z@example.org/')
 629     ('w x', 'y z')
 630     >>> username_password_from_url('http://example.com/x@y:z@test.com/')
 631     (None, None)
 632     """
 633     if type(url) not in (str, unicode):
 634         raise ValueError('URL has to be a string or unicode object.')
 635
 636     (username, password) = (None, None)
 637
 638     (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
 639
 640     if '@' in netloc:
 641         (authentication, netloc) = netloc.rsplit('@', 1)
 642         if ':' in authentication:
 643             (username, password) = authentication.split(':', 1)
 644
 645             # RFC1738 dictates that we should not allow ['/', '@', ':']
 646             # characters in the username and password field (Section 3.1):
 647             #
 648             # 1. The "/" can't be in there at this point because of the way
 649             #    urlparse (which we use above) works.
 650             # 2. Due to gPodder bug 1521, we allow "@" in the username and
 651             #    password field. We use netloc.rsplit('@', 1), which will
 652             #    make sure that we split it at the last '@' in netloc.
 653             # 3. The colon must be excluded (RFC2617, Section 2) in the
 654             #    username, but is apparently allowed in the password. This
 655             #    is handled by the authentication.split(':', 1) above, and
 656             #    will cause any extraneous ':'s to be part of the password.
 657
 658             username = urllib.unquote(username)
 659             password = urllib.unquote(password)
 660         else:
 661             username = urllib.unquote(authentication)
 662
 663     return (username, password)
 664
 665
 666 def url_strip_authentication(url):
 667     """
 668     Strips authentication data from an URL. Returns the URL with
 669     the authentication data removed from it.
 670
 671     >>> url_strip_authentication('https://host.com/')
 672     'https://host.com/'
 673     >>> url_strip_authentication('telnet://foo:bar@host.com/')
 674     'telnet://host.com/'
 675     >>> url_strip_authentication('ftp://billy@example.org')
 676     'ftp://example.org'
 677     >>> url_strip_authentication('ftp://billy:@example.org')
 678     'ftp://example.org'
 679     >>> url_strip_authentication('http://aa:bc@localhost/x')
 680     'http://localhost/x'
 681     >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
 682     'http://blubb.lan/u.html'
 683     >>> url_strip_authentication('http://c:d@x.org/')
 684     'http://x.org/'
 685     >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
 686     'http://cx.lan'
 687     >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
 688     'http://example.com/'
 689     """
 690     url_parts = list(urlparse.urlsplit(url))
 691     # url_parts[1] is the HOST part of the URL
 692
 693     # Remove existing authentication data
 694     if '@' in url_parts[1]:
 695         url_parts[1] = url_parts[1].rsplit('@', 1)[1]
 696
 697     return urlparse.urlunsplit(url_parts)
 698
 699
 700 # Native filesystem encoding detection
 701 encoding = sys.getfilesystemencoding()
 702
 703 def sanitize_encoding(filename):
 704     r"""
 705     Generate a sanitized version of a string (i.e.
 706     remove invalid characters and encode in the
 707     detected native language encoding).
 708
 709     >>> sanitize_encoding('\x80')
 710     ''
 711     >>> sanitize_encoding(u'unicode')
 712     'unicode'
 713     """
 714     # The encoding problem goes away in Python 3.. hopefully!
 715     if sys.version_info >= (3, 0):
 716         return filename
 717
 718     global encoding
 719     if not isinstance(filename, unicode):
 720         filename = filename.decode(encoding, 'ignore')
 721     return filename.encode(encoding, 'ignore')
 722
 723
 724 def get_git_head():
 725     """ returns the commit and message of the current git HEAD """
 726
 727     try:
 728         pr = subprocess.Popen('/usr/bin/git log -n 1 --oneline'.split(),
 729             cwd = settings.BASE_DIR,
 730             stdout = subprocess.PIPE,
 731             stderr = subprocess.PIPE,
 732         )
 733
 734     except OSError:
 735         return None, None
 736
 737     (out, err) = pr.communicate()
 738     if err:
 739         return None, None
 740
 741     outs = out.split()
 742     commit = outs[0]
 743     msg = ' ' .join(outs[1:])
 744     return commit, msg
 745
 746
 747
 748 # https://gist.github.com/samuraisam/901117
 749
 750 default_fudge = timedelta(seconds=0, microseconds=0, days=0)
 751
 752 def deep_eq(_v1, _v2, datetime_fudge=default_fudge, _assert=False):
 753   """
 754   Tests for deep equality between two python data structures recursing
 755   into sub-structures if necessary. Works with all python types including
 756   iterators and generators. This function was dreampt up to test API responses
 757   but could be used for anything. Be careful. With deeply nested structures
 758   you may blow the stack.
 759
 760   Options:
 761             datetime_fudge => this is a datetime.timedelta object which, when
 762                               comparing dates, will accept values that differ
 763                               by the number of seconds specified
 764             _assert        => passing yes for this will raise an assertion error
 765                               when values do not match, instead of returning
 766                               false (very useful in combination with pdb)
 767
 768   Doctests included:
 769
 770   >>> x1, y1 = ({'a': 'b'}, {'a': 'b'})
 771   >>> deep_eq(x1, y1)
 772   True
 773   >>> x2, y2 = ({'a': 'b'}, {'b': 'a'})
 774   >>> deep_eq(x2, y2)
 775   False
 776   >>> x3, y3 = ({'a': {'b': 'c'}}, {'a': {'b': 'c'}})
 777   >>> deep_eq(x3, y3)
 778   True
 779   >>> x4, y4 = ({'c': 't', 'a': {'b': 'c'}}, {'a': {'b': 'n'}, 'c': 't'})
 780   >>> deep_eq(x4, y4)
 781   False
 782   >>> x5, y5 = ({'a': [1,2,3]}, {'a': [1,2,3]})
 783   >>> deep_eq(x5, y5)
 784   True
 785   >>> x6, y6 = ({'a': [1,'b',8]}, {'a': [2,'b',8]})
 786   >>> deep_eq(x6, y6)
 787   False
 788   >>> x7, y7 = ('a', 'a')
 789   >>> deep_eq(x7, y7)
 790   True
 791   >>> x8, y8 = (['p','n',['asdf']], ['p','n',['asdf']])
 792   >>> deep_eq(x8, y8)
 793   True
 794   >>> x9, y9 = (['p','n',['asdf',['omg']]], ['p', 'n', ['asdf',['nowai']]])
 795   >>> deep_eq(x9, y9)
 796   False
 797   >>> x10, y10 = (1, 2)
 798   >>> deep_eq(x10, y10)
 799   False
 800   >>> deep_eq((str(p) for p in xrange(10)), (str(p) for p in xrange(10)))
 801   True
 802   >>> str(deep_eq(range(4), range(4)))
 803   'True'
 804   >>> deep_eq(xrange(100), xrange(100))
 805   True
 806   >>> deep_eq(xrange(2), xrange(5))
 807   False
 808   >>> from datetime import datetime, timedelta
 809   >>> d1, d2 = (datetime.now(), datetime.now() + timedelta(seconds=4))
 810   >>> deep_eq(d1, d2)
 811   False
 812   >>> deep_eq(d1, d2, datetime_fudge=timedelta(seconds=5))
 813   True
 814   """
 815   _deep_eq = functools.partial(deep_eq, datetime_fudge=datetime_fudge,
 816                                _assert=_assert)
 817
 818   def _check_assert(R, a, b, reason=''):
 819     if _assert and not R:
 820       assert 0, "an assertion has failed in deep_eq (%s) %s != %s" % (
 821         reason, str(a), str(b))
 822     return R
 823
 824   def _deep_dict_eq(d1, d2):
 825     k1, k2 = (sorted(d1.keys()), sorted(d2.keys()))
 826     if k1 != k2: # keys should be exactly equal
 827       return _check_assert(False, k1, k2, "keys")
 828
 829     return _check_assert(operator.eq(sum(_deep_eq(d1[k], d2[k])
 830                                        for k in k1),
 831                                      len(k1)), d1, d2, "dictionaries")
 832
 833   def _deep_iter_eq(l1, l2):
 834     if len(l1) != len(l2):
 835       return _check_assert(False, l1, l2, "lengths")
 836     return _check_assert(operator.eq(sum(_deep_eq(v1, v2)
 837                                       for v1, v2 in zip(l1, l2)),
 838                                      len(l1)), l1, l2, "iterables")
 839
 840   def op(a, b):
 841     _op = operator.eq
 842     if type(a) == datetime and type(b) == datetime:
 843       s = datetime_fudge.seconds
 844       t1, t2 = (time.mktime(a.timetuple()), time.mktime(b.timetuple()))
 845       l = t1 - t2
 846       l = -l if l > 0 else l
 847       return _check_assert((-s if s > 0 else s) <= l, a, b, "dates")
 848     return _check_assert(_op(a, b), a, b, "values")
 849
 850   c1, c2 = (_v1, _v2)
 851
 852   # guard against strings because they are iterable and their
 853   # elements yield iterables infinitely.
 854   # I N C E P T I O N
 855   for t in types.StringTypes:
 856     if isinstance(_v1, t):
 857       break
 858   else:
 859     if isinstance(_v1, types.DictType):
 860       op = _deep_dict_eq
 861     else:
 862       try:
 863         c1, c2 = (list(iter(_v1)), list(iter(_v2)))
 864       except TypeError:
 865         c1, c2 = _v1, _v2
 866       else:
 867         op = _deep_iter_eq
 868
 869   return op(c1, c2)
 870
 871
 872 def parse_request_body(request):
 873     """ returns the parsed request body, handles gzip encoding """
 874
 875     raw_body = request.body
 876     content_enc = request.META.get('HTTP_CONTENT_ENCODING')
 877
 878     if content_enc == 'gzip':
 879         raw_body = zlib.decompress(raw_body)
 880
 881     return json.loads(raw_body)
 882
 883
 884 def normalize_feed_url(url):
 885     """
 886     Converts any URL to http:// or ftp:// so that it can be
 887     used with "wget". If the URL cannot be converted (invalid
 888     or unknown scheme), "None" is returned.
 889
 890     This will also normalize feed:// and itpc:// to http://.
 891
 892     >>> normalize_feed_url('itpc://example.org/podcast.rss')
 893     'http://example.org/podcast.rss'
 894
 895     If no URL scheme is defined (e.g. "curry.com"), we will
 896     simply assume the user intends to add a http:// feed.
 897
 898     >>> normalize_feed_url('curry.com')
 899     'http://curry.com/'
 900
 901     There are even some more shortcuts for advanced users
 902     and lazy typists (see the source for details).
 903
 904     >>> normalize_feed_url('fb:43FPodcast')
 905     'http://feeds.feedburner.com/43FPodcast'
 906
 907     It will also take care of converting the domain name to
 908     all-lowercase (because domains are not case sensitive):
 909
 910     >>> normalize_feed_url('http://Example.COM/')
 911     'http://example.com/'
 912
 913     Some other minimalistic changes are also taken care of,
 914     e.g. a ? with an empty query is removed:
 915
 916     >>> normalize_feed_url('http://example.org/test?')
 917     'http://example.org/test'
 918
 919     Leading and trailing whitespace is removed
 920
 921     >>> normalize_feed_url(' http://example.com/podcast.rss ')
 922     'http://example.com/podcast.rss'
 923
 924     HTTP Authentication is removed to protect users' privacy
 925
 926     >>> normalize_feed_url('http://a@b:c@host.com/')
 927     'http://host.com/'
 928     >>> normalize_feed_url('ftp://a:b:c@host.com/')
 929     'ftp://host.com/'
 930     >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
 931     'http://host.com/'
 932     >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
 933     'ftp://host.com/'
 934     >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
 935     'http://example.org/'
 936     >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
 937     'http://example.com/x%40y%3Az%40test.com/'
 938     >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
 939     'http://en.wikipedia.org/wiki/%C3%84'
 940     >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
 941     'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
 942     """
 943     url = url.strip()
 944     if not url or len(url) < 8:
 945         return None
 946
 947     if isinstance(url, unicode):
 948         url = url.encode('utf-8', 'ignore')
 949
 950     # This is a list of prefixes that you can use to minimize the amount of
 951     # keystrokes that you have to use.
 952     # Feel free to suggest other useful prefixes, and I'll add them here.
 953     PREFIXES = {
 954             'fb:': 'http://feeds.feedburner.com/%s',
 955             'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
 956             'sc:': 'http://soundcloud.com/%s',
 957             'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
 958             # YouTube playlists. To get a list of playlists per-user, use:
 959             # https://gdata.youtube.com/feeds/api/users/<username>/playlists
 960             'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
 961     }
 962
 963     for prefix, expansion in PREFIXES.iteritems():
 964         if url.startswith(prefix):
 965             url = expansion % (url[len(prefix):],)
 966             break
 967
 968     # Assume HTTP for URLs without scheme
 969     if not '://' in url:
 970         url = 'http://' + url
 971
 972     scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
 973
 974     # Schemes and domain names are case insensitive
 975     scheme, netloc = scheme.lower(), netloc.lower()
 976
 977     # encode non-encoded characters
 978     path = urllib.quote(path, '/%')
 979     query = urllib.quote_plus(query, ':&=')
 980
 981     # Remove authentication to protect users' privacy
 982     netloc = netloc.rsplit('@', 1)[-1]
 983
 984     # Normalize empty paths to "/"
 985     if path == '':
 986         path = '/'
 987
 988     # feed://, itpc:// and itms:// are really http://
 989     if scheme in ('feed', 'itpc', 'itms'):
 990         scheme = 'http'
 991
 992     if scheme not in ('http', 'https', 'ftp', 'file'):
 993         return None
 994
 995     # urlunsplit might return "a slighty different, but equivalent URL"
 996     return urlparse.urlunsplit((scheme, netloc, path, query, fragment))