mygpo/utils.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of my.gpodder.org.
   4 #
   5 # my.gpodder.org is free software: you can redistribute it and/or modify it
   6 # under the terms of the GNU Affero General Public License as published by
   7 # the Free Software Foundation, either version 3 of the License, or (at your
   8 # option) any later version.
   9 #
  10 # my.gpodder.org is distributed in the hope that it will be useful, but
  11 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
  13 # License for more details.
  14 #
  15 # You should have received a copy of the GNU Affero General Public License
  16 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
  17 #
  18
  19 import functools
  20 import types
  21 import subprocess
  22 import os
  23 import operator
  24 import sys
  25 import re
  26 import collections
  27 import itertools
  28 from datetime import datetime, timedelta, date
  29 import time
  30 import hashlib
  31 import urlparse
  32 import urllib
  33 import urllib2
  34 import zlib
  35 import shlex
  36
  37 from django.conf import settings
  38
  39 from mygpo.core.json import json
  40
  41
  42 def daterange(from_date, to_date=None, leap=timedelta(days=1)):
  43     """
  44     >>> from_d = datetime(2010, 01, 01)
  45     >>> to_d = datetime(2010, 01, 05)
  46     >>> list(daterange(from_d, to_d))
  47     [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
  48     """
  49
  50     if to_date is None:
  51         if isinstance(from_date, datetime):
  52             to_date = datetime.now()
  53         else:
  54             to_date = date.today()
  55
  56     while from_date <= to_date:
  57         yield from_date
  58         from_date = from_date + leap
  59     return
  60
  61 def format_time(value):
  62     """Format an offset (in seconds) to a string
  63
  64     The offset should be an integer or float value.
  65
  66     >>> format_time(0)
  67     '00:00'
  68     >>> format_time(20)
  69     '00:20'
  70     >>> format_time(3600)
  71     '01:00:00'
  72     >>> format_time(10921)
  73     '03:02:01'
  74     """
  75     try:
  76         dt = datetime.utcfromtimestamp(value)
  77     except ValueError:
  78         return ''
  79
  80     if dt.hour == 0:
  81         return dt.strftime('%M:%S')
  82     else:
  83         return dt.strftime('%H:%M:%S')
  84
  85 def parse_time(value):
  86     """
  87     >>> parse_time(10)
  88     10
  89
  90     >>> parse_time('05:10') #5*60+10
  91     310
  92
  93     >>> parse_time('1:05:10') #60*60+5*60+10
  94     3910
  95     """
  96     if value is None:
  97         raise ValueError('None value in parse_time')
  98
  99     if isinstance(value, int):
 100         # Don't need to parse already-converted time value
 101         return value
 102
 103     if value == '':
 104         raise ValueError('Empty valueing in parse_time')
 105
 106     for format in ('%H:%M:%S', '%M:%S'):
 107         try:
 108             t = time.strptime(value, format)
 109             return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
 110         except ValueError, e:
 111             continue
 112
 113     return int(value)
 114
 115
 116 def parse_bool(val):
 117     """
 118     >>> parse_bool('True')
 119     True
 120
 121     >>> parse_bool('true')
 122     True
 123
 124     >>> parse_bool('')
 125     False
 126     """
 127     if isinstance(val, bool):
 128         return val
 129     if val.lower() == 'true':
 130         return True
 131     return False
 132
 133
 134 def iterate_together(lists, key=lambda x: x, reverse=False):
 135     """
 136     takes ordered, possibly sparse, lists with similar items
 137     (some items have a corresponding item in the other lists, some don't).
 138
 139     It then yield tuples of corresponding items, where one element is None is
 140     there is no corresponding entry in one of the lists.
 141
 142     Tuples where both elements are None are skipped.
 143
 144     The results of the key method are used for the comparisons.
 145
 146     If reverse is True, the lists are expected to be sorted in reverse order
 147     and the results will also be sorted reverse
 148
 149     >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
 150     [(1, 1), (2, None), (None, 3)]
 151
 152     >>> list(iterate_together([[], []]))
 153     []
 154
 155     >>> list(iterate_together([range(1, 3), range(3, 5)]))
 156     [(1, None), (2, None), (None, 3), (None, 4)]
 157
 158     >>> list(iterate_together([range(1, 3), []]))
 159     [(1, None), (2, None)]
 160
 161     >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
 162     [(1, None), (3, 3)]
 163     """
 164
 165     Next = collections.namedtuple('Next', 'item more')
 166     min_ = min if not reverse else max
 167     lt_  = operator.lt if not reverse else operator.gt
 168
 169     lists = [iter(l) for l in lists]
 170
 171     def _take(it):
 172         try:
 173             i = it.next()
 174             while i is None:
 175                 i = it.next()
 176             return Next(i, True)
 177         except StopIteration:
 178             return Next(None, False)
 179
 180     def new_res():
 181         return [None]*len(lists)
 182
 183     # take first bunch of items
 184     items = [_take(l) for l in lists]
 185
 186     while any(i.item is not None or i.more for i in items):
 187
 188         res = new_res()
 189
 190         for n, item in enumerate(items):
 191
 192             if item.item is None:
 193                 continue
 194
 195             if all(x is None for x in res):
 196                 res[n] = item.item
 197                 continue
 198
 199             min_v = min_(filter(lambda x: x is not None, res), key=key)
 200
 201             if key(item.item) == key(min_v):
 202                 res[n] = item.item
 203
 204             elif lt_(key(item.item), key(min_v)):
 205                 res = new_res()
 206                 res[n] = item.item
 207
 208         for n, x in enumerate(res):
 209             if x is not None:
 210                 items[n] = _take(lists[n])
 211
 212         yield tuple(res)
 213
 214
 215 def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
 216
 217     factor = float(val)/max_val if max_val > 0 else 0
 218
 219     # progress as percentage
 220     percentage_str = '{val:.2%}'.format(val=factor)
 221
 222     # progress bar filled with #s
 223     factor = min(int(factor*max_width), max_width)
 224     progress_str = '#' * factor + ' ' * (max_width-factor)
 225
 226     #insert percentage into bar
 227     percentage_start = int((max_width-len(percentage_str))/2)
 228     progress_str = progress_str[:percentage_start] + \
 229                    percentage_str + \
 230                    progress_str[percentage_start+len(percentage_str):]
 231
 232     print >> stream, '\r',
 233     print >> stream, '[ %s ] %s / %s | %s' % (
 234         progress_str,
 235         val,
 236         max_val,
 237         status_str),
 238     stream.flush()
 239
 240
 241 def set_cmp(list, simplify):
 242     """
 243     Builds a set out of a list but uses the results of simplify to determine equality between items
 244     """
 245     simpl = lambda x: (simplify(x), x)
 246     lst = dict(map(simpl, list))
 247     return lst.values()
 248
 249
 250 def first(it):
 251     """
 252     returns the first not-None object or None if the iterator is exhausted
 253     """
 254     for x in it:
 255         if x is not None:
 256             return x
 257     return None
 258
 259
 260 def intersect(a, b):
 261     return list(set(a) & set(b))
 262
 263
 264
 265 def remove_control_chars(s):
 266     all_chars = (unichr(i) for i in xrange(0x110000))
 267     control_chars = ''.join(map(unichr, range(0,32) + range(127,160)))
 268     control_char_re = re.compile('[%s]' % re.escape(control_chars))
 269
 270     return control_char_re.sub('', s)
 271
 272
 273 def unzip(a):
 274     return tuple(map(list,zip(*a)))
 275
 276
 277 def parse_range(s, min, max, default=None):
 278     """
 279     Parses the string and returns its value. If the value is outside the given
 280     range, its closest number within the range is returned
 281
 282     >>> parse_range('5', 0, 10)
 283     5
 284
 285     >>> parse_range('0', 5, 10)
 286     5
 287
 288     >>> parse_range('15',0, 10)
 289     10
 290
 291     >>> parse_range('x', 0, 20)
 292     10
 293
 294     >>> parse_range('x', 0, 20, 20)
 295     20
 296     """
 297     try:
 298         val = int(s)
 299         if val < min:
 300             return min
 301         if val > max:
 302             return max
 303         return val
 304
 305     except (ValueError, TypeError):
 306         return default if default is not None else (max-min)/2
 307
 308
 309
 310 def flatten(l):
 311     return [item for sublist in l for item in sublist]
 312
 313
 314 def linearize(key, iterators, reverse=False):
 315     """
 316     Linearizes a number of iterators, sorted by some comparison function
 317     """
 318
 319     iters = [iter(i) for i in iterators]
 320     vals = []
 321     for i in iters:
 322         try:
 323             v = i.next()
 324             vals. append( (v, i) )
 325         except StopIteration:
 326             continue
 327
 328     while vals:
 329         vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
 330         val, it = vals.pop(0)
 331         yield val
 332         try:
 333             next_val = it.next()
 334             vals.append( (next_val, it) )
 335         except StopIteration:
 336             pass
 337
 338
 339 def skip_pairs(iterator, cmp=cmp):
 340     """ Skips pairs of equal items
 341
 342     >>> list(skip_pairs([]))
 343     []
 344
 345     >>> list(skip_pairs([1]))
 346     [1]
 347
 348     >>> list(skip_pairs([1, 2, 3]))
 349     [1, 2, 3]
 350
 351     >>> list(skip_pairs([1, 1]))
 352     []
 353
 354     >>> list(skip_pairs([1, 2, 2]))
 355     [1]
 356
 357     >>> list(skip_pairs([1, 2, 2, 3]))
 358     [1, 3]
 359
 360     >>> list(skip_pairs([1, 2, 2, 2]))
 361     [1, 2]
 362
 363     >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
 364     [1, 3]
 365     """
 366
 367     iterator = iter(iterator)
 368     next = iterator.next()
 369
 370     while True:
 371         item = next
 372         try:
 373             next = iterator.next()
 374         except StopIteration as e:
 375             yield item
 376             raise e
 377
 378         if cmp(item, next) == 0:
 379             next = iterator.next()
 380         else:
 381             yield item
 382
 383
 384 def get_timestamp(datetime_obj):
 385     """ Returns the timestamp as an int for the given datetime object
 386
 387     >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
 388     1302168606
 389
 390     >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
 391     0
 392     """
 393     return int(time.mktime(datetime_obj.timetuple()))
 394
 395
 396
 397 re_url = re.compile('^https?://')
 398
 399 def is_url(string):
 400     """ Returns true if a string looks like an URL
 401
 402     >>> is_url('http://example.com/some-path/file.xml')
 403     True
 404
 405     >>> is_url('something else')
 406     False
 407     """
 408
 409     return bool(re_url.match(string))
 410
 411
 412
 413 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
 414 # this does not increase asymptotical complexity
 415 # but can still waste more time than it saves.
 416 def shortest_of(strings):
 417     return min(strings, key=len)
 418
 419 def longest_substr(strings):
 420     """
 421     Returns the longest common substring of the given strings
 422     """
 423
 424     substr = ""
 425     if not strings:
 426         return substr
 427     reference = shortest_of(strings)
 428     length = len(reference)
 429     #find a suitable slice i:j
 430     for i in xrange(length):
 431         #only consider strings long at least len(substr) + 1
 432         for j in xrange(i + len(substr) + 1, length):
 433             candidate = reference[i:j]
 434             if all(candidate in text for text in strings):
 435                 substr = candidate
 436     return substr
 437
 438
 439
 440 def additional_value(it, gen_val, val_changed=lambda _: True):
 441     """ Provides an additional value to the elements, calculated when needed
 442
 443     For the elements from the iterator, some additional value can be computed
 444     by gen_val (which might be an expensive computation).
 445
 446     If the elements in the iterator are ordered so that some subsequent
 447     elements would generate the same additional value, val_changed can be
 448     provided, which receives the next element from the iterator and the
 449     previous additional value. If the element would generate the same
 450     additional value (val_changed returns False), its computation is skipped.
 451
 452     >>> # get the next full hundred higher than x
 453     >>> # this will probably be an expensive calculation
 454     >>> next_hundred = lambda x: x + 100-(x % 100)
 455
 456     >>> # returns True if h is not the value that next_hundred(x) would provide
 457     >>> # this should be a relatively cheap calculation, compared to the above
 458     >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
 459
 460     >>> xs = [0, 50, 100, 101, 199, 200, 201]
 461     >>> list(additional_value(xs, next_hundred, diff_hundred))
 462     [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
 463     """
 464
 465     _none = object()
 466     current = _none
 467
 468     for x in it:
 469         if current is _none or val_changed(x, current):
 470             current = gen_val(x)
 471
 472         yield (x, current)
 473
 474
 475 def file_hash(f, h=hashlib.md5, block_size=2**20):
 476     """ returns the hash of the contents of a file """
 477     f_hash = h()
 478     for chunk in iter(lambda: f.read(block_size), ''):
 479         f_hash.update(chunk)
 480     return f_hash
 481
 482
 483
 484 def split_list(l, prop):
 485     """ split elements that satisfy a property, and those that don't """
 486     match   = filter(prop, l)
 487     nomatch = [x for x in l if x not in match]
 488     return match, nomatch
 489
 490
 491 def sorted_chain(links, key, reverse=False):
 492     """ Takes a list of iters can iterates over sorted elements
 493
 494     Each elment of links should be a tuple of (sort_key, iterator). The
 495     elements of each iterator should be sorted already. sort_key should
 496     indicate the key of the first element and needs to be comparable to the
 497     result of key(elem).
 498
 499     The function returns an iterator over the globally sorted element that
 500     ensures that as little iterators as possible are evaluated.  When
 501     evaluating """
 502
 503     # mixed_list initially contains all placeholders; later evaluated
 504     # elements (from the iterators) are mixed in
 505     mixed_list = [(k, link, True) for k, link in links]
 506
 507     while mixed_list:
 508         _, item, expand = mixed_list.pop(0)
 509
 510         # found an element (from an earlier expansion), yield it
 511         if not expand:
 512             yield item
 513             continue
 514
 515         # found an iter that needs to be expanded.
 516         # The iterator is fully consumed
 517         new_items = [(key(i), i, False) for i in item]
 518
 519         # sort links (placeholders) and elements together
 520         mixed_list = sorted(mixed_list + new_items, key=lambda (k, _v, _e): k,
 521                 reverse=reverse)
 522
 523
 524 def url_add_authentication(url, username, password):
 525     """
 526     Adds authentication data (username, password) to a given
 527     URL in order to construct an authenticated URL.
 528
 529     >>> url_add_authentication('https://host.com/', '', None)
 530     'https://host.com/'
 531     >>> url_add_authentication('http://example.org/', None, None)
 532     'http://example.org/'
 533     >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
 534     'telnet://foo:bar@host.com/'
 535     >>> url_add_authentication('ftp://example.org', 'billy', None)
 536     'ftp://billy@example.org'
 537     >>> url_add_authentication('ftp://example.org', 'billy', '')
 538     'ftp://billy:@example.org'
 539     >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
 540     'http://aa:bc@localhost/x'
 541     >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
 542     'http://i%2Fo:P@ss:@blubb.lan/u.html'
 543     >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
 544     'http://c:d@x.org/'
 545     >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
 546     'http://P@x:i%2F@cx.lan'
 547     >>> url_add_authentication('http://x.org/', 'a b', 'c d')
 548     'http://a%20b:c%20d@x.org/'
 549     """
 550     if username is None or username == '':
 551         return url
 552
 553     # Relaxations of the strict quoting rules (bug 1521):
 554     # 1. Accept '@' in username and password
 555     # 2. Acecpt ':' in password only
 556     username = urllib.quote(username, safe='@')
 557
 558     if password is not None:
 559         password = urllib.quote(password, safe='@:')
 560         auth_string = ':'.join((username, password))
 561     else:
 562         auth_string = username
 563
 564     url = url_strip_authentication(url)
 565
 566     url_parts = list(urlparse.urlsplit(url))
 567     # url_parts[1] is the HOST part of the URL
 568     url_parts[1] = '@'.join((auth_string, url_parts[1]))
 569
 570     return urlparse.urlunsplit(url_parts)
 571
 572
 573 def urlopen(url, headers=None, data=None):
 574     """
 575     An URL opener with the User-agent set to gPodder (with version)
 576     """
 577     username, password = username_password_from_url(url)
 578     if username is not None or password is not None:
 579         url = url_strip_authentication(url)
 580         password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
 581         password_mgr.add_password(None, url, username, password)
 582         handler = urllib2.HTTPBasicAuthHandler(password_mgr)
 583         opener = urllib2.build_opener(handler)
 584     else:
 585         opener = urllib2.build_opener()
 586
 587     if headers is None:
 588         headers = {}
 589     else:
 590         headers = dict(headers)
 591
 592     headers.update({'User-agent': settings.USER_AGENT})
 593     request = urllib2.Request(url, data=data, headers=headers)
 594     return opener.open(request)
 595
 596
 597
 598 def username_password_from_url(url):
 599     r"""
 600     Returns a tuple (username,password) containing authentication
 601     data from the specified URL or (None,None) if no authentication
 602     data can be found in the URL.
 603
 604     See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)
 605
 606     >>> username_password_from_url('https://@host.com/')
 607     ('', None)
 608     >>> username_password_from_url('telnet://host.com/')
 609     (None, None)
 610     >>> username_password_from_url('ftp://foo:@host.com/')
 611     ('foo', '')
 612     >>> username_password_from_url('http://a:b@host.com/')
 613     ('a', 'b')
 614     >>> username_password_from_url(1)
 615     Traceback (most recent call last):
 616       ...
 617     ValueError: URL has to be a string or unicode object.
 618     >>> username_password_from_url(None)
 619     Traceback (most recent call last):
 620       ...
 621     ValueError: URL has to be a string or unicode object.
 622     >>> username_password_from_url('http://a@b:c@host.com/')
 623     ('a@b', 'c')
 624     >>> username_password_from_url('ftp://a:b:c@host.com/')
 625     ('a', 'b:c')
 626     >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
 627     ('i/o', 'P@ss:')
 628     >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
 629     ('\xc3\xb6sterreich', None)
 630     >>> username_password_from_url('http://w%20x:y%20z@example.org/')
 631     ('w x', 'y z')
 632     >>> username_password_from_url('http://example.com/x@y:z@test.com/')
 633     (None, None)
 634     """
 635     if type(url) not in (str, unicode):
 636         raise ValueError('URL has to be a string or unicode object.')
 637
 638     (username, password) = (None, None)
 639
 640     (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
 641
 642     if '@' in netloc:
 643         (authentication, netloc) = netloc.rsplit('@', 1)
 644         if ':' in authentication:
 645             (username, password) = authentication.split(':', 1)
 646
 647             # RFC1738 dictates that we should not allow ['/', '@', ':']
 648             # characters in the username and password field (Section 3.1):
 649             #
 650             # 1. The "/" can't be in there at this point because of the way
 651             #    urlparse (which we use above) works.
 652             # 2. Due to gPodder bug 1521, we allow "@" in the username and
 653             #    password field. We use netloc.rsplit('@', 1), which will
 654             #    make sure that we split it at the last '@' in netloc.
 655             # 3. The colon must be excluded (RFC2617, Section 2) in the
 656             #    username, but is apparently allowed in the password. This
 657             #    is handled by the authentication.split(':', 1) above, and
 658             #    will cause any extraneous ':'s to be part of the password.
 659
 660             username = urllib.unquote(username)
 661             password = urllib.unquote(password)
 662         else:
 663             username = urllib.unquote(authentication)
 664
 665     return (username, password)
 666
 667
 668 def url_strip_authentication(url):
 669     """
 670     Strips authentication data from an URL. Returns the URL with
 671     the authentication data removed from it.
 672
 673     >>> url_strip_authentication('https://host.com/')
 674     'https://host.com/'
 675     >>> url_strip_authentication('telnet://foo:bar@host.com/')
 676     'telnet://host.com/'
 677     >>> url_strip_authentication('ftp://billy@example.org')
 678     'ftp://example.org'
 679     >>> url_strip_authentication('ftp://billy:@example.org')
 680     'ftp://example.org'
 681     >>> url_strip_authentication('http://aa:bc@localhost/x')
 682     'http://localhost/x'
 683     >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
 684     'http://blubb.lan/u.html'
 685     >>> url_strip_authentication('http://c:d@x.org/')
 686     'http://x.org/'
 687     >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
 688     'http://cx.lan'
 689     >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
 690     'http://example.com/'
 691     """
 692     url_parts = list(urlparse.urlsplit(url))
 693     # url_parts[1] is the HOST part of the URL
 694
 695     # Remove existing authentication data
 696     if '@' in url_parts[1]:
 697         url_parts[1] = url_parts[1].rsplit('@', 1)[1]
 698
 699     return urlparse.urlunsplit(url_parts)
 700
 701
 702 # Native filesystem encoding detection
 703 encoding = sys.getfilesystemencoding()
 704
 705 def sanitize_encoding(filename):
 706     r"""
 707     Generate a sanitized version of a string (i.e.
 708     remove invalid characters and encode in the
 709     detected native language encoding).
 710
 711     >>> sanitize_encoding('\x80')
 712     ''
 713     >>> sanitize_encoding(u'unicode')
 714     'unicode'
 715     """
 716     # The encoding problem goes away in Python 3.. hopefully!
 717     if sys.version_info >= (3, 0):
 718         return filename
 719
 720     global encoding
 721     if not isinstance(filename, unicode):
 722         filename = filename.decode(encoding, 'ignore')
 723     return filename.encode(encoding, 'ignore')
 724
 725
 726 def get_git_head():
 727     """ returns the commit and message of the current git HEAD """
 728
 729     try:
 730         pr = subprocess.Popen('/usr/bin/git log -n 1 --oneline'.split(),
 731             cwd = settings.BASE_DIR,
 732             stdout = subprocess.PIPE,
 733             stderr = subprocess.PIPE,
 734         )
 735
 736     except OSError:
 737         return None, None
 738
 739     (out, err) = pr.communicate()
 740     if err:
 741         return None, None
 742
 743     outs = out.split()
 744     commit = outs[0]
 745     msg = ' ' .join(outs[1:])
 746     return commit, msg
 747
 748
 749
# https://gist.github.com/samuraisam/901117

# default tolerance for datetime comparisons in deep_eq: no difference allowed
default_fudge = timedelta(seconds=0, microseconds=0, days=0)

def deep_eq(_v1, _v2, datetime_fudge=default_fudge, _assert=False):
  """
  Tests for deep equality between two python data structures recursing
  into sub-structures if necessary. Works with all python types including
  iterators and generators. This function was dreamt up to test API responses
  but could be used for anything. Be careful. With deeply nested structures
  you may blow the stack.

  How the values are compared is decided by the type of the first value:
  strings are compared directly, dictionaries key by key, and everything
  else that can be iterated is turned into a list (iterators and generators
  are consumed by that) and compared element by element. All other values
  are compared with ==.

  Options:
            datetime_fudge => this is a datetime.timedelta object which, when
                              comparing dates, will accept values that differ
                              by the number of seconds specified
            _assert        => passing yes for this will raise an assertion error
                              when values do not match, instead of returning
                              false (very useful in combination with pdb)

  Doctests included:

  >>> x1, y1 = ({'a': 'b'}, {'a': 'b'})
  >>> deep_eq(x1, y1)
  True
  >>> x2, y2 = ({'a': 'b'}, {'b': 'a'})
  >>> deep_eq(x2, y2)
  False
  >>> x3, y3 = ({'a': {'b': 'c'}}, {'a': {'b': 'c'}})
  >>> deep_eq(x3, y3)
  True
  >>> x4, y4 = ({'c': 't', 'a': {'b': 'c'}}, {'a': {'b': 'n'}, 'c': 't'})
  >>> deep_eq(x4, y4)
  False
  >>> x5, y5 = ({'a': [1,2,3]}, {'a': [1,2,3]})
  >>> deep_eq(x5, y5)
  True
  >>> x6, y6 = ({'a': [1,'b',8]}, {'a': [2,'b',8]})
  >>> deep_eq(x6, y6)
  False
  >>> x7, y7 = ('a', 'a')
  >>> deep_eq(x7, y7)
  True
  >>> x8, y8 = (['p','n',['asdf']], ['p','n',['asdf']])
  >>> deep_eq(x8, y8)
  True
  >>> x9, y9 = (['p','n',['asdf',['omg']]], ['p', 'n', ['asdf',['nowai']]])
  >>> deep_eq(x9, y9)
  False
  >>> x10, y10 = (1, 2)
  >>> deep_eq(x10, y10)
  False
  >>> deep_eq((str(p) for p in xrange(10)), (str(p) for p in xrange(10)))
  True
  >>> str(deep_eq(range(4), range(4)))
  'True'
  >>> deep_eq(xrange(100), xrange(100))
  True
  >>> deep_eq(xrange(2), xrange(5))
  False
  >>> from datetime import datetime, timedelta
  >>> d1, d2 = (datetime.now(), datetime.now() + timedelta(seconds=4))
  >>> deep_eq(d1, d2)
  False
  >>> deep_eq(d1, d2, datetime_fudge=timedelta(seconds=5))
  True
  """
  # recursive calls use the same options as this call
  _deep_eq = functools.partial(deep_eq, datetime_fudge=datetime_fudge,
                               _assert=_assert)

  def _check_assert(R, a, b, reason=''):
    # returns the comparison result R unchanged; in _assert mode a failed
    # comparison raises an AssertionError that names both values instead
    if _assert and not R:
      assert 0, "an assertion has failed in deep_eq (%s) %s != %s" % (
        reason, str(a), str(b))
    return R

  def _deep_dict_eq(d1, d2):
    # dictionaries are equal if they have the same keys and the values of
    # every key are deeply equal
    k1, k2 = (sorted(d1.keys()), sorted(d2.keys()))
    if k1 != k2: # keys should be exactly equal
      return _check_assert(False, k1, k2, "keys")

    # every comparison gives True (counted as 1) or False (0), so the sum
    # equals the number of keys only if all values are equal
    return _check_assert(operator.eq(sum(_deep_eq(d1[k], d2[k])
                                       for k in k1),
                                     len(k1)), d1, d2, "dictionaries")

  def _deep_iter_eq(l1, l2):
    # lists are equal if they have the same length and the elements at
    # every position are deeply equal
    if len(l1) != len(l2):
      return _check_assert(False, l1, l2, "lengths")
    # same counting approach as in _deep_dict_eq
    return _check_assert(operator.eq(sum(_deep_eq(v1, v2)
                                      for v1, v2 in zip(l1, l2)),
                                     len(l1)), l1, l2, "iterables")

  def op(a, b):
    # comparison for everything that is not a dictionary or an iterable
    _op = operator.eq
    if type(a) == datetime and type(b) == datetime:
      # NOTE(review): only the "seconds" attribute of the fudge is used
      # here, its days and microseconds are not taken into account
      s = datetime_fudge.seconds
      # both values are compared as timestamps built from their timetuple(),
      # which does not contain the microseconds
      t1, t2 = (time.mktime(a.timetuple()), time.mktime(b.timetuple()))
      l = t1 - t2
      # l becomes the negative absolute difference, which is then compared
      # to the negative fudge
      l = -l if l > 0 else l
      return _check_assert((-s if s > 0 else s) <= l, a, b, "dates")
    return _check_assert(_op(a, b), a, b, "values")

  c1, c2 = (_v1, _v2)

  # guard against strings because they are iterable and their
  # elements yield iterables infinitely.
  # I N C E P T I O N
  for t in types.StringTypes:
    if isinstance(_v1, t):
      break
  else:
    # _v1 is not a string: select the comparison according to its type
    # (the type of _v2 is not inspected here)
    if isinstance(_v1, types.DictType):
      op = _deep_dict_eq
    else:
      try:
        # consumes iterators / generators
        c1, c2 = (list(iter(_v1)), list(iter(_v2)))
      except TypeError:
        # at least one value is not iterable: compare the plain values
        c1, c2 = _v1, _v2
      else:
        op = _deep_iter_eq

  return op(c1, c2)
 872
 873
 874 def parse_request_body(request):
 875     """ returns the parsed request body, handles gzip encoding """
 876
 877     raw_body = request.body
 878     content_enc = request.META.get('HTTP_CONTENT_ENCODING')
 879
 880     if content_enc == 'gzip':
 881         raw_body = zlib.decompress(raw_body)
 882
 883     return json.loads(raw_body)
 884
 885
 886 def normalize_feed_url(url):
 887     """
 888     Converts any URL to http:// or ftp:// so that it can be
 889     used with "wget". If the URL cannot be converted (invalid
 890     or unknown scheme), "None" is returned.
 891
 892     This will also normalize feed:// and itpc:// to http://.
 893
 894     >>> normalize_feed_url('itpc://example.org/podcast.rss')
 895     'http://example.org/podcast.rss'
 896
 897     If no URL scheme is defined (e.g. "curry.com"), we will
 898     simply assume the user intends to add a http:// feed.
 899
 900     >>> normalize_feed_url('curry.com')
 901     'http://curry.com/'
 902
 903     There are even some more shortcuts for advanced users
 904     and lazy typists (see the source for details).
 905
 906     >>> normalize_feed_url('fb:43FPodcast')
 907     'http://feeds.feedburner.com/43FPodcast'
 908
 909     It will also take care of converting the domain name to
 910     all-lowercase (because domains are not case sensitive):
 911
 912     >>> normalize_feed_url('http://Example.COM/')
 913     'http://example.com/'
 914
 915     Some other minimalistic changes are also taken care of,
 916     e.g. a ? with an empty query is removed:
 917
 918     >>> normalize_feed_url('http://example.org/test?')
 919     'http://example.org/test'
 920
 921     Leading and trailing whitespace is removed
 922
 923     >>> normalize_feed_url(' http://example.com/podcast.rss ')
 924     'http://example.com/podcast.rss'
 925
 926     HTTP Authentication is removed to protect users' privacy
 927
 928     >>> normalize_feed_url('http://a@b:c@host.com/')
 929     'http://host.com/'
 930     >>> normalize_feed_url('ftp://a:b:c@host.com/')
 931     'ftp://host.com/'
 932     >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
 933     'http://host.com/'
 934     >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
 935     'ftp://host.com/'
 936     >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
 937     'http://example.org/'
 938     >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
 939     'http://example.com/x%40y%3Az%40test.com/'
 940     >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
 941     'http://en.wikipedia.org/wiki/%C3%84'
 942     >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
 943     'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
 944     """
 945     url = url.strip()
 946     if not url or len(url) < 8:
 947         return None
 948
 949     if isinstance(url, unicode):
 950         url = url.encode('utf-8', 'ignore')
 951
 952     # This is a list of prefixes that you can use to minimize the amount of
 953     # keystrokes that you have to use.
 954     # Feel free to suggest other useful prefixes, and I'll add them here.
 955     PREFIXES = {
 956             'fb:': 'http://feeds.feedburner.com/%s',
 957             'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
 958             'sc:': 'http://soundcloud.com/%s',
 959             'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
 960             # YouTube playlists. To get a list of playlists per-user, use:
 961             # https://gdata.youtube.com/feeds/api/users/<username>/playlists
 962             'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
 963     }
 964
 965     for prefix, expansion in PREFIXES.iteritems():
 966         if url.startswith(prefix):
 967             url = expansion % (url[len(prefix):],)
 968             break
 969
 970     # Assume HTTP for URLs without scheme
 971     if not '://' in url:
 972         url = 'http://' + url
 973
 974     scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
 975
 976     # Schemes and domain names are case insensitive
 977     scheme, netloc = scheme.lower(), netloc.lower()
 978
 979     # encode non-encoded characters
 980     path = urllib.quote(path, '/%')
 981     query = urllib.quote_plus(query, ':&=')
 982
 983     # Remove authentication to protect users' privacy
 984     netloc = netloc.rsplit('@', 1)[-1]
 985
 986     # Normalize empty paths to "/"
 987     if path == '':
 988         path = '/'
 989
 990     # feed://, itpc:// and itms:// are really http://
 991     if scheme in ('feed', 'itpc', 'itms'):
 992         scheme = 'http'
 993
 994     if scheme not in ('http', 'https', 'ftp', 'file'):
 995         return None
 996
 997     # urlunsplit might return "a slighty different, but equivalent URL"
 998     return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
 999
1000
def partition(items, predicate=bool):
    """ splits items into those that fail and those that pass the predicate

    Returns a tuple of two lazy iterators (failing items, passing items).
    Both are fed from the same pass over items, and the predicate is
    evaluated once per item.
    """
    tagged = ((predicate(item), item) for item in items)
    failing_src, passing_src = itertools.tee(tagged)

    failing = (item for verdict, item in failing_src if not verdict)
    passing = (item for verdict, item in passing_src if verdict)

    return (failing, passing)
1005
1006
def split_quoted(s):
    """ Splits a string at whitespace, keeping quoted parts together

    If the string can not be split as it is (a quote is not closed, or it
    ends with an escape character), all quotes and backslashes are removed
    and the remaining text is split instead.

    >>> split_quoted('some "quoted text"') == ['some', 'quoted text']
    True

    >>> split_quoted('"quoted text') == ['quoted', 'text']
    True

    # 4 quotes here are 2 in the doctest is one in the actual string
    >>> split_quoted('text\\\\') == ['text']
    True
    """

    try:
        # split by whitespace, preserve quoted substrings
        return shlex.split(s)

    except ValueError:
        # No closing quotation (eg '"text')
        # No escaped character (eg '\')
        pass

    cleaned = s
    for special in ('"', "'", '\\'):
        cleaned = cleaned.replace(special, '')

    return shlex.split(cleaned)