mygpo/utils.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of my.gpodder.org.
   4 #
   5 # my.gpodder.org is free software: you can redistribute it and/or modify it
   6 # under the terms of the GNU Affero General Public License as published by
   7 # the Free Software Foundation, either version 3 of the License, or (at your
   8 # option) any later version.
   9 #
  10 # my.gpodder.org is distributed in the hope that it will be useful, but
  11 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
  13 # License for more details.
  14 #
  15 # You should have received a copy of the GNU Affero General Public License
  16 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
  17 #
  18
  19 import functools
  20 import types
  21 import subprocess
  22 import os
  23 import operator
  24 import sys
  25 import re
  26 import collections
  27 import itertools
  28 from datetime import datetime, timedelta, date
  29 import time
  30 import hashlib
  31 import urlparse
  32 import urllib
  33 import urllib2
  34 import zlib
  35
  36 from django.conf import settings
  37
  38 from mygpo.core.json import json
  39
  40
  41 def daterange(from_date, to_date=None, leap=timedelta(days=1)):
  42     """
  43     >>> from_d = datetime(2010, 01, 01)
  44     >>> to_d = datetime(2010, 01, 05)
  45     >>> list(daterange(from_d, to_d))
  46     [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
  47     """
  48
  49     if to_date is None:
  50         if isinstance(from_date, datetime):
  51             to_date = datetime.now()
  52         else:
  53             to_date = date.today()
  54
  55     while from_date <= to_date:
  56         yield from_date
  57         from_date = from_date + leap
  58     return
  59
  60 def format_time(value):
  61     """Format an offset (in seconds) to a string
  62
  63     The offset should be an integer or float value.
  64
  65     >>> format_time(0)
  66     '00:00'
  67     >>> format_time(20)
  68     '00:20'
  69     >>> format_time(3600)
  70     '01:00:00'
  71     >>> format_time(10921)
  72     '03:02:01'
  73     """
  74     try:
  75         dt = datetime.utcfromtimestamp(value)
  76     except ValueError:
  77         return ''
  78
  79     if dt.hour == 0:
  80         return dt.strftime('%M:%S')
  81     else:
  82         return dt.strftime('%H:%M:%S')
  83
  84 def parse_time(value):
  85     """
  86     >>> parse_time(10)
  87     10
  88
  89     >>> parse_time('05:10') #5*60+10
  90     310
  91
  92     >>> parse_time('1:05:10') #60*60+5*60+10
  93     3910
  94     """
  95     if value is None:
  96         raise ValueError('None value in parse_time')
  97
  98     if isinstance(value, int):
  99         # Don't need to parse already-converted time value
 100         return value
 101
 102     if value == '':
 103         raise ValueError('Empty valueing in parse_time')
 104
 105     for format in ('%H:%M:%S', '%M:%S'):
 106         try:
 107             t = time.strptime(value, format)
 108             return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
 109         except ValueError, e:
 110             continue
 111
 112     return int(value)
 113
 114
 115 def parse_bool(val):
 116     """
 117     >>> parse_bool('True')
 118     True
 119
 120     >>> parse_bool('true')
 121     True
 122
 123     >>> parse_bool('')
 124     False
 125     """
 126     if isinstance(val, bool):
 127         return val
 128     if val.lower() == 'true':
 129         return True
 130     return False
 131
 132
 133 def iterate_together(lists, key=lambda x: x, reverse=False):
 134     """
 135     takes ordered, possibly sparse, lists with similar items
 136     (some items have a corresponding item in the other lists, some don't).
 137
 138     It then yield tuples of corresponding items, where one element is None is
 139     there is no corresponding entry in one of the lists.
 140
 141     Tuples where both elements are None are skipped.
 142
 143     The results of the key method are used for the comparisons.
 144
 145     If reverse is True, the lists are expected to be sorted in reverse order
 146     and the results will also be sorted reverse
 147
 148     >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
 149     [(1, 1), (2, None), (None, 3)]
 150
 151     >>> list(iterate_together([[], []]))
 152     []
 153
 154     >>> list(iterate_together([range(1, 3), range(3, 5)]))
 155     [(1, None), (2, None), (None, 3), (None, 4)]
 156
 157     >>> list(iterate_together([range(1, 3), []]))
 158     [(1, None), (2, None)]
 159
 160     >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
 161     [(1, None), (3, 3)]
 162     """
 163
 164     Next = collections.namedtuple('Next', 'item more')
 165     min_ = min if not reverse else max
 166     lt_  = operator.lt if not reverse else operator.gt
 167
 168     lists = [iter(l) for l in lists]
 169
 170     def _take(it):
 171         try:
 172             i = it.next()
 173             while i is None:
 174                 i = it.next()
 175             return Next(i, True)
 176         except StopIteration:
 177             return Next(None, False)
 178
 179     def new_res():
 180         return [None]*len(lists)
 181
 182     # take first bunch of items
 183     items = [_take(l) for l in lists]
 184
 185     while any(i.item is not None or i.more for i in items):
 186
 187         res = new_res()
 188
 189         for n, item in enumerate(items):
 190
 191             if item.item is None:
 192                 continue
 193
 194             if all(x is None for x in res):
 195                 res[n] = item.item
 196                 continue
 197
 198             min_v = min_(filter(lambda x: x is not None, res), key=key)
 199
 200             if key(item.item) == key(min_v):
 201                 res[n] = item.item
 202
 203             elif lt_(key(item.item), key(min_v)):
 204                 res = new_res()
 205                 res[n] = item.item
 206
 207         for n, x in enumerate(res):
 208             if x is not None:
 209                 items[n] = _take(lists[n])
 210
 211         yield tuple(res)
 212
 213
 214 def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
 215
 216     factor = float(val)/max_val if max_val > 0 else 0
 217
 218     # progress as percentage
 219     percentage_str = '{val:.2%}'.format(val=factor)
 220
 221     # progress bar filled with #s
 222     factor = min(int(factor*max_width), max_width)
 223     progress_str = '#' * factor + ' ' * (max_width-factor)
 224
 225     #insert percentage into bar
 226     percentage_start = int((max_width-len(percentage_str))/2)
 227     progress_str = progress_str[:percentage_start] + \
 228                    percentage_str + \
 229                    progress_str[percentage_start+len(percentage_str):]
 230
 231     print >> stream, '\r',
 232     print >> stream, '[ %s ] %s / %s | %s' % (
 233         progress_str,
 234         val,
 235         max_val,
 236         status_str),
 237     stream.flush()
 238
 239
 240 def set_cmp(list, simplify):
 241     """
 242     Builds a set out of a list but uses the results of simplify to determine equality between items
 243     """
 244     simpl = lambda x: (simplify(x), x)
 245     lst = dict(map(simpl, list))
 246     return lst.values()
 247
 248
 249 def first(it):
 250     """
 251     returns the first not-None object or None if the iterator is exhausted
 252     """
 253     for x in it:
 254         if x is not None:
 255             return x
 256     return None
 257
 258
 259 def intersect(a, b):
 260     return list(set(a) & set(b))
 261
 262
 263
 264 def remove_control_chars(s):
 265     all_chars = (unichr(i) for i in xrange(0x110000))
 266     control_chars = ''.join(map(unichr, range(0,32) + range(127,160)))
 267     control_char_re = re.compile('[%s]' % re.escape(control_chars))
 268
 269     return control_char_re.sub('', s)
 270
 271
 272 def unzip(a):
 273     return tuple(map(list,zip(*a)))
 274
 275
 276 def parse_range(s, min, max, default=None):
 277     """
 278     Parses the string and returns its value. If the value is outside the given
 279     range, its closest number within the range is returned
 280
 281     >>> parse_range('5', 0, 10)
 282     5
 283
 284     >>> parse_range('0', 5, 10)
 285     5
 286
 287     >>> parse_range('15',0, 10)
 288     10
 289
 290     >>> parse_range('x', 0, 20)
 291     10
 292
 293     >>> parse_range('x', 0, 20, 20)
 294     20
 295     """
 296     try:
 297         val = int(s)
 298         if val < min:
 299             return min
 300         if val > max:
 301             return max
 302         return val
 303
 304     except (ValueError, TypeError):
 305         return default if default is not None else (max-min)/2
 306
 307
 308
 309 def flatten(l):
 310     return [item for sublist in l for item in sublist]
 311
 312
 313 def linearize(key, iterators, reverse=False):
 314     """
 315     Linearizes a number of iterators, sorted by some comparison function
 316     """
 317
 318     iters = [iter(i) for i in iterators]
 319     vals = []
 320     for i in iters:
 321         try:
 322             v = i.next()
 323             vals. append( (v, i) )
 324         except StopIteration:
 325             continue
 326
 327     while vals:
 328         vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
 329         val, it = vals.pop(0)
 330         yield val
 331         try:
 332             next_val = it.next()
 333             vals.append( (next_val, it) )
 334         except StopIteration:
 335             pass
 336
 337
 338 def skip_pairs(iterator, cmp=cmp):
 339     """ Skips pairs of equal items
 340
 341     >>> list(skip_pairs([]))
 342     []
 343
 344     >>> list(skip_pairs([1]))
 345     [1]
 346
 347     >>> list(skip_pairs([1, 2, 3]))
 348     [1, 2, 3]
 349
 350     >>> list(skip_pairs([1, 1]))
 351     []
 352
 353     >>> list(skip_pairs([1, 2, 2]))
 354     [1]
 355
 356     >>> list(skip_pairs([1, 2, 2, 3]))
 357     [1, 3]
 358
 359     >>> list(skip_pairs([1, 2, 2, 2]))
 360     [1, 2]
 361
 362     >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
 363     [1, 3]
 364     """
 365
 366     iterator = iter(iterator)
 367     next = iterator.next()
 368
 369     while True:
 370         item = next
 371         try:
 372             next = iterator.next()
 373         except StopIteration as e:
 374             yield item
 375             raise e
 376
 377         if cmp(item, next) == 0:
 378             next = iterator.next()
 379         else:
 380             yield item
 381
 382
 383 def get_timestamp(datetime_obj):
 384     """ Returns the timestamp as an int for the given datetime object
 385
 386     >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
 387     1302168606
 388
 389     >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
 390     0
 391     """
 392     return int(time.mktime(datetime_obj.timetuple()))
 393
 394
 395
 396 re_url = re.compile('^https?://')
 397
 398 def is_url(string):
 399     """ Returns true if a string looks like an URL
 400
 401     >>> is_url('http://example.com/some-path/file.xml')
 402     True
 403
 404     >>> is_url('something else')
 405     False
 406     """
 407
 408     return bool(re_url.match(string))
 409
 410
 411
 412 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
 413 # this does not increase asymptotical complexity
 414 # but can still waste more time than it saves.
 415 def shortest_of(strings):
 416     return min(strings, key=len)
 417
 418 def longest_substr(strings):
 419     """
 420     Returns the longest common substring of the given strings
 421     """
 422
 423     substr = ""
 424     if not strings:
 425         return substr
 426     reference = shortest_of(strings)
 427     length = len(reference)
 428     #find a suitable slice i:j
 429     for i in xrange(length):
 430         #only consider strings long at least len(substr) + 1
 431         for j in xrange(i + len(substr) + 1, length):
 432             candidate = reference[i:j]
 433             if all(candidate in text for text in strings):
 434                 substr = candidate
 435     return substr
 436
 437
 438
 439 def additional_value(it, gen_val, val_changed=lambda _: True):
 440     """ Provides an additional value to the elements, calculated when needed
 441
 442     For the elements from the iterator, some additional value can be computed
 443     by gen_val (which might be an expensive computation).
 444
 445     If the elements in the iterator are ordered so that some subsequent
 446     elements would generate the same additional value, val_changed can be
 447     provided, which receives the next element from the iterator and the
 448     previous additional value. If the element would generate the same
 449     additional value (val_changed returns False), its computation is skipped.
 450
 451     >>> # get the next full hundred higher than x
 452     >>> # this will probably be an expensive calculation
 453     >>> next_hundred = lambda x: x + 100-(x % 100)
 454
 455     >>> # returns True if h is not the value that next_hundred(x) would provide
 456     >>> # this should be a relatively cheap calculation, compared to the above
 457     >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
 458
 459     >>> xs = [0, 50, 100, 101, 199, 200, 201]
 460     >>> list(additional_value(xs, next_hundred, diff_hundred))
 461     [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
 462     """
 463
 464     _none = object()
 465     current = _none
 466
 467     for x in it:
 468         if current is _none or val_changed(x, current):
 469             current = gen_val(x)
 470
 471         yield (x, current)
 472
 473
 474 def file_hash(f, h=hashlib.md5, block_size=2**20):
 475     """ returns the hash of the contents of a file """
 476     f_hash = h()
 477     for chunk in iter(lambda: f.read(block_size), ''):
 478         f_hash.update(chunk)
 479     return f_hash
 480
 481
 482
 483 def split_list(l, prop):
 484     """ split elements that satisfy a property, and those that don't """
 485     match   = filter(prop, l)
 486     nomatch = [x for x in l if x not in match]
 487     return match, nomatch
 488
 489
 490 def sorted_chain(links, key, reverse=False):
 491     """ Takes a list of iters can iterates over sorted elements
 492
 493     Each elment of links should be a tuple of (sort_key, iterator). The
 494     elements of each iterator should be sorted already. sort_key should
 495     indicate the key of the first element and needs to be comparable to the
 496     result of key(elem).
 497
 498     The function returns an iterator over the globally sorted element that
 499     ensures that as little iterators as possible are evaluated.  When
 500     evaluating """
 501
 502     # mixed_list initially contains all placeholders; later evaluated
 503     # elements (from the iterators) are mixed in
 504     mixed_list = [(k, link, True) for k, link in links]
 505
 506     while mixed_list:
 507         _, item, expand = mixed_list.pop(0)
 508
 509         # found an element (from an earlier expansion), yield it
 510         if not expand:
 511             yield item
 512             continue
 513
 514         # found an iter that needs to be expanded.
 515         # The iterator is fully consumed
 516         new_items = [(key(i), i, False) for i in item]
 517
 518         # sort links (placeholders) and elements together
 519         mixed_list = sorted(mixed_list + new_items, key=lambda (k, _v, _e): k,
 520                 reverse=reverse)
 521
 522
 523 def url_add_authentication(url, username, password):
 524     """
 525     Adds authentication data (username, password) to a given
 526     URL in order to construct an authenticated URL.
 527
 528     >>> url_add_authentication('https://host.com/', '', None)
 529     'https://host.com/'
 530     >>> url_add_authentication('http://example.org/', None, None)
 531     'http://example.org/'
 532     >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
 533     'telnet://foo:bar@host.com/'
 534     >>> url_add_authentication('ftp://example.org', 'billy', None)
 535     'ftp://billy@example.org'
 536     >>> url_add_authentication('ftp://example.org', 'billy', '')
 537     'ftp://billy:@example.org'
 538     >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
 539     'http://aa:bc@localhost/x'
 540     >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
 541     'http://i%2Fo:P@ss:@blubb.lan/u.html'
 542     >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
 543     'http://c:d@x.org/'
 544     >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
 545     'http://P@x:i%2F@cx.lan'
 546     >>> url_add_authentication('http://x.org/', 'a b', 'c d')
 547     'http://a%20b:c%20d@x.org/'
 548     """
 549     if username is None or username == '':
 550         return url
 551
 552     # Relaxations of the strict quoting rules (bug 1521):
 553     # 1. Accept '@' in username and password
 554     # 2. Acecpt ':' in password only
 555     username = urllib.quote(username, safe='@')
 556
 557     if password is not None:
 558         password = urllib.quote(password, safe='@:')
 559         auth_string = ':'.join((username, password))
 560     else:
 561         auth_string = username
 562
 563     url = url_strip_authentication(url)
 564
 565     url_parts = list(urlparse.urlsplit(url))
 566     # url_parts[1] is the HOST part of the URL
 567     url_parts[1] = '@'.join((auth_string, url_parts[1]))
 568
 569     return urlparse.urlunsplit(url_parts)
 570
 571
 572 def urlopen(url, headers=None, data=None):
 573     """
 574     An URL opener with the User-agent set to gPodder (with version)
 575     """
 576     username, password = username_password_from_url(url)
 577     if username is not None or password is not None:
 578         url = url_strip_authentication(url)
 579         password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
 580         password_mgr.add_password(None, url, username, password)
 581         handler = urllib2.HTTPBasicAuthHandler(password_mgr)
 582         opener = urllib2.build_opener(handler)
 583     else:
 584         opener = urllib2.build_opener()
 585
 586     if headers is None:
 587         headers = {}
 588     else:
 589         headers = dict(headers)
 590
 591     headers.update({'User-agent': settings.USER_AGENT})
 592     request = urllib2.Request(url, data=data, headers=headers)
 593     return opener.open(request)
 594
 595
 596
 597 def username_password_from_url(url):
 598     r"""
 599     Returns a tuple (username,password) containing authentication
 600     data from the specified URL or (None,None) if no authentication
 601     data can be found in the URL.
 602
 603     See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)
 604
 605     >>> username_password_from_url('https://@host.com/')
 606     ('', None)
 607     >>> username_password_from_url('telnet://host.com/')
 608     (None, None)
 609     >>> username_password_from_url('ftp://foo:@host.com/')
 610     ('foo', '')
 611     >>> username_password_from_url('http://a:b@host.com/')
 612     ('a', 'b')
 613     >>> username_password_from_url(1)
 614     Traceback (most recent call last):
 615       ...
 616     ValueError: URL has to be a string or unicode object.
 617     >>> username_password_from_url(None)
 618     Traceback (most recent call last):
 619       ...
 620     ValueError: URL has to be a string or unicode object.
 621     >>> username_password_from_url('http://a@b:c@host.com/')
 622     ('a@b', 'c')
 623     >>> username_password_from_url('ftp://a:b:c@host.com/')
 624     ('a', 'b:c')
 625     >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
 626     ('i/o', 'P@ss:')
 627     >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
 628     ('\xc3\xb6sterreich', None)
 629     >>> username_password_from_url('http://w%20x:y%20z@example.org/')
 630     ('w x', 'y z')
 631     >>> username_password_from_url('http://example.com/x@y:z@test.com/')
 632     (None, None)
 633     """
 634     if type(url) not in (str, unicode):
 635         raise ValueError('URL has to be a string or unicode object.')
 636
 637     (username, password) = (None, None)
 638
 639     (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
 640
 641     if '@' in netloc:
 642         (authentication, netloc) = netloc.rsplit('@', 1)
 643         if ':' in authentication:
 644             (username, password) = authentication.split(':', 1)
 645
 646             # RFC1738 dictates that we should not allow ['/', '@', ':']
 647             # characters in the username and password field (Section 3.1):
 648             #
 649             # 1. The "/" can't be in there at this point because of the way
 650             #    urlparse (which we use above) works.
 651             # 2. Due to gPodder bug 1521, we allow "@" in the username and
 652             #    password field. We use netloc.rsplit('@', 1), which will
 653             #    make sure that we split it at the last '@' in netloc.
 654             # 3. The colon must be excluded (RFC2617, Section 2) in the
 655             #    username, but is apparently allowed in the password. This
 656             #    is handled by the authentication.split(':', 1) above, and
 657             #    will cause any extraneous ':'s to be part of the password.
 658
 659             username = urllib.unquote(username)
 660             password = urllib.unquote(password)
 661         else:
 662             username = urllib.unquote(authentication)
 663
 664     return (username, password)
 665
 666
 667 def url_strip_authentication(url):
 668     """
 669     Strips authentication data from an URL. Returns the URL with
 670     the authentication data removed from it.
 671
 672     >>> url_strip_authentication('https://host.com/')
 673     'https://host.com/'
 674     >>> url_strip_authentication('telnet://foo:bar@host.com/')
 675     'telnet://host.com/'
 676     >>> url_strip_authentication('ftp://billy@example.org')
 677     'ftp://example.org'
 678     >>> url_strip_authentication('ftp://billy:@example.org')
 679     'ftp://example.org'
 680     >>> url_strip_authentication('http://aa:bc@localhost/x')
 681     'http://localhost/x'
 682     >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
 683     'http://blubb.lan/u.html'
 684     >>> url_strip_authentication('http://c:d@x.org/')
 685     'http://x.org/'
 686     >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
 687     'http://cx.lan'
 688     >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
 689     'http://example.com/'
 690     """
 691     url_parts = list(urlparse.urlsplit(url))
 692     # url_parts[1] is the HOST part of the URL
 693
 694     # Remove existing authentication data
 695     if '@' in url_parts[1]:
 696         url_parts[1] = url_parts[1].rsplit('@', 1)[1]
 697
 698     return urlparse.urlunsplit(url_parts)
 699
 700
 701 # Native filesystem encoding detection
 702 encoding = sys.getfilesystemencoding()
 703
 704 def sanitize_encoding(filename):
 705     r"""
 706     Generate a sanitized version of a string (i.e.
 707     remove invalid characters and encode in the
 708     detected native language encoding).
 709
 710     >>> sanitize_encoding('\x80')
 711     ''
 712     >>> sanitize_encoding(u'unicode')
 713     'unicode'
 714     """
 715     # The encoding problem goes away in Python 3.. hopefully!
 716     if sys.version_info >= (3, 0):
 717         return filename
 718
 719     global encoding
 720     if not isinstance(filename, unicode):
 721         filename = filename.decode(encoding, 'ignore')
 722     return filename.encode(encoding, 'ignore')
 723
 724
 725 def get_git_head():
 726     """ returns the commit and message of the current git HEAD """
 727
 728     try:
 729         pr = subprocess.Popen('/usr/bin/git log -n 1 --oneline'.split(),
 730             cwd = settings.BASE_DIR,
 731             stdout = subprocess.PIPE,
 732             stderr = subprocess.PIPE,
 733         )
 734
 735     except OSError:
 736         return None, None
 737
 738     (out, err) = pr.communicate()
 739     if err:
 740         return None, None
 741
 742     outs = out.split()
 743     commit = outs[0]
 744     msg = ' ' .join(outs[1:])
 745     return commit, msg
 746
 747
 748
 749 # https://gist.github.com/samuraisam/901117
 750
# default tolerance for comparing datetimes in deep_eq: none
default_fudge = timedelta(seconds=0, microseconds=0, days=0)

def deep_eq(_v1, _v2, datetime_fudge=default_fudge, _assert=False):
  """
  Tests for deep equality between two python data structures recursing
  into sub-structures if necessary. Works with all python types including
  iterators and generators. This function was dreamt up to test API responses
  but could be used for anything. Be careful. With deeply nested structures
  you may blow the stack.

  Iterators and generators that are passed in are consumed by the
  comparison.

  Options:
            datetime_fudge => this is a datetime.timedelta object which, when
                              comparing dates, will accept values that differ
                              by the number of seconds specified
            _assert        => passing yes for this will raise an assertion error
                              when values do not match, instead of returning
                              false (very useful in combination with pdb)

  Returns True if both values are considered equal, False otherwise.

  Doctests included:

  >>> x1, y1 = ({'a': 'b'}, {'a': 'b'})
  >>> deep_eq(x1, y1)
  True
  >>> x2, y2 = ({'a': 'b'}, {'b': 'a'})
  >>> deep_eq(x2, y2)
  False
  >>> x3, y3 = ({'a': {'b': 'c'}}, {'a': {'b': 'c'}})
  >>> deep_eq(x3, y3)
  True
  >>> x4, y4 = ({'c': 't', 'a': {'b': 'c'}}, {'a': {'b': 'n'}, 'c': 't'})
  >>> deep_eq(x4, y4)
  False
  >>> x5, y5 = ({'a': [1,2,3]}, {'a': [1,2,3]})
  >>> deep_eq(x5, y5)
  True
  >>> x6, y6 = ({'a': [1,'b',8]}, {'a': [2,'b',8]})
  >>> deep_eq(x6, y6)
  False
  >>> x7, y7 = ('a', 'a')
  >>> deep_eq(x7, y7)
  True
  >>> x8, y8 = (['p','n',['asdf']], ['p','n',['asdf']])
  >>> deep_eq(x8, y8)
  True
  >>> x9, y9 = (['p','n',['asdf',['omg']]], ['p', 'n', ['asdf',['nowai']]])
  >>> deep_eq(x9, y9)
  False
  >>> x10, y10 = (1, 2)
  >>> deep_eq(x10, y10)
  False
  >>> deep_eq((str(p) for p in xrange(10)), (str(p) for p in xrange(10)))
  True
  >>> str(deep_eq(range(4), range(4)))
  'True'
  >>> deep_eq(xrange(100), xrange(100))
  True
  >>> deep_eq(xrange(2), xrange(5))
  False
  >>> from datetime import datetime, timedelta
  >>> d1, d2 = (datetime.now(), datetime.now() + timedelta(seconds=4))
  >>> deep_eq(d1, d2)
  False
  >>> deep_eq(d1, d2, datetime_fudge=timedelta(seconds=5))
  True
  """
  # recursive calls use the same options as this call
  _deep_eq = functools.partial(deep_eq, datetime_fudge=datetime_fudge,
                               _assert=_assert)

  # passes the result R of a comparison through; if _assert is set, a
  # failed comparison raises an AssertionError that names both values
  def _check_assert(R, a, b, reason=''):
    if _assert and not R:
      assert 0, "an assertion has failed in deep_eq (%s) %s != %s" % (
        reason, str(a), str(b))
    return R

  # dictionaries are equal if they have the same keys and the values of
  # each key are deeply equal
  def _deep_dict_eq(d1, d2):
    k1, k2 = (sorted(d1.keys()), sorted(d2.keys()))
    if k1 != k2: # keys should be exactly equal
      return _check_assert(False, k1, k2, "keys")

    # the number of equal values has to match the number of keys; sum()
    # does not short-circuit, so all values are compared
    return _check_assert(operator.eq(sum(_deep_eq(d1[k], d2[k])
                                       for k in k1),
                                     len(k1)), d1, d2, "dictionaries")

  # lists (of already evaluated iterables) are equal if they have the same
  # length and their elements are pairwise deeply equal
  def _deep_iter_eq(l1, l2):
    if len(l1) != len(l2):
      return _check_assert(False, l1, l2, "lengths")
    return _check_assert(operator.eq(sum(_deep_eq(v1, v2)
                                      for v1, v2 in zip(l1, l2)),
                                     len(l1)), l1, l2, "iterables")

  # default comparison, used for everything that is neither a dictionary
  # nor iterable (and for strings)
  def op(a, b):
    _op = operator.eq
    # only exact datetime instances are compared with the fudge
    if type(a) == datetime and type(b) == datetime:
      # NOTE(review): only the seconds attribute of the fudge is read; its
      # days and microseconds are not taken into account -- confirm
      s = datetime_fudge.seconds
      # time tuples carry no microseconds, so the comparison is done with
      # a resolution of one second
      t1, t2 = (time.mktime(a.timetuple()), time.mktime(b.timetuple()))
      l = t1 - t2
      # l becomes the negated absolute difference ...
      l = -l if l > 0 else l
      # ... which must not be below the negated fudge
      return _check_assert((-s if s > 0 else s) <= l, a, b, "dates")
    return _check_assert(_op(a, b), a, b, "values")

  c1, c2 = (_v1, _v2)

  # guard against strings because they are iterable and their
  # elements yield iterables infinitely.
  # I N C E P T I O N
  # (only the type of _v1 decides how the values are compared; the else
  # branch of the loop is executed if _v1 is not a string)
  for t in types.StringTypes:
    if isinstance(_v1, t):
      break
  else:
    if isinstance(_v1, types.DictType):
      op = _deep_dict_eq
    else:
      try:
        # evaluate both iterables completely
        c1, c2 = (list(iter(_v1)), list(iter(_v2)))
      except TypeError:
        # not iterable, compare the values themselves
        c1, c2 = _v1, _v2
      else:
        op = _deep_iter_eq

  return op(c1, c2)
 871
 872
 873 def parse_request_body(request):
 874     """ returns the parsed request body, handles gzip encoding """
 875
 876     raw_body = request.body
 877     content_enc = request.META.get('HTTP_CONTENT_ENCODING')
 878
 879     if content_enc == 'gzip':
 880         raw_body = zlib.decompress(raw_body)
 881
 882     return json.loads(raw_body)
 883
 884
 885 def normalize_feed_url(url):
 886     """
 887     Converts any URL to http:// or ftp:// so that it can be
 888     used with "wget". If the URL cannot be converted (invalid
 889     or unknown scheme), "None" is returned.
 890
 891     This will also normalize feed:// and itpc:// to http://.
 892
 893     >>> normalize_feed_url('itpc://example.org/podcast.rss')
 894     'http://example.org/podcast.rss'
 895
 896     If no URL scheme is defined (e.g. "curry.com"), we will
 897     simply assume the user intends to add a http:// feed.
 898
 899     >>> normalize_feed_url('curry.com')
 900     'http://curry.com/'
 901
 902     There are even some more shortcuts for advanced users
 903     and lazy typists (see the source for details).
 904
 905     >>> normalize_feed_url('fb:43FPodcast')
 906     'http://feeds.feedburner.com/43FPodcast'
 907
 908     It will also take care of converting the domain name to
 909     all-lowercase (because domains are not case sensitive):
 910
 911     >>> normalize_feed_url('http://Example.COM/')
 912     'http://example.com/'
 913
 914     Some other minimalistic changes are also taken care of,
 915     e.g. a ? with an empty query is removed:
 916
 917     >>> normalize_feed_url('http://example.org/test?')
 918     'http://example.org/test'
 919
 920     Leading and trailing whitespace is removed
 921
 922     >>> normalize_feed_url(' http://example.com/podcast.rss ')
 923     'http://example.com/podcast.rss'
 924
 925     HTTP Authentication is removed to protect users' privacy
 926
 927     >>> normalize_feed_url('http://a@b:c@host.com/')
 928     'http://host.com/'
 929     >>> normalize_feed_url('ftp://a:b:c@host.com/')
 930     'ftp://host.com/'
 931     >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
 932     'http://host.com/'
 933     >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
 934     'ftp://host.com/'
 935     >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
 936     'http://example.org/'
 937     >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
 938     'http://example.com/x%40y%3Az%40test.com/'
 939     >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
 940     'http://en.wikipedia.org/wiki/%C3%84'
 941     >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
 942     'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
 943     """
 944     url = url.strip()
 945     if not url or len(url) < 8:
 946         return None
 947
 948     if isinstance(url, unicode):
 949         url = url.encode('utf-8', 'ignore')
 950
 951     # This is a list of prefixes that you can use to minimize the amount of
 952     # keystrokes that you have to use.
 953     # Feel free to suggest other useful prefixes, and I'll add them here.
 954     PREFIXES = {
 955             'fb:': 'http://feeds.feedburner.com/%s',
 956             'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
 957             'sc:': 'http://soundcloud.com/%s',
 958             'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
 959             # YouTube playlists. To get a list of playlists per-user, use:
 960             # https://gdata.youtube.com/feeds/api/users/<username>/playlists
 961             'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
 962     }
 963
 964     for prefix, expansion in PREFIXES.iteritems():
 965         if url.startswith(prefix):
 966             url = expansion % (url[len(prefix):],)
 967             break
 968
 969     # Assume HTTP for URLs without scheme
 970     if not '://' in url:
 971         url = 'http://' + url
 972
 973     scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
 974
 975     # Schemes and domain names are case insensitive
 976     scheme, netloc = scheme.lower(), netloc.lower()
 977
 978     # encode non-encoded characters
 979     path = urllib.quote(path, '/%')
 980     query = urllib.quote_plus(query, ':&=')
 981
 982     # Remove authentication to protect users' privacy
 983     netloc = netloc.rsplit('@', 1)[-1]
 984
 985     # Normalize empty paths to "/"
 986     if path == '':
 987         path = '/'
 988
 989     # feed://, itpc:// and itms:// are really http://
 990     if scheme in ('feed', 'itpc', 'itms'):
 991         scheme = 'http'
 992
 993     if scheme not in ('http', 'https', 'ftp', 'file'):
 994         return None
 995
 996     # urlunsplit might return "a slighty different, but equivalent URL"
 997     return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
 998
 999
def partition(items, predicate=bool):
    """ Splits items by a predicate

    Returns a tuple of two lazy iterators: the first one provides the items
    for which the predicate is false, the second one those for which it is
    true. The predicate is called once per item.
    """
    tagged = ((predicate(item), item) for item in items)
    for_rejected, for_accepted = itertools.tee(tagged)

    rejected = (item for accepted, item in for_rejected if not accepted)
    accepted = (item for is_accepted, item in for_accepted if is_accepted)
    return (rejected, accepted)