mygpo/utils.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of my.gpodder.org.
   4 #
   5 # my.gpodder.org is free software: you can redistribute it and/or modify it
   6 # under the terms of the GNU Affero General Public License as published by
   7 # the Free Software Foundation, either version 3 of the License, or (at your
   8 # option) any later version.
   9 #
  10 # my.gpodder.org is distributed in the hope that it will be useful, but
  11 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
  13 # License for more details.
  14 #
  15 # You should have received a copy of the GNU Affero General Public License
  16 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
  17 #
  18
  19 import functools
  20 import types
  21 import subprocess
  22 import os
  23 import operator
  24 import sys
  25 import re
  26 import collections
  27 import itertools
  28 from datetime import datetime, timedelta, date
  29 import time
  30 import hashlib
  31 import urlparse
  32 import urllib
  33 import urllib2
  34 import zlib
  35 import shlex
  36
  37 from django.conf import settings
  38 from django.core.urlresolvers import reverse
  39
  40 from mygpo.core.json import json
  41
  42
  43 def daterange(from_date, to_date=None, leap=timedelta(days=1)):
  44     """
  45     >>> from_d = datetime(2010, 01, 01)
  46     >>> to_d = datetime(2010, 01, 05)
  47     >>> list(daterange(from_d, to_d))
  48     [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
  49     """
  50
  51     if to_date is None:
  52         if isinstance(from_date, datetime):
  53             to_date = datetime.now()
  54         else:
  55             to_date = date.today()
  56
  57     while from_date <= to_date:
  58         yield from_date
  59         from_date = from_date + leap
  60     return
  61
  62 def format_time(value):
  63     """Format an offset (in seconds) to a string
  64
  65     The offset should be an integer or float value.
  66
  67     >>> format_time(0)
  68     '00:00'
  69     >>> format_time(20)
  70     '00:20'
  71     >>> format_time(3600)
  72     '01:00:00'
  73     >>> format_time(10921)
  74     '03:02:01'
  75     """
  76     try:
  77         dt = datetime.utcfromtimestamp(value)
  78     except (ValueError, TypeError):
  79         return ''
  80
  81     if dt.hour == 0:
  82         return dt.strftime('%M:%S')
  83     else:
  84         return dt.strftime('%H:%M:%S')
  85
  86 def parse_time(value):
  87     """
  88     >>> parse_time(10)
  89     10
  90
  91     >>> parse_time('05:10') #5*60+10
  92     310
  93
  94     >>> parse_time('1:05:10') #60*60+5*60+10
  95     3910
  96     """
  97     if value is None:
  98         raise ValueError('None value in parse_time')
  99
 100     if isinstance(value, int):
 101         # Don't need to parse already-converted time value
 102         return value
 103
 104     if value == '':
 105         raise ValueError('Empty valueing in parse_time')
 106
 107     for format in ('%H:%M:%S', '%M:%S'):
 108         try:
 109             t = time.strptime(value, format)
 110             return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
 111         except ValueError, e:
 112             continue
 113
 114     return int(value)
 115
 116
 117 def parse_bool(val):
 118     """
 119     >>> parse_bool('True')
 120     True
 121
 122     >>> parse_bool('true')
 123     True
 124
 125     >>> parse_bool('')
 126     False
 127     """
 128     if isinstance(val, bool):
 129         return val
 130     if val.lower() == 'true':
 131         return True
 132     return False
 133
 134
 135 def iterate_together(lists, key=lambda x: x, reverse=False):
 136     """
 137     takes ordered, possibly sparse, lists with similar items
 138     (some items have a corresponding item in the other lists, some don't).
 139
 140     It then yield tuples of corresponding items, where one element is None is
 141     there is no corresponding entry in one of the lists.
 142
 143     Tuples where both elements are None are skipped.
 144
 145     The results of the key method are used for the comparisons.
 146
 147     If reverse is True, the lists are expected to be sorted in reverse order
 148     and the results will also be sorted reverse
 149
 150     >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
 151     [(1, 1), (2, None), (None, 3)]
 152
 153     >>> list(iterate_together([[], []]))
 154     []
 155
 156     >>> list(iterate_together([range(1, 3), range(3, 5)]))
 157     [(1, None), (2, None), (None, 3), (None, 4)]
 158
 159     >>> list(iterate_together([range(1, 3), []]))
 160     [(1, None), (2, None)]
 161
 162     >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
 163     [(1, None), (3, 3)]
 164     """
 165
 166     Next = collections.namedtuple('Next', 'item more')
 167     min_ = min if not reverse else max
 168     lt_  = operator.lt if not reverse else operator.gt
 169
 170     lists = [iter(l) for l in lists]
 171
 172     def _take(it):
 173         try:
 174             i = it.next()
 175             while i is None:
 176                 i = it.next()
 177             return Next(i, True)
 178         except StopIteration:
 179             return Next(None, False)
 180
 181     def new_res():
 182         return [None]*len(lists)
 183
 184     # take first bunch of items
 185     items = [_take(l) for l in lists]
 186
 187     while any(i.item is not None or i.more for i in items):
 188
 189         res = new_res()
 190
 191         for n, item in enumerate(items):
 192
 193             if item.item is None:
 194                 continue
 195
 196             if all(x is None for x in res):
 197                 res[n] = item.item
 198                 continue
 199
 200             min_v = min_(filter(lambda x: x is not None, res), key=key)
 201
 202             if key(item.item) == key(min_v):
 203                 res[n] = item.item
 204
 205             elif lt_(key(item.item), key(min_v)):
 206                 res = new_res()
 207                 res[n] = item.item
 208
 209         for n, x in enumerate(res):
 210             if x is not None:
 211                 items[n] = _take(lists[n])
 212
 213         yield tuple(res)
 214
 215
 216 def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
 217
 218     factor = float(val)/max_val if max_val > 0 else 0
 219
 220     # progress as percentage
 221     percentage_str = '{val:.2%}'.format(val=factor)
 222
 223     # progress bar filled with #s
 224     factor = min(int(factor*max_width), max_width)
 225     progress_str = '#' * factor + ' ' * (max_width-factor)
 226
 227     #insert percentage into bar
 228     percentage_start = int((max_width-len(percentage_str))/2)
 229     progress_str = progress_str[:percentage_start] + \
 230                    percentage_str + \
 231                    progress_str[percentage_start+len(percentage_str):]
 232
 233     print >> stream, '\r',
 234     print >> stream, '[ %s ] %s / %s | %s' % (
 235         progress_str,
 236         val,
 237         max_val,
 238         status_str),
 239     stream.flush()
 240
 241
 242 def set_cmp(list, simplify):
 243     """
 244     Builds a set out of a list but uses the results of simplify to determine equality between items
 245     """
 246     simpl = lambda x: (simplify(x), x)
 247     lst = dict(map(simpl, list))
 248     return lst.values()
 249
 250
 251 def first(it):
 252     """
 253     returns the first not-None object or None if the iterator is exhausted
 254     """
 255     for x in it:
 256         if x is not None:
 257             return x
 258     return None
 259
 260
 261 def intersect(a, b):
 262     return list(set(a) & set(b))
 263
 264
 265
 266 def remove_control_chars(s):
 267     all_chars = (unichr(i) for i in xrange(0x110000))
 268     control_chars = ''.join(map(unichr, range(0,32) + range(127,160)))
 269     control_char_re = re.compile('[%s]' % re.escape(control_chars))
 270
 271     return control_char_re.sub('', s)
 272
 273
 274 def unzip(a):
 275     return tuple(map(list,zip(*a)))
 276
 277
 278 def parse_range(s, min, max, default=None):
 279     """
 280     Parses the string and returns its value. If the value is outside the given
 281     range, its closest number within the range is returned
 282
 283     >>> parse_range('5', 0, 10)
 284     5
 285
 286     >>> parse_range('0', 5, 10)
 287     5
 288
 289     >>> parse_range('15',0, 10)
 290     10
 291
 292     >>> parse_range('x', 0, 20)
 293     10
 294
 295     >>> parse_range('x', 0, 20, 20)
 296     20
 297     """
 298     try:
 299         val = int(s)
 300         if val < min:
 301             return min
 302         if val > max:
 303             return max
 304         return val
 305
 306     except (ValueError, TypeError):
 307         return default if default is not None else (max-min)/2
 308
 309
 310
 311 def flatten(l):
 312     return [item for sublist in l for item in sublist]
 313
 314
 315 def linearize(key, iterators, reverse=False):
 316     """
 317     Linearizes a number of iterators, sorted by some comparison function
 318     """
 319
 320     iters = [iter(i) for i in iterators]
 321     vals = []
 322     for i in iters:
 323         try:
 324             v = i.next()
 325             vals. append( (v, i) )
 326         except StopIteration:
 327             continue
 328
 329     while vals:
 330         vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
 331         val, it = vals.pop(0)
 332         yield val
 333         try:
 334             next_val = it.next()
 335             vals.append( (next_val, it) )
 336         except StopIteration:
 337             pass
 338
 339
 340 def skip_pairs(iterator, cmp=cmp):
 341     """ Skips pairs of equal items
 342
 343     >>> list(skip_pairs([]))
 344     []
 345
 346     >>> list(skip_pairs([1]))
 347     [1]
 348
 349     >>> list(skip_pairs([1, 2, 3]))
 350     [1, 2, 3]
 351
 352     >>> list(skip_pairs([1, 1]))
 353     []
 354
 355     >>> list(skip_pairs([1, 2, 2]))
 356     [1]
 357
 358     >>> list(skip_pairs([1, 2, 2, 3]))
 359     [1, 3]
 360
 361     >>> list(skip_pairs([1, 2, 2, 2]))
 362     [1, 2]
 363
 364     >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
 365     [1, 3]
 366     """
 367
 368     iterator = iter(iterator)
 369     next = iterator.next()
 370
 371     while True:
 372         item = next
 373         try:
 374             next = iterator.next()
 375         except StopIteration as e:
 376             yield item
 377             raise e
 378
 379         if cmp(item, next) == 0:
 380             next = iterator.next()
 381         else:
 382             yield item
 383
 384
 385 def get_timestamp(datetime_obj):
 386     """ Returns the timestamp as an int for the given datetime object
 387
 388     >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
 389     1302168606
 390
 391     >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
 392     0
 393     """
 394     return int(time.mktime(datetime_obj.timetuple()))
 395
 396
 397
 398 re_url = re.compile('^https?://')
 399
 400 def is_url(string):
 401     """ Returns true if a string looks like an URL
 402
 403     >>> is_url('http://example.com/some-path/file.xml')
 404     True
 405
 406     >>> is_url('something else')
 407     False
 408     """
 409
 410     return bool(re_url.match(string))
 411
 412
 413
 414 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
 415 # this does not increase asymptotical complexity
 416 # but can still waste more time than it saves.
 417 def shortest_of(strings):
 418     return min(strings, key=len)
 419
 420 def longest_substr(strings):
 421     """
 422     Returns the longest common substring of the given strings
 423     """
 424
 425     substr = ""
 426     if not strings:
 427         return substr
 428     reference = shortest_of(strings)
 429     length = len(reference)
 430     #find a suitable slice i:j
 431     for i in xrange(length):
 432         #only consider strings long at least len(substr) + 1
 433         for j in xrange(i + len(substr) + 1, length):
 434             candidate = reference[i:j]
 435             if all(candidate in text for text in strings):
 436                 substr = candidate
 437     return substr
 438
 439
 440
 441 def additional_value(it, gen_val, val_changed=lambda _: True):
 442     """ Provides an additional value to the elements, calculated when needed
 443
 444     For the elements from the iterator, some additional value can be computed
 445     by gen_val (which might be an expensive computation).
 446
 447     If the elements in the iterator are ordered so that some subsequent
 448     elements would generate the same additional value, val_changed can be
 449     provided, which receives the next element from the iterator and the
 450     previous additional value. If the element would generate the same
 451     additional value (val_changed returns False), its computation is skipped.
 452
 453     >>> # get the next full hundred higher than x
 454     >>> # this will probably be an expensive calculation
 455     >>> next_hundred = lambda x: x + 100-(x % 100)
 456
 457     >>> # returns True if h is not the value that next_hundred(x) would provide
 458     >>> # this should be a relatively cheap calculation, compared to the above
 459     >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
 460
 461     >>> xs = [0, 50, 100, 101, 199, 200, 201]
 462     >>> list(additional_value(xs, next_hundred, diff_hundred))
 463     [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
 464     """
 465
 466     _none = object()
 467     current = _none
 468
 469     for x in it:
 470         if current is _none or val_changed(x, current):
 471             current = gen_val(x)
 472
 473         yield (x, current)
 474
 475
 476 def file_hash(f, h=hashlib.md5, block_size=2**20):
 477     """ returns the hash of the contents of a file """
 478     f_hash = h()
 479     for chunk in iter(lambda: f.read(block_size), ''):
 480         f_hash.update(chunk)
 481     return f_hash
 482
 483
 484
 485 def split_list(l, prop):
 486     """ split elements that satisfy a property, and those that don't """
 487     match   = filter(prop, l)
 488     nomatch = [x for x in l if x not in match]
 489     return match, nomatch
 490
 491
 492 def sorted_chain(links, key, reverse=False):
 493     """ Takes a list of iters can iterates over sorted elements
 494
 495     Each elment of links should be a tuple of (sort_key, iterator). The
 496     elements of each iterator should be sorted already. sort_key should
 497     indicate the key of the first element and needs to be comparable to the
 498     result of key(elem).
 499
 500     The function returns an iterator over the globally sorted element that
 501     ensures that as little iterators as possible are evaluated.  When
 502     evaluating """
 503
 504     # mixed_list initially contains all placeholders; later evaluated
 505     # elements (from the iterators) are mixed in
 506     mixed_list = [(k, link, True) for k, link in links]
 507
 508     while mixed_list:
 509         _, item, expand = mixed_list.pop(0)
 510
 511         # found an element (from an earlier expansion), yield it
 512         if not expand:
 513             yield item
 514             continue
 515
 516         # found an iter that needs to be expanded.
 517         # The iterator is fully consumed
 518         new_items = [(key(i), i, False) for i in item]
 519
 520         # sort links (placeholders) and elements together
 521         mixed_list = sorted(mixed_list + new_items, key=lambda (k, _v, _e): k,
 522                 reverse=reverse)
 523
 524
 525 def url_add_authentication(url, username, password):
 526     """
 527     Adds authentication data (username, password) to a given
 528     URL in order to construct an authenticated URL.
 529
 530     >>> url_add_authentication('https://host.com/', '', None)
 531     'https://host.com/'
 532     >>> url_add_authentication('http://example.org/', None, None)
 533     'http://example.org/'
 534     >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
 535     'telnet://foo:bar@host.com/'
 536     >>> url_add_authentication('ftp://example.org', 'billy', None)
 537     'ftp://billy@example.org'
 538     >>> url_add_authentication('ftp://example.org', 'billy', '')
 539     'ftp://billy:@example.org'
 540     >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
 541     'http://aa:bc@localhost/x'
 542     >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
 543     'http://i%2Fo:P@ss:@blubb.lan/u.html'
 544     >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
 545     'http://c:d@x.org/'
 546     >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
 547     'http://P@x:i%2F@cx.lan'
 548     >>> url_add_authentication('http://x.org/', 'a b', 'c d')
 549     'http://a%20b:c%20d@x.org/'
 550     """
 551     if username is None or username == '':
 552         return url
 553
 554     # Relaxations of the strict quoting rules (bug 1521):
 555     # 1. Accept '@' in username and password
 556     # 2. Acecpt ':' in password only
 557     username = urllib.quote(username, safe='@')
 558
 559     if password is not None:
 560         password = urllib.quote(password, safe='@:')
 561         auth_string = ':'.join((username, password))
 562     else:
 563         auth_string = username
 564
 565     url = url_strip_authentication(url)
 566
 567     url_parts = list(urlparse.urlsplit(url))
 568     # url_parts[1] is the HOST part of the URL
 569     url_parts[1] = '@'.join((auth_string, url_parts[1]))
 570
 571     return urlparse.urlunsplit(url_parts)
 572
 573
 574 def urlopen(url, headers=None, data=None):
 575     """
 576     An URL opener with the User-agent set to gPodder (with version)
 577     """
 578     username, password = username_password_from_url(url)
 579     if username is not None or password is not None:
 580         url = url_strip_authentication(url)
 581         password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
 582         password_mgr.add_password(None, url, username, password)
 583         handler = urllib2.HTTPBasicAuthHandler(password_mgr)
 584         opener = urllib2.build_opener(handler)
 585     else:
 586         opener = urllib2.build_opener()
 587
 588     if headers is None:
 589         headers = {}
 590     else:
 591         headers = dict(headers)
 592
 593     headers.update({'User-agent': settings.USER_AGENT})
 594     request = urllib2.Request(url, data=data, headers=headers)
 595     return opener.open(request)
 596
 597
 598
 599 def username_password_from_url(url):
 600     r"""
 601     Returns a tuple (username,password) containing authentication
 602     data from the specified URL or (None,None) if no authentication
 603     data can be found in the URL.
 604
 605     See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)
 606
 607     >>> username_password_from_url('https://@host.com/')
 608     ('', None)
 609     >>> username_password_from_url('telnet://host.com/')
 610     (None, None)
 611     >>> username_password_from_url('ftp://foo:@host.com/')
 612     ('foo', '')
 613     >>> username_password_from_url('http://a:b@host.com/')
 614     ('a', 'b')
 615     >>> username_password_from_url(1)
 616     Traceback (most recent call last):
 617       ...
 618     ValueError: URL has to be a string or unicode object.
 619     >>> username_password_from_url(None)
 620     Traceback (most recent call last):
 621       ...
 622     ValueError: URL has to be a string or unicode object.
 623     >>> username_password_from_url('http://a@b:c@host.com/')
 624     ('a@b', 'c')
 625     >>> username_password_from_url('ftp://a:b:c@host.com/')
 626     ('a', 'b:c')
 627     >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
 628     ('i/o', 'P@ss:')
 629     >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
 630     ('\xc3\xb6sterreich', None)
 631     >>> username_password_from_url('http://w%20x:y%20z@example.org/')
 632     ('w x', 'y z')
 633     >>> username_password_from_url('http://example.com/x@y:z@test.com/')
 634     (None, None)
 635     """
 636     if type(url) not in (str, unicode):
 637         raise ValueError('URL has to be a string or unicode object.')
 638
 639     (username, password) = (None, None)
 640
 641     (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
 642
 643     if '@' in netloc:
 644         (authentication, netloc) = netloc.rsplit('@', 1)
 645         if ':' in authentication:
 646             (username, password) = authentication.split(':', 1)
 647
 648             # RFC1738 dictates that we should not allow ['/', '@', ':']
 649             # characters in the username and password field (Section 3.1):
 650             #
 651             # 1. The "/" can't be in there at this point because of the way
 652             #    urlparse (which we use above) works.
 653             # 2. Due to gPodder bug 1521, we allow "@" in the username and
 654             #    password field. We use netloc.rsplit('@', 1), which will
 655             #    make sure that we split it at the last '@' in netloc.
 656             # 3. The colon must be excluded (RFC2617, Section 2) in the
 657             #    username, but is apparently allowed in the password. This
 658             #    is handled by the authentication.split(':', 1) above, and
 659             #    will cause any extraneous ':'s to be part of the password.
 660
 661             username = urllib.unquote(username)
 662             password = urllib.unquote(password)
 663         else:
 664             username = urllib.unquote(authentication)
 665
 666     return (username, password)
 667
 668
 669 def url_strip_authentication(url):
 670     """
 671     Strips authentication data from an URL. Returns the URL with
 672     the authentication data removed from it.
 673
 674     >>> url_strip_authentication('https://host.com/')
 675     'https://host.com/'
 676     >>> url_strip_authentication('telnet://foo:bar@host.com/')
 677     'telnet://host.com/'
 678     >>> url_strip_authentication('ftp://billy@example.org')
 679     'ftp://example.org'
 680     >>> url_strip_authentication('ftp://billy:@example.org')
 681     'ftp://example.org'
 682     >>> url_strip_authentication('http://aa:bc@localhost/x')
 683     'http://localhost/x'
 684     >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
 685     'http://blubb.lan/u.html'
 686     >>> url_strip_authentication('http://c:d@x.org/')
 687     'http://x.org/'
 688     >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
 689     'http://cx.lan'
 690     >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
 691     'http://example.com/'
 692     """
 693     url_parts = list(urlparse.urlsplit(url))
 694     # url_parts[1] is the HOST part of the URL
 695
 696     # Remove existing authentication data
 697     if '@' in url_parts[1]:
 698         url_parts[1] = url_parts[1].rsplit('@', 1)[1]
 699
 700     return urlparse.urlunsplit(url_parts)
 701
 702
 703 # Native filesystem encoding detection
 704 encoding = sys.getfilesystemencoding()
 705
 706 def sanitize_encoding(filename):
 707     r"""
 708     Generate a sanitized version of a string (i.e.
 709     remove invalid characters and encode in the
 710     detected native language encoding).
 711
 712     >>> sanitize_encoding('\x80')
 713     ''
 714     >>> sanitize_encoding(u'unicode')
 715     'unicode'
 716     """
 717     # The encoding problem goes away in Python 3.. hopefully!
 718     if sys.version_info >= (3, 0):
 719         return filename
 720
 721     global encoding
 722     if not isinstance(filename, unicode):
 723         filename = filename.decode(encoding, 'ignore')
 724     return filename.encode(encoding, 'ignore')
 725
 726
 727 def get_git_head():
 728     """ returns the commit and message of the current git HEAD """
 729
 730     try:
 731         pr = subprocess.Popen('/usr/bin/git log -n 1 --oneline'.split(),
 732             cwd = settings.BASE_DIR,
 733             stdout = subprocess.PIPE,
 734             stderr = subprocess.PIPE,
 735         )
 736
 737     except OSError:
 738         return None, None
 739
 740     (out, err) = pr.communicate()
 741     if err:
 742         return None, None
 743
 744     outs = out.split()
 745     commit = outs[0]
 746     msg = ' ' .join(outs[1:])
 747     return commit, msg
 748
 749
 750
 751 # https://gist.github.com/samuraisam/901117
 752
 753 default_fudge = timedelta(seconds=0, microseconds=0, days=0)
 754
 755 def deep_eq(_v1, _v2, datetime_fudge=default_fudge, _assert=False):
 756   """
 757   Tests for deep equality between two python data structures recursing
 758   into sub-structures if necessary. Works with all python types including
 759   iterators and generators. This function was dreampt up to test API responses
 760   but could be used for anything. Be careful. With deeply nested structures
 761   you may blow the stack.
 762
 763   Options:
 764             datetime_fudge => this is a datetime.timedelta object which, when
 765                               comparing dates, will accept values that differ
 766                               by the number of seconds specified
 767             _assert        => passing yes for this will raise an assertion error
 768                               when values do not match, instead of returning
 769                               false (very useful in combination with pdb)
 770
 771   Doctests included:
 772
 773   >>> x1, y1 = ({'a': 'b'}, {'a': 'b'})
 774   >>> deep_eq(x1, y1)
 775   True
 776   >>> x2, y2 = ({'a': 'b'}, {'b': 'a'})
 777   >>> deep_eq(x2, y2)
 778   False
 779   >>> x3, y3 = ({'a': {'b': 'c'}}, {'a': {'b': 'c'}})
 780   >>> deep_eq(x3, y3)
 781   True
 782   >>> x4, y4 = ({'c': 't', 'a': {'b': 'c'}}, {'a': {'b': 'n'}, 'c': 't'})
 783   >>> deep_eq(x4, y4)
 784   False
 785   >>> x5, y5 = ({'a': [1,2,3]}, {'a': [1,2,3]})
 786   >>> deep_eq(x5, y5)
 787   True
 788   >>> x6, y6 = ({'a': [1,'b',8]}, {'a': [2,'b',8]})
 789   >>> deep_eq(x6, y6)
 790   False
 791   >>> x7, y7 = ('a', 'a')
 792   >>> deep_eq(x7, y7)
 793   True
 794   >>> x8, y8 = (['p','n',['asdf']], ['p','n',['asdf']])
 795   >>> deep_eq(x8, y8)
 796   True
 797   >>> x9, y9 = (['p','n',['asdf',['omg']]], ['p', 'n', ['asdf',['nowai']]])
 798   >>> deep_eq(x9, y9)
 799   False
 800   >>> x10, y10 = (1, 2)
 801   >>> deep_eq(x10, y10)
 802   False
 803   >>> deep_eq((str(p) for p in xrange(10)), (str(p) for p in xrange(10)))
 804   True
 805   >>> str(deep_eq(range(4), range(4)))
 806   'True'
 807   >>> deep_eq(xrange(100), xrange(100))
 808   True
 809   >>> deep_eq(xrange(2), xrange(5))
 810   False
 811   >>> from datetime import datetime, timedelta
 812   >>> d1, d2 = (datetime.now(), datetime.now() + timedelta(seconds=4))
 813   >>> deep_eq(d1, d2)
 814   False
 815   >>> deep_eq(d1, d2, datetime_fudge=timedelta(seconds=5))
 816   True
 817   """
 818   _deep_eq = functools.partial(deep_eq, datetime_fudge=datetime_fudge,
 819                                _assert=_assert)
 820
 821   def _check_assert(R, a, b, reason=''):
 822     if _assert and not R:
 823       assert 0, "an assertion has failed in deep_eq (%s) %s != %s" % (
 824         reason, str(a), str(b))
 825     return R
 826
 827   def _deep_dict_eq(d1, d2):
 828     k1, k2 = (sorted(d1.keys()), sorted(d2.keys()))
 829     if k1 != k2: # keys should be exactly equal
 830       return _check_assert(False, k1, k2, "keys")
 831
 832     return _check_assert(operator.eq(sum(_deep_eq(d1[k], d2[k])
 833                                        for k in k1),
 834                                      len(k1)), d1, d2, "dictionaries")
 835
 836   def _deep_iter_eq(l1, l2):
 837     if len(l1) != len(l2):
 838       return _check_assert(False, l1, l2, "lengths")
 839     return _check_assert(operator.eq(sum(_deep_eq(v1, v2)
 840                                       for v1, v2 in zip(l1, l2)),
 841                                      len(l1)), l1, l2, "iterables")
 842
 843   def op(a, b):
 844     _op = operator.eq
 845     if type(a) == datetime and type(b) == datetime:
 846       s = datetime_fudge.seconds
 847       t1, t2 = (time.mktime(a.timetuple()), time.mktime(b.timetuple()))
 848       l = t1 - t2
 849       l = -l if l > 0 else l
 850       return _check_assert((-s if s > 0 else s) <= l, a, b, "dates")
 851     return _check_assert(_op(a, b), a, b, "values")
 852
 853   c1, c2 = (_v1, _v2)
 854
 855   # guard against strings because they are iterable and their
 856   # elements yield iterables infinitely.
 857   # I N C E P T I O N
 858   for t in types.StringTypes:
 859     if isinstance(_v1, t):
 860       break
 861   else:
 862     if isinstance(_v1, types.DictType):
 863       op = _deep_dict_eq
 864     else:
 865       try:
 866         c1, c2 = (list(iter(_v1)), list(iter(_v2)))
 867       except TypeError:
 868         c1, c2 = _v1, _v2
 869       else:
 870         op = _deep_iter_eq
 871
 872   return op(c1, c2)
 873
 874
 875 def parse_request_body(request):
 876     """ returns the parsed request body, handles gzip encoding """
 877
 878     raw_body = request.body
 879     content_enc = request.META.get('HTTP_CONTENT_ENCODING')
 880
 881     if content_enc == 'gzip':
 882         raw_body = zlib.decompress(raw_body)
 883
 884     return json.loads(raw_body)
 885
 886
 887 def normalize_feed_url(url):
 888     """
 889     Converts any URL to http:// or ftp:// so that it can be
 890     used with "wget". If the URL cannot be converted (invalid
 891     or unknown scheme), "None" is returned.
 892
 893     This will also normalize feed:// and itpc:// to http://.
 894
 895     >>> normalize_feed_url('itpc://example.org/podcast.rss')
 896     'http://example.org/podcast.rss'
 897
 898     If no URL scheme is defined (e.g. "curry.com"), we will
 899     simply assume the user intends to add a http:// feed.
 900
 901     >>> normalize_feed_url('curry.com')
 902     'http://curry.com/'
 903
 904     There are even some more shortcuts for advanced users
 905     and lazy typists (see the source for details).
 906
 907     >>> normalize_feed_url('fb:43FPodcast')
 908     'http://feeds.feedburner.com/43FPodcast'
 909
 910     It will also take care of converting the domain name to
 911     all-lowercase (because domains are not case sensitive):
 912
 913     >>> normalize_feed_url('http://Example.COM/')
 914     'http://example.com/'
 915
 916     Some other minimalistic changes are also taken care of,
 917     e.g. a ? with an empty query is removed:
 918
 919     >>> normalize_feed_url('http://example.org/test?')
 920     'http://example.org/test'
 921
 922     Leading and trailing whitespace is removed
 923
 924     >>> normalize_feed_url(' http://example.com/podcast.rss ')
 925     'http://example.com/podcast.rss'
 926
 927     HTTP Authentication is removed to protect users' privacy
 928
 929     >>> normalize_feed_url('http://a@b:c@host.com/')
 930     'http://host.com/'
 931     >>> normalize_feed_url('ftp://a:b:c@host.com/')
 932     'ftp://host.com/'
 933     >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
 934     'http://host.com/'
 935     >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
 936     'ftp://host.com/'
 937     >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
 938     'http://example.org/'
 939     >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
 940     'http://example.com/x%40y%3Az%40test.com/'
 941     >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
 942     'http://en.wikipedia.org/wiki/%C3%84'
 943     >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
 944     'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
 945     """
 946     url = url.strip()
 947     if not url or len(url) < 8:
 948         return None
 949
 950     if isinstance(url, unicode):
 951         url = url.encode('utf-8', 'ignore')
 952
 953     # This is a list of prefixes that you can use to minimize the amount of
 954     # keystrokes that you have to use.
 955     # Feel free to suggest other useful prefixes, and I'll add them here.
 956     PREFIXES = {
 957             'fb:': 'http://feeds.feedburner.com/%s',
 958             'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
 959             'sc:': 'http://soundcloud.com/%s',
 960             'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
 961             # YouTube playlists. To get a list of playlists per-user, use:
 962             # https://gdata.youtube.com/feeds/api/users/<username>/playlists
 963             'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
 964     }
 965
 966     for prefix, expansion in PREFIXES.iteritems():
 967         if url.startswith(prefix):
 968             url = expansion % (url[len(prefix):],)
 969             break
 970
 971     # Assume HTTP for URLs without scheme
 972     if not '://' in url:
 973         url = 'http://' + url
 974
 975     scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
 976
 977     # Schemes and domain names are case insensitive
 978     scheme, netloc = scheme.lower(), netloc.lower()
 979
 980     # encode non-encoded characters
 981     path = urllib.quote(path, '/%')
 982     query = urllib.quote_plus(query, ':&=')
 983
 984     # Remove authentication to protect users' privacy
 985     netloc = netloc.rsplit('@', 1)[-1]
 986
 987     # Normalize empty paths to "/"
 988     if path == '':
 989         path = '/'
 990
 991     # feed://, itpc:// and itms:// are really http://
 992     if scheme in ('feed', 'itpc', 'itms'):
 993         scheme = 'http'
 994
 995     if scheme not in ('http', 'https', 'ftp', 'file'):
 996         return None
 997
 998     # urlunsplit might return "a slighty different, but equivalent URL"
 999     return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
1000
1001
def partition(items, predicate=bool):
    """ Splits items into those failing and those passing the predicate

    Returns a tuple of two generators: the first yields the items for
    which predicate(item) is false, the second those for which it is
    true. Both are lazy and share one underlying pass over items, so the
    predicate is evaluated at most once per item.

    >>> rejected, accepted = partition([0, 1, 2, 0, 3])
    >>> list(rejected), list(accepted)
    ([0, 0], [1, 2, 3])
    """
    # Tag every item with its predicate result, then give each output
    # generator its own independent iterator over the tagged stream.
    tagged = ((predicate(entry), entry) for entry in items)
    rejected_src, accepted_src = itertools.tee(tagged)

    rejected = (entry for matched, entry in rejected_src if not matched)
    accepted = (entry for matched, entry in accepted_src if matched)
    return (rejected, accepted)
1006
1007
def split_quoted(s):
    """ Splits a string on whitespace, keeping quoted substrings together

    >>> split_quoted('some "quoted text"') == ['some', 'quoted text']
    True

    If the string cannot be parsed (e.g. a quote is never closed), all
    quotes and backslashes are dropped before splitting again.

    >>> split_quoted('"quoted text') == ['quoted', 'text']
    True

    # 4 backslashes here are 2 in the doctest, i.e. one in the actual string
    >>> split_quoted('text\\\\') == ['text']
    True
    """

    try:
        # the common case: shell-like splitting that honours quotes
        return shlex.split(s)

    except ValueError:
        # shlex gives up on
        #  * a missing closing quotation (eg '"text')
        #  * a dangling escape character (eg '\')
        pass

    # Retry without any of the characters shlex treats specially
    stripped = s
    for char in ('"', "'", '\\'):
        stripped = stripped.replace(char, '')

    return shlex.split(stripped)
1033
1034
def edit_link(obj):
    """ Return the link to the Django Admin Edit page

    obj must provide a primary key (obj.pk) and Django-style metadata
    (obj._meta) with an app_label and a model name. The URL is resolved
    from the admin URL name 'admin:<app_label>_<model>_change'.
    """
    opts = obj._meta

    # Django 1.6 renamed Options.module_name to model_name and later
    # releases removed the old attribute. Prefer the new name and fall
    # back to the old one for metadata objects that only provide that.
    model_name = getattr(opts, 'model_name', None) or opts.module_name

    return reverse('admin:%s_%s_change' % (opts.app_label, model_name),
                   args=(obj.pk,))
1040
1041
def random_token(length=32):
    """ Return a random token of ``length`` ASCII letters and digits

    Every character is drawn independently, so characters may repeat and
    ``length`` may exceed the size of the alphabet. The characters come
    from the operating system's randomness source (random.SystemRandom)
    instead of the default, predictable generator. A length of zero
    yields an empty string.
    """
    import random
    import string

    # ascii_letters is locale-independent (string.letters is not)
    alphabet = string.ascii_letters + string.digits
    rng = random.SystemRandom()
    return "".join(rng.choice(alphabet) for _ in range(length))