mygpo/utils.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of my.gpodder.org.
   4 #
   5 # my.gpodder.org is free software: you can redistribute it and/or modify it
   6 # under the terms of the GNU Affero General Public License as published by
   7 # the Free Software Foundation, either version 3 of the License, or (at your
   8 # option) any later version.
   9 #
  10 # my.gpodder.org is distributed in the hope that it will be useful, but
  11 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
  13 # License for more details.
  14 #
  15 # You should have received a copy of the GNU Affero General Public License
  16 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
  17 #
  18
  19 import functools
  20 import types
  21 import subprocess
  22 import os
  23 import operator
  24 import sys
  25 import re
  26 import collections
  27 import itertools
  28 from datetime import datetime, timedelta, date
  29 import time
  30 import hashlib
  31 import urlparse
  32 import urllib
  33 import urllib2
  34 import zlib
  35 import shlex
  36
  37 from django.db import transaction, IntegrityError
  38 from django.conf import settings
  39 from django.core.urlresolvers import reverse
  40
  41 from mygpo.core.json import json
  42
  43 import logging
  44 logger = logging.getLogger(__name__)
  45
  46
  47 def daterange(from_date, to_date=None, leap=timedelta(days=1)):
  48     """
  49     >>> from_d = datetime(2010, 01, 01)
  50     >>> to_d = datetime(2010, 01, 05)
  51     >>> list(daterange(from_d, to_d))
  52     [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
  53     """
  54
  55     if to_date is None:
  56         if isinstance(from_date, datetime):
  57             to_date = datetime.utcnow()
  58         else:
  59             to_date = date.today()
  60
  61     while from_date <= to_date:
  62         yield from_date
  63         from_date = from_date + leap
  64     return
  65
  66 def format_time(value):
  67     """Format an offset (in seconds) to a string
  68
  69     The offset should be an integer or float value.
  70
  71     >>> format_time(0)
  72     '00:00'
  73     >>> format_time(20)
  74     '00:20'
  75     >>> format_time(3600)
  76     '01:00:00'
  77     >>> format_time(10921)
  78     '03:02:01'
  79     """
  80     try:
  81         dt = datetime.utcfromtimestamp(value)
  82     except (ValueError, TypeError):
  83         return ''
  84
  85     if dt.hour == 0:
  86         return dt.strftime('%M:%S')
  87     else:
  88         return dt.strftime('%H:%M:%S')
  89
  90 def parse_time(value):
  91     """
  92     >>> parse_time(10)
  93     10
  94
  95     >>> parse_time('05:10') #5*60+10
  96     310
  97
  98     >>> parse_time('1:05:10') #60*60+5*60+10
  99     3910
 100     """
 101     if value is None:
 102         raise ValueError('None value in parse_time')
 103
 104     if isinstance(value, int):
 105         # Don't need to parse already-converted time value
 106         return value
 107
 108     if value == '':
 109         raise ValueError('Empty valueing in parse_time')
 110
 111     for format in ('%H:%M:%S', '%M:%S'):
 112         try:
 113             t = time.strptime(value, format)
 114             return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
 115         except ValueError, e:
 116             continue
 117
 118     return int(value)
 119
 120
 121 def parse_bool(val):
 122     """
 123     >>> parse_bool('True')
 124     True
 125
 126     >>> parse_bool('true')
 127     True
 128
 129     >>> parse_bool('')
 130     False
 131     """
 132     if isinstance(val, bool):
 133         return val
 134     if val.lower() == 'true':
 135         return True
 136     return False
 137
 138
 139 def iterate_together(lists, key=lambda x: x, reverse=False):
 140     """
 141     takes ordered, possibly sparse, lists with similar items
 142     (some items have a corresponding item in the other lists, some don't).
 143
 144     It then yield tuples of corresponding items, where one element is None is
 145     there is no corresponding entry in one of the lists.
 146
 147     Tuples where both elements are None are skipped.
 148
 149     The results of the key method are used for the comparisons.
 150
 151     If reverse is True, the lists are expected to be sorted in reverse order
 152     and the results will also be sorted reverse
 153
 154     >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
 155     [(1, 1), (2, None), (None, 3)]
 156
 157     >>> list(iterate_together([[], []]))
 158     []
 159
 160     >>> list(iterate_together([range(1, 3), range(3, 5)]))
 161     [(1, None), (2, None), (None, 3), (None, 4)]
 162
 163     >>> list(iterate_together([range(1, 3), []]))
 164     [(1, None), (2, None)]
 165
 166     >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
 167     [(1, None), (3, 3)]
 168     """
 169
 170     Next = collections.namedtuple('Next', 'item more')
 171     min_ = min if not reverse else max
 172     lt_  = operator.lt if not reverse else operator.gt
 173
 174     lists = [iter(l) for l in lists]
 175
 176     def _take(it):
 177         try:
 178             i = it.next()
 179             while i is None:
 180                 i = it.next()
 181             return Next(i, True)
 182         except StopIteration:
 183             return Next(None, False)
 184
 185     def new_res():
 186         return [None]*len(lists)
 187
 188     # take first bunch of items
 189     items = [_take(l) for l in lists]
 190
 191     while any(i.item is not None or i.more for i in items):
 192
 193         res = new_res()
 194
 195         for n, item in enumerate(items):
 196
 197             if item.item is None:
 198                 continue
 199
 200             if all(x is None for x in res):
 201                 res[n] = item.item
 202                 continue
 203
 204             min_v = min_(filter(lambda x: x is not None, res), key=key)
 205
 206             if key(item.item) == key(min_v):
 207                 res[n] = item.item
 208
 209             elif lt_(key(item.item), key(min_v)):
 210                 res = new_res()
 211                 res[n] = item.item
 212
 213         for n, x in enumerate(res):
 214             if x is not None:
 215                 items[n] = _take(lists[n])
 216
 217         yield tuple(res)
 218
 219
 220 def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
 221
 222     factor = float(val)/max_val if max_val > 0 else 0
 223
 224     # progress as percentage
 225     percentage_str = '{val:.2%}'.format(val=factor)
 226
 227     # progress bar filled with #s
 228     factor = min(int(factor*max_width), max_width)
 229     progress_str = '#' * factor + ' ' * (max_width-factor)
 230
 231     #insert percentage into bar
 232     percentage_start = int((max_width-len(percentage_str))/2)
 233     progress_str = progress_str[:percentage_start] + \
 234                    percentage_str + \
 235                    progress_str[percentage_start+len(percentage_str):]
 236
 237     print >> stream, '\r',
 238     print >> stream, '[ %s ] %s / %s | %s' % (
 239         progress_str,
 240         val,
 241         max_val,
 242         status_str),
 243     stream.flush()
 244
 245
 246 def set_cmp(list, simplify):
 247     """
 248     Builds a set out of a list but uses the results of simplify to determine equality between items
 249     """
 250     simpl = lambda x: (simplify(x), x)
 251     lst = dict(map(simpl, list))
 252     return lst.values()
 253
 254
 255 def first(it):
 256     """
 257     returns the first not-None object or None if the iterator is exhausted
 258     """
 259     for x in it:
 260         if x is not None:
 261             return x
 262     return None
 263
 264
 265 def intersect(a, b):
 266     return list(set(a) & set(b))
 267
 268
 269
 270 def remove_control_chars(s):
 271     all_chars = (unichr(i) for i in xrange(0x110000))
 272     control_chars = ''.join(map(unichr, range(0,32) + range(127,160)))
 273     control_char_re = re.compile('[%s]' % re.escape(control_chars))
 274
 275     return control_char_re.sub('', s)
 276
 277
 278 def unzip(a):
 279     return tuple(map(list,zip(*a)))
 280
 281
 282 def parse_range(s, min, max, default=None):
 283     """
 284     Parses the string and returns its value. If the value is outside the given
 285     range, its closest number within the range is returned
 286
 287     >>> parse_range('5', 0, 10)
 288     5
 289
 290     >>> parse_range('0', 5, 10)
 291     5
 292
 293     >>> parse_range('15',0, 10)
 294     10
 295
 296     >>> parse_range('x', 0, 20)
 297     10
 298
 299     >>> parse_range('x', 0, 20, 20)
 300     20
 301     """
 302     try:
 303         val = int(s)
 304         if val < min:
 305             return min
 306         if val > max:
 307             return max
 308         return val
 309
 310     except (ValueError, TypeError):
 311         return default if default is not None else (max-min)/2
 312
 313
 314
 315 def flatten(l):
 316     return [item for sublist in l for item in sublist]
 317
 318
 319 def linearize(key, iterators, reverse=False):
 320     """
 321     Linearizes a number of iterators, sorted by some comparison function
 322     """
 323
 324     iters = [iter(i) for i in iterators]
 325     vals = []
 326     for i in iters:
 327         try:
 328             v = i.next()
 329             vals. append( (v, i) )
 330         except StopIteration:
 331             continue
 332
 333     while vals:
 334         vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
 335         val, it = vals.pop(0)
 336         yield val
 337         try:
 338             next_val = it.next()
 339             vals.append( (next_val, it) )
 340         except StopIteration:
 341             pass
 342
 343
 344 def skip_pairs(iterator, cmp=cmp):
 345     """ Skips pairs of equal items
 346
 347     >>> list(skip_pairs([]))
 348     []
 349
 350     >>> list(skip_pairs([1]))
 351     [1]
 352
 353     >>> list(skip_pairs([1, 2, 3]))
 354     [1, 2, 3]
 355
 356     >>> list(skip_pairs([1, 1]))
 357     []
 358
 359     >>> list(skip_pairs([1, 2, 2]))
 360     [1]
 361
 362     >>> list(skip_pairs([1, 2, 2, 3]))
 363     [1, 3]
 364
 365     >>> list(skip_pairs([1, 2, 2, 2]))
 366     [1, 2]
 367
 368     >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
 369     [1, 3]
 370     """
 371
 372     iterator = iter(iterator)
 373     next = iterator.next()
 374
 375     while True:
 376         item = next
 377         try:
 378             next = iterator.next()
 379         except StopIteration as e:
 380             yield item
 381             raise e
 382
 383         if cmp(item, next) == 0:
 384             next = iterator.next()
 385         else:
 386             yield item
 387
 388
 389 def get_timestamp(datetime_obj):
 390     """ Returns the timestamp as an int for the given datetime object
 391
 392     >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
 393     1302168606
 394
 395     >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
 396     0
 397     """
 398     return int(time.mktime(datetime_obj.timetuple()))
 399
 400
 401
 402 re_url = re.compile('^https?://')
 403
 404 def is_url(string):
 405     """ Returns true if a string looks like an URL
 406
 407     >>> is_url('http://example.com/some-path/file.xml')
 408     True
 409
 410     >>> is_url('something else')
 411     False
 412     """
 413
 414     return bool(re_url.match(string))
 415
 416
 417
 418 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
 419 # this does not increase asymptotical complexity
 420 # but can still waste more time than it saves.
 421 def shortest_of(strings):
 422     return min(strings, key=len)
 423
 424 def longest_substr(strings):
 425     """
 426     Returns the longest common substring of the given strings
 427     """
 428
 429     substr = ""
 430     if not strings:
 431         return substr
 432     reference = shortest_of(strings)
 433     length = len(reference)
 434     #find a suitable slice i:j
 435     for i in xrange(length):
 436         #only consider strings long at least len(substr) + 1
 437         for j in xrange(i + len(substr) + 1, length):
 438             candidate = reference[i:j]
 439             if all(candidate in text for text in strings):
 440                 substr = candidate
 441     return substr
 442
 443
 444
 445 def additional_value(it, gen_val, val_changed=lambda _: True):
 446     """ Provides an additional value to the elements, calculated when needed
 447
 448     For the elements from the iterator, some additional value can be computed
 449     by gen_val (which might be an expensive computation).
 450
 451     If the elements in the iterator are ordered so that some subsequent
 452     elements would generate the same additional value, val_changed can be
 453     provided, which receives the next element from the iterator and the
 454     previous additional value. If the element would generate the same
 455     additional value (val_changed returns False), its computation is skipped.
 456
 457     >>> # get the next full hundred higher than x
 458     >>> # this will probably be an expensive calculation
 459     >>> next_hundred = lambda x: x + 100-(x % 100)
 460
 461     >>> # returns True if h is not the value that next_hundred(x) would provide
 462     >>> # this should be a relatively cheap calculation, compared to the above
 463     >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
 464
 465     >>> xs = [0, 50, 100, 101, 199, 200, 201]
 466     >>> list(additional_value(xs, next_hundred, diff_hundred))
 467     [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
 468     """
 469
 470     _none = object()
 471     current = _none
 472
 473     for x in it:
 474         if current is _none or val_changed(x, current):
 475             current = gen_val(x)
 476
 477         yield (x, current)
 478
 479
 480 def file_hash(f, h=hashlib.md5, block_size=2**20):
 481     """ returns the hash of the contents of a file """
 482     f_hash = h()
 483     for chunk in iter(lambda: f.read(block_size), ''):
 484         f_hash.update(chunk)
 485     return f_hash
 486
 487
 488
 489 def split_list(l, prop):
 490     """ split elements that satisfy a property, and those that don't """
 491     match   = filter(prop, l)
 492     nomatch = [x for x in l if x not in match]
 493     return match, nomatch
 494
 495
 496 def sorted_chain(links, key, reverse=False):
 497     """ Takes a list of iters can iterates over sorted elements
 498
 499     Each elment of links should be a tuple of (sort_key, iterator). The
 500     elements of each iterator should be sorted already. sort_key should
 501     indicate the key of the first element and needs to be comparable to the
 502     result of key(elem).
 503
 504     The function returns an iterator over the globally sorted element that
 505     ensures that as little iterators as possible are evaluated.  When
 506     evaluating """
 507
 508     # mixed_list initially contains all placeholders; later evaluated
 509     # elements (from the iterators) are mixed in
 510     mixed_list = [(k, link, True) for k, link in links]
 511
 512     while mixed_list:
 513         _, item, expand = mixed_list.pop(0)
 514
 515         # found an element (from an earlier expansion), yield it
 516         if not expand:
 517             yield item
 518             continue
 519
 520         # found an iter that needs to be expanded.
 521         # The iterator is fully consumed
 522         new_items = [(key(i), i, False) for i in item]
 523
 524         # sort links (placeholders) and elements together
 525         mixed_list = sorted(mixed_list + new_items, key=lambda (k, _v, _e): k,
 526                 reverse=reverse)
 527
 528
 529 def url_add_authentication(url, username, password):
 530     """
 531     Adds authentication data (username, password) to a given
 532     URL in order to construct an authenticated URL.
 533
 534     >>> url_add_authentication('https://host.com/', '', None)
 535     'https://host.com/'
 536     >>> url_add_authentication('http://example.org/', None, None)
 537     'http://example.org/'
 538     >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
 539     'telnet://foo:bar@host.com/'
 540     >>> url_add_authentication('ftp://example.org', 'billy', None)
 541     'ftp://billy@example.org'
 542     >>> url_add_authentication('ftp://example.org', 'billy', '')
 543     'ftp://billy:@example.org'
 544     >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
 545     'http://aa:bc@localhost/x'
 546     >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
 547     'http://i%2Fo:P@ss:@blubb.lan/u.html'
 548     >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
 549     'http://c:d@x.org/'
 550     >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
 551     'http://P@x:i%2F@cx.lan'
 552     >>> url_add_authentication('http://x.org/', 'a b', 'c d')
 553     'http://a%20b:c%20d@x.org/'
 554     """
 555     if username is None or username == '':
 556         return url
 557
 558     # Relaxations of the strict quoting rules (bug 1521):
 559     # 1. Accept '@' in username and password
 560     # 2. Acecpt ':' in password only
 561     username = urllib.quote(username, safe='@')
 562
 563     if password is not None:
 564         password = urllib.quote(password, safe='@:')
 565         auth_string = ':'.join((username, password))
 566     else:
 567         auth_string = username
 568
 569     url = url_strip_authentication(url)
 570
 571     url_parts = list(urlparse.urlsplit(url))
 572     # url_parts[1] is the HOST part of the URL
 573     url_parts[1] = '@'.join((auth_string, url_parts[1]))
 574
 575     return urlparse.urlunsplit(url_parts)
 576
 577
 578 def urlopen(url, headers=None, data=None):
 579     """
 580     An URL opener with the User-agent set to gPodder (with version)
 581     """
 582     username, password = username_password_from_url(url)
 583     if username is not None or password is not None:
 584         url = url_strip_authentication(url)
 585         password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
 586         password_mgr.add_password(None, url, username, password)
 587         handler = urllib2.HTTPBasicAuthHandler(password_mgr)
 588         opener = urllib2.build_opener(handler)
 589     else:
 590         opener = urllib2.build_opener()
 591
 592     if headers is None:
 593         headers = {}
 594     else:
 595         headers = dict(headers)
 596
 597     headers.update({'User-agent': settings.USER_AGENT})
 598     request = urllib2.Request(url, data=data, headers=headers)
 599     return opener.open(request)
 600
 601
 602
 603 def username_password_from_url(url):
 604     r"""
 605     Returns a tuple (username,password) containing authentication
 606     data from the specified URL or (None,None) if no authentication
 607     data can be found in the URL.
 608
 609     See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)
 610
 611     >>> username_password_from_url('https://@host.com/')
 612     ('', None)
 613     >>> username_password_from_url('telnet://host.com/')
 614     (None, None)
 615     >>> username_password_from_url('ftp://foo:@host.com/')
 616     ('foo', '')
 617     >>> username_password_from_url('http://a:b@host.com/')
 618     ('a', 'b')
 619     >>> username_password_from_url(1)
 620     Traceback (most recent call last):
 621       ...
 622     ValueError: URL has to be a string or unicode object.
 623     >>> username_password_from_url(None)
 624     Traceback (most recent call last):
 625       ...
 626     ValueError: URL has to be a string or unicode object.
 627     >>> username_password_from_url('http://a@b:c@host.com/')
 628     ('a@b', 'c')
 629     >>> username_password_from_url('ftp://a:b:c@host.com/')
 630     ('a', 'b:c')
 631     >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
 632     ('i/o', 'P@ss:')
 633     >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
 634     ('\xc3\xb6sterreich', None)
 635     >>> username_password_from_url('http://w%20x:y%20z@example.org/')
 636     ('w x', 'y z')
 637     >>> username_password_from_url('http://example.com/x@y:z@test.com/')
 638     (None, None)
 639     """
 640     if type(url) not in (str, unicode):
 641         raise ValueError('URL has to be a string or unicode object.')
 642
 643     (username, password) = (None, None)
 644
 645     (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
 646
 647     if '@' in netloc:
 648         (authentication, netloc) = netloc.rsplit('@', 1)
 649         if ':' in authentication:
 650             (username, password) = authentication.split(':', 1)
 651
 652             # RFC1738 dictates that we should not allow ['/', '@', ':']
 653             # characters in the username and password field (Section 3.1):
 654             #
 655             # 1. The "/" can't be in there at this point because of the way
 656             #    urlparse (which we use above) works.
 657             # 2. Due to gPodder bug 1521, we allow "@" in the username and
 658             #    password field. We use netloc.rsplit('@', 1), which will
 659             #    make sure that we split it at the last '@' in netloc.
 660             # 3. The colon must be excluded (RFC2617, Section 2) in the
 661             #    username, but is apparently allowed in the password. This
 662             #    is handled by the authentication.split(':', 1) above, and
 663             #    will cause any extraneous ':'s to be part of the password.
 664
 665             username = urllib.unquote(username)
 666             password = urllib.unquote(password)
 667         else:
 668             username = urllib.unquote(authentication)
 669
 670     return (username, password)
 671
 672
 673 def url_strip_authentication(url):
 674     """
 675     Strips authentication data from an URL. Returns the URL with
 676     the authentication data removed from it.
 677
 678     >>> url_strip_authentication('https://host.com/')
 679     'https://host.com/'
 680     >>> url_strip_authentication('telnet://foo:bar@host.com/')
 681     'telnet://host.com/'
 682     >>> url_strip_authentication('ftp://billy@example.org')
 683     'ftp://example.org'
 684     >>> url_strip_authentication('ftp://billy:@example.org')
 685     'ftp://example.org'
 686     >>> url_strip_authentication('http://aa:bc@localhost/x')
 687     'http://localhost/x'
 688     >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
 689     'http://blubb.lan/u.html'
 690     >>> url_strip_authentication('http://c:d@x.org/')
 691     'http://x.org/'
 692     >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
 693     'http://cx.lan'
 694     >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
 695     'http://example.com/'
 696     """
 697     url_parts = list(urlparse.urlsplit(url))
 698     # url_parts[1] is the HOST part of the URL
 699
 700     # Remove existing authentication data
 701     if '@' in url_parts[1]:
 702         url_parts[1] = url_parts[1].rsplit('@', 1)[1]
 703
 704     return urlparse.urlunsplit(url_parts)
 705
 706
 707 # Native filesystem encoding detection
 708 encoding = sys.getfilesystemencoding()
 709
 710 def sanitize_encoding(filename):
 711     r"""
 712     Generate a sanitized version of a string (i.e.
 713     remove invalid characters and encode in the
 714     detected native language encoding).
 715
 716     >>> sanitize_encoding('\x80')
 717     ''
 718     >>> sanitize_encoding(u'unicode')
 719     'unicode'
 720     """
 721     # The encoding problem goes away in Python 3.. hopefully!
 722     if sys.version_info >= (3, 0):
 723         return filename
 724
 725     global encoding
 726     if not isinstance(filename, unicode):
 727         filename = filename.decode(encoding, 'ignore')
 728     return filename.encode(encoding, 'ignore')
 729
 730
 731 def get_git_head():
 732     """ returns the commit and message of the current git HEAD """
 733
 734     try:
 735         pr = subprocess.Popen('/usr/bin/git log -n 1 --oneline'.split(),
 736             cwd = settings.BASE_DIR,
 737             stdout = subprocess.PIPE,
 738             stderr = subprocess.PIPE,
 739         )
 740
 741     except OSError:
 742         return None, None
 743
 744     (out, err) = pr.communicate()
 745     if err:
 746         return None, None
 747
 748     outs = out.split()
 749     commit = outs[0]
 750     msg = ' ' .join(outs[1:])
 751     return commit, msg
 752
 753
 754
 755 # https://gist.github.com/samuraisam/901117
 756
 757 default_fudge = timedelta(seconds=0, microseconds=0, days=0)
 758
 759 def deep_eq(_v1, _v2, datetime_fudge=default_fudge, _assert=False):
 760   """
 761   Tests for deep equality between two python data structures recursing
 762   into sub-structures if necessary. Works with all python types including
 763   iterators and generators. This function was dreampt up to test API responses
 764   but could be used for anything. Be careful. With deeply nested structures
 765   you may blow the stack.
 766
 767   Options:
 768             datetime_fudge => this is a datetime.timedelta object which, when
 769                               comparing dates, will accept values that differ
 770                               by the number of seconds specified
 771             _assert        => passing yes for this will raise an assertion error
 772                               when values do not match, instead of returning
 773                               false (very useful in combination with pdb)
 774
 775   Doctests included:
 776
 777   >>> x1, y1 = ({'a': 'b'}, {'a': 'b'})
 778   >>> deep_eq(x1, y1)
 779   True
 780   >>> x2, y2 = ({'a': 'b'}, {'b': 'a'})
 781   >>> deep_eq(x2, y2)
 782   False
 783   >>> x3, y3 = ({'a': {'b': 'c'}}, {'a': {'b': 'c'}})
 784   >>> deep_eq(x3, y3)
 785   True
 786   >>> x4, y4 = ({'c': 't', 'a': {'b': 'c'}}, {'a': {'b': 'n'}, 'c': 't'})
 787   >>> deep_eq(x4, y4)
 788   False
 789   >>> x5, y5 = ({'a': [1,2,3]}, {'a': [1,2,3]})
 790   >>> deep_eq(x5, y5)
 791   True
 792   >>> x6, y6 = ({'a': [1,'b',8]}, {'a': [2,'b',8]})
 793   >>> deep_eq(x6, y6)
 794   False
 795   >>> x7, y7 = ('a', 'a')
 796   >>> deep_eq(x7, y7)
 797   True
 798   >>> x8, y8 = (['p','n',['asdf']], ['p','n',['asdf']])
 799   >>> deep_eq(x8, y8)
 800   True
 801   >>> x9, y9 = (['p','n',['asdf',['omg']]], ['p', 'n', ['asdf',['nowai']]])
 802   >>> deep_eq(x9, y9)
 803   False
 804   >>> x10, y10 = (1, 2)
 805   >>> deep_eq(x10, y10)
 806   False
 807   >>> deep_eq((str(p) for p in xrange(10)), (str(p) for p in xrange(10)))
 808   True
 809   >>> str(deep_eq(range(4), range(4)))
 810   'True'
 811   >>> deep_eq(xrange(100), xrange(100))
 812   True
 813   >>> deep_eq(xrange(2), xrange(5))
 814   False
 815   >>> from datetime import datetime, timedelta
 816   >>> d1, d2 = (datetime.utcnow(), datetime.utcnow() + timedelta(seconds=4))
 817   >>> deep_eq(d1, d2)
 818   False
 819   >>> deep_eq(d1, d2, datetime_fudge=timedelta(seconds=5))
 820   True
 821   """
 822   _deep_eq = functools.partial(deep_eq, datetime_fudge=datetime_fudge,
 823                                _assert=_assert)
 824
 825   def _check_assert(R, a, b, reason=''):
 826     if _assert and not R:
 827       assert 0, "an assertion has failed in deep_eq (%s) %s != %s" % (
 828         reason, str(a), str(b))
 829     return R
 830
 831   def _deep_dict_eq(d1, d2):
 832     k1, k2 = (sorted(d1.keys()), sorted(d2.keys()))
 833     if k1 != k2: # keys should be exactly equal
 834       return _check_assert(False, k1, k2, "keys")
 835
 836     return _check_assert(operator.eq(sum(_deep_eq(d1[k], d2[k])
 837                                        for k in k1),
 838                                      len(k1)), d1, d2, "dictionaries")
 839
 840   def _deep_iter_eq(l1, l2):
 841     if len(l1) != len(l2):
 842       return _check_assert(False, l1, l2, "lengths")
 843     return _check_assert(operator.eq(sum(_deep_eq(v1, v2)
 844                                       for v1, v2 in zip(l1, l2)),
 845                                      len(l1)), l1, l2, "iterables")
 846
 847   def op(a, b):
 848     _op = operator.eq
 849     if type(a) == datetime and type(b) == datetime:
 850       s = datetime_fudge.seconds
 851       t1, t2 = (time.mktime(a.timetuple()), time.mktime(b.timetuple()))
 852       l = t1 - t2
 853       l = -l if l > 0 else l
 854       return _check_assert((-s if s > 0 else s) <= l, a, b, "dates")
 855     return _check_assert(_op(a, b), a, b, "values")
 856
 857   c1, c2 = (_v1, _v2)
 858
 859   # guard against strings because they are iterable and their
 860   # elements yield iterables infinitely.
 861   # I N C E P T I O N
 862   for t in types.StringTypes:
 863     if isinstance(_v1, t):
 864       break
 865   else:
 866     if isinstance(_v1, types.DictType):
 867       op = _deep_dict_eq
 868     else:
 869       try:
 870         c1, c2 = (list(iter(_v1)), list(iter(_v2)))
 871       except TypeError:
 872         c1, c2 = _v1, _v2
 873       else:
 874         op = _deep_iter_eq
 875
 876   return op(c1, c2)
 877
 878
 879 def parse_request_body(request):
 880     """ returns the parsed request body, handles gzip encoding """
 881
 882     raw_body = request.body
 883     content_enc = request.META.get('HTTP_CONTENT_ENCODING')
 884
 885     if content_enc == 'gzip':
 886         raw_body = zlib.decompress(raw_body)
 887
 888     return json.loads(raw_body)
 889
 890
 891 def normalize_feed_url(url):
 892     """
 893     Converts any URL to http:// or ftp:// so that it can be
 894     used with "wget". If the URL cannot be converted (invalid
 895     or unknown scheme), "None" is returned.
 896
 897     This will also normalize feed:// and itpc:// to http://.
 898
 899     >>> normalize_feed_url('itpc://example.org/podcast.rss')
 900     'http://example.org/podcast.rss'
 901
 902     If no URL scheme is defined (e.g. "curry.com"), we will
 903     simply assume the user intends to add a http:// feed.
 904
 905     >>> normalize_feed_url('curry.com')
 906     'http://curry.com/'
 907
 908     There are even some more shortcuts for advanced users
 909     and lazy typists (see the source for details).
 910
 911     >>> normalize_feed_url('fb:43FPodcast')
 912     'http://feeds.feedburner.com/43FPodcast'
 913
 914     It will also take care of converting the domain name to
 915     all-lowercase (because domains are not case sensitive):
 916
 917     >>> normalize_feed_url('http://Example.COM/')
 918     'http://example.com/'
 919
 920     Some other minimalistic changes are also taken care of,
 921     e.g. a ? with an empty query is removed:
 922
 923     >>> normalize_feed_url('http://example.org/test?')
 924     'http://example.org/test'
 925
 926     Leading and trailing whitespace is removed
 927
 928     >>> normalize_feed_url(' http://example.com/podcast.rss ')
 929     'http://example.com/podcast.rss'
 930
 931     HTTP Authentication is removed to protect users' privacy
 932
 933     >>> normalize_feed_url('http://a@b:c@host.com/')
 934     'http://host.com/'
 935     >>> normalize_feed_url('ftp://a:b:c@host.com/')
 936     'ftp://host.com/'
 937     >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
 938     'http://host.com/'
 939     >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
 940     'ftp://host.com/'
 941     >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
 942     'http://example.org/'
 943     >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
 944     'http://example.com/x%40y%3Az%40test.com/'
 945     >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
 946     'http://en.wikipedia.org/wiki/%C3%84'
 947     >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
 948     'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
 949     """
 950     url = url.strip()
 951     if not url or len(url) < 8:
 952         return None
 953
 954     if isinstance(url, unicode):
 955         url = url.encode('utf-8', 'ignore')
 956
 957     # This is a list of prefixes that you can use to minimize the amount of
 958     # keystrokes that you have to use.
 959     # Feel free to suggest other useful prefixes, and I'll add them here.
 960     PREFIXES = {
 961             'fb:': 'http://feeds.feedburner.com/%s',
 962             'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
 963             'sc:': 'http://soundcloud.com/%s',
 964             'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
 965             # YouTube playlists. To get a list of playlists per-user, use:
 966             # https://gdata.youtube.com/feeds/api/users/<username>/playlists
 967             'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
 968     }
 969
 970     for prefix, expansion in PREFIXES.iteritems():
 971         if url.startswith(prefix):
 972             url = expansion % (url[len(prefix):],)
 973             break
 974
 975     # Assume HTTP for URLs without scheme
 976     if not '://' in url:
 977         url = 'http://' + url
 978
 979     scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
 980
 981     # Schemes and domain names are case insensitive
 982     scheme, netloc = scheme.lower(), netloc.lower()
 983
 984     # encode non-encoded characters
 985     path = urllib.quote(path, '/%')
 986     query = urllib.quote_plus(query, ':&=')
 987
 988     # Remove authentication to protect users' privacy
 989     netloc = netloc.rsplit('@', 1)[-1]
 990
 991     # Normalize empty paths to "/"
 992     if path == '':
 993         path = '/'
 994
 995     # feed://, itpc:// and itms:// are really http://
 996     if scheme in ('feed', 'itpc', 'itms'):
 997         scheme = 'http'
 998
 999     if scheme not in ('http', 'https', 'ftp', 'file'):
1000         return None
1001
1002     # urlunsplit might return "a slighty different, but equivalent URL"
1003     return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
1004
1005
def partition(items, predicate=bool):
    """ Splits items into two lazy iterators according to predicate

    Returns a tuple (rejected, accepted): the first iterator yields the
    items for which predicate(item) is false, the second one those for
    which it is true. The predicate is evaluated once per item, and only
    when one of the two iterators is consumed.

    >>> rejected, accepted = partition([0, 1, 2, 0, 3])
    >>> list(rejected), list(accepted)
    ([0, 0], [1, 2, 3])
    """
    # tag every item with its predicate result exactly once ...
    tagged = ((predicate(item), item) for item in items)

    # ... and let both result iterators share that single pass
    rejected_src, accepted_src = itertools.tee(tagged)

    rejected = (item for matched, item in rejected_src if not matched)
    accepted = (item for matched, item in accepted_src if matched)
    return (rejected, accepted)
1010
1011
def split_quoted(s):
    """ Splits a string at whitespace, keeping quoted parts together

    >>> split_quoted('some "quoted text"') == ['some', 'quoted text']
    True

    If the string can not be parsed (unbalanced quotes, dangling escape
    character), all quotes and backslashes are dropped and the rest is
    split at whitespace.

    >>> split_quoted('"quoted text') == ['quoted', 'text']
    True

    # the 4 backslashes here are 2 in the doctest, i.e. 1 in the string
    >>> split_quoted('text\\\\') == ['text']
    True
    """

    try:
        # the regular case: shell-like splitting that honors quotes
        return shlex.split(s)

    except ValueError:
        # No closing quotation (eg '"text')
        # No escaped character (eg a single trailing backslash)
        pass

    # remove everything shlex could stumble upon and split again
    for special in ('"', "'", '\\'):
        s = s.replace(special, '')

    return shlex.split(s)
1037
1038
def edit_link(obj):
    """ Return the link to the Django Admin Edit page

    obj is a model instance; the URL of the admin "change" view for its
    model and primary key is returned. """
    meta = obj._meta

    # Options.module_name was renamed to model_name in Django 1.6 and the
    # old name is deprecated; prefer the new attribute and only fall back
    # to the old one where model_name does not exist.
    try:
        model_name = meta.model_name
    except AttributeError:
        model_name = meta.module_name

    return reverse('admin:%s_%s_change' % (meta.app_label, model_name),
                   args=(obj.pk,))
1044
1045
def random_token(length=32):
    """ Return a random token of ASCII letters and digits

    Every character is drawn independently from the operating system's
    random source, so characters may repeat and 'length' is not limited
    by the size of the alphabet. """
    import random
    import string

    # ascii_letters is locale-independent (string.letters is not)
    alphabet = string.ascii_letters + string.digits

    # SystemRandom is backed by os.urandom and therefore suitable for
    # tokens, unlike the default Mersenne Twister generator
    rng = random.SystemRandom()

    return "".join(rng.choice(alphabet) for _ in range(length))
1050
1051
def to_maxlength(cls, field, val):
    """ Cut val to the maximum length of cls's field

    cls is a model class, field the name of one of its fields. None is
    passed through unchanged. If the field does not define a max_length,
    val is returned as it is. A warning is logged whenever val actually
    had to be shortened. """
    if val is None:
        return None

    max_length = cls._meta.get_field(field).max_length

    # fields without a length limit report max_length = None; there is
    # nothing to cut (and nothing to compare the length against)
    if max_length is None:
        return val

    orig_length = len(val)
    if orig_length > max_length:
        val = val[:max_length]
        logger.warning('%s.%s length reduced from %d to %d',
                       cls.__name__, field, orig_length, max_length)

    return val
1065
1066
def get_domain(url):
    """ Returns the domain name of a URL

    The port and any user information are not part of the result.

    >>> get_domain('http://example.com')
    'example.com'

    >>> get_domain('https://example.com:80/my-podcast/feed.rss')
    'example.com'

    >>> get_domain('http://user:secret@example.com:8080/feed.rss')
    'example.com'

    IPv6 literals are returned including their brackets

    >>> get_domain('http://[::1]:8000/feed.rss')
    '[::1]'
    """
    netloc = urlparse.urlparse(url).netloc

    # drop "user:password@"; it contains a colon that must not be
    # mistaken for the port separator
    host = netloc.rsplit('@', 1)[-1]

    # the colons inside a bracketed IPv6 literal are part of the address
    if host.startswith('['):
        closing = host.find(']')
        if closing != -1:
            return host[:closing + 1]

    # everything after the first colon is the port
    return host.partition(':')[0]
1083
1084
def set_ordered_entries(obj, new_entries, existing, EntryClass,
                        value_name, parent_name):
    """ Update the object's entries to the given list

    'new_entries' is the list of values in their desired order; each of
    them ends up wrapped in an EntryClass instance whose 'order' is the
    value's index in that list. 'existing' maps values to the EntryClass
    instances that are currently stored; it is modified in place (entries
    that are kept are removed from it). 'value_name' is the name of the
    EntryClass property that contains the values; 'parent_name' is the one
    that references obj.

    Entries that do not exist are created. Existing entries that are not in
    'new_entries' are deleted. """

    logger.info('%d existing entries', len(existing))

    logger.info('%d new entries', len(new_entries))

    # Step 1: move all stored entries to order numbers above everything
    # that is assigned in step 2 (presumably to keep the order values from
    # colliding while entries are re-ordered -- confirm against the models)
    with transaction.atomic():
        orders = [stored.order for stored in existing.values()]
        orders.append(len(new_entries))
        max_order = max(orders)
        logger.info('Renumbering entries starting from %d', max_order+1)
        position = max_order + 1
        for stored in existing.values():
            stored.order = position
            stored.save()
            position += 1

    logger.info('%d existing entries', len(existing))

    # Step 2: bring every requested value to its final position
    for position, value in enumerate(new_entries):
        try:
            stored = existing.pop(value)
            logger.info('Updating existing entry %d: %s', position, value)
            stored.order = position
            stored.save()
        except KeyError:
            # the value has no entry yet
            logger.info('Creating new entry %d: %s', position, value)
            try:
                fields = {}
                fields[value_name] = value
                fields[parent_name] = obj

                from mygpo.podcasts.models import ScopedModel
                if issubclass(EntryClass, ScopedModel):
                    fields['scope'] = obj.scope

                EntryClass.objects.create(order=position, **fields)
            except IntegrityError as ie:
                # best effort: log the problem and go on with the next value
                logger.warn('Could not create enry for %s: %s', obj, ie)

    # Step 3: whatever is left in 'existing' was not requested anymore
    with transaction.atomic():
        stale_pks = [stored.pk for stored in existing.values()]
        logger.info('Deleting %d entries', len(stale_pks))
        EntryClass.objects.filter(id__in=stale_pks).delete()