mygpo/utils.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of my.gpodder.org.
   4 #
   5 # my.gpodder.org is free software: you can redistribute it and/or modify it
   6 # under the terms of the GNU Affero General Public License as published by
   7 # the Free Software Foundation, either version 3 of the License, or (at your
   8 # option) any later version.
   9 #
  10 # my.gpodder.org is distributed in the hope that it will be useful, but
  11 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
  13 # License for more details.
  14 #
  15 # You should have received a copy of the GNU Affero General Public License
  16 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
  17 #
  18
  19 import json
  20 import functools
  21 import types
  22 import subprocess
  23 import os
  24 import operator
  25 import sys
  26 import re
  27 import collections
  28 import itertools
  29 from datetime import datetime, timedelta, date
  30 import time
  31 import hashlib
  32 import urllib.parse
  33 import urllib.request, urllib.parse, urllib.error
  34 import urllib.request, urllib.error, urllib.parse
  35 import zlib
  36 import shlex
  37
  38 from django.db import transaction, IntegrityError
  39 from django.conf import settings
  40 from django.core.urlresolvers import reverse
  41
  42 import logging
  43 logger = logging.getLogger(__name__)
  44
  45
  46 def daterange(from_date, to_date=None, leap=timedelta(days=1)):
  47     """
  48     >>> from_d = datetime(2010, 1, 1)
  49     >>> to_d = datetime(2010, 1, 5)
  50     >>> list(daterange(from_d, to_d))
  51     [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
  52     """
  53
  54     if to_date is None:
  55         if isinstance(from_date, datetime):
  56             to_date = datetime.utcnow()
  57         else:
  58             to_date = date.today()
  59
  60     while from_date <= to_date:
  61         yield from_date
  62         from_date = from_date + leap
  63     return
  64
  65 def format_time(value):
  66     """Format an offset (in seconds) to a string
  67
  68     The offset should be an integer or float value.
  69
  70     >>> format_time(0)
  71     '00:00'
  72     >>> format_time(20)
  73     '00:20'
  74     >>> format_time(3600)
  75     '01:00:00'
  76     >>> format_time(10921)
  77     '03:02:01'
  78     """
  79     try:
  80         dt = datetime.utcfromtimestamp(value)
  81     except (ValueError, TypeError):
  82         return ''
  83
  84     if dt.hour == 0:
  85         return dt.strftime('%M:%S')
  86     else:
  87         return dt.strftime('%H:%M:%S')
  88
  89 def parse_time(value):
  90     """
  91     >>> parse_time(10)
  92     10
  93
  94     >>> parse_time('05:10') #5*60+10
  95     310
  96
  97     >>> parse_time('1:05:10') #60*60+5*60+10
  98     3910
  99     """
 100     if value is None:
 101         raise ValueError('None value in parse_time')
 102
 103     if isinstance(value, int):
 104         # Don't need to parse already-converted time value
 105         return value
 106
 107     if value == '':
 108         raise ValueError('Empty valueing in parse_time')
 109
 110     for format in ('%H:%M:%S', '%M:%S'):
 111         try:
 112             t = time.strptime(value, format)
 113             return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
 114         except ValueError as e:
 115             continue
 116
 117     return int(value)
 118
 119
 120 def parse_bool(val):
 121     """
 122     >>> parse_bool('True')
 123     True
 124
 125     >>> parse_bool('true')
 126     True
 127
 128     >>> parse_bool('')
 129     False
 130     """
 131     if isinstance(val, bool):
 132         return val
 133     if val.lower() == 'true':
 134         return True
 135     return False
 136
 137
 138 def iterate_together(lists, key=lambda x: x, reverse=False):
 139     """
 140     takes ordered, possibly sparse, lists with similar items
 141     (some items have a corresponding item in the other lists, some don't).
 142
 143     It then yield tuples of corresponding items, where one element is None is
 144     there is no corresponding entry in one of the lists.
 145
 146     Tuples where both elements are None are skipped.
 147
 148     The results of the key method are used for the comparisons.
 149
 150     If reverse is True, the lists are expected to be sorted in reverse order
 151     and the results will also be sorted reverse
 152
 153     >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
 154     [(1, 1), (2, None), (None, 3)]
 155
 156     >>> list(iterate_together([[], []]))
 157     []
 158
 159     >>> list(iterate_together([range(1, 3), range(3, 5)]))
 160     [(1, None), (2, None), (None, 3), (None, 4)]
 161
 162     >>> list(iterate_together([range(1, 3), []]))
 163     [(1, None), (2, None)]
 164
 165     >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
 166     [(1, None), (3, 3)]
 167     """
 168
 169     Next = collections.namedtuple('Next', 'item more')
 170     min_ = min if not reverse else max
 171     lt_  = operator.lt if not reverse else operator.gt
 172
 173     lists = [iter(l) for l in lists]
 174
 175     def _take(it):
 176         try:
 177             i = next(it)
 178             while i is None:
 179                 i = next(it)
 180             return Next(i, True)
 181         except StopIteration:
 182             return Next(None, False)
 183
 184     def new_res():
 185         return [None]*len(lists)
 186
 187     # take first bunch of items
 188     items = [_take(l) for l in lists]
 189
 190     while any(i.item is not None or i.more for i in items):
 191
 192         res = new_res()
 193
 194         for n, item in enumerate(items):
 195
 196             if item.item is None:
 197                 continue
 198
 199             if all(x is None for x in res):
 200                 res[n] = item.item
 201                 continue
 202
 203             min_v = min_(filter(lambda x: x is not None, res), key=key)
 204
 205             if key(item.item) == key(min_v):
 206                 res[n] = item.item
 207
 208             elif lt_(key(item.item), key(min_v)):
 209                 res = new_res()
 210                 res[n] = item.item
 211
 212         for n, x in enumerate(res):
 213             if x is not None:
 214                 items[n] = _take(lists[n])
 215
 216         yield tuple(res)
 217
 218
 219 def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
 220
 221     factor = float(val)/max_val if max_val > 0 else 0
 222
 223     # progress as percentage
 224     percentage_str = '{val:.2%}'.format(val=factor)
 225
 226     # progress bar filled with #s
 227     factor = min(int(factor*max_width), max_width)
 228     progress_str = '#' * factor + ' ' * (max_width-factor)
 229
 230     #insert percentage into bar
 231     percentage_start = int((max_width-len(percentage_str))/2)
 232     progress_str = progress_str[:percentage_start] + \
 233                    percentage_str + \
 234                    progress_str[percentage_start+len(percentage_str):]
 235
 236     print('\r', end=' ', file=stream)
 237     print('[ %s ] %s / %s | %s' % (
 238         progress_str,
 239         val,
 240         max_val,
 241         status_str), end=' ', file=stream)
 242     stream.flush()
 243
 244
 245 def set_cmp(list, simplify):
 246     """
 247     Builds a set out of a list but uses the results of simplify to determine equality between items
 248     """
 249     simpl = lambda x: (simplify(x), x)
 250     lst = dict(map(simpl, list))
 251     return list(lst.values())
 252
 253
 254 def first(it):
 255     """
 256     returns the first not-None object or None if the iterator is exhausted
 257     """
 258     for x in it:
 259         if x is not None:
 260             return x
 261     return None
 262
 263
 264 def intersect(a, b):
 265     return list(set(a) & set(b))
 266
 267
 268
 269 def remove_control_chars(s):
 270     all_chars = (chr(i) for i in range(0x110000))
 271     control_chars = ''.join(map(chr, list(range(0,32)) + list(range(127,160))))
 272     control_char_re = re.compile('[%s]' % re.escape(control_chars))
 273
 274     return control_char_re.sub('', s)
 275
 276
 277 def unzip(a):
 278     return tuple(map(list,zip(*a)))
 279
 280
 281 def parse_range(s, min, max, default=None):
 282     """
 283     Parses the string and returns its value. If the value is outside the given
 284     range, its closest number within the range is returned
 285
 286     >>> parse_range('5', 0, 10)
 287     5
 288
 289     >>> parse_range('0', 5.0, 10)
 290     5.0
 291
 292     >>> parse_range('15',0, 10)
 293     10
 294
 295     >>> parse_range('x', 0., 20)
 296     10.0
 297
 298     >>> parse_range('x', 0, 20, 20)
 299     20
 300     """
 301     out_type = type(min)
 302
 303     try:
 304         val = int(s)
 305         if val < min:
 306             return min
 307         if val > max:
 308             return max
 309         return val
 310
 311     except (ValueError, TypeError):
 312         return default if default is not None else out_type((max-min)/2)
 313
 314
 315
 316 def flatten(l):
 317     return [item for sublist in l for item in sublist]
 318
 319
 320 def linearize(key, iterators, reverse=False):
 321     """
 322     Linearizes a number of iterators, sorted by some comparison function
 323     """
 324
 325     iters = [iter(i) for i in iterators]
 326     vals = []
 327     for i in iters:
 328         try:
 329             v = next(i)
 330             vals. append( (v, i) )
 331         except StopIteration:
 332             continue
 333
 334     while vals:
 335         vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
 336         val, it = vals.pop(0)
 337         yield val
 338         try:
 339             next_val = next(it)
 340             vals.append( (next_val, it) )
 341         except StopIteration:
 342             pass
 343
 344
 345 def get_timestamp(datetime_obj):
 346     """ Returns the timestamp as an int for the given datetime object
 347
 348     >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
 349     1302168606
 350
 351     >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
 352     0
 353     """
 354     return int(time.mktime(datetime_obj.timetuple()))
 355
 356
 357
 358 re_url = re.compile('^https?://')
 359
 360 def is_url(string):
 361     """ Returns true if a string looks like an URL
 362
 363     >>> is_url('http://example.com/some-path/file.xml')
 364     True
 365
 366     >>> is_url('something else')
 367     False
 368     """
 369
 370     return bool(re_url.match(string))
 371
 372
 373
 374 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
 375 # this does not increase asymptotical complexity
 376 # but can still waste more time than it saves.
 377 def shortest_of(strings):
 378     return min(strings, key=len)
 379
 380 def longest_substr(strings):
 381     """
 382     Returns the longest common substring of the given strings
 383     """
 384
 385     substr = ""
 386     if not strings:
 387         return substr
 388     reference = shortest_of(strings)
 389     length = len(reference)
 390     #find a suitable slice i:j
 391     for i in range(length):
 392         #only consider strings long at least len(substr) + 1
 393         for j in range(i + len(substr) + 1, length):
 394             candidate = reference[i:j]
 395             if all(candidate in text for text in strings):
 396                 substr = candidate
 397     return substr
 398
 399
 400
 401 def additional_value(it, gen_val, val_changed=lambda _: True):
 402     """ Provides an additional value to the elements, calculated when needed
 403
 404     For the elements from the iterator, some additional value can be computed
 405     by gen_val (which might be an expensive computation).
 406
 407     If the elements in the iterator are ordered so that some subsequent
 408     elements would generate the same additional value, val_changed can be
 409     provided, which receives the next element from the iterator and the
 410     previous additional value. If the element would generate the same
 411     additional value (val_changed returns False), its computation is skipped.
 412
 413     >>> # get the next full hundred higher than x
 414     >>> # this will probably be an expensive calculation
 415     >>> next_hundred = lambda x: x + 100-(x % 100)
 416
 417     >>> # returns True if h is not the value that next_hundred(x) would provide
 418     >>> # this should be a relatively cheap calculation, compared to the above
 419     >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
 420
 421     >>> xs = [0, 50, 100, 101, 199, 200, 201]
 422     >>> list(additional_value(xs, next_hundred, diff_hundred))
 423     [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
 424     """
 425
 426     _none = object()
 427     current = _none
 428
 429     for x in it:
 430         if current is _none or val_changed(x, current):
 431             current = gen_val(x)
 432
 433         yield (x, current)
 434
 435
 436 def file_hash(f, h=hashlib.md5, block_size=2**20):
 437     """ returns the hash of the contents of a file """
 438     f_hash = h()
 439     for chunk in iter(lambda: f.read(block_size), ''):
 440         f_hash.update(chunk)
 441     return f_hash
 442
 443
 444
 445 def split_list(l, prop):
 446     """ split elements that satisfy a property, and those that don't """
 447     match   = list(filter(prop, l))
 448     nomatch = [x for x in l if x not in match]
 449     return match, nomatch
 450
 451
 452 def sorted_chain(links, key, reverse=False):
 453     """ Takes a list of iters can iterates over sorted elements
 454
 455     Each elment of links should be a tuple of (sort_key, iterator). The
 456     elements of each iterator should be sorted already. sort_key should
 457     indicate the key of the first element and needs to be comparable to the
 458     result of key(elem).
 459
 460     The function returns an iterator over the globally sorted element that
 461     ensures that as little iterators as possible are evaluated.  When
 462     evaluating """
 463
 464     # mixed_list initially contains all placeholders; later evaluated
 465     # elements (from the iterators) are mixed in
 466     mixed_list = [(k, link, True) for k, link in links]
 467
 468     while mixed_list:
 469         _, item, expand = mixed_list.pop(0)
 470
 471         # found an element (from an earlier expansion), yield it
 472         if not expand:
 473             yield item
 474             continue
 475
 476         # found an iter that needs to be expanded.
 477         # The iterator is fully consumed
 478         new_items = [(key(i), i, False) for i in item]
 479
 480         # sort links (placeholders) and elements together
 481         mixed_list = sorted(mixed_list + new_items, key=lambda t: t[0],
 482                 reverse=reverse)
 483
 484
 485 def url_add_authentication(url, username, password):
 486     """
 487     Adds authentication data (username, password) to a given
 488     URL in order to construct an authenticated URL.
 489
 490     >>> url_add_authentication('https://host.com/', '', None)
 491     'https://host.com/'
 492     >>> url_add_authentication('http://example.org/', None, None)
 493     'http://example.org/'
 494     >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
 495     'telnet://foo:bar@host.com/'
 496     >>> url_add_authentication('ftp://example.org', 'billy', None)
 497     'ftp://billy@example.org'
 498     >>> url_add_authentication('ftp://example.org', 'billy', '')
 499     'ftp://billy:@example.org'
 500     >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
 501     'http://aa:bc@localhost/x'
 502     >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
 503     'http://i%2Fo:P@ss:@blubb.lan/u.html'
 504     >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
 505     'http://c:d@x.org/'
 506     >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
 507     'http://P@x:i%2F@cx.lan'
 508     >>> url_add_authentication('http://x.org/', 'a b', 'c d')
 509     'http://a%20b:c%20d@x.org/'
 510     """
 511     if username is None or username == '':
 512         return url
 513
 514     # Relaxations of the strict quoting rules (bug 1521):
 515     # 1. Accept '@' in username and password
 516     # 2. Acecpt ':' in password only
 517     username = urllib.parse.quote(username, safe='@')
 518
 519     if password is not None:
 520         password = urllib.parse.quote(password, safe='@:')
 521         auth_string = ':'.join((username, password))
 522     else:
 523         auth_string = username
 524
 525     url = url_strip_authentication(url)
 526
 527     url_parts = list(urllib.parse.urlsplit(url))
 528     # url_parts[1] is the HOST part of the URL
 529     url_parts[1] = '@'.join((auth_string, url_parts[1]))
 530
 531     return urllib.parse.urlunsplit(url_parts)
 532
 533
 534 def urlopen(url, headers=None, data=None):
 535     """
 536     An URL opener with the User-agent set to gPodder (with version)
 537     """
 538     username, password = username_password_from_url(url)
 539     if username is not None or password is not None:
 540         url = url_strip_authentication(url)
 541         password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
 542         password_mgr.add_password(None, url, username, password)
 543         handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
 544         opener = urllib.request.build_opener(handler)
 545     else:
 546         opener = urllib.request.build_opener()
 547
 548     if headers is None:
 549         headers = {}
 550     else:
 551         headers = dict(headers)
 552
 553     headers.update({'User-agent': settings.USER_AGENT})
 554     request = urllib.request.Request(url, data=data, headers=headers)
 555     return opener.open(request)
 556
 557
 558
 559 def username_password_from_url(url):
 560     r"""
 561     Returns a tuple (username,password) containing authentication
 562     data from the specified URL or (None,None) if no authentication
 563     data can be found in the URL.
 564
 565     See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)
 566
 567     >>> username_password_from_url('https://@host.com/')
 568     ('', None)
 569     >>> username_password_from_url('telnet://host.com/')
 570     (None, None)
 571     >>> username_password_from_url('ftp://foo:@host.com/')
 572     ('foo', '')
 573     >>> username_password_from_url('http://a:b@host.com/')
 574     ('a', 'b')
 575     >>> username_password_from_url(1)
 576     Traceback (most recent call last):
 577       ...
 578     ValueError: URL has to be a string or unicode object.
 579     >>> username_password_from_url(None)
 580     Traceback (most recent call last):
 581       ...
 582     ValueError: URL has to be a string or unicode object.
 583     >>> username_password_from_url('http://a@b:c@host.com/')
 584     ('a@b', 'c')
 585     >>> username_password_from_url('ftp://a:b:c@host.com/')
 586     ('a', 'b:c')
 587     >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
 588     ('i/o', 'P@ss:')
 589     >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
 590     ('österreich', None)
 591     >>> username_password_from_url('http://w%20x:y%20z@example.org/')
 592     ('w x', 'y z')
 593     >>> username_password_from_url('http://example.com/x@y:z@test.com/')
 594     (None, None)
 595     """
 596     if type(url) not in (str, str):
 597         raise ValueError('URL has to be a string or unicode object.')
 598
 599     (username, password) = (None, None)
 600
 601     (scheme, netloc, path, params, query, fragment) = urllib.parse.urlparse(url)
 602
 603     if '@' in netloc:
 604         (authentication, netloc) = netloc.rsplit('@', 1)
 605         if ':' in authentication:
 606             (username, password) = authentication.split(':', 1)
 607
 608             # RFC1738 dictates that we should not allow ['/', '@', ':']
 609             # characters in the username and password field (Section 3.1):
 610             #
 611             # 1. The "/" can't be in there at this point because of the way
 612             #    urlparse (which we use above) works.
 613             # 2. Due to gPodder bug 1521, we allow "@" in the username and
 614             #    password field. We use netloc.rsplit('@', 1), which will
 615             #    make sure that we split it at the last '@' in netloc.
 616             # 3. The colon must be excluded (RFC2617, Section 2) in the
 617             #    username, but is apparently allowed in the password. This
 618             #    is handled by the authentication.split(':', 1) above, and
 619             #    will cause any extraneous ':'s to be part of the password.
 620
 621             username = urllib.parse.unquote(username)
 622             password = urllib.parse.unquote(password)
 623         else:
 624             username = urllib.parse.unquote(authentication)
 625
 626     return (username, password)
 627
 628
 629 def url_strip_authentication(url):
 630     """
 631     Strips authentication data from an URL. Returns the URL with
 632     the authentication data removed from it.
 633
 634     >>> url_strip_authentication('https://host.com/')
 635     'https://host.com/'
 636     >>> url_strip_authentication('telnet://foo:bar@host.com/')
 637     'telnet://host.com/'
 638     >>> url_strip_authentication('ftp://billy@example.org')
 639     'ftp://example.org'
 640     >>> url_strip_authentication('ftp://billy:@example.org')
 641     'ftp://example.org'
 642     >>> url_strip_authentication('http://aa:bc@localhost/x')
 643     'http://localhost/x'
 644     >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
 645     'http://blubb.lan/u.html'
 646     >>> url_strip_authentication('http://c:d@x.org/')
 647     'http://x.org/'
 648     >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
 649     'http://cx.lan'
 650     >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
 651     'http://example.com/'
 652     """
 653     url_parts = list(urllib.parse.urlsplit(url))
 654     # url_parts[1] is the HOST part of the URL
 655
 656     # Remove existing authentication data
 657     if '@' in url_parts[1]:
 658         url_parts[1] = url_parts[1].rsplit('@', 1)[1]
 659
 660     return urllib.parse.urlunsplit(url_parts)
 661
 662
# Native filesystem encoding detection: the name of the encoding that Python
# uses to convert between unicode filenames and the filenames of the
# operating system. It is determined once, when the module is imported.
encoding = sys.getfilesystemencoding()
 665
 666
 667 def get_git_head():
 668     """ returns the commit and message of the current git HEAD """
 669
 670     try:
 671         pr = subprocess.Popen('/usr/bin/git log -n 1 --oneline'.split(),
 672             cwd = settings.BASE_DIR,
 673             stdout = subprocess.PIPE,
 674             stderr = subprocess.PIPE,
 675         )
 676
 677     except OSError:
 678         return None, None
 679
 680     (out, err) = pr.communicate()
 681     if err:
 682         return None, None
 683
 684     outs = out.split()
 685     commit = outs[0]
 686     msg = ' ' .join(outs[1:])
 687     return commit, msg
 688
 689
 690 def parse_request_body(request):
 691     """ returns the parsed request body, handles gzip encoding """
 692
 693     raw_body = request.body
 694     content_enc = request.META.get('HTTP_CONTENT_ENCODING')
 695
 696     if content_enc == 'gzip':
 697         raw_body = zlib.decompress(raw_body)
 698
 699     return json.loads(raw_body.decode('utf-8'))
 700
 701
 702 def normalize_feed_url(url):
 703     """
 704     Converts any URL to http:// or ftp:// so that it can be
 705     used with "wget". If the URL cannot be converted (invalid
 706     or unknown scheme), "None" is returned.
 707
 708     This will also normalize feed:// and itpc:// to http://.
 709
 710     >>> normalize_feed_url('itpc://example.org/podcast.rss')
 711     'http://example.org/podcast.rss'
 712
 713     If no URL scheme is defined (e.g. "curry.com"), we will
 714     simply assume the user intends to add a http:// feed.
 715
 716     >>> normalize_feed_url('curry.com')
 717     'http://curry.com/'
 718
 719     There are even some more shortcuts for advanced users
 720     and lazy typists (see the source for details).
 721
 722     >>> normalize_feed_url('fb:43FPodcast')
 723     'http://feeds.feedburner.com/43FPodcast'
 724
 725     It will also take care of converting the domain name to
 726     all-lowercase (because domains are not case sensitive):
 727
 728     >>> normalize_feed_url('http://Example.COM/')
 729     'http://example.com/'
 730
 731     Some other minimalistic changes are also taken care of,
 732     e.g. a ? with an empty query is removed:
 733
 734     >>> normalize_feed_url('http://example.org/test?')
 735     'http://example.org/test'
 736
 737     Leading and trailing whitespace is removed
 738
 739     >>> normalize_feed_url(' http://example.com/podcast.rss ')
 740     'http://example.com/podcast.rss'
 741
 742     HTTP Authentication is removed to protect users' privacy
 743
 744     >>> normalize_feed_url('http://a@b:c@host.com/')
 745     'http://host.com/'
 746     >>> normalize_feed_url('ftp://a:b:c@host.com/')
 747     'ftp://host.com/'
 748     >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
 749     'http://host.com/'
 750     >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
 751     'ftp://host.com/'
 752     >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
 753     'http://example.org/'
 754     >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
 755     'http://example.com/x%40y%3Az%40test.com/'
 756     >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
 757     'http://en.wikipedia.org/wiki/%C3%84'
 758     >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
 759     'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
 760     """
 761     url = url.strip()
 762     if not url or len(url) < 8:
 763         return None
 764
 765     # This is a list of prefixes that you can use to minimize the amount of
 766     # keystrokes that you have to use.
 767     # Feel free to suggest other useful prefixes, and I'll add them here.
 768     PREFIXES = {
 769             'fb:': 'http://feeds.feedburner.com/%s',
 770             'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
 771             'sc:': 'http://soundcloud.com/%s',
 772             'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
 773             # YouTube playlists. To get a list of playlists per-user, use:
 774             # https://gdata.youtube.com/feeds/api/users/<username>/playlists
 775             'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
 776     }
 777
 778     for prefix, expansion in PREFIXES.items():
 779         if url.startswith(prefix):
 780             url = expansion % (url[len(prefix):],)
 781             break
 782
 783     # Assume HTTP for URLs without scheme
 784     if not '://' in url:
 785         url = 'http://' + url
 786
 787     scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url)
 788
 789     # Schemes and domain names are case insensitive
 790     scheme, netloc = scheme.lower(), netloc.lower()
 791
 792     # encode non-encoded characters
 793     path = urllib.parse.quote(path, '/%')
 794     query = urllib.parse.quote_plus(query, ':&=')
 795
 796     # Remove authentication to protect users' privacy
 797     netloc = netloc.rsplit('@', 1)[-1]
 798
 799     # Normalize empty paths to "/"
 800     if path == '':
 801         path = '/'
 802
 803     # feed://, itpc:// and itms:// are really http://
 804     if scheme in ('feed', 'itpc', 'itms'):
 805         scheme = 'http'
 806
 807     if scheme not in ('http', 'https', 'ftp', 'file'):
 808         return None
 809
 810     # urlunsplit might return "a slighty different, but equivalent URL"
 811     return urllib.parse.urlunsplit((scheme, netloc, path, query, fragment))
 812
 813
 814 def partition(items, predicate=bool):
 815     a, b = itertools.tee((predicate(item), item) for item in items)
 816     return ((item for pred, item in a if not pred),
 817             (item for pred, item in b if pred))
 818
 819
 820 def split_quoted(s):
 821     """ Splits a quoted string
 822
 823     >>> split_quoted('some "quoted text"') == ['some', 'quoted text']
 824     True
 825
 826     >>> split_quoted('"quoted text') == ['quoted', 'text']
 827     True
 828
 829     # 4 quotes here are 2 in the doctest is one in the actual string
 830     >>> split_quoted('text\\\\') == ['text']
 831     True
 832     """
 833
 834     try:
 835         # split by whitespace, preserve quoted substrings
 836         keywords = shlex.split(s)
 837
 838     except ValueError:
 839         # No closing quotation (eg '"text')
 840         # No escaped character (eg '\')
 841         s = s.replace('"', '').replace("'", '').replace('\\', '')
 842         keywords = shlex.split(s)
 843
 844     return keywords
 845
 846
 847 def edit_link(obj):
 848     """ Return the link to the Django Admin Edit page """
 849     return reverse('admin:%s_%s_change' % (obj._meta.app_label,
 850                                            obj._meta.model_name),
 851                    args=(obj.pk,))
 852
 853
 854 def random_token(length=32):
 855     import random
 856     import string
 857     return "".join(random.sample(string.ascii_letters+string.digits, length))
 858
 859
 860 def to_maxlength(cls, field, val):
 861     """ Cut val to the maximum length of cls's field """
 862     if val is None:
 863         return None
 864
 865     max_length = cls._meta.get_field(field).max_length
 866     orig_length = len(val)
 867     if orig_length > max_length:
 868         val = val[:max_length]
 869         logger.warn('%s.%s length reduced from %d to %d',
 870                     cls.__name__, field, orig_length, max_length)
 871
 872     return val
 873
 874
 875 def get_domain(url):
 876     """ Returns the domain name of a URL
 877
 878     >>> get_domain('http://example.com')
 879     'example.com'
 880
 881     >>> get_domain('https://example.com:80/my-podcast/feed.rss')
 882     'example.com'
 883     """
 884     netloc = urllib.parse.urlparse(url).netloc
 885     try:
 886         port_idx = netloc.index(':')
 887         return netloc[:port_idx]
 888
 889     except ValueError:
 890         return netloc
 891
 892
 893 def set_ordered_entries(obj, new_entries, existing, EntryClass,
 894                         value_name, parent_name):
 895     """ Update the object's entries to the given list
 896
 897     'new_entries' should be a list of objects that are later wrapped in
 898     EntryClass instances. 'value_name' is the name of the EntryClass property
 899     that contains the values; 'parent_name' is the one that references obj.
 900
 901     Entries that do not exist are created. Existing entries that are not in
 902     'new_entries' are deleted. """
 903
 904     logger.info('%d existing entries', len(existing))
 905
 906     logger.info('%d new entries', len(new_entries))
 907
 908     with transaction.atomic():
 909         max_order = max([s.order for s in existing.values()] +
 910                         [len(new_entries)])
 911         logger.info('Renumbering entries starting from %d', max_order+1)
 912         for n, entry in enumerate(existing.values(), max_order+1):
 913             entry.order = n
 914             entry.save()
 915
 916     logger.info('%d existing entries', len(existing))
 917
 918     for n, entry in enumerate(new_entries):
 919         try:
 920             e = existing.pop(entry)
 921             logger.info('Updating existing entry %d: %s', n, entry)
 922             e.order = n
 923             e.save()
 924         except KeyError:
 925             logger.info('Creating new entry %d: %s', n, entry)
 926             try:
 927                 links = {
 928                     value_name: entry,
 929                     parent_name: obj,
 930                 }
 931                 from mygpo.podcasts.models import ScopedModel
 932                 if issubclass(EntryClass, ScopedModel):
 933                     links['scope'] = obj.scope
 934
 935                 EntryClass.objects.create(order=n, **links)
 936             except IntegrityError as ie:
 937                 logger.warn('Could not create enry for %s: %s', obj, ie)
 938
 939     with transaction.atomic():
 940         delete = [s.pk for s in existing.values()]
 941         logger.info('Deleting %d entries', len(delete))
 942         EntryClass.objects.filter(id__in=delete).delete()