# -*- coding: utf-8 -*-
#
# This file is part of my.gpodder.org.
#
# my.gpodder.org is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# my.gpodder.org is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
#

import functools
import types
import subprocess
import os
import operator
import sys
import re
import collections
import itertools
from datetime import datetime, timedelta, date
import time
import hashlib
import urllib.parse
import urllib.request
import urllib.error
import zlib
import shlex

from django.db import transaction, IntegrityError
from django.conf import settings
from django.core.urlresolvers import reverse

from mygpo.core.json import json

import logging
logger = logging.getLogger(__name__)


def daterange(from_date, to_date=None, leap=timedelta(days=1)):
    """
    >>> from_d = datetime(2010, 1, 1)
    >>> to_d = datetime(2010, 1, 5)
    >>> list(daterange(from_d, to_d))
    [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
    """

    if to_date is None:
        if isinstance(from_date, datetime):
            to_date = datetime.utcnow()
        else:
            to_date = date.today()

    while from_date <= to_date:
        yield from_date
        from_date = from_date + leap
    return


def format_time(value):
    """Format an offset (in seconds) to a string

    The offset should be an integer or float value.

    >>> format_time(0)
    '00:00'
    >>> format_time(20)
    '00:20'
    >>> format_time(3600)
    '01:00:00'
    >>> format_time(10921)
    '03:02:01'
    """
    try:
        dt = datetime.utcfromtimestamp(value)
    except (ValueError, TypeError):
        return ''

    if dt.hour == 0:
        return dt.strftime('%M:%S')
    else:
        return dt.strftime('%H:%M:%S')


def parse_time(value):
    """
    >>> parse_time(10)
    10

    >>> parse_time('05:10') #5*60+10
    310

    >>> parse_time('1:05:10') #60*60+5*60+10
    3910
    """
    if value is None:
        raise ValueError('None value in parse_time')

    if isinstance(value, int):
        # Don't need to parse already-converted time value
        return value

    if value == '':
        raise ValueError('Empty value in parse_time')

    for format in ('%H:%M:%S', '%M:%S'):
        try:
            t = time.strptime(value, format)
            return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
        except ValueError:
            continue

    return int(value)


def parse_bool(val):
    """
    >>> parse_bool('True')
    True

    >>> parse_bool('true')
    True

    >>> parse_bool('')
    False
    """
    if isinstance(val, bool):
        return val
    if val.lower() == 'true':
        return True
    return False


def iterate_together(lists, key=lambda x: x, reverse=False):
    """
    takes ordered, possibly sparse, lists with similar items
    (some items have a corresponding item in the other lists, some don't).

    It then yields tuples of corresponding items, where one element is None
    if there is no corresponding entry in one of the lists.

    Tuples where both elements are None are skipped.

    The results of the key method are used for the comparisons.

    If reverse is True, the lists are expected to be sorted in reverse order
    and the results will also be sorted in reverse

    >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
    [(1, 1), (2, None), (None, 3)]

    >>> list(iterate_together([[], []]))
    []

    >>> list(iterate_together([range(1, 3), range(3, 5)]))
    [(1, None), (2, None), (None, 3), (None, 4)]

    >>> list(iterate_together([range(1, 3), []]))
    [(1, None), (2, None)]

    >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
    [(1, None), (3, 3)]
    """

    Next = collections.namedtuple('Next', 'item more')
    min_ = min if not reverse else max
    lt_ = operator.lt if not reverse else operator.gt

    lists = [iter(l) for l in lists]

    def _take(it):
        try:
            i = next(it)
            while i is None:
                i = next(it)
            return Next(i, True)
        except StopIteration:
            return Next(None, False)

    def new_res():
        return [None]*len(lists)

    # take first bunch of items
    items = [_take(l) for l in lists]

    while any(i.item is not None or i.more for i in items):

        res = new_res()

        for n, item in enumerate(items):

            if item.item is None:
                continue

            if all(x is None for x in res):
                res[n] = item.item
                continue

            min_v = min_(filter(lambda x: x is not None, res), key=key)

            if key(item.item) == key(min_v):
                res[n] = item.item

            elif lt_(key(item.item), key(min_v)):
                res = new_res()
                res[n] = item.item

        for n, x in enumerate(res):
            if x is not None:
                items[n] = _take(lists[n])

        yield tuple(res)


def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):

    factor = float(val)/max_val if max_val > 0 else 0

    # progress as percentage
    percentage_str = '{val:.2%}'.format(val=factor)

    # progress bar filled with #s
    factor = min(int(factor*max_width), max_width)
    progress_str = '#' * factor + ' ' * (max_width-factor)

    # insert percentage into bar
    percentage_start = int((max_width-len(percentage_str))/2)
    progress_str = progress_str[:percentage_start] + \
                   percentage_str + \
                   progress_str[percentage_start+len(percentage_str):]

    print('\r', end=' ', file=stream)
    print('[ %s ] %s / %s | %s' % (
        progress_str,
        val,
        max_val,
        status_str), end=' ', file=stream)
    stream.flush()
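
# Illustrative usage (not part of the original module): render item i of 10
# with a status message; the leading '\r' lets repeated calls redraw the line.
#
#     for i in range(1, 11):
#         progress(i, 10, status_str='processing')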


def set_cmp(items, simplify):
    """
    Builds a set out of a list but uses the results of simplify to determine
    equality between items
    """
    simpl = lambda x: (simplify(x), x)
    lst = dict(map(simpl, items))
    return list(lst.values())
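
# Illustrative example (assumes Python 3.7+ dict ordering; get_domain() is
# defined further below in this module): deduplicate URLs by their domain,
# keeping the last URL seen for each domain.
#
#     >>> urls = ['http://a.com/1', 'http://a.com/2', 'http://b.com/x']
#     >>> set_cmp(urls, get_domain)
#     ['http://a.com/2', 'http://b.com/x']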


def first(it):
    """
    returns the first not-None object or None if the iterator is exhausted
    """
    for x in it:
        if x is not None:
            return x
    return None


def intersect(a, b):
    return list(set(a) & set(b))


def remove_control_chars(s):
    all_chars = (chr(i) for i in range(0x110000))
    control_chars = ''.join(map(chr, list(range(0, 32)) + list(range(127, 160))))
    control_char_re = re.compile('[%s]' % re.escape(control_chars))

    return control_char_re.sub('', s)


def unzip(a):
    return tuple(map(list, zip(*a)))
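
# Illustrative example (not from the original module): unzip() is the inverse
# of zip() for a list of pairs.
#
#     >>> unzip([(1, 'a'), (2, 'b'), (3, 'c')])
#     ([1, 2, 3], ['a', 'b', 'c'])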


def parse_range(s, min, max, default=None):
    """
    Parses the string and returns its value. If the value is outside the given
    range, its closest number within the range is returned

    >>> parse_range('5', 0, 10)
    5

    >>> parse_range('0', 5.0, 10)
    5.0

    >>> parse_range('15', 0, 10)
    10

    >>> parse_range('x', 0., 20)
    10.0

    >>> parse_range('x', 0, 20, 20)
    20
    """
    out_type = type(min)

    try:
        val = int(s)
        if val < min:
            return min
        if val > max:
            return max
        return val

    except (ValueError, TypeError):
        return default if default is not None else out_type((max-min)/2)


def flatten(l):
    return [item for sublist in l for item in sublist]
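
# Illustrative example (not from the original module): flatten one level of
# nesting; deeper nesting is left untouched.
#
#     >>> flatten([[1, 2], [3], []])
#     [1, 2, 3]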


def linearize(key, iterators, reverse=False):
    """
    Linearizes a number of iterators, sorted by some comparison function
    """

    iters = [iter(i) for i in iterators]
    vals = []
    for i in iters:
        try:
            v = next(i)
            vals.append((v, i))
        except StopIteration:
            continue

    while vals:
        vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
        val, it = vals.pop(0)
        yield val
        try:
            next_val = next(it)
            vals.append((next_val, it))
        except StopIteration:
            pass
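
# Illustrative example (not from the original module): merge two already-sorted
# iterators into one sorted stream, comparing elements by themselves.
#
#     >>> list(linearize(lambda x: x, [[1, 3, 5], [2, 4]]))
#     [1, 2, 3, 4, 5]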


def get_timestamp(datetime_obj):
    """ Returns the timestamp as an int for the given datetime object

    >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
    1302168606

    >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
    0
    """
    return int(time.mktime(datetime_obj.timetuple()))


re_url = re.compile('^https?://')


def is_url(string):
    """ Returns true if a string looks like an URL

    >>> is_url('http://example.com/some-path/file.xml')
    True

    >>> is_url('something else')
    False
    """
    return bool(re_url.match(string))


# from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
# this does not increase asymptotical complexity
# but can still waste more time than it saves.
def shortest_of(strings):
    return min(strings, key=len)


def longest_substr(strings):
    """
    Returns the longest common substring of the given strings
    """
    substr = ""
    if not strings:
        return substr
    reference = shortest_of(strings)
    length = len(reference)
    # find a suitable slice i:j
    for i in range(length):
        # only consider candidates longer than the current substr
        for j in range(i + len(substr) + 1, length):
            candidate = reference[i:j]
            if all(candidate in text for text in strings):
                substr = candidate
    return substr


def additional_value(it, gen_val, val_changed=lambda _: True):
    """ Provides an additional value to the elements, calculated when needed

    For the elements from the iterator, some additional value can be computed
    by gen_val (which might be an expensive computation).

    If the elements in the iterator are ordered so that some subsequent
    elements would generate the same additional value, val_changed can be
    provided, which receives the next element from the iterator and the
    previous additional value. If the element would generate the same
    additional value (val_changed returns False), its computation is skipped.

    >>> # get the next full hundred higher than x
    >>> # this will probably be an expensive calculation
    >>> next_hundred = lambda x: x + 100-(x % 100)

    >>> # returns True if h is not the value that next_hundred(x) would provide
    >>> # this should be a relatively cheap calculation, compared to the above
    >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100

    >>> xs = [0, 50, 100, 101, 199, 200, 201]
    >>> list(additional_value(xs, next_hundred, diff_hundred))
    [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
    """

    _none = object()
    current = _none

    for x in it:
        if current is _none or val_changed(x, current):
            current = gen_val(x)

        yield (x, current)


def file_hash(f, h=hashlib.md5, block_size=2**20):
    """ returns the hash of the contents of a file """
    f_hash = h()
    for chunk in iter(lambda: f.read(block_size), b''):
        f_hash.update(chunk)
    return f_hash
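
# Illustrative usage (path and filename are hypothetical): the file must be
# opened in binary mode, since the hash is updated with bytes.
#
#     with open('/tmp/episode.mp3', 'rb') as f:
#         digest = file_hash(f).hexdigest()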


def split_list(l, prop):
    """ split elements that satisfy a property, and those that don't """
    match = list(filter(prop, l))
    nomatch = [x for x in l if x not in match]
    return match, nomatch
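
# Illustrative example (not from the original module): split a list into
# elements that satisfy the property and those that do not.
#
#     >>> split_list([1, 2, 3, 4], lambda x: x % 2)
#     ([1, 3], [2, 4])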


def sorted_chain(links, key, reverse=False):
    """ Takes a list of iters and iterates over sorted elements

    Each element of links should be a tuple of (sort_key, iterator). The
    elements of each iterator should be sorted already. sort_key should
    indicate the key of the first element and needs to be comparable to the
    result of key(elem).

    The function returns an iterator over the globally sorted elements that
    ensures that as few iterators as possible are evaluated. When
    evaluating """

    # mixed_list initially contains all placeholders; later evaluated
    # elements (from the iterators) are mixed in
    mixed_list = [(k, link, True) for k, link in links]

    while mixed_list:
        _, item, expand = mixed_list.pop(0)

        # found an element (from an earlier expansion), yield it
        if not expand:
            yield item
            continue

        # found an iter that needs to be expanded.
        # The iterator is fully consumed
        new_items = [(key(i), i, False) for i in item]

        # sort links (placeholders) and elements together
        mixed_list = sorted(mixed_list + new_items, key=lambda t: t[0],
                            reverse=reverse)
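
# Illustrative example (not from the original module): each link is a
# (sort_key, iterator) pair whose sort_key equals the key of its first element.
#
#     >>> links = [(1, iter([1, 4])), (2, iter([2, 3]))]
#     >>> list(sorted_chain(links, key=lambda x: x))
#     [1, 2, 3, 4]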


def url_add_authentication(url, username, password):
    """
    Adds authentication data (username, password) to a given
    URL in order to construct an authenticated URL.

    >>> url_add_authentication('https://host.com/', '', None)
    'https://host.com/'
    >>> url_add_authentication('http://example.org/', None, None)
    'http://example.org/'
    >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
    'telnet://foo:bar@host.com/'
    >>> url_add_authentication('ftp://example.org', 'billy', None)
    'ftp://billy@example.org'
    >>> url_add_authentication('ftp://example.org', 'billy', '')
    'ftp://billy:@example.org'
    >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
    'http://aa:bc@localhost/x'
    >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
    'http://i%2Fo:P@ss:@blubb.lan/u.html'
    >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
    'http://c:d@x.org/'
    >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
    'http://P@x:i%2F@cx.lan'
    >>> url_add_authentication('http://x.org/', 'a b', 'c d')
    'http://a%20b:c%20d@x.org/'
    """
    if username is None or username == '':
        return url

    # Relaxations of the strict quoting rules (bug 1521):
    # 1. Accept '@' in username and password
    # 2. Accept ':' in password only
    username = urllib.parse.quote(username, safe='@')

    if password is not None:
        password = urllib.parse.quote(password, safe='@:')
        auth_string = ':'.join((username, password))
    else:
        auth_string = username

    url = url_strip_authentication(url)

    url_parts = list(urllib.parse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL
    url_parts[1] = '@'.join((auth_string, url_parts[1]))

    return urllib.parse.urlunsplit(url_parts)


def urlopen(url, headers=None, data=None):
    """
    An URL opener with the User-agent set to gPodder (with version)
    """
    username, password = username_password_from_url(url)
    if username is not None or password is not None:
        url = url_strip_authentication(url)
        password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
        opener = urllib.request.build_opener(handler)
    else:
        opener = urllib.request.build_opener()

    if headers is None:
        headers = {}
    else:
        headers = dict(headers)

    headers.update({'User-agent': settings.USER_AGENT})
    request = urllib.request.Request(url, data=data, headers=headers)
    return opener.open(request)
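
# Illustrative usage (hypothetical URL; requires settings.USER_AGENT to be
# configured): credentials embedded in the URL are stripped and sent via
# HTTP Basic auth instead.
#
#     response = urlopen('http://user:secret@example.org/feed.xml')
#     body = response.read()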


def username_password_from_url(url):
    r"""
    Returns a tuple (username,password) containing authentication
    data from the specified URL or (None,None) if no authentication
    data can be found in the URL.

    See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)

    >>> username_password_from_url('https://@host.com/')
    ('', None)
    >>> username_password_from_url('telnet://host.com/')
    (None, None)
    >>> username_password_from_url('ftp://foo:@host.com/')
    ('foo', '')
    >>> username_password_from_url('http://a:b@host.com/')
    ('a', 'b')
    >>> username_password_from_url(1)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url(None)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url('http://a@b:c@host.com/')
    ('a@b', 'c')
    >>> username_password_from_url('ftp://a:b:c@host.com/')
    ('a', 'b:c')
    >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
    ('i/o', 'P@ss:')
    >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
    ('österreich', None)
    >>> username_password_from_url('http://w%20x:y%20z@example.org/')
    ('w x', 'y z')
    >>> username_password_from_url('http://example.com/x@y:z@test.com/')
    (None, None)
    """
    if not isinstance(url, str):
        raise ValueError('URL has to be a string or unicode object.')

    (username, password) = (None, None)

    (scheme, netloc, path, params, query, fragment) = urllib.parse.urlparse(url)

    if '@' in netloc:
        (authentication, netloc) = netloc.rsplit('@', 1)
        if ':' in authentication:
            (username, password) = authentication.split(':', 1)

            # RFC1738 dictates that we should not allow ['/', '@', ':']
            # characters in the username and password field (Section 3.1):
            #
            # 1. The "/" can't be in there at this point because of the way
            #    urlparse (which we use above) works.
            # 2. Due to gPodder bug 1521, we allow "@" in the username and
            #    password field. We use netloc.rsplit('@', 1), which will
            #    make sure that we split it at the last '@' in netloc.
            # 3. The colon must be excluded (RFC2617, Section 2) in the
            #    username, but is apparently allowed in the password. This
            #    is handled by the authentication.split(':', 1) above, and
            #    will cause any extraneous ':'s to be part of the password.

            username = urllib.parse.unquote(username)
            password = urllib.parse.unquote(password)
        else:
            username = urllib.parse.unquote(authentication)

    return (username, password)


def url_strip_authentication(url):
    """
    Strips authentication data from an URL. Returns the URL with
    the authentication data removed from it.

    >>> url_strip_authentication('https://host.com/')
    'https://host.com/'
    >>> url_strip_authentication('telnet://foo:bar@host.com/')
    'telnet://host.com/'
    >>> url_strip_authentication('ftp://billy@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('ftp://billy:@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('http://aa:bc@localhost/x')
    'http://localhost/x'
    >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
    'http://blubb.lan/u.html'
    >>> url_strip_authentication('http://c:d@x.org/')
    'http://x.org/'
    >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
    'http://cx.lan'
    >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
    'http://example.com/'
    """
    url_parts = list(urllib.parse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL

    # Remove existing authentication data
    if '@' in url_parts[1]:
        url_parts[1] = url_parts[1].rsplit('@', 1)[1]

    return urllib.parse.urlunsplit(url_parts)


# Native filesystem encoding detection
encoding = sys.getfilesystemencoding()


def get_git_head():
    """ returns the commit and message of the current git HEAD """

    try:
        pr = subprocess.Popen('/usr/bin/git log -n 1 --oneline'.split(),
                              cwd=settings.BASE_DIR,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              )

    except OSError:
        return None, None

    (out, err) = pr.communicate()
    if err:
        return None, None

    # Popen returns bytes; decode before splitting into commit hash and message
    out = out.decode('utf-8', errors='replace')
    outs = out.split()
    commit = outs[0]
    msg = ' '.join(outs[1:])
    return commit, msg


def parse_request_body(request):
    """ returns the parsed request body, handles gzip encoding """

    raw_body = request.body
    content_enc = request.META.get('HTTP_CONTENT_ENCODING')

    if content_enc == 'gzip':
        raw_body = zlib.decompress(raw_body)

    return json.loads(raw_body)
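
# Illustrative round-trip (sketch using Django's test client; the endpoint and
# payload are hypothetical): a client announcing 'Content-Encoding: gzip' is
# expected to send a body that zlib.decompress() can handle.
#
#     body = zlib.compress(json.dumps({'episodes': []}).encode('utf-8'))
#     response = client.post('/api/2/episodes', data=body,
#                            content_type='application/json',
#                            HTTP_CONTENT_ENCODING='gzip')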


def normalize_feed_url(url):
    """
    Converts any URL to http:// or ftp:// so that it can be
    used with "wget". If the URL cannot be converted (invalid
    or unknown scheme), "None" is returned.

    This will also normalize feed:// and itpc:// to http://.

    >>> normalize_feed_url('itpc://example.org/podcast.rss')
    'http://example.org/podcast.rss'

    If no URL scheme is defined (e.g. "curry.com"), we will
    simply assume the user intends to add a http:// feed.

    >>> normalize_feed_url('curry.com')
    'http://curry.com/'

    There are even some more shortcuts for advanced users
    and lazy typists (see the source for details).

    >>> normalize_feed_url('fb:43FPodcast')
    'http://feeds.feedburner.com/43FPodcast'

    It will also take care of converting the domain name to
    all-lowercase (because domains are not case sensitive):

    >>> normalize_feed_url('http://Example.COM/')
    'http://example.com/'

    Some other minimalistic changes are also taken care of,
    e.g. a ? with an empty query is removed:

    >>> normalize_feed_url('http://example.org/test?')
    'http://example.org/test'

    Leading and trailing whitespace is removed

    >>> normalize_feed_url(' http://example.com/podcast.rss ')
    'http://example.com/podcast.rss'

    HTTP Authentication is removed to protect users' privacy

    >>> normalize_feed_url('http://a@b:c@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://a:b:c@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
    'http://example.org/'
    >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
    'http://example.com/x%40y%3Az%40test.com/'
    >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
    'http://en.wikipedia.org/wiki/%C3%84'
    >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
    'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
    """
    url = url.strip()
    if not url or len(url) < 8:
        return None

    # This is a list of prefixes that you can use to minimize the amount of
    # keystrokes that you have to use.
    # Feel free to suggest other useful prefixes, and I'll add them here.
    PREFIXES = {
        'fb:': 'http://feeds.feedburner.com/%s',
        'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
        'sc:': 'http://soundcloud.com/%s',
        'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
        # YouTube playlists. To get a list of playlists per-user, use:
        # https://gdata.youtube.com/feeds/api/users/<username>/playlists
        'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
    }

    for prefix, expansion in PREFIXES.items():
        if url.startswith(prefix):
            url = expansion % (url[len(prefix):],)
            break

    # Assume HTTP for URLs without scheme
    if '://' not in url:
        url = 'http://' + url

    scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url)

    # Schemes and domain names are case insensitive
    scheme, netloc = scheme.lower(), netloc.lower()

    # encode non-encoded characters
    path = urllib.parse.quote(path, '/%')
    query = urllib.parse.quote_plus(query, ':&=')

    # Remove authentication to protect users' privacy
    netloc = netloc.rsplit('@', 1)[-1]

    # Normalize empty paths to "/"
    if path == '':
        path = '/'

    # feed://, itpc:// and itms:// are really http://
    if scheme in ('feed', 'itpc', 'itms'):
        scheme = 'http'

    if scheme not in ('http', 'https', 'ftp', 'file'):
        return None

    # urlunsplit might return "a slightly different, but equivalent URL"
    return urllib.parse.urlunsplit((scheme, netloc, path, query, fragment))


def partition(items, predicate=bool):
    a, b = itertools.tee((predicate(item), item) for item in items)
    return ((item for pred, item in a if not pred),
            (item for pred, item in b if pred))
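
# Illustrative example (not from the original module): partition() returns two
# generators: items failing the predicate first, then items passing it.
#
#     >>> odd, even = partition([1, 2, 3, 4], lambda x: x % 2 == 0)
#     >>> list(odd), list(even)
#     ([1, 3], [2, 4])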


def split_quoted(s):
    """ Splits a quoted string

    >>> split_quoted('some "quoted text"') == ['some', 'quoted text']
    True

    >>> split_quoted('"quoted text') == ['quoted', 'text']
    True

    # 4 backslashes here are 2 in the doctest, which is 1 in the actual string
    >>> split_quoted('text\\\\') == ['text']
    True
    """

    try:
        # split by whitespace, preserve quoted substrings
        keywords = shlex.split(s)

    except ValueError:
        # No closing quotation (eg '"text')
        # No escaped character (eg '\')
        s = s.replace('"', '').replace("'", '').replace('\\', '')
        keywords = shlex.split(s)

    return keywords


def edit_link(obj):
    """ Return the link to the Django Admin Edit page """
    return reverse('admin:%s_%s_change' % (obj._meta.app_label,
                                           obj._meta.model_name),
                   args=(obj.pk,))


def random_token(length=32):
    import random
    import string
    return "".join(random.sample(string.ascii_letters+string.digits, length))


def to_maxlength(cls, field, val):
    """ Cut val to the maximum length of cls's field """
    if val is None:
        return None

    max_length = cls._meta.get_field(field).max_length
    orig_length = len(val)
    if orig_length > max_length:
        val = val[:max_length]
        logger.warn('%s.%s length reduced from %d to %d',
                    cls.__name__, field, orig_length, max_length)

    return val
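
# Illustrative usage (Podcast and its 'title' CharField are hypothetical
# names): truncate a value so it fits into the model field before saving.
#
#     podcast.title = to_maxlength(Podcast, 'title', raw_title)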


def get_domain(url):
    """ Returns the domain name of a URL

    >>> get_domain('http://example.com')
    'example.com'

    >>> get_domain('https://example.com:80/my-podcast/feed.rss')
    'example.com'
    """
    netloc = urllib.parse.urlparse(url).netloc
    try:
        port_idx = netloc.index(':')
        return netloc[:port_idx]

    except ValueError:
        return netloc


def set_ordered_entries(obj, new_entries, existing, EntryClass,
                        value_name, parent_name):
    """ Update the object's entries to the given list

    'new_entries' should be a list of objects that are later wrapped in
    EntryClass instances. 'value_name' is the name of the EntryClass property
    that contains the values; 'parent_name' is the one that references obj.

    Entries that do not exist are created. Existing entries that are not in
    'new_entries' are deleted. """

    logger.info('%d existing entries', len(existing))
    logger.info('%d new entries', len(new_entries))

    with transaction.atomic():
        max_order = max([s.order for s in existing.values()] +
                        [len(new_entries)])
        logger.info('Renumbering entries starting from %d', max_order+1)
        for n, entry in enumerate(existing.values(), max_order+1):
            entry.order = n
            entry.save()

    logger.info('%d existing entries', len(existing))

    for n, entry in enumerate(new_entries):
        try:
            e = existing.pop(entry)
            logger.info('Updating existing entry %d: %s', n, entry)
            e.order = n
            e.save()
        except KeyError:
            logger.info('Creating new entry %d: %s', n, entry)
            try:
                links = {
                    value_name: entry,
                    parent_name: obj,
                }

                from mygpo.podcasts.models import ScopedModel
                if issubclass(EntryClass, ScopedModel):
                    links['scope'] = obj.scope

                EntryClass.objects.create(order=n, **links)
            except IntegrityError as ie:
                logger.warn('Could not create entry for %s: %s', obj, ie)

    with transaction.atomic():
        delete = [s.pk for s in existing.values()]
        logger.info('Deleting %d entries', len(delete))
        EntryClass.objects.filter(id__in=delete).delete()
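
# Illustrative call (sketch; the model and field names are hypothetical):
# replace a podcast list's entries with a new, ordered list of podcasts.
# 'existing' maps each current value to its entry object.
#
#     existing = {e.podcast: e for e in podcast_list.entries.all()}
#     set_ordered_entries(podcast_list, new_podcasts, existing,
#                         PodcastListEntry, 'podcast', 'podcastlist')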