Implement podcast indexing, improve searching
[mygpo.git] / mygpo / utils.py
blob09ba3ff4390a28c5d34ab12e1b91c8e3363b6969
1 # -*- coding: utf-8 -*-
3 # This file is part of my.gpodder.org.
5 # my.gpodder.org is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU Affero General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or (at your
8 # option) any later version.
10 # my.gpodder.org is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
13 # License for more details.
15 # You should have received a copy of the GNU Affero General Public License
16 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
19 import json
20 import functools
21 import types
22 import subprocess
23 import os
24 import operator
25 import sys
26 import re
27 import collections
28 import itertools
29 from datetime import datetime, timedelta, date
30 import time
31 import hashlib
32 import urllib.parse
33 import urllib.request, urllib.parse, urllib.error
34 import urllib.request, urllib.error, urllib.parse
35 import zlib
36 import shlex
38 from django.db import transaction, IntegrityError
39 from django.conf import settings
40 from django.core.urlresolvers import reverse
42 import logging
43 logger = logging.getLogger(__name__)
def daterange(from_date, to_date=None, leap=timedelta(days=1)):
    """Yield successive dates from from_date up to and including to_date.

    If to_date is omitted, it defaults to "now": datetime.utcnow() when
    from_date is a datetime, date.today() otherwise.

    >>> from_d = datetime(2010, 1, 1)
    >>> to_d = datetime(2010, 1, 5)
    >>> list(daterange(from_d, to_d))
    [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
    """
    if to_date is None:
        to_date = datetime.utcnow() if isinstance(from_date, datetime) \
            else date.today()

    current = from_date
    while current <= to_date:
        yield current
        current = current + leap
def format_time(value):
    """Format an offset (in seconds) as 'MM:SS' or 'HH:MM:SS'.

    The offset should be an integer or float value; anything that cannot
    be interpreted as a timestamp yields the empty string.

    >>> format_time(0)
    '00:00'
    >>> format_time(20)
    '00:20'
    >>> format_time(3600)
    '01:00:00'
    >>> format_time(10921)
    '03:02:01'
    """
    try:
        moment = datetime.utcfromtimestamp(value)
    except (ValueError, TypeError):
        return ''

    # only show the hour component when it is non-zero
    pattern = '%M:%S' if moment.hour == 0 else '%H:%M:%S'
    return moment.strftime(pattern)
def parse_time(value):
    """Parse a time value into a number of seconds.

    Accepts ints (returned unchanged), 'HH:MM:SS' or 'MM:SS' strings,
    and plain numeric strings (interpreted as seconds).

    Raises ValueError for None or empty input, and for strings that
    match none of the accepted formats.

    >>> parse_time(10)
    10

    >>> parse_time('05:10')  # 5*60+10
    310

    >>> parse_time('1:05:10')  # 60*60+5*60+10
    3910
    """
    if value is None:
        raise ValueError('None value in parse_time')

    if isinstance(value, int):
        # Don't need to parse already-converted time value
        return value

    if value == '':
        # fixed typo in the error message ("Empty valueing")
        raise ValueError('Empty value in parse_time')

    for format in ('%H:%M:%S', '%M:%S'):
        try:
            t = time.strptime(value, format)
            return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
        except ValueError:
            continue

    # fall back to a plain number of seconds, e.g. '90'
    return int(value)
def parse_bool(val):
    """Interpret a value as a boolean.

    Booleans pass through unchanged; a string is True exactly when it
    equals 'true' ignoring case; anything else is False.

    >>> parse_bool('True')
    True

    >>> parse_bool('true')
    True

    >>> parse_bool('')
    False
    """
    if isinstance(val, bool):
        return val
    return val.lower() == 'true'
def iterate_together(lists, key=lambda x: x, reverse=False):
    """
    takes ordered, possibly sparse, lists with similar items
    (some items have a corresponding item in the other lists, some don't).

    It then yield tuples of corresponding items, where one element is None is
    there is no corresponding entry in one of the lists.

    Tuples where both elements are None are skipped.

    The results of the key method are used for the comparisons.

    If reverse is True, the lists are expected to be sorted in reverse order
    and the results will also be sorted reverse

    >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
    [(1, 1), (2, None), (None, 3)]

    >>> list(iterate_together([[], []]))
    []

    >>> list(iterate_together([range(1, 3), range(3, 5)]))
    [(1, None), (2, None), (None, 3), (None, 4)]

    >>> list(iterate_together([range(1, 3), []]))
    [(1, None), (2, None)]

    >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
    [(1, None), (3, 3)]
    """

    # item: the value last taken from an iterator (None when exhausted);
    # more: whether the iterator may still yield further values
    Next = collections.namedtuple('Next', 'item more')
    # with reverse=True the roles of min/max and lt/gt are swapped
    min_ = min if not reverse else max
    lt_ = operator.lt if not reverse else operator.gt

    lists = [iter(l) for l in lists]

    def _take(it):
        # fetch the next non-None item from it, skipping None entries;
        # returns Next(None, False) once the iterator is exhausted
        try:
            i = next(it)
            while i is None:
                i = next(it)
            return Next(i, True)
        except StopIteration:
            return Next(None, False)

    def new_res():
        # fresh result template: one None slot per input list
        return [None]*len(lists)

    # take first bunch of items
    items = [_take(l) for l in lists]

    while any(i.item is not None or i.more for i in items):

        res = new_res()

        for n, item in enumerate(items):

            if item.item is None:
                continue

            # first candidate of this round: tentatively select it
            if all(x is None for x in res):
                res[n] = item.item
                continue

            # smallest key among the items selected so far
            min_v = min_(filter(lambda x: x is not None, res), key=key)

            if key(item.item) == key(min_v):
                # same key as the current minimum: emit side by side
                res[n] = item.item

            elif lt_(key(item.item), key(min_v)):
                # strictly smaller key: discard earlier picks, restart
                # the round with this item only
                res = new_res()
                res[n] = item.item

        # advance only the iterators whose item was emitted this round
        for n, x in enumerate(res):
            if x is not None:
                items[n] = _take(lists[n])

        yield tuple(res)
def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
    """Draw a one-line textual progress bar to stream (default stdout).

    Renders a bar of '#' characters with the percentage overlaid in the
    middle, followed by the raw counts and an optional status string.
    The line starts with a carriage return so repeated calls redraw in
    place on a terminal.
    """
    ratio = float(val)/max_val if max_val > 0 else 0

    # progress as percentage, e.g. '50.00%'
    percentage_str = '{val:.2%}'.format(val=ratio)

    # bar filled proportionally with '#'s
    filled = min(int(ratio*max_width), max_width)
    bar = '#' * filled + ' ' * (max_width-filled)

    # overlay the percentage centered in the bar
    start = int((max_width-len(percentage_str))/2)
    bar = bar[:start] + percentage_str + bar[start+len(percentage_str):]

    print('\r', end=' ', file=stream)
    print('[ %s ] %s / %s | %s' % (bar, val, max_val, status_str),
          end=' ', file=stream)
    stream.flush()
def set_cmp(items, simplify):
    """Build a list of unique items, using simplify(item) as equality key.

    Of several items that share the same key, the last one wins; the
    result preserves first-seen key order.

    Bug fix: the parameter was previously named `list`, which shadowed
    the builtin and made the final `list(...)` call raise TypeError.
    Positional callers are unaffected by the rename.
    """
    by_key = {simplify(x): x for x in items}
    return list(by_key.values())
def first(it):
    """Return the first non-None element of it, or None if there is none."""
    return next((elem for elem in it if elem is not None), None)
def intersect(a, b):
    """Return the elements common to a and b as a list (order arbitrary)."""
    return list(set(a).intersection(b))
# control characters: C0 (0-31) plus DEL and C1 (127-159); compiled once
# at import time instead of on every call
_CONTROL_CHAR_RE = re.compile('[%s]' % re.escape(
    ''.join(map(chr, list(range(0, 32)) + list(range(127, 160))))))


def remove_control_chars(s):
    """Return s with all C0/C1 control characters (and DEL) removed.

    Also removed the unused `all_chars` generator the previous version
    created and never consumed.
    """
    return _CONTROL_CHAR_RE.sub('', s)
def unzip(a):
    """Inverse of zip: turn a sequence of tuples into a tuple of lists."""
    groups = zip(*a)
    return tuple([list(group) for group in groups])
def parse_range(s, min, max, default=None):
    """Parse string s as a number clamped to the range [min, max].

    If the value is outside the given range, the closest bound is
    returned. Unparseable input yields `default` when given, otherwise
    the midpoint of the range converted to the type of `min`.

    >>> parse_range('5', 0, 10)
    5

    >>> parse_range('0', 5.0, 10)
    5.0

    >>> parse_range('15', 0, 10)
    10

    >>> parse_range('x', 0., 20)
    10.0

    >>> parse_range('x', 0, 20, 20)
    20
    """
    out_type = type(min)

    try:
        val = int(s)
    except (ValueError, TypeError):
        if default is not None:
            return default
        # midpoint of the range, in the type of the lower bound
        return out_type((max-min)/2)

    if val < min:
        return min
    if val > max:
        return max
    return val
def flatten(l):
    """Concatenate a list of lists into one flat list."""
    return list(itertools.chain.from_iterable(l))
def linearize(key, iterators, reverse=False):
    """Merge several individually sorted iterators into one sorted stream.

    `key` extracts the comparison value from each element; `reverse`
    merges inputs that are sorted in descending order.
    """
    pending = []
    for source in (iter(i) for i in iterators):
        try:
            pending.append((next(source), source))
        except StopIteration:
            continue

    while pending:
        # pick the iterator whose head element sorts first
        pending.sort(key=lambda pair: key(pair[0]), reverse=reverse)
        value, source = pending.pop(0)
        yield value
        try:
            pending.append((next(source), source))
        except StopIteration:
            pass
def get_timestamp(datetime_obj):
    """ Returns the timestamp as an int for the given datetime object

    Note: time.mktime interprets the datetime in the local timezone.
    """
    seconds = time.mktime(datetime_obj.timetuple())
    return int(seconds)
# matches http:// and https:// URL prefixes
re_url = re.compile('^https?://')


def is_url(string):
    """ Returns true if a string looks like an URL

    >>> is_url('http://example.com/some-path/file.xml')
    True

    >>> is_url('something else')
    False
    """
    return re_url.match(string) is not None
374 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
375 # this does not increase asymptotical complexity
376 # but can still waste more time than it saves.
def shortest_of(strings):
    """Return the shortest string; ties resolve to the earliest one."""
    # stable sort: the first minimal-length string ends up in front,
    # matching min(strings, key=len)
    return sorted(strings, key=len)[0]
def longest_substr(strings):
    """
    Returns the longest common substring of the given strings
    """
    substr = ''
    if not strings:
        return substr
    # only substrings of the shortest input can be common to all
    reference = min(strings, key=len)
    length = len(reference)
    # find a suitable slice i:j
    for i in range(length):
        # only consider candidates longer than the best match so far;
        # the end bound is length+1 (fixed off-by-one: with `length`
        # the slice could never reach the end of the reference string,
        # so e.g. longest_substr(['ab', 'ab']) returned 'a')
        for j in range(i + len(substr) + 1, length + 1):
            candidate = reference[i:j]
            if all(candidate in text for text in strings):
                substr = candidate
    return substr
def additional_value(it, gen_val, val_changed=lambda _: True):
    """ Provides an additional value to the elements, calculated when needed

    For the elements from the iterator, some additional value can be computed
    by gen_val (which might be an expensive computation).

    If the elements in the iterator are ordered so that some subsequent
    elements would generate the same additional value, val_changed can be
    provided, which receives the next element from the iterator and the
    previous additional value. If the element would generate the same
    additional value (val_changed returns False), its computation is skipped.

    >>> # get the next full hundred higher than x
    >>> # this will probably be an expensive calculation
    >>> next_hundred = lambda x: x + 100-(x % 100)

    >>> # returns True if h is not the value that next_hundred(x) would provide
    >>> # this should be a relatively cheap calculation, compared to the above
    >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100

    >>> xs = [0, 50, 100, 101, 199, 200, 201]
    >>> list(additional_value(xs, next_hundred, diff_hundred))
    [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
    """
    _sentinel = object()
    cached = _sentinel

    for elem in it:
        # recompute only when nothing is cached yet, or the element
        # invalidates the cached value
        if cached is _sentinel or val_changed(elem, cached):
            cached = gen_val(elem)

        yield (elem, cached)
def file_hash(f, h=hashlib.md5, block_size=2**20):
    """ returns the hash of the contents of a file

    The file is consumed in block_size chunks so large files do not
    need to fit into memory; the returned object is the (un-digested)
    hash instance.
    """
    digest = h()
    while True:
        chunk = f.read(block_size)
        if not chunk:
            break
        digest.update(chunk)

    return digest
def split_list(l, prop):
    """ split elements that satisfy a property, and those that don't

    Returns (match, nomatch). Replaced the previous O(n^2)
    `x not in match` membership scan with a single O(n) pass; for a
    pure predicate the results are identical.
    """
    match, nomatch = [], []
    for x in l:
        (match if prop(x) else nomatch).append(x)
    return match, nomatch
def sorted_chain(links, key, reverse=False):
    """ Takes a list of iters and iterates over sorted elements

    Each element of links should be a tuple of (sort_key, iterator). The
    elements of each iterator should be sorted already. sort_key should
    indicate the key of the first element and needs to be comparable to the
    result of key(elem).

    The function returns an iterator over the globally sorted elements that
    ensures that as few iterators as possible are evaluated: an iterator
    is only expanded once its placeholder key reaches the front of the
    merged list. """

    # mixed_list initially contains all placeholders; later evaluated
    # elements (from the iterators) are mixed in.
    # Each entry is (sort_key, payload, expand-flag): payload is either an
    # unexpanded iterator (expand=True) or an already-evaluated element
    # (expand=False).
    mixed_list = [(k, link, True) for k, link in links]

    while mixed_list:
        _, item, expand = mixed_list.pop(0)

        # found an element (from an earlier expansion), yield it
        if not expand:
            yield item
            continue

        # found an iter that needs to be expanded.
        # The iterator is fully consumed
        new_items = [(key(i), i, False) for i in item]

        # sort links (placeholders) and elements together
        mixed_list = sorted(mixed_list + new_items, key=lambda t: t[0],
                            reverse=reverse)
def url_add_authentication(url, username, password):
    """
    Adds authentication data (username, password) to a given
    URL in order to construct an authenticated URL.

    >>> url_add_authentication('https://host.com/', '', None)
    'https://host.com/'
    >>> url_add_authentication('http://example.org/', None, None)
    'http://example.org/'
    >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
    'telnet://foo:bar@host.com/'
    >>> url_add_authentication('ftp://example.org', 'billy', None)
    'ftp://billy@example.org'
    >>> url_add_authentication('ftp://example.org', 'billy', '')
    'ftp://billy:@example.org'
    >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
    'http://aa:bc@localhost/x'
    >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
    'http://i%2Fo:P@ss:@blubb.lan/u.html'
    >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
    'http://c:d@x.org/'
    >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
    'http://P@x:i%2F@cx.lan'
    >>> url_add_authentication('http://x.org/', 'a b', 'c d')
    'http://a%20b:c%20d@x.org/'
    """
    # an empty or missing username means there is nothing to add
    if username is None or username == '':
        return url

    # Relaxations of the strict quoting rules (bug 1521):
    # 1. Accept '@' in username and password
    # 2. Accept ':' in password only
    username = urllib.parse.quote(username, safe='@')

    if password is not None:
        password = urllib.parse.quote(password, safe='@:')
        auth_string = ':'.join((username, password))
    else:
        # username-only authentication, e.g. ftp://billy@example.org
        auth_string = username

    # drop any credentials already embedded in the URL before adding ours
    url = url_strip_authentication(url)

    url_parts = list(urllib.parse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL
    url_parts[1] = '@'.join((auth_string, url_parts[1]))

    return urllib.parse.urlunsplit(url_parts)
def urlopen(url, headers=None, data=None):
    """
    An URL opener with the User-agent set to gPodder (with version)

    Credentials embedded in the URL are stripped from it and passed to
    an HTTP Basic-Auth handler instead. Returns the opened response
    object from urllib.
    """
    username, password = username_password_from_url(url)
    if username is not None or password is not None:
        # move the credentials out of the URL and into an auth handler
        url = url_strip_authentication(url)
        password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
        opener = urllib.request.build_opener(handler)
    else:
        opener = urllib.request.build_opener()

    if headers is None:
        headers = {}
    else:
        # copy so the caller's dict is not mutated below
        headers = dict(headers)

    headers.update({'User-agent': settings.USER_AGENT})
    request = urllib.request.Request(url, data=data, headers=headers)
    return opener.open(request)
def username_password_from_url(url):
    r"""
    Returns a tuple (username,password) containing authentication
    data from the specified URL or (None,None) if no authentication
    data can be found in the URL.

    See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)

    >>> username_password_from_url('https://@host.com/')
    ('', None)
    >>> username_password_from_url('telnet://host.com/')
    (None, None)
    >>> username_password_from_url('ftp://foo:@host.com/')
    ('foo', '')
    >>> username_password_from_url('http://a:b@host.com/')
    ('a', 'b')
    >>> username_password_from_url(1)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url(None)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url('http://a@b:c@host.com/')
    ('a@b', 'c')
    >>> username_password_from_url('ftp://a:b:c@host.com/')
    ('a', 'b:c')
    >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
    ('i/o', 'P@ss:')
    >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
    ('österreich', None)
    >>> username_password_from_url('http://w%20x:y%20z@example.org/')
    ('w x', 'y z')
    >>> username_password_from_url('http://example.com/x@y:z@test.com/')
    (None, None)
    """
    # fixed 2to3 leftover: `type(url) not in (str, str)` checked str twice
    # and rejected str subclasses; isinstance is the correct test
    if not isinstance(url, str):
        raise ValueError('URL has to be a string or unicode object.')

    (username, password) = (None, None)

    (scheme, netloc, path, params, query, fragment) = urllib.parse.urlparse(url)

    if '@' in netloc:
        (authentication, netloc) = netloc.rsplit('@', 1)
        if ':' in authentication:
            (username, password) = authentication.split(':', 1)

            # RFC1738 dictates that we should not allow ['/', '@', ':']
            # characters in the username and password field (Section 3.1):
            #
            # 1. The "/" can't be in there at this point because of the way
            #    urlparse (which we use above) works.
            # 2. Due to gPodder bug 1521, we allow "@" in the username and
            #    password field. We use netloc.rsplit('@', 1), which will
            #    make sure that we split it at the last '@' in netloc.
            # 3. The colon must be excluded (RFC2617, Section 2) in the
            #    username, but is apparently allowed in the password. This
            #    is handled by the authentication.split(':', 1) above, and
            #    will cause any extraneous ':'s to be part of the password.
            username = urllib.parse.unquote(username)
            password = urllib.parse.unquote(password)
        else:
            username = urllib.parse.unquote(authentication)

    return (username, password)
def url_strip_authentication(url):
    """
    Strips authentication data from an URL. Returns the URL with
    the authentication data removed from it.

    >>> url_strip_authentication('https://host.com/')
    'https://host.com/'
    >>> url_strip_authentication('telnet://foo:bar@host.com/')
    'telnet://host.com/'
    >>> url_strip_authentication('ftp://billy@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('ftp://billy:@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('http://aa:bc@localhost/x')
    'http://localhost/x'
    >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
    'http://blubb.lan/u.html'
    >>> url_strip_authentication('http://c:d@x.org/')
    'http://x.org/'
    >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
    'http://cx.lan'
    >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
    'http://example.com/'
    """
    scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url)

    # keep only what follows the last '@' in the host part;
    # rpartition leaves netloc untouched when there is no '@'
    _, _, netloc = netloc.rpartition('@')

    return urllib.parse.urlunsplit((scheme, netloc, path, query, fragment))
# Native filesystem encoding detection
# (e.g. 'utf-8'; used when converting file names between bytes and str)
encoding = sys.getfilesystemencoding()
def get_git_head():
    """ returns the commit and message of the current git HEAD

    Returns (None, None) when git cannot be executed or reports an
    error. Runs git in the project's BASE_DIR.
    """

    try:
        # --oneline prints a single "<short-hash> <subject>" line
        pr = subprocess.Popen('/usr/bin/git log -n 1 --oneline'.split(),
                              cwd = settings.BASE_DIR,
                              stdout = subprocess.PIPE,
                              stderr = subprocess.PIPE,
                              )

    except OSError:
        # git binary missing or not executable
        return None, None

    (out, err) = pr.communicate()
    if err:
        return None, None

    # first whitespace-separated token is the commit hash,
    # the remainder is the commit message
    outs = [o.decode('utf-8') for o in out.split()]
    commit = outs[0]
    msg = ' ' .join(outs[1:])
    return commit, msg
def parse_request_body(request):
    """ returns the parsed request body, handles gzip encoding

    Reads the raw request body, decompresses it when the client sent
    Content-Encoding: gzip, and returns the JSON-decoded result.
    """

    raw_body = request.body
    content_enc = request.META.get('HTTP_CONTENT_ENCODING')

    if content_enc == 'gzip':
        # wbits=MAX_WBITS|32 auto-detects gzip and zlib framing;
        # a plain zlib.decompress() rejects real gzip payloads
        # (they carry a gzip header, not a zlib one)
        raw_body = zlib.decompress(raw_body, zlib.MAX_WBITS | 32)

    return json.loads(raw_body.decode('utf-8'))
def normalize_feed_url(url):
    """
    Converts any URL to http:// or ftp:// so that it can be
    used with "wget". If the URL cannot be converted (invalid
    or unknown scheme), "None" is returned.

    This will also normalize feed:// and itpc:// to http://.

    >>> normalize_feed_url('itpc://example.org/podcast.rss')
    'http://example.org/podcast.rss'

    If no URL scheme is defined (e.g. "curry.com"), we will
    simply assume the user intends to add a http:// feed.

    >>> normalize_feed_url('curry.com')
    'http://curry.com/'

    There are even some more shortcuts for advanced users
    and lazy typists (see the source for details).

    >>> normalize_feed_url('fb:43FPodcast')
    'http://feeds.feedburner.com/43FPodcast'

    It will also take care of converting the domain name to
    all-lowercase (because domains are not case sensitive):

    >>> normalize_feed_url('http://Example.COM/')
    'http://example.com/'

    Some other minimalistic changes are also taken care of,
    e.g. a ? with an empty query is removed:

    >>> normalize_feed_url('http://example.org/test?')
    'http://example.org/test'

    Leading and trailing whitespace is removed

    >>> normalize_feed_url(' http://example.com/podcast.rss ')
    'http://example.com/podcast.rss'

    HTTP Authentication is removed to protect users' privacy

    >>> normalize_feed_url('http://a@b:c@host.com/')
    'http://host.com/'
    """
    url = url.strip()
    if not url or len(url) < 8:
        return None

    # This is a list of prefixes that you can use to minimize the amount of
    # keystrokes that you have to use.
    # Feel free to suggest other useful prefixes, and I'll add them here.
    PREFIXES = {
        'fb:': 'http://feeds.feedburner.com/%s',
        'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
        'sc:': 'http://soundcloud.com/%s',
        'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
        # YouTube playlists. To get a list of playlists per-user, use:
        # https://gdata.youtube.com/feeds/api/users/<username>/playlists
        'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
    }

    for prefix, expansion in PREFIXES.items():
        if url.startswith(prefix):
            url = expansion % (url[len(prefix):],)
            break

    # Assume HTTP for URLs without scheme
    if '://' not in url:
        url = 'http://' + url

    scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url)

    # Schemes and domain names are case insensitive
    scheme = scheme.lower()
    netloc = netloc.lower()

    # encode non-encoded characters
    path = urllib.parse.quote(path, '/%')
    query = urllib.parse.quote_plus(query, ':&=')

    # Remove authentication to protect users' privacy
    netloc = netloc.rsplit('@', 1)[-1]

    # Normalize empty paths to "/"
    if not path:
        path = '/'

    # feed://, itpc:// and itms:// are really http://
    if scheme in ('feed', 'itpc', 'itms'):
        scheme = 'http'

    if scheme not in ('http', 'https', 'ftp', 'file'):
        return None

    # urlunsplit might return "a slighty different, but equivalent URL"
    return urllib.parse.urlunsplit((scheme, netloc, path, query, fragment))
def partition(items, predicate=bool):
    """Split items into (non-matching, matching) lazy iterators.

    The first iterator yields elements for which predicate is falsy,
    the second those for which it is truthy; the input is only
    traversed once, via itertools.tee.
    """
    evaluated = ((predicate(item), item) for item in items)
    left, right = itertools.tee(evaluated)
    falsy = (item for matched, item in left if not matched)
    truthy = (item for matched, item in right if matched)
    return falsy, truthy
def split_quoted(s):
    """ Splits a quoted string

    >>> split_quoted('some "quoted text"') == ['some', 'quoted text']
    True

    >>> split_quoted('"quoted text') == ['quoted', 'text']
    True

    # 4 quotes here are 2 in the doctest is one in the actual string
    >>> split_quoted('text\\\\') == ['text']
    True
    """
    try:
        # split by whitespace, preserve quoted substrings
        return shlex.split(s)

    except ValueError:
        # shlex rejects unbalanced quotes (eg '"text') and trailing
        # escapes (eg '\'); drop the offending characters and retry
        cleaned = s.replace('"', '').replace("'", '').replace('\\', '')
        return shlex.split(cleaned)
def edit_link(obj):
    """ Return the link to the Django Admin Edit page

    obj must be a saved model instance (uses obj.pk and obj._meta).
    """
    # admin URL names follow the pattern admin:<app_label>_<model>_change
    return reverse('admin:%s_%s_change' % (obj._meta.app_label,
                                           obj._meta.model_name),
                   args=(obj.pk,))
def random_token(length=32):
    """Return a random alphanumeric token of the given length.

    Uses the `secrets` module for cryptographically strong randomness.
    The previous random.sample-based version was unsuitable for tokens:
    it used the non-cryptographic `random` module, never repeated a
    character, and raised ValueError for lengths over 62.
    """
    import string
    import secrets
    alphabet = string.ascii_letters + string.digits
    return "".join(secrets.choice(alphabet) for _ in range(length))
def to_maxlength(cls, field, val):
    """ Cut val to the maximum length of cls's field

    Returns None unchanged; logs a warning when truncation happens.
    """
    if val is None:
        return None

    max_length = cls._meta.get_field(field).max_length
    orig_length = len(val)
    if orig_length > max_length:
        val = val[:max_length]
        # logger.warn is a deprecated alias of logger.warning
        logger.warning('%s.%s length reduced from %d to %d',
                       cls.__name__, field, orig_length, max_length)

    return val
def get_domain(url):
    """ Returns the domain name of a URL

    >>> get_domain('http://example.com')
    'example.com'

    >>> get_domain('https://example.com:80/my-podcast/feed.rss')
    'example.com'
    """
    netloc = urllib.parse.urlparse(url).netloc
    # strip an optional :port suffix; partition leaves the host
    # unchanged when there is no colon
    host, _, _ = netloc.partition(':')
    return host
def set_ordered_entries(obj, new_entries, existing, EntryClass,
                        value_name, parent_name):
    """ Update the object's entries to the given list

    'new_entries' should be a list of objects that are later wrapped in
    EntryClass instances. 'value_name' is the name of the EntryClass property
    that contains the values; 'parent_name' is the one that references obj.

    Entries that do not exist are created. Existing entries that are not in
    'new_entries' are deleted.

    'existing' is a dict mapping entry values to their current EntryClass
    instances; it is mutated (entries are popped as they are matched). """

    logger.info('%d existing entries', len(existing))
    logger.info('%d new entries', len(new_entries))

    with transaction.atomic():
        # first move all existing entries to order numbers above both the
        # old and the new range, so the renumbering below cannot collide
        # with an already-used order value
        max_order = max([s.order for s in existing.values()] +
                        [len(new_entries)])
        logger.info('Renumbering entries starting from %d', max_order+1)
        for n, entry in enumerate(existing.values(), max_order+1):
            entry.order = n
            entry.save()

        logger.info('%d existing entries', len(existing))

        for n, entry in enumerate(new_entries):
            try:
                # reuse the existing entry for this value if there is one
                e = existing.pop(entry)
                logger.info('Updating existing entry %d: %s', n, entry)
                e.order = n
                e.save()
            except KeyError:
                logger.info('Creating new entry %d: %s', n, entry)
                try:
                    links = {
                        value_name: entry,
                        parent_name: obj,
                    }
                    # local import, presumably to avoid a circular import
                    # at module load time -- TODO confirm
                    from mygpo.podcasts.models import ScopedModel
                    if issubclass(EntryClass, ScopedModel):
                        links['scope'] = obj.scope

                    EntryClass.objects.create(order=n, **links)
                except IntegrityError as ie:
                    logger.warn('Could not create enry for %s: %s', obj, ie)

    # whatever is left in 'existing' was not in new_entries: delete it
    with transaction.atomic():
        delete = [s.pk for s in existing.values()]
        logger.info('Deleting %d entries', len(delete))
        EntryClass.objects.filter(id__in=delete).delete()