Implement podcast indexing, improve searching
[mygpo.git] / mygpo / utils.py
blob09ba3ff4390a28c5d34ab12e1b91c8e3363b6969
1 # -*- coding: utf-8 -*-
3 # This file is part of my.gpodder.org.
5 # my.gpodder.org is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU Affero General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or (at your
8 # option) any later version.
10 # my.gpodder.org is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
13 # License for more details.
15 # You should have received a copy of the GNU Affero General Public License
16 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
19 import json
20 import functools
21 import types
22 import subprocess
23 import os
24 import operator
25 import sys
26 import re
27 import collections
28 import itertools
29 from datetime import datetime, timedelta, date
30 import time
31 import hashlib
32 import urllib.parse
33 import urllib.request, urllib.parse, urllib.error
34 import urllib.request, urllib.error, urllib.parse
35 import zlib
36 import shlex
38 from django.db import transaction, IntegrityError
39 from django.conf import settings
40 from django.core.urlresolvers import reverse
42 import logging
43 logger = logging.getLogger(__name__)
def daterange(from_date, to_date=None, leap=timedelta(days=1)):
    """Yield successive dates from from_date up to and including to_date.

    If to_date is omitted, it defaults to "now": datetime.utcnow() when
    from_date is a datetime, date.today() otherwise.

    >>> from_d = datetime(2010, 1, 1)
    >>> to_d = datetime(2010, 1, 5)
    >>> list(daterange(from_d, to_d))
    [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
    """
    if to_date is None:
        to_date = datetime.utcnow() if isinstance(from_date, datetime) \
            else date.today()

    current = from_date
    while current <= to_date:
        yield current
        current = current + leap
def format_time(value):
    """Format an offset (in seconds) as 'MM:SS' or 'HH:MM:SS'.

    The offset should be an integer or float value; anything that cannot
    be interpreted as a timestamp yields the empty string.

    >>> format_time(0)
    '00:00'
    >>> format_time(20)
    '00:20'
    >>> format_time(3600)
    '01:00:00'
    >>> format_time(10921)
    '03:02:01'
    """
    try:
        moment = datetime.utcfromtimestamp(value)
    except (ValueError, TypeError):
        return ''

    # only show the hour component when it is non-zero
    pattern = '%M:%S' if moment.hour == 0 else '%H:%M:%S'
    return moment.strftime(pattern)
def parse_time(value):
    """Parse a time value into a number of seconds.

    Accepts ints (returned unchanged), 'HH:MM:SS' or 'MM:SS' strings,
    and plain numeric strings (interpreted as seconds).

    Raises ValueError for None or empty input, and for strings that
    match none of the accepted formats.

    >>> parse_time(10)
    10

    >>> parse_time('05:10')  # 5*60+10
    310

    >>> parse_time('1:05:10')  # 60*60+5*60+10
    3910
    """
    if value is None:
        raise ValueError('None value in parse_time')

    if isinstance(value, int):
        # Don't need to parse already-converted time value
        return value

    if value == '':
        # fixed typo in the error message ("Empty valueing")
        raise ValueError('Empty value in parse_time')

    for format in ('%H:%M:%S', '%M:%S'):
        try:
            t = time.strptime(value, format)
            return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
        except ValueError:
            continue

    # fall back to a plain number of seconds, e.g. '90'
    return int(value)
def parse_bool(val):
    """Interpret a value as a boolean.

    Booleans pass through unchanged; a string is True exactly when it
    equals 'true' ignoring case; anything else is False.

    >>> parse_bool('True')
    True

    >>> parse_bool('true')
    True

    >>> parse_bool('')
    False
    """
    if isinstance(val, bool):
        return val
    return val.lower() == 'true'
def iterate_together(lists, key=lambda x: x, reverse=False):
    """
    takes ordered, possibly sparse, lists with similar items
    (some items have a corresponding item in the other lists, some don't).

    It then yield tuples of corresponding items, where one element is None is
    there is no corresponding entry in one of the lists.

    Tuples where both elements are None are skipped.

    The results of the key method are used for the comparisons.

    If reverse is True, the lists are expected to be sorted in reverse order
    and the results will also be sorted reverse

    >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
    [(1, 1), (2, None), (None, 3)]

    >>> list(iterate_together([[], []]))
    []

    >>> list(iterate_together([range(1, 3), range(3, 5)]))
    [(1, None), (2, None), (None, 3), (None, 4)]

    >>> list(iterate_together([range(1, 3), []]))
    [(1, None), (2, None)]

    >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
    [(1, None), (3, 3)]
    """

    # item: the value last taken from an iterator (None when exhausted);
    # more: whether the iterator may still yield further values
    Next = collections.namedtuple('Next', 'item more')
    # with reverse=True the roles of min/max and lt/gt are swapped
    min_ = min if not reverse else max
    lt_ = operator.lt if not reverse else operator.gt

    lists = [iter(l) for l in lists]

    def _take(it):
        # fetch the next non-None item from it, skipping None entries;
        # returns Next(None, False) once the iterator is exhausted
        try:
            i = next(it)
            while i is None:
                i = next(it)
            return Next(i, True)
        except StopIteration:
            return Next(None, False)

    def new_res():
        # fresh result template: one None slot per input list
        return [None]*len(lists)

    # take first bunch of items
    items = [_take(l) for l in lists]

    while any(i.item is not None or i.more for i in items):

        res = new_res()

        for n, item in enumerate(items):

            if item.item is None:
                continue

            # first candidate of this round: tentatively select it
            if all(x is None for x in res):
                res[n] = item.item
                continue

            # smallest key among the items selected so far
            min_v = min_(filter(lambda x: x is not None, res), key=key)

            if key(item.item) == key(min_v):
                # same key as the current minimum: emit side by side
                res[n] = item.item

            elif lt_(key(item.item), key(min_v)):
                # strictly smaller key: discard earlier picks, restart
                # the round with this item only
                res = new_res()
                res[n] = item.item

        # advance only the iterators whose item was emitted this round
        for n, x in enumerate(res):
            if x is not None:
                items[n] = _take(lists[n])

        yield tuple(res)
def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
    """Draw a one-line textual progress bar to stream (default stdout).

    Renders a bar of '#' characters with the percentage overlaid in the
    middle, followed by the raw counts and an optional status string.
    The line starts with a carriage return so repeated calls redraw in
    place on a terminal.
    """
    ratio = float(val)/max_val if max_val > 0 else 0

    # progress as percentage, e.g. '50.00%'
    percentage_str = '{val:.2%}'.format(val=ratio)

    # bar filled proportionally with '#'s
    filled = min(int(ratio*max_width), max_width)
    bar = '#' * filled + ' ' * (max_width-filled)

    # overlay the percentage centered in the bar
    start = int((max_width-len(percentage_str))/2)
    bar = bar[:start] + percentage_str + bar[start+len(percentage_str):]

    print('\r', end=' ', file=stream)
    print('[ %s ] %s / %s | %s' % (bar, val, max_val, status_str),
          end=' ', file=stream)
    stream.flush()
def set_cmp(items, simplify):
    """Build a list of unique items, using simplify(item) as equality key.

    Of several items that share the same key, the last one wins; the
    result preserves first-seen key order.

    Bug fix: the parameter was previously named `list`, which shadowed
    the builtin and made the final `list(...)` call raise TypeError.
    Positional callers are unaffected by the rename.
    """
    by_key = {simplify(x): x for x in items}
    return list(by_key.values())
def first(it):
    """Return the first non-None element of it, or None if there is none."""
    return next((elem for elem in it if elem is not None), None)
def intersect(a, b):
    """Return the elements common to a and b as a list (order arbitrary)."""
    return list(set(a).intersection(b))
# control characters: C0 (0-31) plus DEL and C1 (127-159); compiled once
# at import time instead of on every call
_CONTROL_CHAR_RE = re.compile('[%s]' % re.escape(
    ''.join(map(chr, list(range(0, 32)) + list(range(127, 160))))))


def remove_control_chars(s):
    """Return s with all C0/C1 control characters (and DEL) removed.

    Also removed the unused `all_chars` generator the previous version
    created and never consumed.
    """
    return _CONTROL_CHAR_RE.sub('', s)
def unzip(a):
    """Inverse of zip: turn a sequence of tuples into a tuple of lists."""
    groups = zip(*a)
    return tuple([list(group) for group in groups])
def parse_range(s, min, max, default=None):
    """Parse string s as a number clamped to the range [min, max].

    If the value is outside the given range, the closest bound is
    returned. Unparseable input yields `default` when given, otherwise
    the midpoint of the range converted to the type of `min`.

    >>> parse_range('5', 0, 10)
    5

    >>> parse_range('0', 5.0, 10)
    5.0

    >>> parse_range('15', 0, 10)
    10

    >>> parse_range('x', 0., 20)
    10.0

    >>> parse_range('x', 0, 20, 20)
    20
    """
    out_type = type(min)

    try:
        val = int(s)
    except (ValueError, TypeError):
        if default is not None:
            return default
        # midpoint of the range, in the type of the lower bound
        return out_type((max-min)/2)

    if val < min:
        return min
    if val > max:
        return max
    return val
def flatten(l):
    """Concatenate a list of lists into one flat list."""
    return list(itertools.chain.from_iterable(l))
def linearize(key, iterators, reverse=False):
    """Merge several individually sorted iterators into one sorted stream.

    `key` extracts the comparison value from each element; `reverse`
    merges inputs that are sorted in descending order.
    """
    pending = []
    for source in (iter(i) for i in iterators):
        try:
            pending.append((next(source), source))
        except StopIteration:
            continue

    while pending:
        # pick the iterator whose head element sorts first
        pending.sort(key=lambda pair: key(pair[0]), reverse=reverse)
        value, source = pending.pop(0)
        yield value
        try:
            pending.append((next(source), source))
        except StopIteration:
            pass
def get_timestamp(datetime_obj):
    """ Returns the timestamp as an int for the given datetime object

    Note: time.mktime interprets the datetime in the local timezone.
    """
    seconds = time.mktime(datetime_obj.timetuple())
    return int(seconds)
# matches http:// and https:// URL prefixes
re_url = re.compile('^https?://')


def is_url(string):
    """ Returns true if a string looks like an URL

    >>> is_url('http://example.com/some-path/file.xml')
    True

    >>> is_url('something else')
    False
    """
    return re_url.match(string) is not None
374 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
375 # this does not increase asymptotical complexity
376 # but can still waste more time than it saves.
def shortest_of(strings):
    """Return the shortest string; ties resolve to the earliest one."""
    # stable sort: the first minimal-length string ends up in front,
    # matching min(strings, key=len)
    return sorted(strings, key=len)[0]
def longest_substr(strings):
    """
    Returns the longest common substring of the given strings
    """
    substr = ''
    if not strings:
        return substr
    # only substrings of the shortest input can be common to all
    reference = min(strings, key=len)
    length = len(reference)
    # find a suitable slice i:j
    for i in range(length):
        # only consider candidates longer than the best match so far;
        # the end bound is length+1 (fixed off-by-one: with `length`
        # the slice could never reach the end of the reference string,
        # so e.g. longest_substr(['ab', 'ab']) returned 'a')
        for j in range(i + len(substr) + 1, length + 1):
            candidate = reference[i:j]
            if all(candidate in text for text in strings):
                substr = candidate
    return substr
def additional_value(it, gen_val, val_changed=lambda _: True):
    """ Provides an additional value to the elements, calculated when needed

    For the elements from the iterator, some additional value can be computed
    by gen_val (which might be an expensive computation).

    If the elements in the iterator are ordered so that some subsequent
    elements would generate the same additional value, val_changed can be
    provided, which receives the next element from the iterator and the
    previous additional value. If the element would generate the same
    additional value (val_changed returns False), its computation is skipped.

    >>> # get the next full hundred higher than x
    >>> # this will probably be an expensive calculation
    >>> next_hundred = lambda x: x + 100-(x % 100)

    >>> # returns True if h is not the value that next_hundred(x) would provide
    >>> # this should be a relatively cheap calculation, compared to the above
    >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100

    >>> xs = [0, 50, 100, 101, 199, 200, 201]
    >>> list(additional_value(xs, next_hundred, diff_hundred))
    [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
    """
    _sentinel = object()
    cached = _sentinel

    for elem in it:
        # recompute only when nothing is cached yet, or the element
        # invalidates the cached value
        if cached is _sentinel or val_changed(elem, cached):
            cached = gen_val(elem)

        yield (elem, cached)
def file_hash(f, h=hashlib.md5, block_size=2**20):
    """ returns the hash of the contents of a file

    The file is consumed in block_size chunks so large files do not
    need to fit into memory; the returned object is the (un-digested)
    hash instance.
    """
    digest = h()
    while True:
        chunk = f.read(block_size)
        if not chunk:
            break
        digest.update(chunk)

    return digest
def split_list(l, prop):
    """ split elements that satisfy a property, and those that don't

    Returns (match, nomatch). Replaced the previous O(n^2)
    `x not in match` membership scan with a single O(n) pass; for a
    pure predicate the results are identical.
    """
    match, nomatch = [], []
    for x in l:
        (match if prop(x) else nomatch).append(x)
    return match, nomatch
def sorted_chain(links, key, reverse=False):
    """ Takes a list of iters and iterates over sorted elements

    Each element of links should be a tuple of (sort_key, iterator). The
    elements of each iterator should be sorted already. sort_key should
    indicate the key of the first element and needs to be comparable to the
    result of key(elem).

    The function returns an iterator over the globally sorted elements that
    ensures that as few iterators as possible are evaluated: an iterator
    is only expanded once its placeholder key reaches the front of the
    merged list. """

    # mixed_list initially contains all placeholders; later evaluated
    # elements (from the iterators) are mixed in.
    # Each entry is (sort_key, payload, expand-flag): payload is either an
    # unexpanded iterator (expand=True) or an already-evaluated element
    # (expand=False).
    mixed_list = [(k, link, True) for k, link in links]

    while mixed_list:
        _, item, expand = mixed_list.pop(0)

        # found an element (from an earlier expansion), yield it
        if not expand:
            yield item
            continue

        # found an iter that needs to be expanded.
        # The iterator is fully consumed
        new_items = [(key(i), i, False) for i in item]

        # sort links (placeholders) and elements together
        mixed_list = sorted(mixed_list + new_items, key=lambda t: t[0],
                            reverse=reverse)
def url_add_authentication(url, username, password):
    """
    Adds authentication data (username, password) to a given
    URL in order to construct an authenticated URL.

    >>> url_add_authentication('https://host.com/', '', None)
    'https://host.com/'
    >>> url_add_authentication('http://example.org/', None, None)
    'http://example.org/'
    >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
    'telnet://foo:bar@host.com/'
    >>> url_add_authentication('ftp://example.org', 'billy', None)
    'ftp://billy@example.org'
    >>> url_add_authentication('ftp://example.org', 'billy', '')
    'ftp://billy:@example.org'
    >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
    'http://aa:bc@localhost/x'
    >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
    'http://i%2Fo:P@ss:@blubb.lan/u.html'
    >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
    'http://c:d@x.org/'
    >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
    'http://P@x:i%2F@cx.lan'
    >>> url_add_authentication('http://x.org/', 'a b', 'c d')
    'http://a%20b:c%20d@x.org/'
    """
    # an empty or missing username means there is nothing to add
    if username is None or username == '':
        return url

    # Relaxations of the strict quoting rules (bug 1521):
    # 1. Accept '@' in username and password
    # 2. Accept ':' in password only
    username = urllib.parse.quote(username, safe='@')

    if password is not None:
        password = urllib.parse.quote(password, safe='@:')
        auth_string = ':'.join((username, password))
    else:
        # username-only authentication, e.g. ftp://billy@example.org
        auth_string = username

    # drop any credentials already embedded in the URL before adding ours
    url = url_strip_authentication(url)

    url_parts = list(urllib.parse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL
    url_parts[1] = '@'.join((auth_string, url_parts[1]))

    return urllib.parse.urlunsplit(url_parts)
def urlopen(url, headers=None, data=None):
    """
    An URL opener with the User-agent set to gPodder (with version)

    Credentials embedded in the URL are stripped from it and passed to
    an HTTP Basic-Auth handler instead. Returns the opened response
    object from urllib.
    """
    username, password = username_password_from_url(url)
    if username is not None or password is not None:
        # move the credentials out of the URL and into an auth handler
        url = url_strip_authentication(url)
        password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
        opener = urllib.request.build_opener(handler)
    else:
        opener = urllib.request.build_opener()

    if headers is None:
        headers = {}
    else:
        # copy so the caller's dict is not mutated below
        headers = dict(headers)

    headers.update({'User-agent': settings.USER_AGENT})
    request = urllib.request.Request(url, data=data, headers=headers)
    return opener.open(request)
def username_password_from_url(url):
    r"""
    Returns a tuple (username,password) containing authentication
    data from the specified URL or (None,None) if no authentication
    data can be found in the URL.

    See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)

    >>> username_password_from_url('https://@host.com/')
    ('', None)
    >>> username_password_from_url('telnet://host.com/')
    (None, None)
    >>> username_password_from_url('ftp://foo:@host.com/')
    ('foo', '')
    >>> username_password_from_url('http://a:b@host.com/')
    ('a', 'b')
    >>> username_password_from_url(1)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url(None)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url('http://a@b:c@host.com/')
    ('a@b', 'c')
    >>> username_password_from_url('ftp://a:b:c@host.com/')
    ('a', 'b:c')
    >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
    ('i/o', 'P@ss:')
    >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
    ('österreich', None)
    >>> username_password_from_url('http://w%20x:y%20z@example.org/')
    ('w x', 'y z')
    >>> username_password_from_url('http://example.com/x@y:z@test.com/')
    (None, None)
    """
    # fixed 2to3 leftover: `type(url) not in (str, str)` checked str twice
    # and rejected str subclasses; isinstance is the correct test
    if not isinstance(url, str):
        raise ValueError('URL has to be a string or unicode object.')

    (username, password) = (None, None)

    (scheme, netloc, path, params, query, fragment) = urllib.parse.urlparse(url)

    if '@' in netloc:
        (authentication, netloc) = netloc.rsplit('@', 1)
        if ':' in authentication:
            (username, password) = authentication.split(':', 1)

            # RFC1738 dictates that we should not allow ['/', '@', ':']
            # characters in the username and password field (Section 3.1):
            #
            # 1. The "/" can't be in there at this point because of the way
            #    urlparse (which we use above) works.
            # 2. Due to gPodder bug 1521, we allow "@" in the username and
            #    password field. We use netloc.rsplit('@', 1), which will
            #    make sure that we split it at the last '@' in netloc.
            # 3. The colon must be excluded (RFC2617, Section 2) in the
            #    username, but is apparently allowed in the password. This
            #    is handled by the authentication.split(':', 1) above, and
            #    will cause any extraneous ':'s to be part of the password.
            username = urllib.parse.unquote(username)
            password = urllib.parse.unquote(password)
        else:
            username = urllib.parse.unquote(authentication)

    return (username, password)
def url_strip_authentication(url):
    """
    Strips authentication data from an URL. Returns the URL with
    the authentication data removed from it.

    >>> url_strip_authentication('https://host.com/')
    'https://host.com/'
    >>> url_strip_authentication('telnet://foo:bar@host.com/')
    'telnet://host.com/'
    >>> url_strip_authentication('ftp://billy@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('ftp://billy:@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('http://aa:bc@localhost/x')
    'http://localhost/x'
    >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
    'http://blubb.lan/u.html'
    >>> url_strip_authentication('http://c:d@x.org/')
    'http://x.org/'
    >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
    'http://cx.lan'
    >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
    'http://example.com/'
    """
    scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url)

    # keep only what follows the last '@' in the host part;
    # rpartition leaves netloc untouched when there is no '@'
    _, _, netloc = netloc.rpartition('@')

    return urllib.parse.urlunsplit((scheme, netloc, path, query, fragment))
# Native filesystem encoding detection
# (e.g. 'utf-8'; used when converting file names between bytes and str)
encoding = sys.getfilesystemencoding()
def get_git_head():
    """ returns the commit and message of the current git HEAD

    Returns (None, None) when git cannot be executed or reports an
    error. Runs git in the project's BASE_DIR.
    """

    try:
        # --oneline prints a single "<short-hash> <subject>" line
        pr = subprocess.Popen('/usr/bin/git log -n 1 --oneline'.split(),
                              cwd = settings.BASE_DIR,
                              stdout = subprocess.PIPE,
                              stderr = subprocess.PIPE,
                              )

    except OSError:
        # git binary missing or not executable
        return None, None

    (out, err) = pr.communicate()
    if err:
        return None, None

    # first whitespace-separated token is the commit hash,
    # the remainder is the commit message
    outs = [o.decode('utf-8') for o in out.split()]
    commit = outs[0]
    msg = ' ' .join(outs[1:])
    return commit, msg
def parse_request_body(request):
    """ returns the parsed request body, handles gzip encoding

    Reads the raw request body, decompresses it when the client sent
    Content-Encoding: gzip, and returns the JSON-decoded result.
    """

    raw_body = request.body
    content_enc = request.META.get('HTTP_CONTENT_ENCODING')

    if content_enc == 'gzip':
        # wbits=MAX_WBITS|32 auto-detects gzip and zlib framing;
        # a plain zlib.decompress() rejects real gzip payloads
        # (they carry a gzip header, not a zlib one)
        raw_body = zlib.decompress(raw_body, zlib.MAX_WBITS | 32)

    return json.loads(raw_body.decode('utf-8'))
def normalize_feed_url(url):
    """
    Converts any URL to http:// or ftp:// so that it can be
    used with "wget". If the URL cannot be converted (invalid
    or unknown scheme), "None" is returned.

    This will also normalize feed:// and itpc:// to http://.

    >>> normalize_feed_url('itpc://example.org/podcast.rss')
    'http://example.org/podcast.rss'

    If no URL scheme is defined (e.g. "curry.com"), we will
    simply assume the user intends to add a http:// feed.

    >>> normalize_feed_url('curry.com')
    'http://curry.com/'

    There are even some more shortcuts for advanced users
    and lazy typists (see the source for details).

    >>> normalize_feed_url('fb:43FPodcast')
    'http://feeds.feedburner.com/43FPodcast'

    It will also take care of converting the domain name to
    all-lowercase (because domains are not case sensitive):

    >>> normalize_feed_url('http://Example.COM/')
    'http://example.com/'

    Some other minimalistic changes are also taken care of,
    e.g. a ? with an empty query is removed:

    >>> normalize_feed_url('http://example.org/test?')
    'http://example.org/test'

    Leading and trailing whitespace is removed

    >>> normalize_feed_url(' http://example.com/podcast.rss ')
    'http://example.com/podcast.rss'

    HTTP Authentication is removed to protect users' privacy

    >>> normalize_feed_url('http://a@b:c@host.com/')
    'http://host.com/'
    """
    url = url.strip()
    if not url or len(url) < 8:
        return None

    # This is a list of prefixes that you can use to minimize the amount of
    # keystrokes that you have to use.
    # Feel free to suggest other useful prefixes, and I'll add them here.
    PREFIXES = {
        'fb:': 'http://feeds.feedburner.com/%s',
        'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
        'sc:': 'http://soundcloud.com/%s',
        'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
        # YouTube playlists. To get a list of playlists per-user, use:
        # https://gdata.youtube.com/feeds/api/users/<username>/playlists
        'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
    }

    for prefix, expansion in PREFIXES.items():
        if url.startswith(prefix):
            url = expansion % (url[len(prefix):],)
            break

    # Assume HTTP for URLs without scheme
    if '://' not in url:
        url = 'http://' + url

    scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url)

    # Schemes and domain names are case insensitive
    scheme = scheme.lower()
    netloc = netloc.lower()

    # encode non-encoded characters
    path = urllib.parse.quote(path, '/%')
    query = urllib.parse.quote_plus(query, ':&=')

    # Remove authentication to protect users' privacy
    netloc = netloc.rsplit('@', 1)[-1]

    # Normalize empty paths to "/"
    if not path:
        path = '/'

    # feed://, itpc:// and itms:// are really http://
    if scheme in ('feed', 'itpc', 'itms'):
        scheme = 'http'

    if scheme not in ('http', 'https', 'ftp', 'file'):
        return None

    # urlunsplit might return "a slighty different, but equivalent URL"
    return urllib.parse.urlunsplit((scheme, netloc, path, query, fragment))
def partition(items, predicate=bool):
    """Split items into (non-matching, matching) lazy iterators.

    The first iterator yields elements for which predicate is falsy,
    the second those for which it is truthy; the input is only
    traversed once, via itertools.tee.
    """
    evaluated = ((predicate(item), item) for item in items)
    left, right = itertools.tee(evaluated)
    falsy = (item for matched, item in left if not matched)
    truthy = (item for matched, item in right if matched)
    return falsy, truthy
def split_quoted(s):
    """ Splits a quoted string

    >>> split_quoted('some "quoted text"') == ['some', 'quoted text']
    True

    >>> split_quoted('"quoted text') == ['quoted', 'text']
    True

    # 4 quotes here are 2 in the doctest is one in the actual string
    >>> split_quoted('text\\\\') == ['text']
    True
    """
    try:
        # split by whitespace, preserve quoted substrings
        return shlex.split(s)

    except ValueError:
        # shlex rejects unbalanced quotes (eg '"text') and trailing
        # escapes (eg '\'); drop the offending characters and retry
        cleaned = s.replace('"', '').replace("'", '').replace('\\', '')
        return shlex.split(cleaned)
def edit_link(obj):
    """ Return the link to the Django Admin Edit page

    obj must be a saved model instance (uses obj.pk and obj._meta).
    """
    # admin URL names follow the pattern admin:<app_label>_<model>_change
    return reverse('admin:%s_%s_change' % (obj._meta.app_label,
                                           obj._meta.model_name),
                   args=(obj.pk,))
def random_token(length=32):
    """Return a random alphanumeric token of the given length.

    Uses the `secrets` module for cryptographically strong randomness.
    The previous random.sample-based version was unsuitable for tokens:
    it used the non-cryptographic `random` module, never repeated a
    character, and raised ValueError for lengths over 62.
    """
    import string
    import secrets
    alphabet = string.ascii_letters + string.digits
    return "".join(secrets.choice(alphabet) for _ in range(length))
def to_maxlength(cls, field, val):
    """ Cut val to the maximum length of cls's field

    Returns None unchanged; logs a warning when truncation happens.
    """
    if val is None:
        return None

    max_length = cls._meta.get_field(field).max_length
    orig_length = len(val)
    if orig_length > max_length:
        val = val[:max_length]
        # logger.warn is a deprecated alias of logger.warning
        logger.warning('%s.%s length reduced from %d to %d',
                       cls.__name__, field, orig_length, max_length)

    return val
def get_domain(url):
    """ Returns the domain name of a URL

    >>> get_domain('http://example.com')
    'example.com'

    >>> get_domain('https://example.com:80/my-podcast/feed.rss')
    'example.com'
    """
    netloc = urllib.parse.urlparse(url).netloc
    # strip an optional :port suffix; partition leaves the host
    # unchanged when there is no colon
    host, _, _ = netloc.partition(':')
    return host
def set_ordered_entries(obj, new_entries, existing, EntryClass,
                        value_name, parent_name):
    """ Update the object's entries to the given list

    'new_entries' should be a list of objects that are later wrapped in
    EntryClass instances. 'value_name' is the name of the EntryClass property
    that contains the values; 'parent_name' is the one that references obj.

    Entries that do not exist are created. Existing entries that are not in
    'new_entries' are deleted.

    'existing' is a dict mapping entry values to their current EntryClass
    instances; it is mutated (entries are popped as they are matched). """

    logger.info('%d existing entries', len(existing))
    logger.info('%d new entries', len(new_entries))

    with transaction.atomic():
        # first move all existing entries to order numbers above both the
        # old and the new range, so the renumbering below cannot collide
        # with an already-used order value
        max_order = max([s.order for s in existing.values()] +
                        [len(new_entries)])
        logger.info('Renumbering entries starting from %d', max_order+1)
        for n, entry in enumerate(existing.values(), max_order+1):
            entry.order = n
            entry.save()

        logger.info('%d existing entries', len(existing))

        for n, entry in enumerate(new_entries):
            try:
                # reuse the existing entry for this value if there is one
                e = existing.pop(entry)
                logger.info('Updating existing entry %d: %s', n, entry)
                e.order = n
                e.save()
            except KeyError:
                logger.info('Creating new entry %d: %s', n, entry)
                try:
                    links = {
                        value_name: entry,
                        parent_name: obj,
                    }
                    # local import, presumably to avoid a circular import
                    # at module load time -- TODO confirm
                    from mygpo.podcasts.models import ScopedModel
                    if issubclass(EntryClass, ScopedModel):
                        links['scope'] = obj.scope

                    EntryClass.objects.create(order=n, **links)
                except IntegrityError as ie:
                    logger.warn('Could not create enry for %s: %s', obj, ie)

    # whatever is left in 'existing' was not in new_entries: delete it
    with transaction.atomic():
        delete = [s.pk for s in existing.values()]
        logger.info('Deleting %d entries', len(delete))
        EntryClass.objects.filter(id__in=delete).delete()