[Podcasts] make Episode.duration a BigInteger
[mygpo.git] / mygpo / utils.py
blob544b3aea44f3770fe14685d667370d7915d1986b
1 # -*- coding: utf-8 -*-
3 # This file is part of my.gpodder.org.
5 # my.gpodder.org is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU Affero General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or (at your
8 # option) any later version.
10 # my.gpodder.org is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
13 # License for more details.
15 # You should have received a copy of the GNU Affero General Public License
16 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
19 import functools
20 import types
21 import subprocess
22 import os
23 import operator
24 import sys
25 import re
26 import collections
27 import itertools
28 from datetime import datetime, timedelta, date
29 import time
30 import hashlib
31 import urlparse
32 import urllib
33 import urllib2
34 import zlib
35 import shlex
37 from django.db import transaction, IntegrityError
38 from django.conf import settings
39 from django.core.urlresolvers import reverse
41 from mygpo.core.json import json
43 import logging
44 logger = logging.getLogger(__name__)
def daterange(from_date, to_date=None, leap=timedelta(days=1)):
    """Yield successive dates from from_date up to and including to_date.

    If to_date is omitted, the current UTC time is used when from_date is a
    datetime, and today's date otherwise. leap is the step between two
    consecutive values.

    >>> from_d = datetime(2010, 1, 1)
    >>> to_d = datetime(2010, 1, 5)
    >>> list(daterange(from_d, to_d))
    [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
    """
    if to_date is None:
        if isinstance(from_date, datetime):
            to_date = datetime.utcnow()
        else:
            to_date = date.today()

    current = from_date
    while current <= to_date:
        yield current
        current = current + leap
def format_time(value):
    """Format an offset (in seconds) to a string

    The offset should be an integer or float value; fractions of a second
    are dropped. Values that are not numbers, or that are negative, yield
    an empty string. Offsets of 24 hours or more keep counting hours
    instead of wrapping around.

    >>> format_time(0)
    '00:00'
    >>> format_time(20)
    '00:20'
    >>> format_time(3600)
    '01:00:00'
    >>> format_time(10921)
    '03:02:01'
    >>> format_time(90000)
    '25:00:00'
    """
    try:
        # "+ 0" rejects strings and None with a TypeError, like a numeric
        # API would, instead of letting int() parse them
        total = int(value + 0)
    except (TypeError, ValueError, OverflowError):
        return ''

    if total < 0:
        return ''

    minutes, seconds = divmod(total, 60)
    hours, minutes = divmod(minutes, 60)

    if hours == 0:
        return '%02d:%02d' % (minutes, seconds)
    return '%02d:%02d:%02d' % (hours, minutes, seconds)
def parse_time(value):
    """Parse a duration into a number of seconds

    Accepts an int (returned unchanged), a 'HH:MM:SS' or 'MM:SS' string, or
    a string containing a plain integer. Raises ValueError for None, the
    empty string and anything that cannot be parsed.

    >>> parse_time(10)
    10

    >>> parse_time('05:10') #5*60+10
    310

    >>> parse_time('1:05:10') #60*60+5*60+10
    3910
    """
    if value is None:
        raise ValueError('None value in parse_time')

    if isinstance(value, int):
        # Don't need to parse already-converted time value
        return value

    if value == '':
        raise ValueError('Empty string in parse_time')

    for time_format in ('%H:%M:%S', '%M:%S'):
        try:
            t = time.strptime(value, time_format)
        except ValueError:
            continue
        return t.tm_hour * 60 * 60 + t.tm_min * 60 + t.tm_sec

    # last resort: a plain number of seconds; raises ValueError otherwise
    return int(value)
def parse_bool(val):
    """Interpret a value as a boolean

    Booleans are returned unchanged. Strings are True only if they equal
    'true' (case-insensitive). None is treated as False.

    >>> parse_bool('True')
    True

    >>> parse_bool('true')
    True

    >>> parse_bool('')
    False
    """
    if isinstance(val, bool):
        return val
    if val is None:
        return False
    return val.lower() == 'true'
def iterate_together(lists, key=lambda x: x, reverse=False):
    """
    takes ordered, possibly sparse, lists with similar items
    (some items have a corresponding item in the other lists, some don't).

    It then yields tuples of corresponding items, where one element is None
    if there is no corresponding entry in one of the lists.

    Tuples where both elements are None are skipped.

    The results of the key method are used for the comparisons.

    If reverse is True, the lists are expected to be sorted in reverse order
    and the results will also be sorted reverse

    >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
    [(1, 1), (2, None), (None, 3)]

    >>> list(iterate_together([[], []]))
    []

    >>> list(iterate_together([range(1, 3), range(3, 5)]))
    [(1, None), (2, None), (None, 3), (None, 4)]

    >>> list(iterate_together([range(1, 3), []]))
    [(1, None), (2, None)]

    >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
    [(1, None), (3, 3)]
    """
    Next = collections.namedtuple('Next', 'item more')
    min_ = min if not reverse else max
    lt_ = operator.lt if not reverse else operator.gt

    lists = [iter(l) for l in lists]

    def _take(it):
        """ returns the next not-None item of it, or an exhausted marker """
        try:
            i = next(it)
            while i is None:
                i = next(it)
            return Next(i, True)
        except StopIteration:
            return Next(None, False)

    def new_res():
        return [None] * len(lists)

    # take first bunch of items
    items = [_take(l) for l in lists]

    while any(i.item is not None or i.more for i in items):

        res = new_res()

        for n, item in enumerate(items):

            if item.item is None:
                continue

            if all(x is None for x in res):
                res[n] = item.item
                continue

            # all entries collected in res so far share the same key
            min_v = min_((x for x in res if x is not None), key=key)

            if key(item.item) == key(min_v):
                res[n] = item.item

            elif lt_(key(item.item), key(min_v)):
                # found a smaller item: it supersedes what was collected
                res = new_res()
                res[n] = item.item

        # advance only the lists whose current item is being emitted
        for n, x in enumerate(res):
            if x is not None:
                items[n] = _take(lists[n])

        yield tuple(res)
def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
    """ Writes a one-line progress bar for val out of max_val to stream

    The line starts with a carriage return and has no trailing newline, so
    that subsequent calls overwrite it. The percentage is drawn into the
    middle of the max_width characters wide bar, followed by status_str.
    """
    factor = float(val) / max_val if max_val > 0 else 0

    # progress as percentage
    percentage_str = '{val:.2%}'.format(val=factor)

    # progress bar filled with #s
    filled = min(int(factor * max_width), max_width)
    progress_str = '#' * filled + ' ' * (max_width - filled)

    # insert percentage into bar
    percentage_start = (max_width - len(percentage_str)) // 2
    percentage_end = percentage_start + len(percentage_str)
    progress_str = (progress_str[:percentage_start] +
                    percentage_str +
                    progress_str[percentage_end:])

    stream.write('\r[ %s ] %s / %s | %s' % (
        progress_str,
        val,
        max_val,
        status_str))
    stream.flush()
def set_cmp(list, simplify):
    """
    Builds a set out of a list but uses the results of simplify to determine
    equality between items

    If several items simplify to the same value, the last one wins. The
    result is returned as a list.
    """
    unique = dict((simplify(item), item) for item in list)
    # the builtin list() is shadowed by the parameter name here
    return [item for item in unique.values()]
def first(it):
    """
    returns the first not-None object or None if the iterator is exhausted
    """
    return next((x for x in it if x is not None), None)
def intersect(a, b):
    """ returns a list of the distinct elements contained in both a and b

    The order of the result is unspecified. """
    return list(set(a).intersection(b))
# C0 control characters (0-31) and DEL plus the C1 controls (127-159)
_CONTROL_CHAR_RE = re.compile(u'[\\x00-\\x1f\\x7f-\\x9f]')


def remove_control_chars(s):
    """ returns s without control characters

    Removes the code points 0-31 and 127-159, which includes tabs and line
    breaks. """
    return _CONTROL_CHAR_RE.sub('', s)
def unzip(a):
    """ inverse of zip: turns a sequence of tuples into a tuple of lists

    >>> unzip([(1, 'a'), (2, 'b')])
    ([1, 2], ['a', 'b'])
    """
    return tuple(list(column) for column in zip(*a))
282 def parse_range(s, min, max, default=None):
284 Parses the string and returns its value. If the value is outside the given
285 range, its closest number within the range is returned
287 >>> parse_range('5', 0, 10)
290 >>> parse_range('0', 5, 10)
293 >>> parse_range('15',0, 10)
296 >>> parse_range('x', 0, 20)
299 >>> parse_range('x', 0, 20, 20)
302 try:
303 val = int(s)
304 if val < min:
305 return min
306 if val > max:
307 return max
308 return val
310 except (ValueError, TypeError):
311 return default if default is not None else (max-min)/2
def flatten(l):
    """ flattens a list of lists by one level

    >>> flatten([[1, 2], [3], []])
    [1, 2, 3]
    """
    return [item for sublist in l for item in sublist]
def linearize(key, iterators, reverse=False):
    """
    Linearizes a number of iterators, sorted by some comparison function

    Each iterator is expected to be sorted by key already (descending if
    reverse is True). Yields the elements of all iterators in global order.
    """
    # pairs of (current head element, iterator it was taken from)
    heads = []
    for it in (iter(i) for i in iterators):
        try:
            heads.append((next(it), it))
        except StopIteration:
            continue

    while heads:
        heads = sorted(heads, key=lambda x: key(x[0]), reverse=reverse)
        val, it = heads.pop(0)
        yield val
        try:
            heads.append((next(it), it))
        except StopIteration:
            pass
try:
    # Python 2 builtin
    _default_cmp = cmp
except NameError:
    def _default_cmp(a, b):
        """ three-way comparison, as the Python 2 builtin cmp() """
        return (a > b) - (a < b)


def skip_pairs(iterator, cmp=_default_cmp):
    """ Skips pairs of equal items

    Two adjacent items for which cmp returns 0 are both dropped.

    >>> list(skip_pairs([]))
    []

    >>> list(skip_pairs([1]))
    [1]

    >>> list(skip_pairs([1, 2, 3]))
    [1, 2, 3]

    >>> list(skip_pairs([1, 1]))
    []

    >>> list(skip_pairs([1, 2, 2]))
    [1]

    >>> list(skip_pairs([1, 2, 2, 3]))
    [1, 3]

    >>> list(skip_pairs([1, 2, 2, 2]))
    [1, 2]

    >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
    [1, 3]
    """
    it = iter(iterator)

    # a StopIteration must not leak out of a generator (PEP 479), so every
    # exhaustion of the underlying iterator ends the generator explicitly
    try:
        upcoming = next(it)
    except StopIteration:
        return

    while True:
        item = upcoming
        try:
            upcoming = next(it)
        except StopIteration:
            yield item
            return

        if cmp(item, upcoming) == 0:
            # drop both items of the pair and continue after it
            try:
                upcoming = next(it)
            except StopIteration:
                return
        else:
            yield item
def get_timestamp(datetime_obj):
    """ Returns the timestamp as an int for the given datetime object

    Naive datetime objects are interpreted as UTC, independent of the
    timezone the process is running in.

    >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
    1302168606

    >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
    0
    """
    import calendar
    # timegm() is the UTC counterpart of time.mktime(), which would apply
    # the local timezone of the server
    return int(calendar.timegm(datetime_obj.utctimetuple()))
re_url = re.compile(r'^https?://')


def is_url(string):
    """ Returns true if a string looks like an URL

    Only checks that the string starts with http:// or https://

    >>> is_url('http://example.com/some-path/file.xml')
    True

    >>> is_url('something else')
    False
    """
    return re_url.match(string) is not None
# from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
# this does not increase asymptotic complexity
# but can still waste more time than it saves.
def shortest_of(strings):
    """ returns the shortest of the given strings

    Raises a ValueError if strings is empty. """
    return min(strings, key=len)
def longest_substr(strings):
    """
    Returns the longest common substring of the given strings

    Returns the empty string if strings is empty or nothing is shared.

    >>> longest_substr(['foobar', 'xxbarxx', 'bar'])
    'bar'
    """
    substr = ""
    if not strings:
        return substr

    # a common substring can not be longer than the shortest string
    reference = min(strings, key=len)
    length = len(reference)

    # find a suitable slice i:j
    for i in range(length):
        # only consider candidates at least len(substr) + 1 long; the upper
        # bound is length + 1 so that the last character can be included
        for j in range(i + len(substr) + 1, length + 1):
            candidate = reference[i:j]
            if all(candidate in text for text in strings):
                substr = candidate
            else:
                # no longer slice starting at i can be common either
                break

    return substr
def additional_value(it, gen_val, val_changed=lambda _x, _current: True):
    """ Provides an additional value to the elements, calculated when needed

    For the elements from the iterator, some additional value can be computed
    by gen_val (which might be an expensive computation).

    If the elements in the iterator are ordered so that some subsequent
    elements would generate the same additional value, val_changed can be
    provided, which receives the next element from the iterator and the
    previous additional value. If the element would generate the same
    additional value (val_changed returns False), its computation is skipped.

    Without val_changed, the value is computed for every element.

    >>> # get the next full hundred higher than x
    >>> # this will probably be an expensive calculation
    >>> next_hundred = lambda x: x + 100-(x % 100)

    >>> # returns True if h is not the value that next_hundred(x) would provide
    >>> # this should be a relatively cheap calculation, compared to the above
    >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100

    >>> xs = [0, 50, 100, 101, 199, 200, 201]
    >>> list(additional_value(xs, next_hundred, diff_hundred))
    [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
    """
    # sentinel, because None is a valid additional value
    _none = object()
    current = _none

    for x in it:
        if current is _none or val_changed(x, current):
            current = gen_val(x)

        yield (x, current)
def file_hash(f, h=hashlib.md5, block_size=2**20):
    """ returns the hash of the contents of a file

    f is read in chunks of block_size until it returns no more data. The
    hash object (not its digest) is returned. """
    f_hash = h()
    while True:
        chunk = f.read(block_size)
        # an empty chunk ('' or b'') marks the end of the file
        if not chunk:
            break
        f_hash.update(chunk)
    return f_hash
def split_list(l, prop):
    """ split elements that satisfy a property, and those that don't

    Returns a tuple of two lists (match, nomatch); both preserve the order
    of l. prop is evaluated once per element. """
    match, nomatch = [], []
    for item in l:
        if prop(item):
            match.append(item)
        else:
            nomatch.append(item)
    return match, nomatch
def sorted_chain(links, key, reverse=False):
    """ Takes a list of iters and iterates over sorted elements

    Each element of links should be a tuple of (sort_key, iterator). The
    elements of each iterator should be sorted already. sort_key should
    indicate the key of the first element and needs to be comparable to the
    result of key(elem).

    The function returns an iterator over the globally sorted elements that
    ensures that as few iterators as possible are evaluated. An iterator is
    only evaluated (and then fully consumed) once its sort_key is reached. """

    # mixed_list initially contains all placeholders; later evaluated
    # elements (from the iterators) are mixed in
    mixed_list = [(k, link, True) for k, link in links]
    sort_key = operator.itemgetter(0)

    while mixed_list:
        _, item, expand = mixed_list.pop(0)

        # found an element (from an earlier expansion), yield it
        if not expand:
            yield item
            continue

        # found an iter that needs to be expanded.
        # The iterator is fully consumed
        new_items = [(key(i), i, False) for i in item]

        # sort links (placeholders) and elements together
        mixed_list = sorted(mixed_list + new_items, key=sort_key,
                            reverse=reverse)
def url_add_authentication(url, username, password):
    """
    Adds authentication data (username, password) to a given
    URL in order to construct an authenticated URL.

    Authentication data already contained in the URL is replaced. If
    username is None or empty, the URL is returned unchanged.

    >>> url_add_authentication('https://host.com/', '', None)
    'https://host.com/'
    >>> url_add_authentication('http://example.org/', None, None)
    'http://example.org/'
    >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
    'telnet://foo:bar@host.com/'
    >>> url_add_authentication('ftp://example.org', 'billy', None)
    'ftp://billy@example.org'
    >>> url_add_authentication('ftp://example.org', 'billy', '')
    'ftp://billy:@example.org'
    >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
    'http://aa:bc@localhost/x'
    >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
    'http://i%2Fo:P@ss:@blubb.lan/u.html'
    >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
    'http://c:d@x.org/'
    >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
    'http://P@x:i%2F@cx.lan'
    >>> url_add_authentication('http://x.org/', 'a b', 'c d')
    'http://a%20b:c%20d@x.org/'
    """
    if username is None or username == '':
        return url

    # Relaxations of the strict quoting rules (bug 1521):
    # 1. Accept '@' in username and password
    # 2. Accept ':' in password only
    username = urllib.quote(username, safe='@')

    if password is not None:
        password = urllib.quote(password, safe='@:')
        auth_string = ':'.join((username, password))
    else:
        auth_string = username

    url = url_strip_authentication(url)

    url_parts = list(urlparse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL
    url_parts[1] = '@'.join((auth_string, url_parts[1]))

    return urlparse.urlunsplit(url_parts)
def urlopen(url, headers=None, data=None):
    """
    An URL opener with the User-agent set to gPodder (with version)

    Authentication data contained in the URL is removed from it and used
    for HTTP Basic authentication instead. headers is not modified; the
    User-agent header is always set to settings.USER_AGENT.
    """
    username, password = username_password_from_url(url)
    if username is not None or password is not None:
        url = url_strip_authentication(url)
        password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        handler = urllib2.HTTPBasicAuthHandler(password_mgr)
        opener = urllib2.build_opener(handler)
    else:
        opener = urllib2.build_opener()

    # work on a copy so that the caller's headers stay untouched
    headers = dict(headers) if headers is not None else {}
    headers['User-agent'] = settings.USER_AGENT

    request = urllib2.Request(url, data=data, headers=headers)
    return opener.open(request)
def username_password_from_url(url):
    r"""
    Returns a tuple (username,password) containing authentication
    data from the specified URL or (None,None) if no authentication
    data can be found in the URL.

    See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)

    >>> username_password_from_url('https://@host.com/')
    ('', None)
    >>> username_password_from_url('telnet://host.com/')
    (None, None)
    >>> username_password_from_url('ftp://foo:@host.com/')
    ('foo', '')
    >>> username_password_from_url('http://a:b@host.com/')
    ('a', 'b')
    >>> username_password_from_url(1)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url(None)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url('http://a@b:c@host.com/')
    ('a@b', 'c')
    >>> username_password_from_url('ftp://a:b:c@host.com/')
    ('a', 'b:c')
    >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
    ('i/o', 'P@ss:')
    >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
    ('\xc3\xb6sterreich', None)
    >>> username_password_from_url('http://w%20x:y%20z@example.org/')
    ('w x', 'y z')
    >>> username_password_from_url('http://example.com/x@y:z@test.com/')
    (None, None)
    """
    # type(u'') is the unicode string type of the running interpreter
    if not isinstance(url, (str, type(u''))):
        raise ValueError('URL has to be a string or unicode object.')

    netloc = urlparse.urlparse(url).netloc

    if '@' not in netloc:
        return (None, None)

    authentication = netloc.rsplit('@', 1)[0]

    if ':' not in authentication:
        return (urllib.unquote(authentication), None)

    # RFC1738 dictates that we should not allow ['/', '@', ':']
    # characters in the username and password field (Section 3.1):
    #
    # 1. The "/" can't be in there at this point because of the way
    #    urlparse (which we use above) works.
    # 2. Due to gPodder bug 1521, we allow "@" in the username and
    #    password field. We use netloc.rsplit('@', 1), which will
    #    make sure that we split it at the last '@' in netloc.
    # 3. The colon must be excluded (RFC2617, Section 2) in the
    #    username, but is apparently allowed in the password. This
    #    is handled by the authentication.split(':', 1) below, and
    #    will cause any extraneous ':'s to be part of the password.
    username, password = authentication.split(':', 1)
    return (urllib.unquote(username), urllib.unquote(password))
def url_strip_authentication(url):
    """
    Strips authentication data from an URL. Returns the URL with
    the authentication data removed from it.

    >>> url_strip_authentication('https://host.com/')
    'https://host.com/'
    >>> url_strip_authentication('telnet://foo:bar@host.com/')
    'telnet://host.com/'
    >>> url_strip_authentication('ftp://billy@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('ftp://billy:@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('http://aa:bc@localhost/x')
    'http://localhost/x'
    >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
    'http://blubb.lan/u.html'
    >>> url_strip_authentication('http://c:d@x.org/')
    'http://x.org/'
    >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
    'http://cx.lan'
    >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
    'http://example.com/'
    """
    url_parts = list(urlparse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL

    # Remove existing authentication data; everything up to the last '@'
    # belongs to it, as usernames and passwords may contain '@' themselves
    url_parts[1] = url_parts[1].rsplit('@', 1)[-1]

    return urlparse.urlunsplit(url_parts)
# Native filesystem encoding detection
encoding = sys.getfilesystemencoding()


def sanitize_encoding(filename):
    r"""
    Generate a sanitized version of a string (i.e.
    remove invalid characters and encode in the
    detected native language encoding).

    On Python 3 the string is returned unchanged.

    >>> sanitize_encoding('\x80')
    ''
    >>> sanitize_encoding(u'unicode')
    'unicode'
    """
    # The encoding problem goes away in Python 3.. hopefully!
    if sys.version_info >= (3, 0):
        return filename

    if not isinstance(filename, unicode):
        filename = filename.decode(encoding, 'ignore')
    return filename.encode(encoding, 'ignore')
def get_git_head():
    """ returns the commit and message of the current git HEAD

    Runs git in settings.BASE_DIR. Returns (None, None) if git can not be
    started, writes to stderr or does not print anything. """

    try:
        pr = subprocess.Popen('/usr/bin/git log -n 1 --oneline'.split(),
                              cwd=settings.BASE_DIR,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              )

    except OSError:
        return None, None

    (out, err) = pr.communicate()
    if err:
        return None, None

    # "--oneline" prints the abbreviated commit id followed by the subject
    outs = out.split()
    if not outs:
        return None, None

    commit = outs[0]
    msg = ' '.join(outs[1:])
    return commit, msg
# https://gist.github.com/samuraisam/901117

default_fudge = timedelta(seconds=0, microseconds=0, days=0)


def deep_eq(_v1, _v2, datetime_fudge=default_fudge, _assert=False):
    """
    Tests for deep equality between two python data structures recursing
    into sub-structures if necessary. Works with all python types including
    iterators and generators. This function was dreamt up to test API
    responses but could be used for anything. Be careful. With deeply nested
    structures you may blow the stack.

    Options:
              datetime_fudge => this is a datetime.timedelta object which, when
                                comparing dates, will accept values that differ
                                by at most the duration specified
              _assert        => passing yes for this will raise an assertion
                                error when values do not match, instead of
                                returning false (very useful in combination
                                with pdb)

    Doctests included:

    >>> x1, y1 = ({'a': 'b'}, {'a': 'b'})
    >>> deep_eq(x1, y1)
    True
    >>> x2, y2 = ({'a': 'b'}, {'b': 'a'})
    >>> deep_eq(x2, y2)
    False
    >>> x3, y3 = ({'a': {'b': 'c'}}, {'a': {'b': 'c'}})
    >>> deep_eq(x3, y3)
    True
    >>> x4, y4 = ({'c': 't', 'a': {'b': 'c'}}, {'a': {'b': 'n'}, 'c': 't'})
    >>> deep_eq(x4, y4)
    False
    >>> x5, y5 = ({'a': [1,2,3]}, {'a': [1,2,3]})
    >>> deep_eq(x5, y5)
    True
    >>> x6, y6 = ({'a': [1,'b',8]}, {'a': [2,'b',8]})
    >>> deep_eq(x6, y6)
    False
    >>> x7, y7 = ('a', 'a')
    >>> deep_eq(x7, y7)
    True
    >>> x8, y8 = (['p','n',['asdf']], ['p','n',['asdf']])
    >>> deep_eq(x8, y8)
    True
    >>> x9, y9 = (['p','n',['asdf',['omg']]], ['p', 'n', ['asdf',['nowai']]])
    >>> deep_eq(x9, y9)
    False
    >>> x10, y10 = (1, 2)
    >>> deep_eq(x10, y10)
    False
    >>> deep_eq((str(p) for p in range(10)), (str(p) for p in range(10)))
    True
    >>> str(deep_eq(range(4), range(4)))
    'True'
    >>> deep_eq(range(100), range(100))
    True
    >>> deep_eq(range(2), range(5))
    False
    >>> from datetime import datetime, timedelta
    >>> d1, d2 = (datetime.utcnow(), datetime.utcnow() + timedelta(seconds=4))
    >>> deep_eq(d1, d2)
    False
    >>> deep_eq(d1, d2, datetime_fudge=timedelta(seconds=5))
    True
    """
    _deep_eq = functools.partial(deep_eq, datetime_fudge=datetime_fudge,
                                 _assert=_assert)

    def _check_assert(R, a, b, reason=''):
        if _assert and not R:
            assert 0, "an assertion has failed in deep_eq (%s) %s != %s" % (
                reason, str(a), str(b))
        return R

    def _deep_dict_eq(d1, d2):
        k1, k2 = (sorted(d1.keys()), sorted(d2.keys()))
        if k1 != k2:  # keys should be exactly equal
            return _check_assert(False, k1, k2, "keys")

        return _check_assert(all(_deep_eq(d1[k], d2[k]) for k in k1),
                             d1, d2, "dictionaries")

    def _deep_iter_eq(l1, l2):
        if len(l1) != len(l2):
            return _check_assert(False, l1, l2, "lengths")
        return _check_assert(all(_deep_eq(v1, v2) for v1, v2 in zip(l1, l2)),
                             l1, l2, "iterables")

    def _value_eq(a, b):
        if isinstance(a, datetime) and isinstance(b, datetime):
            # total_seconds() also honours the days of the fudge, which
            # the "seconds" attribute alone does not
            fudge = abs(datetime_fudge.total_seconds())
            t1, t2 = (time.mktime(a.timetuple()), time.mktime(b.timetuple()))
            return _check_assert(abs(t1 - t2) <= fudge, a, b, "dates")
        return _check_assert(operator.eq(a, b), a, b, "values")

    c1, c2 = (_v1, _v2)
    op = _value_eq

    # guard against strings because they are iterable and their
    # elements yield iterables infinitely.
    # I N C E P T I O N
    # (type(u'') is the unicode string type of the running interpreter)
    if not isinstance(_v1, (str, type(u''))):
        if isinstance(_v1, dict):
            op = _deep_dict_eq
        else:
            try:
                c1, c2 = (list(iter(_v1)), list(iter(_v2)))
            except TypeError:
                # not iterable: compare the plain values
                c1, c2 = _v1, _v2
            else:
                op = _deep_iter_eq

    return op(c1, c2)
def parse_request_body(request):
    """ returns the parsed request body, handles gzip encoding

    The body is expected to contain JSON. If the request's Content-Encoding
    header is 'gzip', the body is decompressed first; both gzip and plain
    zlib streams are accepted. """

    raw_body = request.body
    content_enc = request.META.get('HTTP_CONTENT_ENCODING')

    if content_enc == 'gzip':
        # "| 32" enables automatic detection of the gzip or zlib header
        raw_body = zlib.decompress(raw_body, zlib.MAX_WBITS | 32)

    return json.loads(raw_body)
def normalize_feed_url(url):
    """
    Converts any URL to http:// or ftp:// so that it can be
    used with "wget". If the URL cannot be converted (invalid
    or unknown scheme), "None" is returned.

    This will also normalize feed:// and itpc:// to http://.

    >>> normalize_feed_url('itpc://example.org/podcast.rss')
    'http://example.org/podcast.rss'

    If no URL scheme is defined (e.g. "curry.com"), we will
    simply assume the user intends to add a http:// feed.

    >>> normalize_feed_url('curry.com')
    'http://curry.com/'

    There are even some more shortcuts for advanced users
    and lazy typists (see the source for details).

    >>> normalize_feed_url('fb:43FPodcast')
    'http://feeds.feedburner.com/43FPodcast'

    It will also take care of converting the domain name to
    all-lowercase (because domains are not case sensitive):

    >>> normalize_feed_url('http://Example.COM/')
    'http://example.com/'

    Some other minimalistic changes are also taken care of,
    e.g. a ? with an empty query is removed:

    >>> normalize_feed_url('http://example.org/test?')
    'http://example.org/test'

    Leading and trailing whitespace is removed

    >>> normalize_feed_url(' http://example.com/podcast.rss ')
    'http://example.com/podcast.rss'

    HTTP Authentication is removed to protect users' privacy

    >>> normalize_feed_url('http://a@b:c@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://a:b:c@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
    'http://example.org/'
    >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
    'http://example.com/x%40y%3Az%40test.com/'
    >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
    'http://en.wikipedia.org/wiki/%C3%84'
    >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
    'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
    """
    if not url:
        return None

    url = url.strip()
    if not url or len(url) < 8:
        return None

    # work on the native string type; on Python 2 this turns unicode
    # objects into UTF-8 encoded byte strings
    if not isinstance(url, str):
        url = url.encode('utf-8', 'ignore')

    # This is a list of prefixes that you can use to minimize the amount of
    # keystrokes that you have to use.
    # Feel free to suggest other useful prefixes, and I'll add them here.
    PREFIXES = {
        'fb:': 'http://feeds.feedburner.com/%s',
        'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
        'sc:': 'http://soundcloud.com/%s',
        'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
        # YouTube playlists. To get a list of playlists per-user, use:
        # https://gdata.youtube.com/feeds/api/users/<username>/playlists
        'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
    }

    for prefix, expansion in PREFIXES.items():
        if url.startswith(prefix):
            url = expansion % (url[len(prefix):],)
            break

    # Assume HTTP for URLs without scheme
    if '://' not in url:
        url = 'http://' + url

    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    # Schemes and domain names are case insensitive
    scheme, netloc = scheme.lower(), netloc.lower()

    # encode non-encoded characters
    path = urllib.quote(path, '/%')
    query = urllib.quote_plus(query, ':&=')

    # Remove authentication to protect users' privacy
    netloc = netloc.rsplit('@', 1)[-1]

    # Normalize empty paths to "/"
    if path == '':
        path = '/'

    # feed://, itpc:// and itms:// are really http://
    if scheme in ('feed', 'itpc', 'itms'):
        scheme = 'http'

    if scheme not in ('http', 'https', 'ftp', 'file'):
        return None

    # urlunsplit might return "a slightly different, but equivalent URL"
    return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
def partition(items, predicate=bool):
    """ splits items by the result of predicate

    Returns two generators: the first yields the items for which predicate
    is false, the second those for which it is true. predicate is called
    once per item. """
    rejected, accepted = itertools.tee(
        (predicate(item), item) for item in items)
    return ((item for pred, item in rejected if not pred),
            (item for pred, item in accepted if pred))
def split_quoted(s):
    """ Splits a quoted string

    The string is split by whitespace; quoted substrings are kept together.
    If the string can not be parsed, all quotes and backslashes are removed
    before splitting it.

    >>> split_quoted('some "quoted text"') == ['some', 'quoted text']
    True

    >>> split_quoted('"quoted text') == ['quoted', 'text']
    True

    # 4 quotes here are 2 in the doctest is one in the actual string
    >>> split_quoted('text\\\\') == ['text']
    True
    """

    try:
        # split by whitespace, preserve quoted substrings
        return shlex.split(s)

    except ValueError:
        # No closing quotation (eg '"text')
        # No escaped character (eg '\')
        s = s.replace('"', '').replace("'", '').replace('\\', '')
        return shlex.split(s)
def edit_link(obj):
    """ Return the link to the Django Admin Edit page """
    # Options.model_name replaces module_name, which is deprecated since
    # Django 1.6 (the release that also introduced transaction.atomic)
    opts = obj._meta
    return reverse('admin:%s_%s_change' % (opts.app_label, opts.model_name),
                   args=(obj.pk,))
def random_token(length=32):
    """ returns a random token consisting of ASCII letters and digits

    The characters are drawn independently from the operating system's
    secure random source, so tokens of any length can be generated. """
    import random
    import string
    alphabet = string.ascii_letters + string.digits
    rng = random.SystemRandom()
    return "".join(rng.choice(alphabet) for _ in range(length))
def to_maxlength(cls, field, val):
    """ Cut val to the maximum length of cls's field

    None is returned unchanged. If the field does not define a maximum
    length, val is returned unchanged, too. A warning is logged whenever
    val is actually shortened. """
    if val is None:
        return None

    max_length = cls._meta.get_field(field).max_length
    if max_length is None:
        # field without length restriction
        return val

    orig_length = len(val)
    if orig_length > max_length:
        val = val[:max_length]
        logger.warning('%s.%s length reduced from %d to %d',
                       cls.__name__, field, orig_length, max_length)

    return val
def get_domain(url):
    """ Returns the domain name of a URL

    Authentication data and the port are not part of the result.

    >>> get_domain('http://example.com')
    'example.com'

    >>> get_domain('https://example.com:80/my-podcast/feed.rss')
    'example.com'

    >>> get_domain('http://user:secret@example.com:8080/feed')
    'example.com'
    """
    netloc = urlparse.urlparse(url).netloc

    # drop "user:password@"; it can contain colons itself
    host = netloc.rsplit('@', 1)[-1]

    # IPv6 literals are enclosed in brackets and contain colons
    if host.startswith('['):
        end = host.find(']')
        if end != -1:
            return host[:end + 1]

    return host.split(':', 1)[0]
def set_ordered_entries(obj, new_entries, existing, EntryClass,
                        value_name, parent_name):
    """ Update the object's entries to the given list

    'new_entries' should be a list of objects that are later wrapped in
    EntryClass instances. 'value_name' is the name of the EntryClass property
    that contains the values; 'parent_name' is the one that references obj.

    'existing' maps values to their current EntryClass instances; entries
    that are kept are removed from it.

    Entries that do not exist are created. Existing entries that are not in
    'new_entries' are deleted. """

    # imported here as in the original code
    # NOTE(review): presumably to avoid a circular import -- confirm
    from mygpo.podcasts.models import ScopedModel

    logger.info('%d existing entries', len(existing))

    logger.info('%d new entries', len(new_entries))

    # first move all existing entries out of the range of order values
    # that is assigned below
    # NOTE(review): presumably 'order' is unique per parent -- confirm
    with transaction.atomic():
        max_order = max([s.order for s in existing.values()] +
                        [len(new_entries)])
        logger.info('Renumbering entries starting from %d', max_order+1)
        for n, entry in enumerate(existing.values(), max_order+1):
            entry.order = n
            entry.save()

    logger.info('%d existing entries', len(existing))

    for n, entry in enumerate(new_entries):
        try:
            e = existing.pop(entry)
            logger.info('Updating existing entry %d: %s', n, entry)
            e.order = n
            e.save()
        except KeyError:
            logger.info('Creating new entry %d: %s', n, entry)
            try:
                links = {
                    value_name: entry,
                    parent_name: obj,
                }
                if issubclass(EntryClass, ScopedModel):
                    links['scope'] = obj.scope

                EntryClass.objects.create(order=n, **links)
            except IntegrityError as ie:
                logger.warning('Could not create enry for %s: %s', obj, ie)

    # whatever is left in existing is not part of new_entries anymore
    with transaction.atomic():
        delete = [s.pk for s in existing.values()]
        logger.info('Deleting %d entries', len(delete))
        EntryClass.objects.filter(id__in=delete).delete()