add admin task for unifying the slugs of episodes
[mygpo.git] / mygpo / utils.py
blob47ee4ad304e5e073adf809775ece547c5fdcb8a4
1 # -*- coding: utf-8 -*-
3 # This file is part of my.gpodder.org.
5 # my.gpodder.org is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU Affero General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or (at your
8 # option) any later version.
10 # my.gpodder.org is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
13 # License for more details.
15 # You should have received a copy of the GNU Affero General Public License
16 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
19 import functools
20 import types
21 import subprocess
22 import os
23 import operator
24 import sys
25 import re
26 import collections
27 import itertools
28 from datetime import datetime, timedelta, date
29 import time
30 import hashlib
31 import urlparse
32 import urllib
33 import urllib2
34 import zlib
36 from django.conf import settings
38 from mygpo.core.json import json
def daterange(from_date, to_date=None, leap=timedelta(days=1)):
    """ Yields each date from from_date up to and including to_date

    to_date defaults to "now" (datetime.now() for datetime arguments,
    date.today() otherwise); leap is the step between yielded values.

    >>> from_d = datetime(2010, 1, 1)
    >>> to_d = datetime(2010, 1, 5)
    >>> list(daterange(from_d, to_d))
    [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
    """
    if to_date is None:
        to_date = datetime.now() if isinstance(from_date, datetime) \
                else date.today()

    current = from_date
    while current <= to_date:
        yield current
        current = current + leap
def format_time(value):
    """Format an offset (in seconds) to a string

    The offset should be an integer or float value.

    >>> format_time(0)
    '00:00'
    >>> format_time(20)
    '00:20'
    >>> format_time(3600)
    '01:00:00'
    >>> format_time(10921)
    '03:02:01'
    """
    try:
        moment = datetime.utcfromtimestamp(value)
    except ValueError:
        # negative or out-of-range offsets cannot be represented
        return ''

    # omit the hour part for offsets below one hour
    fmt = '%M:%S' if moment.hour == 0 else '%H:%M:%S'
    return moment.strftime(fmt)
def parse_time(value):
    """ Parses a time string (or a number of seconds) into seconds

    >>> parse_time(10)
    10

    >>> parse_time('05:10') #5*60+10
    310

    >>> parse_time('1:05:10') #60*60+5*60+10
    3910

    Raises ValueError for None or empty input.
    """
    if value is None:
        raise ValueError('None value in parse_time')

    if isinstance(value, int):
        # Don't need to parse already-converted time value
        return value

    if value == '':
        raise ValueError('Empty value in parse_time')

    for format in ('%H:%M:%S', '%M:%S'):
        try:
            t = time.strptime(value, format)
            return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
        except ValueError:
            # try the next format
            continue

    # not in any known time format; try to interpret as plain seconds
    return int(value)
def parse_bool(val):
    """ Parses a bool, or the (case-insensitive) string 'true'

    >>> parse_bool('True')
    True

    >>> parse_bool('true')
    True

    >>> parse_bool('')
    False
    """
    if isinstance(val, bool):
        return val
    return val.lower() == 'true'
def iterate_together(lists, key=lambda x: x, reverse=False):
    """ Yields tuples of corresponding items from several sorted lists

    takes ordered, possibly sparse, lists with similar items
    (some items have a corresponding item in the other lists, some don't).

    It then yield tuples of corresponding items, where one element is None is
    there is no corresponding entry in one of the lists.

    Tuples where both elements are None are skipped.

    The results of the key method are used for the comparisons.

    If reverse is True, the lists are expected to be sorted in reverse order
    and the results will also be sorted reverse

    >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
    [(1, 1), (2, None), (None, 3)]

    >>> list(iterate_together([[], []]))
    []

    >>> list(iterate_together([range(1, 3), range(3, 5)]))
    [(1, None), (2, None), (None, 3), (None, 4)]

    >>> list(iterate_together([range(1, 3), []]))
    [(1, None), (2, None)]

    >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
    [(1, None), (3, 3)]
    """

    # item: the currently fetched value of an iterator;
    # more: whether the iterator may still yield further values
    Next = collections.namedtuple('Next', 'item more')
    min_ = min if not reverse else max
    lt_ = operator.lt if not reverse else operator.gt

    lists = [iter(l) for l in lists]

    def _take(it):
        # fetch the next non-None item, or mark the iterator as exhausted
        try:
            i = it.next()
            while i is None:
                i = it.next()
            return Next(i, True)
        except StopIteration:
            return Next(None, False)

    def new_res():
        # fresh result row: one None slot per input list
        return [None]*len(lists)

    # take first bunch of items
    items = [_take(l) for l in lists]

    while any(i.item is not None or i.more for i in items):

        res = new_res()

        for n, item in enumerate(items):

            if item.item is None:
                continue

            # first non-None item always enters the (empty) result row
            if all(x is None for x in res):
                res[n] = item.item
                continue

            min_v = min_(filter(lambda x: x is not None, res), key=key)

            if key(item.item) == key(min_v):
                # same key as current minimum: belongs in this row
                res[n] = item.item

            elif lt_(key(item.item), key(min_v)):
                # strictly smaller key: discard the row and start over
                res = new_res()
                res[n] = item.item

        # advance only the iterators whose item was consumed in this row
        for n, x in enumerate(res):
            if x is not None:
                items[n] = _take(lists[n])

        yield tuple(res)
def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
    """ Writes a text progress bar for val/max_val to stream

    The bar is max_width characters wide, filled with '#' up to the
    current ratio, with the percentage embedded in its middle; the
    counters and status_str are appended after the bar. A leading
    carriage return rewrites the line in place.
    """

    factor = float(val)/max_val if max_val > 0 else 0

    # progress as percentage
    percentage_str = '{val:.2%}'.format(val=factor)

    # progress bar filled with #s
    factor = min(int(factor*max_width), max_width)
    progress_str = '#' * factor + ' ' * (max_width-factor)

    #insert percentage into bar
    percentage_start = int((max_width-len(percentage_str))/2)
    progress_str = progress_str[:percentage_start] + \
                   percentage_str + \
                   progress_str[percentage_start+len(percentage_str):]

    # trailing commas suppress the newline (Python 2 print statement)
    print >> stream, '\r',
    print >> stream, '[ %s ] %s / %s | %s' % (
        progress_str,
        val,
        max_val,
        status_str),
    stream.flush()
def set_cmp(list, simplify):
    """
    Builds a set out of a list but uses the results of simplify to determine equality between items
    """
    # later items win over earlier ones that simplify to the same key
    by_key = {}
    for item in list:
        by_key[simplify(item)] = item
    return by_key.values()
def first(it):
    """
    returns the first not-None object or None if the iterator is exhausted
    """
    return next((x for x in it if x is not None), None)
def intersect(a, b):
    """ Returns the intersection of both iterables as a list """
    return list(set(a).intersection(b))
def remove_control_chars(s):
    """ Removes control characters (codepoints 0-31 and 127-159) from s """
    # build a character class of all C0 and C1 control characters
    # (the previously computed "all_chars" generator was dead code)
    control_chars = ''.join(map(unichr, range(0,32) + range(127,160)))
    control_char_re = re.compile('[%s]' % re.escape(control_chars))

    return control_char_re.sub('', s)
def unzip(a):
    """ Inverse of zip: turns a list of n-tuples into a tuple of n lists """
    return tuple([list(group) for group in zip(*a)])
def parse_range(s, min, max, default=None):
    """
    Parses the string and returns its value. If the value is outside the given
    range, its closest number within the range is returned

    >>> parse_range('5', 0, 10)
    5

    >>> parse_range('0', 5, 10)
    5

    >>> parse_range('15', 0, 10)
    10

    >>> parse_range('x', 0, 20)
    10

    >>> parse_range('x', 0, 20, 20)
    20
    """
    try:
        val = int(s)
        # clamp into the [min, max] range
        if val < min:
            return min
        if val > max:
            return max
        return val

    except (ValueError, TypeError):
        # unparsable: fall back to the default, or to the midpoint of the
        # range; floor division keeps the result an int, and adding min
        # keeps the fallback inside [min, max] even when min != 0
        return default if default is not None else min + (max-min)//2
def flatten(l):
    """ Flattens one level of nesting: [[1, 2], [3]] -> [1, 2, 3] """
    return list(itertools.chain.from_iterable(l))
def linearize(key, iterators, reverse=False):
    """
    Linearizes a number of iterators, sorted by some comparison function

    Each input iterator is expected to be sorted by key already; the
    merged elements are yielded one at a time in globally sorted order.
    """

    # prime: fetch the first value of every non-empty iterator
    iters = [iter(i) for i in iterators]
    vals = []
    for i in iters:
        try:
            v = i.next()
            vals. append( (v, i) )
        except StopIteration:
            continue

    while vals:
        # yield the smallest current head, then refill from its iterator
        vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
        val, it = vals.pop(0)
        yield val
        try:
            next_val = it.next()
            vals.append( (next_val, it) )
        except StopIteration:
            # this iterator is exhausted; drop it from the candidates
            pass
def skip_pairs(iterator, cmp=cmp):
    """ Skips pairs of equal items

    >>> list(skip_pairs([]))
    []

    >>> list(skip_pairs([1]))
    [1]

    >>> list(skip_pairs([1, 2, 3]))
    [1, 2, 3]

    >>> list(skip_pairs([1, 1]))
    []

    >>> list(skip_pairs([1, 2, 2]))
    [1]

    >>> list(skip_pairs([1, 2, 2, 3]))
    [1, 3]

    >>> list(skip_pairs([1, 2, 2, 2]))
    [1, 2]

    >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
    [1, 3]
    """

    # NOTE: Python 2 only — relies on the builtin cmp and iterator.next(),
    # and re-raises StopIteration inside a generator to end it
    iterator = iter(iterator)
    next = iterator.next()

    while True:
        item = next
        try:
            next = iterator.next()
        except StopIteration as e:
            # no successor: the last item has no pair, yield it and stop
            yield item
            raise e

        if cmp(item, next) == 0:
            # equal pair: drop both and fetch a fresh candidate
            next = iterator.next()
        else:
            yield item
def get_timestamp(datetime_obj):
    """ Returns the timestamp as an int for the given datetime object

    >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
    1302168606

    >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
    0
    """
    # mktime interprets the time tuple in local time; sub-second
    # precision is discarded by the int conversion
    time_tuple = datetime_obj.timetuple()
    return int(time.mktime(time_tuple))
# pre-compiled pattern: URLs recognized by is_url start with http(s)://
re_url = re.compile('^https?://')

def is_url(string):
    """ Returns true if a string looks like an URL

    >>> is_url('http://example.com/some-path/file.xml')
    True

    >>> is_url('something else')
    False
    """
    return re_url.match(string) is not None
412 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
413 # this does not increase asymptotical complexity
414 # but can still waste more time than it saves.
def shortest_of(strings):
    """ Returns the first string of minimal length from the sequence """
    # stable sort: ties are resolved in favour of the earliest string,
    # matching min(strings, key=len)
    return sorted(strings, key=len)[0]
def longest_substr(strings):
    """
    Returns the longest common substring of the given strings

    >>> longest_substr(['xaby', 'zabw'])
    'ab'

    >>> longest_substr(['ab', 'abc'])
    'ab'
    """
    substr = ""
    if not strings:
        return substr
    reference = shortest_of(strings)
    length = len(reference)
    #find a suitable slice i:j
    for i in xrange(length):
        #only consider strings long at least len(substr) + 1
        # the end index may reach length (slice ends are exclusive), so
        # that the whole reference string itself can become a candidate
        for j in xrange(i + len(substr) + 1, length + 1):
            candidate = reference[i:j]
            if all(candidate in text for text in strings):
                substr = candidate
    return substr
def additional_value(it, gen_val, val_changed=lambda _: True):
    """ Provides an additional value to the elements, calculated when needed

    For the elements from the iterator, some additional value can be computed
    by gen_val (which might be an expensive computation).

    If the elements in the iterator are ordered so that some subsequent
    elements would generate the same additional value, val_changed can be
    provided, which receives the next element from the iterator and the
    previous additional value. If the element would generate the same
    additional value (val_changed returns False), its computation is skipped.

    >>> # get the next full hundred higher than x
    >>> # this will probably be an expensive calculation
    >>> next_hundred = lambda x: x + 100-(x % 100)

    >>> # returns True if h is not the value that next_hundred(x) would provide
    >>> # this should be a relatively cheap calculation, compared to the above
    >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100

    >>> xs = [0, 50, 100, 101, 199, 200, 201]
    >>> list(additional_value(xs, next_hundred, diff_hundred))
    [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
    """
    sentinel = object()
    cached = sentinel

    for elem in it:
        # recompute only for the first element, or when the cached value
        # no longer fits the current element
        if cached is sentinel or val_changed(elem, cached):
            cached = gen_val(elem)

        yield (elem, cached)
def file_hash(f, h=hashlib.md5, block_size=2**20):
    """ returns the hash of the contents of a file

    Reads f in chunks of block_size until an empty read ('') signals
    end-of-file; returns the (un-digested) hash object.
    """
    digest = h()
    while True:
        chunk = f.read(block_size)
        if chunk == '':
            break
        digest.update(chunk)
    return digest
def split_list(l, prop):
    """ split elements that satisfy a property, and those that don't

    Returns a tuple (match, nomatch) of lists. An element ends up in
    nomatch only if it does not compare equal to any matching element.
    """
    # list comprehension instead of filter(): filter() returns a list on
    # Python 2 but a one-shot iterator on Python 3, where the membership
    # scan below would exhaust it before it is returned
    match = [x for x in l if prop(x)]
    nomatch = [x for x in l if x not in match]
    return match, nomatch
def sorted_chain(links, key, reverse=False):
    """ Takes a list of iters and iterates over sorted elements

    Each elment of links should be a tuple of (sort_key, iterator). The
    elements of each iterator should be sorted already. sort_key should
    indicate the key of the first element and needs to be comparable to the
    result of key(elem).

    The function returns an iterator over the globally sorted element that
    ensures that as little iterators as possible are evaluated. When
    evaluating """

    # mixed_list initially contains all placeholders; later evaluated
    # elements (from the iterators) are mixed in
    # entries are (sort_key, iterator_or_element, is_placeholder)
    mixed_list = [(k, link, True) for k, link in links]

    while mixed_list:
        _, item, expand = mixed_list.pop(0)

        # found an element (from an earlier expansion), yield it
        if not expand:
            yield item
            continue

        # found an iter that needs to be expanded.
        # The iterator is fully consumed
        new_items = [(key(i), i, False) for i in item]

        # sort links (placeholders) and elements together
        mixed_list = sorted(mixed_list + new_items, key=lambda (k, _v, _e): k,
                reverse=reverse)
def url_add_authentication(url, username, password):
    """
    Adds authentication data (username, password) to a given
    URL in order to construct an authenticated URL.

    >>> url_add_authentication('https://host.com/', '', None)
    'https://host.com/'
    >>> url_add_authentication('http://example.org/', None, None)
    'http://example.org/'
    >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
    'telnet://foo:bar@host.com/'
    >>> url_add_authentication('ftp://example.org', 'billy', None)
    'ftp://billy@example.org'
    >>> url_add_authentication('ftp://example.org', 'billy', '')
    'ftp://billy:@example.org'
    >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
    'http://aa:bc@localhost/x'
    >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
    'http://i%2Fo:P@ss:@blubb.lan/u.html'
    >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
    'http://c:d@x.org/'
    >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
    'http://P@x:i%2F@cx.lan'
    >>> url_add_authentication('http://x.org/', 'a b', 'c d')
    'http://a%20b:c%20d@x.org/'
    """
    if username is None or username == '':
        return url

    # Relaxations of the strict quoting rules (bug 1521):
    # 1. Accept '@' in username and password
    # 2. Acecpt ':' in password only
    username = urllib.quote(username, safe='@')

    if password is not None:
        password = urllib.quote(password, safe='@:')
        auth_string = ':'.join((username, password))
    else:
        auth_string = username

    # drop any credentials already present before adding the new ones
    url = url_strip_authentication(url)

    url_parts = list(urlparse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL
    url_parts[1] = '@'.join((auth_string, url_parts[1]))

    return urlparse.urlunsplit(url_parts)
def urlopen(url, headers=None, data=None):
    """
    An URL opener with the User-agent set to gPodder (with version)

    If the URL contains authentication data, the credentials are
    stripped from the URL and installed in an HTTP basic-auth handler
    instead. Returns the opened response object.
    """
    username, password = username_password_from_url(url)
    if username is not None or password is not None:
        url = url_strip_authentication(url)
        password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        handler = urllib2.HTTPBasicAuthHandler(password_mgr)
        opener = urllib2.build_opener(handler)
    else:
        opener = urllib2.build_opener()

    # copy the headers so the caller's dict is not mutated
    if headers is None:
        headers = {}
    else:
        headers = dict(headers)

    headers.update({'User-agent': settings.USER_AGENT})
    request = urllib2.Request(url, data=data, headers=headers)
    return opener.open(request)
def username_password_from_url(url):
    r"""
    Returns a tuple (username,password) containing authentication
    data from the specified URL or (None,None) if no authentication
    data can be found in the URL.

    See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)

    >>> username_password_from_url('https://@host.com/')
    ('', None)
    >>> username_password_from_url('telnet://host.com/')
    (None, None)
    >>> username_password_from_url('ftp://foo:@host.com/')
    ('foo', '')
    >>> username_password_from_url('http://a:b@host.com/')
    ('a', 'b')
    >>> username_password_from_url(1)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url(None)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url('http://a@b:c@host.com/')
    ('a@b', 'c')
    >>> username_password_from_url('ftp://a:b:c@host.com/')
    ('a', 'b:c')
    >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
    ('i/o', 'P@ss:')
    >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
    ('\xc3\xb6sterreich', None)
    >>> username_password_from_url('http://w%20x:y%20z@example.org/')
    ('w x', 'y z')
    >>> username_password_from_url('http://example.com/x@y:z@test.com/')
    (None, None)
    """
    if type(url) not in (str, unicode):
        raise ValueError('URL has to be a string or unicode object.')

    (username, password) = (None, None)

    (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)

    if '@' in netloc:
        (authentication, netloc) = netloc.rsplit('@', 1)
        if ':' in authentication:
            (username, password) = authentication.split(':', 1)

            # RFC1738 dictates that we should not allow ['/', '@', ':']
            # characters in the username and password field (Section 3.1):
            #
            # 1. The "/" can't be in there at this point because of the way
            #    urlparse (which we use above) works.
            # 2. Due to gPodder bug 1521, we allow "@" in the username and
            #    password field. We use netloc.rsplit('@', 1), which will
            #    make sure that we split it at the last '@' in netloc.
            # 3. The colon must be excluded (RFC2617, Section 2) in the
            #    username, but is apparently allowed in the password. This
            #    is handled by the authentication.split(':', 1) above, and
            #    will cause any extraneous ':'s to be part of the password.

            username = urllib.unquote(username)
            password = urllib.unquote(password)
        else:
            # no colon: the whole authentication part is the username
            username = urllib.unquote(authentication)

    return (username, password)
def url_strip_authentication(url):
    """
    Strips authentication data from an URL. Returns the URL with
    the authentication data removed from it.

    >>> url_strip_authentication('https://host.com/')
    'https://host.com/'
    >>> url_strip_authentication('telnet://foo:bar@host.com/')
    'telnet://host.com/'
    >>> url_strip_authentication('ftp://billy@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('ftp://billy:@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('http://aa:bc@localhost/x')
    'http://localhost/x'
    >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
    'http://blubb.lan/u.html'
    >>> url_strip_authentication('http://c:d@x.org/')
    'http://x.org/'
    >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
    'http://cx.lan'
    >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
    'http://example.com/'
    """
    url_parts = list(urlparse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL

    # Remove existing authentication data
    # (split at the LAST '@' so '@' inside the credentials survives)
    if '@' in url_parts[1]:
        url_parts[1] = url_parts[1].rsplit('@', 1)[1]

    return urlparse.urlunsplit(url_parts)
# Native filesystem encoding detection
encoding = sys.getfilesystemencoding()

def sanitize_encoding(filename):
    r"""
    Generate a sanitized version of a string (i.e.
    remove invalid characters and encode in the
    detected native language encoding).

    >>> sanitize_encoding('\x80')
    ''
    >>> sanitize_encoding(u'unicode')
    'unicode'
    """
    # The encoding problem goes away in Python 3.. hopefully!
    if sys.version_info >= (3, 0):
        return filename

    global encoding
    # round-trip through the filesystem encoding, silently dropping
    # anything that cannot be represented
    if not isinstance(filename, unicode):
        filename = filename.decode(encoding, 'ignore')
    return filename.encode(encoding, 'ignore')
def get_git_head():
    """ returns the commit and message of the current git HEAD

    Returns (None, None) if git cannot be executed or reports an error.
    """

    cmd = '/usr/bin/git log -n 1 --oneline'.split()
    try:
        proc = subprocess.Popen(cmd,
                cwd = settings.BASE_DIR,
                stdout = subprocess.PIPE,
                stderr = subprocess.PIPE,
            )

    except OSError:
        return None, None

    out, err = proc.communicate()
    if err:
        return None, None

    # first word is the abbreviated commit hash, the rest is the subject
    words = out.split()
    commit = words[0]
    msg = ' '.join(words[1:])
    return commit, msg
# https://gist.github.com/samuraisam/901117

# zero tolerance by default when deep_eq compares datetime values
default_fudge = timedelta(seconds=0, microseconds=0, days=0)
def deep_eq(_v1, _v2, datetime_fudge=default_fudge, _assert=False):
    """
    Tests for deep equality between two python data structures recursing
    into sub-structures if necessary. Works with all python types including
    iterators and generators. This function was dreampt up to test API responses
    but could be used for anything. Be careful. With deeply nested structures
    you may blow the stack.

    Options:
              datetime_fudge => this is a datetime.timedelta object which, when
                                comparing dates, will accept values that differ
                                by the number of seconds specified
              _assert        => passing yes for this will raise an assertion error
                                when values do not match, instead of returning
                                false (very useful in combination with pdb)

    Doctests included:

    >>> x1, y1 = ({'a': 'b'}, {'a': 'b'})
    >>> deep_eq(x1, y1)
    True
    >>> x2, y2 = ({'a': 'b'}, {'b': 'a'})
    >>> deep_eq(x2, y2)
    False
    >>> x3, y3 = ({'a': {'b': 'c'}}, {'a': {'b': 'c'}})
    >>> deep_eq(x3, y3)
    True
    >>> x4, y4 = ({'c': 't', 'a': {'b': 'c'}}, {'a': {'b': 'n'}, 'c': 't'})
    >>> deep_eq(x4, y4)
    False
    >>> x5, y5 = ({'a': [1,2,3]}, {'a': [1,2,3]})
    >>> deep_eq(x5, y5)
    True
    >>> x6, y6 = ({'a': [1,'b',8]}, {'a': [2,'b',8]})
    >>> deep_eq(x6, y6)
    False
    >>> x7, y7 = ('a', 'a')
    >>> deep_eq(x7, y7)
    True
    >>> x8, y8 = (['p','n',['asdf']], ['p','n',['asdf']])
    >>> deep_eq(x8, y8)
    True
    >>> x9, y9 = (['p','n',['asdf',['omg']]], ['p', 'n', ['asdf',['nowai']]])
    >>> deep_eq(x9, y9)
    False
    >>> x10, y10 = (1, 2)
    >>> deep_eq(x10, y10)
    False
    >>> deep_eq((str(p) for p in xrange(10)), (str(p) for p in xrange(10)))
    True
    >>> str(deep_eq(range(4), range(4)))
    'True'
    >>> deep_eq(xrange(100), xrange(100))
    True
    >>> deep_eq(xrange(2), xrange(5))
    False
    >>> from datetime import datetime, timedelta
    >>> d1, d2 = (datetime.now(), datetime.now() + timedelta(seconds=4))
    >>> deep_eq(d1, d2)
    False
    >>> deep_eq(d1, d2, datetime_fudge=timedelta(seconds=5))
    True
    """
    # recursive call with the same configuration
    _deep_eq = functools.partial(deep_eq, datetime_fudge=datetime_fudge,
                                 _assert=_assert)

    def _check_assert(R, a, b, reason=''):
        # raise instead of returning False when _assert is enabled
        if _assert and not R:
            assert 0, "an assertion has failed in deep_eq (%s) %s != %s" % (
                reason, str(a), str(b))
        return R

    def _deep_dict_eq(d1, d2):
        k1, k2 = (sorted(d1.keys()), sorted(d2.keys()))
        if k1 != k2: # keys should be exactly equal
            return _check_assert(False, k1, k2, "keys")

        # every value must be deeply equal (True counts as 1 in the sum)
        return _check_assert(operator.eq(sum(_deep_eq(d1[k], d2[k])
                                       for k in k1),
                                     len(k1)), d1, d2, "dictionaries")

    def _deep_iter_eq(l1, l2):
        if len(l1) != len(l2):
            return _check_assert(False, l1, l2, "lengths")
        return _check_assert(operator.eq(sum(_deep_eq(v1, v2)
                                      for v1, v2 in zip(l1, l2)),
                                     len(l1)), l1, l2, "iterables")

    def op(a, b):
        _op = operator.eq
        if type(a) == datetime and type(b) == datetime:
            # compare datetimes with the configured tolerance (seconds)
            s = datetime_fudge.seconds
            t1, t2 = (time.mktime(a.timetuple()), time.mktime(b.timetuple()))
            l = t1 - t2
            l = -l if l > 0 else l
            return _check_assert((-s if s > 0 else s) <= l, a, b, "dates")
        return _check_assert(_op(a, b), a, b, "values")

    c1, c2 = (_v1, _v2)

    # guard against strings because they are iterable and their
    # elements yield iterables infinitely.
    # I N C E P T I O N
    for t in types.StringTypes:
        if isinstance(_v1, t):
            break
    else:
        if isinstance(_v1, types.DictType):
            op = _deep_dict_eq
        else:
            try:
                c1, c2 = (list(iter(_v1)), list(iter(_v2)))
            except TypeError:
                c1, c2 = _v1, _v2
            else:
                op = _deep_iter_eq

    return op(c1, c2)
def parse_request_body(request):
    """ returns the parsed request body, handles gzip encoding """

    body = request.body

    # transparently inflate gzip-compressed payloads
    if request.META.get('HTTP_CONTENT_ENCODING') == 'gzip':
        body = zlib.decompress(body)

    return json.loads(body)
def normalize_feed_url(url):
    """
    Converts any URL to http:// or ftp:// so that it can be
    used with "wget". If the URL cannot be converted (invalid
    or unknown scheme), "None" is returned.

    This will also normalize feed:// and itpc:// to http://.

    >>> normalize_feed_url('itpc://example.org/podcast.rss')
    'http://example.org/podcast.rss'

    If no URL scheme is defined (e.g. "curry.com"), we will
    simply assume the user intends to add a http:// feed.

    >>> normalize_feed_url('curry.com')
    'http://curry.com/'

    There are even some more shortcuts for advanced users
    and lazy typists (see the source for details).

    >>> normalize_feed_url('fb:43FPodcast')
    'http://feeds.feedburner.com/43FPodcast'

    It will also take care of converting the domain name to
    all-lowercase (because domains are not case sensitive):

    >>> normalize_feed_url('http://Example.COM/')
    'http://example.com/'

    Some other minimalistic changes are also taken care of,
    e.g. a ? with an empty query is removed:

    >>> normalize_feed_url('http://example.org/test?')
    'http://example.org/test'

    Leading and trailing whitespace is removed

    >>> normalize_feed_url(' http://example.com/podcast.rss ')
    'http://example.com/podcast.rss'

    HTTP Authentication is removed to protect users' privacy

    >>> normalize_feed_url('http://a@b:c@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://a:b:c@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
    'http://example.org/'
    >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
    'http://example.com/x%40y%3Az%40test.com/'
    >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
    'http://en.wikipedia.org/wiki/%C3%84'
    >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
    'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
    """
    url = url.strip()
    # too short to be a valid URL (shorter than e.g. 'http://x')
    if not url or len(url) < 8:
        return None

    if isinstance(url, unicode):
        url = url.encode('utf-8', 'ignore')

    # This is a list of prefixes that you can use to minimize the amount of
    # keystrokes that you have to use.
    # Feel free to suggest other useful prefixes, and I'll add them here.
    PREFIXES = {
            'fb:': 'http://feeds.feedburner.com/%s',
            'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
            'sc:': 'http://soundcloud.com/%s',
            'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
            # YouTube playlists. To get a list of playlists per-user, use:
            # https://gdata.youtube.com/feeds/api/users/<username>/playlists
            'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
    }

    for prefix, expansion in PREFIXES.iteritems():
        if url.startswith(prefix):
            url = expansion % (url[len(prefix):],)
            break

    # Assume HTTP for URLs without scheme
    if not '://' in url:
        url = 'http://' + url

    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    # Schemes and domain names are case insensitive
    scheme, netloc = scheme.lower(), netloc.lower()

    # encode non-encoded characters
    path = urllib.quote(path, '/%')
    query = urllib.quote_plus(query, ':&=')

    # Remove authentication to protect users' privacy
    netloc = netloc.rsplit('@', 1)[-1]

    # Normalize empty paths to "/"
    if path == '':
        path = '/'

    # feed://, itpc:// and itms:// are really http://
    if scheme in ('feed', 'itpc', 'itms'):
        scheme = 'http'

    if scheme not in ('http', 'https', 'ftp', 'file'):
        return None

    # urlunsplit might return "a slighty different, but equivalent URL"
    return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
def partition(items, predicate=bool):
    """ Splits items into two iterators by predicate

    Returns (nomatch, match): the first iterator yields the items for
    which predicate is false, the second those for which it is true.
    """
    flagged = ((predicate(item), item) for item in items)
    left, right = itertools.tee(flagged)
    falsy = (item for pred, item in left if not pred)
    truthy = (item for pred, item in right if pred)
    return (falsy, truthy)