# normalize_feed_url() encodes characters
# [mygpo.git] / mygpo / utils.py
# blob d0605d65b8275b0944b11bed6e827e1e38adc12b
1 # -*- coding: utf-8 -*-
3 # This file is part of my.gpodder.org.
5 # my.gpodder.org is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU Affero General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or (at your
8 # option) any later version.
10 # my.gpodder.org is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
13 # License for more details.
15 # You should have received a copy of the GNU Affero General Public License
16 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
19 import functools
20 import types
21 import subprocess
22 import os
23 import operator
24 import sys
25 import re
26 import collections
27 from datetime import datetime, timedelta, date
28 import time
29 import hashlib
30 import urlparse
31 import urllib
32 import urllib2
33 import zlib
35 from django.conf import settings
37 from mygpo.core.json import json
def daterange(from_date, to_date=None, leap=timedelta(days=1)):
    """ Iterates over dates from from_date up to and including to_date

    If to_date is omitted, "now" is used, matching the type of from_date
    (datetime.now() for datetimes, date.today() for dates).

    >>> from_d = datetime(2010, 1, 1)
    >>> to_d = datetime(2010, 1, 5)
    >>> list(daterange(from_d, to_d))
    [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
    """
    if to_date is None:
        # default the upper bound to "now", matching the input type
        if isinstance(from_date, datetime):
            to_date = datetime.now()
        else:
            to_date = date.today()

    current = from_date
    while current <= to_date:
        yield current
        current = current + leap
def format_time(value):
    """ Format an offset (in seconds) to a string

    The offset should be an integer or float value.

    >>> format_time(0)
    '00:00'
    >>> format_time(20)
    '00:20'
    >>> format_time(3600)
    '01:00:00'
    >>> format_time(10921)
    '03:02:01'
    """
    try:
        moment = datetime.utcfromtimestamp(value)
    except ValueError:
        # offsets that don't map to a valid timestamp yield an empty string
        return ''

    # omit the hours part for offsets below one hour
    pattern = '%M:%S' if moment.hour == 0 else '%H:%M:%S'
    return moment.strftime(pattern)
def parse_time(value):
    """ Parses a time value (int or string) into an offset in seconds

    Accepts ints (returned unchanged), 'HH:MM:SS' / 'MM:SS' strings, and
    plain numeric strings. Raises ValueError for None or the empty string.

    >>> parse_time(10)
    10

    >>> parse_time('05:10') #5*60+10
    310

    >>> parse_time('1:05:10') #60*60+5*60+10
    3910
    """
    if value is None:
        raise ValueError('None value in parse_time')

    if isinstance(value, int):
        # Don't need to parse already-converted time value
        return value

    if value == '':
        # fixed typo in the original message ("Empty valueing")
        raise ValueError('Empty string in parse_time')

    # try the longest format first so hours are not mistaken for minutes
    for fmt in ('%H:%M:%S', '%M:%S'):
        try:
            t = time.strptime(value, fmt)
            return t.tm_hour * 60 * 60 + t.tm_min * 60 + t.tm_sec
        except ValueError:
            continue

    # fall back to a plain number of seconds
    return int(value)
def parse_bool(val):
    """ Parses a boolean from a bool or its string representation

    >>> parse_bool('True')
    True

    >>> parse_bool('true')
    True

    >>> parse_bool('')
    False
    """
    if isinstance(val, bool):
        return val
    # only the (case-insensitive) string 'true' counts as True
    return val.lower() == 'true'
def iterate_together(lists, key=lambda x: x, reverse=False):
    """
    takes ordered, possibly sparse, lists with similar items
    (some items have a corresponding item in the other lists, some don't).

    It then yield tuples of corresponding items, where one element is None is
    there is no corresponding entry in one of the lists.

    Tuples where both elements are None are skipped.

    The results of the key method are used for the comparisons.

    If reverse is True, the lists are expected to be sorted in reverse order
    and the results will also be sorted reverse

    >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
    [(1, 1), (2, None), (None, 3)]

    >>> list(iterate_together([[], []]))
    []

    >>> list(iterate_together([range(1, 3), range(3, 5)]))
    [(1, None), (2, None), (None, 3), (None, 4)]

    >>> list(iterate_together([range(1, 3), []]))
    [(1, None), (2, None)]

    >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
    [(1, None), (3, 3)]
    """

    # (item, more): the current head of a list, and whether it might have more
    Next = collections.namedtuple('Next', 'item more')
    # direction-dependent comparison helpers
    min_ = min if not reverse else max
    lt_ = operator.lt if not reverse else operator.gt

    lists = [iter(l) for l in lists]

    def _take(it):
        # advance "it" to its next non-None item; more=False marks exhaustion
        try:
            i = it.next()
            while i is None:
                i = it.next()
            return Next(i, True)
        except StopIteration:
            return Next(None, False)

    def new_res():
        # a fresh result tuple (as list), one slot per input list
        return [None]*len(lists)

    # take first bunch of items
    items = [_take(l) for l in lists]

    while any(i.item is not None or i.more for i in items):

        res = new_res()

        for n, item in enumerate(items):

            if item.item is None:
                continue

            # first candidate for this round: just claim the slot
            if all(x is None for x in res):
                res[n] = item.item
                continue

            # smallest (largest if reverse) key currently in the result
            min_v = min_(filter(lambda x: x is not None, res), key=key)

            if key(item.item) == key(min_v):
                # corresponds to an already-claimed item: same round
                res[n] = item.item

            elif lt_(key(item.item), key(min_v)):
                # strictly smaller: discard the round and start over with it
                res = new_res()
                res[n] = item.item

        # refill the heads that were consumed this round
        for n, x in enumerate(res):
            if x is not None:
                items[n] = _take(lists[n])

        yield tuple(res)
def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
    """ Writes a one-line text progress bar to stream (default stdout)

    The line is redrawn in place (leading '\r', no trailing newline) showing
    a bar of '#'s with the percentage embedded, plus val / max_val and an
    optional status string.
    """
    factor = float(val)/max_val if max_val > 0 else 0

    # progress as percentage
    percentage_str = '{val:.2%}'.format(val=factor)

    # progress bar filled with #s
    factor = min(int(factor*max_width), max_width)
    progress_str = '#' * factor + ' ' * (max_width-factor)

    #insert percentage into bar
    percentage_start = int((max_width-len(percentage_str))/2)
    progress_str = progress_str[:percentage_start] + \
                   percentage_str + \
                   progress_str[percentage_start+len(percentage_str):]

    # trailing commas suppress the newline so the bar redraws in place
    print >> stream, '\r',
    print >> stream, '[ %s ] %s / %s | %s' % (
        progress_str,
        val,
        max_val,
        status_str),
    stream.flush()
def set_cmp(list, simplify):
    """
    Builds a set out of a list but uses the results of simplify to determine
    equality between items
    """
    # keyed by simplify(item); later items win on key collisions
    unique = dict((simplify(item), item) for item in list)
    return unique.values()
def first(it):
    """
    returns the first not-None object or None if the iterator is exhausted
    """
    return next((x for x in it if x is not None), None)
def intersect(a, b):
    """ Returns a list of the elements that occur in both a and b """
    return list(set(a).intersection(b))
def remove_control_chars(s):
    """ Removes C0 (0-31) and C1 (127-159) control characters from s """
    # fixed: the original also built an unused generator over every Unicode
    # codepoint (all_chars); that dead code has been removed
    control_chars = ''.join(map(unichr, range(0, 32) + range(127, 160)))
    control_char_re = re.compile('[%s]' % re.escape(control_chars))

    return control_char_re.sub('', s)
def unzip(a):
    """ Transposes a sequence of tuples into a tuple of lists """
    columns = zip(*a)
    return tuple([list(column) for column in columns])
def parse_range(s, min, max, default=None):
    """
    Parses the string and returns its value. If the value is outside the given
    range, its closest number within the range is returned

    >>> parse_range('5', 0, 10)
    5

    >>> parse_range('0', 5, 10)
    5

    >>> parse_range('15', 0, 10)
    10

    >>> parse_range('x', 0, 20)
    10

    >>> parse_range('x', 0, 20, 20)
    20
    """
    try:
        val = int(s)
    except (ValueError, TypeError):
        # unparseable: fall back to the default, or the middle of the range
        return default if default is not None else (max-min)/2

    # clamp into [min, max]
    if val < min:
        return min
    if val > max:
        return max
    return val
def flatten(l):
    """ Flattens one level of nesting: a list of iterables becomes one list """
    result = []
    for sublist in l:
        result.extend(sublist)
    return result
def linearize(key, iterators, reverse=False):
    """
    Linearizes a number of iterators, sorted by some comparison function

    Each iterator is assumed to be sorted by key already; the merged
    sequence is yielded element by element.
    """
    # (head-value, iterator) pairs for all non-empty sources
    pending = []
    for source in iterators:
        source = iter(source)
        try:
            pending.append((next(source), source))
        except StopIteration:
            continue

    while pending:
        # pick the source whose head sorts first, yield it, then refill
        pending.sort(key=lambda entry: key(entry[0]), reverse=reverse)
        value, source = pending.pop(0)
        yield value
        try:
            pending.append((next(source), source))
        except StopIteration:
            pass
def skip_pairs(iterator, cmp=cmp):
    """ Skips pairs of equal items

    Adjacent items that compare equal are dropped pairwise: two equal items
    cancel out, a third one survives, etc.

    >>> list(skip_pairs([]))
    []

    >>> list(skip_pairs([1]))
    [1]

    >>> list(skip_pairs([1, 2, 3]))
    [1, 2, 3]

    >>> list(skip_pairs([1, 1]))
    []

    >>> list(skip_pairs([1, 2, 2]))
    [1]

    >>> list(skip_pairs([1, 2, 2, 3]))
    [1, 3]

    >>> list(skip_pairs([1, 2, 2, 2]))
    [1, 2]

    >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
    [1, 3]
    """

    iterator = iter(iterator)
    # NOTE: an empty input raises StopIteration here, which simply ends the
    # generator without yielding anything
    next = iterator.next()

    while True:
        item = next
        try:
            next = iterator.next()
        except StopIteration as e:
            # last item had no partner: emit it, then end the generator
            yield item
            raise e

        if cmp(item, next) == 0:
            # a pair: drop both; if this raises StopIteration the pair was
            # at the very end and the generator just finishes
            next = iterator.next()
        else:
            yield item
def get_timestamp(datetime_obj):
    """ Returns the timestamp as an int for the given datetime object

    Uses time.mktime, so the datetime is interpreted in local time.

    >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
    1302168606

    >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
    0
    """
    timestruct = datetime_obj.timetuple()
    return int(time.mktime(timestruct))
re_url = re.compile('^https?://')

def is_url(string):
    """ Returns true if a string looks like an URL

    Only http:// and https:// prefixes are recognized.

    >>> is_url('http://example.com/some-path/file.xml')
    True

    >>> is_url('something else')
    False
    """
    return re_url.match(string) is not None
# from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
# this does not increase asymptotical complexity
# but can still waste more time than it saves.
def shortest_of(strings):
    """ Returns the shortest of the given strings (the first one on ties) """
    return min(strings, key=len)
def longest_substr(strings):
    """
    Returns the longest common substring of the given strings

    Returns '' for an empty list. Uses the shortest input as the reference
    and tries successively longer slices of it.
    """
    substr = ""
    if not strings:
        return substr
    reference = min(strings, key=len)
    length = len(reference)
    # find a suitable slice i:j
    for i in range(length):
        # only consider strings long at least len(substr) + 1;
        # fixed: the upper bound must be length + 1 (slice ends are
        # exclusive), otherwise a common substring that reaches the end of
        # the reference string could never be found
        for j in range(i + len(substr) + 1, length + 1):
            candidate = reference[i:j]
            if all(candidate in text for text in strings):
                substr = candidate
    return substr
def additional_value(it, gen_val, val_changed=lambda _x, _v: True):
    """ Provides an additional value to the elements, calculated when needed

    For the elements from the iterator, some additional value can be computed
    by gen_val (which might be an expensive computation).

    If the elements in the iterator are ordered so that some subsequent
    elements would generate the same additional value, val_changed can be
    provided, which receives the next element from the iterator and the
    previous additional value. If the element would generate the same
    additional value (val_changed returns False), its computation is skipped.

    Fixed: the default val_changed took only one argument but is called with
    two, crashing for any iterator of more than one element; it now accepts
    both and recomputes for every element, as documented.

    >>> # get the next full hundred higher than x
    >>> # this will probably be an expensive calculation
    >>> next_hundred = lambda x: x + 100-(x % 100)

    >>> # returns True if h is not the value that next_hundred(x) would provide
    >>> # this should be a relatively cheap calculation, compared to the above
    >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100

    >>> xs = [0, 50, 100, 101, 199, 200, 201]
    >>> list(additional_value(xs, next_hundred, diff_hundred))
    [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
    """
    # sentinel marking "no additional value computed yet"
    _none = object()
    current = _none

    for x in it:
        if current is _none or val_changed(x, current):
            current = gen_val(x)

        yield (x, current)
def file_hash(f, h=hashlib.md5, block_size=2**20):
    """ returns the hash of the contents of a file

    f is a file-like object (opened in binary mode), h the hash constructor
    to use; the file is consumed in chunks of block_size bytes and the hash
    object (not its digest) is returned.
    """
    f_hash = h()
    # fixed sentinel: b'' equals '' under Python 2, but for binary files
    # under Python 3 only b'' terminates the read loop
    for chunk in iter(lambda: f.read(block_size), b''):
        f_hash.update(chunk)

    return f_hash
def split_list(l, prop):
    """ split elements that satisfy a property, and those that don't """
    matching = [x for x in l if prop(x)]
    rest = [x for x in l if x not in matching]
    return matching, rest
def sorted_chain(links, key, reverse=False):
    """ Takes a list of iters can iterates over sorted elements

    Each elment of links should be a tuple of (sort_key, iterator). The
    elements of each iterator should be sorted already. sort_key should
    indicate the key of the first element and needs to be comparable to the
    result of key(elem).

    The function returns an iterator over the globally sorted elements that
    ensures that as few iterators as possible are evaluated.
    """

    # entries are (sort_key, payload, expand): placeholders carry an
    # unevaluated iterator (expand=True), evaluated elements carry the
    # element itself (expand=False)
    mixed = [(sort_key, link, True) for sort_key, link in links]

    while mixed:
        _, payload, expand = mixed.pop(0)

        if not expand:
            # an element from an earlier expansion: emit it
            yield payload
            continue

        # a placeholder: consume its iterator completely and merge the
        # resulting elements back in sorted order with the remaining entries
        expanded = [(key(elem), elem, False) for elem in payload]
        mixed = sorted(mixed + expanded, key=operator.itemgetter(0),
                       reverse=reverse)
def url_add_authentication(url, username, password):
    """
    Adds authentication data (username, password) to a given
    URL in order to construct an authenticated URL.

    Any credentials already embedded in the URL are replaced.

    >>> url_add_authentication('https://host.com/', '', None)
    'https://host.com/'
    >>> url_add_authentication('http://example.org/', None, None)
    'http://example.org/'
    >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
    'telnet://foo:bar@host.com/'
    >>> url_add_authentication('ftp://example.org', 'billy', None)
    'ftp://billy@example.org'
    >>> url_add_authentication('ftp://example.org', 'billy', '')
    'ftp://billy:@example.org'
    >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
    'http://aa:bc@localhost/x'
    >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
    'http://i%2Fo:P@ss:@blubb.lan/u.html'
    >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
    'http://c:d@x.org/'
    >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
    'http://P@x:i%2F@cx.lan'
    >>> url_add_authentication('http://x.org/', 'a b', 'c d')
    'http://a%20b:c%20d@x.org/'
    """
    # no username means nothing to add
    if username is None or username == '':
        return url

    # Relaxations of the strict quoting rules (bug 1521):
    # 1. Accept '@' in username and password
    # 2. Accept ':' in password only
    username = urllib.quote(username, safe='@')

    if password is not None:
        password = urllib.quote(password, safe='@:')
        auth_string = ':'.join((username, password))
    else:
        auth_string = username

    # drop any credentials already present in the URL
    url = url_strip_authentication(url)

    url_parts = list(urlparse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL
    url_parts[1] = '@'.join((auth_string, url_parts[1]))

    return urlparse.urlunsplit(url_parts)
def urlopen(url, headers=None, data=None):
    """
    An URL opener with the User-agent set to gPodder (with version)

    If the URL carries embedded credentials, they are stripped from the URL
    and installed in an HTTP Basic Auth handler instead. Returns the opened
    response object.
    """
    username, password = username_password_from_url(url)
    if username is not None or password is not None:
        # move the credentials from the URL into a password manager
        url = url_strip_authentication(url)
        password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        handler = urllib2.HTTPBasicAuthHandler(password_mgr)
        opener = urllib2.build_opener(handler)
    else:
        opener = urllib2.build_opener()

    if headers is None:
        headers = {}
    else:
        # copy, so the caller's dict is not mutated below
        headers = dict(headers)

    headers.update({'User-agent': settings.USER_AGENT})
    request = urllib2.Request(url, data=data, headers=headers)
    return opener.open(request)
def username_password_from_url(url):
    r"""
    Returns a tuple (username,password) containing authentication
    data from the specified URL or (None,None) if no authentication
    data can be found in the URL.

    See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)

    >>> username_password_from_url('https://@host.com/')
    ('', None)
    >>> username_password_from_url('telnet://host.com/')
    (None, None)
    >>> username_password_from_url('ftp://foo:@host.com/')
    ('foo', '')
    >>> username_password_from_url('http://a:b@host.com/')
    ('a', 'b')
    >>> username_password_from_url(1)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url(None)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url('http://a@b:c@host.com/')
    ('a@b', 'c')
    >>> username_password_from_url('ftp://a:b:c@host.com/')
    ('a', 'b:c')
    >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
    ('i/o', 'P@ss:')
    >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
    ('\xc3\xb6sterreich', None)
    >>> username_password_from_url('http://w%20x:y%20z@example.org/')
    ('w x', 'y z')
    >>> username_password_from_url('http://example.com/x@y:z@test.com/')
    (None, None)
    """
    if type(url) not in (str, unicode):
        raise ValueError('URL has to be a string or unicode object.')

    (username, password) = (None, None)

    (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)

    if '@' in netloc:
        (authentication, netloc) = netloc.rsplit('@', 1)
        if ':' in authentication:
            (username, password) = authentication.split(':', 1)

            # RFC1738 dictates that we should not allow ['/', '@', ':']
            # characters in the username and password field (Section 3.1):
            #
            # 1. The "/" can't be in there at this point because of the way
            #    urlparse (which we use above) works.
            # 2. Due to gPodder bug 1521, we allow "@" in the username and
            #    password field. We use netloc.rsplit('@', 1), which will
            #    make sure that we split it at the last '@' in netloc.
            # 3. The colon must be excluded (RFC2617, Section 2) in the
            #    username, but is apparently allowed in the password. This
            #    is handled by the authentication.split(':', 1) above, and
            #    will cause any extraneous ':'s to be part of the password.

            # undo percent-encoding of the extracted credentials
            username = urllib.unquote(username)
            password = urllib.unquote(password)
        else:
            username = urllib.unquote(authentication)

    return (username, password)
def url_strip_authentication(url):
    """
    Strips authentication data from an URL. Returns the URL with
    the authentication data removed from it.

    >>> url_strip_authentication('https://host.com/')
    'https://host.com/'
    >>> url_strip_authentication('telnet://foo:bar@host.com/')
    'telnet://host.com/'
    >>> url_strip_authentication('ftp://billy@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('ftp://billy:@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('http://aa:bc@localhost/x')
    'http://localhost/x'
    >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
    'http://blubb.lan/u.html'
    >>> url_strip_authentication('http://c:d@x.org/')
    'http://x.org/'
    >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
    'http://cx.lan'
    >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
    'http://example.com/'
    """
    parts = list(urlparse.urlsplit(url))

    # parts[1] is the HOST part of the URL; credentials precede the last
    # '@', so keep only what follows it
    netloc = parts[1]
    if '@' in netloc:
        parts[1] = netloc.rsplit('@', 1)[1]

    return urlparse.urlunsplit(parts)
# Native filesystem encoding detection
encoding = sys.getfilesystemencoding()

def sanitize_encoding(filename):
    r"""
    Generate a sanitized version of a string (i.e.
    remove invalid characters and encode in the
    detected native language encoding).

    >>> sanitize_encoding('\x80')
    ''
    >>> sanitize_encoding(u'unicode')
    'unicode'
    """
    # The encoding problem goes away in Python 3.. hopefully!
    if sys.version_info >= (3, 0):
        return filename

    global encoding
    if not isinstance(filename, unicode):
        # decode byte strings first, dropping undecodable bytes
        filename = filename.decode(encoding, 'ignore')
    # re-encode, dropping characters not representable in the fs encoding
    return filename.encode(encoding, 'ignore')
def get_git_head():
    """ returns the commit and message of the current git HEAD """

    cmd = '/usr/bin/git log -n 1 --oneline'.split()
    try:
        proc = subprocess.Popen(cmd,
                                cwd=settings.BASE_DIR,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                               )
    except OSError:
        # git binary not available
        return None, None

    out, err = proc.communicate()
    if err:
        # anything on stderr is treated as failure
        return None, None

    # first token is the abbreviated commit hash, the rest is the message
    tokens = out.split()
    return tokens[0], ' '.join(tokens[1:])
# https://gist.github.com/samuraisam/901117

default_fudge = timedelta(seconds=0, microseconds=0, days=0)

def deep_eq(_v1, _v2, datetime_fudge=default_fudge, _assert=False):
    """
    Tests for deep equality between two python data structures recursing
    into sub-structures if necessary. Works with all python types including
    iterators and generators. This function was dreampt up to test API responses
    but could be used for anything. Be careful. With deeply nested structures
    you may blow the stack.

    Options:
              datetime_fudge => this is a datetime.timedelta object which, when
                                comparing dates, will accept values that differ
                                by the number of seconds specified
              _assert        => passing yes for this will raise an assertion error
                                when values do not match, instead of returning
                                false (very useful in combination with pdb)

    Doctests included:

    >>> x1, y1 = ({'a': 'b'}, {'a': 'b'})
    >>> deep_eq(x1, y1)
    True
    >>> x2, y2 = ({'a': 'b'}, {'b': 'a'})
    >>> deep_eq(x2, y2)
    False
    >>> x3, y3 = ({'a': {'b': 'c'}}, {'a': {'b': 'c'}})
    >>> deep_eq(x3, y3)
    True
    >>> x4, y4 = ({'c': 't', 'a': {'b': 'c'}}, {'a': {'b': 'n'}, 'c': 't'})
    >>> deep_eq(x4, y4)
    False
    >>> x5, y5 = ({'a': [1,2,3]}, {'a': [1,2,3]})
    >>> deep_eq(x5, y5)
    True
    >>> x6, y6 = ({'a': [1,'b',8]}, {'a': [2,'b',8]})
    >>> deep_eq(x6, y6)
    False
    >>> x7, y7 = ('a', 'a')
    >>> deep_eq(x7, y7)
    True
    >>> x8, y8 = (['p','n',['asdf']], ['p','n',['asdf']])
    >>> deep_eq(x8, y8)
    True
    >>> x9, y9 = (['p','n',['asdf',['omg']]], ['p', 'n', ['asdf',['nowai']]])
    >>> deep_eq(x9, y9)
    False
    >>> x10, y10 = (1, 2)
    >>> deep_eq(x10, y10)
    False
    >>> deep_eq((str(p) for p in xrange(10)), (str(p) for p in xrange(10)))
    True
    >>> str(deep_eq(range(4), range(4)))
    'True'
    >>> deep_eq(xrange(100), xrange(100))
    True
    >>> deep_eq(xrange(2), xrange(5))
    False
    >>> from datetime import datetime, timedelta
    >>> d1, d2 = (datetime.now(), datetime.now() + timedelta(seconds=4))
    >>> deep_eq(d1, d2)
    False
    >>> deep_eq(d1, d2, datetime_fudge=timedelta(seconds=5))
    True
    """
    # recursive comparison with the same fudge/assert settings
    _deep_eq = functools.partial(deep_eq, datetime_fudge=datetime_fudge,
                                 _assert=_assert)

    def _check_assert(R, a, b, reason=''):
        # optionally turn a failed comparison into an AssertionError
        if _assert and not R:
            assert 0, "an assertion has failed in deep_eq (%s) %s != %s" % (
                reason, str(a), str(b))
        return R

    def _deep_dict_eq(d1, d2):
        k1, k2 = (sorted(d1.keys()), sorted(d2.keys()))
        if k1 != k2: # keys should be exactly equal
            return _check_assert(False, k1, k2, "keys")

        # all values must compare equal (True sums as 1 per match)
        return _check_assert(operator.eq(sum(_deep_eq(d1[k], d2[k])
                                             for k in k1),
                                         len(k1)), d1, d2, "dictionaries")

    def _deep_iter_eq(l1, l2):
        if len(l1) != len(l2):
            return _check_assert(False, l1, l2, "lengths")
        # elementwise comparison of the materialized sequences
        return _check_assert(operator.eq(sum(_deep_eq(v1, v2)
                                             for v1, v2 in zip(l1, l2)),
                                         len(l1)), l1, l2, "iterables")

    def op(a, b):
        _op = operator.eq
        if type(a) == datetime and type(b) == datetime:
            # compare timestamps, allowing a difference up to datetime_fudge
            s = datetime_fudge.seconds
            t1, t2 = (time.mktime(a.timetuple()), time.mktime(b.timetuple()))
            l = t1 - t2
            l = -l if l > 0 else l
            return _check_assert((-s if s > 0 else s) <= l, a, b, "dates")
        return _check_assert(_op(a, b), a, b, "values")

    c1, c2 = (_v1, _v2)

    # guard against strings because they are iterable and their
    # elements yield iterables infinitely.
    # I N C E P T I O N
    for t in types.StringTypes:
        if isinstance(_v1, t):
            break
    else:
        if isinstance(_v1, types.DictType):
            op = _deep_dict_eq
        else:
            try:
                # materialize iterators/generators so they can be compared
                c1, c2 = (list(iter(_v1)), list(iter(_v2)))
            except TypeError:
                c1, c2 = _v1, _v2
            else:
                op = _deep_iter_eq

    return op(c1, c2)
def parse_request_body(request):
    """ returns the parsed request body, handles gzip encoding """

    body = request.body
    # transparently decompress gzip-encoded payloads
    if request.META.get('HTTP_CONTENT_ENCODING') == 'gzip':
        body = zlib.decompress(body)

    return json.loads(body)
def normalize_feed_url(url):
    """
    Converts any URL to http:// or ftp:// so that it can be
    used with "wget". If the URL cannot be converted (invalid
    or unknown scheme), "None" is returned.

    This will also normalize feed:// and itpc:// to http://.

    >>> normalize_feed_url('itpc://example.org/podcast.rss')
    'http://example.org/podcast.rss'

    If no URL scheme is defined (e.g. "curry.com"), we will
    simply assume the user intends to add a http:// feed.

    >>> normalize_feed_url('curry.com')
    'http://curry.com/'

    There are even some more shortcuts for advanced users
    and lazy typists (see the source for details).

    >>> normalize_feed_url('fb:43FPodcast')
    'http://feeds.feedburner.com/43FPodcast'

    It will also take care of converting the domain name to
    all-lowercase (because domains are not case sensitive):

    >>> normalize_feed_url('http://Example.COM/')
    'http://example.com/'

    Some other minimalistic changes are also taken care of,
    e.g. a ? with an empty query is removed:

    >>> normalize_feed_url('http://example.org/test?')
    'http://example.org/test'

    Leading and trailing whitespace is removed

    >>> normalize_feed_url(' http://example.com/podcast.rss ')
    'http://example.com/podcast.rss'

    HTTP Authentication is removed to protect users' privacy

    >>> normalize_feed_url('http://a@b:c@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://a:b:c@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
    'http://example.org/'
    >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
    'http://example.com/x%40y%3Az%40test.com/'
    >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
    'http://en.wikipedia.org/wiki/%C3%84'
    >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
    'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
    """
    url = url.strip()
    # anything shorter than 8 chars cannot be a usable URL
    if not url or len(url) < 8:
        return None

    # work on a UTF-8 byte string so quoting below is well-defined
    if isinstance(url, unicode):
        url = url.encode('utf-8', 'ignore')

    # This is a list of prefixes that you can use to minimize the amount of
    # keystrokes that you have to use.
    # Feel free to suggest other useful prefixes, and I'll add them here.
    PREFIXES = {
            'fb:': 'http://feeds.feedburner.com/%s',
            'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
            'sc:': 'http://soundcloud.com/%s',
            'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
            # YouTube playlists. To get a list of playlists per-user, use:
            # https://gdata.youtube.com/feeds/api/users/<username>/playlists
            'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
    }

    for prefix, expansion in PREFIXES.iteritems():
        if url.startswith(prefix):
            url = expansion % (url[len(prefix):],)
            break

    # Assume HTTP for URLs without scheme
    if not '://' in url:
        url = 'http://' + url

    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    # Schemes and domain names are case insensitive
    scheme, netloc = scheme.lower(), netloc.lower()

    # encode non-encoded characters
    path = urllib.quote(path, '/%')
    query = urllib.quote_plus(query, ':&=')

    # Remove authentication to protect users' privacy
    netloc = netloc.rsplit('@', 1)[-1]

    # Normalize empty paths to "/"
    if path == '':
        path = '/'

    # feed://, itpc:// and itms:// are really http://
    if scheme in ('feed', 'itpc', 'itms'):
        scheme = 'http'

    if scheme not in ('http', 'https', 'ftp', 'file'):
        return None

    # urlunsplit might return "a slighty different, but equivalent URL"
    return urlparse.urlunsplit((scheme, netloc, path, query, fragment))