# replace sanitizing rules with gPodder's normalize_feed_url()
# [mygpo.git] / mygpo / utils.py
# blob c482404919ecfffd3c286415d2aabaf8507749a2
2 # This file is part of my.gpodder.org.
4 # my.gpodder.org is free software: you can redistribute it and/or modify it
5 # under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or (at your
7 # option) any later version.
9 # my.gpodder.org is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
12 # License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
18 import functools
19 import types
20 import subprocess
21 import os
22 import operator
23 import sys
24 import re
25 import collections
26 from datetime import datetime, timedelta, date
27 import time
28 import hashlib
29 import urlparse
30 import urllib
31 import urllib2
32 import zlib
34 from django.conf import settings
36 from mygpo.core.json import json
def daterange(from_date, to_date=None, leap=timedelta(days=1)):
    """ Yields each date from from_date up to and including to_date

    If to_date is None, the upper bound defaults to "now", using the same
    type (datetime or date) as from_date. `leap` is the step between the
    yielded values.

    >>> from_d = datetime(2010, 1, 1)
    >>> to_d = datetime(2010, 1, 5)
    >>> list(daterange(from_d, to_d))
    [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
    """
    if to_date is None:
        # match the type of from_date so the <= comparison below is valid
        if isinstance(from_date, datetime):
            to_date = datetime.now()
        else:
            to_date = date.today()

    while from_date <= to_date:
        yield from_date
        from_date = from_date + leap
def format_time(value):
    """Format an offset (in seconds) to a string

    The offset should be an integer or float value.

    >>> format_time(0)
    '00:00'
    >>> format_time(20)
    '00:20'
    >>> format_time(3600)
    '01:00:00'
    >>> format_time(10921)
    '03:02:01'
    """
    try:
        moment = datetime.utcfromtimestamp(value)
    except ValueError:
        # value cannot be represented as a timestamp
        return ''

    # omit the hour part for offsets below one hour
    fmt = '%H:%M:%S' if moment.hour else '%M:%S'
    return moment.strftime(fmt)
def parse_time(value):
    """ Parses a time value (int seconds or H:M:S / M:S string) into seconds

    Raises ValueError for None or the empty string.

    >>> parse_time(10)
    10

    >>> parse_time('05:10') #5*60+10
    310

    >>> parse_time('1:05:10') #60*60+5*60+10
    3910
    """
    if value is None:
        raise ValueError('None value in parse_time')

    if isinstance(value, int):
        # Don't need to parse already-converted time value
        return value

    if value == '':
        # fixed message typo ("Empty valueing")
        raise ValueError('Empty value in parse_time')

    for format in ('%H:%M:%S', '%M:%S'):
        try:
            t = time.strptime(value, format)
            return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
        except ValueError:
            # the string does not match this format; try the next one
            continue

    # plain number of seconds as a string
    return int(value)
def parse_bool(val):
    """ Parses a bool, or the (case-insensitive) string 'true', to a bool

    >>> parse_bool('True')
    True

    >>> parse_bool('true')
    True

    >>> parse_bool('')
    False
    """
    if isinstance(val, bool):
        return val
    # any string other than 'true' (in any case) is False
    return val.lower() == 'true'
def iterate_together(lists, key=lambda x: x, reverse=False):
    """ Iterates over sparse, ordered lists in parallel

    takes ordered, possibly sparse, lists with similar items
    (some items have a corresponding item in the other lists, some don't).

    It then yield tuples of corresponding items, where one element is None is
    there is no corresponding entry in one of the lists.

    Tuples where both elements are None are skipped.

    The results of the key method are used for the comparisons.

    If reverse is True, the lists are expected to be sorted in reverse order
    and the results will also be sorted reverse

    >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
    [(1, 1), (2, None), (None, 3)]

    >>> list(iterate_together([[], []]))
    []

    >>> list(iterate_together([range(1, 3), range(3, 5)]))
    [(1, None), (2, None), (None, 3), (None, 4)]

    >>> list(iterate_together([range(1, 3), []]))
    [(1, None), (2, None)]

    >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
    [(1, None), (3, 3)]
    """
    Next = collections.namedtuple('Next', 'item more')
    min_ = min if not reverse else max
    lt_ = operator.lt if not reverse else operator.gt

    lists = [iter(l) for l in lists]

    def _take(it):
        # returns the next non-None item; .more is False once exhausted
        # (next(it) replaces the Python-2-only it.next())
        try:
            i = next(it)
            while i is None:
                i = next(it)
            return Next(i, True)
        except StopIteration:
            return Next(None, False)

    def new_res():
        return [None]*len(lists)

    # take first bunch of items
    items = [_take(l) for l in lists]

    while any(i.item is not None or i.more for i in items):

        res = new_res()

        for n, item in enumerate(items):

            if item.item is None:
                continue

            if all(x is None for x in res):
                res[n] = item.item
                continue

            # smallest (or largest, if reverse) value currently in res
            min_v = min_([x for x in res if x is not None], key=key)

            if key(item.item) == key(min_v):
                res[n] = item.item

            elif lt_(key(item.item), key(min_v)):
                # a strictly smaller item invalidates the current tuple
                res = new_res()
                res[n] = item.item

        # advance only the iterators whose item was consumed
        for n, x in enumerate(res):
            if x is not None:
                items[n] = _take(lists[n])

        yield tuple(res)
def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
    """ Writes an in-place progress bar to stream (no trailing newline)

    The bar is max_width characters wide, filled with '#', with the
    percentage embedded in its middle, followed by "val / max_val | status".

    Uses stream.write() instead of the Python-2-only `print >> stream`
    statement, so the module stays importable on Python 3.
    """
    factor = float(val)/max_val if max_val > 0 else 0

    # progress as percentage
    percentage_str = '{val:.2%}'.format(val=factor)

    # progress bar filled with #s
    factor = min(int(factor*max_width), max_width)
    progress_str = '#' * factor + ' ' * (max_width-factor)

    # insert percentage into bar
    percentage_start = int((max_width-len(percentage_str))/2)
    progress_str = progress_str[:percentage_start] + \
                   percentage_str + \
                   progress_str[percentage_start+len(percentage_str):]

    # \r moves the cursor back to the start of the line so that the bar
    # overwrites itself on each call
    stream.write('\r[ %s ] %s / %s | %s' % (
        progress_str,
        val,
        max_val,
        status_str))
    stream.flush()
def set_cmp(list, simplify):
    """ Builds a set out of a list, using simplify(item) as the notion of
    equality between items; for duplicates the last item wins

    NOTE(review): the parameter name `list` shadows the builtin, but is kept
    for interface compatibility with existing callers.
    """
    return dict((simplify(x), x) for x in list).values()
def first(it):
    """ Returns the first not-None object, or None if the iterator is
    exhausted """
    return next((x for x in it if x is not None), None)
def intersect(a, b):
    """ Returns the common elements of a and b as a list (order undefined) """
    return list(set(a).intersection(b))
def remove_control_chars(s):
    """ Removes control characters (U+0000-U+001F and U+007F-U+009F) from s

    The previous version built an unused `all_chars` generator over the whole
    Unicode range; it has been removed. unichr/chr are selected at runtime so
    the function works on both Python 2 and 3.
    """
    try:
        _chr = unichr          # Python 2
    except NameError:
        _chr = chr             # Python 3
    control_chars = ''.join(_chr(i)
                            for i in list(range(0, 32)) + list(range(127, 160)))
    control_char_re = re.compile('[%s]' % re.escape(control_chars))
    return control_char_re.sub('', s)
def unzip(a):
    """ Inverse of zip: turns a sequence of tuples into a tuple of lists """
    return tuple([list(group) for group in zip(*a)])
def parse_range(s, min, max, default=None):
    """
    Parses the string and returns its value. If the value is outside the given
    range, its closest number within the range is returned

    >>> parse_range('5', 0, 10)
    5

    >>> parse_range('0', 5, 10)
    5

    >>> parse_range('15', 0, 10)
    10

    >>> parse_range('x', 0, 20)
    10

    >>> parse_range('x', 0, 20, 20)
    20

    NOTE(review): parameters `min`/`max` shadow the builtins but are kept
    for interface compatibility.
    """
    try:
        # sorting the three values clamps the parsed one into [min, max]
        return sorted((min, int(s), max))[1]
    except (ValueError, TypeError):
        # unparsable: use the default, or the middle of the range
        return default if default is not None else (max-min)/2
def flatten(l):
    """ Flattens one level of nesting: [[a, b], [c]] -> [a, b, c] """
    result = []
    for sublist in l:
        result.extend(sublist)
    return result
def linearize(key, iterators, reverse=False):
    """ Linearizes a number of iterators, sorted by some comparison function

    Each iterator is expected to be sorted by key already; yields all
    elements in globally sorted order. Uses next(it) instead of the
    Python-2-only it.next().
    """
    iters = [iter(i) for i in iterators]
    vals = []
    for it in iters:
        try:
            # prime each iterator with its first value
            vals.append((next(it), it))
        except StopIteration:
            continue

    while vals:
        vals.sort(key=lambda entry: key(entry[0]), reverse=reverse)
        val, it = vals.pop(0)
        yield val
        try:
            # refill from the iterator whose value was just yielded
            vals.append((next(it), it))
        except StopIteration:
            pass
def skip_pairs(iterator, cmp=None):
    """ Skips pairs of equal items

    cmp is a three-way comparison function (returns 0 for equal items);
    the default compares with ==/</>.  The default used to be the
    Python-2-only builtin `cmp`; passing None now selects an equivalent
    comparison, which is backward-compatible for all existing callers.

    This version also returns instead of re-raising StopIteration inside
    the generator, which is required since PEP 479 (Python 3.7).

    >>> list(skip_pairs([]))
    []

    >>> list(skip_pairs([1]))
    [1]

    >>> list(skip_pairs([1, 2, 3]))
    [1, 2, 3]

    >>> list(skip_pairs([1, 1]))
    []

    >>> list(skip_pairs([1, 2, 2]))
    [1]

    >>> list(skip_pairs([1, 2, 2, 3]))
    [1, 3]

    >>> list(skip_pairs([1, 2, 2, 2]))
    [1, 2]

    >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
    [1, 3]
    """
    if cmp is None:
        cmp = lambda a, b: (a > b) - (a < b)

    iterator = iter(iterator)
    _end = object()          # sentinel marking exhaustion

    cur = next(iterator, _end)
    while cur is not _end:
        nxt = next(iterator, _end)
        if nxt is _end:
            # last unpaired item
            yield cur
            return
        if cmp(cur, nxt) == 0:
            # equal pair: drop both, continue after it
            cur = next(iterator, _end)
        else:
            yield cur
            cur = nxt
def get_timestamp(datetime_obj):
    """ Returns the timestamp as an int for the given datetime object

    Uses time.mktime, so the naive datetime is interpreted in the local
    timezone.
    """
    epoch_seconds = time.mktime(datetime_obj.timetuple())
    return int(epoch_seconds)
# pattern for URL detection; http or https scheme
re_url = re.compile('^https?://')

def is_url(string):
    """ Returns true if a string looks like an URL

    >>> is_url('http://example.com/some-path/file.xml')
    True

    >>> is_url('something else')
    False
    """
    return re_url.match(string) is not None
410 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
411 # this does not increase asymptotical complexity
412 # but can still waste more time than it saves.
def shortest_of(strings):
    """ Returns the shortest of the given strings (first one on ties) """
    return sorted(strings, key=len)[0]
def longest_substr(strings):
    """ Returns the longest common substring of the given strings

    >>> longest_substr(['abcdef', 'xbcdy'])
    'bcd'
    >>> longest_substr(['abc', 'abc'])
    'abc'
    >>> longest_substr([])
    ''
    """
    substr = ""
    if not strings:
        return substr
    reference = min(strings, key=len)
    length = len(reference)
    # find a suitable slice i:j of the shortest string
    for i in range(length):
        # only consider candidates at least len(substr) + 1 long;
        # the upper bound is length + 1 so that the final character can be
        # included (the previous version stopped one character short and
        # could never return the full shortest string)
        for j in range(i + len(substr) + 1, length + 1):
            candidate = reference[i:j]
            if all(candidate in text for text in strings):
                substr = candidate
    return substr
def additional_value(it, gen_val, val_changed=lambda _: True):
    """ Provides an additional value to the elements, calculated when needed

    For the elements from the iterator, some additional value can be computed
    by gen_val (which might be an expensive computation).

    If the elements in the iterator are ordered so that some subsequent
    elements would generate the same additional value, val_changed can be
    provided, which receives the next element from the iterator and the
    previous additional value. If the element would generate the same
    additional value (val_changed returns False), its computation is skipped.

    >>> # get the next full hundred higher than x
    >>> # this will probably be an expensive calculation
    >>> next_hundred = lambda x: x + 100-(x % 100)

    >>> # returns True if h is not the value that next_hundred(x) would provide
    >>> # this should be a relatively cheap calculation, compared to the above
    >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100

    >>> xs = [0, 50, 100, 101, 199, 200, 201]
    >>> list(additional_value(xs, next_hundred, diff_hundred))
    [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
    """
    # sentinel: no additional value computed yet
    _missing = object()
    cached = _missing

    for elem in it:
        # recompute only on the first element, or when the cached value
        # would no longer be correct for elem
        if cached is _missing or val_changed(elem, cached):
            cached = gen_val(elem)
        yield (elem, cached)
def file_hash(f, h=hashlib.md5, block_size=2**20):
    """ returns the hash of the contents of a file

    f is read in block_size chunks; h is the hash constructor to use.
    The EOF sentinel is b'' because read() on a binary file returns bytes;
    under Python 2 b'' == '' so this also covers text-mode files there.
    (The previous '' sentinel never matches b'' on Python 3, causing an
    infinite loop for binary files.)
    """
    f_hash = h()
    for chunk in iter(lambda: f.read(block_size), b''):
        f_hash.update(chunk)
    return f_hash
def split_list(l, prop):
    """ split elements that satisfy a property, and those that don't

    Returns (match, nomatch). Each element is tested with prop() exactly
    once: the previous implementation re-tested membership in the matched
    list (O(n^2) overall), could misclassify elements that merely compare
    equal to a matching one, and broke on Python 3 where filter() returns
    a one-shot iterator.
    """
    match = []
    nomatch = []
    for x in l:
        (match if prop(x) else nomatch).append(x)
    return match, nomatch
def sorted_chain(links, key, reverse=False):
    """ Takes a list of iters can iterates over sorted elements

    Each elment of links should be a tuple of (sort_key, iterator). The
    elements of each iterator should be sorted already. sort_key should
    indicate the key of the first element and needs to be comparable to the
    result of key(elem).

    The function returns an iterator over the globally sorted element that
    ensures that as little iterators as possible are evaluated. When
    evaluating """

    # mixed_list initially contains all placeholders; later evaluated
    # elements (from the iterators) are mixed in
    mixed_list = [(k, link, True) for k, link in links]

    while mixed_list:
        _, item, expand = mixed_list.pop(0)

        # found an element (from an earlier expansion), yield it
        if not expand:
            yield item
            continue

        # found an iter that needs to be expanded.
        # The iterator is fully consumed
        new_items = [(key(i), i, False) for i in item]

        # sort links (placeholders) and elements together;
        # operator.itemgetter(0) replaces the Python-2-only
        # tuple-parameter lambda `lambda (k, _v, _e): k`
        mixed_list = sorted(mixed_list + new_items,
                            key=operator.itemgetter(0),
                            reverse=reverse)
def url_add_authentication(url, username, password):
    """
    Adds authentication data (username, password) to a given
    URL in order to construct an authenticated URL.

    >>> url_add_authentication('https://host.com/', '', None)
    'https://host.com/'
    >>> url_add_authentication('http://example.org/', None, None)
    'http://example.org/'
    >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
    'telnet://foo:bar@host.com/'
    >>> url_add_authentication('ftp://example.org', 'billy', None)
    'ftp://billy@example.org'
    >>> url_add_authentication('ftp://example.org', 'billy', '')
    'ftp://billy:@example.org'
    >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
    'http://aa:bc@localhost/x'
    >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
    'http://i%2Fo:P@ss:@blubb.lan/u.html'
    >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
    'http://c:d@x.org/'
    >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
    'http://P@x:i%2F@cx.lan'
    >>> url_add_authentication('http://x.org/', 'a b', 'c d')
    'http://a%20b:c%20d@x.org/'
    """
    # an empty or missing username means: leave the URL untouched
    if username is None or username == '':
        return url

    # Relaxations of the strict quoting rules (bug 1521):
    # 1. Accept '@' in username and password
    # 2. Accept ':' in password only
    username = urllib.quote(username, safe='@')

    if password is not None:
        password = urllib.quote(password, safe='@:')
        auth_string = ':'.join((username, password))
    else:
        # no password: use "user@host" instead of "user:@host"
        auth_string = username

    # drop any authentication data already present in the URL
    url = url_strip_authentication(url)

    url_parts = list(urlparse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL
    url_parts[1] = '@'.join((auth_string, url_parts[1]))

    return urlparse.urlunsplit(url_parts)
def urlopen(url, headers=None, data=None):
    """
    An URL opener with the User-agent set to gPodder (with version)

    If the URL contains authentication data, it is stripped from the URL and
    passed to an HTTPBasicAuthHandler instead. Returns the opened response
    object (urllib2). The caller's headers dict is copied, not mutated.
    """
    username, password = username_password_from_url(url)
    if username is not None or password is not None:
        # move credentials from the URL into a basic-auth handler
        url = url_strip_authentication(url)
        password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        handler = urllib2.HTTPBasicAuthHandler(password_mgr)
        opener = urllib2.build_opener(handler)
    else:
        opener = urllib2.build_opener()

    if headers is None:
        headers = {}
    else:
        # copy, so the update() below does not mutate the caller's dict
        headers = dict(headers)

    headers.update({'User-agent': settings.USER_AGENT})
    request = urllib2.Request(url, data=data, headers=headers)
    return opener.open(request)
def username_password_from_url(url):
    r"""
    Returns a tuple (username,password) containing authentication
    data from the specified URL or (None,None) if no authentication
    data can be found in the URL.

    See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)

    >>> username_password_from_url('https://@host.com/')
    ('', None)
    >>> username_password_from_url('telnet://host.com/')
    (None, None)
    >>> username_password_from_url('ftp://foo:@host.com/')
    ('foo', '')
    >>> username_password_from_url('http://a:b@host.com/')
    ('a', 'b')
    >>> username_password_from_url(1)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url(None)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url('http://a@b:c@host.com/')
    ('a@b', 'c')
    >>> username_password_from_url('ftp://a:b:c@host.com/')
    ('a', 'b:c')
    >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
    ('i/o', 'P@ss:')
    >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
    ('\xc3\xb6sterreich', None)
    >>> username_password_from_url('http://w%20x:y%20z@example.org/')
    ('w x', 'y z')
    >>> username_password_from_url('http://example.com/x@y:z@test.com/')
    (None, None)
    """
    if type(url) not in (str, unicode):
        raise ValueError('URL has to be a string or unicode object.')

    (username, password) = (None, None)

    (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)

    if '@' in netloc:
        (authentication, netloc) = netloc.rsplit('@', 1)
        if ':' in authentication:
            (username, password) = authentication.split(':', 1)

            # RFC1738 dictates that we should not allow ['/', '@', ':']
            # characters in the username and password field (Section 3.1):
            #
            # 1. The "/" can't be in there at this point because of the way
            #    urlparse (which we use above) works.
            # 2. Due to gPodder bug 1521, we allow "@" in the username and
            #    password field. We use netloc.rsplit('@', 1), which will
            #    make sure that we split it at the last '@' in netloc.
            # 3. The colon must be excluded (RFC2617, Section 2) in the
            #    username, but is apparently allowed in the password. This
            #    is handled by the authentication.split(':', 1) above, and
            #    will cause any extraneous ':'s to be part of the password.

            username = urllib.unquote(username)
            password = urllib.unquote(password)
        else:
            # no colon: everything before '@' is the (quoted) username
            username = urllib.unquote(authentication)

    return (username, password)
def url_strip_authentication(url):
    """
    Strips authentication data from an URL. Returns the URL with
    the authentication data removed from it.

    >>> url_strip_authentication('https://host.com/')
    'https://host.com/'
    >>> url_strip_authentication('telnet://foo:bar@host.com/')
    'telnet://host.com/'
    >>> url_strip_authentication('ftp://billy@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('ftp://billy:@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('http://aa:bc@localhost/x')
    'http://localhost/x'
    >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
    'http://blubb.lan/u.html'
    >>> url_strip_authentication('http://c:d@x.org/')
    'http://x.org/'
    >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
    'http://cx.lan'
    >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
    'http://example.com/'
    """
    parts = list(urlparse.urlsplit(url))
    # parts[1] is the netloc; keep only what follows the last '@'
    # (rsplit on a netloc without '@' leaves it unchanged)
    parts[1] = parts[1].rsplit('@', 1)[-1]
    return urlparse.urlunsplit(parts)
# Native filesystem encoding detection
encoding = sys.getfilesystemencoding()

def sanitize_encoding(filename):
    r"""
    Generate a sanitized version of a string (i.e.
    remove invalid characters and encode in the
    detected native language encoding).

    >>> sanitize_encoding('\x80')
    ''
    >>> sanitize_encoding(u'unicode')
    'unicode'
    """
    # The encoding problem goes away in Python 3.. hopefully!
    if sys.version_info >= (3, 0):
        return filename

    global encoding
    # decode byte strings first so that the encode below always starts
    # from unicode; invalid characters are dropped in both steps
    if not isinstance(filename, unicode):
        filename = filename.decode(encoding, 'ignore')
    return filename.encode(encoding, 'ignore')
def get_git_head():
    """ returns the commit and message of the current git HEAD

    Returns (None, None) if git is not available or the command fails.
    """
    try:
        pr = subprocess.Popen('/usr/bin/git log -n 1 --oneline'.split(),
                cwd = settings.BASE_DIR,
                stdout = subprocess.PIPE,
                stderr = subprocess.PIPE,
            )

    except OSError:
        # git binary not present
        return None, None

    (out, err) = pr.communicate()
    if err:
        return None, None

    # --oneline output: "<abbrev-hash> <subject>"
    outs = out.split()
    commit = outs[0]
    msg = ' ' .join(outs[1:])
    return commit, msg
747 # https://gist.github.com/samuraisam/901117
# zero tolerance by default when comparing datetimes
default_fudge = timedelta(seconds=0, microseconds=0, days=0)

def deep_eq(_v1, _v2, datetime_fudge=default_fudge, _assert=False):
    """
    Tests for deep equality between two python data structures recursing
    into sub-structures if necessary. Works with all python types including
    iterators and generators. This function was dreampt up to test API responses
    but could be used for anything. Be careful. With deeply nested structures
    you may blow the stack.

    Options:
              datetime_fudge => this is a datetime.timedelta object which, when
                                comparing dates, will accept values that differ
                                by the number of seconds specified
              _assert        => passing yes for this will raise an assertion error
                                when values do not match, instead of returning
                                false (very useful in combination with pdb)

    Doctests included:

    >>> x1, y1 = ({'a': 'b'}, {'a': 'b'})
    >>> deep_eq(x1, y1)
    True
    >>> x2, y2 = ({'a': 'b'}, {'b': 'a'})
    >>> deep_eq(x2, y2)
    False
    >>> x3, y3 = ({'a': {'b': 'c'}}, {'a': {'b': 'c'}})
    >>> deep_eq(x3, y3)
    True
    >>> x4, y4 = ({'c': 't', 'a': {'b': 'c'}}, {'a': {'b': 'n'}, 'c': 't'})
    >>> deep_eq(x4, y4)
    False
    >>> x5, y5 = ({'a': [1,2,3]}, {'a': [1,2,3]})
    >>> deep_eq(x5, y5)
    True
    >>> x6, y6 = ({'a': [1,'b',8]}, {'a': [2,'b',8]})
    >>> deep_eq(x6, y6)
    False
    >>> x7, y7 = ('a', 'a')
    >>> deep_eq(x7, y7)
    True
    >>> x8, y8 = (['p','n',['asdf']], ['p','n',['asdf']])
    >>> deep_eq(x8, y8)
    True
    >>> x9, y9 = (['p','n',['asdf',['omg']]], ['p', 'n', ['asdf',['nowai']]])
    >>> deep_eq(x9, y9)
    False
    >>> x10, y10 = (1, 2)
    >>> deep_eq(x10, y10)
    False
    >>> deep_eq((str(p) for p in xrange(10)), (str(p) for p in xrange(10)))
    True
    >>> str(deep_eq(range(4), range(4)))
    'True'
    >>> deep_eq(xrange(100), xrange(100))
    True
    >>> deep_eq(xrange(2), xrange(5))
    False
    >>> from datetime import datetime, timedelta
    >>> d1, d2 = (datetime.now(), datetime.now() + timedelta(seconds=4))
    >>> deep_eq(d1, d2)
    False
    >>> deep_eq(d1, d2, datetime_fudge=timedelta(seconds=5))
    True
    """
    # recurse with the same options bound
    _deep_eq = functools.partial(deep_eq, datetime_fudge=datetime_fudge,
            _assert=_assert)

    def _check_assert(R, a, b, reason=''):
        # in _assert mode, fail loudly instead of returning False
        if _assert and not R:
            assert 0, "an assertion has failed in deep_eq (%s) %s != %s" % (
                reason, str(a), str(b))
        return R

    def _deep_dict_eq(d1, d2):
        k1, k2 = (sorted(d1.keys()), sorted(d2.keys()))
        if k1 != k2: # keys should be exactly equal
            return _check_assert(False, k1, k2, "keys")

        # True counts as 1, so all values are equal iff the sum == len(k1)
        return _check_assert(operator.eq(sum(_deep_eq(d1[k], d2[k])
                                       for k in k1),
                                 len(k1)), d1, d2, "dictionaries")

    def _deep_iter_eq(l1, l2):
        if len(l1) != len(l2):
            return _check_assert(False, l1, l2, "lengths")
        return _check_assert(operator.eq(sum(_deep_eq(v1, v2)
                                      for v1, v2 in zip(l1, l2)),
                                 len(l1)), l1, l2, "iterables")

    def op(a, b):
        # scalar comparison; datetimes are compared with tolerance
        _op = operator.eq
        if type(a) == datetime and type(b) == datetime:
            # NOTE(review): only .seconds of the fudge is used, so a fudge
            # expressed in days would be ignored — presumably intentional
            s = datetime_fudge.seconds
            t1, t2 = (time.mktime(a.timetuple()), time.mktime(b.timetuple()))
            l = t1 - t2
            l = -l if l > 0 else l
            return _check_assert((-s if s > 0 else s) <= l, a, b, "dates")
        return _check_assert(_op(a, b), a, b, "values")

    c1, c2 = (_v1, _v2)

    # guard against strings because they are iterable and their
    # elements yield iterables infinitely.
    # I N C E P T I O N
    for t in types.StringTypes:
        if isinstance(_v1, t):
            break
    else:
        # not a string: pick a dict / iterable / scalar comparison by
        # rebinding `op` (Python-2-only types.DictType == dict)
        if isinstance(_v1, types.DictType):
            op = _deep_dict_eq
        else:
            try:
                c1, c2 = (list(iter(_v1)), list(iter(_v2)))
            except TypeError:
                c1, c2 = _v1, _v2
            else:
                op = _deep_iter_eq

    return op(c1, c2)
def parse_request_body(request):
    """ returns the parsed request body, handles gzip encoding """
    body = request.body
    # transparently decompress gzip-encoded payloads
    if request.META.get('HTTP_CONTENT_ENCODING') == 'gzip':
        body = zlib.decompress(body)
    return json.loads(body)
def normalize_feed_url(url):
    """
    Converts any URL to http:// or ftp:// so that it can be
    used with "wget". If the URL cannot be converted (invalid
    or unknown scheme), "None" is returned.

    This will also normalize feed:// and itpc:// to http://.

    >>> normalize_feed_url('itpc://example.org/podcast.rss')
    'http://example.org/podcast.rss'

    If no URL scheme is defined (e.g. "curry.com"), we will
    simply assume the user intends to add a http:// feed.

    >>> normalize_feed_url('curry.com')
    'http://curry.com/'

    There are even some more shortcuts for advanced users
    and lazy typists (see the source for details).

    >>> normalize_feed_url('fb:43FPodcast')
    'http://feeds.feedburner.com/43FPodcast'

    It will also take care of converting the domain name to
    all-lowercase (because domains are not case sensitive):

    >>> normalize_feed_url('http://Example.COM/')
    'http://example.com/'

    Some other minimalistic changes are also taken care of,
    e.g. a ? with an empty query is removed:

    >>> normalize_feed_url('http://example.org/test?')
    'http://example.org/test'

    Leading and trailing whitespace is removed

    >>> normalize_feed_url(' http://example.com/podcast.rss ')
    'http://example.com/podcast.rss'

    HTTP Authentication is removed to protect users' privacy

    >>> normalize_feed_url('http://a@b:c@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://a:b:c@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
    'http://example.org/'
    >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
    'http://example.com/x@y:z@test.com/'
    """
    url = url.strip()
    if not url or len(url) < 8:
        # too short to be a usable URL
        return None

    # This is a list of prefixes that you can use to minimize the amount of
    # keystrokes that you have to use.
    # Feel free to suggest other useful prefixes, and I'll add them here.
    PREFIXES = {
            'fb:': 'http://feeds.feedburner.com/%s',
            'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
            'sc:': 'http://soundcloud.com/%s',
            'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
            # YouTube playlists. To get a list of playlists per-user, use:
            # https://gdata.youtube.com/feeds/api/users/<username>/playlists
            'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
    }

    # .items() instead of the Python-2-only .iteritems()
    for prefix, expansion in PREFIXES.items():
        if url.startswith(prefix):
            url = expansion % (url[len(prefix):],)
            break

    # Assume HTTP for URLs without scheme
    if '://' not in url:
        url = 'http://' + url

    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    # Schemes and domain names are case insensitive
    scheme, netloc = scheme.lower(), netloc.lower()

    # Remove authentication to protect users' privacy
    netloc = netloc.rsplit('@', 1)[-1]

    # Normalize empty paths to "/"
    if path == '':
        path = '/'

    # feed://, itpc:// and itms:// are really http://
    if scheme in ('feed', 'itpc', 'itms'):
        scheme = 'http'

    if scheme not in ('http', 'https', 'ftp', 'file'):
        return None

    # urlunsplit might return "a slighty different, but equivalent URL"
    return urlparse.urlunsplit((scheme, netloc, path, query, fragment))