extract split_quoted() into utils.py
[mygpo.git] / mygpo / utils.py
blob1b1bdb48fb3889330c783d318bba469307befdab
1 # -*- coding: utf-8 -*-
3 # This file is part of my.gpodder.org.
5 # my.gpodder.org is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU Affero General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or (at your
8 # option) any later version.
10 # my.gpodder.org is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
13 # License for more details.
15 # You should have received a copy of the GNU Affero General Public License
16 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
19 import functools
20 import types
21 import subprocess
22 import os
23 import operator
24 import sys
25 import re
26 import collections
27 import itertools
28 from datetime import datetime, timedelta, date
29 import time
30 import hashlib
31 import urlparse
32 import urllib
33 import urllib2
34 import zlib
35 import shlex
37 from django.conf import settings
39 from mygpo.core.json import json
def daterange(from_date, to_date=None, leap=timedelta(days=1)):
    """ Iterates over dates from from_date to to_date (inclusive)

    If to_date is omitted, "now" is used: datetime.now() when from_date
    is a datetime, date.today() otherwise. leap is the step between
    consecutive values.

    >>> from_d = datetime(2010, 1, 1)
    >>> to_d = datetime(2010, 1, 5)
    >>> list(daterange(from_d, to_d))
    [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
    """

    if to_date is None:
        # default the upper bound to "now", matching the type of from_date
        if isinstance(from_date, datetime):
            to_date = datetime.now()
        else:
            to_date = date.today()

    while from_date <= to_date:
        yield from_date
        from_date = from_date + leap
def format_time(value):
    """ Format an offset (in seconds) as [HH:]MM:SS

    The offset should be an integer or float value.

    >>> format_time(0)
    '00:00'
    >>> format_time(20)
    '00:20'
    >>> format_time(3600)
    '01:00:00'
    >>> format_time(10921)
    '03:02:01'
    """
    try:
        moment = datetime.utcfromtimestamp(value)
    except ValueError:
        return ''

    # the hour part is only included for offsets of an hour or more
    fmt = '%M:%S' if moment.hour == 0 else '%H:%M:%S'
    return moment.strftime(fmt)
def parse_time(value):
    """ Parses a time offset to a number of seconds

    Accepts an int (returned unchanged), 'MM:SS' or 'HH:MM:SS' strings,
    or a plain numeric string. Raises ValueError for None or ''.

    >>> parse_time(10)
    10

    >>> parse_time('05:10') #5*60+10
    310

    >>> parse_time('1:05:10') #60*60+5*60+10
    3910
    """
    if value is None:
        raise ValueError('None value in parse_time')

    if isinstance(value, int):
        # Don't need to parse already-converted time value
        return value

    if value == '':
        raise ValueError('Empty value in parse_time')

    # try the longest format first so '1:05:10' is not cut to minutes
    for fmt in ('%H:%M:%S', '%M:%S'):
        try:
            t = time.strptime(value, fmt)
            return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
        except ValueError:
            continue

    return int(value)
def parse_bool(val):
    """ Parses a boolean from a (case-insensitive) string

    Booleans are passed through unchanged; any string other than
    'true' (in any casing) is False.

    >>> parse_bool('True')
    True

    >>> parse_bool('true')
    True

    >>> parse_bool('')
    False
    """
    if isinstance(val, bool):
        return val

    return val.lower() == 'true'
def iterate_together(lists, key=lambda x: x, reverse=False):
    """ Iterates over multiple sorted, possibly sparse lists in parallel

    takes ordered, possibly sparse, lists with similar items
    (some items have a corresponding item in the other lists, some don't).

    It then yields tuples of corresponding items, where one element is None
    if there is no corresponding entry in one of the lists.

    Tuples where both elements are None are skipped.

    The results of the key method are used for the comparisons.

    If reverse is True, the lists are expected to be sorted in reverse order
    and the results will also be sorted reverse

    >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
    [(1, 1), (2, None), (None, 3)]

    >>> list(iterate_together([[], []]))
    []

    >>> list(iterate_together([range(1, 3), range(3, 5)]))
    [(1, None), (2, None), (None, 3), (None, 4)]

    >>> list(iterate_together([range(1, 3), []]))
    [(1, None), (2, None)]

    >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
    [(1, None), (3, 3)]
    """

    Next = collections.namedtuple('Next', 'item more')
    min_ = min if not reverse else max
    lt_ = operator.lt if not reverse else operator.gt

    lists = [iter(l) for l in lists]

    def _take(it):
        # fetch the next non-None item; "more" records whether one existed
        # (next() builtin instead of it.next(), which is Python 2 only)
        try:
            i = next(it)
            while i is None:
                i = next(it)
            return Next(i, True)
        except StopIteration:
            return Next(None, False)

    def new_res():
        return [None]*len(lists)

    # take first bunch of items
    items = [_take(l) for l in lists]

    while any(i.item is not None or i.more for i in items):

        res = new_res()

        for n, item in enumerate(items):

            if item.item is None:
                continue

            if all(x is None for x in res):
                res[n] = item.item
                continue

            # smallest (largest, if reversed) key collected so far
            min_v = min_(filter(lambda x: x is not None, res), key=key)

            if key(item.item) == key(min_v):
                res[n] = item.item

            elif lt_(key(item.item), key(min_v)):
                # strictly smaller: discard the collected items, start over
                res = new_res()
                res[n] = item.item

        # advance only the iterators whose item was consumed
        for n, x in enumerate(res):
            if x is not None:
                items[n] = _take(lists[n])

        yield tuple(res)
def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
    """ Writes an in-place progress bar to the stream

    Draws a bar of max_width '#' characters for val/max_val with the
    percentage embedded in the middle, followed by the raw counts and
    status_str. A leading '\r' (and no trailing newline) makes repeated
    calls update the same terminal line.
    """

    factor = float(val)/max_val if max_val > 0 else 0

    # progress as percentage
    percentage_str = '{val:.2%}'.format(val=factor)

    # progress bar filled with #s
    factor = min(int(factor*max_width), max_width)
    progress_str = '#' * factor + ' ' * (max_width-factor)

    #insert percentage into bar
    percentage_start = int((max_width-len(percentage_str))/2)
    progress_str = progress_str[:percentage_start] + \
                   percentage_str + \
                   progress_str[percentage_start+len(percentage_str):]

    # stream.write instead of the Python-2-only "print >> stream" statement;
    # the leading "\r " matches the old two-print softspace output
    stream.write('\r [ %s ] %s / %s | %s' % (
        progress_str,
        val,
        max_val,
        status_str))
    stream.flush()
def set_cmp(list, simplify):
    """
    Builds a set out of a list but uses the results of simplify to determine
    equality between items

    When two items simplify to the same value, the later one wins.
    NOTE: the parameter shadows the builtin "list"; the name is kept for
    backwards compatibility with keyword callers.
    """
    simpl = lambda x: (simplify(x), x)
    lst = dict(map(simpl, list))
    # materialize explicitly: on Python 3 dict.values() is a view, not a list
    return [v for v in lst.values()]
def first(it):
    """
    returns the first not-None object or None if the iterator is exhausted
    """
    return next((x for x in it if x is not None), None)
def intersect(a, b):
    """ Returns the common elements of both iterables as a list """
    return list(set(a).intersection(b))
def remove_control_chars(s):
    """ Removes control characters (C0 range and DEL + C1 range) from s """

    # unichr does not exist on Python 3, where chr covers all codepoints
    try:
        _chr = unichr
    except NameError:
        _chr = chr

    # codepoints 0-31 and 127-159; the old version also built a huge
    # unused "all_chars" generator here, which has been removed
    control_chars = ''.join(_chr(c) for c in
                            itertools.chain(range(0, 32), range(127, 160)))
    control_char_re = re.compile('[%s]' % re.escape(control_chars))

    return control_char_re.sub('', s)
def unzip(a):
    """ Transposes a sequence of tuples into a tuple of lists """
    return tuple([list(group) for group in zip(*a)])
def parse_range(s, min, max, default=None):
    """
    Parses the string and returns its value. If the value is outside the given
    range, its closest number within the range is returned

    >>> parse_range('5', 0, 10)
    5

    >>> parse_range('0', 5, 10)
    5

    >>> parse_range('15', 0, 10)
    10

    >>> parse_range('x', 0, 20)
    10

    >>> parse_range('x', 0, 20, 20)
    20
    """
    try:
        val = int(s)
        if val < min:
            return min
        if val > max:
            return max
        return val

    except (ValueError, TypeError):
        # fall back to the default, or the midpoint of the range;
        # (max+min)//2 stays inside [min, max] even for min > 0
        # (the old (max-min)/2 did not) and stays an int on Python 3
        return default if default is not None else (max + min) // 2
def flatten(l):
    """ Flattens a list of lists by one level """
    return list(itertools.chain.from_iterable(l))
314 def linearize(key, iterators, reverse=False):
316 Linearizes a number of iterators, sorted by some comparison function
319 iters = [iter(i) for i in iterators]
320 vals = []
321 for i in iters:
322 try:
323 v = i.next()
324 vals. append( (v, i) )
325 except StopIteration:
326 continue
328 while vals:
329 vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
330 val, it = vals.pop(0)
331 yield val
332 try:
333 next_val = it.next()
334 vals.append( (next_val, it) )
335 except StopIteration:
336 pass
def skip_pairs(iterator, cmp=None):
    """ Skips pairs of equal items

    cmp is a three-way comparison function; if omitted, a default
    equivalent to the Python 2 builtin cmp() is used.

    >>> list(skip_pairs([]))
    []

    >>> list(skip_pairs([1]))
    [1]

    >>> list(skip_pairs([1, 2, 3]))
    [1, 2, 3]

    >>> list(skip_pairs([1, 1]))
    []

    >>> list(skip_pairs([1, 2, 2]))
    [1]

    >>> list(skip_pairs([1, 2, 2, 3]))
    [1, 3]

    >>> list(skip_pairs([1, 2, 2, 2]))
    [1, 2]

    >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
    [1, 3]
    """

    # the builtin cmp is Python 2 only, so it can no longer be used as
    # the default argument directly
    if cmp is None:
        cmp = lambda a, b: (a > b) - (a < b)

    iterator = iter(iterator)

    try:
        item = next(iterator)
    except StopIteration:
        return

    # plain "return" instead of re-raising StopIteration: raising it
    # inside a generator is an error since PEP 479 (Python 3.7)
    while True:
        try:
            upcoming = next(iterator)
        except StopIteration:
            # no pair partner left: the final item is kept
            yield item
            return

        if cmp(item, upcoming) == 0:
            # equal pair: drop both, restart with a fresh item
            try:
                item = next(iterator)
            except StopIteration:
                return
        else:
            yield item
            item = upcoming
def get_timestamp(datetime_obj):
    """ Returns the timestamp as an int for the given datetime object

    The conversion uses time.mktime, which interprets the (naive)
    datetime in the local timezone.
    """
    timetuple = datetime_obj.timetuple()
    return int(time.mktime(timetuple))
# matches http:// and https:// prefixes
re_url = re.compile('^https?://')

def is_url(string):
    """ Returns true if a string looks like an URL

    >>> is_url('http://example.com/some-path/file.xml')
    True

    >>> is_url('something else')
    False
    """
    return re_url.match(string) is not None
413 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
414 # this does not increase asymptotical complexity
415 # but can still waste more time than it saves.
def shortest_of(strings):
    """ Returns the shortest of the given strings (the first one on ties) """
    return min(strings, key=len)
def longest_substr(strings):
    """
    Returns the longest common substring of the given strings

    Returns '' for an empty list or when the strings share nothing.
    """
    substr = ""
    if not strings:
        return substr
    # only substrings of the shortest string need to be considered
    reference = min(strings, key=len)
    length = len(reference)
    #find a suitable slice i:j
    for i in range(length):
        #only consider strings long at least len(substr) + 1
        # (upper bound length+1 so that j can reach length; the previous
        # bound of length meant the full reference string was never tried)
        for j in range(i + len(substr) + 1, length + 1):
            candidate = reference[i:j]
            if all(candidate in text for text in strings):
                substr = candidate
    return substr
def additional_value(it, gen_val, val_changed=lambda _: True):
    """ Provides an additional value to the elements, calculated when needed

    For the elements from the iterator, some additional value can be computed
    by gen_val (which might be an expensive computation).

    If the elements in the iterator are ordered so that some subsequent
    elements would generate the same additional value, val_changed can be
    provided, which receives the next element from the iterator and the
    previous additional value. If the element would generate the same
    additional value (val_changed returns False), its computation is skipped.

    >>> # get the next full hundred higher than x
    >>> # this will probably be an expensive calculation
    >>> next_hundred = lambda x: x + 100-(x % 100)

    >>> # returns True if h is not the value that next_hundred(x) would provide
    >>> # this should be a relatively cheap calculation, compared to the above
    >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100

    >>> xs = [0, 50, 100, 101, 199, 200, 201]
    >>> list(additional_value(xs, next_hundred, diff_hundred))
    [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
    """

    _missing = object()  # sentinel: no value cached yet
    cached = _missing

    for elem in it:
        # recompute only on the first element or when the cached
        # value has been invalidated by val_changed
        if cached is _missing or val_changed(elem, cached):
            cached = gen_val(elem)

        yield (elem, cached)
def file_hash(f, h=hashlib.md5, block_size=2**20):
    """ returns the hash of the contents of a file

    f is read in chunks of block_size; h is the hash factory. The hash
    object itself is returned (call .hexdigest() for a digest string).
    """
    f_hash = h()
    # b'' sentinel: identical to '' on Python 2, and required for binary
    # files on Python 3 (where f.read() never returns the text '' and
    # hash.update() needs bytes anyway)
    for chunk in iter(lambda: f.read(block_size), b''):
        f_hash.update(chunk)
    return f_hash
def split_list(l, prop):
    """ split elements that satisfy a property, and those that don't

    Returns (match, nomatch). Each element is classified by calling
    prop on it directly; the previous "x not in match" membership test
    was O(n^2) and broke on Python 3, where filter() returns an
    iterator that the membership test would consume.
    """
    match = []
    nomatch = []
    for x in l:
        if prop(x):
            match.append(x)
        else:
            nomatch.append(x)
    return match, nomatch
def sorted_chain(links, key, reverse=False):
    """ Takes a list of iters and iterates over sorted elements

    Each element of links should be a tuple of (sort_key, iterator). The
    elements of each iterator should be sorted already. sort_key should
    indicate the key of the first element and needs to be comparable to the
    result of key(elem).

    The function returns an iterator over the globally sorted elements that
    ensures that as few iterators as possible are evaluated. When
    evaluating """

    # mixed_list initially contains all placeholders; later evaluated
    # elements (from the iterators) are mixed in
    mixed_list = [(k, link, True) for k, link in links]

    while mixed_list:
        _, item, expand = mixed_list.pop(0)

        # found an element (from an earlier expansion), yield it
        if not expand:
            yield item
            continue

        # found an iter that needs to be expanded.
        # The iterator is fully consumed
        new_items = [(key(i), i, False) for i in item]

        # sort links (placeholders) and elements together, on the first
        # tuple element (tuple-parameter lambdas such as
        # "lambda (k, _v, _e): k" are a SyntaxError on Python 3)
        mixed_list = sorted(mixed_list + new_items,
                            key=operator.itemgetter(0),
                            reverse=reverse)
def url_add_authentication(url, username, password):
    """
    Adds authentication data (username, password) to a given
    URL in order to construct an authenticated URL.

    >>> url_add_authentication('https://host.com/', '', None)
    'https://host.com/'
    >>> url_add_authentication('http://example.org/', None, None)
    'http://example.org/'
    >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
    'telnet://foo:bar@host.com/'
    >>> url_add_authentication('ftp://example.org', 'billy', None)
    'ftp://billy@example.org'
    >>> url_add_authentication('ftp://example.org', 'billy', '')
    'ftp://billy:@example.org'
    >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
    'http://aa:bc@localhost/x'
    >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
    'http://i%2Fo:P@ss:@blubb.lan/u.html'
    >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
    'http://c:d@x.org/'
    >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
    'http://P@x:i%2F@cx.lan'
    >>> url_add_authentication('http://x.org/', 'a b', 'c d')
    'http://a%20b:c%20d@x.org/'
    """
    if username is None or username == '':
        return url

    # Relaxations of the strict quoting rules (bug 1521):
    # 1. Accept '@' in username and password
    # 2. Accept ':' in password only
    username = urllib.quote(username, safe='@')

    if password is None:
        auth_string = username
    else:
        auth_string = '%s:%s' % (username, urllib.quote(password, safe='@:'))

    # drop any authentication already present before adding the new one
    url = url_strip_authentication(url)

    url_parts = list(urlparse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL
    url_parts[1] = '%s@%s' % (auth_string, url_parts[1])

    return urlparse.urlunsplit(url_parts)
def urlopen(url, headers=None, data=None):
    """
    An URL opener with the User-agent set to gPodder (with version)

    Credentials embedded in the URL are stripped and installed as an
    HTTP Basic Auth handler instead.
    """
    username, password = username_password_from_url(url)
    if username is not None or password is not None:
        url = url_strip_authentication(url)
        password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        auth_handler = urllib2.HTTPBasicAuthHandler(password_mgr)
        opener = urllib2.build_opener(auth_handler)
    else:
        opener = urllib2.build_opener()

    # copy the caller's headers so they are not mutated
    headers = {} if headers is None else dict(headers)
    headers.update({'User-agent': settings.USER_AGENT})

    request = urllib2.Request(url, data=data, headers=headers)
    return opener.open(request)
def username_password_from_url(url):
    r"""
    Returns a tuple (username,password) containing authentication
    data from the specified URL or (None,None) if no authentication
    data can be found in the URL.

    See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)

    >>> username_password_from_url('https://@host.com/')
    ('', None)
    >>> username_password_from_url('telnet://host.com/')
    (None, None)
    >>> username_password_from_url('ftp://foo:@host.com/')
    ('foo', '')
    >>> username_password_from_url('http://a:b@host.com/')
    ('a', 'b')
    >>> username_password_from_url(1)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url(None)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url('http://a@b:c@host.com/')
    ('a@b', 'c')
    >>> username_password_from_url('ftp://a:b:c@host.com/')
    ('a', 'b:c')
    >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
    ('i/o', 'P@ss:')
    >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
    ('\xc3\xb6sterreich', None)
    >>> username_password_from_url('http://w%20x:y%20z@example.org/')
    ('w x', 'y z')
    >>> username_password_from_url('http://example.com/x@y:z@test.com/')
    (None, None)
    """
    if type(url) not in (str, unicode):
        raise ValueError('URL has to be a string or unicode object.')

    username = password = None

    # only the network-location part can carry credentials
    netloc = urlparse.urlparse(url)[1]

    if '@' in netloc:
        authentication, netloc = netloc.rsplit('@', 1)
        if ':' in authentication:
            username, password = authentication.split(':', 1)

            # RFC1738 dictates that we should not allow ['/', '@', ':']
            # characters in the username and password field (Section 3.1):
            #
            # 1. The "/" can't be in there at this point because of the way
            #    urlparse (which we use above) works.
            # 2. Due to gPodder bug 1521, we allow "@" in the username and
            #    password field. We use netloc.rsplit('@', 1), which will
            #    make sure that we split it at the last '@' in netloc.
            # 3. The colon must be excluded (RFC2617, Section 2) in the
            #    username, but is apparently allowed in the password. This
            #    is handled by the authentication.split(':', 1) above, and
            #    will cause any extraneous ':'s to be part of the password.
            username = urllib.unquote(username)
            password = urllib.unquote(password)
        else:
            username = urllib.unquote(authentication)

    return (username, password)
def url_strip_authentication(url):
    """
    Strips authentication data from an URL. Returns the URL with
    the authentication data removed from it.

    >>> url_strip_authentication('https://host.com/')
    'https://host.com/'
    >>> url_strip_authentication('telnet://foo:bar@host.com/')
    'telnet://host.com/'
    >>> url_strip_authentication('ftp://billy@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('ftp://billy:@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('http://aa:bc@localhost/x')
    'http://localhost/x'
    >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
    'http://blubb.lan/u.html'
    >>> url_strip_authentication('http://c:d@x.org/')
    'http://x.org/'
    >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
    'http://cx.lan'
    >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
    'http://example.com/'
    """
    parts = list(urlparse.urlsplit(url))
    # parts[1] is the HOST part of the URL

    # Remove existing authentication data; split at the LAST '@' so
    # that '@' characters in the credentials themselves are handled
    host = parts[1]
    if '@' in host:
        parts[1] = host.rsplit('@', 1)[1]

    return urlparse.urlunsplit(parts)
# Native filesystem encoding detection
encoding = sys.getfilesystemencoding()

def sanitize_encoding(filename):
    r"""
    Generate a sanitized version of a string (i.e.
    remove invalid characters and encode in the
    detected native language encoding).

    >>> sanitize_encoding(u'unicode')
    'unicode'
    """
    # The encoding problem goes away in Python 3.. hopefully!
    if sys.version_info >= (3, 0):
        return filename

    global encoding
    if isinstance(filename, unicode):
        return filename.encode(encoding, 'ignore')
    # byte strings are round-tripped through the native encoding,
    # dropping anything it cannot represent
    return filename.decode(encoding, 'ignore').encode(encoding, 'ignore')
def get_git_head():
    """ returns the commit and message of the current git HEAD

    Returns (None, None) when git cannot be executed or reports
    anything on stderr.
    """
    cmd = ['/usr/bin/git', 'log', '-n', '1', '--oneline']
    try:
        proc = subprocess.Popen(cmd,
                                cwd=settings.BASE_DIR,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError:
        return None, None

    out, err = proc.communicate()
    if err:
        return None, None

    # first word is the abbreviated commit hash, the rest is the message
    words = out.split()
    return words[0], ' '.join(words[1:])
# https://gist.github.com/samuraisam/901117

# zero tolerance by default when comparing datetimes
default_fudge = timedelta(seconds=0, microseconds=0, days=0)

def deep_eq(_v1, _v2, datetime_fudge=default_fudge, _assert=False):
    """
    Tests for deep equality between two python data structures recursing
    into sub-structures if necessary. Works with all python types including
    iterators and generators. This function was dreampt up to test API responses
    but could be used for anything. Be careful. With deeply nested structures
    you may blow the stack.

    Options:
              datetime_fudge => this is a datetime.timedelta object which, when
                                comparing dates, will accept values that differ
                                by the number of seconds specified
              _assert        => passing yes for this will raise an assertion error
                                when values do not match, instead of returning
                                false (very useful in combination with pdb)

    Doctests included:

    >>> x1, y1 = ({'a': 'b'}, {'a': 'b'})
    >>> deep_eq(x1, y1)
    True
    >>> x2, y2 = ({'a': 'b'}, {'b': 'a'})
    >>> deep_eq(x2, y2)
    False
    >>> x3, y3 = ({'a': {'b': 'c'}}, {'a': {'b': 'c'}})
    >>> deep_eq(x3, y3)
    True
    >>> x4, y4 = ({'c': 't', 'a': {'b': 'c'}}, {'a': {'b': 'n'}, 'c': 't'})
    >>> deep_eq(x4, y4)
    False
    >>> x5, y5 = ({'a': [1,2,3]}, {'a': [1,2,3]})
    >>> deep_eq(x5, y5)
    True
    >>> x6, y6 = ({'a': [1,'b',8]}, {'a': [2,'b',8]})
    >>> deep_eq(x6, y6)
    False
    >>> x7, y7 = ('a', 'a')
    >>> deep_eq(x7, y7)
    True
    >>> x8, y8 = (['p','n',['asdf']], ['p','n',['asdf']])
    >>> deep_eq(x8, y8)
    True
    >>> x9, y9 = (['p','n',['asdf',['omg']]], ['p', 'n', ['asdf',['nowai']]])
    >>> deep_eq(x9, y9)
    False
    >>> x10, y10 = (1, 2)
    >>> deep_eq(x10, y10)
    False
    >>> deep_eq((str(p) for p in xrange(10)), (str(p) for p in xrange(10)))
    True
    >>> str(deep_eq(range(4), range(4)))
    'True'
    >>> deep_eq(xrange(100), xrange(100))
    True
    >>> deep_eq(xrange(2), xrange(5))
    False
    >>> from datetime import datetime, timedelta
    >>> d1, d2 = (datetime.now(), datetime.now() + timedelta(seconds=4))
    >>> deep_eq(d1, d2)
    False
    >>> deep_eq(d1, d2, datetime_fudge=timedelta(seconds=5))
    True
    """
    # recurse with the same options bound
    _deep_eq = functools.partial(deep_eq, datetime_fudge=datetime_fudge,
                                 _assert=_assert)

    def _check_assert(R, a, b, reason=''):
        # in _assert mode a mismatch raises instead of returning False
        if _assert and not R:
            assert 0, "an assertion has failed in deep_eq (%s) %s != %s" % (
                reason, str(a), str(b))
        return R

    def _deep_dict_eq(d1, d2):
        k1, k2 = (sorted(d1.keys()), sorted(d2.keys()))
        if k1 != k2:  # keys should be exactly equal
            return _check_assert(False, k1, k2, "keys")

        # all values must compare deep-equal (sum of True == number of keys)
        return _check_assert(operator.eq(sum(_deep_eq(d1[k], d2[k])
                                             for k in k1),
                                         len(k1)), d1, d2, "dictionaries")

    def _deep_iter_eq(l1, l2):
        if len(l1) != len(l2):
            return _check_assert(False, l1, l2, "lengths")
        return _check_assert(operator.eq(sum(_deep_eq(v1, v2)
                                             for v1, v2 in zip(l1, l2)),
                                         len(l1)), l1, l2, "iterables")

    def op(a, b):
        # scalar comparison; datetimes may differ by datetime_fudge seconds
        _op = operator.eq
        if type(a) == datetime and type(b) == datetime:
            s = datetime_fudge.seconds
            t1, t2 = (time.mktime(a.timetuple()), time.mktime(b.timetuple()))
            l = t1 - t2
            l = -l if l > 0 else l
            return _check_assert((-s if s > 0 else s) <= l, a, b, "dates")
        return _check_assert(_op(a, b), a, b, "values")

    c1, c2 = (_v1, _v2)

    # guard against strings because they are iterable and their
    # elements yield iterables infinitely.
    # I N C E P T I O N
    # (for/else: the else branch runs only when no string type matched)
    for t in types.StringTypes:
        if isinstance(_v1, t):
            break
    else:
        if isinstance(_v1, types.DictType):
            # rebind op: dicts are compared key-by-key
            op = _deep_dict_eq
        else:
            try:
                # materialize both sides; succeeds for any iterable
                c1, c2 = (list(iter(_v1)), list(iter(_v2)))
            except TypeError:
                # not iterable: fall through to the scalar op above
                c1, c2 = _v1, _v2
            else:
                op = _deep_iter_eq

    return op(c1, c2)
def parse_request_body(request):
    """ returns the parsed request body, handles gzip encoding """

    body = request.body

    # transparently decompress gzip-encoded request bodies
    if request.META.get('HTTP_CONTENT_ENCODING') == 'gzip':
        body = zlib.decompress(body)

    return json.loads(body)
def normalize_feed_url(url):
    """
    Converts any URL to http:// or ftp:// so that it can be
    used with "wget". If the URL cannot be converted (invalid
    or unknown scheme), "None" is returned.

    This will also normalize feed:// and itpc:// to http://.

    >>> normalize_feed_url('itpc://example.org/podcast.rss')
    'http://example.org/podcast.rss'

    If no URL scheme is defined (e.g. "curry.com"), we will
    simply assume the user intends to add a http:// feed.

    >>> normalize_feed_url('curry.com')
    'http://curry.com/'

    There are even some more shortcuts for advanced users
    and lazy typists (see the source for details).

    >>> normalize_feed_url('fb:43FPodcast')
    'http://feeds.feedburner.com/43FPodcast'

    It will also take care of converting the domain name to
    all-lowercase (because domains are not case sensitive):

    >>> normalize_feed_url('http://Example.COM/')
    'http://example.com/'

    Some other minimalistic changes are also taken care of,
    e.g. a ? with an empty query is removed:

    >>> normalize_feed_url('http://example.org/test?')
    'http://example.org/test'

    Leading and trailing whitespace is removed

    >>> normalize_feed_url(' http://example.com/podcast.rss ')
    'http://example.com/podcast.rss'

    HTTP Authentication is removed to protect users' privacy

    >>> normalize_feed_url('http://a@b:c@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://a:b:c@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
    'http://example.org/'
    >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
    'http://example.com/x%40y%3Az%40test.com/'
    >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
    'http://en.wikipedia.org/wiki/%C3%84'
    >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
    'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
    """
    url = url.strip()
    # reject anything shorter than the shortest possible URL ("http://x")
    if not url or len(url) < 8:
        return None

    # work on a UTF-8 byte string so quoting below is well-defined
    if isinstance(url, unicode):
        url = url.encode('utf-8', 'ignore')

    # This is a list of prefixes that you can use to minimize the amount of
    # keystrokes that you have to use.
    # Feel free to suggest other useful prefixes, and I'll add them here.
    PREFIXES = {
            'fb:': 'http://feeds.feedburner.com/%s',
            'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
            'sc:': 'http://soundcloud.com/%s',
            'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
            # YouTube playlists. To get a list of playlists per-user, use:
            # https://gdata.youtube.com/feeds/api/users/<username>/playlists
            'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
    }

    # at most one prefix can match, so we can stop at the first hit
    for prefix, expansion in PREFIXES.iteritems():
        if url.startswith(prefix):
            url = expansion % (url[len(prefix):],)
            break

    # Assume HTTP for URLs without scheme
    if not '://' in url:
        url = 'http://' + url

    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    # Schemes and domain names are case insensitive
    scheme, netloc = scheme.lower(), netloc.lower()

    # encode non-encoded characters
    path = urllib.quote(path, '/%')
    query = urllib.quote_plus(query, ':&=')

    # Remove authentication to protect users' privacy
    netloc = netloc.rsplit('@', 1)[-1]

    # Normalize empty paths to "/"
    if path == '':
        path = '/'

    # feed://, itpc:// and itms:// are really http://
    if scheme in ('feed', 'itpc', 'itms'):
        scheme = 'http'

    if scheme not in ('http', 'https', 'ftp', 'file'):
        return None

    # urlunsplit might return "a slighty different, but equivalent URL"
    return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
def partition(items, predicate=bool):
    """ Splits items into two lazy streams by predicate

    Returns a pair of generators: the first yields the items for which
    predicate is false, the second those for which it is true.
    """
    stream_a, stream_b = itertools.tee(
        (predicate(item), item) for item in items)
    false_items = (item for pred, item in stream_a if not pred)
    true_items = (item for pred, item in stream_b if pred)
    return false_items, true_items
def split_quoted(s):
    """ Splits a quoted string

    >>> split_quoted('some "quoted text"') == ['some', 'quoted text']
    True

    >>> split_quoted('"quoted text') == ['quoted', 'text']
    True

    # 4 quotes here are 2 in the doctest is one in the actual string
    >>> split_quoted('text\\\\') == ['text']
    True
    """

    try:
        # split by whitespace, preserve quoted substrings
        return shlex.split(s)

    except ValueError:
        # shlex rejects unbalanced input, e.g.
        # - no closing quotation (eg '"text')
        # - no escaped character (eg '\')
        # strip the offending characters and retry
        sanitized = s.replace('"', '').replace("'", '').replace('\\', '')
        return shlex.split(sanitized)