[Migration] remove CouchDB fulltext index
[mygpo.git] / mygpo / utils.py
blob5ec67f34df189e6d1c10d752b63f08845a5cf42d
1 # -*- coding: utf-8 -*-
3 # This file is part of my.gpodder.org.
5 # my.gpodder.org is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU Affero General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or (at your
8 # option) any later version.
10 # my.gpodder.org is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
13 # License for more details.
15 # You should have received a copy of the GNU Affero General Public License
16 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
19 import functools
20 import types
21 import subprocess
22 import os
23 import operator
24 import sys
25 import re
26 import collections
27 import itertools
28 from datetime import datetime, timedelta, date
29 import time
30 import hashlib
31 import urlparse
32 import urllib
33 import urllib2
34 import zlib
35 import shlex
37 from django.conf import settings
38 from django.core.urlresolvers import reverse
40 from mygpo.core.json import json
def daterange(from_date, to_date=None, leap=timedelta(days=1)):
    """ Yields dates from from_date up to and including to_date

    If to_date is None it defaults to now (for datetime arguments) or
    today (for date arguments).  leap is the step between yielded values.

    >>> from_d = datetime(2010, 1, 1)
    >>> to_d = datetime(2010, 1, 5)
    >>> list(daterange(from_d, to_d))
    [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
    """

    if to_date is None:
        # pick a default upper bound matching the type of from_date
        if isinstance(from_date, datetime):
            to_date = datetime.now()
        else:
            to_date = date.today()

    while from_date <= to_date:
        yield from_date
        from_date = from_date + leap
def format_time(value):
    """Format an offset (in seconds) to a string

    The offset should be an integer or float value.

    >>> format_time(0)
    '00:00'
    >>> format_time(20)
    '00:20'
    >>> format_time(3600)
    '01:00:00'
    >>> format_time(10921)
    '03:02:01'
    """
    try:
        moment = datetime.utcfromtimestamp(value)
    except (ValueError, TypeError):
        # unparsable input (e.g. None) yields an empty string
        return ''

    # omit the hour field for sub-hour offsets
    pattern = '%H:%M:%S' if moment.hour else '%M:%S'
    return moment.strftime(pattern)
def parse_time(value):
    """ Parses a time value (int seconds or H:M:S / M:S string) to seconds

    >>> parse_time(10)
    10

    >>> parse_time('05:10') #5*60+10
    310

    >>> parse_time('1:05:10') #60*60+5*60+10
    3910

    Raises ValueError for None or empty input.
    """
    if value is None:
        raise ValueError('None value in parse_time')

    if isinstance(value, int):
        # Don't need to parse already-converted time value
        return value

    if value == '':
        raise ValueError('Empty value in parse_time')

    for format in ('%H:%M:%S', '%M:%S'):
        try:
            t = time.strptime(value, format)
            return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
        # the bound exception was never used; the Python-2-only
        # ``except ValueError, e`` syntax is replaced with the portable form
        except ValueError:
            continue

    # not a time string at all -- treat as a plain number of seconds
    return int(value)
def parse_bool(val):
    """ Parses a bool or string into a bool

    >>> parse_bool('True')
    True

    >>> parse_bool('true')
    True

    >>> parse_bool('')
    False
    """
    # booleans pass straight through; everything else is matched
    # case-insensitively against the string 'true'
    if isinstance(val, bool):
        return val
    return val.lower() == 'true'
def iterate_together(lists, key=lambda x: x, reverse=False):
    """
    takes ordered, possibly sparse, lists with similar items
    (some items have a corresponding item in the other lists, some don't).

    It then yield tuples of corresponding items, where one element is None is
    there is no corresponding entry in one of the lists.

    Tuples where both elements are None are skipped.

    The results of the key method are used for the comparisons.

    If reverse is True, the lists are expected to be sorted in reverse order
    and the results will also be sorted reverse

    >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
    [(1, 1), (2, None), (None, 3)]

    >>> list(iterate_together([[], []]))
    []

    >>> list(iterate_together([range(1, 3), range(3, 5)]))
    [(1, None), (2, None), (None, 3), (None, 4)]

    >>> list(iterate_together([range(1, 3), []]))
    [(1, None), (2, None)]

    >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
    [(1, None), (3, 3)]
    """

    Next = collections.namedtuple('Next', 'item more')
    min_ = min if not reverse else max
    lt_ = operator.lt if not reverse else operator.gt

    lists = [iter(l) for l in lists]

    def _take(it):
        # advance to the next non-None item; uses the builtin next()
        # (available since Python 2.6) instead of the 2.x-only it.next()
        try:
            i = next(it)
            while i is None:
                i = next(it)
            return Next(i, True)
        except StopIteration:
            return Next(None, False)

    def new_res():
        return [None]*len(lists)

    # take first bunch of items
    items = [_take(l) for l in lists]

    while any(i.item is not None or i.more for i in items):

        res = new_res()

        for n, item in enumerate(items):

            if item.item is None:
                continue

            # first candidate for this round
            if all(x is None for x in res):
                res[n] = item.item
                continue

            min_v = min_(filter(lambda x: x is not None, res), key=key)

            if key(item.item) == key(min_v):
                # corresponding item -- emit in the same tuple
                res[n] = item.item

            elif lt_(key(item.item), key(min_v)):
                # strictly smaller: discard the previous candidates
                res = new_res()
                res[n] = item.item

        # refill the slots that were consumed this round
        for n, x in enumerate(res):
            if x is not None:
                items[n] = _take(lists[n])

        yield tuple(res)
def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
    """ Writes a one-line progress bar to stream (defaults to stdout)

    The bar is max_width characters wide, with the percentage embedded in
    its middle, followed by ``val / max_val`` and an optional status text.
    A leading '\\r' rewinds to the line start so repeated calls redraw
    the same line.
    """

    factor = float(val)/max_val if max_val > 0 else 0

    # progress as percentage
    percentage_str = '{val:.2%}'.format(val=factor)

    # progress bar filled with #s
    factor = min(int(factor*max_width), max_width)
    progress_str = '#' * factor + ' ' * (max_width-factor)

    #insert percentage into bar
    percentage_start = int((max_width-len(percentage_str))/2)
    progress_str = progress_str[:percentage_start] + \
                   percentage_str + \
                   progress_str[percentage_start+len(percentage_str):]

    # stream.write() replaces the Python-2-only ``print >> stream``
    # statements; no trailing newline so the line can be overwritten
    stream.write('\r[ %s ] %s / %s | %s' % (
        progress_str,
        val,
        max_val,
        status_str))
    stream.flush()
def set_cmp(list, simplify):
    """
    Builds a set out of a list but uses the results of simplify to determine
    equality between items
    """
    # NOTE: the first parameter shadows the builtin list(); the name is
    # kept for backwards compatibility of the signature
    simpl = lambda x: (simplify(x), x)
    lst = dict(map(simpl, list))
    # build the result with a comprehension so a real list is returned on
    # Python 3 as well (dict.values() is a view there, a list on Python 2)
    return [x for x in lst.values()]
def first(it):
    """
    returns the first not-None object or None if the iterator is exhausted
    """
    # generator expression + next() with a default replaces the manual loop
    return next((x for x in it if x is not None), None)
def intersect(a, b):
    """ Returns the intersection of both iterables as a list """
    return list(set(a).intersection(b))
def remove_control_chars(s):
    """ Removes C0 and C1 control characters from s

    That is the ranges U+0000-U+001F and U+007F-U+009F -- the same set the
    previous unichr/range-based code built.
    """
    # literal character class instead of the Python-2-only unichr()/
    # range-concatenation; the unused ``all_chars`` generator was removed
    control_char_re = re.compile(u'[\x00-\x1f\x7f-\x9f]')
    return control_char_re.sub('', s)
def unzip(a):
    """ Inverse of zip(): a list of tuples becomes a tuple of lists """
    columns = zip(*a)
    return tuple([list(col) for col in columns])
def parse_range(s, min, max, default=None):
    """
    Parses the string and returns its value. If the value is outside the given
    range, its closest number within the range is returned

    >>> parse_range('5', 0, 10)
    5

    >>> parse_range('0', 5, 10)
    5

    >>> parse_range('15', 0, 10)
    10

    >>> parse_range('x', 0, 20)
    10

    >>> parse_range('x', 0, 20, 20)
    20
    """
    # NOTE: min/max shadow the builtins; names kept for signature
    # compatibility with existing callers
    try:
        val = int(s)
        if val < min:
            return min
        if val > max:
            return max
        return val

    except (ValueError, TypeError):
        # floor division keeps the midpoint an int on Python 3 as well
        # (plain / would produce a float there)
        return default if default is not None else (max-min)//2
def flatten(l):
    """ Flattens a list of lists by one level """
    return list(itertools.chain.from_iterable(l))
def linearize(key, iterators, reverse=False):
    """
    Linearizes a number of iterators, sorted by some comparison function

    Each iterator is expected to be sorted by key already (reverse-sorted
    when reverse is True); the merged elements are yielded in that order.
    """

    iters = [iter(i) for i in iterators]
    vals = []
    for i in iters:
        try:
            # builtin next() (Python 2.6+) replaces the 2.x-only i.next()
            v = next(i)
            vals.append( (v, i) )
        except StopIteration:
            continue

    while vals:
        # keep the smallest (or largest, if reverse) head at index 0
        vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
        val, it = vals.pop(0)
        yield val
        try:
            next_val = next(it)
            vals.append( (next_val, it) )
        except StopIteration:
            pass
def skip_pairs(iterator, cmp=None):
    """ Skips pairs of equal items

    cmp is a three-way comparison function (negative / 0 / positive);
    it defaults to comparing with < and >.

    >>> list(skip_pairs([]))
    []

    >>> list(skip_pairs([1]))
    [1]

    >>> list(skip_pairs([1, 2, 3]))
    [1, 2, 3]

    >>> list(skip_pairs([1, 1]))
    []

    >>> list(skip_pairs([1, 2, 2]))
    [1]

    >>> list(skip_pairs([1, 2, 2, 3]))
    [1, 3]

    >>> list(skip_pairs([1, 2, 2, 2]))
    [1, 2]

    >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
    [1, 3]
    """

    if cmp is None:
        # the builtin cmp() only exists on Python 2, so it can no longer be
        # used as the parameter default; this lambda is equivalent
        cmp = lambda a, b: (a > b) - (a < b)

    iterator = iter(iterator)
    try:
        next_item = next(iterator)
    except StopIteration:
        # empty input
        return

    while True:
        item = next_item
        try:
            next_item = next(iterator)
        except StopIteration:
            # last, unpaired item; return instead of re-raising
            # StopIteration, which PEP 479 turns into a RuntimeError
            # inside generators
            yield item
            return

        if cmp(item, next_item) == 0:
            # equal pair -- drop both and continue after it
            try:
                next_item = next(iterator)
            except StopIteration:
                return
        else:
            yield item
def get_timestamp(datetime_obj):
    """ Returns the timestamp as an int for the given datetime object

    >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
    1302168606

    >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
    0
    """
    # NOTE(review): mktime interprets the datetime in the *local* timezone,
    # so the doctest values above only hold on a machine running UTC
    struct = datetime_obj.timetuple()
    return int(time.mktime(struct))
# matches http:// and https:// prefixes only
re_url = re.compile('^https?://')

def is_url(string):
    """ Returns true if a string looks like an URL

    >>> is_url('http://example.com/some-path/file.xml')
    True

    >>> is_url('something else')
    False
    """
    return re_url.match(string) is not None
414 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
415 # this does not increase asymptotical complexity
416 # but can still waste more time than it saves.
def shortest_of(strings):
    """ Returns the shortest of the given strings """
    # stable sort keeps the same first-of-ties result as min(key=len)
    return sorted(strings, key=len)[0]
def longest_substr(strings):
    """
    Returns the longest common substring of the given strings

    >>> longest_substr(['xabcy', 'zabcw'])
    'abc'
    >>> longest_substr(['ab', 'ab'])
    'ab'
    >>> longest_substr([])
    ''
    """

    substr = ""
    if not strings:
        return substr
    # the common substring can be no longer than the shortest input
    reference = min(strings, key=len)
    length = len(reference)
    #find a suitable slice i:j
    # range() instead of the Python-2-only xrange(); the inner upper bound
    # is length + 1 so the candidate slice can include the reference's last
    # character -- a bound of `length` never tests it
    for i in range(length):
        #only consider strings long at least len(substr) + 1
        for j in range(i + len(substr) + 1, length + 1):
            candidate = reference[i:j]
            if all(candidate in text for text in strings):
                substr = candidate
    return substr
def additional_value(it, gen_val, val_changed=lambda _: True):
    """ Provides an additional value to the elements, calculated when needed

    For the elements from the iterator, some additional value can be computed
    by gen_val (which might be an expensive computation).

    If the elements in the iterator are ordered so that some subsequent
    elements would generate the same additional value, val_changed can be
    provided, which receives the next element from the iterator and the
    previous additional value. If the element would generate the same
    additional value (val_changed returns False), its computation is skipped.

    >>> # get the next full hundred higher than x
    >>> # this will probably be an expensive calculation
    >>> next_hundred = lambda x: x + 100-(x % 100)

    >>> # returns True if h is not the value that next_hundred(x) would provide
    >>> # this should be a relatively cheap calculation, compared to the above
    >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100

    >>> xs = [0, 50, 100, 101, 199, 200, 201]
    >>> list(additional_value(xs, next_hundred, diff_hundred))
    [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
    """

    sentinel = object()  # marks "no additional value computed yet"
    current = sentinel

    for elem in it:
        # recompute only for the first element, or when the cheap
        # val_changed check says the cached value is stale
        needs_update = current is sentinel or val_changed(elem, current)
        if needs_update:
            current = gen_val(elem)

        yield (elem, current)
def file_hash(f, h=hashlib.md5, block_size=2**20):
    """ returns the hash object (not the digest) for the contents of f

    f is read in chunks of block_size; h is the hashlib constructor to use.
    """
    f_hash = h()
    while True:
        chunk = f.read(block_size)
        # an empty read marks EOF for both str and bytes streams; the old
        # iter(..., '') sentinel never matches b'' on Python 3 and would
        # loop forever on binary files
        if not chunk:
            break
        f_hash.update(chunk)
    return f_hash
def split_list(l, prop):
    """ split elements that satisfy a property, and those that don't """
    # materialize the matches; on Python 3 filter() returns a one-shot
    # iterator, which the membership tests below would silently exhaust
    match = [x for x in l if prop(x)]
    # membership (not `not prop(x)`) preserves the original semantics for
    # elements that compare equal to a matching one
    nomatch = [x for x in l if x not in match]
    return match, nomatch
def sorted_chain(links, key, reverse=False):
    """ Takes a list of iters can iterates over sorted elements

    Each elment of links should be a tuple of (sort_key, iterator). The
    elements of each iterator should be sorted already. sort_key should
    indicate the key of the first element and needs to be comparable to the
    result of key(elem).

    The function returns an iterator over the globally sorted element that
    ensures that as little iterators as possible are evaluated.  When
    evaluating """

    # mixed_list initially contains all placeholders; later evaluated
    # elements (from the iterators) are mixed in
    mixed_list = [(k, link, True) for k, link in links]

    while mixed_list:
        _, item, expand = mixed_list.pop(0)

        # found an element (from an earlier expansion), yield it
        if not expand:
            yield item
            continue

        # found an iter that needs to be expanded.
        # The iterator is fully consumed
        new_items = [(key(i), i, False) for i in item]

        # sort links (placeholders) and elements together; sort on the first
        # tuple element explicitly -- tuple-parameter unpacking in lambdas
        # (``lambda (k, _v, _e): k``) was removed in Python 3
        mixed_list = sorted(mixed_list + new_items,
                            key=operator.itemgetter(0),
                            reverse=reverse)
def url_add_authentication(url, username, password):
    """
    Adds authentication data (username, password) to a given
    URL in order to construct an authenticated URL.

    >>> url_add_authentication('https://host.com/', '', None)
    'https://host.com/'
    >>> url_add_authentication('http://example.org/', None, None)
    'http://example.org/'
    >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
    'telnet://foo:bar@host.com/'
    >>> url_add_authentication('ftp://example.org', 'billy', None)
    'ftp://billy@example.org'
    >>> url_add_authentication('ftp://example.org', 'billy', '')
    'ftp://billy:@example.org'
    >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
    'http://aa:bc@localhost/x'
    >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
    'http://i%2Fo:P@ss:@blubb.lan/u.html'
    >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
    'http://c:d@x.org/'
    >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
    'http://P@x:i%2F@cx.lan'
    >>> url_add_authentication('http://x.org/', 'a b', 'c d')
    'http://a%20b:c%20d@x.org/'
    """
    # an empty or missing username means: no authentication at all
    if username is None or username == '':
        return url

    # Relaxations of the strict quoting rules (bug 1521):
    # 1. Accept '@' in username and password
    # 2. Accept ':' in password only
    username = urllib.quote(username, safe='@')

    if password is not None:
        password = urllib.quote(password, safe='@:')
        auth_string = ':'.join((username, password))
    else:
        auth_string = username

    # drop any credentials already embedded in the URL before adding ours
    url = url_strip_authentication(url)

    url_parts = list(urlparse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL
    url_parts[1] = '@'.join((auth_string, url_parts[1]))

    return urlparse.urlunsplit(url_parts)
def urlopen(url, headers=None, data=None):
    """
    An URL opener with the User-agent set to gPodder (with version)

    If the URL embeds credentials, they are stripped from the URL and
    passed on via an HTTP basic-auth handler instead.
    """
    username, password = username_password_from_url(url)
    if username is not None or password is not None:
        url = url_strip_authentication(url)
        password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        handler = urllib2.HTTPBasicAuthHandler(password_mgr)
        opener = urllib2.build_opener(handler)
    else:
        opener = urllib2.build_opener()

    # copy the caller's headers dict so it is not modified by the
    # User-agent update below
    if headers is None:
        headers = {}
    else:
        headers = dict(headers)

    headers.update({'User-agent': settings.USER_AGENT})
    request = urllib2.Request(url, data=data, headers=headers)
    return opener.open(request)
def username_password_from_url(url):
    r"""
    Returns a tuple (username,password) containing authentication
    data from the specified URL or (None,None) if no authentication
    data can be found in the URL.

    See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)

    >>> username_password_from_url('https://@host.com/')
    ('', None)
    >>> username_password_from_url('telnet://host.com/')
    (None, None)
    >>> username_password_from_url('ftp://foo:@host.com/')
    ('foo', '')
    >>> username_password_from_url('http://a:b@host.com/')
    ('a', 'b')
    >>> username_password_from_url(1)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url(None)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url('http://a@b:c@host.com/')
    ('a@b', 'c')
    >>> username_password_from_url('ftp://a:b:c@host.com/')
    ('a', 'b:c')
    >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
    ('i/o', 'P@ss:')
    >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
    ('\xc3\xb6sterreich', None)
    >>> username_password_from_url('http://w%20x:y%20z@example.org/')
    ('w x', 'y z')
    >>> username_password_from_url('http://example.com/x@y:z@test.com/')
    (None, None)
    """
    if type(url) not in (str, unicode):
        raise ValueError('URL has to be a string or unicode object.')

    (username, password) = (None, None)

    (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)

    if '@' in netloc:
        (authentication, netloc) = netloc.rsplit('@', 1)
        if ':' in authentication:
            (username, password) = authentication.split(':', 1)

            # RFC1738 dictates that we should not allow ['/', '@', ':']
            # characters in the username and password field (Section 3.1):
            #
            # 1. The "/" can't be in there at this point because of the way
            #    urlparse (which we use above) works.
            # 2. Due to gPodder bug 1521, we allow "@" in the username and
            #    password field. We use netloc.rsplit('@', 1), which will
            #    make sure that we split it at the last '@' in netloc.
            # 3. The colon must be excluded (RFC2617, Section 2) in the
            #    username, but is apparently allowed in the password. This
            #    is handled by the authentication.split(':', 1) above, and
            #    will cause any extraneous ':'s to be part of the password.
            username = urllib.unquote(username)
            password = urllib.unquote(password)
        else:
            username = urllib.unquote(authentication)

    return (username, password)
def url_strip_authentication(url):
    """
    Strips authentication data from an URL. Returns the URL with
    the authentication data removed from it.

    >>> url_strip_authentication('https://host.com/')
    'https://host.com/'
    >>> url_strip_authentication('telnet://foo:bar@host.com/')
    'telnet://host.com/'
    >>> url_strip_authentication('ftp://billy@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('ftp://billy:@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('http://aa:bc@localhost/x')
    'http://localhost/x'
    >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
    'http://blubb.lan/u.html'
    >>> url_strip_authentication('http://c:d@x.org/')
    'http://x.org/'
    >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
    'http://cx.lan'
    >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
    'http://example.com/'
    """
    url_parts = list(urlparse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL

    # Remove existing authentication data; rsplit at the *last* '@' so
    # credentials that themselves contain '@' are removed completely
    if '@' in url_parts[1]:
        url_parts[1] = url_parts[1].rsplit('@', 1)[1]

    return urlparse.urlunsplit(url_parts)
# Native filesystem encoding detection
encoding = sys.getfilesystemencoding()

def sanitize_encoding(filename):
    r"""
    Generate a sanitized version of a string (i.e.
    remove invalid characters and encode in the
    detected native language encoding).

    >>> sanitize_encoding('\x80')
    ''
    >>> sanitize_encoding(u'unicode')
    'unicode'
    """
    # The encoding problem goes away in Python 3.. hopefully!
    if sys.version_info >= (3, 0):
        return filename

    global encoding
    # round-trip through the filesystem encoding with 'ignore' so
    # characters it cannot represent are silently dropped
    if not isinstance(filename, unicode):
        filename = filename.decode(encoding, 'ignore')
    return filename.encode(encoding, 'ignore')
def get_git_head():
    """ returns the commit and message of the current git HEAD

    Returns (None, None) if git cannot be run or reports an error.
    """

    try:
        pr = subprocess.Popen('/usr/bin/git log -n 1 --oneline'.split(),
                cwd = settings.BASE_DIR,
                stdout = subprocess.PIPE,
                stderr = subprocess.PIPE,
            )

    except OSError:
        # git binary not available
        return None, None

    (out, err) = pr.communicate()
    if err:
        return None, None

    # --oneline output: "<abbrev-hash> <subject>"
    outs = out.split()
    commit = outs[0]
    msg = ' ' .join(outs[1:])
    return commit, msg
# https://gist.github.com/samuraisam/901117

# zero tolerance by default when comparing datetimes
default_fudge = timedelta(seconds=0, microseconds=0, days=0)

def deep_eq(_v1, _v2, datetime_fudge=default_fudge, _assert=False):
    """
    Tests for deep equality between two python data structures recursing
    into sub-structures if necessary. Works with all python types including
    iterators and generators. This function was dreampt up to test API responses
    but could be used for anything. Be careful. With deeply nested structures
    you may blow the stack.

    Options:
              datetime_fudge => this is a datetime.timedelta object which, when
                                comparing dates, will accept values that differ
                                by the number of seconds specified
              _assert        => passing yes for this will raise an assertion error
                                when values do not match, instead of returning
                                false (very useful in combination with pdb)

    Doctests included:

    >>> x1, y1 = ({'a': 'b'}, {'a': 'b'})
    >>> deep_eq(x1, y1)
    True
    >>> x2, y2 = ({'a': 'b'}, {'b': 'a'})
    >>> deep_eq(x2, y2)
    False
    >>> x3, y3 = ({'a': {'b': 'c'}}, {'a': {'b': 'c'}})
    >>> deep_eq(x3, y3)
    True
    >>> x4, y4 = ({'c': 't', 'a': {'b': 'c'}}, {'a': {'b': 'n'}, 'c': 't'})
    >>> deep_eq(x4, y4)
    False
    >>> x5, y5 = ({'a': [1,2,3]}, {'a': [1,2,3]})
    >>> deep_eq(x5, y5)
    True
    >>> x6, y6 = ({'a': [1,'b',8]}, {'a': [2,'b',8]})
    >>> deep_eq(x6, y6)
    False
    >>> x7, y7 = ('a', 'a')
    >>> deep_eq(x7, y7)
    True
    >>> x8, y8 = (['p','n',['asdf']], ['p','n',['asdf']])
    >>> deep_eq(x8, y8)
    True
    >>> x9, y9 = (['p','n',['asdf',['omg']]], ['p', 'n', ['asdf',['nowai']]])
    >>> deep_eq(x9, y9)
    False
    >>> x10, y10 = (1, 2)
    >>> deep_eq(x10, y10)
    False
    >>> deep_eq((str(p) for p in xrange(10)), (str(p) for p in xrange(10)))
    True
    >>> str(deep_eq(range(4), range(4)))
    'True'
    >>> deep_eq(xrange(100), xrange(100))
    True
    >>> deep_eq(xrange(2), xrange(5))
    False
    >>> from datetime import datetime, timedelta
    >>> d1, d2 = (datetime.now(), datetime.now() + timedelta(seconds=4))
    >>> deep_eq(d1, d2)
    False
    >>> deep_eq(d1, d2, datetime_fudge=timedelta(seconds=5))
    True
    """
    # recursive comparison with the same fudge/assert options
    _deep_eq = functools.partial(deep_eq, datetime_fudge=datetime_fudge,
                                 _assert=_assert)

    def _check_assert(R, a, b, reason=''):
        # either raise (when _assert is set) or just report the result
        if _assert and not R:
            assert 0, "an assertion has failed in deep_eq (%s) %s != %s" % (
                reason, str(a), str(b))
        return R

    def _deep_dict_eq(d1, d2):
        k1, k2 = (sorted(d1.keys()), sorted(d2.keys()))
        if k1 != k2: # keys should be exactly equal
            return _check_assert(False, k1, k2, "keys")

        # all values must compare deep-equal, pairwise by key
        return _check_assert(operator.eq(sum(_deep_eq(d1[k], d2[k])
                                       for k in k1),
                                       len(k1)), d1, d2, "dictionaries")

    def _deep_iter_eq(l1, l2):
        if len(l1) != len(l2):
            return _check_assert(False, l1, l2, "lengths")
        # all elements must compare deep-equal, pairwise by position
        return _check_assert(operator.eq(sum(_deep_eq(v1, v2)
                                       for v1, v2 in zip(l1, l2)),
                                       len(l1)), l1, l2, "iterables")

    def op(a, b):
        _op = operator.eq
        if type(a) == datetime and type(b) == datetime:
            # compare timestamps, accepting a difference of up to
            # datetime_fudge seconds
            s = datetime_fudge.seconds
            t1, t2 = (time.mktime(a.timetuple()), time.mktime(b.timetuple()))
            l = t1 - t2
            l = -l if l > 0 else l
            return _check_assert((-s if s > 0 else s) <= l, a, b, "dates")
        return _check_assert(_op(a, b), a, b, "values")

    c1, c2 = (_v1, _v2)

    # guard against strings because they are iterable and their
    # elements yield iterables infinitely.
    # I N C E P T I O N
    for t in types.StringTypes:
        if isinstance(_v1, t):
            break
    else:
        if isinstance(_v1, types.DictType):
            op = _deep_dict_eq
        else:
            try:
                c1, c2 = (list(iter(_v1)), list(iter(_v2)))
            except TypeError:
                c1, c2 = _v1, _v2
            else:
                op = _deep_iter_eq

    return op(c1, c2)
def parse_request_body(request):
    """ returns the parsed request body, handles gzip encoding """

    raw_body = request.body
    encoding_header = request.META.get('HTTP_CONTENT_ENCODING')

    # transparently inflate compressed payloads before JSON parsing
    if encoding_header == 'gzip':
        raw_body = zlib.decompress(raw_body)

    return json.loads(raw_body)
def normalize_feed_url(url):
    """
    Converts any URL to http:// or ftp:// so that it can be
    used with "wget". If the URL cannot be converted (invalid
    or unknown scheme), "None" is returned.

    This will also normalize feed:// and itpc:// to http://.

    >>> normalize_feed_url('itpc://example.org/podcast.rss')
    'http://example.org/podcast.rss'

    If no URL scheme is defined (e.g. "curry.com"), we will
    simply assume the user intends to add a http:// feed.

    >>> normalize_feed_url('curry.com')
    'http://curry.com/'

    There are even some more shortcuts for advanced users
    and lazy typists (see the source for details).

    >>> normalize_feed_url('fb:43FPodcast')
    'http://feeds.feedburner.com/43FPodcast'

    It will also take care of converting the domain name to
    all-lowercase (because domains are not case sensitive):

    >>> normalize_feed_url('http://Example.COM/')
    'http://example.com/'

    Some other minimalistic changes are also taken care of,
    e.g. a ? with an empty query is removed:

    >>> normalize_feed_url('http://example.org/test?')
    'http://example.org/test'

    Leading and trailing whitespace is removed

    >>> normalize_feed_url(' http://example.com/podcast.rss ')
    'http://example.com/podcast.rss'

    HTTP Authentication is removed to protect users' privacy

    >>> normalize_feed_url('http://a@b:c@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://a:b:c@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
    'http://example.org/'
    >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
    'http://example.com/x%40y%3Az%40test.com/'
    >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
    'http://en.wikipedia.org/wiki/%C3%84'
    >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
    'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
    """
    url = url.strip()
    # anything shorter than 8 characters cannot be a usable URL
    if not url or len(url) < 8:
        return None

    if isinstance(url, unicode):
        url = url.encode('utf-8', 'ignore')

    # This is a list of prefixes that you can use to minimize the amount of
    # keystrokes that you have to use.
    # Feel free to suggest other useful prefixes, and I'll add them here.
    PREFIXES = {
            'fb:': 'http://feeds.feedburner.com/%s',
            'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
            'sc:': 'http://soundcloud.com/%s',
            'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
            # YouTube playlists. To get a list of playlists per-user, use:
            # https://gdata.youtube.com/feeds/api/users/<username>/playlists
            'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
    }

    for prefix, expansion in PREFIXES.iteritems():
        if url.startswith(prefix):
            url = expansion % (url[len(prefix):],)
            break

    # Assume HTTP for URLs without scheme
    if not '://' in url:
        url = 'http://' + url

    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    # Schemes and domain names are case insensitive
    scheme, netloc = scheme.lower(), netloc.lower()

    # encode non-encoded characters
    path = urllib.quote(path, '/%')
    query = urllib.quote_plus(query, ':&=')

    # Remove authentication to protect users' privacy
    netloc = netloc.rsplit('@', 1)[-1]

    # Normalize empty paths to "/"
    if path == '':
        path = '/'

    # feed://, itpc:// and itms:// are really http://
    if scheme in ('feed', 'itpc', 'itms'):
        scheme = 'http'

    if scheme not in ('http', 'https', 'ftp', 'file'):
        return None

    # urlunsplit might return "a slighty different, but equivalent URL"
    return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
def partition(items, predicate=bool):
    """ Splits items into two generators: (falsy-items, truthy-items)

    predicate is evaluated exactly once per item.
    """
    evaluated = ((predicate(item), item) for item in items)
    # tee so both output generators can consume the single evaluation pass
    left, right = itertools.tee(evaluated)
    false_items = (item for pred, item in left if not pred)
    true_items = (item for pred, item in right if pred)
    return false_items, true_items
def split_quoted(s):
    """ Splits a quoted string

    >>> split_quoted('some "quoted text"') == ['some', 'quoted text']
    True

    >>> split_quoted('"quoted text') == ['quoted', 'text']
    True

    # 4 quotes here are 2 in the doctest is one in the actual string
    >>> split_quoted('text\\\\') == ['text']
    True
    """

    try:
        # split by whitespace, preserve quoted substrings
        return shlex.split(s)

    except ValueError:
        # No closing quotation (eg '"text')
        # No escaped character (eg '\')
        # -- strip the offending characters and retry
        cleaned = s.replace('"', '').replace("'", '').replace('\\', '')
        return shlex.split(cleaned)
def edit_link(obj):
    """ Return the link to the Django Admin Edit page """
    # NOTE(review): _meta.module_name was renamed to model_name in newer
    # Django versions -- confirm against the Django version in use
    return reverse('admin:%s_%s_change' % (obj._meta.app_label,
                                           obj._meta.module_name),
            args=(obj.pk,))