update Authentication API
[mygpo.git] / mygpo / utils.py
blob38cc1e4e5bed116a7a8ec898161da05e730d92eb
2 # This file is part of my.gpodder.org.
4 # my.gpodder.org is free software: you can redistribute it and/or modify it
5 # under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or (at your
7 # option) any later version.
9 # my.gpodder.org is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
12 # License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
18 import operator
19 import sys
20 import re
21 import collections
22 from datetime import datetime, timedelta, date
23 import time
25 from django.core.cache import cache
28 def daterange(from_date, to_date=None, leap=timedelta(days=1)):
29 """
30 >>> from_d = datetime(2010, 01, 01)
31 >>> to_d = datetime(2010, 01, 05)
32 >>> list(daterange(from_d, to_d))
33 [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
34 """
36 if to_date is None:
37 if isinstance(from_date, datetime):
38 to_date = datetime.now()
39 else:
40 to_date = date.today()
42 while from_date <= to_date:
43 yield from_date
44 from_date = from_date + leap
45 return
47 def format_time(value):
48 """Format an offset (in seconds) to a string
50 The offset should be an integer or float value.
52 >>> format_time(0)
53 '00:00'
54 >>> format_time(20)
55 '00:20'
56 >>> format_time(3600)
57 '01:00:00'
58 >>> format_time(10921)
59 '03:02:01'
60 """
61 try:
62 dt = datetime.utcfromtimestamp(value)
63 except ValueError:
64 return ''
66 if dt.hour == 0:
67 return dt.strftime('%M:%S')
68 else:
69 return dt.strftime('%H:%M:%S')
71 def parse_time(value):
72 """
73 >>> parse_time(10)
76 >>> parse_time('05:10') #5*60+10
77 310
79 >>> parse_time('1:05:10') #60*60+5*60+10
80 3910
81 """
82 if value is None:
83 raise ValueError('None value in parse_time')
85 if isinstance(value, int):
86 # Don't need to parse already-converted time value
87 return value
89 if value == '':
90 raise ValueError('Empty valueing in parse_time')
92 for format in ('%H:%M:%S', '%M:%S'):
93 try:
94 t = time.strptime(value, format)
95 return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
96 except ValueError, e:
97 continue
99 return int(value)
102 def parse_bool(val):
104 >>> parse_bool('True')
105 True
107 >>> parse_bool('true')
108 True
110 >>> parse_bool('')
111 False
113 if isinstance(val, bool):
114 return val
115 if val.lower() == 'true':
116 return True
117 return False
120 def iterate_together(lists, key=lambda x: x, reverse=False):
122 takes ordered, possibly sparse, lists with similar items
123 (some items have a corresponding item in the other lists, some don't).
125 It then yield tuples of corresponding items, where one element is None is
126 there is no corresponding entry in one of the lists.
128 Tuples where both elements are None are skipped.
130 The results of the key method are used for the comparisons.
132 If reverse is True, the lists are expected to be sorted in reverse order
133 and the results will also be sorted reverse
135 >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
136 [(1, 1), (2, None), (None, 3)]
138 >>> list(iterate_together([[], []]))
141 >>> list(iterate_together([range(1, 3), range(3, 5)]))
142 [(1, None), (2, None), (None, 3), (None, 4)]
144 >>> list(iterate_together([range(1, 3), []]))
145 [(1, None), (2, None)]
147 >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
148 [(1, None), (3, 3)]
151 Next = collections.namedtuple('Next', 'item more')
152 min_ = min if not reverse else max
153 lt_ = operator.lt if not reverse else operator.gt
155 lists = [iter(l) for l in lists]
157 def _take(it):
158 try:
159 i = it.next()
160 while i is None:
161 i = it.next()
162 return Next(i, True)
163 except StopIteration:
164 return Next(None, False)
166 def new_res():
167 return [None]*len(lists)
169 # take first bunch of items
170 items = [_take(l) for l in lists]
172 while any(i.item is not None or i.more for i in items):
174 res = new_res()
176 for n, item in enumerate(items):
178 if item.item is None:
179 continue
181 if all(x is None for x in res):
182 res[n] = item.item
183 continue
185 min_v = min_(filter(lambda x: x is not None, res), key=key)
187 if key(item.item) == key(min_v):
188 res[n] = item.item
190 elif lt_(key(item.item), key(min_v)):
191 res = new_res()
192 res[n] = item.item
194 for n, x in enumerate(res):
195 if x is not None:
196 items[n] = _take(lists[n])
198 yield tuple(res)
201 def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
202 print >> stream, '\r',
203 print >> stream, '[ %s ] %s / %s | %s' % (
204 '#'*int(float(val)/max_val*max_width) +
205 ' ' * (max_width-(int(float(val)/max_val*max_width))),
206 val,
207 max_val,
208 status_str),
209 stream.flush()
212 def set_cmp(list, simplify):
214 Builds a set out of a list but uses the results of simplify to determine equality between items
216 simpl = lambda x: (simplify(x), x)
217 lst = dict(map(simpl, list))
218 return lst.values()
221 def first(it):
223 returns the first not-None object or None if the iterator is exhausted
225 for x in it:
226 if x != None:
227 return x
228 return None
231 def intersect(a, b):
232 return list(set(a) & set(b))
236 def multi_request_view(cls, view, wrap=True, auto_advance=True,
237 *args, **kwargs):
239 splits up a view request into several requests, which reduces
240 the server load of the number of returned objects is large.
242 NOTE: As such a split request is obviously not atomical anymore, results
243 might skip some elements of contain some twice
245 If auto_advance is False the method will always request the same range.
246 This can be useful when the view contain unprocessed items and the caller
247 processes the items, thus removing them from the view before the next
248 request.
251 per_page = kwargs.get('limit', 1000)
252 kwargs['limit'] = per_page + 1
253 db = cls.get_db()
254 wrapper = kwargs.pop('wrapper', cls.wrap)
255 cont = True
257 while cont:
259 resp = db.view(view, *args, **kwargs)
260 cont = False
262 for n, obj in enumerate(resp.iterator()):
264 key = obj['key']
266 if wrap:
267 doc = wrapper(obj['doc'])
268 docid = doc._id
269 else:
270 docid = obj['id']
271 doc = obj
273 if n == per_page:
274 if auto_advance:
275 kwargs['startkey'] = key
276 kwargs['startkey_docid'] = docid
277 if 'skip' in kwargs:
278 del kwargs['skip']
280 # we reached the end of the page, load next one
281 cont = True
282 break
284 yield doc
287 def remove_control_chars(s):
288 import unicodedata, re
290 all_chars = (unichr(i) for i in xrange(0x110000))
291 control_chars = ''.join(map(unichr, range(0,32) + range(127,160)))
292 control_char_re = re.compile('[%s]' % re.escape(control_chars))
294 return control_char_re.sub('', s)
297 def unzip(a):
298 return tuple(map(list,zip(*a)))
301 def parse_range(s, min, max, default=None):
303 Parses the string and returns its value. If the value is outside the given
304 range, its closest number within the range is returned
306 >>> parse_range('5', 0, 10)
309 >>> parse_range('0', 5, 10)
312 >>> parse_range('15',0, 10)
315 >>> parse_range('x', 0, 20)
318 >>> parse_range('x', 0, 20, 20)
321 try:
322 val = int(s)
323 if val < min:
324 return min
325 if val > max:
326 return max
327 return val
329 except (ValueError, TypeError):
330 return default if default is not None else (max-min)/2
333 def get_to_dict(cls, ids, get_id=lambda x: x._id, use_cache=False):
335 ids = list(set(ids))
336 objs = dict()
338 cache_objs = []
339 if use_cache:
340 for id in ids:
341 obj = cache.get(id)
342 if obj is not None:
343 cache_objs.append(obj)
344 ids.remove(id)
346 db_objs = list(cls.get_multi(ids))
348 for obj in (cache_objs + db_objs):
350 # get_multi returns dict {'key': _id, 'error': 'not found'}
351 # for non-existing objects
352 if isinstance(obj, dict) and 'error' in obj:
353 _id = obj['key']
354 objs[_id] = None
355 continue
357 ids = obj.get_ids() if hasattr(obj, 'get_ids') else [get_id(obj)]
358 for i in ids:
359 objs[i] = obj
361 if use_cache:
362 for obj in db_objs:
363 cache.set(get_id(obj), obj)
365 return objs
368 def flatten(l):
369 return [item for sublist in l for item in sublist]
372 def linearize(key, iterators, reverse=False):
374 Linearizes a number of iterators, sorted by some comparison function
377 iters = [iter(i) for i in iterators]
378 vals = []
379 for i in iters:
380 try:
381 v = i.next()
382 vals. append( (v, i) )
383 except StopIteration:
384 continue
386 while vals:
387 vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
388 val, it = vals.pop(0)
389 yield val
390 try:
391 next_val = it.next()
392 vals.append( (next_val, it) )
393 except StopIteration:
394 pass
397 def skip_pairs(iterator, cmp=cmp):
398 """ Skips pairs of equal items
400 >>> list(skip_pairs([]))
403 >>> list(skip_pairs([1]))
406 >>> list(skip_pairs([1, 2, 3]))
407 [1, 2, 3]
409 >>> list(skip_pairs([1, 1]))
412 >>> list(skip_pairs([1, 2, 2]))
415 >>> list(skip_pairs([1, 2, 2, 3]))
416 [1, 3]
418 >>> list(skip_pairs([1, 2, 2, 2]))
419 [1, 2]
421 >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
422 [1, 3]
425 iterator = iter(iterator)
426 next = iterator.next()
428 while True:
429 item = next
430 try:
431 next = iterator.next()
432 except StopIteration as e:
433 yield item
434 raise e
436 if cmp(item, next) == 0:
437 next = iterator.next()
438 else:
439 yield item
442 def get_timestamp(datetime_obj):
443 """ Returns the timestamp as an int for the given datetime object
445 >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
446 1302168606
448 >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
451 return int(time.mktime(datetime_obj.timetuple()))
455 re_url = re.compile('^https?://')
457 def is_url(string):
458 """ Returns true if a string looks like an URL
460 >>> is_url('http://example.com/some-path/file.xml')
461 True
463 >>> is_url('something else')
464 False
467 return bool(re_url.match(string))
470 def is_couchdb_id(id_str):
471 import string
472 import operator
473 import functools
474 f = functools.partial(operator.contains, string.hexdigits)
475 return len(id_str) == 32 and all(map(f, id_str))
478 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
479 # this does not increase asymptotical complexity
480 # but can still waste more time than it saves.
481 def shortest_of(strings):
482 return min(strings, key=len)
484 def longest_substr(strings):
486 Returns the longest common substring of the given strings
489 substr = ""
490 if not strings:
491 return substr
492 reference = shortest_of(strings) #strings[0]
493 length = len(reference)
494 #find a suitable slice i:j
495 for i in xrange(length):
496 #only consider strings long at least len(substr) + 1
497 for j in xrange(i + len(substr) + 1, length):
498 candidate = reference[i:j]
499 if all(candidate in text for text in strings):
500 substr = candidate
501 return substr
505 def additional_value(it, gen_val, val_changed=lambda _: True):
506 """ Provides an additional value to the elements, calculated when needed
508 For the elements from the iterator, some additional value can be computed
509 by gen_val (which might be an expensive computation).
511 If the elements in the iterator are ordered so that some subsequent
512 elements would generate the same additional value, val_changed can be
513 provided, which receives the next element from the iterator and the
514 previous additional value. If the element would generate the same
515 additional value (val_changed returns False), its computation is skipped.
517 >>> # get the next full hundred higher than x
518 >>> # this will probably be an expensive calculation
519 >>> next_hundred = lambda x: x + 100-(x % 100)
521 >>> # returns True if h is not the value that next_hundred(x) would provide
522 >>> # this should be a relatively cheap calculation, compared to the above
523 >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
525 >>> xs = [0, 50, 100, 101, 199, 200, 201]
526 >>> list(additional_value(xs, next_hundred, diff_hundred))
527 [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
530 _none = object()
531 current = _none
533 for x in it:
534 if current is _none or val_changed(x, current):
535 current = gen_val(x)
537 yield (x, current)