cache_result w/o "timeout" param
[mygpo.git] / mygpo / utils.py
blob98bfe64e3231414fd32c2db2200b571653987ca7
2 # This file is part of my.gpodder.org.
4 # my.gpodder.org is free software: you can redistribute it and/or modify it
5 # under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or (at your
7 # option) any later version.
9 # my.gpodder.org is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
12 # License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
18 import operator
19 import sys
20 import re
21 import collections
22 from datetime import datetime, timedelta, date
23 import time
24 import hashlib
26 from django.core.cache import cache
28 from mygpo.couch import get_main_database
def daterange(from_date, to_date=None, leap=timedelta(days=1)):
    """ Yields each date from from_date to to_date (inclusive), stepping by leap

    If to_date is omitted, now() / today() is used, matching the type
    of from_date.

    >>> from_d = datetime(2010, 1, 1)
    >>> to_d = datetime(2010, 1, 5)
    >>> list(daterange(from_d, to_d))
    [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
    """

    if to_date is None:
        # pick a "now" of the same type as the start value
        to_date = datetime.now() if isinstance(from_date, datetime) else date.today()

    current = from_date
    while current <= to_date:
        yield current
        current = current + leap
def format_time(value):
    """ Format an offset (in seconds) as [HH:]MM:SS

    The offset should be an integer or float value; hours are only shown
    when the offset is at least one hour. Returns '' for offsets that
    cannot be represented as a timestamp.

    >>> format_time(0)
    '00:00'
    >>> format_time(20)
    '00:20'
    >>> format_time(3600)
    '01:00:00'
    >>> format_time(10921)
    '03:02:01'
    """
    try:
        as_dt = datetime.utcfromtimestamp(value)
    except ValueError:
        return ''

    fmt = '%M:%S' if as_dt.hour == 0 else '%H:%M:%S'
    return as_dt.strftime(fmt)
def parse_time(value):
    """ Parses a time string into a number of seconds

    Accepts ints (returned unchanged), 'MM:SS' and 'HH:MM:SS' strings, or
    a plain numeric string. Raises ValueError for None or ''.

    >>> parse_time(10)
    10

    >>> parse_time('05:10') #5*60+10
    310

    >>> parse_time('1:05:10') #60*60+5*60+10
    3910
    """
    if value is None:
        raise ValueError('None value in parse_time')

    if isinstance(value, int):
        # Don't need to parse already-converted time value
        return value

    if value == '':
        raise ValueError('Empty value in parse_time')

    # try the longer format first so '1:05:10' is not rejected outright
    for fmt in ('%H:%M:%S', '%M:%S'):
        try:
            t = time.strptime(value, fmt)
            return t.tm_hour * 60 * 60 + t.tm_min * 60 + t.tm_sec
        except ValueError:
            continue

    # fall back to a plain number of seconds
    return int(value)
def parse_bool(val):
    """ Parses a boolean value from a bool or a string

    >>> parse_bool('True')
    True

    >>> parse_bool('true')
    True

    >>> parse_bool('')
    False
    """
    if isinstance(val, bool):
        return val
    # only the (case-insensitive) string 'true' counts as True
    return val.lower() == 'true'
def iterate_together(lists, key=lambda x: x, reverse=False):
    """ Iterates multiple sorted, possibly sparse, lists in lockstep

    Takes ordered, possibly sparse, lists with similar items (some items
    have a corresponding item in the other lists, some don't). It then
    yields tuples of corresponding items, where an element is None if
    there is no corresponding entry in one of the lists.

    Tuples where all elements are None are skipped. The results of the
    key method are used for the comparisons.

    If reverse is True, the lists are expected to be sorted in reverse
    order and the results will also be sorted reverse.

    >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
    [(1, 1), (2, None), (None, 3)]

    >>> list(iterate_together([[], []]))
    []

    >>> list(iterate_together([range(1, 3), range(3, 5)]))
    [(1, None), (2, None), (None, 3), (None, 4)]

    >>> list(iterate_together([range(1, 3), []]))
    [(1, None), (2, None)]

    >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
    [(1, None), (3, 3)]
    """
    Next = collections.namedtuple('Next', 'item more')
    min_ = min if not reverse else max
    lt_ = operator.lt if not reverse else operator.gt

    lists = [iter(l) for l in lists]

    def _take(it):
        """ Returns the next non-None item, flagging exhaustion via .more """
        try:
            i = next(it)
            while i is None:
                i = next(it)
            return Next(i, True)
        except StopIteration:
            return Next(None, False)

    def new_res():
        return [None] * len(lists)

    # take first bunch of items
    items = [_take(l) for l in lists]

    while any(i.item is not None or i.more for i in items):

        res = new_res()

        for n, item in enumerate(items):

            if item.item is None:
                continue

            # first non-None candidate claims its slot unconditionally
            if all(x is None for x in res):
                res[n] = item.item
                continue

            min_v = min_([x for x in res if x is not None], key=key)

            if key(item.item) == key(min_v):
                # same key as current winner: emit side by side
                res[n] = item.item

            elif lt_(key(item.item), key(min_v)):
                # strictly smaller (greater if reversed): restart the tuple
                res = new_res()
                res[n] = item.item

        # advance only the iterators whose item was consumed
        for n, x in enumerate(res):
            if x is not None:
                items[n] = _take(lists[n])

        yield tuple(res)
def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
    """ Renders a one-line text progress bar to stream (without a newline)

    val/max_val determines the filled portion; the percentage is overlaid
    in the middle of the bar, and status_str is appended after the counts.
    A leading '\\r' rewinds to the start of the line so repeated calls
    update the bar in place.
    """
    frac = float(val) / max_val

    # progress as percentage
    percentage_str = '{val:.2%}'.format(val=frac)

    # progress bar filled with #s
    filled = int(frac * max_width)
    progress_str = '#' * filled + ' ' * (max_width - filled)

    # insert percentage into the middle of the bar
    percentage_start = int((max_width - len(percentage_str)) / 2)
    progress_str = (progress_str[:percentage_start] +
                    percentage_str +
                    progress_str[percentage_start + len(percentage_str):])

    stream.write('\r [ %s ] %s / %s | %s' % (
        progress_str,
        val,
        max_val,
        status_str))
    stream.flush()
def set_cmp(list, simplify):
    """ Builds a set out of a list, with equality defined by simplify

    Items whose simplify() results collide are considered equal; for each
    group of equal items only one representative is kept.
    """
    by_key = dict((simplify(item), item) for item in list)
    return by_key.values()
def first(it):
    """ Returns the first non-None object, or None if it is exhausted """
    for x in it:
        # identity check: 0, '' and other falsy values still count as found
        if x is not None:
            return x
    return None
def intersect(a, b):
    """ Returns a list of the items that occur in both a and b """
    return list(set(a).intersection(b))
def multi_request_view(cls, view, wrap=True, auto_advance=True,
        *args, **kwargs):
    """
    Splits up a view request into several paginated requests, which reduces
    the server load when the number of returned objects is large.

    NOTE: As such a split request is obviously not atomic anymore, results
    might skip some elements or contain some twice.

    If auto_advance is False the method will always request the same range.
    This can be useful when the view contains unprocessed items and the
    caller processes the items, thus removing them from the view before the
    next request.
    """
    per_page = kwargs.get('limit', 1000)
    # request one extra row so we can tell whether another page follows
    kwargs['limit'] = per_page + 1
    db = get_main_database()
    # wrapper defaults to the document class' wrap(); callers may override
    # it or pass wrapper=None to get raw documents
    wrapper = kwargs.pop('wrapper', cls.wrap)
    cont = True

    while cont:

        resp = db.view(view, *args, **kwargs)
        cont = False

        for n, obj in enumerate(resp.iterator()):

            key = obj['key']

            if wrap:
                doc = wrapper(obj['doc']) if wrapper else obj['doc']
                docid = doc._id if wrapper else obj['id']
            else:
                docid = obj.get('id', None)
                doc = obj

            # n == per_page means this is the extra (per_page+1-th) row:
            # do not yield it, use it as the start of the next page instead
            if n == per_page:
                if auto_advance:
                    kwargs['startkey'] = key
                    if docid is not None:
                        kwargs['startkey_docid'] = docid
                    if 'skip' in kwargs:
                        del kwargs['skip']

                # we reached the end of the page, load next one
                cont = True
                break

            yield doc
# control characters: C0 range (0-31) plus DEL and the C1 range (127-159);
# compiled once at import time instead of on every call
_control_chars = ''.join(chr(i) for i in list(range(0, 32)) + list(range(127, 160)))
_control_char_re = re.compile('[%s]' % re.escape(_control_chars))

def remove_control_chars(s):
    """ Returns s with all ASCII / Latin-1 control characters removed """
    return _control_char_re.sub('', s)
def unzip(a):
    """ Inverse of zip: [(x1, y1), (x2, y2)] -> ([x1, x2], [y1, y2]) """
    return tuple([list(group) for group in zip(*a)])
def parse_range(s, min, max, default=None):
    """
    Parses the string and returns its value. If the value is outside the
    given range, its closest number within the range is returned.

    >>> parse_range('5', 0, 10)
    5

    >>> parse_range('0', 5, 10)
    5

    >>> parse_range('15', 0, 10)
    10

    If the string can't be parsed, the default is returned; if no default
    is given, the middle of the range is used.

    >>> parse_range('x', 0, 20, 20)
    20
    """
    try:
        val = int(s)
    except (ValueError, TypeError):
        return default if default is not None else (max - min) / 2

    if val < min:
        return min
    if val > max:
        return max
    return val
def get_to_dict(cls, ids, get_id=lambda x: x._id, use_cache=False):
    """ Fetches the documents with the given ids and maps id -> object

    Ids without a matching document map to None. With use_cache=True,
    objects are first looked up in (and written back to) the Django cache.
    """
    ids = list(set(ids))  # de-duplicate before querying
    result = dict()

    cached = []
    if use_cache:
        hits = cache.get_many(ids)
        cached.extend(hits.values())
        # only fetch the ids that were not in the cache
        ids = [i for i in ids if i not in hits.keys()]

    fetched = list(cls.get_multi(ids))

    for obj in (cached + fetched):

        # get_multi returns dict {'key': _id, 'error': 'not found'}
        # for non-existing objects
        if isinstance(obj, dict) and 'error' in obj:
            result[obj['key']] = None
            continue

        # an object may answer for several ids (via get_ids)
        obj_ids = obj.get_ids() if hasattr(obj, 'get_ids') else [get_id(obj)]
        for obj_id in obj_ids:
            result[obj_id] = obj

    if use_cache:
        cache.set_many(dict((get_id(obj), obj) for obj in fetched))

    return result
def flatten(l):
    """ Flattens one level of nesting: [[1, 2], [3]] -> [1, 2, 3] """
    result = []
    for sublist in l:
        result.extend(sublist)
    return result
def linearize(key, iterators, reverse=False):
    """
    Linearizes a number of iterators, sorted by some comparison function

    Each input iterator is assumed to already be sorted by key (descending
    if reverse is True); the merged output preserves that order.
    """
    iters = [iter(i) for i in iterators]

    # seed with the head element of every non-empty iterator
    vals = []
    for it in iters:
        try:
            vals.append((next(it), it))
        except StopIteration:
            continue

    while vals:
        # pick the smallest (or largest, if reversed) current head
        vals.sort(key=lambda x: key(x[0]), reverse=reverse)
        val, it = vals.pop(0)
        yield val
        # refill from the iterator the value came from
        try:
            vals.append((next(it), it))
        except StopIteration:
            pass
def skip_pairs(iterator, cmp=None):
    """ Skips pairs of equal items

    cmp is an optional three-way comparison function (two arguments,
    returns <0 / 0 / >0); by default items are compared with ==, < and >.

    >>> list(skip_pairs([]))
    []

    >>> list(skip_pairs([1]))
    [1]

    >>> list(skip_pairs([1, 2, 3]))
    [1, 2, 3]

    >>> list(skip_pairs([1, 1]))
    []

    >>> list(skip_pairs([1, 2, 2]))
    [1]

    >>> list(skip_pairs([1, 2, 2, 3]))
    [1, 3]

    >>> list(skip_pairs([1, 2, 2, 2]))
    [1, 2]

    >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
    [1, 3]
    """
    if cmp is None:
        # default three-way comparison (like the Python 2 builtin cmp())
        cmp = lambda a, b: (a > b) - (a < b)

    iterator = iter(iterator)
    try:
        pending = next(iterator)
    except StopIteration:
        # empty input; do not let StopIteration escape the generator (PEP 479)
        return

    while True:
        item = pending
        try:
            pending = next(iterator)
        except StopIteration:
            # last item has no partner: emit it and finish
            yield item
            return

        if cmp(item, pending) == 0:
            # equal pair: drop both, pull a fresh candidate
            try:
                pending = next(iterator)
            except StopIteration:
                return
        else:
            yield item
def get_timestamp(datetime_obj):
    """ Returns the (local-time) Unix timestamp as an int for the datetime

    NOTE(review): time.mktime interprets the datetime in the local
    timezone, so the result depends on the machine's TZ setting.
    """
    time_struct = datetime_obj.timetuple()
    return int(time.mktime(time_struct))
# matches the scheme prefix of http:// and https:// URLs
re_url = re.compile('^https?://')

def is_url(string):
    """ Returns True if a string looks like an URL

    >>> is_url('http://example.com/some-path/file.xml')
    True

    >>> is_url('something else')
    False
    """
    return re_url.match(string) is not None
def is_couchdb_id(id_str):
    """ Returns True if id_str is a 32-character hex string (CouchDB doc id) """
    import string
    return len(id_str) == 32 and all(c in string.hexdigits for c in id_str)
# from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
# this does not increase asymptotical complexity
# but can still waste more time than it saves.
def shortest_of(strings):
    """ Returns the shortest of the given strings """
    return min(strings, key=len)

def longest_substr(strings):
    """
    Returns the longest common substring of the given strings
    """
    substr = ""
    if not strings:
        return substr
    reference = shortest_of(strings)
    length = len(reference)
    # find a suitable slice i:j
    for i in range(length):
        # only consider candidates longer than the best substring so far;
        # the upper bound must be length + 1 so candidates ending at the
        # end of the reference string are considered too
        for j in range(i + len(substr) + 1, length + 1):
            candidate = reference[i:j]
            if all(candidate in text for text in strings):
                substr = candidate
    return substr
519 def additional_value(it, gen_val, val_changed=lambda _: True):
520 """ Provides an additional value to the elements, calculated when needed
522 For the elements from the iterator, some additional value can be computed
523 by gen_val (which might be an expensive computation).
525 If the elements in the iterator are ordered so that some subsequent
526 elements would generate the same additional value, val_changed can be
527 provided, which receives the next element from the iterator and the
528 previous additional value. If the element would generate the same
529 additional value (val_changed returns False), its computation is skipped.
531 >>> # get the next full hundred higher than x
532 >>> # this will probably be an expensive calculation
533 >>> next_hundred = lambda x: x + 100-(x % 100)
535 >>> # returns True if h is not the value that next_hundred(x) would provide
536 >>> # this should be a relatively cheap calculation, compared to the above
537 >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
539 >>> xs = [0, 50, 100, 101, 199, 200, 201]
540 >>> list(additional_value(xs, next_hundred, diff_hundred))
541 [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
544 _none = object()
545 current = _none
547 for x in it:
548 if current is _none or val_changed(x, current):
549 current = gen_val(x)
551 yield (x, current)
def file_hash(f, h=hashlib.md5, block_size=2**20):
    """ Returns the hash of the contents of a file

    f is a file-like object read in chunks of block_size; h is the hash
    constructor (default md5). The hash object itself is returned (call
    .hexdigest() / .digest() on it).
    """
    f_hash = h()
    while True:
        chunk = f.read(block_size)
        # an empty read ('' or b'') signals EOF for both text and binary
        # streams; the old sentinel '' never matched b'' for binary files
        if not chunk:
            break
        f_hash.update(chunk)
    return f_hash