refactor db.couchdb module, add caching
[mygpo.git] / mygpo / utils.py
blob996d7b8df43c8c9dcfd7cd64260250b0dc53e845
2 # This file is part of my.gpodder.org.
4 # my.gpodder.org is free software: you can redistribute it and/or modify it
5 # under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or (at your
7 # option) any later version.
9 # my.gpodder.org is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
12 # License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
18 import operator
19 import sys
20 import re
21 import collections
22 from datetime import datetime, timedelta, date
23 import time
24 import hashlib
26 from django.core.cache import cache
28 from mygpo.couch import get_main_database
31 def daterange(from_date, to_date=None, leap=timedelta(days=1)):
32 """
33 >>> from_d = datetime(2010, 01, 01)
34 >>> to_d = datetime(2010, 01, 05)
35 >>> list(daterange(from_d, to_d))
36 [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
37 """
39 if to_date is None:
40 if isinstance(from_date, datetime):
41 to_date = datetime.now()
42 else:
43 to_date = date.today()
45 while from_date <= to_date:
46 yield from_date
47 from_date = from_date + leap
48 return
50 def format_time(value):
51 """Format an offset (in seconds) to a string
53 The offset should be an integer or float value.
55 >>> format_time(0)
56 '00:00'
57 >>> format_time(20)
58 '00:20'
59 >>> format_time(3600)
60 '01:00:00'
61 >>> format_time(10921)
62 '03:02:01'
63 """
64 try:
65 dt = datetime.utcfromtimestamp(value)
66 except ValueError:
67 return ''
69 if dt.hour == 0:
70 return dt.strftime('%M:%S')
71 else:
72 return dt.strftime('%H:%M:%S')
74 def parse_time(value):
75 """
76 >>> parse_time(10)
79 >>> parse_time('05:10') #5*60+10
80 310
82 >>> parse_time('1:05:10') #60*60+5*60+10
83 3910
84 """
85 if value is None:
86 raise ValueError('None value in parse_time')
88 if isinstance(value, int):
89 # Don't need to parse already-converted time value
90 return value
92 if value == '':
93 raise ValueError('Empty valueing in parse_time')
95 for format in ('%H:%M:%S', '%M:%S'):
96 try:
97 t = time.strptime(value, format)
98 return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
99 except ValueError, e:
100 continue
102 return int(value)
105 def parse_bool(val):
107 >>> parse_bool('True')
108 True
110 >>> parse_bool('true')
111 True
113 >>> parse_bool('')
114 False
116 if isinstance(val, bool):
117 return val
118 if val.lower() == 'true':
119 return True
120 return False
123 def iterate_together(lists, key=lambda x: x, reverse=False):
125 takes ordered, possibly sparse, lists with similar items
126 (some items have a corresponding item in the other lists, some don't).
128 It then yield tuples of corresponding items, where one element is None is
129 there is no corresponding entry in one of the lists.
131 Tuples where both elements are None are skipped.
133 The results of the key method are used for the comparisons.
135 If reverse is True, the lists are expected to be sorted in reverse order
136 and the results will also be sorted reverse
138 >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
139 [(1, 1), (2, None), (None, 3)]
141 >>> list(iterate_together([[], []]))
144 >>> list(iterate_together([range(1, 3), range(3, 5)]))
145 [(1, None), (2, None), (None, 3), (None, 4)]
147 >>> list(iterate_together([range(1, 3), []]))
148 [(1, None), (2, None)]
150 >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
151 [(1, None), (3, 3)]
154 Next = collections.namedtuple('Next', 'item more')
155 min_ = min if not reverse else max
156 lt_ = operator.lt if not reverse else operator.gt
158 lists = [iter(l) for l in lists]
160 def _take(it):
161 try:
162 i = it.next()
163 while i is None:
164 i = it.next()
165 return Next(i, True)
166 except StopIteration:
167 return Next(None, False)
169 def new_res():
170 return [None]*len(lists)
172 # take first bunch of items
173 items = [_take(l) for l in lists]
175 while any(i.item is not None or i.more for i in items):
177 res = new_res()
179 for n, item in enumerate(items):
181 if item.item is None:
182 continue
184 if all(x is None for x in res):
185 res[n] = item.item
186 continue
188 min_v = min_(filter(lambda x: x is not None, res), key=key)
190 if key(item.item) == key(min_v):
191 res[n] = item.item
193 elif lt_(key(item.item), key(min_v)):
194 res = new_res()
195 res[n] = item.item
197 for n, x in enumerate(res):
198 if x is not None:
199 items[n] = _take(lists[n])
201 yield tuple(res)
204 def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
206 # progress as percentage
207 percentage_str = '{val:.2%}'.format(val=float(val)/max_val)
209 # progress bar filled with #s
210 progress_str = '#'*int(float(val)/max_val*max_width) + \
211 ' ' * (max_width-(int(float(val)/max_val*max_width)))
213 #insert percentage into bar
214 percentage_start = int((max_width-len(percentage_str))/2)
215 progress_str = progress_str[:percentage_start] + \
216 percentage_str + \
217 progress_str[percentage_start+len(percentage_str):]
219 print >> stream, '\r',
220 print >> stream, '[ %s ] %s / %s | %s' % (
221 progress_str,
222 val,
223 max_val,
224 status_str),
225 stream.flush()
228 def set_cmp(list, simplify):
230 Builds a set out of a list but uses the results of simplify to determine equality between items
232 simpl = lambda x: (simplify(x), x)
233 lst = dict(map(simpl, list))
234 return lst.values()
237 def first(it):
239 returns the first not-None object or None if the iterator is exhausted
241 for x in it:
242 if x != None:
243 return x
244 return None
247 def intersect(a, b):
248 return list(set(a) & set(b))
252 def remove_control_chars(s):
253 import unicodedata, re
255 all_chars = (unichr(i) for i in xrange(0x110000))
256 control_chars = ''.join(map(unichr, range(0,32) + range(127,160)))
257 control_char_re = re.compile('[%s]' % re.escape(control_chars))
259 return control_char_re.sub('', s)
262 def unzip(a):
263 return tuple(map(list,zip(*a)))
266 def parse_range(s, min, max, default=None):
268 Parses the string and returns its value. If the value is outside the given
269 range, its closest number within the range is returned
271 >>> parse_range('5', 0, 10)
274 >>> parse_range('0', 5, 10)
277 >>> parse_range('15',0, 10)
280 >>> parse_range('x', 0, 20)
283 >>> parse_range('x', 0, 20, 20)
286 try:
287 val = int(s)
288 if val < min:
289 return min
290 if val > max:
291 return max
292 return val
294 except (ValueError, TypeError):
295 return default if default is not None else (max-min)/2
299 def flatten(l):
300 return [item for sublist in l for item in sublist]
303 def linearize(key, iterators, reverse=False):
305 Linearizes a number of iterators, sorted by some comparison function
308 iters = [iter(i) for i in iterators]
309 vals = []
310 for i in iters:
311 try:
312 v = i.next()
313 vals. append( (v, i) )
314 except StopIteration:
315 continue
317 while vals:
318 vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
319 val, it = vals.pop(0)
320 yield val
321 try:
322 next_val = it.next()
323 vals.append( (next_val, it) )
324 except StopIteration:
325 pass
328 def skip_pairs(iterator, cmp=cmp):
329 """ Skips pairs of equal items
331 >>> list(skip_pairs([]))
334 >>> list(skip_pairs([1]))
337 >>> list(skip_pairs([1, 2, 3]))
338 [1, 2, 3]
340 >>> list(skip_pairs([1, 1]))
343 >>> list(skip_pairs([1, 2, 2]))
346 >>> list(skip_pairs([1, 2, 2, 3]))
347 [1, 3]
349 >>> list(skip_pairs([1, 2, 2, 2]))
350 [1, 2]
352 >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
353 [1, 3]
356 iterator = iter(iterator)
357 next = iterator.next()
359 while True:
360 item = next
361 try:
362 next = iterator.next()
363 except StopIteration as e:
364 yield item
365 raise e
367 if cmp(item, next) == 0:
368 next = iterator.next()
369 else:
370 yield item
373 def get_timestamp(datetime_obj):
374 """ Returns the timestamp as an int for the given datetime object
376 >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
377 1302168606
379 >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
382 return int(time.mktime(datetime_obj.timetuple()))
386 re_url = re.compile('^https?://')
388 def is_url(string):
389 """ Returns true if a string looks like an URL
391 >>> is_url('http://example.com/some-path/file.xml')
392 True
394 >>> is_url('something else')
395 False
398 return bool(re_url.match(string))
402 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
403 # this does not increase asymptotical complexity
404 # but can still waste more time than it saves.
405 def shortest_of(strings):
406 return min(strings, key=len)
408 def longest_substr(strings):
410 Returns the longest common substring of the given strings
413 substr = ""
414 if not strings:
415 return substr
416 reference = shortest_of(strings) #strings[0]
417 length = len(reference)
418 #find a suitable slice i:j
419 for i in xrange(length):
420 #only consider strings long at least len(substr) + 1
421 for j in xrange(i + len(substr) + 1, length):
422 candidate = reference[i:j]
423 if all(candidate in text for text in strings):
424 substr = candidate
425 return substr
429 def additional_value(it, gen_val, val_changed=lambda _: True):
430 """ Provides an additional value to the elements, calculated when needed
432 For the elements from the iterator, some additional value can be computed
433 by gen_val (which might be an expensive computation).
435 If the elements in the iterator are ordered so that some subsequent
436 elements would generate the same additional value, val_changed can be
437 provided, which receives the next element from the iterator and the
438 previous additional value. If the element would generate the same
439 additional value (val_changed returns False), its computation is skipped.
441 >>> # get the next full hundred higher than x
442 >>> # this will probably be an expensive calculation
443 >>> next_hundred = lambda x: x + 100-(x % 100)
445 >>> # returns True if h is not the value that next_hundred(x) would provide
446 >>> # this should be a relatively cheap calculation, compared to the above
447 >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
449 >>> xs = [0, 50, 100, 101, 199, 200, 201]
450 >>> list(additional_value(xs, next_hundred, diff_hundred))
451 [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
454 _none = object()
455 current = _none
457 for x in it:
458 if current is _none or val_changed(x, current):
459 current = gen_val(x)
461 yield (x, current)
464 def file_hash(f, h=hashlib.md5, block_size=2**20):
465 """ returns the hash of the contents of a file """
466 f_hash = h()
467 for chunk in iter(lambda: f.read(block_size), ''):
468 f_hash.update(chunk)
469 return f_hash