more CSS cleanup
[mygpo.git] / mygpo / utils.py
blobcf235257471288473886ebbd24b338d82aa94cbe
2 # This file is part of my.gpodder.org.
4 # my.gpodder.org is free software: you can redistribute it and/or modify it
5 # under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or (at your
7 # option) any later version.
9 # my.gpodder.org is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
12 # License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
18 import operator
19 import sys
20 import re
21 import collections
22 from datetime import datetime, timedelta, date
23 import time
24 import hashlib
26 from django.core.cache import cache
29 def daterange(from_date, to_date=None, leap=timedelta(days=1)):
30 """
31 >>> from_d = datetime(2010, 01, 01)
32 >>> to_d = datetime(2010, 01, 05)
33 >>> list(daterange(from_d, to_d))
34 [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
35 """
37 if to_date is None:
38 if isinstance(from_date, datetime):
39 to_date = datetime.now()
40 else:
41 to_date = date.today()
43 while from_date <= to_date:
44 yield from_date
45 from_date = from_date + leap
46 return
48 def format_time(value):
49 """Format an offset (in seconds) to a string
51 The offset should be an integer or float value.
53 >>> format_time(0)
54 '00:00'
55 >>> format_time(20)
56 '00:20'
57 >>> format_time(3600)
58 '01:00:00'
59 >>> format_time(10921)
60 '03:02:01'
61 """
62 try:
63 dt = datetime.utcfromtimestamp(value)
64 except ValueError:
65 return ''
67 if dt.hour == 0:
68 return dt.strftime('%M:%S')
69 else:
70 return dt.strftime('%H:%M:%S')
72 def parse_time(value):
73 """
74 >>> parse_time(10)
77 >>> parse_time('05:10') #5*60+10
78 310
80 >>> parse_time('1:05:10') #60*60+5*60+10
81 3910
82 """
83 if value is None:
84 raise ValueError('None value in parse_time')
86 if isinstance(value, int):
87 # Don't need to parse already-converted time value
88 return value
90 if value == '':
91 raise ValueError('Empty valueing in parse_time')
93 for format in ('%H:%M:%S', '%M:%S'):
94 try:
95 t = time.strptime(value, format)
96 return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
97 except ValueError, e:
98 continue
100 return int(value)
103 def parse_bool(val):
105 >>> parse_bool('True')
106 True
108 >>> parse_bool('true')
109 True
111 >>> parse_bool('')
112 False
114 if isinstance(val, bool):
115 return val
116 if val.lower() == 'true':
117 return True
118 return False
121 def iterate_together(lists, key=lambda x: x, reverse=False):
123 takes ordered, possibly sparse, lists with similar items
124 (some items have a corresponding item in the other lists, some don't).
126 It then yield tuples of corresponding items, where one element is None is
127 there is no corresponding entry in one of the lists.
129 Tuples where both elements are None are skipped.
131 The results of the key method are used for the comparisons.
133 If reverse is True, the lists are expected to be sorted in reverse order
134 and the results will also be sorted reverse
136 >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
137 [(1, 1), (2, None), (None, 3)]
139 >>> list(iterate_together([[], []]))
142 >>> list(iterate_together([range(1, 3), range(3, 5)]))
143 [(1, None), (2, None), (None, 3), (None, 4)]
145 >>> list(iterate_together([range(1, 3), []]))
146 [(1, None), (2, None)]
148 >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
149 [(1, None), (3, 3)]
152 Next = collections.namedtuple('Next', 'item more')
153 min_ = min if not reverse else max
154 lt_ = operator.lt if not reverse else operator.gt
156 lists = [iter(l) for l in lists]
158 def _take(it):
159 try:
160 i = it.next()
161 while i is None:
162 i = it.next()
163 return Next(i, True)
164 except StopIteration:
165 return Next(None, False)
167 def new_res():
168 return [None]*len(lists)
170 # take first bunch of items
171 items = [_take(l) for l in lists]
173 while any(i.item is not None or i.more for i in items):
175 res = new_res()
177 for n, item in enumerate(items):
179 if item.item is None:
180 continue
182 if all(x is None for x in res):
183 res[n] = item.item
184 continue
186 min_v = min_(filter(lambda x: x is not None, res), key=key)
188 if key(item.item) == key(min_v):
189 res[n] = item.item
191 elif lt_(key(item.item), key(min_v)):
192 res = new_res()
193 res[n] = item.item
195 for n, x in enumerate(res):
196 if x is not None:
197 items[n] = _take(lists[n])
199 yield tuple(res)
202 def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
204 # progress as percentage
205 percentage_str = '{val:.2%}'.format(val=float(val)/max_val)
207 # progress bar filled with #s
208 progress_str = '#'*int(float(val)/max_val*max_width) + \
209 ' ' * (max_width-(int(float(val)/max_val*max_width)))
211 #insert percentage into bar
212 percentage_start = int((max_width-len(percentage_str))/2)
213 progress_str = progress_str[:percentage_start] + \
214 percentage_str + \
215 progress_str[percentage_start+len(percentage_str):]
217 print >> stream, '\r',
218 print >> stream, '[ %s ] %s / %s | %s' % (
219 progress_str,
220 val,
221 max_val,
222 status_str),
223 stream.flush()
226 def set_cmp(list, simplify):
228 Builds a set out of a list but uses the results of simplify to determine equality between items
230 simpl = lambda x: (simplify(x), x)
231 lst = dict(map(simpl, list))
232 return lst.values()
235 def first(it):
237 returns the first not-None object or None if the iterator is exhausted
239 for x in it:
240 if x != None:
241 return x
242 return None
245 def intersect(a, b):
246 return list(set(a) & set(b))
250 def multi_request_view(cls, view, wrap=True, auto_advance=True,
251 *args, **kwargs):
253 splits up a view request into several requests, which reduces
254 the server load of the number of returned objects is large.
256 NOTE: As such a split request is obviously not atomical anymore, results
257 might skip some elements of contain some twice
259 If auto_advance is False the method will always request the same range.
260 This can be useful when the view contain unprocessed items and the caller
261 processes the items, thus removing them from the view before the next
262 request.
265 per_page = kwargs.get('limit', 1000)
266 kwargs['limit'] = per_page + 1
267 db = cls.get_db()
268 wrapper = kwargs.pop('wrapper', cls.wrap)
269 cont = True
271 while cont:
273 resp = db.view(view, *args, **kwargs)
274 cont = False
276 for n, obj in enumerate(resp.iterator()):
278 key = obj['key']
280 if wrap:
281 doc = wrapper(obj['doc']) if wrapper else obj['doc']
282 docid = doc._id if wrapper else obj['id']
283 else:
284 docid = obj.get('id', None)
285 doc = obj
287 if n == per_page:
288 if auto_advance:
289 kwargs['startkey'] = key
290 if docid is not None:
291 kwargs['startkey_docid'] = docid
292 if 'skip' in kwargs:
293 del kwargs['skip']
295 # we reached the end of the page, load next one
296 cont = True
297 break
299 yield doc
302 def remove_control_chars(s):
303 import unicodedata, re
305 all_chars = (unichr(i) for i in xrange(0x110000))
306 control_chars = ''.join(map(unichr, range(0,32) + range(127,160)))
307 control_char_re = re.compile('[%s]' % re.escape(control_chars))
309 return control_char_re.sub('', s)
312 def unzip(a):
313 return tuple(map(list,zip(*a)))
316 def parse_range(s, min, max, default=None):
318 Parses the string and returns its value. If the value is outside the given
319 range, its closest number within the range is returned
321 >>> parse_range('5', 0, 10)
324 >>> parse_range('0', 5, 10)
327 >>> parse_range('15',0, 10)
330 >>> parse_range('x', 0, 20)
333 >>> parse_range('x', 0, 20, 20)
336 try:
337 val = int(s)
338 if val < min:
339 return min
340 if val > max:
341 return max
342 return val
344 except (ValueError, TypeError):
345 return default if default is not None else (max-min)/2
348 def get_to_dict(cls, ids, get_id=lambda x: x._id, use_cache=False):
350 ids = list(set(ids))
351 objs = dict()
353 cache_objs = []
354 if use_cache:
355 for id in ids:
356 obj = cache.get(id)
357 if obj is not None:
358 cache_objs.append(obj)
359 ids.remove(id)
361 db_objs = list(cls.get_multi(ids))
363 for obj in (cache_objs + db_objs):
365 # get_multi returns dict {'key': _id, 'error': 'not found'}
366 # for non-existing objects
367 if isinstance(obj, dict) and 'error' in obj:
368 _id = obj['key']
369 objs[_id] = None
370 continue
372 ids = obj.get_ids() if hasattr(obj, 'get_ids') else [get_id(obj)]
373 for i in ids:
374 objs[i] = obj
376 if use_cache:
377 for obj in db_objs:
378 cache.set(get_id(obj), obj)
380 return objs
383 def flatten(l):
384 return [item for sublist in l for item in sublist]
387 def linearize(key, iterators, reverse=False):
389 Linearizes a number of iterators, sorted by some comparison function
392 iters = [iter(i) for i in iterators]
393 vals = []
394 for i in iters:
395 try:
396 v = i.next()
397 vals. append( (v, i) )
398 except StopIteration:
399 continue
401 while vals:
402 vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
403 val, it = vals.pop(0)
404 yield val
405 try:
406 next_val = it.next()
407 vals.append( (next_val, it) )
408 except StopIteration:
409 pass
412 def skip_pairs(iterator, cmp=cmp):
413 """ Skips pairs of equal items
415 >>> list(skip_pairs([]))
418 >>> list(skip_pairs([1]))
421 >>> list(skip_pairs([1, 2, 3]))
422 [1, 2, 3]
424 >>> list(skip_pairs([1, 1]))
427 >>> list(skip_pairs([1, 2, 2]))
430 >>> list(skip_pairs([1, 2, 2, 3]))
431 [1, 3]
433 >>> list(skip_pairs([1, 2, 2, 2]))
434 [1, 2]
436 >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
437 [1, 3]
440 iterator = iter(iterator)
441 next = iterator.next()
443 while True:
444 item = next
445 try:
446 next = iterator.next()
447 except StopIteration as e:
448 yield item
449 raise e
451 if cmp(item, next) == 0:
452 next = iterator.next()
453 else:
454 yield item
457 def get_timestamp(datetime_obj):
458 """ Returns the timestamp as an int for the given datetime object
460 >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
461 1302168606
463 >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
466 return int(time.mktime(datetime_obj.timetuple()))
470 re_url = re.compile('^https?://')
472 def is_url(string):
473 """ Returns true if a string looks like an URL
475 >>> is_url('http://example.com/some-path/file.xml')
476 True
478 >>> is_url('something else')
479 False
482 return bool(re_url.match(string))
485 def is_couchdb_id(id_str):
486 import string
487 import operator
488 import functools
489 f = functools.partial(operator.contains, string.hexdigits)
490 return len(id_str) == 32 and all(map(f, id_str))
493 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
494 # this does not increase asymptotical complexity
495 # but can still waste more time than it saves.
496 def shortest_of(strings):
497 return min(strings, key=len)
499 def longest_substr(strings):
501 Returns the longest common substring of the given strings
504 substr = ""
505 if not strings:
506 return substr
507 reference = shortest_of(strings) #strings[0]
508 length = len(reference)
509 #find a suitable slice i:j
510 for i in xrange(length):
511 #only consider strings long at least len(substr) + 1
512 for j in xrange(i + len(substr) + 1, length):
513 candidate = reference[i:j]
514 if all(candidate in text for text in strings):
515 substr = candidate
516 return substr
520 def additional_value(it, gen_val, val_changed=lambda _: True):
521 """ Provides an additional value to the elements, calculated when needed
523 For the elements from the iterator, some additional value can be computed
524 by gen_val (which might be an expensive computation).
526 If the elements in the iterator are ordered so that some subsequent
527 elements would generate the same additional value, val_changed can be
528 provided, which receives the next element from the iterator and the
529 previous additional value. If the element would generate the same
530 additional value (val_changed returns False), its computation is skipped.
532 >>> # get the next full hundred higher than x
533 >>> # this will probably be an expensive calculation
534 >>> next_hundred = lambda x: x + 100-(x % 100)
536 >>> # returns True if h is not the value that next_hundred(x) would provide
537 >>> # this should be a relatively cheap calculation, compared to the above
538 >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
540 >>> xs = [0, 50, 100, 101, 199, 200, 201]
541 >>> list(additional_value(xs, next_hundred, diff_hundred))
542 [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
545 _none = object()
546 current = _none
548 for x in it:
549 if current is _none or val_changed(x, current):
550 current = gen_val(x)
552 yield (x, current)
555 def file_hash(f, h=hashlib.md5, block_size=2**20):
556 """ returns the hash of the contents of a file """
557 f_hash = h()
558 for chunk in iter(lambda: f.read(block_size), ''):
559 f_hash.update(chunk)
560 return f_hash