remove now unused code
[mygpo.git] / mygpo / utils.py
blob3c7782b6c0ac11562ff82adef779b4d29332c479
2 # This file is part of my.gpodder.org.
4 # my.gpodder.org is free software: you can redistribute it and/or modify it
5 # under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or (at your
7 # option) any later version.
9 # my.gpodder.org is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
12 # License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
18 import operator
19 import sys
20 import re
21 import collections
22 from datetime import datetime, timedelta, date
23 import time
25 from django.core.cache import cache
28 def daterange(from_date, to_date=None, leap=timedelta(days=1)):
29 """
30 >>> from_d = datetime(2010, 01, 01)
31 >>> to_d = datetime(2010, 01, 05)
32 >>> list(daterange(from_d, to_d))
33 [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
34 """
36 if to_date is None:
37 if isinstance(from_date, datetime):
38 to_date = datetime.now()
39 else:
40 to_date = date.today()
42 while from_date <= to_date:
43 yield from_date
44 from_date = from_date + leap
45 return
47 def format_time(value):
48 """Format an offset (in seconds) to a string
50 The offset should be an integer or float value.
52 >>> format_time(0)
53 '00:00'
54 >>> format_time(20)
55 '00:20'
56 >>> format_time(3600)
57 '01:00:00'
58 >>> format_time(10921)
59 '03:02:01'
60 """
61 try:
62 dt = datetime.utcfromtimestamp(value)
63 except ValueError:
64 return ''
66 if dt.hour == 0:
67 return dt.strftime('%M:%S')
68 else:
69 return dt.strftime('%H:%M:%S')
71 def parse_time(value):
72 """
73 >>> parse_time(10)
76 >>> parse_time('05:10') #5*60+10
77 310
79 >>> parse_time('1:05:10') #60*60+5*60+10
80 3910
81 """
82 if value is None:
83 raise ValueError('None value in parse_time')
85 if isinstance(value, int):
86 # Don't need to parse already-converted time value
87 return value
89 if value == '':
90 raise ValueError('Empty valueing in parse_time')
92 for format in ('%H:%M:%S', '%M:%S'):
93 try:
94 t = time.strptime(value, format)
95 return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
96 except ValueError, e:
97 continue
99 return int(value)
102 def parse_bool(val):
104 >>> parse_bool('True')
105 True
107 >>> parse_bool('true')
108 True
110 >>> parse_bool('')
111 False
113 if isinstance(val, bool):
114 return val
115 if val.lower() == 'true':
116 return True
117 return False
120 def iterate_together(lists, key=lambda x: x, reverse=False):
122 takes ordered, possibly sparse, lists with similar items
123 (some items have a corresponding item in the other lists, some don't).
125 It then yield tuples of corresponding items, where one element is None is
126 there is no corresponding entry in one of the lists.
128 Tuples where both elements are None are skipped.
130 The results of the key method are used for the comparisons.
132 If reverse is True, the lists are expected to be sorted in reverse order
133 and the results will also be sorted reverse
135 >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
136 [(1, 1), (2, None), (None, 3)]
138 >>> list(iterate_together([[], []]))
141 >>> list(iterate_together([range(1, 3), range(3, 5)]))
142 [(1, None), (2, None), (None, 3), (None, 4)]
144 >>> list(iterate_together([range(1, 3), []]))
145 [(1, None), (2, None)]
147 >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
148 [(1, None), (3, 3)]
151 Next = collections.namedtuple('Next', 'item more')
152 min_ = min if not reverse else max
153 lt_ = operator.lt if not reverse else operator.gt
155 lists = [iter(l) for l in lists]
157 def _take(it):
158 try:
159 i = it.next()
160 while i is None:
161 i = it.next()
162 return Next(i, True)
163 except StopIteration:
164 return Next(None, False)
166 def new_res():
167 return [None]*len(lists)
169 # take first bunch of items
170 items = [_take(l) for l in lists]
172 while any(i.item is not None or i.more for i in items):
174 res = new_res()
176 for n, item in enumerate(items):
178 if item.item is None:
179 continue
181 if all(x is None for x in res):
182 res[n] = item.item
183 continue
185 min_v = min_(filter(lambda x: x is not None, res), key=key)
187 if key(item.item) == key(min_v):
188 res[n] = item.item
190 elif lt_(key(item.item), key(min_v)):
191 res = new_res()
192 res[n] = item.item
194 for n, x in enumerate(res):
195 if x is not None:
196 items[n] = _take(lists[n])
198 yield tuple(res)
201 def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
202 print >> stream, '\r',
203 print >> stream, '[ %s ] %s / %s | %s' % (
204 '#'*int(float(val)/max_val*max_width) +
205 ' ' * (max_width-(int(float(val)/max_val*max_width))),
206 val,
207 max_val,
208 status_str),
209 stream.flush()
212 def set_cmp(list, simplify):
214 Builds a set out of a list but uses the results of simplify to determine equality between items
216 simpl = lambda x: (simplify(x), x)
217 lst = dict(map(simpl, list))
218 return lst.values()
221 def first(it):
223 returns the first not-None object or None if the iterator is exhausted
225 for x in it:
226 if x != None:
227 return x
228 return None
231 def intersect(a, b):
232 return list(set(a) & set(b))
236 def multi_request_view(cls, view, wrap=True, auto_advance=True,
237 *args, **kwargs):
239 splits up a view request into several requests, which reduces
240 the server load of the number of returned objects is large.
242 NOTE: As such a split request is obviously not atomical anymore, results
243 might skip some elements of contain some twice
245 If auto_advance is False the method will always request the same range.
246 This can be useful when the view contain unprocessed items and the caller
247 processes the items, thus removing them from the view before the next
248 request.
251 per_page = kwargs.get('limit', 1000)
252 kwargs['limit'] = per_page + 1
253 db = cls.get_db()
254 wrapper = kwargs.pop('wrapper', cls.wrap)
255 cont = True
257 while cont:
259 resp = db.view(view, *args, **kwargs)
260 cont = False
262 for n, obj in enumerate(resp.iterator()):
264 key = obj['key']
266 if wrap:
267 doc = wrapper(obj['doc']) if wrapper else obj['doc']
268 docid = doc._id if wrapper else obj['id']
269 else:
270 docid = obj.get('id', None)
271 doc = obj
273 if n == per_page:
274 if auto_advance:
275 kwargs['startkey'] = key
276 if docid is not None:
277 kwargs['startkey_docid'] = docid
278 if 'skip' in kwargs:
279 del kwargs['skip']
281 # we reached the end of the page, load next one
282 cont = True
283 break
285 yield doc
288 def remove_control_chars(s):
289 import unicodedata, re
291 all_chars = (unichr(i) for i in xrange(0x110000))
292 control_chars = ''.join(map(unichr, range(0,32) + range(127,160)))
293 control_char_re = re.compile('[%s]' % re.escape(control_chars))
295 return control_char_re.sub('', s)
298 def unzip(a):
299 return tuple(map(list,zip(*a)))
302 def parse_range(s, min, max, default=None):
304 Parses the string and returns its value. If the value is outside the given
305 range, its closest number within the range is returned
307 >>> parse_range('5', 0, 10)
310 >>> parse_range('0', 5, 10)
313 >>> parse_range('15',0, 10)
316 >>> parse_range('x', 0, 20)
319 >>> parse_range('x', 0, 20, 20)
322 try:
323 val = int(s)
324 if val < min:
325 return min
326 if val > max:
327 return max
328 return val
330 except (ValueError, TypeError):
331 return default if default is not None else (max-min)/2
334 def get_to_dict(cls, ids, get_id=lambda x: x._id, use_cache=False):
336 ids = list(set(ids))
337 objs = dict()
339 cache_objs = []
340 if use_cache:
341 for id in ids:
342 obj = cache.get(id)
343 if obj is not None:
344 cache_objs.append(obj)
345 ids.remove(id)
347 db_objs = list(cls.get_multi(ids))
349 for obj in (cache_objs + db_objs):
351 # get_multi returns dict {'key': _id, 'error': 'not found'}
352 # for non-existing objects
353 if isinstance(obj, dict) and 'error' in obj:
354 _id = obj['key']
355 objs[_id] = None
356 continue
358 ids = obj.get_ids() if hasattr(obj, 'get_ids') else [get_id(obj)]
359 for i in ids:
360 objs[i] = obj
362 if use_cache:
363 for obj in db_objs:
364 cache.set(get_id(obj), obj)
366 return objs
369 def flatten(l):
370 return [item for sublist in l for item in sublist]
373 def linearize(key, iterators, reverse=False):
375 Linearizes a number of iterators, sorted by some comparison function
378 iters = [iter(i) for i in iterators]
379 vals = []
380 for i in iters:
381 try:
382 v = i.next()
383 vals. append( (v, i) )
384 except StopIteration:
385 continue
387 while vals:
388 vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
389 val, it = vals.pop(0)
390 yield val
391 try:
392 next_val = it.next()
393 vals.append( (next_val, it) )
394 except StopIteration:
395 pass
398 def skip_pairs(iterator, cmp=cmp):
399 """ Skips pairs of equal items
401 >>> list(skip_pairs([]))
404 >>> list(skip_pairs([1]))
407 >>> list(skip_pairs([1, 2, 3]))
408 [1, 2, 3]
410 >>> list(skip_pairs([1, 1]))
413 >>> list(skip_pairs([1, 2, 2]))
416 >>> list(skip_pairs([1, 2, 2, 3]))
417 [1, 3]
419 >>> list(skip_pairs([1, 2, 2, 2]))
420 [1, 2]
422 >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
423 [1, 3]
426 iterator = iter(iterator)
427 next = iterator.next()
429 while True:
430 item = next
431 try:
432 next = iterator.next()
433 except StopIteration as e:
434 yield item
435 raise e
437 if cmp(item, next) == 0:
438 next = iterator.next()
439 else:
440 yield item
443 def get_timestamp(datetime_obj):
444 """ Returns the timestamp as an int for the given datetime object
446 >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
447 1302168606
449 >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
452 return int(time.mktime(datetime_obj.timetuple()))
456 re_url = re.compile('^https?://')
458 def is_url(string):
459 """ Returns true if a string looks like an URL
461 >>> is_url('http://example.com/some-path/file.xml')
462 True
464 >>> is_url('something else')
465 False
468 return bool(re_url.match(string))
471 def is_couchdb_id(id_str):
472 import string
473 import operator
474 import functools
475 f = functools.partial(operator.contains, string.hexdigits)
476 return len(id_str) == 32 and all(map(f, id_str))
479 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
480 # this does not increase asymptotical complexity
481 # but can still waste more time than it saves.
482 def shortest_of(strings):
483 return min(strings, key=len)
485 def longest_substr(strings):
487 Returns the longest common substring of the given strings
490 substr = ""
491 if not strings:
492 return substr
493 reference = shortest_of(strings) #strings[0]
494 length = len(reference)
495 #find a suitable slice i:j
496 for i in xrange(length):
497 #only consider strings long at least len(substr) + 1
498 for j in xrange(i + len(substr) + 1, length):
499 candidate = reference[i:j]
500 if all(candidate in text for text in strings):
501 substr = candidate
502 return substr
506 def additional_value(it, gen_val, val_changed=lambda _: True):
507 """ Provides an additional value to the elements, calculated when needed
509 For the elements from the iterator, some additional value can be computed
510 by gen_val (which might be an expensive computation).
512 If the elements in the iterator are ordered so that some subsequent
513 elements would generate the same additional value, val_changed can be
514 provided, which receives the next element from the iterator and the
515 previous additional value. If the element would generate the same
516 additional value (val_changed returns False), its computation is skipped.
518 >>> # get the next full hundred higher than x
519 >>> # this will probably be an expensive calculation
520 >>> next_hundred = lambda x: x + 100-(x % 100)
522 >>> # returns True if h is not the value that next_hundred(x) would provide
523 >>> # this should be a relatively cheap calculation, compared to the above
524 >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
526 >>> xs = [0, 50, 100, 101, 199, 200, 201]
527 >>> list(additional_value(xs, next_hundred, diff_hundred))
528 [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
531 _none = object()
532 current = _none
534 for x in it:
535 if current is _none or val_changed(x, current):
536 current = gen_val(x)
538 yield (x, current)