remove unused imports of get_main_database
[mygpo.git] / mygpo / utils.py
blob5c6c891a06532e9295f920a8987d455c89b5cfc8
2 # This file is part of my.gpodder.org.
4 # my.gpodder.org is free software: you can redistribute it and/or modify it
5 # under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or (at your
7 # option) any later version.
9 # my.gpodder.org is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
12 # License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
18 import operator
19 import sys
20 import re
21 import collections
22 from datetime import datetime, timedelta, date
23 import time
24 import hashlib
26 from django.core.cache import cache
def daterange(from_date, to_date=None, leap=timedelta(days=1)):
    """Yield the values from from_date up to and including to_date.

    Consecutive values are ``leap`` apart. When to_date is omitted, the
    current moment is used as the upper bound: ``datetime.now()`` if
    from_date is a datetime, ``date.today()`` otherwise.

    >>> from_d = datetime(2010, 1, 1)
    >>> to_d = datetime(2010, 1, 5)
    >>> list(daterange(from_d, to_d))
    [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
    """
    if to_date is None:
        # pick an upper bound of the same kind as the lower bound
        to_date = datetime.now() if isinstance(from_date, datetime) \
            else date.today()

    current = from_date
    while current <= to_date:
        yield current
        current = current + leap
def format_time(value):
    """Format an offset (in seconds) to a string

    The offset should be an integer or float value; fractions of a second
    are dropped. The hour part is only included when it is non-zero and is
    not limited to 23, so offsets of a day or more are not wrapped around.

    Returns an empty string for offsets that cannot be represented
    (negative values, NaN, infinity).

    >>> format_time(0)
    '00:00'
    >>> format_time(20)
    '00:20'
    >>> format_time(3600)
    '01:00:00'
    >>> format_time(10921)
    '03:02:01'
    >>> format_time(90061)
    '25:01:01'
    """
    try:
        total = int(value)
    except (ValueError, OverflowError):
        # int() raises ValueError for NaN and OverflowError for infinity
        return ''

    if total < 0:
        return ''

    minutes, seconds = divmod(total, 60)
    hours, minutes = divmod(minutes, 60)

    if hours == 0:
        return '%02d:%02d' % (minutes, seconds)

    return '%02d:%02d:%02d' % (hours, minutes, seconds)
def parse_time(value):
    """Parse a time offset and return it as a number of seconds

    Integers are returned unchanged. Strings are tried as 'HH:MM:SS', then
    as 'MM:SS' and finally as a plain integer literal.

    Raises ValueError if value is None, an empty string or cannot be parsed
    in any of the supported formats.

    >>> parse_time(10)
    10

    >>> parse_time('05:10') #5*60+10
    310

    >>> parse_time('1:05:10') #60*60+5*60+10
    3910
    """
    if value is None:
        raise ValueError('None value in parse_time')

    if isinstance(value, int):
        # Don't need to parse already-converted time value
        return value

    if value == '':
        raise ValueError('Empty string in parse_time')

    for fmt in ('%H:%M:%S', '%M:%S'):
        try:
            t = time.strptime(value, fmt)
        except ValueError:
            # does not match this format, try the next one
            continue
        return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec

    # last resort: a plain number of seconds (raises ValueError otherwise)
    return int(value)
def parse_bool(val):
    """Interpret val as a boolean

    Booleans are passed through unchanged; any other value is considered
    True only if its lower-cased form equals 'true'.

    >>> parse_bool('True')
    True

    >>> parse_bool('true')
    True

    >>> parse_bool('')
    False
    """
    if not isinstance(val, bool):
        return val.lower() == 'true'
    return val
def iterate_together(lists, key=lambda x: x, reverse=False):
    """
    takes ordered, possibly sparse, lists with similar items
    (some items have a corresponding item in the other lists, some don't).

    It then yields tuples of corresponding items, where one element is None
    if there is no corresponding entry in one of the lists.

    Tuples where all elements are None are skipped; None entries in the
    input lists are ignored.

    The results of the key method are used for the comparisons.

    If reverse is True, the lists are expected to be sorted in reverse order
    and the results will also be sorted reverse

    >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
    [(1, 1), (2, None), (None, 3)]

    >>> list(iterate_together([[], []]))
    []

    >>> list(iterate_together([range(1, 3), range(3, 5)]))
    [(1, None), (2, None), (None, 3), (None, 4)]

    >>> list(iterate_together([range(1, 3), []]))
    [(1, None), (2, None)]

    >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
    [(1, None), (3, 3)]
    """

    Next = collections.namedtuple('Next', 'item more')
    min_ = min if not reverse else max
    lt_ = operator.lt if not reverse else operator.gt

    lists = [iter(l) for l in lists]

    def _take(it):
        # fetch the next not-None item; more=False marks an exhausted list
        try:
            i = next(it)
            while i is None:
                i = next(it)
            return Next(i, True)
        except StopIteration:
            return Next(None, False)

    def new_res():
        return [None]*len(lists)

    # take first bunch of items
    items = [_take(l) for l in lists]

    while any(i.item is not None or i.more for i in items):

        res = new_res()

        for n, item in enumerate(items):

            if item.item is None:
                continue

            if all(x is None for x in res):
                res[n] = item.item
                continue

            # smallest (largest if reverse) candidate collected so far
            min_v = min_([x for x in res if x is not None], key=key)

            if key(item.item) == key(min_v):
                res[n] = item.item

            elif lt_(key(item.item), key(min_v)):
                # found an item that sorts earlier; discard the candidates
                res = new_res()
                res[n] = item.item

        # advance only the lists whose head is part of this result
        for n, x in enumerate(res):
            if x is not None:
                items[n] = _take(lists[n])

        yield tuple(res)
202 def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
204 # progress as percentage
205 percentage_str = '{val:.2%}'.format(val=float(val)/max_val)
207 # progress bar filled with #s
208 progress_str = '#'*int(float(val)/max_val*max_width) + \
209 ' ' * (max_width-(int(float(val)/max_val*max_width)))
211 #insert percentage into bar
212 percentage_start = int((max_width-len(percentage_str))/2)
213 progress_str = progress_str[:percentage_start] + \
214 percentage_str + \
215 progress_str[percentage_start+len(percentage_str):]
217 print >> stream, '\r',
218 print >> stream, '[ %s ] %s / %s | %s' % (
219 progress_str,
220 val,
221 max_val,
222 status_str),
223 stream.flush()
def set_cmp(list, simplify):
    """
    Builds a set out of a list but uses the results of simplify to determine
    equality between items

    When several items share the same simplified value, the one that occurs
    last in the list is kept.
    """
    by_simplified = dict((simplify(item), item) for item in list)
    return by_simplified.values()
def first(it):
    """
    returns the first not-None object or None if the iterator is exhausted

    Items are tested by identity (``is not None``), so objects that
    overload their comparison operators are never mistaken for None.
    """
    return next((x for x in it if x is not None), None)
def intersect(a, b):
    """ Returns a list of the distinct items contained in both a and b """
    in_a = set(a)
    in_b = set(b)
    return list(in_a.intersection(in_b))
def remove_control_chars(s):
    """ Returns s without the control characters U+0000-U+001F and
    U+007F-U+009F

    Note that this includes tab, newline and carriage return.
    """
    import re

    # a constant pattern lets the re module serve the compiled expression
    # from its cache instead of rebuilding the character class on every call
    return re.sub(r'[\x00-\x1f\x7f-\x9f]', '', s)
def unzip(a):
    """ Reverses zip(): turns a sequence of n-tuples into a tuple of n lists """
    columns = zip(*a)
    return tuple(list(column) for column in columns)
def parse_range(s, min, max, default=None):
    """
    Parses the string and returns its value. If the value is outside the given
    range, its closest number within the range is returned

    If the string can not be parsed, default is returned. If no default is
    given, the (rounded down) middle of the range is used instead.

    >>> parse_range('5', 0, 10)
    5

    >>> parse_range('0', 5, 10)
    5

    >>> parse_range('15',0, 10)
    10

    >>> parse_range('x', 0, 20)
    10

    >>> parse_range('x', 0, 20, 20)
    20

    >>> parse_range('x', 10, 20)
    15
    """
    try:
        val = int(s)
    except (ValueError, TypeError):
        if default is not None:
            return default
        # the offset from min is required for the result to lie within the
        # range when min != 0
        return min + (max - min) // 2

    if val < min:
        return min
    if val > max:
        return max
    return val
def flatten(l):
    """ Concatenates the items of all sub-sequences of l into one list """
    flat = []
    for sublist in l:
        for item in sublist:
            flat.append(item)
    return flat
def linearize(key, iterators, reverse=False):
    """
    Linearizes a number of iterators, sorted by some comparison function

    Repeatedly yields the head item with the smallest key (largest if
    reverse is True) among all iterators. The output is only globally
    sorted if every iterator is itself sorted accordingly.
    """

    _exhausted = object()

    # pair the current head of every non-empty iterator with its source
    vals = []
    for it in (iter(i) for i in iterators):
        head = next(it, _exhausted)
        if head is not _exhausted:
            vals.append((head, it))

    while vals:
        # the sort is stable, so items with equal keys keep their order
        vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
        val, it = vals.pop(0)
        yield val

        # refill from the iterator the yielded item came from
        next_val = next(it, _exhausted)
        if next_val is not _exhausted:
            vals.append((next_val, it))
def skip_pairs(iterator, cmp=None):
    """ Skips pairs of equal items

    Two adjacent items are considered equal if cmp(a, b) returns 0; both
    of them are dropped. If cmp is not given, items are compared with ==.

    >>> list(skip_pairs([]))
    []

    >>> list(skip_pairs([1]))
    [1]

    >>> list(skip_pairs([1, 2, 3]))
    [1, 2, 3]

    >>> list(skip_pairs([1, 1]))
    []

    >>> list(skip_pairs([1, 2, 2]))
    [1]

    >>> list(skip_pairs([1, 2, 2, 3]))
    [1, 3]

    >>> list(skip_pairs([1, 2, 2, 2]))
    [1, 2]

    >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
    [1, 3]
    """

    if cmp is None:
        def cmp(a, b):
            return 0 if a == b else 1

    # a sentinel marks exhaustion, so that no StopIteration leaks out of the
    # generator body (which is a RuntimeError under PEP 479)
    _end = object()

    iterator = iter(iterator)
    upcoming = next(iterator, _end)

    while upcoming is not _end:
        item = upcoming
        upcoming = next(iterator, _end)

        if upcoming is _end:
            # last item has no partner
            yield item
            return

        if cmp(item, upcoming) == 0:
            # drop both items of the pair
            upcoming = next(iterator, _end)
        else:
            yield item
def get_timestamp(datetime_obj):
    """ Returns the timestamp as an int for the given datetime object

    Naive datetime objects are interpreted as UTC, independent of the
    timezone of the host; aware objects are converted to UTC first.

    >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
    1302168606

    >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
    0
    """
    import calendar

    # time.mktime() would interpret the tuple as local time, which yields
    # the documented values only on hosts running in UTC
    return int(calendar.timegm(datetime_obj.utctimetuple()))
re_url = re.compile('^https?://')


def is_url(string):
    """ Returns true if a string looks like an URL

    Only the scheme is inspected: the string has to start with http:// or
    https://

    >>> is_url('http://example.com/some-path/file.xml')
    True

    >>> is_url('something else')
    False
    """

    found = re_url.match(string)
    return found is not None
# from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
# this does not increase asymptotical complexity
# but can still waste more time than it saves.
def shortest_of(strings):
    """ Returns the shortest of the given strings """
    return min(strings, key=len)


def longest_substr(strings):
    """
    Returns the longest common substring of the given strings

    If several common substrings share the maximum length, the one that
    occurs first in the shortest string is returned. An empty list of
    strings yields an empty string.
    """

    substr = ""
    if not strings:
        return substr

    # every common substring is contained in the shortest string
    reference = shortest_of(strings)
    length = len(reference)

    # find a suitable slice i:j
    for i in range(length):
        # only consider candidates of at least len(substr) + 1 characters;
        # the end index j may be equal to length (slice ends are exclusive)
        for j in range(i + len(substr) + 1, length + 1):
            candidate = reference[i:j]
            if not all(candidate in text for text in strings):
                # extending a candidate that is not common can't help
                break
            substr = candidate

    return substr
def additional_value(it, gen_val, val_changed=lambda _elem, _prev: True):
    """ Provides an additional value to the elements, calculated when needed

    For the elements from the iterator, some additional value can be computed
    by gen_val (which might be an expensive computation).

    If the elements in the iterator are ordered so that some subsequent
    elements would generate the same additional value, val_changed can be
    provided, which receives the next element from the iterator and the
    previous additional value. If the element would generate the same
    additional value (val_changed returns False), its computation is skipped.

    If val_changed is not given, the additional value is computed for
    every element.

    Yields (element, additional value) tuples.

    >>> # get the next full hundred higher than x
    >>> # this will probably be an expensive calculation
    >>> next_hundred = lambda x: x + 100-(x % 100)

    >>> # returns True if h is not the value that next_hundred(x) would provide
    >>> # this should be a relatively cheap calculation, compared to the above
    >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100

    >>> xs = [0, 50, 100, 101, 199, 200, 201]
    >>> list(additional_value(xs, next_hundred, diff_hundred))
    [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
    """

    # private marker for "nothing computed yet"; None could be a valid value
    _none = object()
    current = _none

    for x in it:
        if current is _none or val_changed(x, current):
            current = gen_val(x)

        yield (x, current)
def file_hash(f, h=hashlib.md5, block_size=2**20):
    """ returns the hash of the contents of a file

    f is read in chunks of block_size from its current position until it is
    exhausted. The hash object (not its digest) created by h is returned.
    """
    f_hash = h()

    while True:
        chunk = f.read(block_size)
        # testing for emptiness works for both '' and b'', whereas comparing
        # against a '' sentinel never terminates for binary files on Python 3
        if not chunk:
            break
        f_hash.update(chunk)

    return f_hash
def split_list(l, prop):
    """ split elements that satisfy a property, and those that don't

    Returns a tuple (match, nomatch) of two lists which both preserve the
    order of the elements in l. prop is called once per element, and l is
    traversed only once, so it may be any iterable.
    """
    match, nomatch = [], []

    for x in l:
        if prop(x):
            match.append(x)
        else:
            nomatch.append(x)

    return match, nomatch