2 # This file is part of my.gpodder.org.
4 # my.gpodder.org is free software: you can redistribute it and/or modify it
5 # under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or (at your
7 # option) any later version.
9 # my.gpodder.org is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
12 # License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
22 from datetime
import datetime
, timedelta
, date
26 from django
.core
.cache
import cache
29 def daterange(from_date
, to_date
=None, leap
=timedelta(days
=1)):
31 >>> from_d = datetime(2010, 01, 01)
32 >>> to_d = datetime(2010, 01, 05)
33 >>> list(daterange(from_d, to_d))
34 [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
38 if isinstance(from_date
, datetime
):
39 to_date
= datetime
.now()
41 to_date
= date
.today()
43 while from_date
<= to_date
:
45 from_date
= from_date
+ leap
48 def format_time(value
):
49 """Format an offset (in seconds) to a string
51 The offset should be an integer or float value.
59 >>> format_time(10921)
63 dt
= datetime
.utcfromtimestamp(value
)
68 return dt
.strftime('%M:%S')
70 return dt
.strftime('%H:%M:%S')
72 def parse_time(value
):
77 >>> parse_time('05:10') #5*60+10
80 >>> parse_time('1:05:10') #60*60+5*60+10
84 raise ValueError('None value in parse_time')
86 if isinstance(value
, int):
87 # Don't need to parse already-converted time value
91 raise ValueError('Empty valueing in parse_time')
93 for format
in ('%H:%M:%S', '%M:%S'):
95 t
= time
.strptime(value
, format
)
96 return t
.tm_hour
* 60*60 + t
.tm_min
* 60 + t
.tm_sec
105 >>> parse_bool('True')
108 >>> parse_bool('true')
114 if isinstance(val
, bool):
116 if val
.lower() == 'true':
121 def iterate_together(lists
, key
=lambda x
: x
, reverse
=False):
123 takes ordered, possibly sparse, lists with similar items
124 (some items have a corresponding item in the other lists, some don't).
126 It then yield tuples of corresponding items, where one element is None is
127 there is no corresponding entry in one of the lists.
129 Tuples where both elements are None are skipped.
131 The results of the key method are used for the comparisons.
133 If reverse is True, the lists are expected to be sorted in reverse order
134 and the results will also be sorted reverse
136 >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
137 [(1, 1), (2, None), (None, 3)]
139 >>> list(iterate_together([[], []]))
142 >>> list(iterate_together([range(1, 3), range(3, 5)]))
143 [(1, None), (2, None), (None, 3), (None, 4)]
145 >>> list(iterate_together([range(1, 3), []]))
146 [(1, None), (2, None)]
148 >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
152 Next
= collections
.namedtuple('Next', 'item more')
153 min_
= min if not reverse
else max
154 lt_
= operator
.lt
if not reverse
else operator
.gt
156 lists
= [iter(l
) for l
in lists
]
164 except StopIteration:
165 return Next(None, False)
168 return [None]*len(lists
)
170 # take first bunch of items
171 items
= [_take(l
) for l
in lists
]
173 while any(i
.item
is not None or i
.more
for i
in items
):
177 for n
, item
in enumerate(items
):
179 if item
.item
is None:
182 if all(x
is None for x
in res
):
186 min_v
= min_(filter(lambda x
: x
is not None, res
), key
=key
)
188 if key(item
.item
) == key(min_v
):
191 elif lt_(key(item
.item
), key(min_v
)):
195 for n
, x
in enumerate(res
):
197 items
[n
] = _take(lists
[n
])
202 def progress(val
, max_val
, status_str
='', max_width
=50, stream
=sys
.stdout
):
204 # progress as percentage
205 percentage_str
= '{val:.2%}'.format(val
=float(val
)/max_val
)
207 # progress bar filled with #s
208 progress_str
= '#'*int(float(val
)/max_val
*max_width
) + \
209 ' ' * (max_width
-(int(float(val
)/max_val
*max_width
)))
211 #insert percentage into bar
212 percentage_start
= int((max_width
-len(percentage_str
))/2)
213 progress_str
= progress_str
[:percentage_start
] + \
215 progress_str
[percentage_start
+len(percentage_str
):]
217 print >> stream
, '\r',
218 print >> stream
, '[ %s ] %s / %s | %s' % (
226 def set_cmp(list, simplify
):
228 Builds a set out of a list but uses the results of simplify to determine equality between items
230 simpl
= lambda x
: (simplify(x
), x
)
231 lst
= dict(map(simpl
, list))
237 returns the first not-None object or None if the iterator is exhausted
246 return list(set(a
) & set(b
))
250 def multi_request_view(cls
, view
, wrap
=True, auto_advance
=True,
253 splits up a view request into several requests, which reduces
254 the server load of the number of returned objects is large.
256 NOTE: As such a split request is obviously not atomical anymore, results
257 might skip some elements of contain some twice
259 If auto_advance is False the method will always request the same range.
260 This can be useful when the view contain unprocessed items and the caller
261 processes the items, thus removing them from the view before the next
265 per_page
= kwargs
.get('limit', 1000)
266 kwargs
['limit'] = per_page
+ 1
268 wrapper
= kwargs
.pop('wrapper', cls
.wrap
)
273 resp
= db
.view(view
, *args
, **kwargs
)
276 for n
, obj
in enumerate(resp
.iterator()):
281 doc
= wrapper(obj
['doc']) if wrapper
else obj
['doc']
282 docid
= doc
._id
if wrapper
else obj
['id']
284 docid
= obj
.get('id', None)
289 kwargs
['startkey'] = key
290 if docid
is not None:
291 kwargs
['startkey_docid'] = docid
295 # we reached the end of the page, load next one
302 def remove_control_chars(s
):
303 import unicodedata
, re
305 all_chars
= (unichr(i
) for i
in xrange(0x110000))
306 control_chars
= ''.join(map(unichr, range(0,32) + range(127,160)))
307 control_char_re
= re
.compile('[%s]' % re
.escape(control_chars
))
309 return control_char_re
.sub('', s
)
313 return tuple(map(list,zip(*a
)))
316 def parse_range(s
, min, max, default
=None):
318 Parses the string and returns its value. If the value is outside the given
319 range, its closest number within the range is returned
321 >>> parse_range('5', 0, 10)
324 >>> parse_range('0', 5, 10)
327 >>> parse_range('15',0, 10)
330 >>> parse_range('x', 0, 20)
333 >>> parse_range('x', 0, 20, 20)
344 except (ValueError, TypeError):
345 return default
if default
is not None else (max-min)/2
348 def get_to_dict(cls
, ids
, get_id
=lambda x
: x
._id
, use_cache
=False):
358 cache_objs
.append(obj
)
361 db_objs
= list(cls
.get_multi(ids
))
363 for obj
in (cache_objs
+ db_objs
):
365 # get_multi returns dict {'key': _id, 'error': 'not found'}
366 # for non-existing objects
367 if isinstance(obj
, dict) and 'error' in obj
:
372 ids
= obj
.get_ids() if hasattr(obj
, 'get_ids') else [get_id(obj
)]
378 cache
.set(get_id(obj
), obj
)
384 return [item
for sublist
in l
for item
in sublist
]
387 def linearize(key
, iterators
, reverse
=False):
389 Linearizes a number of iterators, sorted by some comparison function
392 iters
= [iter(i
) for i
in iterators
]
397 vals
. append( (v
, i
) )
398 except StopIteration:
402 vals
= sorted(vals
, key
=lambda x
: key(x
[0]), reverse
=reverse
)
403 val
, it
= vals
.pop(0)
407 vals
.append( (next_val
, it
) )
408 except StopIteration:
412 def skip_pairs(iterator
, cmp=cmp):
413 """ Skips pairs of equal items
415 >>> list(skip_pairs([]))
418 >>> list(skip_pairs([1]))
421 >>> list(skip_pairs([1, 2, 3]))
424 >>> list(skip_pairs([1, 1]))
427 >>> list(skip_pairs([1, 2, 2]))
430 >>> list(skip_pairs([1, 2, 2, 3]))
433 >>> list(skip_pairs([1, 2, 2, 2]))
436 >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
440 iterator
= iter(iterator
)
441 next
= iterator
.next()
446 next
= iterator
.next()
447 except StopIteration as e
:
451 if cmp(item
, next
) == 0:
452 next
= iterator
.next()
457 def get_timestamp(datetime_obj
):
458 """ Returns the timestamp as an int for the given datetime object
460 >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
463 >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
466 return int(time
.mktime(datetime_obj
.timetuple()))
470 re_url
= re
.compile('^https?://')
473 """ Returns true if a string looks like an URL
475 >>> is_url('http://example.com/some-path/file.xml')
478 >>> is_url('something else')
482 return bool(re_url
.match(string
))
485 def is_couchdb_id(id_str
):
489 f
= functools
.partial(operator
.contains
, string
.hexdigits
)
490 return len(id_str
) == 32 and all(map(f
, id_str
))
493 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
494 # this does not increase asymptotical complexity
495 # but can still waste more time than it saves.
496 def shortest_of(strings
):
497 return min(strings
, key
=len)
499 def longest_substr(strings
):
501 Returns the longest common substring of the given strings
507 reference
= shortest_of(strings
) #strings[0]
508 length
= len(reference
)
509 #find a suitable slice i:j
510 for i
in xrange(length
):
511 #only consider strings long at least len(substr) + 1
512 for j
in xrange(i
+ len(substr
) + 1, length
):
513 candidate
= reference
[i
:j
]
514 if all(candidate
in text
for text
in strings
):
520 def additional_value(it
, gen_val
, val_changed
=lambda _
: True):
521 """ Provides an additional value to the elements, calculated when needed
523 For the elements from the iterator, some additional value can be computed
524 by gen_val (which might be an expensive computation).
526 If the elements in the iterator are ordered so that some subsequent
527 elements would generate the same additional value, val_changed can be
528 provided, which receives the next element from the iterator and the
529 previous additional value. If the element would generate the same
530 additional value (val_changed returns False), its computation is skipped.
532 >>> # get the next full hundred higher than x
533 >>> # this will probably be an expensive calculation
534 >>> next_hundred = lambda x: x + 100-(x % 100)
536 >>> # returns True if h is not the value that next_hundred(x) would provide
537 >>> # this should be a relatively cheap calculation, compared to the above
538 >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
540 >>> xs = [0, 50, 100, 101, 199, 200, 201]
541 >>> list(additional_value(xs, next_hundred, diff_hundred))
542 [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
549 if current
is _none
or val_changed(x
, current
):
555 def file_hash(f
, h
=hashlib
.md5
, block_size
=2**20):
556 """ returns the hash of the contents of a file """
558 for chunk
in iter(lambda: f
.read(block_size
), ''):