2 # This file is part of my.gpodder.org.
4 # my.gpodder.org is free software: you can redistribute it and/or modify it
5 # under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or (at your
7 # option) any later version.
9 # my.gpodder.org is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
12 # License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
22 from datetime
import datetime
, timedelta
, date
26 from django
.core
.cache
import cache
28 from mygpo
.couch
import get_main_database
31 def daterange(from_date
, to_date
=None, leap
=timedelta(days
=1)):
33 >>> from_d = datetime(2010, 01, 01)
34 >>> to_d = datetime(2010, 01, 05)
35 >>> list(daterange(from_d, to_d))
36 [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
40 if isinstance(from_date
, datetime
):
41 to_date
= datetime
.now()
43 to_date
= date
.today()
45 while from_date
<= to_date
:
47 from_date
= from_date
+ leap
50 def format_time(value
):
51 """Format an offset (in seconds) to a string
53 The offset should be an integer or float value.
61 >>> format_time(10921)
65 dt
= datetime
.utcfromtimestamp(value
)
70 return dt
.strftime('%M:%S')
72 return dt
.strftime('%H:%M:%S')
74 def parse_time(value
):
79 >>> parse_time('05:10') #5*60+10
82 >>> parse_time('1:05:10') #60*60+5*60+10
86 raise ValueError('None value in parse_time')
88 if isinstance(value
, int):
89 # Don't need to parse already-converted time value
93 raise ValueError('Empty valueing in parse_time')
95 for format
in ('%H:%M:%S', '%M:%S'):
97 t
= time
.strptime(value
, format
)
98 return t
.tm_hour
* 60*60 + t
.tm_min
* 60 + t
.tm_sec
107 >>> parse_bool('True')
110 >>> parse_bool('true')
116 if isinstance(val
, bool):
118 if val
.lower() == 'true':
123 def iterate_together(lists
, key
=lambda x
: x
, reverse
=False):
125 takes ordered, possibly sparse, lists with similar items
126 (some items have a corresponding item in the other lists, some don't).
128 It then yield tuples of corresponding items, where one element is None is
129 there is no corresponding entry in one of the lists.
131 Tuples where both elements are None are skipped.
133 The results of the key method are used for the comparisons.
135 If reverse is True, the lists are expected to be sorted in reverse order
136 and the results will also be sorted reverse
138 >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
139 [(1, 1), (2, None), (None, 3)]
141 >>> list(iterate_together([[], []]))
144 >>> list(iterate_together([range(1, 3), range(3, 5)]))
145 [(1, None), (2, None), (None, 3), (None, 4)]
147 >>> list(iterate_together([range(1, 3), []]))
148 [(1, None), (2, None)]
150 >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
154 Next
= collections
.namedtuple('Next', 'item more')
155 min_
= min if not reverse
else max
156 lt_
= operator
.lt
if not reverse
else operator
.gt
158 lists
= [iter(l
) for l
in lists
]
166 except StopIteration:
167 return Next(None, False)
170 return [None]*len(lists
)
172 # take first bunch of items
173 items
= [_take(l
) for l
in lists
]
175 while any(i
.item
is not None or i
.more
for i
in items
):
179 for n
, item
in enumerate(items
):
181 if item
.item
is None:
184 if all(x
is None for x
in res
):
188 min_v
= min_(filter(lambda x
: x
is not None, res
), key
=key
)
190 if key(item
.item
) == key(min_v
):
193 elif lt_(key(item
.item
), key(min_v
)):
197 for n
, x
in enumerate(res
):
199 items
[n
] = _take(lists
[n
])
204 def progress(val
, max_val
, status_str
='', max_width
=50, stream
=sys
.stdout
):
206 # progress as percentage
207 percentage_str
= '{val:.2%}'.format(val
=float(val
)/max_val
)
209 # progress bar filled with #s
210 progress_str
= '#'*int(float(val
)/max_val
*max_width
) + \
211 ' ' * (max_width
-(int(float(val
)/max_val
*max_width
)))
213 #insert percentage into bar
214 percentage_start
= int((max_width
-len(percentage_str
))/2)
215 progress_str
= progress_str
[:percentage_start
] + \
217 progress_str
[percentage_start
+len(percentage_str
):]
219 print >> stream
, '\r',
220 print >> stream
, '[ %s ] %s / %s | %s' % (
228 def set_cmp(list, simplify
):
230 Builds a set out of a list but uses the results of simplify to determine equality between items
232 simpl
= lambda x
: (simplify(x
), x
)
233 lst
= dict(map(simpl
, list))
239 returns the first not-None object or None if the iterator is exhausted
248 return list(set(a
) & set(b
))
252 def multi_request_view(cls
, view
, wrap
=True, auto_advance
=True,
255 splits up a view request into several requests, which reduces
256 the server load of the number of returned objects is large.
258 NOTE: As such a split request is obviously not atomical anymore, results
259 might skip some elements of contain some twice
261 If auto_advance is False the method will always request the same range.
262 This can be useful when the view contain unprocessed items and the caller
263 processes the items, thus removing them from the view before the next
267 per_page
= kwargs
.get('limit', 1000)
268 kwargs
['limit'] = per_page
+ 1
269 db
= get_main_database()
270 wrapper
= kwargs
.pop('wrapper', cls
.wrap
)
275 resp
= db
.view(view
, *args
, **kwargs
)
278 for n
, obj
in enumerate(resp
.iterator()):
283 doc
= wrapper(obj
['doc']) if wrapper
else obj
['doc']
284 docid
= doc
._id
if wrapper
else obj
['id']
286 docid
= obj
.get('id', None)
291 kwargs
['startkey'] = key
292 if docid
is not None:
293 kwargs
['startkey_docid'] = docid
297 # we reached the end of the page, load next one
304 def remove_control_chars(s
):
305 import unicodedata
, re
307 all_chars
= (unichr(i
) for i
in xrange(0x110000))
308 control_chars
= ''.join(map(unichr, range(0,32) + range(127,160)))
309 control_char_re
= re
.compile('[%s]' % re
.escape(control_chars
))
311 return control_char_re
.sub('', s
)
315 return tuple(map(list,zip(*a
)))
318 def parse_range(s
, min, max, default
=None):
320 Parses the string and returns its value. If the value is outside the given
321 range, its closest number within the range is returned
323 >>> parse_range('5', 0, 10)
326 >>> parse_range('0', 5, 10)
329 >>> parse_range('15',0, 10)
332 >>> parse_range('x', 0, 20)
335 >>> parse_range('x', 0, 20, 20)
346 except (ValueError, TypeError):
347 return default
if default
is not None else (max-min)/2
350 def get_to_dict(cls
, ids
, get_id
=lambda x
: x
._id
, use_cache
=False):
357 res
= cache
.get_many(ids
)
358 cache_objs
.extend(res
.values())
359 ids
= [x
for x
in ids
if x
not in res
.keys()]
361 db_objs
= list(cls
.get_multi(ids
))
363 for obj
in (cache_objs
+ db_objs
):
365 # get_multi returns dict {'key': _id, 'error': 'not found'}
366 # for non-existing objects
367 if isinstance(obj
, dict) and 'error' in obj
:
372 ids
= obj
.get_ids() if hasattr(obj
, 'get_ids') else [get_id(obj
)]
377 cache
.set_many(dict( (get_id(obj
), obj
) for obj
in db_objs
))
383 return [item
for sublist
in l
for item
in sublist
]
386 def linearize(key
, iterators
, reverse
=False):
388 Linearizes a number of iterators, sorted by some comparison function
391 iters
= [iter(i
) for i
in iterators
]
396 vals
. append( (v
, i
) )
397 except StopIteration:
401 vals
= sorted(vals
, key
=lambda x
: key(x
[0]), reverse
=reverse
)
402 val
, it
= vals
.pop(0)
406 vals
.append( (next_val
, it
) )
407 except StopIteration:
411 def skip_pairs(iterator
, cmp=cmp):
412 """ Skips pairs of equal items
414 >>> list(skip_pairs([]))
417 >>> list(skip_pairs([1]))
420 >>> list(skip_pairs([1, 2, 3]))
423 >>> list(skip_pairs([1, 1]))
426 >>> list(skip_pairs([1, 2, 2]))
429 >>> list(skip_pairs([1, 2, 2, 3]))
432 >>> list(skip_pairs([1, 2, 2, 2]))
435 >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
439 iterator
= iter(iterator
)
440 next
= iterator
.next()
445 next
= iterator
.next()
446 except StopIteration as e
:
450 if cmp(item
, next
) == 0:
451 next
= iterator
.next()
456 def get_timestamp(datetime_obj
):
457 """ Returns the timestamp as an int for the given datetime object
459 >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
462 >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
465 return int(time
.mktime(datetime_obj
.timetuple()))
469 re_url
= re
.compile('^https?://')
472 """ Returns true if a string looks like an URL
474 >>> is_url('http://example.com/some-path/file.xml')
477 >>> is_url('something else')
481 return bool(re_url
.match(string
))
484 def is_couchdb_id(id_str
):
488 f
= functools
.partial(operator
.contains
, string
.hexdigits
)
489 return len(id_str
) == 32 and all(map(f
, id_str
))
492 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
493 # this does not increase asymptotical complexity
494 # but can still waste more time than it saves.
495 def shortest_of(strings
):
496 return min(strings
, key
=len)
498 def longest_substr(strings
):
500 Returns the longest common substring of the given strings
506 reference
= shortest_of(strings
) #strings[0]
507 length
= len(reference
)
508 #find a suitable slice i:j
509 for i
in xrange(length
):
510 #only consider strings long at least len(substr) + 1
511 for j
in xrange(i
+ len(substr
) + 1, length
):
512 candidate
= reference
[i
:j
]
513 if all(candidate
in text
for text
in strings
):
519 def additional_value(it
, gen_val
, val_changed
=lambda _
: True):
520 """ Provides an additional value to the elements, calculated when needed
522 For the elements from the iterator, some additional value can be computed
523 by gen_val (which might be an expensive computation).
525 If the elements in the iterator are ordered so that some subsequent
526 elements would generate the same additional value, val_changed can be
527 provided, which receives the next element from the iterator and the
528 previous additional value. If the element would generate the same
529 additional value (val_changed returns False), its computation is skipped.
531 >>> # get the next full hundred higher than x
532 >>> # this will probably be an expensive calculation
533 >>> next_hundred = lambda x: x + 100-(x % 100)
535 >>> # returns True if h is not the value that next_hundred(x) would provide
536 >>> # this should be a relatively cheap calculation, compared to the above
537 >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
539 >>> xs = [0, 50, 100, 101, 199, 200, 201]
540 >>> list(additional_value(xs, next_hundred, diff_hundred))
541 [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
548 if current
is _none
or val_changed(x
, current
):
554 def file_hash(f
, h
=hashlib
.md5
, block_size
=2**20):
555 """ returns the hash of the contents of a file """
557 for chunk
in iter(lambda: f
.read(block_size
), ''):