mygpo/utils.py

   1 #
   2 # This file is part of my.gpodder.org.
   3 #
   4 # my.gpodder.org is free software: you can redistribute it and/or modify it
   5 # under the terms of the GNU Affero General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or (at your
   7 # option) any later version.
   8 #
   9 # my.gpodder.org is distributed in the hope that it will be useful, but
  10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
  12 # License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
  16 #
  17
  18 import operator
  19 import sys
  20 import re
  21 import collections
  22 from datetime import datetime, timedelta, date
  23 import time
  24 import hashlib
  25
  26 from django.core.cache import cache
  27
  28 from mygpo.couch import get_main_database
  29
  30
  31 def daterange(from_date, to_date=None, leap=timedelta(days=1)):
  32     """
  33     >>> from_d = datetime(2010, 01, 01)
  34     >>> to_d = datetime(2010, 01, 05)
  35     >>> list(daterange(from_d, to_d))
  36     [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
  37     """
  38
  39     if to_date is None:
  40         if isinstance(from_date, datetime):
  41             to_date = datetime.now()
  42         else:
  43             to_date = date.today()
  44
  45     while from_date <= to_date:
  46         yield from_date
  47         from_date = from_date + leap
  48     return
  49
  50 def format_time(value):
  51     """Format an offset (in seconds) to a string
  52
  53     The offset should be an integer or float value.
  54
  55     >>> format_time(0)
  56     '00:00'
  57     >>> format_time(20)
  58     '00:20'
  59     >>> format_time(3600)
  60     '01:00:00'
  61     >>> format_time(10921)
  62     '03:02:01'
  63     """
  64     try:
  65         dt = datetime.utcfromtimestamp(value)
  66     except ValueError:
  67         return ''
  68
  69     if dt.hour == 0:
  70         return dt.strftime('%M:%S')
  71     else:
  72         return dt.strftime('%H:%M:%S')
  73
  74 def parse_time(value):
  75     """
  76     >>> parse_time(10)
  77     10
  78
  79     >>> parse_time('05:10') #5*60+10
  80     310
  81
  82     >>> parse_time('1:05:10') #60*60+5*60+10
  83     3910
  84     """
  85     if value is None:
  86         raise ValueError('None value in parse_time')
  87
  88     if isinstance(value, int):
  89         # Don't need to parse already-converted time value
  90         return value
  91
  92     if value == '':
  93         raise ValueError('Empty valueing in parse_time')
  94
  95     for format in ('%H:%M:%S', '%M:%S'):
  96         try:
  97             t = time.strptime(value, format)
  98             return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
  99         except ValueError, e:
 100             continue
 101
 102     return int(value)
 103
 104
 105 def parse_bool(val):
 106     """
 107     >>> parse_bool('True')
 108     True
 109
 110     >>> parse_bool('true')
 111     True
 112
 113     >>> parse_bool('')
 114     False
 115     """
 116     if isinstance(val, bool):
 117         return val
 118     if val.lower() == 'true':
 119         return True
 120     return False
 121
 122
 123 def iterate_together(lists, key=lambda x: x, reverse=False):
 124     """
 125     takes ordered, possibly sparse, lists with similar items
 126     (some items have a corresponding item in the other lists, some don't).
 127
 128     It then yield tuples of corresponding items, where one element is None is
 129     there is no corresponding entry in one of the lists.
 130
 131     Tuples where both elements are None are skipped.
 132
 133     The results of the key method are used for the comparisons.
 134
 135     If reverse is True, the lists are expected to be sorted in reverse order
 136     and the results will also be sorted reverse
 137
 138     >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
 139     [(1, 1), (2, None), (None, 3)]
 140
 141     >>> list(iterate_together([[], []]))
 142     []
 143
 144     >>> list(iterate_together([range(1, 3), range(3, 5)]))
 145     [(1, None), (2, None), (None, 3), (None, 4)]
 146
 147     >>> list(iterate_together([range(1, 3), []]))
 148     [(1, None), (2, None)]
 149
 150     >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
 151     [(1, None), (3, 3)]
 152     """
 153
 154     Next = collections.namedtuple('Next', 'item more')
 155     min_ = min if not reverse else max
 156     lt_  = operator.lt if not reverse else operator.gt
 157
 158     lists = [iter(l) for l in lists]
 159
 160     def _take(it):
 161         try:
 162             i = it.next()
 163             while i is None:
 164                 i = it.next()
 165             return Next(i, True)
 166         except StopIteration:
 167             return Next(None, False)
 168
 169     def new_res():
 170         return [None]*len(lists)
 171
 172     # take first bunch of items
 173     items = [_take(l) for l in lists]
 174
 175     while any(i.item is not None or i.more for i in items):
 176
 177         res = new_res()
 178
 179         for n, item in enumerate(items):
 180
 181             if item.item is None:
 182                 continue
 183
 184             if all(x is None for x in res):
 185                 res[n] = item.item
 186                 continue
 187
 188             min_v = min_(filter(lambda x: x is not None, res), key=key)
 189
 190             if key(item.item) == key(min_v):
 191                 res[n] = item.item
 192
 193             elif lt_(key(item.item), key(min_v)):
 194                 res = new_res()
 195                 res[n] = item.item
 196
 197         for n, x in enumerate(res):
 198             if x is not None:
 199                 items[n] = _take(lists[n])
 200
 201         yield tuple(res)
 202
 203
 204 def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
 205
 206     # progress as percentage
 207     percentage_str = '{val:.2%}'.format(val=float(val)/max_val)
 208
 209     # progress bar filled with #s
 210     progress_str = '#'*int(float(val)/max_val*max_width) + \
 211                    ' ' * (max_width-(int(float(val)/max_val*max_width)))
 212
 213     #insert percentage into bar
 214     percentage_start = int((max_width-len(percentage_str))/2)
 215     progress_str = progress_str[:percentage_start] + \
 216                    percentage_str + \
 217                    progress_str[percentage_start+len(percentage_str):]
 218
 219     print >> stream, '\r',
 220     print >> stream, '[ %s ] %s / %s | %s' % (
 221         progress_str,
 222         val,
 223         max_val,
 224         status_str),
 225     stream.flush()
 226
 227
 228 def set_cmp(list, simplify):
 229     """
 230     Builds a set out of a list but uses the results of simplify to determine equality between items
 231     """
 232     simpl = lambda x: (simplify(x), x)
 233     lst = dict(map(simpl, list))
 234     return lst.values()
 235
 236
 237 def first(it):
 238     """
 239     returns the first not-None object or None if the iterator is exhausted
 240     """
 241     for x in it:
 242         if x != None:
 243             return x
 244     return None
 245
 246
 247 def intersect(a, b):
 248      return list(set(a) & set(b))
 249
 250
 251
 252 def remove_control_chars(s):
 253     import unicodedata, re
 254
 255     all_chars = (unichr(i) for i in xrange(0x110000))
 256     control_chars = ''.join(map(unichr, range(0,32) + range(127,160)))
 257     control_char_re = re.compile('[%s]' % re.escape(control_chars))
 258
 259     return control_char_re.sub('', s)
 260
 261
 262 def unzip(a):
 263     return tuple(map(list,zip(*a)))
 264
 265
 266 def parse_range(s, min, max, default=None):
 267     """
 268     Parses the string and returns its value. If the value is outside the given
 269     range, its closest number within the range is returned
 270
 271     >>> parse_range('5', 0, 10)
 272     5
 273
 274     >>> parse_range('0', 5, 10)
 275     5
 276
 277     >>> parse_range('15',0, 10)
 278     10
 279
 280     >>> parse_range('x', 0, 20)
 281     10
 282
 283     >>> parse_range('x', 0, 20, 20)
 284     20
 285     """
 286     try:
 287         val = int(s)
 288         if val < min:
 289             return min
 290         if val > max:
 291             return max
 292         return val
 293
 294     except (ValueError, TypeError):
 295         return default if default is not None else (max-min)/2
 296
 297
 298
 299 def flatten(l):
 300     return [item for sublist in l for item in sublist]
 301
 302
 303 def linearize(key, iterators, reverse=False):
 304     """
 305     Linearizes a number of iterators, sorted by some comparison function
 306     """
 307
 308     iters = [iter(i) for i in iterators]
 309     vals = []
 310     for i in iters:
 311         try:
 312             v = i.next()
 313             vals. append( (v, i) )
 314         except StopIteration:
 315             continue
 316
 317     while vals:
 318         vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
 319         val, it = vals.pop(0)
 320         yield val
 321         try:
 322             next_val = it.next()
 323             vals.append( (next_val, it) )
 324         except StopIteration:
 325             pass
 326
 327
 328 def skip_pairs(iterator, cmp=cmp):
 329     """ Skips pairs of equal items
 330
 331     >>> list(skip_pairs([]))
 332     []
 333
 334     >>> list(skip_pairs([1]))
 335     [1]
 336
 337     >>> list(skip_pairs([1, 2, 3]))
 338     [1, 2, 3]
 339
 340     >>> list(skip_pairs([1, 1]))
 341     []
 342
 343     >>> list(skip_pairs([1, 2, 2]))
 344     [1]
 345
 346     >>> list(skip_pairs([1, 2, 2, 3]))
 347     [1, 3]
 348
 349     >>> list(skip_pairs([1, 2, 2, 2]))
 350     [1, 2]
 351
 352     >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
 353     [1, 3]
 354     """
 355
 356     iterator = iter(iterator)
 357     next = iterator.next()
 358
 359     while True:
 360         item = next
 361         try:
 362             next = iterator.next()
 363         except StopIteration as e:
 364             yield item
 365             raise e
 366
 367         if cmp(item, next) == 0:
 368             next = iterator.next()
 369         else:
 370             yield item
 371
 372
 373 def get_timestamp(datetime_obj):
 374     """ Returns the timestamp as an int for the given datetime object
 375
 376     >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
 377     1302168606
 378
 379     >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
 380     0
 381     """
 382     return int(time.mktime(datetime_obj.timetuple()))
 383
 384
 385
 386 re_url = re.compile('^https?://')
 387
 388 def is_url(string):
 389     """ Returns true if a string looks like an URL
 390
 391     >>> is_url('http://example.com/some-path/file.xml')
 392     True
 393
 394     >>> is_url('something else')
 395     False
 396     """
 397
 398     return bool(re_url.match(string))
 399
 400
 401 def is_couchdb_id(id_str):
 402     import string
 403     import operator
 404     import functools
 405     f = functools.partial(operator.contains, string.hexdigits)
 406     return len(id_str) == 32 and all(map(f, id_str))
 407
 408
 409 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
 410 # this does not increase asymptotical complexity
 411 # but can still waste more time than it saves.
 412 def shortest_of(strings):
 413     return min(strings, key=len)
 414
 415 def longest_substr(strings):
 416     """
 417     Returns the longest common substring of the given strings
 418     """
 419
 420     substr = ""
 421     if not strings:
 422         return substr
 423     reference = shortest_of(strings) #strings[0]
 424     length = len(reference)
 425     #find a suitable slice i:j
 426     for i in xrange(length):
 427         #only consider strings long at least len(substr) + 1
 428         for j in xrange(i + len(substr) + 1, length):
 429             candidate = reference[i:j]
 430             if all(candidate in text for text in strings):
 431                 substr = candidate
 432     return substr
 433
 434
 435
 436 def additional_value(it, gen_val, val_changed=lambda _: True):
 437     """ Provides an additional value to the elements, calculated when needed
 438
 439     For the elements from the iterator, some additional value can be computed
 440     by gen_val (which might be an expensive computation).
 441
 442     If the elements in the iterator are ordered so that some subsequent
 443     elements would generate the same additional value, val_changed can be
 444     provided, which receives the next element from the iterator and the
 445     previous additional value. If the element would generate the same
 446     additional value (val_changed returns False), its computation is skipped.
 447
 448     >>> # get the next full hundred higher than x
 449     >>> # this will probably be an expensive calculation
 450     >>> next_hundred = lambda x: x + 100-(x % 100)
 451
 452     >>> # returns True if h is not the value that next_hundred(x) would provide
 453     >>> # this should be a relatively cheap calculation, compared to the above
 454     >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
 455
 456     >>> xs = [0, 50, 100, 101, 199, 200, 201]
 457     >>> list(additional_value(xs, next_hundred, diff_hundred))
 458     [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
 459     """
 460
 461     _none = object()
 462     current = _none
 463
 464     for x in it:
 465         if current is _none or val_changed(x, current):
 466             current = gen_val(x)
 467
 468         yield (x, current)
 469
 470
 471 def file_hash(f, h=hashlib.md5, block_size=2**20):
 472     """ returns the hash of the contents of a file """
 473     f_hash = h()
 474     for chunk in iter(lambda: f.read(block_size), ''):
 475          f_hash.update(chunk)
 476     return f_hash