mygpo/utils.py

   1 #
   2 # This file is part of my.gpodder.org.
   3 #
   4 # my.gpodder.org is free software: you can redistribute it and/or modify it
   5 # under the terms of the GNU Affero General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or (at your
   7 # option) any later version.
   8 #
   9 # my.gpodder.org is distributed in the hope that it will be useful, but
  10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
  12 # License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
  16 #
  17
  18 import operator
  19 import sys
  20 import re
  21 import collections
  22 from datetime import datetime, timedelta, date
  23 import time
  24 import hashlib
  25
  26 from django.core.cache import cache
  27
  28
  29 def daterange(from_date, to_date=None, leap=timedelta(days=1)):
  30     """
  31     >>> from_d = datetime(2010, 01, 01)
  32     >>> to_d = datetime(2010, 01, 05)
  33     >>> list(daterange(from_d, to_d))
  34     [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
  35     """
  36
  37     if to_date is None:
  38         if isinstance(from_date, datetime):
  39             to_date = datetime.now()
  40         else:
  41             to_date = date.today()
  42
  43     while from_date <= to_date:
  44         yield from_date
  45         from_date = from_date + leap
  46     return
  47
  48 def format_time(value):
  49     """Format an offset (in seconds) to a string
  50
  51     The offset should be an integer or float value.
  52
  53     >>> format_time(0)
  54     '00:00'
  55     >>> format_time(20)
  56     '00:20'
  57     >>> format_time(3600)
  58     '01:00:00'
  59     >>> format_time(10921)
  60     '03:02:01'
  61     """
  62     try:
  63         dt = datetime.utcfromtimestamp(value)
  64     except ValueError:
  65         return ''
  66
  67     if dt.hour == 0:
  68         return dt.strftime('%M:%S')
  69     else:
  70         return dt.strftime('%H:%M:%S')
  71
  72 def parse_time(value):
  73     """
  74     >>> parse_time(10)
  75     10
  76
  77     >>> parse_time('05:10') #5*60+10
  78     310
  79
  80     >>> parse_time('1:05:10') #60*60+5*60+10
  81     3910
  82     """
  83     if value is None:
  84         raise ValueError('None value in parse_time')
  85
  86     if isinstance(value, int):
  87         # Don't need to parse already-converted time value
  88         return value
  89
  90     if value == '':
  91         raise ValueError('Empty valueing in parse_time')
  92
  93     for format in ('%H:%M:%S', '%M:%S'):
  94         try:
  95             t = time.strptime(value, format)
  96             return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
  97         except ValueError, e:
  98             continue
  99
 100     return int(value)
 101
 102
 103 def parse_bool(val):
 104     """
 105     >>> parse_bool('True')
 106     True
 107
 108     >>> parse_bool('true')
 109     True
 110
 111     >>> parse_bool('')
 112     False
 113     """
 114     if isinstance(val, bool):
 115         return val
 116     if val.lower() == 'true':
 117         return True
 118     return False
 119
 120
 121 def iterate_together(lists, key=lambda x: x, reverse=False):
 122     """
 123     takes ordered, possibly sparse, lists with similar items
 124     (some items have a corresponding item in the other lists, some don't).
 125
 126     It then yield tuples of corresponding items, where one element is None is
 127     there is no corresponding entry in one of the lists.
 128
 129     Tuples where both elements are None are skipped.
 130
 131     The results of the key method are used for the comparisons.
 132
 133     If reverse is True, the lists are expected to be sorted in reverse order
 134     and the results will also be sorted reverse
 135
 136     >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
 137     [(1, 1), (2, None), (None, 3)]
 138
 139     >>> list(iterate_together([[], []]))
 140     []
 141
 142     >>> list(iterate_together([range(1, 3), range(3, 5)]))
 143     [(1, None), (2, None), (None, 3), (None, 4)]
 144
 145     >>> list(iterate_together([range(1, 3), []]))
 146     [(1, None), (2, None)]
 147
 148     >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
 149     [(1, None), (3, 3)]
 150     """
 151
 152     Next = collections.namedtuple('Next', 'item more')
 153     min_ = min if not reverse else max
 154     lt_  = operator.lt if not reverse else operator.gt
 155
 156     lists = [iter(l) for l in lists]
 157
 158     def _take(it):
 159         try:
 160             i = it.next()
 161             while i is None:
 162                 i = it.next()
 163             return Next(i, True)
 164         except StopIteration:
 165             return Next(None, False)
 166
 167     def new_res():
 168         return [None]*len(lists)
 169
 170     # take first bunch of items
 171     items = [_take(l) for l in lists]
 172
 173     while any(i.item is not None or i.more for i in items):
 174
 175         res = new_res()
 176
 177         for n, item in enumerate(items):
 178
 179             if item.item is None:
 180                 continue
 181
 182             if all(x is None for x in res):
 183                 res[n] = item.item
 184                 continue
 185
 186             min_v = min_(filter(lambda x: x is not None, res), key=key)
 187
 188             if key(item.item) == key(min_v):
 189                 res[n] = item.item
 190
 191             elif lt_(key(item.item), key(min_v)):
 192                 res = new_res()
 193                 res[n] = item.item
 194
 195         for n, x in enumerate(res):
 196             if x is not None:
 197                 items[n] = _take(lists[n])
 198
 199         yield tuple(res)
 200
 201
 202 def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
 203
 204     # progress as percentage
 205     percentage_str = '{val:.2%}'.format(val=float(val)/max_val)
 206
 207     # progress bar filled with #s
 208     progress_str = '#'*int(float(val)/max_val*max_width) + \
 209                    ' ' * (max_width-(int(float(val)/max_val*max_width)))
 210
 211     #insert percentage into bar
 212     percentage_start = int((max_width-len(percentage_str))/2)
 213     progress_str = progress_str[:percentage_start] + \
 214                    percentage_str + \
 215                    progress_str[percentage_start+len(percentage_str):]
 216
 217     print >> stream, '\r',
 218     print >> stream, '[ %s ] %s / %s | %s' % (
 219         progress_str,
 220         val,
 221         max_val,
 222         status_str),
 223     stream.flush()
 224
 225
 226 def set_cmp(list, simplify):
 227     """
 228     Builds a set out of a list but uses the results of simplify to determine equality between items
 229     """
 230     simpl = lambda x: (simplify(x), x)
 231     lst = dict(map(simpl, list))
 232     return lst.values()
 233
 234
 235 def first(it):
 236     """
 237     returns the first not-None object or None if the iterator is exhausted
 238     """
 239     for x in it:
 240         if x != None:
 241             return x
 242     return None
 243
 244
 245 def intersect(a, b):
 246      return list(set(a) & set(b))
 247
 248
 249
 250 def remove_control_chars(s):
 251     import unicodedata, re
 252
 253     all_chars = (unichr(i) for i in xrange(0x110000))
 254     control_chars = ''.join(map(unichr, range(0,32) + range(127,160)))
 255     control_char_re = re.compile('[%s]' % re.escape(control_chars))
 256
 257     return control_char_re.sub('', s)
 258
 259
 260 def unzip(a):
 261     return tuple(map(list,zip(*a)))
 262
 263
 264 def parse_range(s, min, max, default=None):
 265     """
 266     Parses the string and returns its value. If the value is outside the given
 267     range, its closest number within the range is returned
 268
 269     >>> parse_range('5', 0, 10)
 270     5
 271
 272     >>> parse_range('0', 5, 10)
 273     5
 274
 275     >>> parse_range('15',0, 10)
 276     10
 277
 278     >>> parse_range('x', 0, 20)
 279     10
 280
 281     >>> parse_range('x', 0, 20, 20)
 282     20
 283     """
 284     try:
 285         val = int(s)
 286         if val < min:
 287             return min
 288         if val > max:
 289             return max
 290         return val
 291
 292     except (ValueError, TypeError):
 293         return default if default is not None else (max-min)/2
 294
 295
 296
 297 def flatten(l):
 298     return [item for sublist in l for item in sublist]
 299
 300
 301 def linearize(key, iterators, reverse=False):
 302     """
 303     Linearizes a number of iterators, sorted by some comparison function
 304     """
 305
 306     iters = [iter(i) for i in iterators]
 307     vals = []
 308     for i in iters:
 309         try:
 310             v = i.next()
 311             vals. append( (v, i) )
 312         except StopIteration:
 313             continue
 314
 315     while vals:
 316         vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
 317         val, it = vals.pop(0)
 318         yield val
 319         try:
 320             next_val = it.next()
 321             vals.append( (next_val, it) )
 322         except StopIteration:
 323             pass
 324
 325
 326 def skip_pairs(iterator, cmp=cmp):
 327     """ Skips pairs of equal items
 328
 329     >>> list(skip_pairs([]))
 330     []
 331
 332     >>> list(skip_pairs([1]))
 333     [1]
 334
 335     >>> list(skip_pairs([1, 2, 3]))
 336     [1, 2, 3]
 337
 338     >>> list(skip_pairs([1, 1]))
 339     []
 340
 341     >>> list(skip_pairs([1, 2, 2]))
 342     [1]
 343
 344     >>> list(skip_pairs([1, 2, 2, 3]))
 345     [1, 3]
 346
 347     >>> list(skip_pairs([1, 2, 2, 2]))
 348     [1, 2]
 349
 350     >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
 351     [1, 3]
 352     """
 353
 354     iterator = iter(iterator)
 355     next = iterator.next()
 356
 357     while True:
 358         item = next
 359         try:
 360             next = iterator.next()
 361         except StopIteration as e:
 362             yield item
 363             raise e
 364
 365         if cmp(item, next) == 0:
 366             next = iterator.next()
 367         else:
 368             yield item
 369
 370
 371 def get_timestamp(datetime_obj):
 372     """ Returns the timestamp as an int for the given datetime object
 373
 374     >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
 375     1302168606
 376
 377     >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
 378     0
 379     """
 380     return int(time.mktime(datetime_obj.timetuple()))
 381
 382
 383
 384 re_url = re.compile('^https?://')
 385
 386 def is_url(string):
 387     """ Returns true if a string looks like an URL
 388
 389     >>> is_url('http://example.com/some-path/file.xml')
 390     True
 391
 392     >>> is_url('something else')
 393     False
 394     """
 395
 396     return bool(re_url.match(string))
 397
 398
 399
 400 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
 401 # this does not increase asymptotical complexity
 402 # but can still waste more time than it saves.
 403 def shortest_of(strings):
 404     return min(strings, key=len)
 405
 406 def longest_substr(strings):
 407     """
 408     Returns the longest common substring of the given strings
 409     """
 410
 411     substr = ""
 412     if not strings:
 413         return substr
 414     reference = shortest_of(strings) #strings[0]
 415     length = len(reference)
 416     #find a suitable slice i:j
 417     for i in xrange(length):
 418         #only consider strings long at least len(substr) + 1
 419         for j in xrange(i + len(substr) + 1, length):
 420             candidate = reference[i:j]
 421             if all(candidate in text for text in strings):
 422                 substr = candidate
 423     return substr
 424
 425
 426
 427 def additional_value(it, gen_val, val_changed=lambda _: True):
 428     """ Provides an additional value to the elements, calculated when needed
 429
 430     For the elements from the iterator, some additional value can be computed
 431     by gen_val (which might be an expensive computation).
 432
 433     If the elements in the iterator are ordered so that some subsequent
 434     elements would generate the same additional value, val_changed can be
 435     provided, which receives the next element from the iterator and the
 436     previous additional value. If the element would generate the same
 437     additional value (val_changed returns False), its computation is skipped.
 438
 439     >>> # get the next full hundred higher than x
 440     >>> # this will probably be an expensive calculation
 441     >>> next_hundred = lambda x: x + 100-(x % 100)
 442
 443     >>> # returns True if h is not the value that next_hundred(x) would provide
 444     >>> # this should be a relatively cheap calculation, compared to the above
 445     >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
 446
 447     >>> xs = [0, 50, 100, 101, 199, 200, 201]
 448     >>> list(additional_value(xs, next_hundred, diff_hundred))
 449     [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
 450     """
 451
 452     _none = object()
 453     current = _none
 454
 455     for x in it:
 456         if current is _none or val_changed(x, current):
 457             current = gen_val(x)
 458
 459         yield (x, current)
 460
 461
 462 def file_hash(f, h=hashlib.md5, block_size=2**20):
 463     """ returns the hash of the contents of a file """
 464     f_hash = h()
 465     for chunk in iter(lambda: f.read(block_size), ''):
 466          f_hash.update(chunk)
 467     return f_hash
 468
 469
 470
 471 def split_list(l, prop):
 472     """ split elements that satisfy a property, and those that don't """
 473     match   = filter(prop, l)
 474     nomatch = [x for x in l if x not in match]
 475     return match, nomatch