mygpo/utils.py

   1 #
   2 # This file is part of my.gpodder.org.
   3 #
   4 # my.gpodder.org is free software: you can redistribute it and/or modify it
   5 # under the terms of the GNU Affero General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or (at your
   7 # option) any later version.
   8 #
   9 # my.gpodder.org is distributed in the hope that it will be useful, but
  10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
  12 # License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
  16 #
  17
  18 import operator
  19 import sys
  20 import re
  21 import collections
  22 from datetime import datetime, timedelta, date
  23 import time
  24
  25 from django.core.cache import cache
  26
  27
  28 def daterange(from_date, to_date=None, leap=timedelta(days=1)):
  29     """
  30     >>> from_d = datetime(2010, 01, 01)
  31     >>> to_d = datetime(2010, 01, 05)
  32     >>> list(daterange(from_d, to_d))
  33     [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
  34     """
  35
  36     if to_date is None:
  37         if isinstance(from_date, datetime):
  38             to_date = datetime.now()
  39         else:
  40             to_date = date.today()
  41
  42     while from_date <= to_date:
  43         yield from_date
  44         from_date = from_date + leap
  45     return
  46
  47 def format_time(value):
  48     """Format an offset (in seconds) to a string
  49
  50     The offset should be an integer or float value.
  51
  52     >>> format_time(0)
  53     '00:00'
  54     >>> format_time(20)
  55     '00:20'
  56     >>> format_time(3600)
  57     '01:00:00'
  58     >>> format_time(10921)
  59     '03:02:01'
  60     """
  61     try:
  62         dt = datetime.utcfromtimestamp(value)
  63     except ValueError:
  64         return ''
  65
  66     if dt.hour == 0:
  67         return dt.strftime('%M:%S')
  68     else:
  69         return dt.strftime('%H:%M:%S')
  70
  71 def parse_time(value):
  72     """
  73     >>> parse_time(10)
  74     10
  75
  76     >>> parse_time('05:10') #5*60+10
  77     310
  78
  79     >>> parse_time('1:05:10') #60*60+5*60+10
  80     3910
  81     """
  82     if value is None:
  83         raise ValueError('None value in parse_time')
  84
  85     if isinstance(value, int):
  86         # Don't need to parse already-converted time value
  87         return value
  88
  89     if value == '':
  90         raise ValueError('Empty valueing in parse_time')
  91
  92     for format in ('%H:%M:%S', '%M:%S'):
  93         try:
  94             t = time.strptime(value, format)
  95             return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
  96         except ValueError, e:
  97             continue
  98
  99     return int(value)
 100
 101
 102 def parse_bool(val):
 103     """
 104     >>> parse_bool('True')
 105     True
 106
 107     >>> parse_bool('true')
 108     True
 109
 110     >>> parse_bool('')
 111     False
 112     """
 113     if isinstance(val, bool):
 114         return val
 115     if val.lower() == 'true':
 116         return True
 117     return False
 118
 119
 120 def iterate_together(lists, key=lambda x: x, reverse=False):
 121     """
 122     takes ordered, possibly sparse, lists with similar items
 123     (some items have a corresponding item in the other lists, some don't).
 124
 125     It then yield tuples of corresponding items, where one element is None is
 126     there is no corresponding entry in one of the lists.
 127
 128     Tuples where both elements are None are skipped.
 129
 130     The results of the key method are used for the comparisons.
 131
 132     If reverse is True, the lists are expected to be sorted in reverse order
 133     and the results will also be sorted reverse
 134
 135     >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
 136     [(1, 1), (2, None), (None, 3)]
 137
 138     >>> list(iterate_together([[], []]))
 139     []
 140
 141     >>> list(iterate_together([range(1, 3), range(3, 5)]))
 142     [(1, None), (2, None), (None, 3), (None, 4)]
 143
 144     >>> list(iterate_together([range(1, 3), []]))
 145     [(1, None), (2, None)]
 146
 147     >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
 148     [(1, None), (3, 3)]
 149     """
 150
 151     Next = collections.namedtuple('Next', 'item more')
 152     min_ = min if not reverse else max
 153     lt_  = operator.lt if not reverse else operator.gt
 154
 155     lists = [iter(l) for l in lists]
 156
 157     def _take(it):
 158         try:
 159             i = it.next()
 160             while i is None:
 161                 i = it.next()
 162             return Next(i, True)
 163         except StopIteration:
 164             return Next(None, False)
 165
 166     def new_res():
 167         return [None]*len(lists)
 168
 169     # take first bunch of items
 170     items = [_take(l) for l in lists]
 171
 172     while any(i.item is not None or i.more for i in items):
 173
 174         res = new_res()
 175
 176         for n, item in enumerate(items):
 177
 178             if item.item is None:
 179                 continue
 180
 181             if all(x is None for x in res):
 182                 res[n] = item.item
 183                 continue
 184
 185             min_v = min_(filter(lambda x: x is not None, res), key=key)
 186
 187             if key(item.item) == key(min_v):
 188                 res[n] = item.item
 189
 190             elif lt_(key(item.item), key(min_v)):
 191                 res = new_res()
 192                 res[n] = item.item
 193
 194         for n, x in enumerate(res):
 195             if x is not None:
 196                 items[n] = _take(lists[n])
 197
 198         yield tuple(res)
 199
 200
 201 def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
 202     print >> stream, '\r',
 203     print >> stream, '[ %s ] %s / %s | %s' % (
 204         '#'*int(float(val)/max_val*max_width) +
 205         ' ' * (max_width-(int(float(val)/max_val*max_width))),
 206         val,
 207         max_val,
 208         status_str),
 209     stream.flush()
 210
 211
 212 def set_cmp(list, simplify):
 213     """
 214     Builds a set out of a list but uses the results of simplify to determine equality between items
 215     """
 216     simpl = lambda x: (simplify(x), x)
 217     lst = dict(map(simpl, list))
 218     return lst.values()
 219
 220
 221 def first(it):
 222     """
 223     returns the first not-None object or None if the iterator is exhausted
 224     """
 225     for x in it:
 226         if x != None:
 227             return x
 228     return None
 229
 230
 231 def intersect(a, b):
 232      return list(set(a) & set(b))
 233
 234
 235
 236 def multi_request_view(cls, view, wrap=True, auto_advance=True,
 237         *args, **kwargs):
 238     """
 239     splits up a view request into several requests, which reduces
 240     the server load of the number of returned objects is large.
 241
 242     NOTE: As such a split request is obviously not atomical anymore, results
 243     might skip some elements of contain some twice
 244
 245     If auto_advance is False the method will always request the same range.
 246     This can be useful when the view contain unprocessed items and the caller
 247     processes the items, thus removing them from the view before the next
 248     request.
 249     """
 250
 251     per_page = kwargs.get('limit', 1000)
 252     kwargs['limit'] = per_page + 1
 253     db = cls.get_db()
 254     wrapper = kwargs.pop('wrapper', cls.wrap)
 255     cont = True
 256
 257     while cont:
 258
 259         resp = db.view(view, *args, **kwargs)
 260         cont = False
 261
 262         for n, obj in enumerate(resp.iterator()):
 263
 264             key = obj['key']
 265
 266             if wrap:
 267                 doc = wrapper(obj['doc'])
 268                 docid = doc._id
 269             else:
 270                 docid = obj['id']
 271                 doc = obj
 272
 273             if n == per_page:
 274                 if auto_advance:
 275                     kwargs['startkey'] = key
 276                     kwargs['startkey_docid'] = docid
 277                     if 'skip' in kwargs:
 278                         del kwargs['skip']
 279
 280                 # we reached the end of the page, load next one
 281                 cont = True
 282                 break
 283
 284             yield doc
 285
 286
 287 def remove_control_chars(s):
 288     import unicodedata, re
 289
 290     all_chars = (unichr(i) for i in xrange(0x110000))
 291     control_chars = ''.join(map(unichr, range(0,32) + range(127,160)))
 292     control_char_re = re.compile('[%s]' % re.escape(control_chars))
 293
 294     return control_char_re.sub('', s)
 295
 296
 297 def unzip(a):
 298     return tuple(map(list,zip(*a)))
 299
 300
 301 def parse_range(s, min, max, default=None):
 302     """
 303     Parses the string and returns its value. If the value is outside the given
 304     range, its closest number within the range is returned
 305
 306     >>> parse_range('5', 0, 10)
 307     5
 308
 309     >>> parse_range('0', 5, 10)
 310     5
 311
 312     >>> parse_range('15',0, 10)
 313     10
 314
 315     >>> parse_range('x', 0, 20)
 316     10
 317
 318     >>> parse_range('x', 0, 20, 20)
 319     20
 320     """
 321     try:
 322         val = int(s)
 323         if val < min:
 324             return min
 325         if val > max:
 326             return max
 327         return val
 328
 329     except (ValueError, TypeError):
 330         return default if default is not None else (max-min)/2
 331
 332
 333 def get_to_dict(cls, ids, get_id=lambda x: x._id, use_cache=False):
 334
 335     ids = list(set(ids))
 336     objs = dict()
 337
 338     cache_objs = []
 339     if use_cache:
 340         for id in ids:
 341             obj = cache.get(id)
 342             if obj is not None:
 343                 cache_objs.append(obj)
 344                 ids.remove(id)
 345
 346     db_objs = list(cls.get_multi(ids))
 347
 348     for obj in (cache_objs + db_objs):
 349
 350         # get_multi returns dict {'key': _id, 'error': 'not found'}
 351         # for non-existing objects
 352         if isinstance(obj, dict) and 'error' in obj:
 353             _id = obj['key']
 354             objs[_id] = None
 355             continue
 356
 357         ids = obj.get_ids() if hasattr(obj, 'get_ids') else [get_id(obj)]
 358         for i in ids:
 359             objs[i] = obj
 360
 361     if use_cache:
 362         for obj in db_objs:
 363             cache.set(get_id(obj), obj)
 364
 365     return objs
 366
 367
 368 def flatten(l):
 369     return [item for sublist in l for item in sublist]
 370
 371
 372 def linearize(key, iterators, reverse=False):
 373     """
 374     Linearizes a number of iterators, sorted by some comparison function
 375     """
 376
 377     iters = [iter(i) for i in iterators]
 378     vals = []
 379     for i in iters:
 380         try:
 381             v = i.next()
 382             vals. append( (v, i) )
 383         except StopIteration:
 384             continue
 385
 386     while vals:
 387         vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
 388         val, it = vals.pop(0)
 389         yield val
 390         try:
 391             next_val = it.next()
 392             vals.append( (next_val, it) )
 393         except StopIteration:
 394             pass
 395
 396
 397 def skip_pairs(iterator, cmp=cmp):
 398     """ Skips pairs of equal items
 399
 400     >>> list(skip_pairs([]))
 401     []
 402
 403     >>> list(skip_pairs([1]))
 404     [1]
 405
 406     >>> list(skip_pairs([1, 2, 3]))
 407     [1, 2, 3]
 408
 409     >>> list(skip_pairs([1, 1]))
 410     []
 411
 412     >>> list(skip_pairs([1, 2, 2]))
 413     [1]
 414
 415     >>> list(skip_pairs([1, 2, 2, 3]))
 416     [1, 3]
 417
 418     >>> list(skip_pairs([1, 2, 2, 2]))
 419     [1, 2]
 420
 421     >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
 422     [1, 3]
 423     """
 424
 425     iterator = iter(iterator)
 426     next = iterator.next()
 427
 428     while True:
 429         item = next
 430         try:
 431             next = iterator.next()
 432         except StopIteration as e:
 433             yield item
 434             raise e
 435
 436         if cmp(item, next) == 0:
 437             next = iterator.next()
 438         else:
 439             yield item
 440
 441
 442 def get_timestamp(datetime_obj):
 443     """ Returns the timestamp as an int for the given datetime object
 444
 445     >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
 446     1302168606
 447
 448     >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
 449     0
 450     """
 451     return int(time.mktime(datetime_obj.timetuple()))
 452
 453
 454
 455 re_url = re.compile('^https?://')
 456
 457 def is_url(string):
 458     """ Returns true if a string looks like an URL
 459
 460     >>> is_url('http://example.com/some-path/file.xml')
 461     True
 462
 463     >>> is_url('something else')
 464     False
 465     """
 466
 467     return bool(re_url.match(string))
 468
 469
 470 def is_couchdb_id(id_str):
 471     import string
 472     import operator
 473     import functools
 474     f = functools.partial(operator.contains, string.hexdigits)
 475     return len(id_str) == 32 and all(map(f, id_str))
 476
 477
 478 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
 479 # this does not increase asymptotical complexity
 480 # but can still waste more time than it saves.
 481 def shortest_of(strings):
 482     return min(strings, key=len)
 483
 484 def longest_substr(strings):
 485     """
 486     Returns the longest common substring of the given strings
 487     """
 488
 489     substr = ""
 490     if not strings:
 491         return substr
 492     reference = shortest_of(strings) #strings[0]
 493     length = len(reference)
 494     #find a suitable slice i:j
 495     for i in xrange(length):
 496         #only consider strings long at least len(substr) + 1
 497         for j in xrange(i + len(substr) + 1, length):
 498             candidate = reference[i:j]
 499             if all(candidate in text for text in strings):
 500                 substr = candidate
 501     return substr
 502
 503
 504
 505 def additional_value(it, gen_val, val_changed=lambda _: True):
 506     """ Provides an additional value to the elements, calculated when needed
 507
 508     For the elements from the iterator, some additional value can be computed
 509     by gen_val (which might be an expensive computation).
 510
 511     If the elements in the iterator are ordered so that some subsequent
 512     elements would generate the same additional value, val_changed can be
 513     provided, which receives the next element from the iterator and the
 514     previous additional value. If the element would generate the same
 515     additional value (val_changed returns False), its computation is skipped.
 516
 517     >>> # get the next full hundred higher than x
 518     >>> # this will probably be an expensive calculation
 519     >>> next_hundred = lambda x: x + 100-(x % 100)
 520
 521     >>> # returns True if h is not the value that next_hundred(x) would provide
 522     >>> # this should be a relatively cheap calculation, compared to the above
 523     >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
 524
 525     >>> xs = [0, 50, 100, 101, 199, 200, 201]
 526     >>> list(additional_value(xs, next_hundred, diff_hundred))
 527     [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
 528     """
 529
 530     _none = object()
 531     current = _none
 532
 533     for x in it:
 534         if current is _none or val_changed(x, current):
 535             current = gen_val(x)
 536
 537         yield (x, current)