mygpo/utils.py

   1 #
   2 # This file is part of my.gpodder.org.
   3 #
   4 # my.gpodder.org is free software: you can redistribute it and/or modify it
   5 # under the terms of the GNU Affero General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or (at your
   7 # option) any later version.
   8 #
   9 # my.gpodder.org is distributed in the hope that it will be useful, but
  10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
  12 # License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
  16 #
  17
  18 import sys
  19 import re
  20 import collections
  21 from datetime import datetime, timedelta, date
  22 import time
  23
  24 from django.core.cache import cache
  25
  26
  27 def daterange(from_date, to_date=None, leap=timedelta(days=1)):
  28     """
  29     >>> from_d = datetime(2010, 01, 01)
  30     >>> to_d = datetime(2010, 01, 05)
  31     >>> list(daterange(from_d, to_d))
  32     [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
  33     """
  34
  35     if to_date is None:
  36         if isinstance(from_date, datetime):
  37             to_date = datetime.now()
  38         else:
  39             to_date = date.today()
  40
  41     while from_date <= to_date:
  42         yield from_date
  43         from_date = from_date + leap
  44     return
  45
  46 def format_time(value):
  47     """Format an offset (in seconds) to a string
  48
  49     The offset should be an integer or float value.
  50
  51     >>> format_time(0)
  52     '00:00'
  53     >>> format_time(20)
  54     '00:20'
  55     >>> format_time(3600)
  56     '01:00:00'
  57     >>> format_time(10921)
  58     '03:02:01'
  59     """
  60     try:
  61         dt = datetime.utcfromtimestamp(value)
  62     except ValueError:
  63         return ''
  64
  65     if dt.hour == 0:
  66         return dt.strftime('%M:%S')
  67     else:
  68         return dt.strftime('%H:%M:%S')
  69
  70 def parse_time(value):
  71     """
  72     >>> parse_time(10)
  73     10
  74
  75     >>> parse_time('05:10') #5*60+10
  76     310
  77
  78     >>> parse_time('1:05:10') #60*60+5*60+10
  79     3910
  80     """
  81     if value is None:
  82         raise ValueError('None value in parse_time')
  83
  84     if isinstance(value, int):
  85         # Don't need to parse already-converted time value
  86         return value
  87
  88     if value == '':
  89         raise ValueError('Empty valueing in parse_time')
  90
  91     for format in ('%H:%M:%S', '%M:%S'):
  92         try:
  93             t = time.strptime(value, format)
  94             return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec
  95         except ValueError, e:
  96             continue
  97
  98     return int(value)
  99
 100
 101 def parse_bool(val):
 102     """
 103     >>> parse_bool('True')
 104     True
 105
 106     >>> parse_bool('true')
 107     True
 108
 109     >>> parse_bool('')
 110     False
 111     """
 112     if isinstance(val, bool):
 113         return val
 114     if val.lower() == 'true':
 115         return True
 116     return False
 117
 118
 119 def iterate_together(l1, l2, compare=lambda x, y: cmp(x, y)):
 120     """
 121     takes two ordered, possible sparse, lists l1 and l2 with similar items
 122     (some items have a corresponding item in the other list, some don't).
 123
 124     It then yield tuples of corresponding items, where one element is None is
 125     there is no corresponding entry in one of the lists.
 126
 127     Tuples where both elements are None are skipped.
 128
 129     compare is a method for comparing items from both lists; it defaults
 130     to cmp.
 131
 132     >>> list(iterate_together(range(1, 3), range(1, 4, 2)))
 133     [(1, 1), (2, None), (None, 3)]
 134
 135     >>> list(iterate_together([], []))
 136     []
 137
 138     >>> list(iterate_together(range(1, 3), range(3, 5)))
 139     [(1, None), (2, None), (None, 3), (None, 4)]
 140
 141     >>> list(iterate_together(range(1, 3), []))
 142     [(1, None), (2, None)]
 143
 144     >>> list(iterate_together([1, None, 3], [None, None, 3]))
 145     [(1, None), (3, 3)]
 146     """
 147
 148     l1 = iter(l1)
 149     l2 = iter(l2)
 150
 151     def _take(it):
 152         try:
 153             i = it.next()
 154             while i is None:
 155                 i = it.next()
 156             return i, True
 157         except StopIteration:
 158             return None, False
 159
 160     i1, more1 = _take(l1)
 161     i2, more2 = _take(l2)
 162
 163     while more1 or more2:
 164         if not more2 or (i1 is not None and compare(i1, i2) < 0):
 165             yield(i1, None)
 166             i1, more1 = _take(l1)
 167
 168         elif not more1 or (i2 is not None and compare(i1, i2) > 0):
 169             yield(None, i2)
 170             i2, more2 = _take(l2)
 171
 172         elif compare(i1, i2) == 0:
 173             yield(i1, i2)
 174             i1, more1 = _take(l1)
 175             i2, more2 = _take(l2)
 176
 177
 178 def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
 179     print >> stream, '\r',
 180     print >> stream, '[ %s ] %s / %s | %s' % (
 181         '#'*int(float(val)/max_val*max_width) +
 182         ' ' * (max_width-(int(float(val)/max_val*max_width))),
 183         val,
 184         max_val,
 185         status_str),
 186     stream.flush()
 187
 188
 189 def set_cmp(list, simplify):
 190     """
 191     Builds a set out of a list but uses the results of simplify to determine equality between items
 192     """
 193     simpl = lambda x: (simplify(x), x)
 194     lst = dict(map(simpl, list))
 195     return lst.values()
 196
 197
 198 def first(it):
 199     """
 200     returns the first not-None object or None if the iterator is exhausted
 201     """
 202     for x in it:
 203         if x != None:
 204             return x
 205     return None
 206
 207
 208 def intersect(a, b):
 209      return list(set(a) & set(b))
 210
 211
 212 def multi_request_view(cls, view, wrap=True, *args, **kwargs):
 213     """
 214     splits up a view request into several requests, which reduces
 215     the server load of the number of returned objects is large.
 216
 217     NOTE: As such a split request is obviously not atomical anymore, results
 218     might skip some elements of contain some twice
 219     """
 220
 221     per_page = kwargs.get('limit', 1000)
 222     kwargs['limit'] = per_page + 1
 223     db = cls.get_db()
 224     cont = True
 225
 226     while cont:
 227
 228         resp = db.view(view, *args, **kwargs)
 229         cont = False
 230
 231         for n, obj in enumerate(resp.iterator()):
 232
 233             key = obj['key']
 234
 235             if wrap:
 236                 doc = cls.wrap(obj['doc'])
 237                 docid = doc._id
 238             else:
 239                 docid = obj['id']
 240                 doc = obj
 241
 242             if n == per_page:
 243                 kwargs['startkey'] = key
 244                 kwargs['startkey_docid'] = docid
 245                 if 'skip' in kwargs:
 246                     del kwargs['skip']
 247
 248                 # we reached the end of the page, load next one
 249                 cont = True
 250                 break
 251
 252             yield doc
 253
 254
 255 def remove_control_chars(s):
 256     import unicodedata, re
 257
 258     all_chars = (unichr(i) for i in xrange(0x110000))
 259     control_chars = ''.join(map(unichr, range(0,32) + range(127,160)))
 260     control_char_re = re.compile('[%s]' % re.escape(control_chars))
 261
 262     return control_char_re.sub('', s)
 263
 264
 265 def unzip(a):
 266     return tuple(map(list,zip(*a)))
 267
 268
 269 def parse_range(s, min, max, default=None):
 270     """
 271     Parses the string and returns its value. If the value is outside the given
 272     range, its closest number within the range is returned
 273
 274     >>> parse_range('5', 0, 10)
 275     5
 276
 277     >>> parse_range('0', 5, 10)
 278     5
 279
 280     >>> parse_range('15',0, 10)
 281     10
 282
 283     >>> parse_range('x', 0, 20)
 284     10
 285
 286     >>> parse_range('x', 0, 20, 20)
 287     20
 288     """
 289     try:
 290         val = int(s)
 291         if val < min:
 292             return min
 293         if val > max:
 294             return max
 295         return val
 296
 297     except (ValueError, TypeError):
 298         return default if default is not None else (max-min)/2
 299
 300
 301 def get_to_dict(cls, ids, get_id=lambda x: x._id, use_cache=False):
 302
 303     ids = list(set(ids))
 304     objs = dict()
 305
 306     cache_objs = []
 307     if use_cache:
 308         for id in ids:
 309             obj = cache.get(id)
 310             if obj is not None:
 311                 cache_objs.append(obj)
 312                 ids.remove(id)
 313
 314     db_objs = list(cls.get_multi(ids))
 315
 316     if use_cache:
 317         for obj in db_objs:
 318             cache.set(get_id(obj), obj)
 319
 320     return dict((get_id(obj), obj) for obj in cache_objs + db_objs)
 321
 322
 323 def flatten(l):
 324     return [item for sublist in l for item in sublist]
 325
 326
 327 def linearize(key, iterators, reverse=False):
 328     """
 329     Linearizes a number of iterators, sorted by some comparison function
 330     """
 331
 332     iters = [iter(i) for i in iterators]
 333     vals = []
 334     for i in iters:
 335         try:
 336             v = i.next()
 337             vals. append( (v, i) )
 338         except StopIteration:
 339             continue
 340
 341     while vals:
 342         vals = sorted(vals, key=lambda x: key(x[0]), reverse=reverse)
 343         val, it = vals.pop(0)
 344         yield val
 345         try:
 346             next_val = it.next()
 347             vals.append( (next_val, it) )
 348         except StopIteration:
 349             pass
 350
 351
 352 def skip_pairs(iterator, cmp=cmp):
 353     """ Skips pairs of equal items
 354
 355     >>> list(skip_pairs([]))
 356     []
 357
 358     >>> list(skip_pairs([1]))
 359     [1]
 360
 361     >>> list(skip_pairs([1, 2, 3]))
 362     [1, 2, 3]
 363
 364     >>> list(skip_pairs([1, 1]))
 365     []
 366
 367     >>> list(skip_pairs([1, 2, 2]))
 368     [1]
 369
 370     >>> list(skip_pairs([1, 2, 2, 3]))
 371     [1, 3]
 372
 373     >>> list(skip_pairs([1, 2, 2, 2]))
 374     [1, 2]
 375
 376     >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
 377     [1, 3]
 378     """
 379
 380     iterator = iter(iterator)
 381     next = iterator.next()
 382
 383     while True:
 384         item = next
 385         try:
 386             next = iterator.next()
 387         except StopIteration as e:
 388             yield item
 389             raise e
 390
 391         if cmp(item, next) == 0:
 392             next = iterator.next()
 393         else:
 394             yield item
 395
 396
 397 def get_timestamp(datetime_obj):
 398     """ Returns the timestamp as an int for the given datetime object
 399
 400     >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
 401     1302168606
 402
 403     >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
 404     0
 405     """
 406     return int(time.mktime(datetime_obj.timetuple()))
 407
 408
 409
 410 re_url = re.compile('^https?://')
 411
 412 def is_url(string):
 413     """ Returns true if a string looks like an URL
 414
 415     >>> is_url('http://example.com/some-path/file.xml')
 416     True
 417
 418     >>> is_url('something else')
 419     False
 420     """
 421
 422     return bool(re_url.match(string))