mygpo/db/couchdb/podcast.py

   1 from hashlib import sha1
   2 from random import random
   3
   4 from restkit import RequestFailed
   5
   6 from django.core.cache import cache
   7
   8 from mygpo.core.models import Podcast, PodcastGroup, PodcastSubscriberData
   9 from mygpo.decorators import repeat_on_conflict
  10 from mygpo.cache import cache_result
  11 from mygpo.couch import get_main_database
  12 from mygpo.db import QueryParameterMissing
  13 from mygpo.db.couchdb.utils import multi_request_view, is_couchdb_id
  14
  15
  16 def podcast_slugs(base_slug):
  17     res = Podcast.view('podcasts/by_slug',
  18             startkey = [base_slug, None],
  19             endkey   = [base_slug + 'ZZZZZ', None],
  20             wrap_doc = False,
  21         )
  22     return [r['key'][0] for r in res]
  23
  24
  25 @cache_result(timeout=60*60)
  26 def podcast_count():
  27     return Podcast.view('podcasts/by_id',
  28             limit = 0,
  29             stale = 'update_after',
  30         ).total_rows
  31
  32
  33 @cache_result(timeout=60*60)
  34 def podcasts_for_tag(tag):
  35     """ Returns the podcasts with the current tag.
  36
  37     Some podcasts might be returned twice """
  38
  39     if not tag:
  40         raise QueryParameterMissing('tag')
  41
  42     res = multi_request_view(Podcast, 'podcasts/by_tag',
  43             wrap        = False,
  44             startkey    = [tag, None],
  45             endkey      = [tag, {}],
  46             reduce      = True,
  47             group       = True,
  48             group_level = 2
  49         )
  50
  51     for r in res:
  52         yield (r['key'][1], r['value'])
  53
  54     res = multi_request_view(Podcast, 'usertags/podcasts',
  55             wrap        = False,
  56             startkey    = [tag, None],
  57             endkey      = [tag, {}],
  58             reduce      = True,
  59             group       = True,
  60             group_level = 2
  61         )
  62
  63     for r in res:
  64         yield (r['key'][1], r['value'])
  65
  66
  67 @cache_result(timeout=60*60)
  68 def get_podcast_languages():
  69     """ Returns all 2-letter language codes that are used by podcasts.
  70
  71     It filters obviously invalid strings, but does not check if any
  72     of these codes is contained in ISO 639. """
  73
  74     from mygpo.web.utils import sanitize_language_codes
  75
  76     res = Podcast.view('podcasts/by_language',
  77             group_level = 1,
  78             stale       = 'ok',
  79         )
  80
  81     langs = [r['key'][0] for r in res]
  82     sane_lang = sanitize_language_codes(langs)
  83     sane_lang.sort()
  84     return sane_lang
  85
  86
  87 @cache_result(timeout=60*60)
  88 def podcast_by_id(podcast_id, current_id=False):
  89
  90     if not podcast_id:
  91         raise QueryParameterMissing('podcast_id')
  92
  93     r = Podcast.view('podcasts/by_id',
  94             key          = podcast_id,
  95             classes      = [Podcast, PodcastGroup],
  96             include_docs = True,
  97         )
  98
  99     if not r:
 100         return None
 101
 102     podcast_group = r.first()
 103     return podcast_group.get_podcast_by_id(podcast_id, current_id)
 104
 105
 106
 107 @cache_result(timeout=60*60)
 108 def podcastgroup_by_id(group_id):
 109
 110     if not group_id:
 111         raise QueryParameterMissing('group_id')
 112
 113     return PodcastGroup.get(group_id)
 114
 115
 116
 117 @cache_result(timeout=60*60)
 118 def podcast_for_slug(slug):
 119
 120     if not slug:
 121         raise QueryParameterMissing('slug')
 122
 123     r = Podcast.view('podcasts/by_slug',
 124             startkey     = [slug, None],
 125             endkey       = [slug, {}],
 126             include_docs = True,
 127             wrap_doc     = False,
 128         )
 129
 130     if not r:
 131         return None
 132
 133     res = r.first()
 134     doc = res['doc']
 135     if doc['doc_type'] == 'Podcast':
 136         return Podcast.wrap(doc)
 137     else:
 138         pid = res['key'][1]
 139         pg = PodcastGroup.wrap(doc)
 140         return pg.get_podcast_by_id(pid)
 141
 142
 143 @cache_result(timeout=60*60)
 144 def podcast_for_slug_id(slug_id):
 145     """ Returns the Podcast for either an CouchDB-ID for a Slug """
 146
 147     if is_couchdb_id(slug_id):
 148         return podcast_by_id(slug_id)
 149     else:
 150         return podcast_for_slug(slug_id)
 151
 152
 153 @cache_result(timeout=60*60)
 154 def podcastgroup_for_slug_id(slug_id):
 155     """ Returns the Podcast for either an CouchDB-ID for a Slug """
 156
 157     if not slug_id:
 158         raise QueryParameterMissing('slug_id')
 159
 160     if is_couchdb_id(slug_id):
 161         return PodcastGroup.get(slug_id)
 162
 163     else:
 164         #TODO: implement
 165         return PodcastGroup.for_slug(slug_id)
 166
 167
 168
 169 def podcasts_by_id(ids):
 170
 171     if ids is None:
 172         raise QueryParameterMissing('ids')
 173
 174     if not ids:
 175         return []
 176
 177     r = Podcast.view('podcasts/by_id',
 178             keys         = ids,
 179             include_docs = True,
 180             wrap_doc     = False
 181         )
 182
 183     return map(_wrap_podcast_group, r)
 184
 185
 186
 187 @cache_result(timeout=60*60)
 188 def podcast_for_oldid(oldid):
 189
 190     if not oldid:
 191         raise QueryParameterMissing('oldid')
 192
 193     r = Podcast.view('podcasts/by_oldid',
 194             key          = long(oldid),
 195             classes      = [Podcast, PodcastGroup],
 196             include_docs = True,
 197         )
 198
 199     if not r:
 200         return None
 201
 202     podcast_group = r.first()
 203     return podcast_group.get_podcast_by_oldid(oldid)
 204
 205
 206 @cache_result(timeout=60*60)
 207 def podcastgroup_for_oldid(oldid):
 208
 209     if not oldid:
 210         raise QueryParameterMissing('oldid')
 211
 212     r = PodcastGroup.view('podcasts/groups_by_oldid',
 213             key          = long(oldid),
 214             include_docs = True,
 215         )
 216
 217     return r.one() if r else None
 218
 219
 220
 221 def podcast_for_url(url, create=False):
 222
 223     if not url:
 224         raise QueryParameterMissing('url')
 225
 226     key = 'podcast-by-url-%s' % sha1(url).hexdigest()
 227
 228     podcast = cache.get(key)
 229     if podcast:
 230         return podcast
 231
 232     r = Podcast.view('podcasts/by_url',
 233             key=url,
 234             classes=[Podcast, PodcastGroup],
 235             include_docs=True
 236         )
 237
 238     if r:
 239         podcast_group = r.first()
 240         podcast = podcast_group.get_podcast_by_url(url)
 241         cache.set(key, podcast)
 242         return podcast
 243
 244     if create:
 245         podcast = Podcast()
 246         podcast.urls = [url]
 247         podcast.save()
 248         cache.set(key, podcast)
 249         return podcast
 250
 251     return None
 252
 253
 254
 255
 256 def random_podcasts(language='', chunk_size=5):
 257     """ Returns an iterator of random podcasts
 258
 259     optionaly a language code can be specified. If given the podcasts will
 260     be restricted to this language. chunk_size determines how many podcasts
 261     will be fetched at once """
 262
 263     while True:
 264         rnd = random()
 265         res = Podcast.view('podcasts/random',
 266                 startkey     = [language, rnd],
 267                 include_docs = True,
 268                 limit        = chunk_size,
 269                 stale        = 'ok',
 270                 wrap_doc     = False,
 271             )
 272
 273         if not res:
 274             break
 275
 276         for r in res:
 277             obj = r['doc']
 278             if obj['doc_type'] == 'Podcast':
 279                 yield Podcast.wrap(obj)
 280
 281             elif obj['doc_type'] == 'PodcastGroup':
 282                 yield PodcastGroup.wrap(obj)
 283
 284
 285
 286 def podcasts_by_last_update():
 287     res = Podcast.view('podcasts/by_last_update',
 288             include_docs = True,
 289             stale        = 'update_after',
 290             wrap_doc     = False,
 291         )
 292
 293     return map(_wrap_podcast_group_key1, res)
 294
 295
 296
 297
 298 def all_podcasts():
 299     from mygpo.db.couchdb.utils import multi_request_view
 300     res = multi_request_view(Podcast,'podcasts/by_id',
 301             wrap         = False,
 302             include_docs = True,
 303             stale        = 'update_after',
 304         )
 305
 306     for r in res:
 307         obj = r['doc']
 308         if obj['doc_type'] == 'Podcast':
 309             yield Podcast.wrap(obj)
 310         else:
 311             pid = r[u'key']
 312             pg = PodcastGroup.wrap(obj)
 313             podcast = pg.get_podcast_by_id(pid)
 314             yield podcast
 315
 316
 317 def all_podcasts_groups(cls):
 318     return cls.view('podcasts/podcasts_groups', include_docs=True,
 319         classes=[Podcast, PodcastGroup]).iterator()
 320
 321
 322
 323 def podcasts_to_dict(ids, use_cache=False):
 324
 325     if ids is None:
 326         raise QueryParameterMissing('ids')
 327
 328     if not ids:
 329         return dict()
 330
 331
 332     ids = list(set(ids))
 333     objs = dict()
 334
 335     cache_objs = []
 336     if use_cache:
 337         res = cache.get_many(ids)
 338         cache_objs.extend(res.values())
 339         ids = [x for x in ids if x not in res.keys()]
 340
 341     db_objs = podcasts_by_id(ids)
 342
 343     for obj in (cache_objs + db_objs):
 344
 345         # get_multi returns dict {'key': _id, 'error': 'not found'}
 346         # for non-existing objects
 347         if isinstance(obj, dict) and 'error' in obj:
 348             _id = obj['key']
 349             objs[_id] = None
 350             continue
 351
 352         for i in obj.get_ids():
 353             objs[i] = obj
 354
 355     if use_cache:
 356         cache.set_many(dict( (obj.get_id(), obj) for obj in db_objs))
 357
 358     return objs
 359
 360
 361
 362 def podcasts_need_update():
 363     db = get_main_database()
 364     res = db.view('episodes/need_update',
 365             group_level = 1,
 366             reduce      = True,
 367         )
 368
 369     for r in res:
 370         podcast_id = r['key']
 371         podcast = podcast_by_id(podcast_id)
 372         if podcast:
 373             yield podcast
 374
 375
 376 def subscriberdata_for_podcast(podcast_id):
 377
 378     if not podcast_id:
 379         raise QueryParameterMissing('podcast_id')
 380
 381     r = PodcastSubscriberData.view('podcasts/subscriber_data',
 382             key          = podcast_id,
 383             include_docs = True,
 384         )
 385
 386     if r:
 387         return r.first()
 388
 389     data = PodcastSubscriberData()
 390     data.podcast = podcast_id
 391     return data
 392
 393
 394
 395 def _wrap_podcast_group(res):
 396     if res['doc']['doc_type'] == 'Podcast':
 397         return Podcast.wrap(res['doc'])
 398     else:
 399         pg = PodcastGroup.wrap(res['doc'])
 400         id = res['key']
 401         return pg.get_podcast_by_id(id)
 402
 403
 404 def _wrap_podcast_group_key1(res):
 405     obj = res['doc']
 406     if obj['doc_type'] == 'Podcast':
 407         return Podcast.wrap(obj)
 408
 409     else:
 410         pid = res[u'key'][1]
 411         pg = PodcastGroup.wrap(obj)
 412         podcast = pg.get_podcast_by_id(pid)
 413         return podcast
 414
 415
 416
 417 def search_wrapper(result):
 418     doc = result['doc']
 419     if doc['doc_type'] == 'Podcast':
 420         p = Podcast.wrap(doc)
 421     elif doc['doc_type'] == 'PodcastGroup':
 422         p = PodcastGroup.wrap(doc)
 423     p._id = result['id']
 424     return p
 425
 426
 427 @cache_result(timeout=60*60)
 428 def search(q, offset=0, num_results=20):
 429
 430     if not q:
 431         return [], 0
 432
 433     db = get_main_database()
 434
 435     #FIXME current couchdbkit can't parse responses for multi-query searches
 436     q = q.replace(',', '')
 437
 438     try:
 439         res = db.search('podcasts/search',
 440                 wrapper      = search_wrapper,
 441                 include_docs = True,
 442                 limit        = num_results,
 443                 stale        = 'update_after',
 444                 skip         = offset,
 445                 q            = q,
 446                 sort='\\subscribers<int>')
 447
 448         return list(res), res.total_rows
 449
 450     except RequestFailed:
 451         return [], 0
 452
 453
 454 @repeat_on_conflict(['podcast'])
 455 def update_additional_data(podcast, twitter):
 456     podcast.twitter = twitter
 457     podcast.save()
 458
 459     # clear the whole cache until we have a better invalidation mechanism
 460     cache.clear()