mygpo/db/couchdb/podcast.py

   1 from hashlib import sha1
   2 from random import random
   3
   4 from restkit import RequestFailed
   5
   6 from django.core.cache import cache
   7
   8 from mygpo.core.models import Podcast, PodcastGroup, PodcastSubscriberData
   9 from mygpo.decorators import repeat_on_conflict
  10 from mygpo.cache import cache_result
  11 from mygpo.db.couchdb import get_main_database
  12 from mygpo.db import QueryParameterMissing
  13 from mygpo.db.couchdb.utils import multi_request_view, is_couchdb_id
  14
  15
  16 def podcast_slugs(base_slug):
  17     res = Podcast.view('podcasts/by_slug',
  18             startkey = [base_slug, None],
  19             endkey   = [base_slug + 'ZZZZZ', None],
  20             wrap_doc = False,
  21         )
  22     return [r['key'][0] for r in res]
  23
  24
  25 @cache_result(timeout=60*60)
  26 def podcast_count():
  27     return Podcast.view('podcasts/by_id',
  28             limit = 0,
  29             stale = 'update_after',
  30         ).total_rows
  31
  32
  33 @cache_result(timeout=60*60)
  34 def podcasts_for_tag(tag):
  35     """ Returns the podcasts with the current tag.
  36
  37     Some podcasts might be returned twice """
  38
  39     if not tag:
  40         raise QueryParameterMissing('tag')
  41
  42     res = multi_request_view(Podcast, 'podcasts/by_tag',
  43             wrap        = False,
  44             startkey    = [tag, None],
  45             endkey      = [tag, {}],
  46             reduce      = True,
  47             group       = True,
  48             group_level = 2
  49         )
  50
  51     for r in res:
  52         yield (r['key'][1], r['value'])
  53
  54     res = multi_request_view(Podcast, 'usertags/podcasts',
  55             wrap        = False,
  56             startkey    = [tag, None],
  57             endkey      = [tag, {}],
  58             reduce      = True,
  59             group       = True,
  60             group_level = 2
  61         )
  62
  63     for r in res:
  64         yield (r['key'][1], r['value'])
  65
  66
  67 @cache_result(timeout=60*60)
  68 def get_podcast_languages():
  69     """ Returns all 2-letter language codes that are used by podcasts.
  70
  71     It filters obviously invalid strings, but does not check if any
  72     of these codes is contained in ISO 639. """
  73
  74     from mygpo.web.utils import sanitize_language_codes
  75
  76     res = Podcast.view('podcasts/by_language',
  77             group_level = 1,
  78             stale       = 'ok',
  79         )
  80
  81     langs = [r['key'][0] for r in res]
  82     sane_lang = sanitize_language_codes(langs)
  83     sane_lang.sort()
  84     return sane_lang
  85
  86
  87 @cache_result(timeout=60*60)
  88 def podcast_by_id(podcast_id, current_id=False):
  89
  90     if not podcast_id:
  91         raise QueryParameterMissing('podcast_id')
  92
  93     r = Podcast.view('podcasts/by_id',
  94             key          = podcast_id,
  95             classes      = [Podcast, PodcastGroup],
  96             include_docs = True,
  97         )
  98
  99     if not r:
 100         return None
 101
 102     podcast_group = r.first()
 103     return podcast_group.get_podcast_by_id(podcast_id, current_id)
 104
 105
 106
 107 @cache_result(timeout=60*60)
 108 def podcastgroup_by_id(group_id):
 109
 110     if not group_id:
 111         raise QueryParameterMissing('group_id')
 112
 113     return PodcastGroup.get(group_id)
 114
 115
 116
 117 @cache_result(timeout=60*60)
 118 def podcast_for_slug(slug):
 119
 120     if not slug:
 121         raise QueryParameterMissing('slug')
 122
 123     r = Podcast.view('podcasts/by_slug',
 124             startkey     = [slug, None],
 125             endkey       = [slug, {}],
 126             include_docs = True,
 127             wrap_doc     = False,
 128         )
 129
 130     if not r:
 131         return None
 132
 133     res = r.first()
 134     doc = res['doc']
 135     if doc['doc_type'] == 'Podcast':
 136         return Podcast.wrap(doc)
 137     else:
 138         pid = res['key'][1]
 139         pg = PodcastGroup.wrap(doc)
 140         return pg.get_podcast_by_id(pid)
 141
 142
 143 @cache_result(timeout=60*60)
 144 def podcast_for_slug_id(slug_id):
 145     """ Returns the Podcast for either an CouchDB-ID for a Slug """
 146
 147     if is_couchdb_id(slug_id):
 148         return podcast_by_id(slug_id)
 149     else:
 150         return podcast_for_slug(slug_id)
 151
 152
 153 @cache_result(timeout=60*60)
 154 def podcastgroup_for_slug_id(slug_id):
 155     """ Returns the Podcast for either an CouchDB-ID for a Slug """
 156
 157     if not slug_id:
 158         raise QueryParameterMissing('slug_id')
 159
 160     if is_couchdb_id(slug_id):
 161         return PodcastGroup.get(slug_id)
 162
 163     else:
 164         #TODO: implement
 165         return PodcastGroup.for_slug(slug_id)
 166
 167
 168
 169 def podcasts_by_id(ids):
 170
 171     if ids is None:
 172         raise QueryParameterMissing('ids')
 173
 174     if not ids:
 175         return []
 176
 177     r = Podcast.view('podcasts/by_id',
 178             keys         = ids,
 179             include_docs = True,
 180             wrap_doc     = False
 181         )
 182
 183     return map(_wrap_podcast_group, r)
 184
 185
 186
 187 @cache_result(timeout=60*60)
 188 def podcast_for_oldid(oldid):
 189
 190     if not oldid:
 191         raise QueryParameterMissing('oldid')
 192
 193     r = Podcast.view('podcasts/by_oldid',
 194             key          = long(oldid),
 195             classes      = [Podcast, PodcastGroup],
 196             include_docs = True,
 197         )
 198
 199     if not r:
 200         return None
 201
 202     podcast_group = r.first()
 203     return podcast_group.get_podcast_by_oldid(oldid)
 204
 205
 206 @cache_result(timeout=60*60)
 207 def podcastgroup_for_oldid(oldid):
 208
 209     if not oldid:
 210         raise QueryParameterMissing('oldid')
 211
 212     r = PodcastGroup.view('podcasts/groups_by_oldid',
 213             key          = long(oldid),
 214             include_docs = True,
 215         )
 216
 217     return r.one() if r else None
 218
 219
 220
 221 def podcast_for_url(url, create=False):
 222
 223     if not url:
 224         raise QueryParameterMissing('url')
 225
 226     key = 'podcast-by-url-%s' % sha1(url).hexdigest()
 227
 228     podcast = cache.get(key)
 229     if podcast:
 230         return podcast
 231
 232     r = Podcast.view('podcasts/by_url',
 233             key=url,
 234             classes=[Podcast, PodcastGroup],
 235             include_docs=True
 236         )
 237
 238     if r:
 239         podcast_group = r.first()
 240         podcast = podcast_group.get_podcast_by_url(url)
 241         cache.set(key, podcast)
 242         return podcast
 243
 244     if create:
 245         podcast = Podcast()
 246         podcast.urls = [url]
 247         podcast.save()
 248         cache.set(key, podcast)
 249         return podcast
 250
 251     return None
 252
 253
 254
 255
 256 def random_podcasts(language='', chunk_size=5):
 257     """ Returns an iterator of random podcasts
 258
 259     optionaly a language code can be specified. If given the podcasts will
 260     be restricted to this language. chunk_size determines how many podcasts
 261     will be fetched at once """
 262
 263     while True:
 264         rnd = random()
 265         res = Podcast.view('podcasts/random',
 266                 startkey     = [language, rnd],
 267                 include_docs = True,
 268                 limit        = chunk_size,
 269                 stale        = 'ok',
 270                 wrap_doc     = False,
 271             )
 272
 273         if not res:
 274             break
 275
 276         for r in res:
 277             obj = r['doc']
 278             if obj['doc_type'] == 'Podcast':
 279                 yield Podcast.wrap(obj)
 280
 281             elif obj['doc_type'] == 'PodcastGroup':
 282                 yield PodcastGroup.wrap(obj)
 283
 284
 285
 286 def podcasts_by_last_update():
 287     res = Podcast.view('podcasts/by_last_update',
 288             include_docs = True,
 289             stale        = 'update_after',
 290             wrap_doc     = False,
 291         )
 292
 293     return map(_wrap_podcast_group_key1, res)
 294
 295
 296
 297
 298 def all_podcasts():
 299     from mygpo.db.couchdb.utils import multi_request_view
 300     res = multi_request_view(Podcast,'podcasts/by_id',
 301             wrap         = False,
 302             include_docs = True,
 303             stale        = 'update_after',
 304         )
 305
 306     for r in res:
 307         obj = r['doc']
 308         if obj['doc_type'] == 'Podcast':
 309             yield Podcast.wrap(obj)
 310         else:
 311             pid = r[u'key']
 312             pg = PodcastGroup.wrap(obj)
 313             podcast = pg.get_podcast_by_id(pid)
 314             yield podcast
 315
 316
 317 def all_podcasts_groups(cls):
 318     return cls.view('podcasts/podcasts_groups', include_docs=True,
 319         classes=[Podcast, PodcastGroup]).iterator()
 320
 321
 322
 323 def podcasts_to_dict(ids, use_cache=False):
 324
 325     if ids is None:
 326         raise QueryParameterMissing('ids')
 327
 328     if not ids:
 329         return dict()
 330
 331
 332     ids = list(set(ids))
 333     objs = dict()
 334
 335     cache_objs = []
 336     if use_cache:
 337         res = cache.get_many(ids)
 338         cache_objs.extend(res.values())
 339         ids = [x for x in ids if x not in res.keys()]
 340
 341     db_objs = podcasts_by_id(ids)
 342
 343     for obj in (cache_objs + db_objs):
 344
 345         # get_multi returns dict {'key': _id, 'error': 'not found'}
 346         # for non-existing objects
 347         if isinstance(obj, dict) and 'error' in obj:
 348             _id = obj['key']
 349             objs[_id] = None
 350             continue
 351
 352         for i in obj.get_ids():
 353             objs[i] = obj
 354
 355     if use_cache:
 356         cache.set_many(dict( (obj.get_id(), obj) for obj in db_objs))
 357
 358     return objs
 359
 360
 361
 362 def podcasts_need_update():
 363     db = get_main_database()
 364     res = db.view('episodes/need_update',
 365             group_level = 1,
 366             reduce      = True,
 367         )
 368
 369     for r in res:
 370         podcast_id = r['key']
 371         podcast = podcast_by_id(podcast_id)
 372         if podcast:
 373             yield podcast
 374
 375
 376 @cache_result(timeout=60*60)
 377 def get_flattr_podcasts(offset=0, limit=20):
 378     """ returns all podcasts that contain Flattr payment URLs """
 379
 380     r = Podcast.view('podcasts/flattr',
 381             skip         = offset,
 382             limit        = limit,
 383             classes      = [Podcast, PodcastGroup],
 384             include_docs = True,
 385             reduce       = False,
 386         )
 387
 388     return list(r)
 389
 390
 391 @cache_result(timeout=60*60)
 392 def get_flattr_podcast_count():
 393     """ returns the number of podcasts that contain Flattr payment URLs """
 394     r = list(Podcast.view('podcasts/flattr'))
 395     return r[0]['value']
 396
 397
 398 def subscriberdata_for_podcast(podcast_id):
 399
 400     if not podcast_id:
 401         raise QueryParameterMissing('podcast_id')
 402
 403     r = PodcastSubscriberData.view('podcasts/subscriber_data',
 404             key          = podcast_id,
 405             include_docs = True,
 406         )
 407
 408     if r:
 409         return r.first()
 410
 411     data = PodcastSubscriberData()
 412     data.podcast = podcast_id
 413     return data
 414
 415
 416
 417 def _wrap_podcast_group(res):
 418     if res['doc']['doc_type'] == 'Podcast':
 419         return Podcast.wrap(res['doc'])
 420     else:
 421         pg = PodcastGroup.wrap(res['doc'])
 422         id = res['key']
 423         return pg.get_podcast_by_id(id)
 424
 425
 426 def _wrap_podcast_group_key1(res):
 427     obj = res['doc']
 428     if obj['doc_type'] == 'Podcast':
 429         return Podcast.wrap(obj)
 430
 431     else:
 432         pid = res[u'key'][1]
 433         pg = PodcastGroup.wrap(obj)
 434         podcast = pg.get_podcast_by_id(pid)
 435         return podcast
 436
 437
 438
 439 def search_wrapper(result):
 440     doc = result['doc']
 441     if doc['doc_type'] == 'Podcast':
 442         p = Podcast.wrap(doc)
 443     elif doc['doc_type'] == 'PodcastGroup':
 444         p = PodcastGroup.wrap(doc)
 445     p._id = result['id']
 446     return p
 447
 448
 449 @cache_result(timeout=60*60)
 450 def search(q, offset=0, num_results=20):
 451
 452     if not q:
 453         return [], 0
 454
 455     db = get_main_database()
 456
 457     #FIXME current couchdbkit can't parse responses for multi-query searches
 458     q = q.replace(',', '')
 459
 460     try:
 461         res = db.search('podcasts/search',
 462                 wrapper      = search_wrapper,
 463                 include_docs = True,
 464                 limit        = num_results,
 465                 stale        = 'update_after',
 466                 skip         = offset,
 467                 q            = q,
 468                 sort='\\subscribers<int>')
 469
 470         return list(res), res.total_rows
 471
 472     except RequestFailed:
 473         return [], 0
 474
 475
 476 @repeat_on_conflict(['podcast'])
 477 def update_additional_data(podcast, twitter):
 478     podcast.twitter = twitter
 479     podcast.save()
 480
 481     # clear the whole cache until we have a better invalidation mechanism
 482     cache.clear()