avoid unicode decode error in podcast caching
[mygpo.git] / mygpo / db / couchdb / podcast.py
blobbfcb3ac24b3fe1184fc3d36799210beb03ef7f38
1 from hashlib import sha1
2 from random import random
4 from restkit import RequestFailed
6 from django.core.cache import cache
8 from mygpo.core.models import Podcast, PodcastGroup, PodcastSubscriberData
9 from mygpo.core.signals import incomplete_obj
10 from mygpo.decorators import repeat_on_conflict
11 from mygpo.cache import cache_result
12 from mygpo.db.couchdb import get_main_database
13 from mygpo.db import QueryParameterMissing
14 from mygpo.db.couchdb.utils import multi_request_view, is_couchdb_id
def podcast_slugs(base_slug):
    """ Returns all podcast slugs that start with *base_slug* """
    rows = Podcast.view('podcasts/by_slug',
            startkey = [base_slug, None],
            endkey   = [base_slug + 'ZZZZZ', None],
            wrap_doc = False,
        )
    # the slug is the first component of each row key
    return [row['key'][0] for row in rows]
@cache_result(timeout=60*60)
def podcast_count():
    """ Returns the total number of podcast documents """
    # limit=0 fetches no rows; only total_rows is needed
    view = Podcast.view('podcasts/by_id',
            limit = 0,
            stale = 'update_after',
        )
    return view.total_rows
@cache_result(timeout=60*60)
def podcasts_for_tag(tag):
    """ Yields (podcast-id, count) tuples for podcasts with the given tag.

    Both the server-assigned and the user-assigned tag views are
    queried, so some podcasts might be returned twice.

    Raises QueryParameterMissing if *tag* is empty. """

    if not tag:
        raise QueryParameterMissing('tag')

    # the same grouped reduce query is run against both tag views;
    # deduplicated from two copy-pasted query blocks
    for view in ('podcasts/by_tag', 'usertags/podcasts'):
        res = multi_request_view(Podcast, view,
                wrap        = False,
                startkey    = [tag, None],
                endkey      = [tag, {}],
                reduce      = True,
                group       = True,
                group_level = 2,
            )

        for r in res:
            # key is [tag, podcast-id], value is the tag count
            yield (r['key'][1], r['value'])
@cache_result(timeout=60*60)
def get_podcast_languages():
    """ Returns all 2-letter language codes that are used by podcasts.

    It filters obviously invalid strings, but does not check if any
    of these codes is contained in ISO 639. """

    # local import to avoid a circular dependency
    from mygpo.web.utils import sanitize_language_codes

    rows = Podcast.view('podcasts/by_language',
            group_level = 1,
            stale       = 'ok',
        )

    codes = [row['key'][0] for row in rows]
    return sorted(sanitize_language_codes(codes))
@cache_result(timeout=60*60)
def podcast_by_id(podcast_id, current_id=False):
    """ Returns the Podcast for the given CouchDB ID, or None """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    r = Podcast.view('podcasts/by_id',
            key          = podcast_id,
            classes      = [Podcast, PodcastGroup],
            include_docs = True,
        )

    if not r:
        return None

    # the row may wrap either a Podcast or its enclosing PodcastGroup
    group = r.first()
    podcast = group.get_podcast_by_id(podcast_id, current_id)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast
@cache_result(timeout=60*60)
def podcastgroup_by_id(group_id):
    """ Returns the PodcastGroup for the given CouchDB ID """

    if not group_id:
        raise QueryParameterMissing('group_id')

    group = PodcastGroup.get(group_id)

    if group.needs_update:
        incomplete_obj.send_robust(sender=group)

    return group
@cache_result(timeout=60*60)
def podcast_for_slug(slug):
    """ Returns the Podcast for the given slug, or None.

    Raises QueryParameterMissing if *slug* is empty. """

    if not slug:
        raise QueryParameterMissing('slug')

    r = Podcast.view('podcasts/by_slug',
            startkey     = [slug, None],
            endkey       = [slug, {}],
            include_docs = True,
            wrap_doc     = False,
        )

    if not r:
        return None

    res = r.first()
    doc = res['doc']
    if doc['doc_type'] == 'Podcast':
        obj = Podcast.wrap(doc)
    else:
        # the slug belongs to a podcast inside a PodcastGroup;
        # the podcast's ID is the second key component
        pid = res['key'][1]
        pg = PodcastGroup.wrap(doc)
        obj = pg.get_podcast_by_id(pid)

    if obj.needs_update:
        # bugfix: just send the signal -- the previous ``raise`` on
        # send_robust()'s return value raised a TypeError instead
        incomplete_obj.send_robust(sender=obj)

    return obj
@cache_result(timeout=60*60)
def podcast_for_slug_id(slug_id):
    """ Returns the Podcast for either a CouchDB-ID or a Slug """

    if is_couchdb_id(slug_id):
        return podcast_by_id(slug_id)

    return podcast_for_slug(slug_id)
@cache_result(timeout=60*60)
def podcastgroup_for_slug_id(slug_id):
    """ Returns the PodcastGroup for either a CouchDB-ID or a Slug """

    if not slug_id:
        raise QueryParameterMissing('slug_id')

    if is_couchdb_id(slug_id):
        return podcastgroup_by_id(slug_id)

    #TODO: implement
    return PodcastGroup.for_slug(slug_id)
def podcasts_by_id(ids):
    """ Returns the Podcasts for the given list of CouchDB IDs """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return []

    rows = Podcast.view('podcasts/by_id',
            keys         = ids,
            include_docs = True,
            wrap_doc     = False,
        )

    podcasts = [_wrap_podcast_group(row) for row in rows]

    for podcast in podcasts:
        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)

    return podcasts
@cache_result(timeout=60*60)
def podcast_for_oldid(oldid):
    """ Returns the Podcast for a numeric ID of the old MySQL database """

    if not oldid:
        raise QueryParameterMissing('oldid')

    r = Podcast.view('podcasts/by_oldid',
            key          = long(oldid),
            classes      = [Podcast, PodcastGroup],
            include_docs = True,
        )

    if not r:
        return None

    # the row may wrap a Podcast or its enclosing PodcastGroup
    podcast = r.first().get_podcast_by_oldid(oldid)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast
@cache_result(timeout=60*60)
def podcastgroup_for_oldid(oldid):
    """ Returns the PodcastGroup for a numeric ID of the old MySQL DB """

    if not oldid:
        raise QueryParameterMissing('oldid')

    r = PodcastGroup.view('podcasts/groups_by_oldid',
            key          = long(oldid),
            include_docs = True,
        )

    if not r:
        return None

    group = r.one()

    if group.needs_update:
        incomplete_obj.send_robust(sender=group)

    return group
def podcast_for_url(url, create=False):
    """ Returns the podcast that contains the given feed URL.

    If *create* is True a new, incomplete podcast is created for an
    unknown URL; otherwise None is returned. """

    if not url:
        raise QueryParameterMissing('url')

    # hash the (utf-8 encoded) URL so the cache key is safe
    # for non-ASCII feed URLs
    key = 'podcast-by-url-%s' % sha1(url.encode('utf-8')).hexdigest()

    cached = cache.get(key)
    if cached:
        return cached

    r = Podcast.view('podcasts/by_url',
            key=url,
            classes=[Podcast, PodcastGroup],
            include_docs=True
        )

    if r:
        podcast = r.first().get_podcast_by_url(url)

        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)
        else:
            # only complete podcasts are cached
            cache.set(key, podcast)

        return podcast

    if create:
        podcast = Podcast()
        podcast.urls = [url]
        podcast.save()
        incomplete_obj.send_robust(sender=podcast)
        return podcast

    return None
def random_podcasts(language='', chunk_size=5):
    """ Returns an iterator of random podcasts

    optionaly a language code can be specified. If given the podcasts will
    be restricted to this language. chunk_size determines how many podcasts
    will be fetched at once """

    while True:
        rows = Podcast.view('podcasts/random',
                startkey = [language, random()],
                include_docs = True,
                limit = chunk_size,
                stale = 'ok',
                wrap_doc = False,
            )

        if not rows:
            break

        for row in rows:

            # The view podcasts/random does not include incomplete podcasts,
            # so we don't need to send any 'incomplete_obj' signals here

            doc = row['doc']
            doc_type = doc['doc_type']

            if doc_type == 'Podcast':
                yield Podcast.wrap(doc)

            elif doc_type == 'PodcastGroup':
                yield PodcastGroup.wrap(doc)
def podcasts_by_last_update():
    """ Returns podcasts ordered by the time of their last update """
    rows = Podcast.view('podcasts/by_last_update',
            include_docs = True,
            stale        = 'update_after',
            wrap_doc     = False,
        )

    # TODO: this method is only used for retrieving podcasts to update;
    # should we really send 'incomplete_obj' signals here?

    return [_wrap_podcast_group_key1(row) for row in rows]
def all_podcasts():
    """ Iterates over all podcasts, including those inside groups """
    from mygpo.db.couchdb.utils import multi_request_view
    rows = multi_request_view(Podcast, 'podcasts/by_id',
            wrap         = False,
            include_docs = True,
            stale        = 'update_after',
        )

    # TODO: this method is only used for maintenance purposes; should we
    # really send 'incomplete_obj' signals here?

    for row in rows:
        doc = row['doc']
        if doc['doc_type'] == 'Podcast':
            yield Podcast.wrap(doc)
        else:
            # a PodcastGroup -- extract the podcast with the row's key
            group = PodcastGroup.wrap(doc)
            yield group.get_podcast_by_id(row[u'key'])
def all_podcasts_groups(cls):
    """ Iterates over all Podcast and PodcastGroup documents """
    return cls.view('podcasts/podcasts_groups',
            include_docs = True,
            classes      = [Podcast, PodcastGroup],
        ).iterator()
def podcasts_to_dict(ids, use_cache=False):
    """ Maps each requested podcast ID to its podcast object.

    IDs that don't exist map to None. With *use_cache*, podcasts are
    looked up in (and stored into) the cache where possible. """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return dict()

    # remove duplicate IDs
    ids = list(set(ids))
    objs = dict()

    cache_objs = []
    if use_cache:
        hits = cache.get_many(ids)
        cache_objs.extend(hits.values())
        # only query the database for the cache misses
        ids = [x for x in ids if x not in hits.keys()]

    db_objs = podcasts_by_id(ids)

    for obj in (cache_objs + db_objs):

        # get_multi returns dict {'key': _id, 'error': 'not found'}
        # for non-existing objects
        if isinstance(obj, dict) and 'error' in obj:
            objs[obj['key']] = None
            continue

        # a podcast can be reachable under several IDs
        for podcast_id in obj.get_ids():
            objs[podcast_id] = obj

    if use_cache:
        cache.set_many(dict((obj.get_id(), obj) for obj in db_objs))

    return objs
def podcasts_need_update():
    """ Iterates over the podcasts whose episodes need an update """
    db = get_main_database()
    rows = db.view('episodes/need_update',
            group_level = 1,
            reduce      = True,
        )

    # TODO: this method is only used for retrieving podcasts to update;
    # should we really send 'incomplete_obj' signals here?

    for row in rows:
        podcast = podcast_by_id(row['key'])
        if podcast:
            yield podcast
@cache_result(timeout=60*60)
def get_flattr_podcasts(offset=0, limit=20):
    """ returns all podcasts that contain Flattr payment URLs """

    rows = Podcast.view('podcasts/flattr',
            skip         = offset,
            limit        = limit,
            classes      = [Podcast, PodcastGroup],
            include_docs = True,
            reduce       = False,
        )

    podcasts = list(rows)

    for podcast in podcasts:
        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)

    return podcasts
@cache_result(timeout=60*60)
def get_flattr_podcast_count():
    """ returns the number of podcasts that contain Flattr payment URLs """
    r = list(Podcast.view('podcasts/flattr'))

    # a reduce view returns no rows at all when nothing matches;
    # previously r[0] raised an IndexError in that case
    if not r:
        return 0

    return r[0]['value']
def subscriberdata_for_podcast(podcast_id):
    """ Returns the subscriber data of the given podcast.

    A fresh, empty PodcastSubscriberData is returned if none is stored. """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    r = PodcastSubscriberData.view('podcasts/subscriber_data',
            key          = podcast_id,
            include_docs = True,
        )

    if r:
        return r.first()

    data = PodcastSubscriberData()
    data.podcast = podcast_id
    return data
def _wrap_podcast_group(res):
    """ Wraps a ``podcasts/by_id`` view row into a Podcast.

    If the row's document is a PodcastGroup, the podcast whose ID
    is the row key is extracted from the group. """
    doc = res['doc']
    if doc['doc_type'] == 'Podcast':
        return Podcast.wrap(doc)

    pg = PodcastGroup.wrap(doc)
    # renamed from ``id``, which shadowed the builtin
    podcast_id = res['key']
    return pg.get_podcast_by_id(podcast_id)
def _wrap_podcast_group_key1(res):
    """ Wraps a view row into a Podcast; the podcast ID is the
    second element of the row key """
    doc = res['doc']
    if doc['doc_type'] == 'Podcast':
        return Podcast.wrap(doc)

    group = PodcastGroup.wrap(doc)
    return group.get_podcast_by_id(res[u'key'][1])
def search_wrapper(result):
    """ Wraps a search result row into a Podcast or PodcastGroup """
    doc = result['doc']
    doc_type = doc['doc_type']
    if doc_type == 'Podcast':
        obj = Podcast.wrap(doc)
    elif doc_type == 'PodcastGroup':
        obj = PodcastGroup.wrap(doc)
    obj._id = result['id']
    return obj
@cache_result(timeout=60*60)
def search(q, offset=0, num_results=20):
    """ Searches for podcasts matching the query string *q*.

    Returns a (podcasts, total_rows) tuple; ([], 0) is returned for
    empty queries and failed search requests. """

    if not q:
        return [], 0

    db = get_main_database()

    #FIXME current couchdbkit can't parse responses for multi-query searches
    params = dict(
        wrapper      = search_wrapper,
        include_docs = True,
        limit        = num_results,
        stale        = 'update_after',
        skip         = offset,
        q            = q.replace(',', ''),
        sort         = '\\subscribers<int>',
    )

    try:
        res = db.search('podcasts/search', **params)
        podcasts = list(res)

        for podcast in podcasts:
            if podcast.needs_update:
                incomplete_obj.send_robust(sender=podcast)

        return podcasts, res.total_rows

    except RequestFailed:
        return [], 0
@repeat_on_conflict(['podcast'])
def update_additional_data(podcast, twitter):
    """ Stores the twitter handle on the podcast and saves it """
    podcast.twitter = twitter
    podcast.save()

    # clear the whole cache until we have a better invalidation mechanism
    cache.clear()