mygpo/db/couchdb/podcast.py

   1 from hashlib import sha1
   2 from random import random
   3 from datetime import datetime
   4
   5 from restkit import RequestFailed
   6
   7 from django.core.cache import cache
   8
   9 from mygpo.core.models import Podcast, PodcastGroup, PodcastSubscriberData
  10 from mygpo.core.signals import incomplete_obj
  11 from mygpo.decorators import repeat_on_conflict
  12 from mygpo.cache import cache_result
  13 from mygpo.utils import get_timestamp
  14 from mygpo.db.couchdb import get_main_database, get_userdata_database
  15 from mygpo.db import QueryParameterMissing
  16 from mygpo.db.couchdb.utils import multi_request_view, is_couchdb_id
  17
  18
  19 def podcast_slugs(base_slug):
  20     res = Podcast.view('podcasts/by_slug',
  21             startkey = [base_slug, None],
  22             endkey   = [base_slug + 'ZZZZZ', None],
  23             wrap_doc = False,
  24         )
  25     return [r['key'][0] for r in res]
  26
  27
  28 @cache_result(timeout=60*60)
  29 def podcast_count():
  30     return Podcast.view('podcasts/by_id',
  31             limit = 0,
  32             stale = 'update_after',
  33         ).total_rows
  34
  35
  36 @cache_result(timeout=60*60)
  37 def podcasts_for_tag(tag):
  38     """ Returns the podcasts with the current tag.
  39
  40     Some podcasts might be returned twice """
  41
  42     if not tag:
  43         raise QueryParameterMissing('tag')
  44
  45     res = multi_request_view(Podcast, 'podcasts/by_tag',
  46             wrap        = False,
  47             startkey    = [tag, None],
  48             endkey      = [tag, {}],
  49             reduce      = True,
  50             group       = True,
  51             group_level = 2
  52         )
  53
  54     for r in res:
  55         yield (r['key'][1], r['value'])
  56
  57     udb = get_userdata_database()
  58     res = multi_request_view(udb, 'usertags/podcasts',
  59             wrap        = False,
  60             startkey    = [tag, None],
  61             endkey      = [tag, {}],
  62             reduce      = True,
  63             group       = True,
  64             group_level = 2
  65         )
  66
  67     for r in res:
  68         yield (r['key'][1], r['value'])
  69
  70
  71 @cache_result(timeout=60*60)
  72 def get_podcast_languages():
  73     """ Returns all 2-letter language codes that are used by podcasts.
  74
  75     It filters obviously invalid strings, but does not check if any
  76     of these codes is contained in ISO 639. """
  77
  78     from mygpo.web.utils import sanitize_language_codes
  79
  80     res = Podcast.view('podcasts/by_language',
  81             group_level = 1,
  82             stale       = 'ok',
  83         )
  84
  85     langs = [r['key'][0] for r in res]
  86     sane_lang = sanitize_language_codes(langs)
  87     sane_lang.sort()
  88     return sane_lang
  89
  90
  91 def podcast_by_id_uncached(podcast_id, current_id=False):
  92
  93     if not podcast_id:
  94         raise QueryParameterMissing('podcast_id')
  95
  96     r = Podcast.view('podcasts/by_id',
  97             key          = podcast_id,
  98             classes      = [Podcast, PodcastGroup],
  99             include_docs = True,
 100         )
 101
 102     if not r:
 103         return None
 104
 105     podcast_group = r.first()
 106
 107     podcast = podcast_group.get_podcast_by_id(podcast_id, current_id)
 108
 109     if podcast.needs_update:
 110         incomplete_obj.send_robust(sender=podcast)
 111
 112     return podcast
 113
 114
# Cached variant of podcast_by_id_uncached, wrapped with cache_result using
# timeout = 60*60 (presumably seconds, i.e. one hour -- confirm in
# mygpo.cache). The uncached function stays available under its own name
# and is used by reload_podcast.
podcast_by_id = cache_result(timeout=60*60)(podcast_by_id_uncached)
 116
 117
 118 @cache_result(timeout=60*60)
 119 def podcastgroup_by_id(group_id):
 120
 121     if not group_id:
 122         raise QueryParameterMissing('group_id')
 123
 124     pg = PodcastGroup.get(group_id)
 125
 126     if pg.needs_update:
 127         incomplete_obj.send_robust(sender=pg)
 128
 129     return pg
 130
 131
 132
 133 @cache_result(timeout=60*60)
 134 def podcast_for_slug(slug):
 135
 136     if not slug:
 137         raise QueryParameterMissing('slug')
 138
 139     r = Podcast.view('podcasts/by_slug',
 140             startkey     = [slug, None],
 141             endkey       = [slug, {}],
 142             include_docs = True,
 143             wrap_doc     = False,
 144         )
 145
 146     if not r:
 147         return None
 148
 149     res = r.first()
 150     doc = res['doc']
 151     if doc['doc_type'] == 'Podcast':
 152         obj = Podcast.wrap(doc)
 153     else:
 154         pid = res['key'][1]
 155         pg = PodcastGroup.wrap(doc)
 156
 157         if pid == pg._id:
 158             # TODO: we don't return PodcastGroups atm
 159             return None
 160
 161         obj = pg.get_podcast_by_id(pid)
 162
 163     if obj.needs_update:
 164         incomplete_obj.send_robust(sender=obj)
 165
 166     return obj
 167
 168
 169 @cache_result(timeout=60*60)
 170 def podcast_for_slug_id(slug_id):
 171     """ Returns the Podcast for either an CouchDB-ID for a Slug """
 172
 173     if is_couchdb_id(slug_id):
 174         return podcast_by_id(slug_id)
 175     else:
 176         return podcast_for_slug(slug_id)
 177
 178
 179 @cache_result(timeout=60*60)
 180 def podcastgroup_for_slug_id(slug_id):
 181     """ Returns the Podcast for either an CouchDB-ID for a Slug """
 182
 183     if not slug_id:
 184         raise QueryParameterMissing('slug_id')
 185
 186     if is_couchdb_id(slug_id):
 187         return podcastgroup_by_id(slug_id)
 188
 189     else:
 190         #TODO: implement
 191         return PodcastGroup.for_slug(slug_id)
 192
 193
 194
 195 def podcasts_by_id(ids):
 196
 197     if ids is None:
 198         raise QueryParameterMissing('ids')
 199
 200     if not ids:
 201         return []
 202
 203     r = Podcast.view('podcasts/by_id',
 204             keys         = ids,
 205             include_docs = True,
 206             wrap_doc     = False
 207         )
 208
 209     podcasts = map(_wrap_podcast_group, r)
 210
 211     for podcast in podcasts:
 212         if podcast.needs_update:
 213             incomplete_obj.send_robust(sender=podcast)
 214
 215     return podcasts
 216
 217
 218 def podcasts_groups_by_id(ids):
 219     """ gets podcast groups and top-level podcasts for the given ids """
 220
 221     if ids is None:
 222         raise QueryParameterMissing('ids')
 223
 224     if not ids:
 225         return
 226
 227     db = get_main_database()
 228     res = db.view('podcasts/podcasts_groups',
 229             keys         = ids,
 230             include_docs = True,
 231         )
 232
 233     for r in res:
 234         doc = r['doc']
 235
 236         if not doc:
 237             yield None
 238
 239         if doc['doc_type'] == 'Podcast':
 240             obj = Podcast.wrap(doc)
 241
 242         elif doc['doc_type'] == 'PodcastGroup':
 243             obj = PodcastGroup.wrap(doc)
 244
 245         else:
 246             logger.error('podcasts_groups_by_id retrieved unknown doc_type '
 247                 '"%s" for params %s', doc['doc_type'], res.params)
 248             continue
 249
 250         if obj.needs_update:
 251             incomplete_obj.send_robust(sender=obj)
 252
 253         yield obj
 254
 255
 256
 257 @cache_result(timeout=60*60)
 258 def podcast_for_oldid(oldid):
 259
 260     if not oldid:
 261         raise QueryParameterMissing('oldid')
 262
 263     r = Podcast.view('podcasts/by_oldid',
 264             key          = long(oldid),
 265             classes      = [Podcast, PodcastGroup],
 266             include_docs = True,
 267         )
 268
 269     if not r:
 270         return None
 271
 272     podcast_group = r.first()
 273     podcast = podcast_group.get_podcast_by_oldid(oldid)
 274
 275     if podcast.needs_update:
 276         incomplete_obj.send_robust(sender=podcast)
 277
 278     return podcast
 279
 280
 281 @cache_result(timeout=60*60)
 282 def podcastgroup_for_oldid(oldid):
 283
 284     if not oldid:
 285         raise QueryParameterMissing('oldid')
 286
 287     r = PodcastGroup.view('podcasts/groups_by_oldid',
 288             key          = long(oldid),
 289             include_docs = True,
 290         )
 291
 292     if not r:
 293         return None
 294
 295     pg = r.one()
 296
 297     if pg.needs_update:
 298         incomplete_obj.send_robust(sender=pg)
 299
 300     return pg
 301
 302
 303 def podcast_for_url(url, create=False):
 304
 305     if not url:
 306         raise QueryParameterMissing('url')
 307
 308     key = 'podcast-by-url-%s' % sha1(url.encode('utf-8')).hexdigest()
 309
 310     podcast = cache.get(key)
 311     if podcast:
 312         return podcast
 313
 314     r = Podcast.view('podcasts/by_url',
 315             key=url,
 316             classes=[Podcast, PodcastGroup],
 317             include_docs=True
 318         )
 319
 320     if r:
 321         podcast_group = r.first()
 322         podcast = podcast_group.get_podcast_by_url(url)
 323
 324         if podcast.needs_update:
 325             incomplete_obj.send_robust(sender=podcast)
 326         else:
 327             cache.set(key, podcast)
 328
 329         return podcast
 330
 331     if create:
 332         podcast = Podcast()
 333         podcast.created_timestamp = get_timestamp(datetime.utcnow())
 334         podcast.urls = [url]
 335         podcast.save()
 336         incomplete_obj.send_robust(sender=podcast)
 337         return podcast
 338
 339     return None
 340
 341
 342
 343
 344 def random_podcasts(language='', chunk_size=5):
 345     """ Returns an iterator of random podcasts
 346
 347     optionaly a language code can be specified. If given the podcasts will
 348     be restricted to this language. chunk_size determines how many podcasts
 349     will be fetched at once """
 350
 351     while True:
 352         rnd = random()
 353         res = Podcast.view('podcasts/random',
 354                 startkey     = [language, rnd],
 355                 include_docs = True,
 356                 limit        = chunk_size,
 357                 stale        = 'ok',
 358                 wrap_doc     = False,
 359             )
 360
 361         if not res:
 362             break
 363
 364         for r in res:
 365
 366             # The view podcasts/random does not include incomplete podcasts,
 367             # so we don't need to send any 'incomplete_obj' signals here
 368
 369             obj = r['doc']
 370             if obj['doc_type'] == 'Podcast':
 371                 yield Podcast.wrap(obj)
 372
 373             elif obj['doc_type'] == 'PodcastGroup':
 374                 yield PodcastGroup.wrap(obj)
 375
 376
 377
 378 def podcasts_by_last_update():
 379     res = Podcast.view('podcasts/by_last_update',
 380             include_docs = True,
 381             stale        = 'update_after',
 382             wrap_doc     = False,
 383         )
 384
 385     # TODO: this method is only used for retrieving podcasts to update;
 386     #       should we really send 'incomplete_obj' signals here?
 387
 388     return map(_wrap_podcast_group_key1, res)
 389
 390
 391
 392
 393 def all_podcasts():
 394     from mygpo.db.couchdb.utils import multi_request_view
 395     res = multi_request_view(Podcast,'podcasts/by_id',
 396             wrap         = False,
 397             include_docs = True,
 398             stale        = 'update_after',
 399         )
 400
 401     # TODO: this method is only used for maintenance purposes; should we
 402     #       really send 'incomplete_obj' signals here?
 403
 404     for r in res:
 405         obj = r['doc']
 406         if obj['doc_type'] == 'Podcast':
 407             yield Podcast.wrap(obj)
 408         else:
 409             pid = r[u'key']
 410             pg = PodcastGroup.wrap(obj)
 411             podcast = pg.get_podcast_by_id(pid)
 412             yield podcast
 413
 414
 415 def podcasts_to_dict(ids, use_cache=False):
 416
 417     if ids is None:
 418         raise QueryParameterMissing('ids')
 419
 420     if not ids:
 421         return dict()
 422
 423
 424     ids = list(set(ids))
 425     objs = dict()
 426
 427     cache_objs = []
 428     if use_cache:
 429         res = cache.get_many(ids)
 430         cache_objs.extend(res.values())
 431         ids = [x for x in ids if x not in res.keys()]
 432
 433     db_objs = podcasts_by_id(ids)
 434
 435     for obj in (cache_objs + db_objs):
 436
 437         # get_multi returns dict {'key': _id, 'error': 'not found'}
 438         # for non-existing objects
 439         if isinstance(obj, dict) and 'error' in obj:
 440             _id = obj['key']
 441             objs[_id] = None
 442             continue
 443
 444         for i in obj.get_ids():
 445             objs[i] = obj
 446
 447     if use_cache:
 448         cache.set_many(dict( (obj.get_id(), obj) for obj in db_objs))
 449
 450     return objs
 451
 452
 453
 454 def podcasts_need_update():
 455     db = get_main_database()
 456     res = db.view('episodes/need_update',
 457             group_level = 1,
 458             reduce      = True,
 459             limit       = 100,
 460         )
 461
 462     # TODO: this method is only used for retrieving podcasts to update;
 463     #       should we really send 'incomplete_obj' signals here?
 464
 465     for r in res:
 466         podcast_id = r['key']
 467         podcast = podcast_by_id(podcast_id)
 468         if podcast:
 469             yield podcast
 470
 471
 472 @cache_result(timeout=60*60)
 473 def get_flattr_podcasts(offset=0, limit=20):
 474     """ returns all podcasts that contain Flattr payment URLs """
 475
 476     r = Podcast.view('podcasts/flattr',
 477             skip         = offset,
 478             limit        = limit,
 479             classes      = [Podcast, PodcastGroup],
 480             include_docs = True,
 481             reduce       = False,
 482         )
 483
 484     podcasts = list(r)
 485
 486     for podcast in podcasts:
 487         if podcast.needs_update:
 488             incomplete_obj.send_robust(sender=podcast)
 489
 490     return podcasts
 491
 492
 493 @cache_result(timeout=60*60)
 494 def get_flattr_podcast_count():
 495     """ returns the number of podcasts that contain Flattr payment URLs """
 496     r = list(Podcast.view('podcasts/flattr'))
 497     return r[0]['value']
 498
 499
 500 @cache_result(timeout=60*60)
 501 def get_license_podcasts(offset=0, limit=20):
 502     """ returns a page of podcasts w/ license information """
 503
 504     r = Podcast.view('podcasts/license',
 505             skip = offset,
 506             limit = limit,
 507             classes = [Podcast, PodcastGroup],
 508             include_docs = True,
 509             reduce = False,
 510     )
 511
 512     podcasts = list(r)
 513
 514     for podcast in podcasts:
 515         if podcast.needs_update:
 516             incomplete_obj.send_robust(sender=podcast)
 517
 518     return podcasts
 519
 520
 521 @cache_result(timeout=60*60)
 522 def get_license_podcast_count():
 523     """ returns the number of podcasts that contain license information """
 524     r = list(Podcast.view('podcasts/license'))
 525     return r[0]['value'] if r else 0
 526
 527
 528 def subscriberdata_for_podcast(podcast_id):
 529
 530     if not podcast_id:
 531         raise QueryParameterMissing('podcast_id')
 532
 533     r = PodcastSubscriberData.view('podcasts/subscriber_data',
 534             key          = podcast_id,
 535             include_docs = True,
 536         )
 537
 538     if r:
 539         return r.first()
 540
 541     data = PodcastSubscriberData()
 542     data.podcast = podcast_id
 543     return data
 544
 545
 546
 547 def _wrap_podcast_group(res):
 548     if res['doc']['doc_type'] == 'Podcast':
 549         return Podcast.wrap(res['doc'])
 550     else:
 551         pg = PodcastGroup.wrap(res['doc'])
 552         id = res['key']
 553         return pg.get_podcast_by_id(id)
 554
 555
 556 def _wrap_podcast_group_key1(res):
 557     obj = res['doc']
 558     if obj['doc_type'] == 'Podcast':
 559         return Podcast.wrap(obj)
 560
 561     else:
 562         pid = res[u'key'][1]
 563         pg = PodcastGroup.wrap(obj)
 564         podcast = pg.get_podcast_by_id(pid)
 565         return podcast
 566
 567
 568
 569 def search_wrapper(result):
 570     doc = result['doc']
 571     if doc['doc_type'] == 'Podcast':
 572         p = Podcast.wrap(doc)
 573     elif doc['doc_type'] == 'PodcastGroup':
 574         p = PodcastGroup.wrap(doc)
 575     p._id = result['id']
 576     return p
 577
 578
 579 @cache_result(timeout=60*60)
 580 def search(q, offset=0, num_results=20):
 581
 582     if not q:
 583         return [], 0
 584
 585     db = get_main_database()
 586
 587     #FIXME current couchdbkit can't parse responses for multi-query searches
 588     q = q.replace(',', '')
 589
 590     try:
 591         res = db.search('podcasts/search',
 592                 wrapper      = search_wrapper,
 593                 include_docs = True,
 594                 limit        = num_results,
 595                 stale        = 'update_after',
 596                 skip         = offset,
 597                 q            = q,
 598                 sort='\\subscribers<int>')
 599
 600         podcasts = list(res)
 601
 602         for podcast in podcasts:
 603             if podcast.needs_update:
 604                 incomplete_obj.send_robust(sender=podcast)
 605
 606         return podcasts, res.total_rows
 607
 608     except RequestFailed:
 609         return [], 0
 610
 611
 612 def reload_podcast(podcast):
 613     return podcast_by_id_uncached(podcast.get_id())
 614
 615
 616 @repeat_on_conflict(['podcast'], reload_f=reload_podcast)
 617 def update_additional_data(podcast, twitter):
 618     podcast.twitter = twitter
 619     podcast.save()
 620
 621     # clear the whole cache until we have a better invalidation mechanism
 622     cache.clear()
 623
 624
 625 @repeat_on_conflict(['podcast'], reload_f=reload_podcast)
 626 def update_related_podcasts(podcast, related):
 627     if podcast.related_podcasts == related:
 628         return
 629
 630     podcast.related_podcasts = related
 631     podcast.save()
 632
 633
 634 @repeat_on_conflict(['podcast'], reload_f=reload_podcast)
 635 def delete_podcast(podcast):
 636     podcast.delete()