mygpo/db/couchdb/podcast.py

   1 from hashlib import sha1
   2 from random import random
   3 from datetime import datetime
   4
   5 from restkit import RequestFailed
   6
   7 from django.core.cache import cache
   8
   9 from mygpo.core.models import Podcast, PodcastGroup, PodcastSubscriberData
  10 from mygpo.core.signals import incomplete_obj
  11 from mygpo.decorators import repeat_on_conflict
  12 from mygpo.cache import cache_result
  13 from mygpo.utils import get_timestamp
  14 from mygpo.db.couchdb import get_main_database, get_userdata_database
  15 from mygpo.db import QueryParameterMissing
  16 from mygpo.db.couchdb.utils import multi_request_view, is_couchdb_id
  17
  18
  19 def podcast_slugs(base_slug):
  20     res = Podcast.view('podcasts/by_slug',
  21             startkey = [base_slug, None],
  22             endkey   = [base_slug + 'ZZZZZ', None],
  23             wrap_doc = False,
  24         )
  25     return [r['key'][0] for r in res]
  26
  27
  28 @cache_result(timeout=60*60)
  29 def podcast_count():
  30     return Podcast.view('podcasts/by_id',
  31             limit = 0,
  32             stale = 'update_after',
  33         ).total_rows
  34
  35
  36 @cache_result(timeout=60*60)
  37 def podcasts_for_tag(tag):
  38     """ Returns the podcasts with the current tag.
  39
  40     Some podcasts might be returned twice """
  41
  42     if not tag:
  43         raise QueryParameterMissing('tag')
  44
  45     res = multi_request_view(Podcast, 'podcasts/by_tag',
  46             wrap        = False,
  47             startkey    = [tag, None],
  48             endkey      = [tag, {}],
  49             reduce      = True,
  50             group       = True,
  51             group_level = 2
  52         )
  53
  54     for r in res:
  55         yield (r['key'][1], r['value'])
  56
  57     udb = get_userdata_database()
  58     res = multi_request_view(udb, 'usertags/podcasts',
  59             wrap        = False,
  60             startkey    = [tag, None],
  61             endkey      = [tag, {}],
  62             reduce      = True,
  63             group       = True,
  64             group_level = 2
  65         )
  66
  67     for r in res:
  68         yield (r['key'][1], r['value'])
  69
  70
  71 @cache_result(timeout=60*60)
  72 def get_podcast_languages():
  73     """ Returns all 2-letter language codes that are used by podcasts.
  74
  75     It filters obviously invalid strings, but does not check if any
  76     of these codes is contained in ISO 639. """
  77
  78     from mygpo.web.utils import sanitize_language_codes
  79
  80     res = Podcast.view('podcasts/by_language',
  81             group_level = 1,
  82             stale       = 'ok',
  83         )
  84
  85     langs = [r['key'][0] for r in res]
  86     sane_lang = sanitize_language_codes(langs)
  87     sane_lang.sort()
  88     return sane_lang
  89
  90
  91 def podcast_by_id_uncached(podcast_id, current_id=False):
  92
  93     if not podcast_id:
  94         raise QueryParameterMissing('podcast_id')
  95
  96     r = Podcast.view('podcasts/by_id',
  97             key          = podcast_id,
  98             classes      = [Podcast, PodcastGroup],
  99             include_docs = True,
 100         )
 101
 102     if not r:
 103         return None
 104
 105     podcast_group = r.first()
 106
 107     podcast = podcast_group.get_podcast_by_id(podcast_id, current_id)
 108
 109     if podcast.needs_update:
 110         incomplete_obj.send_robust(sender=podcast)
 111
 112     return podcast
 113
 114
 115 podcast_by_id = cache_result(timeout=60*60)(podcast_by_id_uncached)
 116
 117
 118 @cache_result(timeout=60*60)
 119 def podcastgroup_by_id(group_id):
 120
 121     if not group_id:
 122         raise QueryParameterMissing('group_id')
 123
 124     pg = PodcastGroup.get(group_id)
 125
 126     if pg.needs_update:
 127         incomplete_obj.send_robust(sender=pg)
 128
 129     return pg
 130
 131
 132
 133 @cache_result(timeout=60*60)
 134 def podcast_for_slug(slug):
 135
 136     if not slug:
 137         raise QueryParameterMissing('slug')
 138
 139     r = Podcast.view('podcasts/by_slug',
 140             startkey     = [slug, None],
 141             endkey       = [slug, {}],
 142             include_docs = True,
 143             wrap_doc     = False,
 144         )
 145
 146     if not r:
 147         return None
 148
 149     res = r.first()
 150     doc = res['doc']
 151     if doc['doc_type'] == 'Podcast':
 152         obj = Podcast.wrap(doc)
 153     else:
 154         pid = res['key'][1]
 155         pg = PodcastGroup.wrap(doc)
 156
 157         if pid == pg._id:
 158             # TODO: we don't return PodcastGroups atm
 159             return None
 160
 161         obj = pg.get_podcast_by_id(pid)
 162
 163     if obj.needs_update:
 164         incomplete_obj.send_robust(sender=obj)
 165
 166     return obj
 167
 168
 169 @cache_result(timeout=60*60)
 170 def podcast_for_slug_id(slug_id):
 171     """ Returns the Podcast for either an CouchDB-ID for a Slug """
 172
 173     if is_couchdb_id(slug_id):
 174         return podcast_by_id(slug_id)
 175     else:
 176         return podcast_for_slug(slug_id)
 177
 178
 179 @cache_result(timeout=60*60)
 180 def podcastgroup_for_slug_id(slug_id):
 181     """ Returns the Podcast for either an CouchDB-ID for a Slug """
 182
 183     if not slug_id:
 184         raise QueryParameterMissing('slug_id')
 185
 186     if is_couchdb_id(slug_id):
 187         return podcastgroup_by_id(slug_id)
 188
 189     else:
 190         #TODO: implement
 191         return PodcastGroup.for_slug(slug_id)
 192
 193
 194
 195 def podcasts_by_id(ids):
 196
 197     if ids is None:
 198         raise QueryParameterMissing('ids')
 199
 200     if not ids:
 201         return []
 202
 203     r = Podcast.view('podcasts/by_id',
 204             keys         = ids,
 205             include_docs = True,
 206             wrap_doc     = False
 207         )
 208
 209     podcasts = map(_wrap_podcast_group, r)
 210
 211     for podcast in podcasts:
 212         if podcast.needs_update:
 213             incomplete_obj.send_robust(sender=podcast)
 214
 215     return podcasts
 216
 217
 218 def podcasts_groups_by_id(ids):
 219     """ gets podcast groups and top-level podcasts for the given ids """
 220
 221     if ids is None:
 222         raise QueryParameterMissing('ids')
 223
 224     if not ids:
 225         return
 226
 227     db = get_main_database()
 228     res = db.view('podcasts/podcasts_groups',
 229             keys         = ids,
 230             include_docs = True,
 231             classes      = [Podcast, PodcastGroup],
 232         )
 233
 234     for r in res:
 235         doc = r['doc']
 236
 237         if not doc:
 238             yield None
 239
 240         if doc['doc_type'] == 'Podcast':
 241             obj = Podcast.wrap(doc)
 242
 243         elif doc['doc_type'] == 'PodcastGroup':
 244             obj = PodcastGroup.wrap(doc)
 245
 246         else:
 247             logger.error('podcasts_groups_by_id retrieved unknown doc_type '
 248                 '"%s" for params %s', doc['doc_type'], res.params)
 249             continue
 250
 251         if obj.needs_update:
 252             incomplete_obj.send_robust(sender=obj)
 253
 254         yield obj
 255
 256
 257
 258 @cache_result(timeout=60*60)
 259 def podcast_for_oldid(oldid):
 260
 261     if not oldid:
 262         raise QueryParameterMissing('oldid')
 263
 264     r = Podcast.view('podcasts/by_oldid',
 265             key          = long(oldid),
 266             classes      = [Podcast, PodcastGroup],
 267             include_docs = True,
 268         )
 269
 270     if not r:
 271         return None
 272
 273     podcast_group = r.first()
 274     podcast = podcast_group.get_podcast_by_oldid(oldid)
 275
 276     if podcast.needs_update:
 277         incomplete_obj.send_robust(sender=podcast)
 278
 279     return podcast
 280
 281
 282 @cache_result(timeout=60*60)
 283 def podcastgroup_for_oldid(oldid):
 284
 285     if not oldid:
 286         raise QueryParameterMissing('oldid')
 287
 288     r = PodcastGroup.view('podcasts/groups_by_oldid',
 289             key          = long(oldid),
 290             include_docs = True,
 291         )
 292
 293     if not r:
 294         return None
 295
 296     pg = r.one()
 297
 298     if pg.needs_update:
 299         incomplete_obj.send_robust(sender=pg)
 300
 301     return pg
 302
 303
 304 def podcast_for_url(url, create=False):
 305
 306     if not url:
 307         raise QueryParameterMissing('url')
 308
 309     key = 'podcast-by-url-%s' % sha1(url.encode('utf-8')).hexdigest()
 310
 311     podcast = cache.get(key)
 312     if podcast:
 313         return podcast
 314
 315     r = Podcast.view('podcasts/by_url',
 316             key=url,
 317             classes=[Podcast, PodcastGroup],
 318             include_docs=True
 319         )
 320
 321     if r:
 322         podcast_group = r.first()
 323         podcast = podcast_group.get_podcast_by_url(url)
 324
 325         if podcast.needs_update:
 326             incomplete_obj.send_robust(sender=podcast)
 327         else:
 328             cache.set(key, podcast)
 329
 330         return podcast
 331
 332     if create:
 333         podcast = Podcast()
 334         podcast.created_timestamp = get_timestamp(datetime.utcnow())
 335         podcast.urls = [url]
 336         podcast.save()
 337         incomplete_obj.send_robust(sender=podcast)
 338         return podcast
 339
 340     return None
 341
 342
 343
 344
 345 def random_podcasts(language='', chunk_size=5):
 346     """ Returns an iterator of random podcasts
 347
 348     optionaly a language code can be specified. If given the podcasts will
 349     be restricted to this language. chunk_size determines how many podcasts
 350     will be fetched at once """
 351
 352     while True:
 353         rnd = random()
 354         res = Podcast.view('podcasts/random',
 355                 startkey     = [language, rnd],
 356                 include_docs = True,
 357                 limit        = chunk_size,
 358                 stale        = 'ok',
 359                 wrap_doc     = False,
 360             )
 361
 362         if not res:
 363             break
 364
 365         for r in res:
 366
 367             # The view podcasts/random does not include incomplete podcasts,
 368             # so we don't need to send any 'incomplete_obj' signals here
 369
 370             obj = r['doc']
 371             if obj['doc_type'] == 'Podcast':
 372                 yield Podcast.wrap(obj)
 373
 374             elif obj['doc_type'] == 'PodcastGroup':
 375                 yield PodcastGroup.wrap(obj)
 376
 377
 378
 379 def podcasts_by_last_update():
 380     res = Podcast.view('podcasts/by_last_update',
 381             include_docs = True,
 382             stale        = 'update_after',
 383             wrap_doc     = False,
 384         )
 385
 386     # TODO: this method is only used for retrieving podcasts to update;
 387     #       should we really send 'incomplete_obj' signals here?
 388
 389     return map(_wrap_podcast_group_key1, res)
 390
 391
 392
 393
 394 def all_podcasts():
 395     from mygpo.db.couchdb.utils import multi_request_view
 396     res = multi_request_view(Podcast,'podcasts/by_id',
 397             wrap         = False,
 398             include_docs = True,
 399             stale        = 'update_after',
 400         )
 401
 402     # TODO: this method is only used for maintenance purposes; should we
 403     #       really send 'incomplete_obj' signals here?
 404
 405     for r in res:
 406         obj = r['doc']
 407         if obj['doc_type'] == 'Podcast':
 408             yield Podcast.wrap(obj)
 409         else:
 410             pid = r[u'key']
 411             pg = PodcastGroup.wrap(obj)
 412             podcast = pg.get_podcast_by_id(pid)
 413             yield podcast
 414
 415
 416 def podcasts_to_dict(ids, use_cache=False):
 417
 418     if ids is None:
 419         raise QueryParameterMissing('ids')
 420
 421     if not ids:
 422         return dict()
 423
 424
 425     ids = list(set(ids))
 426     objs = dict()
 427
 428     cache_objs = []
 429     if use_cache:
 430         res = cache.get_many(ids)
 431         cache_objs.extend(res.values())
 432         ids = [x for x in ids if x not in res.keys()]
 433
 434     db_objs = podcasts_by_id(ids)
 435
 436     for obj in (cache_objs + db_objs):
 437
 438         # get_multi returns dict {'key': _id, 'error': 'not found'}
 439         # for non-existing objects
 440         if isinstance(obj, dict) and 'error' in obj:
 441             _id = obj['key']
 442             objs[_id] = None
 443             continue
 444
 445         for i in obj.get_ids():
 446             objs[i] = obj
 447
 448     if use_cache:
 449         cache.set_many(dict( (obj.get_id(), obj) for obj in db_objs))
 450
 451     return objs
 452
 453
 454
 455 def podcasts_need_update():
 456     db = get_main_database()
 457     res = db.view('episodes/need_update',
 458             group_level = 1,
 459             reduce      = True,
 460             limit       = 100,
 461         )
 462
 463     # TODO: this method is only used for retrieving podcasts to update;
 464     #       should we really send 'incomplete_obj' signals here?
 465
 466     for r in res:
 467         podcast_id = r['key']
 468         podcast = podcast_by_id(podcast_id)
 469         if podcast:
 470             yield podcast
 471
 472
 473 @cache_result(timeout=60*60)
 474 def get_flattr_podcasts(offset=0, limit=20):
 475     """ returns all podcasts that contain Flattr payment URLs """
 476
 477     r = Podcast.view('podcasts/flattr',
 478             skip         = offset,
 479             limit        = limit,
 480             classes      = [Podcast, PodcastGroup],
 481             include_docs = True,
 482             reduce       = False,
 483         )
 484
 485     podcasts = list(r)
 486
 487     for podcast in podcasts:
 488         if podcast.needs_update:
 489             incomplete_obj.send_robust(sender=podcast)
 490
 491     return podcasts
 492
 493
 494 @cache_result(timeout=60*60)
 495 def get_flattr_podcast_count():
 496     """ returns the number of podcasts that contain Flattr payment URLs """
 497     r = list(Podcast.view('podcasts/flattr'))
 498     return r[0]['value']
 499
 500
 501 @cache_result(timeout=60*60)
 502 def get_license_podcasts(offset=0, limit=20):
 503     """ returns a page of podcasts w/ license information """
 504
 505     r = Podcast.view('podcasts/license',
 506             skip = offset,
 507             limit = limit,
 508             classes = [Podcast, PodcastGroup],
 509             include_docs = True,
 510             reduce = False,
 511     )
 512
 513     podcasts = list(r)
 514
 515     for podcast in podcasts:
 516         if podcast.needs_update:
 517             incomplete_obj.send_robust(sender=podcast)
 518
 519     return podcasts
 520
 521
 522 @cache_result(timeout=60*60)
 523 def get_license_podcast_count():
 524     """ returns the number of podcasts that contain license information """
 525     r = list(Podcast.view('podcasts/license'))
 526     return r[0]['value'] if r else 0
 527
 528
 529 def subscriberdata_for_podcast(podcast_id):
 530
 531     if not podcast_id:
 532         raise QueryParameterMissing('podcast_id')
 533
 534     r = PodcastSubscriberData.view('podcasts/subscriber_data',
 535             key          = podcast_id,
 536             include_docs = True,
 537         )
 538
 539     if r:
 540         return r.first()
 541
 542     data = PodcastSubscriberData()
 543     data.podcast = podcast_id
 544     return data
 545
 546
 547
 548 def _wrap_podcast_group(res):
 549     if res['doc']['doc_type'] == 'Podcast':
 550         return Podcast.wrap(res['doc'])
 551     else:
 552         pg = PodcastGroup.wrap(res['doc'])
 553         id = res['key']
 554         return pg.get_podcast_by_id(id)
 555
 556
 557 def _wrap_podcast_group_key1(res):
 558     obj = res['doc']
 559     if obj['doc_type'] == 'Podcast':
 560         return Podcast.wrap(obj)
 561
 562     else:
 563         pid = res[u'key'][1]
 564         pg = PodcastGroup.wrap(obj)
 565         podcast = pg.get_podcast_by_id(pid)
 566         return podcast
 567
 568
 569
 570 def search_wrapper(result):
 571     doc = result['doc']
 572     if doc['doc_type'] == 'Podcast':
 573         p = Podcast.wrap(doc)
 574     elif doc['doc_type'] == 'PodcastGroup':
 575         p = PodcastGroup.wrap(doc)
 576     p._id = result['id']
 577     return p
 578
 579
 580 @cache_result(timeout=60*60)
 581 def search(q, offset=0, num_results=20):
 582
 583     if not q:
 584         return [], 0
 585
 586     db = get_main_database()
 587
 588     #FIXME current couchdbkit can't parse responses for multi-query searches
 589     q = q.replace(',', '')
 590
 591     try:
 592         res = db.search('podcasts/search',
 593                 wrapper      = search_wrapper,
 594                 include_docs = True,
 595                 limit        = num_results,
 596                 stale        = 'update_after',
 597                 skip         = offset,
 598                 q            = q,
 599                 sort='\\subscribers<int>')
 600
 601         podcasts = list(res)
 602
 603         for podcast in podcasts:
 604             if podcast.needs_update:
 605                 incomplete_obj.send_robust(sender=podcast)
 606
 607         return podcasts, res.total_rows
 608
 609     except RequestFailed:
 610         return [], 0
 611
 612
 613 def reload_podcast(podcast):
 614     return podcast_by_id_uncached(podcast.get_id())
 615
 616
 617 @repeat_on_conflict(['podcast'], reload_f=reload_podcast)
 618 def update_additional_data(podcast, twitter):
 619     podcast.twitter = twitter
 620     podcast.save()
 621
 622     # clear the whole cache until we have a better invalidation mechanism
 623     cache.clear()
 624
 625
 626 @repeat_on_conflict(['podcast'], reload_f=reload_podcast)
 627 def update_related_podcasts(podcast, related):
 628     if podcast.related_podcasts == related:
 629         return
 630
 631     podcast.related_podcasts = related
 632     podcast.save()
 633
 634
 635 @repeat_on_conflict(['podcast'], reload_f=reload_podcast)
 636 def delete_podcast(podcast):
 637     podcast.delete()