mygpo/db/couchdb/podcast.py

   1 from hashlib import sha1
   2 from random import random
   3 from datetime import datetime
   4
   5 from restkit import RequestFailed
   6
   7 from django.core.cache import cache
   8
   9 from mygpo.core.models import Podcast, PodcastGroup, PodcastSubscriberData
  10 from mygpo.core.signals import incomplete_obj
  11 from mygpo.decorators import repeat_on_conflict
  12 from mygpo.cache import cache_result
  13 from mygpo.utils import get_timestamp
  14 from mygpo.db.couchdb import get_main_database
  15 from mygpo.db import QueryParameterMissing
  16 from mygpo.db.couchdb.utils import multi_request_view, is_couchdb_id
  17
  18
  19 def podcast_slugs(base_slug):
  20     res = Podcast.view('podcasts/by_slug',
  21             startkey = [base_slug, None],
  22             endkey   = [base_slug + 'ZZZZZ', None],
  23             wrap_doc = False,
  24         )
  25     return [r['key'][0] for r in res]
  26
  27
  28 @cache_result(timeout=60*60)
  29 def podcast_count():
  30     return Podcast.view('podcasts/by_id',
  31             limit = 0,
  32             stale = 'update_after',
  33         ).total_rows
  34
  35
  36 @cache_result(timeout=60*60)
  37 def podcasts_for_tag(tag):
  38     """ Returns the podcasts with the current tag.
  39
  40     Some podcasts might be returned twice """
  41
  42     if not tag:
  43         raise QueryParameterMissing('tag')
  44
  45     res = multi_request_view(Podcast, 'podcasts/by_tag',
  46             wrap        = False,
  47             startkey    = [tag, None],
  48             endkey      = [tag, {}],
  49             reduce      = True,
  50             group       = True,
  51             group_level = 2
  52         )
  53
  54     for r in res:
  55         yield (r['key'][1], r['value'])
  56
  57     res = multi_request_view(Podcast, 'usertags/podcasts',
  58             wrap        = False,
  59             startkey    = [tag, None],
  60             endkey      = [tag, {}],
  61             reduce      = True,
  62             group       = True,
  63             group_level = 2
  64         )
  65
  66     for r in res:
  67         yield (r['key'][1], r['value'])
  68
  69
  70 @cache_result(timeout=60*60)
  71 def get_podcast_languages():
  72     """ Returns all 2-letter language codes that are used by podcasts.
  73
  74     It filters obviously invalid strings, but does not check if any
  75     of these codes is contained in ISO 639. """
  76
  77     from mygpo.web.utils import sanitize_language_codes
  78
  79     res = Podcast.view('podcasts/by_language',
  80             group_level = 1,
  81             stale       = 'ok',
  82         )
  83
  84     langs = [r['key'][0] for r in res]
  85     sane_lang = sanitize_language_codes(langs)
  86     sane_lang.sort()
  87     return sane_lang
  88
  89
  90 @cache_result(timeout=60*60)
  91 def podcast_by_id(podcast_id, current_id=False):
  92
  93     if not podcast_id:
  94         raise QueryParameterMissing('podcast_id')
  95
  96     r = Podcast.view('podcasts/by_id',
  97             key          = podcast_id,
  98             classes      = [Podcast, PodcastGroup],
  99             include_docs = True,
 100         )
 101
 102     if not r:
 103         return None
 104
 105     podcast_group = r.first()
 106
 107     podcast = podcast_group.get_podcast_by_id(podcast_id, current_id)
 108
 109     if podcast.needs_update:
 110         incomplete_obj.send_robust(sender=podcast)
 111
 112     return podcast
 113
 114
 115
 116 @cache_result(timeout=60*60)
 117 def podcastgroup_by_id(group_id):
 118
 119     if not group_id:
 120         raise QueryParameterMissing('group_id')
 121
 122     pg = PodcastGroup.get(group_id)
 123
 124     if pg.needs_update:
 125         incomplete_obj.send_robust(sender=pg)
 126
 127     return pg
 128
 129
 130
 131 @cache_result(timeout=60*60)
 132 def podcast_for_slug(slug):
 133
 134     if not slug:
 135         raise QueryParameterMissing('slug')
 136
 137     r = Podcast.view('podcasts/by_slug',
 138             startkey     = [slug, None],
 139             endkey       = [slug, {}],
 140             include_docs = True,
 141             wrap_doc     = False,
 142         )
 143
 144     if not r:
 145         return None
 146
 147     res = r.first()
 148     doc = res['doc']
 149     if doc['doc_type'] == 'Podcast':
 150         obj = Podcast.wrap(doc)
 151     else:
 152         pid = res['key'][1]
 153         pg = PodcastGroup.wrap(doc)
 154         obj = pg.get_podcast_by_id(pid)
 155
 156     if obj.needs_update:
 157         incomplete_obj.send_robust(sender=obj)
 158
 159     return obj
 160
 161
 162 @cache_result(timeout=60*60)
 163 def podcast_for_slug_id(slug_id):
 164     """ Returns the Podcast for either an CouchDB-ID for a Slug """
 165
 166     if is_couchdb_id(slug_id):
 167         return podcast_by_id(slug_id)
 168     else:
 169         return podcast_for_slug(slug_id)
 170
 171
 172 @cache_result(timeout=60*60)
 173 def podcastgroup_for_slug_id(slug_id):
 174     """ Returns the Podcast for either an CouchDB-ID for a Slug """
 175
 176     if not slug_id:
 177         raise QueryParameterMissing('slug_id')
 178
 179     if is_couchdb_id(slug_id):
 180         return podcastgroup_by_id(slug_id)
 181
 182     else:
 183         #TODO: implement
 184         return PodcastGroup.for_slug(slug_id)
 185
 186
 187
 188 def podcasts_by_id(ids):
 189
 190     if ids is None:
 191         raise QueryParameterMissing('ids')
 192
 193     if not ids:
 194         return []
 195
 196     r = Podcast.view('podcasts/by_id',
 197             keys         = ids,
 198             include_docs = True,
 199             wrap_doc     = False
 200         )
 201
 202     podcasts = map(_wrap_podcast_group, r)
 203
 204     for podcast in podcasts:
 205         if podcast.needs_update:
 206             incomplete_obj.send_robust(sender=podcast)
 207
 208     return podcasts
 209
 210
 211 def podcasts_groups_by_id(ids):
 212     """ gets podcast groups and top-level podcasts for the given ids """
 213
 214     if ids is None:
 215         raise QueryParameterMissing('ids')
 216
 217     if not ids:
 218         return
 219
 220     db = get_main_database()
 221     res = db.view('_all_docs',
 222             keys         = ids,
 223             include_docs = True,
 224             classes      = [Podcast, PodcastGroup],
 225         )
 226
 227     for r in res:
 228         doc = r['doc']
 229         if doc['doc_type'] == 'Podcast':
 230             obj = Podcast.wrap(doc)
 231
 232         elif doc['doc_type'] == 'PodcastGroup':
 233             obj = PodcastGroup.wrap(doc)
 234
 235         else:
 236             logger.error('podcasts_groups_by_id retrieved unknown doc_type '
 237                 '"%s" for params %s', doc['doc_type'], res.params)
 238             continue
 239
 240         if obj.needs_update:
 241             incomplete_obj.send_robust(sender=obj)
 242
 243         yield obj
 244
 245
 246
 247 @cache_result(timeout=60*60)
 248 def podcast_for_oldid(oldid):
 249
 250     if not oldid:
 251         raise QueryParameterMissing('oldid')
 252
 253     r = Podcast.view('podcasts/by_oldid',
 254             key          = long(oldid),
 255             classes      = [Podcast, PodcastGroup],
 256             include_docs = True,
 257         )
 258
 259     if not r:
 260         return None
 261
 262     podcast_group = r.first()
 263     podcast = podcast_group.get_podcast_by_oldid(oldid)
 264
 265     if podcast.needs_update:
 266         incomplete_obj.send_robust(sender=podcast)
 267
 268     return podcast
 269
 270
 271 @cache_result(timeout=60*60)
 272 def podcastgroup_for_oldid(oldid):
 273
 274     if not oldid:
 275         raise QueryParameterMissing('oldid')
 276
 277     r = PodcastGroup.view('podcasts/groups_by_oldid',
 278             key          = long(oldid),
 279             include_docs = True,
 280         )
 281
 282     if not r:
 283         return None
 284
 285     pg = r.one()
 286
 287     if pg.needs_update:
 288         incomplete_obj.send_robust(sender=pg)
 289
 290     return pg
 291
 292
 293 def podcast_for_url(url, create=False):
 294
 295     if not url:
 296         raise QueryParameterMissing('url')
 297
 298     key = 'podcast-by-url-%s' % sha1(url.encode('utf-8')).hexdigest()
 299
 300     podcast = cache.get(key)
 301     if podcast:
 302         return podcast
 303
 304     r = Podcast.view('podcasts/by_url',
 305             key=url,
 306             classes=[Podcast, PodcastGroup],
 307             include_docs=True
 308         )
 309
 310     if r:
 311         podcast_group = r.first()
 312         podcast = podcast_group.get_podcast_by_url(url)
 313
 314         if podcast.needs_update:
 315             incomplete_obj.send_robust(sender=podcast)
 316         else:
 317             cache.set(key, podcast)
 318
 319         return podcast
 320
 321     if create:
 322         podcast = Podcast()
 323         podcast.created_timestamp = get_timestamp(datetime.utcnow())
 324         podcast.urls = [url]
 325         podcast.save()
 326         incomplete_obj.send_robust(sender=podcast)
 327         return podcast
 328
 329     return None
 330
 331
 332
 333
 334 def random_podcasts(language='', chunk_size=5):
 335     """ Returns an iterator of random podcasts
 336
 337     optionaly a language code can be specified. If given the podcasts will
 338     be restricted to this language. chunk_size determines how many podcasts
 339     will be fetched at once """
 340
 341     while True:
 342         rnd = random()
 343         res = Podcast.view('podcasts/random',
 344                 startkey     = [language, rnd],
 345                 include_docs = True,
 346                 limit        = chunk_size,
 347                 stale        = 'ok',
 348                 wrap_doc     = False,
 349             )
 350
 351         if not res:
 352             break
 353
 354         for r in res:
 355
 356             # The view podcasts/random does not include incomplete podcasts,
 357             # so we don't need to send any 'incomplete_obj' signals here
 358
 359             obj = r['doc']
 360             if obj['doc_type'] == 'Podcast':
 361                 yield Podcast.wrap(obj)
 362
 363             elif obj['doc_type'] == 'PodcastGroup':
 364                 yield PodcastGroup.wrap(obj)
 365
 366
 367
 368 def podcasts_by_last_update():
 369     res = Podcast.view('podcasts/by_last_update',
 370             include_docs = True,
 371             stale        = 'update_after',
 372             wrap_doc     = False,
 373         )
 374
 375     # TODO: this method is only used for retrieving podcasts to update;
 376     #       should we really send 'incomplete_obj' signals here?
 377
 378     return map(_wrap_podcast_group_key1, res)
 379
 380
 381
 382
 383 def all_podcasts():
 384     from mygpo.db.couchdb.utils import multi_request_view
 385     res = multi_request_view(Podcast,'podcasts/by_id',
 386             wrap         = False,
 387             include_docs = True,
 388             stale        = 'update_after',
 389         )
 390
 391     # TODO: this method is only used for maintenance purposes; should we
 392     #       really send 'incomplete_obj' signals here?
 393
 394     for r in res:
 395         obj = r['doc']
 396         if obj['doc_type'] == 'Podcast':
 397             yield Podcast.wrap(obj)
 398         else:
 399             pid = r[u'key']
 400             pg = PodcastGroup.wrap(obj)
 401             podcast = pg.get_podcast_by_id(pid)
 402             yield podcast
 403
 404
 405 def all_podcasts_groups(cls):
 406     return cls.view('podcasts/podcasts_groups', include_docs=True,
 407         classes=[Podcast, PodcastGroup]).iterator()
 408
 409
 410
 411 def podcasts_to_dict(ids, use_cache=False):
 412
 413     if ids is None:
 414         raise QueryParameterMissing('ids')
 415
 416     if not ids:
 417         return dict()
 418
 419
 420     ids = list(set(ids))
 421     objs = dict()
 422
 423     cache_objs = []
 424     if use_cache:
 425         res = cache.get_many(ids)
 426         cache_objs.extend(res.values())
 427         ids = [x for x in ids if x not in res.keys()]
 428
 429     db_objs = podcasts_by_id(ids)
 430
 431     for obj in (cache_objs + db_objs):
 432
 433         # get_multi returns dict {'key': _id, 'error': 'not found'}
 434         # for non-existing objects
 435         if isinstance(obj, dict) and 'error' in obj:
 436             _id = obj['key']
 437             objs[_id] = None
 438             continue
 439
 440         for i in obj.get_ids():
 441             objs[i] = obj
 442
 443     if use_cache:
 444         cache.set_many(dict( (obj.get_id(), obj) for obj in db_objs))
 445
 446     return objs
 447
 448
 449
 450 def podcasts_need_update():
 451     db = get_main_database()
 452     res = db.view('episodes/need_update',
 453             group_level = 1,
 454             reduce      = True,
 455             limit       = 100,
 456         )
 457
 458     # TODO: this method is only used for retrieving podcasts to update;
 459     #       should we really send 'incomplete_obj' signals here?
 460
 461     for r in res:
 462         podcast_id = r['key']
 463         podcast = podcast_by_id(podcast_id)
 464         if podcast:
 465             yield podcast
 466
 467
 468 @cache_result(timeout=60*60)
 469 def get_flattr_podcasts(offset=0, limit=20):
 470     """ returns all podcasts that contain Flattr payment URLs """
 471
 472     r = Podcast.view('podcasts/flattr',
 473             skip         = offset,
 474             limit        = limit,
 475             classes      = [Podcast, PodcastGroup],
 476             include_docs = True,
 477             reduce       = False,
 478         )
 479
 480     podcasts = list(r)
 481
 482     for podcast in podcasts:
 483         if podcast.needs_update:
 484             incomplete_obj.send_robust(sender=podcast)
 485
 486     return podcasts
 487
 488
 489 @cache_result(timeout=60*60)
 490 def get_flattr_podcast_count():
 491     """ returns the number of podcasts that contain Flattr payment URLs """
 492     r = list(Podcast.view('podcasts/flattr'))
 493     return r[0]['value']
 494
 495
 496 def subscriberdata_for_podcast(podcast_id):
 497
 498     if not podcast_id:
 499         raise QueryParameterMissing('podcast_id')
 500
 501     r = PodcastSubscriberData.view('podcasts/subscriber_data',
 502             key          = podcast_id,
 503             include_docs = True,
 504         )
 505
 506     if r:
 507         return r.first()
 508
 509     data = PodcastSubscriberData()
 510     data.podcast = podcast_id
 511     return data
 512
 513
 514
 515 def _wrap_podcast_group(res):
 516     if res['doc']['doc_type'] == 'Podcast':
 517         return Podcast.wrap(res['doc'])
 518     else:
 519         pg = PodcastGroup.wrap(res['doc'])
 520         id = res['key']
 521         return pg.get_podcast_by_id(id)
 522
 523
 524 def _wrap_podcast_group_key1(res):
 525     obj = res['doc']
 526     if obj['doc_type'] == 'Podcast':
 527         return Podcast.wrap(obj)
 528
 529     else:
 530         pid = res[u'key'][1]
 531         pg = PodcastGroup.wrap(obj)
 532         podcast = pg.get_podcast_by_id(pid)
 533         return podcast
 534
 535
 536
 537 def search_wrapper(result):
 538     doc = result['doc']
 539     if doc['doc_type'] == 'Podcast':
 540         p = Podcast.wrap(doc)
 541     elif doc['doc_type'] == 'PodcastGroup':
 542         p = PodcastGroup.wrap(doc)
 543     p._id = result['id']
 544     return p
 545
 546
 547 @cache_result(timeout=60*60)
 548 def search(q, offset=0, num_results=20):
 549
 550     if not q:
 551         return [], 0
 552
 553     db = get_main_database()
 554
 555     #FIXME current couchdbkit can't parse responses for multi-query searches
 556     q = q.replace(',', '')
 557
 558     try:
 559         res = db.search('podcasts/search',
 560                 wrapper      = search_wrapper,
 561                 include_docs = True,
 562                 limit        = num_results,
 563                 stale        = 'update_after',
 564                 skip         = offset,
 565                 q            = q,
 566                 sort='\\subscribers<int>')
 567
 568         podcasts = list(res)
 569
 570         for podcast in podcasts:
 571             if podcast.needs_update:
 572                 incomplete_obj.send_robust(sender=podcast)
 573
 574         return podcasts, res.total_rows
 575
 576     except RequestFailed:
 577         return [], 0
 578
 579
 580 @repeat_on_conflict(['podcast'])
 581 def update_additional_data(podcast, twitter):
 582     podcast.twitter = twitter
 583     podcast.save()
 584
 585     # clear the whole cache until we have a better invalidation mechanism
 586     cache.clear()
 587
 588
 589 @repeat_on_conflict(['podcast'])
 590 def update_related_podcasts(podcast, related):
 591     if podcast.related_podcasts == related:
 592         return
 593
 594     podcast.related_podcasts = related
 595     podcast.save()
 596
 597
 598 @repeat_on_conflict(['podcast'])
 599 def delete_podcast(podcast):
 600     podcast.delete()