[mygpo.git] / mygpo/db/couchdb/podcast.py
blob 59a6c034ea35362b14f4f62e6a96b81cf3668ecf
from hashlib import sha1
from random import random
from datetime import datetime

from restkit import RequestFailed

from django.core.cache import cache

from mygpo.core.models import Podcast, PodcastGroup, PodcastSubscriberData
from mygpo.core.signals import incomplete_obj
from mygpo.decorators import repeat_on_conflict
from mygpo.cache import cache_result
from mygpo.utils import get_timestamp
from mygpo.db.couchdb import get_main_database
from mygpo.db import QueryParameterMissing
from mygpo.db.couchdb.utils import multi_request_view, is_couchdb_id


def podcast_slugs(base_slug):
    res = Podcast.view('podcasts/by_slug',
        startkey = [base_slug, None],
        endkey = [base_slug + 'ZZZZZ', None],
        wrap_doc = False,
    )
    return [r['key'][0] for r in res]


@cache_result(timeout=60*60)
def podcast_count():
    return Podcast.view('podcasts/by_id',
        limit = 0,
        stale = 'update_after',
    ).total_rows


@cache_result(timeout=60*60)
def podcasts_for_tag(tag):
    """ Returns the podcasts that carry the given tag.

    Some podcasts might be returned twice """

    if not tag:
        raise QueryParameterMissing('tag')

    res = multi_request_view(Podcast, 'podcasts/by_tag',
        wrap = False,
        startkey = [tag, None],
        endkey = [tag, {}],
        reduce = True,
        group = True,
        group_level = 2,
    )

    for r in res:
        yield (r['key'][1], r['value'])

    res = multi_request_view(Podcast, 'usertags/podcasts',
        wrap = False,
        startkey = [tag, None],
        endkey = [tag, {}],
        reduce = True,
        group = True,
        group_level = 2,
    )

    for r in res:
        yield (r['key'][1], r['value'])


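# Illustrative usage sketch, not part of the original module. Each yielded
# item is a (podcast_id, value) pair, where the value is taken from the
# reduced view rows above; the tag 'linux' is just a made-up example.
#
#     tagged = list(podcasts_for_tag('linux'))

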
@cache_result(timeout=60*60)
def get_podcast_languages():
    """ Returns all 2-letter language codes that are used by podcasts.

    It filters obviously invalid strings, but does not check if any
    of these codes is contained in ISO 639. """

    from mygpo.web.utils import sanitize_language_codes

    res = Podcast.view('podcasts/by_language',
        group_level = 1,
        stale = 'ok',
    )

    langs = [r['key'][0] for r in res]
    sane_lang = sanitize_language_codes(langs)
    sane_lang.sort()
    return sane_lang


@cache_result(timeout=60*60)
def podcast_by_id(podcast_id, current_id=False):

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    r = Podcast.view('podcasts/by_id',
        key = podcast_id,
        classes = [Podcast, PodcastGroup],
        include_docs = True,
    )

    if not r:
        return None

    podcast_group = r.first()

    podcast = podcast_group.get_podcast_by_id(podcast_id, current_id)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast


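# Illustrative usage sketch, not part of the original module; the argument is
# a made-up CouchDB document ID. The function returns None if nothing matches:
#
#     podcast = podcast_by_id('0123456789abcdef0123456789abcdef')

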
@cache_result(timeout=60*60)
def podcastgroup_by_id(group_id):

    if not group_id:
        raise QueryParameterMissing('group_id')

    pg = PodcastGroup.get(group_id)

    if pg.needs_update:
        incomplete_obj.send_robust(sender=pg)

    return pg


@cache_result(timeout=60*60)
def podcast_for_slug(slug):

    if not slug:
        raise QueryParameterMissing('slug')

    r = Podcast.view('podcasts/by_slug',
        startkey = [slug, None],
        endkey = [slug, {}],
        include_docs = True,
        wrap_doc = False,
    )

    if not r:
        return None

    res = r.first()
    doc = res['doc']
    if doc['doc_type'] == 'Podcast':
        obj = Podcast.wrap(doc)
    else:
        pid = res['key'][1]
        pg = PodcastGroup.wrap(doc)
        obj = pg.get_podcast_by_id(pid)

    if obj.needs_update:
        incomplete_obj.send_robust(sender=obj)

    return obj


@cache_result(timeout=60*60)
def podcast_for_slug_id(slug_id):
    """ Returns the Podcast for either a CouchDB ID or a slug """

    if is_couchdb_id(slug_id):
        return podcast_by_id(slug_id)
    else:
        return podcast_for_slug(slug_id)


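# Illustrative usage sketch, not part of the original module; both arguments
# are made-up examples. is_couchdb_id() decides which lookup path is taken:
#
#     p1 = podcast_for_slug_id('linux-outlaws')                       # slug
#     p2 = podcast_for_slug_id('0123456789abcdef0123456789abcdef')    # ID

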
@cache_result(timeout=60*60)
def podcastgroup_for_slug_id(slug_id):
    """ Returns the PodcastGroup for either a CouchDB ID or a slug """

    if not slug_id:
        raise QueryParameterMissing('slug_id')

    if is_couchdb_id(slug_id):
        return podcastgroup_by_id(slug_id)

    else:
        #TODO: implement
        return PodcastGroup.for_slug(slug_id)


def podcasts_by_id(ids):

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return []

    r = Podcast.view('podcasts/by_id',
        keys = ids,
        include_docs = True,
        wrap_doc = False,
    )

    podcasts = map(_wrap_podcast_group, r)

    for podcast in podcasts:
        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)

    return podcasts


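# Illustrative usage sketch, not part of the original module; the IDs are
# made-up CouchDB document IDs:
#
#     podcasts = podcasts_by_id(['0123456789abcdef', 'fedcba9876543210'])

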
@cache_result(timeout=60*60)
def podcast_for_oldid(oldid):

    if not oldid:
        raise QueryParameterMissing('oldid')

    r = Podcast.view('podcasts/by_oldid',
        key = long(oldid),
        classes = [Podcast, PodcastGroup],
        include_docs = True,
    )

    if not r:
        return None

    podcast_group = r.first()
    podcast = podcast_group.get_podcast_by_oldid(oldid)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast


@cache_result(timeout=60*60)
def podcastgroup_for_oldid(oldid):

    if not oldid:
        raise QueryParameterMissing('oldid')

    r = PodcastGroup.view('podcasts/groups_by_oldid',
        key = long(oldid),
        include_docs = True,
    )

    if not r:
        return None

    pg = r.one()

    if pg.needs_update:
        incomplete_obj.send_robust(sender=pg)

    return pg


def podcast_for_url(url, create=False):

    if not url:
        raise QueryParameterMissing('url')

    key = 'podcast-by-url-%s' % sha1(url.encode('utf-8')).hexdigest()

    podcast = cache.get(key)
    if podcast:
        return podcast

    r = Podcast.view('podcasts/by_url',
        key=url,
        classes=[Podcast, PodcastGroup],
        include_docs=True,
    )

    if r:
        podcast_group = r.first()
        podcast = podcast_group.get_podcast_by_url(url)

        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)
        else:
            cache.set(key, podcast)

        return podcast

    if create:
        podcast = Podcast()
        podcast.created_timestamp = get_timestamp(datetime.utcnow())
        podcast.urls = [url]
        podcast.save()
        incomplete_obj.send_robust(sender=podcast)
        return podcast

    return None


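# Illustrative usage sketch, not part of the original module; the feed URL is
# a made-up example. With create=True a new Podcast is saved if the URL is
# unknown; with the default create=False the function returns None instead:
#
#     podcast = podcast_for_url('http://example.com/feed.xml', create=True)

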
def random_podcasts(language='', chunk_size=5):
    """ Returns an iterator of random podcasts

    Optionally a language code can be specified. If given, the podcasts are
    restricted to this language. chunk_size determines how many podcasts are
    fetched at once """

    while True:
        rnd = random()
        res = Podcast.view('podcasts/random',
            startkey = [language, rnd],
            include_docs = True,
            limit = chunk_size,
            stale = 'ok',
            wrap_doc = False,
        )

        if not res:
            break

        for r in res:

            # The view podcasts/random does not include incomplete podcasts,
            # so we don't need to send any 'incomplete_obj' signals here

            obj = r['doc']
            if obj['doc_type'] == 'Podcast':
                yield Podcast.wrap(obj)

            elif obj['doc_type'] == 'PodcastGroup':
                yield PodcastGroup.wrap(obj)


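# Illustrative usage sketch, not part of the original module. The generator
# can keep producing podcasts indefinitely, so callers should bound it
# themselves, e.g. with itertools.islice:
#
#     from itertools import islice
#     sample = list(islice(random_podcasts(language='en'), 10))

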
def podcasts_by_last_update():
    res = Podcast.view('podcasts/by_last_update',
        include_docs = True,
        stale = 'update_after',
        wrap_doc = False,
    )

    # TODO: this method is only used for retrieving podcasts to update;
    # should we really send 'incomplete_obj' signals here?

    return map(_wrap_podcast_group_key1, res)


def all_podcasts():
    from mygpo.db.couchdb.utils import multi_request_view
    res = multi_request_view(Podcast, 'podcasts/by_id',
        wrap = False,
        include_docs = True,
        stale = 'update_after',
    )

    # TODO: this method is only used for maintenance purposes; should we
    # really send 'incomplete_obj' signals here?

    for r in res:
        obj = r['doc']
        if obj['doc_type'] == 'Podcast':
            yield Podcast.wrap(obj)
        else:
            pid = r[u'key']
            pg = PodcastGroup.wrap(obj)
            podcast = pg.get_podcast_by_id(pid)
            yield podcast


def all_podcasts_groups(cls):
    return cls.view('podcasts/podcasts_groups', include_docs=True,
        classes=[Podcast, PodcastGroup]).iterator()


def podcasts_to_dict(ids, use_cache=False):

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return dict()

    ids = list(set(ids))
    objs = dict()

    cache_objs = []
    if use_cache:
        res = cache.get_many(ids)
        cache_objs.extend(res.values())
        ids = [x for x in ids if x not in res.keys()]

    db_objs = podcasts_by_id(ids)

    for obj in (cache_objs + db_objs):

        # get_multi returns dict {'key': _id, 'error': 'not found'}
        # for non-existing objects
        if isinstance(obj, dict) and 'error' in obj:
            _id = obj['key']
            objs[_id] = None
            continue

        for i in obj.get_ids():
            objs[i] = obj

    if use_cache:
        cache.set_many(dict( (obj.get_id(), obj) for obj in db_objs))

    return objs


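# Illustrative usage sketch, not part of the original module; the IDs are
# made-up. The result maps each ID to its Podcast (IDs that resolve to an
# error row are mapped to None):
#
#     lookup = podcasts_to_dict(['0123456789abcdef', 'fedcba9876543210'],
#                               use_cache=True)

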
def podcasts_need_update():
    db = get_main_database()
    res = db.view('episodes/need_update',
        group_level = 1,
        reduce = True,
        limit = 100,
    )

    # TODO: this method is only used for retrieving podcasts to update;
    # should we really send 'incomplete_obj' signals here?

    for r in res:
        podcast_id = r['key']
        podcast = podcast_by_id(podcast_id)
        if podcast:
            yield podcast


@cache_result(timeout=60*60)
def get_flattr_podcasts(offset=0, limit=20):
    """ returns all podcasts that contain Flattr payment URLs """

    r = Podcast.view('podcasts/flattr',
        skip = offset,
        limit = limit,
        classes = [Podcast, PodcastGroup],
        include_docs = True,
        reduce = False,
    )

    podcasts = list(r)

    for podcast in podcasts:
        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)

    return podcasts


@cache_result(timeout=60*60)
def get_flattr_podcast_count():
    """ returns the number of podcasts that contain Flattr payment URLs """
    r = list(Podcast.view('podcasts/flattr'))
    return r[0]['value']


def subscriberdata_for_podcast(podcast_id):

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    r = PodcastSubscriberData.view('podcasts/subscriber_data',
        key = podcast_id,
        include_docs = True,
    )

    if r:
        return r.first()

    data = PodcastSubscriberData()
    data.podcast = podcast_id
    return data


def _wrap_podcast_group(res):
    if res['doc']['doc_type'] == 'Podcast':
        return Podcast.wrap(res['doc'])
    else:
        pg = PodcastGroup.wrap(res['doc'])
        id = res['key']
        return pg.get_podcast_by_id(id)


def _wrap_podcast_group_key1(res):
    obj = res['doc']
    if obj['doc_type'] == 'Podcast':
        return Podcast.wrap(obj)

    else:
        pid = res[u'key'][1]
        pg = PodcastGroup.wrap(obj)
        podcast = pg.get_podcast_by_id(pid)
        return podcast


def search_wrapper(result):
    doc = result['doc']
    if doc['doc_type'] == 'Podcast':
        p = Podcast.wrap(doc)
    elif doc['doc_type'] == 'PodcastGroup':
        p = PodcastGroup.wrap(doc)
    p._id = result['id']
    return p


@cache_result(timeout=60*60)
def search(q, offset=0, num_results=20):

    if not q:
        return [], 0

    db = get_main_database()

    #FIXME current couchdbkit can't parse responses for multi-query searches
    q = q.replace(',', '')

    try:
        res = db.search('podcasts/search',
            wrapper = search_wrapper,
            include_docs = True,
            limit = num_results,
            stale = 'update_after',
            skip = offset,
            q = q,
            sort='\\subscribers<int>')

        podcasts = list(res)

        for podcast in podcasts:
            if podcast.needs_update:
                incomplete_obj.send_robust(sender=podcast)

        return podcasts, res.total_rows

    except RequestFailed:
        return [], 0


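# Illustrative usage sketch, not part of the original module; the query string
# is a made-up example. search() returns the requested page of results
# together with the total number of matches:
#
#     podcasts, total = search('linux', offset=0, num_results=20)

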
@repeat_on_conflict(['podcast'])
def update_additional_data(podcast, twitter):
    podcast.twitter = twitter
    podcast.save()

    # clear the whole cache until we have a better invalidation mechanism
    cache.clear()


@repeat_on_conflict(['podcast'])
def update_related_podcasts(podcast, related):
    if podcast.related_podcasts == related:
        return

    podcast.related_podcasts = related
    podcast.save()


@repeat_on_conflict(['podcast'])
def delete_podcast(podcast):
    podcast.delete()