af5e94b5fea9684734657350ca6bd4aa21588353
[mygpo.git] / mygpo / db / couchdb / podcast.py
blobaf5e94b5fea9684734657350ca6bd4aa21588353
1 from hashlib import sha1
2 from random import random
3 from datetime import datetime
5 from restkit import RequestFailed
7 from django.core.cache import cache
9 from mygpo.core.models import Podcast, PodcastGroup, PodcastSubscriberData
10 from mygpo.core.signals import incomplete_obj
11 from mygpo.decorators import repeat_on_conflict
12 from mygpo.cache import cache_result
13 from mygpo.utils import get_timestamp
14 from mygpo.db.couchdb import get_main_database
15 from mygpo.db import QueryParameterMissing
16 from mygpo.db.couchdb.utils import multi_request_view, is_couchdb_id
def podcast_slugs(base_slug):
    """ Returns all podcast slugs that start with the given base slug """
    # 'ZZZZZ' acts as a high-sorting suffix to close the key range
    rows = Podcast.view('podcasts/by_slug',
            startkey = [base_slug, None],
            endkey   = [base_slug + 'ZZZZZ', None],
            wrap_doc = False,
        )
    return [row['key'][0] for row in rows]
@cache_result(timeout=60*60)
def podcast_count():
    """ Returns the total number of podcasts in the database """
    # limit=0 fetches no rows; only total_rows is of interest
    view = Podcast.view('podcasts/by_id',
            limit = 0,
            stale = 'update_after',
        )
    return view.total_rows
@cache_result(timeout=60*60)
def podcasts_for_tag(tag):
    """ Returns the podcasts with the current tag.

    Yields (podcast_id, value) tuples.
    Some podcasts might be returned twice """

    if not tag:
        raise QueryParameterMissing('tag')

    # the tag may appear both in the podcast documents themselves and in
    # user-assigned tags; both views are queried with identical parameters,
    # so iterate over the view names instead of duplicating the request
    for view in ('podcasts/by_tag', 'usertags/podcasts'):
        res = multi_request_view(Podcast, view,
                wrap        = False,
                startkey    = [tag, None],
                endkey      = [tag, {}],
                reduce      = True,
                group       = True,
                group_level = 2,
            )

        for r in res:
            yield (r['key'][1], r['value'])
@cache_result(timeout=60*60)
def get_podcast_languages():
    """ Returns all 2-letter language codes that are used by podcasts.

    It filters obviously invalid strings, but does not check if any
    of these codes is contained in ISO 639. """

    # imported here to avoid a circular import at module load time
    from mygpo.web.utils import sanitize_language_codes

    rows = Podcast.view('podcasts/by_language',
            group_level = 1,
            stale       = 'ok',
        )

    codes = sanitize_language_codes([row['key'][0] for row in rows])
    return sorted(codes)
@cache_result(timeout=60*60)
def podcast_by_id(podcast_id, current_id=False):
    """ Returns the Podcast for the given CouchDB id, or None """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    res = Podcast.view('podcasts/by_id',
            key          = podcast_id,
            classes      = [Podcast, PodcastGroup],
            include_docs = True,
        )

    if not res:
        return None

    # the document may be a PodcastGroup; resolve the actual podcast
    group = res.first()
    podcast = group.get_podcast_by_id(podcast_id, current_id)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast
@cache_result(timeout=60*60)
def podcastgroup_by_id(group_id):
    """ Returns the PodcastGroup for the given CouchDB id """

    if not group_id:
        raise QueryParameterMissing('group_id')

    group = PodcastGroup.get(group_id)

    if group.needs_update:
        incomplete_obj.send_robust(sender=group)

    return group
@cache_result(timeout=60*60)
def podcast_for_slug(slug):
    """ Returns the Podcast that has the given slug assigned, or None """

    if not slug:
        raise QueryParameterMissing('slug')

    rows = Podcast.view('podcasts/by_slug',
            startkey     = [slug, None],
            endkey       = [slug, {}],
            include_docs = True,
            wrap_doc     = False,
        )

    if not rows:
        return None

    row = rows.first()
    doc = row['doc']

    if doc['doc_type'] == 'Podcast':
        obj = Podcast.wrap(doc)
    else:
        # the slug belongs to a podcast within a group; the podcast's
        # id is the second element of the view key
        group = PodcastGroup.wrap(doc)
        obj = group.get_podcast_by_id(row['key'][1])

    if obj.needs_update:
        incomplete_obj.send_robust(sender=obj)

    return obj
@cache_result(timeout=60*60)
def podcast_for_slug_id(slug_id):
    """ Returns the Podcast for either a CouchDB id or a slug """

    if is_couchdb_id(slug_id):
        return podcast_by_id(slug_id)

    return podcast_for_slug(slug_id)
@cache_result(timeout=60*60)
def podcastgroup_for_slug_id(slug_id):
    """ Returns the PodcastGroup for either a CouchDB id or a slug """

    if not slug_id:
        raise QueryParameterMissing('slug_id')

    if is_couchdb_id(slug_id):
        return podcastgroup_by_id(slug_id)

    #TODO: implement
    return PodcastGroup.for_slug(slug_id)
def podcasts_by_id(ids):
    """ Returns the Podcasts for the given list of CouchDB ids """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return []

    rows = Podcast.view('podcasts/by_id',
            keys         = ids,
            include_docs = True,
            wrap_doc     = False
        )

    podcasts = [_wrap_podcast_group(row) for row in rows]

    for podcast in podcasts:
        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)

    return podcasts
@cache_result(timeout=60*60)
def podcast_for_oldid(oldid):
    """ Returns the Podcast for the given numeric old id, or None """

    if not oldid:
        raise QueryParameterMissing('oldid')

    res = Podcast.view('podcasts/by_oldid',
            key          = long(oldid),
            classes      = [Podcast, PodcastGroup],
            include_docs = True,
        )

    if not res:
        return None

    # the document may be a PodcastGroup; resolve the actual podcast
    group = res.first()
    podcast = group.get_podcast_by_oldid(oldid)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast
@cache_result(timeout=60*60)
def podcastgroup_for_oldid(oldid):
    """ Returns the PodcastGroup for the given numeric old id, or None """

    if not oldid:
        raise QueryParameterMissing('oldid')

    res = PodcastGroup.view('podcasts/groups_by_oldid',
            key          = long(oldid),
            include_docs = True,
        )

    if not res:
        return None

    group = res.one()

    if group.needs_update:
        incomplete_obj.send_robust(sender=group)

    return group
def podcast_for_url(url, create=False):
    """ Returns the Podcast for the given feed URL.

    If no podcast exists for the URL and ``create`` is True, a new
    (incomplete) podcast is created and saved. Returns None otherwise. """

    if not url:
        raise QueryParameterMissing('url')

    cache_key = 'podcast-by-url-%s' % sha1(url.encode('utf-8')).hexdigest()

    cached = cache.get(cache_key)
    if cached:
        return cached

    res = Podcast.view('podcasts/by_url',
            key=url,
            classes=[Podcast, PodcastGroup],
            include_docs=True
        )

    if res:
        group = res.first()
        podcast = group.get_podcast_by_url(url)

        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)
        else:
            # only complete podcasts are cached
            cache.set(cache_key, podcast)

        return podcast

    if create:
        podcast = Podcast()
        podcast.created_timestamp = get_timestamp(datetime.utcnow())
        podcast.urls = [url]
        podcast.save()
        incomplete_obj.send_robust(sender=podcast)
        return podcast

    return None
def random_podcasts(language='', chunk_size=5):
    """ Returns an iterator of random podcasts

    Optionally a language code can be specified. If given, the podcasts
    will be restricted to this language. chunk_size determines how many
    podcasts will be fetched at once """

    while True:
        rnd = random()
        rows = Podcast.view('podcasts/random',
                startkey = [language, rnd],
                include_docs = True,
                limit = chunk_size,
                stale = 'ok',
                wrap_doc = False,
            )

        if not rows:
            break

        for row in rows:

            # The view podcasts/random does not include incomplete podcasts,
            # so we don't need to send any 'incomplete_obj' signals here

            doc = row['doc']
            doc_type = doc['doc_type']

            if doc_type == 'PodcastGroup':
                yield PodcastGroup.wrap(doc)

            elif doc_type == 'Podcast':
                yield Podcast.wrap(doc)
def podcasts_by_last_update():
    """ Returns all podcasts, ordered by their last-update key """

    rows = Podcast.view('podcasts/by_last_update',
            include_docs = True,
            stale        = 'update_after',
            wrap_doc     = False,
        )

    # TODO: this method is only used for retrieving podcasts to update;
    # should we really send 'incomplete_obj' signals here?

    return [_wrap_podcast_group_key1(row) for row in rows]
def all_podcasts():
    """ Returns an iterator over all podcasts in the database """

    from mygpo.db.couchdb.utils import multi_request_view
    rows = multi_request_view(Podcast, 'podcasts/by_id',
            wrap         = False,
            include_docs = True,
            stale        = 'update_after',
        )

    # TODO: this method is only used for maintenance purposes; should we
    # really send 'incomplete_obj' signals here?

    for row in rows:
        doc = row['doc']

        if doc['doc_type'] == 'Podcast':
            yield Podcast.wrap(doc)
        else:
            # a group document; the row key is the contained podcast's id
            group = PodcastGroup.wrap(doc)
            yield group.get_podcast_by_id(row[u'key'])
def all_podcasts_groups(cls):
    """ Returns an iterator over all Podcast and PodcastGroup documents """
    view = cls.view('podcasts/podcasts_groups',
            include_docs = True,
            classes      = [Podcast, PodcastGroup],
        )
    return view.iterator()
def podcasts_to_dict(ids, use_cache=False):
    """ Maps each of the given podcast ids to its Podcast object.

    Ids of non-existing podcasts are mapped to None. If ``use_cache`` is
    True, the Django cache is consulted first, and podcasts fetched from
    the database are stored back into the cache. """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return dict()

    # remove duplicates
    ids = list(set(ids))
    result = dict()

    cached = []
    if use_cache:
        hits = cache.get_many(ids)
        cached.extend(hits.values())
        # only fetch the remaining ids from the database
        ids = [x for x in ids if x not in hits.keys()]

    db_objs = podcasts_by_id(ids)

    for obj in (cached + db_objs):

        # get_multi returns dict {'key': _id, 'error': 'not found'}
        # for non-existing objects
        if isinstance(obj, dict) and 'error' in obj:
            result[obj['key']] = None
            continue

        for i in obj.get_ids():
            result[i] = obj

    if use_cache:
        cache.set_many(dict( (obj.get_id(), obj) for obj in db_objs))

    return result
def podcasts_need_update():
    """ Returns an iterator of podcasts that have episodes needing an update """

    db = get_main_database()
    rows = db.view('episodes/need_update',
            group_level = 1,
            reduce      = True,
        )

    # TODO: this method is only used for retrieving podcasts to update;
    # should we really send 'incomplete_obj' signals here?

    for row in rows:
        podcast = podcast_by_id(row['key'])
        if podcast:
            yield podcast
@cache_result(timeout=60*60)
def get_flattr_podcasts(offset=0, limit=20):
    """ returns all podcasts that contain Flattr payment URLs """

    rows = Podcast.view('podcasts/flattr',
            skip         = offset,
            limit        = limit,
            classes      = [Podcast, PodcastGroup],
            include_docs = True,
            reduce       = False,
        )

    podcasts = list(rows)

    for podcast in podcasts:
        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)

    return podcasts
@cache_result(timeout=60*60)
def get_flattr_podcast_count():
    """ returns the number of podcasts that contain Flattr payment URLs """
    r = list(Podcast.view('podcasts/flattr'))

    # a reduce view returns no rows at all when there are no matching
    # documents; r[0] would raise an IndexError in that case
    if not r:
        return 0

    return r[0]['value']
def subscriberdata_for_podcast(podcast_id):
    """ Returns the subscriber data for the given podcast.

    A fresh, unsaved PodcastSubscriberData object is returned when no
    data exists yet. """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    res = PodcastSubscriberData.view('podcasts/subscriber_data',
            key          = podcast_id,
            include_docs = True,
        )

    if res:
        return res.first()

    data = PodcastSubscriberData()
    data.podcast = podcast_id
    return data
def _wrap_podcast_group(res):
    """ Wraps a raw view row into a Podcast object.

    For PodcastGroup documents, the row key is the contained
    podcast's id. """
    doc = res['doc']

    if doc['doc_type'] == 'Podcast':
        return Podcast.wrap(doc)

    group = PodcastGroup.wrap(doc)
    return group.get_podcast_by_id(res['key'])
def _wrap_podcast_group_key1(res):
    """ Wraps a raw view row into a Podcast object.

    For PodcastGroup documents, the contained podcast's id is the
    second element of the row key. """
    doc = res['doc']

    if doc['doc_type'] == 'Podcast':
        return Podcast.wrap(doc)

    group = PodcastGroup.wrap(doc)
    return group.get_podcast_by_id(res[u'key'][1])
def search_wrapper(result):
    """ Wraps a search result row into a Podcast or PodcastGroup """
    doc = result['doc']
    doc_type = doc['doc_type']

    if doc_type == 'Podcast':
        obj = Podcast.wrap(doc)
    elif doc_type == 'PodcastGroup':
        obj = PodcastGroup.wrap(doc)

    obj._id = result['id']
    return obj
@cache_result(timeout=60*60)
def search(q, offset=0, num_results=20):
    """ Searches podcasts for the given query string.

    Returns a tuple (podcasts, total_rows); empty queries and failed
    requests yield ([], 0). """

    if not q:
        return [], 0

    db = get_main_database()

    #FIXME current couchdbkit can't parse responses for multi-query searches
    q = q.replace(',', '')

    # the request fires lazily on iteration, so everything including
    # list() stays inside the try block
    try:
        results = db.search('podcasts/search',
                wrapper = search_wrapper,
                include_docs = True,
                limit = num_results,
                stale = 'update_after',
                skip = offset,
                q = q,
                sort='\\subscribers<int>')

        podcasts = list(results)

        for podcast in podcasts:
            if podcast.needs_update:
                incomplete_obj.send_robust(sender=podcast)

        return podcasts, results.total_rows

    except RequestFailed:
        return [], 0
@repeat_on_conflict(['podcast'])
def update_additional_data(podcast, twitter):
    """ Stores the given Twitter handle on the podcast """
    podcast.twitter = twitter
    podcast.save()

    # clear the whole cache until we have a better invalidation mechanism
    cache.clear()