Merge branch 'master' into design
[mygpo.git] / mygpo / db / couchdb / podcast.py
blobcd28d8291e4b4c567f6f927d2d9e169376eb7a71
1 from hashlib import sha1
2 from random import random
3 from datetime import datetime
5 from restkit import RequestFailed
7 from django.core.cache import cache
9 from mygpo.core.models import Podcast, PodcastGroup, PodcastSubscriberData
10 from mygpo.core.signals import incomplete_obj
11 from mygpo.decorators import repeat_on_conflict
12 from mygpo.cache import cache_result
13 from mygpo.utils import get_timestamp
14 from mygpo.db.couchdb import get_main_database, get_userdata_database
15 from mygpo.db import QueryParameterMissing
16 from mygpo.db.couchdb.utils import multi_request_view, is_couchdb_id
def podcast_slugs(base_slug):
    """ Returns the first key component of every 'podcasts/by_slug' row
    whose key lies between [base_slug, None] and
    [base_slug + 'ZZZZZ', None] """

    rows = Podcast.view(
        'podcasts/by_slug',
        startkey=[base_slug, None],
        endkey=[base_slug + 'ZZZZZ', None],
        wrap_doc=False,
    )

    slugs = []
    for row in rows:
        # rows are not wrapped (wrap_doc=False), so the key is read directly
        slugs.append(row['key'][0])
    return slugs
@cache_result(timeout=60*60)
def podcast_count():
    """ Returns the total_rows value reported by the 'podcasts/by_id' view """

    # limit=0: no rows are requested, only total_rows is read from the result
    view = Podcast.view('podcasts/by_id', limit=0, stale='update_after')
    return view.total_rows
@cache_result(timeout=60*60)
def podcasts_for_tag(tag):
    """ Yields (key[1], value) pairs for the given tag

    Rows are read first from the 'podcasts/by_tag' view and afterwards from
    the 'usertags/podcasts' view of the userdata database, so a podcast
    might be yielded twice.

    Raises QueryParameterMissing if tag is empty; as this is a generator
    function, the check runs once iteration starts. """

    # NOTE(review): a generator function is wrapped by cache_result here;
    # confirm that cache_result handles generator return values.

    if not tag:
        raise QueryParameterMissing('tag')

    by_tag_rows = multi_request_view(
        Podcast,
        'podcasts/by_tag',
        wrap=False,
        startkey=[tag, None],
        endkey=[tag, {}],
        reduce=True,
        group=True,
        group_level=2
    )

    for row in by_tag_rows:
        yield (row['key'][1], row['value'])

    # the userdata database is only requested after the first view is done
    userdata_db = get_userdata_database()
    usertag_rows = multi_request_view(
        userdata_db,
        'usertags/podcasts',
        wrap=False,
        startkey=[tag, None],
        endkey=[tag, {}],
        reduce=True,
        group=True,
        group_level=2
    )

    for row in usertag_rows:
        yield (row['key'][1], row['value'])
@cache_result(timeout=60*60)
def get_podcast_languages():
    """ Returns a sorted list of the language codes used by podcasts.

    The codes are passed through sanitize_language_codes to drop obviously
    invalid strings; they are not checked against ISO 639. """

    from mygpo.web.utils import sanitize_language_codes

    rows = Podcast.view('podcasts/by_language', group_level=1, stale='ok')

    # the language code is the first component of each grouped key
    codes = [row['key'][0] for row in rows]
    return sorted(sanitize_language_codes(codes))
def podcast_by_id_uncached(podcast_id, current_id=False):
    """ Looks up a podcast by id without going through cache_result

    Returns None if the 'podcasts/by_id' view has no result for the id.
    Raises QueryParameterMissing if podcast_id is empty.
    Sends the incomplete_obj signal if the podcast reports needs_update. """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    result = Podcast.view(
        'podcasts/by_id',
        key=podcast_id,
        classes=[Podcast, PodcastGroup],
        include_docs=True,
    )

    if not result:
        return None

    # the first result (wrapped as one of the classes given above) is asked
    # for the podcast with the requested id
    container = result.first()
    podcast = container.get_podcast_by_id(podcast_id, current_id)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast


# variant of podcast_by_id_uncached wrapped by cache_result
podcast_by_id = cache_result(timeout=60*60)(podcast_by_id_uncached)
@cache_result(timeout=60*60)
def podcastgroup_by_id(group_id):
    """ Returns the PodcastGroup fetched via PodcastGroup.get(group_id)

    Raises QueryParameterMissing if group_id is empty.
    Sends the incomplete_obj signal if the group reports needs_update. """

    if not group_id:
        raise QueryParameterMissing('group_id')

    group = PodcastGroup.get(group_id)

    if group.needs_update:
        incomplete_obj.send_robust(sender=group)

    return group
@cache_result(timeout=60*60)
def podcast_for_slug(slug):
    """ Returns the podcast for the given slug, or None

    None is returned when the 'podcasts/by_slug' view has no rows for the
    slug, or when the row refers to a PodcastGroup itself.
    Raises QueryParameterMissing if slug is empty. """

    if not slug:
        raise QueryParameterMissing('slug')

    rows = Podcast.view(
        'podcasts/by_slug',
        startkey=[slug, None],
        endkey=[slug, {}],
        include_docs=True,
        wrap_doc=False,
    )

    if not rows:
        return None

    row = rows.first()
    doc = row['doc']

    if doc['doc_type'] == 'Podcast':
        podcast = Podcast.wrap(doc)

    else:
        # any other document is wrapped as a group; the second key
        # component is the id that is looked up inside it
        member_id = row['key'][1]
        group = PodcastGroup.wrap(doc)

        if member_id == group._id:
            # TODO: we don't return PodcastGroups atm
            return None

        podcast = group.get_podcast_by_id(member_id)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast
@cache_result(timeout=60*60)
def podcast_for_slug_id(slug_id):
    """ Returns the Podcast for either a CouchDB-ID or a Slug """

    # is_couchdb_id decides which of the two lookups handles the value
    lookup = podcast_by_id if is_couchdb_id(slug_id) else podcast_for_slug
    return lookup(slug_id)
@cache_result(timeout=60*60)
def podcastgroup_for_slug_id(slug_id):
    """ Returns the PodcastGroup for either a CouchDB-ID or a Slug

    Raises QueryParameterMissing if slug_id is empty. """

    if not slug_id:
        raise QueryParameterMissing('slug_id')

    if is_couchdb_id(slug_id):
        return podcastgroup_by_id(slug_id)

    #TODO: implement
    return PodcastGroup.for_slug(slug_id)
def podcasts_by_id(ids):
    """ Returns a list of podcasts for the given ids

    Raises QueryParameterMissing if ids is None; an empty ids yields an
    empty list without querying the view.
    Sends the incomplete_obj signal for each podcast reporting needs_update. """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return []

    rows = Podcast.view(
        'podcasts/by_id',
        keys=ids,
        include_docs=True,
        wrap_doc=False
    )

    # rows are unwrapped (wrap_doc=False); _wrap_podcast_group turns each
    # row into a podcast object
    podcasts = [_wrap_podcast_group(row) for row in rows]

    for candidate in podcasts:
        if candidate.needs_update:
            incomplete_obj.send_robust(sender=candidate)

    return podcasts
def podcasts_groups_by_id(ids):
    """ gets podcast groups and top-level podcasts for the given ids

    Generator over the rows of the 'podcasts/podcasts_groups' view:
     * yields None for a row that carries no document
     * yields a Podcast or PodcastGroup depending on the doc_type
     * logs an error and skips rows with any other doc_type

    Raises QueryParameterMissing if ids is None (when iteration starts).
    Sends the incomplete_obj signal for objects reporting needs_update. """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return

    # imported locally because the module does not define a logger
    import logging
    logger = logging.getLogger(__name__)

    db = get_main_database()
    res = db.view('podcasts/podcasts_groups',
            keys=ids,
            include_docs=True,
        )

    for r in res:
        doc = r['doc']

        if not doc:
            yield None
            # without a document there is nothing to wrap; continuing avoids
            # subscripting the missing doc below
            continue

        doc_type = doc['doc_type']

        if doc_type == 'Podcast':
            obj = Podcast.wrap(doc)

        elif doc_type == 'PodcastGroup':
            obj = PodcastGroup.wrap(doc)

        else:
            logger.error('podcasts_groups_by_id retrieved unknown doc_type '
                '"%s" for params %s', doc_type, res.params)
            continue

        if obj.needs_update:
            incomplete_obj.send_robust(sender=obj)

        yield obj
@cache_result(timeout=60*60)
def podcast_for_oldid(oldid):
    """ Returns the podcast for the given old id, or None if the
    'podcasts/by_oldid' view has no result

    Raises QueryParameterMissing if oldid is empty.
    Sends the incomplete_obj signal if the podcast reports needs_update. """

    if not oldid:
        raise QueryParameterMissing('oldid')

    result = Podcast.view(
        'podcasts/by_oldid',
        key=long(oldid),    # the view is queried with a long key
        classes=[Podcast, PodcastGroup],
        include_docs=True,
    )

    if not result:
        return None

    container = result.first()
    podcast = container.get_podcast_by_oldid(oldid)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast
@cache_result(timeout=60*60)
def podcastgroup_for_oldid(oldid):
    """ Returns the PodcastGroup for the given old id, or None if the
    'podcasts/groups_by_oldid' view has no result

    Raises QueryParameterMissing if oldid is empty.
    Sends the incomplete_obj signal if the group reports needs_update. """

    if not oldid:
        raise QueryParameterMissing('oldid')

    result = PodcastGroup.view(
        'podcasts/groups_by_oldid',
        key=long(oldid),
        include_docs=True,
    )

    if not result:
        return None

    group = result.one()

    if group.needs_update:
        incomplete_obj.send_robust(sender=group)

    return group
def podcast_for_url(url, create=False):
    """ Returns the podcast for the given URL

    The Django cache is consulted first (key derived from the SHA1 of the
    UTF-8 encoded URL), then the 'podcasts/by_url' view. A podcast found in
    the view is stored in the cache only if it does not report needs_update;
    otherwise the incomplete_obj signal is sent.

    If nothing is found and create is true, a new Podcast with this URL is
    saved, signalled as incomplete and returned; otherwise None is returned.

    Raises QueryParameterMissing if url is empty. """

    if not url:
        raise QueryParameterMissing('url')

    cache_key = 'podcast-by-url-%s' % sha1(url.encode('utf-8')).hexdigest()

    cached = cache.get(cache_key)
    if cached:
        return cached

    result = Podcast.view(
        'podcasts/by_url',
        key=url,
        classes=[Podcast, PodcastGroup],
        include_docs=True
    )

    if result:
        container = result.first()
        found = container.get_podcast_by_url(url)

        if found.needs_update:
            incomplete_obj.send_robust(sender=found)
        else:
            cache.set(cache_key, found)

        return found

    if not create:
        return None

    created = Podcast()
    created.created_timestamp = get_timestamp(datetime.utcnow())
    created.urls = [url]
    created.save()
    incomplete_obj.send_robust(sender=created)
    return created
def random_podcasts(language='', chunk_size=5):
    """ Returns an iterator of random podcasts and podcast groups

    Optionally a language code can be specified; it is used as the first
    component of the start key of the 'podcasts/random' view. chunk_size
    determines how many rows are fetched per request. Iteration ends when
    a request returns no rows. """

    while True:
        # every chunk starts at a fresh random position
        start = [language, random()]
        chunk = Podcast.view(
            'podcasts/random',
            startkey=start,
            include_docs=True,
            limit=chunk_size,
            stale='ok',
            wrap_doc=False,
        )

        if not chunk:
            break

        for row in chunk:

            # The view podcasts/random does not include incomplete podcasts,
            # so we don't need to send any 'incomplete_obj' signals here

            doc = row['doc']
            doc_type = doc['doc_type']

            if doc_type == 'Podcast':
                yield Podcast.wrap(doc)

            elif doc_type == 'PodcastGroup':
                yield PodcastGroup.wrap(doc)
def podcasts_by_last_update():
    """ Returns a list of podcasts built from the rows of the
    'podcasts/by_last_update' view """

    rows = Podcast.view(
        'podcasts/by_last_update',
        include_docs=True,
        stale='update_after',
        wrap_doc=False,
    )

    # TODO: this method is only used for retrieving podcasts to update;
    #       should we really send 'incomplete_obj' signals here?

    return [_wrap_podcast_group_key1(row) for row in rows]
def all_podcasts():
    """ Yields a podcast for every row of the 'podcasts/by_id' view

    Rows whose document is not a Podcast are wrapped as PodcastGroup and
    the podcast with the row's key is taken from that group. """

    from mygpo.db.couchdb.utils import multi_request_view
    rows = multi_request_view(
        Podcast,
        'podcasts/by_id',
        wrap=False,
        include_docs=True,
        stale='update_after',
    )

    # TODO: this method is only used for maintenance purposes; should we
    #       really send 'incomplete_obj' signals here?

    for row in rows:
        doc = row['doc']

        if doc['doc_type'] == 'Podcast':
            yield Podcast.wrap(doc)
            continue

        # in this view the row key itself is the podcast id
        group = PodcastGroup.wrap(doc)
        yield group.get_podcast_by_id(row[u'key'])
def podcasts_to_dict(ids, use_cache=False):
    """ Returns a dict mapping ids to podcast objects

    Every id returned by an object's get_ids() is mapped to that object.
    Entries of the form {'key': _id, 'error': ...} are mapped to None.

    If use_cache is true, objects are first looked up in the Django cache;
    only ids missing there are requested via podcasts_by_id, and the
    objects retrieved that way are written back to the cache afterwards.

    Raises QueryParameterMissing if ids is None; an empty ids returns an
    empty dict. """

    def _is_error(obj):
        # get_multi returns dict {'key': _id, 'error': 'not found'}
        # for non-existing objects
        return isinstance(obj, dict) and 'error' in obj

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return dict()

    # duplicates would only cause redundant lookups
    ids = list(set(ids))
    objs = dict()

    cache_objs = []
    if use_cache:
        res = cache.get_many(ids)
        cache_objs.extend(res.values())
        # membership is tested on the dict itself instead of its key list
        ids = [x for x in ids if x not in res]

    db_objs = list(podcasts_by_id(ids))

    for obj in (cache_objs + db_objs):

        if _is_error(obj):
            _id = obj['key']
            objs[_id] = None
            continue

        for i in obj.get_ids():
            objs[i] = obj

    if use_cache:
        # error entries have no get_id() and must not be cached; this keeps
        # the write-back consistent with the handling in the loop above
        cache.set_many(dict(
            (obj.get_id(), obj) for obj in db_objs if not _is_error(obj)
        ))

    return objs
def podcasts_need_update():
    """ Yields the podcasts whose ids are the keys of the (reduced, grouped)
    'episodes/need_update' view; at most 100 rows are requested and ids for
    which podcast_by_id finds nothing are skipped """

    main_db = get_main_database()
    rows = main_db.view(
        'episodes/need_update',
        group_level=1,
        reduce=True,
        limit=100,
    )

    # TODO: this method is only used for retrieving podcasts to update;
    #       should we really send 'incomplete_obj' signals here?

    for row in rows:
        found = podcast_by_id(row['key'])
        if found:
            yield found
@cache_result(timeout=60*60)
def get_flattr_podcasts(offset=0, limit=20):
    """ returns a page of podcasts that contain Flattr payment URLs

    offset rows of the 'podcasts/flattr' view are skipped and at most limit
    rows are returned. Sends the incomplete_obj signal for each podcast
    reporting needs_update. """

    view = Podcast.view(
        'podcasts/flattr',
        skip=offset,
        limit=limit,
        classes=[Podcast, PodcastGroup],
        include_docs=True,
        reduce=False,
    )

    page = list(view)

    for entry in page:
        if entry.needs_update:
            incomplete_obj.send_robust(sender=entry)

    return page
@cache_result(timeout=60*60)
def get_flattr_podcast_count():
    """ returns the number of podcasts that contain Flattr payment URLs

    The count is the value of the first row of the 'podcasts/flattr' view;
    0 is returned if the view yields no rows. """
    r = list(Podcast.view('podcasts/flattr'))
    # an empty result has no first row; fall back to 0 in the same way
    # get_license_podcast_count does
    return r[0]['value'] if r else 0
@cache_result(timeout=60*60)
def get_license_podcasts(offset=0, limit=20):
    """ returns a page of podcasts w/ license information

    offset rows of the 'podcasts/license' view are skipped and at most
    limit rows are returned. Sends the incomplete_obj signal for each
    podcast reporting needs_update. """

    view = Podcast.view(
        'podcasts/license',
        skip=offset,
        limit=limit,
        classes=[Podcast, PodcastGroup],
        include_docs=True,
        reduce=False,
    )

    page = list(view)

    for entry in page:
        if entry.needs_update:
            incomplete_obj.send_robust(sender=entry)

    return page
@cache_result(timeout=60*60)
def get_license_podcast_count():
    """ returns the number of podcasts that contain license information

    The count is the value of the first row of the 'podcasts/license' view;
    0 is returned if the view yields no rows. """
    rows = list(Podcast.view('podcasts/license'))
    if not rows:
        return 0
    return rows[0]['value']
def subscriberdata_for_podcast(podcast_id):
    """ Returns the PodcastSubscriberData for the given podcast id

    If the 'podcasts/subscriber_data' view has no result, a new (unsaved)
    PodcastSubscriberData with its podcast attribute set is returned.
    Raises QueryParameterMissing if podcast_id is empty. """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    result = PodcastSubscriberData.view(
        'podcasts/subscriber_data',
        key=podcast_id,
        include_docs=True,
    )

    if not result:
        fresh = PodcastSubscriberData()
        fresh.podcast = podcast_id
        return fresh

    return result.first()
def _wrap_podcast_group(res):
    """ Turns a view row into a podcast; the row key is the podcast id

    Documents that are not of type 'Podcast' are wrapped as PodcastGroup
    and the podcast with the row's key is taken from the group. """
    doc = res['doc']

    if doc['doc_type'] == 'Podcast':
        return Podcast.wrap(doc)

    group = PodcastGroup.wrap(doc)
    podcast_id = res['key']
    return group.get_podcast_by_id(podcast_id)
def _wrap_podcast_group_key1(res):
    """ Turns a view row into a podcast; the podcast id is the second
    component of the row key

    Documents that are not of type 'Podcast' are wrapped as PodcastGroup
    and the podcast with that id is taken from the group. """
    doc = res['doc']

    if doc['doc_type'] != 'Podcast':
        podcast_id = res[u'key'][1]
        group = PodcastGroup.wrap(doc)
        return group.get_podcast_by_id(podcast_id)

    return Podcast.wrap(doc)
def search_wrapper(result):
    """ Wraps a search result row as Podcast or PodcastGroup and sets the
    object's _id to the row's id

    Only the doc_types 'Podcast' and 'PodcastGroup' are handled; for any
    other type no object is assigned before _id is set. """
    document = result['doc']
    doc_type = document['doc_type']

    if doc_type == 'Podcast':
        wrapped = Podcast.wrap(document)

    elif doc_type == 'PodcastGroup':
        wrapped = PodcastGroup.wrap(document)

    wrapped._id = result['id']
    return wrapped
@cache_result(timeout=60*60)
def search(q, offset=0, num_results=20):
    """ Searches podcasts via the 'podcasts/search' index

    Returns a tuple (list of results, total number of rows). An empty query
    or a RequestFailed error results in ([], 0). Sends the incomplete_obj
    signal for each result reporting needs_update. """

    if not q:
        return [], 0

    main_db = get_main_database()

    #FIXME current couchdbkit can't parse responses for multi-query searches
    query = q.replace(',', '')

    try:
        res = main_db.search(
            'podcasts/search',
            wrapper=search_wrapper,
            include_docs=True,
            limit=num_results,
            stale='update_after',
            skip=offset,
            q=query,
            sort='\\subscribers<int>'
        )

        hits = list(res)

        for hit in hits:
            if hit.needs_update:
                incomplete_obj.send_robust(sender=hit)

        return hits, res.total_rows

    except RequestFailed:
        return [], 0
def reload_podcast(podcast):
    """ Fetches the podcast again by its id via podcast_by_id_uncached;
    used as reload_f for repeat_on_conflict below """
    podcast_id = podcast.get_id()
    return podcast_by_id_uncached(podcast_id)
@repeat_on_conflict(['podcast'], reload_f=reload_podcast)
def update_additional_data(podcast, twitter):
    """ Sets the podcast's twitter attribute, saves the podcast and clears
    the Django cache """
    podcast.twitter = twitter
    podcast.save()

    # clear the whole cache until we have a better invalidation mechanism
    cache.clear()
@repeat_on_conflict(['podcast'], reload_f=reload_podcast)
def update_related_podcasts(podcast, related):
    """ Stores related as the podcast's related_podcasts and saves the
    podcast; nothing is saved if the value is unchanged """
    if podcast.related_podcasts != related:
        podcast.related_podcasts = related
        podcast.save()
@repeat_on_conflict(['podcast'], reload_f=reload_podcast)
def delete_podcast(podcast):
    """ Deletes the given podcast by calling its delete() method """
    podcast.delete()