Merge branch 'master' into design
[mygpo.git] / mygpo / db / couchdb / podcast.py
blob4bb3baeef6ed136b7d2f8d016fe97e4adc0e0058
1 from hashlib import sha1
2 from random import random
3 from datetime import datetime
5 from restkit import RequestFailed
7 from django.core.cache import cache
9 from mygpo.core.models import Podcast, PodcastGroup, PodcastSubscriberData
10 from mygpo.core.signals import incomplete_obj
11 from mygpo.decorators import repeat_on_conflict
12 from mygpo.cache import cache_result
13 from mygpo.utils import get_timestamp
14 from mygpo.db.couchdb import get_main_database, get_userdata_database
15 from mygpo.db import QueryParameterMissing
16 from mygpo.db.couchdb.utils import multi_request_view, is_couchdb_id
def podcast_slugs(base_slug):
    """ Returns the first key component of every 'podcasts/by_slug' row
    whose key lies between base_slug and base_slug + 'ZZZZZ' """

    rows = Podcast.view(
        'podcasts/by_slug',
        startkey=[base_slug, None],
        endkey=[base_slug + 'ZZZZZ', None],
        wrap_doc=False,
    )

    slugs = []
    for row in rows:
        slugs.append(row['key'][0])
    return slugs
@cache_result(timeout=60*60)
def podcast_count():
    """ Returns the total_rows value reported by the 'podcasts/by_id' view """

    # limit=0 fetches no rows; only the view's metadata is of interest
    result = Podcast.view('podcasts/by_id', limit=0, stale='update_after')
    return result.total_rows
@cache_result(timeout=60*60)
def podcasts_for_tag(tag):
    """ Returns the podcasts with the current tag.

    The result is a list of (key[1], value) tuples built from the grouped
    rows of the 'podcasts/by_tag' view in the main database, followed by
    those of the 'usertags/podcasts' view in the userdata database.

    Some podcasts might be returned twice

    Raises QueryParameterMissing if tag is empty. """

    if not tag:
        raise QueryParameterMissing('tag')

    # A list is returned instead of yielding lazily: the function is wrapped
    # by cache_result, and a generator object is single-use (and, presumably,
    # not storable by the cache backend -- verify against cache_result)
    results = []

    res = multi_request_view(Podcast, 'podcasts/by_tag',
            wrap=False,
            startkey=[tag, None],
            endkey=[tag, {}],
            reduce=True,
            group=True,
            group_level=2
        )
    results.extend((r['key'][1], r['value']) for r in res)

    udb = get_userdata_database()
    res = multi_request_view(udb, 'usertags/podcasts',
            wrap=False,
            startkey=[tag, None],
            endkey=[tag, {}],
            reduce=True,
            group=True,
            group_level=2
        )
    results.extend((r['key'][1], r['value']) for r in res)

    return results
@cache_result(timeout=60*60)
def get_podcast_languages():
    """ Returns the sorted language codes that are used by podcasts.

    The codes come from the keys of the 'podcasts/by_language' view and are
    passed through sanitize_language_codes; no check against ISO 639 is
    performed here. """

    from mygpo.web.utils import sanitize_language_codes

    rows = Podcast.view('podcasts/by_language', group_level=1, stale='ok')

    codes = sanitize_language_codes([row['key'][0] for row in rows])
    codes.sort()
    return codes
def podcast_by_id_uncached(podcast_id, current_id=False):
    """ Looks up a podcast by its Id in 'podcasts/by_id', bypassing the cache

    Returns None if the view has no result for the Id. Raises
    QueryParameterMissing if podcast_id is empty. """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    rows = Podcast.view(
        'podcasts/by_id',
        key=podcast_id,
        classes=[Podcast, PodcastGroup],
        include_docs=True,
    )

    if not rows:
        return None

    # the first result may be a Podcast or a PodcastGroup; both are asked
    # for the podcast that carries the requested Id
    container = rows.first()
    found = container.get_podcast_by_id(podcast_id, current_id)

    if found.needs_update:
        incomplete_obj.send_robust(sender=found)

    return found


# cached variant of podcast_by_id_uncached (one hour timeout)
podcast_by_id = cache_result(timeout=60*60)(podcast_by_id_uncached)
@cache_result(timeout=60*60)
def podcastgroup_by_id(group_id):
    """ Returns the PodcastGroup fetched via PodcastGroup.get(group_id)

    Raises QueryParameterMissing if group_id is empty. """

    if not group_id:
        raise QueryParameterMissing('group_id')

    group = PodcastGroup.get(group_id)

    if group.needs_update:
        incomplete_obj.send_robust(sender=group)

    return group
@cache_result(timeout=60*60)
def podcast_for_slug(slug):
    """ Returns the podcast for the given slug, or None

    None is returned if the 'podcasts/by_slug' view has no row for the
    slug, or if the slug belongs to a PodcastGroup itself. Raises
    QueryParameterMissing if slug is empty. """

    if not slug:
        raise QueryParameterMissing('slug')

    rows = Podcast.view(
        'podcasts/by_slug',
        startkey=[slug, None],
        endkey=[slug, {}],
        include_docs=True,
        wrap_doc=False,
    )

    if not rows:
        return None

    row = rows.first()
    doc = row['doc']

    if doc['doc_type'] != 'Podcast':
        # the document is a group; the second key component identifies
        # which object the slug refers to
        podcast_id = row['key'][1]
        group = PodcastGroup.wrap(doc)

        if podcast_id == group._id:
            # TODO: we don't return PodcastGroups atm
            return None

        podcast = group.get_podcast_by_id(podcast_id)

    else:
        podcast = Podcast.wrap(doc)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast
@cache_result(timeout=60*60)
def podcast_for_slug_id(slug_id):
    """ Returns the Podcast for either a CouchDB-ID or a Slug """

    # dispatch on the shape of the identifier
    lookup = podcast_by_id if is_couchdb_id(slug_id) else podcast_for_slug
    return lookup(slug_id)
@cache_result(timeout=60*60)
def podcastgroup_for_slug_id(slug_id):
    """ Returns the PodcastGroup for either a CouchDB-ID or a Slug

    Raises QueryParameterMissing if slug_id is empty. """

    if not slug_id:
        raise QueryParameterMissing('slug_id')

    if not is_couchdb_id(slug_id):
        #TODO: implement
        return PodcastGroup.for_slug(slug_id)

    return podcastgroup_by_id(slug_id)
def podcasts_by_id(ids):
    """ Returns a list with the podcasts for the given Ids

    An empty list of Ids gives an empty list. Raises QueryParameterMissing
    if ids is None. """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return []

    rows = Podcast.view(
        'podcasts/by_id',
        keys=ids,
        include_docs=True,
        wrap_doc=False
    )

    wrapped = [_wrap_podcast_group(row) for row in rows]

    for item in wrapped:
        if item.needs_update:
            incomplete_obj.send_robust(sender=item)

    return wrapped
def podcasts_groups_by_id(ids):
    """ gets podcast groups and top-level podcasts for the given ids

    This is a generator. For every row of the 'podcasts/podcasts_groups'
    view it yields a Podcast or a PodcastGroup, or None if the row carries
    no document. Rows with an unknown doc_type are logged and skipped.

    Raises QueryParameterMissing (on first iteration) if ids is None. """

    # the module defines no logger, so one is obtained locally
    import logging
    logger = logging.getLogger(__name__)

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return

    db = get_main_database()
    res = db.view('podcasts/podcasts_groups',
            keys=ids,
            include_docs=True,
            classes=[Podcast, PodcastGroup],
        )

    for r in res:
        doc = r['doc']

        if not doc:
            yield None
            # nothing to wrap; without this the doc_type lookup below
            # would be attempted on the empty doc
            continue

        if doc['doc_type'] == 'Podcast':
            obj = Podcast.wrap(doc)

        elif doc['doc_type'] == 'PodcastGroup':
            obj = PodcastGroup.wrap(doc)

        else:
            logger.error('podcasts_groups_by_id retrieved unknown doc_type '
                '"%s" for params %s', doc['doc_type'], res.params)
            continue

        if obj.needs_update:
            incomplete_obj.send_robust(sender=obj)

        yield obj
@cache_result(timeout=60*60)
def podcast_for_oldid(oldid):
    """ Returns the podcast for the given old Id, or None if the
    'podcasts/by_oldid' view has no result

    Raises QueryParameterMissing if oldid is empty. """

    if not oldid:
        raise QueryParameterMissing('oldid')

    rows = Podcast.view(
        'podcasts/by_oldid',
        key=long(oldid),
        classes=[Podcast, PodcastGroup],
        include_docs=True,
    )

    if not rows:
        return None

    container = rows.first()
    found = container.get_podcast_by_oldid(oldid)

    if found.needs_update:
        incomplete_obj.send_robust(sender=found)

    return found
@cache_result(timeout=60*60)
def podcastgroup_for_oldid(oldid):
    """ Returns the PodcastGroup for the given old Id, or None if the
    'podcasts/groups_by_oldid' view has no result

    Raises QueryParameterMissing if oldid is empty. """

    if not oldid:
        raise QueryParameterMissing('oldid')

    rows = PodcastGroup.view(
        'podcasts/groups_by_oldid',
        key=long(oldid),
        include_docs=True,
    )

    if not rows:
        return None

    group = rows.one()

    if group.needs_update:
        incomplete_obj.send_robust(sender=group)

    return group
def podcast_for_url(url, create=False):
    """ Returns the podcast for the given URL

    The cache is consulted first, then the 'podcasts/by_url' view. If
    nothing is found and create is True, a new Podcast with this URL is
    saved and returned; otherwise None is returned.

    Raises QueryParameterMissing if url is empty. """

    if not url:
        raise QueryParameterMissing('url')

    cache_key = 'podcast-by-url-%s' % sha1(url.encode('utf-8')).hexdigest()

    cached = cache.get(cache_key)
    if cached:
        return cached

    rows = Podcast.view(
        'podcasts/by_url',
        key=url,
        classes=[Podcast, PodcastGroup],
        include_docs=True
    )

    if rows:
        container = rows.first()
        found = container.get_podcast_by_url(url)

        # only podcasts that do not need an update are put into the cache
        if found.needs_update:
            incomplete_obj.send_robust(sender=found)
        else:
            cache.set(cache_key, found)

        return found

    if not create:
        return None

    created = Podcast()
    created.created_timestamp = get_timestamp(datetime.utcnow())
    created.urls = [url]
    created.save()
    incomplete_obj.send_robust(sender=created)
    return created
def random_podcasts(language='', chunk_size=5):
    """ Returns an iterator of random podcasts

    Optionally a language code can be specified. If given, it is used as
    the first component of the view's startkey. chunk_size determines how
    many podcasts will be fetched at once.

    Iteration stops when a query returns no rows. """

    wrappers = {
        'Podcast': Podcast.wrap,
        'PodcastGroup': PodcastGroup.wrap,
    }

    while True:
        start = random()
        rows = Podcast.view(
            'podcasts/random',
            startkey=[language, start],
            include_docs=True,
            limit=chunk_size,
            stale='ok',
            wrap_doc=False,
        )

        if not rows:
            break

        for row in rows:

            # The view podcasts/random does not include incomplete podcasts,
            # so we don't need to send any 'incomplete_obj' signals here

            doc = row['doc']
            wrap = wrappers.get(doc['doc_type'])

            # documents of any other type are silently skipped
            if wrap is not None:
                yield wrap(doc)
def podcasts_by_last_update():
    """ Returns a list of the podcasts found in the
    'podcasts/by_last_update' view, in the order the view returns them """

    rows = Podcast.view(
        'podcasts/by_last_update',
        include_docs=True,
        stale='update_after',
        wrap_doc=False,
    )

    # TODO: this method is only used for retrieving podcasts to update;
    #       should we really send 'incomplete_obj' signals here?

    return [_wrap_podcast_group_key1(row) for row in rows]
def all_podcasts():
    """ Yields every podcast found in the 'podcasts/by_id' view

    Rows whose document is not a Podcast are wrapped as a PodcastGroup,
    from which the podcast with the row's key is taken. """

    from mygpo.db.couchdb.utils import multi_request_view

    rows = multi_request_view(
        Podcast, 'podcasts/by_id',
        wrap=False,
        include_docs=True,
        stale='update_after',
    )

    # TODO: this method is only used for maintenance purposes; should we
    #       really send 'incomplete_obj' signals here?

    for row in rows:
        doc = row['doc']

        if doc['doc_type'] != 'Podcast':
            group = PodcastGroup.wrap(doc)
            yield group.get_podcast_by_id(row[u'key'])

        else:
            yield Podcast.wrap(doc)
def podcasts_to_dict(ids, use_cache=False):
    """ Returns a dict that maps Ids to their podcasts

    Every Id reported by a podcast's get_ids() is used as a key for that
    podcast. If use_cache is True, the cache is queried first, only the
    remaining Ids are fetched via podcasts_by_id, and the fetched objects
    are stored in the cache afterwards.

    An empty ids gives an empty dict. Raises QueryParameterMissing if ids
    is None. """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return dict()

    # remove duplicate Ids
    ids = list(set(ids))
    objs = dict()

    cache_objs = []
    if use_cache:
        res = cache.get_many(ids)
        cache_objs.extend(res.values())
        # test membership on the dict itself; res.keys() would build a new
        # list for every Id under Python 2
        ids = [x for x in ids if x not in res]

    db_objs = podcasts_by_id(ids)

    for obj in (cache_objs + db_objs):

        # get_multi returns dict {'key': _id, 'error': 'not found'}
        # for non-existing objects
        if isinstance(obj, dict) and 'error' in obj:
            _id = obj['key']
            objs[_id] = None
            continue

        for i in obj.get_ids():
            objs[i] = obj

    if use_cache:
        cache.set_many(dict((obj.get_id(), obj) for obj in db_objs))

    return objs
def podcasts_need_update():
    """ Yields the podcasts whose Ids are the keys of the (grouped)
    'episodes/need_update' view; at most 100 rows are queried

    Ids for which podcast_by_id returns nothing are skipped. """

    rows = get_main_database().view(
        'episodes/need_update',
        group_level=1,
        reduce=True,
        limit=100,
    )

    # TODO: this method is only used for retrieving podcasts to update;
    #       should we really send 'incomplete_obj' signals here?

    for row in rows:
        found = podcast_by_id(row['key'])
        if not found:
            continue
        yield found
@cache_result(timeout=60*60)
def get_flattr_podcasts(offset=0, limit=20):
    """ returns a page of podcasts that contain Flattr payment URLs

    offset rows of the 'podcasts/flattr' view are skipped and at most
    limit rows are returned, as a list. """

    rows = Podcast.view(
        'podcasts/flattr',
        skip=offset,
        limit=limit,
        classes=[Podcast, PodcastGroup],
        include_docs=True,
        reduce=False,
    )

    page = list(rows)

    for item in page:
        if item.needs_update:
            incomplete_obj.send_robust(sender=item)

    return page
@cache_result(timeout=60*60)
def get_flattr_podcast_count():
    """ returns the number of podcasts that contain Flattr payment URLs

    The number is the 'value' of the first row of the 'podcasts/flattr'
    view; 0 is returned if the view yields no rows. """
    r = list(Podcast.view('podcasts/flattr'))
    # an empty result would otherwise raise an IndexError; this mirrors
    # get_license_podcast_count
    return r[0]['value'] if r else 0
@cache_result(timeout=60*60)
def get_license_podcasts(offset=0, limit=20):
    """ returns a page of podcasts w/ license information

    offset rows of the 'podcasts/license' view are skipped and at most
    limit rows are returned, as a list. """

    rows = Podcast.view(
        'podcasts/license',
        skip=offset,
        limit=limit,
        classes=[Podcast, PodcastGroup],
        include_docs=True,
        reduce=False,
    )

    page = list(rows)

    for item in page:
        if item.needs_update:
            incomplete_obj.send_robust(sender=item)

    return page
@cache_result(timeout=60*60)
def get_license_podcast_count():
    """ returns the number of podcasts that contain license information

    The number is the 'value' of the first row of the 'podcasts/license'
    view; 0 is returned if the view yields no rows. """
    rows = list(Podcast.view('podcasts/license'))
    if rows:
        return rows[0]['value']
    return 0
def subscriberdata_for_podcast(podcast_id):
    """ Returns the PodcastSubscriberData for the given podcast Id

    If the 'podcasts/subscriber_data' view has no result, a new (unsaved)
    PodcastSubscriberData with its podcast set to podcast_id is returned.
    Raises QueryParameterMissing if podcast_id is empty. """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    rows = PodcastSubscriberData.view(
        'podcasts/subscriber_data',
        key=podcast_id,
        include_docs=True,
    )

    if not rows:
        fresh = PodcastSubscriberData()
        fresh.podcast = podcast_id
        return fresh

    return rows.first()
def _wrap_podcast_group(res):
    """ Turns a view row into a podcast; the row's key is used as the
    podcast's Id if the document is not a Podcast itself """

    doc = res['doc']

    if doc['doc_type'] != 'Podcast':
        group = PodcastGroup.wrap(doc)
        podcast_id = res['key']
        return group.get_podcast_by_id(podcast_id)

    return Podcast.wrap(doc)
def _wrap_podcast_group_key1(res):
    """ Turns a view row into a podcast; the second component of the row's
    key is used as the podcast's Id if the document is not a Podcast """

    doc = res['doc']

    if doc['doc_type'] != 'Podcast':
        podcast_id = res[u'key'][1]
        group = PodcastGroup.wrap(doc)
        return group.get_podcast_by_id(podcast_id)

    return Podcast.wrap(doc)
def search_wrapper(result):
    """ Wraps the document of a search result as Podcast or PodcastGroup
    and sets its _id to the result's 'id' """

    doc = result['doc']
    doc_type = doc['doc_type']

    if doc_type == 'Podcast':
        wrapped = Podcast.wrap(doc)
    elif doc_type == 'PodcastGroup':
        wrapped = PodcastGroup.wrap(doc)

    # NOTE: for any other doc_type 'wrapped' is unbound at this point
    wrapped._id = result['id']
    return wrapped
@cache_result(timeout=60*60)
def search(q, offset=0, num_results=20):
    """ Searches 'podcasts/search' for q

    Returns a tuple (list of results, total number of rows). An empty
    query, or a RequestFailed raised while searching, gives ([], 0). """

    if not q:
        return [], 0

    database = get_main_database()

    #FIXME current couchdbkit can't parse responses for multi-query searches
    query = q.replace(',', '')

    try:
        response = database.search(
            'podcasts/search',
            wrapper=search_wrapper,
            include_docs=True,
            limit=num_results,
            stale='update_after',
            skip=offset,
            q=query,
            sort='\\subscribers<int>',
        )

        found = list(response)

        for item in found:
            if item.needs_update:
                incomplete_obj.send_robust(sender=item)

        return found, response.total_rows

    except RequestFailed:
        return [], 0
def reload_podcast(podcast):
    """ Fetches the podcast again by its Id, bypassing the cache """
    podcast_id = podcast.get_id()
    return podcast_by_id_uncached(podcast_id)
@repeat_on_conflict(['podcast'], reload_f=reload_podcast)
def update_additional_data(podcast, twitter):
    """ Sets the podcast's twitter attribute, saves it and clears the
    cache """

    podcast.twitter = twitter
    podcast.save()

    # clear the whole cache until we have a better invalidation mechanism
    cache.clear()
@repeat_on_conflict(['podcast'], reload_f=reload_podcast)
def update_related_podcasts(podcast, related):
    """ Sets the podcast's related_podcasts and saves it

    Nothing is saved if the podcast already has this value. """

    unchanged = (podcast.related_podcasts == related)
    if unchanged:
        return

    podcast.related_podcasts = related
    podcast.save()
@repeat_on_conflict(['podcast'], reload_f=reload_podcast)
def delete_podcast(podcast):
    """ Deletes the given podcast by calling its delete() method """
    podcast.delete()