fix missing podcasts in podcast lists
[mygpo.git] / mygpo / db / couchdb / podcast.py
blob6284deeacf3d98e614bb0a8c429dc08b41fcfe6d
1 from hashlib import sha1
2 from random import random
3 from datetime import datetime
5 from restkit import RequestFailed
7 from django.core.cache import cache
9 from mygpo.core.models import Podcast, PodcastGroup, PodcastSubscriberData
10 from mygpo.core.signals import incomplete_obj
11 from mygpo.decorators import repeat_on_conflict
12 from mygpo.cache import cache_result
13 from mygpo.utils import get_timestamp
14 from mygpo.db.couchdb import get_main_database
15 from mygpo.db import QueryParameterMissing
16 from mygpo.db.couchdb.utils import multi_request_view, is_couchdb_id
def podcast_slugs(base_slug):
    """ Returns the slugs found in the view podcasts/by_slug

    The view is queried for keys between [base_slug, None] and
    [base_slug + 'ZZZZZ', None]; the first key component of every
    returned row is collected into a list. """

    view_params = {
        'startkey': [base_slug, None],
        'endkey': [base_slug + 'ZZZZZ', None],
        'wrap_doc': False,
    }
    rows = Podcast.view('podcasts/by_slug', **view_params)

    slugs = []
    for row in rows:
        slugs.append(row['key'][0])
    return slugs
@cache_result(timeout=60*60)
def podcast_count():
    """ Returns the total_rows value reported by the view podcasts/by_id

    No rows are requested (limit=0) and the view is queried with
    stale='update_after'. """

    view = Podcast.view('podcasts/by_id', limit=0, stale='update_after')
    return view.total_rows
@cache_result(timeout=60*60)
def podcasts_for_tag(tag):
    """ Returns the podcasts with the current tag.

    The result is a list of (key[1], value) tuples taken from the rows of
    the view podcasts/by_tag, followed by those of usertags/podcasts.

    Some podcasts might be returned twice.

    Raises QueryParameterMissing if no tag is given. """

    if not tag:
        raise QueryParameterMissing('tag')

    # A list is built instead of yielding the rows: this function is wrapped
    # in cache_result, and a generator object can be consumed only once, so
    # it is not a value that can be handed out again by a cache.
    # NOTE(review): assumes cache_result stores the return value -- confirm
    podcasts = []

    for view_name in ('podcasts/by_tag', 'usertags/podcasts'):
        res = multi_request_view(
            Podcast, view_name,
            wrap=False,
            startkey=[tag, None],
            endkey=[tag, {}],
            reduce=True,
            group=True,
            group_level=2,
        )
        podcasts.extend((r['key'][1], r['value']) for r in res)

    return podcasts
@cache_result(timeout=60*60)
def get_podcast_languages():
    """ Returns all 2-letter language codes that are used by podcasts.

    The codes are read from the first key component of the rows of the
    view podcasts/by_language. Obviously invalid strings are filtered out
    by sanitize_language_codes(), but it is not checked whether any of the
    codes is contained in ISO 639. The result is sorted. """

    from mygpo.web.utils import sanitize_language_codes

    rows = Podcast.view('podcasts/by_language', group_level=1, stale='ok')

    raw_codes = []
    for row in rows:
        raw_codes.append(row['key'][0])

    codes = sanitize_language_codes(raw_codes)
    codes.sort()
    return codes
@cache_result(timeout=60*60)
def podcast_by_id(podcast_id, current_id=False):
    """ Returns the podcast with the given id, or None if there is no row

    The id is looked up in the view podcasts/by_id. The first result
    (wrapped as Podcast or PodcastGroup) is asked for the podcast through
    its get_podcast_by_id() method. An incomplete_obj signal is sent if
    the podcast reports needs_update.

    Raises QueryParameterMissing if podcast_id is empty. """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    view_params = dict(
        key=podcast_id,
        classes=[Podcast, PodcastGroup],
        include_docs=True,
    )
    results = Podcast.view('podcasts/by_id', **view_params)

    if results:
        container = results.first()
        podcast = container.get_podcast_by_id(podcast_id, current_id)

        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)

        return podcast

    return None
@cache_result(timeout=60*60)
def podcastgroup_by_id(group_id):
    """ Returns the PodcastGroup that is stored under the given id

    An incomplete_obj signal is sent if the group reports needs_update.

    Raises QueryParameterMissing if group_id is empty. """

    if not group_id:
        raise QueryParameterMissing('group_id')

    group = PodcastGroup.get(group_id)

    if group.needs_update:
        incomplete_obj.send_robust(sender=group)

    return group
@cache_result(timeout=60*60)
def podcast_for_slug(slug):
    """ Returns the podcast for the given slug, or None if there is no row

    The view podcasts/by_slug is queried for keys between [slug, None] and
    [slug, {}]; only the first row is used. If its document is a Podcast it
    is wrapped directly, otherwise it is wrapped as PodcastGroup and the
    podcast is selected by the second key component of the row.
    An incomplete_obj signal is sent if the podcast reports needs_update.

    Raises QueryParameterMissing if slug is empty. """

    if not slug:
        raise QueryParameterMissing('slug')

    rows = Podcast.view(
        'podcasts/by_slug',
        startkey=[slug, None],
        endkey=[slug, {}],
        include_docs=True,
        wrap_doc=False,
    )

    if not rows:
        return None

    row = rows.first()
    document = row['doc']

    if document['doc_type'] != 'Podcast':
        # the document is a group; the row's key names the member podcast
        podcast_id = row['key'][1]
        group = PodcastGroup.wrap(document)
        podcast = group.get_podcast_by_id(podcast_id)
    else:
        podcast = Podcast.wrap(document)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast
@cache_result(timeout=60*60)
def podcast_for_slug_id(slug_id):
    """ Returns the Podcast for either a CouchDB-ID or a Slug

    is_couchdb_id() decides whether the lookup is done by
    podcast_by_id() or by podcast_for_slug(). """

    lookup = podcast_by_id if is_couchdb_id(slug_id) else podcast_for_slug
    return lookup(slug_id)
@cache_result(timeout=60*60)
def podcastgroup_for_slug_id(slug_id):
    """ Returns the PodcastGroup for either a CouchDB-ID or a Slug

    is_couchdb_id() decides whether the lookup is done by
    podcastgroup_by_id() or by PodcastGroup.for_slug().

    Raises QueryParameterMissing if slug_id is empty. """

    if not slug_id:
        raise QueryParameterMissing('slug_id')

    if not is_couchdb_id(slug_id):
        #TODO: implement
        return PodcastGroup.for_slug(slug_id)

    return podcastgroup_by_id(slug_id)
def podcasts_by_id(ids):
    """ Returns a list of the podcasts found for the given ids

    The ids are looked up in the view podcasts/by_id and every row is
    converted by _wrap_podcast_group(). An incomplete_obj signal is sent
    for each podcast that reports needs_update. An empty ids collection
    gives an empty list without querying the view.

    Raises QueryParameterMissing if ids is None. """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return []

    rows = Podcast.view(
        'podcasts/by_id',
        keys=ids,
        include_docs=True,
        wrap_doc=False,
    )

    podcasts = [_wrap_podcast_group(row) for row in rows]

    for podcast in podcasts:
        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)

    return podcasts
def podcasts_groups_by_id(ids):
    """ gets podcast groups and top-level podcasts for the given ids

    This is a generator. The ids are looked up in _all_docs of the main
    database; documents of type Podcast and PodcastGroup are wrapped and
    yielded, documents of any other type are logged as an error and
    skipped. An incomplete_obj signal is sent for every yielded object
    that reports needs_update.

    Raises QueryParameterMissing (on first iteration) if ids is None. """

    # imported here because the module does not define a logger itself;
    # without it the error branch below would fail with a NameError
    import logging
    logger = logging.getLogger(__name__)

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return

    db = get_main_database()
    res = db.view(
        '_all_docs',
        keys=ids,
        include_docs=True,
        classes=[Podcast, PodcastGroup],
    )

    for r in res:
        doc = r['doc']
        if doc['doc_type'] == 'Podcast':
            obj = Podcast.wrap(doc)

        elif doc['doc_type'] == 'PodcastGroup':
            obj = PodcastGroup.wrap(doc)

        else:
            logger.error('podcasts_groups_by_id retrieved unknown doc_type '
                '"%s" for params %s', doc['doc_type'], res.params)
            continue

        if obj.needs_update:
            incomplete_obj.send_robust(sender=obj)

        yield obj
@cache_result(timeout=60*60)
def podcast_for_oldid(oldid):
    """ Returns the podcast for the given old id, or None if there is no row

    The view podcasts/by_oldid is queried with long(oldid) as key. The
    first result (wrapped as Podcast or PodcastGroup) is asked for the
    podcast through its get_podcast_by_oldid() method, which receives
    oldid as it was passed in. An incomplete_obj signal is sent if the
    podcast reports needs_update.

    Raises QueryParameterMissing if oldid is empty. """

    if not oldid:
        raise QueryParameterMissing('oldid')

    results = Podcast.view(
        'podcasts/by_oldid',
        key=long(oldid),
        classes=[Podcast, PodcastGroup],
        include_docs=True,
    )

    if not results:
        return None

    container = results.first()
    found = container.get_podcast_by_oldid(oldid)

    if found.needs_update:
        incomplete_obj.send_robust(sender=found)

    return found
@cache_result(timeout=60*60)
def podcastgroup_for_oldid(oldid):
    """ Returns the PodcastGroup for the given old id, or None

    The view podcasts/groups_by_oldid is queried with long(oldid) as key
    and the group is obtained through one() on the result. An
    incomplete_obj signal is sent if the group reports needs_update.

    Raises QueryParameterMissing if oldid is empty. """

    if not oldid:
        raise QueryParameterMissing('oldid')

    results = PodcastGroup.view(
        'podcasts/groups_by_oldid',
        key=long(oldid),
        include_docs=True,
    )

    if results:
        group = results.one()

        if group.needs_update:
            incomplete_obj.send_robust(sender=group)

        return group

    return None
def podcast_for_url(url, create=False):
    """ Returns the podcast for the given URL

    The lookup order is:
     * the cache, under a key derived from the SHA1 of the UTF-8 encoded URL
     * the view podcasts/by_url; a podcast found there is put into the
       cache unless it reports needs_update, in which case an
       incomplete_obj signal is sent instead
     * if create is set, a new Podcast with this URL is saved, an
       incomplete_obj signal is sent for it and it is returned

    If none of these yields a podcast, None is returned.

    Raises QueryParameterMissing if url is empty. """

    if not url:
        raise QueryParameterMissing('url')

    url_digest = sha1(url.encode('utf-8')).hexdigest()
    cache_key = 'podcast-by-url-%s' % url_digest

    cached = cache.get(cache_key)
    if cached:
        return cached

    results = Podcast.view(
        'podcasts/by_url',
        key=url,
        classes=[Podcast, PodcastGroup],
        include_docs=True,
    )

    if results:
        container = results.first()
        found = container.get_podcast_by_url(url)

        if not found.needs_update:
            cache.set(cache_key, found)
        else:
            incomplete_obj.send_robust(sender=found)

        return found

    if not create:
        return None

    new_podcast = Podcast()
    new_podcast.created_timestamp = get_timestamp(datetime.utcnow())
    new_podcast.urls = [url]
    new_podcast.save()
    incomplete_obj.send_robust(sender=new_podcast)
    return new_podcast
def random_podcasts(language='', chunk_size=5):
    """ Returns an iterator of random podcasts

    Optionally a language code can be specified. If given, the podcasts
    will be restricted to this language. chunk_size determines how many
    podcasts will be fetched at once.

    Every chunk is requested from the view podcasts/random with a fresh
    random start key; the iterator ends as soon as a chunk is empty. """

    while True:
        start = [language, random()]
        chunk = Podcast.view(
            'podcasts/random',
            startkey=start,
            include_docs=True,
            limit=chunk_size,
            stale='ok',
            wrap_doc=False,
        )

        if not chunk:
            return

        for row in chunk:

            # The view podcasts/random does not include incomplete podcasts,
            # so we don't need to send any 'incomplete_obj' signals here

            document = row['doc']
            doc_type = document['doc_type']

            if doc_type == 'Podcast':
                yield Podcast.wrap(document)

            elif doc_type == 'PodcastGroup':
                yield PodcastGroup.wrap(document)
def podcasts_by_last_update():
    """ Returns the podcasts of the view podcasts/by_last_update as a list

    Every row is converted by _wrap_podcast_group_key1(). The view is
    queried with stale='update_after'. """

    rows = Podcast.view(
        'podcasts/by_last_update',
        include_docs=True,
        stale='update_after',
        wrap_doc=False,
    )

    # TODO: this method is only used for retrieving podcasts to update;
    #       should we really send 'incomplete_obj' signals here?

    return [_wrap_podcast_group_key1(row) for row in rows]
def all_podcasts():
    """ Generator that yields the podcasts of the view podcasts/by_id

    Rows whose document is a Podcast are wrapped directly. For any other
    document the row is treated as belonging to a PodcastGroup and the
    podcast is selected from the group by the row's key. """

    rows = multi_request_view(
        Podcast, 'podcasts/by_id',
        wrap=False,
        include_docs=True,
        stale='update_after',
    )

    # TODO: this method is only used for maintenance purposes; should we
    #       really send 'incomplete_obj' signals here?

    for row in rows:
        document = row['doc']

        if document['doc_type'] != 'Podcast':
            podcast_id = row[u'key']
            group = PodcastGroup.wrap(document)
            yield group.get_podcast_by_id(podcast_id)
        else:
            yield Podcast.wrap(document)
def all_podcasts_groups(cls):
    """ Returns an iterator over the view podcasts/podcasts_groups

    The view is queried through cls.view() with include_docs and the
    classes Podcast and PodcastGroup. """

    results = cls.view(
        'podcasts/podcasts_groups',
        include_docs=True,
        classes=[Podcast, PodcastGroup],
    )
    return results.iterator()
def podcasts_to_dict(ids, use_cache=False):
    """ Returns a dict that maps ids to the corresponding podcasts

    Every id returned by a podcast's get_ids() is used as a key for that
    podcast. If use_cache is set, the ids are looked up in the cache first;
    only the remaining ones are fetched through podcasts_by_id() and are
    afterwards written to the cache under their get_id().

    An empty ids collection gives an empty dict.

    Raises QueryParameterMissing if ids is None. """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return dict()

    def _is_error(obj):
        # get_multi returns dict {'key': _id, 'error': 'not found'}
        # for non-existing objects
        return isinstance(obj, dict) and 'error' in obj

    ids = list(set(ids))
    objs = dict()

    cache_objs = []
    if use_cache:
        res = cache.get_many(ids)
        cache_objs.extend(res.values())
        # membership is tested on the dict itself, not on a list of its keys
        ids = [x for x in ids if x not in res]

    # materialised once, because the result is concatenated and iterated
    # a second time below
    db_objs = list(podcasts_by_id(ids))

    for obj in (cache_objs + db_objs):

        if _is_error(obj):
            objs[obj['key']] = None
            continue

        for i in obj.get_ids():
            objs[i] = obj

    if use_cache:
        # error entries have no get_id() and are not written to the cache
        cache.set_many(dict((obj.get_id(), obj) for obj in db_objs
                            if not _is_error(obj)))

    return objs
def podcasts_need_update():
    """ Generator that yields podcasts listed by the view episodes/need_update

    At most 100 rows are requested (reduced, group_level 1). The key of
    each row is passed to podcast_by_id(); results that are falsy are
    skipped. """

    database = get_main_database()
    rows = database.view(
        'episodes/need_update',
        group_level=1,
        reduce=True,
        limit=100,
    )

    # TODO: this method is only used for retrieving podcasts to update;
    #       should we really send 'incomplete_obj' signals here?

    for row in rows:
        podcast = podcast_by_id(row['key'])
        if not podcast:
            continue
        yield podcast
@cache_result(timeout=60*60)
def get_flattr_podcasts(offset=0, limit=20):
    """ returns all podcasts that contain Flattr payment URLs

    The view podcasts/flattr is queried without reduce; offset is passed
    as skip and limit as limit. An incomplete_obj signal is sent for every
    returned podcast that reports needs_update. """

    view_params = dict(
        skip=offset,
        limit=limit,
        classes=[Podcast, PodcastGroup],
        include_docs=True,
        reduce=False,
    )
    results = Podcast.view('podcasts/flattr', **view_params)

    flattr_podcasts = list(results)

    for entry in flattr_podcasts:
        if entry.needs_update:
            incomplete_obj.send_robust(sender=entry)

    return flattr_podcasts
@cache_result(timeout=60*60)
def get_flattr_podcast_count():
    """ returns the number of podcasts that contain Flattr payment URLs

    The number is the value of the first row of the view podcasts/flattr.
    If the view returns no rows at all, 0 is returned. """

    rows = list(Podcast.view('podcasts/flattr'))

    # NOTE(review): presumably the reduced view has no row when nothing
    # matches; indexing rows[0] would then fail with an IndexError
    if not rows:
        return 0

    return rows[0]['value']
def subscriberdata_for_podcast(podcast_id):
    """ Returns the PodcastSubscriberData for the given podcast id

    The first result of the view podcasts/subscriber_data is returned if
    there is one. Otherwise a new PodcastSubscriberData object is created
    with its podcast attribute set to podcast_id; it is not saved here.

    Raises QueryParameterMissing if podcast_id is empty. """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    results = PodcastSubscriberData.view(
        'podcasts/subscriber_data',
        key=podcast_id,
        include_docs=True,
    )

    if not results:
        fresh = PodcastSubscriberData()
        fresh.podcast = podcast_id
        return fresh

    return results.first()
def _wrap_podcast_group(res):
    """ Converts a view row into a podcast

    A document of type Podcast is wrapped directly. Any other document is
    wrapped as PodcastGroup, and the podcast is selected from the group by
    the row's key. """

    document = res['doc']

    if document['doc_type'] != 'Podcast':
        group = PodcastGroup.wrap(document)
        podcast_id = res['key']
        return group.get_podcast_by_id(podcast_id)

    return Podcast.wrap(document)
def _wrap_podcast_group_key1(res):
    """ Converts a view row into a podcast, using the second key component

    A document of type Podcast is wrapped directly. Any other document is
    wrapped as PodcastGroup, and the podcast is selected from the group by
    the second component of the row's key. """

    document = res['doc']

    if document['doc_type'] == 'Podcast':
        return Podcast.wrap(document)

    podcast_id = res[u'key'][1]
    group = PodcastGroup.wrap(document)
    return group.get_podcast_by_id(podcast_id)
def search_wrapper(result):
    """ Wraps a search result as Podcast or PodcastGroup

    The wrapped object's _id is set to the id of the result.

    NOTE: a document with any other doc_type leaves the wrapped object
    unassigned, so setting the _id fails with an UnboundLocalError. """

    document = result['doc']
    doc_type = document['doc_type']

    if doc_type == 'Podcast':
        wrapped = Podcast.wrap(document)
    elif doc_type == 'PodcastGroup':
        wrapped = PodcastGroup.wrap(document)

    wrapped._id = result['id']
    return wrapped
@cache_result(timeout=60*60)
def search(q, offset=0, num_results=20):
    """ Searches podcasts/search for q

    Returns a tuple (results, total_rows). The results are wrapped by
    search_wrapper(); an incomplete_obj signal is sent for every result
    that reports needs_update. offset is passed as skip and num_results
    as limit.

    An empty query, or a RequestFailed raised during the search, gives
    ([], 0). """

    if not q:
        return [], 0

    database = get_main_database()

    #FIXME current couchdbkit can't parse responses for multi-query searches
    q = q.replace(',', '')

    search_params = dict(
        wrapper=search_wrapper,
        include_docs=True,
        limit=num_results,
        stale='update_after',
        skip=offset,
        q=q,
        sort='\\subscribers<int>',
    )

    try:
        response = database.search('podcasts/search', **search_params)

        found = list(response)

        for entry in found:
            if entry.needs_update:
                incomplete_obj.send_robust(sender=entry)

        return found, response.total_rows

    except RequestFailed:
        return [], 0
@repeat_on_conflict(['podcast'])
def update_additional_data(podcast, twitter):
    """ Sets the twitter attribute of the podcast and saves it

    Afterwards the whole cache is cleared. """

    podcast.twitter = twitter
    podcast.save()

    # clear the whole cache until we have a better invalidation mechanism
    cache.clear()
@repeat_on_conflict(['podcast'])
def update_related_podcasts(podcast, related):
    """ Stores related as the podcast's related_podcasts and saves it

    Nothing is saved if the podcast already has this value. """

    if podcast.related_podcasts != related:
        podcast.related_podcasts = related
        podcast.save()
@repeat_on_conflict(['podcast'])
def delete_podcast(podcast):
    """ Deletes the given podcast by calling its delete() method """

    podcast.delete()