af5e94b5fea9684734657350ca6bd4aa21588353
[mygpo.git] / mygpo / db / couchdb / podcast.py
blobaf5e94b5fea9684734657350ca6bd4aa21588353
1 from hashlib import sha1
2 from random import random
3 from datetime import datetime
5 from restkit import RequestFailed
7 from django.core.cache import cache
9 from mygpo.core.models import Podcast, PodcastGroup, PodcastSubscriberData
10 from mygpo.core.signals import incomplete_obj
11 from mygpo.decorators import repeat_on_conflict
12 from mygpo.cache import cache_result
13 from mygpo.utils import get_timestamp
14 from mygpo.db.couchdb import get_main_database
15 from mygpo.db import QueryParameterMissing
16 from mygpo.db.couchdb.utils import multi_request_view, is_couchdb_id
def podcast_slugs(base_slug):
    """ Returns all podcast slugs that start with the given base slug """
    # 'ZZZZZ' acts as a high-sorting suffix to close the key range
    rows = Podcast.view('podcasts/by_slug',
            startkey = [base_slug, None],
            endkey   = [base_slug + 'ZZZZZ', None],
            wrap_doc = False,
        )
    return [row['key'][0] for row in rows]
@cache_result(timeout=60*60)
def podcast_count():
    """ Returns the total number of podcasts in the database """
    # limit=0 fetches no rows; only total_rows is of interest
    view = Podcast.view('podcasts/by_id',
            limit = 0,
            stale = 'update_after',
        )
    return view.total_rows
@cache_result(timeout=60*60)
def podcasts_for_tag(tag):
    """ Returns the podcasts with the current tag.

    Yields (podcast_id, value) tuples.
    Some podcasts might be returned twice """

    if not tag:
        raise QueryParameterMissing('tag')

    # the tag may appear both in the podcast documents themselves and in
    # user-assigned tags; both views are queried with identical parameters,
    # so iterate over the view names instead of duplicating the request
    for view in ('podcasts/by_tag', 'usertags/podcasts'):
        res = multi_request_view(Podcast, view,
                wrap        = False,
                startkey    = [tag, None],
                endkey      = [tag, {}],
                reduce      = True,
                group       = True,
                group_level = 2,
            )

        for r in res:
            yield (r['key'][1], r['value'])
@cache_result(timeout=60*60)
def get_podcast_languages():
    """ Returns all 2-letter language codes that are used by podcasts.

    It filters obviously invalid strings, but does not check if any
    of these codes is contained in ISO 639. """

    # imported here to avoid a circular import at module load time
    from mygpo.web.utils import sanitize_language_codes

    rows = Podcast.view('podcasts/by_language',
            group_level = 1,
            stale       = 'ok',
        )

    codes = sanitize_language_codes([row['key'][0] for row in rows])
    return sorted(codes)
@cache_result(timeout=60*60)
def podcast_by_id(podcast_id, current_id=False):
    """ Returns the Podcast for the given CouchDB id, or None """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    res = Podcast.view('podcasts/by_id',
            key          = podcast_id,
            classes      = [Podcast, PodcastGroup],
            include_docs = True,
        )

    if not res:
        return None

    # the document may be a PodcastGroup; resolve the actual podcast
    group = res.first()
    podcast = group.get_podcast_by_id(podcast_id, current_id)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast
@cache_result(timeout=60*60)
def podcastgroup_by_id(group_id):
    """ Returns the PodcastGroup for the given CouchDB id """

    if not group_id:
        raise QueryParameterMissing('group_id')

    group = PodcastGroup.get(group_id)

    if group.needs_update:
        incomplete_obj.send_robust(sender=group)

    return group
@cache_result(timeout=60*60)
def podcast_for_slug(slug):
    """ Returns the Podcast that has the given slug assigned, or None """

    if not slug:
        raise QueryParameterMissing('slug')

    rows = Podcast.view('podcasts/by_slug',
            startkey     = [slug, None],
            endkey       = [slug, {}],
            include_docs = True,
            wrap_doc     = False,
        )

    if not rows:
        return None

    row = rows.first()
    doc = row['doc']

    if doc['doc_type'] == 'Podcast':
        obj = Podcast.wrap(doc)
    else:
        # the slug belongs to a podcast within a group; the podcast's
        # id is the second element of the view key
        group = PodcastGroup.wrap(doc)
        obj = group.get_podcast_by_id(row['key'][1])

    if obj.needs_update:
        incomplete_obj.send_robust(sender=obj)

    return obj
@cache_result(timeout=60*60)
def podcast_for_slug_id(slug_id):
    """ Returns the Podcast for either a CouchDB id or a slug """

    if is_couchdb_id(slug_id):
        return podcast_by_id(slug_id)

    return podcast_for_slug(slug_id)
@cache_result(timeout=60*60)
def podcastgroup_for_slug_id(slug_id):
    """ Returns the PodcastGroup for either a CouchDB id or a slug """

    if not slug_id:
        raise QueryParameterMissing('slug_id')

    if is_couchdb_id(slug_id):
        return podcastgroup_by_id(slug_id)

    #TODO: implement
    return PodcastGroup.for_slug(slug_id)
def podcasts_by_id(ids):
    """ Returns the Podcasts for the given list of CouchDB ids """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return []

    rows = Podcast.view('podcasts/by_id',
            keys         = ids,
            include_docs = True,
            wrap_doc     = False
        )

    podcasts = [_wrap_podcast_group(row) for row in rows]

    for podcast in podcasts:
        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)

    return podcasts
@cache_result(timeout=60*60)
def podcast_for_oldid(oldid):
    """ Returns the Podcast for the given numeric old id, or None """

    if not oldid:
        raise QueryParameterMissing('oldid')

    res = Podcast.view('podcasts/by_oldid',
            key          = long(oldid),
            classes      = [Podcast, PodcastGroup],
            include_docs = True,
        )

    if not res:
        return None

    # the document may be a PodcastGroup; resolve the actual podcast
    group = res.first()
    podcast = group.get_podcast_by_oldid(oldid)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast
@cache_result(timeout=60*60)
def podcastgroup_for_oldid(oldid):
    """ Returns the PodcastGroup for the given numeric old id, or None """

    if not oldid:
        raise QueryParameterMissing('oldid')

    res = PodcastGroup.view('podcasts/groups_by_oldid',
            key          = long(oldid),
            include_docs = True,
        )

    if not res:
        return None

    group = res.one()

    if group.needs_update:
        incomplete_obj.send_robust(sender=group)

    return group
def podcast_for_url(url, create=False):
    """ Returns the Podcast for the given feed URL.

    If no podcast exists for the URL and ``create`` is True, a new
    (incomplete) podcast is created and saved. Returns None otherwise. """

    if not url:
        raise QueryParameterMissing('url')

    cache_key = 'podcast-by-url-%s' % sha1(url.encode('utf-8')).hexdigest()

    cached = cache.get(cache_key)
    if cached:
        return cached

    res = Podcast.view('podcasts/by_url',
            key=url,
            classes=[Podcast, PodcastGroup],
            include_docs=True
        )

    if res:
        group = res.first()
        podcast = group.get_podcast_by_url(url)

        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)
        else:
            # only complete podcasts are cached
            cache.set(cache_key, podcast)

        return podcast

    if create:
        podcast = Podcast()
        podcast.created_timestamp = get_timestamp(datetime.utcnow())
        podcast.urls = [url]
        podcast.save()
        incomplete_obj.send_robust(sender=podcast)
        return podcast

    return None
def random_podcasts(language='', chunk_size=5):
    """ Returns an iterator of random podcasts

    Optionally a language code can be specified. If given, the podcasts
    will be restricted to this language. chunk_size determines how many
    podcasts will be fetched at once """

    while True:
        rnd = random()
        rows = Podcast.view('podcasts/random',
                startkey = [language, rnd],
                include_docs = True,
                limit = chunk_size,
                stale = 'ok',
                wrap_doc = False,
            )

        if not rows:
            break

        for row in rows:

            # The view podcasts/random does not include incomplete podcasts,
            # so we don't need to send any 'incomplete_obj' signals here

            doc = row['doc']
            doc_type = doc['doc_type']

            if doc_type == 'PodcastGroup':
                yield PodcastGroup.wrap(doc)

            elif doc_type == 'Podcast':
                yield Podcast.wrap(doc)
def podcasts_by_last_update():
    """ Returns all podcasts, ordered by their last-update key """

    rows = Podcast.view('podcasts/by_last_update',
            include_docs = True,
            stale        = 'update_after',
            wrap_doc     = False,
        )

    # TODO: this method is only used for retrieving podcasts to update;
    # should we really send 'incomplete_obj' signals here?

    return [_wrap_podcast_group_key1(row) for row in rows]
def all_podcasts():
    """ Returns an iterator over all podcasts in the database """

    from mygpo.db.couchdb.utils import multi_request_view
    rows = multi_request_view(Podcast, 'podcasts/by_id',
            wrap         = False,
            include_docs = True,
            stale        = 'update_after',
        )

    # TODO: this method is only used for maintenance purposes; should we
    # really send 'incomplete_obj' signals here?

    for row in rows:
        doc = row['doc']

        if doc['doc_type'] == 'Podcast':
            yield Podcast.wrap(doc)
        else:
            # a group document; the row key is the contained podcast's id
            group = PodcastGroup.wrap(doc)
            yield group.get_podcast_by_id(row[u'key'])
def all_podcasts_groups(cls):
    """ Returns an iterator over all Podcast and PodcastGroup documents """
    view = cls.view('podcasts/podcasts_groups',
            include_docs = True,
            classes      = [Podcast, PodcastGroup],
        )
    return view.iterator()
def podcasts_to_dict(ids, use_cache=False):
    """ Maps each of the given podcast ids to its Podcast object.

    Ids of non-existing podcasts are mapped to None. If ``use_cache`` is
    True, the Django cache is consulted first, and podcasts fetched from
    the database are stored back into the cache. """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return dict()

    # remove duplicates
    ids = list(set(ids))
    result = dict()

    cached = []
    if use_cache:
        hits = cache.get_many(ids)
        cached.extend(hits.values())
        # only fetch the remaining ids from the database
        ids = [x for x in ids if x not in hits.keys()]

    db_objs = podcasts_by_id(ids)

    for obj in (cached + db_objs):

        # get_multi returns dict {'key': _id, 'error': 'not found'}
        # for non-existing objects
        if isinstance(obj, dict) and 'error' in obj:
            result[obj['key']] = None
            continue

        for i in obj.get_ids():
            result[i] = obj

    if use_cache:
        cache.set_many(dict( (obj.get_id(), obj) for obj in db_objs))

    return result
def podcasts_need_update():
    """ Returns an iterator of podcasts that have episodes needing an update """

    db = get_main_database()
    rows = db.view('episodes/need_update',
            group_level = 1,
            reduce      = True,
        )

    # TODO: this method is only used for retrieving podcasts to update;
    # should we really send 'incomplete_obj' signals here?

    for row in rows:
        podcast = podcast_by_id(row['key'])
        if podcast:
            yield podcast
@cache_result(timeout=60*60)
def get_flattr_podcasts(offset=0, limit=20):
    """ returns all podcasts that contain Flattr payment URLs """

    rows = Podcast.view('podcasts/flattr',
            skip         = offset,
            limit        = limit,
            classes      = [Podcast, PodcastGroup],
            include_docs = True,
            reduce       = False,
        )

    podcasts = list(rows)

    for podcast in podcasts:
        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)

    return podcasts
@cache_result(timeout=60*60)
def get_flattr_podcast_count():
    """ returns the number of podcasts that contain Flattr payment URLs """
    r = list(Podcast.view('podcasts/flattr'))

    # a reduce view returns no rows at all when there are no matching
    # documents; r[0] would raise an IndexError in that case
    if not r:
        return 0

    return r[0]['value']
def subscriberdata_for_podcast(podcast_id):
    """ Returns the subscriber data for the given podcast.

    A fresh, unsaved PodcastSubscriberData object is returned when no
    data exists yet. """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    res = PodcastSubscriberData.view('podcasts/subscriber_data',
            key          = podcast_id,
            include_docs = True,
        )

    if res:
        return res.first()

    data = PodcastSubscriberData()
    data.podcast = podcast_id
    return data
def _wrap_podcast_group(res):
    """ Wraps a raw view row into a Podcast object.

    For PodcastGroup documents, the row key is the contained
    podcast's id. """
    doc = res['doc']

    if doc['doc_type'] == 'Podcast':
        return Podcast.wrap(doc)

    group = PodcastGroup.wrap(doc)
    return group.get_podcast_by_id(res['key'])
def _wrap_podcast_group_key1(res):
    """ Wraps a raw view row into a Podcast object.

    For PodcastGroup documents, the contained podcast's id is the
    second element of the row key. """
    doc = res['doc']

    if doc['doc_type'] == 'Podcast':
        return Podcast.wrap(doc)

    group = PodcastGroup.wrap(doc)
    return group.get_podcast_by_id(res[u'key'][1])
def search_wrapper(result):
    """ Wraps a search result row into a Podcast or PodcastGroup """
    doc = result['doc']
    doc_type = doc['doc_type']

    if doc_type == 'Podcast':
        obj = Podcast.wrap(doc)
    elif doc_type == 'PodcastGroup':
        obj = PodcastGroup.wrap(doc)

    obj._id = result['id']
    return obj
@cache_result(timeout=60*60)
def search(q, offset=0, num_results=20):
    """ Searches podcasts for the given query string.

    Returns a tuple (podcasts, total_rows); empty queries and failed
    requests yield ([], 0). """

    if not q:
        return [], 0

    db = get_main_database()

    #FIXME current couchdbkit can't parse responses for multi-query searches
    q = q.replace(',', '')

    # the request fires lazily on iteration, so everything including
    # list() stays inside the try block
    try:
        results = db.search('podcasts/search',
                wrapper = search_wrapper,
                include_docs = True,
                limit = num_results,
                stale = 'update_after',
                skip = offset,
                q = q,
                sort='\\subscribers<int>')

        podcasts = list(results)

        for podcast in podcasts:
            if podcast.needs_update:
                incomplete_obj.send_robust(sender=podcast)

        return podcasts, results.total_rows

    except RequestFailed:
        return [], 0
@repeat_on_conflict(['podcast'])
def update_additional_data(podcast, twitter):
    """ Stores the given Twitter handle on the podcast """
    podcast.twitter = twitter
    podcast.save()

    # clear the whole cache until we have a better invalidation mechanism
    cache.clear()