[mygpo.git] / mygpo/db/couchdb/podcast.py
blob 59a6c034ea35362b14f4f62e6a96b81cf3668ecf
from hashlib import sha1
from random import random
from datetime import datetime

from restkit import RequestFailed

from django.core.cache import cache

from mygpo.core.models import Podcast, PodcastGroup, PodcastSubscriberData
from mygpo.core.signals import incomplete_obj
from mygpo.decorators import repeat_on_conflict
from mygpo.cache import cache_result
from mygpo.utils import get_timestamp
from mygpo.db.couchdb import get_main_database
from mygpo.db import QueryParameterMissing
from mygpo.db.couchdb.utils import multi_request_view, is_couchdb_id


def podcast_slugs(base_slug):
    res = Podcast.view('podcasts/by_slug',
        startkey = [base_slug, None],
        endkey = [base_slug + 'ZZZZZ', None],
        wrap_doc = False,
    )
    return [r['key'][0] for r in res]


@cache_result(timeout=60*60)
def podcast_count():
    return Podcast.view('podcasts/by_id',
        limit = 0,
        stale = 'update_after',
    ).total_rows


@cache_result(timeout=60*60)
def podcasts_for_tag(tag):
    """ Returns the podcasts that carry the given tag.

    Some podcasts might be returned twice """

    if not tag:
        raise QueryParameterMissing('tag')

    res = multi_request_view(Podcast, 'podcasts/by_tag',
        wrap = False,
        startkey = [tag, None],
        endkey = [tag, {}],
        reduce = True,
        group = True,
        group_level = 2,
    )

    for r in res:
        yield (r['key'][1], r['value'])

    res = multi_request_view(Podcast, 'usertags/podcasts',
        wrap = False,
        startkey = [tag, None],
        endkey = [tag, {}],
        reduce = True,
        group = True,
        group_level = 2,
    )

    for r in res:
        yield (r['key'][1], r['value'])


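# Illustrative usage sketch, not part of the original module. Each yielded
# item is a (podcast_id, value) pair, where the value is taken from the
# reduced view rows above; the tag 'linux' is just a made-up example.
#
#     tagged = list(podcasts_for_tag('linux'))

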
@cache_result(timeout=60*60)
def get_podcast_languages():
    """ Returns all 2-letter language codes that are used by podcasts.

    It filters obviously invalid strings, but does not check if any
    of these codes is contained in ISO 639. """

    from mygpo.web.utils import sanitize_language_codes

    res = Podcast.view('podcasts/by_language',
        group_level = 1,
        stale = 'ok',
    )

    langs = [r['key'][0] for r in res]
    sane_lang = sanitize_language_codes(langs)
    sane_lang.sort()
    return sane_lang


@cache_result(timeout=60*60)
def podcast_by_id(podcast_id, current_id=False):

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    r = Podcast.view('podcasts/by_id',
        key = podcast_id,
        classes = [Podcast, PodcastGroup],
        include_docs = True,
    )

    if not r:
        return None

    podcast_group = r.first()

    podcast = podcast_group.get_podcast_by_id(podcast_id, current_id)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast


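# Illustrative usage sketch, not part of the original module; the argument is
# a made-up CouchDB document ID. The function returns None if nothing matches:
#
#     podcast = podcast_by_id('0123456789abcdef0123456789abcdef')

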
@cache_result(timeout=60*60)
def podcastgroup_by_id(group_id):

    if not group_id:
        raise QueryParameterMissing('group_id')

    pg = PodcastGroup.get(group_id)

    if pg.needs_update:
        incomplete_obj.send_robust(sender=pg)

    return pg


@cache_result(timeout=60*60)
def podcast_for_slug(slug):

    if not slug:
        raise QueryParameterMissing('slug')

    r = Podcast.view('podcasts/by_slug',
        startkey = [slug, None],
        endkey = [slug, {}],
        include_docs = True,
        wrap_doc = False,
    )

    if not r:
        return None

    res = r.first()
    doc = res['doc']
    if doc['doc_type'] == 'Podcast':
        obj = Podcast.wrap(doc)
    else:
        pid = res['key'][1]
        pg = PodcastGroup.wrap(doc)
        obj = pg.get_podcast_by_id(pid)

    if obj.needs_update:
        incomplete_obj.send_robust(sender=obj)

    return obj


@cache_result(timeout=60*60)
def podcast_for_slug_id(slug_id):
    """ Returns the Podcast for either a CouchDB ID or a slug """

    if is_couchdb_id(slug_id):
        return podcast_by_id(slug_id)
    else:
        return podcast_for_slug(slug_id)


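# Illustrative usage sketch, not part of the original module; both arguments
# are made-up examples. is_couchdb_id() decides which lookup path is taken:
#
#     p1 = podcast_for_slug_id('linux-outlaws')                       # slug
#     p2 = podcast_for_slug_id('0123456789abcdef0123456789abcdef')    # ID

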
@cache_result(timeout=60*60)
def podcastgroup_for_slug_id(slug_id):
    """ Returns the PodcastGroup for either a CouchDB ID or a slug """

    if not slug_id:
        raise QueryParameterMissing('slug_id')

    if is_couchdb_id(slug_id):
        return podcastgroup_by_id(slug_id)

    else:
        #TODO: implement
        return PodcastGroup.for_slug(slug_id)


def podcasts_by_id(ids):

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return []

    r = Podcast.view('podcasts/by_id',
        keys = ids,
        include_docs = True,
        wrap_doc = False,
    )

    podcasts = map(_wrap_podcast_group, r)

    for podcast in podcasts:
        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)

    return podcasts


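# Illustrative usage sketch, not part of the original module; the IDs are
# made-up CouchDB document IDs:
#
#     podcasts = podcasts_by_id(['0123456789abcdef', 'fedcba9876543210'])

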
@cache_result(timeout=60*60)
def podcast_for_oldid(oldid):

    if not oldid:
        raise QueryParameterMissing('oldid')

    r = Podcast.view('podcasts/by_oldid',
        key = long(oldid),
        classes = [Podcast, PodcastGroup],
        include_docs = True,
    )

    if not r:
        return None

    podcast_group = r.first()
    podcast = podcast_group.get_podcast_by_oldid(oldid)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast


@cache_result(timeout=60*60)
def podcastgroup_for_oldid(oldid):

    if not oldid:
        raise QueryParameterMissing('oldid')

    r = PodcastGroup.view('podcasts/groups_by_oldid',
        key = long(oldid),
        include_docs = True,
    )

    if not r:
        return None

    pg = r.one()

    if pg.needs_update:
        incomplete_obj.send_robust(sender=pg)

    return pg


def podcast_for_url(url, create=False):

    if not url:
        raise QueryParameterMissing('url')

    key = 'podcast-by-url-%s' % sha1(url.encode('utf-8')).hexdigest()

    podcast = cache.get(key)
    if podcast:
        return podcast

    r = Podcast.view('podcasts/by_url',
        key=url,
        classes=[Podcast, PodcastGroup],
        include_docs=True,
    )

    if r:
        podcast_group = r.first()
        podcast = podcast_group.get_podcast_by_url(url)

        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)
        else:
            cache.set(key, podcast)

        return podcast

    if create:
        podcast = Podcast()
        podcast.created_timestamp = get_timestamp(datetime.utcnow())
        podcast.urls = [url]
        podcast.save()
        incomplete_obj.send_robust(sender=podcast)
        return podcast

    return None


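# Illustrative usage sketch, not part of the original module; the feed URL is
# a made-up example. With create=True a new Podcast is saved if the URL is
# unknown; with the default create=False the function returns None instead:
#
#     podcast = podcast_for_url('http://example.com/feed.xml', create=True)

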
def random_podcasts(language='', chunk_size=5):
    """ Returns an iterator of random podcasts

    Optionally a language code can be specified. If given, the podcasts are
    restricted to this language. chunk_size determines how many podcasts are
    fetched at once """

    while True:
        rnd = random()
        res = Podcast.view('podcasts/random',
            startkey = [language, rnd],
            include_docs = True,
            limit = chunk_size,
            stale = 'ok',
            wrap_doc = False,
        )

        if not res:
            break

        for r in res:

            # The view podcasts/random does not include incomplete podcasts,
            # so we don't need to send any 'incomplete_obj' signals here

            obj = r['doc']
            if obj['doc_type'] == 'Podcast':
                yield Podcast.wrap(obj)

            elif obj['doc_type'] == 'PodcastGroup':
                yield PodcastGroup.wrap(obj)


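# Illustrative usage sketch, not part of the original module. The generator
# can keep producing podcasts indefinitely, so callers should bound it
# themselves, e.g. with itertools.islice:
#
#     from itertools import islice
#     sample = list(islice(random_podcasts(language='en'), 10))

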
def podcasts_by_last_update():
    res = Podcast.view('podcasts/by_last_update',
        include_docs = True,
        stale = 'update_after',
        wrap_doc = False,
    )

    # TODO: this method is only used for retrieving podcasts to update;
    # should we really send 'incomplete_obj' signals here?

    return map(_wrap_podcast_group_key1, res)


def all_podcasts():
    from mygpo.db.couchdb.utils import multi_request_view
    res = multi_request_view(Podcast, 'podcasts/by_id',
        wrap = False,
        include_docs = True,
        stale = 'update_after',
    )

    # TODO: this method is only used for maintenance purposes; should we
    # really send 'incomplete_obj' signals here?

    for r in res:
        obj = r['doc']
        if obj['doc_type'] == 'Podcast':
            yield Podcast.wrap(obj)
        else:
            pid = r[u'key']
            pg = PodcastGroup.wrap(obj)
            podcast = pg.get_podcast_by_id(pid)
            yield podcast


def all_podcasts_groups(cls):
    return cls.view('podcasts/podcasts_groups', include_docs=True,
        classes=[Podcast, PodcastGroup]).iterator()


def podcasts_to_dict(ids, use_cache=False):

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return dict()

    ids = list(set(ids))
    objs = dict()

    cache_objs = []
    if use_cache:
        res = cache.get_many(ids)
        cache_objs.extend(res.values())
        ids = [x for x in ids if x not in res.keys()]

    db_objs = podcasts_by_id(ids)

    for obj in (cache_objs + db_objs):

        # get_multi returns dict {'key': _id, 'error': 'not found'}
        # for non-existing objects
        if isinstance(obj, dict) and 'error' in obj:
            _id = obj['key']
            objs[_id] = None
            continue

        for i in obj.get_ids():
            objs[i] = obj

    if use_cache:
        cache.set_many(dict( (obj.get_id(), obj) for obj in db_objs))

    return objs


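# Illustrative usage sketch, not part of the original module; the IDs are
# made-up. The result maps each ID to its Podcast (IDs that resolve to an
# error row are mapped to None):
#
#     lookup = podcasts_to_dict(['0123456789abcdef', 'fedcba9876543210'],
#                               use_cache=True)

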
def podcasts_need_update():
    db = get_main_database()
    res = db.view('episodes/need_update',
        group_level = 1,
        reduce = True,
        limit = 100,
    )

    # TODO: this method is only used for retrieving podcasts to update;
    # should we really send 'incomplete_obj' signals here?

    for r in res:
        podcast_id = r['key']
        podcast = podcast_by_id(podcast_id)
        if podcast:
            yield podcast


@cache_result(timeout=60*60)
def get_flattr_podcasts(offset=0, limit=20):
    """ returns all podcasts that contain Flattr payment URLs """

    r = Podcast.view('podcasts/flattr',
        skip = offset,
        limit = limit,
        classes = [Podcast, PodcastGroup],
        include_docs = True,
        reduce = False,
    )

    podcasts = list(r)

    for podcast in podcasts:
        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)

    return podcasts


@cache_result(timeout=60*60)
def get_flattr_podcast_count():
    """ returns the number of podcasts that contain Flattr payment URLs """
    r = list(Podcast.view('podcasts/flattr'))
    return r[0]['value']


def subscriberdata_for_podcast(podcast_id):

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    r = PodcastSubscriberData.view('podcasts/subscriber_data',
        key = podcast_id,
        include_docs = True,
    )

    if r:
        return r.first()

    data = PodcastSubscriberData()
    data.podcast = podcast_id
    return data


def _wrap_podcast_group(res):
    if res['doc']['doc_type'] == 'Podcast':
        return Podcast.wrap(res['doc'])
    else:
        pg = PodcastGroup.wrap(res['doc'])
        id = res['key']
        return pg.get_podcast_by_id(id)


def _wrap_podcast_group_key1(res):
    obj = res['doc']
    if obj['doc_type'] == 'Podcast':
        return Podcast.wrap(obj)

    else:
        pid = res[u'key'][1]
        pg = PodcastGroup.wrap(obj)
        podcast = pg.get_podcast_by_id(pid)
        return podcast


def search_wrapper(result):
    doc = result['doc']
    if doc['doc_type'] == 'Podcast':
        p = Podcast.wrap(doc)
    elif doc['doc_type'] == 'PodcastGroup':
        p = PodcastGroup.wrap(doc)
    p._id = result['id']
    return p


@cache_result(timeout=60*60)
def search(q, offset=0, num_results=20):

    if not q:
        return [], 0

    db = get_main_database()

    #FIXME current couchdbkit can't parse responses for multi-query searches
    q = q.replace(',', '')

    try:
        res = db.search('podcasts/search',
            wrapper = search_wrapper,
            include_docs = True,
            limit = num_results,
            stale = 'update_after',
            skip = offset,
            q = q,
            sort='\\subscribers<int>')

        podcasts = list(res)

        for podcast in podcasts:
            if podcast.needs_update:
                incomplete_obj.send_robust(sender=podcast)

        return podcasts, res.total_rows

    except RequestFailed:
        return [], 0


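# Illustrative usage sketch, not part of the original module; the query string
# is a made-up example. search() returns the requested page of results
# together with the total number of matches:
#
#     podcasts, total = search('linux', offset=0, num_results=20)

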
@repeat_on_conflict(['podcast'])
def update_additional_data(podcast, twitter):
    podcast.twitter = twitter
    podcast.save()

    # clear the whole cache until we have a better invalidation mechanism
    cache.clear()


@repeat_on_conflict(['podcast'])
def update_related_podcasts(podcast, related):
    if podcast.related_podcasts == related:
        return

    podcast.related_podcasts = related
    podcast.save()


@repeat_on_conflict(['podcast'])
def delete_podcast(podcast):
    podcast.delete()