avoid unicode decode error in podcast caching
[mygpo.git] / mygpo / db / couchdb / podcast.py
blobbfcb3ac24b3fe1184fc3d36799210beb03ef7f38
1 from hashlib import sha1
2 from random import random
4 from restkit import RequestFailed
6 from django.core.cache import cache
8 from mygpo.core.models import Podcast, PodcastGroup, PodcastSubscriberData
9 from mygpo.core.signals import incomplete_obj
10 from mygpo.decorators import repeat_on_conflict
11 from mygpo.cache import cache_result
12 from mygpo.db.couchdb import get_main_database
13 from mygpo.db import QueryParameterMissing
14 from mygpo.db.couchdb.utils import multi_request_view, is_couchdb_id
def podcast_slugs(base_slug):
    """ Returns all podcast slugs that start with *base_slug* """
    rows = Podcast.view('podcasts/by_slug',
            startkey = [base_slug, None],
            endkey   = [base_slug + 'ZZZZZ', None],
            wrap_doc = False,
        )
    # the slug is the first component of each row key
    return [row['key'][0] for row in rows]
@cache_result(timeout=60*60)
def podcast_count():
    """ Returns the total number of podcast documents """
    # limit=0 fetches no rows; only total_rows is needed
    view = Podcast.view('podcasts/by_id',
            limit = 0,
            stale = 'update_after',
        )
    return view.total_rows
@cache_result(timeout=60*60)
def podcasts_for_tag(tag):
    """ Yields (podcast-id, count) tuples for podcasts with the given tag.

    Both the server-assigned and the user-assigned tag views are
    queried, so some podcasts might be returned twice.

    Raises QueryParameterMissing if *tag* is empty. """

    if not tag:
        raise QueryParameterMissing('tag')

    # the same grouped reduce query is run against both tag views;
    # deduplicated from two copy-pasted query blocks
    for view in ('podcasts/by_tag', 'usertags/podcasts'):
        res = multi_request_view(Podcast, view,
                wrap        = False,
                startkey    = [tag, None],
                endkey      = [tag, {}],
                reduce      = True,
                group       = True,
                group_level = 2,
            )

        for r in res:
            # key is [tag, podcast-id], value is the tag count
            yield (r['key'][1], r['value'])
@cache_result(timeout=60*60)
def get_podcast_languages():
    """ Returns all 2-letter language codes that are used by podcasts.

    It filters obviously invalid strings, but does not check if any
    of these codes is contained in ISO 639. """

    # local import to avoid a circular dependency
    from mygpo.web.utils import sanitize_language_codes

    rows = Podcast.view('podcasts/by_language',
            group_level = 1,
            stale       = 'ok',
        )

    codes = [row['key'][0] for row in rows]
    return sorted(sanitize_language_codes(codes))
@cache_result(timeout=60*60)
def podcast_by_id(podcast_id, current_id=False):
    """ Returns the Podcast for the given CouchDB ID, or None """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    r = Podcast.view('podcasts/by_id',
            key          = podcast_id,
            classes      = [Podcast, PodcastGroup],
            include_docs = True,
        )

    if not r:
        return None

    # the row may wrap either a Podcast or its enclosing PodcastGroup
    group = r.first()
    podcast = group.get_podcast_by_id(podcast_id, current_id)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast
@cache_result(timeout=60*60)
def podcastgroup_by_id(group_id):
    """ Returns the PodcastGroup for the given CouchDB ID """

    if not group_id:
        raise QueryParameterMissing('group_id')

    group = PodcastGroup.get(group_id)

    if group.needs_update:
        incomplete_obj.send_robust(sender=group)

    return group
@cache_result(timeout=60*60)
def podcast_for_slug(slug):
    """ Returns the Podcast for the given slug, or None.

    Raises QueryParameterMissing if *slug* is empty. """

    if not slug:
        raise QueryParameterMissing('slug')

    r = Podcast.view('podcasts/by_slug',
            startkey     = [slug, None],
            endkey       = [slug, {}],
            include_docs = True,
            wrap_doc     = False,
        )

    if not r:
        return None

    res = r.first()
    doc = res['doc']
    if doc['doc_type'] == 'Podcast':
        obj = Podcast.wrap(doc)
    else:
        # the slug belongs to a podcast inside a PodcastGroup;
        # the podcast's ID is the second key component
        pid = res['key'][1]
        pg = PodcastGroup.wrap(doc)
        obj = pg.get_podcast_by_id(pid)

    if obj.needs_update:
        # bugfix: just send the signal -- the previous ``raise`` on
        # send_robust()'s return value raised a TypeError instead
        incomplete_obj.send_robust(sender=obj)

    return obj
@cache_result(timeout=60*60)
def podcast_for_slug_id(slug_id):
    """ Returns the Podcast for either a CouchDB-ID or a Slug """

    if is_couchdb_id(slug_id):
        return podcast_by_id(slug_id)

    return podcast_for_slug(slug_id)
@cache_result(timeout=60*60)
def podcastgroup_for_slug_id(slug_id):
    """ Returns the PodcastGroup for either a CouchDB-ID or a Slug """

    if not slug_id:
        raise QueryParameterMissing('slug_id')

    if is_couchdb_id(slug_id):
        return podcastgroup_by_id(slug_id)

    #TODO: implement
    return PodcastGroup.for_slug(slug_id)
def podcasts_by_id(ids):
    """ Returns the Podcasts for the given list of CouchDB IDs """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return []

    rows = Podcast.view('podcasts/by_id',
            keys         = ids,
            include_docs = True,
            wrap_doc     = False,
        )

    podcasts = [_wrap_podcast_group(row) for row in rows]

    for podcast in podcasts:
        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)

    return podcasts
@cache_result(timeout=60*60)
def podcast_for_oldid(oldid):
    """ Returns the Podcast for a numeric ID of the old MySQL database """

    if not oldid:
        raise QueryParameterMissing('oldid')

    r = Podcast.view('podcasts/by_oldid',
            key          = long(oldid),
            classes      = [Podcast, PodcastGroup],
            include_docs = True,
        )

    if not r:
        return None

    # the row may wrap a Podcast or its enclosing PodcastGroup
    podcast = r.first().get_podcast_by_oldid(oldid)

    if podcast.needs_update:
        incomplete_obj.send_robust(sender=podcast)

    return podcast
@cache_result(timeout=60*60)
def podcastgroup_for_oldid(oldid):
    """ Returns the PodcastGroup for a numeric ID of the old MySQL DB """

    if not oldid:
        raise QueryParameterMissing('oldid')

    r = PodcastGroup.view('podcasts/groups_by_oldid',
            key          = long(oldid),
            include_docs = True,
        )

    if not r:
        return None

    group = r.one()

    if group.needs_update:
        incomplete_obj.send_robust(sender=group)

    return group
def podcast_for_url(url, create=False):
    """ Returns the podcast that contains the given feed URL.

    If *create* is True a new, incomplete podcast is created for an
    unknown URL; otherwise None is returned. """

    if not url:
        raise QueryParameterMissing('url')

    # hash the (utf-8 encoded) URL so the cache key is safe
    # for non-ASCII feed URLs
    key = 'podcast-by-url-%s' % sha1(url.encode('utf-8')).hexdigest()

    cached = cache.get(key)
    if cached:
        return cached

    r = Podcast.view('podcasts/by_url',
            key=url,
            classes=[Podcast, PodcastGroup],
            include_docs=True
        )

    if r:
        podcast = r.first().get_podcast_by_url(url)

        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)
        else:
            # only complete podcasts are cached
            cache.set(key, podcast)

        return podcast

    if create:
        podcast = Podcast()
        podcast.urls = [url]
        podcast.save()
        incomplete_obj.send_robust(sender=podcast)
        return podcast

    return None
def random_podcasts(language='', chunk_size=5):
    """ Returns an iterator of random podcasts

    optionaly a language code can be specified. If given the podcasts will
    be restricted to this language. chunk_size determines how many podcasts
    will be fetched at once """

    while True:
        rows = Podcast.view('podcasts/random',
                startkey = [language, random()],
                include_docs = True,
                limit = chunk_size,
                stale = 'ok',
                wrap_doc = False,
            )

        if not rows:
            break

        for row in rows:

            # The view podcasts/random does not include incomplete podcasts,
            # so we don't need to send any 'incomplete_obj' signals here

            doc = row['doc']
            doc_type = doc['doc_type']

            if doc_type == 'Podcast':
                yield Podcast.wrap(doc)

            elif doc_type == 'PodcastGroup':
                yield PodcastGroup.wrap(doc)
def podcasts_by_last_update():
    """ Returns podcasts ordered by the time of their last update """
    rows = Podcast.view('podcasts/by_last_update',
            include_docs = True,
            stale        = 'update_after',
            wrap_doc     = False,
        )

    # TODO: this method is only used for retrieving podcasts to update;
    # should we really send 'incomplete_obj' signals here?

    return [_wrap_podcast_group_key1(row) for row in rows]
def all_podcasts():
    """ Iterates over all podcasts, including those inside groups """
    from mygpo.db.couchdb.utils import multi_request_view
    rows = multi_request_view(Podcast, 'podcasts/by_id',
            wrap         = False,
            include_docs = True,
            stale        = 'update_after',
        )

    # TODO: this method is only used for maintenance purposes; should we
    # really send 'incomplete_obj' signals here?

    for row in rows:
        doc = row['doc']
        if doc['doc_type'] == 'Podcast':
            yield Podcast.wrap(doc)
        else:
            # a PodcastGroup -- extract the podcast with the row's key
            group = PodcastGroup.wrap(doc)
            yield group.get_podcast_by_id(row[u'key'])
def all_podcasts_groups(cls):
    """ Iterates over all Podcast and PodcastGroup documents """
    return cls.view('podcasts/podcasts_groups',
            include_docs = True,
            classes      = [Podcast, PodcastGroup],
        ).iterator()
def podcasts_to_dict(ids, use_cache=False):
    """ Maps each requested podcast ID to its podcast object.

    IDs that don't exist map to None. With *use_cache*, podcasts are
    looked up in (and stored into) the cache where possible. """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return dict()

    # remove duplicate IDs
    ids = list(set(ids))
    objs = dict()

    cache_objs = []
    if use_cache:
        hits = cache.get_many(ids)
        cache_objs.extend(hits.values())
        # only query the database for the cache misses
        ids = [x for x in ids if x not in hits.keys()]

    db_objs = podcasts_by_id(ids)

    for obj in (cache_objs + db_objs):

        # get_multi returns dict {'key': _id, 'error': 'not found'}
        # for non-existing objects
        if isinstance(obj, dict) and 'error' in obj:
            objs[obj['key']] = None
            continue

        # a podcast can be reachable under several IDs
        for podcast_id in obj.get_ids():
            objs[podcast_id] = obj

    if use_cache:
        cache.set_many(dict((obj.get_id(), obj) for obj in db_objs))

    return objs
def podcasts_need_update():
    """ Iterates over the podcasts whose episodes need an update """
    db = get_main_database()
    rows = db.view('episodes/need_update',
            group_level = 1,
            reduce      = True,
        )

    # TODO: this method is only used for retrieving podcasts to update;
    # should we really send 'incomplete_obj' signals here?

    for row in rows:
        podcast = podcast_by_id(row['key'])
        if podcast:
            yield podcast
@cache_result(timeout=60*60)
def get_flattr_podcasts(offset=0, limit=20):
    """ returns all podcasts that contain Flattr payment URLs """

    rows = Podcast.view('podcasts/flattr',
            skip         = offset,
            limit        = limit,
            classes      = [Podcast, PodcastGroup],
            include_docs = True,
            reduce       = False,
        )

    podcasts = list(rows)

    for podcast in podcasts:
        if podcast.needs_update:
            incomplete_obj.send_robust(sender=podcast)

    return podcasts
@cache_result(timeout=60*60)
def get_flattr_podcast_count():
    """ returns the number of podcasts that contain Flattr payment URLs """
    r = list(Podcast.view('podcasts/flattr'))

    # a reduce view returns no rows at all when nothing matches;
    # previously r[0] raised an IndexError in that case
    if not r:
        return 0

    return r[0]['value']
def subscriberdata_for_podcast(podcast_id):
    """ Returns the subscriber data of the given podcast.

    A fresh, empty PodcastSubscriberData is returned if none is stored. """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    r = PodcastSubscriberData.view('podcasts/subscriber_data',
            key          = podcast_id,
            include_docs = True,
        )

    if r:
        return r.first()

    data = PodcastSubscriberData()
    data.podcast = podcast_id
    return data
def _wrap_podcast_group(res):
    """ Wraps a ``podcasts/by_id`` view row into a Podcast.

    If the row's document is a PodcastGroup, the podcast whose ID
    is the row key is extracted from the group. """
    doc = res['doc']
    if doc['doc_type'] == 'Podcast':
        return Podcast.wrap(doc)

    pg = PodcastGroup.wrap(doc)
    # renamed from ``id``, which shadowed the builtin
    podcast_id = res['key']
    return pg.get_podcast_by_id(podcast_id)
def _wrap_podcast_group_key1(res):
    """ Wraps a view row into a Podcast; the podcast ID is the
    second element of the row key """
    doc = res['doc']
    if doc['doc_type'] == 'Podcast':
        return Podcast.wrap(doc)

    group = PodcastGroup.wrap(doc)
    return group.get_podcast_by_id(res[u'key'][1])
def search_wrapper(result):
    """ Wraps a search result row into a Podcast or PodcastGroup """
    doc = result['doc']
    doc_type = doc['doc_type']
    if doc_type == 'Podcast':
        obj = Podcast.wrap(doc)
    elif doc_type == 'PodcastGroup':
        obj = PodcastGroup.wrap(doc)
    obj._id = result['id']
    return obj
@cache_result(timeout=60*60)
def search(q, offset=0, num_results=20):
    """ Searches for podcasts matching the query string *q*.

    Returns a (podcasts, total_rows) tuple; ([], 0) is returned for
    empty queries and failed search requests. """

    if not q:
        return [], 0

    db = get_main_database()

    #FIXME current couchdbkit can't parse responses for multi-query searches
    params = dict(
        wrapper      = search_wrapper,
        include_docs = True,
        limit        = num_results,
        stale        = 'update_after',
        skip         = offset,
        q            = q.replace(',', ''),
        sort         = '\\subscribers<int>',
    )

    try:
        res = db.search('podcasts/search', **params)
        podcasts = list(res)

        for podcast in podcasts:
            if podcast.needs_update:
                incomplete_obj.send_robust(sender=podcast)

        return podcasts, res.total_rows

    except RequestFailed:
        return [], 0
@repeat_on_conflict(['podcast'])
def update_additional_data(podcast, twitter):
    """ Stores the twitter handle on the podcast and saves it """
    podcast.twitter = twitter
    podcast.save()

    # clear the whole cache until we have a better invalidation mechanism
    cache.clear()