Merge branch 'celery'
[mygpo.git] / mygpo / db / couchdb / podcast.py
blobb43ecdfa82fe4527f05ab09f7f39f61b1bc6fa67
1 from hashlib import sha1
2 from random import random
4 from restkit import RequestFailed
6 from django.core.cache import cache
8 from mygpo.core.models import Podcast, PodcastGroup, PodcastSubscriberData
9 from mygpo.decorators import repeat_on_conflict
10 from mygpo.cache import cache_result
11 from mygpo.couch import get_main_database
12 from mygpo.db import QueryParameterMissing
13 from mygpo.db.couchdb.utils import multi_request_view, is_couchdb_id
def podcast_slugs(base_slug):
    """ Returns the slugs found in the by_slug view for a base slug

    The ``podcasts/by_slug`` view is queried for the key range from
    ``[base_slug, None]`` to ``[base_slug + 'ZZZZZ', None]``; the first
    key component of every row in that range is returned as a list. """

    rows = Podcast.view(
        'podcasts/by_slug',
        startkey=[base_slug, None],
        endkey=[base_slug + 'ZZZZZ', None],
        wrap_doc=False,
    )

    slugs = []
    for row in rows:
        slugs.append(row['key'][0])
    return slugs
@cache_result(timeout=60*60)
def podcast_count():
    """ Returns ``total_rows`` of the ``podcasts/by_id`` view

    No rows are requested (``limit=0``); only the row count reported by
    the view response is used. The query is sent with
    ``stale='update_after'``. """

    view = Podcast.view(
        'podcasts/by_id',
        limit=0,
        stale='update_after',
    )
    return view.total_rows
@cache_result(timeout=60*60)
def podcasts_for_tag(tag):
    """ Returns the podcasts with the current tag.

    The result is a list of ``(key[1], value)`` pairs taken from the
    grouped rows of the ``podcasts/by_tag`` view, followed by those of the
    ``usertags/podcasts`` view (presumably podcast id and tag weight --
    verify against the view definitions).

    Some podcasts might be returned twice (once per view).

    A list is returned instead of a generator: the return value is passed
    through ``cache_result``, and a generator can only be consumed once
    (and generally can not be serialized by a cache backend).

    Raises QueryParameterMissing if no tag is given. """

    if not tag:
        raise QueryParameterMissing('tag')

    podcasts = []

    for view_name in ('podcasts/by_tag', 'usertags/podcasts'):
        # both views are queried with identical parameters; the key range
        # [tag, None] .. [tag, {}] covers all rows of the given tag
        rows = multi_request_view(
            Podcast, view_name,
            wrap=False,
            startkey=[tag, None],
            endkey=[tag, {}],
            reduce=True,
            group=True,
            group_level=2,
        )

        for r in rows:
            podcasts.append((r['key'][1], r['value']))

    return podcasts
@cache_result(timeout=60*60)
def get_podcast_languages():
    """ Returns all 2-letter language codes that are used by podcasts.

    The codes are read from the ``podcasts/by_language`` view, passed
    through ``sanitize_language_codes`` and returned sorted. Obviously
    invalid strings are filtered, but it is not checked if any of these
    codes is contained in ISO 639. """

    from mygpo.web.utils import sanitize_language_codes

    rows = Podcast.view(
        'podcasts/by_language',
        group_level=1,
        stale='ok',
    )

    codes = sanitize_language_codes([row['key'][0] for row in rows])
    codes.sort()
    return codes
@cache_result(timeout=60*60)
def podcast_by_id(podcast_id, current_id=False):
    """ Returns the podcast for the given id, or None if there is none

    The first result of the ``podcasts/by_id`` view (wrapped as either
    Podcast or PodcastGroup) is asked for the podcast via
    ``get_podcast_by_id``; ``current_id`` is passed on unchanged.

    Raises QueryParameterMissing if no podcast_id is given. """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    matches = Podcast.view(
        'podcasts/by_id',
        key=podcast_id,
        classes=[Podcast, PodcastGroup],
        include_docs=True,
    )

    if matches:
        container = matches.first()
        return container.get_podcast_by_id(podcast_id, current_id)

    return None
@cache_result(timeout=60*60)
def podcastgroup_by_id(group_id):
    """ Returns the PodcastGroup that is stored under the given id

    Raises QueryParameterMissing if no group_id is given. """

    if group_id:
        return PodcastGroup.get(group_id)

    raise QueryParameterMissing('group_id')
@cache_result(timeout=60*60)
def podcast_for_slug(slug):
    """ Returns the podcast for the given slug, or None if there is none

    Only the first row of the ``podcasts/by_slug`` view in the key range
    ``[slug, None]`` .. ``[slug, {}]`` is considered. If its document is a
    PodcastGroup, the podcast is looked up in the group using the second
    key component of the row.

    Raises QueryParameterMissing if no slug is given. """

    if not slug:
        raise QueryParameterMissing('slug')

    rows = Podcast.view(
        'podcasts/by_slug',
        startkey=[slug, None],
        endkey=[slug, {}],
        include_docs=True,
        wrap_doc=False,
    )

    if not rows:
        return None

    row = rows.first()
    document = row['doc']

    if document['doc_type'] != 'Podcast':
        # the document is a group; the row key names the contained podcast
        podcast_id = row['key'][1]
        group = PodcastGroup.wrap(document)
        return group.get_podcast_by_id(podcast_id)

    return Podcast.wrap(document)
@cache_result(timeout=60*60)
def podcast_for_slug_id(slug_id):
    """ Returns the Podcast for either a CouchDB-ID or a Slug

    The value is dispatched to ``podcast_by_id`` if ``is_couchdb_id``
    accepts it, and to ``podcast_for_slug`` otherwise.

    Raises QueryParameterMissing if no slug_id is given. """

    # reject empty values up front (as podcastgroup_for_slug_id does)
    # instead of handing None or '' to is_couchdb_id
    if not slug_id:
        raise QueryParameterMissing('slug_id')

    if is_couchdb_id(slug_id):
        return podcast_by_id(slug_id)
    else:
        return podcast_for_slug(slug_id)
@cache_result(timeout=60*60)
def podcastgroup_for_slug_id(slug_id):
    """ Returns the PodcastGroup for either a CouchDB-ID or a Slug

    Raises QueryParameterMissing if no slug_id is given. """

    if not slug_id:
        raise QueryParameterMissing('slug_id')

    if not is_couchdb_id(slug_id):
        #TODO: implement
        return PodcastGroup.for_slug(slug_id)

    return PodcastGroup.get(slug_id)
def podcasts_by_id(ids):
    """ Returns a list with one entry per row found for the given ids

    The ``podcasts/by_id`` view is queried with all ids at once and every
    row is converted by ``_wrap_podcast_group``. An empty ``ids`` yields
    an empty list without querying the database.

    Raises QueryParameterMissing if ids is None. """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return []

    rows = Podcast.view(
        'podcasts/by_id',
        keys=ids,
        include_docs=True,
        wrap_doc=False,
    )

    return [_wrap_podcast_group(row) for row in rows]
@cache_result(timeout=60*60)
def podcast_for_oldid(oldid):
    """ Returns the podcast with the given old id, or None

    The ``podcasts/by_oldid`` view is queried with ``long(oldid)`` as
    key; the first result (Podcast or PodcastGroup) is then asked for the
    podcast via ``get_podcast_by_oldid``.

    Raises QueryParameterMissing if no oldid is given. """

    if not oldid:
        raise QueryParameterMissing('oldid')

    matches = Podcast.view(
        'podcasts/by_oldid',
        key=long(oldid),
        classes=[Podcast, PodcastGroup],
        include_docs=True,
    )

    if matches:
        container = matches.first()
        return container.get_podcast_by_oldid(oldid)

    return None
@cache_result(timeout=60*60)
def podcastgroup_for_oldid(oldid):
    """ Returns the PodcastGroup with the given old id, or None

    The ``podcasts/groups_by_oldid`` view is queried with ``long(oldid)``
    as key.

    Raises QueryParameterMissing if no oldid is given. """

    if not oldid:
        raise QueryParameterMissing('oldid')

    groups = PodcastGroup.view(
        'podcasts/groups_by_oldid',
        key=long(oldid),
        include_docs=True,
    )

    if groups:
        return groups.one()

    return None
def podcast_for_url(url, create=False):
    """ Returns the podcast for the given URL

    The cache is consulted first, under a key derived from the SHA-1 of
    the URL. On a miss the ``podcasts/by_url`` view is queried and the
    result is cached. If nothing is found and ``create`` is true, a new
    Podcast with this URL is saved, cached and returned; otherwise None
    is returned.

    Raises QueryParameterMissing if no url is given. """

    if not url:
        raise QueryParameterMissing('url')

    # sha1() operates on bytes; hashing a text URL directly fails as soon
    # as it contains non-ASCII characters. Text is therefore encoded as
    # UTF-8, byte strings are hashed unchanged (so keys of ASCII URLs stay
    # the same as before).
    url_bytes = url if isinstance(url, bytes) else url.encode('utf-8')
    key = 'podcast-by-url-%s' % sha1(url_bytes).hexdigest()

    podcast = cache.get(key)
    if podcast:
        return podcast

    r = Podcast.view('podcasts/by_url',
            key=url,
            classes=[Podcast, PodcastGroup],
            include_docs=True
        )

    if r:
        # the result is either the podcast itself or the group containing it
        podcast_group = r.first()
        podcast = podcast_group.get_podcast_by_url(url)
        cache.set(key, podcast)
        return podcast

    if create:
        podcast = Podcast()
        podcast.urls = [url]
        podcast.save()
        cache.set(key, podcast)
        return podcast

    return None
def random_podcasts(language='', chunk_size=5):
    """ Returns an iterator of random podcasts

    Optionally a language code can be specified. If given, the podcasts
    will be restricted to this language. chunk_size determines how many
    podcasts will be fetched at once.

    Every chunk is requested from the ``podcasts/random`` view, starting
    at a newly drawn random number. Podcast and PodcastGroup documents
    are yielded wrapped; other document types are skipped. The iterator
    ends when a request returns no rows. """

    while True:
        rows = Podcast.view(
            'podcasts/random',
            startkey=[language, random()],
            include_docs=True,
            limit=chunk_size,
            stale='ok',
            wrap_doc=False,
        )

        if not rows:
            return

        for row in rows:
            doc = row['doc']

            if doc['doc_type'] == 'Podcast':
                yield Podcast.wrap(doc)

            elif doc['doc_type'] == 'PodcastGroup':
                yield PodcastGroup.wrap(doc)
def podcasts_by_last_update():
    """ Returns the podcasts of the ``podcasts/by_last_update`` view

    Every row is converted to a podcast by ``_wrap_podcast_group_key1``.
    The query is sent with ``stale='update_after'``. """

    rows = Podcast.view(
        'podcasts/by_last_update',
        include_docs=True,
        stale='update_after',
        wrap_doc=False,
    )

    return [_wrap_podcast_group_key1(row) for row in rows]
def all_podcasts():
    """ Yields a podcast for every row of the ``podcasts/by_id`` view

    Rows are fetched through ``multi_request_view``. Podcast documents
    are wrapped directly; for any other document the row is treated as a
    PodcastGroup and the podcast named by the row key is yielded. """

    from mygpo.db.couchdb.utils import multi_request_view

    rows = multi_request_view(
        Podcast, 'podcasts/by_id',
        wrap=False,
        include_docs=True,
        stale='update_after',
    )

    for row in rows:
        doc = row['doc']

        if doc['doc_type'] == 'Podcast':
            yield Podcast.wrap(doc)
            continue

        # the row key is the id of the podcast within the group
        podcast_id = row[u'key']
        group = PodcastGroup.wrap(doc)
        yield group.get_podcast_by_id(podcast_id)
def all_podcasts_groups(cls):
    """ Returns an iterator over the ``podcasts/podcasts_groups`` view

    The view is queried through ``cls.view``; documents are wrapped as
    Podcast or PodcastGroup. """

    results = cls.view(
        'podcasts/podcasts_groups',
        include_docs=True,
        classes=[Podcast, PodcastGroup],
    )
    return results.iterator()
def podcasts_to_dict(ids, use_cache=False):
    """ Returns a dict that maps ids to their podcast objects

    Every object is registered under all ids returned by its
    ``get_ids()``. Ids for which the database reported an error are
    mapped to None.

    If ``use_cache`` is true, objects are taken from the cache where
    possible, only the remaining ids are queried from the database, and
    the objects fetched from the database are written back to the cache
    under their ``get_id()``.

    Raises QueryParameterMissing if ids is None. """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return dict()

    # query every id only once
    ids = list(set(ids))
    objs = dict()

    cache_objs = []
    if use_cache:
        res = cache.get_many(ids)
        cache_objs.extend(res.values())
        # membership test on the dict itself instead of on its key list
        ids = [x for x in ids if x not in res]

    db_objs = podcasts_by_id(ids)

    # objects fetched from the database that can be written to the cache
    fresh = dict()

    for obj in cache_objs:
        for i in obj.get_ids():
            objs[i] = obj

    for obj in db_objs:

        # get_multi returns dict {'key': _id, 'error': 'not found'}
        # for non-existing objects
        if isinstance(obj, dict) and 'error' in obj:
            _id = obj['key']
            objs[_id] = None
            continue

        for i in obj.get_ids():
            objs[i] = obj

        # error dicts never get here; they have no get_id() and must not
        # be cached
        fresh[obj.get_id()] = obj

    if use_cache and fresh:
        cache.set_many(fresh)

    return objs
def podcasts_need_update():
    """ Yields the podcasts listed by the ``episodes/need_update`` view

    The view is queried reduced with ``group_level=1``; every row key is
    used as a podcast id and resolved with ``podcast_by_id``. Ids that
    do not resolve to a podcast are skipped. """

    database = get_main_database()
    rows = database.view(
        'episodes/need_update',
        group_level=1,
        reduce=True,
    )

    for row in rows:
        podcast = podcast_by_id(row['key'])
        if not podcast:
            continue
        yield podcast
def subscriberdata_for_podcast(podcast_id):
    """ Returns the PodcastSubscriberData of the given podcast

    If the ``podcasts/subscriber_data`` view has no entry for the podcast,
    a new (unsaved) PodcastSubscriberData referring to the podcast id is
    returned instead.

    Raises QueryParameterMissing if no podcast_id is given. """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    existing = PodcastSubscriberData.view(
        'podcasts/subscriber_data',
        key=podcast_id,
        include_docs=True,
    )

    if not existing:
        fresh = PodcastSubscriberData()
        fresh.podcast = podcast_id
        return fresh

    return existing.first()
def _wrap_podcast_group(res):
    """ Converts a view row (with included doc) into a podcast

    Podcast documents are wrapped directly. Any other document is wrapped
    as PodcastGroup, and the podcast whose id equals the row key is
    returned from it. """

    doc = res['doc']

    if doc['doc_type'] != 'Podcast':
        group = PodcastGroup.wrap(doc)
        podcast_id = res['key']
        return group.get_podcast_by_id(podcast_id)

    return Podcast.wrap(doc)
def _wrap_podcast_group_key1(res):
    """ Converts a view row (with included doc) into a podcast

    Works like ``_wrap_podcast_group``, but for views with a compound
    key: the podcast id is taken from the second component of the row
    key. """

    doc = res['doc']

    if doc['doc_type'] != 'Podcast':
        podcast_id = res[u'key'][1]
        group = PodcastGroup.wrap(doc)
        return group.get_podcast_by_id(podcast_id)

    return Podcast.wrap(doc)
def search_wrapper(result):
    """ Wraps a search result row as Podcast or PodcastGroup

    The included document is wrapped according to its ``doc_type`` and the
    ``_id`` of the resulting object is set to the ``id`` of the row.

    Raises ValueError for any other ``doc_type``; previously this case
    failed with an UnboundLocalError because no object had been
    created. """

    doc = result['doc']
    doc_type = doc['doc_type']

    if doc_type == 'Podcast':
        p = Podcast.wrap(doc)
    elif doc_type == 'PodcastGroup':
        p = PodcastGroup.wrap(doc)
    else:
        raise ValueError('unexpected doc_type %r in search result %r' %
                         (doc_type, result.get('id')))

    p._id = result['id']
    return p
@cache_result(timeout=60*60)
def search(q, offset=0, num_results=20):
    """ Searches ``podcasts/search`` and returns (results, total_rows)

    ``offset`` and ``num_results`` are passed to the search as ``skip``
    and ``limit``; every result is converted by ``search_wrapper``.
    An empty query, as well as a failed request, gives ``([], 0)``. """

    if not q:
        return [], 0

    database = get_main_database()

    #FIXME current couchdbkit can't parse responses for multi-query searches
    query = q.replace(',', '')

    try:
        results = database.search(
            'podcasts/search',
            wrapper=search_wrapper,
            include_docs=True,
            limit=num_results,
            stale='update_after',
            skip=offset,
            q=query,
            sort='\\subscribers<int>',
        )

        # materialize inside the try block, so that a request failing
        # during iteration is handled as well
        podcasts = list(results)
        return podcasts, results.total_rows

    except RequestFailed:
        return [], 0
@repeat_on_conflict(['podcast'])
def update_additional_data(podcast, twitter):
    """ Sets the twitter attribute of the podcast and saves it

    After saving, the complete cache is cleared. """

    podcast.twitter = twitter
    podcast.save()

    # clear the whole cache until we have a better invalidation mechanism
    cache.clear()