get Flattr / licensed podcasts from PostgreSQL
[mygpo.git] / mygpo / db / couchdb / podcast.py
blobaab11e18e92ff0b44dfd30b5e9a881e7a20b171d
1 from hashlib import sha1
2 from datetime import datetime
4 from restkit import RequestFailed
5 from couchdbkit import MultipleResultsFound
7 from django.core.cache import cache
9 from mygpo.core.models import Podcast, PodcastGroup, PodcastSubscriberData
10 from mygpo.core.signals import incomplete_obj
11 from mygpo.decorators import repeat_on_conflict
12 from mygpo.cache import cache_result
13 from mygpo.utils import get_timestamp
14 from mygpo.db.couchdb import get_main_database, get_userdata_database, \
15 lucene_query
16 from mygpo.db import QueryParameterMissing
17 from mygpo.db.couchdb import get_main_database, get_single_result
18 from mygpo.db.couchdb.utils import multi_request_view, is_couchdb_id
20 import logging
21 logger = logging.getLogger(__name__)
def podcast_slugs(base_slug):
    """ Returns the slugs found in 'podcasts/by_slug' for a base slug

    The view is queried for the key range [base_slug, None] up to
    [base_slug + 'ZZZZZ', None]; the first key component of every
    returned row is collected into a list. """

    rows = Podcast.view(
        'podcasts/by_slug',
        startkey=[base_slug, None],
        endkey=[base_slug + 'ZZZZZ', None],
        wrap_doc=False,
    )

    slugs = []
    for row in rows:
        # keys are lists; element 0 is the slug part of the key
        slugs.append(row['key'][0])

    return slugs
@cache_result(timeout=60*60)
def podcasts_for_tag(tag):
    """ Yields (key[1], value) pairs for all rows matching the tag

    Rows are read first from the 'podcasts/by_tag' view and afterwards
    from the 'usertags/podcasts' view of the userdata database, both
    reduced and grouped at level 2. Because two views are combined, the
    same podcast might be yielded twice.

    NOTE(review): key[1] is presumably the podcast id and value the
    number of taggings -- confirm against the view definitions.

    Raises QueryParameterMissing (on first iteration) if tag is empty """

    if not tag:
        raise QueryParameterMissing('tag')

    def _grouped_rows(target, view_name):
        # both queries use the same key range and grouping options
        return multi_request_view(
            target, view_name,
            wrap=False,
            startkey=[tag, None],
            endkey=[tag, {}],
            reduce=True,
            group=True,
            group_level=2,
        )

    for row in _grouped_rows(Podcast, 'podcasts/by_tag'):
        yield (row['key'][1], row['value'])

    # the userdata database is only fetched once the first view is exhausted
    udb = get_userdata_database()

    for row in _grouped_rows(udb, 'usertags/podcasts'):
        yield (row['key'][1], row['value'])
def podcast_by_id_uncached(podcast_id, current_id=False):
    """ Returns the podcast for the given id, or None if there is none

    The podcast is looked up in the 'podcasts/by_id' view of the main
    database and wrapped by _wrap_podcast_group. If the result reports
    needs_update, the incomplete_obj signal is sent for it.

    The current_id parameter is accepted but not used by this function.

    Raises QueryParameterMissing if podcast_id is empty """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    found = get_single_result(
        get_main_database(),
        'podcasts/by_id',
        key=podcast_id,
        include_docs=True,
        wrapper=_wrap_podcast_group,
    )

    if found:
        if found.needs_update:
            incomplete_obj.send_robust(sender=found)
        return found

    return None


# variant of podcast_by_id_uncached wrapped by cache_result(timeout=60*60)
podcast_by_id = cache_result(timeout=60*60)(podcast_by_id_uncached)
@cache_result(timeout=60*60)
def podcastgroup_by_id(group_id):
    """ Returns the PodcastGroup fetched via PodcastGroup.get(group_id)

    Sends the incomplete_obj signal if the group reports needs_update.

    Raises QueryParameterMissing if group_id is empty """

    if not group_id:
        raise QueryParameterMissing('group_id')

    group = PodcastGroup.get(group_id)

    needs_update = group.needs_update
    if needs_update:
        incomplete_obj.send_robust(sender=group)

    return group
@cache_result(timeout=60*60)
def podcast_for_slug(slug):
    """ Returns the podcast for the given slug, or None if there is none

    Queries 'podcasts/by_slug' in the main database for the key range
    [slug, None] to [slug, {}] and wraps the single result with
    _wrap_podcast_group_key1. Sends the incomplete_obj signal if the
    result reports needs_update.

    Raises QueryParameterMissing if slug is empty """

    if not slug:
        raise QueryParameterMissing('slug')

    found = get_single_result(
        get_main_database(),
        'podcasts/by_slug',
        startkey=[slug, None],
        endkey=[slug, {}],
        include_docs=True,
        wrapper=_wrap_podcast_group_key1,
    )

    if found:
        if found.needs_update:
            incomplete_obj.send_robust(sender=found)
        return found

    return None
@cache_result(timeout=60*60)
def podcast_for_slug_id(slug_id):
    """ Returns the Podcast for either a CouchDB-ID or a Slug

    Values accepted by is_couchdb_id are resolved via podcast_by_id,
    everything else via podcast_for_slug. """

    lookup = podcast_by_id if is_couchdb_id(slug_id) else podcast_for_slug
    return lookup(slug_id)
@cache_result(timeout=60*60)
def podcastgroup_for_slug_id(slug_id):
    """ Returns the PodcastGroup for either a CouchDB-ID or a Slug

    Values accepted by is_couchdb_id are resolved via podcastgroup_by_id,
    everything else via PodcastGroup.for_slug.

    Raises QueryParameterMissing if slug_id is empty """

    if not slug_id:
        raise QueryParameterMissing('slug_id')

    if not is_couchdb_id(slug_id):
        #TODO: implement
        return PodcastGroup.for_slug(slug_id)

    return podcastgroup_by_id(slug_id)
def podcasts_by_id(ids):
    """ Returns the list of podcasts for the given ids

    An empty ids collection gives an empty list without querying the
    database. Otherwise the rows of 'podcasts/by_id' for these keys are
    wrapped with _wrap_podcast_group, and the incomplete_obj signal is
    sent for every podcast that reports needs_update.

    Raises QueryParameterMissing if ids is None """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return []

    rows = Podcast.view(
        'podcasts/by_id',
        keys=ids,
        include_docs=True,
        wrap_doc=False,
    )

    wrapped = [_wrap_podcast_group(row) for row in rows]

    for entry in wrapped:
        if entry.needs_update:
            incomplete_obj.send_robust(sender=entry)

    return wrapped
def podcasts_groups_by_id(ids):
    """ Yields podcast groups and top-level podcasts for the given ids

    Every row of 'podcasts/podcasts_groups' for these keys is wrapped by
    _wrap_pg. Rows that cannot be wrapped are yielded as None; for all
    other objects the incomplete_obj signal is sent when they report
    needs_update. Nothing is yielded for an empty ids collection.

    Raises QueryParameterMissing (on first iteration) if ids is None """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return

    rows = get_main_database().view(
        'podcasts/podcasts_groups',
        keys=ids,
        include_docs=True,
    )

    for row in rows:
        wrapped = _wrap_pg(row)

        if wrapped:
            if wrapped.needs_update:
                incomplete_obj.send_robust(sender=wrapped)
            yield wrapped
        else:
            # keep a placeholder for rows that could not be wrapped
            yield None
@cache_result(timeout=60*60)
def podcast_for_oldid(oldid):
    """ Returns the podcast for the given old id, or None if there is none

    long(oldid) is looked up in the 'podcasts/by_oldid' view of the main
    database; the result is wrapped by _wrap_podcast_group_key1. Sends
    the incomplete_obj signal if the podcast reports needs_update.

    Raises QueryParameterMissing if oldid is None """

    if oldid is None:
        raise QueryParameterMissing('oldid')

    found = get_single_result(
        get_main_database(),
        'podcasts/by_oldid',
        key=long(oldid),
        include_docs=True,
        wrapper=_wrap_podcast_group_key1,
    )

    if found:
        if found.needs_update:
            incomplete_obj.send_robust(sender=found)
        return found

    return None
@cache_result(timeout=60*60)
def podcastgroup_for_oldid(oldid):
    """ Returns the PodcastGroup for the given old id, or None

    long(oldid) is looked up in the 'podcasts/groups_by_oldid' view of
    the main database with PodcastGroup as schema. Sends the
    incomplete_obj signal if the group reports needs_update.

    Raises QueryParameterMissing if oldid is falsy (this includes 0) """

    if not oldid:
        raise QueryParameterMissing('oldid')

    group = get_single_result(
        get_main_database(),
        'podcasts/groups_by_oldid',
        key=long(oldid),
        include_docs=True,
        schema=PodcastGroup,
    )

    if group:
        if group.needs_update:
            incomplete_obj.send_robust(sender=group)
        return group

    return None
def podcast_for_url(url, create=False):
    """ Returns the podcast for the given URL

    A truthy entry in the Django cache (keyed by the SHA1 of the UTF-8
    encoded URL) is returned directly. Otherwise 'podcasts/by_url' is
    queried; a podcast found there is cached unless it reports
    needs_update, in which case the incomplete_obj signal is sent instead.

    If nothing is found and create is True, a new Podcast with this URL
    is saved, announced via incomplete_obj and returned. Without create,
    None is returned.

    Raises QueryParameterMissing if url is empty """

    if not url:
        raise QueryParameterMissing('url')

    cache_key = 'podcast-by-url-%s' % sha1(url.encode('utf-8')).hexdigest()

    cached = cache.get(cache_key)
    if cached:
        return cached

    podcast_group = get_single_result(
        get_main_database(),
        'podcasts/by_url',
        key=url,
        include_docs=True,
        wrapper=_wrap_pg,
    )

    if podcast_group:
        podcast = podcast_group.get_podcast_by_url(url)

        if not podcast.needs_update:
            # only podcasts that do not need an update are cached
            cache.set(cache_key, podcast)
        else:
            incomplete_obj.send_robust(sender=podcast)

        return podcast

    if not create:
        return None

    new_podcast = Podcast()
    new_podcast.created_timestamp = get_timestamp(datetime.utcnow())
    new_podcast.urls = [url]
    new_podcast.save()
    incomplete_obj.send_robust(sender=new_podcast)
    return new_podcast
291 def _wrap_pg(doc):
293 doc = doc['doc']
295 if not doc:
296 return None
298 if doc['doc_type'] == 'Podcast':
299 return Podcast.wrap(doc)
301 elif doc['doc_type'] == 'PodcastGroup':
302 return PodcastGroup.wrap(doc)
304 else:
305 logger.error('received unknown doc_type "%s"', doc['doc_type'])
def podcast_duplicates_for_url(url):
    """ Yields one podcast per 'podcasts/by_url' row matching the URL

    The rows are wrapped as Podcast or PodcastGroup; for each of them
    the result of get_podcast_by_url(url) is yielded.

    Raises QueryParameterMissing (on first iteration) if url is empty """

    if not url:
        raise QueryParameterMissing('url')

    matches = Podcast.view(
        'podcasts/by_url',
        key=url,
        classes=[Podcast, PodcastGroup],
        include_docs=True,
    )

    for podcast_or_group in matches:
        yield podcast_or_group.get_podcast_by_url(url)
def podcasts_by_last_update(limit=100):
    """ Returns up to limit podcasts from 'podcasts/by_last_update'

    The view is queried with stale='update_after'; every row is wrapped
    by _wrap_podcast_group_key1 and the results are returned as a list. """

    rows = Podcast.view(
        'podcasts/by_last_update',
        include_docs=True,
        stale='update_after',
        wrap_doc=False,
        limit=limit,
    )

    # TODO: this method is only used for retrieving podcasts to update;
    #       should we really send 'incomplete_obj' signals here?

    return [_wrap_podcast_group_key1(row) for row in rows]
def podcasts_by_next_update(limit=100):
    """ Returns the podcasts that are due for an update next

    Reads at most limit rows from 'podcasts/by_next_update' (queried
    with stale='update_after'), wrapped as Podcast or PodcastGroup, and
    returns them as a list. """

    query_options = dict(
        include_docs=True,
        stale='update_after',
        limit=limit,
        classes=[Podcast, PodcastGroup],
    )
    due = Podcast.view('podcasts/by_next_update', **query_options)

    # TODO: this method is only used for retrieving podcasts to update;
    #       should we really send 'incomplete_obj' signals here?

    return list(due)
def podcasts_to_dict(ids, use_cache=False):
    """ Returns a dict that maps ids to their podcast objects

    Duplicate ids are removed first. With use_cache the ids are looked
    up in the Django cache and only the missing ones are loaded through
    podcasts_by_id; the freshly loaded podcasts are then written back to
    the cache under their get_id().

    Each podcast is entered under every id returned by its get_ids(), so
    the result can contain more keys than were requested. Entries of the
    form {'key': ..., 'error': ...} are mapped to None.

    Raises QueryParameterMissing if ids is None """

    if ids is None:
        raise QueryParameterMissing('ids')

    if not ids:
        return dict()

    ids = list(set(ids))
    objs = dict()

    cache_objs = []
    if use_cache:
        res = cache.get_many(ids)
        cache_objs.extend(res.values())
        # test membership on the dict itself; res.keys() would build a
        # new list for every id under Python 2
        ids = [x for x in ids if x not in res]

    db_objs = podcasts_by_id(ids)

    for obj in (cache_objs + db_objs):

        # get_multi returns dict {'key': _id, 'error': 'not found'}
        # for non-existing objects
        if isinstance(obj, dict) and 'error' in obj:
            _id = obj['key']
            objs[_id] = None
            continue

        for i in obj.get_ids():
            objs[i] = obj

    if use_cache:
        cache.set_many(dict((obj.get_id(), obj) for obj in db_objs))

    return objs
def podcasts_need_update(limit=100):
    """ Yields the podcasts referenced by the 'episodes/need_update' view

    The view is reduced with group_level 1 and limited to limit rows;
    each row key is resolved through podcast_by_id. Keys for which no
    podcast is found are skipped. """

    rows = get_main_database().view(
        'episodes/need_update',
        group_level=1,
        reduce=True,
        limit=limit,
    )

    # TODO: this method is only used for retrieving podcasts to update;
    #       should we really send 'incomplete_obj' signals here?

    for row in rows:
        podcast = podcast_by_id(row['key'])
        if not podcast:
            continue
        yield podcast
def subscriberdata_for_podcast(podcast_id):
    """ Returns the PodcastSubscriberData for the given podcast id

    The data is read from 'podcasts/subscriber_data' in the main
    database. If nothing is stored, a new (unsaved) PodcastSubscriberData
    object with its podcast attribute set to podcast_id is returned.

    Raises QueryParameterMissing if podcast_id is empty """

    if not podcast_id:
        raise QueryParameterMissing('podcast_id')

    stored = get_single_result(
        get_main_database(),
        'podcasts/subscriber_data',
        key=podcast_id,
        include_docs=True,
        schema=PodcastSubscriberData,
    )

    if stored:
        return stored

    # nothing stored yet: hand out an empty object that is not saved here
    fresh = PodcastSubscriberData()
    fresh.podcast = podcast_id
    return fresh
def _wrap_podcast_group(res):
    """ Wraps a view row as Podcast, using the row key as podcast id

    Documents with doc_type 'Podcast' are wrapped directly. Every other
    document is wrapped as PodcastGroup, from which the podcast with the
    id given by the row's key is returned. """

    document = res['doc']

    if document['doc_type'] != 'Podcast':
        group = PodcastGroup.wrap(document)
        podcast_id = res['key']
        return group.get_podcast_by_id(podcast_id)

    return Podcast.wrap(document)
def _wrap_podcast_group_key1(res):
    """ Wraps a view row as Podcast, using key[1] as podcast id

    Documents with doc_type 'Podcast' are wrapped directly. Every other
    document is wrapped as PodcastGroup, from which the podcast with the
    id stored in the second element of the row's key is returned. """

    document = res['doc']

    if document['doc_type'] != 'Podcast':
        podcast_id = res[u'key'][1]
        group = PodcastGroup.wrap(document)
        return group.get_podcast_by_id(podcast_id)

    return Podcast.wrap(document)
def search_wrapper(result):
    """ Wraps a search result row as Podcast or PodcastGroup

    The row's 'doc' is wrapped according to its doc_type, and the _id of
    the wrapped object is set to the row's 'id'.

    Raises ValueError for any doc_type other than 'Podcast' and
    'PodcastGroup'. Without this check the function would fail with an
    UnboundLocalError, because no object would have been created. """

    doc = result['doc']
    doc_type = doc['doc_type']

    if doc_type == 'Podcast':
        p = Podcast.wrap(doc)
    elif doc_type == 'PodcastGroup':
        p = PodcastGroup.wrap(doc)
    else:
        raise ValueError('unexpected doc_type "%s" in search result'
                         % doc_type)

    p._id = result['id']
    return p
@cache_result(timeout=60*60)
def search(q, offset=0, num_results=20):
    """ Searches podcasts and returns a (results, total_rows) tuple

    The query is built by lucene_query over the fields 'title' and
    'description' and sent to the 'podcasts/search' index, skipping
    offset entries and limited to num_results. The incomplete_obj signal
    is sent for every result that reports needs_update.

    An empty query, or a RequestFailed raised while searching, gives
    ([], 0). """

    if not q:
        return [], 0

    db = get_main_database()

    lucene_q = lucene_query(['title', 'description'], q)

    try:
        results = db.search(
            'podcasts/search',
            wrapper=search_wrapper,
            include_docs=True,
            limit=num_results,
            stale='update_after',
            skip=offset,
            q=lucene_q,
        )

        found = list(results)

        for entry in found:
            if entry.needs_update:
                incomplete_obj.send_robust(sender=entry)

        return found, results.total_rows

    except RequestFailed:
        return [], 0
def reload_podcast(podcast):
    """ Fetches the podcast again by its id, bypassing the cached lookup """
    podcast_id = podcast.get_id()
    return podcast_by_id_uncached(podcast_id)
@repeat_on_conflict(['podcast'], reload_f=reload_podcast)
def update_additional_data(podcast, twitter):
    """ Stores the twitter value on the podcast and saves it

    Afterwards the complete Django cache is cleared.

    NOTE(review): repeat_on_conflict presumably re-runs this function
    with a podcast reloaded via reload_podcast when saving conflicts --
    confirm against mygpo.decorators. """

    podcast.twitter = twitter
    podcast.save()

    # clear the whole cache until we have a better invalidation mechanism
    cache.clear()
@repeat_on_conflict(['podcast'], reload_f=reload_podcast)
def update_related_podcasts(podcast, related):
    """ Sets the related podcasts of a podcast and saves it

    Nothing is written if the podcast already has exactly these related
    podcasts. """

    if podcast.related_podcasts != related:
        podcast.related_podcasts = related
        podcast.save()
@repeat_on_conflict(['podcast'], reload_f=reload_podcast)
def delete_podcast(podcast):
    """ Deletes the given podcast by calling its delete() method """
    podcast.delete()