Bump responses from 0.12.1 to 0.13.3
[mygpo.git] / mygpo / search / tasks.py
blob7fb635730ef5f31f525e5abb21bdc4e7f1d8b96d
1 import functools
2 import operator
3 from datetime import timedelta
5 from celery.decorators import periodic_task
6 from django_db_geventpool.utils import close_connection
8 from django.db import transaction
9 from django.contrib.postgres.search import SearchVector
11 from mygpo.podcasts.models import Podcast
13 from . import INDEX_FIELDS
15 from celery.utils.log import get_task_logger
17 logger = get_task_logger(__name__)
20 # interval at which the search index update task is run
21 UPDATE_INTERVAL = timedelta(hours=1)
23 # Maximum number of podcasts to update in one job run
24 MAX_INDEX = 1000
@periodic_task(run_every=UPDATE_INTERVAL)
@close_connection
def update_search_index(run_every=UPDATE_INTERVAL):
    """Refresh the search vector of podcasts whose index is out of date.

    Selects at most ``MAX_INDEX`` podcasts that have
    ``search_index_uptodate=False``, recomputes ``search_vector`` for each
    of them and marks them as up to date.

    ``run_every`` is kept for backward compatibility; the function body does
    not read it. The decorator above passes ``UPDATE_INTERVAL`` to
    ``periodic_task`` as the run interval.
    """
    logger.info("Updating search index")

    # We avoid a single bulk UPDATE, because it cannot be LIMITed and thus
    # might be too expensive in one statement.
    # We could use select_for_update(), but there is no need for consistency
    # between multiple podcasts.
    #
    # The primary keys are fetched exactly once, so the count logged below
    # always matches the rows processed and no separate COUNT query is sent.
    pks = list(
        Podcast.objects.filter(search_index_uptodate=False).values_list(
            "pk", flat=True
        )[:MAX_INDEX]
    )
    logger.info("Updating search index for %s podcasts", len(pks))

    # The vector expression is identical for every podcast; build it once.
    vectors = _get_search_vectors()

    # One small UPDATE per podcast keeps each statement cheap.
    for pk in pks:
        Podcast.objects.filter(pk=pk).update(
            search_vector=vectors, search_index_uptodate=True
        )

    logger.info("Finished indexing podcasts")
def _get_search_vectors():
    """Build the combined, weighted search vector used to index podcasts.

    Every ``(field, weight)`` entry of ``INDEX_FIELDS`` becomes one
    ``SearchVector``; the individual vectors are then joined with ``+``.
    """
    per_field = (
        SearchVector(name, weight=rank) for name, rank in INDEX_FIELDS.items()
    )
    # Fold the per-field vectors into a single expression via addition.
    return functools.reduce(operator.add, per_field)