Merge branch 'master' into pg-search
[mygpo.git] / mygpo / search / tasks.py
blob93c986fe46770ab6c100b5f1291002dda903a82d
1 import functools
2 import operator
3 from datetime import timedelta
5 from celery.decorators import periodic_task
7 from django.db import transaction
8 from django.contrib.postgres.search import SearchVector
10 from mygpo.podcasts.models import Podcast
12 from . import INDEX_FIELDS
14 from celery.utils.log import get_task_logger
15 logger = get_task_logger(__name__)
# How often the periodic search-index update task is run
UPDATE_INTERVAL = timedelta(hours=1)

# Upper bound on the number of podcasts indexed during a single task run
MAX_INDEX = 1000
@periodic_task(run_every=UPDATE_INTERVAL)
def update_search_index(run_every=UPDATE_INTERVAL):
    """Refresh the search vector of podcasts whose index is out of date.

    Registered as a periodic Celery task that runs every
    ``UPDATE_INTERVAL``. Each run handles at most ``MAX_INDEX`` podcasts
    that have ``search_index_uptodate=False``; for every one of them the
    ``search_vector`` column is recomputed and the flag is set to ``True``.
    Podcasts beyond the limit stay flagged and are selected again by a
    later run.

    ``run_every`` is not read inside the function body; it is kept so the
    signature stays unchanged.
    """
    logger.info('Updating search index')

    # We avoid a single bulk UPDATE, because it cannot be LIMITed and thus
    # might be too expensive in one statement.
    # We could use select_for_update(), but there is no need for consistency
    # between multiple podcasts.
    #
    # The primary keys are fetched once, so the batch is selected with one
    # query and the logged count is exactly the number of rows processed.
    podcast_pks = list(
        Podcast.objects
        .filter(search_index_uptodate=False)
        .values_list('pk', flat=True)[:MAX_INDEX]
    )

    logger.info('Updating search index for %d podcasts', len(podcast_pks))

    # The vector expression is the same for every podcast; build it once.
    vectors = _get_search_vectors()

    for pk in podcast_pks:
        # One small UPDATE per podcast keeps each statement cheap.
        Podcast.objects.filter(pk=pk).update(
            search_vector=vectors,
            search_index_uptodate=True,
        )

    logger.info('Finished indexing podcasts')
def _get_search_vectors():
    """Build the combined search vector expression for indexing podcasts.

    One weighted ``SearchVector`` is created for every ``(field, weight)``
    entry of ``INDEX_FIELDS``; the individual vectors are then added
    together into a single expression, which is returned.
    """
    weighted = [
        SearchVector(name, weight=rank)
        for name, rank in INDEX_FIELDS.items()
    ]

    # SearchVector objects support ``+``; fold them from left to right
    return functools.reduce(operator.add, weighted)