import functools
import operator
from datetime import timedelta

from celery.decorators import periodic_task
from celery.utils.log import get_task_logger

from django.db import transaction
from django.contrib.postgres.search import SearchVector

from mygpo.podcasts.models import Podcast

from . import INDEX_FIELDS
# Module-level task logger, obtained from Celery and named after this module.
logger = get_task_logger(__name__)
# Interval at which the periodic search-index update is scheduled.
UPDATE_INTERVAL = timedelta(hours=1)

# Maximum number of podcasts to update in one job run.
# NOTE(review): the original value is not visible in this view of the file;
# 1000 is an assumed default -- confirm before deploying.
MAX_INDEX = 1000
@periodic_task(run_every=UPDATE_INTERVAL)
def update_search_index(run_every=UPDATE_INTERVAL):
    """Refresh the search vector of podcasts whose index is out of date.

    Selects at most ``MAX_INDEX`` podcasts flagged with
    ``search_index_uptodate=False``, sets ``search_vector`` for each of them
    to the expression returned by ``_get_search_vectors()`` and marks them
    as up to date.

    ``run_every`` is not used by the body; it is kept so the signature stays
    unchanged for existing callers.
    """
    logger.info('Updating search index')

    # We avoid a single bulk UPDATE, because it cannot be LIMITed and thus
    # might be too expensive in one statement.
    # We could use select_for_update(), but there is no need for consistency
    # between multiple podcasts.
    #
    # The primary keys are fetched once and kept in a list, so the logged
    # count always matches the rows processed below and no separate COUNT
    # query is needed.
    pks = list(
        Podcast.objects
        .filter(search_index_uptodate=False)
        .values_list('pk', flat=True)[:MAX_INDEX]
    )

    logger.info('Updating search index for {} podcasts'.format(len(pks)))

    vectors = _get_search_vectors()

    # One small UPDATE per podcast keeps each statement cheap.
    for pk in pks:
        Podcast.objects.filter(pk=pk).update(
            search_vector=vectors,
            search_index_uptodate=True,
        )

    logger.info('Finished indexing podcasts')
def _get_search_vectors():
    """Return the combined search vector to use for indexing podcasts.

    Builds one ``SearchVector`` per ``(field, weight)`` entry of
    ``INDEX_FIELDS`` and adds them together into a single expression.

    Raises:
        TypeError: if ``INDEX_FIELDS`` is empty, because ``functools.reduce``
            is called without an initial value.
    """
    vectors = [
        SearchVector(field, weight=weight)
        for field, weight in INDEX_FIELDS.items()
    ]

    # vectors can be combined with +
    return functools.reduce(operator.add, vectors)