import functools
import operator
from datetime import timedelta

from celery.decorators import periodic_task
from celery.utils.log import get_task_logger
from django.contrib.postgres.search import SearchVector
from django.db import transaction
from django_db_geventpool.utils import close_connection

from mygpo.podcasts.models import Podcast

from . import INDEX_FIELDS
# How often the periodic search-index task below is scheduled.
UPDATE_INTERVAL = timedelta(hours=1)

# Celery task logger named after this module.
logger = get_task_logger(__name__)
# Maximum number of podcasts to update in one job run
# NOTE(review): the assignment belonging to this comment is not visible in
# this chunk; the value 1000 is an assumption -- confirm against the full file.
MAX_INDEX = 1000


# NOTE(review): ``close_connection`` is imported by this module but is not
# applied anywhere in the visible code -- confirm whether a decorator line
# was lost here.
@periodic_task(run_every=UPDATE_INTERVAL)
def update_search_index(run_every=UPDATE_INTERVAL):
    """Refresh the search vector of podcasts whose index is out of date.

    Selects at most ``MAX_INDEX`` podcasts that have
    ``search_index_uptodate=False``, recomputes ``search_vector`` for each
    of them and marks them as up to date.

    :param run_every: not used inside the function body; kept so the
        signature stays compatible with existing callers.
    """
    logger.info("Updating search index")

    # We avoid a single bulk UPDATE, because it cannot be LIMITed and thus
    # might be too expensive in one statement.
    # We could use select_for_update(), but there is no need for consistency
    # between multiple podcasts.
    #
    # Fetch the primary keys once, so that the number that is logged is
    # exactly the number of rows processed below.
    pending_pks = list(
        Podcast.objects.filter(search_index_uptodate=False).values_list(
            "pk", flat=True
        )[:MAX_INDEX]
    )

    logger.info("Updating search index for %s podcasts", len(pending_pks))

    vectors = _get_search_vectors()

    # One small UPDATE per podcast keeps each statement cheap.
    for pk in pending_pks:
        Podcast.objects.filter(pk=pk).update(
            search_vector=vectors, search_index_uptodate=True
        )

    logger.info("Finished indexing podcasts")
def _get_search_vectors():
    """Return the combined search vector to use for indexing podcasts.

    Builds one ``SearchVector`` per ``(field, weight)`` entry of
    ``INDEX_FIELDS`` and combines them with ``+`` into a single expression.

    :raises TypeError: if ``INDEX_FIELDS`` is empty, because
        ``functools.reduce`` is called without an initial value.
    """
    # NOTE(review): no ``config`` is passed to SearchVector, so the podcast's
    # stored language does not appear to influence the vector here -- confirm
    # whether language-specific indexing is intended.
    vectors = [
        SearchVector(field, weight=weight)
        for field, weight in INDEX_FIELDS.items()
    ]

    # vectors can be combined with +
    return functools.reduce(operator.add, vectors)