65ea8f4c9ea5cef112359f689562972c28bbb47c
[mygpo.git] / mygpo / data / directory.py
blob65ea8f4c9ea5cef112359f689562972c28bbb47c
2 # This file is part of my.gpodder.org.
4 # my.gpodder.org is free software: you can redistribute it and/or modify it
5 # under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or (at your
7 # option) any later version.
9 # my.gpodder.org is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
12 # License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
18 from mygpo.api.models import Podcast, Episode, Subscription
19 from mygpo.data.models import PodcastTag
20 from django.db.models import Sum, Count, Avg
21 from collections import defaultdict
22 import math
def get_source_weights():
    """Return a normalisation weight for every tag source.

    The weight of a source is the reciprocal of the average ``weight`` of
    all PodcastTag rows that have this source, so that multiplying a tag's
    weight by its source weight makes weights from different sources
    comparable.

    Returns:
        A dict mapping each distinct ``source`` value to a float. Sources
        whose weights sum to zero (or that have no non-NULL weight) are
        mapped to ``0.`` because no meaningful reciprocal exists for them.
    """
    sources = [row['source'] for row in
               PodcastTag.objects.values('source').distinct()]

    source_weights = {}
    for source in sources:
        # a single query delivers both aggregates for this source
        stats = PodcastTag.objects.filter(source=source).aggregate(
            total_weight=Sum('weight'), count=Count('weight'))
        total = stats['total_weight']
        count = stats['count']

        if not total or not count:
            # the average would be zero or undefined; avoid dividing by it
            source_weights[source] = 0.
            continue

        avg = float(total) / count
        source_weights[source] = 1. / avg

    return source_weights
def get_weighted_tags(podcast, source_weights):
    """Return the weighted tags of a single podcast.

    Every PodcastTag of the podcast contributes
    ``weight / <average weight of the podcast's tags from the same source>
    * source_weights[source]`` to the score of its tag; contributions for
    the same tag are summed up. All scores are finally multiplied by
    ``log10`` of the podcast's subscriber count (0 if the podcast has no
    subscribers).

    Args:
        podcast: the podcast whose tags are evaluated; must offer
            ``subscriber_count()``.
        source_weights: mapping from tag source to its weight, as returned
            by get_source_weights(). A source that is missing from the
            mapping raises KeyError.

    Returns:
        A ``defaultdict(int)`` mapping tag names to their score.
    """
    tags = defaultdict(int)

    # The average only depends on (podcast, source), so query it once per
    # source instead of once per tag.
    source_averages = {}

    for t in PodcastTag.objects.filter(podcast=podcast):
        source = t.source

        if source not in source_averages:
            source_averages[source] = PodcastTag.objects.filter(
                podcast=podcast, source=source
            ).aggregate(weight=Avg('weight'))['weight']

        src_avg = source_averages[source]
        if not src_avg:
            # average is zero or NULL: the relative prominence of the tag
            # is undefined, so it cannot contribute to the score
            continue

        # promote more prominent tags of a podcast, demote less-prominent
        tags[t.tag] += t.weight / src_avg * source_weights[source]

    try:
        subscriber_factor = math.log10(podcast.subscriber_count())
    except ValueError:
        # 0 subscribers
        subscriber_factor = 0

    # iterate over a snapshot of the keys while the values are rewritten
    for tag in list(tags):
        tags[tag] = tags[tag] * subscriber_factor

    return tags
def get_weighted_group_tags(group, source_weights):
    """Return the weighted tags of a podcast group.

    The score of a tag is the maximum of the scores that
    get_weighted_tags() yields for it over all podcasts of the group; a
    podcast that does not have the tag counts as a score of 0.

    Args:
        group: object whose ``podcasts()`` method returns the podcasts of
            the group.
        source_weights: mapping from tag source to its weight; passed on
            unchanged to get_weighted_tags().

    Returns:
        A dict mapping tag names to their maximum score. The dict is empty
        if the group has no podcasts.
    """
    podcast_tags = [get_weighted_tags(p, source_weights)
                    for p in group.podcasts()]

    # collect every tag name once, even if several podcasts share it
    all_tags = set()
    for tags in podcast_tags:
        all_tags.update(tags)

    max_tags = {}
    for tag in all_tags:
        # .get() treats a missing tag as 0 without inserting it into the
        # per-podcast mapping
        max_tags[tag] = max(tags.get(tag, 0) for tags in podcast_tags)

    return max_tags