mygpo/data/directory.py

   1 #
   2 # This file is part of my.gpodder.org.
   3 #
   4 # my.gpodder.org is free software: you can redistribute it and/or modify it
   5 # under the terms of the GNU Affero General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or (at your
   7 # option) any later version.
   8 #
   9 # my.gpodder.org is distributed in the hope that it will be useful, but
  10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
  12 # License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
  16 #
  17
  18 from mygpo.api.models import Podcast, Episode, Subscription
  19 from mygpo.data.models import PodcastTag
  20 from django.db.models import Sum, Count, Avg
  21 from collections import defaultdict
  22 import math
  23
  24 def get_source_weights():
  25     categories = [x['source'] for x in PodcastTag.objects.filter().values('source').distinct()]
  26     total_weights = {}
  27     for c in categories:
  28         tags = PodcastTag.objects.filter(source=c)
  29         total = tags.aggregate(total_weight=Sum('weight'))['total_weight']
  30         number = tags.aggregate(count=Count('weight'))['count']
  31         avg = float(total) / number
  32         total_weights[c] = 1. / avg
  33
  34     return total_weights
  35
  36
  37 def get_weighted_tags(podcast, source_weights):
  38
  39     tags = defaultdict(int)
  40     for t in PodcastTag.objects.filter(podcast=podcast):
  41         tag = t.tag
  42
  43         # promote more prominent tags of a podcast, demote less-prominent
  44         src_avg = PodcastTag.objects.filter(podcast=podcast, source=t.source).aggregate(weight=Avg('weight'))['weight']
  45
  46         tags[tag] = tags[tag] + t.weight / src_avg * source_weights[t.source]
  47
  48     try:
  49         subscriber_factor = math.log10(podcast.subscriber_count())
  50     except ValueError:
  51         # 0 subscribers
  52         subscriber_factor = 0
  53
  54     for t in tags.iterkeys():
  55         tags[t] = tags[t] * subscriber_factor
  56
  57     return tags
  58
  59
  60 def get_weighted_group_tags(group, source_weights):
  61
  62     podcast_tags = []
  63
  64     for p in group.podcasts():
  65         podcast_tags.append(get_weighted_tags(p, source_weights))
  66
  67     tags = reduce(lambda x, y: x+y, [x.keys() for x in podcast_tags])
  68
  69     max_tags = {}
  70     for tag in tags:
  71         max_tags[tag] = max([x[tag] for x in podcast_tags])
  72
  73     return max_tags
  74
  75