move remaining queries into db module
[mygpo.git] / mygpo / maintenance / management / commands / listening-stats.py
blob2196dd85a156fb9881b22955428e15a5c244cfae
1 from math import floor
3 from django.core.management.base import BaseCommand
5 from mygpo.directory.toplist import PodcastToplist
6 from mygpo.users.models import EpisodeUserState
7 from mygpo.utils import progress
8 from mygpo.core.models import Podcast, PodcastGroup
9 from mygpo.decorators import repeat_on_conflict
10 from mygpo.maintenance.management.podcastcmd import PodcastCommand
11 from mygpo.db.couchdb.episode import episodes_for_podcast
12 from mygpo.db.couchdb.episode_state import all_podcast_episode_states
class Command(PodcastCommand):
    """ Calculates the times between an episode is published, first downloaded
    and played (per user).

    The times are reported in quantiles of the accumulated values for each
    podcast. The output contains two lines for each podcast, both starting with
    the podcast's URL. One for the interval episode published - downloads, one
    for downloads - plays. Podcasts for which neither interval yields any
    value are not printed at all.
    """

    def handle(self, *args, **options):
        """ Prints the quantile lines for every selected podcast

        The podcasts are whatever self.get_podcasts() returns for the given
        arguments and options (defined outside this class body). """

        podcasts = self.get_podcasts(*args, **options)

        for podcast in podcasts:
            i1, i2 = self.get_listener_stats(podcast)

            # skip podcasts without any usable data
            if not (i1 or i2):
                continue

            # A single pre-formatted argument keeps the output identical
            # whether print is a statement (Python 2) or a function.
            for values in (i1, i2):
                line = ' '.join(str(q) for q in quantiles(values, 100))
                print('%s %s' % (podcast.url, line))

    def get_listener_stats(self, podcast):
        """ Collects the raw intervals (in seconds) for one podcast

        Returns a tuple (i1, i2) of lists: i1 holds the times between an
        episode's release and its first download action, i2 the times
        between the first download and the first play action. One value is
        added per episode state for which all required timestamps are set. """

        # times in seconds between episodes being published,
        # and first download events
        i1 = []

        # times in seconds between first download and first listen events
        i2 = []

        # maps episode id -> release time for lookups by state.episode
        episodes = episodes_for_podcast(podcast)
        released = dict((episode._id, episode.released) for episode in episodes)

        for state in all_podcast_episode_states(podcast):
            # None if the state refers to an episode not in this podcast
            ep = released.get(state.episode, None)

            dl = self.first_action(state.actions, 'download')

            if dl and None not in (ep, dl.timestamp):
                i1.append(total_seconds(dl.timestamp - ep))

            pl = self.first_action(state.actions, 'play')

            if None not in (dl, pl) and \
               None not in (dl.timestamp, pl.timestamp):
                i2.append(total_seconds(pl.timestamp - dl.timestamp))

        return i1, i2

    @staticmethod
    def first_action(actions, action_type):
        """ Returns the first action of the given type, or None

        "First" means first in iteration order of actions.
        NOTE(review): this presumably relies on the actions being stored
        chronologically -- confirm against the episode state model. """

        for a in actions:
            if a.action == action_type:
                return a

        return None
def quantiles(data, intervals=100):
    """
    http://en.wikipedia.org/wiki/Quantile

    Divide DATA in INTERVALS intervals and return the boundaries of
    the intervals. A median has two intervals. Thus, three values
    will be returned: the bottom of the lower half, the point that
    divides the lower and upper half and the top of the upper half.

    Taking the median of [1, 2, 3, 4, 5] returns [1, 3, 5].

      |   |   |
      1 2 3 4 5

    DATA does not need to be sorted; a sorted copy is used. For
    INTERVALS >= 1 the result has INTERVALS + 1 entries. Empty DATA
    gives an empty list.
    """

    data = sorted(data)

    if not data:
        return []

    size = len(data)

    q = [data[0]]

    # Inner boundaries 1 .. intervals-1. Integer floor division selects the
    # same index as floor(i * size / intervals) without a float round-trip,
    # and range() (unlike xrange) exists on both Python 2 and 3.
    q.extend(data[(i * size) // intervals] for i in range(1, intervals))

    q.append(data[-1])

    return q
def total_seconds(td):
    """ Returns the total amount of seconds of the timedelta

    timedelta.total_seconds() is new in Python 2.7
    http://docs.python.org/library/datetime.html#datetime.timedelta.total_seconds

    The result is a float that includes the fractional (microsecond) part,
    as the documented equivalent of timedelta.total_seconds() is computed
    with true division. """

    microseconds = td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6

    # Divide by a float: on Python 2 an int / int division would floor and
    # silently drop the sub-second part (and turn -0.5s into -1).
    return microseconds / 10.0**6