Fix day filter
[cds-indico.git] / indico / util / suggestions.py
blob0897f54eb3128bd9a393348346d8803ec877ff50
1 # This file is part of Indico.
2 # Copyright (C) 2002 - 2015 European Organization for Nuclear Research (CERN).
4 # Indico is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License as
6 # published by the Free Software Foundation; either version 3 of the
7 # License, or (at your option) any later version.
9 # Indico is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with Indico; if not, see <http://www.gnu.org/licenses/>.
17 from __future__ import division
18 from collections import defaultdict
19 from datetime import date, timedelta
20 from itertools import islice
21 from operator import methodcaller
23 from MaKaC.common.indexes import IndexesHolder
24 from MaKaC.common.timezoneUtils import nowutc, utc2server
25 from MaKaC.conference import ConferenceHolder
26 from indico.util.redis import avatar_links
29 def _unique(seq, get_identity=None):
30 exclude = set()
31 for item in seq:
32 identifier = get_identity(item) if get_identity else item
33 if identifier not in exclude:
34 exclude.add(identifier)
35 yield item
def _unique_events(seq):
    """Lazily yield the events of ``seq``, keeping one event per ``getId()``."""
    event_id = methodcaller('getId')
    return _unique(seq, get_identity=event_id)
42 def _window(seq, n=2):
43 """Returns a sliding window (of width n) over data from the iterable
44 s -> (s0,s1,...s[n-1]), (s1,s2,...,sn), ...
45 """
46 it = iter(seq)
47 result = tuple(islice(it, n))
48 if len(result) == n:
49 yield result
50 for elem in it:
51 result = result[1:] + (elem,)
52 yield result
55 def _get_blocks(events, attended):
56 blocks = []
57 block = []
58 for event in events:
59 if event not in attended:
60 if block:
61 blocks.append(block)
62 block = []
63 continue
64 block.append(event)
65 if block:
66 blocks.append(block)
67 return blocks
def _start_of_day(dt):
    """Return ``dt`` truncated to 00:00:00.000000 of the same day."""
    return dt.replace(hour=0, minute=0, second=0, microsecond=0)


def _get_category_score(user, categ, attended_events, debug=False):
    """Compute a relevance score of ``categ`` for ``user``.

    The score starts at 1 for a favorite category (0 otherwise) and is then
    adjusted by the attendance ratio, the number of events after the last
    attended one, the time elapsed since the last attended event and the
    attended events that are still to come.

    :param user: object exposing ``favorite_categories``
    :param categ: the category to score; its ``getId()`` is used to query
                  the ``categoryDateAll`` index
    :param attended_events: non-empty sequence of events of ``categ`` that
                            the user attended (an empty sequence raises
                            ``IndexError``)
    :param debug: if true, print the category and the intermediate score
                  after each step
    :return: the score as a number
    """
    def _debug(label):
        # `score` is looked up at call time, so this prints the current value.
        if debug:
            print('{0:+.3f} - {1}'.format(score, label))

    if debug:
        print(repr(categ))
    idx = IndexesHolder().getById('categoryDateAll')
    categ_id = categ.getId()
    # The first/last element are used as the earliest/latest attended event,
    # so make sure the list really is in chronological order.
    attended_events = sorted(attended_events, key=methodcaller('getStartDate'))
    attended_events_set = set(attended_events)
    # We care about events in the whole timespan where the user attended some events.
    # However, this might result in some missed events e.g. if the user was not working for
    # a year and then returned. So we throw away old blocks (or rather adjust the start time
    # to the start time of the newest block)
    first_event_date = _start_of_day(attended_events[0].getStartDate())
    # Midnight *after* the last attended event, i.e. the end of that day.
    last_event_date = _start_of_day(attended_events[-1].getStartDate()) + timedelta(days=1)
    blocks = _get_blocks(_unique_events(idx.iterateObjectsIn(categ_id, first_event_date, last_event_date)),
                         attended_events_set)
    for a, b in _window(blocks):
        # More than 3 months between blocks? Ignore the old block!
        if b[0].getStartDate() - a[-1].getStartDate() > timedelta(weeks=12):
            first_event_date = _start_of_day(b[0].getStartDate())

    # Favorite categories get a higher base score
    favorite = categ in user.favorite_categories
    score = 1 if favorite else 0
    _debug('initial')
    # Attendance percentage goes to the score directly. If the attendance is high chances are good that the user
    # is either very interested in whatever goes on in the category or it's something he has to attend regularly.
    total = sum(1 for _ in _unique_events(idx.iterateObjectsIn(categ_id, first_event_date, last_event_date)))
    if total:
        # Guarded: the index may return nothing for the window, and dividing
        # by zero would abort the scoring.
        attended_block_event_count = sum(1 for e in attended_events_set if e.getStartDate() >= first_event_date)
        # True division; the module enables it via `from __future__ import division`.
        score += attended_block_event_count / total
    _debug('attendance')
    # If there are lots/few unattended events after the last attended one we also update the score with that
    total_after = sum(1 for _ in _unique_events(idx.iterateObjectsIn(categ_id,
                                                                     last_event_date + timedelta(days=1),
                                                                     None)))
    if total_after < total * 0.05:
        score += 0.25
    elif total_after > total * 0.25:
        score -= 0.5
    _debug('unattended new events')
    # Lower the score based on how long ago the last attended event was.
    # We start applying this modifier only if the event has been more than 40 days in the past to avoid
    # it from happening in case of monthly events that are not created early enough.
    # (If the last attended event is in the future the difference is negative and nothing is subtracted.)
    days_since_last_event = (date.today() - last_event_date.date()).days
    if days_since_last_event > 40:
        score -= 0.025 * days_since_last_event
    _debug('days since last event')
    # For events in the future however we raise the score
    now_local = utc2server(nowutc(), False)
    attending_future = [e for e in _unique_events(idx.iterateObjectsIn(categ_id, now_local, last_event_date))
                        if e in attended_events_set]
    if attending_future:
        score += 0.25 * len(attending_future)
        _debug('future event count')
        # NOTE(review): assumes the index yields events in chronological order,
        # so the first element is the next upcoming one - confirm.
        days_to_future_event = (attending_future[0].getStartDate().date() - date.today()).days
        # Bonus shrinks with the cube root of the distance in days, but never drops below 0.1.
        score += max(0.1, -(max(0, days_to_future_event - 2) / 4) ** (1 / 3) + 2.5)
        _debug('days to next future event')
    return score
134 def get_category_scores(user, debug=False):
135 attendance_roles = {'conference_participant', 'contribution_submission', 'abstract_submitter',
136 'registration_registrant', 'evaluation_submitter'}
137 links = avatar_links.get_links(user)
138 ch = ConferenceHolder()
139 attended = filter(None, (ch.getById(eid, True) for eid, roles in links.iteritems() if attendance_roles & roles))
140 categ_events = defaultdict(list)
141 for event in attended:
142 categ_events[event.getOwner()].append(event)
143 return dict((categ, _get_category_score(user, categ, events, debug))
144 for categ, events in categ_events.iteritems())