Add acceptedStudentsCSVExport function to stats.py script.
[Melange.git] / scripts / stats.py
blob 246b09d3a19e3a468d751491c294e18a4fe5bfc3
1 #!/usr/bin/python2.5
3 # Copyright 2009 the Melange authors.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 """Starts an interactive shell with statistic helpers.
18 """
20 __authors__ = [
21 '"Sverre Rabbelier" <sverre@rabbelier.nl>',
25 import cPickle
26 import datetime
27 import operator
28 import sys
29 import time
31 import interactive
def dateFetch(queryGen, last=None, batchSize=100):
    """Iterator that yields entities one by one, fetching them in batches.

    Every batch is read from a fresh query ordered by last_modified_on. Once
    a full batch has been consumed, the next query resumes after the
    last_modified_on value of the final entity of that batch.

    Args:
      queryGen: callable that returns a new Query object on every call
      last: if set, used to .filter() for last_modified_on, so only entities
        modified after this value are retrieved
      batchSize: how many entities to retrieve in one datastore call (capped
        at 1000)

    Yields:
      The fetched entities, in the order the queries return them.

    Retrieved from http://tinyurl.com/d887ll (AppEngine cookbook).
    """

    # AppEngine will not fetch more than 1000 results
    batchSize = min(batchSize, 1000)

    count = 0

    while True:
        # progress indicator: number of entities yielded so far
        print(count)

        query = queryGen()
        query.order('last_modified_on')
        if last:
            query.filter("last_modified_on > ", last)

        results = query.fetch(batchSize)
        for result in results:
            count += 1
            yield result

        # A short batch means the query is exhausted. The explicit emptiness
        # check also covers a non-positive batchSize, which would otherwise
        # fall through to results[-1] and raise IndexError.
        if not results or len(results) < batchSize:
            return

        # NOTE(review): entities that share exactly this timestamp but did not
        # fit in the current batch are skipped by the '>' filter above.
        last = results[-1].last_modified_on
def addKey(target, fieldname):
    """Returns a copy of target extended with the key name of one field.

    Args:
      target: mapping whose fieldname entry exposes key().name()
      fieldname: name of the entry whose key name should be added

    Returns:
      A copy of target (made with target.copy()) that has an additional
      '<fieldname>_key' entry; target itself is left untouched.
    """

    extended = target.copy()
    referenced = target[fieldname]
    extended['%s_key' % fieldname] = referenced.key().name()
    return extended
def getEntities(model):
    """Returns a function that fetches all entities of the given model.

    Args:
      model: the model class whose entities should be retrieved

    Returns:
      A function without arguments that, when called, returns a dictionary
      mapping the key name of every fetched entity to the entity itself.
    """

    def wrapped():
        """Fetches the entities of model and indexes them by key name."""
        fetched = interactive.deepFetch(lambda: model.all())
        return dict((entity.key().name(), entity) for entity in fetched)

    return wrapped
def getProps(last=None):
    """Returns all proposals as dictionaries, plus a timestamp to resume from.

    Args:
      last: if set, only proposals whose last_modified_on is greater than
        this value are retrieved

    Returns:
      A (proposals, last) tuple. proposals maps the key name of every fetched
      StudentProposal to its toDict(key_order) dictionary. last is the
      last_modified_on of the final entity that was fetched, or the current
      time when nothing was fetched.
    """

    key_order = [
        'link_id', 'scope_path', 'title', 'abstract', 'content',
        'additional_info', '_mentor', 'possible_mentors', 'score',
        'status', '_org', 'created_on', 'last_modified_on']

    from soc.models.student_proposal import StudentProposal

    gen = lambda: StudentProposal.all()

    proposals = {}
    newest = None

    for entity in dateFetch(gen, last):
        proposals[entity.key().name()] = entity.toDict(key_order)
        # Remember the entity explicitly instead of relying on a list
        # comprehension variable leaking into the enclosing scope.
        newest = entity

    if newest is not None:
        last = newest.last_modified_on  # last modified entity
    else:
        last = datetime.datetime.now()

    return proposals, last
def orgStats(target, orgs):
    """Retrieves org stats.

    Args:
      target: dictionary whose values are grouped on their '_org' field
        (presumably the proposals dictionary returned by getProps; verify
        against the caller)
      orgs: dictionary whose values are organization entities

    Returns:
      A (grouped, popularity) tuple of dictionaries. grouped maps each
      organization entity to the group of values that dicts.groupby produced
      for its key, popularity maps the organization's link_id to the size of
      that group.
    """

    from soc.logic import dicts

    # re-index the organizations by datastore key, as the groups are looked
    # up by that key below
    orgs_by_key = dict((org.key(), org) for org in orgs.itervalues())

    grouped = {}
    popularity = {}

    for org_key, members in dicts.groupby(target.values(), '_org').iteritems():
        org = orgs_by_key[org_key]
        grouped[org] = members
        popularity[org.link_id] = len(members)

    return grouped, popularity
def countStudentsWithProposals():
    """Retrieves number of Students who have submitted at least one Student Proposal.

    Returns:
      The number of distinct scope_path values found among all
      StudentProposal entities.
    """

    from soc.models.student_proposal import StudentProposal

    # No getStudentProposals helper exists at module level, so build the
    # fetcher here in the same way the other entity getters are built.
    getStudentProposals = getEntities(StudentProposal)
    proposals = getStudentProposals()

    # every proposal is scoped to the student that wrote it, so counting the
    # distinct scope paths counts the students
    students = set(proposal.scope_path for proposal in proposals.values())

    return len(students)
def printPopularity(popularity):
    """Prints the popularity for the specified proposals.

    Args:
      popularity: dictionary mapping a name to a number; one "name: number"
        line is printed per entry, highest number first
    """

    by_count = operator.itemgetter(1)
    ranking = sorted(popularity.iteritems(), key=by_count, reverse=True)

    for name, amount in ranking:
        print("%s: %d" % (name, amount))
def saveValues(values, saver):
    """Stores a value on every listed organization.

    Each organization is updated in its own datastore transaction, which is
    retried up to 10 times. The key name is printed before each update and
    "done" is printed at the end.

    Args:
      values: dictionary mapping Organization key names to the value to store
      saver: callable invoked as saver(org, value) that sets the value on the
        Organization entity before it is put
    """

    from google.appengine.ext import db

    from soc.models.organization import Organization

    def txn(key_name, new_value):
        """Loads one organization, applies saver and writes it back."""
        entity = Organization.get_by_key_name(key_name)
        saver(entity, new_value)
        entity.put()

    for key_name, new_value in sorted(values.iteritems()):
        print(key_name)
        db.run_in_transaction_custom_retries(10, txn, key_name, new_value)

    print("done")
def addFollower(follower, proposals, add_public=True, add_private=True):
    """Adds a user as follower to the specified proposals.

    The ReviewFollower entities are only constructed here; storing them is
    left to the caller.

    Args:
      follower: the User to add as follower
      proposals: a list with the StudentProposals that should be subscribed to
      add_public: whether the user is subscribed to public updates
      add_private: whether the user should be subscribed to private updates

    Returns:
      A list with one ReviewFollower entity per proposal, in the same order
      as proposals.
    """

    from soc.models.review_follower import ReviewFollower

    followers = []

    for proposal in proposals:
        proposal_key_name = proposal.key().name()

        followers.append(ReviewFollower(
            user=follower,
            link_id=follower.link_id,
            scope=proposal,
            scope_path=proposal_key_name,
            key_name='%s/%s' % (proposal_key_name, follower.link_id),
            subscribed_public=add_public,
            subscribed_private=add_private))

    return followers
def convertProposals(org):
    """Convert all proposals for the specified organization.

    The proposals that are to be accepted are turned into student projects
    and marked 'accepted'. After that every proposal of the organization
    that is still 'new' or 'pending' is marked 'rejected'.

    Args:
      org: the organization for which all proposals will be converted
    """

    from soc.logic.models.student_proposal import logic as proposal_logic
    from soc.logic.models.student_project import logic as project_logic

    to_accept = proposal_logic.getProposalsToBeAcceptedForOrg(org)

    print("accepting %d proposals, with %d slots" % (len(to_accept), org.slots))

    for proposal in to_accept:
        # NOTE(review): the link_id is derived from the clock in 1/100 s
        # steps, so two projects created within the same step would share a
        # link_id - confirm that cannot happen in practice.
        project_fields = dict(
            link_id='t%i' % (int(time.time()*100)),
            scope_path=proposal.org.key().id_or_name(),
            scope=proposal.org,
            program=proposal.program,
            student=proposal.scope,
            title=proposal.title,
            abstract=proposal.abstract,
            mentor=proposal.mentor)

        project_logic.updateOrCreateFromFields(project_fields, silent=True)

        proposal_logic.updateEntityProperties(
            proposal, {'status': 'accepted'}, silent=True)

    # whatever has not been accepted above is still 'new' or 'pending'
    leftover_filter = {'status': ['new', 'pending'], 'org': org}

    querygen = lambda: proposal_logic.getQueryForFields(leftover_filter)
    to_reject = list(interactive.deepFetch(querygen, batchSize=10))

    print("rejecting %d proposals" % len(to_reject))

    rejection = {'status': 'rejected'}

    for proposal in to_reject:
        proposal_logic.updateEntityProperties(proposal, rejection, silent=True)
def startSpam(program_key_name='google/gsoc2009'):
    """Creates the job that is responsible for sending mails.

    A 'setupStudentProposalMailing' job is created in the EMAIL priority
    group, with the key of the program as its only key data.

    Args:
      program_key_name: key name of the program to create the job for;
        defaults to 'google/gsoc2009', the program this helper was
        originally hard-wired to
    """

    from soc.logic.models.job import logic as job_logic
    from soc.logic.models.priority_group import logic as priority_logic
    from soc.logic.models.program import logic as program_logic

    program_entity = program_logic.getFromKeyName(program_key_name)

    priority_group = priority_logic.getGroup(priority_logic.EMAIL)
    job_fields = {
        'priority_group': priority_group,
        'task_name': 'setupStudentProposalMailing',
        'key_data': [program_entity.key()]}

    job_logic.updateOrCreateFromFields(job_fields)
def reviveJobs(amount):
    """Sets jobs that are stuck in 'aborted' to waiting.

    Prints "no dead jobs" when no aborted job is found, otherwise one
    "restarted <id>" line per job that was put back to 'waiting'.

    Args:
      amount: the amount of jobs to revive
    """

    from soc.models.job import Job

    aborted = Job.all().filter('status', 'aborted').fetch(amount)

    if not aborted:
        print("no dead jobs")
        return

    for job in aborted:
        job.status = 'waiting'
        job.put()
        print("restarted %d" % job.key().id())
def deidleJobs(amount):
    """Sets jobs that are stuck in 'started' to waiting.

    Prints "no idle jobs" when no started job is found, otherwise one
    "restarted <id>" line per job that was put back to 'waiting'.

    Args:
      amount: the amount of jobs to deidle
    """

    from soc.models.job import Job

    started = Job.all().filter('status', 'started').fetch(amount)

    if not started:
        print("no idle jobs")
        return

    for job in started:
        job.status = 'waiting'
        job.put()
        print("restarted %d" % job.key().id())
def deleteEntities(model, step_size=25):
    """Deletes all entities of the specified type.

    The entities are fetched and deleted in batches; the running total is
    printed after every batch.

    Args:
      model: the model class whose entities should all be deleted
      step_size: how many entities to fetch and delete per batch
    """

    print("Deleting...")
    count = 0

    while True:
        entities = model.all().fetch(step_size)

        if not entities:
            break

        for entity in entities:
            entity.delete()

        # count what was actually deleted; the final batch is usually
        # smaller than step_size
        count += len(entities)

        print("deleted %d entities" % count)

    print("Done")
def loadPickle(name):
    """Loads a pickle.

    Args:
      name: the file name without the '.dat' extension

    Returns:
      The object that was unpickled from '<name>.dat'.
    """

    # NOTE: unpickling can execute arbitrary code; only load files that were
    # written by dumpPickle() or come from another trusted source.
    f = open(name + '.dat')
    try:
        return cPickle.load(f)
    finally:
        # release the handle even when unpickling fails
        f.close()
def dumpPickle(target, name):
    """Dumps a pickle.

    Args:
      target: the object to pickle
      name: the file name without the '.dat' extension; '<name>.dat' is
        created or overwritten
    """

    f = open("%s.dat" % name, 'w')
    try:
        cPickle.dump(target, f)
    finally:
        # close explicitly so the data is flushed to disk right away instead
        # of whenever the file object happens to be collected
        f.close()
def acceptedStudentsCSVExport(csv_filename, program_key_name):
    """Exports all accepted Students for particular program into CSV file.

    The students are collected from all StudentProject entities; every
    exported row also carries the name of the organization of the student's
    project.

    Args:
      csv_filename: name of the CSV file to write
      program_key_name: key name of the program; '<program_key_name>/<link_id>'
        is used to look up the organization name of each student
    """
    # TODO(Pawel.Solyga): Add additional Program parameter to this method
    # so we export students from different programs
    # TODO(Pawel.Solyga): Make it universal so it works with both GHOP
    # and GSoC programs

    # Student and Organization are not referenced by name below.
    # NOTE(review): presumably imported so the referenced kinds are loaded
    # before the projects are dereferenced - confirm before removing.
    from soc.models.student_project import StudentProject
    from soc.models.student import Student
    from soc.models.organization import Organization

    projects = getEntities(StudentProject)()
    total = len(projects)
    print("Fetched %d Student Projects." % total)
    print("Fetching Student entities from Student Projects.")

    accepted_students = {}
    student_organization = {}
    processed = 0

    for project in projects.values():
        student = project.student
        student_key = student.key().name()
        org_name = project.scope.name

        accepted_students[student_key] = student
        student_organization[student_key] = org_name

        processed += 1
        print(''.join([str(processed), '/', str(total), ' ',
                       student_key, ' (', org_name, ')']))

    print("All Student entities fetched.")

    # columns of the export, in output order
    students_key_order = [
        'link_id', 'given_name', 'surname', 'name_on_documents', 'email',
        'res_street', 'res_city', 'res_state', 'res_country',
        'res_postalcode', 'phone', 'ship_street', 'ship_city', 'ship_state',
        'ship_country', 'ship_postalcode', 'birth_date', 'tshirt_size',
        'tshirt_style', 'name', 'school_name', 'school_country', 'major',
        'degree']

    print("Preparing Students data for export.")
    students_data = [student.toDict(students_key_order)
                     for student in accepted_students.values()]

    print("Adding organization name to Students data.")
    for row in students_data:
        student_key = program_key_name + '/' + row['link_id']
        row['organization'] = student_organization[student_key]

    # the organization column is appended only after toDict() has been used
    students_key_order.append('organization')

    saveDataToCSV(csv_filename, students_data, students_key_order)
    print("Accepted Students exported to %s file." % csv_filename)
def saveDataToCSV(csv_filename, data, key_order):
    """Saves data in order into CSV file.

    This is a helper function used with acceptedStudentsCSVExport().

    Args:
      csv_filename: name of the CSV file to write; created or overwritten
      data: iterable of dictionaries, one per row; the dictionaries are not
        modified
      key_order: list with the column names, in the order in which they
        should appear in the file

    Raises:
      ValueError: raised by csv.DictWriter when a row contains a key that is
        not listed in key_order
    """

    import csv
    import StringIO

    from soc.logic import dicts

    # Everything is rendered in memory first, so the target file is only
    # touched once all rows have been converted successfully.
    file_handler = StringIO.StringIO()

    writer = csv.DictWriter(file_handler, key_order, dialect='excel')
    # header row; presumably dicts.identity maps every column name to itself
    # - verify against soc.logic.dicts
    writer.writerow(dicts.identity(key_order))

    # encode the data to UTF-8 to ensure compatibility
    for row_dict in data:
        encoded_row = {}
        for key in row_dict.keys():
            value = row_dict[key]
            if isinstance(value, unicode):
                encoded_row[key] = value.encode("utf-8")
            elif isinstance(value, str):
                # Byte strings are written as they are: calling encode() on
                # them implies an ASCII decode first, which fails for any
                # non-ASCII byte.
                encoded_row[key] = value
            else:
                encoded_row[key] = str(value)
        writer.writerow(encoded_row)

    # Binary mode: the 'excel' dialect already terminates rows with '\r\n',
    # text mode would turn that into '\r\r\n' on Windows.
    csv_file = open(csv_filename, 'wb')
    try:
        csv_file.write(file_handler.getvalue())
    finally:
        csv_file.close()
def main(args):
    """Main routine.

    Calls interactive.setup() and then hands args, together with a namespace
    dictionary holding the statistics helpers and the model classes, to
    interactive.remote().

    Args:
      args: the command line arguments without the script name
    """

    interactive.setup()

    from soc.models.organization import Organization
    from soc.models.user import User
    from soc.models.student import Student
    from soc.models.mentor import Mentor
    from soc.models.org_admin import OrgAdmin
    from soc.models.job import Job
    from soc.models.student_proposal import StudentProposal
    from soc.models.student_project import StudentProject

    # setters with the saver(org, value) signature that saveValues() expects
    def slotSaver(org, value):
        org.slots = value

    def popSaver(org, value):
        org.nr_applications = value

    def rawSaver(org, value):
        org.slots_calculated = value

    context = {}

    # generic helpers
    context.update(
        load=loadPickle,
        dump=dumpPickle,
        orgStats=orgStats,
        printPopularity=printPopularity,
        saveValues=saveValues,
        getEntities=getEntities,
        deleteEntities=deleteEntities)

    # ready-made fetchers, one per model
    context.update(
        getOrgs=getEntities(Organization),
        getUsers=getEntities(User),
        getStudents=getEntities(Student),
        getMentors=getEntities(Mentor),
        getOrgAdmins=getEntities(OrgAdmin),
        getStudentProjects=getEntities(StudentProject))

    # proposal related helpers
    context.update(
        getProps=getProps,
        countStudentsWithProposals=countStudentsWithProposals,
        convertProposals=convertProposals,
        addFollower=addFollower)

    # model classes
    context.update(
        Organization=Organization,
        Job=Job,
        User=User,
        Student=Student,
        Mentor=Mentor,
        OrgAdmin=OrgAdmin,
        StudentProject=StudentProject,
        StudentProposal=StudentProposal)

    # savers, job maintenance and export
    context.update(
        slotSaver=slotSaver,
        popSaver=popSaver,
        rawSaver=rawSaver,
        startSpam=startSpam,
        reviveJobs=reviveJobs,
        deidleJobs=deidleJobs,
        acceptedStudentsCSVExport=acceptedStudentsCSVExport)

    interactive.remote(args, context)
if __name__ == '__main__':
    # the app id is mandatory, the host is optional
    script_args = sys.argv[1:]

    if not script_args:
        print("Usage: %s app_id [host]" % (sys.argv[0],))
        sys.exit(1)

    main(script_args)