Add google-visualization-python project to Melange repository.
[Melange.git] / scripts / stats.py
blobabc758d38d1d4f3b633a4dc5e5243cf46ec9e7b2
1 #!/usr/bin/python2.5
3 # Copyright 2009 the Melange authors.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 """Starts an interactive shell with statistic helpers.
18 """
20 __authors__ = [
21 '"Sverre Rabbelier" <sverre@rabbelier.nl>',
25 import cPickle
26 import datetime
27 import operator
28 import sys
29 import time
31 import interactive
def dateFetch(queryGen, last=None, batchSize=100):
    """Yields entities ordered by last_modified_on, fetched in batches.

    A fresh query is requested from queryGen for every batch. Every batch
    after the first is filtered on last_modified_on being strictly greater
    than that of the final entity of the previous batch, so entities sharing
    that exact timestamp which did not fit in the previous batch are not
    returned.

    Args:
      queryGen: should return a new Query object on every call
      last: used to .filter() for last_modified_on
      batchSize: how many entities to retrieve in one datastore call,
        capped at 1000

    Yields:
      The fetched entities, one at a time.

    Retrieved from http://tinyurl.com/d887ll (AppEngine cookbook).
    """

    # AppEngine will not fetch more than 1000 results
    batchSize = min(batchSize, 1000)

    count = 0
    done = False

    while not done:
        # progress indicator: number of entities yielded so far
        print(count)
        query = queryGen()
        query.order('last_modified_on')
        if last:
            query.filter("last_modified_on > ", last)
        results = query.fetch(batchSize)
        for result in results:
            count += 1
            yield result
        if not results or batchSize > len(results):
            # An empty or short batch means there is nothing left to fetch.
            # The emptiness check also covers batchSize <= 0, where
            # results[-1] below would raise IndexError.
            done = True
        else:
            last = results[-1].last_modified_on
def addKey(target, fieldname):
    """Returns a copy of target extended with the key name of one field.

    Args:
      target: dictionary whose value under fieldname offers key().name()
      fieldname: the field whose key name is stored as '<fieldname>_key'

    Returns:
      A copy of target with the extra entry; target itself is not changed.
    """

    augmented = target.copy()
    referenced = target[fieldname]
    augmented['%s_key' % fieldname] = referenced.key().name()
    return augmented
def getEntities(model):
    """Returns a function that fetches all entities of the specified model.

    Args:
      model: the model class whose entities should be retrieved

    Returns:
      A function without arguments. When called it returns a dictionary
      that maps key name to entity for everything interactive.deepFetch()
      yields for model.all().
    """

    def wrapped():
        def queryGen():
            return model.all()

        fetched = interactive.deepFetch(queryGen)
        return dict((entity.key().name(), entity) for entity in fetched)

    return wrapped
def getProps(last=None):
    """Returns all proposals as dictionaries, plus a timestamp to resume from.

    Args:
      last: if set, only proposals with last_modified_on greater than this
        value are retrieved

    Returns:
      A tuple (proposals, last). proposals maps the key name of every
      retrieved StudentProposal to the result of its toDict(key_order).
      last is the last_modified_on of the final proposal retrieved, or
      datetime.datetime.now() when nothing was retrieved.
    """

    key_order = [
        'link_id', 'scope_path', 'title', 'abstract', 'content',
        'additional_info', '_mentor', 'possible_mentors', 'score',
        'status', '_org', 'created_on', 'last_modified_on']

    from soc.models.student_proposal import StudentProposal

    gen = lambda: StudentProposal.all()

    proposals = {}
    newest = None

    # Track the final entity explicitly rather than relying on a list
    # comprehension variable still being bound after the comprehension.
    for entity in dateFetch(gen, last):
        proposals[entity.key().name()] = entity.toDict(key_order)
        newest = entity

    if newest is not None:
        last = newest.last_modified_on  # last modified entity
    else:
        last = datetime.datetime.now()

    return proposals, last
def orgStats(target, orgs):
    """Retrieves org stats.

    Args:
      target: dictionary whose values are handed to
        soc.logic.dicts.groupby() with '_org' as grouping field
      orgs: dictionary whose values are organization entities

    Returns:
      A tuple (grouped, popularity). grouped maps an organization entity
      to the group groupby() produced for its key; popularity maps the
      link_id of that organization to the size of the group.
    """

    from soc.logic import dicts

    orgs_by_key = dict((org.key(), org) for org in orgs.values())
    by_org_key = dicts.groupby(target.values(), '_org')

    grouped = {}
    popularity = {}

    for org_key, members in by_org_key.items():
        org = orgs_by_key[org_key]
        grouped[org] = members
        popularity[org.link_id] = len(members)

    return grouped, popularity
def countStudentsWithProposals():
    """Retrieves number of Students who have submitted at least one Student Proposal.

    Returns:
      The number of distinct scope_path values found among all
      StudentProposal entities.
    """

    from soc.models.student_proposal import StudentProposal

    # No getStudentProposals exists at module level, so build the fetcher
    # here, the same way acceptedStudentsCSVExport() builds its own.
    getStudentProposals = getEntities(StudentProposal)
    proposals = getStudentProposals()

    # presumably scope_path identifies the owning Student; each one is
    # counted once no matter how many proposals carry it
    students = set(proposal.scope_path for proposal in proposals.values())

    return len(students)
def printPopularity(popularity):
    """Prints the popularity for the specified proposals.

    Args:
      popularity: dictionary mapping a label to an integer count

    Every entry is printed on its own line as 'label: count', ordered from
    the highest count to the lowest.
    """

    ranking = sorted(
        popularity.items(), key=operator.itemgetter(1), reverse=True)

    for label, amount in ranking:
        print("%s: %d" % (label, amount))
def saveValues(values, saver):
    """Saves the specified values on their organizations.

    Every organization is updated in a separate call to
    db.run_in_transaction_custom_retries(), in sorted key name order.

    Args:
      values: dictionary mapping Organization key name to the value to save
      saver: callable taking (organization, value) that stores the value
        on the organization entity before it is put

    Raises:
      ValueError: if no Organization exists for one of the key names
    """

    from google.appengine.ext import db

    from soc.models.organization import Organization

    def txn(key, value):
        org = Organization.get_by_key_name(key)
        if org is None:
            # Name the offending key instead of failing later with an
            # AttributeError on None (org.put() could never succeed).
            raise ValueError('no Organization with key name %r' % (key,))
        saver(org, value)
        org.put()

    for key, value in sorted(values.items()):
        # progress output: the key name about to be updated
        print(key)
        db.run_in_transaction_custom_retries(10, txn, key, value)

    print("done")
def addFollower(follower, proposals, add_public=True, add_private=True):
    """Builds the entities that make a user follow the specified proposals.

    The ReviewFollower entities are only constructed; this function does
    not store them.

    Args:
      follower: the User to add as follower
      proposals: a list with the StudentProposals that should be subscribed to
      add_public: whether the user is subscribed to public updates
      add_private: whether the user should be subscribed to private updates

    Returns:
      A list with one ReviewFollower entity per proposal, in the order of
      proposals.
    """

    from soc.models.review_follower import ReviewFollower

    def build(proposal):
        proposal_key_name = proposal.key().name()
        return ReviewFollower(
            user=follower,
            link_id=follower.link_id,
            scope=proposal,
            scope_path=proposal_key_name,
            key_name='%s/%s' % (proposal_key_name, follower.link_id),
            subscribed_public=add_public,
            subscribed_private=add_private)

    return [build(proposal) for proposal in proposals]
def convertProposals(org):
    """Convert all proposals for the specified organization.

    The proposals returned by getProposalsToBeAcceptedForOrg() each get a
    student project created or updated for them and are set to 'accepted'.
    After that, every proposal of the organization that still has status
    'new' or 'pending' is set to 'rejected'.

    Args:
      org: the organization for which all proposals will be converted
    """

    from soc.logic.models.student_proposal import logic as proposal_logic
    from soc.logic.models.student_project import logic as project_logic

    to_accept = proposal_logic.getProposalsToBeAcceptedForOrg(org)

    print("accepting %d proposals, with %d slots" % (
        len(to_accept), org.slots))

    for proposal in to_accept:
        # the link_id is derived from the current time in 1/100 seconds
        project_fields = {
            'link_id': 't%i' % (int(time.time()*100)),
            'scope_path': proposal.org.key().id_or_name(),
            'scope': proposal.org,
            'program': proposal.program,
            'student': proposal.scope,
            'title': proposal.title,
            'abstract': proposal.abstract,
            'mentor': proposal.mentor,
        }
        project_logic.updateOrCreateFromFields(project_fields, silent=True)

        proposal_logic.updateEntityProperties(
            proposal, {'status': 'accepted'}, silent=True)

    # whatever is still undecided for this organization gets rejected
    undecided_filter = {
        'status': ['new', 'pending'],
        'org': org,
    }

    def queryGen():
        return proposal_logic.getQueryForFields(undecided_filter)

    to_reject = list(interactive.deepFetch(queryGen, batchSize=10))

    print("rejecting %d proposals" % len(to_reject))

    rejection = {
        'status': 'rejected',
    }

    for proposal in to_reject:
        proposal_logic.updateEntityProperties(
            proposal, rejection, silent=True)
def startSpam():
    """Creates the job that is responsible for sending mails.

    The job gets task name 'setupStudentProposalMailing', the EMAIL
    priority group, and the key of the 'google/gsoc2009' program as its
    key data.
    """

    from soc.logic.models.job import logic as job_logic
    from soc.logic.models.priority_group import logic as priority_logic
    from soc.logic.models.program import logic as program_logic

    program = program_logic.getFromKeyName('google/gsoc2009')
    email_group = priority_logic.getGroup(priority_logic.EMAIL)

    job_logic.updateOrCreateFromFields({
        'priority_group': email_group,
        'task_name': 'setupStudentProposalMailing',
        'key_data': [program.key()],
    })
def startUniqueUserIdConversion():
    """Creates the job that is responsible for adding unique user ids.

    The job gets task name 'setupUniqueUserIdAdder' and the CONVERT
    priority group.
    """

    from soc.logic.models.job import logic as job_logic
    from soc.logic.models.priority_group import logic as priority_logic

    convert_group = priority_logic.getGroup(priority_logic.CONVERT)

    job_logic.updateOrCreateFromFields({
        'priority_group': convert_group,
        'task_name': 'setupUniqueUserIdAdder',
    })
def _requeueJobs(status, amount, none_message):
    """Sets at most amount jobs that have the given status to 'waiting'.

    Args:
      status: the status the jobs to requeue currently have
      amount: the maximum amount of jobs to requeue
      none_message: printed when no job with that status is found
    """

    from soc.models.job import Job

    jobs = Job.all().filter('status', status).fetch(amount)

    if not jobs:
        print(none_message)

    for job in jobs:
        job.status = 'waiting'
        job.put()
        print("restarted %d" % job.key().id())


def reviveJobs(amount):
    """Sets jobs that are stuck in 'aborted' to waiting.

    Args:
      amount: the amount of jobs to revive
    """

    _requeueJobs('aborted', amount, "no dead jobs")


def deidleJobs(amount):
    """Sets jobs that are stuck in 'started' to waiting.

    Args:
      amount: the amount of jobs to deidle
    """

    _requeueJobs('started', amount, "no idle jobs")
def deleteEntities(model, step_size=25):
    """Deletes all entities of the specified type.

    Entities are fetched step_size at a time and deleted one by one until
    a fetch comes back empty. A running total is printed after each batch.

    Args:
      model: the model class whose entities are deleted
      step_size: how many entities to fetch per datastore call
    """

    print("Deleting...")
    count = 0

    while True:
        entities = model.all().fetch(step_size)

        if not entities:
            break

        for entity in entities:
            entity.delete()

        # count what was actually deleted: the final batch may hold fewer
        # than step_size entities
        count += len(entities)

        print("deleted %d entities" % count)

    print("Done")
def loadPickle(name):
    """Loads a pickle.

    Args:
      name: path of the pickle file, without the '.dat' extension

    Returns:
      The object unpickled from '<name>.dat'.
    """

    # NOTE(review): unpickling can execute arbitrary code; only load files
    # that come from a trusted source.
    f = open(name + '.dat')
    try:
        return cPickle.load(f)
    finally:
        # close the file even when unpickling fails
        f.close()
def dumpPickle(target, name):
    """Dumps a pickle.

    Args:
      target: the object to pickle
      name: path of the pickle file, without the '.dat' extension
    """

    f = open("%s.dat" % name, 'w')
    try:
        cPickle.dump(target, f)
    finally:
        # closing flushes the data to disk and releases the handle even
        # when pickling fails
        f.close()
def acceptedStudentsCSVExport(csv_filename, program_key_name):
    """Exports all accepted Students for particular program into CSV file.

    Students are collected from the student reference of every
    StudentProject, and the name of the project's scope is added to each
    row as 'organization'.

    Args:
      csv_filename: path of the CSV file to write
      program_key_name: key name of the program; it is joined with '/' and
        a student's link_id to look up that student's organization
    """

    # TODO(Pawel.Solyga): Add additional Program parameter to this method
    # so we export students from different programs
    # TODO(Pawel.Solyga): Make it universal so it works with both GHOP
    # and GSoC programs

    from soc.models.student_project import StudentProject
    # NOTE(review): Student and Organization are not referenced below;
    # presumably they are imported so the referenced kinds are loaded.
    from soc.models.student import Student
    from soc.models.organization import Organization

    student_projects = getEntities(StudentProject)()
    total = len(student_projects)
    print("Fetched %d Student Projects." % total)
    print("Fetching Student entities from Student Projects.")

    accepted_students = {}
    student_organization = {}

    for position, project in enumerate(student_projects.values()):
        student_key_name = project.student.key().name()
        accepted_students[student_key_name] = project.student
        org_name = project.scope.name
        student_organization[student_key_name] = org_name
        print(str(position + 1) + '/' + str(total) + ' ' + student_key_name + ' (' + org_name + ')')

    print("All Student entities fetched.")

    students_key_order = ['link_id', 'given_name', 'surname',
        'name_on_documents', 'email', 'res_street', 'res_city', 'res_state',
        'res_country', 'res_postalcode', 'phone', 'ship_street', 'ship_city',
        'ship_state', 'ship_country', 'ship_postalcode', 'birth_date',
        'tshirt_size', 'tshirt_style', 'name', 'school_name', 'school_country',
        'major', 'degree']

    print("Preparing Students data for export.")
    students_data = [
        student.toDict(students_key_order)
        for student in accepted_students.values()]

    print("Adding organization name to Students data.")
    for row in students_data:
        lookup_key = program_key_name + '/' + row['link_id']
        row['organization'] = student_organization[lookup_key]

    export_columns = students_key_order + ['organization']

    saveDataToCSV(csv_filename, students_data, export_columns)
    print("Accepted Students exported to %s file." % csv_filename)
def saveDataToCSV(csv_filename, data, key_order):
    """Saves data in order into CSV file.

    This is a helper function used with acceptedStudentsCSVExport().

    Args:
      csv_filename: path of the CSV file to write
      data: iterable of dictionaries, one per row; the dictionaries are
        left unmodified
      key_order: list with the column names, in output order
    """

    import csv
    import StringIO

    from soc.logic import dicts

    file_handler = StringIO.StringIO()

    writer = csv.DictWriter(file_handler, key_order, dialect='excel')
    # presumably the header row (every column name mapped to itself);
    # confirm against soc.logic.dicts.identity
    writer.writerow(dicts.identity(key_order))

    # encode the data to UTF-8 to ensure compatibility
    for row_dict in data:
        # build a separate row so the caller's dictionary keeps its
        # original, unencoded values
        encoded_row = {}
        for key, value in row_dict.items():
            if isinstance(value, basestring):
                encoded_row[key] = value.encode("utf-8")
            else:
                encoded_row[key] = str(value)
        writer.writerow(encoded_row)

    csv_data = file_handler.getvalue()

    csv_file = open(csv_filename, 'w')
    try:
        csv_file.write(csv_data)
    finally:
        # release the handle even when the write fails
        csv_file.close()
def main(args):
    """Main routine.

    Sets up the interactive environment, collects the helpers, models and
    savers of this module in one dictionary and hands that dictionary to
    interactive.remote() as the shell context.

    Args:
      args: the command line arguments without the program name
    """

    interactive.setup()

    # the soc models are imported only after interactive.setup() has run
    from soc.models.organization import Organization
    from soc.models.user import User
    from soc.models.student import Student
    from soc.models.mentor import Mentor
    from soc.models.org_admin import OrgAdmin
    from soc.models.job import Job
    from soc.models.student_proposal import StudentProposal
    from soc.models.student_project import StudentProject

    def slotSaver(org, value):
        org.slots = value

    def popSaver(org, value):
        org.nr_applications = value

    def rawSaver(org, value):
        org.slots_calculated = value

    context = {}

    # pickle helpers
    context.update({
        'load': loadPickle,
        'dump': dumpPickle,
    })

    # statistic and maintenance helpers
    context.update({
        'orgStats': orgStats,
        'printPopularity': printPopularity,
        'saveValues': saveValues,
        'getEntities': getEntities,
        'deleteEntities': deleteEntities,
        'getProps': getProps,
        'countStudentsWithProposals': countStudentsWithProposals,
        'convertProposals': convertProposals,
        'addFollower': addFollower,
        'acceptedStudentsCSVExport': acceptedStudentsCSVExport,
    })

    # ready-made fetchers, one per model
    context.update({
        'getOrgs': getEntities(Organization),
        'getUsers': getEntities(User),
        'getStudents': getEntities(Student),
        'getMentors': getEntities(Mentor),
        'getOrgAdmins': getEntities(OrgAdmin),
        'getStudentProjects': getEntities(StudentProject),
    })

    # the model classes themselves
    context.update({
        'Organization': Organization,
        'Job': Job,
        'User': User,
        'Student': Student,
        'Mentor': Mentor,
        'OrgAdmin': OrgAdmin,
        'StudentProject': StudentProject,
        'StudentProposal': StudentProposal,
    })

    # savers meant to be passed to saveValues
    context.update({
        'slotSaver': slotSaver,
        'popSaver': popSaver,
        'rawSaver': rawSaver,
    })

    # job helpers
    context.update({
        'startSpam': startSpam,
        'reviveJobs': reviveJobs,
        'deidleJobs': deidleJobs,
        'startUniqueUserIdConversion': startUniqueUserIdConversion,
    })

    interactive.remote(args, context)
if __name__ == '__main__':
    # everything after the program name is handed to main(); at least the
    # app_id has to be present
    cli_args = sys.argv[1:]

    if not cli_args:
        print("Usage: %s app_id [host]" % (sys.argv[0],))
        sys.exit(1)

    main(cli_args)