From 00c9f82cf51d909ac250222f74e932ff5eda17f0 Mon Sep 17 00:00:00 2001 From: =?utf8?q?C=C3=A9dric=20Bosdonnat?= Date: Sun, 7 Feb 2016 00:19:08 +0100 Subject: [PATCH] Export hackers individual raw data as CSV The data exported for each hacker are the date of the first commit, the date of the last commit, and the commit count. These help distinguish regular core hackers from one-shot contributors. --- README | 3 +++ csvdump.py | 16 +++++++++++++++- database.py | 8 +++++++- gitdm | 11 ++++++++++- 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/README b/README index dab372e..e60285d 100644 --- a/README +++ b/README @@ -46,6 +46,9 @@ be: -h file Generate HTML output to the given file + -H file Export individual developer raw data as CSV. These data could be + used to evaluate the fidelity of developers. + -l num Only list the top entries in each report. -n Use --numstat instead of generated patches to get the statistics. diff --git a/csvdump.py b/csvdump.py index c3f6b5a..f0f5e6b 100644 --- a/csvdump.py +++ b/csvdump.py @@ -89,4 +89,18 @@ def OutputCSV (file): writer.writerow ([author_name, stat.email, empl_name, stat.date, stat.added, stat.removed, stat.changesets]) -__all__ = [ 'AccumulatePatch', 'OutputCSV', 'store_patch' ] +def OutputHackersCSV (file, hlist): + if file is None: + return + file.write ("Name,Last affiliation,Activity Start,Activity End,Commits\n") + for hacker in hlist: + if len(hacker.patches) > 0: + file.write ("\"%s\",%s,%s,%s,%d\n"%(hacker.name, \ + hacker.emailemployer (None, hacker.activity_end).name, \ + hacker.activity_start, hacker.activity_end, \ + len(hacker.patches))) + +__all__ = [ 'AccumulatePatch', 'OutputCSV', 'OutputHackersCSV', 'store_patch' ] + + + diff --git a/database.py b/database.py index cb242c1..bf13227 100644 --- a/database.py +++ b/database.py @@ -25,6 +25,8 @@ class Hacker: self.tested = [ ] self.reports = [ ] self.testcred = self.repcred = 0 + self.activity_start = datetime.date.max + self.activity_end = 
datetime.date.min self.versions = [ ] def addemail (self, email, elist): @@ -34,7 +36,7 @@ class Hacker: def emailemployer (self, email, date): for i in range (0, len (self.email)): - if self.email[i] == email: + if (email is None) or (self.email[i] == email): for edate, empl in self.employer[i]: if edate > date: return empl @@ -46,6 +48,10 @@ class Hacker: self.removed += patch.removed self.changed += max(patch.added, patch.removed) self.patches.append (patch) + if patch.date < self.activity_start: + self.activity_start = patch.date + if patch.date > self.activity_end: + self.activity_end= patch.date # # Note that the author is represented in this release. diff --git a/gitdm b/gitdm index c2b20cd..61318ad 100755 --- a/gitdm +++ b/gitdm @@ -36,6 +36,7 @@ AuthorSOBs = 1 FileFilter = None CSVFile = None CSVPrefix = None +HackersCSV = None AkpmOverLt = 0 DumpDB = 0 CFName = 'gitdm.config' @@ -57,6 +58,7 @@ FileReport = None # -D Output date statistics # -f file Write touched-files report to # -h hfile HTML output to hfile +# -H file Export individual developer raw data as CSV # -l count Maximum length for output lists # -n Use numstats instead of generated patch from git log # -o file File for text output @@ -75,8 +77,9 @@ def ParseOpts(): global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB global CFName, CSVFile, CSVPrefix,DirName, Aggregate, Numstat global ReportByFileType, ReportUnknowns, CompanyFilter, FileReport + global HackersCSV - opts, rest = getopt.getopt(sys.argv[1:], 'ab:dC:c:Df:h:l:no:p:r:stUuwx:yz') + opts, rest = getopt.getopt(sys.argv[1:], 'ab:dC:c:Df:H:h:l:no:p:r:stUuwx:yz') for opt in opts: if opt[0] == '-a': AkpmOverLt = 1 @@ -94,6 +97,8 @@ def ParseOpts(): FileReport = opt[1] elif opt[0] == '-h': reports.SetHTMLOutput(open(opt[1], 'w')) + elif opt[0] == '-H': + HackersCSV = open (opt[1], 'w') elif opt[0] == '-l': reports.SetMaxList(int(opt[1])) elif opt[0] == '-n': @@ -551,6 +556,10 @@ if TotalChanged == 0: if DateStats: PrintDateStats() 
+if HackersCSV: + csvdump.OutputHackersCSV (HackersCSV, hlist); + HackersCSV.close () + if CSVPrefix: csvdump.save_csv(CSVPrefix) -- 2.11.4.GIT