diff options
Diffstat (limited to 'csvdump.py')
-rw-r--r-- | csvdump.py | 88 |
1 files changed, 88 insertions, 0 deletions
#
# aggregate per-month statistics for people
#
import csv
import datetime
import sys


def _sanitise(text, quotes=False):
    """Return *text* with characters that are awkward in CSV replaced by '.'.

    Double quotes and backslashes are always replaced; single quotes are
    replaced too when *quotes* is true.
    """
    text = text.replace('"', '.').replace('\\', '.')
    if quotes:
        text = text.replace("'", '.')
    return text


def _open_csv(path):
    """Open *path* for writing in the mode the csv module asks for.

    Python 3 wants text mode with newline='' and Python 2 wants binary
    mode; otherwise the writer's own '\\r\\n' terminator can be translated
    a second time on platforms that rewrite line endings.
    """
    if sys.version_info[0] >= 3:
        return open(path, 'w', newline='')
    return open(path, 'wb')


def _write_csv(path, header, rows):
    """Write *header* followed by *rows* to *path*, closing the file after."""
    with _open_csv(path) as fd:
        writer = csv.writer(fd, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow(header)
        writer.writerows(rows)


class CSVStat:
    """Running added/removed totals for one author in one period."""

    def __init__(self, name, email, employer, date):
        self.name = name
        self.email = email
        self.employer = employer
        self.added = self.removed = 0
        self.date = date

    def accumulate(self, p):
        """Add the added/removed counts of patch *p* to the totals."""
        self.added += p.added
        self.removed += p.removed


# Maps "<author name>-<period>" to the CSVStat for that author and period.
PeriodCommitHash = {}


def AccumulatePatch(p, Aggregate):
    """Fold patch *p* into the per-author, per-period statistics.

    The period is the ISO year and week number when *Aggregate* is
    'week'; for any other value it is the first day of the patch's month.
    The employer is looked up once, when the first patch for a given
    author/period key is seen.
    """
    if Aggregate == 'week':
        iso = p.date.isocalendar()
        date = "%.2d-%.2d" % (iso[0], iso[1])
    else:
        date = "%.2d-%.2d-01" % (p.date.year, p.date.month)
    authdatekey = "%s-%s" % (p.author.name, date)
    stat = PeriodCommitHash.get(authdatekey)
    if stat is None:
        empl = p.author.emailemployer(p.email, p.date)
        stat = CSVStat(p.author.name, p.email, empl, date)
        PeriodCommitHash[authdatekey] = stat
    stat.accumulate(p)


# Rows collected by store_patch() and written out by save_csv().
ChangeSets = []
FileTypes = []


def store_patch(patch):
    """Record one non-merge patch in ChangeSets and FileTypes.

    Merge patches are ignored.  A ChangeSets row is
    [commit, date, email, domain, author, employer, added, removed];
    one FileTypes row [commit, filetype, added, removed] is added per
    entry of patch.filetypes.
    """
    if patch.merge:
        return
    employer = patch.author.emailemployer(patch.email, patch.date)
    employer = _sanitise(employer.name)
    # Strip ", \ and ' in one pass; previously the ' replacement started
    # again from the raw name and threw the first two replacements away.
    author = _sanitise(patch.author.name, quotes=True)
    try:
        domain = patch.email.split('@')[1]
    except (IndexError, AttributeError):
        # No '@' (or no usable address at all): fall back to the raw value.
        domain = patch.email
    ChangeSets.append([patch.commit, str(patch.date),
                       patch.email, domain, author, employer,
                       patch.added, patch.removed])
    # .items() works on both Python 2 and 3 and matches OutputCSV().
    for filetype, (added, removed) in patch.filetypes.items():
        FileTypes.append([patch.commit, filetype, added, removed])


def save_csv(prefix='data'):
    """Write the collected rows to <prefix>-changesets.csv and
    <prefix>-filetypes.csv.

    A file is only written when there are rows for it.
    """
    # Dump the ChangeSets
    if ChangeSets:
        # Header order follows the rows built in store_patch (email before
        # domain).  The 'Affliation' spelling is kept as-is so the column
        # name in existing output does not change.
        _write_csv('%s-changesets.csv' % prefix,
                   ['Commit', 'Date', 'Email',
                    'Domain', 'Name', 'Affliation',
                    'Added', 'Removed'],
                   ChangeSets)

    # Dump the file types
    if FileTypes:
        _write_csv('%s-filetypes.csv' % prefix,
                   ['Commit', 'Type', 'Added', 'Removed'],
                   FileTypes)


def OutputCSV(file):
    """Write the accumulated per-period statistics to *file* as CSV.

    Does nothing when *file* is None.  The caller owns *file*; it is not
    closed here.
    """
    if file is None:
        return
    writer = csv.writer(file, quoting=csv.QUOTE_NONNUMERIC)
    writer.writerow(['Name', 'Email', 'Affliation', 'Date',
                     'Added', 'Removed'])
    for stat in PeriodCommitHash.values():
        # sanitise names " is common and \" sometimes too
        empl_name = _sanitise(stat.employer.name)
        author_name = _sanitise(stat.name)
        writer.writerow([author_name, stat.email, empl_name, stat.date,
                         stat.added, stat.removed])


__all__ = ['AccumulatePatch', 'OutputCSV', 'store_patch']