summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README1
-rw-r--r--csvdump.py44
-rwxr-xr-xgitdm16
3 files changed, 56 insertions, 5 deletions
diff --git a/README b/README
index bc2d6b3..8d3922e 100644
--- a/README
+++ b/README
@@ -52,6 +52,7 @@ be:
-o file Write text output to the given file (default is stdout).
+ -p prefix Dump out the database categorized by changeset and by file type
+ as CSV files named <prefix>-changesets.csv and <prefix>-filetypes.csv.
-r pat Only generate statistics for changes to files whose
name matches the given regular expression.
diff --git a/csvdump.py b/csvdump.py
index 4e81954..b76a5f6 100644
--- a/csvdump.py
+++ b/csvdump.py
@@ -30,6 +30,48 @@ def AccumulatePatch (p, Aggregate):
stat = PeriodCommitHash[authdatekey]
stat.accumulate (p)
+# Rows gathered by store_patch() and written out by save_csv().
+ChangeSets = []   # one row per non-merge changeset
+FileTypes = []    # one row per (changeset, file type) pair
+
+def store_patch(patch):
+    """Record a non-merge patch in ChangeSets and FileTypes.
+
+    Merge commits are skipped.  Every other patch appends one row to
+    ChangeSets:
+
+        [commit, date, email, domain, author, employer, added, removed]
+
+    and one row per entry of patch.filetypes to FileTypes:
+
+        [commit, filetype, added, removed]
+
+    Quote and backslash characters in the author and employer names are
+    replaced with '.' before the names are stored.
+    """
+    if patch.merge:
+        return
+
+    employer = patch.author.emailemployer(patch.email, patch.date)
+    employer = employer.name.replace('"', '.').replace('\\', '.')
+
+    # Apply every replacement to the same string; restarting from
+    # patch.author.name would throw away the earlier substitutions.
+    author = patch.author.name
+    for char in ('"', '\\', "'"):
+        author = author.replace(char, '.')
+
+    try:
+        domain = patch.email.split('@')[1]
+    except (IndexError, AttributeError):
+        # No '@' in the address (or no usable address at all): fall back
+        # to the raw value rather than dropping the changeset.
+        domain = patch.email
+
+    ChangeSets.append([patch.commit, str(patch.date),
+                       patch.email, domain, author, employer,
+                       patch.added, patch.removed])
+    # items() rather than iteritems() so this runs on Python 2 and 3.
+    for (filetype, (added, removed)) in patch.filetypes.items():
+        FileTypes.append([patch.commit, filetype, added, removed])
+
+
+def _write_csv(path, header, rows):
+    """Write the header row followed by rows to the CSV file at path."""
+    # The with-block closes the file even if a write fails part-way.
+    with open(path, 'w') as fd:
+        writer = csv.writer(fd, quoting=csv.QUOTE_NONNUMERIC)
+        writer.writerow(header)
+        writer.writerows(rows)
+
+
+def save_csv (prefix='data'):
+    """Dump the collected changeset and file-type rows as CSV files.
+
+    Writes '<prefix>-changesets.csv' from ChangeSets and
+    '<prefix>-filetypes.csv' from FileTypes.  A file is only created
+    when its list is non-empty.
+
+    prefix -- leading part of both output file names (default 'data').
+    """
+    # Dump the ChangeSets
+    if ChangeSets:
+        # Column labels follow the order in which store_patch() builds a
+        # row: the e-mail address comes before its domain.  The
+        # 'Affliation' spelling is kept so that existing consumers keyed
+        # on the header name keep working.
+        _write_csv('%s-changesets.csv' % prefix,
+                   ['Commit', 'Date', 'Email',
+                    'Domain', 'Name', 'Affliation',
+                    'Added', 'Removed'],
+                   ChangeSets)
+
+    # Dump the file types
+    if FileTypes:
+        _write_csv('%s-filetypes.csv' % prefix,
+                   ['Commit', 'Type', 'Added', 'Removed'],
+                   FileTypes)
+
+
+
def OutputCSV (file):
if file is None:
return
@@ -43,4 +85,4 @@ def OutputCSV (file):
writer.writerow ([author_name, stat.email, empl_name, stat.date,
stat.added, stat.removed])
-__all__ = [ 'OutputCSV' ]
+# Public names of this module; save_csv is listed because gitdm calls it
+# alongside store_patch.
+__all__ = [ 'AccumulatePatch', 'OutputCSV', 'store_patch', 'save_csv' ]
diff --git a/gitdm b/gitdm
index 41b250c..fe5473c 100755
--- a/gitdm
+++ b/gitdm
@@ -35,6 +35,7 @@ DateStats = 0
AuthorSOBs = 1
FileFilter = None
CSVFile = None
+CSVPrefix = None
AkpmOverLt = 0
DumpDB = 0
CFName = 'gitdm.config'
@@ -54,6 +55,7 @@ Numstat = 0
# -l count Maximum length for output lists
# -n Use numstats instead of generated patch from git log
# -o file File for text output
+# -p prefix Prefix for CSV output
# -r pattern Restrict to files matching pattern
# -s Ignore author SOB lines
# -u Map unknown employers to '(Unknown)'
@@ -64,9 +66,9 @@ Numstat = 0
def ParseOpts ():
global MapUnknown, DevReports
global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
- global CFName, CSVFile, DirName, Aggregate, Numstat
+ global CFName, CSVFile, CSVPrefix,DirName, Aggregate, Numstat
- opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:r:suwx:z')
+ opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:p:r:suwx:z')
for opt in opts:
if opt[0] == '-a':
AkpmOverLt = 1
@@ -86,6 +88,8 @@ def ParseOpts ():
Numstat = 1
elif opt[0] == '-o':
reports.SetOutput (open (opt[1], 'w'))
+ elif opt[0] == '-p':
+ CSVPrefix = opt[1]
elif opt[0] == '-r':
print 'Filter on "%s"' % (opt[1])
FileFilter = re.compile (opt[1])
@@ -418,6 +422,7 @@ for logpatch in patches:
hacker.addreport (p)
CSCount += 1
csvdump.AccumulatePatch (p, Aggregate)
+ csvdump.store_patch (p)
print >> sys.stderr, 'Grabbing changesets...done '
if DumpDB:
@@ -446,8 +451,11 @@ if TotalChanged == 0:
if DateStats:
PrintDateStats ()
-csvdump.OutputCSV (CSVFile)
-if CSVFile is not None:
+if CSVPrefix:
+ csvdump.save_csv (CSVPrefix)
+
+if CSVFile:
+ csvdump.OutputCSV (CSVFile)
CSVFile.close ()
if DevReports: