summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jonathan Corbet <corbet@lwn.net> 2013-02-08 13:52:31 -0700
committer: Jonathan Corbet <corbet@lwn.net> 2013-02-08 13:52:31 -0700
commit: 9ca9663de2bba888729f988a3a9819b298157c39 (patch)
tree: 28dfeedcee14eede9367194049c938cfb8579508
parent: 7ffc7561e6593a0775e0f5b9d770aa50f8aca104 (diff)
Add a files-touched report
...useful in combination with -C to see where a specific company is working within the kernel.
-rwxr-xr-x  gitdm       81
-rw-r--r--  reports.py  15
2 files changed, 82 insertions, 14 deletions
diff --git a/gitdm b/gitdm
index d4e2e6d..d794d2f 100755
--- a/gitdm
+++ b/gitdm
@@ -15,7 +15,7 @@
import database, csvdump, ConfigFile, reports
import getopt, datetime
-import os, re, sys, rfc822, string
+import os, re, sys, rfc822, string, os.path
import logparser
from patterns import patterns
@@ -45,7 +45,7 @@ Numstat = 0
ReportByFileType = 0
ReportUnknowns = False
CompanyFilter = None
-
+FileReport = None
#
# Options:
#
@@ -55,6 +55,7 @@ CompanyFilter = None
# -C company Only consider patches from <company>
# -d Output individual developer stats
# -D Output date statistics
+# -f file Write touched-files report to <file>
# -h hfile HTML output to hfile
# -l count Maximum length for output lists
# -n Use numstats instead of generated patch from git log
@@ -73,9 +74,9 @@ def ParseOpts():
global MapUnknown, DevReports
global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
global CFName, CSVFile, CSVPrefix,DirName, Aggregate, Numstat
- global ReportByFileType, ReportUnknowns, CompanyFilter
+ global ReportByFileType, ReportUnknowns, CompanyFilter, FileReport
- opts, rest = getopt.getopt(sys.argv[1:], 'ab:dC:c:Dh:l:no:p:r:stUuwx:yz')
+ opts, rest = getopt.getopt(sys.argv[1:], 'ab:dC:c:Df:h:l:no:p:r:stUuwx:yz')
for opt in opts:
if opt[0] == '-a':
AkpmOverLt = 1
@@ -89,6 +90,8 @@ def ParseOpts():
DevReports = 0
elif opt[0] == '-D':
DateStats = 1
+ elif opt[0] == '-f':
+ FileReport = opt[1]
elif opt[0] == '-h':
reports.SetHTMLOutput(open(opt[1], 'w'))
elif opt[0] == '-l':
@@ -120,6 +123,26 @@ def ParseOpts():
elif opt[0] == '-z':
DumpDB = 1
+#
+# Tracking for file accesses.
+#
+FileAccesses = { }
+
+def AddAccess(path):
+ try:
+ FileAccesses[path] += 1
+ except KeyError:
+ FileAccesses[path] = 1
+
+def NoteFileAccess(path):
+ if path.startswith('a/') or path.startswith('b/'):
+ path = path[2:]
+ AddAccess(path)
+ path, last = os.path.split(path)
+ while path and path not in ['a', 'b', '/']:
+ AddAccess(path)
+ path, last = os.path.split(path)
+
def LookupStoreHacker(name, email):
@@ -177,6 +200,7 @@ class patch:
self.testers = [ ]
self.reports = [ ]
self.filetypes = {}
+ self.files = [ ]
def addreviewer(self, reviewer):
self.reviews.append(reviewer)
@@ -194,6 +218,10 @@ class patch:
else:
self.filetypes[filetype] = [added, removed]
+ def addfile(self, name):
+ self.files.append(name)
+
+
def parse_numstat(line, file_filter):
"""
Receive a line of text, determine if fits a numstat line and
@@ -232,6 +260,7 @@ def grabpatch(logpatch):
p = patch(m.group(1))
ignore = (FileFilter is not None)
+ need_bline = False
for Line in logpatch[1:]:
#
# Maybe it's an author line?
@@ -307,6 +336,24 @@ def grabpatch(logpatch):
if FileFilter:
ignore = ApplyFileFilter(Line, ignore)
#
+ # If we are tracking files touched, look for a relevant line here.
+ #
+ if FileReport and not ignore:
+ m = patterns['filea'].match(Line)
+ if m:
+ file = m.group(1)
+ if file == '/dev/null':
+ need_bline = True
+ continue
+ p.addfile(m.group(1))
+ continue
+ elif need_bline:
+ m = patterns['fileb'].match(Line)
+ if m:
+ p.addfile(m.group(1))
+ need_bline = False
+ continue
+ #
# OK, maybe it's part of the diff itself.
#
if not ignore:
@@ -316,13 +363,15 @@ def grabpatch(logpatch):
if patterns['rem'].match(Line):
p.removed += 1
else:
- # Get the statistics (lines added/removes) using numstats
- # and without requiring a diff (--numstat instead -p)
- (filename, filetype, added, removed) = parse_numstat(Line, FileFilter)
- if filename:
- p.added += added
- p.removed += removed
- p.addfiletype(filetype, added, removed)
+ #
+ # Grab data in the numstat format.
+ #
+ (filename, filetype, added, removed) = parse_numstat(Line, FileFilter)
+ if filename:
+ p.added += added
+ p.removed += removed
+ p.addfiletype(filetype, added, removed)
+ p.addfile(filename)
if '@' in p.author.name:
GripeAboutAuthorName(p.author.name)
@@ -436,10 +485,15 @@ for logpatch in patches:
# Apply the company filter if it exists.
#
empl = p.author.emailemployer(p.email, p.date)
- print CompanyFilter, empl.name
if CompanyFilter and empl.name != CompanyFilter:
continue
#
+ # Now note the file accesses if need be.
+ #
+ if FileReport:
+ for file in p.files:
+ NoteFileAccess(file)
+ #
# Record some global information - but only if this patch had
# stuff which wasn't ignored.
#
@@ -511,3 +565,6 @@ reports.EmplReports(elist, TotalChanged, CSCount)
if ReportByFileType and Numstat:
reports.ReportByFileType(hlist)
+
+if FileReport:
+ reports.FileAccessReport(FileReport, FileAccesses)
diff --git a/reports.py b/reports.py
index bc1e18c..19fb7ee 100644
--- a/reports.py
+++ b/reports.py
@@ -3,8 +3,8 @@
#
# This code is part of the LWN git data miner.
#
-# Copyright 2007-12 Eklektix, Inc.
-# Copyright 2007-12 Jonathan Corbet <corbet@lwn.net>
+# Copyright 2007-13 Eklektix, Inc.
+# Copyright 2007-13 Jonathan Corbet <corbet@lwn.net>
#
# This file may be distributed under the terms of the GNU General
# Public License, version 2.
@@ -451,3 +451,14 @@ def ReportByFileType (hacker_list):
BeginReport ('General contributions by type')
for filetype, (added, removed, hackers) in total.iteritems():
print filetype, added, removed
+
+#
+# The file access report is a special beast.
+#
+def FileAccessReport(name, accesses):
+ outf = open(name, 'w')
+ files = accesses.keys()
+ files.sort()
+ for file in files:
+ outf.write('%6d %s\n' % (accesses[file], file))
+ outf.close()