diff options
author | Jonathan Corbet <corbet@lwn.net> | 2013-02-08 13:52:31 -0700 |
---|---|---|
committer | Jonathan Corbet <corbet@lwn.net> | 2013-02-08 13:52:31 -0700 |
commit | 9ca9663de2bba888729f988a3a9819b298157c39 (patch) | |
tree | 28dfeedcee14eede9367194049c938cfb8579508 | |
parent | 7ffc7561e6593a0775e0f5b9d770aa50f8aca104 (diff) |
Add a files-touched report
This report is useful in combination with -C to see where a specific company
is working within the kernel.
-rwxr-xr-x | gitdm | 81 | ||||
-rw-r--r-- | reports.py | 15 |
2 files changed, 82 insertions, 14 deletions
@@ -15,7 +15,7 @@ import database, csvdump, ConfigFile, reports import getopt, datetime -import os, re, sys, rfc822, string +import os, re, sys, rfc822, string, os.path import logparser from patterns import patterns @@ -45,7 +45,7 @@ Numstat = 0 ReportByFileType = 0 ReportUnknowns = False CompanyFilter = None - +FileReport = None # # Options: # @@ -55,6 +55,7 @@ CompanyFilter = None # -C company Only consider patches from <company> # -d Output individual developer stats # -D Output date statistics +# -f file Write touched-files report to <file> # -h hfile HTML output to hfile # -l count Maximum length for output lists # -n Use numstats instead of generated patch from git log @@ -73,9 +74,9 @@ def ParseOpts(): global MapUnknown, DevReports global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB global CFName, CSVFile, CSVPrefix,DirName, Aggregate, Numstat - global ReportByFileType, ReportUnknowns, CompanyFilter + global ReportByFileType, ReportUnknowns, CompanyFilter, FileReport - opts, rest = getopt.getopt(sys.argv[1:], 'ab:dC:c:Dh:l:no:p:r:stUuwx:yz') + opts, rest = getopt.getopt(sys.argv[1:], 'ab:dC:c:Df:h:l:no:p:r:stUuwx:yz') for opt in opts: if opt[0] == '-a': AkpmOverLt = 1 @@ -89,6 +90,8 @@ def ParseOpts(): DevReports = 0 elif opt[0] == '-D': DateStats = 1 + elif opt[0] == '-f': + FileReport = opt[1] elif opt[0] == '-h': reports.SetHTMLOutput(open(opt[1], 'w')) elif opt[0] == '-l': @@ -120,6 +123,26 @@ def ParseOpts(): elif opt[0] == '-z': DumpDB = 1 +# +# Tracking for file accesses. 
+# +FileAccesses = { } + +def AddAccess(path): + try: + FileAccesses[path] += 1 + except KeyError: + FileAccesses[path] = 1 + +def NoteFileAccess(path): + if path.startswith('a/') or path.startswith('b/'): + path = path[2:] + AddAccess(path) + path, last = os.path.split(path) + while path and path not in ['a', 'b', '/']: + AddAccess(path) + path, last = os.path.split(path) + def LookupStoreHacker(name, email): @@ -177,6 +200,7 @@ class patch: self.testers = [ ] self.reports = [ ] self.filetypes = {} + self.files = [ ] def addreviewer(self, reviewer): self.reviews.append(reviewer) @@ -194,6 +218,10 @@ class patch: else: self.filetypes[filetype] = [added, removed] + def addfile(self, name): + self.files.append(name) + + def parse_numstat(line, file_filter): """ Receive a line of text, determine if fits a numstat line and @@ -232,6 +260,7 @@ def grabpatch(logpatch): p = patch(m.group(1)) ignore = (FileFilter is not None) + need_bline = False for Line in logpatch[1:]: # # Maybe it's an author line? @@ -307,6 +336,24 @@ def grabpatch(logpatch): if FileFilter: ignore = ApplyFileFilter(Line, ignore) # + # If we are tracking files touched, look for a relevant line here. + # + if FileReport and not ignore: + m = patterns['filea'].match(Line) + if m: + file = m.group(1) + if file == '/dev/null': + need_bline = True + continue + p.addfile(m.group(1)) + continue + elif need_bline: + m = patterns['fileb'].match(Line) + if m: + p.addfile(m.group(1)) + need_bline = False + continue + # # OK, maybe it's part of the diff itself. # if not ignore: @@ -316,13 +363,15 @@ def grabpatch(logpatch): if patterns['rem'].match(Line): p.removed += 1 else: - # Get the statistics (lines added/removes) using numstats - # and without requiring a diff (--numstat instead -p) - (filename, filetype, added, removed) = parse_numstat(Line, FileFilter) - if filename: - p.added += added - p.removed += removed - p.addfiletype(filetype, added, removed) + # + # Grab data in the numstat format. 
+ # + (filename, filetype, added, removed) = parse_numstat(Line, FileFilter) + if filename: + p.added += added + p.removed += removed + p.addfiletype(filetype, added, removed) + p.addfile(filename) if '@' in p.author.name: GripeAboutAuthorName(p.author.name) @@ -436,10 +485,15 @@ for logpatch in patches: # Apply the company filter if it exists. # empl = p.author.emailemployer(p.email, p.date) - print CompanyFilter, empl.name if CompanyFilter and empl.name != CompanyFilter: continue # + # Now note the file accesses if need be. + # + if FileReport: + for file in p.files: + NoteFileAccess(file) + # # Record some global information - but only if this patch had # stuff which wasn't ignored. # @@ -511,3 +565,6 @@ reports.EmplReports(elist, TotalChanged, CSCount) if ReportByFileType and Numstat: reports.ReportByFileType(hlist) + +if FileReport: + reports.FileAccessReport(FileReport, FileAccesses) @@ -3,8 +3,8 @@ # # This code is part of the LWN git data miner. # -# Copyright 2007-12 Eklektix, Inc. -# Copyright 2007-12 Jonathan Corbet <corbet@lwn.net> +# Copyright 2007-13 Eklektix, Inc. +# Copyright 2007-13 Jonathan Corbet <corbet@lwn.net> # # This file may be distributed under the terms of the GNU General # Public License, version 2. @@ -451,3 +451,14 @@ def ReportByFileType (hacker_list): BeginReport ('General contributions by type') for filetype, (added, removed, hackers) in total.iteritems(): print filetype, added, removed + +# +# The file access report is a special beast. +# +def FileAccessReport(name, accesses): + outf = open(name, 'w') + files = accesses.keys() + files.sort() + for file in files: + outf.write('%6d %s\n' % (accesses[file], file)) + outf.close() |