#!/usr/bin/python # # Create a graph of patch flow into the mainline. # # This code is part of the LWN git data miner. # # Copyright 2007-11 Eklektix, Inc. # Copyright 2007-11 Jonathan Corbet # # This file may be distributed under the terms of the GNU General # Public License, version 2. # import sys from patterns import patterns # # The various types of commit we understand. # class Commit: def __init__(self, id, parent): self.id = id self.parent = parent self.ismerge = 0 self.treepriority = 0 # # Merges are special # class Merge (Commit): def __init__(self, id, parent): Commit.__init__(self, id, parent) self.ismerge = 1 self.internal = 1 # Two branches within a repo? self.parents = [ parent ] def addparent(self, parentid): self.parents.append(parentid) def addtree(self, tree): self.tree = tree self.internal = 0 # # Trees: where the commits come from. # class Tree: def __init__(self, name, url): self.name = name self.url = url self.inputs = [ ] self.commits = [ ] def addcommit(self, id): self.commits.append(id) def addinput(self, tree): if tree not in self.inputs: self.inputs.append(tree) # print '%s -> %s' % (tree.name, self.name) Mainline = Tree('Mainline', 'git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git') KnownTrees = { Mainline.url: Mainline } def NormalizeURL(url): if url[:4] == 'git:': return url if url == '../net-2.6/': url = 'git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6' url = url.replace('master.kernel.org:', 'git://git.kernel.org') if url[-18:] == 'torvalds/linux-2.6': url += '.git' if url[:8] == '/pub/scm': url = 'git://git.kernel.org' + url return url def LookupTree(url): url = NormalizeURL(url) try: return KnownTrees[url] except KeyError: tree = Tree(url, url) KnownTrees[url] = tree return tree # # We track which tree every commit belongs to. # CommitTrees = { } class CTEntry: def __init__ (self, tree, priority, path): self.tree = tree self.priority = priority self.path = path def AddCommitTree(id, entry): # print 'add: ', id, '[', # for tree in entry.path: # print tree.name, # print ']' try: oldentry = CommitTrees[id] if entry.priority < oldentry.priority: CommitTrees[id] = entry except KeyError: CommitTrees[id] = entry def LookupCommitTree(id): try: return CommitTrees[id] except KeyError: print 'Unfound commit %s' % (id) return CTEntry (Mainline, 0, []) # # Input handling with one-line pushback. # SavedLine = None Input = sys.stdin def GetLine(): global SavedLine if SavedLine: ret = SavedLine SavedLine = None return ret return Input.readline() def SaveLine(line): global SavedLine SavedLine = line # # Pull in a commit and see what it is. # def GetCommit(): # # Skip junk up to the next commit. # while 1: line = GetLine() if not line: return None m = patterns['commit'].match(line) if m: break # # Look at the commit and see how many parents we have. # ids = m.group(1).split() if len(ids) <= 1: if len(CommitTrees.values()) > 0: print 'No-Parent commit:', ids[0] return GetCommit() print 'Did you run git with --parents?' print ids sys.exit(1) if len(ids) == 2: # Simple commit return Commit(ids[0], ids[1]) # # OK, we have a merge. # merge = Merge(ids[0], ids[1]) for id in ids[2:]: merge.addparent(id) # # We need to figure out what kind of merge it is, so read through the # descriptive text to the merge line. # while 1: line = GetLine() if not line: print 'EOF looking for merge line' return None # # Maybe it's an external merge? # m = patterns['ExtMerge'].match(line) if m: merge.addtree(LookupTree(m.group(2))) return merge # # OK, maybe it's internal # if patterns['IntMerge'].match(line) or patterns['IntMerge2'].match(line): #print 'Internal:', line[:-1] merge.internal = 1 return merge m = patterns['commit'].match(line) if m: print 'Hit next commit (%s) looking for merge line' % (m.group(1)) SaveLine(line) return GetCommit() # # Print out a tree and its inputs # def PrintTree(tree, indent = ''): print '%s%4d %s' % (indent, len(tree.commits), tree.name) for input in tree.inputs: PrintTree(input, indent + ' ') # # Let's try to build a data structure giving the patch flows. # class FlowNode: def __init__(self, tree): self.tree = tree self.inputs = { } self.commits = 0 def BuildFlowTree(): rootnode = FlowNode(Mainline) notree = Tree('[No tree]', '') for centry in CommitTrees.values(): path = centry.path if not path: path = [ notree ] FillFlowPath(path, rootnode) return rootnode def FillFlowPath(path, node): node.commits += 1 if len(path) == 0: return next, rest = path[0], path[1:] try: nextnode = node.inputs[next.name] except KeyError: nextnode = node.inputs[next.name] = FlowNode(next) return FillFlowPath(rest, nextnode) def PrintFlowTree(ftree, indent = ''): print '%s%3d %s' % (indent, ftree.commits, ftree.tree.name) inputs = ftree.inputs.values() inputs.sort(GVSort) for input in inputs: PrintFlowTree(input, indent + ' ') # # Something for graphviz # GVHeader = '''digraph "runtree" { graph [ label = "Patch flow into the mainline", concentrate = true, nodesep = 0.1, rankdir = LR ]; node [shape = polygon, sides = 4, height = 0.3 fontsize = 8]; ''' MainlineCommits = 0 def GVTree(ftree): global MainlineCommits MainlineCommits = ftree.commits gvf = open('runtree.gv', 'w') gvf.write(GVHeader) inputs = ftree.inputs.values() inputs.sort(GVSort) for input in inputs: GVPrintNode(gvf, input, 'Mainline') gvf.write('}\n') def GVNodeName(treename): sname = treename.split('/') if treename.find('kernel.org') >= 0: return '%s/%s' % (sname[-2], sname[-1]) sep = treename.find ('://') if sep > 0: return treename[sep+3:] return treename def GVSort(n1, n2): return n2.commits - n1.commits def GVPrintNode(gvf, node, parent): name = GVNodeName(node.tree.name) gvf.write ('"%s" -> "%s" [taillabel = "%d", labelfontsize = 8' % (name, parent, node.commits)) gvf.write (', arrowsize = 0.5') if MainlineCommits/node.commits < 20: gvf.write(', color = red') elif MainlineCommits/node.commits < 100: gvf.write(', color = orange'); gvf.write(']\n') inputs = node.inputs.values() if inputs: inputs.sort(GVSort) for input in inputs: GVPrintNode(gvf, input, name) # # Main code. # commit = GetCommit() ncommits = 0 while commit: ncommits += 1 entry = LookupCommitTree(commit.id) tree = entry.tree priority = entry.priority tree.addcommit(commit.id) # # For regular commits, just remember the tree involved # if not commit.ismerge: AddCommitTree(commit.parent, entry) # # For merges we have to deal with all the parents. # else: AddCommitTree(commit.parents[0], CTEntry (tree, priority, entry.path)) if commit.internal: for p in commit.parents[1:]: path = entry.path + [tree] AddCommitTree(p, CTEntry (tree, priority, entry.path)) else: for p in commit.parents[1:]: path = entry.path + [commit.tree] AddCommitTree(p, CTEntry (commit.tree, priority + 1, path)) if commit.tree is not Mainline: tree.addinput(commit.tree) commit = GetCommit() #PrintTree(Mainline) ftree = BuildFlowTree() PrintFlowTree(ftree) GVTree(ftree) print '%d commits total, %d trees' % (MainlineCommits, len (KnownTrees.keys()))