From 5552e8a66d40120793095adb74961a9a49da7e7d Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 15 Aug 2013 10:43:06 -0600 Subject: firstlast: employer tracking So now I can ask: who were people working for when they committed their first patch? Also add options to restrict detailed tracking to a subset of the version history. --- database.py | 2 +- firstlast | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 100 insertions(+), 3 deletions(-) diff --git a/database.py b/database.py index a0cd736..ca48345 100644 --- a/database.py +++ b/database.py @@ -103,7 +103,7 @@ def LookupID (id): except KeyError: return None -def LookupStoreHacker(name, email, mapunknown = False): +def LookupStoreHacker(name, email, mapunknown = True): email = RemapEmail(email) h = LookupEmail(email) if h: # already there diff --git a/firstlast b/firstlast index 3cdd23a..e46917c 100755 --- a/firstlast +++ b/firstlast @@ -23,11 +23,100 @@ def SetupArgs(): required = True) p.add_argument('-d', '--dbdir', help = 'Where to find the config database files', required = False, default = '') + p.add_argument('-f', '--first', help = 'First version for detailed tracking', + required = False, default = '') + p.add_argument('-l', '--last', help = 'Last version for detailed tracking', + required = False, default = '') return p.parse_args() +# +# Try to track the first directory a new developer touches. 
#
FirstDirs = { }     # tracked directory -> number of patches that touched it

def TrackFirstDirs(patch):
    """Credit every distinct directory touched by patch with one hit.

    patch.files is iterated as a sequence of '/'-separated path strings.
    Paths whose first component is arch, drivers or fs are tracked by
    their first two components; all others by the first component only.
    A directory is counted at most once per patch, however many of its
    files the patch touches.
    """
    dirs = [ ]
    for path in patch.files:
        split = path.split('/')
        if split[0] in ['arch', 'drivers', 'fs']:
            track = '/'.join(split[0:2])
        else:
            track = split[0]
        if track not in dirs:
            dirs.append(track)
    for track in dirs:
        FirstDirs[track] = FirstDirs.get(track, 0) + 1

def cmpdirs(d1, d2):
    """cmp-style comparison: directories with higher counts sort first."""
    return FirstDirs[d2] - FirstDirs[d1]

def _print_top(counts, limit = 20):
    """Print the limit highest-valued entries of counts, largest first."""
    # A key-based sorted() replaces keys()/sort(cmpfunc); reverse = True
    # keeps equal counts in their original relative order, exactly as
    # the cmp-based sort did.
    for name in sorted(counts, key = counts.get, reverse = True)[:limit]:
        print('%5d: %s' % (counts[name], name))

def PrintFirstDirs():
    """Print the 20 most frequently counted directories in FirstDirs."""
    _print_top(FirstDirs)

#
# Let's also track who they worked for.
#
FirstEmpls = { }    # employer name -> number of times it was recorded

def TrackFirstEmpl(name):
    """Add one to the count recorded for the employer called name."""
    FirstEmpls[name] = FirstEmpls.get(name, 0) + 1

def cmpempls(e1, e2):
    """cmp-style comparison: employers with higher counts sort first."""
    return FirstEmpls[e2] - FirstEmpls[e1]

def PrintFirstEmpls():
    """Print an 'Employers:' heading and the 20 highest employer counts."""
    print('\nEmployers:')
    _print_top(FirstEmpls)

#
# Version comparison stuff.  Kernel-specific, obviously.
#
def die(gripe):
    """Write gripe to stderr and exit the program with status 1."""
    sys.stderr.write(gripe + '\n')
    sys.exit(1)

def versionmap(vers):
    """Map a kernel version string onto an integer that sorts by release.

    Accepted forms are two or three dot-separated components with an
    optional leading 'v' ('v2.6.39', '3.10', 'v4.2').  A 2.x.N version
    maps to N (the middle component is ignored); a version M.N with
    M >= 3 maps to (M - 2)*100 + N, so 3.N still yields 100 + N.  Anything
    else is reported through die(), which exits the program.
    """
    split = vers.split('.')
    if not (2 <= len(split) <= 3):
        die('funky version %s' % (vers))
    major = split[0]
    if major.startswith('v'):
        major = major[1:]
    try:
        major = int(major)
        if major == 2:
            # The release number of a 2.x kernel is its third component;
            # without one there is nothing to map.
            if len(split) < 3:
                die('funky version %s' % (vers))
            return int(split[2])
        if major >= 3:
            return (major - 2)*100 + int(split[1])
    except ValueError:
        # Non-numeric component (an -rc suffix, for example).
        die('funky version %s' % (vers))
    die('Funky version %s' % (vers))

# Inclusive bounds, in versionmap() units, for detailed tracking.
T_First = 0
T_Last = 999999

def SetTrackingVersions(args):
    """Narrow the tracking window from args.first / args.last.

    An empty (false) value leaves the corresponding bound unchanged.
    """
    global T_First, T_Last
    if args.first:
        T_First = versionmap(args.first)
    if args.last:
        T_Last = versionmap(args.last)

def TrackingVersion(vers):
    """Return True if vers lies within the inclusive tracking window."""
    return T_First <= versionmap(vers) <= T_Last

#
# Main program.
+# args = SetupArgs() VDB = pickle.load(open(args.versiondb, 'r')) ConfigFile.ConfigFile(args.config, args.dbdir) +SetTrackingVersions(args) Firsts = { } Lasts = { } @@ -64,8 +153,16 @@ for h in database.AllHackers(): Firsts[h.firstvers].append(h) except KeyError: Firsts[h.firstvers] = [h] + # + # Track details, but only for versions we care about + # + if TrackingVersion(h.firstvers): + p = h.patches[-1] + TrackFirstDirs(p) + empl = h.emailemployer(p.email, p.date) + TrackFirstEmpl(empl.name) for v in Lasts.keys(): print v, len(Firsts[v]), len(Lasts[v]) - -database.DumpDB() +PrintFirstDirs() +PrintFirstEmpls() -- cgit v1.2.3