#!/usr/bin/env python3
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
#
# Uses https://github.com/gitpython-developers/GitPython
# Results published in https://wiki.documentfoundation.org/Development/RegressionHotspots
# Run in LibreOffice core directory directing output to a text file. Shouldn't take more than a minute.

import sys
import re
import ssl
from urllib.request import urlopen, URLError
from io import BytesIO

try:
    import git
except ImportError:
    # GitPython is only needed once main() talks to the repository; keeping
    # the helpers importable without it makes them usable on their own.
    git = None

# Bugzilla CSV query: FIXED LibreOffice bugs carrying the "regression" keyword,
# with a handful of non-code components filtered out. Only the bug ID column
# is requested (columnlist is empty).
BUGZILLA_QUERY_URL = 'https://bugs.documentfoundation.org/buglist.cgi?f1=component&f2=component&f3=component&f4=component&f5=component&n1=1&n2=1&n3=1&n4=1&n5=1&o1=equals&o2=equals&o3=equals&o4=equals&o5=equals&v1=ci-infra&v2=deletionRequest&v3=FirefoxOS%20app&v4=SI-GUI&v5=WWW&columnlist=&keywords=regression%2C%20&keywords_type=allwords&limit=0&product=LibreOffice&resolution=FIXED&ctype=csv&human=0'

# Paths matching this pattern are not counted as regression hotspots.
EXCLUDE_REGEX = re.compile(r'qa/|qadevOOo/|icon-themes/|extras/source/gallery/|extras/source/palettes/|extras/source/templates/|extras/source/truetype/|\.git-hooks|helpcontent2|dictionaries|translations|download\.lst|\.png|\.patch')

# sometimes people accidentally leave out the #, so take that into account in the regexes
BUG_ID_REGEX = re.compile(r"(?:fdo|tdf)#*([0-9]+)")


def get_fixed_regression_bugs():
    """Return the IDs of all fixed regression bugs as a list of ints.

    The IDs are fetched from Bugzilla as CSV. On a fetch error a message is
    written to stderr and the process exits with status 1.
    """
    url = BUGZILLA_QUERY_URL

    # NOTE(review): certificate and hostname verification are switched off
    # here, as in the original script. Confirm this is still required.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    try:
        # the context manager makes sure the connection is closed again
        with urlopen(url, context=ctx) as resp:
            payload = resp.read()
    except URLError as err:
        # report the reason carried by the caught exception instance
        sys.stderr.write('Error fetching {} -> {}\n'.format(url, err.reason))
        sys.exit(1)

    rows = [raw.decode('utf-8').strip() for raw in BytesIO(payload)]
    # the first row is the CSV header; blank rows carry no bug ID
    return [int(row) for row in rows[1:] if row]


def get_dir_counts(file_counts, level):
    """Sum per-file counts into counts per directory prefix.

    file_counts maps '/'-separated file paths to counts; level is the number
    of leading path components that form the directory key. Paths with no
    more than `level` components are left out. Returns a dict mapping the
    directory prefix to the summed count.
    """
    dir_counts = {}
    for filename, count in file_counts.items():
        fileparts = filename.split('/')
        if len(fileparts) > level:
            dirpart = '/'.join(fileparts[:level])
            dir_counts[dirpart] = dir_counts.get(dirpart, 0) + count
    return dir_counts


def print_counts(counts):
    """Print the entries of counts with a count of at least 10, highest first.

    counts maps a name to its count. Entries with equal counts are ordered by
    name, descending.
    """
    ranked = sorted(((count, name) for name, count in counts.items()), reverse=True)

    # wiki page uses a widget to clamp the output while offering a button to expand
    # NOTE(review): in the text this was restored from, the literal printed
    # before and after the list holds only a line break. If wrapper markup
    # for the wiki widget belongs here, it has to be re-added - confirm.
    print('\n')
    for count, name in ranked:
        # we are mainly interested in the hottest spots, so skip counts below 10
        if count >= 10:
            print('%5d %s' % (count, name))
    print('\n')


def main():
    """Count how often each file was touched by regression fixes and print it.

    Progress and diagnostics go to stderr, the report goes to stdout.
    """
    if git is None:
        # fail before the network round trip when the dependency is missing
        sys.stderr.write('GitPython is required, see https://github.com/gitpython-developers/GitPython\n')
        sys.exit(1)

    fixed_regression_ids = get_fixed_regression_bugs()
    sys.stderr.write('found %d fixed regressions: %s\n' % (len(fixed_regression_ids), fixed_regression_ids))
    # set gives constant-time membership tests in the per-commit filter below
    fixed_id_set = set(fixed_regression_ids)

    repo = git.Git('.')

    # build a dictionary of hashes and bug IDs from all commits targeting a report in FDO/TDF Bugzilla
    # (first commit with fdo# aka freedesktop.org is from 1 Oct 2010)
    gitbugs = {}
    buglog = repo.execute(['git', 'log', '--grep=(fdo|tdf)#*', '-E', '--oneline', '--since=1.10.2010'])
    if buglog:
        for line in buglog.split('\n'):
            githash = line.partition(' ')[0]
            # the regex search will ignore any commits hit by the grep where fdo|tdf# occurred below
            # the first line - this is desirable as the referred bug ID should appear in the subject line
            bugid = BUG_ID_REGEX.search(line)
            if bugid:
                gitbugs[githash] = int(bugid.group(1))

    # create a list of bug fix hashes by filtering with the bug IDs we got from the Bugzilla query
    fix_hashes = [githash for githash, bug in gitbugs.items() if bug in fixed_id_set]

    file_counts = {}
    for githash in fix_hashes:
        lognames = repo.execute(['git', 'show', githash, '--pretty=tformat:', '--name-only'])
        if not lognames:
            continue
        for filename in lognames.split('\n'):
            # blank lines in the git output are not file names
            if not filename or EXCLUDE_REGEX.search(filename):
                continue
            sys.stderr.write('regression fix touched file: %s\n' % filename)
            file_counts[filename] = file_counts.get(filename, 0) + 1

    print('=== files ===\n')
    print_counts(file_counts)
    for heading, level in (
        ('\n=== fourth level dirs ===\n', 4),
        ('\n=== third level dirs ===\n', 3),
        ('\n=== second level dirs ===\n', 2),
        ('\n=== top level dirs ===\n', 1),
    ):
        print(heading)
        print_counts(get_dir_counts(file_counts, level))


if __name__ == '__main__':
    main()