1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
|
#!/usr/bin/env python3
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
#
import sys
import re
import sh
from urllib.request import urlopen, URLError
from io import BytesIO
def get_fixed_regression_bugs():
    """Return the ids of all fixed regression bugs reported by Bugzilla.

    Fetches a CSV bug list from bugs.libreoffice.org (product LibreOffice,
    keyword ``regression``, resolution ``FIXED``) and converts every row
    after the header row to an integer bug id.

    Returns:
        list[int]: the bug ids, in the order the server listed them.

    Raises:
        SystemExit: with status 1, after writing a message to stderr, when
            the bug list cannot be fetched.
        ValueError: when a non-blank data row is not an integer.
    """
    url = 'https://bugs.libreoffice.org/buglist.cgi?bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&bug_status=RESOLVED&bug_status=VERIFIED&bug_status=CLOSED&bug_status=NEEDINFO&bug_status=PLEASETEST&columnlist=&keywords=regression%2C%20&keywords_type=allwords&limit=0&list_id=354018&product=LibreOffice&query_format=advanced&resolution=FIXED&ctype=csv&human=0'
    try:
        # The context manager closes the connection even if read() fails.
        with urlopen(url) as resp:
            payload = resp.read()
    except URLError:
        sys.stderr.write('Error fetching {}\n'.format(url))
        sys.exit(1)
    # The first row is the CSV header. Blank rows (e.g. a trailing newline)
    # carry no id and would make int() raise, so they are skipped.
    rows = payload.decode('utf-8').splitlines()[1:]
    return [int(row) for row in rows if row.strip()]
def get_dir_counts(file_counts, level):
    """Aggregate per-file counts into counts per directory prefix.

    Args:
        file_counts: mapping of '/'-separated file path to a count.
        level: number of leading path components that form the prefix.

    Returns:
        dict mapping each ``level``-component prefix to the sum of the
        counts of all paths that have more than ``level`` components.
        Paths with ``level`` components or fewer are left out.
    """
    totals = {}
    for path, hits in file_counts.items():
        segments = path.split('/')
        # Too shallow: the path has no directory at this depth.
        if len(segments) <= level:
            continue
        prefix = '/'.join(segments[:level])
        try:
            totals[prefix] += hits
        except KeyError:
            # First time this prefix is seen.
            totals[prefix] = hits
    return totals
def print_counts(counts):
    """Print one ``count name`` line per entry, highest count first.

    Args:
        counts: mapping of name to integer count.

    Entries are ordered by descending count; equal counts are ordered by
    descending name. The count is right-aligned in a five-character column.
    """
    # Names are unique dict keys, so the (count, name) pairs never tie and a
    # descending sort gives the same order as reversing an ascending one.
    ranked = sorted(((hits, label) for label, hits in counts.items()), reverse=True)
    for hits, label in ranked:
        print('%5d %s' % (hits, label))
if __name__ == '__main__':
    # Script entry point: for every fixed regression bug, find the commits
    # whose message mentions it and count how often each file was touched,
    # then print the totals per directory depth and per file.
    file_counts = {}
    # Matches a `git log --stat` file line (" path/to/file | 3 ++-") and
    # captures the path. Raw string so that `\|` is a regex escape and not
    # an invalid str escape.
    statregex = re.compile(r'^ ([^ ]+) \|')
    fixed_regression_ids = get_fixed_regression_bugs()
    sys.stderr.write('found %d fixed regressions: %s\n' % (len(fixed_regression_ids), fixed_regression_ids))
    for bug_id in fixed_regression_ids:
        sys.stderr.write('working on bug %d\n' % bug_id)
        # Extended regex (-E): a real fdo/tdf alternation, and the id must be
        # followed by a non-digit or end of line so that bug 123 does not
        # also match "tdf#1234".
        grep_pattern = '(fdo|tdf)#%d([^0-9]|$)' % bug_id
        # FIXME: use --numstat instead, which does not abbreviate filenames
        # _iter=True asks sh to yield the output line by line.
        logstat = sh.git('--no-pager', 'log', '-E', '--grep', grep_pattern, '--stat', _iter=True)
        for line in logstat:
            match = statregex.search(str(line))
            if not match:
                continue
            filename = match.group(1)
            sys.stderr.write('regression fix touched file: %s\n' % filename)
            file_counts[filename] = file_counts.get(filename, 0) + 1
    # One report section per directory depth, then the per-file totals.
    sections = (
        ('top level dirs:', 1),
        ('\nsecond level dirs:', 2),
        ('\nthird level dirs:', 3),
        ('\nfourth level dirs:', 4),
    )
    for heading, level in sections:
        print(heading)
        print_counts(get_dir_counts(file_counts, level))
    print('\nfiles:')
    print_counts(file_counts)
|