#!/usr/bin/python
"""Report enum values that are defined but never touched.

The script reads "definition:" and "touch:" records from the
loplugin.unusedenumvalues log and writes every definition that has no
matching touch record (and is not excluded below) to a report file.
"""

import sys
import re
import io

# (enum type, enum value) pairs seen in "definition:" records
definitionSet = set()
# (enum type, enum value) -> source location of the definition
definitionToSourceLocationMap = dict()
# (enum type, enum value) pairs seen in "touch:" records
touchSet = set()

# exclude some stuff, mostly because they are some kind of definition of external file formats
excludedSourceFiles = {
    "include/svx/msdffdef.hxx",
    "sw/source/filter/ww8/fields.hxx",
    "sw/source/filter/inc/wwstyles.hxx",
    "sw/inc/toxe.hxx",
    "sw/inc/poolfmt.hxx",
    "sw/inc/hintids.hxx",
    "vcl/inc/unx/XIM.h",
}

excludedTypes = {
    "SwVarFormat",
    "RES_FIELDS",
    "SwFillOrder",
    "SwIoDetect",
    "SwDocumentSettingsPropertyHandles",
    "SalGenericDataType",
    "SwDateSubFormat",
    "XclFutureRecType",
    "ds_status",
    "MediaCommand",
    "EmfPlusHatchStyle",
}

# clang does not always use exactly the same numbers in the type-parameter vars it generates
# so I need to substitute them to ensure we can match correctly.
normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")


def normalizeTypeParams( line ):
    """Return *line* with every "type-parameter-N-M" replaced by "type-parameter-?-?".

    This makes names that differ only in the generated numbers compare equal.
    """
    return normalizeTypeParamsRegex.sub("type-parameter-?-?", line)


# The parsing here is designed to avoid grabbing stuff which is mixed in from gbuild.
# I have not yet found a way of suppressing the gbuild output.
# Read the plugin log. Relevant records are tab-separated:
#   definition:  <enum type>  <enum value>  <source location>
#   touch:       <enum type>  <enum value>
# The file is opened in text mode so that splitting on "\t" works on Python 3
# as well as Python 2 (a binary read yields bytes, which cannot be split by str).
with io.open("loplugin.unusedenumvalues.log", "r", buffering=1024*1024, encoding="utf-8") as txt:
    for line in txt:
        tokens = line.strip().split("\t")
        # The length checks skip truncated records instead of raising IndexError.
        if tokens[0] == "definition:" and len(tokens) >= 4:
            funcInfo = (normalizeTypeParams(tokens[1]), tokens[2])
            definitionSet.add(funcInfo)
            definitionToSourceLocationMap[funcInfo] = tokens[3]
        elif tokens[0] == "touch:" and len(tokens) >= 3:
            callInfo = (normalizeTypeParams(tokens[1]), tokens[2])
            touchSet.add(callInfo)

# Invert the definitionToSourceLocationMap
# If we see more than one method at the same sourceLocation, it's being autogenerated as part of a template
# and we should just ignore
sourceLocationToDefinitionMap = {}
# .items() is available on both Python 2 and 3 (iteritems() is Python 2 only).
for k, v in definitionToSourceLocationMap.items():
    sourceLocationToDefinitionMap.setdefault(v, []).append(k)
for definitions in sourceLocationToDefinitionMap.values():
    if len(definitions) > 1:
        definitionSet.difference_update(definitions)

# Source locations whose definitions are never reported.
ignoredLocationPrefixes = (
    # ignore external source code
    "external/",
    # ignore build folder
    "workdir/",
    # ignore our stable/URE/UNO api
    "include/com/",
    "include/cppu/",
    "include/cppuhelper/",
    "include/osl/",
    "include/rtl/",
    "include/sal/",
    "include/salhelper/",
    "include/systools/",
    "include/typelib/",
    "include/uno/",
    # structure definitions
    "lotuswordpro/",
)

untouchedSet = set()
for d in definitionSet:
    if d in touchSet:
        continue
    srcLoc = definitionToSourceLocationMap[d]
    if srcLoc.startswith(ignoredLocationPrefixes):
        continue
    srcLocWithoutLineNo = srcLoc.split(":")[0]
    if srcLocWithoutLineNo in excludedSourceFiles or d[0] in excludedTypes:
        continue
    # used in templates to find the last member of an enum
    if d[1] == "LAST" or d[1].endswith("_END"):
        continue
    # used to aid in alignment of enum values
    if d[1].endswith("FORCE_EQUAL_SIZE"):
        continue
    clazz = d[0] + " " + d[1]
    untouchedSet.add((clazz, srcLoc))


# sort the results using a "natural order" so sequences like [item1,item2,item10] sort nicely
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
    """Return a sort key for *s* that compares runs of ASCII digits numerically.

    The string is split on digit runs; digit pieces become ints and the
    remaining pieces are lower-cased, so "item2" sorts before "item10".
    """
    return [int(text) if text.isdigit() else text.lower()
            for text in re.split(_nsre, s)]


# sort results by name and line number
tmp1list = sorted(untouchedSet, key=lambda v: natural_sort_key(v[1]))

# print out the results: the source location on one line, then the enum value below it
# io.open keeps the output in text mode on both Python 2 and 3, matching the text-mode read above.
with io.open("loplugin.unusedenumvalues.report-untouched", "wt", encoding="utf-8") as f:
    for t in tmp1list:
        f.write( t[1] + "\n" )
        f.write( " " + t[0] + "\n" )