diff options
Diffstat (limited to 'bin/find-can-be-private-symbols.py')
-rwxr-xr-x | bin/find-can-be-private-symbols.py | 160 |
1 files changed, 87 insertions, 73 deletions
diff --git a/bin/find-can-be-private-symbols.py b/bin/find-can-be-private-symbols.py index a5e2459a4a01..da28310196f7 100755 --- a/bin/find-can-be-private-symbols.py +++ b/bin/find-can-be-private-symbols.py @@ -17,28 +17,27 @@ # import subprocess -import sys import re -exported_symbols = set() -imported_symbols = set() -# standalone functions that are exported but not imported -unused_function_exports = set() -classes_with_exported_symbols = set() -classes_with_imported_symbols = set() +exported_symbols1 = set() +imported_symbols1 = set() +exported_symbols2 = set() # decoded +imported_symbols2 = set() # decoded # all names that exist in the source code -all_source_names = set() +#all_source_names = set() -subprocess_find_all_source_names = subprocess.Popen("git grep -oh -P '\\b\\w\\w\\w+\\b' -- '*.h*' | sort -u", stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True) -with subprocess_find_all_source_names.stdout as txt: - for line in txt: - line = line.strip() - all_source_names.add(line) -subprocess_find_all_source_names.terminate() +#subprocess_find_all_source_names = subprocess.Popen("git grep -oh -P '\\b\\w\\w\\w+\\b' -- '*.h*' | sort -u", +# stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True) +#with subprocess_find_all_source_names.stdout as txt: +# for line in txt: +# line = line.strip() +# all_source_names.add(line) +#subprocess_find_all_source_names.terminate() # find all our shared libs -subprocess_find = subprocess.Popen("find ./instdir -name *.so && find ./workdir/LinkTarget/CppunitTest -name *.so", stdout=subprocess.PIPE, shell=True) +subprocess_find = subprocess.Popen("find ./instdir -name *.so && find ./workdir/LinkTarget/CppunitTest -name *.so", + stdout=subprocess.PIPE, shell=True) with subprocess_find.stdout as txt: for line in txt: sharedlib = line.strip() @@ -51,8 +50,8 @@ with subprocess_find.stdout as txt: for line2_bytes in txt2: line2 = line2_bytes.strip().decode("utf-8") if line_regex.match(line2): - sym = line2.split(" ")[2] - exported_symbols.add(sym) + sym = line2.split(" ")[2].strip() + exported_symbols1.add(sym) subprocess_nm.terminate() # look for imported symbols subprocess_objdump = subprocess.Popen(b"objdump -T " + sharedlib, stdout=subprocess.PIPE, shell=True) @@ -66,10 +65,10 @@ with subprocess_find.stdout as txt: # 0000000000000000 DF *UND* 0000000000000000 _ZN16FilterConfigItem10WriteInt32ERKN3rtl8OUStringEi for line2_bytes in txt2: line2 = line2_bytes.strip().decode("utf-8") - if not("*UND*"in line2): continue + if "*UND*" not in line2: continue tokens = line2.split(" ") - sym = tokens[len(tokens)-1] - imported_symbols.add(sym) + sym = tokens[len(tokens)-1].strip() + imported_symbols1.add(sym) subprocess_objdump.terminate() subprocess_find.terminate() @@ -86,52 +85,82 @@ with subprocess_find.stdout as txt: for line2_bytes in txt2: line2 = line2_bytes.strip().decode("utf-8") sym = line2.split(" ")[1] - imported_symbols.add(sym) + imported_symbols1.add(sym) subprocess_find.terminate() -diff = exported_symbols - imported_symbols -print("exported = " + str(len(exported_symbols))) -print("imported = " + str(len(imported_symbols))) -print("diff = " + str(len(diff))) +#progress = 0; +#for sym in sorted(imported_symbols - exported_symbols): +# progress += 1 +# if (progress % 128 == 0): print( str(int(progress * 100 / len(diff))) + "%") +# filtered_sym = subprocess.check_output(["c++filt", sym]).strip().decode("utf-8") +# if filtered_sym.startswith("non-virtual thunk to "): filtered_sym = filtered_sym[21:] +# elif filtered_sym.startswith("virtual thunk to "): filtered_sym = filtered_sym[17:] +# print("Symbol imported but not exported? " + filtered_sym) -progress = 0; -for sym in sorted(exported_symbols): +# Now we have to symbolize before comparing because sometimes (due to thunks) two +# different encoded names symbolize to the same method/func name +# +progress = 0 +progress_max_len = len(imported_symbols1) + len(exported_symbols1) +for sym in imported_symbols1: progress += 1 - if (progress % 128 == 0): print( str(int(progress * 100 / len(exported_symbols))) + "%") + if (progress % 128 == 0): print( str(int(progress * 100 / progress_max_len)) + "%") filtered_sym = subprocess.check_output(["c++filt", sym]).strip().decode("utf-8") if filtered_sym.startswith("non-virtual thunk to "): filtered_sym = filtered_sym[21:] elif filtered_sym.startswith("virtual thunk to "): filtered_sym = filtered_sym[17:] - i = filtered_sym.find("(") - i = filtered_sym.rfind("::", 0, i) - if i != -1: - classname = filtered_sym[:i] - # find classes where all of the exported symbols are not imported - classes_with_exported_symbols.add(classname) - else: - func = filtered_sym - # find standalone functions which are exported but not imported - if not(sym in imported_symbols): unused_function_exports.add(func) - -progress = 0; -for sym in sorted(imported_symbols): + imported_symbols2.add(filtered_sym) +progress = 0 +for sym in exported_symbols1: progress += 1 - if (progress % 128 == 0): print( str(int(progress * 100 / len(imported_symbols))) + "%") + if (progress % 128 == 0): print( str(int(progress * 100 / progress_max_len)) + "%") filtered_sym = subprocess.check_output(["c++filt", sym]).strip().decode("utf-8") if filtered_sym.startswith("non-virtual thunk to "): filtered_sym = filtered_sym[21:] elif filtered_sym.startswith("virtual thunk to "): filtered_sym = filtered_sym[17:] - i = filtered_sym.find("(") - i = filtered_sym.rfind("::", 0, i) - if i != -1: - classname = filtered_sym[:i] - classes_with_imported_symbols.add(classname) + exported_symbols2.add(filtered_sym) + +unused_exports = exported_symbols2 - imported_symbols2 +print("exported = " + str(len(exported_symbols2))) +print("imported = " + str(len(imported_symbols2))) +print("unused_exports = " + str(len(unused_exports))) + +#def extractFunctionNameFromSignature(sym): +# i = sym.find("(") +# if i == -1: return sym +# return sym[:i] + +# for each class, count how many symbols will become hidden if we mark the class as hidden +can_be_hidden_count = dict() +for sym in exported_symbols2: + i = sym.rfind("::") + if i == -1: continue + clz = sym[:i] + if clz in can_be_hidden_count: + can_be_hidden_count[clz] = can_be_hidden_count[clz] + 1 + else: + can_be_hidden_count[clz] = 1 +for sym in imported_symbols2: + i = sym.rfind("::") + if i == -1: continue + clz = sym[:i] + if clz in can_be_hidden_count: + can_be_hidden_count[clz] = can_be_hidden_count[clz] - 1 + else: + can_be_hidden_count[clz] = -1 +# convert to list, and sort the results in descending order +can_be_hidden_list = list() +for clz in can_be_hidden_count: + cnt = can_be_hidden_count[clz] + if cnt > 0: + can_be_hidden_list.append((cnt, clz)) +can_be_hidden_list.sort(reverse=True) +with open("bin/find-can-be-private-symbols.classes.results", "wt") as f: + for i in can_be_hidden_list: + if i[0] < 10: break + f.write(str(i[0]) + " " + i[1] + "\n") -def extractFunctionNameFromSignature(sym): - i = sym.find("(") - if i == -1: return sym - return sym[:i] with open("bin/find-can-be-private-symbols.functions.results", "wt") as f: - for sym in sorted(unused_function_exports): + for sym in sorted(unused_exports): # Filter out most of the noise. # No idea where these are coming from, but not our code. if sym.startswith("CERT_"): continue @@ -164,7 +193,6 @@ with open("bin/find-can-be-private-symbols.functions.results", "wt") as f: elif sym.startswith("SSL_"): continue elif sym.startswith("VFY_"): continue elif sym.startswith("_PR_"): continue - elif sym.startswith("_"): continue elif sym.startswith("ber_"): continue elif sym.startswith("bfp_"): continue elif sym.startswith("ldap_"): continue @@ -174,14 +202,16 @@ with open("bin/find-can-be-private-symbols.functions.results", "wt") as f: elif sym.startswith("pq"): continue elif sym.startswith("presolve_"): continue elif sym.startswith("sqlite3_"): continue + elif sym.startswith("libepubgen::"): continue + elif sym.startswith("lucene::"): continue + elif sym.startswith("Hunspell::"): continue + elif sym.startswith("sk_"): continue + elif sym.startswith("_Z"): continue # dynamically loaded elif sym.endswith("get_implementation"): continue elif sym.endswith("component_getFactory"): continue - elif sym == "CreateDialogFactory": continue elif sym == "CreateUnoWrapper": continue - elif sym == "CreateWindow": continue elif sym == "ExportDOC": continue - elif sym == "ExportPPT": continue elif sym == "ExportRTF": continue elif sym == "GetSaveWarningOfMSVBAStorage_ww8": continue elif sym == "GetSpecialCharsForEdit": continue @@ -214,22 +244,6 @@ with open("bin/find-can-be-private-symbols.functions.results", "wt") as f: elif sym.startswith("typereg_"): continue elif sym.startswith("uno_"): continue # remove things we found that do not exist in our source code, they're not ours - if not(extractFunctionNameFromSignature(sym) in all_source_names): continue + #if not(extractFunctionNameFromSignature(sym) in all_source_names): continue f.write(sym + "\n") -with open("bin/find-can-be-private-symbols.classes.results", "wt") as f: - for sym in sorted(classes_with_exported_symbols - classes_with_imported_symbols): - # externals - if sym.startswith("libcdr"): continue - elif sym.startswith("libabw"): continue - elif sym.startswith("libebook"): continue - elif sym.startswith("libepubgen"): continue - elif sym.startswith("libfreehand"): continue - elif sym.startswith("libmspub"): continue - elif sym.startswith("libpagemaker"): continue - elif sym.startswith("libqxp"): continue - elif sym.startswith("libvisio"): continue - elif sym.startswith("libzmf"): continue - elif sym.startswith("lucene::"): continue - elif sym.startswith("Sk"): continue - f.write(sym + "\n") |