#! /usr/bin/python

"""Report symbols that are not reachable from an object's exported symbols.

Usage: pass the object file (typically a shared library) as the only
argument.  The script runs ``objdump -D --demangle -l`` on it to learn which
symbols reference which other symbols, runs ``nm -D`` to learn which symbols
are exported, marks everything reachable from the exports (plus a hardcoded
list of functions that are only called through vtables) and prints whatever
was left unmarked.
"""

import os
import sys
import string  ## not used below; kept so the module's import list is unchanged
import re

## hash from symbol name to list of symbols with that name,
## where the list of symbols contains a list representing each symbol
##
## each symbol is a list laid out as:
##   0 symname, 1 address, 2 references, 3 filename, 4 reached,
##   5 referenced-by, 6 backlinked, 7 approx size
symbols = {}

## hash whose keys are the symbol names that marking starts from
roots = {}


def createBacklinks(name, syms):
    """Record name as a referencer of every known symbol that syms reference.

    name -- the symbol name shared by all the records in syms
    syms -- list of symbol records for that name

    References to names that are not in 'symbols' are ignored.  A name is
    added to a target's referenced-by list (field 5) at most once.
    """
    for s in syms:
        for r in s[2]:
            ## for each ref, add ourselves as a referencer
            for t in symbols.get(r, ()):
                if name not in t[5]:
                    t[5].append(name)


def markSymbol(frm, name):
    """Mark every symbol called name, and everything they reference, as reached.

    frm  -- name of the referencing symbol; only used in the diagnostics
    name -- name of the symbol to mark

    A name that is not in 'symbols' is reported and otherwise skipped, so a
    dangling reference does not abort the whole run.  Marking recurses through
    the reference lists (field 2); records whose reached flag (field 4) is
    already set are not visited again, which is what terminates cycles.
    """
    if name not in symbols:
        print("%s referenced but was not in the objdump" % name)
        return
    syms = symbols[name]
    ## print ambiguous references unless they are internal noise like ".L129"
    if len(syms) > 1 and not name.startswith('.'):
        print("Reference to symbol '%s' from '%s' is ambiguous, marking all '%s'" % (name, frm, name))
        print(syms)
    for s in syms:
        if s[4]:
            continue ## already marked
        s[4] = 1
        for r in s[2]:
            markSymbol(s[0], r)


def cmpFilename(a, b):
    """Compare two sequences by element 1 (filename), then element 0 (name).

    Returns -1, 0 or 1 in the manner of the classic cmp() function.
    printLost() sorts with the equivalent key; this function is kept for
    any other code that relies on it.
    """
    key_a = (a[1], a[0])
    key_b = (b[1], b[0])
    return (key_a > key_b) - (key_a < key_b)


def _shortNumber(value):
    """Format value with two significant digits, never in exponent notation."""
    text = "%.2g" % value
    if 'e' in text:
        ## %.2g switches to exponent form from 100 upwards ("1.5e+02");
        ## print those as plain integers instead
        text = "%.0f" % value
    return text


def sizeAsString(bytes):
    """Return a short human-readable rendering of a size given in bytes.

    Sizes below 1024 are printed as "N bytes", sizes below 1024*1024 with a
    "K" suffix and everything else with an "M" suffix.
    """
    if bytes < 1024:
        return "%d bytes" % bytes
    elif bytes < 1024*1024:
        return "%sK" % _shortNumber(bytes / 1024.0)
    else:
        return "%sM" % _shortNumber(bytes / 1024.0 / 1024.0)


def printLost():
    """Print the symbols in 'symbols' that were never marked as reached.

    Symbols are grouped by source file (or "unknown file") and sorted by
    file then name.  Each symbol is followed by the names that reference it;
    a per-file size summary and a grand total are printed at the end.
    Symbols whose name starts with '.' are left out.
    """
    lost = []
    for (name, syms) in symbols.items():
        s = syms[0] ## we always mark all or none for now
        if not s[4] and not name.startswith('.'): ## skip .L129 type symbols
            lost.append((name, s[3] or "unknown file", s[5], s[7]))

    ## same ordering as cmpFilename: by filename, then by symbol name
    lost.sort(key=lambda entry: (entry[1], entry[0]))

    file_summaries = []
    total_unused = 0
    total_this_file = 0
    filename = None
    for (name, next_filename, referenced_by, size) in lost:
        if next_filename != filename:
            if total_this_file > 0:
                file_summaries.append(" %s may be unused in %s" % (sizeAsString(total_this_file), filename))
            print("%s has these symbols not reachable from exported symbols:" % next_filename)
            filename = next_filename
            total_this_file = 0
        print(" %s %s" % (name, sizeAsString(size)))
        total_unused = total_unused + size
        total_this_file = total_this_file + size
        for trace in referenced_by:
            print(" referenced from %s" % trace)

    ## the loop above only emits a summary when the file changes, so the
    ## last file has to be flushed here
    if total_this_file > 0:
        file_summaries.append(" %s may be unused in %s" % (sizeAsString(total_this_file), filename))

    for fs in file_summaries:
        print(fs)
    print("%s total may be unused" % sizeAsString(total_unused))


def _readCommandOutput(command):
    """Announce command, run it through the shell, return its output lines."""
    print("Running: %s" % command)
    f = os.popen(command)
    try:
        return f.readlines()
    finally:
        f.close()


def _parseDisassembly(lines):
    """Fill 'symbols' from the output lines of 'objdump -D --demangle -l'."""
    ## 0001aa44 <_dbus_message_get_network_data>:
    sym_re = re.compile('([0-9a-f]+) <([^>]+)>:')
    ## 1aa49: e8 00 00 00 00 call 1aa4e <_dbus_message_get_network_data+0xa>
    ref_re = re.compile(' <([^>]+)> *$')
    ## /home/hp/dbus-cvs/dbus/dbus/dbus-message.c:139
    file_re = re.compile(r'^(\/[^:].*):[0-9]+$')
    ## _dbus_message_get_network_data+0xa
    funcname_re = re.compile(r'([^+]+)\+[0-9a-fx]+')

    current_sym = None
    for line in lines:
        match = sym_re.match(line)
        if match:
            ## start of a new symbol
            addr = match.group(1)
            name = match.group(2)
            ## 0 symname, 1 address, 2 references, 3 filename, 4 reached,
            ## 5 referenced-by 6 backlinked 7 approx size
            item = [name, addr, [], None, 0, [], 0, 0]
            symbols.setdefault(name, []).append(item)

            if current_sym:
                ## NOTE(review): the distance from the previous symbol's
                ## address is stored on the *new* symbol, so field 7 is only
                ## a rough figure - confirm whether that is intended
                prev_addr = int(current_sym[1], 16)
                our_addr = int(item[1], 16)
                item[7] = our_addr - prev_addr
                if item[7] < 0:
                    print("Computed negative size %d for %s" % (item[7], item[0]))
                    item[7] = 0

            current_sym = item
            continue

        match = ref_re.search(line)
        if match:
            ## a line of the current symbol that refers to some symbol
            if current_sym:
                target = match.group(1)
                match = funcname_re.match(target)
                if match:
                    ## dump the "+address"
                    target = match.group(1)
                if target != current_sym[0]: ## skip self-references
                    current_sym[2].append(target)
            continue

        match = file_re.match(line)
        if match and current_sym:
            ## a "file:line" annotation for the current symbol
            path = match.group(1)
            if path.startswith('/usr/include'):
                ## inlined libc thingy
                pass
            elif current_sym[0].startswith('.debug'):
                ## debug info
                pass
            elif current_sym[3] and current_sym[3] != path:
                raise Exception("%s in both %s and %s" % (current_sym[0], current_sym[3], path))
            else:
                current_sym[3] = path


def _parseExports(lines):
    """Add every 'T' symbol in the output lines of 'nm -D' to 'roots'."""
    ## 00005410 T dbus_address_entries_free
    dynsym_re = re.compile('T ([^ \n]+)$')
    for line in lines:
        match = dynsym_re.search(line)
        if match:
            name = match.group(1)
            if name in roots:
                raise Exception("symbol %s exported twice?" % name)
            roots[name] = 1


def main():
    """Analyse the object file named by sys.argv[1] and print the report.

    Raises Exception if a symbol is attributed to two different source
    files, if a symbol is exported twice, or if one of the hardcoded vtable
    roots is already an exported symbol.  Exits with status 1 when no object
    file was given.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s OBJECT-FILE\n" % sys.argv[0])
        sys.exit(1)
    filename = sys.argv[1]

    ## NOTE: filename is pasted into a shell command line as-is, so names
    ## containing spaces or shell metacharacters will not work

    ## first we find which functions reference which other functions
    _parseDisassembly(_readCommandOutput("objdump -D --demangle -l %s" % filename))

    ## now we need to find the roots (exported symbols)
    _parseExports(_readCommandOutput("nm -D %s" % filename))

    print("%d symbols exported from this object" % len(roots))

    ## these functions are used only indirectly, so we don't
    ## notice they are used. Manually add them as roots...
    vtable_roots = ['unix_finalize',
                    'unix_handle_watch',
                    'unix_disconnect',
                    'unix_connection_set',
                    'unix_do_iteration',
                    'unix_live_messages_changed',
                    'unix_get_unix_fd',
                    'handle_client_data_cookie_sha1_mech',
                    'handle_client_data_external_mech',
                    'handle_server_data_cookie_sha1_mech',
                    'handle_server_data_external_mech',
                    'handle_client_initial_response_cookie_sha1_mech',
                    'handle_client_initial_response_external_mech',
                    'handle_client_shutdown_cookie_sha1_mech',
                    'handle_client_shutdown_external_mech',
                    'handle_server_shutdown_cookie_sha1_mech',
                    'handle_server_shutdown_external_mech'
                    ]

    for vr in vtable_roots:
        if vr in roots:
            raise Exception("%s is already a root" % vr)
        roots[vr] = 1

    for k in roots:
        markSymbol("root", k)

    for (k, v) in symbols.items():
        createBacklinks(k, v)

    print(" The symbols mentioned below don't appear to be reachable starting from"
          " the dynamic exports of the library. However, this program is pretty"
          " dumb; a limitation that creates false positives is that it can only"
          " trace 'reachable' through hardcoded function calls, if a function is"
          " called only through a vtable, it won't be marked reachable (and neither"
          " will its children in the call graph). Also, the sizes mentioned are more"
          " or less completely bogus. \n")

    print("The following are hardcoded in as vtable roots: %s" % vtable_roots)

    printLost()


if __name__ == "__main__":
    main()