242 lines
7.8 KiB
Python
Executable file
242 lines
7.8 KiB
Python
Executable file
#! /usr/bin/python
|
|
|
|
import os
|
|
import sys
|
|
import string
|
|
import re
|
|
|
|
## Hash from symbol name to the list of symbols with that name.
## Each symbol is itself a mutable list laid out as:
##   0 symname, 1 address, 2 references, 3 filename, 4 reached,
##   5 referenced-by, 6 backlinked, 7 approx size
symbols = {}
## Hash from the name of every root symbol to 1; the call graph is
## walked starting from these names.
roots = {}
|
|
|
|
def createBacklinks(name, syms):
    """Record `name` as a referencer of everything that `syms` refer to.

    name -- the symbol name the records in `syms` are stored under.
    syms -- list of symbol records; slot 2 of each record holds the
            names that record references.

    For each referenced name found in the module-level `symbols` table,
    `name` is appended (at most once) to slot 5 of every record stored
    under that referenced name.  Referenced names that are not in the
    table are ignored.  Returns None.
    """
    for sym in syms:
        for ref in sym[2]:
            ## for each ref, add ourselves as a referencer; dict.get
            ## stands in for has_key(), which Python 3 no longer provides
            for target in symbols.get(ref, ()):
                if name not in target[5]:
                    target[5].append(name)
|
|
|
|
def markSymbol(frm, name):
    """Mark every symbol called `name`, and all they reference, as reached.

    frm  -- name of the referencing symbol; used only in diagnostics.
    name -- name of the symbol to mark.

    Looks `name` up in the module-level `symbols` table, sets slot 4 of
    each not-yet-marked record to 1 and recurses into the names listed in
    slot 2 of that record.  Records that are already marked are skipped,
    which also stops the recursion on cyclic references.

    A name missing from the table is reported and skipped.  (The original
    code printed the message without its format argument and then failed
    with KeyError on the lookup below.)
    """
    if name not in symbols:
        print("%s referenced but was not in the objdump" % name)
        return

    syms = symbols[name]
    ## print ambiguous references unless they are internal noise like ".L129"
    if len(syms) > 1 and not name.startswith('.'):
        print("Reference to symbol '%s' from '%s' is ambiguous, marking all '%s'" % (name, frm, name))
        print(syms)

    for s in syms:
        if s[4]:
            continue ## already marked
        s[4] = 1
        for r in s[2]:
            markSymbol(s[0], r)
|
|
|
|
def cmpFilename(a, b):
    """Three-way comparison of two entries by slot 1, then by slot 0.

    a, b -- indexable entries; slot 1 is compared first and slot 0 breaks
            ties (printLost stores the filename in slot 1 and the symbol
            name in slot 0).

    Returns -1, 0 or 1 when `a` sorts before, equal to, or after `b`.
    """
    key_a = (a[1], a[0])
    key_b = (b[1], b[0])
    ## (x > y) - (x < y) is the portable spelling of cmp(x, y), a builtin
    ## that no longer exists in Python 3
    return (key_a > key_b) - (key_a < key_b)
|
|
|
|
def sizeAsString(bytes):
    """Format a byte count as a short human-readable string.

    bytes -- the size, in bytes.

    Returns "<n> bytes" below 1024, otherwise the size in K (units of
    1024 bytes) or, from 1024*1024 upwards, in M, with one decimal place.

    Fixed-point formatting is used because the previous "%.2g" switched
    to exponent notation once the scaled value had three integer digits
    (150K was printed as "1.5e+02K").
    """
    if bytes < 1024:
        return "%d bytes" % bytes
    elif bytes < 1024*1024:
        return "%.1fK" % (bytes / 1024.0)
    else:
        return "%.1fM" % (bytes / 1024.0 / 1024.0)
|
|
|
|
def printLost():
    """Print the symbols that were never marked as reached, by file.

    Reads the module-level `symbols` table.  A name is reported when slot 4
    of its first record is false and the name does not start with '.'.
    Reported symbols are ordered by filename (slot 3, or "unknown file"
    when that slot is empty) and then by name.  For each symbol its size
    (slot 7) and the names referencing it (slot 5) are printed, followed
    by one summary line per file with a non-zero total and a grand total.

    Returns None; all output goes to stdout.
    """
    lost = []
    for (name, syms) in symbols.items():
        s = syms[0] ## we always mark all or none for now
        if not s[4] and name[0] != '.': ## skip .L129 type symbols
            lost.append((name, s[3] or "unknown file", s[5], s[7]))

    ## same ordering cmpFilename gives (filename, then symbol name), spelled
    ## as a key function because list.sort() takes no cmp function in Python 3
    lost.sort(key=lambda entry: (entry[1], entry[0]))

    file_summaries = []
    total_unused = 0
    total_this_file = 0
    filename = None
    for (name, next_filename, referencers, size) in lost:
        if next_filename != filename:
            ## a new file starts: close off the totals of the previous one
            if total_this_file > 0:
                file_summaries.append(" %s may be unused in %s" % (sizeAsString(total_this_file), filename))
            print("%s has these symbols not reachable from exported symbols:" % next_filename)
            filename = next_filename
            total_this_file = 0
        print(" %s %s" % (name, sizeAsString(size)))
        total_unused = total_unused + size
        total_this_file = total_this_file + size
        for trace in referencers:
            print(" referenced from %s" % trace)

    ## the loop only summarizes a file when the next one begins, so the
    ## last file has to be summarized here or it is silently dropped
    if total_this_file > 0:
        file_summaries.append(" %s may be unused in %s" % (sizeAsString(total_this_file), filename))

    for fs in file_summaries:
        print(fs)
    print("%s total may be unused" % sizeAsString(total_unused))
|
|
|
|
def _runCommand(args):
    """Run the program `args` (an argument list) and return its stdout lines."""
    ## imported here because the file's top-level imports do not include it
    import subprocess

    print("Running: %s" % " ".join(args))
    ## an argument list (no shell) keeps a filename containing spaces or
    ## shell metacharacters from being split or interpreted
    proc = subprocess.Popen(args, stdout=subprocess.PIPE, universal_newlines=True)
    try:
        return proc.stdout.readlines()
    finally:
        proc.stdout.close()
        proc.wait()


def _parseObjdump(lines):
    """Fill the module-level `symbols` table from objdump output lines."""
    ## 0001aa44 <_dbus_message_get_network_data>:
    sym_re = re.compile(r'([0-9a-f]+) <([^>]+)>:')
    ## 1aa49: e8 00 00 00 00 call 1aa4e <_dbus_message_get_network_data+0xa>
    ref_re = re.compile(r' <([^>]+)> *$')
    ## /home/hp/dbus-cvs/dbus/dbus/dbus-message.c:139
    file_re = re.compile(r'^(\/[^:].*):[0-9]+$')
    ## _dbus_message_get_network_data+0xa
    funcname_re = re.compile(r'([^+]+)\+[0-9a-fx]+')

    ## first we find which functions reference which other functions
    current_sym = None
    for line in lines:
        match = sym_re.match(line)
        if match:
            addr = match.group(1)
            name = match.group(2)
            ## 0 symname, 1 address, 2 references, 3 filename, 4 reached, 5 referenced-by 6 backlinked 7 approx size
            item = [name, addr, [], None, 0, [], 0, 0]
            symbols.setdefault(name, []).append(item)

            if current_sym:
                ## The gap between two consecutive symbol addresses is the
                ## extent of the EARLIER symbol, so it is stored on
                ## current_sym; it used to be stored on the new item, which
                ## shifted every size onto the following symbol.
                size = int(addr, 16) - int(current_sym[1], 16)
                if size < 0:
                    print("Computed negative size %d for %s" % (size, current_sym[0]))
                    size = 0
                current_sym[7] = size

            current_sym = item
            continue

        match = ref_re.search(line)
        if match:
            if current_sym:
                target = match.group(1)
                stripped = funcname_re.match(target)
                if stripped:
                    ## dump the "+address"
                    target = stripped.group(1)
                if target != current_sym[0]: ## skip self-references
                    current_sym[2].append(target)
            continue

        match = file_re.match(line)
        if match and current_sym:
            path = match.group(1)
            if path.startswith('/usr/include'):
                ## inlined libc thingy
                pass
            elif current_sym[0].startswith('.debug'):
                ## debug info
                pass
            elif current_sym[3] and current_sym[3] != path:
                raise Exception("%s in both %s and %s" % (current_sym[0], current_sym[3], path))
            else:
                current_sym[3] = path


def _findExportedRoots(lines):
    """Add every 'T' symbol found in `nm -D` output lines to `roots`."""
    ## 00005410 T dbus_address_entries_free
    dynsym_re = re.compile('T ([^ \n]+)$')
    for line in lines:
        match = dynsym_re.search(line)
        if match:
            name = match.group(1)
            if name in roots:
                raise Exception("symbol %s exported twice?" % name)
            roots[name] = 1


def main():
    """Report symbols of an object file not reachable from its exports.

    Takes the object file from sys.argv[1].  Runs `objdump -D --demangle -l`
    on it to fill `symbols` with each symbol and the names it references,
    runs `nm -D` to fill `roots` with the exported symbols, adds a
    hardcoded list of functions that are only called indirectly, marks
    everything reachable from the roots, builds the referenced-by lists
    and prints the symbols that were never reached.

    Exits with status 1 after printing a usage line when no file is given.
    Raises Exception when a symbol appears in two source files, when a
    symbol is exported twice, or when a hardcoded root is already
    exported.  A missing objdump or nm program surfaces as the OSError
    raised by subprocess.Popen.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s OBJECT-FILE\n" % sys.argv[0])
        sys.exit(1)

    filename = sys.argv[1]

    _parseObjdump(_runCommand(["objdump", "-D", "--demangle", "-l", filename]))

    ## now we need to find the roots (exported symbols)
    _findExportedRoots(_runCommand(["nm", "-D", filename]))

    print("%d symbols exported from this object" % len(roots))

    ## these functions are used only indirectly, so we don't
    ## notice they are used. Manually add them as roots...
    vtable_roots = ['unix_finalize',
                    'unix_handle_watch',
                    'unix_disconnect',
                    'unix_connection_set',
                    'unix_do_iteration',
                    'unix_live_messages_changed',
                    'unix_get_unix_fd',
                    'handle_client_data_cookie_sha1_mech',
                    'handle_client_data_external_mech',
                    'handle_server_data_cookie_sha1_mech',
                    'handle_server_data_external_mech',
                    'handle_client_initial_response_cookie_sha1_mech',
                    'handle_client_initial_response_external_mech',
                    'handle_client_shutdown_cookie_sha1_mech',
                    'handle_client_shutdown_external_mech',
                    'handle_server_shutdown_cookie_sha1_mech',
                    'handle_server_shutdown_external_mech'
                    ]

    for vr in vtable_roots:
        if vr in roots:
            raise Exception("%s is already a root" % vr)
        roots[vr] = 1

    for k in roots:
        markSymbol("root", k)

    for (k, v) in symbols.items():
        createBacklinks(k, v)

    print("""

The symbols mentioned below don't appear to be reachable starting from
the dynamic exports of the library. However, this program is pretty
dumb; a limitation that creates false positives is that it can only
trace 'reachable' through hardcoded function calls, if a function is
called only through a vtable, it won't be marked reachable (and
neither will its children in the call graph).

Also, the sizes mentioned are more or less completely bogus.

""")

    print("The following are hardcoded in as vtable roots: %s" % vtable_roots)

    printLost()
|
|
|
|
## Run the analysis only when this file is executed as a script; importing
## it as a module just defines the functions and the two tables.
if __name__ == "__main__":
    main()
|