398 lines
12 KiB
Python
Executable file
398 lines
12 KiB
Python
Executable file
#!/usr/bin/env python
|
|
# Copyright 2016 The Chromium Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
import argparse
|
|
import bisect
|
|
import collections
|
|
import gzip
|
|
import json
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
|
|
# Absolute path of '../third_party/symbols', resolved relative to the real
# (symlink-free) location of this script. It is appended to sys.path so the
# modules that live there can be imported below.
_SYMBOLS_PATH = os.path.abspath(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '..', 'third_party', 'symbols'))
sys.path.append(_SYMBOLS_PATH)
|
|
# pylint: disable=import-error
|
|
import symbols.elf_symbolizer as elf_symbolizer
|
|
|
|
|
|
# Trace event phases this script looks at. The values come from Chromium's
# src/base/trace_event/common/trace_event_common.h.
TRACE_EVENT_PHASE_METADATA = 'M'
TRACE_EVENT_PHASE_MEMORY_DUMP = 'v'
|
|
|
|
|
|
# Recognizes on-device Android library paths in both supported layouts:
#   K:  /data/app-lib/<>/lib.so
#   L+: /data/app/<>/lib/<>/lib.so
# The library file name is captured in the 'name' group.
ANDROID_PATH_MATCHER = re.compile(
    r'^/data/(?:app/[^/]+/lib/[^/]+/|app-lib/[^/]+/)(?P<name>.*\.so)')

# Directory under the build output path that holds unstripped libraries.
ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped'
|
|
|
|
|
|
def FindInSystemPath(binary_name):
  """Looks up |binary_name| in the directories listed in PATH.

  Directories are tried in the order they appear in the PATH environment
  variable; the first regular file that is also executable wins.

  Args:
    binary_name: File name of the executable to find (e.g. 'addr2line').

  Returns:
    Path of the first match, or None if nothing matches or PATH is not set.
  """
  search_path = os.environ.get('PATH')
  if search_path is None:
    # A stripped-down environment may have no PATH at all; that simply means
    # there is nowhere to look, not that the lookup should crash.
    return None

  for directory in search_path.split(os.pathsep):
    candidate = os.path.join(directory, binary_name)
    # Require the execute permission too: a plain data file that merely has
    # the right name cannot be launched by the caller.
    if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
      return candidate
  return None
|
|
|
|
|
|
def IsSymbolizableFile(file_path):
  """Tells whether |file_path| is a 32- or 64-bit ELF or Mach-O binary.

  The decision is delegated to the system 'file' utility, whose description
  of the file is matched against the expected type prefix.

  Args:
    file_path: Path of the file to inspect.

  Returns:
    True if 'file' describes the path as '(ELF|Mach-O) (32|64)-bit',
    False otherwise, including when 'file' exits with an error status.

  Raises:
    OSError: if the 'file' utility cannot be launched at all.
  """
  try:
    # '-0' asks 'file' to emit a NUL right after the file name, which makes
    # the description easy to split off even for unusual file names.
    result = subprocess.check_output(['file', '-0', file_path])
  except subprocess.CalledProcessError:
    # 'file' could not describe the path, so it cannot be symbolized.
    return False

  # check_output() returns bytes on Python 3; normalize to text so that the
  # str-based search and regex below work on both Python 2 and Python 3.
  if not isinstance(result, str):
    result = result.decode('utf-8', 'replace')

  # Everything after the first NUL is the type description. When there is no
  # NUL, find() returns -1 and the whole output is used.
  type_string = result[result.find('\0') + 1:]
  return bool(re.match(r'\: (ELF|Mach-O) (32|64)-bit\b', type_string))
|
|
|
|
|
|
class ProcessMemoryMaps(object):
  """Represents 'process_mmaps' trace file entry.

  Stores the mapped regions of a process sorted by start address, with
  duplicates dropped, and answers "which region contains this address?".
  """

  class Region(object):
    """One mapped region covering [start_address, end_address).

    Regions are compared (and hashed) by start address only. They can also
    be compared against plain integers, which are treated as addresses.
    Comparing with anything else raises TypeError.
    """

    # (int, long) on Python 2; collapses to (int, int) on Python 3, where
    # there is no separate long type.
    _INTEGER_TYPES = (int, type(1 << 64))

    def __init__(self, start_address, size, file_path):
      self._start_address = start_address
      self._size = size
      self._file_path = file_path

    @property
    def start_address(self):
      return self._start_address

    @property
    def end_address(self):
      # Exclusive upper bound of the region.
      return self._start_address + self._size

    @property
    def size(self):
      return self._size

    @property
    def file_path(self):
      return self._file_path

    def _AddressOf(self, other):
      """Returns the address |other| stands for in a comparison."""
      if isinstance(other, type(self)):
        return other._start_address
      if isinstance(other, self._INTEGER_TYPES):
        return other
      raise TypeError('Cannot compare with %s' % type(other))

    # Rich comparisons are spelled out (rather than relying on __cmp__,
    # which only Python 2 honors) so that sorting and comparisons behave the
    # same on Python 2 and Python 3.
    def __eq__(self, other):
      return self._start_address == self._AddressOf(other)

    def __ne__(self, other):
      return self._start_address != self._AddressOf(other)

    def __lt__(self, other):
      return self._start_address < self._AddressOf(other)

    def __le__(self, other):
      return self._start_address <= self._AddressOf(other)

    def __gt__(self, other):
      return self._start_address > self._AddressOf(other)

    def __ge__(self, other):
      return self._start_address >= self._AddressOf(other)

    def __hash__(self):
      # Kept consistent with __eq__, which looks at the start address only.
      return hash(self._start_address)

    def __repr__(self):
      return 'Region(0x{:X} - 0x{:X}, {})'.format(
          self.start_address, self.end_address, self.file_path)

  def __init__(self, process_mmaps):
    """Parses 'process_mmaps' dictionary.

    Args:
      process_mmaps: Dictionary with a 'vm_regions' list. Each entry provides
          'sa' (start address, hex string), 'sz' (size, hex string) and
          'mf' (stored as the region's file path).

    Raises:
      AssertionError: if two regions with different start addresses overlap.
    """
    regions = [
        self.Region(int(region_value['sa'], 16),
                    int(region_value['sz'], 16),
                    region_value['mf'])
        for region_value in process_mmaps['vm_regions']]
    # The sort is stable, so among regions sharing a start address the one
    # listed first in the trace stays first (and is the one that is kept).
    regions.sort(key=lambda region: region.start_address)

    # Copy regions without duplicates and check for overlaps.
    self._regions = []
    previous_region = None
    for region in regions:
      if previous_region is not None:
        # Equality only looks at the start address.
        if region == previous_region:
          continue
        assert region.start_address >= previous_region.end_address, \
            'Regions {} and {} overlap.'.format(previous_region, region)
      previous_region = region
      self._regions.append(region)

    # Start addresses in the same order as self._regions, so FindRegion()
    # can bisect over plain integers.
    self._start_addresses = [r.start_address for r in self._regions]

  @property
  def regions(self):
    """Regions sorted by start address. Treat the list as read-only."""
    return self._regions

  def FindRegion(self, address):
    """Finds region containing |address|. Returns None if none found."""
    # Index of the last region that starts at or before |address|.
    region_index = bisect.bisect_right(self._start_addresses, address) - 1
    if region_index < 0:
      return None
    region = self._regions[region_index]
    if region.start_address <= address < region.end_address:
      return region
    return None
|
|
|
|
|
|
class StackFrames(object):
  """Represents 'stackFrames' trace file entry.

  Only frames whose name is a raw program counter ('pc:<hex>') are tracked;
  all other frames are ignored.
  """

  # Prefix of the name of a frame that carries a raw program counter.
  _PC_TAG = 'pc:'

  class PCFrame(object):
    """A frame dictionary paired with the PC parsed from its name.

    Assigning to |name| writes through to the underlying frame dictionary
    and marks the frame as modified.
    """

    def __init__(self, pc, frame):
      self._modified = False
      self._pc = pc
      self._frame = frame

    @property
    def modified(self):
      return self._modified

    @property
    def pc(self):
      return self._pc

    @property
    def name(self):
      return self._frame['name']

    @name.setter
    def name(self, value):
      self._modified = True
      self._frame['name'] = value

  def __init__(self, stack_frames):
    """Constructs object using 'stackFrames' dictionary.

    Args:
      stack_frames: Dictionary whose values are frame dictionaries; each
          frame dictionary must have a 'name' entry.
    """
    self._pc_frames = []
    for frame in stack_frames.values():
      pc_frame = self._ParsePCFrame(frame)
      if pc_frame is not None:
        self._pc_frames.append(pc_frame)

  @property
  def pc_frames(self):
    """List of PCFrame objects, one per 'pc:' frame."""
    return self._pc_frames

  @property
  def modified(self):
    """True if the name of at least one PC frame has been reassigned."""
    return any(f.modified for f in self._pc_frames)

  @classmethod
  def _ParsePCFrame(cls, frame):
    """Returns a PCFrame for |frame|, or None if its name is not 'pc:<hex>'.

    Raises:
      ValueError: if the text following 'pc:' is not a hexadecimal number.
    """
    name = frame['name']
    if not name.startswith(cls._PC_TAG):
      return None
    pc = int(name[len(cls._PC_TAG):], 16)
    return cls.PCFrame(pc, frame)
|
|
|
|
|
|
class Process(object):
  """Holds various bits of information about a process in a trace file.

  Attributes:
    pid: Process id, as passed to the constructor.
    name: Initially None.
    mmaps: Initially None.
    stack_frames: Initially None.
  """

  def __init__(self, pid):
    # Everything except the pid starts out unknown; CollectProcesses() fills
    # the rest in as it walks the trace events.
    self.name = self.mmaps = self.stack_frames = None
    self.pid = pid
|
|
|
|
|
|
def CollectProcesses(trace):
  """Parses a trace and returns the processes suitable for symbolization.

  Args:
    trace: Parsed trace: either a dictionary with a 'traceEvents' list, or a
        bare list of events.

  Returns:
    List of Process objects that have both mmaps and stack_frames.
  """
  process_map = {}

  # Android traces produced via 'chrome://inspect/?tracing#devices' are
  # just list of events.
  events = trace if isinstance(trace, list) else trace['traceEvents']
  for event in events:
    name = event.get('name')
    if not name:
      continue

    pid = event['pid']
    process = process_map.get(pid)
    if process is None:
      process = Process(pid)
      process_map[pid] = process

    phase = event['ph']
    if phase == TRACE_EVENT_PHASE_METADATA:
      if name == 'process_name':
        process.name = event['args']['name']
      elif name == 'stackFrames':
        process.stack_frames = StackFrames(event['args']['stackFrames'])
    elif phase == TRACE_EVENT_PHASE_MEMORY_DUMP:
      process_mmaps = event['args']['dumps'].get('process_mmaps')
      if process_mmaps:
        # TODO(dskiba): this parses all process_mmaps, but retains only the
        #               last one. We need to parse only once (lazy parsing?).
        process.mmaps = ProcessMemoryMaps(process_mmaps)

  # values() (rather than the Python-2-only itervalues()) keeps this working
  # on both Python 2 and Python 3.
  return [p for p in process_map.values() if p.mmaps and p.stack_frames]
|
|
|
|
|
|
class SymbolizableFile(object):
  """Holds file path, addresses to symbolize and stack frames to update.

  Acts as the glue between ELFSymbolizer and a trace file: the keys of
  |frames_by_address| say what to symbolize, and the frame lists stored
  under them say what to update with the symbolization result.

  Attributes:
    path: Path of the file, as given to the constructor.
    frames_by_address: defaultdict(list); starts out empty.
  """

  def __init__(self, file_path):
    self.frames_by_address = collections.defaultdict(list)
    self.path = file_path
|
|
|
|
|
|
def ResolveSymbolizableFiles(processes):
  """Resolves and groups PCs into list of SymbolizableFiles.

  Every PC stack frame of every process is looked up in that process' memory
  maps. Frames that fall into a region are grouped by the region's file path
  and keyed by their offset from the start of the region. Frames that match
  no region are renamed to '<unresolved>'.

  Args:
    processes: Iterable of processes providing |mmaps| and |stack_frames|.

  Returns:
    The SymbolizableFile objects, one per distinct region file path.
  """
  files_by_path = {}
  for proc in processes:
    for pc_frame in proc.stack_frames.pc_frames:
      mapped_region = proc.mmaps.FindRegion(pc_frame.pc)
      if mapped_region is None:
        pc_frame.name = '<unresolved>'
        continue

      path = mapped_region.file_path
      if path not in files_by_path:
        files_by_path[path] = SymbolizableFile(path)

      # Addresses handed to the symbolizer are relative to the region start.
      offset = pc_frame.pc - mapped_region.start_address
      files_by_path[path].frames_by_address[offset].append(pc_frame)
  return files_by_path.values()
|
|
|
|
|
|
def SymbolizeFiles(symfiles, addr2line_path):
  """Symbolizes each file in the given list of SymbolizableFiles
  and updates stack frames with symbolization results.

  Files that cannot be symbolized (path is not absolute, file doesn't exist,
  or file is not a symbolizable binary) are skipped, and their frames are
  renamed to '<path>' ('<unnamed>' when the path is empty).

  Args:
    symfiles: Iterable of SymbolizableFile objects.
    addr2line_path: Path of the addr2line binary, passed to ELFSymbolizer.

  Returns:
    True if at least one file was run through the symbolizer, else False.
  """
  # Single-argument print(...) calls behave the same on Python 2 and 3.
  print('Symbolizing...')

  def _SubPrintf(message, *args):
    print((' ' + message).format(*args))

  symbolized = False
  for symfile in symfiles:
    unsymbolized_name = '<{}>'.format(
        symfile.path if symfile.path else 'unnamed')

    problem = None
    if not os.path.isabs(symfile.path):
      problem = 'not a file'
    elif not os.path.isfile(symfile.path):
      problem = "file doesn't exist"
    elif not IsSymbolizableFile(symfile.path):
      problem = 'file is not symbolizable'
    if problem:
      _SubPrintf("Won't symbolize {} PCs for '{}': {}.",
                 len(symfile.frames_by_address),
                 symfile.path,
                 problem)
      for frames in symfile.frames_by_address.values():
        for frame in frames:
          frame.name = unsymbolized_name
      continue

    # NOTE: the callback closes over |unsymbolized_name| of this iteration.
    # That is safe because the symbolizer is joined before the loop moves on.
    def _SymbolizerCallback(sym_info, frames):
      # Unwind inline chain to the top.
      while sym_info.inlined_by:
        sym_info = sym_info.inlined_by

      symbolized_name = sym_info.name if sym_info.name else unsymbolized_name
      for frame in frames:
        frame.name = symbolized_name

    symbolizer = elf_symbolizer.ELFSymbolizer(symfile.path,
                                              addr2line_path,
                                              _SymbolizerCallback,
                                              inlines=True)

    _SubPrintf('Symbolizing {} PCs from {}...',
               len(symfile.frames_by_address),
               symfile.path)

    for address, frames in symfile.frames_by_address.items():
      # SymbolizeAsync() asserts that the type of address is int. We operate
      # on longs (since they are raw pointers possibly from 64-bit processes).
      # It's OK to cast here because we're passing relative PC, which should
      # always fit into int.
      symbolizer.SymbolizeAsync(int(address), frames)

    symbolizer.Join()
    symbolized = True

  return symbolized
|
|
|
|
|
|
def HaveFilesFromAndroid(symfiles):
  """Returns True if the path of any of |symfiles| is an Android library path.

  A path counts as an Android library path when ANDROID_PATH_MATCHER
  matches it.
  """
  for symfile in symfiles:
    if ANDROID_PATH_MATCHER.match(symfile.path):
      return True
  return False
|
|
|
|
|
|
def RemapAndroidFiles(symfiles, output_path):
  """Points Android device library paths at the unstripped host copies.

  For every file whose path matches ANDROID_PATH_MATCHER, the path is
  replaced in place by <output_path>/<ANDROID_UNSTRIPPED_SUBPATH>/<name>,
  where <name> is the library name captured by the matcher. Files with any
  other path are left untouched.

  Args:
    symfiles: Iterable of objects with a writable |path| attribute.
    output_path: Build output directory.
  """
  for symfile in symfiles:
    android_match = ANDROID_PATH_MATCHER.match(symfile.path)
    if not android_match:
      continue
    symfile.path = os.path.join(
        output_path, ANDROID_UNSTRIPPED_SUBPATH, android_match.group('name'))
|
|
|
|
|
|
# Suffix used for backup files.
BACKUP_FILE_TAG = '.BACKUP'


def main():
  """Symbolizes the PC stack frames of a trace file and rewrites it in place.

  The trace file named on the command line is read, its PC frames are
  resolved to files and symbolized, and, if anything was symbolized, the
  file is rewritten. Unless --no-backup is given, the original file is first
  renamed to <file>.BACKUP.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('file',
                      help='Trace file to symbolize (.json or .json.gz)')
  # The default is the boolean True (not the string 'true'), so that
  # options.backup is always a real bool, matching what store_false stores.
  parser.add_argument('--no-backup',
                      dest='backup', default=True, action='store_false',
                      help="Don't create {} files".format(BACKUP_FILE_TAG))
  parser.add_argument('--output-directory',
                      help='The path to the build output directory, such ' +
                           'as out/Debug. Only needed for Android.')
  options = parser.parse_args()

  trace_file_path = options.file

  def _OpenTraceFile(mode):
    # Files ending in '.gz' go through gzip; everything else is opened as
    # plain text.
    if trace_file_path.endswith('.gz'):
      return gzip.open(trace_file_path, mode + 'b')
    else:
      return open(trace_file_path, mode + 't')

  addr2line_path = FindInSystemPath('addr2line')
  if addr2line_path is None:
    sys.exit("Can't symbolize - no addr2line in PATH.")

  print('Reading trace file...')
  with _OpenTraceFile('r') as trace_file:
    trace = json.load(trace_file)

  processes = CollectProcesses(trace)
  symfiles = ResolveSymbolizableFiles(processes)

  # Android trace files don't have any indication they are from Android.
  # So we're checking for Android-specific paths.
  if HaveFilesFromAndroid(symfiles):
    if not options.output_directory:
      parser.error('The trace file appears to be from Android. Please '
                   "specify output directory (e.g. 'out/Debug') to properly "
                   'symbolize it.')
    RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory))

  if SymbolizeFiles(symfiles, addr2line_path):
    if options.backup:
      backup_file_path = trace_file_path + BACKUP_FILE_TAG
      print('Backing up trace file to {}...'.format(backup_file_path))
      os.rename(trace_file_path, backup_file_path)

    # The frame dictionaries inside |trace| were renamed in place during
    # symbolization, so dumping |trace| writes out the symbolized names.
    print('Updating trace file...')
    with _OpenTraceFile('w') as trace_file:
      json.dump(trace, trace_file)
  else:
    print('No PCs symbolized - not updating trace file.')


if __name__ == '__main__':
  main()
|