#!/usr/bin/python #---------------------------------------------------------------------- # Be sure to add the python path that points to the LLDB shared library. # # To use this in the embedded python interpreter using "lldb": # # cd /path/containing/crashlog.py # lldb # (lldb) script import crashlog # "crashlog" command installed, type "crashlog --help" for detailed help # (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash # # The benefit of running the crashlog command inside lldb in the # embedded python interpreter is when the command completes, there # will be a target with all of the files loaded at the locations # described in the crash log. Only the files that have stack frames # in the backtrace will be loaded unless the "--load-all" option # has been specified. This allows users to explore the program in the # state it was in right at crash time. # # On MacOSX csh, tcsh: # ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash ) # # On MacOSX sh, bash: # PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash #---------------------------------------------------------------------- from __future__ import print_function from builtins import str from builtins import range from builtins import object import lldb import subprocess import optparse import os import plistlib import re import shlex import sys import time import uuid class Address(object): """Class that represents an address that will be symbolicated""" def __init__(self, target, load_addr): self.target = target self.load_addr = load_addr # The load address that this object represents # the resolved lldb.SBAddress (if any), named so_addr for # section/offset address self.so_addr = None self.sym_ctx = None # The cached symbol context for this address # Any original textual description of this address to be used as a # backup in case symbolication fails self.description = None self.symbolication = None # The cached symbolicated string that describes this address self.inlined = False def __str__(self): s = "%#16.16x" % (self.load_addr) if self.symbolication: s += " %s" % (self.symbolication) elif self.description: s += " %s" % (self.description) elif self.so_addr: s += " %s" % (self.so_addr) return s def resolve_addr(self): if self.so_addr is None: self.so_addr = self.target.ResolveLoadAddress(self.load_addr) return self.so_addr def is_inlined(self): return self.inlined def get_symbol_context(self): if self.sym_ctx is None: sb_addr = self.resolve_addr() if sb_addr: self.sym_ctx = self.target.ResolveSymbolContextForAddress( sb_addr, lldb.eSymbolContextEverything) else: self.sym_ctx = lldb.SBSymbolContext() return self.sym_ctx def get_instructions(self): sym_ctx = self.get_symbol_context() if sym_ctx: function = sym_ctx.GetFunction() if function: return function.GetInstructions(self.target) return sym_ctx.GetSymbol().GetInstructions(self.target) return None def symbolicate(self, verbose=False): if self.symbolication is None: self.symbolication = '' self.inlined = False sym_ctx = self.get_symbol_context() if sym_ctx: module = sym_ctx.GetModule() if module: # Print full source file path in verbose mode if verbose: self.symbolication += str(module.GetFileSpec()) + '`' else: self.symbolication += module.GetFileSpec().GetFilename() + '`' function_start_load_addr = -1 function = sym_ctx.GetFunction() block = sym_ctx.GetBlock() line_entry = sym_ctx.GetLineEntry() symbol = sym_ctx.GetSymbol() inlined_block = block.GetContainingInlinedBlock() if function: self.symbolication += function.GetName() if inlined_block: self.inlined = True self.symbolication += ' [inlined] ' + \ inlined_block.GetInlinedName() block_range_idx = inlined_block.GetRangeIndexForBlockAddress( self.so_addr) if block_range_idx < lldb.UINT32_MAX: block_range_start_addr = inlined_block.GetRangeStartAddress( block_range_idx) function_start_load_addr = block_range_start_addr.GetLoadAddress( self.target) if function_start_load_addr == -1: function_start_load_addr = function.GetStartAddress().GetLoadAddress(self.target) elif symbol: self.symbolication += symbol.GetName() function_start_load_addr = symbol.GetStartAddress().GetLoadAddress(self.target) else: self.symbolication = '' return False # Dump the offset from the current function or symbol if it # is non zero function_offset = self.load_addr - function_start_load_addr if function_offset > 0: self.symbolication += " + %u" % (function_offset) elif function_offset < 0: self.symbolication += " %i (invalid negative offset, file a bug) " % function_offset # Print out any line information if any is available if line_entry.GetFileSpec(): # Print full source file path in verbose mode if verbose: self.symbolication += ' at %s' % line_entry.GetFileSpec() else: self.symbolication += ' at %s' % line_entry.GetFileSpec().GetFilename() self.symbolication += ':%u' % line_entry.GetLine() column = line_entry.GetColumn() if column > 0: self.symbolication += ':%u' % column return True return False class Section(object): """Class that represents an load address range""" sect_info_regex = re.compile('(?P[^=]+)=(?P.*)') addr_regex = re.compile('^\s*(?P0x[0-9A-Fa-f]+)\s*$') range_regex = re.compile( '^\s*(?P0x[0-9A-Fa-f]+)\s*(?P[-+])\s*(?P0x[0-9A-Fa-f]+)\s*$') def __init__(self, start_addr=None, end_addr=None, name=None): self.start_addr = start_addr self.end_addr = end_addr self.name = name @classmethod def InitWithSBTargetAndSBSection(cls, target, section): sect_load_addr = section.GetLoadAddress(target) if sect_load_addr != lldb.LLDB_INVALID_ADDRESS: obj = cls( sect_load_addr, sect_load_addr + section.size, section.name) return obj else: return None def contains(self, addr): return self.start_addr <= addr and addr < self.end_addr def set_from_string(self, s): match = self.sect_info_regex.match(s) if match: self.name = match.group('name') range_str = match.group('range') addr_match = self.addr_regex.match(range_str) if addr_match: self.start_addr = int(addr_match.group('start'), 16) self.end_addr = None return True range_match = self.range_regex.match(range_str) if range_match: self.start_addr = int(range_match.group('start'), 16) self.end_addr = int(range_match.group('end'), 16) op = range_match.group('op') if op == '+': self.end_addr += self.start_addr return True print('error: invalid section info string "%s"' % s) print('Valid section info formats are:') print('Format Example Description') print('--------------------- -----------------------------------------------') print('= __TEXT=0x123000 Section from base address only') print('=- __TEXT=0x123000-0x124000 Section from base address and end address') print('=+ __TEXT=0x123000+0x1000 Section from base address and size') return False def __str__(self): if self.name: if self.end_addr is not None: if self.start_addr is not None: return "%s=[0x%16.16x - 0x%16.16x)" % ( self.name, self.start_addr, self.end_addr) else: if self.start_addr is not None: return "%s=0x%16.16x" % (self.name, self.start_addr) return self.name return "" class Image(object): """A class that represents an executable image and any associated data""" def __init__(self, path, uuid=None): self.path = path self.resolved_path = None self.resolved = False self.unavailable = False self.uuid = uuid self.section_infos = list() self.identifier = None self.version = None self.arch = None self.module = None self.symfile = None self.slide = None @classmethod def InitWithSBTargetAndSBModule(cls, target, module): '''Initialize this Image object with a module from a target.''' obj = cls(module.file.fullpath, module.uuid) obj.resolved_path = module.platform_file.fullpath obj.resolved = True obj.arch = module.triple for section in module.sections: symb_section = Section.InitWithSBTargetAndSBSection( target, section) if symb_section: obj.section_infos.append(symb_section) obj.arch = module.triple obj.module = module obj.symfile = None obj.slide = None return obj def dump(self, prefix): print("%s%s" % (prefix, self)) def debug_dump(self): print('path = "%s"' % (self.path)) print('resolved_path = "%s"' % (self.resolved_path)) print('resolved = %i' % (self.resolved)) print('unavailable = %i' % (self.unavailable)) print('uuid = %s' % (self.uuid)) print('section_infos = %s' % (self.section_infos)) print('identifier = "%s"' % (self.identifier)) print('version = %s' % (self.version)) print('arch = %s' % (self.arch)) print('module = %s' % (self.module)) print('symfile = "%s"' % (self.symfile)) print('slide = %i (0x%x)' % (self.slide, self.slide)) def __str__(self): s = '' if self.uuid: s += "%s " % (self.get_uuid()) if self.arch: s += "%s " % (self.arch) if self.version: s += "%s " % (self.version) resolved_path = self.get_resolved_path() if resolved_path: s += "%s " % (resolved_path) for section_info in self.section_infos: s += ", %s" % (section_info) if self.slide is not None: s += ', slide = 0x%16.16x' % self.slide return s def add_section(self, section): # print "added '%s' to '%s'" % (section, self.path) self.section_infos.append(section) def get_section_containing_load_addr(self, load_addr): for section_info in self.section_infos: if section_info.contains(load_addr): return section_info return None def get_resolved_path(self): if self.resolved_path: return self.resolved_path elif self.path: return self.path return None def get_resolved_path_basename(self): path = self.get_resolved_path() if path: return os.path.basename(path) return None def symfile_basename(self): if self.symfile: return os.path.basename(self.symfile) return None def has_section_load_info(self): return self.section_infos or self.slide is not None def load_module(self, target): if self.unavailable: return None # We already warned that we couldn't find this module, so don't return an error string # Load this module into "target" using the section infos to # set the section load addresses if self.has_section_load_info(): if target: if self.module: if self.section_infos: num_sections_loaded = 0 for section_info in self.section_infos: if section_info.name: section = self.module.FindSection( section_info.name) if section: error = target.SetSectionLoadAddress( section, section_info.start_addr) if error.Success(): num_sections_loaded += 1 else: return 'error: %s' % error.GetCString() else: return 'error: unable to find the section named "%s"' % section_info.name else: return 'error: unable to find "%s" section in "%s"' % ( range.name, self.get_resolved_path()) if num_sections_loaded == 0: return 'error: no sections were successfully loaded' else: err = target.SetModuleLoadAddress( self.module, self.slide) if err.Fail(): return err.GetCString() return None else: return 'error: invalid module' else: return 'error: invalid target' else: return 'error: no section infos' def add_module(self, target): '''Add the Image described in this object to "target" and load the sections if "load" is True.''' if target: # Try and find using UUID only first so that paths need not match # up uuid_str = self.get_normalized_uuid_string() if uuid_str: self.module = target.AddModule(None, None, uuid_str) if not self.module: self.locate_module_and_debug_symbols() if self.unavailable: return None resolved_path = self.get_resolved_path() self.module = target.AddModule( resolved_path, self.arch, uuid_str, self.symfile) if not self.module: return 'error: unable to get module for (%s) "%s"' % ( self.arch, self.get_resolved_path()) if self.has_section_load_info(): return self.load_module(target) else: return None # No sections, the module was added to the target, so success else: return 'error: invalid target' def locate_module_and_debug_symbols(self): # By default, just use the paths that were supplied in: # self.path # self.resolved_path # self.module # self.symfile # Subclasses can inherit from this class and override this function self.resolved = True return True def get_uuid(self): if not self.uuid and self.module: self.uuid = uuid.UUID(self.module.GetUUIDString()) return self.uuid def get_normalized_uuid_string(self): if self.uuid: return str(self.uuid).upper() return None def create_target(self): '''Create a target using the information in this Image object.''' if self.unavailable: return None if self.locate_module_and_debug_symbols(): resolved_path = self.get_resolved_path() path_spec = lldb.SBFileSpec(resolved_path) #result.PutCString ('plist[%s] = %s' % (uuid, self.plist)) error = lldb.SBError() target = lldb.debugger.CreateTarget( resolved_path, self.arch, None, False, error) if target: self.module = target.FindModule(path_spec) if self.has_section_load_info(): err = self.load_module(target) if err: print('ERROR: ', err) return target else: print('error: unable to create a valid target for (%s) "%s"' % (self.arch, self.path)) else: print('error: unable to locate main executable (%s) "%s"' % (self.arch, self.path)) return None class Symbolicator(object): def __init__(self): """A class the represents the information needed to symbolicate addresses in a program""" self.target = None self.images = list() # a list of images to be used when symbolicating self.addr_mask = 0xffffffffffffffff @classmethod def InitWithSBTarget(cls, target): obj = cls() obj.target = target obj.images = list() triple = target.triple if triple: arch = triple.split('-')[0] if "arm" in arch: obj.addr_mask = 0xfffffffffffffffe for module in target.modules: image = Image.InitWithSBTargetAndSBModule(target, module) obj.images.append(image) return obj def __str__(self): s = "Symbolicator:\n" if self.target: s += "Target = '%s'\n" % (self.target) s += "Target modules:\n" for m in self.target.modules: s += str(m) + "\n" s += "Images:\n" for image in self.images: s += ' %s\n' % (image) return s def find_images_with_identifier(self, identifier): images = list() for image in self.images: if image.identifier == identifier: images.append(image) if len(images) == 0: regex_text = '^.*\.%s$' % (re.escape(identifier)) regex = re.compile(regex_text) for image in self.images: if regex.match(image.identifier): images.append(image) return images def find_image_containing_load_addr(self, load_addr): for image in self.images: if image.get_section_containing_load_addr(load_addr): return image return None def create_target(self): if self.target: return self.target if self.images: for image in self.images: self.target = image.create_target() if self.target: if self.target.GetAddressByteSize() == 4: triple = self.target.triple if triple: arch = triple.split('-')[0] if "arm" in arch: self.addr_mask = 0xfffffffffffffffe return self.target return None def symbolicate(self, load_addr, verbose=False): if not self.target: self.create_target() if self.target: live_process = False process = self.target.process if process: state = process.state if state > lldb.eStateUnloaded and state < lldb.eStateDetached: live_process = True # If we don't have a live process, we can attempt to find the image # that a load address belongs to and lazily load its module in the # target, but we shouldn't do any of this if we have a live process if not live_process: image = self.find_image_containing_load_addr(load_addr) if image: image.add_module(self.target) symbolicated_address = Address(self.target, load_addr) if symbolicated_address.symbolicate(verbose): if symbolicated_address.so_addr: symbolicated_addresses = list() symbolicated_addresses.append(symbolicated_address) # See if we were able to reconstruct anything? while True: inlined_parent_so_addr = lldb.SBAddress() inlined_parent_sym_ctx = symbolicated_address.sym_ctx.GetParentOfInlinedScope( symbolicated_address.so_addr, inlined_parent_so_addr) if not inlined_parent_sym_ctx: break if not inlined_parent_so_addr: break symbolicated_address = Address( self.target, inlined_parent_so_addr.GetLoadAddress( self.target)) symbolicated_address.sym_ctx = inlined_parent_sym_ctx symbolicated_address.so_addr = inlined_parent_so_addr symbolicated_address.symbolicate(verbose) # push the new frame onto the new frame stack symbolicated_addresses.append(symbolicated_address) if symbolicated_addresses: return symbolicated_addresses else: print('error: no target in Symbolicator') return None def disassemble_instructions( target, instructions, pc, insts_before_pc, insts_after_pc, non_zeroeth_frame): lines = list() pc_index = -1 comment_column = 50 for inst_idx, inst in enumerate(instructions): inst_pc = inst.GetAddress().GetLoadAddress(target) if pc == inst_pc: pc_index = inst_idx mnemonic = inst.GetMnemonic(target) operands = inst.GetOperands(target) comment = inst.GetComment(target) #data = inst.GetData (target) lines.append("%#16.16x: %8s %s" % (inst_pc, mnemonic, operands)) if comment: line_len = len(lines[-1]) if line_len < comment_column: lines[-1] += ' ' * (comment_column - line_len) lines[-1] += "; %s" % comment if pc_index >= 0: # If we are disassembling the non-zeroeth frame, we need to backup the # PC by 1 if non_zeroeth_frame and pc_index > 0: pc_index = pc_index - 1 if insts_before_pc == -1: start_idx = 0 else: start_idx = pc_index - insts_before_pc if start_idx < 0: start_idx = 0 if insts_before_pc == -1: end_idx = inst_idx else: end_idx = pc_index + insts_after_pc if end_idx > inst_idx: end_idx = inst_idx for i in range(start_idx, end_idx + 1): if i == pc_index: print(' -> ', lines[i]) else: print(' ', lines[i]) def print_module_section_data(section): print(section) section_data = section.GetSectionData() if section_data: ostream = lldb.SBStream() section_data.GetDescription(ostream, section.GetFileAddress()) print(ostream.GetData()) def print_module_section(section, depth): print(section) if depth > 0: num_sub_sections = section.GetNumSubSections() for sect_idx in range(num_sub_sections): print_module_section( section.GetSubSectionAtIndex(sect_idx), depth - 1) def print_module_sections(module, depth): for sect in module.section_iter(): print_module_section(sect, depth) def print_module_symbols(module): for sym in module: print(sym) def Symbolicate(command_args): usage = "usage: %prog [options] [addr2 ...]" description = '''Symbolicate one or more addresses using LLDB's python scripting API..''' parser = optparse.OptionParser( description=description, prog='crashlog.py', usage=usage) parser.add_option( '-v', '--verbose', action='store_true', dest='verbose', help='display verbose debug info', default=False) parser.add_option( '-p', '--platform', type='string', metavar='platform', dest='platform', help='Specify the platform to use when creating the debug target. Valid values include "localhost", "darwin-kernel", "ios-simulator", "remote-freebsd", "remote-macosx", "remote-ios", "remote-linux".') parser.add_option( '-f', '--file', type='string', metavar='file', dest='file', help='Specify a file to use when symbolicating') parser.add_option( '-a', '--arch', type='string', metavar='arch', dest='arch', help='Specify a architecture to use when symbolicating') parser.add_option( '-s', '--slide', type='int', metavar='slide', dest='slide', help='Specify the slide to use on the file specified with the --file option', default=None) parser.add_option( '--section', type='string', action='append', dest='section_strings', help='specify = or =-') try: (options, args) = parser.parse_args(command_args) except: return symbolicator = Symbolicator() images = list() if options.file: image = Image(options.file) image.arch = options.arch # Add any sections that were specified with one or more --section # options if options.section_strings: for section_str in options.section_strings: section = Section() if section.set_from_string(section_str): image.add_section(section) else: sys.exit(1) if options.slide is not None: image.slide = options.slide symbolicator.images.append(image) target = symbolicator.create_target() if options.verbose: print(symbolicator) if target: for addr_str in args: addr = int(addr_str, 0) symbolicated_addrs = symbolicator.symbolicate( addr, options.verbose) for symbolicated_addr in symbolicated_addrs: print(symbolicated_addr) print() else: print('error: no target for %s' % (symbolicator)) if __name__ == '__main__': # Create a new debugger instance lldb.debugger = lldb.SBDebugger.Create() Symbolicate(sys.argv[1:])