1#!/usr/bin/python 2 3#---------------------------------------------------------------------- 4# Be sure to add the python path that points to the LLDB shared library. 5# 6# To use this in the embedded python interpreter using "lldb": 7# 8# cd /path/containing/crashlog.py 9# lldb 10# (lldb) script import crashlog 11# "crashlog" command installed, type "crashlog --help" for detailed help 12# (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash 13# 14# The benefit of running the crashlog command inside lldb in the 15# embedded python interpreter is when the command completes, there 16# will be a target with all of the files loaded at the locations 17# described in the crash log. Only the files that have stack frames 18# in the backtrace will be loaded unless the "--load-all" option 19# has been specified. This allows users to explore the program in the 20# state it was in right at crash time. 21# 22# On MacOSX csh, tcsh: 23# ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash ) 24# 25# On MacOSX sh, bash: 26# PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash 27#---------------------------------------------------------------------- 28 29from __future__ import print_function 30from builtins import str 31from builtins import range 32from builtins import object 33import lldb 34import subprocess 35import optparse 36import os 37import plistlib 38import re 39import shlex 40import sys 41import time 42import uuid 43 44 45class Address(object): 46 """Class that represents an address that will be symbolicated""" 47 48 def __init__(self, target, load_addr): 49 self.target = target 50 self.load_addr = load_addr # The load address that this object represents 51 # the resolved lldb.SBAddress (if any), named so_addr for 52 # section/offset address 53 self.so_addr = None 54 self.sym_ctx = None # The cached symbol context for this address 55 # Any original textual description of this address to be used as a 56 # backup in case symbolication fails 57 self.description = None 58 self.symbolication = None # The cached symbolicated string that describes this address 59 self.inlined = False 60 61 def __str__(self): 62 s = "%#16.16x" % (self.load_addr) 63 if self.symbolication: 64 s += " %s" % (self.symbolication) 65 elif self.description: 66 s += " %s" % (self.description) 67 elif self.so_addr: 68 s += " %s" % (self.so_addr) 69 return s 70 71 def resolve_addr(self): 72 if self.so_addr is None: 73 self.so_addr = self.target.ResolveLoadAddress(self.load_addr) 74 return self.so_addr 75 76 def is_inlined(self): 77 return self.inlined 78 79 def get_symbol_context(self): 80 if self.sym_ctx is None: 81 sb_addr = self.resolve_addr() 82 if sb_addr: 83 self.sym_ctx = self.target.ResolveSymbolContextForAddress( 84 sb_addr, lldb.eSymbolContextEverything) 85 else: 86 self.sym_ctx = lldb.SBSymbolContext() 87 return self.sym_ctx 88 89 def get_instructions(self): 90 sym_ctx = self.get_symbol_context() 91 if sym_ctx: 92 function = sym_ctx.GetFunction() 93 if function: 94 return function.GetInstructions(self.target) 95 return sym_ctx.GetSymbol().GetInstructions(self.target) 96 return None 97 98 def symbolicate(self, verbose=False): 99 if self.symbolication is None: 100 self.symbolication = '' 101 self.inlined = False 102 sym_ctx = self.get_symbol_context() 103 if sym_ctx: 104 module = sym_ctx.GetModule() 105 if module: 106 # Print full source file path in verbose mode 107 if verbose: 108 self.symbolication += str(module.GetFileSpec()) + '`' 109 else: 110 self.symbolication += module.GetFileSpec().GetFilename() + '`' 111 function_start_load_addr = -1 112 function = sym_ctx.GetFunction() 113 block = sym_ctx.GetBlock() 114 line_entry = sym_ctx.GetLineEntry() 115 symbol = sym_ctx.GetSymbol() 116 inlined_block = block.GetContainingInlinedBlock() 117 if function: 118 self.symbolication += function.GetName() 119 120 if inlined_block: 121 self.inlined = True 122 self.symbolication += ' [inlined] ' + \ 123 inlined_block.GetInlinedName() 124 block_range_idx = inlined_block.GetRangeIndexForBlockAddress( 125 self.so_addr) 126 if block_range_idx < lldb.UINT32_MAX: 127 block_range_start_addr = inlined_block.GetRangeStartAddress( 128 block_range_idx) 129 function_start_load_addr = block_range_start_addr.GetLoadAddress( 130 self.target) 131 if function_start_load_addr == -1: 132 function_start_load_addr = function.GetStartAddress().GetLoadAddress(self.target) 133 elif symbol: 134 self.symbolication += symbol.GetName() 135 function_start_load_addr = symbol.GetStartAddress().GetLoadAddress(self.target) 136 else: 137 self.symbolication = '' 138 return False 139 140 # Dump the offset from the current function or symbol if it 141 # is non zero 142 function_offset = self.load_addr - function_start_load_addr 143 if function_offset > 0: 144 self.symbolication += " + %u" % (function_offset) 145 elif function_offset < 0: 146 self.symbolication += " %i (invalid negative offset, file a bug) " % function_offset 147 148 # Print out any line information if any is available 149 if line_entry.GetFileSpec(): 150 # Print full source file path in verbose mode 151 if verbose: 152 self.symbolication += ' at %s' % line_entry.GetFileSpec() 153 else: 154 self.symbolication += ' at %s' % line_entry.GetFileSpec().GetFilename() 155 self.symbolication += ':%u' % line_entry.GetLine() 156 column = line_entry.GetColumn() 157 if column > 0: 158 self.symbolication += ':%u' % column 159 return True 160 return False 161 162 163class Section(object): 164 """Class that represents an load address range""" 165 sect_info_regex = re.compile('(?P<name>[^=]+)=(?P<range>.*)') 166 addr_regex = re.compile('^\s*(?P<start>0x[0-9A-Fa-f]+)\s*$') 167 range_regex = re.compile( 168 '^\s*(?P<start>0x[0-9A-Fa-f]+)\s*(?P<op>[-+])\s*(?P<end>0x[0-9A-Fa-f]+)\s*$') 169 170 def __init__(self, start_addr=None, end_addr=None, name=None): 171 self.start_addr = start_addr 172 self.end_addr = end_addr 173 self.name = name 174 175 @classmethod 176 def InitWithSBTargetAndSBSection(cls, target, section): 177 sect_load_addr = section.GetLoadAddress(target) 178 if sect_load_addr != lldb.LLDB_INVALID_ADDRESS: 179 obj = cls( 180 sect_load_addr, 181 sect_load_addr + 182 section.size, 183 section.name) 184 return obj 185 else: 186 return None 187 188 def contains(self, addr): 189 return self.start_addr <= addr and addr < self.end_addr 190 191 def set_from_string(self, s): 192 match = self.sect_info_regex.match(s) 193 if match: 194 self.name = match.group('name') 195 range_str = match.group('range') 196 addr_match = self.addr_regex.match(range_str) 197 if addr_match: 198 self.start_addr = int(addr_match.group('start'), 16) 199 self.end_addr = None 200 return True 201 202 range_match = self.range_regex.match(range_str) 203 if range_match: 204 self.start_addr = int(range_match.group('start'), 16) 205 self.end_addr = int(range_match.group('end'), 16) 206 op = range_match.group('op') 207 if op == '+': 208 self.end_addr += self.start_addr 209 return True 210 print('error: invalid section info string "%s"' % s) 211 print('Valid section info formats are:') 212 print('Format Example Description') 213 print('--------------------- -----------------------------------------------') 214 print('<name>=<base> __TEXT=0x123000 Section from base address only') 215 print('<name>=<base>-<end> __TEXT=0x123000-0x124000 Section from base address and end address') 216 print('<name>=<base>+<size> __TEXT=0x123000+0x1000 Section from base address and size') 217 return False 218 219 def __str__(self): 220 if self.name: 221 if self.end_addr is not None: 222 if self.start_addr is not None: 223 return "%s=[0x%16.16x - 0x%16.16x)" % ( 224 self.name, self.start_addr, self.end_addr) 225 else: 226 if self.start_addr is not None: 227 return "%s=0x%16.16x" % (self.name, self.start_addr) 228 return self.name 229 return "<invalid>" 230 231 232class Image(object): 233 """A class that represents an executable image and any associated data""" 234 235 def __init__(self, path, uuid=None): 236 self.path = path 237 self.resolved_path = None 238 self.resolved = False 239 self.unavailable = False 240 self.uuid = uuid 241 self.section_infos = list() 242 self.identifier = None 243 self.version = None 244 self.arch = None 245 self.module = None 246 self.symfile = None 247 self.slide = None 248 249 @classmethod 250 def InitWithSBTargetAndSBModule(cls, target, module): 251 '''Initialize this Image object with a module from a target.''' 252 obj = cls(module.file.fullpath, module.uuid) 253 obj.resolved_path = module.platform_file.fullpath 254 obj.resolved = True 255 obj.arch = module.triple 256 for section in module.sections: 257 symb_section = Section.InitWithSBTargetAndSBSection( 258 target, section) 259 if symb_section: 260 obj.section_infos.append(symb_section) 261 obj.arch = module.triple 262 obj.module = module 263 obj.symfile = None 264 obj.slide = None 265 return obj 266 267 def dump(self, prefix): 268 print("%s%s" % (prefix, self)) 269 270 def debug_dump(self): 271 print('path = "%s"' % (self.path)) 272 print('resolved_path = "%s"' % (self.resolved_path)) 273 print('resolved = %i' % (self.resolved)) 274 print('unavailable = %i' % (self.unavailable)) 275 print('uuid = %s' % (self.uuid)) 276 print('section_infos = %s' % (self.section_infos)) 277 print('identifier = "%s"' % (self.identifier)) 278 print('version = %s' % (self.version)) 279 print('arch = %s' % (self.arch)) 280 print('module = %s' % (self.module)) 281 print('symfile = "%s"' % (self.symfile)) 282 print('slide = %i (0x%x)' % (self.slide, self.slide)) 283 284 def __str__(self): 285 s = '' 286 if self.uuid: 287 s += "%s " % (self.get_uuid()) 288 if self.arch: 289 s += "%s " % (self.arch) 290 if self.version: 291 s += "%s " % (self.version) 292 resolved_path = self.get_resolved_path() 293 if resolved_path: 294 s += "%s " % (resolved_path) 295 for section_info in self.section_infos: 296 s += ", %s" % (section_info) 297 if self.slide is not None: 298 s += ', slide = 0x%16.16x' % self.slide 299 return s 300 301 def add_section(self, section): 302 # print "added '%s' to '%s'" % (section, self.path) 303 self.section_infos.append(section) 304 305 def get_section_containing_load_addr(self, load_addr): 306 for section_info in self.section_infos: 307 if section_info.contains(load_addr): 308 return section_info 309 return None 310 311 def get_resolved_path(self): 312 if self.resolved_path: 313 return self.resolved_path 314 elif self.path: 315 return self.path 316 return None 317 318 def get_resolved_path_basename(self): 319 path = self.get_resolved_path() 320 if path: 321 return os.path.basename(path) 322 return None 323 324 def symfile_basename(self): 325 if self.symfile: 326 return os.path.basename(self.symfile) 327 return None 328 329 def has_section_load_info(self): 330 return self.section_infos or self.slide is not None 331 332 def load_module(self, target): 333 if self.unavailable: 334 return None # We already warned that we couldn't find this module, so don't return an error string 335 # Load this module into "target" using the section infos to 336 # set the section load addresses 337 if self.has_section_load_info(): 338 if target: 339 if self.module: 340 if self.section_infos: 341 num_sections_loaded = 0 342 for section_info in self.section_infos: 343 if section_info.name: 344 section = self.module.FindSection( 345 section_info.name) 346 if section: 347 error = target.SetSectionLoadAddress( 348 section, section_info.start_addr) 349 if error.Success(): 350 num_sections_loaded += 1 351 else: 352 return 'error: %s' % error.GetCString() 353 else: 354 return 'error: unable to find the section named "%s"' % section_info.name 355 else: 356 return 'error: unable to find "%s" section in "%s"' % ( 357 range.name, self.get_resolved_path()) 358 if num_sections_loaded == 0: 359 return 'error: no sections were successfully loaded' 360 else: 361 err = target.SetModuleLoadAddress( 362 self.module, self.slide) 363 if err.Fail(): 364 return err.GetCString() 365 return None 366 else: 367 return 'error: invalid module' 368 else: 369 return 'error: invalid target' 370 else: 371 return 'error: no section infos' 372 373 def add_module(self, target): 374 '''Add the Image described in this object to "target" and load the sections if "load" is True.''' 375 if target: 376 # Try and find using UUID only first so that paths need not match 377 # up 378 uuid_str = self.get_normalized_uuid_string() 379 if uuid_str: 380 self.module = target.AddModule(None, None, uuid_str) 381 if not self.module: 382 self.locate_module_and_debug_symbols() 383 if self.unavailable: 384 return None 385 resolved_path = self.get_resolved_path() 386 self.module = target.AddModule( 387 resolved_path, self.arch, uuid_str, self.symfile) 388 if not self.module: 389 return 'error: unable to get module for (%s) "%s"' % ( 390 self.arch, self.get_resolved_path()) 391 if self.has_section_load_info(): 392 return self.load_module(target) 393 else: 394 return None # No sections, the module was added to the target, so success 395 else: 396 return 'error: invalid target' 397 398 def locate_module_and_debug_symbols(self): 399 # By default, just use the paths that were supplied in: 400 # self.path 401 # self.resolved_path 402 # self.module 403 # self.symfile 404 # Subclasses can inherit from this class and override this function 405 self.resolved = True 406 return True 407 408 def get_uuid(self): 409 if not self.uuid and self.module: 410 self.uuid = uuid.UUID(self.module.GetUUIDString()) 411 return self.uuid 412 413 def get_normalized_uuid_string(self): 414 if self.uuid: 415 return str(self.uuid).upper() 416 return None 417 418 def create_target(self): 419 '''Create a target using the information in this Image object.''' 420 if self.unavailable: 421 return None 422 423 if self.locate_module_and_debug_symbols(): 424 resolved_path = self.get_resolved_path() 425 path_spec = lldb.SBFileSpec(resolved_path) 426 #result.PutCString ('plist[%s] = %s' % (uuid, self.plist)) 427 error = lldb.SBError() 428 target = lldb.debugger.CreateTarget( 429 resolved_path, self.arch, None, False, error) 430 if target: 431 self.module = target.FindModule(path_spec) 432 if self.has_section_load_info(): 433 err = self.load_module(target) 434 if err: 435 print('ERROR: ', err) 436 return target 437 else: 438 print('error: unable to create a valid target for (%s) "%s"' % (self.arch, self.path)) 439 else: 440 print('error: unable to locate main executable (%s) "%s"' % (self.arch, self.path)) 441 return None 442 443 444class Symbolicator(object): 445 446 def __init__(self): 447 """A class the represents the information needed to symbolicate addresses in a program""" 448 self.target = None 449 self.images = list() # a list of images to be used when symbolicating 450 self.addr_mask = 0xffffffffffffffff 451 452 @classmethod 453 def InitWithSBTarget(cls, target): 454 obj = cls() 455 obj.target = target 456 obj.images = list() 457 triple = target.triple 458 if triple: 459 arch = triple.split('-')[0] 460 if "arm" in arch: 461 obj.addr_mask = 0xfffffffffffffffe 462 463 for module in target.modules: 464 image = Image.InitWithSBTargetAndSBModule(target, module) 465 obj.images.append(image) 466 return obj 467 468 def __str__(self): 469 s = "Symbolicator:\n" 470 if self.target: 471 s += "Target = '%s'\n" % (self.target) 472 s += "Target modules:\n" 473 for m in self.target.modules: 474 s += str(m) + "\n" 475 s += "Images:\n" 476 for image in self.images: 477 s += ' %s\n' % (image) 478 return s 479 480 def find_images_with_identifier(self, identifier): 481 images = list() 482 for image in self.images: 483 if image.identifier == identifier: 484 images.append(image) 485 if len(images) == 0: 486 regex_text = '^.*\.%s$' % (re.escape(identifier)) 487 regex = re.compile(regex_text) 488 for image in self.images: 489 if regex.match(image.identifier): 490 images.append(image) 491 return images 492 493 def find_image_containing_load_addr(self, load_addr): 494 for image in self.images: 495 if image.get_section_containing_load_addr(load_addr): 496 return image 497 return None 498 499 def create_target(self): 500 if self.target: 501 return self.target 502 503 if self.images: 504 for image in self.images: 505 self.target = image.create_target() 506 if self.target: 507 if self.target.GetAddressByteSize() == 4: 508 triple = self.target.triple 509 if triple: 510 arch = triple.split('-')[0] 511 if "arm" in arch: 512 self.addr_mask = 0xfffffffffffffffe 513 return self.target 514 return None 515 516 def symbolicate(self, load_addr, verbose=False): 517 if not self.target: 518 self.create_target() 519 if self.target: 520 live_process = False 521 process = self.target.process 522 if process: 523 state = process.state 524 if state > lldb.eStateUnloaded and state < lldb.eStateDetached: 525 live_process = True 526 # If we don't have a live process, we can attempt to find the image 527 # that a load address belongs to and lazily load its module in the 528 # target, but we shouldn't do any of this if we have a live process 529 if not live_process: 530 image = self.find_image_containing_load_addr(load_addr) 531 if image: 532 image.add_module(self.target) 533 symbolicated_address = Address(self.target, load_addr) 534 if symbolicated_address.symbolicate(verbose): 535 if symbolicated_address.so_addr: 536 symbolicated_addresses = list() 537 symbolicated_addresses.append(symbolicated_address) 538 # See if we were able to reconstruct anything? 539 while True: 540 inlined_parent_so_addr = lldb.SBAddress() 541 inlined_parent_sym_ctx = symbolicated_address.sym_ctx.GetParentOfInlinedScope( 542 symbolicated_address.so_addr, inlined_parent_so_addr) 543 if not inlined_parent_sym_ctx: 544 break 545 if not inlined_parent_so_addr: 546 break 547 548 symbolicated_address = Address( 549 self.target, inlined_parent_so_addr.GetLoadAddress( 550 self.target)) 551 symbolicated_address.sym_ctx = inlined_parent_sym_ctx 552 symbolicated_address.so_addr = inlined_parent_so_addr 553 symbolicated_address.symbolicate(verbose) 554 555 # push the new frame onto the new frame stack 556 symbolicated_addresses.append(symbolicated_address) 557 558 if symbolicated_addresses: 559 return symbolicated_addresses 560 else: 561 print('error: no target in Symbolicator') 562 return None 563 564 565def disassemble_instructions( 566 target, 567 instructions, 568 pc, 569 insts_before_pc, 570 insts_after_pc, 571 non_zeroeth_frame): 572 lines = list() 573 pc_index = -1 574 comment_column = 50 575 for inst_idx, inst in enumerate(instructions): 576 inst_pc = inst.GetAddress().GetLoadAddress(target) 577 if pc == inst_pc: 578 pc_index = inst_idx 579 mnemonic = inst.GetMnemonic(target) 580 operands = inst.GetOperands(target) 581 comment = inst.GetComment(target) 582 #data = inst.GetData (target) 583 lines.append("%#16.16x: %8s %s" % (inst_pc, mnemonic, operands)) 584 if comment: 585 line_len = len(lines[-1]) 586 if line_len < comment_column: 587 lines[-1] += ' ' * (comment_column - line_len) 588 lines[-1] += "; %s" % comment 589 590 if pc_index >= 0: 591 # If we are disassembling the non-zeroeth frame, we need to backup the 592 # PC by 1 593 if non_zeroeth_frame and pc_index > 0: 594 pc_index = pc_index - 1 595 if insts_before_pc == -1: 596 start_idx = 0 597 else: 598 start_idx = pc_index - insts_before_pc 599 if start_idx < 0: 600 start_idx = 0 601 if insts_before_pc == -1: 602 end_idx = inst_idx 603 else: 604 end_idx = pc_index + insts_after_pc 605 if end_idx > inst_idx: 606 end_idx = inst_idx 607 for i in range(start_idx, end_idx + 1): 608 if i == pc_index: 609 print(' -> ', lines[i]) 610 else: 611 print(' ', lines[i]) 612 613 614def print_module_section_data(section): 615 print(section) 616 section_data = section.GetSectionData() 617 if section_data: 618 ostream = lldb.SBStream() 619 section_data.GetDescription(ostream, section.GetFileAddress()) 620 print(ostream.GetData()) 621 622 623def print_module_section(section, depth): 624 print(section) 625 if depth > 0: 626 num_sub_sections = section.GetNumSubSections() 627 for sect_idx in range(num_sub_sections): 628 print_module_section( 629 section.GetSubSectionAtIndex(sect_idx), depth - 1) 630 631 632def print_module_sections(module, depth): 633 for sect in module.section_iter(): 634 print_module_section(sect, depth) 635 636 637def print_module_symbols(module): 638 for sym in module: 639 print(sym) 640 641 642def Symbolicate(command_args): 643 644 usage = "usage: %prog [options] <addr1> [addr2 ...]" 645 description = '''Symbolicate one or more addresses using LLDB's python scripting API..''' 646 parser = optparse.OptionParser( 647 description=description, 648 prog='crashlog.py', 649 usage=usage) 650 parser.add_option( 651 '-v', 652 '--verbose', 653 action='store_true', 654 dest='verbose', 655 help='display verbose debug info', 656 default=False) 657 parser.add_option( 658 '-p', 659 '--platform', 660 type='string', 661 metavar='platform', 662 dest='platform', 663 help='Specify the platform to use when creating the debug target. Valid values include "localhost", "darwin-kernel", "ios-simulator", "remote-freebsd", "remote-macosx", "remote-ios", "remote-linux".') 664 parser.add_option( 665 '-f', 666 '--file', 667 type='string', 668 metavar='file', 669 dest='file', 670 help='Specify a file to use when symbolicating') 671 parser.add_option( 672 '-a', 673 '--arch', 674 type='string', 675 metavar='arch', 676 dest='arch', 677 help='Specify a architecture to use when symbolicating') 678 parser.add_option( 679 '-s', 680 '--slide', 681 type='int', 682 metavar='slide', 683 dest='slide', 684 help='Specify the slide to use on the file specified with the --file option', 685 default=None) 686 parser.add_option( 687 '--section', 688 type='string', 689 action='append', 690 dest='section_strings', 691 help='specify <sect-name>=<start-addr> or <sect-name>=<start-addr>-<end-addr>') 692 try: 693 (options, args) = parser.parse_args(command_args) 694 except: 695 return 696 symbolicator = Symbolicator() 697 images = list() 698 if options.file: 699 image = Image(options.file) 700 image.arch = options.arch 701 # Add any sections that were specified with one or more --section 702 # options 703 if options.section_strings: 704 for section_str in options.section_strings: 705 section = Section() 706 if section.set_from_string(section_str): 707 image.add_section(section) 708 else: 709 sys.exit(1) 710 if options.slide is not None: 711 image.slide = options.slide 712 symbolicator.images.append(image) 713 714 target = symbolicator.create_target() 715 if options.verbose: 716 print(symbolicator) 717 if target: 718 for addr_str in args: 719 addr = int(addr_str, 0) 720 symbolicated_addrs = symbolicator.symbolicate( 721 addr, options.verbose) 722 for symbolicated_addr in symbolicated_addrs: 723 print(symbolicated_addr) 724 print() 725 else: 726 print('error: no target for %s' % (symbolicator)) 727 728if __name__ == '__main__': 729 # Create a new debugger instance 730 lldb.debugger = lldb.SBDebugger.Create() 731 Symbolicate(sys.argv[1:]) 732