1#!/usr/bin/python
2
3#----------------------------------------------------------------------
4# Be sure to add the python path that points to the LLDB shared library.
5#
6# To use this in the embedded python interpreter using "lldb":
7#
8#   cd /path/containing/crashlog.py
9#   lldb
10#   (lldb) script import crashlog
11#   "crashlog" command installed, type "crashlog --help" for detailed help
12#   (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash
13#
14# The benefit of running the crashlog command inside lldb in the
15# embedded python interpreter is when the command completes, there
16# will be a target with all of the files loaded at the locations
17# described in the crash log. Only the files that have stack frames
18# in the backtrace will be loaded unless the "--load-all" option
19# has been specified. This allows users to explore the program in the
20# state it was in right at crash time.
21#
22# On MacOSX csh, tcsh:
23#   ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )
24#
25# On MacOSX sh, bash:
26#   PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
27#----------------------------------------------------------------------
28
29from __future__ import print_function
30from builtins import str
31from builtins import range
32from builtins import object
33import lldb
34import subprocess
35import optparse
36import os
37import plistlib
38import re
39import shlex
40import sys
41import time
42import uuid
43
44
class Address(object):
    """Class that represents an address that will be symbolicated.

    The resolved lldb.SBAddress, the symbol context and the symbolicated
    string are each computed on first use and then cached on the instance.
    """

    def __init__(self, target, load_addr):
        """Remember the target and the load address; nothing is resolved yet."""
        self.target = target
        self.load_addr = load_addr  # The load address that this object represents
        # The resolved lldb.SBAddress (if any), named so_addr for
        # section/offset address
        self.so_addr = None
        self.sym_ctx = None  # The cached symbol context for this address
        # Any original textual description of this address to be used as a
        # backup in case symbolication fails
        self.description = None
        # The cached symbolicated string that describes this address. None
        # means symbolicate() has not run yet, '' means it ran and failed.
        self.symbolication = None
        # Set by symbolicate() when the address lies in an inlined block
        self.inlined = False

    def __str__(self):
        """Return the hex load address followed by the best available text.

        The first non-empty of symbolication, description and so_addr is
        appended after the address.
        """
        s = "%#16.16x" % (self.load_addr)
        if self.symbolication:
            s += " %s" % (self.symbolication)
        elif self.description:
            s += " %s" % (self.description)
        elif self.so_addr:
            s += " %s" % (self.so_addr)
        return s

    def resolve_addr(self):
        """Return the cached section/offset address, resolving it if needed."""
        if self.so_addr is None:
            self.so_addr = self.target.ResolveLoadAddress(self.load_addr)
        return self.so_addr

    def is_inlined(self):
        """Return the inlined flag recorded by the last symbolicate() run."""
        return self.inlined

    def get_symbol_context(self):
        """Return the cached symbol context, looking it up on first use.

        When the load address cannot be resolved an empty
        lldb.SBSymbolContext is cached instead.
        """
        if self.sym_ctx is None:
            sb_addr = self.resolve_addr()
            if sb_addr:
                self.sym_ctx = self.target.ResolveSymbolContextForAddress(
                    sb_addr, lldb.eSymbolContextEverything)
            else:
                self.sym_ctx = lldb.SBSymbolContext()
        return self.sym_ctx

    def get_instructions(self):
        """Return the instructions of the enclosing function or symbol.

        The function from the symbol context is preferred; the symbol is
        used when there is no function. Returns None when the symbol
        context itself is not valid.
        """
        sym_ctx = self.get_symbol_context()
        if not sym_ctx:
            return None
        function = sym_ctx.GetFunction()
        if function:
            return function.GetInstructions(self.target)
        return sym_ctx.GetSymbol().GetInstructions(self.target)

    def symbolicate(self, verbose=False):
        """Build and cache the symbolicated description of this address.

        Args:
            verbose: When True, full module and source file paths are used
                instead of base names.

        Returns:
            True if a symbolicated string is available, False otherwise.
            Repeat calls do not redo the work; they report the outcome of
            the first attempt.
        """
        if self.symbolication is not None:
            # Already attempted: an empty string records a failed attempt,
            # anything else is a successful one.
            return bool(self.symbolication)

        self.symbolication = ''
        self.inlined = False
        sym_ctx = self.get_symbol_context()
        if not sym_ctx:
            return False
        module = sym_ctx.GetModule()
        if not module:
            return False

        # Print full module file path in verbose mode
        if verbose:
            text = str(module.GetFileSpec()) + '`'
        else:
            text = module.GetFileSpec().GetFilename() + '`'

        function_start_load_addr = -1
        function = sym_ctx.GetFunction()
        block = sym_ctx.GetBlock()
        line_entry = sym_ctx.GetLineEntry()
        symbol = sym_ctx.GetSymbol()
        inlined_block = block.GetContainingInlinedBlock()
        if function:
            text += function.GetName()
            if inlined_block:
                self.inlined = True
                text += ' [inlined] ' + inlined_block.GetInlinedName()
                # Measure the offset from the start of the inlined block's
                # range that contains this address, when there is one.
                block_range_idx = inlined_block.GetRangeIndexForBlockAddress(
                    self.so_addr)
                if block_range_idx < lldb.UINT32_MAX:
                    block_range_start_addr = inlined_block.GetRangeStartAddress(
                        block_range_idx)
                    function_start_load_addr = block_range_start_addr.GetLoadAddress(
                        self.target)
            if function_start_load_addr == -1:
                function_start_load_addr = function.GetStartAddress().GetLoadAddress(
                    self.target)
        elif symbol:
            text += symbol.GetName()
            function_start_load_addr = symbol.GetStartAddress().GetLoadAddress(
                self.target)
        else:
            # Neither a function nor a symbol: leave the cache empty so
            # __str__ falls back to the description.
            return False

        # Dump the offset from the current function or symbol if it
        # is non zero
        function_offset = self.load_addr - function_start_load_addr
        if function_offset > 0:
            text += " + %u" % (function_offset)
        elif function_offset < 0:
            text += " %i (invalid negative offset, file a bug) " % function_offset

        # Print out any line information if any is available
        file_spec = line_entry.GetFileSpec()
        if file_spec:
            # Print full source file path in verbose mode
            if verbose:
                text += ' at %s' % file_spec
            else:
                text += ' at %s' % file_spec.GetFilename()
            text += ':%u' % line_entry.GetLine()
            column = line_entry.GetColumn()
            if column > 0:
                text += ':%u' % column

        self.symbolication = text
        return True
161
162
class Section(object):
    """Class that represents a named load address range.

    The range is half open: start_addr is included, end_addr is not.
    end_addr is None when only a base address is known.
    """
    # Raw strings keep "\s" a regex escape instead of an invalid string
    # escape (which newer Python versions warn about).
    sect_info_regex = re.compile(r'(?P<name>[^=]+)=(?P<range>.*)')
    addr_regex = re.compile(r'^\s*(?P<start>0x[0-9A-Fa-f]+)\s*$')
    range_regex = re.compile(
        r'^\s*(?P<start>0x[0-9A-Fa-f]+)\s*(?P<op>[-+])\s*(?P<end>0x[0-9A-Fa-f]+)\s*$')

    def __init__(self, start_addr=None, end_addr=None, name=None):
        self.start_addr = start_addr
        self.end_addr = end_addr
        self.name = name

    @classmethod
    def InitWithSBTargetAndSBSection(cls, target, section):
        """Create a Section from an lldb section loaded in "target".

        Returns None when the section has no valid load address in the
        target.
        """
        sect_load_addr = section.GetLoadAddress(target)
        if sect_load_addr == lldb.LLDB_INVALID_ADDRESS:
            return None
        return cls(
            sect_load_addr,
            sect_load_addr + section.size,
            section.name)

    def contains(self, addr):
        """Return True if "addr" lies in [start_addr, end_addr).

        A section whose start or end is unknown (for example one parsed
        from "<name>=<base>") contains nothing. Comparing against None
        would raise TypeError on Python 3.
        """
        if self.start_addr is None or self.end_addr is None:
            return False
        return self.start_addr <= addr and addr < self.end_addr

    def set_from_string(self, s):
        """Fill in this section from a "<name>=<range>" description.

        Accepted range forms are "<base>", "<base>-<end>" and
        "<base>+<size>", all in hexadecimal with a "0x" prefix.

        Returns:
            True on success. On failure a usage message is printed and
            False is returned.
        """
        match = self.sect_info_regex.match(s)
        if match:
            self.name = match.group('name')
            range_str = match.group('range')
            addr_match = self.addr_regex.match(range_str)
            if addr_match:
                # Base address only, so the extent is unknown
                self.start_addr = int(addr_match.group('start'), 16)
                self.end_addr = None
                return True

            range_match = self.range_regex.match(range_str)
            if range_match:
                self.start_addr = int(range_match.group('start'), 16)
                self.end_addr = int(range_match.group('end'), 16)
                if range_match.group('op') == '+':
                    # "<base>+<size>": the second number is a size
                    self.end_addr += self.start_addr
                return True
        print('error: invalid section info string "%s"' % s)
        print('Valid section info formats are:')
        print('Format                Example                    Description')
        print('--------------------- -----------------------------------------------')
        print('<name>=<base>        __TEXT=0x123000             Section from base address only')
        print('<name>=<base>-<end>  __TEXT=0x123000-0x124000    Section from base address and end address')
        print('<name>=<base>+<size> __TEXT=0x123000+0x1000      Section from base address and size')
        return False

    def __str__(self):
        """Return "name=[start - end)", "name=start", the bare name or "<invalid>"."""
        if not self.name:
            return "<invalid>"
        if self.start_addr is not None:
            if self.end_addr is not None:
                return "%s=[0x%16.16x - 0x%16.16x)" % (
                    self.name, self.start_addr, self.end_addr)
            return "%s=0x%16.16x" % (self.name, self.start_addr)
        return self.name
230
231
class Image(object):
    """A class that represents an executable image and any associated data"""

    def __init__(self, path, uuid=None):
        self.path = path  # Path as originally supplied
        self.resolved_path = None  # Preferred over "path" when set
        self.resolved = False
        # Set when the image could not be located; load/add/create then
        # quietly return None
        self.unavailable = False
        self.uuid = uuid
        self.section_infos = list()  # Section objects with load addresses
        self.identifier = None
        self.version = None
        self.arch = None
        self.module = None  # The lldb module once added to a target
        self.symfile = None
        self.slide = None  # Used by load_module() when there are no sections

    @classmethod
    def InitWithSBTargetAndSBModule(cls, target, module):
        '''Initialize this Image object with a module from a target.'''
        obj = cls(module.file.fullpath, module.uuid)
        obj.resolved_path = module.platform_file.fullpath
        obj.resolved = True
        obj.arch = module.triple
        obj.module = module
        for section in module.sections:
            symb_section = Section.InitWithSBTargetAndSBSection(
                target, section)
            # Sections without a load address in the target come back as None
            if symb_section is not None:
                obj.section_infos.append(symb_section)
        return obj

    def dump(self, prefix):
        '''Print the string form of this image preceded by "prefix".'''
        print("%s%s" % (prefix, self))

    def debug_dump(self):
        '''Print every attribute of this image, one per line.'''
        print('path = "%s"' % (self.path))
        print('resolved_path = "%s"' % (self.resolved_path))
        print('resolved = %i' % (self.resolved))
        print('unavailable = %i' % (self.unavailable))
        print('uuid = %s' % (self.uuid))
        print('section_infos = %s' % (self.section_infos))
        print('identifier = "%s"' % (self.identifier))
        print('version = %s' % (self.version))
        print('arch = %s' % (self.arch))
        print('module = %s' % (self.module))
        print('symfile = "%s"' % (self.symfile))
        if self.slide is None:
            # "%i" cannot format None, and None is the default slide
            print('slide = None')
        else:
            print('slide = %i (0x%x)' % (self.slide, self.slide))

    def __str__(self):
        s = ''
        if self.uuid:
            s += "%s " % (self.get_uuid())
        if self.arch:
            s += "%s " % (self.arch)
        if self.version:
            s += "%s " % (self.version)
        resolved_path = self.get_resolved_path()
        if resolved_path:
            s += "%s " % (resolved_path)
        for section_info in self.section_infos:
            s += ", %s" % (section_info)
        if self.slide is not None:
            s += ', slide = 0x%16.16x' % self.slide
        return s

    def add_section(self, section):
        '''Append a section (with load address info) to this image.'''
        self.section_infos.append(section)

    def get_section_containing_load_addr(self, load_addr):
        '''Return the first section that contains "load_addr", or None.'''
        for section_info in self.section_infos:
            if section_info.contains(load_addr):
                return section_info
        return None

    def get_resolved_path(self):
        '''Return resolved_path if set, else path, else None.'''
        if self.resolved_path:
            return self.resolved_path
        if self.path:
            return self.path
        return None

    def get_resolved_path_basename(self):
        '''Return the base name of get_resolved_path(), or None.'''
        path = self.get_resolved_path()
        if path:
            return os.path.basename(path)
        return None

    def symfile_basename(self):
        '''Return the base name of the symbol file, or None.'''
        if self.symfile:
            return os.path.basename(self.symfile)
        return None

    def has_section_load_info(self):
        '''Return True if section infos or a slide are available.'''
        return bool(self.section_infos) or self.slide is not None

    def load_module(self, target):
        '''Set the load addresses of this image's module in "target".

        Section infos are used when present, otherwise the slide is
        applied to the whole module.

        Returns None on success, and also when the image is marked
        unavailable (that was already reported elsewhere). Otherwise an
        error string is returned.
        '''
        if self.unavailable:
            return None  # We already warned that we couldn't find this module, so don't return an error string
        if not self.has_section_load_info():
            return 'error: no section infos'
        if not target:
            return 'error: invalid target'
        if not self.module:
            return 'error: invalid module'

        if not self.section_infos:
            err = target.SetModuleLoadAddress(self.module, self.slide)
            if err.Fail():
                return err.GetCString()
            return None

        # Stop at the first section that cannot be loaded
        for section_info in self.section_infos:
            if not section_info.name:
                return 'error: unable to find "%s" section in "%s"' % (
                    section_info.name, self.get_resolved_path())
            section = self.module.FindSection(section_info.name)
            if not section:
                return 'error: unable to find the section named "%s"' % section_info.name
            error = target.SetSectionLoadAddress(
                section, section_info.start_addr)
            if not error.Success():
                return 'error: %s' % error.GetCString()
        return None

    def add_module(self, target):
        '''Add the Image described in this object to "target" and load it.

        The module is first looked up by UUID alone; if that fails it is
        located via locate_module_and_debug_symbols() and added by path.
        When load info is available the module is then loaded with
        load_module().

        Returns None on success or when the image is unavailable,
        otherwise an error string.
        '''
        if not target:
            return 'error: invalid target'
        # Try and find using UUID only first so that paths need not match
        # up
        uuid_str = self.get_normalized_uuid_string()
        if uuid_str:
            self.module = target.AddModule(None, None, uuid_str)
        if not self.module:
            self.locate_module_and_debug_symbols()
            if self.unavailable:
                return None
            resolved_path = self.get_resolved_path()
            self.module = target.AddModule(
                resolved_path, self.arch, uuid_str, self.symfile)
        if not self.module:
            return 'error: unable to get module for (%s) "%s"' % (
                self.arch, self.get_resolved_path())
        if self.has_section_load_info():
            return self.load_module(target)
        return None  # No sections, the module was added to the target, so success

    def locate_module_and_debug_symbols(self):
        '''Locate the binary and its symbols; returns True on success.'''
        # By default, just use the paths that were supplied in:
        # self.path
        # self.resolved_path
        # self.module
        # self.symfile
        # Subclasses can inherit from this class and override this function
        self.resolved = True
        return True

    def get_uuid(self):
        '''Return the UUID, filling it in from the module when missing.'''
        if not self.uuid and self.module:
            uuid_str = self.module.GetUUIDString()
            # uuid.UUID() raises when handed None, so only convert a real
            # string
            if uuid_str:
                self.uuid = uuid.UUID(uuid_str)
        return self.uuid

    def get_normalized_uuid_string(self):
        '''Return the UUID as an upper case string, or None if unset.'''
        if self.uuid:
            return str(self.uuid).upper()
        return None

    def create_target(self):
        '''Create a target using the information in this Image object.

        Returns the new target, or None when the image is unavailable,
        cannot be located, or no valid target could be created. Failures
        other than "unavailable" are printed.
        '''
        if self.unavailable:
            return None

        if not self.locate_module_and_debug_symbols():
            print('error: unable to locate main executable (%s) "%s"' % (self.arch, self.path))
            return None

        resolved_path = self.get_resolved_path()
        path_spec = lldb.SBFileSpec(resolved_path)
        error = lldb.SBError()
        target = lldb.debugger.CreateTarget(
            resolved_path, self.arch, None, False, error)
        if not target:
            print('error: unable to create a valid target for (%s) "%s"' % (self.arch, self.path))
            return None

        self.module = target.FindModule(path_spec)
        if self.has_section_load_info():
            err = self.load_module(target)
            if err:
                # A load failure is reported but the target is still returned
                print('ERROR: ', err)
        return target
442
443
class Symbolicator(object):
    """A class that represents the information needed to symbolicate addresses in a program"""

    def __init__(self):
        self.target = None
        self.images = list()  # a list of images to be used when symbolicating
        self.addr_mask = 0xffffffffffffffff

    @classmethod
    def InitWithSBTarget(cls, target):
        """Create a Symbolicator from an existing target and its modules.

        One Image is created per module in the target. For triples whose
        architecture contains "arm" the address mask clears bit 0.
        """
        obj = cls()
        obj.target = target
        obj.images = list()
        triple = target.triple
        if triple:
            arch = triple.split('-')[0]
            if "arm" in arch:
                obj.addr_mask = 0xfffffffffffffffe

        for module in target.modules:
            image = Image.InitWithSBTargetAndSBModule(target, module)
            obj.images.append(image)
        return obj

    def __str__(self):
        s = "Symbolicator:\n"
        if self.target:
            s += "Target = '%s'\n" % (self.target)
            s += "Target modules:\n"
            for m in self.target.modules:
                s += str(m) + "\n"
        s += "Images:\n"
        for image in self.images:
            s += '    %s\n' % (image)
        return s

    def find_images_with_identifier(self, identifier):
        """Return the images whose identifier matches "identifier".

        Exact matches are preferred. When there are none, images whose
        identifier ends in ".<identifier>" are returned instead. Images
        without an identifier never match.
        """
        images = [
            image for image in self.images
            if image.identifier == identifier]
        if not images:
            regex = re.compile(r'^.*\.%s$' % (re.escape(identifier)))
            for image in self.images:
                # identifier defaults to None, which regex.match() rejects
                if image.identifier and regex.match(image.identifier):
                    images.append(image)
        return images

    def find_image_containing_load_addr(self, load_addr):
        """Return the first image with a section containing "load_addr"."""
        for image in self.images:
            if image.get_section_containing_load_addr(load_addr):
                return image
        return None

    def create_target(self):
        """Return the current target, creating one from the images if needed.

        Images are tried in order and the first one that yields a target
        wins. Returns None when no image produces a target.
        """
        if self.target:
            return self.target

        for image in self.images:
            self.target = image.create_target()
            if not self.target:
                continue
            # Only 4 byte address "arm" targets get bit 0 masked here
            if self.target.GetAddressByteSize() == 4:
                triple = self.target.triple
                if triple:
                    arch = triple.split('-')[0]
                    if "arm" in arch:
                        self.addr_mask = 0xfffffffffffffffe
            return self.target
        return None

    def symbolicate(self, load_addr, verbose=False):
        """Symbolicate "load_addr" in the target.

        Returns:
            A list of Address objects, the first for "load_addr" itself and
            one more for each parent inlined scope that could be
            reconstructed. None is returned when there is no target or the
            address cannot be symbolicated.
        """
        if not self.target:
            self.create_target()
        if not self.target:
            print('error: no target in Symbolicator')
            return None

        live_process = False
        process = self.target.process
        if process:
            state = process.state
            if state > lldb.eStateUnloaded and state < lldb.eStateDetached:
                live_process = True
        # If we don't have a live process, we can attempt to find the image
        # that a load address belongs to and lazily load its module in the
        # target, but we shouldn't do any of this if we have a live process
        if not live_process:
            image = self.find_image_containing_load_addr(load_addr)
            if image:
                image.add_module(self.target)

        symbolicated_address = Address(self.target, load_addr)
        if not symbolicated_address.symbolicate(verbose):
            return None
        if not symbolicated_address.so_addr:
            return None

        symbolicated_addresses = [symbolicated_address]
        # See if we were able to reconstruct anything?
        while True:
            inlined_parent_so_addr = lldb.SBAddress()
            inlined_parent_sym_ctx = symbolicated_address.sym_ctx.GetParentOfInlinedScope(
                symbolicated_address.so_addr, inlined_parent_so_addr)
            if not inlined_parent_sym_ctx:
                break
            if not inlined_parent_so_addr:
                break

            symbolicated_address = Address(
                self.target,
                inlined_parent_so_addr.GetLoadAddress(self.target))
            symbolicated_address.sym_ctx = inlined_parent_sym_ctx
            symbolicated_address.so_addr = inlined_parent_so_addr
            symbolicated_address.symbolicate(verbose)

            # push the new frame onto the new frame stack
            symbolicated_addresses.append(symbolicated_address)
        return symbolicated_addresses
563
564
def disassemble_instructions(
        target,
        instructions,
        pc,
        insts_before_pc,
        insts_after_pc,
        non_zeroeth_frame):
    """Print a window of disassembly around the instruction at "pc".

    Nothing is printed when no instruction has load address "pc".

    Args:
        target: Target used to compute load addresses and instruction text.
        instructions: Iterable of instructions to format; None prints
            nothing.
        pc: Load address of the instruction to mark with " -> ".
        insts_before_pc: Number of instructions to show before the marked
            one, or -1 to start at the first instruction.
        insts_after_pc: Number of instructions to show after the marked
            one, or -1 to continue to the last instruction.
        non_zeroeth_frame: When True the marker is moved back by one
            instruction (presumably because the pc of a caller frame is a
            return address - confirm with callers).
    """
    if instructions is None:
        return

    lines = list()
    pc_index = -1
    comment_column = 50
    for inst_idx, inst in enumerate(instructions):
        inst_pc = inst.GetAddress().GetLoadAddress(target)
        if pc == inst_pc:
            pc_index = inst_idx
        mnemonic = inst.GetMnemonic(target)
        operands = inst.GetOperands(target)
        comment = inst.GetComment(target)
        line = "%#16.16x: %8s %s" % (inst_pc, mnemonic, operands)
        if comment:
            # Align the comment at comment_column when the line is short
            # enough. A longer line still gets its comment, separated by a
            # single space, instead of losing it.
            padding = max(comment_column - len(line), 1)
            line += ' ' * padding + "; %s" % comment
        lines.append(line)

    if pc_index < 0:
        return

    last_idx = len(lines) - 1
    # If we are disassembling the non-zeroeth frame, we need to backup the
    # PC by 1
    if non_zeroeth_frame and pc_index > 0:
        pc_index = pc_index - 1

    if insts_before_pc == -1:
        start_idx = 0
    else:
        start_idx = max(pc_index - insts_before_pc, 0)
    # The end of the window is governed by insts_after_pc
    if insts_after_pc == -1:
        end_idx = last_idx
    else:
        end_idx = min(pc_index + insts_after_pc, last_idx)

    for i in range(start_idx, end_idx + 1):
        if i == pc_index:
            print(' -> ', lines[i])
        else:
            print('    ', lines[i])
612
613
def print_module_section_data(section):
    """Print "section" followed by a description of its data, if it has any."""
    print(section)
    data = section.GetSectionData()
    if not data:
        return
    # Render the data into a stream, starting at the section's file address
    stream = lldb.SBStream()
    data.GetDescription(stream, section.GetFileAddress())
    print(stream.GetData())
621
622
def print_module_section(section, depth):
    """Print "section" and then its sub sections down to "depth" levels.

    A depth of zero (or less) prints only the section itself.
    """
    print(section)
    if depth <= 0:
        return
    remaining_depth = depth - 1
    for child_idx in range(section.GetNumSubSections()):
        child = section.GetSubSectionAtIndex(child_idx)
        print_module_section(child, remaining_depth)
630
631
def print_module_sections(module, depth):
    """Print each section yielded by module.section_iter(), "depth" levels deep."""
    top_level_sections = module.section_iter()
    for top_level_section in top_level_sections:
        print_module_section(top_level_section, depth)
635
636
def print_module_symbols(module):
    """Print every item produced by iterating "module", one per line."""
    symbols = iter(module)
    for symbol in symbols:
        print(symbol)
640
641
def Symbolicate(command_args):
    """Symbolicate the addresses given on a command line and print them.

    Args:
        command_args: List of option and address strings. Addresses may be
            written in any base accepted by int(text, 0).

    Invalid options make the function return without symbolicating. An
    invalid --section value exits the interpreter with status 1. An
    address that cannot be parsed or symbolicated is reported and the
    remaining addresses are still processed.
    """
    usage = "usage: %prog [options] <addr1> [addr2 ...]"
    description = '''Symbolicate one or more addresses using LLDB's python scripting API..'''
    parser = optparse.OptionParser(
        description=description,
        prog='crashlog.py',
        usage=usage)
    parser.add_option(
        '-v',
        '--verbose',
        action='store_true',
        dest='verbose',
        help='display verbose debug info',
        default=False)
    # NOTE(review): --platform is accepted but nothing below reads it.
    parser.add_option(
        '-p',
        '--platform',
        type='string',
        metavar='platform',
        dest='platform',
        help='Specify the platform to use when creating the debug target. Valid values include "localhost", "darwin-kernel", "ios-simulator", "remote-freebsd", "remote-macosx", "remote-ios", "remote-linux".')
    parser.add_option(
        '-f',
        '--file',
        type='string',
        metavar='file',
        dest='file',
        help='Specify a file to use when symbolicating')
    parser.add_option(
        '-a',
        '--arch',
        type='string',
        metavar='arch',
        dest='arch',
        help='Specify a architecture to use when symbolicating')
    parser.add_option(
        '-s',
        '--slide',
        type='int',
        metavar='slide',
        dest='slide',
        help='Specify the slide to use on the file specified with the --file option',
        default=None)
    parser.add_option(
        '--section',
        type='string',
        action='append',
        dest='section_strings',
        help='specify <sect-name>=<start-addr> or <sect-name>=<start-addr>-<end-addr>')
    try:
        (options, args) = parser.parse_args(command_args)
    except SystemExit:
        # optparse reports bad options (and --help) by exiting; swallow
        # only that so the hosting interpreter keeps running, without
        # also hiding unrelated errors or Ctrl-C.
        return

    symbolicator = Symbolicator()
    if options.file:
        image = Image(options.file)
        image.arch = options.arch
        # Add any sections that were specified with one or more --section
        # options
        if options.section_strings:
            for section_str in options.section_strings:
                section = Section()
                if section.set_from_string(section_str):
                    image.add_section(section)
                else:
                    sys.exit(1)
        if options.slide is not None:
            image.slide = options.slide
        symbolicator.images.append(image)

    target = symbolicator.create_target()
    if options.verbose:
        print(symbolicator)
    if not target:
        print('error: no target for %s' % (symbolicator))
        return

    for addr_str in args:
        try:
            addr = int(addr_str, 0)
        except ValueError:
            print('error: invalid address "%s"' % addr_str)
            continue
        symbolicated_addrs = symbolicator.symbolicate(addr, options.verbose)
        if symbolicated_addrs:
            for symbolicated_addr in symbolicated_addrs:
                print(symbolicated_addr)
        else:
            # symbolicate() returns None when the address has no symbol
            print('error: unable to symbolicate address %#x' % addr)
        print()
727
if __name__ == '__main__':
    # Run as a standalone script: create a debugger and publish it as
    # lldb.debugger, which Image.create_target() uses to create targets.
    standalone_debugger = lldb.SBDebugger.Create()
    lldb.debugger = standalone_debugger
    command_line_args = sys.argv[1:]
    Symbolicate(command_line_args)
732