1#!/usr/bin/python
2
3#----------------------------------------------------------------------
4# Be sure to add the python path that points to the LLDB shared library.
5#
6# To use this in the embedded python interpreter using "lldb":
7#
8#   cd /path/containing/crashlog.py
9#   lldb
10#   (lldb) script import crashlog
11#   "crashlog" command installed, type "crashlog --help" for detailed help
12#   (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash
13#
14# The benefit of running the crashlog command inside lldb in the
15# embedded python interpreter is when the command completes, there
16# will be a target with all of the files loaded at the locations
17# described in the crash log. Only the files that have stack frames
18# in the backtrace will be loaded unless the "--load-all" option
19# has been specified. This allows users to explore the program in the
20# state it was in right at crash time.
21#
22# On MacOSX csh, tcsh:
23#   ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )
24#
25# On MacOSX sh, bash:
26#   PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
27#----------------------------------------------------------------------
28
29import lldb
30import commands
31import optparse
32import os
33import plistlib
34import pprint # pp = pprint.PrettyPrinter(indent=4); pp.pprint(command_args)
35import re
36import shlex
37import sys
38import time
39import uuid
40import lldb.utils.symbolication
41
# States of the line-oriented parser in CrashLog.__init__. A blank line in
# the crash log always switches the parser back to PARSE_MODE_NORMAL.
(PARSE_MODE_NORMAL,   # header lines and section titles
 PARSE_MODE_THREAD,   # stack frames that follow a "Thread N..." title
 PARSE_MODE_IMAGES,   # entries that follow "Binary Images:"
 PARSE_MODE_THREGS,   # registers that follow "Thread N crashed with..."
 PARSE_MODE_SYSTEM,   # lines that follow "System Profile:"
 ) = range(5)
47
48class CrashLog(lldb.utils.symbolication.Symbolicator):
49    """Class that does parses darwin crash logs"""
50    thread_state_regex = re.compile('^Thread ([0-9]+) crashed with')
51    thread_regex = re.compile('^Thread ([0-9]+)([^:]*):(.*)')
52    frame_regex = re.compile('^([0-9]+) +([^ ]+) *\t(0x[0-9a-fA-F]+) +(.*)')
53    image_regex_uuid = re.compile('(0x[0-9a-fA-F]+)[- ]+(0x[0-9a-fA-F]+) +[+]?([^ ]+) +([^<]+)<([-0-9a-fA-F]+)> (.*)');
54    image_regex_no_uuid = re.compile('(0x[0-9a-fA-F]+)[- ]+(0x[0-9a-fA-F]+) +[+]?([^ ]+) +([^/]+)/(.*)');
55    empty_line_regex = re.compile('^$')
56
57    class Thread:
58        """Class that represents a thread in a darwin crash log"""
59        def __init__(self, index):
60            self.index = index
61            self.frames = list()
62            self.registers = dict()
63            self.reason = None
64            self.queue = None
65
66        def dump(self, prefix):
67            print "%sThread[%u] %s" % (prefix, self.index, self.reason)
68            if self.frames:
69                print "%s  Frames:" % (prefix)
70                for frame in self.frames:
71                    frame.dump(prefix + '    ')
72            if self.registers:
73                print "%s  Registers:" % (prefix)
74                for reg in self.registers.keys():
75                    print "%s    %-5s = %#16.16x" % (prefix, reg, self.registers[reg])
76
77        def did_crash(self):
78            return self.reason != None
79
80        def __str__(self):
81            s = "Thread[%u]" % self.index
82            if self.reason:
83                s += ' %s' % self.reason
84            return s
85
86
87    class Frame:
88        """Class that represents a stack frame in a thread in a darwin crash log"""
89        def __init__(self, index, pc, description):
90            self.pc = pc
91            self.description = description
92            self.index = index
93
94        def __str__(self):
95            if self.description:
96                return "[%3u] 0x%16.16x %s" % (self.index, self.pc, self.description)
97            else:
98                return "[%3u] 0x%16.16x" % (self.index, self.pc)
99
100    class DarwinImage(lldb.utils.symbolication.Image):
101        """Class that represents a binary images in a darwin crash log"""
102        dsymForUUIDBinary = os.path.expanduser('~rc/bin/dsymForUUID')
103        if not os.path.exists(dsymForUUIDBinary):
104            dsymForUUIDBinary = commands.getoutput('which dsymForUUID')
105
106        dwarfdump_uuid_regex = re.compile('UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*')
107
108        def __init__(self, text_addr_lo, text_addr_hi, identifier, version, uuid, path):
109            lldb.utils.symbolication.Image.__init__(self, path, uuid);
110            self.add_section (lldb.utils.symbolication.Section(text_addr_lo, text_addr_hi, "__TEXT"))
111            self.identifier = identifier
112            self.version = version
113
114        def locate_module_and_debug_symbols(self):
115            if self.resolved_path:
116                # Don't load a module twice...
117                return True
118            print 'Getting symbols for %s %s...' % (self.uuid, self.path),
119            if os.path.exists(self.dsymForUUIDBinary):
120                dsym_for_uuid_command = '%s %s' % (self.dsymForUUIDBinary, self.uuid)
121                s = commands.getoutput(dsym_for_uuid_command)
122                if s:
123                    plist_root = plistlib.readPlistFromString (s)
124                    if plist_root:
125                        plist = plist_root[self.uuid]
126                        if plist:
127                            if 'DBGArchitecture' in plist:
128                                self.arch = plist['DBGArchitecture']
129                            if 'DBGDSYMPath' in plist:
130                                self.symfile = os.path.realpath(plist['DBGDSYMPath'])
131                            if 'DBGSymbolRichExecutable' in plist:
132                                self.resolved_path = os.path.expanduser (plist['DBGSymbolRichExecutable'])
133            if not self.resolved_path and os.path.exists(self.path):
134                dwarfdump_cmd_output = commands.getoutput('dwarfdump --uuid "%s"' % self.path)
135                self_uuid = uuid.UUID(self.uuid)
136                for line in dwarfdump_cmd_output.splitlines():
137                    match = self.dwarfdump_uuid_regex.search (line)
138                    if match:
139                        dwarf_uuid_str = match.group(1)
140                        dwarf_uuid = uuid.UUID(dwarf_uuid_str)
141                        if self_uuid == dwarf_uuid:
142                            self.resolved_path = self.path
143                            self.arch = match.group(2)
144                            break;
145                if not self.resolved_path:
146                    print "error: file %s '%s' doesn't match the UUID in the installed file" % (self.uuid, self.path)
147                    return False
148            if (self.resolved_path and os.path.exists(self.resolved_path)) or (self.path and os.path.exists(self.path)):
149                print 'ok'
150                # if self.resolved_path:
151                #     print '  exe = "%s"' % self.resolved_path
152                # if self.symfile:
153                #     print ' dsym = "%s"' % self.symfile
154                return True
155            return False
156
157
158
159    def __init__(self, path):
160        """CrashLog constructor that take a path to a darwin crash log file"""
161        lldb.utils.symbolication.Symbolicator.__init__(self);
162        self.path = os.path.expanduser(path);
163        self.info_lines = list()
164        self.system_profile = list()
165        self.threads = list()
166        self.idents = list() # A list of the required identifiers for doing all stack backtraces
167        self.crashed_thread_idx = -1
168        self.version = -1
169        self.error = None
170        # With possible initial component of ~ or ~user replaced by that user's home directory.
171        try:
172            f = open(self.path)
173        except IOError:
174            self.error = 'error: cannot open "%s"' % self.path
175            return
176
177        self.file_lines = f.read().splitlines()
178        parse_mode = PARSE_MODE_NORMAL
179        thread = None
180        for line in self.file_lines:
181            # print line
182            line_len = len(line)
183            if line_len == 0:
184                if thread:
185                    if parse_mode == PARSE_MODE_THREAD:
186                        if thread.index == self.crashed_thread_idx:
187                            thread.reason = ''
188                            if self.thread_exception:
189                                thread.reason += self.thread_exception
190                            if self.thread_exception_data:
191                                thread.reason += " (%s)" % self.thread_exception_data
192                        self.threads.append(thread)
193                    thread = None
194                else:
195                    # only append an extra empty line if the previous line
196                    # in the info_lines wasn't empty
197                    if len(self.info_lines) > 0 and len(self.info_lines[-1]):
198                        self.info_lines.append(line)
199                parse_mode = PARSE_MODE_NORMAL
200                # print 'PARSE_MODE_NORMAL'
201            elif parse_mode == PARSE_MODE_NORMAL:
202                if line.startswith ('Process:'):
203                    (self.process_name, pid_with_brackets) = line[8:].strip().split()
204                    self.process_id = pid_with_brackets.strip('[]')
205                elif line.startswith ('Path:'):
206                    self.process_path = line[5:].strip()
207                elif line.startswith ('Identifier:'):
208                    self.process_identifier = line[11:].strip()
209                elif line.startswith ('Version:'):
210                    (self.process_version, compatability_version) = line[8:].strip().split()
211                    self.process_compatability_version = compatability_version.strip('()')
212                elif line.startswith ('Parent Process:'):
213                    (self.parent_process_name, pid_with_brackets) = line[15:].strip().split()
214                    self.parent_process_id = pid_with_brackets.strip('[]')
215                elif line.startswith ('Exception Type:'):
216                    self.thread_exception = line[15:].strip()
217                    continue
218                elif line.startswith ('Exception Codes:'):
219                    self.thread_exception_data = line[16:].strip()
220                    continue
221                elif line.startswith ('Crashed Thread:'):
222                    self.crashed_thread_idx = int(line[15:].strip().split()[0])
223                    continue
224                elif line.startswith ('Report Version:'):
225                    self.version = int(line[15:].strip())
226                    continue
227                elif line.startswith ('System Profile:'):
228                    parse_mode = PARSE_MODE_SYSTEM
229                    continue
230                elif (line.startswith ('Interval Since Last Report:') or
231                      line.startswith ('Crashes Since Last Report:') or
232                      line.startswith ('Per-App Interval Since Last Report:') or
233                      line.startswith ('Per-App Crashes Since Last Report:') or
234                      line.startswith ('Sleep/Wake UUID:') or
235                      line.startswith ('Anonymous UUID:')):
236                    # ignore these
237                    continue
238                elif line.startswith ('Thread'):
239                    thread_state_match = self.thread_state_regex.search (line)
240                    if thread_state_match:
241                        thread_state_match = self.thread_regex.search (line)
242                        thread_idx = int(thread_state_match.group(1))
243                        parse_mode = PARSE_MODE_THREGS
244                        thread = self.threads[thread_idx]
245                    else:
246                        thread_match = self.thread_regex.search (line)
247                        if thread_match:
248                            # print 'PARSE_MODE_THREAD'
249                            parse_mode = PARSE_MODE_THREAD
250                            thread_idx = int(thread_match.group(1))
251                            thread = CrashLog.Thread(thread_idx)
252                    continue
253                elif line.startswith ('Binary Images:'):
254                    parse_mode = PARSE_MODE_IMAGES
255                    continue
256                self.info_lines.append(line.strip())
257            elif parse_mode == PARSE_MODE_THREAD:
258                frame_match = self.frame_regex.search(line)
259                if frame_match:
260                    ident = frame_match.group(2)
261                    if not ident in self.idents:
262                        self.idents.append(ident)
263                    thread.frames.append (CrashLog.Frame(int(frame_match.group(1)), int(frame_match.group(3), 0), frame_match.group(4)))
264                else:
265                    print 'error: frame regex failed for line: "%s"' % line
266            elif parse_mode == PARSE_MODE_IMAGES:
267                image_match = self.image_regex_uuid.search (line)
268                if image_match:
269                    image = CrashLog.DarwinImage (int(image_match.group(1),0),
270                                                  int(image_match.group(2),0),
271                                                  image_match.group(3).strip(),
272                                                  image_match.group(4).strip(),
273                                                  image_match.group(5),
274                                                  image_match.group(6))
275                    self.images.append (image)
276                else:
277                    image_match = self.image_regex_no_uuid.search (line)
278                    if image_match:
279                        image = CrashLog.DarwinImage (int(image_match.group(1),0),
280                                                      int(image_match.group(2),0),
281                                                      image_match.group(3).strip(),
282                                                      image_match.group(4).strip(),
283                                                      None,
284                                                      image_match.group(5))
285                        self.images.append (image)
286                    else:
287                        print "error: image regex failed for: %s" % line
288
289            elif parse_mode == PARSE_MODE_THREGS:
290                stripped_line = line.strip()
291                reg_values = stripped_line.split('  ')
292                for reg_value in reg_values:
293                    (reg, value) = reg_value.split(': ')
294                    thread.registers[reg.strip()] = int(value, 0)
295            elif parse_mode == PARSE_MODE_SYSTEM:
296                self.system_profile.append(line)
297        f.close()
298
299    def dump(self):
300        print "Crash Log File: %s" % (self.path)
301        print "\nThreads:"
302        for thread in self.threads:
303            thread.dump('  ')
304        print "\nImages:"
305        for image in self.images:
306            image.dump('  ')
307
308    def find_image_with_identifier(self, identifier):
309        for image in self.images:
310            if image.identifier == identifier:
311                return image
312        return None
313
314    def create_target(self):
315        #print 'crashlog.create_target()...'
316        target = lldb.utils.symbolication.Symbolicator.create_target(self)
317        if target:
318            return target
319        # We weren't able to open the main executable as, but we can still symbolicate
320        print 'crashlog.create_target()...2'
321        if self.idents:
322            for ident in self.idents:
323                image = self.find_image_with_identifier (ident)
324                if image:
325                    target = image.create_target ()
326                    if target:
327                        return target # success
328        print 'crashlog.create_target()...3'
329        for image in self.images:
330            target = image.create_target ()
331            if target:
332                return target # success
333        print 'crashlog.create_target()...4'
334        print 'error: unable to locate any executables from the crash log'
335        return None
336
337
def usage():
    """Print a one-line usage summary and exit with status 0.

    The text mirrors the usage string that SymbolicateCrashLog hands to its
    option parser.
    """
    print("Usage: crashlog.py [options] <FILE> [FILE ...]")
    sys.exit(0)
341
def Symbolicate(debugger, command, result, dict):
    """Entry point of the "crashlog" command inside lldb.

    debugger -- passed in by lldb, unused here
    command  -- the raw argument string typed after "crashlog"; it is split
                with shell-like quoting rules
    result   -- object whose PutCString() receives the error text
    dict     -- passed in by lldb, unused here

    Nothing is returned; any exception is reported through ``result``.
    """
    try:
        SymbolicateCrashLog (shlex.split(command))
    except:
        # Catch everything on purpose so that a failure is reported in the
        # command result instead of propagating to the caller. Include the
        # exception value: the type alone rarely tells what went wrong.
        (exc_type, exc_value) = sys.exc_info()[:2]
        result.PutCString ("error: python exception %s: %s" % (exc_type, exc_value))
347
348def SymbolicateCrashLog(command_args):
349    usage = "usage: %prog [options] <FILE> [FILE ...]"
350    description='''Symbolicate one or more darwin crash log files to provide source file and line information,
351inlined stack frames back to the concrete functions, and disassemble the location of the crash
352for the first frame of the crashed thread.
353If this script is imported into the LLDB command interpreter, a "crashlog" command will be added to the interpreter
354for use at the LLDB command line. After a crash log has been parsed and symbolicated, a target will have been
355created that has all of the shared libraries loaded at the load addresses found in the crash log file. This allows
356you to explore the program as if it were stopped at the locations described in the crash log and functions can
357be disassembled and lookups can be performed using the addresses found in the crash log.'''
358    parser = optparse.OptionParser(description=description, prog='crashlog.py',usage=usage)
359    parser.add_option('--platform', type='string', metavar='platform', dest='platform', help='specify one platform by name')
360    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', help='display verbose debug info', default=False)
361    parser.add_option('--no-images', action='store_false', dest='show_images', help='don\'t show images in stack frames', default=True)
362    parser.add_option('-a', '--load-all', action='store_true', dest='load_all_images', help='load all executable images, not just the images found in the crashed stack frames', default=False)
363    parser.add_option('--image-list', action='store_true', dest='dump_image_list', help='show image list', default=False)
364    parser.add_option('-g', '--debug-delay', type='int', dest='debug_delay', metavar='NSEC', help='pause for NSEC seconds for debugger', default=0)
365    parser.add_option('-c', '--crashed-only', action='store_true', dest='crashed_only', help='only symbolicate the crashed thread', default=False)
366    parser.add_option('-d', '--disasm-depth', type='int', dest='disassemble_depth', help='set the depth in stack frames that should be disassembled (default is 1)', default=1)
367    parser.add_option('-D', '--disasm-all', action='store_true', dest='disassemble_all_threads', help='enabled disassembly of frames on all threads (not just the crashed thread)', default=False)
368    parser.add_option('-B', '--disasm-before', type='int', dest='disassemble_before', help='the number of instructions to disassemble before the frame PC', default=4)
369    parser.add_option('-A', '--disasm-after', type='int', dest='disassemble_after', help='the number of instructions to disassemble after the frame PC', default=4)
370    loaded_addresses = False
371    try:
372        (options, args) = parser.parse_args(command_args)
373    except:
374        return
375
376    if options.verbose:
377        print 'command_args = %s' % command_args
378        print 'options', options
379        print 'args', args
380
381    if options.debug_delay > 0:
382        print "Waiting %u seconds for debugger to attach..." % options.debug_delay
383        time.sleep(options.debug_delay)
384    error = lldb.SBError()
385    if args:
386        for crash_log_file in args:
387            crash_log = CrashLog(crash_log_file)
388
389            #pp = pprint.PrettyPrinter(indent=4); pp.pprint(args)
390            if crash_log.error:
391                print crash_log.error
392                return
393            if options.verbose:
394                crash_log.dump()
395            if not crash_log.images:
396                print 'error: no images in crash log'
397                return
398
399            target = crash_log.create_target ()
400            if not target:
401                return
402            exe_module = target.GetModuleAtIndex(0)
403            images_to_load = list()
404            loaded_images = list()
405            if options.load_all_images:
406                # --load-all option was specified, load everything up
407                for image in crash_log.images:
408                    images_to_load.append(image)
409            else:
410                # Only load the images found in stack frames for the crashed threads
411                for ident in crash_log.idents:
412                    images = crash_log.find_images_with_identifier (ident)
413                    if images:
414                        for image in images:
415                            images_to_load.append(image)
416                    else:
417                        print 'error: can\'t find image for identifier "%s"' % ident
418
419            for image in images_to_load:
420                if image in loaded_images:
421                    print "warning: skipping %s loaded at %#16.16x duplicate entry (probably commpage)" % (image.path, image.text_addr_lo)
422                else:
423                    err = image.add_module (target)
424                    if err:
425                        print err
426                    else:
427                        #print 'loaded %s' % image
428                        loaded_images.append(image)
429
430            for thread in crash_log.threads:
431                this_thread_crashed = thread.did_crash()
432                if options.crashed_only and this_thread_crashed == False:
433                    continue
434                print "%s" % thread
435                #prev_frame_index = -1
436                for frame_idx, frame in enumerate(thread.frames):
437                    disassemble = (this_thread_crashed or options.disassemble_all_threads) and frame_idx < options.disassemble_depth;
438                    symbolicated_frame_addresses = crash_log.symbolicate (frame.pc)
439                    if symbolicated_frame_addresses:
440                        symbolicated_frame_address_idx = 0
441                        for symbolicated_frame_address in symbolicated_frame_addresses:
442                            print '[%3u] %s' % (frame_idx, symbolicated_frame_address)
443
444                            if symbolicated_frame_address_idx == 0:
445                                if disassemble:
446                                    instructions = symbolicated_frame_address.get_instructions()
447                                    if instructions:
448                                        print
449                                        lldb.utils.symbolication.disassemble_instructions (target,
450                                                                                           instructions,
451                                                                                           frame.pc,
452                                                                                           options.disassemble_before,
453                                                                                           options.disassemble_after, frame.index > 0)
454                                        print
455                            symbolicated_frame_address_idx += 1
456                    else:
457                        print frame
458                print
459
460            if options.dump_image_list:
461                print "Binary Images:"
462                for image in crash_log.images:
463                    print image
464
465if __name__ == '__main__':
466    # Create a new debugger instance
467    lldb.debugger = lldb.SBDebugger.Create()
468    SymbolicateCrashLog (sys.argv[1:])
469elif lldb.debugger:
470    lldb.debugger.HandleCommand('command script add -f lldb.macosx.crashlog.Symbolicate crashlog')
471    print '"crashlog" command installed, type "crashlog --help" for detailed help'
472
473