1#!/usr/bin/python
2
3#----------------------------------------------------------------------
4# Be sure to add the python path that points to the LLDB shared library.
5#
6# To use this in the embedded python interpreter using "lldb":
7#
8#   cd /path/containing/crashlog.py
9#   lldb
10#   (lldb) script import crashlog
11#   "crashlog" command installed, type "crashlog --help" for detailed help
12#   (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash
13#
14# The benefit of running the crashlog command inside lldb in the
15# embedded python interpreter is when the command completes, there
16# will be a target with all of the files loaded at the locations
17# described in the crash log. Only the files that have stack frames
18# in the backtrace will be loaded unless the "--load-all" option
19# has been specified. This allows users to explore the program in the
20# state it was in right at crash time.
21#
22# On MacOSX csh, tcsh:
23#   ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )
24#
25# On MacOSX sh, bash:
26#   PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
27#----------------------------------------------------------------------
28
29import lldb
30import commands
31import optparse
32import os
33import plistlib
34import pprint # pp = pprint.PrettyPrinter(indent=4); pp.pprint(command_args)
35import re
36import shlex
37import sys
38import time
39import uuid
40import symbolication
41
# States of the line-oriented crash log parser in CrashLog. A blank line in
# the log always switches the parser back to PARSE_MODE_NORMAL.
(PARSE_MODE_NORMAL,    # header / free-form lines outside of any section
 PARSE_MODE_THREAD,    # stack frames following a "Thread N...:" line
 PARSE_MODE_IMAGES,    # entries following the "Binary Images:" line
 PARSE_MODE_THREGS,    # register values following "Thread N crashed with"
 PARSE_MODE_SYSTEM,    # lines following the "System Profile:" line
 ) = range(5)
47
class CrashLog(symbolication.Symbolicator):
    """Parses a darwin crash log and symbolicates the addresses found in it.

    Constructing a CrashLog reads the file and fills in:
      info_lines      header lines that were not otherwise consumed
      system_profile  lines of the "System Profile:" section
      threads         one CrashLog.Thread per thread backtrace
      idents          image identifiers referenced by any stack frame
      images          one CrashLog.DarwinImage per "Binary Images:" entry
      error           None, or a message if the file could not be opened

    NOTE(review): "images" is only appended to here; the list itself is
    presumably created by symbolication.Symbolicator.__init__ -- confirm.
    """
    # "Thread N crashed with ..." introduces the register state of a thread
    thread_state_regex = re.compile(r'^Thread ([0-9]+) crashed with')
    # "Thread N<anything>:" introduces the backtrace of a thread
    thread_regex = re.compile(r'^Thread ([0-9]+)([^:]*):(.*)')
    # frame index, image identifier, TAB, pc, description
    frame_regex = re.compile(r'^([0-9]+) +([^ ]+) *\t(0x[0-9a-fA-F]+) +(.*)')
    # lo address, hi address, identifier, version, <uuid>, path
    image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)[- ]+(0x[0-9a-fA-F]+) +[+]?([^ ]+) +([^<]+)<([-0-9a-fA-F]+)> (.*)')
    # Same as above for entries without a <uuid>. The path group keeps its
    # leading "/" (it used to be consumed, which turned the path into a
    # relative one).
    image_regex_no_uuid = re.compile(r'(0x[0-9a-fA-F]+)[- ]+(0x[0-9a-fA-F]+) +[+]?([^ ]+) +([^/]+)(/.*)')
    empty_line_regex = re.compile(r'^$')
    # "name: value" pairs of a register state line, whatever the padding
    register_regex = re.compile(r'([^:]+):\s*(\S+)')

    class Thread:
        """Class that represents a thread in a darwin crash log"""
        def __init__(self, index):
            self.index = index
            self.frames = list()      # CrashLog.Frame objects, in log order
            self.registers = dict()   # register name -> integer value
            self.reason = None        # only set for the crashed thread
            self.queue = None

        def dump(self, prefix):
            """Print the thread, its frames and its registers, each line
            starting with "prefix"."""
            print("%sThread[%u] %s" % (prefix, self.index, self.reason))
            if self.frames:
                print("%s  Frames:" % (prefix,))
                for frame in self.frames:
                    frame.dump(prefix + '    ')
            if self.registers:
                print("%s  Registers:" % (prefix,))
                for reg in self.registers:
                    print("%s    %-5s = %#16.16x" % (prefix, reg, self.registers[reg]))

        def did_crash(self):
            """Return True if a crash reason was recorded for this thread."""
            return self.reason is not None

        def __str__(self):
            s = "Thread[%u]" % self.index
            if self.reason:
                s += ' %s' % self.reason
            return s


    class Frame:
        """Class that represents a stack frame in a thread in a darwin crash log"""
        def __init__(self, index, pc, description):
            self.pc = pc
            self.description = description
            self.index = index

        def __str__(self):
            if self.description:
                return "[%3u] 0x%16.16x %s" % (self.index, self.pc, self.description)
            else:
                return "[%3u] 0x%16.16x" % (self.index, self.pc)

        def dump(self, prefix):
            """Print the frame on one line starting with "prefix".

            CrashLog.Thread.dump() calls this for every frame.
            """
            print("%s%s" % (prefix, self))

    class DarwinImage(symbolication.Image):
        """Class that represents a binary image in a darwin crash log"""
        # Use the dsymForUUID in ~rc/bin if there is one, else ask the shell
        dsymForUUIDBinary = os.path.expanduser('~rc/bin/dsymForUUID')
        if not os.path.exists(dsymForUUIDBinary):
            dsymForUUIDBinary = commands.getoutput('which dsymForUUID')

        # One line of "dwarfdump --uuid" output: the UUID and the (arch)
        dwarfdump_uuid_regex = re.compile(r'UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*')

        def __init__(self, text_addr_lo, text_addr_hi, identifier, version, uuid, path):
            """Create an image whose __TEXT section spans
            [text_addr_lo, text_addr_hi]. "uuid" is the UUID string from the
            crash log, or None when the log did not list one."""
            symbolication.Image.__init__(self, path, uuid)
            self.add_section(symbolication.Section(text_addr_lo, text_addr_hi, "__TEXT"))
            self.identifier = identifier
            self.version = version

        def locate_module_and_debug_symbols(self):
            """Find the executable (and debug symbols) for this image.

            dsymForUUID is asked first; otherwise the file at the path from
            the crash log is used, but only if "dwarfdump --uuid" reports the
            same UUID as the crash log.

            Returns 1 if an existing file was found, 0 otherwise (including
            when the image had already been resolved by an earlier call).
            """
            if self.resolved_path:
                # Don't load a module twice...
                return 0
            # No newline: 'ok' or an error message completes this line
            sys.stdout.write('Locating %s %s... ' % (self.uuid, self.path))
            self._lookup_with_dsym_for_uuid()
            if not self.resolved_path and os.path.exists(self.path):
                if not self.uuid:
                    # Nothing to compare the installed file against
                    print("error: no UUID for '%s' in the crash log, unable to verify the installed file" % self.path)
                    return 0
                if not self._match_installed_file():
                    print("error: file %s '%s' doesn't match the UUID in the installed file" % (self.uuid, self.path))
                    return 0
            if (self.resolved_path and os.path.exists(self.resolved_path)) or (self.path and os.path.exists(self.path)):
                print('ok')
                if self.path != self.resolved_path:
                    print('  exe = "%s"' % self.resolved_path)
                if self.symfile:
                    print(' dsym = "%s"' % self.symfile)
                return 1
            return 0

        def _lookup_with_dsym_for_uuid(self):
            """Fill in arch, symfile and resolved_path from dsymForUUID."""
            if not self.uuid or not os.path.exists(self.dsymForUUIDBinary):
                return
            output = commands.getoutput('%s %s' % (self.dsymForUUIDBinary, self.uuid))
            if not output:
                return
            plist_root = plistlib.readPlistFromString(output)
            if not plist_root:
                return
            # The answer may not contain an entry for our UUID at all
            plist = plist_root.get(self.uuid)
            if not plist:
                return
            if 'DBGArchitecture' in plist:
                self.arch = plist['DBGArchitecture']
            if 'DBGDSYMPath' in plist:
                self.symfile = os.path.realpath(plist['DBGDSYMPath'])
            if 'DBGSymbolRichExecutable' in plist:
                self.resolved_path = os.path.expanduser(plist['DBGSymbolRichExecutable'])

        def _match_installed_file(self):
            """Accept the file at self.path if one of its UUIDs is ours.

            Sets resolved_path and arch and returns True on a match.
            """
            try:
                self_uuid = uuid.UUID(self.uuid)
            except ValueError:
                # Malformed UUID in the crash log can never match
                return False
            dwarfdump_cmd_output = commands.getoutput('dwarfdump --uuid "%s"' % self.path)
            for line in dwarfdump_cmd_output.splitlines():
                match = self.dwarfdump_uuid_regex.search(line)
                if match and uuid.UUID(match.group(1)) == self_uuid:
                    self.resolved_path = self.path
                    self.arch = match.group(2)
                    return True
            return False

    def __init__(self, path):
        """CrashLog constructor that takes a path to a darwin crash log file.

        A leading "~" or "~user" in "path" is expanded. If the file can't be
        opened, self.error is set and nothing is parsed.
        """
        symbolication.Symbolicator.__init__(self)
        self.path = os.path.expanduser(path)
        self.info_lines = list()
        self.system_profile = list()
        self.threads = list()
        self.idents = list() # A list of the required identifiers for doing all stack backtraces
        self.crashed_thread_idx = -1
        self.version = -1
        self.error = None
        # Not every log has "Exception Type:" / "Exception Codes:" lines
        self.thread_exception = None
        self.thread_exception_data = None
        try:
            f = open(self.path)
        except IOError:
            self.error = 'error: cannot open "%s"' % self.path
            return
        try:
            self.file_lines = f.read().splitlines()
        finally:
            f.close()
        self._parse_lines(self.file_lines)

    def _parse_lines(self, lines):
        """Run the section state machine over all lines of the log."""
        parse_mode = PARSE_MODE_NORMAL
        thread = None
        for line in lines:
            if not line:
                # A blank line ends whatever section we were in
                if thread is not None:
                    self._end_thread_section(thread, parse_mode)
                    thread = None
                elif self.info_lines and self.info_lines[-1]:
                    # only append an extra empty line if the previous line
                    # in the info_lines wasn't empty
                    self.info_lines.append(line)
                parse_mode = PARSE_MODE_NORMAL
            elif parse_mode == PARSE_MODE_NORMAL:
                (parse_mode, thread) = self._parse_normal_line(line)
            elif parse_mode == PARSE_MODE_THREAD:
                self._parse_frame_line(line, thread)
            elif parse_mode == PARSE_MODE_IMAGES:
                self._parse_image_line(line)
            elif parse_mode == PARSE_MODE_THREGS:
                self._parse_register_line(line, thread)
            elif parse_mode == PARSE_MODE_SYSTEM:
                self.system_profile.append(line)
        if thread is not None:
            # The log ended without a blank line after the last section
            self._end_thread_section(thread, parse_mode)

    def _end_thread_section(self, thread, parse_mode):
        """Record a thread whose backtrace section just ended."""
        if parse_mode != PARSE_MODE_THREAD:
            # Register sections refer to a thread that is already recorded
            return
        if thread.index == self.crashed_thread_idx:
            reason = ''
            if self.thread_exception:
                reason += self.thread_exception
            if self.thread_exception_data:
                reason += " (%s)" % self.thread_exception_data
            thread.reason = reason
        self.threads.append(thread)

    def _parse_normal_line(self, line):
        """Handle one non-blank line outside of any section.

        Returns the (parse_mode, thread) pair to use for the next line.
        """
        if line.startswith('Process:'):
            (self.process_name, self.process_id) = self._split_trailing_group(line[8:], '[', '[]')
        elif line.startswith('Path:'):
            self.process_path = line[5:].strip()
        elif line.startswith('Identifier:'):
            self.process_identifier = line[11:].strip()
        elif line.startswith('Version:'):
            (self.process_version, self.process_compatability_version) = self._split_trailing_group(line[8:], '(', '()')
        elif line.startswith('Parent Process:'):
            (self.parent_process_name, self.parent_process_id) = self._split_trailing_group(line[15:], '[', '[]')
        elif line.startswith('Exception Type:'):
            self.thread_exception = line[15:].strip()
            return (PARSE_MODE_NORMAL, None)
        elif line.startswith('Exception Codes:'):
            self.thread_exception_data = line[16:].strip()
            return (PARSE_MODE_NORMAL, None)
        elif line.startswith('Crashed Thread:'):
            self.crashed_thread_idx = self._leading_int(line[15:], self.crashed_thread_idx)
            return (PARSE_MODE_NORMAL, None)
        elif line.startswith('Report Version:'):
            self.version = self._leading_int(line[15:], self.version)
            return (PARSE_MODE_NORMAL, None)
        elif line.startswith('System Profile:'):
            return (PARSE_MODE_SYSTEM, None)
        elif (line.startswith('Interval Since Last Report:') or
              line.startswith('Crashes Since Last Report:') or
              line.startswith('Per-App Interval Since Last Report:') or
              line.startswith('Per-App Crashes Since Last Report:') or
              line.startswith('Sleep/Wake UUID:') or
              line.startswith('Anonymous UUID:')):
            # ignore these
            return (PARSE_MODE_NORMAL, None)
        elif line.startswith('Thread'):
            thread_state_match = self.thread_state_regex.search(line)
            if thread_state_match:
                thread_idx = int(thread_state_match.group(1))
                return (PARSE_MODE_THREGS, self._thread_with_index(thread_idx))
            thread_match = self.thread_regex.search(line)
            if thread_match:
                return (PARSE_MODE_THREAD, CrashLog.Thread(int(thread_match.group(1))))
            return (PARSE_MODE_NORMAL, None)
        elif line.startswith('Binary Images:'):
            return (PARSE_MODE_IMAGES, None)
        # Everything that didn't return above is kept as an info line
        self.info_lines.append(line.strip())
        return (PARSE_MODE_NORMAL, None)

    @staticmethod
    def _split_trailing_group(text, opener, strip_chars):
        """Split "name [group]" or "name (group)" at the last "opener".

        Returns (name, group) with the characters in "strip_chars" removed
        from the group. The name may contain spaces. The group is '' when
        "text" has no "opener" in it.
        """
        text = text.strip()
        (name, sep, group) = text.rpartition(opener)
        if not sep:
            return (text, '')
        return (name.strip(), group.strip().strip(strip_chars))

    @staticmethod
    def _leading_int(text, default):
        """Return the first whitespace separated token of "text" as an int,
        or "default" if there is no such token or it isn't an integer."""
        try:
            return int(text.split()[0])
        except (IndexError, ValueError):
            return default

    def _thread_with_index(self, thread_idx):
        """Return the recorded thread with index "thread_idx".

        Threads are looked up by their index instead of by their position in
        self.threads. A thread that has registers but no backtrace section
        is created and recorded here.
        """
        for thread in self.threads:
            if thread.index == thread_idx:
                return thread
        thread = CrashLog.Thread(thread_idx)
        self.threads.append(thread)
        return thread

    def _parse_frame_line(self, line, thread):
        """Add the stack frame described by "line" to "thread"."""
        frame_match = self.frame_regex.search(line)
        if not frame_match:
            print('error: frame regex failed for line: "%s"' % line)
            return
        (frame_idx, ident, pc, description) = frame_match.groups()
        if ident not in self.idents:
            self.idents.append(ident)
        thread.frames.append(CrashLog.Frame(int(frame_idx), int(pc, 0), description))

    def _parse_image_line(self, line):
        """Add the "Binary Images:" entry described by "line" to self.images."""
        image_match = self.image_regex_uuid.search(line)
        if image_match:
            (addr_lo, addr_hi, identifier, version, image_uuid, path) = image_match.groups()
        else:
            image_match = self.image_regex_no_uuid.search(line)
            if not image_match:
                print("error: image regex failed for: %s" % line)
                return
            (addr_lo, addr_hi, identifier, version, path) = image_match.groups()
            image_uuid = None
        image = CrashLog.DarwinImage(int(addr_lo, 0),
                                     int(addr_hi, 0),
                                     identifier.strip(),
                                     version.strip(),
                                     image_uuid,
                                     path)
        self.images.append(image)

    def _parse_register_line(self, line, thread):
        """Store every "name: value" pair of "line" in thread.registers."""
        for (reg, value) in self.register_regex.findall(line):
            try:
                thread.registers[reg.strip()] = int(value, 0)
            except ValueError:
                # Not a number, so it can't be a register value
                pass

    def dump(self):
        """Print the path of the crash log, its threads and its images."""
        print("Crash Log File: %s" % (self.path,))
        print("\nThreads:")
        for thread in self.threads:
            thread.dump('  ')
        print("\nImages:")
        for image in self.images:
            image.dump('  ')

    def find_image_with_identifier(self, identifier):
        """Return the first image whose identifier equals "identifier",
        or None if there isn't one."""
        for image in self.images:
            if image.identifier == identifier:
                return image
        return None

    def create_target(self):
        """Create a target for this crash log.

        symbolication.Symbolicator.create_target() is tried first, then the
        images referenced by stack frames, then every image of the log.
        Returns the first target that could be created, or None after
        printing an error.
        """
        target = symbolication.Symbolicator.create_target(self)
        if target:
            return target
        # We weren't able to create a target the normal way, but any image
        # that can be turned into a target still lets us symbolicate
        for ident in self.idents:
            image = self.find_image_with_identifier(ident)
            if image:
                target = image.create_target()
                if target:
                    return target # success
        for image in self.images:
            target = image.create_target()
            if target:
                return target # success
        print('error: unable to locate any executables from the crash log')
        return None
337
338
def usage():
    """Print a one line usage summary and exit with status 0.

    The text matches the usage string of the option parser built in
    SymbolicateCrashLog(); "crashlog.py --help" prints the full help.
    """
    print("Usage: crashlog.py [options] <FILE> [FILE ...]")
    sys.exit(0)
342
def Symbolicate(debugger, command, result, dict):
    """Entry point of the "crashlog" command that gets added to lldb.

    "command" is the raw argument string typed after "crashlog"; it is split
    like a shell command line and handed to SymbolicateCrashLog(). Nothing
    is raised: any failure is reported through result.PutCString().
    "debugger" and "dict" are not used.
    """
    try:
        SymbolicateCrashLog (shlex.split(command))
    except:
        # Deliberately broad: this is the outermost level of the command, so
        # every failure is turned into an error message for the user. Report
        # the exception value too, the type alone rarely explains the problem.
        (exc_type, exc_value) = sys.exc_info()[:2]
        result.PutCString ("error: python exception %s: %s" % (exc_type, exc_value))
348
def SymbolicateCrashLog(command_args):
    """Symbolicate every crash log file named in "command_args".

    "command_args" is a list of command line arguments (options and file
    names). Returns None. Invalid options or "--help" make optparse print
    its message and end the command without exiting the interpreter. A crash
    log that can't be processed is reported and skipped; the remaining files
    are still symbolicated.
    """
    parser = _create_crashlog_option_parser()
    try:
        (options, args) = parser.parse_args(command_args)
    except SystemExit:
        # optparse exits after printing help or an option error; swallow the
        # exit so the process hosting this command keeps running
        return

    if options.verbose:
        print('command_args = %s' % (command_args,))
        print('options %s' % (options,))
        print('args %s' % (args,))

    if options.debug_delay > 0:
        print("Waiting %u seconds for debugger to attach..." % options.debug_delay)
        time.sleep(options.debug_delay)

    for crash_log_file in args:
        _symbolicate_crash_log_file(crash_log_file, options)


def _create_crashlog_option_parser():
    """Build the optparse parser for the crashlog command line."""
    usage_text = "usage: %prog [options] <FILE> [FILE ...]"
    description='''Symbolicate one or more darwin crash log files to provide source file and line information,
inlined stack frames back to the concrete functions, and disassemble the location of the crash
for the first frame of the crashed thread.
If this script is imported into the LLDB command interpreter, a "crashlog" command will be added to the interpreter
for use at the LLDB command line. After a crash log has been parsed and symbolicated, a target will have been
created that has all of the shared libraries loaded at the load addresses found in the crash log file. This allows
you to explore the program as if it were stopped at the locations described in the crash log and functions can
be disassembled and lookups can be performed using the addresses found in the crash log.'''
    parser = optparse.OptionParser(description=description, prog='crashlog.py',usage=usage_text)
    parser.add_option('--platform', type='string', metavar='platform', dest='platform', help='specify one platform by name')
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', help='display verbose debug info', default=False)
    parser.add_option('--no-images', action='store_false', dest='show_images', help='don\'t show images in stack frames', default=True)
    parser.add_option('-a', '--load-all', action='store_true', dest='load_all_images', help='load all executable images, not just the images found in the crashed stack frames', default=False)
    parser.add_option('--image-list', action='store_true', dest='dump_image_list', help='show image list', default=False)
    parser.add_option('-g', '--debug-delay', type='int', dest='debug_delay', metavar='NSEC', help='pause for NSEC seconds for debugger', default=0)
    parser.add_option('-c', '--crashed-only', action='store_true', dest='crashed_only', help='only symbolicate the crashed thread', default=False)
    parser.add_option('-d', '--disasm-depth', type='int', dest='disassemble_depth', help='set the depth in stack frames that should be disassembled (default is 1)', default=1)
    parser.add_option('-D', '--disasm-all', action='store_true', dest='disassemble_all_threads', help='enabled disassembly of frames on all threads (not just the crashed thread)', default=False)
    parser.add_option('-B', '--disasm-before', type='int', dest='disassemble_before', help='the number of instructions to disassemble before the frame PC', default=4)
    parser.add_option('-A', '--disasm-after', type='int', dest='disassemble_after', help='the number of instructions to disassemble after the frame PC', default=4)
    return parser


def _symbolicate_crash_log_file(crash_log_file, options):
    """Parse one crash log, load its images into a target and print the
    symbolicated backtraces according to "options"."""
    crash_log = CrashLog(crash_log_file)
    if crash_log.error:
        print(crash_log.error)
        return
    if options.verbose:
        crash_log.dump()
    if not crash_log.images:
        print('error: no images in crash log')
        return

    target = crash_log.create_target ()
    if not target:
        return

    if options.load_all_images:
        # --load-all option was specified, load everything up
        images_to_load = list(crash_log.images)
    else:
        # Only load the images found in stack frames for the crashed threads
        images_to_load = list()
        for ident in crash_log.idents:
            # NOTE(review): find_images_with_identifier (plural) is presumably
            # inherited from symbolication.Symbolicator -- confirm
            images = crash_log.find_images_with_identifier (ident)
            if images:
                images_to_load.extend(images)
            else:
                print('error: can\'t find image for identifier "%s"' % ident)

    loaded_images = list()
    for image in images_to_load:
        if image in loaded_images:
            # NOTE(review): text_addr_lo is presumably provided by
            # symbolication.Image -- confirm
            print("warning: skipping %s loaded at %#16.16x duplicate entry (probably commpage)" % (image.path, image.text_addr_lo))
        else:
            err = image.add_module (target)
            if err:
                print(err)
            else:
                print('loaded %s' % image)
                loaded_images.append(image)

    for thread in crash_log.threads:
        this_thread_crashed = thread.did_crash()
        if options.crashed_only and not this_thread_crashed:
            continue
        print("%s" % thread)
        for frame_idx, frame in enumerate(thread.frames):
            disassemble = (this_thread_crashed or options.disassemble_all_threads) and frame_idx < options.disassemble_depth
            symbolicated_frame_addresses = crash_log.symbolicate (frame.pc)
            if not symbolicated_frame_addresses:
                # Couldn't symbolicate, show the frame as found in the log
                print(frame)
                continue
            for address_idx, symbolicated_frame_address in enumerate(symbolicated_frame_addresses):
                print('[%3u] %s' % (frame_idx, symbolicated_frame_address))
                # Only the first entry of a frame gets disassembled
                if address_idx == 0 and disassemble:
                    instructions = symbolicated_frame_address.get_instructions()
                    if instructions:
                        print('')
                        symbolication.disassemble_instructions (target,
                                                                instructions,
                                                                frame.pc,
                                                                options.disassemble_before,
                                                                options.disassemble_after, frame.index > 0)
                        print('')
        print('')

    if options.dump_image_list:
        print("Binary Images:")
        for image in crash_log.images:
            print(image)
465
if __name__ == '__main__':
    # Run as a stand-alone script: there is no debugger yet, so create one
    # and symbolicate the files named on the command line
    lldb.debugger = lldb.SBDebugger.Create()
    SymbolicateCrashLog(sys.argv[1:])
elif lldb.debugger:
    # Imported while a debugger exists: make this module available as the
    # "crashlog" command
    lldb.debugger.HandleCommand('command script add -f crashlog.Symbolicate crashlog')
    print('"crashlog" command installed, type "crashlog --help" for detailed help')
473
474