1#!/usr/bin/env python3
2
3#----------------------------------------------------------------------
4# Be sure to add the python path that points to the LLDB shared library.
5#
6# To use this in the embedded python interpreter using "lldb":
7#
8#   cd /path/containing/crashlog.py
9#   lldb
10#   (lldb) script import crashlog
11#   "crashlog" command installed, type "crashlog --help" for detailed help
12#   (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash
13#
14# The benefit of running the crashlog command inside lldb in the
15# embedded python interpreter is when the command completes, there
16# will be a target with all of the files loaded at the locations
17# described in the crash log. Only the files that have stack frames
18# in the backtrace will be loaded unless the "--load-all" option
19# has been specified. This allows users to explore the program in the
20# state it was in right at crash time.
21#
22# On MacOSX csh, tcsh:
23#   ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )
24#
25# On MacOSX sh, bash:
26#   PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
27#----------------------------------------------------------------------
28
29from __future__ import print_function
30import cmd
31import datetime
32import glob
33import optparse
34import os
35import platform
36import plistlib
37import re
38import shlex
39import string
40import subprocess
41import sys
42import time
43import uuid
44import json
45
try:
    # First try for LLDB in case PYTHONPATH is already correctly setup.
    import lldb
except ImportError:
    # Ask the command line driver for the path to the lldb module. Copy over
    # the environment so that SDKROOT is propagated to xcrun.
    env = os.environ.copy()
    env['LLDB_DEFAULT_PYTHON_VERSION'] = str(sys.version_info.major)
    command = ['xcrun', 'lldb', '-P'] if platform.system() == 'Darwin' else ['lldb', '-P']
    try:
        lldb_python_path = subprocess.check_output(command, env=env).decode("utf-8").strip()
    except (OSError, subprocess.CalledProcessError):
        # The driver is missing or failed. Fall through so that the retry
        # below reports the problem with the usual error message instead of
        # a traceback.
        lldb_python_path = ''
    # Extend the module search path if the path exists and isn't already
    # there.
    if os.path.exists(lldb_python_path) and lldb_python_path not in sys.path:
        sys.path.append(lldb_python_path)
    # Try importing LLDB again.
    try:
        import lldb
    except ImportError:
        print("error: couldn't locate the 'lldb' module, please set PYTHONPATH correctly")
        sys.exit(1)
65
66from lldb.utils import symbolication
67
68
def read_plist(s):
    """Decode the serialized property list s and return the resulting object.

    Python 3 uses plistlib.loads(); any other major version uses
    plistlib.readPlistFromString().
    """
    if sys.version_info.major != 3:
        return plistlib.readPlistFromString(s)
    return plistlib.loads(s)
74
75class CrashLog(symbolication.Symbolicator):
76    class Thread:
77        """Class that represents a thread in a darwin crash log"""
78
79        def __init__(self, index, app_specific_backtrace):
80            self.index = index
81            self.frames = list()
82            self.idents = list()
83            self.registers = dict()
84            self.reason = None
85            self.queue = None
86            self.crashed = False
87            self.app_specific_backtrace = app_specific_backtrace
88
89        def dump(self, prefix):
90            if self.app_specific_backtrace:
91                print("%Application Specific Backtrace[%u] %s" % (prefix, self.index, self.reason))
92            else:
93                print("%sThread[%u] %s" % (prefix, self.index, self.reason))
94            if self.frames:
95                print("%s  Frames:" % (prefix))
96                for frame in self.frames:
97                    frame.dump(prefix + '    ')
98            if self.registers:
99                print("%s  Registers:" % (prefix))
100                for reg in self.registers.keys():
101                    print("%s    %-8s = %#16.16x" % (prefix, reg, self.registers[reg]))
102
103        def dump_symbolicated(self, crash_log, options):
104            this_thread_crashed = self.app_specific_backtrace
105            if not this_thread_crashed:
106                this_thread_crashed = self.did_crash()
107                if options.crashed_only and this_thread_crashed == False:
108                    return
109
110            print("%s" % self)
111            display_frame_idx = -1
112            for frame_idx, frame in enumerate(self.frames):
113                disassemble = (
114                    this_thread_crashed or options.disassemble_all_threads) and frame_idx < options.disassemble_depth
115                if frame_idx == 0:
116                    symbolicated_frame_addresses = crash_log.symbolicate(
117                        frame.pc & crash_log.addr_mask, options.verbose)
118                else:
119                    # Any frame above frame zero and we have to subtract one to
120                    # get the previous line entry
121                    symbolicated_frame_addresses = crash_log.symbolicate(
122                        (frame.pc & crash_log.addr_mask) - 1, options.verbose)
123
124                if symbolicated_frame_addresses:
125                    symbolicated_frame_address_idx = 0
126                    for symbolicated_frame_address in symbolicated_frame_addresses:
127                        display_frame_idx += 1
128                        print('[%3u] %s' % (frame_idx, symbolicated_frame_address))
129                        if (options.source_all or self.did_crash(
130                        )) and display_frame_idx < options.source_frames and options.source_context:
131                            source_context = options.source_context
132                            line_entry = symbolicated_frame_address.get_symbol_context().line_entry
133                            if line_entry.IsValid():
134                                strm = lldb.SBStream()
135                                if line_entry:
136                                    crash_log.debugger.GetSourceManager().DisplaySourceLinesWithLineNumbers(
137                                        line_entry.file, line_entry.line, source_context, source_context, "->", strm)
138                                source_text = strm.GetData()
139                                if source_text:
140                                    # Indent the source a bit
141                                    indent_str = '    '
142                                    join_str = '\n' + indent_str
143                                    print('%s%s' % (indent_str, join_str.join(source_text.split('\n'))))
144                        if symbolicated_frame_address_idx == 0:
145                            if disassemble:
146                                instructions = symbolicated_frame_address.get_instructions()
147                                if instructions:
148                                    print()
149                                    symbolication.disassemble_instructions(
150                                        crash_log.get_target(),
151                                        instructions,
152                                        frame.pc,
153                                        options.disassemble_before,
154                                        options.disassemble_after,
155                                        frame.index > 0)
156                                    print()
157                        symbolicated_frame_address_idx += 1
158                else:
159                    print(frame)
160            if self.registers:
161                print()
162                for reg in self.registers.keys():
163                    print("    %-8s = %#16.16x" % (reg, self.registers[reg]))
164            elif self.crashed:
165               print()
166               print("No thread state (register information) available")
167
168        def add_ident(self, ident):
169            if ident not in self.idents:
170                self.idents.append(ident)
171
172        def did_crash(self):
173            return self.reason is not None
174
175        def __str__(self):
176            if self.app_specific_backtrace:
177                s = "Application Specific Backtrace[%u]" % self.index
178            else:
179                s = "Thread[%u]" % self.index
180            if self.reason:
181                s += ' %s' % self.reason
182            return s
183
184    class Frame:
185        """Class that represents a stack frame in a thread in a darwin crash log"""
186
187        def __init__(self, index, pc, description):
188            self.pc = pc
189            self.description = description
190            self.index = index
191
192        def __str__(self):
193            if self.description:
194                return "[%3u] 0x%16.16x %s" % (
195                    self.index, self.pc, self.description)
196            else:
197                return "[%3u] 0x%16.16x" % (self.index, self.pc)
198
199        def dump(self, prefix):
200            print("%s%s" % (prefix, str(self)))
201
202    class DarwinImage(symbolication.Image):
203        """Class that represents a binary images in a darwin crash log"""
204        dsymForUUIDBinary = '/usr/local/bin/dsymForUUID'
205        if not os.path.exists(dsymForUUIDBinary):
206            try:
207                dsymForUUIDBinary = subprocess.check_output('which dsymForUUID',
208                                                            shell=True).decode("utf-8").rstrip('\n')
209            except:
210                dsymForUUIDBinary = ""
211
212        dwarfdump_uuid_regex = re.compile(
213            'UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*')
214
215        def __init__(
216                self,
217                text_addr_lo,
218                text_addr_hi,
219                identifier,
220                version,
221                uuid,
222                path,
223                verbose):
224            symbolication.Image.__init__(self, path, uuid)
225            self.add_section(
226                symbolication.Section(
227                    text_addr_lo,
228                    text_addr_hi,
229                    "__TEXT"))
230            self.identifier = identifier
231            self.version = version
232            self.verbose = verbose
233
234        def show_symbol_progress(self):
235            """
236            Hide progress output and errors from system frameworks as they are plentiful.
237            """
238            if self.verbose:
239                return True
240            return not (self.path.startswith("/System/Library/") or
241                        self.path.startswith("/usr/lib/"))
242
243
244        def find_matching_slice(self):
245            dwarfdump_cmd_output = subprocess.check_output(
246                'dwarfdump --uuid "%s"' % self.path, shell=True).decode("utf-8")
247            self_uuid = self.get_uuid()
248            for line in dwarfdump_cmd_output.splitlines():
249                match = self.dwarfdump_uuid_regex.search(line)
250                if match:
251                    dwarf_uuid_str = match.group(1)
252                    dwarf_uuid = uuid.UUID(dwarf_uuid_str)
253                    if self_uuid == dwarf_uuid:
254                        self.resolved_path = self.path
255                        self.arch = match.group(2)
256                        return True
257            if not self.resolved_path:
258                self.unavailable = True
259                if self.show_symbol_progress():
260                    print(("error\n    error: unable to locate '%s' with UUID %s"
261                           % (self.path, self.get_normalized_uuid_string())))
262                return False
263
264        def locate_module_and_debug_symbols(self):
265            # Don't load a module twice...
266            if self.resolved:
267                return True
268            # Mark this as resolved so we don't keep trying
269            self.resolved = True
270            uuid_str = self.get_normalized_uuid_string()
271            if self.show_symbol_progress():
272                print('Getting symbols for %s %s...' % (uuid_str, self.path), end=' ')
273            if os.path.exists(self.dsymForUUIDBinary):
274                dsym_for_uuid_command = '%s %s' % (
275                    self.dsymForUUIDBinary, uuid_str)
276                s = subprocess.check_output(dsym_for_uuid_command, shell=True)
277                if s:
278                    try:
279                        plist_root = read_plist(s)
280                    except:
281                        print(("Got exception: ", sys.exc_info()[1], " handling dsymForUUID output: \n", s))
282                        raise
283                    if plist_root:
284                        plist = plist_root[uuid_str]
285                        if plist:
286                            if 'DBGArchitecture' in plist:
287                                self.arch = plist['DBGArchitecture']
288                            if 'DBGDSYMPath' in plist:
289                                self.symfile = os.path.realpath(
290                                    plist['DBGDSYMPath'])
291                            if 'DBGSymbolRichExecutable' in plist:
292                                self.path = os.path.expanduser(
293                                    plist['DBGSymbolRichExecutable'])
294                                self.resolved_path = self.path
295            if not self.resolved_path and os.path.exists(self.path):
296                if not self.find_matching_slice():
297                    return False
298            if not self.resolved_path and not os.path.exists(self.path):
299                try:
300                    mdfind_results = subprocess.check_output(
301                        ["/usr/bin/mdfind",
302                         "com_apple_xcode_dsym_uuids == %s" % uuid_str]).decode("utf-8").splitlines()
303                    found_matching_slice = False
304                    for dsym in mdfind_results:
305                        dwarf_dir = os.path.join(dsym, 'Contents/Resources/DWARF')
306                        if not os.path.exists(dwarf_dir):
307                            # Not a dSYM bundle, probably an Xcode archive.
308                            continue
309                        print('falling back to binary inside "%s"' % dsym)
310                        self.symfile = dsym
311                        for filename in os.listdir(dwarf_dir):
312                           self.path = os.path.join(dwarf_dir, filename)
313                           if self.find_matching_slice():
314                              found_matching_slice = True
315                              break
316                        if found_matching_slice:
317                           break
318                except:
319                    pass
320            if (self.resolved_path and os.path.exists(self.resolved_path)) or (
321                    self.path and os.path.exists(self.path)):
322                print('ok')
323                return True
324            else:
325                self.unavailable = True
326            return False
327
328    def __init__(self, debugger, path, verbose):
329        """CrashLog constructor that take a path to a darwin crash log file"""
330        symbolication.Symbolicator.__init__(self, debugger)
331        self.path = os.path.expanduser(path)
332        self.info_lines = list()
333        self.system_profile = list()
334        self.threads = list()
335        self.backtraces = list()  # For application specific backtraces
336        self.idents = list()  # A list of the required identifiers for doing all stack backtraces
337        self.errors = list()
338        self.crashed_thread_idx = -1
339        self.version = -1
340        self.target = None
341        self.verbose = verbose
342
343    def dump(self):
344        print("Crash Log File: %s" % (self.path))
345        if self.backtraces:
346            print("\nApplication Specific Backtraces:")
347            for thread in self.backtraces:
348                thread.dump('  ')
349        print("\nThreads:")
350        for thread in self.threads:
351            thread.dump('  ')
352        print("\nImages:")
353        for image in self.images:
354            image.dump('  ')
355
356    def find_image_with_identifier(self, identifier):
357        for image in self.images:
358            if image.identifier == identifier:
359                return image
360        regex_text = '^.*\.%s$' % (re.escape(identifier))
361        regex = re.compile(regex_text)
362        for image in self.images:
363            if regex.match(image.identifier):
364                return image
365        return None
366
367    def create_target(self):
368        if self.target is None:
369            self.target = symbolication.Symbolicator.create_target(self)
370            if self.target:
371                return self.target
372            # We weren't able to open the main executable as, but we can still
373            # symbolicate
374            print('crashlog.create_target()...2')
375            if self.idents:
376                for ident in self.idents:
377                    image = self.find_image_with_identifier(ident)
378                    if image:
379                        self.target = image.create_target(self.debugger)
380                        if self.target:
381                            return self.target  # success
382            print('crashlog.create_target()...3')
383            for image in self.images:
384                self.target = image.create_target(self.debugger)
385                if self.target:
386                    return self.target  # success
387            print('crashlog.create_target()...4')
388            print('error: Unable to locate any executables from the crash log.')
389            print('       Try loading the executable into lldb before running crashlog')
390            print('       and/or make sure the .dSYM bundles can be found by Spotlight.')
391        return self.target
392
393    def get_target(self):
394        return self.target
395
396
class CrashLogFormatException(Exception):
    """Raised by the JSON parser when the crash log is not valid JSON."""
399
400
class CrashLogParseException(Exception):
    """Raised when a JSON crash log lacks or mistypes data the parser needs."""
403
404
class CrashLogParser:
    """Entry point that picks the parser matching the crash log's format."""

    def parse(self, debugger, path, verbose):
        """Parse the crash log at path and return the parser's result.

        The JSON parser is tried first; the text parser takes over when the
        JSON parser raises CrashLogFormatException.
        """
        try:
            json_parser = JSONCrashLogParser(debugger, path, verbose)
            return json_parser.parse()
        except CrashLogFormatException:
            text_parser = TextCrashLogParser(debugger, path, verbose)
            return text_parser.parse()
411
412
413class JSONCrashLogParser:
414    def __init__(self, debugger, path, verbose):
415        self.path = os.path.expanduser(path)
416        self.verbose = verbose
417        self.crashlog = CrashLog(debugger, self.path, self.verbose)
418
419    def parse(self):
420        with open(self.path, 'r') as f:
421            buffer = f.read()
422
423        # Skip the first line if it contains meta data.
424        head, _, tail = buffer.partition('\n')
425        try:
426            metadata = json.loads(head)
427            if 'app_name' in metadata and 'app_version' in metadata:
428                buffer = tail
429        except ValueError:
430            pass
431
432        try:
433            self.data = json.loads(buffer)
434        except ValueError:
435            raise CrashLogFormatException()
436
437        try:
438            self.parse_process_info(self.data)
439            self.parse_images(self.data['usedImages'])
440            self.parse_threads(self.data['threads'])
441            self.parse_errors(self.data)
442            thread = self.crashlog.threads[self.crashlog.crashed_thread_idx]
443            reason = self.parse_crash_reason(self.data['exception'])
444            if thread.reason:
445                thread.reason = '{} {}'.format(thread.reason, reason)
446            else:
447                thread.reason = reason
448        except (KeyError, ValueError, TypeError) as e:
449            raise CrashLogParseException(
450                'Failed to parse JSON crashlog: {}: {}'.format(
451                    type(e).__name__, e))
452
453        return self.crashlog
454
455    def get_used_image(self, idx):
456        return self.data['usedImages'][idx]
457
458    def parse_process_info(self, json_data):
459        self.crashlog.process_id = json_data['pid']
460        self.crashlog.process_identifier = json_data['procName']
461        self.crashlog.process_path = json_data['procPath']
462
463    def parse_crash_reason(self, json_exception):
464        exception_type = json_exception['type']
465        exception_signal = json_exception['signal']
466        if 'codes' in json_exception:
467            exception_extra = " ({})".format(json_exception['codes'])
468        elif 'subtype' in json_exception:
469            exception_extra = " ({})".format(json_exception['subtype'])
470        else:
471            exception_extra = ""
472        return "{} ({}){}".format(exception_type, exception_signal,
473                                  exception_extra)
474
475    def parse_images(self, json_images):
476        idx = 0
477        for json_image in json_images:
478            img_uuid = uuid.UUID(json_image['uuid'])
479            low = int(json_image['base'])
480            high = int(0)
481            name = json_image['name'] if 'name' in json_image else ''
482            path = json_image['path'] if 'path' in json_image else ''
483            version = ''
484            darwin_image = self.crashlog.DarwinImage(low, high, name, version,
485                                                     img_uuid, path,
486                                                     self.verbose)
487            self.crashlog.images.append(darwin_image)
488            idx += 1
489
490    def parse_frames(self, thread, json_frames):
491        idx = 0
492        for json_frame in json_frames:
493            image_id = int(json_frame['imageIndex'])
494            json_image = self.get_used_image(image_id)
495            ident = json_image['name'] if 'name' in json_image else ''
496            thread.add_ident(ident)
497            if ident not in self.crashlog.idents:
498                self.crashlog.idents.append(ident)
499
500            frame_offset = int(json_frame['imageOffset'])
501            image_addr = self.get_used_image(image_id)['base']
502            pc = image_addr + frame_offset
503            thread.frames.append(self.crashlog.Frame(idx, pc, frame_offset))
504            idx += 1
505
506    def parse_threads(self, json_threads):
507        idx = 0
508        for json_thread in json_threads:
509            thread = self.crashlog.Thread(idx, False)
510            if 'name' in json_thread:
511                thread.reason = json_thread['name']
512            if json_thread.get('triggered', False):
513                self.crashlog.crashed_thread_idx = idx
514                thread.crashed = True
515                if 'threadState' in json_thread:
516                    thread.registers = self.parse_thread_registers(
517                        json_thread['threadState'])
518            thread.queue = json_thread.get('queue')
519            self.parse_frames(thread, json_thread.get('frames', []))
520            self.crashlog.threads.append(thread)
521            idx += 1
522
523    def parse_thread_registers(self, json_thread_state):
524        registers = dict()
525        for key, state in json_thread_state.items():
526            try:
527               value = int(state['value'])
528               registers[key] = value
529            except (TypeError, ValueError):
530               pass
531        return registers
532
533    def parse_errors(self, json_data):
534       if 'reportNotes' in json_data:
535          self.crashlog.errors = json_data['reportNotes']
536
537
class CrashLogParseMode:
    """Integer states of the text parser; each one selects a line handler."""
    # NORMAL=0, THREAD=1, IMAGES=2, THREGS=3, SYSTEM=4, INSTRS=5
    NORMAL, THREAD, IMAGES, THREGS, SYSTEM, INSTRS = range(6)
545
546
547class TextCrashLogParser:
548    parent_process_regex = re.compile('^Parent Process:\s*(.*)\[(\d+)\]')
549    thread_state_regex = re.compile('^Thread ([0-9]+) crashed with')
550    thread_instrs_regex = re.compile('^Thread ([0-9]+) instruction stream')
551    thread_regex = re.compile('^Thread ([0-9]+)([^:]*):(.*)')
552    app_backtrace_regex = re.compile('^Application Specific Backtrace ([0-9]+)([^:]*):(.*)')
553    version = r'(\(.+\)|(arm|x86_)[0-9a-z]+)\s+'
554    frame_regex = re.compile(r'^([0-9]+)' r'\s'                # id
555                             r'+(.+?)'    r'\s+'               # img_name
556                             r'(' +version+ r')?'              # img_version
557                             r'(0x[0-9a-fA-F]{7}[0-9a-fA-F]+)' # addr
558                             r' +(.*)'                         # offs
559                            )
560    null_frame_regex = re.compile(r'^([0-9]+)\s+\?\?\?\s+(0{7}0+) +(.*)')
561    image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)'            # img_lo
562                                  r'\s+' '-' r'\s+'              #   -
563                                  r'(0x[0-9a-fA-F]+)'     r'\s+' # img_hi
564                                  r'[+]?(.+?)'            r'\s+' # img_name
565                                  r'(' +version+ ')?'            # img_version
566                                  r'(<([-0-9a-fA-F]+)>\s+)?'     # img_uuid
567                                  r'(/.*)'                       # img_path
568                                 )
569
570
571    def __init__(self, debugger, path, verbose):
572        self.path = os.path.expanduser(path)
573        self.verbose = verbose
574        self.thread = None
575        self.app_specific_backtrace = False
576        self.crashlog = CrashLog(debugger, self.path, self.verbose)
577        self.parse_mode = CrashLogParseMode.NORMAL
578        self.parsers = {
579            CrashLogParseMode.NORMAL : self.parse_normal,
580            CrashLogParseMode.THREAD : self.parse_thread,
581            CrashLogParseMode.IMAGES : self.parse_images,
582            CrashLogParseMode.THREGS : self.parse_thread_registers,
583            CrashLogParseMode.SYSTEM : self.parse_system,
584            CrashLogParseMode.INSTRS : self.parse_instructions,
585        }
586
587    def parse(self):
588        with open(self.path,'r') as f:
589            lines = f.read().splitlines()
590
591        for line in lines:
592            line_len = len(line)
593            if line_len == 0:
594                if self.thread:
595                    if self.parse_mode == CrashLogParseMode.THREAD:
596                        if self.thread.index == self.crashlog.crashed_thread_idx:
597                            self.thread.reason = ''
598                            if self.crashlog.thread_exception:
599                                self.thread.reason += self.crashlog.thread_exception
600                            if self.crashlog.thread_exception_data:
601                                self.thread.reason += " (%s)" % self.crashlog.thread_exception_data
602                        if self.app_specific_backtrace:
603                            self.crashlog.backtraces.append(self.thread)
604                        else:
605                            self.crashlog.threads.append(self.thread)
606                    self.thread = None
607                else:
608                    # only append an extra empty line if the previous line
609                    # in the info_lines wasn't empty
610                    if len(self.crashlog.info_lines) > 0 and len(self.crashlog.info_lines[-1]):
611                        self.crashlog.info_lines.append(line)
612                self.parse_mode = CrashLogParseMode.NORMAL
613            else:
614                self.parsers[self.parse_mode](line)
615
616        return self.crashlog
617
618
619    def parse_normal(self, line):
620        if line.startswith('Process:'):
621            (self.crashlog.process_name, pid_with_brackets) = line[
622                8:].strip().split(' [')
623            self.crashlog.process_id = pid_with_brackets.strip('[]')
624        elif line.startswith('Path:'):
625            self.crashlog.process_path = line[5:].strip()
626        elif line.startswith('Identifier:'):
627            self.crashlog.process_identifier = line[11:].strip()
628        elif line.startswith('Version:'):
629            version_string = line[8:].strip()
630            matched_pair = re.search("(.+)\((.+)\)", version_string)
631            if matched_pair:
632                self.crashlog.process_version = matched_pair.group(1)
633                self.crashlog.process_compatability_version = matched_pair.group(
634                    2)
635            else:
636                self.crashlog.process = version_string
637                self.crashlog.process_compatability_version = version_string
638        elif self.parent_process_regex.search(line):
639            parent_process_match = self.parent_process_regex.search(
640                line)
641            self.crashlog.parent_process_name = parent_process_match.group(1)
642            self.crashlog.parent_process_id = parent_process_match.group(2)
643        elif line.startswith('Exception Type:'):
644            self.crashlog.thread_exception = line[15:].strip()
645            return
646        elif line.startswith('Exception Codes:'):
647            self.crashlog.thread_exception_data = line[16:].strip()
648            return
649        elif line.startswith('Exception Subtype:'): # iOS
650            self.crashlog.thread_exception_data = line[18:].strip()
651            return
652        elif line.startswith('Crashed Thread:'):
653            self.crashlog.crashed_thread_idx = int(line[15:].strip().split()[0])
654            return
655        elif line.startswith('Triggered by Thread:'): # iOS
656            self.crashlog.crashed_thread_idx = int(line[20:].strip().split()[0])
657            return
658        elif line.startswith('Report Version:'):
659            self.crashlog.version = int(line[15:].strip())
660            return
661        elif line.startswith('System Profile:'):
662            self.parse_mode = CrashLogParseMode.SYSTEM
663            return
664        elif (line.startswith('Interval Since Last Report:') or
665                line.startswith('Crashes Since Last Report:') or
666                line.startswith('Per-App Interval Since Last Report:') or
667                line.startswith('Per-App Crashes Since Last Report:') or
668                line.startswith('Sleep/Wake UUID:') or
669                line.startswith('Anonymous UUID:')):
670            # ignore these
671            return
672        elif line.startswith('Thread'):
673            thread_state_match = self.thread_state_regex.search(line)
674            if thread_state_match:
675                self.app_specific_backtrace = False
676                thread_state_match = self.thread_regex.search(line)
677                thread_idx = int(thread_state_match.group(1))
678                self.parse_mode = CrashLogParseMode.THREGS
679                self.thread = self.crashlog.threads[thread_idx]
680                return
681            thread_insts_match  = self.thread_instrs_regex.search(line)
682            if thread_insts_match:
683                self.parse_mode = CrashLogParseMode.INSTRS
684                return
685            thread_match = self.thread_regex.search(line)
686            if thread_match:
687                self.app_specific_backtrace = False
688                self.parse_mode = CrashLogParseMode.THREAD
689                thread_idx = int(thread_match.group(1))
690                self.thread = self.crashlog.Thread(thread_idx, False)
691                return
692            return
693        elif line.startswith('Binary Images:'):
694            self.parse_mode = CrashLogParseMode.IMAGES
695            return
696        elif line.startswith('Application Specific Backtrace'):
697            app_backtrace_match = self.app_backtrace_regex.search(line)
698            if app_backtrace_match:
699                self.parse_mode = CrashLogParseMode.THREAD
700                self.app_specific_backtrace = True
701                idx = int(app_backtrace_match.group(1))
702                self.thread = self.crashlog.Thread(idx, True)
703        elif line.startswith('Last Exception Backtrace:'): # iOS
704            self.parse_mode = CrashLogParseMode.THREAD
705            self.app_specific_backtrace = True
706            idx = 1
707            self.thread = self.crashlog.Thread(idx, True)
708        self.crashlog.info_lines.append(line.strip())
709
710    def parse_thread(self, line):
711        if line.startswith('Thread'):
712            return
713        if self.null_frame_regex.search(line):
714            print('warning: thread parser ignored null-frame: "%s"' % line)
715            return
716        frame_match = self.frame_regex.search(line)
717        if frame_match:
718            (frame_id, frame_img_name, _, frame_img_version, _,
719                frame_addr, frame_ofs) = frame_match.groups()
720            ident = frame_img_name
721            self.thread.add_ident(ident)
722            if ident not in self.crashlog.idents:
723                self.crashlog.idents.append(ident)
724            self.thread.frames.append(self.crashlog.Frame(int(frame_id), int(
725                frame_addr, 0), frame_ofs))
726        else:
727            print('error: frame regex failed for line: "%s"' % line)
728
729    def parse_images(self, line):
730        image_match = self.image_regex_uuid.search(line)
731        if image_match:
732            (img_lo, img_hi, img_name, _, img_version, _,
733                _, img_uuid, img_path) = image_match.groups()
734            image = self.crashlog.DarwinImage(int(img_lo, 0), int(img_hi, 0),
735                                            img_name.strip(),
736                                            img_version.strip()
737                                            if img_version else "",
738                                            uuid.UUID(img_uuid), img_path,
739                                            self.verbose)
740            self.crashlog.images.append(image)
741        else:
742            print("error: image regex failed for: %s" % line)
743
744
745    def parse_thread_registers(self, line):
746        stripped_line = line.strip()
747        # "r12: 0x00007fff6b5939c8  r13: 0x0000000007000006  r14: 0x0000000000002a03  r15: 0x0000000000000c00"
748        reg_values = re.findall(
749            '([a-zA-Z0-9]+: 0[Xx][0-9a-fA-F]+) *', stripped_line)
750        for reg_value in reg_values:
751            (reg, value) = reg_value.split(': ')
752            self.thread.registers[reg.strip()] = int(value, 0)
753
754    def parse_system(self, line):
755        self.crashlog.system_profile.append(line)
756
757    def parse_instructions(self, line):
758        pass
759
760
def usage():
    '''Print the command-line synopsis and exit with status 0.'''
    synopsis = "Usage: lldb-symbolicate.py [-n name] executable-image"
    print(synopsis)
    raise SystemExit(0)
764
765
class Interactive(cmd.Cmd):
    '''Interactive prompt for analyzing one or more Darwin crash logs, type "help" to see a list of supported commands.'''
    image_option_parser = None

    def __init__(self, crash_logs):
        '''Create a prompt operating on crash_logs, a list of parsed crash logs.'''
        cmd.Cmd.__init__(self)
        self.use_rawinput = False
        self.intro = 'Interactive crashlogs prompt, type "help" to see a list of supported commands.'
        self.crash_logs = crash_logs
        self.prompt = '% '

    @staticmethod
    def _split_command_line(line):
        '''Split line into shell-style arguments.

        Returns the argument list, or None (after printing an error) when the
        line cannot be tokenized, e.g. because of an unbalanced quote.'''
        try:
            return shlex.split(line)
        except ValueError as e:
            print('error: %s' % e)
            return None

    def default(self, line):
        '''Catch all for unknown command, which will exit the interpreter.'''
        print("unknown command: %s" % line)
        return True

    def do_q(self, line):
        '''Quit command'''
        return True

    def do_quit(self, line):
        '''Quit command'''
        return True

    def do_symbolicate(self, line):
        '''Symbolicate the crash logs with the given indexes, or all loaded crash logs if no index is given.

        USAGE: symbolicate [options] [INDEX ...]'''
        description = '''Symbolicate one or more darwin crash log files by index to provide source file and line information,
        inlined stack frames back to the concrete functions, and disassemble the location of the crash
        for the first frame of the crashed thread.'''
        option_parser = CreateSymbolicateCrashLogOptions(
            'symbolicate', description, False)
        command_args = self._split_command_line(line)
        if command_args is None:
            return
        try:
            (options, args) = option_parser.parse_args(command_args)
        except SystemExit:
            # optparse reports usage errors (and --help) by calling
            # sys.exit(); swallow that so the prompt keeps running.
            return

        if not args:
            # No arguments, symbolicate all crash logs using the options
            # provided
            for crash_log in self.crash_logs:
                SymbolicateCrashLog(crash_log, options)
            return

        # We have arguments, they must be valid crash log file indexes
        for idx_str in args:
            try:
                idx = int(idx_str)
            except ValueError:
                print('error: invalid crash log index "%s"' % idx_str)
                continue
            if 0 <= idx < len(self.crash_logs):
                SymbolicateCrashLog(self.crash_logs[idx], options)
            else:
                print('error: crash log index %u is out of range' % (idx))

    def do_list(self, line=None):
        '''Dump a list of all crash logs that are currently loaded.

        USAGE: list'''
        print('%u crash logs are loaded:' % len(self.crash_logs))
        for (crash_log_idx, crash_log) in enumerate(self.crash_logs):
            print('[%u] = %s' % (crash_log_idx, crash_log.path))

    def do_image(self, line):
        '''Dump information about one or more binary images in the crash log given an image basename, or all images if no arguments are provided.'''
        usage = "usage: %prog [options] <PATH> [PATH ...]"
        description = '''Dump information about one or more images in all crash logs. The <PATH> can be a full path, image basename, or partial path. Searches are done in this order.'''
        command_args = self._split_command_line(line)
        if command_args is None:
            return
        if not self.image_option_parser:
            self.image_option_parser = optparse.OptionParser(
                description=description, prog='image', usage=usage)
            self.image_option_parser.add_option(
                '-a',
                '--all',
                action='store_true',
                help='show all images',
                default=False)
        try:
            (options, args) = self.image_option_parser.parse_args(command_args)
        except SystemExit:
            # See do_symbolicate: optparse exits on bad options and --help.
            return

        if not args:
            for crash_log in self.crash_logs:
                for (image_idx, image) in enumerate(crash_log.images):
                    print('[%u] %s' % (image_idx, image))
            return False

        for image_path in args:
            # startswith() is safe for an empty argument, unlike indexing.
            fullpath_search = image_path.startswith('/')
            for (crash_log_idx, crash_log) in enumerate(self.crash_logs):
                matches_found = 0
                # First pass: exact match on the full path or the basename.
                for image in crash_log.images:
                    if fullpath_search:
                        candidate = image.get_resolved_path()
                    else:
                        candidate = image.get_resolved_path_basename()
                    if candidate == image_path:
                        matches_found += 1
                        print('[%u] ' % (crash_log_idx), image)
                if matches_found:
                    continue
                # Second pass: fall back to a partial path match.
                for image in crash_log.images:
                    resolved_image_path = image.get_resolved_path()
                    if resolved_image_path and image_path in resolved_image_path:
                        print('[%u] ' % (crash_log_idx), image)
        return False
869
870
def interactive_crashlogs(debugger, options, args):
    '''Parse the crash logs matched by args and start the interactive prompt.

    Every entry of args is expanded as a glob pattern. Files that fail to
    parse have their exception printed and are skipped, as are crash logs
    that contain no images. The remaining crash logs are listed and handed
    to an Interactive command loop.'''
    crash_log_files = [resolved_path
                       for pattern in args
                       for resolved_path in glob.glob(pattern)]

    crash_logs = []
    for crash_log_file in crash_log_files:
        try:
            crash_log = CrashLogParser().parse(debugger, crash_log_file, options.verbose)
        except Exception as e:
            print(e)
            continue
        if options.debug:
            crash_log.dump()
        if crash_log.images:
            crash_logs.append(crash_log)
        else:
            print('error: no images in crash log "%s"' % (crash_log))

    interpreter = Interactive(crash_logs)
    # List all crash logs that were imported
    interpreter.do_list()
    interpreter.cmdloop()
896
897
def _crashlog_frame_offset(frame, target):
    '''Return how far frame.pc lies past the start of the code it belongs to.

    The start address is taken, in order of preference, from the frame
    block's range looked up with frame.addr, from the frame's function, or
    from the frame's symbol. Returns 0 when the frame has neither a function
    nor a symbol.'''
    frame_pc = frame.pc
    if frame.function:
        block_range = frame.GetFrameBlock().range[frame.addr]
        if block_range:
            return frame_pc - block_range[0].GetLoadAddress(target)
        return frame_pc - frame.function.addr.GetLoadAddress(target)
    if frame.symbol:
        return frame_pc - frame.symbol.addr.GetLoadAddress(target)
    return 0


def save_crashlog(debugger, command, exe_ctx, result, dict):
    '''Export the state of the current target into a crashlog file.

    debugger and dict are not used. command is the raw argument string; it
    must contain exactly one output path and may contain -v/--verbose.
    exe_ctx supplies the target and process to describe. Problems (bad
    options, wrong argument count, invalid target, unwritable output file)
    are reported through result.PutCString() and nothing is raised for them.

    The output contains a short header, one "Thread N:" section per thread
    of the process and a "Binary Images:" section listing every module that
    has a loaded __TEXT section.'''
    usage = "usage: %prog [options] <output-path>"
    description = '''Export the state of current target into a crashlog file'''
    parser = optparse.OptionParser(
        description=description,
        prog='save_crashlog',
        usage=usage)
    parser.add_option(
        '-v',
        '--verbose',
        action='store_true',
        dest='verbose',
        help='display verbose debug info',
        default=False)
    try:
        (options, args) = parser.parse_args(shlex.split(command))
    except (SystemExit, ValueError):
        # optparse exits on bad options; shlex raises ValueError on
        # unbalanced quotes.
        result.PutCString("error: invalid options")
        return
    if len(args) != 1:
        result.PutCString(
            "error: invalid arguments, a single output file is the only valid argument")
        return

    # Validate the target before touching the file system so that a failed
    # export does not leave an empty file behind.
    target = exe_ctx.target
    if not target:
        result.PutCString("error: invalid target")
        return

    # open() signals failure by raising, never by returning a false value.
    try:
        out_file = open(args[0], 'w')
    except (IOError, OSError) as e:
        result.PutCString(
            "error: failed to open file '%s' for writing: %s" % (args[0], e))
        return

    with out_file:
        write = out_file.write
        identifier = target.executable.basename
        process = exe_ctx.process
        if process:
            pid = process.id
            if pid != lldb.LLDB_INVALID_PROCESS_ID:
                write('Process:         %s [%u]\n' % (identifier, pid))
        write('Path:            %s\n' % (target.executable.fullpath))
        write('Identifier:      %s\n' % (identifier))
        write('\nDate/Time:       %s\n' %
              (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")))

        # sysctl terminates its output with a newline, which must not end
        # up inside the parentheses of the "OS Version" line. Fall back to
        # the file's usual '???' placeholder if sysctl cannot be run.
        try:
            os_build = subprocess.check_output(
                ['sysctl', '-n', 'kern.osversion']).decode("utf-8").strip()
        except (OSError, subprocess.CalledProcessError):
            os_build = '???'
        write('OS Version:      Mac OS X %s (%s)\n' %
              (platform.mac_ver()[0], os_build))
        write('Report Version:  9\n')

        # Only walk threads when there is a valid process.
        num_threads = process.num_threads if process else 0
        for thread_idx in range(num_threads):
            thread = process.thread[thread_idx]
            write('\nThread %u:\n' % (thread_idx))
            for (frame_idx, frame) in enumerate(thread.frames):
                frame_offset = _crashlog_frame_offset(frame, target)
                write('%-3u %-32s 0x%16.16x %s' %
                      (frame_idx, frame.module.file.basename, frame.pc,
                       frame.name))
                if frame_offset > 0:
                    write(' + %u' % (frame_offset))
                line_entry = frame.line_entry
                if line_entry:
                    if options.verbose:
                        # This will output the fullpath + line + column
                        write(' %s' % (line_entry))
                    else:
                        write(' %s:%u' %
                              (line_entry.file.basename, line_entry.line))
                        column = line_entry.column
                        if column:
                            write(':%u' % (column))
                write('\n')

        write('\nBinary Images:\n')
        for module in target.modules:
            text_segment = module.section['__TEXT']
            if not text_segment:
                continue
            text_segment_load_addr = text_segment.GetLoadAddress(target)
            if text_segment_load_addr == lldb.LLDB_INVALID_ADDRESS:
                continue
            text_segment_end_load_addr = text_segment_load_addr + text_segment.size
            module_version = '???'
            module_version_array = module.GetVersion()
            if module_version_array:
                module_version = '.'.join(map(str, module_version_array))
            write('    0x%16.16x - 0x%16.16x  %s (%s - ???) <%s> %s\n' %
                  (text_segment_load_addr,
                   text_segment_end_load_addr,
                   module.file.basename,
                   module_version,
                   module.GetUUIDString(),
                   module.file.fullpath))
1004
1005
def Symbolicate(debugger, command, result, dict):
    '''Command entry point: symbolicate the crash logs named in command.

    command is split into shell-style arguments and passed to
    SymbolicateCrashLogs. Any exception raised while doing so is reported
    through result.PutCString() instead of propagating.'''
    try:
        argv = shlex.split(command)
        SymbolicateCrashLogs(debugger, argv)
    except Exception as exc:
        message = "error: python exception: %s" % exc
        result.PutCString(message)
1011
1012
def SymbolicateCrashLog(crash_log, options):
    '''Load the images of crash_log into a target and print its threads.

    Depending on options this dumps the crash log (debug), prints the image
    list (dump_image_list / verbose), and selects which images to add to the
    target: all of them (load_all_images), those referenced by crashed
    threads (crashed_only), or those referenced by any identifier recorded
    on the crash log. Afterwards every backtrace and thread is printed via
    dump_symbolicated(), followed by the crash log's errors, if any.
    Nothing is printed past the image list if no target can be created.'''
    if options.debug:
        crash_log.dump()
    if not crash_log.images:
        print('error: no images in crash log')
        return

    if options.dump_image_list:
        print("Binary Images:")
        for image in crash_log.images:
            print(image.debug_dump() if options.verbose else image)

    target = crash_log.create_target()
    if not target:
        return
    # The result of this lookup is not used below.
    exe_module = target.GetModuleAtIndex(0)

    if options.load_all_images:
        # --load-all option was specified, load everything up
        images_to_load = list(crash_log.images)
    else:
        images_to_load = []
        if options.crashed_only:
            # Only the identifiers seen in stack frames of crashed threads.
            # A generator keeps the did_crash() checks interleaved with the
            # image lookups below.
            wanted_idents = (ident
                             for thread in crash_log.threads
                             if thread.did_crash()
                             for ident in thread.idents)
        else:
            wanted_idents = crash_log.idents
        for ident in wanted_idents:
            found = crash_log.find_images_with_identifier(ident)
            if found:
                images_to_load.extend(found)
            else:
                print('error: can\'t find image for identifier "%s"' % ident)

    loaded_images = []
    for image in images_to_load:
        if image in loaded_images:
            continue
        err = image.add_module(target)
        if err:
            print(err)
        else:
            loaded_images.append(image)

    for thread in (crash_log.backtraces or ()):
        thread.dump_symbolicated(crash_log, options)
        print()

    for thread in crash_log.threads:
        thread.dump_symbolicated(crash_log, options)
        print()

    if crash_log.errors:
        print("Errors:")
        for error in crash_log.errors:
            print(error)
1080
1081
def CreateSymbolicateCrashLogOptions(
        command_name,
        description,
        add_interactive_options):
    '''Build the optparse parser shared by the symbolication commands.

    command_name is the program name shown in usage and help output,
    description is the help description, and add_interactive_options
    controls whether the -i/--interactive flag is offered. Returns the
    configured optparse.OptionParser.'''
    usage = "usage: %prog [options] <FILE> [FILE ...]"
    # Use the caller's command name so that usage and error messages refer
    # to the command the user actually typed.
    option_parser = optparse.OptionParser(
        description=description, prog=command_name, usage=usage)
    option_parser.add_option(
        '--verbose',
        '-v',
        action='store_true',
        dest='verbose',
        help='display verbose debug info',
        default=False)
    option_parser.add_option(
        '--debug',
        '-g',
        action='store_true',
        dest='debug',
        help='display verbose debug logging',
        default=False)
    option_parser.add_option(
        '--load-all',
        '-a',
        action='store_true',
        dest='load_all_images',
        help='load all executable images, not just the images found in the crashed stack frames',
        default=False)
    option_parser.add_option(
        '--images',
        action='store_true',
        dest='dump_image_list',
        help='show image list',
        default=False)
    option_parser.add_option(
        '--debug-delay',
        type='int',
        dest='debug_delay',
        metavar='NSEC',
        help='pause for NSEC seconds for debugger',
        default=0)
    option_parser.add_option(
        '--crashed-only',
        '-c',
        action='store_true',
        dest='crashed_only',
        help='only symbolicate the crashed thread',
        default=False)
    option_parser.add_option(
        '--disasm-depth',
        '-d',
        type='int',
        dest='disassemble_depth',
        help='set the depth in stack frames that should be disassembled (default is 1)',
        default=1)
    option_parser.add_option(
        '--disasm-all',
        '-D',
        action='store_true',
        dest='disassemble_all_threads',
        help='enabled disassembly of frames on all threads (not just the crashed thread)',
        default=False)
    option_parser.add_option(
        '--disasm-before',
        '-B',
        type='int',
        dest='disassemble_before',
        help='the number of instructions to disassemble before the frame PC',
        default=4)
    option_parser.add_option(
        '--disasm-after',
        '-A',
        type='int',
        dest='disassemble_after',
        help='the number of instructions to disassemble after the frame PC',
        default=4)
    option_parser.add_option(
        '--source-context',
        '-C',
        type='int',
        metavar='NLINES',
        dest='source_context',
        help='show NLINES source lines of source context (default = 4)',
        default=4)
    option_parser.add_option(
        '--source-frames',
        type='int',
        metavar='NFRAMES',
        dest='source_frames',
        help='show source for NFRAMES (default = 4)',
        default=4)
    option_parser.add_option(
        '--source-all',
        action='store_true',
        dest='source_all',
        help='show source for all threads, not just the crashed thread',
        default=False)
    if add_interactive_options:
        # No explicit dest: optparse derives 'interactive' from the long name.
        option_parser.add_option(
            '-i',
            '--interactive',
            action='store_true',
            help='parse all crash logs and enter interactive mode',
            default=False)
    return option_parser
1187
1188
def SymbolicateCrashLogs(debugger, command_args):
    '''Parse command_args and symbolicate the crash log files they name.

    command_args is a list of already-split arguments (options followed by
    crash log paths). With -i/--interactive the files are handed to
    interactive_crashlogs(); otherwise each file is parsed and symbolicated
    in turn. Returns None; if option parsing fails or --help is requested,
    the function returns without doing anything else.'''
    description = '''Symbolicate one or more darwin crash log files to provide source file and line information,
inlined stack frames back to the concrete functions, and disassemble the location of the crash
for the first frame of the crashed thread.
If this script is imported into the LLDB command interpreter, a "crashlog" command will be added to the interpreter
for use at the LLDB command line. After a crash log has been parsed and symbolicated, a target will have been
created that has all of the shared libraries loaded at the load addresses found in the crash log file. This allows
you to explore the program as if it were stopped at the locations described in the crash log and functions can
be disassembled and lookups can be performed using the addresses found in the crash log.'''
    option_parser = CreateSymbolicateCrashLogOptions(
        'crashlog', description, True)
    try:
        (options, args) = option_parser.parse_args(command_args)
    except SystemExit:
        # optparse calls sys.exit() on bad options and after printing
        # --help; do not let that terminate the hosting interpreter.
        return

    if options.debug:
        print('command_args = %s' % command_args)
        print('options', options)
        print('args', args)

    if options.debug_delay > 0:
        print("Waiting %u seconds for debugger to attach..." % options.debug_delay)
        time.sleep(options.debug_delay)

    if not args:
        return

    if options.interactive:
        interactive_crashlogs(debugger, options, args)
        return

    for crash_log_file in args:
        crash_log = CrashLogParser().parse(debugger, crash_log_file, options.verbose)
        SymbolicateCrashLog(crash_log, options)
# Module entry point. When executed as a script, symbolicate the crash logs
# named on the command line using a freshly created debugger. Otherwise, if
# the lldb module exposes a truthy 'debugger' attribute, register the
# "crashlog" and "save_crashlog" commands with it.
if __name__ == '__main__':
    # Create a new debugger instance
    debugger = lldb.SBDebugger.Create()
    SymbolicateCrashLogs(debugger, sys.argv[1:])
    lldb.SBDebugger.Destroy(debugger)
elif getattr(lldb, 'debugger', None):
    # NOTE(review): the callables are addressed as lldb.macosx.crashlog.*,
    # which presumably requires this module to be importable under that
    # package path -- confirm for the plain "script import crashlog" case.
    lldb.debugger.HandleCommand(
        'command script add -f lldb.macosx.crashlog.Symbolicate crashlog')
    lldb.debugger.HandleCommand(
        'command script add -f lldb.macosx.crashlog.save_crashlog save_crashlog')
1232