1#!/usr/bin/env python3
2
3#----------------------------------------------------------------------
4# Be sure to add the python path that points to the LLDB shared library.
5#
6# To use this in the embedded python interpreter using "lldb":
7#
8#   cd /path/containing/crashlog.py
9#   lldb
10#   (lldb) script import crashlog
11#   "crashlog" command installed, type "crashlog --help" for detailed help
12#   (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash
13#
14# The benefit of running the crashlog command inside lldb in the
15# embedded python interpreter is when the command completes, there
16# will be a target with all of the files loaded at the locations
17# described in the crash log. Only the files that have stack frames
18# in the backtrace will be loaded unless the "--load-all" option
19# has been specified. This allows users to explore the program in the
20# state it was in right at crash time.
21#
22# On MacOSX csh, tcsh:
23#   ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )
24#
25# On MacOSX sh, bash:
26#   PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
27#----------------------------------------------------------------------
28
29import concurrent.futures
30import contextlib
31import datetime
32import json
33import optparse
34import os
35import platform
36import plistlib
37import re
38import shlex
39import string
40import subprocess
41import sys
42import threading
43import time
44import uuid
45
46
# Guards print() calls made under `with print_lock:` elsewhere in this file,
# presumably so output from concurrent symbol lookups does not interleave.
print_lock = threading.RLock()

try:
    # First try for LLDB in case PYTHONPATH is already correctly setup.
    import lldb
except ImportError:
    # Ask the command line driver for the path to the lldb module. The child
    # process inherits our environment, so SDKROOT is propagated to xcrun.
    command = ['xcrun', 'lldb', '-P'] if platform.system() == 'Darwin' else ['lldb', '-P']
    try:
        lldb_python_path = subprocess.check_output(command).decode("utf-8").strip()
    except (OSError, subprocess.CalledProcessError, UnicodeDecodeError):
        # The driver is missing or failed. Fall through so that the second
        # import attempt below reports the friendly error instead of a
        # traceback from subprocess.
        lldb_python_path = ''
    # Extend the module search path if the path exists and isn't already there.
    if os.path.exists(lldb_python_path) and lldb_python_path not in sys.path:
        sys.path.append(lldb_python_path)
    # Try importing LLDB again.
    try:
        import lldb
    except ImportError:
        print("error: couldn't locate the 'lldb' module, please set PYTHONPATH correctly")
        sys.exit(1)
66
67from lldb.utils import symbolication
68
def read_plist(s):
    """Decode a serialized property list and return the resulting object.

    The loader is picked from the running interpreter's major version:
    ``plistlib.loads`` on Python 3, ``plistlib.readPlistFromString``
    otherwise.
    """
    # Only the selected branch is evaluated, so the legacy attribute is never
    # looked up on Python 3.
    loader = (plistlib.loads if sys.version_info.major == 3
              else plistlib.readPlistFromString)
    return loader(s)
74
75class CrashLog(symbolication.Symbolicator):
76    class Thread:
77        """Class that represents a thread in a darwin crash log"""
78
79        def __init__(self, index, app_specific_backtrace):
80            self.index = index
81            self.id = index
82            self.frames = list()
83            self.idents = list()
84            self.registers = dict()
85            self.reason = None
86            self.name = None
87            self.queue = None
88            self.crashed = False
89            self.app_specific_backtrace = app_specific_backtrace
90
91        def dump(self, prefix):
92            if self.app_specific_backtrace:
93                print("%Application Specific Backtrace[%u] %s" % (prefix, self.index, self.reason))
94            else:
95                print("%sThread[%u] %s" % (prefix, self.index, self.reason))
96            if self.frames:
97                print("%s  Frames:" % (prefix))
98                for frame in self.frames:
99                    frame.dump(prefix + '    ')
100            if self.registers:
101                print("%s  Registers:" % (prefix))
102                for reg in self.registers.keys():
103                    print("%s    %-8s = %#16.16x" % (prefix, reg, self.registers[reg]))
104
105        def dump_symbolicated(self, crash_log, options):
106            this_thread_crashed = self.app_specific_backtrace
107            if not this_thread_crashed:
108                this_thread_crashed = self.did_crash()
109                if options.crashed_only and this_thread_crashed == False:
110                    return
111
112            print("%s" % self)
113            display_frame_idx = -1
114            for frame_idx, frame in enumerate(self.frames):
115                disassemble = (
116                    this_thread_crashed or options.disassemble_all_threads) and frame_idx < options.disassemble_depth
117                if frame_idx == 0:
118                    symbolicated_frame_addresses = crash_log.symbolicate(
119                        frame.pc & crash_log.addr_mask, options.verbose)
120                else:
121                    # Any frame above frame zero and we have to subtract one to
122                    # get the previous line entry
123                    symbolicated_frame_addresses = crash_log.symbolicate(
124                        (frame.pc & crash_log.addr_mask) - 1, options.verbose)
125
126                if symbolicated_frame_addresses:
127                    symbolicated_frame_address_idx = 0
128                    for symbolicated_frame_address in symbolicated_frame_addresses:
129                        display_frame_idx += 1
130                        print('[%3u] %s' % (frame_idx, symbolicated_frame_address))
131                        if (options.source_all or self.did_crash(
132                        )) and display_frame_idx < options.source_frames and options.source_context:
133                            source_context = options.source_context
134                            line_entry = symbolicated_frame_address.get_symbol_context().line_entry
135                            if line_entry.IsValid():
136                                strm = lldb.SBStream()
137                                if line_entry:
138                                    crash_log.debugger.GetSourceManager().DisplaySourceLinesWithLineNumbers(
139                                        line_entry.file, line_entry.line, source_context, source_context, "->", strm)
140                                source_text = strm.GetData()
141                                if source_text:
142                                    # Indent the source a bit
143                                    indent_str = '    '
144                                    join_str = '\n' + indent_str
145                                    print('%s%s' % (indent_str, join_str.join(source_text.split('\n'))))
146                        if symbolicated_frame_address_idx == 0:
147                            if disassemble:
148                                instructions = symbolicated_frame_address.get_instructions()
149                                if instructions:
150                                    print()
151                                    symbolication.disassemble_instructions(
152                                        crash_log.get_target(),
153                                        instructions,
154                                        frame.pc,
155                                        options.disassemble_before,
156                                        options.disassemble_after,
157                                        frame.index > 0)
158                                    print()
159                        symbolicated_frame_address_idx += 1
160                else:
161                    print(frame)
162            if self.registers:
163                print()
164                for reg in self.registers.keys():
165                    print("    %-8s = %#16.16x" % (reg, self.registers[reg]))
166            elif self.crashed:
167               print()
168               print("No thread state (register information) available")
169
170        def add_ident(self, ident):
171            if ident not in self.idents:
172                self.idents.append(ident)
173
174        def did_crash(self):
175            return self.reason is not None
176
177        def __str__(self):
178            if self.app_specific_backtrace:
179                s = "Application Specific Backtrace[%u]" % self.index
180            else:
181                s = "Thread[%u]" % self.index
182            if self.reason:
183                s += ' %s' % self.reason
184            return s
185
186    class Frame:
187        """Class that represents a stack frame in a thread in a darwin crash log"""
188
189        def __init__(self, index, pc, description):
190            self.pc = pc
191            self.description = description
192            self.index = index
193
194        def __str__(self):
195            if self.description:
196                return "[%3u] 0x%16.16x %s" % (
197                    self.index, self.pc, self.description)
198            else:
199                return "[%3u] 0x%16.16x" % (self.index, self.pc)
200
201        def dump(self, prefix):
202            print("%s%s" % (prefix, str(self)))
203
204    class DarwinImage(symbolication.Image):
205        """Class that represents a binary images in a darwin crash log"""
206        dsymForUUIDBinary = '/usr/local/bin/dsymForUUID'
207        if not os.path.exists(dsymForUUIDBinary):
208            try:
209                dsymForUUIDBinary = subprocess.check_output('which dsymForUUID',
210                                                            shell=True).decode("utf-8").rstrip('\n')
211            except:
212                dsymForUUIDBinary = ""
213
214        dwarfdump_uuid_regex = re.compile(
215            'UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*')
216
217        def __init__(
218                self,
219                text_addr_lo,
220                text_addr_hi,
221                identifier,
222                version,
223                uuid,
224                path,
225                verbose):
226            symbolication.Image.__init__(self, path, uuid)
227            self.add_section(
228                symbolication.Section(
229                    text_addr_lo,
230                    text_addr_hi,
231                    "__TEXT"))
232            self.identifier = identifier
233            self.version = version
234            self.verbose = verbose
235
236        def show_symbol_progress(self):
237            """
238            Hide progress output and errors from system frameworks as they are plentiful.
239            """
240            if self.verbose:
241                return True
242            return not (self.path.startswith("/System/Library/") or
243                        self.path.startswith("/usr/lib/"))
244
245
246        def find_matching_slice(self):
247            dwarfdump_cmd_output = subprocess.check_output(
248                'dwarfdump --uuid "%s"' % self.path, shell=True).decode("utf-8")
249            self_uuid = self.get_uuid()
250            for line in dwarfdump_cmd_output.splitlines():
251                match = self.dwarfdump_uuid_regex.search(line)
252                if match:
253                    dwarf_uuid_str = match.group(1)
254                    dwarf_uuid = uuid.UUID(dwarf_uuid_str)
255                    if self_uuid == dwarf_uuid:
256                        self.resolved_path = self.path
257                        self.arch = match.group(2)
258                        return True
259            if not self.resolved_path:
260                self.unavailable = True
261                if self.show_symbol_progress():
262                    print(("error\n    error: unable to locate '%s' with UUID %s"
263                           % (self.path, self.get_normalized_uuid_string())))
264                return False
265
266        def locate_module_and_debug_symbols(self):
267            # Don't load a module twice...
268            if self.resolved:
269                return True
270            # Mark this as resolved so we don't keep trying
271            self.resolved = True
272            uuid_str = self.get_normalized_uuid_string()
273            if self.show_symbol_progress():
274                with print_lock:
275                    print('Getting symbols for %s %s...' % (uuid_str, self.path))
276            if os.path.exists(self.dsymForUUIDBinary):
277                dsym_for_uuid_command = '%s %s' % (
278                    self.dsymForUUIDBinary, uuid_str)
279                s = subprocess.check_output(dsym_for_uuid_command, shell=True)
280                if s:
281                    try:
282                        plist_root = read_plist(s)
283                    except:
284                        with print_lock:
285                            print(("Got exception: ", sys.exc_info()[1], " handling dsymForUUID output: \n", s))
286                        raise
287                    if plist_root:
288                        plist = plist_root[uuid_str]
289                        if plist:
290                            if 'DBGArchitecture' in plist:
291                                self.arch = plist['DBGArchitecture']
292                            if 'DBGDSYMPath' in plist:
293                                self.symfile = os.path.realpath(
294                                    plist['DBGDSYMPath'])
295                            if 'DBGSymbolRichExecutable' in plist:
296                                self.path = os.path.expanduser(
297                                    plist['DBGSymbolRichExecutable'])
298                                self.resolved_path = self.path
299            if not self.resolved_path and os.path.exists(self.path):
300                if not self.find_matching_slice():
301                    return False
302            if not self.resolved_path and not os.path.exists(self.path):
303                try:
304                    mdfind_results = subprocess.check_output(
305                        ["/usr/bin/mdfind",
306                         "com_apple_xcode_dsym_uuids == %s" % uuid_str]).decode("utf-8").splitlines()
307                    found_matching_slice = False
308                    for dsym in mdfind_results:
309                        dwarf_dir = os.path.join(dsym, 'Contents/Resources/DWARF')
310                        if not os.path.exists(dwarf_dir):
311                            # Not a dSYM bundle, probably an Xcode archive.
312                            continue
313                        with print_lock:
314                            print('falling back to binary inside "%s"' % dsym)
315                        self.symfile = dsym
316                        for filename in os.listdir(dwarf_dir):
317                           self.path = os.path.join(dwarf_dir, filename)
318                           if self.find_matching_slice():
319                              found_matching_slice = True
320                              break
321                        if found_matching_slice:
322                           break
323                except:
324                    pass
325            if (self.resolved_path and os.path.exists(self.resolved_path)) or (
326                    self.path and os.path.exists(self.path)):
327                with print_lock:
328                    print('Resolved symbols for %s %s...' % (uuid_str, self.path))
329                return True
330            else:
331                self.unavailable = True
332            return False
333
334    def __init__(self, debugger, path, verbose):
335        """CrashLog constructor that take a path to a darwin crash log file"""
336        symbolication.Symbolicator.__init__(self, debugger)
337        self.path = os.path.expanduser(path)
338        self.info_lines = list()
339        self.system_profile = list()
340        self.threads = list()
341        self.backtraces = list()  # For application specific backtraces
342        self.idents = list()  # A list of the required identifiers for doing all stack backtraces
343        self.errors = list()
344        self.crashed_thread_idx = -1
345        self.version = -1
346        self.target = None
347        self.verbose = verbose
348
349    def dump(self):
350        print("Crash Log File: %s" % (self.path))
351        if self.backtraces:
352            print("\nApplication Specific Backtraces:")
353            for thread in self.backtraces:
354                thread.dump('  ')
355        print("\nThreads:")
356        for thread in self.threads:
357            thread.dump('  ')
358        print("\nImages:")
359        for image in self.images:
360            image.dump('  ')
361
362    def set_main_image(self, identifier):
363        for i, image in enumerate(self.images):
364            if image.identifier == identifier:
365                self.images.insert(0, self.images.pop(i))
366                break
367
368    def find_image_with_identifier(self, identifier):
369        for image in self.images:
370            if image.identifier == identifier:
371                return image
372        regex_text = '^.*\.%s$' % (re.escape(identifier))
373        regex = re.compile(regex_text)
374        for image in self.images:
375            if regex.match(image.identifier):
376                return image
377        return None
378
379    def create_target(self):
380        if self.target is None:
381            self.target = symbolication.Symbolicator.create_target(self)
382            if self.target:
383                return self.target
384            # We weren't able to open the main executable as, but we can still
385            # symbolicate
386            print('crashlog.create_target()...2')
387            if self.idents:
388                for ident in self.idents:
389                    image = self.find_image_with_identifier(ident)
390                    if image:
391                        self.target = image.create_target(self.debugger)
392                        if self.target:
393                            return self.target  # success
394            print('crashlog.create_target()...3')
395            for image in self.images:
396                self.target = image.create_target(self.debugger)
397                if self.target:
398                    return self.target  # success
399            print('crashlog.create_target()...4')
400            print('error: Unable to locate any executables from the crash log.')
401            print('       Try loading the executable into lldb before running crashlog')
402            print('       and/or make sure the .dSYM bundles can be found by Spotlight.')
403        return self.target
404
    def get_target(self):
        """Return self.target as is (None until create_target() has assigned one)."""
        return self.target
407
408
class CrashLogFormatException(Exception):
    """Raised by JSONCrashLogParser.parse() when the file is not loadable as JSON.

    CrashLogParser.parse() catches it and falls back to the text parser.
    """
411
412
class CrashLogParseException(Exception):
    """Raised by JSONCrashLogParser.parse() when loaded JSON lacks or garbles expected data."""
415
416
class CrashLogParser:
    """Front end that selects the parser matching a crash log's format."""

    def parse(self, debugger, path, verbose):
        """Parse the crash log at path and return the parser's result.

        The JSON parser is tried first. Only when it reports a format
        mismatch (CrashLogFormatException) is the text parser used; any other
        exception propagates.
        """
        try:
            json_parser = JSONCrashLogParser(debugger, path, verbose)
            return json_parser.parse()
        except CrashLogFormatException:
            text_parser = TextCrashLogParser(debugger, path, verbose)
            return text_parser.parse()
423
424
class JSONCrashLogParser:
    """Parser for crash logs stored as a JSON document.

    The document may be preceded by a single line of metadata. parse() fills
    a CrashLog with process information, images, threads and report notes.
    """

    def __init__(self, debugger, path, verbose):
        """Remember the (user-expanded) path and create the CrashLog to fill."""
        self.path = os.path.expanduser(path)
        self.verbose = verbose
        self.crashlog = CrashLog(debugger, self.path, self.verbose)

    def parse_json(self, buffer):
        """Decode buffer as JSON, retrying without its first line on failure.

        Raises ValueError (json.JSONDecodeError) when neither attempt works.
        """
        try:
            return json.loads(buffer)
        except ValueError:
            # The first line can contain meta data. Try stripping it and try
            # again.
            _, _, tail = buffer.partition('\n')
            return json.loads(tail)

    def parse(self):
        """Read and parse the file, returning the populated CrashLog.

        Raises CrashLogFormatException when the file is not JSON at all, and
        CrashLogParseException when the JSON lacks expected keys or holds
        values of the wrong kind.
        """
        with open(self.path, 'r') as f:
            buffer = f.read()

        try:
            self.data = self.parse_json(buffer)
        except Exception as e:
            # Exception rather than a bare except, so that KeyboardInterrupt
            # and SystemExit are not reported as a format mismatch.
            raise CrashLogFormatException() from e

        try:
            self.parse_process_info(self.data)
            self.parse_images(self.data['usedImages'])
            self.parse_main_image(self.data)
            self.parse_threads(self.data['threads'])
            self.parse_errors(self.data)
            # NOTE(review): when no thread is marked "triggered" the index is
            # still -1, so the reason lands on the last thread -- confirm
            # that this is intended.
            thread = self.crashlog.threads[self.crashlog.crashed_thread_idx]
            reason = self.parse_crash_reason(self.data['exception'])
            if thread.reason:
                thread.reason = '{} {}'.format(thread.reason, reason)
            else:
                thread.reason = reason
        except (KeyError, ValueError, TypeError, IndexError) as e:
            # IndexError covers an empty thread list and an out-of-range
            # imageIndex, which previously escaped as a raw IndexError.
            raise CrashLogParseException(
                'Failed to parse JSON crashlog: {}: {}'.format(
                    type(e).__name__, e)) from e

        return self.crashlog

    def get_used_image(self, idx):
        """Return entry idx of the document's 'usedImages' list."""
        return self.data['usedImages'][idx]

    def parse_process_info(self, json_data):
        """Copy pid, procName and procPath onto the crash log."""
        self.crashlog.process_id = json_data['pid']
        self.crashlog.process_identifier = json_data['procName']
        self.crashlog.process_path = json_data['procPath']

    def parse_crash_reason(self, json_exception):
        """Build "<type> (<signal>) (<codes or subtype>)" from the exception dict.

        The separating space after the type is emitted even when there is no
        signal; 'codes' takes precedence over 'subtype'.
        """
        exception_type = json_exception['type']
        exception_signal = " "
        if 'signal' in json_exception:
            exception_signal += "({})".format(json_exception['signal'])

        if 'codes' in json_exception:
            exception_extra = " ({})".format(json_exception['codes'])
        elif 'subtype' in json_exception:
            exception_extra = " ({})".format(json_exception['subtype'])
        else:
            exception_extra = ""
        return "{}{}{}".format(exception_type, exception_signal,
                               exception_extra)

    def parse_images(self, json_images):
        """Append one DarwinImage per entry of json_images to the crash log."""
        for json_image in json_images:
            img_uuid = uuid.UUID(json_image['uuid'])
            low = int(json_image['base'])
            # The end address used to be hard-coded to 0, which gave every
            # image an empty __TEXT range. Derive it from 'size', the same key
            # parse_frames() relies on; without it the range stays empty.
            high = low + int(json_image.get('size', 0))
            name = json_image.get('name', '')
            path = json_image.get('path', '')
            version = ''
            darwin_image = self.crashlog.DarwinImage(low, high, name, version,
                                                     img_uuid, path,
                                                     self.verbose)
            self.crashlog.images.append(darwin_image)

    def parse_main_image(self, json_data):
        """Move the image named by 'procName', if present, to the front."""
        if 'procName' in json_data:
            proc_name = json_data['procName']
            self.crashlog.set_main_image(proc_name)

    def parse_frames(self, thread, json_frames):
        """Append a Frame to thread for every entry of json_frames.

        Each frame's pc is its image's base plus 'imageOffset'; the offset is
        also stored as the frame description. Image names are recorded as
        identifiers on both the thread and the crash log.
        """
        idx = 0
        for json_frame in json_frames:
            image_id = int(json_frame['imageIndex'])
            json_image = self.get_used_image(image_id)
            ident = json_image.get('name', '')
            thread.add_ident(ident)
            if ident not in self.crashlog.idents:
                self.crashlog.idents.append(ident)

            frame_offset = int(json_frame['imageOffset'])
            image_addr = json_image['base']
            pc = image_addr + frame_offset
            thread.frames.append(self.crashlog.Frame(idx, pc, frame_offset))

            # on arm64 systems, if it jump through a null function pointer,
            # we end up at address 0 and the crash reporter unwinder
            # misses the frame that actually faulted.
            # But $lr can tell us where the last BL/BLR instruction used
            # was at, so insert that address as the caller stack frame.
            if idx == 0 and pc == 0 and "lr" in thread.registers:
                pc = thread.registers["lr"]
                for image in self.data['usedImages']:
                    text_lo = image['base']
                    text_hi = text_lo + image['size']
                    if text_lo <= pc < text_hi:
                        idx += 1
                        frame_offset = pc - text_lo
                        thread.frames.append(self.crashlog.Frame(idx, pc, frame_offset))
                        break

            idx += 1

    def parse_threads(self, json_threads):
        """Create a Thread per entry of json_threads and append it to the crash log.

        The thread flagged 'triggered' becomes the crashed thread and gets its
        registers from 'threadState' when that key is present.
        """
        for idx, json_thread in enumerate(json_threads):
            thread = self.crashlog.Thread(idx, False)
            if 'name' in json_thread:
                thread.name = json_thread['name']
                thread.reason = json_thread['name']
            if 'id' in json_thread:
                thread.id = int(json_thread['id'])
            if json_thread.get('triggered', False):
                self.crashlog.crashed_thread_idx = idx
                thread.crashed = True
                # Registers are read before the frames because parse_frames()
                # consults thread.registers["lr"].
                if 'threadState' in json_thread:
                    thread.registers = self.parse_thread_registers(
                        json_thread['threadState'])
            if 'queue' in json_thread:
                thread.queue = json_thread.get('queue')
            self.parse_frames(thread, json_thread.get('frames', []))
            self.crashlog.threads.append(thread)

    def parse_thread_registers(self, json_thread_state, prefix=None):
        """Flatten a thread-state dict into {register name: int value}.

        The "x" entry is a list that expands to x0, x1, ...; the "rosetta"
        entry is a nested state merged in without a prefix. Entries without a
        usable 'value' are skipped.
        """
        registers = dict()
        for key, state in json_thread_state.items():
            if key == "rosetta":
                registers.update(self.parse_thread_registers(state))
                continue
            if key == "x":
                gpr_dict = {str(idx): reg for idx, reg in enumerate(state)}
                registers.update(self.parse_thread_registers(gpr_dict, key))
                continue
            try:
                value = int(state['value'])
                registers["{}{}".format(prefix or '', key)] = value
            except (KeyError, ValueError, TypeError):
                # Not a register entry (for example a plain string field).
                pass
        return registers

    def parse_errors(self, json_data):
        """Store 'reportNotes', when present, as the crash log's errors."""
        if 'reportNotes' in json_data:
            self.crashlog.errors = json_data['reportNotes']
585
586
class CrashLogParseMode:
    """Integer tags selecting which TextCrashLogParser handler receives a line."""
    (NORMAL,    # dispatched to parse_normal
     THREAD,    # dispatched to parse_thread
     IMAGES,    # dispatched to parse_images
     THREGS,    # dispatched to parse_thread_registers
     SYSTEM,    # dispatched to parse_system
     INSTRS,    # dispatched to parse_instructions
     ) = range(6)
594
595
596class TextCrashLogParser:
597    parent_process_regex = re.compile('^Parent Process:\s*(.*)\[(\d+)\]')
598    thread_state_regex = re.compile('^Thread ([0-9]+) crashed with')
599    thread_instrs_regex = re.compile('^Thread ([0-9]+) instruction stream')
600    thread_regex = re.compile('^Thread ([0-9]+)([^:]*):(.*)')
601    app_backtrace_regex = re.compile('^Application Specific Backtrace ([0-9]+)([^:]*):(.*)')
602    version = r'(\(.+\)|(arm|x86_)[0-9a-z]+)\s+'
603    frame_regex = re.compile(r'^([0-9]+)' r'\s'                # id
604                             r'+(.+?)'    r'\s+'               # img_name
605                             r'(' +version+ r')?'              # img_version
606                             r'(0x[0-9a-fA-F]{7}[0-9a-fA-F]+)' # addr
607                             r' +(.*)'                         # offs
608                            )
609    null_frame_regex = re.compile(r'^([0-9]+)\s+\?\?\?\s+(0{7}0+) +(.*)')
610    image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)'            # img_lo
611                                  r'\s+' '-' r'\s+'              #   -
612                                  r'(0x[0-9a-fA-F]+)'     r'\s+' # img_hi
613                                  r'[+]?(.+?)'            r'\s+' # img_name
614                                  r'(' +version+ ')?'            # img_version
615                                  r'(<([-0-9a-fA-F]+)>\s+)?'     # img_uuid
616                                  r'(/.*)'                       # img_path
617                                 )
618
619
620    def __init__(self, debugger, path, verbose):
621        self.path = os.path.expanduser(path)
622        self.verbose = verbose
623        self.thread = None
624        self.app_specific_backtrace = False
625        self.crashlog = CrashLog(debugger, self.path, self.verbose)
626        self.parse_mode = CrashLogParseMode.NORMAL
627        self.parsers = {
628            CrashLogParseMode.NORMAL : self.parse_normal,
629            CrashLogParseMode.THREAD : self.parse_thread,
630            CrashLogParseMode.IMAGES : self.parse_images,
631            CrashLogParseMode.THREGS : self.parse_thread_registers,
632            CrashLogParseMode.SYSTEM : self.parse_system,
633            CrashLogParseMode.INSTRS : self.parse_instructions,
634        }
635
636    def parse(self):
637        with open(self.path,'r') as f:
638            lines = f.read().splitlines()
639
640        for line in lines:
641            line_len = len(line)
642            if line_len == 0:
643                if self.thread:
644                    if self.parse_mode == CrashLogParseMode.THREAD:
645                        if self.thread.index == self.crashlog.crashed_thread_idx:
646                            self.thread.reason = ''
647                            if self.crashlog.thread_exception:
648                                self.thread.reason += self.crashlog.thread_exception
649                            if self.crashlog.thread_exception_data:
650                                self.thread.reason += " (%s)" % self.crashlog.thread_exception_data
651                        if self.app_specific_backtrace:
652                            self.crashlog.backtraces.append(self.thread)
653                        else:
654                            self.crashlog.threads.append(self.thread)
655                    self.thread = None
656                else:
657                    # only append an extra empty line if the previous line
658                    # in the info_lines wasn't empty
659                    if len(self.crashlog.info_lines) > 0 and len(self.crashlog.info_lines[-1]):
660                        self.crashlog.info_lines.append(line)
661                self.parse_mode = CrashLogParseMode.NORMAL
662            else:
663                self.parsers[self.parse_mode](line)
664
665        return self.crashlog
666
667
668    def parse_normal(self, line):
669        if line.startswith('Process:'):
670            (self.crashlog.process_name, pid_with_brackets) = line[
671                8:].strip().split(' [')
672            self.crashlog.process_id = pid_with_brackets.strip('[]')
673        elif line.startswith('Path:'):
674            self.crashlog.process_path = line[5:].strip()
675        elif line.startswith('Identifier:'):
676            self.crashlog.process_identifier = line[11:].strip()
677        elif line.startswith('Version:'):
678            version_string = line[8:].strip()
679            matched_pair = re.search("(.+)\((.+)\)", version_string)
680            if matched_pair:
681                self.crashlog.process_version = matched_pair.group(1)
682                self.crashlog.process_compatability_version = matched_pair.group(
683                    2)
684            else:
685                self.crashlog.process = version_string
686                self.crashlog.process_compatability_version = version_string
687        elif self.parent_process_regex.search(line):
688            parent_process_match = self.parent_process_regex.search(
689                line)
690            self.crashlog.parent_process_name = parent_process_match.group(1)
691            self.crashlog.parent_process_id = parent_process_match.group(2)
692        elif line.startswith('Exception Type:'):
693            self.crashlog.thread_exception = line[15:].strip()
694            return
695        elif line.startswith('Exception Codes:'):
696            self.crashlog.thread_exception_data = line[16:].strip()
697            return
698        elif line.startswith('Exception Subtype:'): # iOS
699            self.crashlog.thread_exception_data = line[18:].strip()
700            return
701        elif line.startswith('Crashed Thread:'):
702            self.crashlog.crashed_thread_idx = int(line[15:].strip().split()[0])
703            return
704        elif line.startswith('Triggered by Thread:'): # iOS
705            self.crashlog.crashed_thread_idx = int(line[20:].strip().split()[0])
706            return
707        elif line.startswith('Report Version:'):
708            self.crashlog.version = int(line[15:].strip())
709            return
710        elif line.startswith('System Profile:'):
711            self.parse_mode = CrashLogParseMode.SYSTEM
712            return
713        elif (line.startswith('Interval Since Last Report:') or
714                line.startswith('Crashes Since Last Report:') or
715                line.startswith('Per-App Interval Since Last Report:') or
716                line.startswith('Per-App Crashes Since Last Report:') or
717                line.startswith('Sleep/Wake UUID:') or
718                line.startswith('Anonymous UUID:')):
719            # ignore these
720            return
721        elif line.startswith('Thread'):
722            thread_state_match = self.thread_state_regex.search(line)
723            if thread_state_match:
724                self.app_specific_backtrace = False
725                thread_state_match = self.thread_regex.search(line)
726                thread_idx = int(thread_state_match.group(1))
727                self.parse_mode = CrashLogParseMode.THREGS
728                self.thread = self.crashlog.threads[thread_idx]
729                return
730            thread_insts_match  = self.thread_instrs_regex.search(line)
731            if thread_insts_match:
732                self.parse_mode = CrashLogParseMode.INSTRS
733                return
734            thread_match = self.thread_regex.search(line)
735            if thread_match:
736                self.app_specific_backtrace = False
737                self.parse_mode = CrashLogParseMode.THREAD
738                thread_idx = int(thread_match.group(1))
739                self.thread = self.crashlog.Thread(thread_idx, False)
740                return
741            return
742        elif line.startswith('Binary Images:'):
743            self.parse_mode = CrashLogParseMode.IMAGES
744            return
745        elif line.startswith('Application Specific Backtrace'):
746            app_backtrace_match = self.app_backtrace_regex.search(line)
747            if app_backtrace_match:
748                self.parse_mode = CrashLogParseMode.THREAD
749                self.app_specific_backtrace = True
750                idx = int(app_backtrace_match.group(1))
751                self.thread = self.crashlog.Thread(idx, True)
752        elif line.startswith('Last Exception Backtrace:'): # iOS
753            self.parse_mode = CrashLogParseMode.THREAD
754            self.app_specific_backtrace = True
755            idx = 1
756            self.thread = self.crashlog.Thread(idx, True)
757        self.crashlog.info_lines.append(line.strip())
758
759    def parse_thread(self, line):
760        if line.startswith('Thread'):
761            return
762        if self.null_frame_regex.search(line):
763            print('warning: thread parser ignored null-frame: "%s"' % line)
764            return
765        frame_match = self.frame_regex.search(line)
766        if frame_match:
767            (frame_id, frame_img_name, _, frame_img_version, _,
768                frame_addr, frame_ofs) = frame_match.groups()
769            ident = frame_img_name
770            self.thread.add_ident(ident)
771            if ident not in self.crashlog.idents:
772                self.crashlog.idents.append(ident)
773            self.thread.frames.append(self.crashlog.Frame(int(frame_id), int(
774                frame_addr, 0), frame_ofs))
775        else:
776            print('error: frame regex failed for line: "%s"' % line)
777
778    def parse_images(self, line):
779        image_match = self.image_regex_uuid.search(line)
780        if image_match:
781            (img_lo, img_hi, img_name, _, img_version, _,
782                _, img_uuid, img_path) = image_match.groups()
783            image = self.crashlog.DarwinImage(int(img_lo, 0), int(img_hi, 0),
784                                            img_name.strip(),
785                                            img_version.strip()
786                                            if img_version else "",
787                                            uuid.UUID(img_uuid), img_path,
788                                            self.verbose)
789            self.crashlog.images.append(image)
790        else:
791            print("error: image regex failed for: %s" % line)
792
793
794    def parse_thread_registers(self, line):
795        stripped_line = line.strip()
796        # "r12: 0x00007fff6b5939c8  r13: 0x0000000007000006  r14: 0x0000000000002a03  r15: 0x0000000000000c00"
797        reg_values = re.findall(
798            '([a-zA-Z0-9]+: 0[Xx][0-9a-fA-F]+) *', stripped_line)
799        for reg_value in reg_values:
800            (reg, value) = reg_value.split(': ')
801            self.thread.registers[reg.strip()] = int(value, 0)
802
803    def parse_system(self, line):
804        self.crashlog.system_profile.append(line)
805
806    def parse_instructions(self, line):
807        pass
808
809
def usage():
    """Print the one-line usage message and exit with status 0."""
    message = "Usage: lldb-symbolicate.py [-n name] executable-image"
    print(message)
    raise SystemExit(0)
813
814
def _crashlog_os_build():
    """Return the output of ``sysctl -n kern.osversion``, or '???' on failure."""
    try:
        output = subprocess.check_output(['sysctl', '-n', 'kern.osversion'])
    except (OSError, subprocess.CalledProcessError):
        return '???'
    # Strip the trailing newline so it does not land inside the parentheses
    # of the "OS Version:" line.
    return output.decode("utf-8").strip()


def _crashlog_frame_offset(frame, target):
    """Return the frame PC's offset from its block, function or symbol start."""
    frame_pc = frame.pc
    if frame.function:
        block_range = frame.GetFrameBlock().range[frame.addr]
        if block_range:
            return frame_pc - block_range[0].GetLoadAddress(target)
        return frame_pc - frame.function.addr.GetLoadAddress(target)
    if frame.symbol:
        return frame_pc - frame.symbol.addr.GetLoadAddress(target)
    return 0


def _write_crashlog_threads(out_file, target, process, verbose):
    """Write a "Thread N:" section with one line per frame for each thread."""
    for thread_idx in range(process.num_threads):
        thread = process.thread[thread_idx]
        out_file.write('\nThread %u:\n' % (thread_idx))
        for (frame_idx, frame) in enumerate(thread.frames):
            frame_offset = _crashlog_frame_offset(frame, target)
            out_file.write(
                '%-3u %-32s 0x%16.16x %s' %
                (frame_idx, frame.module.file.basename, frame.pc, frame.name))
            if frame_offset > 0:
                out_file.write(' + %u' % (frame_offset))
            line_entry = frame.line_entry
            if line_entry:
                if verbose:
                    # This will output the fullpath + line + column
                    out_file.write(' %s' % (line_entry))
                else:
                    out_file.write(
                        ' %s:%u' %
                        (line_entry.file.basename, line_entry.line))
                    column = line_entry.column
                    if column:
                        out_file.write(':%u' % (column))
            out_file.write('\n')


def _write_crashlog_images(out_file, target):
    """Write the "Binary Images:" section for modules with a loaded __TEXT."""
    out_file.write('\nBinary Images:\n')
    for module in target.modules:
        text_segment = module.section['__TEXT']
        if not text_segment:
            continue
        text_segment_load_addr = text_segment.GetLoadAddress(target)
        if text_segment_load_addr == lldb.LLDB_INVALID_ADDRESS:
            continue
        text_segment_end_load_addr = text_segment_load_addr + text_segment.size
        module_version = '???'
        module_version_array = module.GetVersion()
        if module_version_array:
            module_version = '.'.join(map(str, module_version_array))
        out_file.write(
            '    0x%16.16x - 0x%16.16x  %s (%s - ???) <%s> %s\n' %
            (text_segment_load_addr,
             text_segment_end_load_addr,
             module.file.basename,
             module_version,
             module.GetUUIDString(),
             module.file.fullpath))


def save_crashlog(debugger, command, exe_ctx, result, dict):
    """Export the state of the current target into a crashlog-style text file.

    The file gets a short header (process, path, identifier, date, OS and
    report version), one "Thread N:" section per thread of the process and a
    "Binary Images:" section listing each module whose __TEXT segment has a
    valid load address.

    Problems are reported through ``result`` and end the command early:
    invalid options, anything other than exactly one output path, an invalid
    target, or an output file that cannot be opened. The target is checked
    before the output file is opened, so an existing file is not truncated
    when there is nothing to write.

    Args:
        debugger: unused.
        command: the raw argument string; split shell-style.
        exe_ctx: execution context supplying ``target`` and ``process``.
        result: object whose ``PutCString`` receives error messages.
        dict: unused.
    """
    usage = "usage: %prog [options] <output-path>"
    description = '''Export the state of current target into a crashlog file'''
    parser = optparse.OptionParser(
        description=description,
        prog='save_crashlog',
        usage=usage)
    parser.add_option(
        '-v',
        '--verbose',
        action='store_true',
        dest='verbose',
        help='display verbose debug info',
        default=False)
    try:
        (options, args) = parser.parse_args(shlex.split(command))
    except (SystemExit, ValueError):
        # optparse signals bad options (and --help) via sys.exit(); shlex
        # raises ValueError for unbalanced quotes.
        result.PutCString("error: invalid options")
        return
    if len(args) != 1:
        result.PutCString(
            "error: invalid arguments, a single output file is the only valid argument")
        return
    target = exe_ctx.target
    if not target:
        result.PutCString("error: invalid target")
        return
    try:
        out_file = open(args[0], 'w')
    except OSError:
        result.PutCString(
            "error: failed to open file '%s' for writing..." % args[0])
        return
    # The context manager closes the file even if a write below raises.
    with out_file:
        identifier = target.executable.basename
        process = exe_ctx.process
        if process:
            pid = process.id
            if pid != lldb.LLDB_INVALID_PROCESS_ID:
                out_file.write(
                    'Process:         %s [%u]\n' %
                    (identifier, pid))
        out_file.write('Path:            %s\n' % (target.executable.fullpath))
        out_file.write('Identifier:      %s\n' % (identifier))
        out_file.write('\nDate/Time:       %s\n' %
                       (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
        out_file.write(
            'OS Version:      Mac OS X %s (%s)\n' %
            (platform.mac_ver()[0], _crashlog_os_build()))
        out_file.write('Report Version:  9\n')
        if process:
            _write_crashlog_threads(out_file, target, process, options.verbose)
        _write_crashlog_images(out_file, target)
921
922
class Symbolicate:
    """Command object that symbolicates the crash logs named in its arguments."""

    def __init__(self, debugger, internal_dict):
        """Accept and ignore both arguments; there is no state to set up."""

    def __call__(self, debugger, command, exe_ctx, result):
        """Split ``command`` shell-style and symbolicate the listed crash logs."""
        argv = shlex.split(command)
        SymbolicateCrashLogs(debugger, argv)

    def get_short_help(self):
        """Return the one-line summary of the command."""
        summary = "Symbolicate one or more darwin crash log files."
        return summary

    def get_long_help(self):
        """Return the full help text produced by the crashlog option parser."""
        return CrashLogOptionParser().format_help()
936
937
def SymbolicateCrashLog(crash_log, options):
    """Load the images of a parsed crash log and print its symbolicated threads.

    Steps, in order:
      1. Optionally dump the crash log (``options.debug``) and the image
         list (``options.dump_image_list``).
      2. Create a target from the crash log; stop if that fails.
      3. Choose the images to load: all of them (``options.load_all_images``),
         those referenced by crashed threads (``options.crashed_only``), or
         those referenced by any identifier recorded on the crash log.
      4. Add the chosen images to the target on a thread pool, printing any
         error string returned by ``add_module``.
      5. Print the symbolicated backtraces and threads, then any errors
         recorded on the crash log.

    Args:
        crash_log: the parsed crash log.
        options: parsed command options (see above for the fields read).
    """
    if options.debug:
        crash_log.dump()
    if not crash_log.images:
        print('error: no images in crash log')
        return

    if options.dump_image_list:
        print("Binary Images:")
        for image in crash_log.images:
            if options.verbose:
                print(image.debug_dump())
            else:
                print(image)

    target = crash_log.create_target()
    if not target:
        return

    if options.load_all_images:
        # --load-all option was specified, load everything up
        candidates = list(crash_log.images)
    else:
        if options.crashed_only:
            # Only the identifiers seen in frames of the crashed threads.
            idents = [ident
                      for thread in crash_log.threads if thread.did_crash()
                      for ident in thread.idents]
        else:
            idents = crash_log.idents
        candidates = []
        for ident in idents:
            images = crash_log.find_images_with_identifier(ident)
            if images:
                candidates.extend(images)
            else:
                print('error: can\'t find image for identifier "%s"' % ident)

    # The same image object can be reached through several identifiers or
    # several crashed threads; submit each one to the pool only once.
    images_to_load = []
    seen_ids = set()
    for image in candidates:
        if id(image) not in seen_ids:
            seen_ids.add(id(image))
            images_to_load.append(image)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(image.add_module, target)
                   for image in images_to_load]
        for future in concurrent.futures.as_completed(futures):
            # add_module returns a falsy value on success, else an error.
            err = future.result()
            if err:
                print(err)

    if crash_log.backtraces:
        for thread in crash_log.backtraces:
            thread.dump_symbolicated(crash_log, options)
            print()

    for thread in crash_log.threads:
        thread.dump_symbolicated(crash_log, options)
        print()

    if crash_log.errors:
        print("Errors:")
        for error in crash_log.errors:
            print(error)
1012
def load_crashlog_in_scripted_process(debugger, crash_log_file, options):
    """Parse a crash log and launch it in the debugger as a ScriptedProcess.

    The first existing target of ``debugger`` is reused when there is one;
    otherwise a target is created from the parsed crash log. The
    ``crashlog_scripted_process`` module is imported through the command
    interpreter, the process is launched with the crash log path and the
    ``load_all_images`` option as its structured-data dictionary, and
    finally "process status" and "thread backtrace" are run synchronously.

    Every failure makes the function return early without a value.

    Args:
        debugger: the debugger that receives the target and process.
        crash_log_file: path of the crash log; ``~`` is expanded.
        options: parsed command options; only ``load_all_images`` is read.
    """
    # NOTE(review): `result` is local to this function and is never returned
    # or printed, so the error strings stored in it do not appear to reach
    # the user - confirm whether they should be surfaced.
    result = lldb.SBCommandReturnObject()

    crashlog_path = os.path.expanduser(crash_log_file)
    if not os.path.exists(crashlog_path):
        result.PutCString("error: crashlog file %s does not exist" % crashlog_path)
        # Stop here instead of handing a nonexistent path to the parser.
        return

    crashlog = CrashLogParser().parse(debugger, crashlog_path, False)

    if debugger.GetNumTargets() > 0:
        target = debugger.GetTargetAtIndex(0)
    else:
        target = crashlog.create_target()
    if not target:
        result.PutCString("error: couldn't create target")
        return

    ci = debugger.GetCommandInterpreter()
    if not ci:
        result.PutCString("error: couldn't get command interpreter")
        return

    res = lldb.SBCommandReturnObject()
    ci.HandleCommand('script from lldb.macosx import crashlog_scripted_process', res)
    if not res.Succeeded():
        result.PutCString("error: couldn't import crashlog scripted process module")
        return

    # Arguments for the scripted process, passed as structured data.
    structured_data = lldb.SBStructuredData()
    structured_data.SetFromJSON(json.dumps({ "crashlog_path" : crashlog_path,
                                             "load_all_images": options.load_all_images }))
    launch_info = lldb.SBLaunchInfo(None)
    launch_info.SetProcessPluginName("ScriptedProcess")
    launch_info.SetScriptedProcessClassName("crashlog_scripted_process.CrashLogScriptedProcess")
    launch_info.SetScriptedProcessDictionary(structured_data)
    error = lldb.SBError()
    process = target.Launch(launch_info, error)

    if not process or error.Fail():
        return

    @contextlib.contextmanager
    def synchronous(debugger):
        # Switch the debugger to synchronous mode for the duration of the
        # block, then restore whatever mode it was in before.
        async_state = debugger.GetAsync()
        debugger.SetAsync(False)
        try:
            yield
        finally:
            debugger.SetAsync(async_state)

    with synchronous(debugger):
        run_options = lldb.SBCommandInterpreterRunOptions()
        run_options.SetStopOnError(True)
        run_options.SetStopOnCrash(True)
        run_options.SetEchoCommands(True)

        # Feed the two commands to the interpreter as if they were typed.
        commands_stream = lldb.SBStream()
        commands_stream.Print("process status\n")
        commands_stream.Print("thread backtrace\n")
        error = debugger.SetInputString(commands_stream.GetData())
        if error.Success():
            debugger.RunCommandInterpreter(True, False, run_options, 0, False, True)
1075
def CreateSymbolicateCrashLogOptions(
        command_name,
        description,
        add_interactive_options):
    """Build the optparse parser shared by the crash log commands.

    Args:
        command_name: program name shown in usage and help output. A falsy
            value falls back to 'crashlog'.
        description: description text shown in the help output.
        add_interactive_options: when true, also add the
            ``-i/--interactive`` and ``-b/--batch`` switches.

    Returns:
        The configured ``optparse.OptionParser``.
    """
    usage = "usage: %prog [options] <FILE> [FILE ...]"
    option_parser = optparse.OptionParser(
        description=description, prog=command_name or 'crashlog', usage=usage)

    def add_flag(*names, **kwargs):
        # Boolean switch; off by default unless the caller says otherwise.
        kwargs.setdefault('default', False)
        option_parser.add_option(*names, action='store_true', **kwargs)

    def add_int(*names, **kwargs):
        # Option taking one integer argument.
        option_parser.add_option(*names, type='int', **kwargs)

    add_flag('--verbose', '-v', dest='verbose',
             help='display verbose debug info')
    add_flag('--debug', '-g', dest='debug',
             help='display verbose debug logging')
    add_flag('--load-all', '-a', dest='load_all_images',
             help='load all executable images, not just the images found in the '
             'crashed stack frames, loads stackframes for all the threads in '
             'interactive mode.')
    add_flag('--images', dest='dump_image_list',
             help='show image list')
    add_int('--debug-delay', dest='debug_delay', metavar='NSEC',
            help='pause for NSEC seconds for debugger',
            default=0)
    add_flag('--crashed-only', '-c', dest='crashed_only',
             help='only symbolicate the crashed thread')
    add_int('--disasm-depth', '-d', dest='disassemble_depth',
            help='set the depth in stack frames that should be disassembled (default is 1)',
            default=1)
    add_flag('--disasm-all', '-D', dest='disassemble_all_threads',
             help='enabled disassembly of frames on all threads (not just the crashed thread)')
    add_int('--disasm-before', '-B', dest='disassemble_before',
            help='the number of instructions to disassemble before the frame PC',
            default=4)
    add_int('--disasm-after', '-A', dest='disassemble_after',
            help='the number of instructions to disassemble after the frame PC',
            default=4)
    add_int('--source-context', '-C', metavar='NLINES', dest='source_context',
            help='show NLINES source lines of source context (default = 4)',
            default=4)
    add_int('--source-frames', metavar='NFRAMES', dest='source_frames',
            help='show source for NFRAMES (default = 4)',
            default=4)
    add_flag('--source-all', dest='source_all',
             help='show source for all threads, not just the crashed thread')
    if add_interactive_options:
        # No explicit dest: optparse derives 'interactive' and 'batch' from
        # the long option names.
        add_flag('-i', '--interactive',
                 help='parse a crash log and load it in a ScriptedProcess')
        add_flag('-b', '--batch',
                 help='dump symbolicated stackframes without creating a debug session',
                 default=True)
    return option_parser
1189
1190
def CrashLogOptionParser():
    """Return the option parser for the "crashlog" command.

    The parser carries the command's long description and includes the
    interactive-mode options.
    """
    summary = '''Symbolicate one or more darwin crash log files to provide source file and line information,
inlined stack frames back to the concrete functions, and disassemble the location of the crash
for the first frame of the crashed thread.
If this script is imported into the LLDB command interpreter, a "crashlog" command will be added to the interpreter
for use at the LLDB command line. After a crash log has been parsed and symbolicated, a target will have been
created that has all of the shared libraries loaded at the load addresses found in the crash log file. This allows
you to explore the program as if it were stopped at the locations described in the crash log and functions can
be disassembled and lookups can be performed using the addresses found in the crash log.'''
    return CreateSymbolicateCrashLogOptions(
        command_name='crashlog',
        description=summary,
        add_interactive_options=True)
1201
def SymbolicateCrashLogs(debugger, command_args):
    """Parse the crashlog command line and process every crash log named on it.

    Each file is either loaded into a scripted process (``--interactive``)
    or parsed and symbolicated in batch mode (the default).

    Args:
        debugger: the debugger used for parsing and for interactive loading.
        command_args: list of command-line argument strings.
    """
    option_parser = CrashLogOptionParser()
    try:
        (options, args) = option_parser.parse_args(command_args)
    except SystemExit:
        # optparse reports usage errors (and handles --help) by calling
        # sys.exit(); absorb that so it does not propagate to the caller.
        return

    if options.debug:
        print('command_args = %s' % command_args)
        print('options', options)
        print('args', args)

    if options.debug_delay > 0:
        print("Waiting %u seconds for debugger to attach..." % options.debug_delay)
        time.sleep(options.debug_delay)

    # Interactive mode is strictly opt-in: without --interactive the files
    # are processed in batch mode whether or not --batch was given.
    # Selecting interactive mode automatically when the command interpreter
    # is interactive is not enabled.
    run_interactively = bool(options.interactive)

    for crash_log_file in args:
        if run_interactively:
            load_crashlog_in_scripted_process(debugger, crash_log_file,
                                              options)
        else:
            crash_log = CrashLogParser().parse(debugger, crash_log_file, options.verbose)
            SymbolicateCrashLog(crash_log, options)
1239
if __name__ == '__main__':
    # Stand-alone use: create a debugger, symbolicate the crash logs named
    # on the command line, and always destroy the debugger afterwards.
    debugger = lldb.SBDebugger.Create()
    try:
        SymbolicateCrashLogs(debugger, sys.argv[1:])
    finally:
        # Runs even when symbolication raises.
        lldb.SBDebugger.Destroy(debugger)
1245
def __lldb_init_module(debugger, internal_dict):
    """Register the "crashlog" and "save_crashlog" commands with ``debugger``.

    ``internal_dict`` is accepted but not used. A confirmation message is
    printed once both registration commands have been issued.
    """
    registrations = (
        'command script add -c lldb.macosx.crashlog.Symbolicate crashlog',
        'command script add -f lldb.macosx.crashlog.save_crashlog save_crashlog',
    )
    for registration in registrations:
        debugger.HandleCommand(registration)
    print('"crashlog" and "save_crashlog" commands have been installed, use '
          'the "--help" options on these commands for detailed help.')
1253