1#!/usr/bin/env python3
2
3#----------------------------------------------------------------------
4# Be sure to add the python path that points to the LLDB shared library.
5#
6# To use this in the embedded python interpreter using "lldb":
7#
8#   cd /path/containing/crashlog.py
9#   lldb
10#   (lldb) script import crashlog
11#   "crashlog" command installed, type "crashlog --help" for detailed help
12#   (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash
13#
14# The benefit of running the crashlog command inside lldb in the
15# embedded python interpreter is when the command completes, there
16# will be a target with all of the files loaded at the locations
17# described in the crash log. Only the files that have stack frames
18# in the backtrace will be loaded unless the "--load-all" option
19# has been specified. This allows users to explore the program in the
20# state it was in right at crash time.
21#
22# On MacOSX csh, tcsh:
23#   ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )
24#
25# On MacOSX sh, bash:
26#   PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
27#----------------------------------------------------------------------
28
29import concurrent.futures
30import contextlib
31import datetime
32import json
33import optparse
34import os
35import platform
36import plistlib
37import re
38import shlex
39import string
40import subprocess
41import sys
42import threading
43import time
44import uuid
45
46
# Re-entrant lock held around the progress print() calls made further down.
print_lock = threading.RLock()

try:
    # First try for LLDB in case PYTHONPATH is already correctly setup.
    import lldb
except ImportError:
    # Ask the command line driver for the path to the lldb module. The child
    # process inherits this process's environment, so SDKROOT is propagated
    # to xcrun.
    command = ['xcrun', 'lldb', '-P'] if platform.system() == 'Darwin' else ['lldb', '-P']
    try:
        lldb_python_path = subprocess.check_output(command).decode("utf-8").strip()
    except (OSError, subprocess.CalledProcessError):
        # The driver is missing or exited with an error. Fall through so the
        # user gets the explicit error message below rather than a traceback.
        lldb_python_path = ''
    # Extend sys.path if the path exists and isn't already there.
    if os.path.exists(lldb_python_path) and lldb_python_path not in sys.path:
        sys.path.append(lldb_python_path)
    # Try importing LLDB again.
    try:
        import lldb
    except ImportError:
        print("error: couldn't locate the 'lldb' module, please set PYTHONPATH correctly")
        sys.exit(1)
66
67from lldb.utils import symbolication
68
def read_plist(s):
    """Parse the property list held in `s` and return the decoded object.

    On Python 3 this is plistlib.loads(); any other major version falls back
    to the legacy plistlib.readPlistFromString() entry point.
    """
    running_python3 = sys.version_info.major == 3
    if not running_python3:
        return plistlib.readPlistFromString(s)
    return plistlib.loads(s)
74
75class CrashLog(symbolication.Symbolicator):
76    class Thread:
77        """Class that represents a thread in a darwin crash log"""
78
79        def __init__(self, index, app_specific_backtrace):
80            self.index = index
81            self.id = index
82            self.frames = list()
83            self.idents = list()
84            self.registers = dict()
85            self.reason = None
86            self.name = None
87            self.queue = None
88            self.crashed = False
89            self.app_specific_backtrace = app_specific_backtrace
90
91        def dump(self, prefix):
92            if self.app_specific_backtrace:
93                print("%Application Specific Backtrace[%u] %s" % (prefix, self.index, self.reason))
94            else:
95                print("%sThread[%u] %s" % (prefix, self.index, self.reason))
96            if self.frames:
97                print("%s  Frames:" % (prefix))
98                for frame in self.frames:
99                    frame.dump(prefix + '    ')
100            if self.registers:
101                print("%s  Registers:" % (prefix))
102                for reg in self.registers.keys():
103                    print("%s    %-8s = %#16.16x" % (prefix, reg, self.registers[reg]))
104
105        def dump_symbolicated(self, crash_log, options):
106            this_thread_crashed = self.app_specific_backtrace
107            if not this_thread_crashed:
108                this_thread_crashed = self.did_crash()
109                if options.crashed_only and this_thread_crashed == False:
110                    return
111
112            print("%s" % self)
113            display_frame_idx = -1
114            for frame_idx, frame in enumerate(self.frames):
115                disassemble = (
116                    this_thread_crashed or options.disassemble_all_threads) and frame_idx < options.disassemble_depth
117
118                # Except for the zeroth frame, we should subtract 1 from every
119                # frame pc to get the previous line entry.
120                pc = frame.pc & crash_log.addr_mask
121                pc = pc if frame_idx == 0 or pc == 0 else pc - 1
122                symbolicated_frame_addresses = crash_log.symbolicate(pc, options.verbose)
123
124                if symbolicated_frame_addresses:
125                    symbolicated_frame_address_idx = 0
126                    for symbolicated_frame_address in symbolicated_frame_addresses:
127                        display_frame_idx += 1
128                        print('[%3u] %s' % (frame_idx, symbolicated_frame_address))
129                        if (options.source_all or self.did_crash(
130                        )) and display_frame_idx < options.source_frames and options.source_context:
131                            source_context = options.source_context
132                            line_entry = symbolicated_frame_address.get_symbol_context().line_entry
133                            if line_entry.IsValid():
134                                strm = lldb.SBStream()
135                                if line_entry:
136                                    crash_log.debugger.GetSourceManager().DisplaySourceLinesWithLineNumbers(
137                                        line_entry.file, line_entry.line, source_context, source_context, "->", strm)
138                                source_text = strm.GetData()
139                                if source_text:
140                                    # Indent the source a bit
141                                    indent_str = '    '
142                                    join_str = '\n' + indent_str
143                                    print('%s%s' % (indent_str, join_str.join(source_text.split('\n'))))
144                        if symbolicated_frame_address_idx == 0:
145                            if disassemble:
146                                instructions = symbolicated_frame_address.get_instructions()
147                                if instructions:
148                                    print()
149                                    symbolication.disassemble_instructions(
150                                        crash_log.get_target(),
151                                        instructions,
152                                        frame.pc,
153                                        options.disassemble_before,
154                                        options.disassemble_after,
155                                        frame.index > 0)
156                                    print()
157                        symbolicated_frame_address_idx += 1
158                else:
159                    print(frame)
160            if self.registers:
161                print()
162                for reg in self.registers.keys():
163                    print("    %-8s = %#16.16x" % (reg, self.registers[reg]))
164            elif self.crashed:
165               print()
166               print("No thread state (register information) available")
167
168        def add_ident(self, ident):
169            if ident not in self.idents:
170                self.idents.append(ident)
171
172        def did_crash(self):
173            return self.reason is not None
174
175        def __str__(self):
176            if self.app_specific_backtrace:
177                s = "Application Specific Backtrace[%u]" % self.index
178            else:
179                s = "Thread[%u]" % self.index
180            if self.reason:
181                s += ' %s' % self.reason
182            return s
183
184    class Frame:
185        """Class that represents a stack frame in a thread in a darwin crash log"""
186
187        def __init__(self, index, pc, description):
188            self.pc = pc
189            self.description = description
190            self.index = index
191
192        def __str__(self):
193            if self.description:
194                return "[%3u] 0x%16.16x %s" % (
195                    self.index, self.pc, self.description)
196            else:
197                return "[%3u] 0x%16.16x" % (self.index, self.pc)
198
199        def dump(self, prefix):
200            print("%s%s" % (prefix, str(self)))
201
202    class DarwinImage(symbolication.Image):
203        """Class that represents a binary images in a darwin crash log"""
204        dsymForUUIDBinary = '/usr/local/bin/dsymForUUID'
205        if not os.path.exists(dsymForUUIDBinary):
206            try:
207                dsymForUUIDBinary = subprocess.check_output('which dsymForUUID',
208                                                            shell=True).decode("utf-8").rstrip('\n')
209            except:
210                dsymForUUIDBinary = ""
211
212        dwarfdump_uuid_regex = re.compile(
213            'UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*')
214
215        def __init__(
216                self,
217                text_addr_lo,
218                text_addr_hi,
219                identifier,
220                version,
221                uuid,
222                path,
223                verbose):
224            symbolication.Image.__init__(self, path, uuid)
225            self.add_section(
226                symbolication.Section(
227                    text_addr_lo,
228                    text_addr_hi,
229                    "__TEXT"))
230            self.identifier = identifier
231            self.version = version
232            self.verbose = verbose
233
234        def show_symbol_progress(self):
235            """
236            Hide progress output and errors from system frameworks as they are plentiful.
237            """
238            if self.verbose:
239                return True
240            return not (self.path.startswith("/System/Library/") or
241                        self.path.startswith("/usr/lib/"))
242
243
244        def find_matching_slice(self):
245            dwarfdump_cmd_output = subprocess.check_output(
246                'dwarfdump --uuid "%s"' % self.path, shell=True).decode("utf-8")
247            self_uuid = self.get_uuid()
248            for line in dwarfdump_cmd_output.splitlines():
249                match = self.dwarfdump_uuid_regex.search(line)
250                if match:
251                    dwarf_uuid_str = match.group(1)
252                    dwarf_uuid = uuid.UUID(dwarf_uuid_str)
253                    if self_uuid == dwarf_uuid:
254                        self.resolved_path = self.path
255                        self.arch = match.group(2)
256                        return True
257            if not self.resolved_path:
258                self.unavailable = True
259                if self.show_symbol_progress():
260                    print(("error\n    error: unable to locate '%s' with UUID %s"
261                           % (self.path, self.get_normalized_uuid_string())))
262                return False
263
264        def locate_module_and_debug_symbols(self):
265            # Don't load a module twice...
266            if self.resolved:
267                return True
268            # Mark this as resolved so we don't keep trying
269            self.resolved = True
270            uuid_str = self.get_normalized_uuid_string()
271            if self.show_symbol_progress():
272                with print_lock:
273                    print('Getting symbols for %s %s...' % (uuid_str, self.path))
274            if os.path.exists(self.dsymForUUIDBinary):
275                dsym_for_uuid_command = '%s %s' % (
276                    self.dsymForUUIDBinary, uuid_str)
277                s = subprocess.check_output(dsym_for_uuid_command, shell=True)
278                if s:
279                    try:
280                        plist_root = read_plist(s)
281                    except:
282                        with print_lock:
283                            print(("Got exception: ", sys.exc_info()[1], " handling dsymForUUID output: \n", s))
284                        raise
285                    if plist_root:
286                        plist = plist_root[uuid_str]
287                        if plist:
288                            if 'DBGArchitecture' in plist:
289                                self.arch = plist['DBGArchitecture']
290                            if 'DBGDSYMPath' in plist:
291                                self.symfile = os.path.realpath(
292                                    plist['DBGDSYMPath'])
293                            if 'DBGSymbolRichExecutable' in plist:
294                                self.path = os.path.expanduser(
295                                    plist['DBGSymbolRichExecutable'])
296                                self.resolved_path = self.path
297            if not self.resolved_path and os.path.exists(self.path):
298                if not self.find_matching_slice():
299                    return False
300            if not self.resolved_path and not os.path.exists(self.path):
301                try:
302                    mdfind_results = subprocess.check_output(
303                        ["/usr/bin/mdfind",
304                         "com_apple_xcode_dsym_uuids == %s" % uuid_str]).decode("utf-8").splitlines()
305                    found_matching_slice = False
306                    for dsym in mdfind_results:
307                        dwarf_dir = os.path.join(dsym, 'Contents/Resources/DWARF')
308                        if not os.path.exists(dwarf_dir):
309                            # Not a dSYM bundle, probably an Xcode archive.
310                            continue
311                        with print_lock:
312                            print('falling back to binary inside "%s"' % dsym)
313                        self.symfile = dsym
314                        for filename in os.listdir(dwarf_dir):
315                           self.path = os.path.join(dwarf_dir, filename)
316                           if self.find_matching_slice():
317                              found_matching_slice = True
318                              break
319                        if found_matching_slice:
320                           break
321                except:
322                    pass
323            if (self.resolved_path and os.path.exists(self.resolved_path)) or (
324                    self.path and os.path.exists(self.path)):
325                with print_lock:
326                    print('Resolved symbols for %s %s...' % (uuid_str, self.path))
327                return True
328            else:
329                self.unavailable = True
330            return False
331
332    def __init__(self, debugger, path, verbose):
333        """CrashLog constructor that take a path to a darwin crash log file"""
334        symbolication.Symbolicator.__init__(self, debugger)
335        self.path = os.path.expanduser(path)
336        self.info_lines = list()
337        self.system_profile = list()
338        self.threads = list()
339        self.backtraces = list()  # For application specific backtraces
340        self.idents = list()  # A list of the required identifiers for doing all stack backtraces
341        self.errors = list()
342        self.crashed_thread_idx = -1
343        self.version = -1
344        self.target = None
345        self.verbose = verbose
346
347    def dump(self):
348        print("Crash Log File: %s" % (self.path))
349        if self.backtraces:
350            print("\nApplication Specific Backtraces:")
351            for thread in self.backtraces:
352                thread.dump('  ')
353        print("\nThreads:")
354        for thread in self.threads:
355            thread.dump('  ')
356        print("\nImages:")
357        for image in self.images:
358            image.dump('  ')
359
360    def set_main_image(self, identifier):
361        for i, image in enumerate(self.images):
362            if image.identifier == identifier:
363                self.images.insert(0, self.images.pop(i))
364                break
365
366    def find_image_with_identifier(self, identifier):
367        for image in self.images:
368            if image.identifier == identifier:
369                return image
370        regex_text = '^.*\.%s$' % (re.escape(identifier))
371        regex = re.compile(regex_text)
372        for image in self.images:
373            if regex.match(image.identifier):
374                return image
375        return None
376
377    def create_target(self):
378        if self.target is None:
379            self.target = symbolication.Symbolicator.create_target(self)
380            if self.target:
381                return self.target
382            # We weren't able to open the main executable as, but we can still
383            # symbolicate
384            print('crashlog.create_target()...2')
385            if self.idents:
386                for ident in self.idents:
387                    image = self.find_image_with_identifier(ident)
388                    if image:
389                        self.target = image.create_target(self.debugger)
390                        if self.target:
391                            return self.target  # success
392            print('crashlog.create_target()...3')
393            for image in self.images:
394                self.target = image.create_target(self.debugger)
395                if self.target:
396                    return self.target  # success
397            print('crashlog.create_target()...4')
398            print('error: Unable to locate any executables from the crash log.')
399            print('       Try loading the executable into lldb before running crashlog')
400            print('       and/or make sure the .dSYM bundles can be found by Spotlight.')
401        return self.target
402
    def get_target(self):
        """Return self.target as it currently is, without trying to create one"""
        return self.target
405
406
class CrashLogFormatException(Exception):
    """Raised by JSONCrashLogParser.parse() when the file cannot be decoded as JSON"""
409
410
class CrashLogParseException(Exception):
    """Raised by JSONCrashLogParser.parse() when decoded JSON lacks the expected content"""
413
414
class CrashLogParser:
    """Front end that picks the parser matching the crash log's format"""

    def parse(self, debugger, path, verbose):
        """Parse the crash log at `path` and return the parser's result.

        The JSON parser is tried first. If it raises CrashLogFormatException
        the file is handed to the text parser instead.
        """
        try:
            json_parser = JSONCrashLogParser(debugger, path, verbose)
            return json_parser.parse()
        except CrashLogFormatException:
            text_parser = TextCrashLogParser(debugger, path, verbose)
            return text_parser.parse()
421
422
class JSONCrashLogParser:
    """Parser that fills a CrashLog from a crash report stored as JSON"""

    def __init__(self, debugger, path, verbose):
        """Remember the (user-expanded) path and create the CrashLog to fill"""
        self.path = os.path.expanduser(path)
        self.verbose = verbose
        self.crashlog = CrashLog(debugger, self.path, self.verbose)

    def parse_json(self, buffer):
        """Decode `buffer` as JSON, retrying without its first line on failure.

        Raises ValueError (json.JSONDecodeError) if both attempts fail.
        """
        try:
            return json.loads(buffer)
        except ValueError:
            # The first line can contain meta data. Try stripping it and try
            # again.
            _, _, tail = buffer.partition('\n')
            return json.loads(tail)

    def parse(self):
        """Read self.path and return the populated CrashLog.

        Raises CrashLogFormatException when the file is not JSON, and
        CrashLogParseException when the JSON lacks expected content.
        """
        with open(self.path, 'r') as f:
            buffer = f.read()

        try:
            self.data = self.parse_json(buffer)
        except Exception:
            raise CrashLogFormatException()

        try:
            self.parse_process_info(self.data)
            self.parse_images(self.data['usedImages'])
            self.parse_main_image(self.data)
            self.parse_threads(self.data['threads'])
            self.parse_errors(self.data)
            reason = self.parse_crash_reason(self.data['exception'])
            # crashed_thread_idx stays -1 when no thread was flagged as
            # triggered. Indexing with it would silently pin the crash reason
            # on the last thread, or raise IndexError when there are none.
            crashed_idx = self.crashlog.crashed_thread_idx
            if 0 <= crashed_idx < len(self.crashlog.threads):
                thread = self.crashlog.threads[crashed_idx]
                if thread.reason:
                    thread.reason = '{} {}'.format(thread.reason, reason)
                else:
                    thread.reason = reason
        except (KeyError, ValueError, TypeError) as e:
            raise CrashLogParseException(
                'Failed to parse JSON crashlog: {}: {}'.format(
                    type(e).__name__, e))

        return self.crashlog

    def get_used_image(self, idx):
        """Return entry `idx` of the report's 'usedImages' list"""
        return self.data['usedImages'][idx]

    def parse_process_info(self, json_data):
        """Copy 'pid', 'procName' and 'procPath' onto the crash log"""
        self.crashlog.process_id = json_data['pid']
        self.crashlog.process_identifier = json_data['procName']
        self.crashlog.process_path = json_data['procPath']

    def parse_crash_reason(self, json_exception):
        """Format the exception as "TYPE (SIGNAL) (CODES-or-SUBTYPE)".

        The signal and the codes/subtype parts are optional; a missing part
        is omitted entirely so no stray or doubled spaces are produced.
        """
        reason = json_exception['type']
        if 'signal' in json_exception:
            reason += " ({})".format(json_exception['signal'])

        if 'codes' in json_exception:
            reason += " ({})".format(json_exception['codes'])
        elif 'subtype' in json_exception:
            reason += " ({})".format(json_exception['subtype'])
        return reason

    def parse_images(self, json_images):
        """Create a DarwinImage for every entry and append it to crashlog.images"""
        for json_image in json_images:
            img_uuid = uuid.UUID(json_image['uuid'])
            low = int(json_image['base'])
            # Use the reported size for the end address when it is available,
            # so the image's address range is not empty.
            high = low + int(json_image['size']) if 'size' in json_image else low
            name = json_image.get('name', '')
            path = json_image.get('path', '')
            version = ''
            darwin_image = self.crashlog.DarwinImage(low, high, name, version,
                                                     img_uuid, path,
                                                     self.verbose)
            self.crashlog.images.append(darwin_image)

    def parse_main_image(self, json_data):
        """Make the image named by 'procName' (if present) the main image"""
        if 'procName' in json_data:
            proc_name = json_data['procName']
            self.crashlog.set_main_image(proc_name)

    def parse_frames(self, thread, json_frames):
        """Append a Frame to `thread` for every entry of `json_frames`.

        Each frame's pc is its image base plus 'imageOffset'. The image name
        is recorded as an ident on both the thread and the crash log.
        """
        idx = 0
        for json_frame in json_frames:
            image_id = int(json_frame['imageIndex'])
            json_image = self.get_used_image(image_id)
            ident = json_image.get('name', '')
            thread.add_ident(ident)
            if ident not in self.crashlog.idents:
                self.crashlog.idents.append(ident)

            frame_offset = int(json_frame['imageOffset'])
            pc = json_image['base'] + frame_offset
            thread.frames.append(self.crashlog.Frame(idx, pc, frame_offset))

            # On arm64 systems, if the code jumps through a null function
            # pointer, we end up at address 0 and the crash reporter unwinder
            # misses the frame that actually faulted.
            # But $lr can tell us where the last BL/BLR instruction used
            # was at, so insert that address as the caller stack frame.
            if idx == 0 and pc == 0 and "lr" in thread.registers:
                pc = thread.registers["lr"]
                for image in self.data['usedImages']:
                    text_lo = image['base']
                    text_hi = text_lo + image.get('size', 0)
                    if text_lo <= pc < text_hi:
                        idx += 1
                        frame_offset = pc - text_lo
                        thread.frames.append(self.crashlog.Frame(idx, pc, frame_offset))
                        break

            idx += 1

    def parse_threads(self, json_threads):
        """Create a Thread for every entry and append it to crashlog.threads.

        The entry flagged 'triggered' sets crashlog.crashed_thread_idx and is
        the only one whose 'threadState' registers are parsed.
        """
        for idx, json_thread in enumerate(json_threads):
            thread = self.crashlog.Thread(idx, False)
            if 'name' in json_thread:
                thread.name = json_thread['name']
                thread.reason = json_thread['name']
            if 'id' in json_thread:
                thread.id = int(json_thread['id'])
            if json_thread.get('triggered', False):
                self.crashlog.crashed_thread_idx = idx
                thread.crashed = True
                if 'threadState' in json_thread:
                    thread.registers = self.parse_thread_registers(
                        json_thread['threadState'])
            if 'queue' in json_thread:
                thread.queue = json_thread.get('queue')
            self.parse_frames(thread, json_thread.get('frames', []))
            self.crashlog.threads.append(thread)

    def parse_thread_registers(self, json_thread_state, prefix=None):
        """Flatten a thread state dictionary into {register name: int value}.

        The "rosetta" entry is a nested state that is merged in. The "x" entry
        is a list whose items become "x0", "x1", ... Entries without an
        integer 'value' are skipped. `prefix` is prepended to every name.
        """
        registers = dict()
        for key, state in json_thread_state.items():
            if key == "rosetta":
                registers.update(self.parse_thread_registers(state))
                continue
            if key == "x":
                gpr_dict = {str(idx): reg for idx, reg in enumerate(state)}
                registers.update(self.parse_thread_registers(gpr_dict, key))
                continue
            try:
                value = int(state['value'])
                registers["{}{}".format(prefix or '', key)] = value
            except (KeyError, ValueError, TypeError):
                # Not a register entry (e.g. a plain string); ignore it.
                pass
        return registers

    def parse_errors(self, json_data):
        """Store 'reportNotes' (if present) as the crash log's errors"""
        if 'reportNotes' in json_data:
            self.crashlog.errors = json_data['reportNotes']
583
584
class CrashLogParseMode:
    """Integer identifiers for the parse modes of the text crash log parser"""
    NORMAL, THREAD, IMAGES, THREGS, SYSTEM, INSTRS = range(6)
592
593
594class TextCrashLogParser:
595    parent_process_regex = re.compile('^Parent Process:\s*(.*)\[(\d+)\]')
596    thread_state_regex = re.compile('^Thread ([0-9]+) crashed with')
597    thread_instrs_regex = re.compile('^Thread ([0-9]+) instruction stream')
598    thread_regex = re.compile('^Thread ([0-9]+)([^:]*):(.*)')
599    app_backtrace_regex = re.compile('^Application Specific Backtrace ([0-9]+)([^:]*):(.*)')
600    version = r'(\(.+\)|(arm|x86_)[0-9a-z]+)\s+'
601    frame_regex = re.compile(r'^([0-9]+)' r'\s'                # id
602                             r'+(.+?)'    r'\s+'               # img_name
603                             r'(' +version+ r')?'              # img_version
604                             r'(0x[0-9a-fA-F]{7}[0-9a-fA-F]+)' # addr
605                             r' +(.*)'                         # offs
606                            )
607    null_frame_regex = re.compile(r'^([0-9]+)\s+\?\?\?\s+(0{7}0+) +(.*)')
608    image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)'            # img_lo
609                                  r'\s+' '-' r'\s+'              #   -
610                                  r'(0x[0-9a-fA-F]+)'     r'\s+' # img_hi
611                                  r'[+]?(.+?)'            r'\s+' # img_name
612                                  r'(' +version+ ')?'            # img_version
613                                  r'(<([-0-9a-fA-F]+)>\s+)?'     # img_uuid
614                                  r'(/.*)'                       # img_path
615                                 )
616
617
618    def __init__(self, debugger, path, verbose):
619        self.path = os.path.expanduser(path)
620        self.verbose = verbose
621        self.thread = None
622        self.app_specific_backtrace = False
623        self.crashlog = CrashLog(debugger, self.path, self.verbose)
624        self.parse_mode = CrashLogParseMode.NORMAL
625        self.parsers = {
626            CrashLogParseMode.NORMAL : self.parse_normal,
627            CrashLogParseMode.THREAD : self.parse_thread,
628            CrashLogParseMode.IMAGES : self.parse_images,
629            CrashLogParseMode.THREGS : self.parse_thread_registers,
630            CrashLogParseMode.SYSTEM : self.parse_system,
631            CrashLogParseMode.INSTRS : self.parse_instructions,
632        }
633
634    def parse(self):
635        with open(self.path,'r') as f:
636            lines = f.read().splitlines()
637
638        for line in lines:
639            line_len = len(line)
640            if line_len == 0:
641                if self.thread:
642                    if self.parse_mode == CrashLogParseMode.THREAD:
643                        if self.thread.index == self.crashlog.crashed_thread_idx:
644                            self.thread.reason = ''
645                            if self.crashlog.thread_exception:
646                                self.thread.reason += self.crashlog.thread_exception
647                            if self.crashlog.thread_exception_data:
648                                self.thread.reason += " (%s)" % self.crashlog.thread_exception_data
649                        if self.app_specific_backtrace:
650                            self.crashlog.backtraces.append(self.thread)
651                        else:
652                            self.crashlog.threads.append(self.thread)
653                    self.thread = None
654                else:
655                    # only append an extra empty line if the previous line
656                    # in the info_lines wasn't empty
657                    if len(self.crashlog.info_lines) > 0 and len(self.crashlog.info_lines[-1]):
658                        self.crashlog.info_lines.append(line)
659                self.parse_mode = CrashLogParseMode.NORMAL
660            else:
661                self.parsers[self.parse_mode](line)
662
663        return self.crashlog
664
665
666    def parse_normal(self, line):
667        if line.startswith('Process:'):
668            (self.crashlog.process_name, pid_with_brackets) = line[
669                8:].strip().split(' [')
670            self.crashlog.process_id = pid_with_brackets.strip('[]')
671        elif line.startswith('Path:'):
672            self.crashlog.process_path = line[5:].strip()
673        elif line.startswith('Identifier:'):
674            self.crashlog.process_identifier = line[11:].strip()
675        elif line.startswith('Version:'):
676            version_string = line[8:].strip()
677            matched_pair = re.search("(.+)\((.+)\)", version_string)
678            if matched_pair:
679                self.crashlog.process_version = matched_pair.group(1)
680                self.crashlog.process_compatability_version = matched_pair.group(
681                    2)
682            else:
683                self.crashlog.process = version_string
684                self.crashlog.process_compatability_version = version_string
685        elif self.parent_process_regex.search(line):
686            parent_process_match = self.parent_process_regex.search(
687                line)
688            self.crashlog.parent_process_name = parent_process_match.group(1)
689            self.crashlog.parent_process_id = parent_process_match.group(2)
690        elif line.startswith('Exception Type:'):
691            self.crashlog.thread_exception = line[15:].strip()
692            return
693        elif line.startswith('Exception Codes:'):
694            self.crashlog.thread_exception_data = line[16:].strip()
695            return
696        elif line.startswith('Exception Subtype:'): # iOS
697            self.crashlog.thread_exception_data = line[18:].strip()
698            return
699        elif line.startswith('Crashed Thread:'):
700            self.crashlog.crashed_thread_idx = int(line[15:].strip().split()[0])
701            return
702        elif line.startswith('Triggered by Thread:'): # iOS
703            self.crashlog.crashed_thread_idx = int(line[20:].strip().split()[0])
704            return
705        elif line.startswith('Report Version:'):
706            self.crashlog.version = int(line[15:].strip())
707            return
708        elif line.startswith('System Profile:'):
709            self.parse_mode = CrashLogParseMode.SYSTEM
710            return
711        elif (line.startswith('Interval Since Last Report:') or
712                line.startswith('Crashes Since Last Report:') or
713                line.startswith('Per-App Interval Since Last Report:') or
714                line.startswith('Per-App Crashes Since Last Report:') or
715                line.startswith('Sleep/Wake UUID:') or
716                line.startswith('Anonymous UUID:')):
717            # ignore these
718            return
719        elif line.startswith('Thread'):
720            thread_state_match = self.thread_state_regex.search(line)
721            if thread_state_match:
722                self.app_specific_backtrace = False
723                thread_state_match = self.thread_regex.search(line)
724                thread_idx = int(thread_state_match.group(1))
725                self.parse_mode = CrashLogParseMode.THREGS
726                self.thread = self.crashlog.threads[thread_idx]
727                return
728            thread_insts_match  = self.thread_instrs_regex.search(line)
729            if thread_insts_match:
730                self.parse_mode = CrashLogParseMode.INSTRS
731                return
732            thread_match = self.thread_regex.search(line)
733            if thread_match:
734                self.app_specific_backtrace = False
735                self.parse_mode = CrashLogParseMode.THREAD
736                thread_idx = int(thread_match.group(1))
737                self.thread = self.crashlog.Thread(thread_idx, False)
738                return
739            return
740        elif line.startswith('Binary Images:'):
741            self.parse_mode = CrashLogParseMode.IMAGES
742            return
743        elif line.startswith('Application Specific Backtrace'):
744            app_backtrace_match = self.app_backtrace_regex.search(line)
745            if app_backtrace_match:
746                self.parse_mode = CrashLogParseMode.THREAD
747                self.app_specific_backtrace = True
748                idx = int(app_backtrace_match.group(1))
749                self.thread = self.crashlog.Thread(idx, True)
750        elif line.startswith('Last Exception Backtrace:'): # iOS
751            self.parse_mode = CrashLogParseMode.THREAD
752            self.app_specific_backtrace = True
753            idx = 1
754            self.thread = self.crashlog.Thread(idx, True)
755        self.crashlog.info_lines.append(line.strip())
756
757    def parse_thread(self, line):
758        if line.startswith('Thread'):
759            return
760        if self.null_frame_regex.search(line):
761            print('warning: thread parser ignored null-frame: "%s"' % line)
762            return
763        frame_match = self.frame_regex.search(line)
764        if frame_match:
765            (frame_id, frame_img_name, _, frame_img_version, _,
766                frame_addr, frame_ofs) = frame_match.groups()
767            ident = frame_img_name
768            self.thread.add_ident(ident)
769            if ident not in self.crashlog.idents:
770                self.crashlog.idents.append(ident)
771            self.thread.frames.append(self.crashlog.Frame(int(frame_id), int(
772                frame_addr, 0), frame_ofs))
773        else:
774            print('error: frame regex failed for line: "%s"' % line)
775
776    def parse_images(self, line):
777        image_match = self.image_regex_uuid.search(line)
778        if image_match:
779            (img_lo, img_hi, img_name, _, img_version, _,
780                _, img_uuid, img_path) = image_match.groups()
781            image = self.crashlog.DarwinImage(int(img_lo, 0), int(img_hi, 0),
782                                            img_name.strip(),
783                                            img_version.strip()
784                                            if img_version else "",
785                                            uuid.UUID(img_uuid), img_path,
786                                            self.verbose)
787            self.crashlog.images.append(image)
788        else:
789            print("error: image regex failed for: %s" % line)
790
791
792    def parse_thread_registers(self, line):
793        stripped_line = line.strip()
794        # "r12: 0x00007fff6b5939c8  r13: 0x0000000007000006  r14: 0x0000000000002a03  r15: 0x0000000000000c00"
795        reg_values = re.findall(
796            '([a-zA-Z0-9]+: 0[Xx][0-9a-fA-F]+) *', stripped_line)
797        for reg_value in reg_values:
798            (reg, value) = reg_value.split(': ')
799            self.thread.registers[reg.strip()] = int(value, 0)
800
801    def parse_system(self, line):
802        self.crashlog.system_profile.append(line)
803
804    def parse_instructions(self, line):
805        pass
806
807
def usage():
    """Print the command-line synopsis and exit with status 0."""
    synopsis = "Usage: lldb-symbolicate.py [-n name] executable-image"
    print(synopsis)
    sys.exit(0)
811
812
def _crashlog_frame_offset(frame, target):
    """Return how far ``frame.pc`` lies past the start of its enclosing code.

    The start is, in order of preference, the load address of the block
    range looked up with ``frame.addr``, of the frame's function, or of the
    frame's symbol. Returns 0 when the frame has neither function nor symbol.
    """
    frame_pc = frame.pc
    if frame.function:
        block_range = frame.GetFrameBlock().range[frame.addr]
        if block_range:
            return frame_pc - block_range[0].GetLoadAddress(target)
        return frame_pc - frame.function.addr.GetLoadAddress(target)
    if frame.symbol:
        return frame_pc - frame.symbol.addr.GetLoadAddress(target)
    return 0


def _write_crashlog_threads(out_file, target, process, verbose):
    """Write a "Thread N:" section with one line per frame for each thread."""
    for thread_idx in range(process.num_threads):
        thread = process.thread[thread_idx]
        out_file.write('\nThread %u:\n' % (thread_idx))
        for (frame_idx, frame) in enumerate(thread.frames):
            frame_offset = _crashlog_frame_offset(frame, target)
            out_file.write(
                '%-3u %-32s 0x%16.16x %s' %
                (frame_idx, frame.module.file.basename, frame.pc, frame.name))
            if frame_offset > 0:
                out_file.write(' + %u' % (frame_offset))
            line_entry = frame.line_entry
            if line_entry:
                if verbose:
                    # This will output the fullpath + line + column
                    out_file.write(' %s' % (line_entry))
                else:
                    out_file.write(
                        ' %s:%u' %
                        (line_entry.file.basename, line_entry.line))
                    column = line_entry.column
                    if column:
                        out_file.write(':%u' % (column))
            out_file.write('\n')


def _write_crashlog_images(out_file, target):
    """Write the "Binary Images:" section for modules with a loaded __TEXT."""
    out_file.write('\nBinary Images:\n')
    for module in target.modules:
        text_segment = module.section['__TEXT']
        if not text_segment:
            continue
        text_segment_load_addr = text_segment.GetLoadAddress(target)
        if text_segment_load_addr == lldb.LLDB_INVALID_ADDRESS:
            continue
        text_segment_end_load_addr = text_segment_load_addr + text_segment.size
        module_version = '???'
        module_version_array = module.GetVersion()
        if module_version_array:
            module_version = '.'.join(map(str, module_version_array))
        out_file.write(
            '    0x%16.16x - 0x%16.16x  %s (%s - ???) <%s> %s\n' %
            (text_segment_load_addr,
             text_segment_end_load_addr,
             module.file.basename,
             module_version,
             module.GetUUIDString(),
             module.file.fullpath))


def save_crashlog(debugger, command, exe_ctx, result, dict):
    """Export the state of the current target into a crashlog-style file.

    Args:
        debugger: unused.
        command: raw argument string; it is split shell-style and must
            contain exactly one positional argument, the output path.
            ``-v``/``--verbose`` writes each frame's full line entry.
        exe_ctx: execution context supplying ``target`` and ``process``.
        result: object whose ``PutCString`` receives error messages.
        dict: unused.

    Every failure (bad options, wrong argument count, invalid target,
    output file that cannot be opened) is reported through ``result`` and
    the function returns without raising.
    """
    usage = "usage: %prog [options] <output-path>"
    description = '''Export the state of current target into a crashlog file'''
    parser = optparse.OptionParser(
        description=description,
        prog='save_crashlog',
        usage=usage)
    parser.add_option(
        '-v',
        '--verbose',
        action='store_true',
        dest='verbose',
        help='display verbose debug info',
        default=False)
    try:
        (options, args) = parser.parse_args(shlex.split(command))
    except (Exception, SystemExit):
        # optparse reports bad options (and --help) by raising SystemExit,
        # which must not propagate out of the command. KeyboardInterrupt is
        # deliberately left alone.
        result.PutCString("error: invalid options")
        return
    if len(args) != 1:
        result.PutCString(
            "error: invalid arguments, a single output file is the only valid argument")
        return

    # Validate the target before touching the filesystem so an invalid
    # target does not create or truncate the output file.
    target = exe_ctx.target
    if not target:
        result.PutCString("error: invalid target")
        return

    # open() raises on failure instead of returning a falsy object.
    try:
        out_file = open(args[0], 'w')
    except OSError as open_error:
        result.PutCString(
            "error: failed to open file '%s' for writing: %s" %
            (args[0], open_error))
        return

    # The context manager closes the file even if writing raises.
    with out_file:
        identifier = target.executable.basename
        process = exe_ctx.process
        if process:
            pid = process.id
            if pid != lldb.LLDB_INVALID_PROCESS_ID:
                out_file.write(
                    'Process:         %s [%u]\n' %
                    (identifier, pid))
        out_file.write('Path:            %s\n' % (target.executable.fullpath))
        out_file.write('Identifier:      %s\n' % (identifier))
        out_file.write('\nDate/Time:       %s\n' %
                       (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
        # Strip the trailing newline sysctl emits; otherwise the closing
        # parenthesis ends up on a line of its own.
        os_build = subprocess.check_output(
            ['sysctl', '-n', 'kern.osversion']).decode("utf-8").strip()
        out_file.write(
            'OS Version:      Mac OS X %s (%s)\n' %
            (platform.mac_ver()[0], os_build))
        out_file.write('Report Version:  9\n')
        _write_crashlog_threads(out_file, target, process, options.verbose)
        _write_crashlog_images(out_file, target)
919
920
class Symbolicate:
    """Callable command object that symbolicates darwin crash log files."""

    def __init__(self, debugger, internal_dict):
        """Nothing to set up; both arguments are ignored."""

    def __call__(self, debugger, command, exe_ctx, result):
        """Split the command string shell-style and symbolicate the result.

        ``exe_ctx`` and ``result`` are not used.
        """
        command_args = shlex.split(command)
        SymbolicateCrashLogs(debugger, command_args)

    def get_short_help(self):
        """Return the one-line summary of the command."""
        return "Symbolicate one or more darwin crash log files."

    def get_long_help(self):
        """Return the full option help produced by the crashlog parser."""
        return CrashLogOptionParser().format_help()
934
935
def SymbolicateCrashLog(crash_log, options):
    """Load the images a crash log refers to and print its threads.

    Steps, in order: optionally dump the crash log (``options.debug``),
    bail out if it has no images, optionally list the images
    (``options.dump_image_list``), create a target, pick the images to
    load, add their modules to the target on a thread pool, then print the
    symbolicated backtraces, threads and any accumulated errors.

    Image selection: every image with ``options.load_all_images``;
    otherwise the images matching the identifiers of the crashed threads
    (``options.crashed_only``) or of the whole crash log.
    """
    if options.debug:
        crash_log.dump()

    if not crash_log.images:
        print('error: no images in crash log')
        return

    if options.dump_image_list:
        print("Binary Images:")
        for image in crash_log.images:
            print(image.debug_dump() if options.verbose else image)

    target = crash_log.create_target()
    if not target:
        return
    # NOTE(review): fetched but not used anywhere below.
    exe_module = target.GetModuleAtIndex(0)

    def requested_idents():
        # Lazily yields identifiers so lookups interleave with the thread
        # walk exactly as a nested loop would.
        if options.crashed_only:
            for thread in crash_log.threads:
                if thread.did_crash():
                    for ident in thread.idents:
                        yield ident
        else:
            for ident in crash_log.idents:
                yield ident

    if options.load_all_images:
        # --load-all option was specified, load everything up
        images_to_load = list(crash_log.images)
    else:
        # Only load the images found in stack frames
        images_to_load = []
        for ident in requested_idents():
            matches = crash_log.find_images_with_identifier(ident)
            if matches:
                images_to_load.extend(matches)
            else:
                print('error: can\'t find image for identifier "%s"' % ident)

    loaded_images = []
    with concurrent.futures.ThreadPoolExecutor() as pool:
        # Map each pending future back to the image it is loading.
        image_for = {
            pool.submit(image.add_module, target): image
            for image in images_to_load
        }
        for finished in concurrent.futures.as_completed(image_for):
            # add_module returns an error value on failure, falsy on success.
            err = finished.result()
            if err:
                print(err)
            else:
                loaded_images.append(image_for[finished])

    def dump_threads(threads):
        # Each thread is followed by a blank line.
        for thread in threads:
            thread.dump_symbolicated(crash_log, options)
            print()

    if crash_log.backtraces:
        dump_threads(crash_log.backtraces)
    dump_threads(crash_log.threads)

    if crash_log.errors:
        print("Errors:")
        for error in crash_log.errors:
            print(error)
1010
def load_crashlog_in_scripted_process(debugger, crash_log_file, options):
    """Parse a crash log and load it into a ScriptedProcess.

    Args:
        debugger: debugger providing the targets and command interpreter.
        crash_log_file: path to the crash log; ``~`` is expanded.
        options: parsed options; only ``load_all_images`` is read here.

    On any failure an error message is emitted and the function returns
    without launching. On success "process status" and "thread backtrace"
    are run through the command interpreter with the debugger temporarily
    in synchronous mode.
    """
    result = lldb.SBCommandReturnObject()

    def fail(message):
        # `result` is created locally and never handed back to a caller,
        # so the message is also printed to make the failure visible.
        result.PutCString(message)
        print(message)

    crashlog_path = os.path.expanduser(crash_log_file)
    if not os.path.exists(crashlog_path):
        fail("error: crashlog file %s does not exist" % crashlog_path)
        # Do not go on to parse a file that is known to be missing.
        return

    crashlog = CrashLogParser().parse(debugger, crashlog_path, False)

    # Reuse the first existing target if there is one.
    if debugger.GetNumTargets() > 0:
        target = debugger.GetTargetAtIndex(0)
    else:
        target = crashlog.create_target()
    if not target:
        fail("error: couldn't create target")
        return

    ci = debugger.GetCommandInterpreter()
    if not ci:
        fail("error: couldn't get command interpreter")
        return

    res = lldb.SBCommandReturnObject()
    ci.HandleCommand('script from lldb.macosx import crashlog_scripted_process', res)
    if not res.Succeeded():
        fail("error: couldn't import crashlog scripted process module")
        return

    structured_data = lldb.SBStructuredData()
    structured_data.SetFromJSON(json.dumps({ "crashlog_path" : crashlog_path,
                                             "load_all_images": options.load_all_images }))
    launch_info = lldb.SBLaunchInfo(None)
    launch_info.SetProcessPluginName("ScriptedProcess")
    launch_info.SetScriptedProcessClassName("crashlog_scripted_process.CrashLogScriptedProcess")
    launch_info.SetScriptedProcessDictionary(structured_data)
    error = lldb.SBError()
    process = target.Launch(launch_info, error)

    if not process or error.Fail():
        fail("error: couldn't launch the crashlog scripted process")
        return

    @contextlib.contextmanager
    def synchronous(debugger):
        # Switch the debugger to synchronous mode for the duration of the
        # block and restore the previous setting afterwards.
        async_state = debugger.GetAsync()
        debugger.SetAsync(False)
        try:
            yield
        finally:
            debugger.SetAsync(async_state)

    with synchronous(debugger):
        run_options = lldb.SBCommandInterpreterRunOptions()
        run_options.SetStopOnError(True)
        run_options.SetStopOnCrash(True)
        run_options.SetEchoCommands(True)

        commands_stream = lldb.SBStream()
        commands_stream.Print("process status\n")
        commands_stream.Print("thread backtrace\n")
        error = debugger.SetInputString(commands_stream.GetData())
        if error.Success():
            debugger.RunCommandInterpreter(True, False, run_options, 0, False, True)
1073
def CreateSymbolicateCrashLogOptions(
        command_name,
        description,
        add_interactive_options):
    """Build the optparse parser for crash log symbolication.

    Args:
        command_name: accepted but not used; the program name is fixed to
            'crashlog'.  NOTE(review): confirm whether this is intended.
        description: text shown at the top of the help output.
        add_interactive_options: when true, also register
            ``-i/--interactive`` and ``-b/--batch``.

    Returns:
        The configured ``optparse.OptionParser``.
    """
    parser = optparse.OptionParser(
        prog='crashlog',
        usage="usage: %prog [options] <FILE> [FILE ...]",
        description=description)

    def add_flag(*names, **attrs):
        # Boolean switch that stores True when present.
        parser.add_option(*names, action='store_true', **attrs)

    def add_int(*names, **attrs):
        # Option taking one integer argument.
        parser.add_option(*names, type='int', **attrs)

    add_flag('--verbose', '-v',
             dest='verbose',
             default=False,
             help='display verbose debug info')
    add_flag('--debug', '-g',
             dest='debug',
             default=False,
             help='display verbose debug logging')
    add_flag('--load-all', '-a',
             dest='load_all_images',
             default=False,
             help='load all executable images, not just the images found in the '
             'crashed stack frames, loads stackframes for all the threads in '
             'interactive mode.')
    add_flag('--images',
             dest='dump_image_list',
             default=False,
             help='show image list')
    add_int('--debug-delay',
            dest='debug_delay',
            metavar='NSEC',
            default=0,
            help='pause for NSEC seconds for debugger')
    add_flag('--crashed-only', '-c',
             dest='crashed_only',
             default=False,
             help='only symbolicate the crashed thread')
    add_int('--disasm-depth', '-d',
            dest='disassemble_depth',
            default=1,
            help='set the depth in stack frames that should be disassembled (default is 1)')
    add_flag('--disasm-all', '-D',
             dest='disassemble_all_threads',
             default=False,
             help='enabled disassembly of frames on all threads (not just the crashed thread)')
    add_int('--disasm-before', '-B',
            dest='disassemble_before',
            default=4,
            help='the number of instructions to disassemble before the frame PC')
    add_int('--disasm-after', '-A',
            dest='disassemble_after',
            default=4,
            help='the number of instructions to disassemble after the frame PC')
    add_int('--source-context', '-C',
            metavar='NLINES',
            dest='source_context',
            default=4,
            help='show NLINES source lines of source context (default = 4)')
    add_int('--source-frames',
            metavar='NFRAMES',
            dest='source_frames',
            default=4,
            help='show source for NFRAMES (default = 4)')
    add_flag('--source-all',
             dest='source_all',
             default=False,
             help='show source for all threads, not just the crashed thread')

    if add_interactive_options:
        add_flag('-i', '--interactive',
                 default=False,
                 help='parse a crash log and load it in a ScriptedProcess')
        # Batch mode defaults to on.
        add_flag('-b', '--batch',
                 default=True,
                 help='dump symbolicated stackframes without creating a debug session')

    return parser
1187
1188
def CrashLogOptionParser():
    """Return the option parser for the "crashlog" command.

    The parser is built by CreateSymbolicateCrashLogOptions with the
    interactive options enabled and the long description below.
    """
    long_description = '''Symbolicate one or more darwin crash log files to provide source file and line information,
inlined stack frames back to the concrete functions, and disassemble the location of the crash
for the first frame of the crashed thread.
If this script is imported into the LLDB command interpreter, a "crashlog" command will be added to the interpreter
for use at the LLDB command line. After a crash log has been parsed and symbolicated, a target will have been
created that has all of the shared libraries loaded at the load addresses found in the crash log file. This allows
you to explore the program as if it were stopped at the locations described in the crash log and functions can
be disassembled and lookups can be performed using the addresses found in the crash log.'''
    return CreateSymbolicateCrashLogOptions(
        command_name='crashlog',
        description=long_description,
        add_interactive_options=True)
1199
def SymbolicateCrashLogs(debugger, command_args):
    """Parse crashlog command arguments and process each named crash log.

    Args:
        debugger: debugger used to parse the logs and, in interactive mode,
            to host the scripted process.
        command_args: list of already-split command-line arguments.

    With no arguments the help text is printed. If option parsing fails
    the function returns silently (optparse has already printed its own
    message). Each crash log is then either loaded into a scripted process
    (interactive mode) or parsed and symbolicated to stdout (batch mode).
    """
    option_parser = CrashLogOptionParser()

    if not command_args:
        option_parser.print_help()
        return

    try:
        (options, args) = option_parser.parse_args(command_args)
    except (Exception, SystemExit):
        # optparse signals bad options (and --help) with SystemExit, which
        # must not propagate out of the command. KeyboardInterrupt is
        # deliberately not caught.
        return

    if options.debug:
        print('command_args = %s' % command_args)
        print('options', options)
        print('args', args)

    if options.debug_delay > 0:
        print("Waiting %u seconds for debugger to attach..." % options.debug_delay)
        time.sleep(options.debug_delay)

    def should_run_in_interactive_mode(options, ci):
        if options.interactive:
            return True
        elif options.batch:
            return False
        # elif ci and ci.IsInteractive():
        #     return True
        else:
            return False

    ci = debugger.GetCommandInterpreter()

    # The mode depends only on the parsed options, so decide it once
    # rather than once per file.
    interactive = should_run_in_interactive_mode(options, ci)

    for crash_log_file in args:
        if interactive:
            load_crashlog_in_scripted_process(debugger, crash_log_file,
                                              options)
        else:
            crash_log = CrashLogParser().parse(debugger, crash_log_file, options.verbose)
            SymbolicateCrashLog(crash_log, options)
1242
if __name__ == '__main__':
    # Create a new debugger instance
    debugger = lldb.SBDebugger.Create()
    try:
        SymbolicateCrashLogs(debugger, sys.argv[1:])
    finally:
        # Tear the debugger down even when symbolication raises.
        lldb.SBDebugger.Destroy(debugger)
1248
def __lldb_init_module(debugger, internal_dict):
    """Register the "crashlog" and "save_crashlog" commands.

    Both registration commands are passed to ``debugger.HandleCommand`` and
    a confirmation message is printed. ``internal_dict`` is not used.
    """
    registrations = (
        'command script add -c lldb.macosx.crashlog.Symbolicate crashlog',
        'command script add -f lldb.macosx.crashlog.save_crashlog save_crashlog',
    )
    for registration in registrations:
        debugger.HandleCommand(registration)
    print('"crashlog" and "save_crashlog" commands have been installed, use '
          'the "--help" options on these commands for detailed help.')
1256