1#!/usr/bin/env python3
2
3#----------------------------------------------------------------------
4# Be sure to add the python path that points to the LLDB shared library.
5#
6# To use this in the embedded python interpreter using "lldb":
7#
8#   cd /path/containing/crashlog.py
9#   lldb
10#   (lldb) script import crashlog
11#   "crashlog" command installed, type "crashlog --help" for detailed help
12#   (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash
13#
14# The benefit of running the crashlog command inside lldb in the
15# embedded python interpreter is when the command completes, there
16# will be a target with all of the files loaded at the locations
17# described in the crash log. Only the files that have stack frames
18# in the backtrace will be loaded unless the "--load-all" option
19# has been specified. This allows users to explore the program in the
20# state it was in right at crash time.
21#
22# On MacOSX csh, tcsh:
23#   ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )
24#
25# On MacOSX sh, bash:
26#   PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
27#----------------------------------------------------------------------
28
29from __future__ import print_function
30import cmd
31import contextlib
32import datetime
33import glob
34import json
35import optparse
36import os
37import platform
38import plistlib
39import re
40import shlex
41import string
42import subprocess
43import sys
44import time
45import uuid
46
try:
    # First try for LLDB in case PYTHONPATH is already correctly setup.
    import lldb
except ImportError:
    # Ask the command line driver for the path to the lldb module. The child
    # process inherits our environment, so SDKROOT is propagated to xcrun.
    command = ['xcrun', 'lldb', '-P'] if platform.system() == 'Darwin' else ['lldb', '-P']
    try:
        lldb_python_path = subprocess.check_output(command).decode("utf-8").strip()
    except (OSError, subprocess.CalledProcessError):
        # The driver is missing or failed; fall through to the second import
        # attempt so the user gets the error message below, not a traceback.
        lldb_python_path = ''
    # Extend the PYTHONPATH if the path exists and isn't already there.
    if os.path.exists(lldb_python_path) and lldb_python_path not in sys.path:
        sys.path.append(lldb_python_path)
    # Try importing LLDB again.
    try:
        import lldb
    except ImportError:
        print("error: couldn't locate the 'lldb' module, please set PYTHONPATH correctly")
        sys.exit(1)
64
65from lldb.utils import symbolication
66
def read_plist(s):
    """Parse the serialized property list `s` and return the decoded object.

    Python 3 uses plistlib.loads(); any other major version falls back to
    the legacy plistlib.readPlistFromString().
    """
    running_python3 = sys.version_info.major == 3
    if not running_python3:
        return plistlib.readPlistFromString(s)
    return plistlib.loads(s)
72
class CrashLog(symbolication.Symbolicator):
    """Symbolicator populated from the contents of a darwin crash log.

    A parser fills in the threads, application specific backtraces, binary
    images and process information. The nested Thread, Frame and DarwinImage
    classes model the individual records of the log.
    """

    class Thread:
        """Class that represents a thread in a darwin crash log"""

        def __init__(self, index, app_specific_backtrace):
            """Create an empty thread record.

            index: index of the thread in the crash log; it is also used as
                   the initial thread id.
            app_specific_backtrace: True when this record is an
                   "Application Specific Backtrace" instead of a thread.
            """
            self.index = index
            self.id = index
            self.frames = list()
            self.idents = list()
            self.registers = dict()
            self.reason = None
            self.name = None
            self.queue = None
            self.crashed = False
            self.app_specific_backtrace = app_specific_backtrace

        def dump(self, prefix):
            """Print the header, frames and registers of this thread, with
            every line starting with `prefix`."""
            if self.app_specific_backtrace:
                print("%sApplication Specific Backtrace[%u] %s" % (prefix, self.index, self.reason))
            else:
                print("%sThread[%u] %s" % (prefix, self.index, self.reason))
            if self.frames:
                print("%s  Frames:" % (prefix))
                for frame in self.frames:
                    frame.dump(prefix + '    ')
            if self.registers:
                print("%s  Registers:" % (prefix))
                for reg, value in self.registers.items():
                    print("%s    %-8s = %#16.16x" % (prefix, reg, value))

        def dump_symbolicated(self, crash_log, options):
            """Print this thread with every frame symbolicated.

            crash_log: the CrashLog used to symbolicate addresses and to
                       reach the debugger and target.
            options:   parsed command options; the fields read here are
                       crashed_only, verbose, disassemble_all_threads,
                       disassemble_depth, disassemble_before,
                       disassemble_after, source_all, source_frames and
                       source_context.
            """
            this_thread_crashed = self.app_specific_backtrace
            if not this_thread_crashed:
                this_thread_crashed = self.did_crash()
                if options.crashed_only and not this_thread_crashed:
                    return

            print("%s" % self)
            # Counts every printed (possibly inlined) frame so that source is
            # only shown for the first options.source_frames of them.
            display_frame_idx = -1
            for frame_idx, frame in enumerate(self.frames):
                disassemble = (
                    this_thread_crashed or options.disassemble_all_threads) and frame_idx < options.disassemble_depth
                if frame_idx == 0:
                    symbolicated_frame_addresses = crash_log.symbolicate(
                        frame.pc & crash_log.addr_mask, options.verbose)
                else:
                    # Any frame above frame zero and we have to subtract one to
                    # get the previous line entry
                    symbolicated_frame_addresses = crash_log.symbolicate(
                        (frame.pc & crash_log.addr_mask) - 1, options.verbose)

                if symbolicated_frame_addresses:
                    for symbolicated_frame_address_idx, symbolicated_frame_address in enumerate(
                            symbolicated_frame_addresses):
                        display_frame_idx += 1
                        print('[%3u] %s' % (frame_idx, symbolicated_frame_address))
                        if (options.source_all or self.did_crash(
                        )) and display_frame_idx < options.source_frames and options.source_context:
                            source_context = options.source_context
                            line_entry = symbolicated_frame_address.get_symbol_context().line_entry
                            if line_entry.IsValid():
                                strm = lldb.SBStream()
                                if line_entry:
                                    crash_log.debugger.GetSourceManager().DisplaySourceLinesWithLineNumbers(
                                        line_entry.file, line_entry.line, source_context, source_context, "->", strm)
                                source_text = strm.GetData()
                                if source_text:
                                    # Indent the source a bit
                                    indent_str = '    '
                                    join_str = '\n' + indent_str
                                    print('%s%s' % (indent_str, join_str.join(source_text.split('\n'))))
                        # Only the first symbolicated address of a frame is
                        # disassembled.
                        if symbolicated_frame_address_idx == 0 and disassemble:
                            instructions = symbolicated_frame_address.get_instructions()
                            if instructions:
                                print()
                                symbolication.disassemble_instructions(
                                    crash_log.get_target(),
                                    instructions,
                                    frame.pc,
                                    options.disassemble_before,
                                    options.disassemble_after,
                                    frame.index > 0)
                                print()
                else:
                    print(frame)
            if self.registers:
                print()
                for reg, value in self.registers.items():
                    print("    %-8s = %#16.16x" % (reg, value))
            elif self.crashed:
                print()
                print("No thread state (register information) available")

        def add_ident(self, ident):
            """Remember `ident` as an image identifier used by this thread,
            ignoring duplicates."""
            if ident not in self.idents:
                self.idents.append(ident)

        def did_crash(self):
            """Return True when a crash reason has been recorded."""
            return self.reason is not None

        def __str__(self):
            if self.app_specific_backtrace:
                s = "Application Specific Backtrace[%u]" % self.index
            else:
                s = "Thread[%u]" % self.index
            if self.reason:
                s += ' %s' % self.reason
            return s

    class Frame:
        """Class that represents a stack frame in a thread in a darwin crash log"""

        def __init__(self, index, pc, description):
            """Store the frame index, its program counter and a description
            (printed after the address when it is truthy)."""
            self.pc = pc
            self.description = description
            self.index = index

        def __str__(self):
            if self.description:
                return "[%3u] 0x%16.16x %s" % (
                    self.index, self.pc, self.description)
            else:
                return "[%3u] 0x%16.16x" % (self.index, self.pc)

        def dump(self, prefix):
            """Print the frame preceded by `prefix`."""
            print("%s%s" % (prefix, str(self)))

    class DarwinImage(symbolication.Image):
        """Class that represents a binary images in a darwin crash log"""
        # Location of the dsymForUUID helper. When it is not at the default
        # path, ask the shell for it; an empty string means "not available".
        dsymForUUIDBinary = '/usr/local/bin/dsymForUUID'
        if not os.path.exists(dsymForUUIDBinary):
            try:
                dsymForUUIDBinary = subprocess.check_output('which dsymForUUID',
                                                            shell=True).decode("utf-8").rstrip('\n')
            except Exception:
                dsymForUUIDBinary = ""

        # Matches one line of "dwarfdump --uuid" output:
        # group 1 is the UUID, group 2 the architecture.
        dwarfdump_uuid_regex = re.compile(
            r'UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*')

        def __init__(
                self,
                text_addr_lo,
                text_addr_hi,
                identifier,
                version,
                uuid,
                path,
                verbose):
            """Create an image with a single "__TEXT" section.

            text_addr_lo / text_addr_hi: address bounds passed to the
                "__TEXT" section.
            identifier, version: image identifier and version as found in
                the crash log.
            uuid, path: forwarded to symbolication.Image.
            verbose: when True, progress is shown for every image.
            """
            symbolication.Image.__init__(self, path, uuid)
            self.add_section(
                symbolication.Section(
                    text_addr_lo,
                    text_addr_hi,
                    "__TEXT"))
            self.identifier = identifier
            self.version = version
            self.verbose = verbose

        def show_symbol_progress(self):
            """
            Hide progress output and errors from system frameworks as they are plentiful.
            """
            if self.verbose:
                return True
            return not (self.path.startswith("/System/Library/") or
                        self.path.startswith("/usr/lib/"))

        def find_matching_slice(self):
            """Look for a slice of self.path whose UUID matches this image.

            Runs "dwarfdump --uuid" on self.path. On a match, resolved_path
            and arch are set and True is returned. Otherwise False is
            returned, and when no resolved path is known the image is also
            marked unavailable.
            """
            # Pass the arguments as a list so that a path containing quotes
            # or other shell metacharacters is handed over verbatim.
            dwarfdump_cmd_output = subprocess.check_output(
                ['dwarfdump', '--uuid', self.path]).decode("utf-8")
            self_uuid = self.get_uuid()
            for line in dwarfdump_cmd_output.splitlines():
                match = self.dwarfdump_uuid_regex.search(line)
                if match:
                    dwarf_uuid_str = match.group(1)
                    dwarf_uuid = uuid.UUID(dwarf_uuid_str)
                    if self_uuid == dwarf_uuid:
                        self.resolved_path = self.path
                        self.arch = match.group(2)
                        return True
            if not self.resolved_path:
                self.unavailable = True
                if self.show_symbol_progress():
                    print(("error\n    error: unable to locate '%s' with UUID %s"
                           % (self.path, self.get_normalized_uuid_string())))
            return False

        def locate_module_and_debug_symbols(self):
            """Locate the binary and debug symbols for this image.

            Sources are tried in order: the dsymForUUID helper, the path
            recorded in the crash log, and finally a Spotlight (mdfind)
            search for a dSYM with the image UUID. Returns True when a
            usable path exists afterwards; otherwise marks the image
            unavailable and returns False.
            """
            # Don't load a module twice...
            if self.resolved:
                return True
            # Mark this as resolved so we don't keep trying
            self.resolved = True
            uuid_str = self.get_normalized_uuid_string()
            if self.show_symbol_progress():
                print('Getting symbols for %s %s...\n' % (uuid_str, self.path), end=' ')
            if os.path.exists(self.dsymForUUIDBinary):
                s = subprocess.check_output(
                    [self.dsymForUUIDBinary, uuid_str])
                if s:
                    try:
                        plist_root = read_plist(s)
                    except Exception:
                        print("Got exception: ", sys.exc_info()[1], " handling dsymForUUID output: \n", s)
                        raise
                    if plist_root:
                        plist = plist_root[uuid_str]
                        if plist:
                            if 'DBGArchitecture' in plist:
                                self.arch = plist['DBGArchitecture']
                            if 'DBGDSYMPath' in plist:
                                self.symfile = os.path.realpath(
                                    plist['DBGDSYMPath'])
                            if 'DBGSymbolRichExecutable' in plist:
                                self.path = os.path.expanduser(
                                    plist['DBGSymbolRichExecutable'])
                                self.resolved_path = self.path
            if not self.resolved_path and os.path.exists(self.path):
                if not self.find_matching_slice():
                    return False
            if not self.resolved_path and not os.path.exists(self.path):
                try:
                    mdfind_results = subprocess.check_output(
                        ["/usr/bin/mdfind",
                         "com_apple_xcode_dsym_uuids == %s" % uuid_str]).decode("utf-8").splitlines()
                    found_matching_slice = False
                    for dsym in mdfind_results:
                        dwarf_dir = os.path.join(dsym, 'Contents/Resources/DWARF')
                        if not os.path.exists(dwarf_dir):
                            # Not a dSYM bundle, probably an Xcode archive.
                            continue
                        print('falling back to binary inside "%s"' % dsym)
                        self.symfile = dsym
                        for filename in os.listdir(dwarf_dir):
                            self.path = os.path.join(dwarf_dir, filename)
                            if self.find_matching_slice():
                                found_matching_slice = True
                                break
                        if found_matching_slice:
                            break
                except Exception:
                    # The Spotlight lookup is best effort; a failure only
                    # means the image stays unresolved.
                    pass
            if (self.resolved_path and os.path.exists(self.resolved_path)) or (
                    self.path and os.path.exists(self.path)):
                print('Resolved symbols for %s %s...\n' % (uuid_str, self.path), end=' ')
                return True
            else:
                self.unavailable = True
            return False

    def __init__(self, debugger, path, verbose):
        """CrashLog constructor that take a path to a darwin crash log file"""
        symbolication.Symbolicator.__init__(self, debugger)
        self.path = os.path.expanduser(path)
        self.info_lines = list()
        self.system_profile = list()
        self.threads = list()
        self.backtraces = list()  # For application specific backtraces
        self.idents = list()  # A list of the required identifiers for doing all stack backtraces
        self.errors = list()
        self.crashed_thread_idx = -1
        self.version = -1
        self.target = None
        self.verbose = verbose

    def dump(self):
        """Print the crash log path, backtraces, threads and images."""
        print("Crash Log File: %s" % (self.path))
        if self.backtraces:
            print("\nApplication Specific Backtraces:")
            for thread in self.backtraces:
                thread.dump('  ')
        print("\nThreads:")
        for thread in self.threads:
            thread.dump('  ')
        print("\nImages:")
        for image in self.images:
            image.dump('  ')

    def set_main_image(self, identifier):
        """Move the first image whose identifier equals `identifier` to the
        front of the image list; do nothing when there is no such image."""
        for i, image in enumerate(self.images):
            if image.identifier == identifier:
                self.images.insert(0, self.images.pop(i))
                break

    def find_image_with_identifier(self, identifier):
        """Return the image named `identifier`, or None.

        An exact identifier match is preferred; otherwise the first image
        whose identifier ends with ".<identifier>" is returned.
        """
        for image in self.images:
            if image.identifier == identifier:
                return image
        regex_text = r'^.*\.%s$' % (re.escape(identifier))
        regex = re.compile(regex_text)
        for image in self.images:
            if regex.match(image.identifier):
                return image
        return None

    def create_target(self):
        """Create (once) and return the target used for symbolication.

        The base class is tried first, then the images named by the
        identifiers seen in the backtraces, then every image. An error is
        printed when none of them yields a target.
        """
        if self.target is None:
            self.target = symbolication.Symbolicator.create_target(self)
            if self.target:
                return self.target
            # We weren't able to open the main executable, but we can still
            # symbolicate
            print('crashlog.create_target()...2')
            if self.idents:
                for ident in self.idents:
                    image = self.find_image_with_identifier(ident)
                    if image:
                        self.target = image.create_target(self.debugger)
                        if self.target:
                            return self.target  # success
            print('crashlog.create_target()...3')
            for image in self.images:
                self.target = image.create_target(self.debugger)
                if self.target:
                    return self.target  # success
            print('crashlog.create_target()...4')
            print('error: Unable to locate any executables from the crash log.')
            print('       Try loading the executable into lldb before running crashlog')
            print('       and/or make sure the .dSYM bundles can be found by Spotlight.')
        return self.target

    def get_target(self):
        """Return the current target without trying to create one."""
        return self.target
401
402
class CrashLogFormatException(Exception):
    """Raised when a crash log file is not in the format a parser expects."""
405
406
class CrashLogParseException(Exception):
    """Raised when a recognized crash log cannot be parsed successfully."""
409
410
class CrashLogParser:
    """Entry point that selects the parser matching the crash log format."""

    def parse(self, debugger, path, verbose):
        """Parse the crash log at `path` and return the parser's result.

        The JSON parser is tried first; when it reports a
        CrashLogFormatException the text parser is used instead.
        """
        try:
            crashlog = JSONCrashLogParser(debugger, path, verbose).parse()
        except CrashLogFormatException:
            crashlog = TextCrashLogParser(debugger, path, verbose).parse()
        return crashlog
417
418
class JSONCrashLogParser:
    """Parser that fills a CrashLog from a JSON formatted crash log."""

    def __init__(self, debugger, path, verbose):
        """Remember the (user-expanded) path and create the CrashLog that
        parse() will populate."""
        self.path = os.path.expanduser(path)
        self.verbose = verbose
        self.crashlog = CrashLog(debugger, self.path, self.verbose)

    def parse_json(self, buffer):
        """Decode `buffer` as JSON and return the result.

        When the whole buffer is not valid JSON, the first line is dropped
        and the remainder is decoded instead. Raises ValueError when that
        fails as well.
        """
        try:
            return json.loads(buffer)
        except ValueError:
            # The first line can contain meta data. Try stripping it and try
            # again.
            _, _, tail = buffer.partition('\n')
            return json.loads(tail)

    def parse(self):
        """Read the file at self.path and return the populated CrashLog.

        Raises CrashLogFormatException when the file is not JSON, and
        CrashLogParseException when the JSON lacks expected data.
        """
        with open(self.path, 'r') as f:
            buffer = f.read()

        try:
            self.data = self.parse_json(buffer)
        except Exception:
            raise CrashLogFormatException()

        try:
            self.parse_process_info(self.data)
            self.parse_images(self.data['usedImages'])
            self.parse_main_image(self.data)
            self.parse_threads(self.data['threads'])
            self.parse_errors(self.data)
            # IndexError is raised here when the log contains no threads.
            thread = self.crashlog.threads[self.crashlog.crashed_thread_idx]
            reason = self.parse_crash_reason(self.data['exception'])
            if thread.reason:
                thread.reason = '{} {}'.format(thread.reason, reason)
            else:
                thread.reason = reason
        except (KeyError, ValueError, TypeError, IndexError) as e:
            raise CrashLogParseException(
                'Failed to parse JSON crashlog: {}: {}'.format(
                    type(e).__name__, e))

        return self.crashlog

    def get_used_image(self, idx):
        """Return entry `idx` of the 'usedImages' list."""
        return self.data['usedImages'][idx]

    def parse_process_info(self, json_data):
        """Copy the process id, name and path into the CrashLog."""
        self.crashlog.process_id = json_data['pid']
        self.crashlog.process_identifier = json_data['procName']
        self.crashlog.process_path = json_data['procPath']

    def parse_crash_reason(self, json_exception):
        """Build the crash reason string from the 'exception' dictionary.

        The result is the exception type, a space, the signal in
        parentheses when present, and then the codes (or, failing that,
        the subtype) in parentheses preceded by a space.
        """
        exception_type = json_exception['type']
        exception_signal = " "
        if 'signal' in json_exception:
            exception_signal += "({})".format(json_exception['signal'])

        if 'codes' in json_exception:
            exception_extra = " ({})".format(json_exception['codes'])
        elif 'subtype' in json_exception:
            exception_extra = " ({})".format(json_exception['subtype'])
        else:
            exception_extra = ""
        return "{}{}{}".format(exception_type, exception_signal,
                               exception_extra)

    def parse_images(self, json_images):
        """Append one DarwinImage to the CrashLog per entry of `json_images`."""
        for json_image in json_images:
            img_uuid = uuid.UUID(json_image['uuid'])
            low = int(json_image['base'])
            # The image covers [base, base + size), matching how
            # parse_frames computes an image's bounds. Without a size the
            # range is left empty.
            if 'size' in json_image:
                high = low + int(json_image['size'])
            else:
                high = low
            name = json_image['name'] if 'name' in json_image else ''
            path = json_image['path'] if 'path' in json_image else ''
            version = ''
            darwin_image = self.crashlog.DarwinImage(low, high, name, version,
                                                     img_uuid, path,
                                                     self.verbose)
            self.crashlog.images.append(darwin_image)

    def parse_main_image(self, json_data):
        """Make the image named after the process the main image."""
        if 'procName' in json_data:
            proc_name = json_data['procName']
            self.crashlog.set_main_image(proc_name)

    def parse_frames(self, thread, json_frames):
        """Append a Frame to `thread` for every entry of `json_frames`.

        The image identifier of each frame is also recorded on the thread
        and on the CrashLog.
        """
        idx = 0
        for json_frame in json_frames:
            image_id = int(json_frame['imageIndex'])
            json_image = self.get_used_image(image_id)
            ident = json_image['name'] if 'name' in json_image else ''
            thread.add_ident(ident)
            if ident not in self.crashlog.idents:
                self.crashlog.idents.append(ident)

            frame_offset = int(json_frame['imageOffset'])
            image_addr = int(json_image['base'])
            pc = image_addr + frame_offset
            thread.frames.append(self.crashlog.Frame(idx, pc, frame_offset))

            # on arm64 systems, if it jump through a null function pointer,
            # we end up at address 0 and the crash reporter unwinder
            # misses the frame that actually faulted.
            # But $lr can tell us where the last BL/BLR instruction used
            # was at, so insert that address as the caller stack frame.
            if idx == 0 and pc == 0 and "lr" in thread.registers:
                pc = thread.registers["lr"]
                for image in self.data['usedImages']:
                    text_lo = image['base']
                    text_hi = text_lo + image['size']
                    if text_lo <= pc < text_hi:
                        idx += 1
                        frame_offset = pc - text_lo
                        thread.frames.append(self.crashlog.Frame(idx, pc, frame_offset))
                        break

            idx += 1

    def parse_threads(self, json_threads):
        """Create a Thread for every entry of `json_threads`.

        The thread flagged as 'triggered' becomes the crashed thread and
        receives the register state when one is present.
        """
        for idx, json_thread in enumerate(json_threads):
            thread = self.crashlog.Thread(idx, False)
            if 'name' in json_thread:
                thread.name = json_thread['name']
                thread.reason = json_thread['name']
            if 'id' in json_thread:
                thread.id = int(json_thread['id'])
            if json_thread.get('triggered', False):
                self.crashlog.crashed_thread_idx = idx
                thread.crashed = True
                if 'threadState' in json_thread:
                    thread.registers = self.parse_thread_registers(
                        json_thread['threadState'])
            if 'queue' in json_thread:
                thread.queue = json_thread.get('queue')
            self.parse_frames(thread, json_thread.get('frames', []))
            self.crashlog.threads.append(thread)

    def parse_thread_registers(self, json_thread_state, prefix=None):
        """Return a {register name: integer value} dictionary.

        json_thread_state: dictionary of register entries; an entry that
            has no integer 'value' is skipped.
        prefix: optional string prepended to every register name.

        The "rosetta" entry is a nested state that is merged in, and the
        "x" entry is a list whose items are named x0, x1, ...
        """
        registers = dict()
        for key, state in json_thread_state.items():
            if key == "rosetta":
                registers.update(self.parse_thread_registers(state))
                continue
            if key == "x":
                gpr_dict = {str(idx): reg for idx, reg in enumerate(state)}
                registers.update(self.parse_thread_registers(gpr_dict, key))
                continue
            try:
                value = int(state['value'])
                registers["{}{}".format(prefix or '', key)] = value
            except (KeyError, ValueError, TypeError):
                # Not a register entry; ignore it.
                pass
        return registers

    def parse_errors(self, json_data):
        """Store the 'reportNotes' of the log, if any, as its errors."""
        if 'reportNotes' in json_data:
            self.crashlog.errors = json_data['reportNotes']
579
580
class CrashLogParseMode:
    """Section of a text crash log that is currently being parsed.

    The text parser uses the value to pick the handler for each line:
    NORMAL for general lines, THREAD for a thread backtrace, IMAGES for
    the binary images, THREGS for a thread's registers, SYSTEM for the
    system profile and INSTRS for a thread's instruction stream.
    """
    (NORMAL,
     THREAD,
     IMAGES,
     THREGS,
     SYSTEM,
     INSTRS) = range(6)
588
589
590class TextCrashLogParser:
591    parent_process_regex = re.compile('^Parent Process:\s*(.*)\[(\d+)\]')
592    thread_state_regex = re.compile('^Thread ([0-9]+) crashed with')
593    thread_instrs_regex = re.compile('^Thread ([0-9]+) instruction stream')
594    thread_regex = re.compile('^Thread ([0-9]+)([^:]*):(.*)')
595    app_backtrace_regex = re.compile('^Application Specific Backtrace ([0-9]+)([^:]*):(.*)')
596    version = r'(\(.+\)|(arm|x86_)[0-9a-z]+)\s+'
597    frame_regex = re.compile(r'^([0-9]+)' r'\s'                # id
598                             r'+(.+?)'    r'\s+'               # img_name
599                             r'(' +version+ r')?'              # img_version
600                             r'(0x[0-9a-fA-F]{7}[0-9a-fA-F]+)' # addr
601                             r' +(.*)'                         # offs
602                            )
603    null_frame_regex = re.compile(r'^([0-9]+)\s+\?\?\?\s+(0{7}0+) +(.*)')
604    image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)'            # img_lo
605                                  r'\s+' '-' r'\s+'              #   -
606                                  r'(0x[0-9a-fA-F]+)'     r'\s+' # img_hi
607                                  r'[+]?(.+?)'            r'\s+' # img_name
608                                  r'(' +version+ ')?'            # img_version
609                                  r'(<([-0-9a-fA-F]+)>\s+)?'     # img_uuid
610                                  r'(/.*)'                       # img_path
611                                 )
612
613
614    def __init__(self, debugger, path, verbose):
615        self.path = os.path.expanduser(path)
616        self.verbose = verbose
617        self.thread = None
618        self.app_specific_backtrace = False
619        self.crashlog = CrashLog(debugger, self.path, self.verbose)
620        self.parse_mode = CrashLogParseMode.NORMAL
621        self.parsers = {
622            CrashLogParseMode.NORMAL : self.parse_normal,
623            CrashLogParseMode.THREAD : self.parse_thread,
624            CrashLogParseMode.IMAGES : self.parse_images,
625            CrashLogParseMode.THREGS : self.parse_thread_registers,
626            CrashLogParseMode.SYSTEM : self.parse_system,
627            CrashLogParseMode.INSTRS : self.parse_instructions,
628        }
629
630    def parse(self):
631        with open(self.path,'r') as f:
632            lines = f.read().splitlines()
633
634        for line in lines:
635            line_len = len(line)
636            if line_len == 0:
637                if self.thread:
638                    if self.parse_mode == CrashLogParseMode.THREAD:
639                        if self.thread.index == self.crashlog.crashed_thread_idx:
640                            self.thread.reason = ''
641                            if self.crashlog.thread_exception:
642                                self.thread.reason += self.crashlog.thread_exception
643                            if self.crashlog.thread_exception_data:
644                                self.thread.reason += " (%s)" % self.crashlog.thread_exception_data
645                        if self.app_specific_backtrace:
646                            self.crashlog.backtraces.append(self.thread)
647                        else:
648                            self.crashlog.threads.append(self.thread)
649                    self.thread = None
650                else:
651                    # only append an extra empty line if the previous line
652                    # in the info_lines wasn't empty
653                    if len(self.crashlog.info_lines) > 0 and len(self.crashlog.info_lines[-1]):
654                        self.crashlog.info_lines.append(line)
655                self.parse_mode = CrashLogParseMode.NORMAL
656            else:
657                self.parsers[self.parse_mode](line)
658
659        return self.crashlog
660
661
662    def parse_normal(self, line):
663        if line.startswith('Process:'):
664            (self.crashlog.process_name, pid_with_brackets) = line[
665                8:].strip().split(' [')
666            self.crashlog.process_id = pid_with_brackets.strip('[]')
667        elif line.startswith('Path:'):
668            self.crashlog.process_path = line[5:].strip()
669        elif line.startswith('Identifier:'):
670            self.crashlog.process_identifier = line[11:].strip()
671        elif line.startswith('Version:'):
672            version_string = line[8:].strip()
673            matched_pair = re.search("(.+)\((.+)\)", version_string)
674            if matched_pair:
675                self.crashlog.process_version = matched_pair.group(1)
676                self.crashlog.process_compatability_version = matched_pair.group(
677                    2)
678            else:
679                self.crashlog.process = version_string
680                self.crashlog.process_compatability_version = version_string
681        elif self.parent_process_regex.search(line):
682            parent_process_match = self.parent_process_regex.search(
683                line)
684            self.crashlog.parent_process_name = parent_process_match.group(1)
685            self.crashlog.parent_process_id = parent_process_match.group(2)
686        elif line.startswith('Exception Type:'):
687            self.crashlog.thread_exception = line[15:].strip()
688            return
689        elif line.startswith('Exception Codes:'):
690            self.crashlog.thread_exception_data = line[16:].strip()
691            return
692        elif line.startswith('Exception Subtype:'): # iOS
693            self.crashlog.thread_exception_data = line[18:].strip()
694            return
695        elif line.startswith('Crashed Thread:'):
696            self.crashlog.crashed_thread_idx = int(line[15:].strip().split()[0])
697            return
698        elif line.startswith('Triggered by Thread:'): # iOS
699            self.crashlog.crashed_thread_idx = int(line[20:].strip().split()[0])
700            return
701        elif line.startswith('Report Version:'):
702            self.crashlog.version = int(line[15:].strip())
703            return
704        elif line.startswith('System Profile:'):
705            self.parse_mode = CrashLogParseMode.SYSTEM
706            return
707        elif (line.startswith('Interval Since Last Report:') or
708                line.startswith('Crashes Since Last Report:') or
709                line.startswith('Per-App Interval Since Last Report:') or
710                line.startswith('Per-App Crashes Since Last Report:') or
711                line.startswith('Sleep/Wake UUID:') or
712                line.startswith('Anonymous UUID:')):
713            # ignore these
714            return
715        elif line.startswith('Thread'):
716            thread_state_match = self.thread_state_regex.search(line)
717            if thread_state_match:
718                self.app_specific_backtrace = False
719                thread_state_match = self.thread_regex.search(line)
720                thread_idx = int(thread_state_match.group(1))
721                self.parse_mode = CrashLogParseMode.THREGS
722                self.thread = self.crashlog.threads[thread_idx]
723                return
724            thread_insts_match  = self.thread_instrs_regex.search(line)
725            if thread_insts_match:
726                self.parse_mode = CrashLogParseMode.INSTRS
727                return
728            thread_match = self.thread_regex.search(line)
729            if thread_match:
730                self.app_specific_backtrace = False
731                self.parse_mode = CrashLogParseMode.THREAD
732                thread_idx = int(thread_match.group(1))
733                self.thread = self.crashlog.Thread(thread_idx, False)
734                return
735            return
736        elif line.startswith('Binary Images:'):
737            self.parse_mode = CrashLogParseMode.IMAGES
738            return
739        elif line.startswith('Application Specific Backtrace'):
740            app_backtrace_match = self.app_backtrace_regex.search(line)
741            if app_backtrace_match:
742                self.parse_mode = CrashLogParseMode.THREAD
743                self.app_specific_backtrace = True
744                idx = int(app_backtrace_match.group(1))
745                self.thread = self.crashlog.Thread(idx, True)
746        elif line.startswith('Last Exception Backtrace:'): # iOS
747            self.parse_mode = CrashLogParseMode.THREAD
748            self.app_specific_backtrace = True
749            idx = 1
750            self.thread = self.crashlog.Thread(idx, True)
751        self.crashlog.info_lines.append(line.strip())
752
753    def parse_thread(self, line):
754        if line.startswith('Thread'):
755            return
756        if self.null_frame_regex.search(line):
757            print('warning: thread parser ignored null-frame: "%s"' % line)
758            return
759        frame_match = self.frame_regex.search(line)
760        if frame_match:
761            (frame_id, frame_img_name, _, frame_img_version, _,
762                frame_addr, frame_ofs) = frame_match.groups()
763            ident = frame_img_name
764            self.thread.add_ident(ident)
765            if ident not in self.crashlog.idents:
766                self.crashlog.idents.append(ident)
767            self.thread.frames.append(self.crashlog.Frame(int(frame_id), int(
768                frame_addr, 0), frame_ofs))
769        else:
770            print('error: frame regex failed for line: "%s"' % line)
771
772    def parse_images(self, line):
773        image_match = self.image_regex_uuid.search(line)
774        if image_match:
775            (img_lo, img_hi, img_name, _, img_version, _,
776                _, img_uuid, img_path) = image_match.groups()
777            image = self.crashlog.DarwinImage(int(img_lo, 0), int(img_hi, 0),
778                                            img_name.strip(),
779                                            img_version.strip()
780                                            if img_version else "",
781                                            uuid.UUID(img_uuid), img_path,
782                                            self.verbose)
783            self.crashlog.images.append(image)
784        else:
785            print("error: image regex failed for: %s" % line)
786
787
788    def parse_thread_registers(self, line):
789        stripped_line = line.strip()
790        # "r12: 0x00007fff6b5939c8  r13: 0x0000000007000006  r14: 0x0000000000002a03  r15: 0x0000000000000c00"
791        reg_values = re.findall(
792            '([a-zA-Z0-9]+: 0[Xx][0-9a-fA-F]+) *', stripped_line)
793        for reg_value in reg_values:
794            (reg, value) = reg_value.split(': ')
795            self.thread.registers[reg.strip()] = int(value, 0)
796
797    def parse_system(self, line):
798        self.crashlog.system_profile.append(line)
799
800    def parse_instructions(self, line):
801        pass
802
803
def usage():
    """Print the command-line usage line and exit with status 0."""
    print("Usage: lldb-symbolicate.py [-n name] executable-image")
    # Equivalent to sys.exit(0): terminate by raising SystemExit.
    raise SystemExit(0)
807
808
def _write_crashlog_threads(out_file, target, process, verbose):
    """Write a 'Thread N:' section, one line per frame, for every thread."""
    for thread_idx in range(process.num_threads):
        thread = process.thread[thread_idx]
        out_file.write('\nThread %u:\n' % (thread_idx))
        for (frame_idx, frame) in enumerate(thread.frames):
            frame_pc = frame.pc
            # Offset of the PC from the start of the enclosing block range,
            # function or symbol -- whichever is available, in that order.
            frame_offset = 0
            if frame.function:
                block = frame.GetFrameBlock()
                block_range = block.range[frame.addr]
                if block_range:
                    block_start_addr = block_range[0]
                    frame_offset = frame_pc - block_start_addr.GetLoadAddress(target)
                else:
                    frame_offset = frame_pc - frame.function.addr.GetLoadAddress(target)
            elif frame.symbol:
                frame_offset = frame_pc - frame.symbol.addr.GetLoadAddress(target)
            out_file.write(
                '%-3u %-32s 0x%16.16x %s' %
                (frame_idx, frame.module.file.basename, frame_pc, frame.name))
            if frame_offset > 0:
                out_file.write(' + %u' % (frame_offset))
            line_entry = frame.line_entry
            if line_entry:
                if verbose:
                    # This will output the fullpath + line + column
                    out_file.write(' %s' % (line_entry))
                else:
                    out_file.write(
                        ' %s:%u' %
                        (line_entry.file.basename, line_entry.line))
                    column = line_entry.column
                    if column:
                        out_file.write(':%u' % (column))
            out_file.write('\n')


def _write_crashlog_images(out_file, target):
    """Write the 'Binary Images:' section for modules with a loaded __TEXT."""
    out_file.write('\nBinary Images:\n')
    for module in target.modules:
        text_segment = module.section['__TEXT']
        if not text_segment:
            continue
        text_segment_load_addr = text_segment.GetLoadAddress(target)
        if text_segment_load_addr == lldb.LLDB_INVALID_ADDRESS:
            continue
        text_segment_end_load_addr = text_segment_load_addr + text_segment.size
        module_version = '???'
        module_version_array = module.GetVersion()
        if module_version_array:
            module_version = '.'.join(map(str, module_version_array))
        out_file.write(
            '    0x%16.16x - 0x%16.16x  %s (%s - ???) <%s> %s\n' %
            (text_segment_load_addr,
             text_segment_end_load_addr,
             module.file.basename,
             module_version,
             module.GetUUIDString(),
             module.file.fullpath))


def save_crashlog(debugger, command, exe_ctx, result, dict):
    """Export the state of the current target into a crashlog-style file.

    This has the (debugger, command, exe_ctx, result, dict) shape used for
    function-based lldb commands. *command* is split shell-style and must
    contain exactly one positional argument: the output path. ``-v`` /
    ``--verbose`` writes the full line-entry description for each frame
    instead of ``basename:line[:column]``.

    The file contains a header (Process, Path, Identifier, Date/Time,
    OS Version, Report Version), one section per thread of
    ``exe_ctx.process`` and a "Binary Images:" section for the modules of
    ``exe_ctx.target``.

    Problems (bad options, wrong argument count, invalid target, output file
    that cannot be opened) are reported through ``result.PutCString`` and the
    function returns without raising. The target is validated before the
    output file is opened so an existing file is not truncated needlessly.
    *debugger* and *dict* are not used.
    """
    usage_text = "usage: %prog [options] <output-path>"
    description = '''Export the state of current target into a crashlog file'''
    parser = optparse.OptionParser(
        description=description,
        prog='save_crashlog',
        usage=usage_text)
    parser.add_option(
        '-v',
        '--verbose',
        action='store_true',
        dest='verbose',
        help='display verbose debug info',
        default=False)
    try:
        (options, args) = parser.parse_args(shlex.split(command))
    except (Exception, SystemExit):
        # optparse signals bad options (and --help) by raising SystemExit,
        # which must not escape into the hosting debugger.
        result.PutCString("error: invalid options")
        return
    if len(args) != 1:
        result.PutCString(
            "error: invalid arguments, a single output file is the only valid argument")
        return

    target = exe_ctx.target
    if not target:
        result.PutCString("error: invalid target")
        return

    try:
        out_file = open(args[0], 'w')
    except (IOError, OSError):
        # open() raises on failure; it never returns a falsy object.
        result.PutCString(
            "error: failed to open file '%s' for writing..." % args[0])
        return

    # The context manager closes the file even if an lldb call raises.
    with out_file:
        identifier = target.executable.basename
        process = exe_ctx.process
        if process:
            pid = process.id
            if pid != lldb.LLDB_INVALID_PROCESS_ID:
                out_file.write(
                    'Process:         %s [%u]\n' %
                    (identifier, pid))
        out_file.write('Path:            %s\n' % (target.executable.fullpath))
        out_file.write('Identifier:      %s\n' % (identifier))
        out_file.write('\nDate/Time:       %s\n' %
                       (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
        try:
            # Strip the trailing newline so the build stays on one line.
            os_build = subprocess.check_output(
                ['sysctl', '-n', 'kern.osversion']).decode("utf-8").strip()
        except (OSError, subprocess.CalledProcessError):
            os_build = '???'
        out_file.write(
            'OS Version:      Mac OS X %s (%s)\n' %
            (platform.mac_ver()[0], os_build))
        out_file.write('Report Version:  9\n')
        if process:
            _write_crashlog_threads(out_file, target, process, options.verbose)
        _write_crashlog_images(out_file, target)
915
916
class Symbolicate:
    """Command object that symbolicates darwin crash logs.

    Instances are callable with (debugger, command, exe_ctx, result) and
    expose short/long help text.
    """

    def __init__(self, debugger, internal_dict):
        """Accept the constructor arguments; no state is kept."""

    def __call__(self, debugger, command, exe_ctx, result):
        """Run the symbolication for the shell-style *command* string.

        Any ``Exception`` is reported through ``result.PutCString`` instead
        of propagating to the caller.
        """
        try:
            argv = shlex.split(command)
            SymbolicateCrashLogs(debugger, argv)
        except Exception as exc:
            result.PutCString("error: python exception: %s" % exc)

    def get_short_help(self):
        """Return the one-line summary of the command."""
        return "Symbolicate one or more darwin crash log files."

    def get_long_help(self):
        """Return the full option help produced by the option parser."""
        return CrashLogOptionParser().format_help()
933
934
def SymbolicateCrashLog(crash_log, options):
    """Load the images of a parsed crash log and print symbolicated threads.

    Steps, in order:
      * optionally dump the crash log (``options.debug``);
      * bail out with an error if the log has no images;
      * optionally list the images (``options.dump_image_list``);
      * create the target and add the selected images to it -- all images
        with ``options.load_all_images``, otherwise only those referenced by
        the crashed threads (``options.crashed_only``) or by any thread;
      * print every backtrace and thread, then any accumulated errors.

    All output goes to stdout; nothing is returned.
    """
    if options.debug:
        crash_log.dump()
    if not crash_log.images:
        print('error: no images in crash log')
        return

    if options.dump_image_list:
        print("Binary Images:")
        for image in crash_log.images:
            print(image.debug_dump() if options.verbose else image)

    target = crash_log.create_target()
    if not target:
        return
    # The first module is fetched but its value is currently unused.
    exe_module = target.GetModuleAtIndex(0)

    def candidate_images():
        """Yield the images to load, in the order they should be tried."""
        if options.load_all_images:
            # --load-all option was specified, load everything up
            for image in crash_log.images:
                yield image
            return
        if options.crashed_only:
            # Only the identifiers seen in frames of crashed threads.
            idents = (ident
                      for thread in crash_log.threads
                      if thread.did_crash()
                      for ident in thread.idents)
        else:
            idents = crash_log.idents
        for ident in idents:
            matches = crash_log.find_images_with_identifier(ident)
            if not matches:
                print('error: can\'t find image for identifier "%s"' % ident)
                continue
            for image in matches:
                yield image

    # Resolve the full candidate list first so lookup errors are printed
    # before any module-loading errors.
    images_to_load = list(candidate_images())

    loaded_images = []
    for image in images_to_load:
        if image in loaded_images:
            continue
        err = image.add_module(target)
        if err:
            print(err)
        else:
            loaded_images.append(image)

    def dump_threads(threads):
        """Print each thread symbolicated, followed by a blank line."""
        for thread in threads:
            thread.dump_symbolicated(crash_log, options)
            print()

    if crash_log.backtraces:
        dump_threads(crash_log.backtraces)
    dump_threads(crash_log.threads)

    if crash_log.errors:
        print("Errors:")
        for error in crash_log.errors:
            print(error)
1002
def load_crashlog_in_scripted_process(debugger, crash_log_file, options):
    """Load a crash log into a ScriptedProcess so it can be explored in lldb.

    The crash log at *crash_log_file* (``~`` is expanded) is parsed, a target
    is picked (the debugger's first target if it has one, otherwise one
    created from the crash log) and a "ScriptedProcess" backed by
    ``crashlog_scripted_process.CrashLogScriptedProcess`` is launched on it.
    ``options.load_all_images`` is forwarded to that process. On success
    "process status" and "thread backtrace" are run through the command
    interpreter in synchronous mode.

    Every failure is reported on stdout with an ``error:`` prefix and makes
    the function return early. Previously these messages were written to a
    throw-away SBCommandReturnObject and never shown, and a missing crash
    log file did not stop processing.
    """
    crashlog_path = os.path.expanduser(crash_log_file)
    if not os.path.exists(crashlog_path):
        print("error: crashlog file %s does not exist" % crashlog_path)
        return

    try:
        crashlog = CrashLogParser().parse(debugger, crashlog_path, False)
    except Exception as e:
        print("error: python exception: %s" % e)
        return

    if debugger.GetNumTargets() > 0:
        target = debugger.GetTargetAtIndex(0)
    else:
        target = crashlog.create_target()
    if not target:
        print("error: couldn't create target")
        return

    ci = debugger.GetCommandInterpreter()
    if not ci:
        print("error: couldn't get command interpreter")
        return

    # The scripted process class must be importable from the embedded
    # interpreter before it can be named in the launch info.
    res = lldb.SBCommandReturnObject()
    ci.HandleCommand('script from lldb.macosx import crashlog_scripted_process', res)
    if not res.Succeeded():
        print("error: couldn't import crashlog scripted process module")
        return

    structured_data = lldb.SBStructuredData()
    structured_data.SetFromJSON(json.dumps({ "crashlog_path" : crashlog_path,
                                             "load_all_images": options.load_all_images }))
    launch_info = lldb.SBLaunchInfo(None)
    launch_info.SetProcessPluginName("ScriptedProcess")
    launch_info.SetScriptedProcessClassName("crashlog_scripted_process.CrashLogScriptedProcess")
    launch_info.SetScriptedProcessDictionary(structured_data)
    error = lldb.SBError()
    process = target.Launch(launch_info, error)

    if not process or error.Fail():
        print("error: couldn't launch the scripted process: %s" %
              (error.GetCString() or "unknown error"))
        return

    @contextlib.contextmanager
    def synchronous(debugger):
        # Temporarily force synchronous mode, restoring the previous
        # setting even if the body raises.
        async_state = debugger.GetAsync()
        debugger.SetAsync(False)
        try:
            yield
        finally:
            debugger.SetAsync(async_state)

    with synchronous(debugger):
        run_options = lldb.SBCommandInterpreterRunOptions()
        run_options.SetStopOnError(True)
        run_options.SetStopOnCrash(True)
        run_options.SetEchoCommands(True)

        commands_stream = lldb.SBStream()
        commands_stream.Print("process status\n")
        commands_stream.Print("thread backtrace\n")
        error = debugger.SetInputString(commands_stream.GetData())
        if error.Success():
            debugger.RunCommandInterpreter(True, False, run_options, 0, False, True)
        else:
            print("error: couldn't queue the status commands: %s" %
                  (error.GetCString() or "unknown error"))
1069
def CreateSymbolicateCrashLogOptions(
        command_name,
        description,
        add_interactive_options):
    """Build the option parser used to symbolicate crash logs.

    Args:
        command_name: program name shown in the usage/help output. It used
            to be ignored in favour of a hard-coded 'crashlog'; that value
            is still the fallback when *command_name* is empty or None.
        description: text shown at the top of the help output.
        add_interactive_options: when true, also register
            ``-i/--interactive`` and ``-b/--batch`` (the latter defaults to
            True).

    Returns:
        The configured ``optparse.OptionParser``.
    """
    option_parser = optparse.OptionParser(
        description=description,
        prog=command_name or 'crashlog',
        usage="usage: %prog [options] <FILE> [FILE ...]")

    def add_flag(*names, **attrs):
        # Boolean switch: present on the command line -> True.
        option_parser.add_option(*names, action='store_true', **attrs)

    def add_int(*names, **attrs):
        # Option taking one integer argument.
        option_parser.add_option(*names, type='int', **attrs)

    add_flag('--verbose', '-v', dest='verbose',
             help='display verbose debug info',
             default=False)
    add_flag('--debug', '-g', dest='debug',
             help='display verbose debug logging',
             default=False)
    add_flag('--load-all', '-a', dest='load_all_images',
             help='load all executable images, not just the images found in the '
             'crashed stack frames, loads stackframes for all the threads in '
             'interactive mode.',
             default=False)
    add_flag('--images', dest='dump_image_list',
             help='show image list',
             default=False)
    add_int('--debug-delay', dest='debug_delay', metavar='NSEC',
            help='pause for NSEC seconds for debugger',
            default=0)
    add_flag('--crashed-only', '-c', dest='crashed_only',
             help='only symbolicate the crashed thread',
             default=False)
    add_int('--disasm-depth', '-d', dest='disassemble_depth',
            help='set the depth in stack frames that should be disassembled (default is 1)',
            default=1)
    add_flag('--disasm-all', '-D', dest='disassemble_all_threads',
             help='enabled disassembly of frames on all threads (not just the crashed thread)',
             default=False)
    add_int('--disasm-before', '-B', dest='disassemble_before',
            help='the number of instructions to disassemble before the frame PC',
            default=4)
    add_int('--disasm-after', '-A', dest='disassemble_after',
            help='the number of instructions to disassemble after the frame PC',
            default=4)
    add_int('--source-context', '-C', metavar='NLINES', dest='source_context',
            help='show NLINES source lines of source context (default = 4)',
            default=4)
    add_int('--source-frames', metavar='NFRAMES', dest='source_frames',
            help='show source for NFRAMES (default = 4)',
            default=4)
    add_flag('--source-all', dest='source_all',
             help='show source for all threads, not just the crashed thread',
             default=False)
    if add_interactive_options:
        # No explicit dest: optparse derives 'interactive' / 'batch' from
        # the long option names.
        add_flag('-i', '--interactive',
                 help='parse a crash log and load it in a ScriptedProcess',
                 default=False)
        add_flag('-b', '--batch',
                 help='dump symbolicated stackframes without creating a debug session',
                 default=True)
    return option_parser
1183
1184
def CrashLogOptionParser():
    """Return the option parser for the "crashlog" command.

    The parser carries the long description below and includes the
    interactive/batch options.
    """
    help_description = '''Symbolicate one or more darwin crash log files to provide source file and line information,
inlined stack frames back to the concrete functions, and disassemble the location of the crash
for the first frame of the crashed thread.
If this script is imported into the LLDB command interpreter, a "crashlog" command will be added to the interpreter
for use at the LLDB command line. After a crash log has been parsed and symbolicated, a target will have been
created that has all of the shared libraries loaded at the load addresses found in the crash log file. This allows
you to explore the program as if it were stopped at the locations described in the crash log and functions can
be disassembled and lookups can be performed using the addresses found in the crash log.'''
    return CreateSymbolicateCrashLogOptions(
        command_name='crashlog',
        description=help_description,
        add_interactive_options=True)
1195
def SymbolicateCrashLogs(debugger, command_args):
    """Parse *command_args* and process every crash log file named in them.

    Each file is either loaded into a scripted process (when
    ``--interactive`` was given) or parsed and symbolicated to stdout.
    If option parsing fails or exits, the function returns silently.
    """
    option_parser = CrashLogOptionParser()
    try:
        (options, args) = option_parser.parse_args(command_args)
    except BaseException:
        # optparse reports errors (and --help) via SystemExit; swallow
        # everything here, exactly like a bare ``except``.
        return

    if options.debug:
        print('command_args = %s' % command_args)
        print('options', options)
        print('args', args)

    delay_seconds = options.debug_delay
    if delay_seconds > 0:
        print("Waiting %u seconds for debugger to attach..." % delay_seconds)
        time.sleep(delay_seconds)
    # Constructed but currently unused.
    error = lldb.SBError()

    ci = debugger.GetCommandInterpreter()

    def should_run_in_interactive_mode(options, ci):
        # Interactive mode only on explicit request. '--batch' and the
        # fallback both select batch mode; auto-detection through
        # ci.IsInteractive() is currently disabled, so *ci* is not consulted.
        return True if options.interactive else False

    for crash_log_file in args:
        if should_run_in_interactive_mode(options, ci):
            load_crashlog_in_scripted_process(debugger, crash_log_file,
                                              options)
            continue
        crash_log = CrashLogParser().parse(debugger, crash_log_file, options.verbose)
        SymbolicateCrashLog(crash_log, options)
1233
if __name__ == '__main__':
    # Stand-alone use: symbolicate the crash logs named on the command line
    # with a private debugger instance.
    debugger = lldb.SBDebugger.Create()
    try:
        SymbolicateCrashLogs(debugger, sys.argv[1:])
    finally:
        # Release the debugger even if symbolication raised.
        lldb.SBDebugger.Destroy(debugger)
1239
def __lldb_init_module(debugger, internal_dict):
    """Register the "crashlog" and "save_crashlog" commands with *debugger*.

    The function follows lldb's ``__lldb_init_module`` naming convention.
    *internal_dict* is not used. A confirmation message is printed once
    both commands have been submitted.
    """
    registrations = (
        'command script add -c lldb.macosx.crashlog.Symbolicate crashlog',
        'command script add -f lldb.macosx.crashlog.save_crashlog save_crashlog',
    )
    for registration in registrations:
        debugger.HandleCommand(registration)
    confirmation = ('"crashlog" and "save_crashlog" commands have been installed, use '
                    'the "--help" options on these commands for detailed help.')
    print(confirmation)
1247