1#!/usr/bin/python
2
3import cmd
4import dict_utils
5import file_extract
6import optparse
7import re
8import struct
9import string
10import StringIO
11import sys
12import uuid
13
14# Mach header "magic" constants
15MH_MAGIC                    = 0xfeedface
16MH_CIGAM                    = 0xcefaedfe
17MH_MAGIC_64                 = 0xfeedfacf
18MH_CIGAM_64                 = 0xcffaedfe
19FAT_MAGIC                   = 0xcafebabe
20FAT_CIGAM                   = 0xbebafeca
21
# Mach header "filetype" constants
23MH_OBJECT                   = 0x00000001
24MH_EXECUTE                  = 0x00000002
25MH_FVMLIB                   = 0x00000003
26MH_CORE                     = 0x00000004
27MH_PRELOAD                  = 0x00000005
28MH_DYLIB                    = 0x00000006
29MH_DYLINKER                 = 0x00000007
30MH_BUNDLE                   = 0x00000008
31MH_DYLIB_STUB               = 0x00000009
32MH_DSYM                     = 0x0000000a
33MH_KEXT_BUNDLE              = 0x0000000b
34
# Mach header "flag" constant bits
36MH_NOUNDEFS                 = 0x00000001
37MH_INCRLINK                 = 0x00000002
38MH_DYLDLINK                 = 0x00000004
39MH_BINDATLOAD               = 0x00000008
40MH_PREBOUND                 = 0x00000010
41MH_SPLIT_SEGS               = 0x00000020
42MH_LAZY_INIT                = 0x00000040
43MH_TWOLEVEL                 = 0x00000080
44MH_FORCE_FLAT               = 0x00000100
45MH_NOMULTIDEFS              = 0x00000200
46MH_NOFIXPREBINDING          = 0x00000400
47MH_PREBINDABLE              = 0x00000800
48MH_ALLMODSBOUND             = 0x00001000
49MH_SUBSECTIONS_VIA_SYMBOLS  = 0x00002000
50MH_CANONICAL                = 0x00004000
51MH_WEAK_DEFINES             = 0x00008000
52MH_BINDS_TO_WEAK            = 0x00010000
53MH_ALLOW_STACK_EXECUTION    = 0x00020000
54MH_ROOT_SAFE                = 0x00040000
55MH_SETUID_SAFE              = 0x00080000
56MH_NO_REEXPORTED_DYLIBS     = 0x00100000
57MH_PIE                      = 0x00200000
58MH_DEAD_STRIPPABLE_DYLIB    = 0x00400000
59MH_HAS_TLV_DESCRIPTORS      = 0x00800000
60MH_NO_HEAP_EXECUTION        = 0x01000000
61
62# Mach load command constants
63LC_REQ_DYLD                 = 0x80000000
64LC_SEGMENT                  = 0x00000001
65LC_SYMTAB                   = 0x00000002
66LC_SYMSEG                   = 0x00000003
67LC_THREAD                   = 0x00000004
68LC_UNIXTHREAD               = 0x00000005
69LC_LOADFVMLIB               = 0x00000006
70LC_IDFVMLIB                 = 0x00000007
71LC_IDENT                    = 0x00000008
72LC_FVMFILE                  = 0x00000009
73LC_PREPAGE                  = 0x0000000a
74LC_DYSYMTAB                 = 0x0000000b
75LC_LOAD_DYLIB               = 0x0000000c
76LC_ID_DYLIB                 = 0x0000000d
77LC_LOAD_DYLINKER            = 0x0000000e
78LC_ID_DYLINKER              = 0x0000000f
79LC_PREBOUND_DYLIB           = 0x00000010
80LC_ROUTINES                 = 0x00000011
81LC_SUB_FRAMEWORK            = 0x00000012
82LC_SUB_UMBRELLA             = 0x00000013
83LC_SUB_CLIENT               = 0x00000014
84LC_SUB_LIBRARY              = 0x00000015
85LC_TWOLEVEL_HINTS           = 0x00000016
86LC_PREBIND_CKSUM            = 0x00000017
87LC_LOAD_WEAK_DYLIB          = 0x00000018 | LC_REQ_DYLD
88LC_SEGMENT_64               = 0x00000019
89LC_ROUTINES_64              = 0x0000001a
90LC_UUID                     = 0x0000001b
91LC_RPATH                    = 0x0000001c | LC_REQ_DYLD
92LC_CODE_SIGNATURE           = 0x0000001d
93LC_SEGMENT_SPLIT_INFO       = 0x0000001e
94LC_REEXPORT_DYLIB           = 0x0000001f | LC_REQ_DYLD
95LC_LAZY_LOAD_DYLIB          = 0x00000020
96LC_ENCRYPTION_INFO          = 0x00000021
97LC_DYLD_INFO                = 0x00000022
98LC_DYLD_INFO_ONLY           = 0x00000022 | LC_REQ_DYLD
99LC_LOAD_UPWARD_DYLIB        = 0x00000023 | LC_REQ_DYLD
100LC_VERSION_MIN_MACOSX       = 0x00000024
101LC_VERSION_MIN_IPHONEOS     = 0x00000025
102LC_FUNCTION_STARTS          = 0x00000026
103LC_DYLD_ENVIRONMENT         = 0x00000027
104
105# Mach CPU constants
106CPU_ARCH_MASK               = 0xff000000
107CPU_ARCH_ABI64              = 0x01000000
108CPU_TYPE_ANY                = 0xffffffff
109CPU_TYPE_VAX                = 1
110CPU_TYPE_MC680x0            = 6
111CPU_TYPE_I386               = 7
112CPU_TYPE_X86_64             = CPU_TYPE_I386 | CPU_ARCH_ABI64
113CPU_TYPE_MIPS               = 8
114CPU_TYPE_MC98000            = 10
115CPU_TYPE_HPPA               = 11
116CPU_TYPE_ARM                = 12
117CPU_TYPE_MC88000            = 13
118CPU_TYPE_SPARC              = 14
119CPU_TYPE_I860               = 15
120CPU_TYPE_ALPHA              = 16
121CPU_TYPE_POWERPC            = 18
122CPU_TYPE_POWERPC64          = CPU_TYPE_POWERPC | CPU_ARCH_ABI64
123
124# VM protection constants
125VM_PROT_READ    = 1
126VM_PROT_WRITE   = 2
127VM_PROT_EXECUTE = 4
128
# Bit masks for the nlist "Mach.NList.type" field
130N_STAB          = 0xe0
131N_PEXT          = 0x10
132N_TYPE          = 0x0e
133N_EXT           = 0x01
134
135# Values for nlist N_TYPE bits of the "Mach.NList.type" field.
136N_UNDF          = 0x0
137N_ABS           = 0x2
138N_SECT          = 0xe
139N_PBUD          = 0xc
140N_INDR          = 0xa
141
142# Section indexes for the "Mach.NList.sect_idx" fields
143NO_SECT         = 0
144MAX_SECT        = 255
145
146# Stab defines
147N_GSYM          = 0x20
148N_FNAME         = 0x22
149N_FUN           = 0x24
150N_STSYM         = 0x26
151N_LCSYM         = 0x28
152N_BNSYM         = 0x2e
153N_OPT           = 0x3c
154N_RSYM          = 0x40
155N_SLINE         = 0x44
156N_ENSYM         = 0x4e
157N_SSYM          = 0x60
158N_SO            = 0x64
159N_OSO           = 0x66
160N_LSYM          = 0x80
161N_BINCL         = 0x82
162N_SOL           = 0x84
163N_PARAMS        = 0x86
164N_VERSION       = 0x88
165N_OLEVEL        = 0x8A
166N_PSYM          = 0xa0
167N_EINCL         = 0xa2
168N_ENTRY         = 0xa4
169N_LBRAC         = 0xc0
170N_EXCL          = 0xc2
171N_RBRAC         = 0xe0
172N_BCOMM         = 0xe2
173N_ECOMM         = 0xe4
174N_ECOML         = 0xe8
175N_LENG          = 0xfe
176
177vm_prot_names = [ '---', 'r--', '-w-', 'rw-', '--x', 'r-x', '-wx', 'rwx' ]
178
179def dump_memory(base_addr, data, hex_bytes_len, num_per_line):
180    hex_bytes = data.encode('hex')
181    if hex_bytes_len == -1:
182        hex_bytes_len = len(hex_bytes)
183    addr = base_addr
184    ascii_str = ''
185    i = 0
186    while i < hex_bytes_len:
187        if ((i/2) % num_per_line) == 0:
188            if i > 0:
189                print ' %s' % (ascii_str)
190                ascii_str = ''
191            print '0x%8.8x:' % (addr+i),
192        hex_byte = hex_bytes[i:i+2]
193        print hex_byte,
194        int_byte = int (hex_byte, 16)
195        ascii_char = '%c' % (int_byte)
196        if int_byte >= 32 and int_byte < 127:
197            ascii_str += ascii_char
198        else:
199            ascii_str += '.'
200        i = i + 2
201    if ascii_str:
202        if (i/2) % num_per_line:
203            padding = num_per_line - ((i/2) % num_per_line)
204        else:
205            padding = 0
206        print '%*s%s' % (padding*3+1,'',ascii_str)
207    print
208
209
class TerminalColors:
    '''Simple terminal colors class.

    Every method returns an escape sequence as a string (nothing is written
    to the terminal). When the object was created with "enabled" testing
    False, every method returns the empty string instead.'''
    def __init__(self, enabled = True):
        # TODO: discover terminal type from "file" and disable if
        # it can't handle the color codes
        self.enabled = enabled

    def _escape(self, flag, true_code, false_code):
        '''Return the escape sequence for "true_code" if "flag" tests True,
        otherwise the one for "false_code". Returns '' when disabled.'''
        if not self.enabled:
            return ''
        if flag:
            return "\x1b[%dm" % true_code
        return "\x1b[%dm" % false_code

    def reset(self):
        '''Reset all terminal colors and formatting.'''
        return self._escape(True, 0, 0)

    def bold(self, on = True):
        '''Enable or disable bold depending on the "on" parameter.'''
        return self._escape(on, 1, 22)

    def italics(self, on = True):
        '''Enable or disable italics depending on the "on" parameter.'''
        return self._escape(on, 3, 23)

    def underline(self, on = True):
        '''Enable or disable underline depending on the "on" parameter.'''
        return self._escape(on, 4, 24)

    def inverse(self, on = True):
        '''Enable or disable inverse depending on the "on" parameter.'''
        return self._escape(on, 7, 27)

    def strike(self, on = True):
        '''Enable or disable strike through depending on the "on" parameter.'''
        return self._escape(on, 9, 29)

    def black(self, fg = True):
        '''Set the foreground or background color to black.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._escape(fg, 30, 40)

    def red(self, fg = True):
        '''Set the foreground or background color to red.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._escape(fg, 31, 41)

    def green(self, fg = True):
        '''Set the foreground or background color to green.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._escape(fg, 32, 42)

    def yellow(self, fg = True):
        '''Set the foreground or background color to yellow.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        # Foreground is 33 and background is 43, matching the 3x/4x pairing
        # used by every other color in this class.
        return self._escape(fg, 33, 43)

    def blue(self, fg = True):
        '''Set the foreground or background color to blue.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._escape(fg, 34, 44)

    def magenta(self, fg = True):
        '''Set the foreground or background color to magenta.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._escape(fg, 35, 45)

    def cyan(self, fg = True):
        '''Set the foreground or background color to cyan.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._escape(fg, 36, 46)

    def white(self, fg = True):
        '''Set the foreground or background color to white.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._escape(fg, 37, 47)

    def default(self, fg = True):
        '''Set the foreground or background color to the default.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._escape(fg, 39, 49)
357
def swap_unpack_char():
    """Return the struct byte order prefix for the non-native endianness.

    The result is '<' (little endian) on a big endian host and '>' (big
    endian) on a little endian host, i.e. the prefix needed to unpack data
    whose byte order is the opposite of this machine's.
    """
    # sys.byteorder avoids comparing struct.pack() output against a str
    # literal, which raises TypeError on Python 3 where pack() returns bytes.
    if sys.byteorder == 'big':
        return '<'
    return '>'
363
364
365def dump_hex_bytes(addr, s, bytes_per_line=16):
366    i = 0
367    line = ''
368    for ch in s:
369        if (i % bytes_per_line) == 0:
370            if line:
371                print line
372            line = '%#8.8x: ' % (addr + i)
373        line += "%02X " % ord(ch)
374        i += 1
375    print line
376
377def dump_hex_byte_string_diff(addr, a, b, bytes_per_line=16):
378    i = 0
379    line = ''
380    a_len = len(a)
381    b_len = len(b)
382    if a_len < b_len:
383        max_len = b_len
384    else:
385        max_len = a_len
386    tty_colors = TerminalColors (True)
387    for i in range(max_len):
388        ch = None
389        if i < a_len:
390            ch_a = a[i]
391            ch = ch_a
392        else:
393            ch_a = None
394        if i < b_len:
395            ch_b = b[i]
396            if not ch:
397                ch = ch_b
398        else:
399            ch_b = None
400        mismatch = ch_a != ch_b
401        if (i % bytes_per_line) == 0:
402            if line:
403                print line
404            line = '%#8.8x: ' % (addr + i)
405        if mismatch: line += tty_colors.red()
406        line += "%02X " % ord(ch)
407        if mismatch: line += tty_colors.default()
408        i += 1
409
410    print line
411
412class Mach:
413    """Class that does everything mach-o related"""
414
415    class Arch:
416        """Class that implements mach-o architectures"""
417
418        def __init__(self, c=0, s=0):
419            self.cpu=c
420            self.sub=s
421
422        def set_cpu_type(self, c):
423            self.cpu=c
424        def set_cpu_subtype(self, s):
425            self.sub=s
426        def set_arch(self, c, s):
427            self.cpu=c
428            self.sub=s
429        def is_64_bit(self):
430            return (self.cpu & CPU_ARCH_ABI64) != 0
431
432        cpu_infos = [
433            [ "arm"         , CPU_TYPE_ARM       , CPU_TYPE_ANY ],
434            [ "arm"         , CPU_TYPE_ARM       , 0            ],
435            [ "armv4"       , CPU_TYPE_ARM       , 5            ],
436            [ "armv6"       , CPU_TYPE_ARM       , 6            ],
437            [ "armv5"       , CPU_TYPE_ARM       , 7            ],
438            [ "xscale"      , CPU_TYPE_ARM       , 8            ],
439            [ "armv7"       , CPU_TYPE_ARM       , 9            ],
440            [ "armv7f"      , CPU_TYPE_ARM       , 10           ],
441            [ "armv7s"      , CPU_TYPE_ARM       , 11           ],
442            [ "armv7k"      , CPU_TYPE_ARM       , 12           ],
443            [ "armv7m"      , CPU_TYPE_ARM       , 15           ],
444            [ "armv7em"     , CPU_TYPE_ARM       , 16           ],
445            [ "ppc"         , CPU_TYPE_POWERPC   , CPU_TYPE_ANY ],
446            [ "ppc"         , CPU_TYPE_POWERPC   , 0            ],
447            [ "ppc601"      , CPU_TYPE_POWERPC   , 1            ],
448            [ "ppc602"      , CPU_TYPE_POWERPC   , 2            ],
449            [ "ppc603"      , CPU_TYPE_POWERPC   , 3            ],
450            [ "ppc603e"     , CPU_TYPE_POWERPC   , 4            ],
451            [ "ppc603ev"    , CPU_TYPE_POWERPC   , 5            ],
452            [ "ppc604"      , CPU_TYPE_POWERPC   , 6            ],
453            [ "ppc604e"     , CPU_TYPE_POWERPC   , 7            ],
454            [ "ppc620"      , CPU_TYPE_POWERPC   , 8            ],
455            [ "ppc750"      , CPU_TYPE_POWERPC   , 9            ],
456            [ "ppc7400"     , CPU_TYPE_POWERPC   , 10           ],
457            [ "ppc7450"     , CPU_TYPE_POWERPC   , 11           ],
458            [ "ppc970"      , CPU_TYPE_POWERPC   , 100          ],
459            [ "ppc64"       , CPU_TYPE_POWERPC64 , 0            ],
460            [ "ppc970-64"   , CPU_TYPE_POWERPC64 , 100          ],
461            [ "i386"        , CPU_TYPE_I386      , 3            ],
462            [ "i486"        , CPU_TYPE_I386      , 4            ],
463            [ "i486sx"      , CPU_TYPE_I386      , 0x84         ],
464            [ "i386"        , CPU_TYPE_I386      , CPU_TYPE_ANY ],
465            [ "x86_64"      , CPU_TYPE_X86_64    , 3            ],
466            [ "x86_64"      , CPU_TYPE_X86_64    , CPU_TYPE_ANY ],
467        ]
468
469        def __str__(self):
470            for info in self.cpu_infos:
471                if self.cpu == info[1] and (self.sub & 0x00ffffff) == info[2]:
472                    return info[0]
473            return "{0}.{1}".format(self.cpu,self.sub)
474
475
476    class Magic(dict_utils.Enum):
477
478        enum = {
479            'MH_MAGIC'      : MH_MAGIC,
480            'MH_CIGAM'      : MH_CIGAM,
481            'MH_MAGIC_64'   : MH_MAGIC_64,
482            'MH_CIGAM_64'   : MH_CIGAM_64,
483            'FAT_MAGIC'     : FAT_MAGIC,
484            'FAT_CIGAM'     : FAT_CIGAM
485        }
486
487        def __init__(self, initial_value = 0):
488            dict_utils.Enum.__init__(self, initial_value, self.enum)
489
490        def is_skinny_mach_file(self):
491            return self.value == MH_MAGIC or self.value == MH_CIGAM or self.value == MH_MAGIC_64 or self.value == MH_CIGAM_64
492
493        def is_universal_mach_file(self):
494            return self.value == FAT_MAGIC or self.value == FAT_CIGAM
495
496        def unpack(self, data):
497            data.set_byte_order('native')
498            self.value = data.get_uint32();
499
500        def get_byte_order(self):
501            if self.value == MH_CIGAM or self.value == MH_CIGAM_64 or self.value == FAT_CIGAM:
502                return swap_unpack_char()
503            else:
504                return '='
505
506        def is_64_bit(self):
507            return self.value == MH_MAGIC_64 or self.value == MH_CIGAM_64
508
509    def __init__(self):
510        self.magic = Mach.Magic()
511        self.content = None
512        self.path = None
513
514    def extract (self, path, extractor):
515        self.path = path;
516        self.unpack(extractor)
517
518    def parse(self, path):
519        self.path = path;
520        try:
521            f = open(self.path)
522            file_extractor = file_extract.FileExtract(f, '=')
523            self.unpack(file_extractor)
524            #f.close()
525        except IOError as (errno, strerror):
526            print "I/O error({0}): {1}".format(errno, strerror)
527        except ValueError:
528            print "Could not convert data to an integer."
529        except:
530            print "Unexpected error:", sys.exc_info()[0]
531            raise
532
533    def compare(self, rhs):
534        self.content.compare(rhs.content)
535
536    def dump(self, options = None):
537        self.content.dump(options)
538
539    def dump_header(self, dump_description = True, options = None):
540        self.content.dump_header(dump_description, options)
541
542    def dump_load_commands(self, dump_description = True, options = None):
543        self.content.dump_load_commands(dump_description, options)
544
545    def dump_sections(self, dump_description = True, options = None):
546        self.content.dump_sections(dump_description, options)
547
548    def dump_section_contents(self, options):
549        self.content.dump_section_contents(options)
550
551    def dump_symtab(self, dump_description = True, options = None):
552        self.content.dump_symtab(dump_description, options)
553
554    def dump_symbol_names_matching_regex(self, regex, file=None):
555        self.content.dump_symbol_names_matching_regex(regex, file)
556
557    def description(self):
558        return self.content.description()
559
560    def unpack(self, data):
561        self.magic.unpack(data)
562        if self.magic.is_skinny_mach_file():
563            self.content = Mach.Skinny(self.path)
564        elif self.magic.is_universal_mach_file():
565            self.content = Mach.Universal(self.path)
566        else:
567            self.content = None
568
569        if self.content != None:
570            self.content.unpack(data, self.magic)
571
572    def is_valid(self):
573        return self.content != None
574
575    class Universal:
576
577        def __init__(self, path):
578            self.path       = path
579            self.type       = 'universal'
580            self.file_off   = 0
581            self.magic      = None
582            self.nfat_arch  = 0
583            self.archs      = list()
584
585        def description(self):
586            s = '%#8.8x: %s (' % (self.file_off, self.path)
587            archs_string = ''
588            for arch in self.archs:
589                if len(archs_string):
590                    archs_string += ', '
591                archs_string += '%s' % arch.arch
592            s += archs_string
593            s += ')'
594            return s
595
596        def unpack(self, data, magic = None):
597            self.file_off = data.tell()
598            if magic is None:
599                self.magic = Mach.Magic()
600                self.magic.unpack(data)
601            else:
602                self.magic = magic
603                self.file_off = self.file_off - 4
604            # Universal headers are always in big endian
605            data.set_byte_order('big')
606            self.nfat_arch = data.get_uint32()
607            for i in range(self.nfat_arch):
608                self.archs.append(Mach.Universal.ArchInfo())
609                self.archs[i].unpack(data)
610            for i in range(self.nfat_arch):
611                self.archs[i].mach = Mach.Skinny(self.path)
612                data.seek (self.archs[i].offset, 0)
613                skinny_magic = Mach.Magic()
614                skinny_magic.unpack (data)
615                self.archs[i].mach.unpack(data, skinny_magic)
616
617        def compare(self, rhs):
618            print 'error: comparing two universal files is not supported yet'
619            return False
620
621        def dump(self, options):
622            if options.dump_header:
623                print
624                print "Universal Mach File: magic = %s, nfat_arch = %u" % (self.magic, self.nfat_arch)
625                print
626            if self.nfat_arch > 0:
627                if options.dump_header:
628                    self.archs[0].dump_header(True, options)
629                    for i in range(self.nfat_arch):
630                        self.archs[i].dump_flat(options)
631                if options.dump_header:
632                    print
633                for i in range(self.nfat_arch):
634                    self.archs[i].mach.dump(options)
635
636        def dump_header(self, dump_description = True, options = None):
637            if dump_description:
638                print self.description()
639            for i in range(self.nfat_arch):
640                self.archs[i].mach.dump_header(True, options)
641                print
642
643        def dump_load_commands(self, dump_description = True, options = None):
644            if dump_description:
645                print self.description()
646            for i in range(self.nfat_arch):
647                self.archs[i].mach.dump_load_commands(True, options)
648                print
649
650        def dump_sections(self, dump_description = True, options = None):
651            if dump_description:
652                print self.description()
653            for i in range(self.nfat_arch):
654                self.archs[i].mach.dump_sections(True, options)
655                print
656
657        def dump_section_contents(self, options):
658            for i in range(self.nfat_arch):
659                self.archs[i].mach.dump_section_contents(options)
660                print
661
662        def dump_symtab(self, dump_description = True, options = None):
663            if dump_description:
664                print self.description()
665            for i in range(self.nfat_arch):
666                self.archs[i].mach.dump_symtab(True, options)
667                print
668
669        def dump_symbol_names_matching_regex(self, regex, file=None):
670            for i in range(self.nfat_arch):
671                self.archs[i].mach.dump_symbol_names_matching_regex(regex, file)
672
673        class ArchInfo:
674
675            def __init__(self):
676                self.arch   = Mach.Arch(0,0)
677                self.offset = 0
678                self.size   = 0
679                self.align  = 0
680                self.mach   = None
681
682            def unpack(self, data):
683                # Universal headers are always in big endian
684                data.set_byte_order('big')
685                self.arch.cpu, self.arch.sub, self.offset, self.size, self.align = data.get_n_uint32(5)
686
687            def dump_header(self, dump_description = True, options = None):
688                if options.verbose:
689                    print "CPU        SUBTYPE    OFFSET     SIZE       ALIGN"
690                    print "---------- ---------- ---------- ---------- ----------"
691                else:
692                    print "ARCH       FILEOFFSET FILESIZE   ALIGN"
693                    print "---------- ---------- ---------- ----------"
694            def dump_flat(self, options):
695                if options.verbose:
696                    print "%#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % (self.arch.cpu, self.arch.sub, self.offset, self.size, self.align)
697                else:
698                    print "%-10s %#8.8x %#8.8x %#8.8x" % (self.arch, self.offset, self.size, self.align)
699            def dump(self):
700                print "   cputype: %#8.8x" % self.arch.cpu
701                print "cpusubtype: %#8.8x" % self.arch.sub
702                print "    offset: %#8.8x" % self.offset
703                print "      size: %#8.8x" % self.size
704                print "     align: %#8.8x" % self.align
705            def __str__(self):
706                return "Mach.Universal.ArchInfo: %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % (self.arch.cpu, self.arch.sub, self.offset, self.size, self.align)
707            def __repr__(self):
708                return "Mach.Universal.ArchInfo: %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % (self.arch.cpu, self.arch.sub, self.offset, self.size, self.align)
709
710    class Flags:
711
712        def __init__(self, b):
713            self.bits = b
714
715        def __str__(self):
716            s = ''
717            if self.bits & MH_NOUNDEFS:
718                s += 'MH_NOUNDEFS | '
719            if self.bits & MH_INCRLINK:
720                s += 'MH_INCRLINK | '
721            if self.bits & MH_DYLDLINK:
722                s += 'MH_DYLDLINK | '
723            if self.bits & MH_BINDATLOAD:
724                s += 'MH_BINDATLOAD | '
725            if self.bits & MH_PREBOUND:
726                s += 'MH_PREBOUND | '
727            if self.bits & MH_SPLIT_SEGS:
728                s += 'MH_SPLIT_SEGS | '
729            if self.bits & MH_LAZY_INIT:
730                s += 'MH_LAZY_INIT | '
731            if self.bits & MH_TWOLEVEL:
732                s += 'MH_TWOLEVEL | '
733            if self.bits & MH_FORCE_FLAT:
734                s += 'MH_FORCE_FLAT | '
735            if self.bits & MH_NOMULTIDEFS:
736                s += 'MH_NOMULTIDEFS | '
737            if self.bits & MH_NOFIXPREBINDING:
738                s += 'MH_NOFIXPREBINDING | '
739            if self.bits & MH_PREBINDABLE:
740                s += 'MH_PREBINDABLE | '
741            if self.bits & MH_ALLMODSBOUND:
742                s += 'MH_ALLMODSBOUND | '
743            if self.bits & MH_SUBSECTIONS_VIA_SYMBOLS:
744                s += 'MH_SUBSECTIONS_VIA_SYMBOLS | '
745            if self.bits & MH_CANONICAL:
746                s += 'MH_CANONICAL | '
747            if self.bits & MH_WEAK_DEFINES:
748                s += 'MH_WEAK_DEFINES | '
749            if self.bits & MH_BINDS_TO_WEAK:
750                s += 'MH_BINDS_TO_WEAK | '
751            if self.bits & MH_ALLOW_STACK_EXECUTION:
752                s += 'MH_ALLOW_STACK_EXECUTION | '
753            if self.bits & MH_ROOT_SAFE:
754                s += 'MH_ROOT_SAFE | '
755            if self.bits & MH_SETUID_SAFE:
756                s += 'MH_SETUID_SAFE | '
757            if self.bits & MH_NO_REEXPORTED_DYLIBS:
758                s += 'MH_NO_REEXPORTED_DYLIBS | '
759            if self.bits & MH_PIE:
760                s += 'MH_PIE | '
761            if self.bits & MH_DEAD_STRIPPABLE_DYLIB:
762                s += 'MH_DEAD_STRIPPABLE_DYLIB | '
763            if self.bits & MH_HAS_TLV_DESCRIPTORS:
764                s += 'MH_HAS_TLV_DESCRIPTORS | '
765            if self.bits & MH_NO_HEAP_EXECUTION:
766                s += 'MH_NO_HEAP_EXECUTION | '
767            # Strip the trailing " |" if we have any flags
768            if len(s) > 0:
769                s = s[0:-2]
770            return s
771
772    class FileType(dict_utils.Enum):
773
774        enum = {
775            'MH_OBJECT'         : MH_OBJECT        ,
776            'MH_EXECUTE'        : MH_EXECUTE       ,
777            'MH_FVMLIB'         : MH_FVMLIB        ,
778            'MH_CORE'           : MH_CORE          ,
779            'MH_PRELOAD'        : MH_PRELOAD       ,
780            'MH_DYLIB'          : MH_DYLIB         ,
781            'MH_DYLINKER'       : MH_DYLINKER      ,
782            'MH_BUNDLE'         : MH_BUNDLE        ,
783            'MH_DYLIB_STUB'     : MH_DYLIB_STUB    ,
784            'MH_DSYM'           : MH_DSYM          ,
785            'MH_KEXT_BUNDLE'    : MH_KEXT_BUNDLE
786        }
787
788        def __init__(self, initial_value = 0):
789            dict_utils.Enum.__init__(self, initial_value, self.enum)
790
791    class Skinny:
792
793        def __init__(self, path):
794            self.path       = path
795            self.type       = 'skinny'
796            self.data       = None
797            self.file_off   = 0
798            self.magic      = 0
799            self.arch       = Mach.Arch(0,0)
800            self.filetype   = Mach.FileType(0)
801            self.ncmds      = 0
802            self.sizeofcmds = 0
803            self.flags      = Mach.Flags(0)
804            self.uuid       = None
805            self.commands   = list()
806            self.segments   = list()
807            self.sections   = list()
808            self.symbols    = list()
809            self.sections.append(Mach.Section())
810
811        def description(self):
812            return '%#8.8x: %s (%s)' % (self.file_off, self.path, self.arch)
813
814        def unpack(self, data, magic = None):
815            self.data = data
816            self.file_off = data.tell()
817            if magic is None:
818                self.magic = Mach.Magic()
819                self.magic.unpack(data)
820            else:
821                self.magic = magic
822                self.file_off = self.file_off - 4
823            data.set_byte_order(self.magic.get_byte_order())
824            self.arch.cpu, self.arch.sub, self.filetype.value, self.ncmds, self.sizeofcmds, bits = data.get_n_uint32(6)
825            self.flags.bits = bits
826
827            if self.is_64_bit():
828                data.get_uint32() # Skip reserved word in mach_header_64
829
830            for i in range(0,self.ncmds):
831                lc = self.unpack_load_command (data)
832                self.commands.append (lc)
833
834        def get_data(self):
835            if self.data:
836                self.data.set_byte_order(self.magic.get_byte_order())
837                return self.data
838            return None
839
840        def unpack_load_command (self, data):
841            lc = Mach.LoadCommand()
842            lc.unpack (self, data)
843            lc_command = lc.command.get_enum_value();
844            if (lc_command == LC_SEGMENT or
845                lc_command == LC_SEGMENT_64):
846                lc = Mach.SegmentLoadCommand(lc)
847                lc.unpack(self, data)
848            elif (lc_command == LC_LOAD_DYLIB or
849                  lc_command == LC_ID_DYLIB or
850                  lc_command == LC_LOAD_WEAK_DYLIB or
851                  lc_command == LC_REEXPORT_DYLIB):
852                lc = Mach.DylibLoadCommand(lc)
853                lc.unpack(self, data)
854            elif (lc_command == LC_LOAD_DYLINKER or
855                  lc_command == LC_SUB_FRAMEWORK or
856                  lc_command == LC_SUB_CLIENT or
857                  lc_command == LC_SUB_UMBRELLA or
858                  lc_command == LC_SUB_LIBRARY or
859                  lc_command == LC_ID_DYLINKER or
860                  lc_command == LC_RPATH):
861                lc = Mach.LoadDYLDLoadCommand(lc)
862                lc.unpack(self, data)
863            elif (lc_command == LC_DYLD_INFO_ONLY):
864                lc = Mach.DYLDInfoOnlyLoadCommand(lc)
865                lc.unpack(self, data)
866            elif (lc_command == LC_SYMTAB):
867                lc = Mach.SymtabLoadCommand(lc)
868                lc.unpack(self, data)
869            elif (lc_command == LC_DYSYMTAB):
870                lc = Mach.DYLDSymtabLoadCommand(lc)
871                lc.unpack(self, data)
872            elif (lc_command == LC_UUID):
873                lc = Mach.UUIDLoadCommand(lc)
874                lc.unpack(self, data)
875            elif (lc_command == LC_CODE_SIGNATURE or
876                  lc_command == LC_SEGMENT_SPLIT_INFO or
877                  lc_command == LC_FUNCTION_STARTS):
878                lc = Mach.DataBlobLoadCommand(lc)
879                lc.unpack(self, data)
880            elif (lc_command == LC_UNIXTHREAD):
881                lc = Mach.UnixThreadLoadCommand(lc)
882                lc.unpack(self, data)
883            elif (lc_command == LC_ENCRYPTION_INFO):
884                lc = Mach.EncryptionInfoLoadCommand(lc)
885                lc.unpack(self, data)
886            lc.skip(data)
887            return lc
888
889        def compare(self, rhs):
890            print "\nComparing:"
891            print "a) %s %s" % (self.arch, self.path)
892            print "b) %s %s" % (rhs.arch, rhs.path)
893            result = True
894            if self.type == rhs.type:
895                for lhs_section in self.sections[1:]:
896                    rhs_section = rhs.get_section_by_section(lhs_section)
897                    if rhs_section:
898                        print 'comparing %s.%s...' % (lhs_section.segname, lhs_section.sectname),
899                        sys.stdout.flush()
900                        lhs_data = lhs_section.get_contents (self)
901                        rhs_data = rhs_section.get_contents (rhs)
902                        if lhs_data and rhs_data:
903                            if lhs_data == rhs_data:
904                                print 'ok'
905                            else:
906                                lhs_data_len = len(lhs_data)
907                                rhs_data_len = len(rhs_data)
908                                # if lhs_data_len < rhs_data_len:
909                                #     if lhs_data == rhs_data[0:lhs_data_len]:
910                                #         print 'section data for %s matches the first %u bytes' % (lhs_section.sectname, lhs_data_len)
911                                #     else:
912                                #         # TODO: check padding
913                                #         result = False
914                                # elif lhs_data_len > rhs_data_len:
915                                #     if lhs_data[0:rhs_data_len] == rhs_data:
916                                #         print 'section data for %s matches the first %u bytes' % (lhs_section.sectname, lhs_data_len)
917                                #     else:
918                                #         # TODO: check padding
919                                #         result = False
920                                # else:
921                                result = False
922                                print 'error: sections differ'
923                                #print 'a) %s' % (lhs_section)
924                                # dump_hex_byte_string_diff(0, lhs_data, rhs_data)
925                                #print 'b) %s' % (rhs_section)
926                                # dump_hex_byte_string_diff(0, rhs_data, lhs_data)
927                        elif lhs_data and not rhs_data:
928                            print 'error: section data missing from b:'
929                            print 'a) %s' % (lhs_section)
930                            print 'b) %s' % (rhs_section)
931                            result = False
932                        elif not lhs_data and rhs_data:
933                            print 'error: section data missing from a:'
934                            print 'a) %s' % (lhs_section)
935                            print 'b) %s' % (rhs_section)
936                            result = False
937                        elif lhs_section.offset or rhs_section.offset:
938                            print 'error: section data missing for both a and b:'
939                            print 'a) %s' % (lhs_section)
940                            print 'b) %s' % (rhs_section)
941                            result = False
942                        else:
943                            print 'ok'
944                    else:
945                        result = False
946                        print 'error: section %s is missing in %s' % (lhs_section.sectname, rhs.path)
947            else:
948                print 'error: comaparing a %s mach-o file with a %s mach-o file is not supported' % (self.type, rhs.type)
949                result = False
950            if not result:
951                print 'error: mach files differ'
952            return result
953        def dump_header(self, dump_description = True, options = None):
954            if options.verbose:
955                print "MAGIC      CPU        SUBTYPE    FILETYPE   NUM CMDS SIZE CMDS  FLAGS"
956                print "---------- ---------- ---------- ---------- -------- ---------- ----------"
957            else:
958                print "MAGIC        ARCH       FILETYPE       NUM CMDS SIZE CMDS  FLAGS"
959                print "------------ ---------- -------------- -------- ---------- ----------"
960
961        def dump_flat(self, options):
962            if options.verbose:
963                print "%#8.8x %#8.8x %#8.8x %#8.8x %#8u %#8.8x %#8.8x" % (self.magic, self.arch.cpu , self.arch.sub, self.filetype.value, self.ncmds, self.sizeofcmds, self.flags.bits)
964            else:
965                print "%-12s %-10s %-14s %#8u %#8.8x %s" % (self.magic, self.arch, self.filetype, self.ncmds, self.sizeofcmds, self.flags)
966
967        def dump(self, options):
968            if options.dump_header:
969                self.dump_header(True, options)
970            if options.dump_load_commands:
971                self.dump_load_commands(False, options)
972            if options.dump_sections:
973                self.dump_sections(False, options)
974            if options.section_names:
975                self.dump_section_contents(options)
976            if options.dump_symtab:
977                self.get_symtab()
978                if len(self.symbols):
979                    self.dump_sections(False, options)
980                else:
981                    print "No symbols"
982            if options.find_mangled:
983                self.dump_symbol_names_matching_regex (re.compile('^_?_Z'))
984
985        def dump_header(self, dump_description = True, options = None):
986            if dump_description:
987                print self.description()
988            print "Mach Header"
989            print "       magic: %#8.8x %s" % (self.magic.value, self.magic)
990            print "     cputype: %#8.8x %s" % (self.arch.cpu, self.arch)
991            print "  cpusubtype: %#8.8x" % self.arch.sub
992            print "    filetype: %#8.8x %s" % (self.filetype.get_enum_value(), self.filetype.get_enum_name())
993            print "       ncmds: %#8.8x %u" % (self.ncmds, self.ncmds)
994            print "  sizeofcmds: %#8.8x" % self.sizeofcmds
995            print "       flags: %#8.8x %s" % (self.flags.bits, self.flags)
996
997        def dump_load_commands(self, dump_description = True, options = None):
998            if dump_description:
999                print self.description()
1000            for lc in self.commands:
1001                print lc
1002
1003        def get_section_by_name (self, name):
1004            for section in self.sections:
1005                if section.sectname and section.sectname == name:
1006                    return section
1007            return None
1008
1009        def get_section_by_section (self, other_section):
1010            for section in self.sections:
1011                if section.sectname == other_section.sectname and section.segname == other_section.segname:
1012                    return section
1013            return None
1014
1015        def dump_sections(self, dump_description = True, options = None):
1016            if dump_description:
1017                print self.description()
1018            num_sections = len(self.sections)
1019            if num_sections > 1:
1020                self.sections[1].dump_header()
1021                for sect_idx in range(1,num_sections):
1022                    print "%s" % self.sections[sect_idx]
1023
1024        def dump_section_contents(self, options):
1025            saved_section_to_disk = False
1026            for sectname in options.section_names:
1027                section = self.get_section_by_name(sectname)
1028                if section:
1029                    sect_bytes = section.get_contents (self)
1030                    if options.outfile:
1031                        if not saved_section_to_disk:
1032                            outfile = open(options.outfile, 'w')
1033                            if options.extract_modules:
1034                                #print "Extracting modules from mach file..."
1035                                data = file_extract.FileExtract(StringIO.StringIO(sect_bytes), self.data.byte_order)
1036                                version = data.get_uint32()
1037                                num_modules = data.get_uint32()
1038                                #print "version = %u, num_modules = %u" % (version, num_modules)
1039                                for i in range(num_modules):
1040                                    data_offset = data.get_uint64()
1041                                    data_size = data.get_uint64()
1042                                    name_offset = data.get_uint32()
1043                                    language = data.get_uint32()
1044                                    flags = data.get_uint32()
1045                                    data.seek (name_offset)
1046                                    module_name = data.get_c_string()
1047                                    #print "module[%u] data_offset = %#16.16x, data_size = %#16.16x, name_offset = %#16.16x (%s), language = %u, flags = %#x" % (i, data_offset, data_size, name_offset, module_name, language, flags)
1048                                    data.seek (data_offset)
1049                                    outfile.write(data.read_size (data_size))
1050                            else:
1051                                print "Saving section %s to '%s'" % (sectname, options.outfile)
1052                                outfile.write(sect_bytes)
1053                            outfile.close()
1054                            saved_section_to_disk = True
1055                        else:
1056                            print "error: you can only save a single section to disk at a time, skipping section '%s'" % (sectname)
1057                    else:
1058                        print 'section %s:\n' % (sectname)
1059                        section.dump_header()
1060                        print '%s\n' % (section)
1061                        dump_memory (0, sect_bytes, options.max_count, 16)
1062                else:
1063                    print 'error: no section named "%s" was found' % (sectname)
1064
1065        def get_segment(self, segname):
1066            if len(self.segments) == 1 and self.segments[0].segname == '':
1067                return self.segments[0]
1068            for segment in self.segments:
1069                if segment.segname == segname:
1070                    return segment
1071            return None
1072
1073        def get_first_load_command(self, lc_enum_value):
1074            for lc in self.commands:
1075                if lc.command.value == lc_enum_value:
1076                    return lc
1077            return None
1078
1079        def get_symtab(self):
1080            if self.data and not self.symbols:
1081                lc_symtab = self.get_first_load_command (LC_SYMTAB)
1082                if lc_symtab:
1083                    symtab_offset = self.file_off
1084                    if self.data.is_in_memory():
1085                        linkedit_segment = self.get_segment('__LINKEDIT')
1086                        if linkedit_segment:
1087                            linkedit_vmaddr = linkedit_segment.vmaddr
1088                            linkedit_fileoff = linkedit_segment.fileoff
1089                            symtab_offset = linkedit_vmaddr + lc_symtab.symoff - linkedit_fileoff
1090                            symtab_offset = linkedit_vmaddr + lc_symtab.stroff - linkedit_fileoff
1091                    else:
1092                        symtab_offset += lc_symtab.symoff
1093
1094                    self.data.seek (symtab_offset)
1095                    is_64 = self.is_64_bit()
1096                    for i in range(lc_symtab.nsyms):
1097                        nlist = Mach.NList()
1098                        nlist.unpack (self, self.data, lc_symtab)
1099                        self.symbols.append(nlist)
1100                else:
1101                    print "no LC_SYMTAB"
1102
1103        def dump_symtab(self, dump_description = True, options = None):
1104            self.get_symtab()
1105            if dump_description:
1106                print self.description()
1107            for i, symbol in enumerate(self.symbols):
1108                print '[%5u] %s' % (i, symbol)
1109
1110        def dump_symbol_names_matching_regex(self, regex, file=None):
1111            self.get_symtab()
1112            for symbol in self.symbols:
1113                if symbol.name and regex.search (symbol.name):
1114                    print symbol.name
1115                    if file:
1116                        file.write('%s\n' % (symbol.name))
1117
1118        def is_64_bit(self):
1119            return self.magic.is_64_bit()
1120
1121    class LoadCommand:
1122        class Command(dict_utils.Enum):
1123            enum = {
1124                'LC_SEGMENT'                : LC_SEGMENT,
1125                'LC_SYMTAB'                 : LC_SYMTAB,
1126                'LC_SYMSEG'                 : LC_SYMSEG,
1127                'LC_THREAD'                 : LC_THREAD,
1128                'LC_UNIXTHREAD'             : LC_UNIXTHREAD,
1129                'LC_LOADFVMLIB'             : LC_LOADFVMLIB,
1130                'LC_IDFVMLIB'               : LC_IDFVMLIB,
1131                'LC_IDENT'                  : LC_IDENT,
1132                'LC_FVMFILE'                : LC_FVMFILE,
1133                'LC_PREPAGE'                : LC_PREPAGE,
1134                'LC_DYSYMTAB'               : LC_DYSYMTAB,
1135                'LC_LOAD_DYLIB'             : LC_LOAD_DYLIB,
1136                'LC_ID_DYLIB'               : LC_ID_DYLIB,
1137                'LC_LOAD_DYLINKER'          : LC_LOAD_DYLINKER,
1138                'LC_ID_DYLINKER'            : LC_ID_DYLINKER,
1139                'LC_PREBOUND_DYLIB'         : LC_PREBOUND_DYLIB,
1140                'LC_ROUTINES'               : LC_ROUTINES,
1141                'LC_SUB_FRAMEWORK'          : LC_SUB_FRAMEWORK,
1142                'LC_SUB_UMBRELLA'           : LC_SUB_UMBRELLA,
1143                'LC_SUB_CLIENT'             : LC_SUB_CLIENT,
1144                'LC_SUB_LIBRARY'            : LC_SUB_LIBRARY,
1145                'LC_TWOLEVEL_HINTS'         : LC_TWOLEVEL_HINTS,
1146                'LC_PREBIND_CKSUM'          : LC_PREBIND_CKSUM,
1147                'LC_LOAD_WEAK_DYLIB'        : LC_LOAD_WEAK_DYLIB,
1148                'LC_SEGMENT_64'             : LC_SEGMENT_64,
1149                'LC_ROUTINES_64'            : LC_ROUTINES_64,
1150                'LC_UUID'                   : LC_UUID,
1151                'LC_RPATH'                  : LC_RPATH,
1152                'LC_CODE_SIGNATURE'         : LC_CODE_SIGNATURE,
1153                'LC_SEGMENT_SPLIT_INFO'     : LC_SEGMENT_SPLIT_INFO,
1154                'LC_REEXPORT_DYLIB'         : LC_REEXPORT_DYLIB,
1155                'LC_LAZY_LOAD_DYLIB'        : LC_LAZY_LOAD_DYLIB,
1156                'LC_ENCRYPTION_INFO'        : LC_ENCRYPTION_INFO,
1157                'LC_DYLD_INFO'              : LC_DYLD_INFO,
1158                'LC_DYLD_INFO_ONLY'         : LC_DYLD_INFO_ONLY,
1159                'LC_LOAD_UPWARD_DYLIB'      : LC_LOAD_UPWARD_DYLIB,
1160                'LC_VERSION_MIN_MACOSX'     : LC_VERSION_MIN_MACOSX,
1161                'LC_VERSION_MIN_IPHONEOS'   : LC_VERSION_MIN_IPHONEOS,
1162                'LC_FUNCTION_STARTS'        : LC_FUNCTION_STARTS,
1163                'LC_DYLD_ENVIRONMENT'       : LC_DYLD_ENVIRONMENT
1164            }
1165
1166            def __init__(self, initial_value = 0):
1167                dict_utils.Enum.__init__(self, initial_value, self.enum)
1168
1169
1170        def __init__(self, c=None, l=0,o=0):
1171            if c != None:
1172                self.command = c
1173            else:
1174                self.command = Mach.LoadCommand.Command(0)
1175            self.length = l
1176            self.file_off = o
1177
1178        def unpack(self, mach_file, data):
1179            self.file_off = data.tell()
1180            self.command.value, self.length = data.get_n_uint32(2)
1181
1182        def skip(self, data):
1183            data.seek (self.file_off + self.length, 0)
1184
1185        def __str__(self):
1186            lc_name = self.command.get_enum_name()
1187            return '%#8.8x: <%#4.4x> %-24s' % (self.file_off, self.length, lc_name)
1188
1189    class Section:
1190
1191        def __init__(self):
1192            self.index = 0
1193            self.is_64 = False
1194            self.sectname = None
1195            self.segname = None
1196            self.addr = 0
1197            self.size = 0
1198            self.offset = 0
1199            self.align = 0
1200            self.reloff = 0
1201            self.nreloc = 0
1202            self.flags = 0
1203            self.reserved1 = 0
1204            self.reserved2 = 0
1205            self.reserved3 = 0
1206
1207        def unpack(self, is_64, data):
1208            self.is_64 = is_64
1209            self.sectname = data.get_fixed_length_c_string (16, '', True)
1210            self.segname = data.get_fixed_length_c_string (16, '', True)
1211            if self.is_64:
1212                self.addr, self.size = data.get_n_uint64(2)
1213                self.offset, self.align, self.reloff, self.nreloc, self.flags, self.reserved1, self.reserved2, self.reserved3 = data.get_n_uint32(8)
1214            else:
1215                self.addr, self.size = data.get_n_uint32(2)
1216                self.offset, self.align, self.reloff, self.nreloc, self.flags, self.reserved1, self.reserved2 = data.get_n_uint32(7)
1217
1218        def dump_header(self):
1219            if self.is_64:
1220                print "INDEX ADDRESS            SIZE               OFFSET     ALIGN      RELOFF     NRELOC     FLAGS      RESERVED1  RESERVED2  RESERVED3  NAME";
1221                print "===== ------------------ ------------------ ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------------------";
1222            else:
1223                print "INDEX ADDRESS    SIZE       OFFSET     ALIGN      RELOFF     NRELOC     FLAGS      RESERVED1  RESERVED2  NAME";
1224                print "===== ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------------------";
1225
1226        def __str__(self):
1227            if self.is_64:
1228                return "[%3u] %#16.16x %#16.16x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %s.%s" % (self.index, self.addr, self.size, self.offset, self.align, self.reloff, self.nreloc, self.flags, self.reserved1, self.reserved2, self.reserved3, self.segname, self.sectname)
1229            else:
1230                return "[%3u] %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %s.%s" % (self.index, self.addr, self.size, self.offset, self.align, self.reloff, self.nreloc, self.flags, self.reserved1, self.reserved2, self.segname, self.sectname)
1231
1232        def get_contents(self, mach_file):
1233            '''Get the section contents as a python string'''
1234            if self.size > 0 and mach_file.get_segment(self.segname).filesize > 0:
1235                data = mach_file.get_data()
1236                if data:
1237                    section_data_offset = mach_file.file_off + self.offset
1238                    #print '%s.%s is at offset 0x%x with size 0x%x' % (self.segname, self.sectname, section_data_offset, self.size)
1239                    data.push_offset_and_seek (section_data_offset)
1240                    bytes = data.read_size(self.size)
1241                    data.pop_offset_and_seek()
1242                    return bytes
1243            return None
1244
1245    class DylibLoadCommand(LoadCommand):
1246        def __init__(self, lc):
1247            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1248            self.name = None
1249            self.timestamp = 0
1250            self.current_version = 0
1251            self.compatibility_version = 0
1252
1253        def unpack(self, mach_file, data):
1254            byte_order_char = mach_file.magic.get_byte_order()
1255            name_offset, self.timestamp, self.current_version, self.compatibility_version = data.get_n_uint32(4)
1256            data.seek(self.file_off + name_offset, 0)
1257            self.name = data.get_fixed_length_c_string(self.length - 24)
1258
1259        def __str__(self):
1260            s = Mach.LoadCommand.__str__(self);
1261            s += "%#8.8x %#8.8x %#8.8x " % (self.timestamp, self.current_version, self.compatibility_version)
1262            s += self.name
1263            return s
1264
1265    class LoadDYLDLoadCommand(LoadCommand):
1266        def __init__(self, lc):
1267            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1268            self.name = None
1269
1270        def unpack(self, mach_file, data):
1271            data.get_uint32()
1272            self.name = data.get_fixed_length_c_string(self.length - 12)
1273
1274        def __str__(self):
1275            s = Mach.LoadCommand.__str__(self);
1276            s += "%s" % self.name
1277            return s
1278
1279    class UnixThreadLoadCommand(LoadCommand):
1280        class ThreadState:
1281            def __init__(self):
1282                self.flavor = 0
1283                self.count = 0
1284                self.register_values = list()
1285
1286            def unpack(self, data):
1287                self.flavor, self.count = data.get_n_uint32(2)
1288                self.register_values = data.get_n_uint32(self.count)
1289
1290            def __str__(self):
1291                s = "flavor = %u, count = %u, regs =" % (self.flavor, self.count)
1292                i = 0
1293                for register_value in self.register_values:
1294                    if i % 8 == 0:
1295                        s += "\n                                            "
1296                    s += " %#8.8x" % register_value
1297                    i += 1
1298                return s
1299
1300        def __init__(self, lc):
1301            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1302            self.reg_sets = list()
1303
1304        def unpack(self, mach_file, data):
1305            reg_set = Mach.UnixThreadLoadCommand.ThreadState()
1306            reg_set.unpack (data)
1307            self.reg_sets.append(reg_set)
1308
1309        def __str__(self):
1310            s = Mach.LoadCommand.__str__(self);
1311            for reg_set in self.reg_sets:
1312                s += "%s" % reg_set
1313            return s
1314
1315    class DYLDInfoOnlyLoadCommand(LoadCommand):
1316        def __init__(self, lc):
1317            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1318            self.rebase_off = 0
1319            self.rebase_size = 0
1320            self.bind_off = 0
1321            self.bind_size = 0
1322            self.weak_bind_off = 0
1323            self.weak_bind_size = 0
1324            self.lazy_bind_off = 0
1325            self.lazy_bind_size = 0
1326            self.export_off = 0
1327            self.export_size = 0
1328
1329        def unpack(self, mach_file, data):
1330            byte_order_char = mach_file.magic.get_byte_order()
1331            self.rebase_off, self.rebase_size, self.bind_off, self.bind_size, self.weak_bind_off, self.weak_bind_size, self.lazy_bind_off, self.lazy_bind_size, self.export_off, self.export_size = data.get_n_uint32(10)
1332
1333        def __str__(self):
1334            s = Mach.LoadCommand.__str__(self);
1335            s += "rebase_off = %#8.8x, rebase_size = %u, " % (self.rebase_off, self.rebase_size)
1336            s += "bind_off = %#8.8x, bind_size = %u, " % (self.bind_off, self.bind_size)
1337            s += "weak_bind_off = %#8.8x, weak_bind_size = %u, " % (self.weak_bind_off, self.weak_bind_size)
1338            s += "lazy_bind_off = %#8.8x, lazy_bind_size = %u, " % (self.lazy_bind_off, self.lazy_bind_size)
1339            s += "export_off = %#8.8x, export_size = %u, " % (self.export_off, self.export_size)
1340            return s
1341
1342    class DYLDSymtabLoadCommand(LoadCommand):
1343        def __init__(self, lc):
1344            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1345            self.ilocalsym = 0
1346            self.nlocalsym = 0
1347            self.iextdefsym = 0
1348            self.nextdefsym = 0
1349            self.iundefsym = 0
1350            self.nundefsym = 0
1351            self.tocoff = 0
1352            self.ntoc = 0
1353            self.modtaboff = 0
1354            self.nmodtab = 0
1355            self.extrefsymoff = 0
1356            self.nextrefsyms = 0
1357            self.indirectsymoff = 0
1358            self.nindirectsyms = 0
1359            self.extreloff = 0
1360            self.nextrel = 0
1361            self.locreloff = 0
1362            self.nlocrel = 0
1363
1364        def unpack(self, mach_file, data):
1365            byte_order_char = mach_file.magic.get_byte_order()
1366            self.ilocalsym, self.nlocalsym, self.iextdefsym, self.nextdefsym, self.iundefsym, self.nundefsym, self.tocoff, self.ntoc, self.modtaboff, self.nmodtab, self.extrefsymoff, self.nextrefsyms, self.indirectsymoff, self.nindirectsyms, self.extreloff, self.nextrel, self.locreloff, self.nlocrel = data.get_n_uint32(18)
1367
1368        def __str__(self):
1369            s = Mach.LoadCommand.__str__(self);
1370            # s += "ilocalsym = %u, nlocalsym = %u, " % (self.ilocalsym, self.nlocalsym)
1371            # s += "iextdefsym = %u, nextdefsym = %u, " % (self.iextdefsym, self.nextdefsym)
1372            # s += "iundefsym %u, nundefsym = %u, " % (self.iundefsym, self.nundefsym)
1373            # s += "tocoff = %#8.8x, ntoc = %u, " % (self.tocoff, self.ntoc)
1374            # s += "modtaboff = %#8.8x, nmodtab = %u, " % (self.modtaboff, self.nmodtab)
1375            # s += "extrefsymoff = %#8.8x, nextrefsyms = %u, " % (self.extrefsymoff, self.nextrefsyms)
1376            # s += "indirectsymoff = %#8.8x, nindirectsyms = %u, " % (self.indirectsymoff, self.nindirectsyms)
1377            # s += "extreloff = %#8.8x, nextrel = %u, " % (self.extreloff, self.nextrel)
1378            # s += "locreloff = %#8.8x, nlocrel = %u" % (self.locreloff, self.nlocrel)
1379            s += "ilocalsym      = %-10u, nlocalsym     = %u\n" % (self.ilocalsym, self.nlocalsym)
1380            s += "                                             iextdefsym     = %-10u, nextdefsym    = %u\n" % (self.iextdefsym, self.nextdefsym)
1381            s += "                                             iundefsym      = %-10u, nundefsym     = %u\n" % (self.iundefsym, self.nundefsym)
1382            s += "                                             tocoff         = %#8.8x, ntoc          = %u\n" % (self.tocoff, self.ntoc)
1383            s += "                                             modtaboff      = %#8.8x, nmodtab       = %u\n" % (self.modtaboff, self.nmodtab)
1384            s += "                                             extrefsymoff   = %#8.8x, nextrefsyms   = %u\n" % (self.extrefsymoff, self.nextrefsyms)
1385            s += "                                             indirectsymoff = %#8.8x, nindirectsyms = %u\n" % (self.indirectsymoff, self.nindirectsyms)
1386            s += "                                             extreloff      = %#8.8x, nextrel       = %u\n" % (self.extreloff, self.nextrel)
1387            s += "                                             locreloff      = %#8.8x, nlocrel       = %u" % (self.locreloff, self.nlocrel)
1388            return s
1389
1390    class SymtabLoadCommand(LoadCommand):
1391        def __init__(self, lc):
1392            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1393            self.symoff = 0
1394            self.nsyms = 0
1395            self.stroff = 0
1396            self.strsize = 0
1397
1398        def unpack(self, mach_file, data):
1399            byte_order_char = mach_file.magic.get_byte_order()
1400            self.symoff, self.nsyms, self.stroff, self.strsize = data.get_n_uint32(4)
1401
1402        def __str__(self):
1403            s = Mach.LoadCommand.__str__(self);
1404            s += "symoff = %#8.8x, nsyms = %u, stroff = %#8.8x, strsize = %u" % (self.symoff, self.nsyms, self.stroff, self.strsize)
1405            return s
1406
1407
1408    class UUIDLoadCommand(LoadCommand):
1409        def __init__(self, lc):
1410            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1411            self.uuid = None
1412
1413        def unpack(self, mach_file, data):
1414            uuid_data = data.get_n_uint8(16)
1415            uuid_str = ''
1416            for byte in uuid_data:
1417                uuid_str += '%2.2x' % byte
1418            self.uuid = uuid.UUID(uuid_str)
1419            mach_file.uuid = self.uuid
1420
1421        def __str__(self):
1422            s = Mach.LoadCommand.__str__(self);
1423            s += self.uuid.__str__()
1424            return s
1425
1426    class DataBlobLoadCommand(LoadCommand):
1427        def __init__(self, lc):
1428            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1429            self.dataoff = 0
1430            self.datasize = 0
1431
1432        def unpack(self, mach_file, data):
1433            byte_order_char = mach_file.magic.get_byte_order()
1434            self.dataoff, self.datasize = data.get_n_uint32(2)
1435
1436        def __str__(self):
1437            s = Mach.LoadCommand.__str__(self);
1438            s += "dataoff = %#8.8x, datasize = %u" % (self.dataoff, self.datasize)
1439            return s
1440
1441    class EncryptionInfoLoadCommand(LoadCommand):
1442        def __init__(self, lc):
1443            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1444            self.cryptoff = 0
1445            self.cryptsize = 0
1446            self.cryptid = 0
1447
1448        def unpack(self, mach_file, data):
1449            byte_order_char = mach_file.magic.get_byte_order()
1450            self.cryptoff, self.cryptsize, self.cryptid = data.get_n_uint32(3)
1451
1452        def __str__(self):
1453            s = Mach.LoadCommand.__str__(self);
1454            s += "file-range = [%#8.8x - %#8.8x), cryptsize = %u, cryptid = %u" % (self.cryptoff, self.cryptoff + self.cryptsize, self.cryptsize, self.cryptid)
1455            return s
1456
1457    class SegmentLoadCommand(LoadCommand):
1458
1459        def __init__(self, lc):
1460            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1461            self.segname = None
1462            self.vmaddr = 0
1463            self.vmsize = 0
1464            self.fileoff = 0
1465            self.filesize = 0
1466            self.maxprot = 0
1467            self.initprot = 0
1468            self.nsects = 0
1469            self.flags = 0
1470
1471        def unpack(self, mach_file, data):
1472            is_64 = self.command.get_enum_value() == LC_SEGMENT_64;
1473            self.segname = data.get_fixed_length_c_string (16, '', True)
1474            if is_64:
1475                self.vmaddr, self.vmsize, self.fileoff, self.filesize = data.get_n_uint64(4)
1476            else:
1477                self.vmaddr, self.vmsize, self.fileoff, self.filesize = data.get_n_uint32(4)
1478            self.maxprot, self.initprot, self.nsects, self.flags = data.get_n_uint32(4)
1479            mach_file.segments.append(self)
1480            for i in range(self.nsects):
1481                section = Mach.Section()
1482                section.unpack(is_64, data)
1483                section.index = len (mach_file.sections)
1484                mach_file.sections.append(section)
1485
1486
1487        def __str__(self):
1488            s = Mach.LoadCommand.__str__(self);
1489            if self.command.get_enum_value() == LC_SEGMENT:
1490                s += "%#8.8x %#8.8x %#8.8x %#8.8x " % (self.vmaddr, self.vmsize, self.fileoff, self.filesize)
1491            else:
1492                s += "%#16.16x %#16.16x %#16.16x %#16.16x " % (self.vmaddr, self.vmsize, self.fileoff, self.filesize)
1493            s += "%s %s %3u %#8.8x" % (vm_prot_names[self.maxprot], vm_prot_names[self.initprot], self.nsects, self.flags)
1494            s += ' ' + self.segname
1495            return s
1496
1497    class NList:
1498        class Type:
1499            class Stab(dict_utils.Enum):
1500                enum = {
1501                    'N_GSYM'    : N_GSYM    ,
1502                    'N_FNAME'   : N_FNAME   ,
1503                    'N_FUN'     : N_FUN     ,
1504                    'N_STSYM'   : N_STSYM   ,
1505                    'N_LCSYM'   : N_LCSYM   ,
1506                    'N_BNSYM'   : N_BNSYM   ,
1507                    'N_OPT'     : N_OPT     ,
1508                    'N_RSYM'    : N_RSYM    ,
1509                    'N_SLINE'   : N_SLINE   ,
1510                    'N_ENSYM'   : N_ENSYM   ,
1511                    'N_SSYM'    : N_SSYM    ,
1512                    'N_SO'      : N_SO      ,
1513                    'N_OSO'     : N_OSO     ,
1514                    'N_LSYM'    : N_LSYM    ,
1515                    'N_BINCL'   : N_BINCL   ,
1516                    'N_SOL'     : N_SOL     ,
1517                    'N_PARAMS'  : N_PARAMS  ,
1518                    'N_VERSION' : N_VERSION ,
1519                    'N_OLEVEL'  : N_OLEVEL  ,
1520                    'N_PSYM'    : N_PSYM    ,
1521                    'N_EINCL'   : N_EINCL   ,
1522                    'N_ENTRY'   : N_ENTRY   ,
1523                    'N_LBRAC'   : N_LBRAC   ,
1524                    'N_EXCL'    : N_EXCL    ,
1525                    'N_RBRAC'   : N_RBRAC   ,
1526                    'N_BCOMM'   : N_BCOMM   ,
1527                    'N_ECOMM'   : N_ECOMM   ,
1528                    'N_ECOML'   : N_ECOML   ,
1529                    'N_LENG'    : N_LENG
1530                }
1531
1532                def __init__(self, magic = 0):
1533                    dict_utils.Enum.__init__(self, magic, self.enum)
1534
1535            def __init__(self, t = 0):
1536                self.value = t
1537
1538            def __str__(self):
1539                n_type = self.value
1540                if n_type & N_STAB:
1541                    stab = Mach.NList.Type.Stab(self.value)
1542                    return '%s' % stab
1543                else:
1544                    type = self.value & N_TYPE
1545                    type_str = ''
1546                    if type == N_UNDF:
1547                        type_str = 'N_UNDF'
1548                    elif type == N_ABS:
1549                        type_str = 'N_ABS '
1550                    elif type == N_SECT:
1551                        type_str = 'N_SECT'
1552                    elif type == N_PBUD:
1553                        type_str = 'N_PBUD'
1554                    elif type == N_INDR:
1555                        type_str = 'N_INDR'
1556                    else:
1557                        type_str = "??? (%#2.2x)" % type
1558                    if n_type & N_PEXT:
1559                        type_str += ' | PEXT'
1560                    if n_type & N_EXT:
1561                        type_str += ' | EXT '
1562                    return type_str
1563
1564
1565        def __init__(self):
1566            self.index = 0
1567            self.name_offset = 0
1568            self.name = 0
1569            self.type = Mach.NList.Type()
1570            self.sect_idx = 0
1571            self.desc = 0
1572            self.value = 0
1573
1574        def unpack(self, mach_file, data, symtab_lc):
1575            self.index = len(mach_file.symbols)
1576            self.name_offset = data.get_uint32()
1577            self.type.value, self.sect_idx = data.get_n_uint8(2)
1578            self.desc = data.get_uint16()
1579            if mach_file.is_64_bit():
1580                self.value = data.get_uint64()
1581            else:
1582                self.value = data.get_uint32()
1583            data.push_offset_and_seek (mach_file.file_off + symtab_lc.stroff + self.name_offset)
1584            #print "get string for symbol[%u]" % self.index
1585            self.name = data.get_c_string()
1586            data.pop_offset_and_seek()
1587
1588        def __str__(self):
1589            name_display = ''
1590            if len(self.name):
1591                name_display = ' "%s"' % self.name
1592            return '%#8.8x %#2.2x (%-20s) %#2.2x %#4.4x %16.16x%s' % (self.name_offset, self.type.value, self.type, self.sect_idx, self.desc, self.value, name_display)
1593
1594
1595    class Interactive(cmd.Cmd):
1596        '''Interactive command interpreter to mach-o files.'''
1597
1598        def __init__(self, mach, options):
1599            cmd.Cmd.__init__(self)
1600            self.intro = 'Interactive mach-o command interpreter'
1601            self.prompt = 'mach-o: %s %% ' % mach.path
1602            self.mach = mach
1603            self.options = options
1604
1605        def default(self, line):
1606            '''Catch all for unknown command, which will exit the interpreter.'''
1607            print "uknown command: %s" % line
1608            return True
1609
1610        def do_q(self, line):
1611            '''Quit command'''
1612            return True
1613
1614        def do_quit(self, line):
1615            '''Quit command'''
1616            return True
1617
1618        def do_header(self, line):
1619            '''Dump mach-o file headers'''
1620            self.mach.dump_header(True, self.options)
1621            return False
1622
1623        def do_load(self, line):
1624            '''Dump all mach-o load commands'''
1625            self.mach.dump_load_commands(True, self.options)
1626            return False
1627
1628        def do_sections(self, line):
1629            '''Dump all mach-o sections'''
1630            self.mach.dump_sections(True, self.options)
1631            return False
1632
1633        def do_symtab(self, line):
1634            '''Dump all mach-o symbols in the symbol table'''
1635            self.mach.dump_symtab(True, self.options)
1636            return False
1637
if __name__ == '__main__':
    # Command line driver: parse the options, then either compare two mach
    # files or parse each given file and dump it (or open it interactively).
    parser = optparse.OptionParser(description='A script that parses skinny and universal mach-o files.')
    parser.add_option('--arch', '-a', type='string', metavar='arch', dest='archs', action='append', help='specify one or more architectures by name')
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', help='display verbose debug info', default=False)
    parser.add_option('-H', '--header', action='store_true', dest='dump_header', help='dump the mach-o file header', default=False)
    parser.add_option('-l', '--load-commands', action='store_true', dest='dump_load_commands', help='dump the mach-o load commands', default=False)
    parser.add_option('-s', '--symtab', action='store_true', dest='dump_symtab', help='dump the mach-o symbol table', default=False)
    parser.add_option('-S', '--sections', action='store_true', dest='dump_sections', help='dump the mach-o sections', default=False)
    parser.add_option('--section', type='string', metavar='sectname', dest='section_names', action='append', help='Specify one or more section names to dump', default=[])
    parser.add_option('-o', '--out', type='string', dest='outfile', help='Used in conjunction with the --section=NAME option to save a single section\'s data to disk.', default=False)
    parser.add_option('-i', '--interactive', action='store_true', dest='interactive', help='enable interactive mode', default=False)
    parser.add_option('-m', '--mangled', action='store_true', dest='find_mangled', help='dump all mangled names in a mach file', default=False)
    parser.add_option('-c', '--compare', action='store_true', dest='compare', help='compare two mach files', default=False)
    parser.add_option('-M', '--extract-modules', action='store_true', dest='extract_modules', help='Extract modules from file', default=False)
    parser.add_option('-C', '--count', type='int', dest='max_count', help='Sets the max byte count when dumping section data', default=-1)

    (options, mach_files) = parser.parse_args()
    if options.extract_modules:
        # Module extraction is implemented as dumping the "__apple_ast"
        # section to the output file, so it cannot be combined with an
        # explicit --section list and it needs an output path.
        if options.section_names:
            print("error: can't use --section option with the --extract-modules option")
            sys.exit(1)
        if not options.outfile:
            print("error: the --out=FILE option must be specified with the --extract-modules option")
            sys.exit(1)
        options.section_names.append("__apple_ast")
    if options.compare:
        if len(mach_files) != 2:
            print('error: --compare takes two mach files as arguments')
            # Report the usage error through the exit status as well, like
            # the other error paths above.
            sys.exit(1)
        mach_a = Mach()
        mach_b = Mach()
        mach_a.parse(mach_files[0])
        mach_b.parse(mach_files[1])
        mach_a.compare(mach_b)
    else:
        # With no explicit dump request, show the header and load commands.
        if not (options.dump_header or options.dump_load_commands or options.dump_symtab or options.dump_sections or options.find_mangled or options.section_names):
            options.dump_header = True
            options.dump_load_commands = True
        if options.verbose:
            print('options %s' % (options,))
            print('mach_files %s' % (mach_files,))
        for path in mach_files:
            mach = Mach()
            mach.parse(path)
            if options.interactive:
                interpreter = Mach.Interactive(mach, options)
                interpreter.cmdloop()
            else:
                mach.dump(options)
1687
1688