1#!/usr/bin/python
2
3import optparse
4import os
5import shlex
6import struct
7import sys
8
# Magic string expected at the very start of a BSD archive file.
ARMAG = "!<arch>\n"
# Number of bytes read from the start of the file to compare against ARMAG.
SARMAG = 8
# Two-byte terminator expected at the end of every member header.
ARFMAG = "`\n"
# Name prefix marking a member with an extended name: the digits after the
# prefix give the length of the real name, which is stored right after the
# header.
AR_EFMT1 = "#1/"
13
14
def memdump(src, bytes_per_line=16, address=0):
    """Print a hex and character dump of ``src`` to standard output.

    Each output line shows the address of its first byte, the hex value of
    every byte on the line and the same bytes as characters, with every
    character whose ``repr()`` is not a plain single character replaced
    by '.'.

    Args:
        src: the data to dump; a string or a bytes-like object.
        bytes_per_line: how many bytes to show on each output line.
        address: the address to display for the first byte of ``src``.
    """
    if isinstance(src, (bytes, bytearray)):
        src = bytes(src)
        if not isinstance(src, str):
            # Python 3 bytes iterate as ints and cannot be translated with
            # a text table. Latin-1 maps every byte value to the character
            # with the same ordinal, so the dump below is unchanged.
            src = src.decode('latin-1')
    printable = ''.join([chr(x) if len(repr(chr(x))) == 3 else '.'
                         for x in range(256)])
    for i in range(0, len(src), bytes_per_line):
        chunk = src[i:i + bytes_per_line]
        hex_bytes = ' '.join(["%02x" % (ord(c)) for c in chunk])
        text = chunk.translate(printable)
        print("%#08.8x: %-*s %s" % (address + i, bytes_per_line * 3,
                                    hex_bytes, text))
24
25
class Object(object):
    """A single member of a BSD archive.

    The constructor parses the member header found at the current position
    of ``file`` and leaves the file positioned at the next member header.

    Attributes:
        offset:     file offset of the member header.
        file:       the file object the member was read from.
        name:       member name (the extended name when one is present).
        date:       modification time field of the header (decimal).
        uid, gid:   uid and gid fields of the header (decimal).
        mode:       mode field of the header (octal).
        size:       size field of the header; for a member with an extended
                    name this includes the length of that name.
        obj_offset: file offset of the first byte of the member's data.
        obj_size:   number of bytes of member data.
    """

    @staticmethod
    def _to_text(data):
        """Return ``data`` as text so it can be compared with str constants.

        Files opened in binary mode return bytes on Python 3. Latin-1 maps
        every byte to the character with the same ordinal, so nothing is
        lost. Data that is already a str is returned unchanged.
        """
        if isinstance(data, bytes) and not isinstance(data, str):
            return data.decode('latin-1')
        return data

    def __init__(self, file):
        """Parse the member header at the current position of ``file``.

        Raises:
            ValueError: if the header fields are not valid numbers (this is
                also what happens at end of file, where every field is
                empty) or the header terminator is wrong.
        """
        def read_str(file, str_len):
            # Header fields are padded with spaces and/or NULs.
            return Object._to_text(file.read(str_len)).rstrip('\0 ')

        def read_int(file, str_len, base):
            return int(read_str(file, str_len), base)

        self.offset = file.tell()
        self.file = file
        self.name = read_str(file, 16)
        self.date = read_int(file, 12, 10)
        self.uid = read_int(file, 6, 10)
        self.gid = read_int(file, 6, 10)
        self.mode = read_int(file, 8, 8)
        self.size = read_int(file, 10, 10)
        if Object._to_text(file.read(2)) != ARFMAG:
            raise ValueError('invalid BSD object at offset %#08.8x' % (
                             self.offset))
        # If we have an extended name read it. Extended names start with
        # AR_EFMT1 followed by the decimal length of the real name, which is
        # stored immediately after the header and counted in self.size.
        name_len = 0
        if self.name.startswith(AR_EFMT1):
            name_len = int(self.name[len(AR_EFMT1):], 10)
            self.name = read_str(file, name_len)
        self.obj_offset = file.tell()
        self.obj_size = self.size - name_len
        file.seek(self.obj_size, 1)
        if self.size % 2:
            # Members start on even offsets: an odd-sized member is followed
            # by one pad byte that the header size does not count.
            file.seek(1, 1)

    def dump(self, f=sys.stdout, flat=True):
        """Write a description of this member to ``f``.

        Args:
            f: file-like object to write to.
            flat: when true write a single summary line, otherwise write one
                field per line followed by a dump of the first four bytes of
                the member's data.
        """
        if flat:
            f.write('%#08.8x: %#08.8x %5u %5u %6o %#08.8x %s\n' % (self.offset,
                    self.date, self.uid, self.gid, self.mode, self.size,
                    self.name))
            return
        f.write('%#08.8x: \n' % self.offset)
        f.write(' name = "%s"\n' % self.name)
        f.write(' date = %#08.8x\n' % self.date)
        f.write('  uid = %i\n' % self.uid)
        f.write('  gid = %i\n' % self.gid)
        f.write(' mode = %o\n' % self.mode)
        f.write(' size = %#08.8x\n' % (self.size))
        saved_pos = self.file.tell()
        self.file.seek(self.obj_offset, 0)
        first_bytes = Object._to_text(self.file.read(4))
        self.file.seek(saved_pos, 0)
        f.write('bytes = ')
        # memdump() always prints to sys.stdout. Point sys.stdout at ``f``
        # for the duration of the call so the dump follows the 'bytes = '
        # label instead of landing on a different stream.
        saved_stdout = sys.stdout
        sys.stdout = f
        try:
            memdump(first_bytes)
        finally:
            sys.stdout = saved_stdout

    def get_bytes(self):
        """Return the member's data, leaving the file position unchanged."""
        saved_pos = self.file.tell()
        self.file.seek(self.obj_offset, 0)
        try:
            return self.file.read(self.obj_size)
        finally:
            self.file.seek(saved_pos, 0)
78
79
class StringTable(object):
    """A block of NUL terminated strings that are looked up by offset."""

    def __init__(self, bytes):
        """Store the raw string table data (a str or a bytes object)."""
        self.bytes = bytes

    def get_string(self, offset):
        """Return the string that starts at ``offset``.

        The string runs up to, but does not include, the next NUL. If the
        table ends before a NUL is found, the rest of the table is returned.
        The result has the same type as the table data.

        Returns:
            The string, or None when ``offset`` is outside the table.
        """
        table = self.bytes
        if offset < 0 or offset >= len(table):
            return None
        # The terminator has to match the type of the table to search it.
        terminator = '\0' if isinstance(table, str) else b'\0'
        end = table.find(terminator, offset)
        if end < 0:
            # No terminator: find() returned -1, which as a slice bound
            # would silently drop the last character.
            end = len(table)
        return table[offset:end]
89
90
class Archive(object):
    """Parser for a BSD archive file.

    Opening an archive reads every member header into ``self.objects``. The
    underlying file stays open so member contents can be read on demand;
    call close(), or use the archive as a context manager, to release it.
    """

    @staticmethod
    def _to_text(data):
        """Return ``data`` as text so it can be compared with str constants.

        Binary files return bytes on Python 3. Latin-1 maps every byte to
        the character with the same ordinal, so byte offsets into the data
        stay valid. Data that is already a str is returned unchanged.
        """
        if isinstance(data, bytes) and not isinstance(data, str):
            return data.decode('latin-1')
        return data

    def __init__(self, path):
        """Open the archive at ``path`` and parse all of its member headers.

        If the file does not start with the archive magic an error is
        printed and the archive is left with no objects.
        """
        self.path = path
        # Archives contain binary data. Text mode would translate or decode
        # it, and Python 3 text files reject the relative seeks used while
        # parsing member headers.
        self.file = open(path, 'rb')
        self.objects = []
        self.offset_to_object = {}
        if self._to_text(self.file.read(SARMAG)) != ARMAG:
            print("error: file isn't a BSD archive")
            return
        while True:
            try:
                self.objects.append(Object(self.file))
            except ValueError:
                # Object raises ValueError when no valid member header can
                # be parsed, which is how the end of the archive shows up.
                break

    def close(self):
        """Close the underlying archive file."""
        self.file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def get_object_at_offset(self, offset):
        """Return the object whose header starts at ``offset``, or None.

        Successful lookups are cached in ``self.offset_to_object``.
        """
        if offset in self.offset_to_object:
            return self.offset_to_object[offset]
        for obj in self.objects:
            if obj.offset == offset:
                self.offset_to_object[offset] = obj
                return obj
        return None

    def find(self, name, mtime=None, f=sys.stdout):
        '''
            Find an object(s) by name with optional modification time. There
            can be multiple objects with the same name inside and possibly
            with the same modification time within a BSD archive so clients
            must be prepared to get multiple results.

            The "f" argument is accepted for compatibility with existing
            callers and is not used.
        '''
        return [obj for obj in self.objects
                if obj.name == name and (mtime is None or mtime == obj.date)]

    @classmethod
    def dump_header(cls, f=sys.stdout):
        """Write the column headings for flat object dumps to ``f``."""
        f.write('            DATE       UID   GID   MODE   SIZE       NAME\n')
        f.write('            ---------- ----- ----- ------ ---------- '
                '--------------\n')

    def get_symdef(self):
        """Return the archive's symbol table as (name, offset) tuples.

        The table is read from the first object whose name starts with
        "__.SYMDEF". Each offset is the file offset recorded for the symbol,
        which get_object_at_offset() can turn into an object.

        Returns:
            A list of (symbol name, offset) tuples; empty when the archive
            has no "__.SYMDEF" object.
        """
        def get_uint32(file):
            '''Extract a uint32_t from the current file position.'''
            v, = struct.unpack('=I', file.read(4))
            return v

        for obj in self.objects:
            if not obj.name.startswith("__.SYMDEF"):
                continue
            # NOTE(review): a member named e.g. "__.SYMDEF_64" would also
            # match and be read with the 32-bit layout below - confirm that
            # such members never need to be handled here.
            self.file.seek(obj.obj_offset, 0)
            ranlib_byte_size = get_uint32(self.file)
            # Every entry is a pair of 32-bit values: string table offset
            # and object offset. Floor division keeps the count an int.
            num_ranlib_structs = ranlib_byte_size // 8
            str_offset_pairs = []
            for _ in range(num_ranlib_structs):
                strx = get_uint32(self.file)
                offset = get_uint32(self.file)
                str_offset_pairs.append((strx, offset))
            strtab_len = get_uint32(self.file)
            strtab = StringTable(self._to_text(self.file.read(strtab_len)))
            return [(strtab.get_string(strx), offset)
                    for (strx, offset) in str_offset_pairs]
        return []

    def get_object_dicts(self):
        '''
            Returns an array of object dictionaries that contain the
            following keys:
                'object': the actual bsd.Object instance, or None if no
                          object starts at the offset recorded in the
                          "__.SYMDEF" item
                'symdefs': an array of symbol names that the object contains
                           as found in the "__.SYMDEF" item in the archive

            The array is sorted by object offset.
        '''
        symdef_dict = {}
        for (name, offset) in self.get_symdef() or []:
            if offset not in symdef_dict:
                symdef_dict[offset] = {
                    'object': self.get_object_at_offset(offset),
                    'symdefs': []
                }
            symdef_dict[offset]['symdefs'].append(name)
        return [symdef_dict[offset] for offset in sorted(symdef_dict)]

    def dump(self, f=sys.stdout, flat=True):
        """Write the archive path followed by a dump of every object to ``f``.

        Args:
            f: file-like object to write to.
            flat: when true write a heading and one line per object,
                otherwise write each object's multi-line description.
        """
        f.write('%s:\n' % self.path)
        if flat:
            self.dump_header(f=f)
        for obj in self.objects:
            obj.dump(f=f, flat=flat)
188
189
def _create_option_parser():
    """Build the option parser that describes the ``bsd`` command line."""
    parser = optparse.OptionParser(
        prog='bsd',
        description='Utility for BSD archives')
    parser.add_option(
        '--object',
        type='string',
        dest='object_name',
        default=None,
        help=('Specify the name of a object within the BSD archive to get '
              'information on'))
    parser.add_option(
        '-s', '--symbol',
        type='string',
        dest='find_symbol',
        default=None,
        help=('Specify the name of a symbol within the BSD archive to get '
              'information on from SYMDEF'))
    parser.add_option(
        '--symdef',
        action='store_true',
        dest='symdef',
        default=False,
        help=('Dump the information in the SYMDEF.'))
    parser.add_option(
        '-v', '--verbose',
        action='store_true',
        dest='verbose',
        default=False,
        help='Enable verbose output')
    parser.add_option(
        '-e', '--extract',
        action='store_true',
        dest='extract',
        default=False,
        help=('Specify this to extract the object specified with the --object '
              'option. There must be only one object with a matching name or '
              'the --mtime option must be specified to uniquely identify a '
              'single object.'))
    parser.add_option(
        '-m', '--mtime',
        type='int',
        dest='mtime',
        default=None,
        help=('Specify the modification time of the object an object. This '
              'option is used with either the --object or --extract options.'))
    parser.add_option(
        '-o', '--outfile',
        type='string',
        dest='outfile',
        default=None,
        help=('Specify a different name or path for the file to extract when '
              'using the --extract option. If this option isn\'t specified, '
              'then the extracted object file will be extracted into the '
              'current working directory if a file doesn\'t already exist '
              'with that name.'))
    return parser


def _extract_object(obj, outfile):
    """Save the contents of archive object ``obj`` to a file.

    ``outfile`` is the path given with --outfile, or None to use the
    object's own name in the current working directory. An existing file
    is never overwritten.
    """
    if outfile is None:
        # The name comes from the archive, so strip any directory part to
        # keep the file in the current working directory as documented.
        outfile_path = os.path.basename(obj.name)
    else:
        outfile_path = outfile
    if os.path.exists(outfile_path):
        print('error: outfile "%s" already exists' % (outfile_path))
        return
    print('Saving file to "%s"...' % (outfile_path))
    data = obj.get_bytes()
    # Object contents are binary; only fall back to text mode if the archive
    # handed back text.
    mode = 'wb' if isinstance(data, bytes) else 'w'
    with open(outfile_path, mode) as out:
        out.write(data)


def _handle_object_option(archive, path, options):
    """Dump, or with --extract save, the objects that match --object."""
    print('%s:\n' % (path))
    matches = archive.find(options.object_name, options.mtime)
    if not matches:
        print('error: object "%s" not found in archive' % (
              options.object_name))
        return
    if options.extract:
        if len(matches) == 1:
            _extract_object(matches[0], options.outfile)
            return
        print('error: multiple objects match "%s". Specify '
              'the modification time using --mtime.' % (
                options.object_name))
    for obj in matches:
        obj.dump(flat=False)


def _handle_symbol_option(archive, symbol):
    """Dump every object that the archive's SYMDEF lists for ``symbol``."""
    symdefs = archive.get_symdef()
    if not symdefs:
        print("error: no __.SYMDEF was found")
        return
    success = False
    for (name, offset) in symdefs:
        if name != symbol:
            continue
        print('Found "%s" in:' % (symbol))
        obj = archive.get_object_at_offset(offset)
        if obj is None:
            # The SYMDEF offset doesn't match the start of any object.
            print('error: no object found at offset %#08.8x' % (offset))
        else:
            obj.dump(flat=False)
        success = True
    if not success:
        print('Didn\'t find "%s" in any objects' % (symbol))


def _handle_symdef_option(archive):
    """Dump every object named in the SYMDEF along with its symbols."""
    for object_dict in archive.get_object_dicts():
        obj = object_dict['object']
        if obj is None:
            print('error: SYMDEF refers to an object that was not found in '
                  'the archive')
        else:
            obj.dump(flat=False)
        print("symbols:")
        for name in object_dict['symdefs']:
            print("  %s" % (name))


def main():
    """Command line entry point: inspect each archive named in sys.argv.

    Exactly one action is performed per archive, chosen in this order:
    --object (optionally with --extract), --symbol, --symdef, and otherwise
    a dump of the whole archive.
    """
    parser = _create_option_parser()
    (options, args) = parser.parse_args(sys.argv[1:])

    for path in args:
        archive = Archive(path)
        if options.object_name:
            _handle_object_option(archive, path, options)
        elif options.find_symbol:
            _handle_symbol_option(archive, options.find_symbol)
        elif options.symdef:
            _handle_symdef_option(archive)
        else:
            archive.dump(flat=not options.verbose)
304
305
# Run the command line tool only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
    main()
308
309
def print_mtime_error(result, dmap_mtime, actual_mtime):
    """Report that a debug map modification time doesn't match the file's.

    Args:
        result: object with a write() method that receives the message.
        dmap_mtime: modification time recorded in the debug map.
        actual_mtime: modification time of the object file itself.
    """
    # result.write() is used instead of the Python 2-only "print >>result"
    # statement so the function works on both Python 2 and Python 3.
    result.write("error: modification time in debug map (%#08.8x) doesn't "
                 "match the .o file modification time (%#08.8x)\n" % (
                     dmap_mtime, actual_mtime))
314
315
def print_file_missing_error(result, path):
    """Report that the file at ``path`` doesn't exist.

    Args:
        result: object with a write() method that receives the message.
        path: the path that could not be found.
    """
    # result.write() is used instead of the Python 2-only "print >>result"
    # statement so the function works on both Python 2 and Python 3.
    result.write("error: file \"%s\" doesn't exist\n" % (path,))
318
319
def print_multiple_object_matches(result, object_name, mtime, matches):
    """Report that several archive objects match a name and modification time.

    The message is followed by a flat listing of every matching object.

    Args:
        result: object with a write() method that receives the output.
        object_name: the object name that was looked up.
        mtime: the modification time that was looked up.
        matches: the archive objects that matched.
    """
    # result.write() is used instead of the Python 2-only "print >>result"
    # statement so the function works on both Python 2 and Python 3.
    result.write("error: multiple matches for object '%s' with "
                 "modification time %#08.8x:\n" % (object_name, mtime))
    Archive.dump_header(f=result)
    for match in matches:
        match.dump(f=result, flat=True)
326
327
def print_archive_object_error(result, object_name, mtime, archive):
    """Report that no archive object matches a name and modification time.

    If objects with the right name but another modification time exist they
    are listed as potential matches; otherwise every object in the archive
    is listed.

    Args:
        result: object with a write() method that receives the output.
        object_name: the object name that was looked up.
        mtime: the modification time that was looked up.
        archive: the archive that was searched.
    """
    # result.write() is used instead of the Python 2-only "print >>result"
    # statement so the function works on both Python 2 and Python 3.
    matches = archive.find(object_name, f=result)
    if matches:
        result.write("error: no objects have a modification time that "
                     "matches %#08.8x for '%s'. Potential matches:\n" % (
                         mtime, object_name))
        candidates = matches
    else:
        result.write("error: no object named \"%s\" found in archive:\n" % (
            object_name,))
        candidates = archive.objects
    Archive.dump_header(f=result)
    for match in candidates:
        match.dump(f=result, flat=True)
344
345
class VerifyDebugMapCommand(object):
    """LLDB command that verifies the object files named in a debug map.

    For every module given on the command line, each object file symbol is
    checked: the file (or the archive member, for paths written as
    "archive(object)") must exist and its modification time must match the
    one recorded in the symbol.
    """
    name = "verify-debug-map-objects"

    def create_options(self):
        """Create ``self.parser`` with the options the command accepts."""
        usage = "usage: %prog [options]"
        description = '''This command reports any .o files that are missing
or whose modification times don't match in the debug map of an executable.'''

        self.parser = optparse.OptionParser(
            description=description,
            prog=self.name,
            usage=usage,
            add_help_option=False)

        self.parser.add_option(
            '-e', '--errors',
            action='store_true',
            dest='errors',
            default=False,
            help="Only show errors")

    def get_short_help(self):
        """Return the one-line description of the command."""
        return "Verify debug map object files."

    def get_long_help(self):
        """Return the full help text generated from the option parser."""
        return self.help_string

    def __init__(self, debugger, unused):
        self.create_options()
        self.help_string = self.parser.format_help()

    @staticmethod
    def _begin_error(result, options, path):
        """Prefix an error with ``path`` when paths aren't already listed.

        Without --errors every path has been written on its own line before
        it is checked; with --errors the path is written here, followed by
        a space, so the error message that follows identifies the file.
        """
        if options.errors:
            result.write('%s ' % (path,))

    def _verify_object(self, result, options, archives, path, dmap_mtime):
        """Check one debug map entry and report any problem to ``result``.

        Args:
            result: object with a write() method that receives messages.
            options: parsed command options.
            archives: cache of already opened archives, keyed by path.
            path: object file path, either a plain path or
                "archive(object)".
            dmap_mtime: modification time recorded in the debug map.

        Returns:
            True if the entry is fine, False if an error was reported.
        """
        if os.path.exists(path):
            actual_mtime = int(os.stat(path).st_mtime)
            if dmap_mtime == actual_mtime:
                return True
            self._begin_error(result, options, path)
            print_mtime_error(result, dmap_mtime, actual_mtime)
            return False

        # Split "archive(object)" at the last '(' so that parentheses in
        # the archive path don't break the parse.
        archive_path, sep, object_name = path[:-1].rpartition('(')
        if (not path.endswith(')') or not sep or
                not (archive_path or object_name)):
            # Either a plain object file that doesn't exist or an entry that
            # can't be split into an archive and an object name.
            self._begin_error(result, options, path)
            print_file_missing_error(result, path)
            return False
        if not os.path.exists(archive_path):
            self._begin_error(result, options, path)
            print_file_missing_error(result, archive_path)
            return False

        archive = archives.get(archive_path)
        if archive is None:
            archive = Archive(archive_path)
            archives[archive_path] = archive
        matches = archive.find(object_name, dmap_mtime)
        if len(matches) == 1:
            # find() only returns objects whose date equals dmap_mtime, so
            # there is no need to compare the modification time again.
            if not options.errors:
                result.write('1 match\n')
            return True
        self._begin_error(result, options, path)
        if matches:
            print_multiple_object_matches(result, object_name, dmap_mtime,
                                          matches)
        else:
            print_archive_object_error(result, object_name, dmap_mtime,
                                       archive)
        return False

    def __call__(self, debugger, command, exe_ctx, result):
        """Run the command.

        Args:
            debugger: the debugger the command is running in (unused).
            command: the raw argument string typed after the command name;
                options followed by module specifications.
            exe_ctx: execution context used to find the target.
            result: command result object; receives all output and errors.
        """
        import lldb

        try:
            # Use the Shell Lexer to properly parse up command options just
            # like a shell would
            command_args = shlex.split(command)
            (options, args) = self.parser.parse_args(command_args)
        except (Exception, SystemExit):
            # optparse reports bad options by calling sys.exit(); that must
            # not propagate out of the command.
            result.SetError("option parsing failed")
            return

        # Always get program state from the SBExecutionContext passed in
        target = exe_ctx.GetTarget()
        if not target.IsValid():
            result.SetError("invalid target")
            return
        archives = {}
        for module_spec in args:
            module = target.module[module_spec]
            if not (module and module.IsValid()):
                result.SetError('error: invalid module specification: "%s". '
                                'Specify the full path, basename, or UUID of '
                                'a module ' % (module_spec))
                return
            num_errors = 0
            for i in range(module.GetNumSymbols()):
                symbol = module.GetSymbolAtIndex(i)
                if symbol.GetType() != lldb.eSymbolTypeObjectFile:
                    continue
                path = symbol.GetName()
                if not path:
                    continue
                # Extract the value of the symbol by dumping the
                # symbol. The value is the mod time.
                dmap_mtime = int(str(symbol).split('value = ')
                                 [1].split(',')[0], 16)
                if not options.errors:
                    result.write('%s\n' % (path,))
                if not self._verify_object(result, options, archives, path,
                                           dmap_mtime):
                    num_errors += 1
            if num_errors > 0:
                result.write("%u errors found\n" % (num_errors))
            else:
                result.write("No errors detected in debug map\n")
471
472
def __lldb_init_module(debugger, dict):
    """Register this module's command with LLDB.

    Runs when the module is loaded by LLDB's embedded command interpreter:
    it adds VerifyDebugMapCommand under its command name and prints a short
    notice telling the user how to get help for it.
    """
    command_name = VerifyDebugMapCommand.name
    add_command = 'command script add -c %s.VerifyDebugMapCommand %s' % (
        __name__, command_name)
    debugger.HandleCommand(add_command)
    notice = ('The "%s" command has been installed, type "help %s" for '
              'detailed help.' % (command_name, command_name))
    print(notice)
482