1#!/usr/bin/python 2 3import optparse 4import os 5import shlex 6import struct 7import sys 8 9ARMAG = "!<arch>\n" 10SARMAG = 8 11ARFMAG = "`\n" 12AR_EFMT1 = "#1/" 13 14 15def memdump(src, bytes_per_line=16, address=0): 16 FILTER = ''.join([(len(repr(chr(x))) == 3) and chr(x) or '.' 17 for x in range(256)]) 18 for i in range(0, len(src), bytes_per_line): 19 s = src[i:i+bytes_per_line] 20 hex_bytes = ' '.join(["%02x" % (ord(x)) for x in s]) 21 ascii = s.translate(FILTER) 22 print("%#08.8x: %-*s %s" % (address+i, bytes_per_line*3, hex_bytes, 23 ascii)) 24 25 26class Object(object): 27 def __init__(self, file): 28 def read_str(file, str_len): 29 return file.read(str_len).rstrip('\0 ') 30 31 def read_int(file, str_len, base): 32 return int(read_str(file, str_len), base) 33 34 self.offset = file.tell() 35 self.file = file 36 self.name = read_str(file, 16) 37 self.date = read_int(file, 12, 10) 38 self.uid = read_int(file, 6, 10) 39 self.gid = read_int(file, 6, 10) 40 self.mode = read_int(file, 8, 8) 41 self.size = read_int(file, 10, 10) 42 if file.read(2) != ARFMAG: 43 raise ValueError('invalid BSD object at offset %#08.8x' % ( 44 self.offset)) 45 # If we have an extended name read it. Extended names start with 46 name_len = 0 47 if self.name.startswith(AR_EFMT1): 48 name_len = int(self.name[len(AR_EFMT1):], 10) 49 self.name = read_str(file, name_len) 50 self.obj_offset = file.tell() 51 self.obj_size = self.size - name_len 52 file.seek(self.obj_size, 1) 53 54 def dump(self, f=sys.stdout, flat=True): 55 if flat: 56 f.write('%#08.8x: %#08.8x %5u %5u %6o %#08.8x %s\n' % (self.offset, 57 self.date, self.uid, self.gid, self.mode, self.size, 58 self.name)) 59 else: 60 f.write('%#08.8x: \n' % self.offset) 61 f.write(' name = "%s"\n' % self.name) 62 f.write(' date = %#08.8x\n' % self.date) 63 f.write(' uid = %i\n' % self.uid) 64 f.write(' gid = %i\n' % self.gid) 65 f.write(' mode = %o\n' % self.mode) 66 f.write(' size = %#08.8x\n' % (self.size)) 67 self.file.seek(self.obj_offset, 0) 68 first_bytes = self.file.read(4) 69 f.write('bytes = ') 70 memdump(first_bytes) 71 72 def get_bytes(self): 73 saved_pos = self.file.tell() 74 self.file.seek(self.obj_offset, 0) 75 bytes = self.file.read(self.obj_size) 76 self.file.seek(saved_pos, 0) 77 return bytes 78 79 80class StringTable(object): 81 def __init__(self, bytes): 82 self.bytes = bytes 83 84 def get_string(self, offset): 85 length = len(self.bytes) 86 if offset >= length: 87 return None 88 return self.bytes[offset:self.bytes.find('\0', offset)] 89 90 91class Archive(object): 92 def __init__(self, path): 93 self.path = path 94 self.file = open(path, 'r') 95 self.objects = [] 96 self.offset_to_object = {} 97 if self.file.read(SARMAG) != ARMAG: 98 print("error: file isn't a BSD archive") 99 while True: 100 try: 101 self.objects.append(Object(self.file)) 102 except ValueError: 103 break 104 105 def get_object_at_offset(self, offset): 106 if offset in self.offset_to_object: 107 return self.offset_to_object[offset] 108 for obj in self.objects: 109 if obj.offset == offset: 110 self.offset_to_object[offset] = obj 111 return obj 112 return None 113 114 def find(self, name, mtime=None, f=sys.stdout): 115 ''' 116 Find an object(s) by name with optional modification time. There 117 can be multple objects with the same name inside and possibly with 118 the same modification time within a BSD archive so clients must be 119 prepared to get multiple results. 120 ''' 121 matches = [] 122 for obj in self.objects: 123 if obj.name == name and (mtime is None or mtime == obj.date): 124 matches.append(obj) 125 return matches 126 127 @classmethod 128 def dump_header(self, f=sys.stdout): 129 f.write(' DATE UID GID MODE SIZE NAME\n') 130 f.write(' ---------- ----- ----- ------ ---------- ' 131 '--------------\n') 132 133 def get_symdef(self): 134 def get_uint32(file): 135 '''Extract a uint32_t from the current file position.''' 136 v, = struct.unpack('=I', file.read(4)) 137 return v 138 139 for obj in self.objects: 140 symdef = [] 141 if obj.name.startswith("__.SYMDEF"): 142 self.file.seek(obj.obj_offset, 0) 143 ranlib_byte_size = get_uint32(self.file) 144 num_ranlib_structs = ranlib_byte_size/8 145 str_offset_pairs = [] 146 for _ in range(num_ranlib_structs): 147 strx = get_uint32(self.file) 148 offset = get_uint32(self.file) 149 str_offset_pairs.append((strx, offset)) 150 strtab_len = get_uint32(self.file) 151 strtab = StringTable(self.file.read(strtab_len)) 152 for s in str_offset_pairs: 153 symdef.append((strtab.get_string(s[0]), s[1])) 154 return symdef 155 156 def get_object_dicts(self): 157 ''' 158 Returns an array of object dictionaries that contain they following 159 keys: 160 'object': the actual bsd.Object instance 161 'symdefs': an array of symbol names that the object contains 162 as found in the "__.SYMDEF" item in the archive 163 ''' 164 symdefs = self.get_symdef() 165 symdef_dict = {} 166 if symdefs: 167 for (name, offset) in symdefs: 168 if offset in symdef_dict: 169 object_dict = symdef_dict[offset] 170 else: 171 object_dict = { 172 'object': self.get_object_at_offset(offset), 173 'symdefs': [] 174 } 175 symdef_dict[offset] = object_dict 176 object_dict['symdefs'].append(name) 177 object_dicts = [] 178 for offset in sorted(symdef_dict): 179 object_dicts.append(symdef_dict[offset]) 180 return object_dicts 181 182 def dump(self, f=sys.stdout, flat=True): 183 f.write('%s:\n' % self.path) 184 if flat: 185 self.dump_header(f=f) 186 for obj in self.objects: 187 obj.dump(f=f, flat=flat) 188 189 190def main(): 191 parser = optparse.OptionParser( 192 prog='bsd', 193 description='Utility for BSD archives') 194 parser.add_option( 195 '--object', 196 type='string', 197 dest='object_name', 198 default=None, 199 help=('Specify the name of a object within the BSD archive to get ' 200 'information on')) 201 parser.add_option( 202 '-s', '--symbol', 203 type='string', 204 dest='find_symbol', 205 default=None, 206 help=('Specify the name of a symbol within the BSD archive to get ' 207 'information on from SYMDEF')) 208 parser.add_option( 209 '--symdef', 210 action='store_true', 211 dest='symdef', 212 default=False, 213 help=('Dump the information in the SYMDEF.')) 214 parser.add_option( 215 '-v', '--verbose', 216 action='store_true', 217 dest='verbose', 218 default=False, 219 help='Enable verbose output') 220 parser.add_option( 221 '-e', '--extract', 222 action='store_true', 223 dest='extract', 224 default=False, 225 help=('Specify this to extract the object specified with the --object ' 226 'option. There must be only one object with a matching name or ' 227 'the --mtime option must be specified to uniquely identify a ' 228 'single object.')) 229 parser.add_option( 230 '-m', '--mtime', 231 type='int', 232 dest='mtime', 233 default=None, 234 help=('Specify the modification time of the object an object. This ' 235 'option is used with either the --object or --extract options.')) 236 parser.add_option( 237 '-o', '--outfile', 238 type='string', 239 dest='outfile', 240 default=None, 241 help=('Specify a different name or path for the file to extract when ' 242 'using the --extract option. If this option isn\'t specified, ' 243 'then the extracted object file will be extracted into the ' 244 'current working directory if a file doesn\'t already exist ' 245 'with that name.')) 246 247 (options, args) = parser.parse_args(sys.argv[1:]) 248 249 for path in args: 250 archive = Archive(path) 251 if options.object_name: 252 print('%s:\n' % (path)) 253 matches = archive.find(options.object_name, options.mtime) 254 if matches: 255 dump_all = True 256 if options.extract: 257 if len(matches) == 1: 258 dump_all = False 259 if options.outfile is None: 260 outfile_path = matches[0].name 261 else: 262 outfile_path = options.outfile 263 if os.path.exists(outfile_path): 264 print('error: outfile "%s" already exists' % ( 265 outfile_path)) 266 else: 267 print('Saving file to "%s"...' % (outfile_path)) 268 with open(outfile_path, 'w') as outfile: 269 outfile.write(matches[0].get_bytes()) 270 else: 271 print('error: multiple objects match "%s". Specify ' 272 'the modification time using --mtime.' % ( 273 options.object_name)) 274 if dump_all: 275 for obj in matches: 276 obj.dump(flat=False) 277 else: 278 print('error: object "%s" not found in archive' % ( 279 options.object_name)) 280 elif options.find_symbol: 281 symdefs = archive.get_symdef() 282 if symdefs: 283 success = False 284 for (name, offset) in symdefs: 285 obj = archive.get_object_at_offset(offset) 286 if name == options.find_symbol: 287 print('Found "%s" in:' % (options.find_symbol)) 288 obj.dump(flat=False) 289 success = True 290 if not success: 291 print('Didn\'t find "%s" in any objects' % ( 292 options.find_symbol)) 293 else: 294 print("error: no __.SYMDEF was found") 295 elif options.symdef: 296 object_dicts = archive.get_object_dicts() 297 for object_dict in object_dicts: 298 object_dict['object'].dump(flat=False) 299 print("symbols:") 300 for name in object_dict['symdefs']: 301 print(" %s" % (name)) 302 else: 303 archive.dump(flat=not options.verbose) 304 305 306if __name__ == '__main__': 307 main() 308 309 310def print_mtime_error(result, dmap_mtime, actual_mtime): 311 print >>result, ("error: modification time in debug map (%#08.8x) doesn't " 312 "match the .o file modification time (%#08.8x)" % ( 313 dmap_mtime, actual_mtime)) 314 315 316def print_file_missing_error(result, path): 317 print >>result, "error: file \"%s\" doesn't exist" % (path) 318 319 320def print_multiple_object_matches(result, object_name, mtime, matches): 321 print >>result, ("error: multiple matches for object '%s' with with " 322 "modification time %#08.8x:" % (object_name, mtime)) 323 Archive.dump_header(f=result) 324 for match in matches: 325 match.dump(f=result, flat=True) 326 327 328def print_archive_object_error(result, object_name, mtime, archive): 329 matches = archive.find(object_name, f=result) 330 if len(matches) > 0: 331 print >>result, ("error: no objects have a modification time that " 332 "matches %#08.8x for '%s'. Potential matches:" % ( 333 mtime, object_name)) 334 Archive.dump_header(f=result) 335 for match in matches: 336 match.dump(f=result, flat=True) 337 else: 338 print >>result, "error: no object named \"%s\" found in archive:" % ( 339 object_name) 340 Archive.dump_header(f=result) 341 for match in archive.objects: 342 match.dump(f=result, flat=True) 343 # archive.dump(f=result, flat=True) 344 345 346class VerifyDebugMapCommand: 347 name = "verify-debug-map-objects" 348 349 def create_options(self): 350 usage = "usage: %prog [options]" 351 description = '''This command reports any .o files that are missing 352or whose modification times don't match in the debug map of an executable.''' 353 354 self.parser = optparse.OptionParser( 355 description=description, 356 prog=self.name, 357 usage=usage, 358 add_help_option=False) 359 360 self.parser.add_option( 361 '-e', '--errors', 362 action='store_true', 363 dest='errors', 364 default=False, 365 help="Only show errors") 366 367 def get_short_help(self): 368 return "Verify debug map object files." 369 370 def get_long_help(self): 371 return self.help_string 372 373 def __init__(self, debugger, unused): 374 self.create_options() 375 self.help_string = self.parser.format_help() 376 377 def __call__(self, debugger, command, exe_ctx, result): 378 import lldb 379 # Use the Shell Lexer to properly parse up command options just like a 380 # shell would 381 command_args = shlex.split(command) 382 383 try: 384 (options, args) = self.parser.parse_args(command_args) 385 except: 386 result.SetError("option parsing failed") 387 return 388 389 # Always get program state from the SBExecutionContext passed in 390 target = exe_ctx.GetTarget() 391 if not target.IsValid(): 392 result.SetError("invalid target") 393 return 394 archives = {} 395 for module_spec in args: 396 module = target.module[module_spec] 397 if not (module and module.IsValid()): 398 result.SetError('error: invalid module specification: "%s". ' 399 'Specify the full path, basename, or UUID of ' 400 'a module ' % (module_spec)) 401 return 402 num_symbols = module.GetNumSymbols() 403 num_errors = 0 404 for i in range(num_symbols): 405 symbol = module.GetSymbolAtIndex(i) 406 if symbol.GetType() != lldb.eSymbolTypeObjectFile: 407 continue 408 path = symbol.GetName() 409 if not path: 410 continue 411 # Extract the value of the symbol by dumping the 412 # symbol. The value is the mod time. 413 dmap_mtime = int(str(symbol).split('value = ') 414 [1].split(',')[0], 16) 415 if not options.errors: 416 print >>result, '%s' % (path) 417 if os.path.exists(path): 418 actual_mtime = int(os.stat(path).st_mtime) 419 if dmap_mtime != actual_mtime: 420 num_errors += 1 421 if options.errors: 422 print >>result, '%s' % (path), 423 print_mtime_error(result, dmap_mtime, 424 actual_mtime) 425 elif path[-1] == ')': 426 (archive_path, object_name) = path[0:-1].split('(') 427 if not archive_path and not object_name: 428 num_errors += 1 429 if options.errors: 430 print >>result, '%s' % (path), 431 print_file_missing_error(path) 432 continue 433 if not os.path.exists(archive_path): 434 num_errors += 1 435 if options.errors: 436 print >>result, '%s' % (path), 437 print_file_missing_error(archive_path) 438 continue 439 if archive_path in archives: 440 archive = archives[archive_path] 441 else: 442 archive = Archive(archive_path) 443 archives[archive_path] = archive 444 matches = archive.find(object_name, dmap_mtime) 445 num_matches = len(matches) 446 if num_matches == 1: 447 print >>result, '1 match' 448 obj = matches[0] 449 if obj.date != dmap_mtime: 450 num_errors += 1 451 if options.errors: 452 print >>result, '%s' % (path), 453 print_mtime_error(result, dmap_mtime, obj.date) 454 elif num_matches == 0: 455 num_errors += 1 456 if options.errors: 457 print >>result, '%s' % (path), 458 print_archive_object_error(result, object_name, 459 dmap_mtime, archive) 460 elif num_matches > 1: 461 num_errors += 1 462 if options.errors: 463 print >>result, '%s' % (path), 464 print_multiple_object_matches(result, 465 object_name, 466 dmap_mtime, matches) 467 if num_errors > 0: 468 print >>result, "%u errors found" % (num_errors) 469 else: 470 print >>result, "No errors detected in debug map" 471 472 473def __lldb_init_module(debugger, dict): 474 # This initializer is being run from LLDB in the embedded command 475 # interpreter. 476 # Add any commands contained in this module to LLDB 477 debugger.HandleCommand( 478 'command script add -c %s.VerifyDebugMapCommand %s' % ( 479 __name__, VerifyDebugMapCommand.name)) 480 print('The "%s" command has been installed, type "help %s" for detailed ' 481 'help.' % (VerifyDebugMapCommand.name, VerifyDebugMapCommand.name)) 482