1 //===-- MachODump.cpp - Object file dumping utility for llvm --------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the MachO-specific dumper for llvm-objdump. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm-objdump.h" 15 #include "llvm-c/Disassembler.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/ADT/StringExtras.h" 18 #include "llvm/ADT/Triple.h" 19 #include "llvm/DebugInfo/DIContext.h" 20 #include "llvm/MC/MCAsmInfo.h" 21 #include "llvm/MC/MCContext.h" 22 #include "llvm/MC/MCDisassembler.h" 23 #include "llvm/MC/MCInst.h" 24 #include "llvm/MC/MCInstPrinter.h" 25 #include "llvm/MC/MCInstrAnalysis.h" 26 #include "llvm/MC/MCInstrDesc.h" 27 #include "llvm/MC/MCInstrInfo.h" 28 #include "llvm/MC/MCRegisterInfo.h" 29 #include "llvm/MC/MCSubtargetInfo.h" 30 #include "llvm/Object/MachO.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/Endian.h" 35 #include "llvm/Support/Format.h" 36 #include "llvm/Support/GraphWriter.h" 37 #include "llvm/Support/MachO.h" 38 #include "llvm/Support/MemoryBuffer.h" 39 #include "llvm/Support/FormattedStream.h" 40 #include "llvm/Support/TargetRegistry.h" 41 #include "llvm/Support/TargetSelect.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include <algorithm> 44 #include <cstring> 45 #include <system_error> 46 using namespace llvm; 47 using namespace object; 48 49 static cl::opt<bool> 50 UseDbg("g", cl::desc("Print line information from debug info if available")); 51 52 static cl::opt<std::string> 53 DSYMFile("dsym", cl::desc("Use .dSYM file for debug info")); 54 55 static cl::opt<bool> 56 FullLeadingAddr("full-leading-addr", 57 cl::desc("Print full leading address")); 58 59 static cl::opt<bool> 60 PrintImmHex("print-imm-hex", 61 cl::desc("Use hex format for immediate values")); 62 63 static std::string ThumbTripleName; 64 65 static const Target *GetTarget(const MachOObjectFile *MachOObj, 66 const char **McpuDefault, 67 const Target **ThumbTarget) { 68 // Figure out the target triple. 69 if (TripleName.empty()) { 70 llvm::Triple TT("unknown-unknown-unknown"); 71 llvm::Triple ThumbTriple = Triple(); 72 TT = MachOObj->getArch(McpuDefault, &ThumbTriple); 73 TripleName = TT.str(); 74 ThumbTripleName = ThumbTriple.str(); 75 } 76 77 // Get the target specific parser. 78 std::string Error; 79 const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); 80 if (TheTarget && ThumbTripleName.empty()) 81 return TheTarget; 82 83 *ThumbTarget = TargetRegistry::lookupTarget(ThumbTripleName, Error); 84 if (*ThumbTarget) 85 return TheTarget; 86 87 errs() << "llvm-objdump: error: unable to get target for '"; 88 if (!TheTarget) 89 errs() << TripleName; 90 else 91 errs() << ThumbTripleName; 92 errs() << "', see --version and --triple.\n"; 93 return nullptr; 94 } 95 96 struct SymbolSorter { 97 bool operator()(const SymbolRef &A, const SymbolRef &B) { 98 SymbolRef::Type AType, BType; 99 A.getType(AType); 100 B.getType(BType); 101 102 uint64_t AAddr, BAddr; 103 if (AType != SymbolRef::ST_Function) 104 AAddr = 0; 105 else 106 A.getAddress(AAddr); 107 if (BType != SymbolRef::ST_Function) 108 BAddr = 0; 109 else 110 B.getAddress(BAddr); 111 return AAddr < BAddr; 112 } 113 }; 114 115 // Types for the storted data in code table that is built before disassembly 116 // and the predicate function to sort them. 117 typedef std::pair<uint64_t, DiceRef> DiceTableEntry; 118 typedef std::vector<DiceTableEntry> DiceTable; 119 typedef DiceTable::iterator dice_table_iterator; 120 121 static bool 122 compareDiceTableEntries(const DiceTableEntry i, 123 const DiceTableEntry j) { 124 return i.first == j.first; 125 } 126 127 static void DumpDataInCode(const char *bytes, uint64_t Size, 128 unsigned short Kind) { 129 uint64_t Value; 130 131 switch (Kind) { 132 case MachO::DICE_KIND_DATA: 133 switch (Size) { 134 case 4: 135 Value = bytes[3] << 24 | 136 bytes[2] << 16 | 137 bytes[1] << 8 | 138 bytes[0]; 139 outs() << "\t.long " << Value; 140 break; 141 case 2: 142 Value = bytes[1] << 8 | 143 bytes[0]; 144 outs() << "\t.short " << Value; 145 break; 146 case 1: 147 Value = bytes[0]; 148 outs() << "\t.byte " << Value; 149 break; 150 } 151 outs() << "\t@ KIND_DATA\n"; 152 break; 153 case MachO::DICE_KIND_JUMP_TABLE8: 154 Value = bytes[0]; 155 outs() << "\t.byte " << Value << "\t@ KIND_JUMP_TABLE8"; 156 break; 157 case MachO::DICE_KIND_JUMP_TABLE16: 158 Value = bytes[1] << 8 | 159 bytes[0]; 160 outs() << "\t.short " << Value << "\t@ KIND_JUMP_TABLE16"; 161 break; 162 case MachO::DICE_KIND_JUMP_TABLE32: 163 Value = bytes[3] << 24 | 164 bytes[2] << 16 | 165 bytes[1] << 8 | 166 bytes[0]; 167 outs() << "\t.long " << Value << "\t@ KIND_JUMP_TABLE32"; 168 break; 169 default: 170 outs() << "\t@ data in code kind = " << Kind << "\n"; 171 break; 172 } 173 } 174 175 static void getSectionsAndSymbols(const MachO::mach_header Header, 176 MachOObjectFile *MachOObj, 177 std::vector<SectionRef> &Sections, 178 std::vector<SymbolRef> &Symbols, 179 SmallVectorImpl<uint64_t> &FoundFns, 180 uint64_t &BaseSegmentAddress) { 181 for (const SymbolRef &Symbol : MachOObj->symbols()) 182 Symbols.push_back(Symbol); 183 184 for (const SectionRef &Section : MachOObj->sections()) { 185 StringRef SectName; 186 Section.getName(SectName); 187 Sections.push_back(Section); 188 } 189 190 MachOObjectFile::LoadCommandInfo Command = 191 MachOObj->getFirstLoadCommandInfo(); 192 bool BaseSegmentAddressSet = false; 193 for (unsigned i = 0; ; ++i) { 194 if (Command.C.cmd == MachO::LC_FUNCTION_STARTS) { 195 // We found a function starts segment, parse the addresses for later 196 // consumption. 197 MachO::linkedit_data_command LLC = 198 MachOObj->getLinkeditDataLoadCommand(Command); 199 200 MachOObj->ReadULEB128s(LLC.dataoff, FoundFns); 201 } 202 else if (Command.C.cmd == MachO::LC_SEGMENT) { 203 MachO::segment_command SLC = 204 MachOObj->getSegmentLoadCommand(Command); 205 StringRef SegName = SLC.segname; 206 if(!BaseSegmentAddressSet && SegName != "__PAGEZERO") { 207 BaseSegmentAddressSet = true; 208 BaseSegmentAddress = SLC.vmaddr; 209 } 210 } 211 212 if (i == Header.ncmds - 1) 213 break; 214 else 215 Command = MachOObj->getNextLoadCommandInfo(Command); 216 } 217 } 218 219 static void DisassembleInputMachO2(StringRef Filename, 220 MachOObjectFile *MachOOF); 221 222 void llvm::DisassembleInputMachO(StringRef Filename) { 223 ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr = 224 MemoryBuffer::getFileOrSTDIN(Filename); 225 if (std::error_code EC = BuffOrErr.getError()) { 226 errs() << "llvm-objdump: " << Filename << ": " << EC.message() << "\n"; 227 return; 228 } 229 std::unique_ptr<MemoryBuffer> Buff = std::move(BuffOrErr.get()); 230 231 std::unique_ptr<MachOObjectFile> MachOOF = std::move( 232 ObjectFile::createMachOObjectFile(Buff.get()->getMemBufferRef()).get()); 233 234 DisassembleInputMachO2(Filename, MachOOF.get()); 235 } 236 237 typedef DenseMap<uint64_t, StringRef> SymbolAddressMap; 238 239 // The block of info used by the Symbolizer call backs. 240 struct DisassembleInfo { 241 bool verbose; 242 MachOObjectFile *O; 243 SectionRef S; 244 SymbolAddressMap *AddrMap; 245 }; 246 247 // SymbolizerGetOpInfo() is the operand information call back function. 248 // This is called to get the symbolic information for operand(s) of an 249 // instruction when it is being done. This routine does this from 250 // the relocation information, symbol table, etc. That block of information 251 // is a pointer to the struct DisassembleInfo that was passed when the 252 // disassembler context was created and passed to back to here when 253 // called back by the disassembler for instruction operands that could have 254 // relocation information. The address of the instruction containing operand is 255 // at the Pc parameter. The immediate value the operand has is passed in 256 // op_info->Value and is at Offset past the start of the instruction and has a 257 // byte Size of 1, 2 or 4. The symbolc information is returned in TagBuf is the 258 // LLVMOpInfo1 struct defined in the header "llvm-c/Disassembler.h" as symbol 259 // names and addends of the symbolic expression to add for the operand. The 260 // value of TagType is currently 1 (for the LLVMOpInfo1 struct). If symbolic 261 // information is returned then this function returns 1 else it returns 0. 262 int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, 263 uint64_t Size, int TagType, void *TagBuf) { 264 struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo; 265 struct LLVMOpInfo1 *op_info = (struct LLVMOpInfo1 *)TagBuf; 266 unsigned int value = op_info->Value; 267 268 // Make sure all fields returned are zero if we don't set them. 269 memset((void *)op_info, '\0', sizeof(struct LLVMOpInfo1)); 270 op_info->Value = value; 271 272 // If the TagType is not the value 1 which it code knows about or if no 273 // verbose symbolic information is wanted then just return 0, indicating no 274 // information is being returned. 275 if (TagType != 1 || info->verbose == false) 276 return 0; 277 278 unsigned int Arch = info->O->getArch(); 279 if (Arch == Triple::x86) { 280 return 0; 281 } else if (Arch == Triple::x86_64) { 282 if (Size != 1 && Size != 2 && Size != 4 && Size != 0) 283 return 0; 284 // First search the section's relocation entries (if any) for an entry 285 // for this section offset. 286 uint64_t sect_addr; 287 info->S.getAddress(sect_addr); 288 uint64_t sect_offset = (Pc + Offset) - sect_addr; 289 bool reloc_found = false; 290 DataRefImpl Rel; 291 MachO::any_relocation_info RE; 292 bool isExtern = false; 293 SymbolRef Symbol; 294 for (const RelocationRef &Reloc : info->S.relocations()) { 295 uint64_t RelocOffset; 296 Reloc.getOffset(RelocOffset); 297 if (RelocOffset == sect_offset) { 298 Rel = Reloc.getRawDataRefImpl(); 299 RE = info->O->getRelocation(Rel); 300 // NOTE: Scattered relocations don't exist on x86_64. 301 isExtern = info->O->getPlainRelocationExternal(RE); 302 if (isExtern) { 303 symbol_iterator RelocSym = Reloc.getSymbol(); 304 Symbol = *RelocSym; 305 } 306 reloc_found = true; 307 break; 308 } 309 } 310 if (reloc_found && isExtern) { 311 // The Value passed in will be adjusted by the Pc if the instruction 312 // adds the Pc. But for x86_64 external relocation entries the Value 313 // is the offset from the external symbol. 314 if (info->O->getAnyRelocationPCRel(RE)) 315 op_info->Value -= Pc + Offset + Size; 316 StringRef SymName; 317 Symbol.getName(SymName); 318 const char *name = SymName.data(); 319 unsigned Type = info->O->getAnyRelocationType(RE); 320 if (Type == MachO::X86_64_RELOC_SUBTRACTOR) { 321 DataRefImpl RelNext = Rel; 322 info->O->moveRelocationNext(RelNext); 323 MachO::any_relocation_info RENext = info->O->getRelocation(RelNext); 324 unsigned TypeNext = info->O->getAnyRelocationType(RENext); 325 bool isExternNext = info->O->getPlainRelocationExternal(RENext); 326 unsigned SymbolNum = info->O->getPlainRelocationSymbolNum(RENext); 327 if (TypeNext == MachO::X86_64_RELOC_UNSIGNED && isExternNext) { 328 op_info->SubtractSymbol.Present = 1; 329 op_info->SubtractSymbol.Name = name; 330 symbol_iterator RelocSymNext = info->O->getSymbolByIndex(SymbolNum); 331 Symbol = *RelocSymNext; 332 StringRef SymNameNext; 333 Symbol.getName(SymNameNext); 334 name = SymNameNext.data(); 335 } 336 } 337 // TODO: add the VariantKinds to op_info->VariantKind for relocation types 338 // like: X86_64_RELOC_TLV, X86_64_RELOC_GOT_LOAD and X86_64_RELOC_GOT. 339 op_info->AddSymbol.Present = 1; 340 op_info->AddSymbol.Name = name; 341 return 1; 342 } 343 // TODO: 344 // Second search the external relocation entries of a fully linked image 345 // (if any) for an entry that matches this segment offset. 346 //uint64_t seg_offset = (Pc + Offset); 347 return 0; 348 } else if (Arch == Triple::arm) { 349 return 0; 350 } else if (Arch == Triple::aarch64) { 351 return 0; 352 } else { 353 return 0; 354 } 355 } 356 357 // GuessCstringPointer is passed the address of what might be a pointer to a 358 // literal string in a cstring section. If that address is in a cstring section 359 // it returns a pointer to that string. Else it returns nullptr. 360 const char *GuessCstringPointer(uint64_t ReferenceValue, 361 struct DisassembleInfo *info) { 362 uint32_t LoadCommandCount = info->O->getHeader().ncmds; 363 MachOObjectFile::LoadCommandInfo Load = info->O->getFirstLoadCommandInfo(); 364 for (unsigned I = 0;; ++I) { 365 if (Load.C.cmd == MachO::LC_SEGMENT_64) { 366 MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load); 367 for (unsigned J = 0; J < Seg.nsects; ++J) { 368 MachO::section_64 Sec = info->O->getSection64(Load, J); 369 uint32_t section_type = Sec.flags & MachO::SECTION_TYPE; 370 if (section_type == MachO::S_CSTRING_LITERALS && 371 ReferenceValue >= Sec.addr && 372 ReferenceValue < Sec.addr + Sec.size) { 373 uint64_t sect_offset = ReferenceValue - Sec.addr; 374 uint64_t object_offset = Sec.offset + sect_offset; 375 StringRef MachOContents = info->O->getData(); 376 uint64_t object_size = MachOContents.size(); 377 const char *object_addr = (const char *)MachOContents.data(); 378 if (object_offset < object_size) { 379 const char *name = object_addr + object_offset; 380 return name; 381 } else { 382 return nullptr; 383 } 384 } 385 } 386 } else if (Load.C.cmd == MachO::LC_SEGMENT) { 387 MachO::segment_command Seg = info->O->getSegmentLoadCommand(Load); 388 for (unsigned J = 0; J < Seg.nsects; ++J) { 389 MachO::section Sec = info->O->getSection(Load, J); 390 uint32_t section_type = Sec.flags & MachO::SECTION_TYPE; 391 if (section_type == MachO::S_CSTRING_LITERALS && 392 ReferenceValue >= Sec.addr && 393 ReferenceValue < Sec.addr + Sec.size) { 394 uint64_t sect_offset = ReferenceValue - Sec.addr; 395 uint64_t object_offset = Sec.offset + sect_offset; 396 StringRef MachOContents = info->O->getData(); 397 uint64_t object_size = MachOContents.size(); 398 const char *object_addr = (const char *)MachOContents.data(); 399 if (object_offset < object_size) { 400 const char *name = object_addr + object_offset; 401 return name; 402 } else { 403 return nullptr; 404 } 405 } 406 } 407 } 408 if (I == LoadCommandCount - 1) 409 break; 410 else 411 Load = info->O->getNextLoadCommandInfo(Load); 412 } 413 return nullptr; 414 } 415 416 // GuessIndirectSymbol returns the name of the indirect symbol for the 417 // ReferenceValue passed in or nullptr. This is used when ReferenceValue maybe 418 // an address of a symbol stub or a lazy or non-lazy pointer to associate the 419 // symbol name being referenced by the stub or pointer. 420 static const char *GuessIndirectSymbol(uint64_t ReferenceValue, 421 struct DisassembleInfo *info) { 422 uint32_t LoadCommandCount = info->O->getHeader().ncmds; 423 MachOObjectFile::LoadCommandInfo Load = info->O->getFirstLoadCommandInfo(); 424 MachO::dysymtab_command Dysymtab = info->O->getDysymtabLoadCommand(); 425 MachO::symtab_command Symtab = info->O->getSymtabLoadCommand(); 426 for (unsigned I = 0;; ++I) { 427 if (Load.C.cmd == MachO::LC_SEGMENT_64) { 428 MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load); 429 for (unsigned J = 0; J < Seg.nsects; ++J) { 430 MachO::section_64 Sec = info->O->getSection64(Load, J); 431 uint32_t section_type = Sec.flags & MachO::SECTION_TYPE; 432 if ((section_type == MachO::S_NON_LAZY_SYMBOL_POINTERS || 433 section_type == MachO::S_LAZY_SYMBOL_POINTERS || 434 section_type == MachO::S_LAZY_DYLIB_SYMBOL_POINTERS || 435 section_type == MachO::S_THREAD_LOCAL_VARIABLE_POINTERS || 436 section_type == MachO::S_SYMBOL_STUBS) && 437 ReferenceValue >= Sec.addr && 438 ReferenceValue < Sec.addr + Sec.size) { 439 uint32_t stride; 440 if (section_type == MachO::S_SYMBOL_STUBS) 441 stride = Sec.reserved2; 442 else 443 stride = 8; 444 if (stride == 0) 445 return nullptr; 446 uint32_t index = Sec.reserved1 + (ReferenceValue - Sec.addr) / stride; 447 if (index < Dysymtab.nindirectsyms) { 448 uint32_t indirect_symbol = 449 info->O->getIndirectSymbolTableEntry(Dysymtab, index); 450 if (indirect_symbol < Symtab.nsyms) { 451 symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol); 452 SymbolRef Symbol = *Sym; 453 StringRef SymName; 454 Symbol.getName(SymName); 455 const char *name = SymName.data(); 456 return name; 457 } 458 } 459 } 460 } 461 } else if (Load.C.cmd == MachO::LC_SEGMENT) { 462 MachO::segment_command Seg = info->O->getSegmentLoadCommand(Load); 463 for (unsigned J = 0; J < Seg.nsects; ++J) { 464 MachO::section Sec = info->O->getSection(Load, J); 465 uint32_t section_type = Sec.flags & MachO::SECTION_TYPE; 466 if ((section_type == MachO::S_NON_LAZY_SYMBOL_POINTERS || 467 section_type == MachO::S_LAZY_SYMBOL_POINTERS || 468 section_type == MachO::S_LAZY_DYLIB_SYMBOL_POINTERS || 469 section_type == MachO::S_THREAD_LOCAL_VARIABLE_POINTERS || 470 section_type == MachO::S_SYMBOL_STUBS) && 471 ReferenceValue >= Sec.addr && 472 ReferenceValue < Sec.addr + Sec.size) { 473 uint32_t stride; 474 if (section_type == MachO::S_SYMBOL_STUBS) 475 stride = Sec.reserved2; 476 else 477 stride = 4; 478 if (stride == 0) 479 return nullptr; 480 uint32_t index = Sec.reserved1 + (ReferenceValue - Sec.addr) / stride; 481 if (index < Dysymtab.nindirectsyms) { 482 uint32_t indirect_symbol = 483 info->O->getIndirectSymbolTableEntry(Dysymtab, index); 484 if (indirect_symbol < Symtab.nsyms) { 485 symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol); 486 SymbolRef Symbol = *Sym; 487 StringRef SymName; 488 Symbol.getName(SymName); 489 const char *name = SymName.data(); 490 return name; 491 } 492 } 493 } 494 } 495 } 496 if (I == LoadCommandCount - 1) 497 break; 498 else 499 Load = info->O->getNextLoadCommandInfo(Load); 500 } 501 return nullptr; 502 } 503 504 // GuessLiteralPointer returns a string which for the item in the Mach-O file 505 // for the address passed in as ReferenceValue for printing as a comment with 506 // the instruction and also returns the corresponding type of that item 507 // indirectly through ReferenceType. 508 // 509 // If ReferenceValue is an address of literal cstring then a pointer to the 510 // cstring is returned and ReferenceType is set to 511 // LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr . 512 // 513 // TODO: other literals such as Objective-C CFStrings refs, Selector refs, 514 // Message refs, Class refs and a Symbol address in a literal pool are yet 515 // to be done here. 516 const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC, 517 uint64_t *ReferenceType, 518 struct DisassembleInfo *info) { 519 // TODO: This rouine's code is only for an x86_64 Mach-O file for now. 520 unsigned int Arch = info->O->getArch(); 521 if (Arch != Triple::x86_64) 522 return nullptr; 523 524 // First see if there is an external relocation entry at the ReferencePC. 525 uint64_t sect_addr; 526 info->S.getAddress(sect_addr); 527 uint64_t sect_offset = ReferencePC - sect_addr; 528 bool reloc_found = false; 529 DataRefImpl Rel; 530 MachO::any_relocation_info RE; 531 bool isExtern = false; 532 SymbolRef Symbol; 533 for (const RelocationRef &Reloc : info->S.relocations()) { 534 uint64_t RelocOffset; 535 Reloc.getOffset(RelocOffset); 536 if (RelocOffset == sect_offset) { 537 Rel = Reloc.getRawDataRefImpl(); 538 RE = info->O->getRelocation(Rel); 539 if (info->O->isRelocationScattered(RE)) 540 continue; 541 isExtern = info->O->getPlainRelocationExternal(RE); 542 if (isExtern) { 543 symbol_iterator RelocSym = Reloc.getSymbol(); 544 Symbol = *RelocSym; 545 } 546 reloc_found = true; 547 break; 548 } 549 } 550 // If there is an external relocation entry for a symbol in a section 551 // then used that symbol's value for the value of the reference. 552 if (reloc_found && isExtern) { 553 if (info->O->getAnyRelocationPCRel(RE)) { 554 unsigned Type = info->O->getAnyRelocationType(RE); 555 if (Type == MachO::X86_64_RELOC_SIGNED) { 556 Symbol.getAddress(ReferenceValue); 557 } 558 } 559 } 560 561 // TODO: the code to look for other literals such as Objective-C CFStrings 562 // refs, Selector refs, Message refs, Class refs will be added here. 563 564 const char *name = GuessCstringPointer(ReferenceValue, info); 565 if (name) { 566 // TODO: note when the code is added above for Selector refs and Message 567 // refs we will need check for that here and set the ReferenceType 568 // accordingly. 569 *ReferenceType = LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr; 570 return name; 571 } 572 573 // TODO: look for an indirect symbol with this ReferenceValue which is in 574 // a literal pool. 575 576 return nullptr; 577 } 578 579 // SymbolizerSymbolLookUp is the symbol lookup function passed when creating 580 // the Symbolizer. It looks up the ReferenceValue using the info passed via the 581 // pointer to the struct DisassembleInfo that was passed when MCSymbolizer 582 // is created and returns the symbol name that matches the ReferenceValue or 583 // nullptr if none. The ReferenceType is passed in for the IN type of 584 // reference the instruction is making from the values in defined in the header 585 // "llvm-c/Disassembler.h". On return the ReferenceType can set to a specific 586 // Out type and the ReferenceName will also be set which is added as a comment 587 // to the disassembled instruction. 588 // 589 // If the symbol name is a C++ mangled name then the demangled name is 590 // returned through ReferenceName and ReferenceType is set to 591 // LLVMDisassembler_ReferenceType_DeMangled_Name . 592 // 593 // When this is called to get a symbol name for a branch target then the 594 // ReferenceType will be LLVMDisassembler_ReferenceType_In_Branch and then 595 // SymbolValue will be looked for in the indirect symbol table to determine if 596 // it is an address for a symbol stub. If so then the symbol name for that 597 // stub is returned indirectly through ReferenceName and then ReferenceType is 598 // set to LLVMDisassembler_ReferenceType_Out_SymbolStub. 599 // 600 // When this is called with an value loaded via a PC relative load then 601 // ReferenceType will be LLVMDisassembler_ReferenceType_In_PCrel_Load then the 602 // SymbolValue is checked to be an address of literal pointer, symbol pointer, 603 // or an Objective-C meta data reference. If so the output ReferenceType is 604 // set to correspond to that as well as ReferenceName. 605 const char *SymbolizerSymbolLookUp(void *DisInfo, uint64_t ReferenceValue, 606 uint64_t *ReferenceType, 607 uint64_t ReferencePC, 608 const char **ReferenceName) { 609 struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo; 610 // If no verbose symbolic information is wanted then just return nullptr. 611 if (info->verbose == false) { 612 *ReferenceName = nullptr; 613 *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; 614 return nullptr; 615 } 616 617 const char *SymbolName = nullptr; 618 StringRef name = info->AddrMap->lookup(ReferenceValue); 619 if (!name.empty()) 620 SymbolName = name.data(); 621 622 if (*ReferenceType == LLVMDisassembler_ReferenceType_In_Branch) { 623 *ReferenceName = GuessIndirectSymbol(ReferenceValue, info); 624 if (*ReferenceName) 625 *ReferenceType = LLVMDisassembler_ReferenceType_Out_SymbolStub; 626 else 627 *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; 628 } 629 else if (*ReferenceType == LLVMDisassembler_ReferenceType_In_PCrel_Load) { 630 *ReferenceName = GuessLiteralPointer(ReferenceValue, ReferencePC, 631 ReferenceType, info); 632 if (*ReferenceName == nullptr) 633 *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; 634 // TODO: other types of references to be added. 635 } else { 636 *ReferenceName = nullptr; 637 *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; 638 } 639 640 return SymbolName; 641 } 642 643 // 644 // This is the memory object used by DisAsm->getInstruction() which has its 645 // BasePC. This then allows the 'address' parameter to getInstruction() to 646 // be the actual PC of the instruction. Then when a branch dispacement is 647 // added to the PC of an instruction, the 'ReferenceValue' passed to the 648 // SymbolizerSymbolLookUp() routine is the correct target addresses. As in 649 // the case of a fully linked Mach-O file where a section being disassembled 650 // generally not linked at address zero. 651 // 652 class DisasmMemoryObject : public MemoryObject { 653 const uint8_t *Bytes; 654 uint64_t Size; 655 uint64_t BasePC; 656 public: 657 DisasmMemoryObject(const uint8_t *bytes, uint64_t size, uint64_t basePC) : 658 Bytes(bytes), Size(size), BasePC(basePC) {} 659 660 uint64_t getBase() const override { return BasePC; } 661 uint64_t getExtent() const override { return Size; } 662 663 int readByte(uint64_t Addr, uint8_t *Byte) const override { 664 if (Addr - BasePC >= Size) 665 return -1; 666 *Byte = Bytes[Addr - BasePC]; 667 return 0; 668 } 669 }; 670 671 /// \brief Emits the comments that are stored in the CommentStream. 672 /// Each comment in the CommentStream must end with a newline. 673 static void emitComments(raw_svector_ostream &CommentStream, 674 SmallString<128> &CommentsToEmit, 675 formatted_raw_ostream &FormattedOS, 676 const MCAsmInfo &MAI) { 677 // Flush the stream before taking its content. 678 CommentStream.flush(); 679 StringRef Comments = CommentsToEmit.str(); 680 // Get the default information for printing a comment. 681 const char *CommentBegin = MAI.getCommentString(); 682 unsigned CommentColumn = MAI.getCommentColumn(); 683 bool IsFirst = true; 684 while (!Comments.empty()) { 685 if (!IsFirst) 686 FormattedOS << '\n'; 687 // Emit a line of comments. 688 FormattedOS.PadToColumn(CommentColumn); 689 size_t Position = Comments.find('\n'); 690 FormattedOS << CommentBegin << ' ' << Comments.substr(0, Position); 691 // Move after the newline character. 692 Comments = Comments.substr(Position + 1); 693 IsFirst = false; 694 } 695 FormattedOS.flush(); 696 697 // Tell the comment stream that the vector changed underneath it. 698 CommentsToEmit.clear(); 699 CommentStream.resync(); 700 } 701 702 static void DisassembleInputMachO2(StringRef Filename, 703 MachOObjectFile *MachOOF) { 704 const char *McpuDefault = nullptr; 705 const Target *ThumbTarget = nullptr; 706 const Target *TheTarget = GetTarget(MachOOF, &McpuDefault, &ThumbTarget); 707 if (!TheTarget) { 708 // GetTarget prints out stuff. 709 return; 710 } 711 if (MCPU.empty() && McpuDefault) 712 MCPU = McpuDefault; 713 714 std::unique_ptr<const MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo()); 715 std::unique_ptr<MCInstrAnalysis> InstrAnalysis( 716 TheTarget->createMCInstrAnalysis(InstrInfo.get())); 717 std::unique_ptr<const MCInstrInfo> ThumbInstrInfo; 718 std::unique_ptr<MCInstrAnalysis> ThumbInstrAnalysis; 719 if (ThumbTarget) { 720 ThumbInstrInfo.reset(ThumbTarget->createMCInstrInfo()); 721 ThumbInstrAnalysis.reset( 722 ThumbTarget->createMCInstrAnalysis(ThumbInstrInfo.get())); 723 } 724 725 // Package up features to be passed to target/subtarget 726 std::string FeaturesStr; 727 if (MAttrs.size()) { 728 SubtargetFeatures Features; 729 for (unsigned i = 0; i != MAttrs.size(); ++i) 730 Features.AddFeature(MAttrs[i]); 731 FeaturesStr = Features.getString(); 732 } 733 734 // Set up disassembler. 735 std::unique_ptr<const MCRegisterInfo> MRI( 736 TheTarget->createMCRegInfo(TripleName)); 737 std::unique_ptr<const MCAsmInfo> AsmInfo( 738 TheTarget->createMCAsmInfo(*MRI, TripleName)); 739 std::unique_ptr<const MCSubtargetInfo> STI( 740 TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr)); 741 MCContext Ctx(AsmInfo.get(), MRI.get(), nullptr); 742 std::unique_ptr<MCDisassembler> DisAsm( 743 TheTarget->createMCDisassembler(*STI, Ctx)); 744 std::unique_ptr<MCSymbolizer> Symbolizer; 745 struct DisassembleInfo SymbolizerInfo; 746 std::unique_ptr<MCRelocationInfo> RelInfo( 747 TheTarget->createMCRelocationInfo(TripleName, Ctx)); 748 if (RelInfo) { 749 Symbolizer.reset(TheTarget->createMCSymbolizer( 750 TripleName, SymbolizerGetOpInfo, SymbolizerSymbolLookUp, 751 &SymbolizerInfo, &Ctx, RelInfo.release())); 752 DisAsm->setSymbolizer(std::move(Symbolizer)); 753 } 754 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 755 std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter( 756 AsmPrinterVariant, *AsmInfo, *InstrInfo, *MRI, *STI)); 757 // Set the display preference for hex vs. decimal immediates. 758 IP->setPrintImmHex(PrintImmHex); 759 // Comment stream and backing vector. 760 SmallString<128> CommentsToEmit; 761 raw_svector_ostream CommentStream(CommentsToEmit); 762 IP->setCommentStream(CommentStream); 763 764 if (!InstrAnalysis || !AsmInfo || !STI || !DisAsm || !IP) { 765 errs() << "error: couldn't initialize disassembler for target " 766 << TripleName << '\n'; 767 return; 768 } 769 770 // Set up thumb disassembler. 771 std::unique_ptr<const MCRegisterInfo> ThumbMRI; 772 std::unique_ptr<const MCAsmInfo> ThumbAsmInfo; 773 std::unique_ptr<const MCSubtargetInfo> ThumbSTI; 774 std::unique_ptr<const MCDisassembler> ThumbDisAsm; 775 std::unique_ptr<MCInstPrinter> ThumbIP; 776 std::unique_ptr<MCContext> ThumbCtx; 777 if (ThumbTarget) { 778 ThumbMRI.reset(ThumbTarget->createMCRegInfo(ThumbTripleName)); 779 ThumbAsmInfo.reset( 780 ThumbTarget->createMCAsmInfo(*ThumbMRI, ThumbTripleName)); 781 ThumbSTI.reset( 782 ThumbTarget->createMCSubtargetInfo(ThumbTripleName, MCPU, FeaturesStr)); 783 ThumbCtx.reset(new MCContext(ThumbAsmInfo.get(), ThumbMRI.get(), nullptr)); 784 ThumbDisAsm.reset(ThumbTarget->createMCDisassembler(*ThumbSTI, *ThumbCtx)); 785 // TODO: add MCSymbolizer here for the ThumbTarget like above for TheTarget. 786 int ThumbAsmPrinterVariant = ThumbAsmInfo->getAssemblerDialect(); 787 ThumbIP.reset(ThumbTarget->createMCInstPrinter( 788 ThumbAsmPrinterVariant, *ThumbAsmInfo, *ThumbInstrInfo, *ThumbMRI, 789 *ThumbSTI)); 790 // Set the display preference for hex vs. decimal immediates. 791 ThumbIP->setPrintImmHex(PrintImmHex); 792 } 793 794 if (ThumbTarget && (!ThumbInstrAnalysis || !ThumbAsmInfo || !ThumbSTI || 795 !ThumbDisAsm || !ThumbIP)) { 796 errs() << "error: couldn't initialize disassembler for target " 797 << ThumbTripleName << '\n'; 798 return; 799 } 800 801 outs() << '\n' << Filename << ":\n\n"; 802 803 MachO::mach_header Header = MachOOF->getHeader(); 804 805 // FIXME: Using the -cfg command line option, this code used to be able to 806 // annotate relocations with the referenced symbol's name, and if this was 807 // inside a __[cf]string section, the data it points to. This is now replaced 808 // by the upcoming MCSymbolizer, which needs the appropriate setup done above. 809 std::vector<SectionRef> Sections; 810 std::vector<SymbolRef> Symbols; 811 SmallVector<uint64_t, 8> FoundFns; 812 uint64_t BaseSegmentAddress; 813 814 getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns, 815 BaseSegmentAddress); 816 817 // Sort the symbols by address, just in case they didn't come in that way. 818 std::sort(Symbols.begin(), Symbols.end(), SymbolSorter()); 819 820 // Build a data in code table that is sorted on by the address of each entry. 821 uint64_t BaseAddress = 0; 822 if (Header.filetype == MachO::MH_OBJECT) 823 Sections[0].getAddress(BaseAddress); 824 else 825 BaseAddress = BaseSegmentAddress; 826 DiceTable Dices; 827 for (dice_iterator DI = MachOOF->begin_dices(), DE = MachOOF->end_dices(); 828 DI != DE; ++DI) { 829 uint32_t Offset; 830 DI->getOffset(Offset); 831 Dices.push_back(std::make_pair(BaseAddress + Offset, *DI)); 832 } 833 array_pod_sort(Dices.begin(), Dices.end()); 834 835 #ifndef NDEBUG 836 raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls(); 837 #else 838 raw_ostream &DebugOut = nulls(); 839 #endif 840 841 std::unique_ptr<DIContext> diContext; 842 ObjectFile *DbgObj = MachOOF; 843 // Try to find debug info and set up the DIContext for it. 844 if (UseDbg) { 845 // A separate DSym file path was specified, parse it as a macho file, 846 // get the sections and supply it to the section name parsing machinery. 847 if (!DSYMFile.empty()) { 848 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = 849 MemoryBuffer::getFileOrSTDIN(DSYMFile); 850 if (std::error_code EC = BufOrErr.getError()) { 851 errs() << "llvm-objdump: " << Filename << ": " << EC.message() << '\n'; 852 return; 853 } 854 DbgObj = 855 ObjectFile::createMachOObjectFile(BufOrErr.get()->getMemBufferRef()) 856 .get() 857 .release(); 858 } 859 860 // Setup the DIContext 861 diContext.reset(DIContext::getDWARFContext(*DbgObj)); 862 } 863 864 for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) { 865 866 bool SectIsText = false; 867 Sections[SectIdx].isText(SectIsText); 868 if (SectIsText == false) 869 continue; 870 871 StringRef SectName; 872 if (Sections[SectIdx].getName(SectName) || 873 SectName != "__text") 874 continue; // Skip non-text sections 875 876 DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl(); 877 878 StringRef SegmentName = MachOOF->getSectionFinalSegmentName(DR); 879 if (SegmentName != "__TEXT") 880 continue; 881 882 StringRef Bytes; 883 Sections[SectIdx].getContents(Bytes); 884 uint64_t SectAddress = 0; 885 Sections[SectIdx].getAddress(SectAddress); 886 DisasmMemoryObject MemoryObject((const uint8_t *)Bytes.data(), Bytes.size(), 887 SectAddress); 888 bool symbolTableWorked = false; 889 890 // Parse relocations. 891 std::vector<std::pair<uint64_t, SymbolRef>> Relocs; 892 for (const RelocationRef &Reloc : Sections[SectIdx].relocations()) { 893 uint64_t RelocOffset, SectionAddress; 894 Reloc.getOffset(RelocOffset); 895 Sections[SectIdx].getAddress(SectionAddress); 896 RelocOffset -= SectionAddress; 897 898 symbol_iterator RelocSym = Reloc.getSymbol(); 899 900 Relocs.push_back(std::make_pair(RelocOffset, *RelocSym)); 901 } 902 array_pod_sort(Relocs.begin(), Relocs.end()); 903 904 // Create a map of symbol addresses to symbol names for use by 905 // the SymbolizerSymbolLookUp() routine. 906 SymbolAddressMap AddrMap; 907 for (const SymbolRef &Symbol : MachOOF->symbols()) { 908 SymbolRef::Type ST; 909 Symbol.getType(ST); 910 if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data || 911 ST == SymbolRef::ST_Other) { 912 uint64_t Address; 913 Symbol.getAddress(Address); 914 StringRef SymName; 915 Symbol.getName(SymName); 916 AddrMap[Address] = SymName; 917 } 918 } 919 // Set up the block of info used by the Symbolizer call backs. 920 SymbolizerInfo.verbose = true; 921 SymbolizerInfo.O = MachOOF; 922 SymbolizerInfo.S = Sections[SectIdx]; 923 SymbolizerInfo.AddrMap = &AddrMap; 924 925 // Disassemble symbol by symbol. 926 for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) { 927 StringRef SymName; 928 Symbols[SymIdx].getName(SymName); 929 930 SymbolRef::Type ST; 931 Symbols[SymIdx].getType(ST); 932 if (ST != SymbolRef::ST_Function) 933 continue; 934 935 // Make sure the symbol is defined in this section. 936 bool containsSym = false; 937 Sections[SectIdx].containsSymbol(Symbols[SymIdx], containsSym); 938 if (!containsSym) 939 continue; 940 941 // Start at the address of the symbol relative to the section's address. 942 uint64_t SectionAddress = 0; 943 uint64_t Start = 0; 944 Sections[SectIdx].getAddress(SectionAddress); 945 Symbols[SymIdx].getAddress(Start); 946 Start -= SectionAddress; 947 948 // Stop disassembling either at the beginning of the next symbol or at 949 // the end of the section. 950 bool containsNextSym = false; 951 uint64_t NextSym = 0; 952 uint64_t NextSymIdx = SymIdx+1; 953 while (Symbols.size() > NextSymIdx) { 954 SymbolRef::Type NextSymType; 955 Symbols[NextSymIdx].getType(NextSymType); 956 if (NextSymType == SymbolRef::ST_Function) { 957 Sections[SectIdx].containsSymbol(Symbols[NextSymIdx], 958 containsNextSym); 959 Symbols[NextSymIdx].getAddress(NextSym); 960 NextSym -= SectionAddress; 961 break; 962 } 963 ++NextSymIdx; 964 } 965 966 uint64_t SectSize; 967 Sections[SectIdx].getSize(SectSize); 968 uint64_t End = containsNextSym ? NextSym : SectSize; 969 uint64_t Size; 970 971 symbolTableWorked = true; 972 973 DataRefImpl Symb = Symbols[SymIdx].getRawDataRefImpl(); 974 bool isThumb = 975 (MachOOF->getSymbolFlags(Symb) & SymbolRef::SF_Thumb) && ThumbTarget; 976 977 outs() << SymName << ":\n"; 978 DILineInfo lastLine; 979 for (uint64_t Index = Start; Index < End; Index += Size) { 980 MCInst Inst; 981 982 uint64_t PC = SectAddress + Index; 983 if (FullLeadingAddr) { 984 if (MachOOF->is64Bit()) 985 outs() << format("%016" PRIx64, PC); 986 else 987 outs() << format("%08" PRIx64, PC); 988 } else { 989 outs() << format("%8" PRIx64 ":", PC); 990 } 991 if (!NoShowRawInsn) 992 outs() << "\t"; 993 994 // Check the data in code table here to see if this is data not an 995 // instruction to be disassembled. 996 DiceTable Dice; 997 Dice.push_back(std::make_pair(PC, DiceRef())); 998 dice_table_iterator DTI = std::search(Dices.begin(), Dices.end(), 999 Dice.begin(), Dice.end(), 1000 compareDiceTableEntries); 1001 if (DTI != Dices.end()){ 1002 uint16_t Length; 1003 DTI->second.getLength(Length); 1004 DumpBytes(StringRef(Bytes.data() + Index, Length)); 1005 uint16_t Kind; 1006 DTI->second.getKind(Kind); 1007 DumpDataInCode(Bytes.data() + Index, Length, Kind); 1008 continue; 1009 } 1010 1011 SmallVector<char, 64> AnnotationsBytes; 1012 raw_svector_ostream Annotations(AnnotationsBytes); 1013 1014 bool gotInst; 1015 if (isThumb) 1016 gotInst = ThumbDisAsm->getInstruction(Inst, Size, MemoryObject, PC, 1017 DebugOut, Annotations); 1018 else 1019 gotInst = DisAsm->getInstruction(Inst, Size, MemoryObject, PC, 1020 DebugOut, Annotations); 1021 if (gotInst) { 1022 if (!NoShowRawInsn) { 1023 DumpBytes(StringRef(Bytes.data() + Index, Size)); 1024 } 1025 formatted_raw_ostream FormattedOS(outs()); 1026 Annotations.flush(); 1027 StringRef AnnotationsStr = Annotations.str(); 1028 if (isThumb) 1029 ThumbIP->printInst(&Inst, FormattedOS, AnnotationsStr); 1030 else 1031 IP->printInst(&Inst, FormattedOS, AnnotationsStr); 1032 emitComments(CommentStream, CommentsToEmit, FormattedOS, *AsmInfo); 1033 1034 // Print debug info. 1035 if (diContext) { 1036 DILineInfo dli = 1037 diContext->getLineInfoForAddress(PC); 1038 // Print valid line info if it changed. 1039 if (dli != lastLine && dli.Line != 0) 1040 outs() << "\t## " << dli.FileName << ':' << dli.Line << ':' 1041 << dli.Column; 1042 lastLine = dli; 1043 } 1044 outs() << "\n"; 1045 } else { 1046 errs() << "llvm-objdump: warning: invalid instruction encoding\n"; 1047 if (Size == 0) 1048 Size = 1; // skip illegible bytes 1049 } 1050 } 1051 } 1052 if (!symbolTableWorked) { 1053 // Reading the symbol table didn't work, disassemble the whole section. 1054 uint64_t SectAddress; 1055 Sections[SectIdx].getAddress(SectAddress); 1056 uint64_t SectSize; 1057 Sections[SectIdx].getSize(SectSize); 1058 uint64_t InstSize; 1059 for (uint64_t Index = 0; Index < SectSize; Index += InstSize) { 1060 MCInst Inst; 1061 1062 uint64_t PC = SectAddress + Index; 1063 if (DisAsm->getInstruction(Inst, InstSize, MemoryObject, PC, 1064 DebugOut, nulls())) { 1065 if (FullLeadingAddr) { 1066 if (MachOOF->is64Bit()) 1067 outs() << format("%016" PRIx64, PC); 1068 else 1069 outs() << format("%08" PRIx64, PC); 1070 } else { 1071 outs() << format("%8" PRIx64 ":", PC); 1072 } 1073 if (!NoShowRawInsn) { 1074 outs() << "\t"; 1075 DumpBytes(StringRef(Bytes.data() + Index, InstSize)); 1076 } 1077 IP->printInst(&Inst, outs(), ""); 1078 outs() << "\n"; 1079 } else { 1080 errs() << "llvm-objdump: warning: invalid instruction encoding\n"; 1081 if (InstSize == 0) 1082 InstSize = 1; // skip illegible bytes 1083 } 1084 } 1085 } 1086 } 1087 } 1088 1089 1090 //===----------------------------------------------------------------------===// 1091 // __compact_unwind section dumping 1092 //===----------------------------------------------------------------------===// 1093 1094 namespace { 1095 1096 template <typename T> static uint64_t readNext(const char *&Buf) { 1097 using llvm::support::little; 1098 using llvm::support::unaligned; 1099 1100 uint64_t Val = support::endian::read<T, little, unaligned>(Buf); 1101 Buf += sizeof(T); 1102 return Val; 1103 } 1104 1105 struct CompactUnwindEntry { 1106 uint32_t OffsetInSection; 1107 1108 uint64_t FunctionAddr; 1109 uint32_t Length; 1110 uint32_t CompactEncoding; 1111 uint64_t PersonalityAddr; 1112 uint64_t LSDAAddr; 1113 1114 RelocationRef FunctionReloc; 1115 RelocationRef PersonalityReloc; 1116 RelocationRef LSDAReloc; 1117 1118 CompactUnwindEntry(StringRef Contents, unsigned Offset, bool Is64) 1119 : OffsetInSection(Offset) { 1120 if (Is64) 1121 read<uint64_t>(Contents.data() + Offset); 1122 else 1123 read<uint32_t>(Contents.data() + Offset); 1124 } 1125 1126 private: 1127 template<typename UIntPtr> 1128 void read(const char *Buf) { 1129 FunctionAddr = readNext<UIntPtr>(Buf); 1130 Length = readNext<uint32_t>(Buf); 1131 CompactEncoding = readNext<uint32_t>(Buf); 1132 PersonalityAddr = readNext<UIntPtr>(Buf); 1133 LSDAAddr = readNext<UIntPtr>(Buf); 1134 } 1135 }; 1136 } 1137 1138 /// Given a relocation from __compact_unwind, consisting of the RelocationRef 1139 /// and data being relocated, determine the best base Name and Addend to use for 1140 /// display purposes. 1141 /// 1142 /// 1. An Extern relocation will directly reference a symbol (and the data is 1143 /// then already an addend), so use that. 1144 /// 2. Otherwise the data is an offset in the object file's layout; try to find 1145 // a symbol before it in the same section, and use the offset from there. 1146 /// 3. Finally, if all that fails, fall back to an offset from the start of the 1147 /// referenced section. 1148 static void findUnwindRelocNameAddend(const MachOObjectFile *Obj, 1149 std::map<uint64_t, SymbolRef> &Symbols, 1150 const RelocationRef &Reloc, 1151 uint64_t Addr, 1152 StringRef &Name, uint64_t &Addend) { 1153 if (Reloc.getSymbol() != Obj->symbol_end()) { 1154 Reloc.getSymbol()->getName(Name); 1155 Addend = Addr; 1156 return; 1157 } 1158 1159 auto RE = Obj->getRelocation(Reloc.getRawDataRefImpl()); 1160 SectionRef RelocSection = Obj->getRelocationSection(RE); 1161 1162 uint64_t SectionAddr; 1163 RelocSection.getAddress(SectionAddr); 1164 1165 auto Sym = Symbols.upper_bound(Addr); 1166 if (Sym == Symbols.begin()) { 1167 // The first symbol in the object is after this reference, the best we can 1168 // do is section-relative notation. 1169 RelocSection.getName(Name); 1170 Addend = Addr - SectionAddr; 1171 return; 1172 } 1173 1174 // Go back one so that SymbolAddress <= Addr. 1175 --Sym; 1176 1177 section_iterator SymSection = Obj->section_end(); 1178 Sym->second.getSection(SymSection); 1179 if (RelocSection == *SymSection) { 1180 // There's a valid symbol in the same section before this reference. 1181 Sym->second.getName(Name); 1182 Addend = Addr - Sym->first; 1183 return; 1184 } 1185 1186 // There is a symbol before this reference, but it's in a different 1187 // section. Probably not helpful to mention it, so use the section name. 1188 RelocSection.getName(Name); 1189 Addend = Addr - SectionAddr; 1190 } 1191 1192 static void printUnwindRelocDest(const MachOObjectFile *Obj, 1193 std::map<uint64_t, SymbolRef> &Symbols, 1194 const RelocationRef &Reloc, 1195 uint64_t Addr) { 1196 StringRef Name; 1197 uint64_t Addend; 1198 1199 if (!Reloc.getObjectFile()) 1200 return; 1201 1202 findUnwindRelocNameAddend(Obj, Symbols, Reloc, Addr, Name, Addend); 1203 1204 outs() << Name; 1205 if (Addend) 1206 outs() << " + " << format("0x%" PRIx64, Addend); 1207 } 1208 1209 static void 1210 printMachOCompactUnwindSection(const MachOObjectFile *Obj, 1211 std::map<uint64_t, SymbolRef> &Symbols, 1212 const SectionRef &CompactUnwind) { 1213 1214 assert(Obj->isLittleEndian() && 1215 "There should not be a big-endian .o with __compact_unwind"); 1216 1217 bool Is64 = Obj->is64Bit(); 1218 uint32_t PointerSize = Is64 ? sizeof(uint64_t) : sizeof(uint32_t); 1219 uint32_t EntrySize = 3 * PointerSize + 2 * sizeof(uint32_t); 1220 1221 StringRef Contents; 1222 CompactUnwind.getContents(Contents); 1223 1224 SmallVector<CompactUnwindEntry, 4> CompactUnwinds; 1225 1226 // First populate the initial raw offsets, encodings and so on from the entry. 1227 for (unsigned Offset = 0; Offset < Contents.size(); Offset += EntrySize) { 1228 CompactUnwindEntry Entry(Contents.data(), Offset, Is64); 1229 CompactUnwinds.push_back(Entry); 1230 } 1231 1232 // Next we need to look at the relocations to find out what objects are 1233 // actually being referred to. 1234 for (const RelocationRef &Reloc : CompactUnwind.relocations()) { 1235 uint64_t RelocAddress; 1236 Reloc.getOffset(RelocAddress); 1237 1238 uint32_t EntryIdx = RelocAddress / EntrySize; 1239 uint32_t OffsetInEntry = RelocAddress - EntryIdx * EntrySize; 1240 CompactUnwindEntry &Entry = CompactUnwinds[EntryIdx]; 1241 1242 if (OffsetInEntry == 0) 1243 Entry.FunctionReloc = Reloc; 1244 else if (OffsetInEntry == PointerSize + 2 * sizeof(uint32_t)) 1245 Entry.PersonalityReloc = Reloc; 1246 else if (OffsetInEntry == 2 * PointerSize + 2 * sizeof(uint32_t)) 1247 Entry.LSDAReloc = Reloc; 1248 else 1249 llvm_unreachable("Unexpected relocation in __compact_unwind section"); 1250 } 1251 1252 // Finally, we're ready to print the data we've gathered. 1253 outs() << "Contents of __compact_unwind section:\n"; 1254 for (auto &Entry : CompactUnwinds) { 1255 outs() << " Entry at offset " 1256 << format("0x%" PRIx32, Entry.OffsetInSection) << ":\n"; 1257 1258 // 1. Start of the region this entry applies to. 1259 outs() << " start: " 1260 << format("0x%" PRIx64, Entry.FunctionAddr) << ' '; 1261 printUnwindRelocDest(Obj, Symbols, Entry.FunctionReloc, 1262 Entry.FunctionAddr); 1263 outs() << '\n'; 1264 1265 // 2. Length of the region this entry applies to. 1266 outs() << " length: " 1267 << format("0x%" PRIx32, Entry.Length) << '\n'; 1268 // 3. The 32-bit compact encoding. 1269 outs() << " compact encoding: " 1270 << format("0x%08" PRIx32, Entry.CompactEncoding) << '\n'; 1271 1272 // 4. The personality function, if present. 1273 if (Entry.PersonalityReloc.getObjectFile()) { 1274 outs() << " personality function: " 1275 << format("0x%" PRIx64, Entry.PersonalityAddr) << ' '; 1276 printUnwindRelocDest(Obj, Symbols, Entry.PersonalityReloc, 1277 Entry.PersonalityAddr); 1278 outs() << '\n'; 1279 } 1280 1281 // 5. This entry's language-specific data area. 1282 if (Entry.LSDAReloc.getObjectFile()) { 1283 outs() << " LSDA: " 1284 << format("0x%" PRIx64, Entry.LSDAAddr) << ' '; 1285 printUnwindRelocDest(Obj, Symbols, Entry.LSDAReloc, Entry.LSDAAddr); 1286 outs() << '\n'; 1287 } 1288 } 1289 } 1290 1291 //===----------------------------------------------------------------------===// 1292 // __unwind_info section dumping 1293 //===----------------------------------------------------------------------===// 1294 1295 static void printRegularSecondLevelUnwindPage(const char *PageStart) { 1296 const char *Pos = PageStart; 1297 uint32_t Kind = readNext<uint32_t>(Pos); 1298 (void)Kind; 1299 assert(Kind == 2 && "kind for a regular 2nd level index should be 2"); 1300 1301 uint16_t EntriesStart = readNext<uint16_t>(Pos); 1302 uint16_t NumEntries = readNext<uint16_t>(Pos); 1303 1304 Pos = PageStart + EntriesStart; 1305 for (unsigned i = 0; i < NumEntries; ++i) { 1306 uint32_t FunctionOffset = readNext<uint32_t>(Pos); 1307 uint32_t Encoding = readNext<uint32_t>(Pos); 1308 1309 outs() << " [" << i << "]: " 1310 << "function offset=" 1311 << format("0x%08" PRIx32, FunctionOffset) << ", " 1312 << "encoding=" 1313 << format("0x%08" PRIx32, Encoding) 1314 << '\n'; 1315 } 1316 } 1317 1318 static void printCompressedSecondLevelUnwindPage( 1319 const char *PageStart, uint32_t FunctionBase, 1320 const SmallVectorImpl<uint32_t> &CommonEncodings) { 1321 const char *Pos = PageStart; 1322 uint32_t Kind = readNext<uint32_t>(Pos); 1323 (void)Kind; 1324 assert(Kind == 3 && "kind for a compressed 2nd level index should be 3"); 1325 1326 uint16_t EntriesStart = readNext<uint16_t>(Pos); 1327 uint16_t NumEntries = readNext<uint16_t>(Pos); 1328 1329 uint16_t EncodingsStart = readNext<uint16_t>(Pos); 1330 readNext<uint16_t>(Pos); 1331 const auto *PageEncodings = reinterpret_cast<const support::ulittle32_t *>( 1332 PageStart + EncodingsStart); 1333 1334 Pos = PageStart + EntriesStart; 1335 for (unsigned i = 0; i < NumEntries; ++i) { 1336 uint32_t Entry = readNext<uint32_t>(Pos); 1337 uint32_t FunctionOffset = FunctionBase + (Entry & 0xffffff); 1338 uint32_t EncodingIdx = Entry >> 24; 1339 1340 uint32_t Encoding; 1341 if (EncodingIdx < CommonEncodings.size()) 1342 Encoding = CommonEncodings[EncodingIdx]; 1343 else 1344 Encoding = PageEncodings[EncodingIdx - CommonEncodings.size()]; 1345 1346 outs() << " [" << i << "]: " 1347 << "function offset=" 1348 << format("0x%08" PRIx32, FunctionOffset) << ", " 1349 << "encoding[" << EncodingIdx << "]=" 1350 << format("0x%08" PRIx32, Encoding) 1351 << '\n'; 1352 } 1353 } 1354 1355 static void 1356 printMachOUnwindInfoSection(const MachOObjectFile *Obj, 1357 std::map<uint64_t, SymbolRef> &Symbols, 1358 const SectionRef &UnwindInfo) { 1359 1360 assert(Obj->isLittleEndian() && 1361 "There should not be a big-endian .o with __unwind_info"); 1362 1363 outs() << "Contents of __unwind_info section:\n"; 1364 1365 StringRef Contents; 1366 UnwindInfo.getContents(Contents); 1367 const char *Pos = Contents.data(); 1368 1369 //===---------------------------------- 1370 // Section header 1371 //===---------------------------------- 1372 1373 uint32_t Version = readNext<uint32_t>(Pos); 1374 outs() << " Version: " 1375 << format("0x%" PRIx32, Version) << '\n'; 1376 assert(Version == 1 && "only understand version 1"); 1377 1378 uint32_t CommonEncodingsStart = readNext<uint32_t>(Pos); 1379 outs() << " Common encodings array section offset: " 1380 << format("0x%" PRIx32, CommonEncodingsStart) << '\n'; 1381 uint32_t NumCommonEncodings = readNext<uint32_t>(Pos); 1382 outs() << " Number of common encodings in array: " 1383 << format("0x%" PRIx32, NumCommonEncodings) << '\n'; 1384 1385 uint32_t PersonalitiesStart = readNext<uint32_t>(Pos); 1386 outs() << " Personality function array section offset: " 1387 << format("0x%" PRIx32, PersonalitiesStart) << '\n'; 1388 uint32_t NumPersonalities = readNext<uint32_t>(Pos); 1389 outs() << " Number of personality functions in array: " 1390 << format("0x%" PRIx32, NumPersonalities) << '\n'; 1391 1392 uint32_t IndicesStart = readNext<uint32_t>(Pos); 1393 outs() << " Index array section offset: " 1394 << format("0x%" PRIx32, IndicesStart) << '\n'; 1395 uint32_t NumIndices = readNext<uint32_t>(Pos); 1396 outs() << " Number of indices in array: " 1397 << format("0x%" PRIx32, NumIndices) << '\n'; 1398 1399 //===---------------------------------- 1400 // A shared list of common encodings 1401 //===---------------------------------- 1402 1403 // These occupy indices in the range [0, N] whenever an encoding is referenced 1404 // from a compressed 2nd level index table. In practice the linker only 1405 // creates ~128 of these, so that indices are available to embed encodings in 1406 // the 2nd level index. 1407 1408 SmallVector<uint32_t, 64> CommonEncodings; 1409 outs() << " Common encodings: (count = " << NumCommonEncodings << ")\n"; 1410 Pos = Contents.data() + CommonEncodingsStart; 1411 for (unsigned i = 0; i < NumCommonEncodings; ++i) { 1412 uint32_t Encoding = readNext<uint32_t>(Pos); 1413 CommonEncodings.push_back(Encoding); 1414 1415 outs() << " encoding[" << i << "]: " << format("0x%08" PRIx32, Encoding) 1416 << '\n'; 1417 } 1418 1419 1420 //===---------------------------------- 1421 // Personality functions used in this executable 1422 //===---------------------------------- 1423 1424 // There should be only a handful of these (one per source language, 1425 // roughly). Particularly since they only get 2 bits in the compact encoding. 1426 1427 outs() << " Personality functions: (count = " << NumPersonalities << ")\n"; 1428 Pos = Contents.data() + PersonalitiesStart; 1429 for (unsigned i = 0; i < NumPersonalities; ++i) { 1430 uint32_t PersonalityFn = readNext<uint32_t>(Pos); 1431 outs() << " personality[" << i + 1 1432 << "]: " << format("0x%08" PRIx32, PersonalityFn) << '\n'; 1433 } 1434 1435 //===---------------------------------- 1436 // The level 1 index entries 1437 //===---------------------------------- 1438 1439 // These specify an approximate place to start searching for the more detailed 1440 // information, sorted by PC. 1441 1442 struct IndexEntry { 1443 uint32_t FunctionOffset; 1444 uint32_t SecondLevelPageStart; 1445 uint32_t LSDAStart; 1446 }; 1447 1448 SmallVector<IndexEntry, 4> IndexEntries; 1449 1450 outs() << " Top level indices: (count = " << NumIndices << ")\n"; 1451 Pos = Contents.data() + IndicesStart; 1452 for (unsigned i = 0; i < NumIndices; ++i) { 1453 IndexEntry Entry; 1454 1455 Entry.FunctionOffset = readNext<uint32_t>(Pos); 1456 Entry.SecondLevelPageStart = readNext<uint32_t>(Pos); 1457 Entry.LSDAStart = readNext<uint32_t>(Pos); 1458 IndexEntries.push_back(Entry); 1459 1460 outs() << " [" << i << "]: " 1461 << "function offset=" 1462 << format("0x%08" PRIx32, Entry.FunctionOffset) << ", " 1463 << "2nd level page offset=" 1464 << format("0x%08" PRIx32, Entry.SecondLevelPageStart) << ", " 1465 << "LSDA offset=" 1466 << format("0x%08" PRIx32, Entry.LSDAStart) << '\n'; 1467 } 1468 1469 1470 //===---------------------------------- 1471 // Next come the LSDA tables 1472 //===---------------------------------- 1473 1474 // The LSDA layout is rather implicit: it's a contiguous array of entries from 1475 // the first top-level index's LSDAOffset to the last (sentinel). 1476 1477 outs() << " LSDA descriptors:\n"; 1478 Pos = Contents.data() + IndexEntries[0].LSDAStart; 1479 int NumLSDAs = (IndexEntries.back().LSDAStart - IndexEntries[0].LSDAStart) / 1480 (2 * sizeof(uint32_t)); 1481 for (int i = 0; i < NumLSDAs; ++i) { 1482 uint32_t FunctionOffset = readNext<uint32_t>(Pos); 1483 uint32_t LSDAOffset = readNext<uint32_t>(Pos); 1484 outs() << " [" << i << "]: " 1485 << "function offset=" 1486 << format("0x%08" PRIx32, FunctionOffset) << ", " 1487 << "LSDA offset=" 1488 << format("0x%08" PRIx32, LSDAOffset) << '\n'; 1489 } 1490 1491 //===---------------------------------- 1492 // Finally, the 2nd level indices 1493 //===---------------------------------- 1494 1495 // Generally these are 4K in size, and have 2 possible forms: 1496 // + Regular stores up to 511 entries with disparate encodings 1497 // + Compressed stores up to 1021 entries if few enough compact encoding 1498 // values are used. 1499 outs() << " Second level indices:\n"; 1500 for (unsigned i = 0; i < IndexEntries.size() - 1; ++i) { 1501 // The final sentinel top-level index has no associated 2nd level page 1502 if (IndexEntries[i].SecondLevelPageStart == 0) 1503 break; 1504 1505 outs() << " Second level index[" << i << "]: " 1506 << "offset in section=" 1507 << format("0x%08" PRIx32, IndexEntries[i].SecondLevelPageStart) 1508 << ", " 1509 << "base function offset=" 1510 << format("0x%08" PRIx32, IndexEntries[i].FunctionOffset) << '\n'; 1511 1512 Pos = Contents.data() + IndexEntries[i].SecondLevelPageStart; 1513 uint32_t Kind = *reinterpret_cast<const support::ulittle32_t *>(Pos); 1514 if (Kind == 2) 1515 printRegularSecondLevelUnwindPage(Pos); 1516 else if (Kind == 3) 1517 printCompressedSecondLevelUnwindPage(Pos, IndexEntries[i].FunctionOffset, 1518 CommonEncodings); 1519 else 1520 llvm_unreachable("Do not know how to print this kind of 2nd level page"); 1521 1522 } 1523 } 1524 1525 void llvm::printMachOUnwindInfo(const MachOObjectFile *Obj) { 1526 std::map<uint64_t, SymbolRef> Symbols; 1527 for (const SymbolRef &SymRef : Obj->symbols()) { 1528 // Discard any undefined or absolute symbols. They're not going to take part 1529 // in the convenience lookup for unwind info and just take up resources. 1530 section_iterator Section = Obj->section_end(); 1531 SymRef.getSection(Section); 1532 if (Section == Obj->section_end()) 1533 continue; 1534 1535 uint64_t Addr; 1536 SymRef.getAddress(Addr); 1537 Symbols.insert(std::make_pair(Addr, SymRef)); 1538 } 1539 1540 for (const SectionRef &Section : Obj->sections()) { 1541 StringRef SectName; 1542 Section.getName(SectName); 1543 if (SectName == "__compact_unwind") 1544 printMachOCompactUnwindSection(Obj, Symbols, Section); 1545 else if (SectName == "__unwind_info") 1546 printMachOUnwindInfoSection(Obj, Symbols, Section); 1547 else if (SectName == "__eh_frame") 1548 outs() << "llvm-objdump: warning: unhandled __eh_frame section\n"; 1549 1550 } 1551 } 1552 1553 static void PrintMachHeader(uint32_t magic, uint32_t cputype, 1554 uint32_t cpusubtype, uint32_t filetype, 1555 uint32_t ncmds, uint32_t sizeofcmds, uint32_t flags, 1556 bool verbose) { 1557 outs() << "Mach header\n"; 1558 outs() << " magic cputype cpusubtype caps filetype ncmds " 1559 "sizeofcmds flags\n"; 1560 if (verbose) { 1561 if (magic == MachO::MH_MAGIC) 1562 outs() << " MH_MAGIC"; 1563 else if (magic == MachO::MH_MAGIC_64) 1564 outs() << "MH_MAGIC_64"; 1565 else 1566 outs() << format(" 0x%08" PRIx32, magic); 1567 switch (cputype) { 1568 case MachO::CPU_TYPE_I386: 1569 outs() << " I386"; 1570 switch (cpusubtype & ~MachO::CPU_SUBTYPE_MASK) { 1571 case MachO::CPU_SUBTYPE_I386_ALL: 1572 outs() << " ALL"; 1573 break; 1574 default: 1575 outs() << format(" %10d", cpusubtype & ~MachO::CPU_SUBTYPE_MASK); 1576 break; 1577 } 1578 break; 1579 case MachO::CPU_TYPE_X86_64: 1580 outs() << " X86_64"; 1581 case MachO::CPU_SUBTYPE_X86_64_ALL: 1582 outs() << " ALL"; 1583 break; 1584 case MachO::CPU_SUBTYPE_X86_64_H: 1585 outs() << " Haswell"; 1586 outs() << format(" %10d", cpusubtype & ~MachO::CPU_SUBTYPE_MASK); 1587 break; 1588 case MachO::CPU_TYPE_ARM: 1589 outs() << " ARM"; 1590 switch (cpusubtype & ~MachO::CPU_SUBTYPE_MASK) { 1591 case MachO::CPU_SUBTYPE_ARM_ALL: 1592 outs() << " ALL"; 1593 break; 1594 case MachO::CPU_SUBTYPE_ARM_V4T: 1595 outs() << " V4T"; 1596 break; 1597 case MachO::CPU_SUBTYPE_ARM_V5TEJ: 1598 outs() << " V5TEJ"; 1599 break; 1600 case MachO::CPU_SUBTYPE_ARM_XSCALE: 1601 outs() << " XSCALE"; 1602 break; 1603 case MachO::CPU_SUBTYPE_ARM_V6: 1604 outs() << " V6"; 1605 break; 1606 case MachO::CPU_SUBTYPE_ARM_V6M: 1607 outs() << " V6M"; 1608 break; 1609 case MachO::CPU_SUBTYPE_ARM_V7: 1610 outs() << " V7"; 1611 break; 1612 case MachO::CPU_SUBTYPE_ARM_V7EM: 1613 outs() << " V7EM"; 1614 break; 1615 case MachO::CPU_SUBTYPE_ARM_V7K: 1616 outs() << " V7K"; 1617 break; 1618 case MachO::CPU_SUBTYPE_ARM_V7M: 1619 outs() << " V7M"; 1620 break; 1621 case MachO::CPU_SUBTYPE_ARM_V7S: 1622 outs() << " V7S"; 1623 break; 1624 default: 1625 outs() << format(" %10d", cpusubtype & ~MachO::CPU_SUBTYPE_MASK); 1626 break; 1627 } 1628 break; 1629 case MachO::CPU_TYPE_ARM64: 1630 outs() << " ARM64"; 1631 switch (cpusubtype & ~MachO::CPU_SUBTYPE_MASK) { 1632 case MachO::CPU_SUBTYPE_ARM64_ALL: 1633 outs() << " ALL"; 1634 break; 1635 default: 1636 outs() << format(" %10d", cpusubtype & ~MachO::CPU_SUBTYPE_MASK); 1637 break; 1638 } 1639 break; 1640 case MachO::CPU_TYPE_POWERPC: 1641 outs() << " PPC"; 1642 switch (cpusubtype & ~MachO::CPU_SUBTYPE_MASK) { 1643 case MachO::CPU_SUBTYPE_POWERPC_ALL: 1644 outs() << " ALL"; 1645 break; 1646 default: 1647 outs() << format(" %10d", cpusubtype & ~MachO::CPU_SUBTYPE_MASK); 1648 break; 1649 } 1650 break; 1651 case MachO::CPU_TYPE_POWERPC64: 1652 outs() << " PPC64"; 1653 switch (cpusubtype & ~MachO::CPU_SUBTYPE_MASK) { 1654 case MachO::CPU_SUBTYPE_POWERPC_ALL: 1655 outs() << " ALL"; 1656 break; 1657 default: 1658 outs() << format(" %10d", cpusubtype & ~MachO::CPU_SUBTYPE_MASK); 1659 break; 1660 } 1661 break; 1662 } 1663 if ((cpusubtype & MachO::CPU_SUBTYPE_MASK) == MachO::CPU_SUBTYPE_LIB64) { 1664 outs() << " LIB64"; 1665 } else { 1666 outs() << format(" 0x%02" PRIx32, 1667 (cpusubtype & MachO::CPU_SUBTYPE_MASK) >> 24); 1668 } 1669 switch (filetype) { 1670 case MachO::MH_OBJECT: 1671 outs() << " OBJECT"; 1672 break; 1673 case MachO::MH_EXECUTE: 1674 outs() << " EXECUTE"; 1675 break; 1676 case MachO::MH_FVMLIB: 1677 outs() << " FVMLIB"; 1678 break; 1679 case MachO::MH_CORE: 1680 outs() << " CORE"; 1681 break; 1682 case MachO::MH_PRELOAD: 1683 outs() << " PRELOAD"; 1684 break; 1685 case MachO::MH_DYLIB: 1686 outs() << " DYLIB"; 1687 break; 1688 case MachO::MH_DYLIB_STUB: 1689 outs() << " DYLIB_STUB"; 1690 break; 1691 case MachO::MH_DYLINKER: 1692 outs() << " DYLINKER"; 1693 break; 1694 case MachO::MH_BUNDLE: 1695 outs() << " BUNDLE"; 1696 break; 1697 case MachO::MH_DSYM: 1698 outs() << " DSYM"; 1699 break; 1700 case MachO::MH_KEXT_BUNDLE: 1701 outs() << " KEXTBUNDLE"; 1702 break; 1703 default: 1704 outs() << format(" %10u", filetype); 1705 break; 1706 } 1707 outs() << format(" %5u", ncmds); 1708 outs() << format(" %10u", sizeofcmds); 1709 uint32_t f = flags; 1710 if (f & MachO::MH_NOUNDEFS) { 1711 outs() << " NOUNDEFS"; 1712 f &= ~MachO::MH_NOUNDEFS; 1713 } 1714 if (f & MachO::MH_INCRLINK) { 1715 outs() << " INCRLINK"; 1716 f &= ~MachO::MH_INCRLINK; 1717 } 1718 if (f & MachO::MH_DYLDLINK) { 1719 outs() << " DYLDLINK"; 1720 f &= ~MachO::MH_DYLDLINK; 1721 } 1722 if (f & MachO::MH_BINDATLOAD) { 1723 outs() << " BINDATLOAD"; 1724 f &= ~MachO::MH_BINDATLOAD; 1725 } 1726 if (f & MachO::MH_PREBOUND) { 1727 outs() << " PREBOUND"; 1728 f &= ~MachO::MH_PREBOUND; 1729 } 1730 if (f & MachO::MH_SPLIT_SEGS) { 1731 outs() << " SPLIT_SEGS"; 1732 f &= ~MachO::MH_SPLIT_SEGS; 1733 } 1734 if (f & MachO::MH_LAZY_INIT) { 1735 outs() << " LAZY_INIT"; 1736 f &= ~MachO::MH_LAZY_INIT; 1737 } 1738 if (f & MachO::MH_TWOLEVEL) { 1739 outs() << " TWOLEVEL"; 1740 f &= ~MachO::MH_TWOLEVEL; 1741 } 1742 if (f & MachO::MH_FORCE_FLAT) { 1743 outs() << " FORCE_FLAT"; 1744 f &= ~MachO::MH_FORCE_FLAT; 1745 } 1746 if (f & MachO::MH_NOMULTIDEFS) { 1747 outs() << " NOMULTIDEFS"; 1748 f &= ~MachO::MH_NOMULTIDEFS; 1749 } 1750 if (f & MachO::MH_NOFIXPREBINDING) { 1751 outs() << " NOFIXPREBINDING"; 1752 f &= ~MachO::MH_NOFIXPREBINDING; 1753 } 1754 if (f & MachO::MH_PREBINDABLE) { 1755 outs() << " PREBINDABLE"; 1756 f &= ~MachO::MH_PREBINDABLE; 1757 } 1758 if (f & MachO::MH_ALLMODSBOUND) { 1759 outs() << " ALLMODSBOUND"; 1760 f &= ~MachO::MH_ALLMODSBOUND; 1761 } 1762 if (f & MachO::MH_SUBSECTIONS_VIA_SYMBOLS) { 1763 outs() << " SUBSECTIONS_VIA_SYMBOLS"; 1764 f &= ~MachO::MH_SUBSECTIONS_VIA_SYMBOLS; 1765 } 1766 if (f & MachO::MH_CANONICAL) { 1767 outs() << " CANONICAL"; 1768 f &= ~MachO::MH_CANONICAL; 1769 } 1770 if (f & MachO::MH_WEAK_DEFINES) { 1771 outs() << " WEAK_DEFINES"; 1772 f &= ~MachO::MH_WEAK_DEFINES; 1773 } 1774 if (f & MachO::MH_BINDS_TO_WEAK) { 1775 outs() << " BINDS_TO_WEAK"; 1776 f &= ~MachO::MH_BINDS_TO_WEAK; 1777 } 1778 if (f & MachO::MH_ALLOW_STACK_EXECUTION) { 1779 outs() << " ALLOW_STACK_EXECUTION"; 1780 f &= ~MachO::MH_ALLOW_STACK_EXECUTION; 1781 } 1782 if (f & MachO::MH_DEAD_STRIPPABLE_DYLIB) { 1783 outs() << " DEAD_STRIPPABLE_DYLIB"; 1784 f &= ~MachO::MH_DEAD_STRIPPABLE_DYLIB; 1785 } 1786 if (f & MachO::MH_PIE) { 1787 outs() << " PIE"; 1788 f &= ~MachO::MH_PIE; 1789 } 1790 if (f & MachO::MH_NO_REEXPORTED_DYLIBS) { 1791 outs() << " NO_REEXPORTED_DYLIBS"; 1792 f &= ~MachO::MH_NO_REEXPORTED_DYLIBS; 1793 } 1794 if (f & MachO::MH_HAS_TLV_DESCRIPTORS) { 1795 outs() << " MH_HAS_TLV_DESCRIPTORS"; 1796 f &= ~MachO::MH_HAS_TLV_DESCRIPTORS; 1797 } 1798 if (f & MachO::MH_NO_HEAP_EXECUTION) { 1799 outs() << " MH_NO_HEAP_EXECUTION"; 1800 f &= ~MachO::MH_NO_HEAP_EXECUTION; 1801 } 1802 if (f & MachO::MH_APP_EXTENSION_SAFE) { 1803 outs() << " APP_EXTENSION_SAFE"; 1804 f &= ~MachO::MH_APP_EXTENSION_SAFE; 1805 } 1806 if (f != 0 || flags == 0) 1807 outs() << format(" 0x%08" PRIx32, f); 1808 } else { 1809 outs() << format(" 0x%08" PRIx32, magic); 1810 outs() << format(" %7d", cputype); 1811 outs() << format(" %10d", cpusubtype & ~MachO::CPU_SUBTYPE_MASK); 1812 outs() << format(" 0x%02" PRIx32, 1813 (cpusubtype & MachO::CPU_SUBTYPE_MASK) >> 24); 1814 outs() << format(" %10u", filetype); 1815 outs() << format(" %5u", ncmds); 1816 outs() << format(" %10u", sizeofcmds); 1817 outs() << format(" 0x%08" PRIx32, flags); 1818 } 1819 outs() << "\n"; 1820 } 1821 1822 static void PrintSegmentCommand(uint32_t cmd, uint32_t cmdsize, 1823 StringRef SegName, uint64_t vmaddr, 1824 uint64_t vmsize, uint64_t fileoff, 1825 uint64_t filesize, uint32_t maxprot, 1826 uint32_t initprot, uint32_t nsects, 1827 uint32_t flags, uint32_t object_size, 1828 bool verbose) { 1829 uint64_t expected_cmdsize; 1830 if (cmd == MachO::LC_SEGMENT) { 1831 outs() << " cmd LC_SEGMENT\n"; 1832 expected_cmdsize = nsects; 1833 expected_cmdsize *= sizeof(struct MachO::section); 1834 expected_cmdsize += sizeof(struct MachO::segment_command); 1835 } else { 1836 outs() << " cmd LC_SEGMENT_64\n"; 1837 expected_cmdsize = nsects; 1838 expected_cmdsize *= sizeof(struct MachO::section_64); 1839 expected_cmdsize += sizeof(struct MachO::segment_command_64); 1840 } 1841 outs() << " cmdsize " << cmdsize; 1842 if (cmdsize != expected_cmdsize) 1843 outs() << " Inconsistent size\n"; 1844 else 1845 outs() << "\n"; 1846 outs() << " segname " << SegName << "\n"; 1847 if (cmd == MachO::LC_SEGMENT_64) { 1848 outs() << " vmaddr " << format("0x%016" PRIx64, vmaddr) << "\n"; 1849 outs() << " vmsize " << format("0x%016" PRIx64, vmsize) << "\n"; 1850 } else { 1851 outs() << " vmaddr " << format("0x%08" PRIx32, vmaddr) << "\n"; 1852 outs() << " vmsize " << format("0x%08" PRIx32, vmsize) << "\n"; 1853 } 1854 outs() << " fileoff " << fileoff; 1855 if (fileoff > object_size) 1856 outs() << " (past end of file)\n"; 1857 else 1858 outs() << "\n"; 1859 outs() << " filesize " << filesize; 1860 if (fileoff + filesize > object_size) 1861 outs() << " (past end of file)\n"; 1862 else 1863 outs() << "\n"; 1864 if (verbose) { 1865 if ((maxprot & 1866 ~(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | 1867 MachO::VM_PROT_EXECUTE)) != 0) 1868 outs() << " maxprot ?" << format("0x%08" PRIx32, maxprot) << "\n"; 1869 else { 1870 if (maxprot & MachO::VM_PROT_READ) 1871 outs() << " maxprot r"; 1872 else 1873 outs() << " maxprot -"; 1874 if (maxprot & MachO::VM_PROT_WRITE) 1875 outs() << "w"; 1876 else 1877 outs() << "-"; 1878 if (maxprot & MachO::VM_PROT_EXECUTE) 1879 outs() << "x\n"; 1880 else 1881 outs() << "-\n"; 1882 } 1883 if ((initprot & 1884 ~(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | 1885 MachO::VM_PROT_EXECUTE)) != 0) 1886 outs() << " initprot ?" << format("0x%08" PRIx32, initprot) << "\n"; 1887 else { 1888 if (initprot & MachO::VM_PROT_READ) 1889 outs() << " initprot r"; 1890 else 1891 outs() << " initprot -"; 1892 if (initprot & MachO::VM_PROT_WRITE) 1893 outs() << "w"; 1894 else 1895 outs() << "-"; 1896 if (initprot & MachO::VM_PROT_EXECUTE) 1897 outs() << "x\n"; 1898 else 1899 outs() << "-\n"; 1900 } 1901 } else { 1902 outs() << " maxprot " << format("0x%08" PRIx32, maxprot) << "\n"; 1903 outs() << " initprot " << format("0x%08" PRIx32, initprot) << "\n"; 1904 } 1905 outs() << " nsects " << nsects << "\n"; 1906 if (verbose) { 1907 outs() << " flags"; 1908 if (flags == 0) 1909 outs() << " (none)\n"; 1910 else { 1911 if (flags & MachO::SG_HIGHVM) { 1912 outs() << " HIGHVM"; 1913 flags &= ~MachO::SG_HIGHVM; 1914 } 1915 if (flags & MachO::SG_FVMLIB) { 1916 outs() << " FVMLIB"; 1917 flags &= ~MachO::SG_FVMLIB; 1918 } 1919 if (flags & MachO::SG_NORELOC) { 1920 outs() << " NORELOC"; 1921 flags &= ~MachO::SG_NORELOC; 1922 } 1923 if (flags & MachO::SG_PROTECTED_VERSION_1) { 1924 outs() << " PROTECTED_VERSION_1"; 1925 flags &= ~MachO::SG_PROTECTED_VERSION_1; 1926 } 1927 if (flags) 1928 outs() << format(" 0x%08" PRIx32, flags) << " (unknown flags)\n"; 1929 else 1930 outs() << "\n"; 1931 } 1932 } else { 1933 outs() << " flags " << format("0x%" PRIx32, flags) << "\n"; 1934 } 1935 } 1936 1937 static void PrintSection(const char *sectname, const char *segname, 1938 uint64_t addr, uint64_t size, uint32_t offset, 1939 uint32_t align, uint32_t reloff, uint32_t nreloc, 1940 uint32_t flags, uint32_t reserved1, uint32_t reserved2, 1941 uint32_t cmd, const char *sg_segname, 1942 uint32_t filetype, uint32_t object_size, 1943 bool verbose) { 1944 outs() << "Section\n"; 1945 outs() << " sectname " << format("%.16s\n", sectname); 1946 outs() << " segname " << format("%.16s", segname); 1947 if (filetype != MachO::MH_OBJECT && strncmp(sg_segname, segname, 16) != 0) 1948 outs() << " (does not match segment)\n"; 1949 else 1950 outs() << "\n"; 1951 if (cmd == MachO::LC_SEGMENT_64) { 1952 outs() << " addr " << format("0x%016" PRIx64, addr) << "\n"; 1953 outs() << " size " << format("0x%016" PRIx64, size); 1954 } else { 1955 outs() << " addr " << format("0x%08" PRIx32, addr) << "\n"; 1956 outs() << " size " << format("0x%08" PRIx32, size); 1957 } 1958 if ((flags & MachO::S_ZEROFILL) != 0 && offset + size > object_size) 1959 outs() << " (past end of file)\n"; 1960 else 1961 outs() << "\n"; 1962 outs() << " offset " << offset; 1963 if (offset > object_size) 1964 outs() << " (past end of file)\n"; 1965 else 1966 outs() << "\n"; 1967 uint32_t align_shifted = 1 << align; 1968 outs() << " align 2^" << align << " (" << align_shifted << ")\n"; 1969 outs() << " reloff " << reloff; 1970 if (reloff > object_size) 1971 outs() << " (past end of file)\n"; 1972 else 1973 outs() << "\n"; 1974 outs() << " nreloc " << nreloc; 1975 if (reloff + nreloc * sizeof(struct MachO::relocation_info) > object_size) 1976 outs() << " (past end of file)\n"; 1977 else 1978 outs() << "\n"; 1979 uint32_t section_type = flags & MachO::SECTION_TYPE; 1980 if (verbose) { 1981 outs() << " type"; 1982 if (section_type == MachO::S_REGULAR) 1983 outs() << " S_REGULAR\n"; 1984 else if (section_type == MachO::S_ZEROFILL) 1985 outs() << " S_ZEROFILL\n"; 1986 else if (section_type == MachO::S_CSTRING_LITERALS) 1987 outs() << " S_CSTRING_LITERALS\n"; 1988 else if (section_type == MachO::S_4BYTE_LITERALS) 1989 outs() << " S_4BYTE_LITERALS\n"; 1990 else if (section_type == MachO::S_8BYTE_LITERALS) 1991 outs() << " S_8BYTE_LITERALS\n"; 1992 else if (section_type == MachO::S_16BYTE_LITERALS) 1993 outs() << " S_16BYTE_LITERALS\n"; 1994 else if (section_type == MachO::S_LITERAL_POINTERS) 1995 outs() << " S_LITERAL_POINTERS\n"; 1996 else if (section_type == MachO::S_NON_LAZY_SYMBOL_POINTERS) 1997 outs() << " S_NON_LAZY_SYMBOL_POINTERS\n"; 1998 else if (section_type == MachO::S_LAZY_SYMBOL_POINTERS) 1999 outs() << " S_LAZY_SYMBOL_POINTERS\n"; 2000 else if (section_type == MachO::S_SYMBOL_STUBS) 2001 outs() << " S_SYMBOL_STUBS\n"; 2002 else if (section_type == MachO::S_MOD_INIT_FUNC_POINTERS) 2003 outs() << " S_MOD_INIT_FUNC_POINTERS\n"; 2004 else if (section_type == MachO::S_MOD_TERM_FUNC_POINTERS) 2005 outs() << " S_MOD_TERM_FUNC_POINTERS\n"; 2006 else if (section_type == MachO::S_COALESCED) 2007 outs() << " S_COALESCED\n"; 2008 else if (section_type == MachO::S_INTERPOSING) 2009 outs() << " S_INTERPOSING\n"; 2010 else if (section_type == MachO::S_DTRACE_DOF) 2011 outs() << " S_DTRACE_DOF\n"; 2012 else if (section_type == MachO::S_LAZY_DYLIB_SYMBOL_POINTERS) 2013 outs() << " S_LAZY_DYLIB_SYMBOL_POINTERS\n"; 2014 else if (section_type == MachO::S_THREAD_LOCAL_REGULAR) 2015 outs() << " S_THREAD_LOCAL_REGULAR\n"; 2016 else if (section_type == MachO::S_THREAD_LOCAL_ZEROFILL) 2017 outs() << " S_THREAD_LOCAL_ZEROFILL\n"; 2018 else if (section_type == MachO::S_THREAD_LOCAL_VARIABLES) 2019 outs() << " S_THREAD_LOCAL_VARIABLES\n"; 2020 else if (section_type == MachO::S_THREAD_LOCAL_VARIABLE_POINTERS) 2021 outs() << " S_THREAD_LOCAL_VARIABLE_POINTERS\n"; 2022 else if (section_type == MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS) 2023 outs() << " S_THREAD_LOCAL_INIT_FUNCTION_POINTERS\n"; 2024 else 2025 outs() << format("0x%08" PRIx32, section_type) << "\n"; 2026 outs() << "attributes"; 2027 uint32_t section_attributes = flags & MachO::SECTION_ATTRIBUTES; 2028 if (section_attributes & MachO::S_ATTR_PURE_INSTRUCTIONS) 2029 outs() << " PURE_INSTRUCTIONS"; 2030 if (section_attributes & MachO::S_ATTR_NO_TOC) 2031 outs() << " NO_TOC"; 2032 if (section_attributes & MachO::S_ATTR_STRIP_STATIC_SYMS) 2033 outs() << " STRIP_STATIC_SYMS"; 2034 if (section_attributes & MachO::S_ATTR_NO_DEAD_STRIP) 2035 outs() << " NO_DEAD_STRIP"; 2036 if (section_attributes & MachO::S_ATTR_LIVE_SUPPORT) 2037 outs() << " LIVE_SUPPORT"; 2038 if (section_attributes & MachO::S_ATTR_SELF_MODIFYING_CODE) 2039 outs() << " SELF_MODIFYING_CODE"; 2040 if (section_attributes & MachO::S_ATTR_DEBUG) 2041 outs() << " DEBUG"; 2042 if (section_attributes & MachO::S_ATTR_SOME_INSTRUCTIONS) 2043 outs() << " SOME_INSTRUCTIONS"; 2044 if (section_attributes & MachO::S_ATTR_EXT_RELOC) 2045 outs() << " EXT_RELOC"; 2046 if (section_attributes & MachO::S_ATTR_LOC_RELOC) 2047 outs() << " LOC_RELOC"; 2048 if (section_attributes == 0) 2049 outs() << " (none)"; 2050 outs() << "\n"; 2051 } else 2052 outs() << " flags " << format("0x%08" PRIx32, flags) << "\n"; 2053 outs() << " reserved1 " << reserved1; 2054 if (section_type == MachO::S_SYMBOL_STUBS || 2055 section_type == MachO::S_LAZY_SYMBOL_POINTERS || 2056 section_type == MachO::S_LAZY_DYLIB_SYMBOL_POINTERS || 2057 section_type == MachO::S_NON_LAZY_SYMBOL_POINTERS || 2058 section_type == MachO::S_THREAD_LOCAL_VARIABLE_POINTERS) 2059 outs() << " (index into indirect symbol table)\n"; 2060 else 2061 outs() << "\n"; 2062 outs() << " reserved2 " << reserved2; 2063 if (section_type == MachO::S_SYMBOL_STUBS) 2064 outs() << " (size of stubs)\n"; 2065 else 2066 outs() << "\n"; 2067 } 2068 2069 static void PrintSymtabLoadCommand(MachO::symtab_command st, uint32_t cputype, 2070 uint32_t object_size) { 2071 outs() << " cmd LC_SYMTAB\n"; 2072 outs() << " cmdsize " << st.cmdsize; 2073 if (st.cmdsize != sizeof(struct MachO::symtab_command)) 2074 outs() << " Incorrect size\n"; 2075 else 2076 outs() << "\n"; 2077 outs() << " symoff " << st.symoff; 2078 if (st.symoff > object_size) 2079 outs() << " (past end of file)\n"; 2080 else 2081 outs() << "\n"; 2082 outs() << " nsyms " << st.nsyms; 2083 uint64_t big_size; 2084 if (cputype & MachO::CPU_ARCH_ABI64) { 2085 big_size = st.nsyms; 2086 big_size *= sizeof(struct MachO::nlist_64); 2087 big_size += st.symoff; 2088 if (big_size > object_size) 2089 outs() << " (past end of file)\n"; 2090 else 2091 outs() << "\n"; 2092 } else { 2093 big_size = st.nsyms; 2094 big_size *= sizeof(struct MachO::nlist); 2095 big_size += st.symoff; 2096 if (big_size > object_size) 2097 outs() << " (past end of file)\n"; 2098 else 2099 outs() << "\n"; 2100 } 2101 outs() << " stroff " << st.stroff; 2102 if (st.stroff > object_size) 2103 outs() << " (past end of file)\n"; 2104 else 2105 outs() << "\n"; 2106 outs() << " strsize " << st.strsize; 2107 big_size = st.stroff; 2108 big_size += st.strsize; 2109 if (big_size > object_size) 2110 outs() << " (past end of file)\n"; 2111 else 2112 outs() << "\n"; 2113 } 2114 2115 static void PrintDysymtabLoadCommand(MachO::dysymtab_command dyst, 2116 uint32_t nsyms, uint32_t object_size, 2117 uint32_t cputype) { 2118 outs() << " cmd LC_DYSYMTAB\n"; 2119 outs() << " cmdsize " << dyst.cmdsize; 2120 if (dyst.cmdsize != sizeof(struct MachO::dysymtab_command)) 2121 outs() << " Incorrect size\n"; 2122 else 2123 outs() << "\n"; 2124 outs() << " ilocalsym " << dyst.ilocalsym; 2125 if (dyst.ilocalsym > nsyms) 2126 outs() << " (greater than the number of symbols)\n"; 2127 else 2128 outs() << "\n"; 2129 outs() << " nlocalsym " << dyst.nlocalsym; 2130 uint64_t big_size; 2131 big_size = dyst.ilocalsym; 2132 big_size += dyst.nlocalsym; 2133 if (big_size > nsyms) 2134 outs() << " (past the end of the symbol table)\n"; 2135 else 2136 outs() << "\n"; 2137 outs() << " iextdefsym " << dyst.iextdefsym; 2138 if (dyst.iextdefsym > nsyms) 2139 outs() << " (greater than the number of symbols)\n"; 2140 else 2141 outs() << "\n"; 2142 outs() << " nextdefsym " << dyst.nextdefsym; 2143 big_size = dyst.iextdefsym; 2144 big_size += dyst.nextdefsym; 2145 if (big_size > nsyms) 2146 outs() << " (past the end of the symbol table)\n"; 2147 else 2148 outs() << "\n"; 2149 outs() << " iundefsym " << dyst.iundefsym; 2150 if (dyst.iundefsym > nsyms) 2151 outs() << " (greater than the number of symbols)\n"; 2152 else 2153 outs() << "\n"; 2154 outs() << " nundefsym " << dyst.nundefsym; 2155 big_size = dyst.iundefsym; 2156 big_size += dyst.nundefsym; 2157 if (big_size > nsyms) 2158 outs() << " (past the end of the symbol table)\n"; 2159 else 2160 outs() << "\n"; 2161 outs() << " tocoff " << dyst.tocoff; 2162 if (dyst.tocoff > object_size) 2163 outs() << " (past end of file)\n"; 2164 else 2165 outs() << "\n"; 2166 outs() << " ntoc " << dyst.ntoc; 2167 big_size = dyst.ntoc; 2168 big_size *= sizeof(struct MachO::dylib_table_of_contents); 2169 big_size += dyst.tocoff; 2170 if (big_size > object_size) 2171 outs() << " (past end of file)\n"; 2172 else 2173 outs() << "\n"; 2174 outs() << " modtaboff " << dyst.modtaboff; 2175 if (dyst.modtaboff > object_size) 2176 outs() << " (past end of file)\n"; 2177 else 2178 outs() << "\n"; 2179 outs() << " nmodtab " << dyst.nmodtab; 2180 uint64_t modtabend; 2181 if (cputype & MachO::CPU_ARCH_ABI64) { 2182 modtabend = dyst.nmodtab; 2183 modtabend *= sizeof(struct MachO::dylib_module_64); 2184 modtabend += dyst.modtaboff; 2185 } else { 2186 modtabend = dyst.nmodtab; 2187 modtabend *= sizeof(struct MachO::dylib_module); 2188 modtabend += dyst.modtaboff; 2189 } 2190 if (modtabend > object_size) 2191 outs() << " (past end of file)\n"; 2192 else 2193 outs() << "\n"; 2194 outs() << " extrefsymoff " << dyst.extrefsymoff; 2195 if (dyst.extrefsymoff > object_size) 2196 outs() << " (past end of file)\n"; 2197 else 2198 outs() << "\n"; 2199 outs() << " nextrefsyms " << dyst.nextrefsyms; 2200 big_size = dyst.nextrefsyms; 2201 big_size *= sizeof(struct MachO::dylib_reference); 2202 big_size += dyst.extrefsymoff; 2203 if (big_size > object_size) 2204 outs() << " (past end of file)\n"; 2205 else 2206 outs() << "\n"; 2207 outs() << " indirectsymoff " << dyst.indirectsymoff; 2208 if (dyst.indirectsymoff > object_size) 2209 outs() << " (past end of file)\n"; 2210 else 2211 outs() << "\n"; 2212 outs() << " nindirectsyms " << dyst.nindirectsyms; 2213 big_size = dyst.nindirectsyms; 2214 big_size *= sizeof(uint32_t); 2215 big_size += dyst.indirectsymoff; 2216 if (big_size > object_size) 2217 outs() << " (past end of file)\n"; 2218 else 2219 outs() << "\n"; 2220 outs() << " extreloff " << dyst.extreloff; 2221 if (dyst.extreloff > object_size) 2222 outs() << " (past end of file)\n"; 2223 else 2224 outs() << "\n"; 2225 outs() << " nextrel " << dyst.nextrel; 2226 big_size = dyst.nextrel; 2227 big_size *= sizeof(struct MachO::relocation_info); 2228 big_size += dyst.extreloff; 2229 if (big_size > object_size) 2230 outs() << " (past end of file)\n"; 2231 else 2232 outs() << "\n"; 2233 outs() << " locreloff " << dyst.locreloff; 2234 if (dyst.locreloff > object_size) 2235 outs() << " (past end of file)\n"; 2236 else 2237 outs() << "\n"; 2238 outs() << " nlocrel " << dyst.nlocrel; 2239 big_size = dyst.nlocrel; 2240 big_size *= sizeof(struct MachO::relocation_info); 2241 big_size += dyst.locreloff; 2242 if (big_size > object_size) 2243 outs() << " (past end of file)\n"; 2244 else 2245 outs() << "\n"; 2246 } 2247 2248 static void PrintDyldInfoLoadCommand(MachO::dyld_info_command dc, 2249 uint32_t object_size) { 2250 if (dc.cmd == MachO::LC_DYLD_INFO) 2251 outs() << " cmd LC_DYLD_INFO\n"; 2252 else 2253 outs() << " cmd LC_DYLD_INFO_ONLY\n"; 2254 outs() << " cmdsize " << dc.cmdsize; 2255 if (dc.cmdsize != sizeof(struct MachO::dyld_info_command)) 2256 outs() << " Incorrect size\n"; 2257 else 2258 outs() << "\n"; 2259 outs() << " rebase_off " << dc.rebase_off; 2260 if (dc.rebase_off > object_size) 2261 outs() << " (past end of file)\n"; 2262 else 2263 outs() << "\n"; 2264 outs() << " rebase_size " << dc.rebase_size; 2265 uint64_t big_size; 2266 big_size = dc.rebase_off; 2267 big_size += dc.rebase_size; 2268 if (big_size > object_size) 2269 outs() << " (past end of file)\n"; 2270 else 2271 outs() << "\n"; 2272 outs() << " bind_off " << dc.bind_off; 2273 if (dc.bind_off > object_size) 2274 outs() << " (past end of file)\n"; 2275 else 2276 outs() << "\n"; 2277 outs() << " bind_size " << dc.bind_size; 2278 big_size = dc.bind_off; 2279 big_size += dc.bind_size; 2280 if (big_size > object_size) 2281 outs() << " (past end of file)\n"; 2282 else 2283 outs() << "\n"; 2284 outs() << " weak_bind_off " << dc.weak_bind_off; 2285 if (dc.weak_bind_off > object_size) 2286 outs() << " (past end of file)\n"; 2287 else 2288 outs() << "\n"; 2289 outs() << " weak_bind_size " << dc.weak_bind_size; 2290 big_size = dc.weak_bind_off; 2291 big_size += dc.weak_bind_size; 2292 if (big_size > object_size) 2293 outs() << " (past end of file)\n"; 2294 else 2295 outs() << "\n"; 2296 outs() << " lazy_bind_off " << dc.lazy_bind_off; 2297 if (dc.lazy_bind_off > object_size) 2298 outs() << " (past end of file)\n"; 2299 else 2300 outs() << "\n"; 2301 outs() << " lazy_bind_size " << dc.lazy_bind_size; 2302 big_size = dc.lazy_bind_off; 2303 big_size += dc.lazy_bind_size; 2304 if (big_size > object_size) 2305 outs() << " (past end of file)\n"; 2306 else 2307 outs() << "\n"; 2308 outs() << " export_off " << dc.export_off; 2309 if (dc.export_off > object_size) 2310 outs() << " (past end of file)\n"; 2311 else 2312 outs() << "\n"; 2313 outs() << " export_size " << dc.export_size; 2314 big_size = dc.export_off; 2315 big_size += dc.export_size; 2316 if (big_size > object_size) 2317 outs() << " (past end of file)\n"; 2318 else 2319 outs() << "\n"; 2320 } 2321 2322 static void PrintDyldLoadCommand(MachO::dylinker_command dyld, 2323 const char *Ptr) { 2324 if (dyld.cmd == MachO::LC_ID_DYLINKER) 2325 outs() << " cmd LC_ID_DYLINKER\n"; 2326 else if (dyld.cmd == MachO::LC_LOAD_DYLINKER) 2327 outs() << " cmd LC_LOAD_DYLINKER\n"; 2328 else if (dyld.cmd == MachO::LC_DYLD_ENVIRONMENT) 2329 outs() << " cmd LC_DYLD_ENVIRONMENT\n"; 2330 else 2331 outs() << " cmd ?(" << dyld.cmd << ")\n"; 2332 outs() << " cmdsize " << dyld.cmdsize; 2333 if (dyld.cmdsize < sizeof(struct MachO::dylinker_command)) 2334 outs() << " Incorrect size\n"; 2335 else 2336 outs() << "\n"; 2337 if (dyld.name >= dyld.cmdsize) 2338 outs() << " name ?(bad offset " << dyld.name << ")\n"; 2339 else { 2340 const char *P = (const char *)(Ptr)+dyld.name; 2341 outs() << " name " << P << " (offset " << dyld.name << ")\n"; 2342 } 2343 } 2344 2345 static void PrintUuidLoadCommand(MachO::uuid_command uuid) { 2346 outs() << " cmd LC_UUID\n"; 2347 outs() << " cmdsize " << uuid.cmdsize; 2348 if (uuid.cmdsize != sizeof(struct MachO::uuid_command)) 2349 outs() << " Incorrect size\n"; 2350 else 2351 outs() << "\n"; 2352 outs() << " uuid "; 2353 outs() << format("%02" PRIX32, uuid.uuid[0]); 2354 outs() << format("%02" PRIX32, uuid.uuid[1]); 2355 outs() << format("%02" PRIX32, uuid.uuid[2]); 2356 outs() << format("%02" PRIX32, uuid.uuid[3]); 2357 outs() << "-"; 2358 outs() << format("%02" PRIX32, uuid.uuid[4]); 2359 outs() << format("%02" PRIX32, uuid.uuid[5]); 2360 outs() << "-"; 2361 outs() << format("%02" PRIX32, uuid.uuid[6]); 2362 outs() << format("%02" PRIX32, uuid.uuid[7]); 2363 outs() << "-"; 2364 outs() << format("%02" PRIX32, uuid.uuid[8]); 2365 outs() << format("%02" PRIX32, uuid.uuid[9]); 2366 outs() << "-"; 2367 outs() << format("%02" PRIX32, uuid.uuid[10]); 2368 outs() << format("%02" PRIX32, uuid.uuid[11]); 2369 outs() << format("%02" PRIX32, uuid.uuid[12]); 2370 outs() << format("%02" PRIX32, uuid.uuid[13]); 2371 outs() << format("%02" PRIX32, uuid.uuid[14]); 2372 outs() << format("%02" PRIX32, uuid.uuid[15]); 2373 outs() << "\n"; 2374 } 2375 2376 static void PrintVersionMinLoadCommand(MachO::version_min_command vd) { 2377 if (vd.cmd == MachO::LC_VERSION_MIN_MACOSX) 2378 outs() << " cmd LC_VERSION_MIN_MACOSX\n"; 2379 else if (vd.cmd == MachO::LC_VERSION_MIN_IPHONEOS) 2380 outs() << " cmd LC_VERSION_MIN_IPHONEOS\n"; 2381 else 2382 outs() << " cmd " << vd.cmd << " (?)\n"; 2383 outs() << " cmdsize " << vd.cmdsize; 2384 if (vd.cmdsize != sizeof(struct MachO::version_min_command)) 2385 outs() << " Incorrect size\n"; 2386 else 2387 outs() << "\n"; 2388 outs() << " version " << ((vd.version >> 16) & 0xffff) << "." 2389 << ((vd.version >> 8) & 0xff); 2390 if ((vd.version & 0xff) != 0) 2391 outs() << "." << (vd.version & 0xff); 2392 outs() << "\n"; 2393 if (vd.sdk == 0) 2394 outs() << " sdk n/a\n"; 2395 else { 2396 outs() << " sdk " << ((vd.sdk >> 16) & 0xffff) << "." 2397 << ((vd.sdk >> 8) & 0xff); 2398 } 2399 if ((vd.sdk & 0xff) != 0) 2400 outs() << "." << (vd.sdk & 0xff); 2401 outs() << "\n"; 2402 } 2403 2404 static void PrintSourceVersionCommand(MachO::source_version_command sd) { 2405 outs() << " cmd LC_SOURCE_VERSION\n"; 2406 outs() << " cmdsize " << sd.cmdsize; 2407 if (sd.cmdsize != sizeof(struct MachO::source_version_command)) 2408 outs() << " Incorrect size\n"; 2409 else 2410 outs() << "\n"; 2411 uint64_t a = (sd.version >> 40) & 0xffffff; 2412 uint64_t b = (sd.version >> 30) & 0x3ff; 2413 uint64_t c = (sd.version >> 20) & 0x3ff; 2414 uint64_t d = (sd.version >> 10) & 0x3ff; 2415 uint64_t e = sd.version & 0x3ff; 2416 outs() << " version " << a << "." << b; 2417 if (e != 0) 2418 outs() << "." << c << "." << d << "." << e; 2419 else if (d != 0) 2420 outs() << "." << c << "." << d; 2421 else if (c != 0) 2422 outs() << "." << c; 2423 outs() << "\n"; 2424 } 2425 2426 static void PrintEntryPointCommand(MachO::entry_point_command ep) { 2427 outs() << " cmd LC_MAIN\n"; 2428 outs() << " cmdsize " << ep.cmdsize; 2429 if (ep.cmdsize != sizeof(struct MachO::entry_point_command)) 2430 outs() << " Incorrect size\n"; 2431 else 2432 outs() << "\n"; 2433 outs() << " entryoff " << ep.entryoff << "\n"; 2434 outs() << " stacksize " << ep.stacksize << "\n"; 2435 } 2436 2437 static void PrintDylibCommand(MachO::dylib_command dl, const char *Ptr) { 2438 if (dl.cmd == MachO::LC_ID_DYLIB) 2439 outs() << " cmd LC_ID_DYLIB\n"; 2440 else if (dl.cmd == MachO::LC_LOAD_DYLIB) 2441 outs() << " cmd LC_LOAD_DYLIB\n"; 2442 else if (dl.cmd == MachO::LC_LOAD_WEAK_DYLIB) 2443 outs() << " cmd LC_LOAD_WEAK_DYLIB\n"; 2444 else if (dl.cmd == MachO::LC_REEXPORT_DYLIB) 2445 outs() << " cmd LC_REEXPORT_DYLIB\n"; 2446 else if (dl.cmd == MachO::LC_LAZY_LOAD_DYLIB) 2447 outs() << " cmd LC_LAZY_LOAD_DYLIB\n"; 2448 else if (dl.cmd == MachO::LC_LOAD_UPWARD_DYLIB) 2449 outs() << " cmd LC_LOAD_UPWARD_DYLIB\n"; 2450 else 2451 outs() << " cmd " << dl.cmd << " (unknown)\n"; 2452 outs() << " cmdsize " << dl.cmdsize; 2453 if (dl.cmdsize < sizeof(struct MachO::dylib_command)) 2454 outs() << " Incorrect size\n"; 2455 else 2456 outs() << "\n"; 2457 if (dl.dylib.name < dl.cmdsize) { 2458 const char *P = (const char *)(Ptr)+dl.dylib.name; 2459 outs() << " name " << P << " (offset " << dl.dylib.name << ")\n"; 2460 } else { 2461 outs() << " name ?(bad offset " << dl.dylib.name << ")\n"; 2462 } 2463 outs() << " time stamp " << dl.dylib.timestamp << " "; 2464 time_t t = dl.dylib.timestamp; 2465 outs() << ctime(&t); 2466 outs() << " current version "; 2467 if (dl.dylib.current_version == 0xffffffff) 2468 outs() << "n/a\n"; 2469 else 2470 outs() << ((dl.dylib.current_version >> 16) & 0xffff) << "." 2471 << ((dl.dylib.current_version >> 8) & 0xff) << "." 2472 << (dl.dylib.current_version & 0xff) << "\n"; 2473 outs() << "compatibility version "; 2474 if (dl.dylib.compatibility_version == 0xffffffff) 2475 outs() << "n/a\n"; 2476 else 2477 outs() << ((dl.dylib.compatibility_version >> 16) & 0xffff) << "." 2478 << ((dl.dylib.compatibility_version >> 8) & 0xff) << "." 2479 << (dl.dylib.compatibility_version & 0xff) << "\n"; 2480 } 2481 2482 static void PrintLinkEditDataCommand(MachO::linkedit_data_command ld, 2483 uint32_t object_size) { 2484 if (ld.cmd == MachO::LC_CODE_SIGNATURE) 2485 outs() << " cmd LC_FUNCTION_STARTS\n"; 2486 else if (ld.cmd == MachO::LC_SEGMENT_SPLIT_INFO) 2487 outs() << " cmd LC_SEGMENT_SPLIT_INFO\n"; 2488 else if (ld.cmd == MachO::LC_FUNCTION_STARTS) 2489 outs() << " cmd LC_FUNCTION_STARTS\n"; 2490 else if (ld.cmd == MachO::LC_DATA_IN_CODE) 2491 outs() << " cmd LC_DATA_IN_CODE\n"; 2492 else if (ld.cmd == MachO::LC_DYLIB_CODE_SIGN_DRS) 2493 outs() << " cmd LC_DYLIB_CODE_SIGN_DRS\n"; 2494 else if (ld.cmd == MachO::LC_LINKER_OPTIMIZATION_HINT) 2495 outs() << " cmd LC_LINKER_OPTIMIZATION_HINT\n"; 2496 else 2497 outs() << " cmd " << ld.cmd << " (?)\n"; 2498 outs() << " cmdsize " << ld.cmdsize; 2499 if (ld.cmdsize != sizeof(struct MachO::linkedit_data_command)) 2500 outs() << " Incorrect size\n"; 2501 else 2502 outs() << "\n"; 2503 outs() << " dataoff " << ld.dataoff; 2504 if (ld.dataoff > object_size) 2505 outs() << " (past end of file)\n"; 2506 else 2507 outs() << "\n"; 2508 outs() << " datasize " << ld.datasize; 2509 uint64_t big_size = ld.dataoff; 2510 big_size += ld.datasize; 2511 if (big_size > object_size) 2512 outs() << " (past end of file)\n"; 2513 else 2514 outs() << "\n"; 2515 } 2516 2517 static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t ncmds, 2518 uint32_t filetype, uint32_t cputype, 2519 bool verbose) { 2520 StringRef Buf = Obj->getData(); 2521 MachOObjectFile::LoadCommandInfo Command = Obj->getFirstLoadCommandInfo(); 2522 for (unsigned i = 0;; ++i) { 2523 outs() << "Load command " << i << "\n"; 2524 if (Command.C.cmd == MachO::LC_SEGMENT) { 2525 MachO::segment_command SLC = Obj->getSegmentLoadCommand(Command); 2526 const char *sg_segname = SLC.segname; 2527 PrintSegmentCommand(SLC.cmd, SLC.cmdsize, SLC.segname, SLC.vmaddr, 2528 SLC.vmsize, SLC.fileoff, SLC.filesize, SLC.maxprot, 2529 SLC.initprot, SLC.nsects, SLC.flags, Buf.size(), 2530 verbose); 2531 for (unsigned j = 0; j < SLC.nsects; j++) { 2532 MachO::section_64 S = Obj->getSection64(Command, j); 2533 PrintSection(S.sectname, S.segname, S.addr, S.size, S.offset, S.align, 2534 S.reloff, S.nreloc, S.flags, S.reserved1, S.reserved2, 2535 SLC.cmd, sg_segname, filetype, Buf.size(), verbose); 2536 } 2537 } else if (Command.C.cmd == MachO::LC_SEGMENT_64) { 2538 MachO::segment_command_64 SLC_64 = Obj->getSegment64LoadCommand(Command); 2539 const char *sg_segname = SLC_64.segname; 2540 PrintSegmentCommand(SLC_64.cmd, SLC_64.cmdsize, SLC_64.segname, 2541 SLC_64.vmaddr, SLC_64.vmsize, SLC_64.fileoff, 2542 SLC_64.filesize, SLC_64.maxprot, SLC_64.initprot, 2543 SLC_64.nsects, SLC_64.flags, Buf.size(), verbose); 2544 for (unsigned j = 0; j < SLC_64.nsects; j++) { 2545 MachO::section_64 S_64 = Obj->getSection64(Command, j); 2546 PrintSection(S_64.sectname, S_64.segname, S_64.addr, S_64.size, 2547 S_64.offset, S_64.align, S_64.reloff, S_64.nreloc, 2548 S_64.flags, S_64.reserved1, S_64.reserved2, SLC_64.cmd, 2549 sg_segname, filetype, Buf.size(), verbose); 2550 } 2551 } else if (Command.C.cmd == MachO::LC_SYMTAB) { 2552 MachO::symtab_command Symtab = Obj->getSymtabLoadCommand(); 2553 PrintSymtabLoadCommand(Symtab, cputype, Buf.size()); 2554 } else if (Command.C.cmd == MachO::LC_DYSYMTAB) { 2555 MachO::dysymtab_command Dysymtab = Obj->getDysymtabLoadCommand(); 2556 MachO::symtab_command Symtab = Obj->getSymtabLoadCommand(); 2557 PrintDysymtabLoadCommand(Dysymtab, Symtab.nsyms, Buf.size(), cputype); 2558 } else if (Command.C.cmd == MachO::LC_DYLD_INFO || 2559 Command.C.cmd == MachO::LC_DYLD_INFO_ONLY) { 2560 MachO::dyld_info_command DyldInfo = Obj->getDyldInfoLoadCommand(Command); 2561 PrintDyldInfoLoadCommand(DyldInfo, Buf.size()); 2562 } else if (Command.C.cmd == MachO::LC_LOAD_DYLINKER || 2563 Command.C.cmd == MachO::LC_ID_DYLINKER || 2564 Command.C.cmd == MachO::LC_DYLD_ENVIRONMENT) { 2565 MachO::dylinker_command Dyld = Obj->getDylinkerCommand(Command); 2566 PrintDyldLoadCommand(Dyld, Command.Ptr); 2567 } else if (Command.C.cmd == MachO::LC_UUID) { 2568 MachO::uuid_command Uuid = Obj->getUuidCommand(Command); 2569 PrintUuidLoadCommand(Uuid); 2570 } else if (Command.C.cmd == MachO::LC_VERSION_MIN_MACOSX) { 2571 MachO::version_min_command Vd = Obj->getVersionMinLoadCommand(Command); 2572 PrintVersionMinLoadCommand(Vd); 2573 } else if (Command.C.cmd == MachO::LC_SOURCE_VERSION) { 2574 MachO::source_version_command Sd = Obj->getSourceVersionCommand(Command); 2575 PrintSourceVersionCommand(Sd); 2576 } else if (Command.C.cmd == MachO::LC_MAIN) { 2577 MachO::entry_point_command Ep = Obj->getEntryPointCommand(Command); 2578 PrintEntryPointCommand(Ep); 2579 } else if (Command.C.cmd == MachO::LC_LOAD_DYLIB) { 2580 MachO::dylib_command Dl = Obj->getDylibIDLoadCommand(Command); 2581 PrintDylibCommand(Dl, Command.Ptr); 2582 } else if (Command.C.cmd == MachO::LC_CODE_SIGNATURE || 2583 Command.C.cmd == MachO::LC_SEGMENT_SPLIT_INFO || 2584 Command.C.cmd == MachO::LC_FUNCTION_STARTS || 2585 Command.C.cmd == MachO::LC_DATA_IN_CODE || 2586 Command.C.cmd == MachO::LC_DYLIB_CODE_SIGN_DRS || 2587 Command.C.cmd == MachO::LC_LINKER_OPTIMIZATION_HINT) { 2588 MachO::linkedit_data_command Ld = 2589 Obj->getLinkeditDataLoadCommand(Command); 2590 PrintLinkEditDataCommand(Ld, Buf.size()); 2591 } else { 2592 outs() << " cmd ?(" << format("0x%08" PRIx32, Command.C.cmd) 2593 << ")\n"; 2594 outs() << " cmdsize " << Command.C.cmdsize << "\n"; 2595 // TODO: get and print the raw bytes of the load command. 2596 } 2597 // TODO: print all the other kinds of load commands. 2598 if (i == ncmds - 1) 2599 break; 2600 else 2601 Command = Obj->getNextLoadCommandInfo(Command); 2602 } 2603 } 2604 2605 static void getAndPrintMachHeader(const MachOObjectFile *Obj, uint32_t &ncmds, 2606 uint32_t &filetype, uint32_t &cputype, 2607 bool verbose) { 2608 if (Obj->is64Bit()) { 2609 MachO::mach_header_64 H_64; 2610 H_64 = Obj->getHeader64(); 2611 PrintMachHeader(H_64.magic, H_64.cputype, H_64.cpusubtype, H_64.filetype, 2612 H_64.ncmds, H_64.sizeofcmds, H_64.flags, verbose); 2613 ncmds = H_64.ncmds; 2614 filetype = H_64.filetype; 2615 cputype = H_64.cputype; 2616 } else { 2617 MachO::mach_header H; 2618 H = Obj->getHeader(); 2619 PrintMachHeader(H.magic, H.cputype, H.cpusubtype, H.filetype, H.ncmds, 2620 H.sizeofcmds, H.flags, verbose); 2621 ncmds = H.ncmds; 2622 filetype = H.filetype; 2623 cputype = H.cputype; 2624 } 2625 } 2626 2627 void llvm::printMachOFileHeader(const object::ObjectFile *Obj) { 2628 const MachOObjectFile *file = dyn_cast<const MachOObjectFile>(Obj); 2629 uint32_t ncmds = 0; 2630 uint32_t filetype = 0; 2631 uint32_t cputype = 0; 2632 getAndPrintMachHeader(file, ncmds, filetype, cputype, true); 2633 PrintLoadCommands(file, ncmds, filetype, cputype, true); 2634 } 2635 2636 //===----------------------------------------------------------------------===// 2637 // export trie dumping 2638 //===----------------------------------------------------------------------===// 2639 2640 void llvm::printMachOExportsTrie(const object::MachOObjectFile *Obj) { 2641 for (const llvm::object::ExportEntry &Entry : Obj->exports()) { 2642 uint64_t Flags = Entry.flags(); 2643 bool ReExport = (Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT); 2644 bool WeakDef = (Flags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); 2645 bool ThreadLocal = ((Flags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) == 2646 MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL); 2647 bool Abs = ((Flags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) == 2648 MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE); 2649 bool Resolver = (Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER); 2650 if (ReExport) 2651 outs() << "[re-export] "; 2652 else 2653 outs() 2654 << format("0x%08llX ", Entry.address()); // FIXME:add in base address 2655 outs() << Entry.name(); 2656 if (WeakDef || ThreadLocal || Resolver || Abs) { 2657 bool NeedsComma = false; 2658 outs() << " ["; 2659 if (WeakDef) { 2660 outs() << "weak_def"; 2661 NeedsComma = true; 2662 } 2663 if (ThreadLocal) { 2664 if (NeedsComma) 2665 outs() << ", "; 2666 outs() << "per-thread"; 2667 NeedsComma = true; 2668 } 2669 if (Abs) { 2670 if (NeedsComma) 2671 outs() << ", "; 2672 outs() << "absolute"; 2673 NeedsComma = true; 2674 } 2675 if (Resolver) { 2676 if (NeedsComma) 2677 outs() << ", "; 2678 outs() << format("resolver=0x%08llX", Entry.other()); 2679 NeedsComma = true; 2680 } 2681 outs() << "]"; 2682 } 2683 if (ReExport) { 2684 StringRef DylibName = "unknown"; 2685 int Ordinal = Entry.other() - 1; 2686 Obj->getLibraryShortNameByIndex(Ordinal, DylibName); 2687 if (Entry.otherName().empty()) 2688 outs() << " (from " << DylibName << ")"; 2689 else 2690 outs() << " (" << Entry.otherName() << " from " << DylibName << ")"; 2691 } 2692 outs() << "\n"; 2693 } 2694 } 2695 2696 2697 //===----------------------------------------------------------------------===// 2698 // rebase table dumping 2699 //===----------------------------------------------------------------------===// 2700 2701 namespace { 2702 class SegInfo { 2703 public: 2704 SegInfo(const object::MachOObjectFile *Obj); 2705 2706 StringRef segmentName(uint32_t SegIndex); 2707 StringRef sectionName(uint32_t SegIndex, uint64_t SegOffset); 2708 uint64_t address(uint32_t SegIndex, uint64_t SegOffset); 2709 2710 private: 2711 struct SectionInfo { 2712 uint64_t Address; 2713 uint64_t Size; 2714 StringRef SectionName; 2715 StringRef SegmentName; 2716 uint64_t OffsetInSegment; 2717 uint64_t SegmentStartAddress; 2718 uint32_t SegmentIndex; 2719 }; 2720 const SectionInfo &findSection(uint32_t SegIndex, uint64_t SegOffset); 2721 SmallVector<SectionInfo, 32> Sections; 2722 }; 2723 } 2724 2725 SegInfo::SegInfo(const object::MachOObjectFile *Obj) { 2726 // Build table of sections so segIndex/offset pairs can be translated. 2727 uint32_t CurSegIndex = Obj->hasPageZeroSegment() ? 1 : 0; 2728 StringRef CurSegName; 2729 uint64_t CurSegAddress; 2730 for (const SectionRef &Section : Obj->sections()) { 2731 SectionInfo Info; 2732 if (error(Section.getName(Info.SectionName))) 2733 return; 2734 if (error(Section.getAddress(Info.Address))) 2735 return; 2736 if (error(Section.getSize(Info.Size))) 2737 return; 2738 Info.SegmentName = 2739 Obj->getSectionFinalSegmentName(Section.getRawDataRefImpl()); 2740 if (!Info.SegmentName.equals(CurSegName)) { 2741 ++CurSegIndex; 2742 CurSegName = Info.SegmentName; 2743 CurSegAddress = Info.Address; 2744 } 2745 Info.SegmentIndex = CurSegIndex - 1; 2746 Info.OffsetInSegment = Info.Address - CurSegAddress; 2747 Info.SegmentStartAddress = CurSegAddress; 2748 Sections.push_back(Info); 2749 } 2750 } 2751 2752 StringRef SegInfo::segmentName(uint32_t SegIndex) { 2753 for (const SectionInfo &SI : Sections) { 2754 if (SI.SegmentIndex == SegIndex) 2755 return SI.SegmentName; 2756 } 2757 llvm_unreachable("invalid segIndex"); 2758 } 2759 2760 const SegInfo::SectionInfo &SegInfo::findSection(uint32_t SegIndex, 2761 uint64_t OffsetInSeg) { 2762 for (const SectionInfo &SI : Sections) { 2763 if (SI.SegmentIndex != SegIndex) 2764 continue; 2765 if (SI.OffsetInSegment > OffsetInSeg) 2766 continue; 2767 if (OffsetInSeg >= (SI.OffsetInSegment + SI.Size)) 2768 continue; 2769 return SI; 2770 } 2771 llvm_unreachable("segIndex and offset not in any section"); 2772 } 2773 2774 StringRef SegInfo::sectionName(uint32_t SegIndex, uint64_t OffsetInSeg) { 2775 return findSection(SegIndex, OffsetInSeg).SectionName; 2776 } 2777 2778 uint64_t SegInfo::address(uint32_t SegIndex, uint64_t OffsetInSeg) { 2779 const SectionInfo &SI = findSection(SegIndex, OffsetInSeg); 2780 return SI.SegmentStartAddress + OffsetInSeg; 2781 } 2782 2783 void llvm::printMachORebaseTable(const object::MachOObjectFile *Obj) { 2784 // Build table of sections so names can used in final output. 2785 SegInfo sectionTable(Obj); 2786 2787 outs() << "segment section address type\n"; 2788 for (const llvm::object::MachORebaseEntry &Entry : Obj->rebaseTable()) { 2789 uint32_t SegIndex = Entry.segmentIndex(); 2790 uint64_t OffsetInSeg = Entry.segmentOffset(); 2791 StringRef SegmentName = sectionTable.segmentName(SegIndex); 2792 StringRef SectionName = sectionTable.sectionName(SegIndex, OffsetInSeg); 2793 uint64_t Address = sectionTable.address(SegIndex, OffsetInSeg); 2794 2795 // Table lines look like: __DATA __nl_symbol_ptr 0x0000F00C pointer 2796 outs() << format("%-8s %-18s 0x%08" PRIX64 " %s\n", 2797 SegmentName.str().c_str(), 2798 SectionName.str().c_str(), Address, 2799 Entry.typeName().str().c_str()); 2800 } 2801 } 2802 2803 static StringRef ordinalName(const object::MachOObjectFile *Obj, int Ordinal) { 2804 StringRef DylibName; 2805 switch (Ordinal) { 2806 case MachO::BIND_SPECIAL_DYLIB_SELF: 2807 return "this-image"; 2808 case MachO::BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE: 2809 return "main-executable"; 2810 case MachO::BIND_SPECIAL_DYLIB_FLAT_LOOKUP: 2811 return "flat-namespace"; 2812 default: 2813 if (Ordinal > 0) { 2814 std::error_code EC = Obj->getLibraryShortNameByIndex(Ordinal-1, 2815 DylibName); 2816 if (EC) 2817 return "<<ordinal too big>>"; 2818 return DylibName; 2819 } 2820 } 2821 return "<<unknown special ordinal>>"; 2822 } 2823 2824 //===----------------------------------------------------------------------===// 2825 // bind table dumping 2826 //===----------------------------------------------------------------------===// 2827 2828 void llvm::printMachOBindTable(const object::MachOObjectFile *Obj) { 2829 // Build table of sections so names can used in final output. 2830 SegInfo sectionTable(Obj); 2831 2832 outs() << "segment section address type " 2833 "addend dylib symbol\n"; 2834 for (const llvm::object::MachOBindEntry &Entry : Obj->bindTable()) { 2835 uint32_t SegIndex = Entry.segmentIndex(); 2836 uint64_t OffsetInSeg = Entry.segmentOffset(); 2837 StringRef SegmentName = sectionTable.segmentName(SegIndex); 2838 StringRef SectionName = sectionTable.sectionName(SegIndex, OffsetInSeg); 2839 uint64_t Address = sectionTable.address(SegIndex, OffsetInSeg); 2840 2841 // Table lines look like: 2842 // __DATA __got 0x00012010 pointer 0 libSystem ___stack_chk_guard 2843 StringRef Attr; 2844 if (Entry.flags() & MachO::BIND_SYMBOL_FLAGS_WEAK_IMPORT) 2845 Attr = " (weak_import)"; 2846 outs() << left_justify(SegmentName, 8) << " " 2847 << left_justify(SectionName, 18) << " " 2848 << format_hex(Address, 10, true) << " " 2849 << left_justify(Entry.typeName(), 8) << " " 2850 << format_decimal(Entry.addend(), 8) << " " 2851 << left_justify(ordinalName(Obj, Entry.ordinal()), 16) << " " 2852 << Entry.symbolName() 2853 << Attr << "\n"; 2854 } 2855 } 2856 2857 //===----------------------------------------------------------------------===// 2858 // lazy bind table dumping 2859 //===----------------------------------------------------------------------===// 2860 2861 void llvm::printMachOLazyBindTable(const object::MachOObjectFile *Obj) { 2862 // Build table of sections so names can used in final output. 2863 SegInfo sectionTable(Obj); 2864 2865 outs() << "segment section address " 2866 "dylib symbol\n"; 2867 for (const llvm::object::MachOBindEntry &Entry : Obj->lazyBindTable()) { 2868 uint32_t SegIndex = Entry.segmentIndex(); 2869 uint64_t OffsetInSeg = Entry.segmentOffset(); 2870 StringRef SegmentName = sectionTable.segmentName(SegIndex); 2871 StringRef SectionName = sectionTable.sectionName(SegIndex, OffsetInSeg); 2872 uint64_t Address = sectionTable.address(SegIndex, OffsetInSeg); 2873 2874 // Table lines look like: 2875 // __DATA __got 0x00012010 libSystem ___stack_chk_guard 2876 outs() << left_justify(SegmentName, 8) << " " 2877 << left_justify(SectionName, 18) << " " 2878 << format_hex(Address, 10, true) << " " 2879 << left_justify(ordinalName(Obj, Entry.ordinal()), 16) << " " 2880 << Entry.symbolName() << "\n"; 2881 } 2882 } 2883 2884 2885 //===----------------------------------------------------------------------===// 2886 // weak bind table dumping 2887 //===----------------------------------------------------------------------===// 2888 2889 void llvm::printMachOWeakBindTable(const object::MachOObjectFile *Obj) { 2890 // Build table of sections so names can used in final output. 2891 SegInfo sectionTable(Obj); 2892 2893 outs() << "segment section address " 2894 "type addend symbol\n"; 2895 for (const llvm::object::MachOBindEntry &Entry : Obj->weakBindTable()) { 2896 // Strong symbols don't have a location to update. 2897 if (Entry.flags() & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) { 2898 outs() << " strong " 2899 << Entry.symbolName() << "\n"; 2900 continue; 2901 } 2902 uint32_t SegIndex = Entry.segmentIndex(); 2903 uint64_t OffsetInSeg = Entry.segmentOffset(); 2904 StringRef SegmentName = sectionTable.segmentName(SegIndex); 2905 StringRef SectionName = sectionTable.sectionName(SegIndex, OffsetInSeg); 2906 uint64_t Address = sectionTable.address(SegIndex, OffsetInSeg); 2907 2908 // Table lines look like: 2909 // __DATA __data 0x00001000 pointer 0 _foo 2910 outs() << left_justify(SegmentName, 8) << " " 2911 << left_justify(SectionName, 18) << " " 2912 << format_hex(Address, 10, true) << " " 2913 << left_justify(Entry.typeName(), 8) << " " 2914 << format_decimal(Entry.addend(), 8) << " " 2915 << Entry.symbolName() << "\n"; 2916 } 2917 } 2918 2919 2920