1 //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "obj2yaml.h" 10 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 11 #include "llvm/Object/MachOUniversal.h" 12 #include "llvm/ObjectYAML/DWARFYAML.h" 13 #include "llvm/ObjectYAML/ObjectYAML.h" 14 #include "llvm/Support/Errc.h" 15 #include "llvm/Support/Error.h" 16 #include "llvm/Support/ErrorHandling.h" 17 #include "llvm/Support/LEB128.h" 18 19 #include <string.h> // for memcpy 20 21 using namespace llvm; 22 23 class MachODumper { 24 25 template <typename StructType> 26 Expected<const char *> processLoadCommandData( 27 MachOYAML::LoadCommand &LC, 28 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 29 MachOYAML::Object &Y); 30 31 const object::MachOObjectFile &Obj; 32 std::unique_ptr<DWARFContext> DWARFCtx; 33 unsigned RawSegment; 34 void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y); 35 Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y); 36 void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y); 37 void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y); 38 void dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y); 39 void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes, 40 ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false); 41 void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y); 42 void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y); 43 void dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y); 44 45 template <typename SectionType> 46 Expected<MachOYAML::Section> constructSectionCommon(SectionType Sec, 47 size_t SecIndex); 48 template <typename SectionType> 49 Expected<MachOYAML::Section> constructSection(SectionType Sec, 50 size_t SecIndex); 51 template <typename SectionType, typename SegmentType> 52 Expected<const char *> 53 extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 54 std::vector<MachOYAML::Section> &Sections, 55 MachOYAML::Object &Y); 56 57 public: 58 MachODumper(const object::MachOObjectFile &O, 59 std::unique_ptr<DWARFContext> DCtx, unsigned RawSegments) 60 : Obj(O), DWARFCtx(std::move(DCtx)), RawSegment(RawSegments) {} 61 Expected<std::unique_ptr<MachOYAML::Object>> dump(); 62 }; 63 64 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ 65 case MachO::LCName: \ 66 memcpy((void *)&(LC.Data.LCStruct##_data), LoadCmd.Ptr, \ 67 sizeof(MachO::LCStruct)); \ 68 if (Obj.isLittleEndian() != sys::IsLittleEndianHost) \ 69 MachO::swapStruct(LC.Data.LCStruct##_data); \ 70 if (Expected<const char *> ExpectedEndPtr = \ 71 processLoadCommandData<MachO::LCStruct>(LC, LoadCmd, *Y.get())) \ 72 EndPtr = *ExpectedEndPtr; \ 73 else \ 74 return ExpectedEndPtr.takeError(); \ 75 break; 76 77 template <typename SectionType> 78 Expected<MachOYAML::Section> 79 MachODumper::constructSectionCommon(SectionType Sec, size_t SecIndex) { 80 MachOYAML::Section TempSec; 81 memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16); 82 memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16); 83 TempSec.addr = Sec.addr; 84 TempSec.size = Sec.size; 85 TempSec.offset = Sec.offset; 86 TempSec.align = Sec.align; 87 TempSec.reloff = Sec.reloff; 88 TempSec.nreloc = Sec.nreloc; 89 TempSec.flags = Sec.flags; 90 TempSec.reserved1 = Sec.reserved1; 91 TempSec.reserved2 = Sec.reserved2; 92 TempSec.reserved3 = 0; 93 if (!MachO::isVirtualSection(Sec.flags & MachO::SECTION_TYPE)) 94 TempSec.content = 95 yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size)); 96 97 if (Expected<object::SectionRef> SecRef = Obj.getSection(SecIndex)) { 98 TempSec.relocations.reserve(TempSec.nreloc); 99 for (const object::RelocationRef &Reloc : SecRef->relocations()) { 100 const object::DataRefImpl Rel = Reloc.getRawDataRefImpl(); 101 const MachO::any_relocation_info RE = Obj.getRelocation(Rel); 102 MachOYAML::Relocation R; 103 R.address = Obj.getAnyRelocationAddress(RE); 104 R.is_pcrel = Obj.getAnyRelocationPCRel(RE); 105 R.length = Obj.getAnyRelocationLength(RE); 106 R.type = Obj.getAnyRelocationType(RE); 107 R.is_scattered = Obj.isRelocationScattered(RE); 108 R.symbolnum = (R.is_scattered ? 0 : Obj.getPlainRelocationSymbolNum(RE)); 109 R.is_extern = 110 (R.is_scattered ? false : Obj.getPlainRelocationExternal(RE)); 111 R.value = (R.is_scattered ? Obj.getScatteredRelocationValue(RE) : 0); 112 TempSec.relocations.push_back(R); 113 } 114 } else { 115 return SecRef.takeError(); 116 } 117 return TempSec; 118 } 119 120 template <> 121 Expected<MachOYAML::Section> MachODumper::constructSection(MachO::section Sec, 122 size_t SecIndex) { 123 Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex); 124 if (TempSec) 125 TempSec->reserved3 = 0; 126 return TempSec; 127 } 128 129 template <> 130 Expected<MachOYAML::Section> 131 MachODumper::constructSection(MachO::section_64 Sec, size_t SecIndex) { 132 Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex); 133 if (TempSec) 134 TempSec->reserved3 = Sec.reserved3; 135 return TempSec; 136 } 137 138 static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx, 139 DWARFYAML::Data &DWARF) { 140 if (SecName == "__debug_abbrev") { 141 dumpDebugAbbrev(DCtx, DWARF); 142 return Error::success(); 143 } 144 if (SecName == "__debug_aranges") 145 return dumpDebugARanges(DCtx, DWARF); 146 if (SecName == "__debug_info") { 147 dumpDebugInfo(DCtx, DWARF); 148 return Error::success(); 149 } 150 if (SecName == "__debug_line") { 151 dumpDebugLines(DCtx, DWARF); 152 return Error::success(); 153 } 154 if (SecName.startswith("__debug_pub")) { 155 // FIXME: We should extract pub-section dumpers from this function. 156 dumpDebugPubSections(DCtx, DWARF); 157 return Error::success(); 158 } 159 if (SecName == "__debug_ranges") 160 return dumpDebugRanges(DCtx, DWARF); 161 if (SecName == "__debug_str") 162 return dumpDebugStrings(DCtx, DWARF); 163 return createStringError(errc::not_supported, 164 "dumping " + SecName + " section is not supported"); 165 } 166 167 template <typename SectionType, typename SegmentType> 168 Expected<const char *> MachODumper::extractSections( 169 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 170 std::vector<MachOYAML::Section> &Sections, MachOYAML::Object &Y) { 171 auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize; 172 const SectionType *Curr = 173 reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType)); 174 for (; reinterpret_cast<const void *>(Curr) < End; Curr++) { 175 SectionType Sec; 176 memcpy((void *)&Sec, Curr, sizeof(SectionType)); 177 if (Obj.isLittleEndian() != sys::IsLittleEndianHost) 178 MachO::swapStruct(Sec); 179 // For MachO section indices start from 1. 180 if (Expected<MachOYAML::Section> S = 181 constructSection(Sec, Sections.size() + 1)) { 182 StringRef SecName(S->sectname); 183 184 // Copy data sections if requested. 185 if ((RawSegment & ::RawSegments::data) && 186 StringRef(S->segname).startswith("__DATA")) 187 S->content = 188 yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size)); 189 190 if (SecName.startswith("__debug_")) { 191 // If the DWARF section cannot be successfully parsed, emit raw content 192 // instead of an entry in the DWARF section of the YAML. 193 if (Error Err = dumpDebugSection(SecName, *DWARFCtx.get(), Y.DWARF)) 194 consumeError(std::move(Err)); 195 else 196 S->content.reset(); 197 } 198 Sections.push_back(std::move(*S)); 199 } else 200 return S.takeError(); 201 } 202 return reinterpret_cast<const char *>(Curr); 203 } 204 205 template <typename StructType> 206 Expected<const char *> MachODumper::processLoadCommandData( 207 MachOYAML::LoadCommand &LC, 208 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 209 MachOYAML::Object &Y) { 210 return LoadCmd.Ptr + sizeof(StructType); 211 } 212 213 template <> 214 Expected<const char *> 215 MachODumper::processLoadCommandData<MachO::segment_command>( 216 MachOYAML::LoadCommand &LC, 217 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 218 MachOYAML::Object &Y) { 219 return extractSections<MachO::section, MachO::segment_command>( 220 LoadCmd, LC.Sections, Y); 221 } 222 223 template <> 224 Expected<const char *> 225 MachODumper::processLoadCommandData<MachO::segment_command_64>( 226 MachOYAML::LoadCommand &LC, 227 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 228 MachOYAML::Object &Y) { 229 return extractSections<MachO::section_64, MachO::segment_command_64>( 230 LoadCmd, LC.Sections, Y); 231 } 232 233 template <typename StructType> 234 const char * 235 readString(MachOYAML::LoadCommand &LC, 236 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { 237 auto Start = LoadCmd.Ptr + sizeof(StructType); 238 auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType); 239 auto Size = strnlen(Start, MaxSize); 240 LC.Content = StringRef(Start, Size).str(); 241 return Start + Size; 242 } 243 244 template <> 245 Expected<const char *> 246 MachODumper::processLoadCommandData<MachO::dylib_command>( 247 MachOYAML::LoadCommand &LC, 248 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 249 MachOYAML::Object &Y) { 250 return readString<MachO::dylib_command>(LC, LoadCmd); 251 } 252 253 template <> 254 Expected<const char *> 255 MachODumper::processLoadCommandData<MachO::dylinker_command>( 256 MachOYAML::LoadCommand &LC, 257 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 258 MachOYAML::Object &Y) { 259 return readString<MachO::dylinker_command>(LC, LoadCmd); 260 } 261 262 template <> 263 Expected<const char *> 264 MachODumper::processLoadCommandData<MachO::rpath_command>( 265 MachOYAML::LoadCommand &LC, 266 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 267 MachOYAML::Object &Y) { 268 return readString<MachO::rpath_command>(LC, LoadCmd); 269 } 270 271 template <> 272 Expected<const char *> 273 MachODumper::processLoadCommandData<MachO::build_version_command>( 274 MachOYAML::LoadCommand &LC, 275 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 276 MachOYAML::Object &Y) { 277 auto Start = LoadCmd.Ptr + sizeof(MachO::build_version_command); 278 auto NTools = LC.Data.build_version_command_data.ntools; 279 for (unsigned i = 0; i < NTools; ++i) { 280 auto Curr = Start + i * sizeof(MachO::build_tool_version); 281 MachO::build_tool_version BV; 282 memcpy((void *)&BV, Curr, sizeof(MachO::build_tool_version)); 283 if (Obj.isLittleEndian() != sys::IsLittleEndianHost) 284 MachO::swapStruct(BV); 285 LC.Tools.push_back(BV); 286 } 287 return Start + NTools * sizeof(MachO::build_tool_version); 288 } 289 290 Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() { 291 auto Y = std::make_unique<MachOYAML::Object>(); 292 Y->IsLittleEndian = Obj.isLittleEndian(); 293 dumpHeader(Y); 294 if (Error Err = dumpLoadCommands(Y)) 295 return std::move(Err); 296 if (RawSegment & ::RawSegments::linkedit) 297 Y->RawLinkEditSegment = 298 yaml::BinaryRef(Obj.getSegmentContents("__LINKEDIT")); 299 else 300 dumpLinkEdit(Y); 301 302 return std::move(Y); 303 } 304 305 void MachODumper::dumpHeader(std::unique_ptr<MachOYAML::Object> &Y) { 306 Y->Header.magic = Obj.getHeader().magic; 307 Y->Header.cputype = Obj.getHeader().cputype; 308 Y->Header.cpusubtype = Obj.getHeader().cpusubtype; 309 Y->Header.filetype = Obj.getHeader().filetype; 310 Y->Header.ncmds = Obj.getHeader().ncmds; 311 Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds; 312 Y->Header.flags = Obj.getHeader().flags; 313 Y->Header.reserved = 0; 314 } 315 316 Error MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) { 317 for (auto LoadCmd : Obj.load_commands()) { 318 MachOYAML::LoadCommand LC; 319 const char *EndPtr = LoadCmd.Ptr; 320 switch (LoadCmd.C.cmd) { 321 default: 322 memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr, 323 sizeof(MachO::load_command)); 324 if (Obj.isLittleEndian() != sys::IsLittleEndianHost) 325 MachO::swapStruct(LC.Data.load_command_data); 326 if (Expected<const char *> ExpectedEndPtr = 327 processLoadCommandData<MachO::load_command>(LC, LoadCmd, 328 *Y.get())) 329 EndPtr = *ExpectedEndPtr; 330 else 331 return ExpectedEndPtr.takeError(); 332 break; 333 #include "llvm/BinaryFormat/MachO.def" 334 } 335 auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr); 336 if (!std::all_of(EndPtr, &EndPtr[RemainingBytes], 337 [](const char C) { return C == 0; })) { 338 LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr, 339 &EndPtr[RemainingBytes]); 340 RemainingBytes = 0; 341 } 342 LC.ZeroPadBytes = RemainingBytes; 343 Y->LoadCommands.push_back(std::move(LC)); 344 } 345 return Error::success(); 346 } 347 348 void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) { 349 dumpRebaseOpcodes(Y); 350 dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes()); 351 dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes, 352 Obj.getDyldInfoWeakBindOpcodes()); 353 dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(), 354 true); 355 dumpExportTrie(Y); 356 dumpSymbols(Y); 357 dumpIndirectSymbols(Y); 358 dumpFunctionStarts(Y); 359 } 360 361 void MachODumper::dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y) { 362 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 363 364 auto FunctionStarts = Obj.getFunctionStarts(); 365 for (auto Addr : FunctionStarts) 366 LEData.FunctionStarts.push_back(Addr); 367 } 368 369 void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) { 370 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 371 372 auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes(); 373 for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end(); 374 ++OpCode) { 375 MachOYAML::RebaseOpcode RebaseOp; 376 RebaseOp.Opcode = 377 static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK); 378 RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK; 379 380 unsigned Count; 381 uint64_t ULEB = 0; 382 383 switch (RebaseOp.Opcode) { 384 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: 385 386 ULEB = decodeULEB128(OpCode + 1, &Count); 387 RebaseOp.ExtraData.push_back(ULEB); 388 OpCode += Count; 389 LLVM_FALLTHROUGH; 390 // Intentionally no break here -- This opcode has two ULEB values 391 case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: 392 case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: 393 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: 394 case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: 395 396 ULEB = decodeULEB128(OpCode + 1, &Count); 397 RebaseOp.ExtraData.push_back(ULEB); 398 OpCode += Count; 399 break; 400 default: 401 break; 402 } 403 404 LEData.RebaseOpcodes.push_back(RebaseOp); 405 406 if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE) 407 break; 408 } 409 } 410 411 StringRef ReadStringRef(const uint8_t *Start) { 412 const uint8_t *Itr = Start; 413 for (; *Itr; ++Itr) 414 ; 415 return StringRef(reinterpret_cast<const char *>(Start), Itr - Start); 416 } 417 418 void MachODumper::dumpBindOpcodes( 419 std::vector<MachOYAML::BindOpcode> &BindOpcodes, 420 ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) { 421 for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end(); 422 ++OpCode) { 423 MachOYAML::BindOpcode BindOp; 424 BindOp.Opcode = 425 static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK); 426 BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK; 427 428 unsigned Count; 429 uint64_t ULEB = 0; 430 int64_t SLEB = 0; 431 432 switch (BindOp.Opcode) { 433 case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: 434 ULEB = decodeULEB128(OpCode + 1, &Count); 435 BindOp.ULEBExtraData.push_back(ULEB); 436 OpCode += Count; 437 LLVM_FALLTHROUGH; 438 // Intentionally no break here -- this opcode has two ULEB values 439 440 case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: 441 case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: 442 case MachO::BIND_OPCODE_ADD_ADDR_ULEB: 443 case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: 444 ULEB = decodeULEB128(OpCode + 1, &Count); 445 BindOp.ULEBExtraData.push_back(ULEB); 446 OpCode += Count; 447 break; 448 449 case MachO::BIND_OPCODE_SET_ADDEND_SLEB: 450 SLEB = decodeSLEB128(OpCode + 1, &Count); 451 BindOp.SLEBExtraData.push_back(SLEB); 452 OpCode += Count; 453 break; 454 455 case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: 456 BindOp.Symbol = ReadStringRef(OpCode + 1); 457 OpCode += BindOp.Symbol.size() + 1; 458 break; 459 default: 460 break; 461 } 462 463 BindOpcodes.push_back(BindOp); 464 465 // Lazy bindings have DONE opcodes between operations, so we need to keep 466 // processing after a DONE. 467 if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE) 468 break; 469 } 470 } 471 472 /*! 473 * /brief processes a node from the export trie, and its children. 474 * 475 * To my knowledge there is no documentation of the encoded format of this data 476 * other than in the heads of the Apple linker engineers. To that end hopefully 477 * this comment and the implementation below can serve to light the way for 478 * anyone crazy enough to come down this path in the future. 479 * 480 * This function reads and preserves the trie structure of the export trie. To 481 * my knowledge there is no code anywhere else that reads the data and preserves 482 * the Trie. LD64 (sources available at opensource.apple.com) has a similar 483 * implementation that parses the export trie into a vector. That code as well 484 * as LLVM's libObject MachO implementation were the basis for this. 485 * 486 * The export trie is an encoded trie. The node serialization is a bit awkward. 487 * The below pseudo-code is the best description I've come up with for it. 488 * 489 * struct SerializedNode { 490 * ULEB128 TerminalSize; 491 * struct TerminalData { <-- This is only present if TerminalSize > 0 492 * ULEB128 Flags; 493 * ULEB128 Address; <-- Present if (! Flags & REEXPORT ) 494 * ULEB128 Other; <-- Present if ( Flags & REEXPORT || 495 * Flags & STUB_AND_RESOLVER ) 496 * char[] ImportName; <-- Present if ( Flags & REEXPORT ) 497 * } 498 * uint8_t ChildrenCount; 499 * Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount]; 500 * SerializedNode Children[ChildrenCount] 501 * } 502 * 503 * Terminal nodes are nodes that represent actual exports. They can appear 504 * anywhere in the tree other than at the root; they do not need to be leaf 505 * nodes. When reading the data out of the trie this routine reads it in-order, 506 * but it puts the child names and offsets directly into the child nodes. This 507 * results in looping over the children twice during serialization and 508 * de-serialization, but it makes the YAML representation more human readable. 509 * 510 * Below is an example of the graph from a "Hello World" executable: 511 * 512 * ------- 513 * | '' | 514 * ------- 515 * | 516 * ------- 517 * | '_' | 518 * ------- 519 * | 520 * |----------------------------------------| 521 * | | 522 * ------------------------ --------------------- 523 * | '_mh_execute_header' | | 'main' | 524 * | Flags: 0x00000000 | | Flags: 0x00000000 | 525 * | Addr: 0x00000000 | | Addr: 0x00001160 | 526 * ------------------------ --------------------- 527 * 528 * This graph represents the trie for the exports "__mh_execute_header" and 529 * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are 530 * terminal. 531 */ 532 533 const uint8_t *processExportNode(const uint8_t *CurrPtr, 534 const uint8_t *const End, 535 MachOYAML::ExportEntry &Entry) { 536 if (CurrPtr >= End) 537 return CurrPtr; 538 unsigned Count = 0; 539 Entry.TerminalSize = decodeULEB128(CurrPtr, &Count); 540 CurrPtr += Count; 541 if (Entry.TerminalSize != 0) { 542 Entry.Flags = decodeULEB128(CurrPtr, &Count); 543 CurrPtr += Count; 544 if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) { 545 Entry.Address = 0; 546 Entry.Other = decodeULEB128(CurrPtr, &Count); 547 CurrPtr += Count; 548 Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr)); 549 } else { 550 Entry.Address = decodeULEB128(CurrPtr, &Count); 551 CurrPtr += Count; 552 if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { 553 Entry.Other = decodeULEB128(CurrPtr, &Count); 554 CurrPtr += Count; 555 } else 556 Entry.Other = 0; 557 } 558 } 559 uint8_t childrenCount = *CurrPtr++; 560 if (childrenCount == 0) 561 return CurrPtr; 562 563 Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount, 564 MachOYAML::ExportEntry()); 565 for (auto &Child : Entry.Children) { 566 Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr)); 567 CurrPtr += Child.Name.length() + 1; 568 Child.NodeOffset = decodeULEB128(CurrPtr, &Count); 569 CurrPtr += Count; 570 } 571 for (auto &Child : Entry.Children) { 572 CurrPtr = processExportNode(CurrPtr, End, Child); 573 } 574 return CurrPtr; 575 } 576 577 void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) { 578 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 579 auto ExportsTrie = Obj.getDyldInfoExportsTrie(); 580 processExportNode(ExportsTrie.begin(), ExportsTrie.end(), LEData.ExportTrie); 581 } 582 583 template <typename nlist_t> 584 MachOYAML::NListEntry constructNameList(const nlist_t &nlist) { 585 MachOYAML::NListEntry NL; 586 NL.n_strx = nlist.n_strx; 587 NL.n_type = nlist.n_type; 588 NL.n_sect = nlist.n_sect; 589 NL.n_desc = nlist.n_desc; 590 NL.n_value = nlist.n_value; 591 return NL; 592 } 593 594 void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) { 595 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 596 597 for (auto Symbol : Obj.symbols()) { 598 MachOYAML::NListEntry NLE = 599 Obj.is64Bit() 600 ? constructNameList<MachO::nlist_64>( 601 Obj.getSymbol64TableEntry(Symbol.getRawDataRefImpl())) 602 : constructNameList<MachO::nlist>( 603 Obj.getSymbolTableEntry(Symbol.getRawDataRefImpl())); 604 LEData.NameList.push_back(NLE); 605 } 606 607 StringRef RemainingTable = Obj.getStringTableData(); 608 while (RemainingTable.size() > 0) { 609 auto SymbolPair = RemainingTable.split('\0'); 610 RemainingTable = SymbolPair.second; 611 LEData.StringTable.push_back(SymbolPair.first); 612 } 613 } 614 615 void MachODumper::dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y) { 616 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 617 618 MachO::dysymtab_command DLC = Obj.getDysymtabLoadCommand(); 619 for (unsigned i = 0; i < DLC.nindirectsyms; ++i) 620 LEData.IndirectSymbols.push_back(Obj.getIndirectSymbolTableEntry(DLC, i)); 621 } 622 623 Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj, 624 unsigned RawSegments) { 625 std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj); 626 MachODumper Dumper(Obj, std::move(DCtx), RawSegments); 627 Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump(); 628 if (!YAML) 629 return YAML.takeError(); 630 631 yaml::YamlObjectFile YAMLFile; 632 YAMLFile.MachO = std::move(YAML.get()); 633 634 yaml::Output Yout(Out); 635 Yout << YAMLFile; 636 return Error::success(); 637 } 638 639 Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj, 640 unsigned RawSegments) { 641 yaml::YamlObjectFile YAMLFile; 642 YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary()); 643 MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO; 644 YAML.Header.magic = Obj.getMagic(); 645 YAML.Header.nfat_arch = Obj.getNumberOfObjects(); 646 647 for (auto Slice : Obj.objects()) { 648 MachOYAML::FatArch arch; 649 arch.cputype = Slice.getCPUType(); 650 arch.cpusubtype = Slice.getCPUSubType(); 651 arch.offset = Slice.getOffset(); 652 arch.size = Slice.getSize(); 653 arch.align = Slice.getAlign(); 654 arch.reserved = Slice.getReserved(); 655 YAML.FatArchs.push_back(arch); 656 657 auto SliceObj = Slice.getAsObjectFile(); 658 if (!SliceObj) 659 return SliceObj.takeError(); 660 661 std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(*SliceObj.get()); 662 MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawSegments); 663 Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump(); 664 if (!YAMLObj) 665 return YAMLObj.takeError(); 666 YAML.Slices.push_back(*YAMLObj.get()); 667 } 668 669 yaml::Output Yout(Out); 670 Yout << YAML; 671 return Error::success(); 672 } 673 674 Error macho2yaml(raw_ostream &Out, const object::Binary &Binary, 675 unsigned RawSegments) { 676 if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary)) 677 return macho2yaml(Out, *MachOObj, RawSegments); 678 679 if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary)) 680 return macho2yaml(Out, *MachOObj, RawSegments); 681 682 llvm_unreachable("unexpected Mach-O file format"); 683 } 684