1 //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "Error.h" 11 #include "obj2yaml.h" 12 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 13 #include "llvm/Object/MachOUniversal.h" 14 #include "llvm/ObjectYAML/ObjectYAML.h" 15 #include "llvm/Support/ErrorHandling.h" 16 #include "llvm/Support/LEB128.h" 17 18 #include <string.h> // for memcpy 19 20 using namespace llvm; 21 22 class MachODumper { 23 24 template <typename StructType> 25 const char *processLoadCommandData( 26 MachOYAML::LoadCommand &LC, 27 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd); 28 29 const object::MachOObjectFile &Obj; 30 void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y); 31 void dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y); 32 void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y); 33 void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y); 34 void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes, 35 ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false); 36 void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y); 37 void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y); 38 void dumpDebugAbbrev(DWARFContextInMemory &DCtx, 39 std::unique_ptr<MachOYAML::Object> &Y); 40 void dumpDebugStrings(DWARFContextInMemory &DCtx, 41 std::unique_ptr<MachOYAML::Object> &Y); 42 43 public: 44 MachODumper(const object::MachOObjectFile &O) : Obj(O) {} 45 Expected<std::unique_ptr<MachOYAML::Object>> dump(); 46 }; 47 48 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ 49 case MachO::LCName: \ 50 memcpy((void *) & (LC.Data.LCStruct##_data), LoadCmd.Ptr, \ 51 sizeof(MachO::LCStruct)); \ 52 if (Obj.isLittleEndian() != sys::IsLittleEndianHost) \ 53 MachO::swapStruct(LC.Data.LCStruct##_data); \ 54 EndPtr = processLoadCommandData<MachO::LCStruct>(LC, LoadCmd); \ 55 break; 56 57 template <typename SectionType> 58 MachOYAML::Section constructSectionCommon(SectionType Sec) { 59 MachOYAML::Section TempSec; 60 memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16); 61 memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16); 62 TempSec.addr = Sec.addr; 63 TempSec.size = Sec.size; 64 TempSec.offset = Sec.offset; 65 TempSec.align = Sec.align; 66 TempSec.reloff = Sec.reloff; 67 TempSec.nreloc = Sec.nreloc; 68 TempSec.flags = Sec.flags; 69 TempSec.reserved1 = Sec.reserved1; 70 TempSec.reserved2 = Sec.reserved2; 71 TempSec.reserved3 = 0; 72 return TempSec; 73 } 74 75 template <typename SectionType> 76 MachOYAML::Section constructSection(SectionType Sec); 77 78 template <> MachOYAML::Section constructSection(MachO::section Sec) { 79 MachOYAML::Section TempSec = constructSectionCommon(Sec); 80 TempSec.reserved3 = 0; 81 return TempSec; 82 } 83 84 template <> MachOYAML::Section constructSection(MachO::section_64 Sec) { 85 MachOYAML::Section TempSec = constructSectionCommon(Sec); 86 TempSec.reserved3 = Sec.reserved3; 87 return TempSec; 88 } 89 90 template <typename SectionType, typename SegmentType> 91 const char * 92 extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 93 std::vector<MachOYAML::Section> &Sections, 94 bool IsLittleEndian) { 95 auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize; 96 const SectionType *Curr = 97 reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType)); 98 for (; reinterpret_cast<const void *>(Curr) < End; Curr++) { 99 if (IsLittleEndian != sys::IsLittleEndianHost) { 100 SectionType Sec; 101 memcpy((void *)&Sec, Curr, sizeof(SectionType)); 102 MachO::swapStruct(Sec); 103 Sections.push_back(constructSection(Sec)); 104 } else { 105 Sections.push_back(constructSection(*Curr)); 106 } 107 } 108 return reinterpret_cast<const char *>(Curr); 109 } 110 111 template <typename StructType> 112 const char *MachODumper::processLoadCommandData( 113 MachOYAML::LoadCommand &LC, 114 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { 115 return LoadCmd.Ptr + sizeof(StructType); 116 } 117 118 template <> 119 const char *MachODumper::processLoadCommandData<MachO::segment_command>( 120 MachOYAML::LoadCommand &LC, 121 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { 122 return extractSections<MachO::section, MachO::segment_command>( 123 LoadCmd, LC.Sections, Obj.isLittleEndian()); 124 } 125 126 template <> 127 const char *MachODumper::processLoadCommandData<MachO::segment_command_64>( 128 MachOYAML::LoadCommand &LC, 129 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { 130 return extractSections<MachO::section_64, MachO::segment_command_64>( 131 LoadCmd, LC.Sections, Obj.isLittleEndian()); 132 } 133 134 template <typename StructType> 135 const char * 136 readString(MachOYAML::LoadCommand &LC, 137 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { 138 auto Start = LoadCmd.Ptr + sizeof(StructType); 139 auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType); 140 auto Size = strnlen(Start, MaxSize); 141 LC.PayloadString = StringRef(Start, Size).str(); 142 return Start + Size; 143 } 144 145 template <> 146 const char *MachODumper::processLoadCommandData<MachO::dylib_command>( 147 MachOYAML::LoadCommand &LC, 148 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { 149 return readString<MachO::dylib_command>(LC, LoadCmd); 150 } 151 152 template <> 153 const char *MachODumper::processLoadCommandData<MachO::dylinker_command>( 154 MachOYAML::LoadCommand &LC, 155 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { 156 return readString<MachO::dylinker_command>(LC, LoadCmd); 157 } 158 159 template <> 160 const char *MachODumper::processLoadCommandData<MachO::rpath_command>( 161 MachOYAML::LoadCommand &LC, 162 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { 163 return readString<MachO::rpath_command>(LC, LoadCmd); 164 } 165 166 Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() { 167 auto Y = make_unique<MachOYAML::Object>(); 168 Y->IsLittleEndian = Obj.isLittleEndian(); 169 dumpHeader(Y); 170 dumpLoadCommands(Y); 171 dumpLinkEdit(Y); 172 173 DWARFContextInMemory DICtx(Obj); 174 if (auto Err = dwarf2yaml(DICtx, Y->DWARF)) 175 return errorCodeToError(Err); 176 return std::move(Y); 177 } 178 179 void MachODumper::dumpHeader(std::unique_ptr<MachOYAML::Object> &Y) { 180 Y->Header.magic = Obj.getHeader().magic; 181 Y->Header.cputype = Obj.getHeader().cputype; 182 Y->Header.cpusubtype = Obj.getHeader().cpusubtype; 183 Y->Header.filetype = Obj.getHeader().filetype; 184 Y->Header.ncmds = Obj.getHeader().ncmds; 185 Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds; 186 Y->Header.flags = Obj.getHeader().flags; 187 Y->Header.reserved = 0; 188 } 189 190 void MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) { 191 for (auto LoadCmd : Obj.load_commands()) { 192 MachOYAML::LoadCommand LC; 193 const char *EndPtr = LoadCmd.Ptr; 194 switch (LoadCmd.C.cmd) { 195 default: 196 memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr, 197 sizeof(MachO::load_command)); 198 if (Obj.isLittleEndian() != sys::IsLittleEndianHost) 199 MachO::swapStruct(LC.Data.load_command_data); 200 EndPtr = processLoadCommandData<MachO::load_command>(LC, LoadCmd); 201 break; 202 #include "llvm/Support/MachO.def" 203 } 204 auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr); 205 if (!std::all_of(EndPtr, &EndPtr[RemainingBytes], 206 [](const char C) { return C == 0; })) { 207 LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr, 208 &EndPtr[RemainingBytes]); 209 RemainingBytes = 0; 210 } 211 LC.ZeroPadBytes = RemainingBytes; 212 Y->LoadCommands.push_back(std::move(LC)); 213 } 214 } 215 216 void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) { 217 dumpRebaseOpcodes(Y); 218 dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes()); 219 dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes, 220 Obj.getDyldInfoWeakBindOpcodes()); 221 dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(), 222 true); 223 dumpExportTrie(Y); 224 dumpSymbols(Y); 225 } 226 227 void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) { 228 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 229 230 auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes(); 231 for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end(); 232 ++OpCode) { 233 MachOYAML::RebaseOpcode RebaseOp; 234 RebaseOp.Opcode = 235 static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK); 236 RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK; 237 238 unsigned Count; 239 uint64_t ULEB = 0; 240 241 switch (RebaseOp.Opcode) { 242 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: 243 244 ULEB = decodeULEB128(OpCode + 1, &Count); 245 RebaseOp.ExtraData.push_back(ULEB); 246 OpCode += Count; 247 // Intentionally no break here -- This opcode has two ULEB values 248 case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: 249 case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: 250 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: 251 case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: 252 253 ULEB = decodeULEB128(OpCode + 1, &Count); 254 RebaseOp.ExtraData.push_back(ULEB); 255 OpCode += Count; 256 break; 257 default: 258 break; 259 } 260 261 LEData.RebaseOpcodes.push_back(RebaseOp); 262 263 if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE) 264 break; 265 } 266 } 267 268 StringRef ReadStringRef(const uint8_t *Start) { 269 const uint8_t *Itr = Start; 270 for (; *Itr; ++Itr) 271 ; 272 return StringRef(reinterpret_cast<const char *>(Start), Itr - Start); 273 } 274 275 void MachODumper::dumpBindOpcodes( 276 std::vector<MachOYAML::BindOpcode> &BindOpcodes, 277 ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) { 278 for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end(); 279 ++OpCode) { 280 MachOYAML::BindOpcode BindOp; 281 BindOp.Opcode = 282 static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK); 283 BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK; 284 285 unsigned Count; 286 uint64_t ULEB = 0; 287 int64_t SLEB = 0; 288 289 switch (BindOp.Opcode) { 290 case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: 291 ULEB = decodeULEB128(OpCode + 1, &Count); 292 BindOp.ULEBExtraData.push_back(ULEB); 293 OpCode += Count; 294 // Intentionally no break here -- this opcode has two ULEB values 295 296 case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: 297 case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: 298 case MachO::BIND_OPCODE_ADD_ADDR_ULEB: 299 case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: 300 ULEB = decodeULEB128(OpCode + 1, &Count); 301 BindOp.ULEBExtraData.push_back(ULEB); 302 OpCode += Count; 303 break; 304 305 case MachO::BIND_OPCODE_SET_ADDEND_SLEB: 306 SLEB = decodeSLEB128(OpCode + 1, &Count); 307 BindOp.SLEBExtraData.push_back(SLEB); 308 OpCode += Count; 309 break; 310 311 case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: 312 BindOp.Symbol = ReadStringRef(OpCode + 1); 313 OpCode += BindOp.Symbol.size() + 1; 314 break; 315 default: 316 break; 317 } 318 319 BindOpcodes.push_back(BindOp); 320 321 // Lazy bindings have DONE opcodes between operations, so we need to keep 322 // processing after a DONE. 323 if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE) 324 break; 325 } 326 } 327 328 /*! 329 * /brief processes a node from the export trie, and its children. 330 * 331 * To my knowledge there is no documentation of the encoded format of this data 332 * other than in the heads of the Apple linker engineers. To that end hopefully 333 * this comment and the implementation below can serve to light the way for 334 * anyone crazy enough to come down this path in the future. 335 * 336 * This function reads and preserves the trie structure of the export trie. To 337 * my knowledge there is no code anywhere else that reads the data and preserves 338 * the Trie. LD64 (sources available at opensource.apple.com) has a similar 339 * implementation that parses the export trie into a vector. That code as well 340 * as LLVM's libObject MachO implementation were the basis for this. 341 * 342 * The export trie is an encoded trie. The node serialization is a bit awkward. 343 * The below pseudo-code is the best description I've come up with for it. 344 * 345 * struct SerializedNode { 346 * ULEB128 TerminalSize; 347 * struct TerminalData { <-- This is only present if TerminalSize > 0 348 * ULEB128 Flags; 349 * ULEB128 Address; <-- Present if (! Flags & REEXPORT ) 350 * ULEB128 Other; <-- Present if ( Flags & REEXPORT || 351 * Flags & STUB_AND_RESOLVER ) 352 * char[] ImportName; <-- Present if ( Flags & REEXPORT ) 353 * } 354 * uint8_t ChildrenCount; 355 * Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount]; 356 * SerializedNode Children[ChildrenCount] 357 * } 358 * 359 * Terminal nodes are nodes that represent actual exports. They can appear 360 * anywhere in the tree other than at the root; they do not need to be leaf 361 * nodes. When reading the data out of the trie this routine reads it in-order, 362 * but it puts the child names and offsets directly into the child nodes. This 363 * results in looping over the children twice during serialization and 364 * de-serialization, but it makes the YAML representation more human readable. 365 * 366 * Below is an example of the graph from a "Hello World" executable: 367 * 368 * ------- 369 * | '' | 370 * ------- 371 * | 372 * ------- 373 * | '_' | 374 * ------- 375 * | 376 * |----------------------------------------| 377 * | | 378 * ------------------------ --------------------- 379 * | '_mh_execute_header' | | 'main' | 380 * | Flags: 0x00000000 | | Flags: 0x00000000 | 381 * | Addr: 0x00000000 | | Addr: 0x00001160 | 382 * ------------------------ --------------------- 383 * 384 * This graph represents the trie for the exports "__mh_execute_header" and 385 * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are 386 * terminal. 387 */ 388 389 const uint8_t *processExportNode(const uint8_t *CurrPtr, 390 const uint8_t *const End, 391 MachOYAML::ExportEntry &Entry) { 392 if (CurrPtr >= End) 393 return CurrPtr; 394 unsigned Count = 0; 395 Entry.TerminalSize = decodeULEB128(CurrPtr, &Count); 396 CurrPtr += Count; 397 if (Entry.TerminalSize != 0) { 398 Entry.Flags = decodeULEB128(CurrPtr, &Count); 399 CurrPtr += Count; 400 if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) { 401 Entry.Address = 0; 402 Entry.Other = decodeULEB128(CurrPtr, &Count); 403 CurrPtr += Count; 404 Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr)); 405 } else { 406 Entry.Address = decodeULEB128(CurrPtr, &Count); 407 CurrPtr += Count; 408 if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { 409 Entry.Other = decodeULEB128(CurrPtr, &Count); 410 CurrPtr += Count; 411 } else 412 Entry.Other = 0; 413 } 414 } 415 uint8_t childrenCount = *CurrPtr++; 416 if (childrenCount == 0) 417 return CurrPtr; 418 419 Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount, 420 MachOYAML::ExportEntry()); 421 for (auto &Child : Entry.Children) { 422 Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr)); 423 CurrPtr += Child.Name.length() + 1; 424 Child.NodeOffset = decodeULEB128(CurrPtr, &Count); 425 CurrPtr += Count; 426 } 427 for (auto &Child : Entry.Children) { 428 CurrPtr = processExportNode(CurrPtr, End, Child); 429 } 430 return CurrPtr; 431 } 432 433 void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) { 434 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 435 auto ExportsTrie = Obj.getDyldInfoExportsTrie(); 436 processExportNode(ExportsTrie.begin(), ExportsTrie.end(), LEData.ExportTrie); 437 } 438 439 template <typename nlist_t> 440 MachOYAML::NListEntry constructNameList(const nlist_t &nlist) { 441 MachOYAML::NListEntry NL; 442 NL.n_strx = nlist.n_strx; 443 NL.n_type = nlist.n_type; 444 NL.n_sect = nlist.n_sect; 445 NL.n_desc = nlist.n_desc; 446 NL.n_value = nlist.n_value; 447 return NL; 448 } 449 450 void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) { 451 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 452 453 for (auto Symbol : Obj.symbols()) { 454 MachOYAML::NListEntry NLE = 455 Obj.is64Bit() 456 ? constructNameList<MachO::nlist_64>( 457 Obj.getSymbol64TableEntry(Symbol.getRawDataRefImpl())) 458 : constructNameList<MachO::nlist>( 459 Obj.getSymbolTableEntry(Symbol.getRawDataRefImpl())); 460 LEData.NameList.push_back(NLE); 461 } 462 463 StringRef RemainingTable = Obj.getStringTableData(); 464 while (RemainingTable.size() > 0) { 465 auto SymbolPair = RemainingTable.split('\0'); 466 RemainingTable = SymbolPair.second; 467 LEData.StringTable.push_back(SymbolPair.first); 468 } 469 } 470 471 Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) { 472 MachODumper Dumper(Obj); 473 Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump(); 474 if (!YAML) 475 return YAML.takeError(); 476 477 yaml::YamlObjectFile YAMLFile; 478 YAMLFile.MachO = std::move(YAML.get()); 479 480 yaml::Output Yout(Out); 481 Yout << YAMLFile; 482 return Error::success(); 483 } 484 485 Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) { 486 yaml::YamlObjectFile YAMLFile; 487 YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary()); 488 MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO; 489 YAML.Header.magic = Obj.getMagic(); 490 YAML.Header.nfat_arch = Obj.getNumberOfObjects(); 491 492 for (auto Slice : Obj.objects()) { 493 MachOYAML::FatArch arch; 494 arch.cputype = Slice.getCPUType(); 495 arch.cpusubtype = Slice.getCPUSubType(); 496 arch.offset = Slice.getOffset(); 497 arch.size = Slice.getSize(); 498 arch.align = Slice.getAlign(); 499 arch.reserved = Slice.getReserved(); 500 YAML.FatArchs.push_back(arch); 501 502 auto SliceObj = Slice.getAsObjectFile(); 503 if (!SliceObj) 504 return SliceObj.takeError(); 505 506 MachODumper Dumper(*SliceObj.get()); 507 Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump(); 508 if (!YAMLObj) 509 return YAMLObj.takeError(); 510 YAML.Slices.push_back(*YAMLObj.get()); 511 } 512 513 yaml::Output Yout(Out); 514 Yout << YAML; 515 return Error::success(); 516 } 517 518 std::error_code macho2yaml(raw_ostream &Out, const object::Binary &Binary) { 519 if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary)) { 520 if (auto Err = macho2yaml(Out, *MachOObj)) { 521 return errorToErrorCode(std::move(Err)); 522 } 523 return obj2yaml_error::success; 524 } 525 526 if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary)) { 527 if (auto Err = macho2yaml(Out, *MachOObj)) { 528 return errorToErrorCode(std::move(Err)); 529 } 530 return obj2yaml_error::success; 531 } 532 533 return obj2yaml_error::unsupported_obj_file_format; 534 } 535