1 //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "obj2yaml.h"
10 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
11 #include "llvm/Object/MachOUniversal.h"
12 #include "llvm/ObjectYAML/DWARFYAML.h"
13 #include "llvm/ObjectYAML/ObjectYAML.h"
14 #include "llvm/Support/Errc.h"
15 #include "llvm/Support/Error.h"
16 #include "llvm/Support/ErrorHandling.h"
17 #include "llvm/Support/LEB128.h"
18 
19 #include <string.h> // for memcpy
20 
21 using namespace llvm;
22 
23 class MachODumper {
24 
25   template <typename StructType>
26   Expected<const char *> processLoadCommandData(
27       MachOYAML::LoadCommand &LC,
28       const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
29       MachOYAML::Object &Y);
30 
31   const object::MachOObjectFile &Obj;
32   std::unique_ptr<DWARFContext> DWARFCtx;
33   unsigned RawSegment;
34   void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y);
35   Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y);
36   void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y);
37   void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y);
38   void dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y);
39   void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes,
40                        ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false);
41   void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y);
42   void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y);
43   void dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y);
44 
45   template <typename SectionType>
46   Expected<MachOYAML::Section> constructSectionCommon(SectionType Sec,
47                                                       size_t SecIndex);
48   template <typename SectionType>
49   Expected<MachOYAML::Section> constructSection(SectionType Sec,
50                                                 size_t SecIndex);
51   template <typename SectionType, typename SegmentType>
52   Expected<const char *>
53   extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
54                   std::vector<MachOYAML::Section> &Sections,
55                   MachOYAML::Object &Y);
56 
57 public:
MachODumper(const object::MachOObjectFile & O,std::unique_ptr<DWARFContext> DCtx,unsigned RawSegments)58   MachODumper(const object::MachOObjectFile &O,
59               std::unique_ptr<DWARFContext> DCtx, unsigned RawSegments)
60       : Obj(O), DWARFCtx(std::move(DCtx)), RawSegment(RawSegments) {}
61   Expected<std::unique_ptr<MachOYAML::Object>> dump();
62 };
63 
// Expands to one `case` of the switch in MachODumper::dumpLoadCommands for
// every entry of MachO.def. It copies the fixed-size command structure out of
// the file, byte-swaps it when the file and host endianness differ, and then
// lets processLoadCommandData consume whatever follows the structure.
//
// The expansion refers to the names LC, LoadCmd, EndPtr, Y and Obj, which must
// be in scope at the point where MachO.def is included.
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case MachO::LCName: {                                                        \
    auto &RawCmd = LC.Data.LCStruct##_data;                                    \
    memcpy(static_cast<void *>(&RawCmd), LoadCmd.Ptr,                          \
           sizeof(MachO::LCStruct));                                           \
    if (Obj.isLittleEndian() != sys::IsLittleEndianHost)                       \
      MachO::swapStruct(RawCmd);                                               \
    Expected<const char *> ExpectedEndPtr =                                    \
        processLoadCommandData<MachO::LCStruct>(LC, LoadCmd, *Y);              \
    if (!ExpectedEndPtr)                                                       \
      return ExpectedEndPtr.takeError();                                       \
    EndPtr = *ExpectedEndPtr;                                                  \
    break;                                                                     \
  }
76 
77 template <typename SectionType>
78 Expected<MachOYAML::Section>
constructSectionCommon(SectionType Sec,size_t SecIndex)79 MachODumper::constructSectionCommon(SectionType Sec, size_t SecIndex) {
80   MachOYAML::Section TempSec;
81   memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16);
82   memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16);
83   TempSec.addr = Sec.addr;
84   TempSec.size = Sec.size;
85   TempSec.offset = Sec.offset;
86   TempSec.align = Sec.align;
87   TempSec.reloff = Sec.reloff;
88   TempSec.nreloc = Sec.nreloc;
89   TempSec.flags = Sec.flags;
90   TempSec.reserved1 = Sec.reserved1;
91   TempSec.reserved2 = Sec.reserved2;
92   TempSec.reserved3 = 0;
93   if (!MachO::isVirtualSection(Sec.flags & MachO::SECTION_TYPE))
94     TempSec.content =
95         yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
96 
97   if (Expected<object::SectionRef> SecRef = Obj.getSection(SecIndex)) {
98     TempSec.relocations.reserve(TempSec.nreloc);
99     for (const object::RelocationRef &Reloc : SecRef->relocations()) {
100       const object::DataRefImpl Rel = Reloc.getRawDataRefImpl();
101       const MachO::any_relocation_info RE = Obj.getRelocation(Rel);
102       MachOYAML::Relocation R;
103       R.address = Obj.getAnyRelocationAddress(RE);
104       R.is_pcrel = Obj.getAnyRelocationPCRel(RE);
105       R.length = Obj.getAnyRelocationLength(RE);
106       R.type = Obj.getAnyRelocationType(RE);
107       R.is_scattered = Obj.isRelocationScattered(RE);
108       R.symbolnum = (R.is_scattered ? 0 : Obj.getPlainRelocationSymbolNum(RE));
109       R.is_extern =
110           (R.is_scattered ? false : Obj.getPlainRelocationExternal(RE));
111       R.value = (R.is_scattered ? Obj.getScatteredRelocationValue(RE) : 0);
112       TempSec.relocations.push_back(R);
113     }
114   } else {
115     return SecRef.takeError();
116   }
117   return TempSec;
118 }
119 
120 template <>
constructSection(MachO::section Sec,size_t SecIndex)121 Expected<MachOYAML::Section> MachODumper::constructSection(MachO::section Sec,
122                                                            size_t SecIndex) {
123   Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
124   if (TempSec)
125     TempSec->reserved3 = 0;
126   return TempSec;
127 }
128 
129 template <>
130 Expected<MachOYAML::Section>
constructSection(MachO::section_64 Sec,size_t SecIndex)131 MachODumper::constructSection(MachO::section_64 Sec, size_t SecIndex) {
132   Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
133   if (TempSec)
134     TempSec->reserved3 = Sec.reserved3;
135   return TempSec;
136 }
137 
dumpDebugSection(StringRef SecName,DWARFContext & DCtx,DWARFYAML::Data & DWARF)138 static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx,
139                               DWARFYAML::Data &DWARF) {
140   if (SecName == "__debug_abbrev") {
141     dumpDebugAbbrev(DCtx, DWARF);
142     return Error::success();
143   }
144   if (SecName == "__debug_aranges")
145     return dumpDebugARanges(DCtx, DWARF);
146   if (SecName == "__debug_info") {
147     dumpDebugInfo(DCtx, DWARF);
148     return Error::success();
149   }
150   if (SecName == "__debug_line") {
151     dumpDebugLines(DCtx, DWARF);
152     return Error::success();
153   }
154   if (SecName.startswith("__debug_pub")) {
155     // FIXME: We should extract pub-section dumpers from this function.
156     dumpDebugPubSections(DCtx, DWARF);
157     return Error::success();
158   }
159   if (SecName == "__debug_ranges")
160     return dumpDebugRanges(DCtx, DWARF);
161   if (SecName == "__debug_str")
162     return dumpDebugStrings(DCtx, DWARF);
163   return createStringError(errc::not_supported,
164                            "dumping " + SecName + " section is not supported");
165 }
166 
167 template <typename SectionType, typename SegmentType>
extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,std::vector<MachOYAML::Section> & Sections,MachOYAML::Object & Y)168 Expected<const char *> MachODumper::extractSections(
169     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
170     std::vector<MachOYAML::Section> &Sections, MachOYAML::Object &Y) {
171   auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
172   const SectionType *Curr =
173       reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
174   for (; reinterpret_cast<const void *>(Curr) < End; Curr++) {
175     SectionType Sec;
176     memcpy((void *)&Sec, Curr, sizeof(SectionType));
177     if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
178       MachO::swapStruct(Sec);
179     // For MachO section indices start from 1.
180     if (Expected<MachOYAML::Section> S =
181             constructSection(Sec, Sections.size() + 1)) {
182       StringRef SecName(S->sectname);
183 
184       // Copy data sections if requested.
185       if ((RawSegment & ::RawSegments::data) &&
186           StringRef(S->segname).startswith("__DATA"))
187         S->content =
188             yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
189 
190       if (SecName.startswith("__debug_")) {
191         // If the DWARF section cannot be successfully parsed, emit raw content
192         // instead of an entry in the DWARF section of the YAML.
193         if (Error Err = dumpDebugSection(SecName, *DWARFCtx.get(), Y.DWARF))
194           consumeError(std::move(Err));
195         else
196           S->content.reset();
197       }
198       Sections.push_back(std::move(*S));
199     } else
200       return S.takeError();
201   }
202   return reinterpret_cast<const char *>(Curr);
203 }
204 
205 template <typename StructType>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)206 Expected<const char *> MachODumper::processLoadCommandData(
207     MachOYAML::LoadCommand &LC,
208     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
209     MachOYAML::Object &Y) {
210   return LoadCmd.Ptr + sizeof(StructType);
211 }
212 
213 template <>
214 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)215 MachODumper::processLoadCommandData<MachO::segment_command>(
216     MachOYAML::LoadCommand &LC,
217     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
218     MachOYAML::Object &Y) {
219   return extractSections<MachO::section, MachO::segment_command>(
220       LoadCmd, LC.Sections, Y);
221 }
222 
223 template <>
224 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)225 MachODumper::processLoadCommandData<MachO::segment_command_64>(
226     MachOYAML::LoadCommand &LC,
227     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
228     MachOYAML::Object &Y) {
229   return extractSections<MachO::section_64, MachO::segment_command_64>(
230       LoadCmd, LC.Sections, Y);
231 }
232 
233 template <typename StructType>
234 const char *
readString(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd)235 readString(MachOYAML::LoadCommand &LC,
236            const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
237   auto Start = LoadCmd.Ptr + sizeof(StructType);
238   auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType);
239   auto Size = strnlen(Start, MaxSize);
240   LC.Content = StringRef(Start, Size).str();
241   return Start + Size;
242 }
243 
244 template <>
245 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)246 MachODumper::processLoadCommandData<MachO::dylib_command>(
247     MachOYAML::LoadCommand &LC,
248     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
249     MachOYAML::Object &Y) {
250   return readString<MachO::dylib_command>(LC, LoadCmd);
251 }
252 
253 template <>
254 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)255 MachODumper::processLoadCommandData<MachO::dylinker_command>(
256     MachOYAML::LoadCommand &LC,
257     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
258     MachOYAML::Object &Y) {
259   return readString<MachO::dylinker_command>(LC, LoadCmd);
260 }
261 
262 template <>
263 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)264 MachODumper::processLoadCommandData<MachO::rpath_command>(
265     MachOYAML::LoadCommand &LC,
266     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
267     MachOYAML::Object &Y) {
268   return readString<MachO::rpath_command>(LC, LoadCmd);
269 }
270 
271 template <>
272 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)273 MachODumper::processLoadCommandData<MachO::build_version_command>(
274     MachOYAML::LoadCommand &LC,
275     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
276     MachOYAML::Object &Y) {
277   auto Start = LoadCmd.Ptr + sizeof(MachO::build_version_command);
278   auto NTools = LC.Data.build_version_command_data.ntools;
279   for (unsigned i = 0; i < NTools; ++i) {
280     auto Curr = Start + i * sizeof(MachO::build_tool_version);
281     MachO::build_tool_version BV;
282     memcpy((void *)&BV, Curr, sizeof(MachO::build_tool_version));
283     if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
284       MachO::swapStruct(BV);
285     LC.Tools.push_back(BV);
286   }
287   return Start + NTools * sizeof(MachO::build_tool_version);
288 }
289 
dump()290 Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
291   auto Y = std::make_unique<MachOYAML::Object>();
292   Y->IsLittleEndian = Obj.isLittleEndian();
293   dumpHeader(Y);
294   if (Error Err = dumpLoadCommands(Y))
295     return std::move(Err);
296   if (RawSegment & ::RawSegments::linkedit)
297     Y->RawLinkEditSegment =
298         yaml::BinaryRef(Obj.getSegmentContents("__LINKEDIT"));
299   else
300     dumpLinkEdit(Y);
301 
302   return std::move(Y);
303 }
304 
dumpHeader(std::unique_ptr<MachOYAML::Object> & Y)305 void MachODumper::dumpHeader(std::unique_ptr<MachOYAML::Object> &Y) {
306   Y->Header.magic = Obj.getHeader().magic;
307   Y->Header.cputype = Obj.getHeader().cputype;
308   Y->Header.cpusubtype = Obj.getHeader().cpusubtype;
309   Y->Header.filetype = Obj.getHeader().filetype;
310   Y->Header.ncmds = Obj.getHeader().ncmds;
311   Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds;
312   Y->Header.flags = Obj.getHeader().flags;
313   Y->Header.reserved = 0;
314 }
315 
dumpLoadCommands(std::unique_ptr<MachOYAML::Object> & Y)316 Error MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) {
317   for (auto LoadCmd : Obj.load_commands()) {
318     MachOYAML::LoadCommand LC;
319     const char *EndPtr = LoadCmd.Ptr;
320     switch (LoadCmd.C.cmd) {
321     default:
322       memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr,
323              sizeof(MachO::load_command));
324       if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
325         MachO::swapStruct(LC.Data.load_command_data);
326       if (Expected<const char *> ExpectedEndPtr =
327               processLoadCommandData<MachO::load_command>(LC, LoadCmd,
328                                                           *Y.get()))
329         EndPtr = *ExpectedEndPtr;
330       else
331         return ExpectedEndPtr.takeError();
332       break;
333 #include "llvm/BinaryFormat/MachO.def"
334     }
335     auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr);
336     if (!std::all_of(EndPtr, &EndPtr[RemainingBytes],
337                      [](const char C) { return C == 0; })) {
338       LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr,
339                              &EndPtr[RemainingBytes]);
340       RemainingBytes = 0;
341     }
342     LC.ZeroPadBytes = RemainingBytes;
343     Y->LoadCommands.push_back(std::move(LC));
344   }
345   return Error::success();
346 }
347 
dumpLinkEdit(std::unique_ptr<MachOYAML::Object> & Y)348 void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) {
349   dumpRebaseOpcodes(Y);
350   dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes());
351   dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes,
352                   Obj.getDyldInfoWeakBindOpcodes());
353   dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(),
354                   true);
355   dumpExportTrie(Y);
356   dumpSymbols(Y);
357   dumpIndirectSymbols(Y);
358   dumpFunctionStarts(Y);
359 }
360 
dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> & Y)361 void MachODumper::dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y) {
362   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
363 
364   auto FunctionStarts = Obj.getFunctionStarts();
365   for (auto Addr : FunctionStarts)
366     LEData.FunctionStarts.push_back(Addr);
367 }
368 
dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> & Y)369 void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) {
370   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
371 
372   auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes();
373   for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end();
374        ++OpCode) {
375     MachOYAML::RebaseOpcode RebaseOp;
376     RebaseOp.Opcode =
377         static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK);
378     RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK;
379 
380     unsigned Count;
381     uint64_t ULEB = 0;
382 
383     switch (RebaseOp.Opcode) {
384     case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
385 
386       ULEB = decodeULEB128(OpCode + 1, &Count);
387       RebaseOp.ExtraData.push_back(ULEB);
388       OpCode += Count;
389       LLVM_FALLTHROUGH;
390     // Intentionally no break here -- This opcode has two ULEB values
391     case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
392     case MachO::REBASE_OPCODE_ADD_ADDR_ULEB:
393     case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
394     case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
395 
396       ULEB = decodeULEB128(OpCode + 1, &Count);
397       RebaseOp.ExtraData.push_back(ULEB);
398       OpCode += Count;
399       break;
400     default:
401       break;
402     }
403 
404     LEData.RebaseOpcodes.push_back(RebaseOp);
405 
406     if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE)
407       break;
408   }
409 }
410 
ReadStringRef(const uint8_t * Start)411 StringRef ReadStringRef(const uint8_t *Start) {
412   const uint8_t *Itr = Start;
413   for (; *Itr; ++Itr)
414     ;
415   return StringRef(reinterpret_cast<const char *>(Start), Itr - Start);
416 }
417 
dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> & BindOpcodes,ArrayRef<uint8_t> OpcodeBuffer,bool Lazy)418 void MachODumper::dumpBindOpcodes(
419     std::vector<MachOYAML::BindOpcode> &BindOpcodes,
420     ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) {
421   for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end();
422        ++OpCode) {
423     MachOYAML::BindOpcode BindOp;
424     BindOp.Opcode =
425         static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK);
426     BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK;
427 
428     unsigned Count;
429     uint64_t ULEB = 0;
430     int64_t SLEB = 0;
431 
432     switch (BindOp.Opcode) {
433     case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
434       ULEB = decodeULEB128(OpCode + 1, &Count);
435       BindOp.ULEBExtraData.push_back(ULEB);
436       OpCode += Count;
437       LLVM_FALLTHROUGH;
438     // Intentionally no break here -- this opcode has two ULEB values
439 
440     case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
441     case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
442     case MachO::BIND_OPCODE_ADD_ADDR_ULEB:
443     case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
444       ULEB = decodeULEB128(OpCode + 1, &Count);
445       BindOp.ULEBExtraData.push_back(ULEB);
446       OpCode += Count;
447       break;
448 
449     case MachO::BIND_OPCODE_SET_ADDEND_SLEB:
450       SLEB = decodeSLEB128(OpCode + 1, &Count);
451       BindOp.SLEBExtraData.push_back(SLEB);
452       OpCode += Count;
453       break;
454 
455     case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
456       BindOp.Symbol = ReadStringRef(OpCode + 1);
457       OpCode += BindOp.Symbol.size() + 1;
458       break;
459     default:
460       break;
461     }
462 
463     BindOpcodes.push_back(BindOp);
464 
465     // Lazy bindings have DONE opcodes between operations, so we need to keep
466     // processing after a DONE.
467     if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE)
468       break;
469   }
470 }
471 
472 /*!
 * \brief Processes a node from the export trie, and its children.
474  *
475  * To my knowledge there is no documentation of the encoded format of this data
476  * other than in the heads of the Apple linker engineers. To that end hopefully
477  * this comment and the implementation below can serve to light the way for
478  * anyone crazy enough to come down this path in the future.
479  *
480  * This function reads and preserves the trie structure of the export trie. To
481  * my knowledge there is no code anywhere else that reads the data and preserves
482  * the Trie. LD64 (sources available at opensource.apple.com) has a similar
483  * implementation that parses the export trie into a vector. That code as well
484  * as LLVM's libObject MachO implementation were the basis for this.
485  *
486  * The export trie is an encoded trie. The node serialization is a bit awkward.
487  * The below pseudo-code is the best description I've come up with for it.
488  *
489  * struct SerializedNode {
490  *   ULEB128 TerminalSize;
491  *   struct TerminalData { <-- This is only present if TerminalSize > 0
492  *     ULEB128 Flags;
493  *     ULEB128 Address; <-- Present if (! Flags & REEXPORT )
494  *     ULEB128 Other; <-- Present if ( Flags & REEXPORT ||
495  *                                     Flags & STUB_AND_RESOLVER )
496  *     char[] ImportName; <-- Present if ( Flags & REEXPORT )
497  *   }
498  *   uint8_t ChildrenCount;
499  *   Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount];
500  *   SerializedNode Children[ChildrenCount]
501  * }
502  *
503  * Terminal nodes are nodes that represent actual exports. They can appear
504  * anywhere in the tree other than at the root; they do not need to be leaf
505  * nodes. When reading the data out of the trie this routine reads it in-order,
506  * but it puts the child names and offsets directly into the child nodes. This
507  * results in looping over the children twice during serialization and
508  * de-serialization, but it makes the YAML representation more human readable.
509  *
510  * Below is an example of the graph from a "Hello World" executable:
511  *
512  * -------
513  * | ''  |
514  * -------
515  *    |
516  * -------
517  * | '_' |
518  * -------
519  *    |
520  *    |----------------------------------------|
521  *    |                                        |
522  *  ------------------------      ---------------------
523  *  | '_mh_execute_header' |      | 'main'            |
524  *  | Flags: 0x00000000    |      | Flags: 0x00000000 |
525  *  | Addr:  0x00000000    |      | Addr:  0x00001160 |
526  *  ------------------------      ---------------------
527  *
528  * This graph represents the trie for the exports "__mh_execute_header" and
529  * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are
530  * terminal.
531 */
532 
processExportNode(const uint8_t * CurrPtr,const uint8_t * const End,MachOYAML::ExportEntry & Entry)533 const uint8_t *processExportNode(const uint8_t *CurrPtr,
534                                  const uint8_t *const End,
535                                  MachOYAML::ExportEntry &Entry) {
536   if (CurrPtr >= End)
537     return CurrPtr;
538   unsigned Count = 0;
539   Entry.TerminalSize = decodeULEB128(CurrPtr, &Count);
540   CurrPtr += Count;
541   if (Entry.TerminalSize != 0) {
542     Entry.Flags = decodeULEB128(CurrPtr, &Count);
543     CurrPtr += Count;
544     if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
545       Entry.Address = 0;
546       Entry.Other = decodeULEB128(CurrPtr, &Count);
547       CurrPtr += Count;
548       Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr));
549     } else {
550       Entry.Address = decodeULEB128(CurrPtr, &Count);
551       CurrPtr += Count;
552       if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
553         Entry.Other = decodeULEB128(CurrPtr, &Count);
554         CurrPtr += Count;
555       } else
556         Entry.Other = 0;
557     }
558   }
559   uint8_t childrenCount = *CurrPtr++;
560   if (childrenCount == 0)
561     return CurrPtr;
562 
563   Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount,
564                         MachOYAML::ExportEntry());
565   for (auto &Child : Entry.Children) {
566     Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr));
567     CurrPtr += Child.Name.length() + 1;
568     Child.NodeOffset = decodeULEB128(CurrPtr, &Count);
569     CurrPtr += Count;
570   }
571   for (auto &Child : Entry.Children) {
572     CurrPtr = processExportNode(CurrPtr, End, Child);
573   }
574   return CurrPtr;
575 }
576 
dumpExportTrie(std::unique_ptr<MachOYAML::Object> & Y)577 void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) {
578   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
579   auto ExportsTrie = Obj.getDyldInfoExportsTrie();
580   processExportNode(ExportsTrie.begin(), ExportsTrie.end(), LEData.ExportTrie);
581 }
582 
583 template <typename nlist_t>
constructNameList(const nlist_t & nlist)584 MachOYAML::NListEntry constructNameList(const nlist_t &nlist) {
585   MachOYAML::NListEntry NL;
586   NL.n_strx = nlist.n_strx;
587   NL.n_type = nlist.n_type;
588   NL.n_sect = nlist.n_sect;
589   NL.n_desc = nlist.n_desc;
590   NL.n_value = nlist.n_value;
591   return NL;
592 }
593 
dumpSymbols(std::unique_ptr<MachOYAML::Object> & Y)594 void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
595   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
596 
597   for (auto Symbol : Obj.symbols()) {
598     MachOYAML::NListEntry NLE =
599         Obj.is64Bit()
600             ? constructNameList<MachO::nlist_64>(
601                   Obj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
602             : constructNameList<MachO::nlist>(
603                   Obj.getSymbolTableEntry(Symbol.getRawDataRefImpl()));
604     LEData.NameList.push_back(NLE);
605   }
606 
607   StringRef RemainingTable = Obj.getStringTableData();
608   while (RemainingTable.size() > 0) {
609     auto SymbolPair = RemainingTable.split('\0');
610     RemainingTable = SymbolPair.second;
611     LEData.StringTable.push_back(SymbolPair.first);
612   }
613 }
614 
dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> & Y)615 void MachODumper::dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
616   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
617 
618   MachO::dysymtab_command DLC = Obj.getDysymtabLoadCommand();
619   for (unsigned i = 0; i < DLC.nindirectsyms; ++i)
620     LEData.IndirectSymbols.push_back(Obj.getIndirectSymbolTableEntry(DLC, i));
621 }
622 
macho2yaml(raw_ostream & Out,const object::MachOObjectFile & Obj,unsigned RawSegments)623 Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj,
624                  unsigned RawSegments) {
625   std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj);
626   MachODumper Dumper(Obj, std::move(DCtx), RawSegments);
627   Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump();
628   if (!YAML)
629     return YAML.takeError();
630 
631   yaml::YamlObjectFile YAMLFile;
632   YAMLFile.MachO = std::move(YAML.get());
633 
634   yaml::Output Yout(Out);
635   Yout << YAMLFile;
636   return Error::success();
637 }
638 
macho2yaml(raw_ostream & Out,const object::MachOUniversalBinary & Obj,unsigned RawSegments)639 Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj,
640                  unsigned RawSegments) {
641   yaml::YamlObjectFile YAMLFile;
642   YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary());
643   MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO;
644   YAML.Header.magic = Obj.getMagic();
645   YAML.Header.nfat_arch = Obj.getNumberOfObjects();
646 
647   for (auto Slice : Obj.objects()) {
648     MachOYAML::FatArch arch;
649     arch.cputype = Slice.getCPUType();
650     arch.cpusubtype = Slice.getCPUSubType();
651     arch.offset = Slice.getOffset();
652     arch.size = Slice.getSize();
653     arch.align = Slice.getAlign();
654     arch.reserved = Slice.getReserved();
655     YAML.FatArchs.push_back(arch);
656 
657     auto SliceObj = Slice.getAsObjectFile();
658     if (!SliceObj)
659       return SliceObj.takeError();
660 
661     std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(*SliceObj.get());
662     MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawSegments);
663     Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump();
664     if (!YAMLObj)
665       return YAMLObj.takeError();
666     YAML.Slices.push_back(*YAMLObj.get());
667   }
668 
669   yaml::Output Yout(Out);
670   Yout << YAML;
671   return Error::success();
672 }
673 
macho2yaml(raw_ostream & Out,const object::Binary & Binary,unsigned RawSegments)674 Error macho2yaml(raw_ostream &Out, const object::Binary &Binary,
675                  unsigned RawSegments) {
676   if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary))
677     return macho2yaml(Out, *MachOObj, RawSegments);
678 
679   if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary))
680     return macho2yaml(Out, *MachOObj, RawSegments);
681 
682   llvm_unreachable("unexpected Mach-O file format");
683 }
684