1 //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Error.h"
11 #include "obj2yaml.h"
12 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
13 #include "llvm/Object/MachOUniversal.h"
14 #include "llvm/ObjectYAML/ObjectYAML.h"
15 #include "llvm/Support/ErrorHandling.h"
16 #include "llvm/Support/LEB128.h"
17 
18 #include <string.h> // for memcpy
19 
20 using namespace llvm;
21 
22 class MachODumper {
23 
24   template <typename StructType>
25   const char *processLoadCommandData(
26       MachOYAML::LoadCommand &LC,
27       const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd);
28 
29   const object::MachOObjectFile &Obj;
30   void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y);
31   void dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y);
32   void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y);
33   void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y);
34   void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes,
35                        ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false);
36   void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y);
37   void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y);
38   void dumpDebugAbbrev(DWARFContextInMemory &DCtx,
39                        std::unique_ptr<MachOYAML::Object> &Y);
40   void dumpDebugStrings(DWARFContextInMemory &DCtx,
41                         std::unique_ptr<MachOYAML::Object> &Y);
42 
43 public:
44   MachODumper(const object::MachOObjectFile &O) : Obj(O) {}
45   Expected<std::unique_ptr<MachOYAML::Object>> dump();
46 };
47 
// Expands to one `case` of a switch over a load command's `cmd` value.
//
// For the command named LCName the expansion:
//   1. copies sizeof(MachO::LCStruct) bytes from LoadCmd.Ptr into
//      LC.Data.LCStruct_data,
//   2. byte-swaps that struct when the object's endianness differs from the
//      host's,
//   3. stores the result of processLoadCommandData<MachO::LCStruct>() in
//      EndPtr.
//
// The expansion refers to `LC`, `LoadCmd`, `EndPtr` and `Obj` by name, so it
// can only be used where those names are in scope. The LCValue argument is not
// used by this definition.
// NOTE(review): presumably invoked once per load command by the included
// llvm/Support/MachO.def -- confirm against that file.
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case MachO::LCName:                                                          \
    memcpy((void *) & (LC.Data.LCStruct##_data), LoadCmd.Ptr,                  \
           sizeof(MachO::LCStruct));                                           \
    if (Obj.isLittleEndian() != sys::IsLittleEndianHost)                       \
      MachO::swapStruct(LC.Data.LCStruct##_data);                              \
    EndPtr = processLoadCommandData<MachO::LCStruct>(LC, LoadCmd);             \
    break;
56 
57 template <typename SectionType>
58 MachOYAML::Section constructSectionCommon(SectionType Sec) {
59   MachOYAML::Section TempSec;
60   memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16);
61   memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16);
62   TempSec.addr = Sec.addr;
63   TempSec.size = Sec.size;
64   TempSec.offset = Sec.offset;
65   TempSec.align = Sec.align;
66   TempSec.reloff = Sec.reloff;
67   TempSec.nreloc = Sec.nreloc;
68   TempSec.flags = Sec.flags;
69   TempSec.reserved1 = Sec.reserved1;
70   TempSec.reserved2 = Sec.reserved2;
71   TempSec.reserved3 = 0;
72   return TempSec;
73 }
74 
/// Converts a section header of type \p SectionType into a MachOYAML::Section.
///
/// Only the declaration of the primary template is given here; the explicit
/// specializations for MachO::section and MachO::section_64 provide the
/// definitions.
template <typename SectionType>
MachOYAML::Section constructSection(SectionType Sec);
77 
78 template <> MachOYAML::Section constructSection(MachO::section Sec) {
79   MachOYAML::Section TempSec = constructSectionCommon(Sec);
80   TempSec.reserved3 = 0;
81   return TempSec;
82 }
83 
84 template <> MachOYAML::Section constructSection(MachO::section_64 Sec) {
85   MachOYAML::Section TempSec = constructSectionCommon(Sec);
86   TempSec.reserved3 = Sec.reserved3;
87   return TempSec;
88 }
89 
90 template <typename SectionType, typename SegmentType>
91 const char *
92 extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
93                 std::vector<MachOYAML::Section> &Sections,
94                 bool IsLittleEndian) {
95   auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
96   const SectionType *Curr =
97       reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
98   for (; reinterpret_cast<const void *>(Curr) < End; Curr++) {
99     if (IsLittleEndian != sys::IsLittleEndianHost) {
100       SectionType Sec;
101       memcpy((void *)&Sec, Curr, sizeof(SectionType));
102       MachO::swapStruct(Sec);
103       Sections.push_back(constructSection(Sec));
104     } else {
105       Sections.push_back(constructSection(*Curr));
106     }
107   }
108   return reinterpret_cast<const char *>(Curr);
109 }
110 
111 template <typename StructType>
112 const char *MachODumper::processLoadCommandData(
113     MachOYAML::LoadCommand &LC,
114     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
115   return LoadCmd.Ptr + sizeof(StructType);
116 }
117 
118 template <>
119 const char *MachODumper::processLoadCommandData<MachO::segment_command>(
120     MachOYAML::LoadCommand &LC,
121     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
122   return extractSections<MachO::section, MachO::segment_command>(
123       LoadCmd, LC.Sections, Obj.isLittleEndian());
124 }
125 
126 template <>
127 const char *MachODumper::processLoadCommandData<MachO::segment_command_64>(
128     MachOYAML::LoadCommand &LC,
129     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
130   return extractSections<MachO::section_64, MachO::segment_command_64>(
131       LoadCmd, LC.Sections, Obj.isLittleEndian());
132 }
133 
134 template <typename StructType>
135 const char *
136 readString(MachOYAML::LoadCommand &LC,
137            const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
138   auto Start = LoadCmd.Ptr + sizeof(StructType);
139   auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType);
140   auto Size = strnlen(Start, MaxSize);
141   LC.PayloadString = StringRef(Start, Size).str();
142   return Start + Size;
143 }
144 
145 template <>
146 const char *MachODumper::processLoadCommandData<MachO::dylib_command>(
147     MachOYAML::LoadCommand &LC,
148     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
149   return readString<MachO::dylib_command>(LC, LoadCmd);
150 }
151 
152 template <>
153 const char *MachODumper::processLoadCommandData<MachO::dylinker_command>(
154     MachOYAML::LoadCommand &LC,
155     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
156   return readString<MachO::dylinker_command>(LC, LoadCmd);
157 }
158 
159 template <>
160 const char *MachODumper::processLoadCommandData<MachO::rpath_command>(
161     MachOYAML::LoadCommand &LC,
162     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
163   return readString<MachO::rpath_command>(LC, LoadCmd);
164 }
165 
166 Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
167   auto Y = make_unique<MachOYAML::Object>();
168   Y->IsLittleEndian = Obj.isLittleEndian();
169   dumpHeader(Y);
170   dumpLoadCommands(Y);
171   dumpLinkEdit(Y);
172 
173   DWARFContextInMemory DICtx(Obj);
174   if (auto Err = dwarf2yaml(DICtx, Y->DWARF))
175     return errorCodeToError(Err);
176   return std::move(Y);
177 }
178 
179 void MachODumper::dumpHeader(std::unique_ptr<MachOYAML::Object> &Y) {
180   Y->Header.magic = Obj.getHeader().magic;
181   Y->Header.cputype = Obj.getHeader().cputype;
182   Y->Header.cpusubtype = Obj.getHeader().cpusubtype;
183   Y->Header.filetype = Obj.getHeader().filetype;
184   Y->Header.ncmds = Obj.getHeader().ncmds;
185   Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds;
186   Y->Header.flags = Obj.getHeader().flags;
187   Y->Header.reserved = 0;
188 }
189 
190 void MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) {
191   for (auto LoadCmd : Obj.load_commands()) {
192     MachOYAML::LoadCommand LC;
193     const char *EndPtr = LoadCmd.Ptr;
194     switch (LoadCmd.C.cmd) {
195     default:
196       memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr,
197              sizeof(MachO::load_command));
198       if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
199         MachO::swapStruct(LC.Data.load_command_data);
200       EndPtr = processLoadCommandData<MachO::load_command>(LC, LoadCmd);
201       break;
202 #include "llvm/Support/MachO.def"
203     }
204     auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr);
205     if (!std::all_of(EndPtr, &EndPtr[RemainingBytes],
206                      [](const char C) { return C == 0; })) {
207       LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr,
208                              &EndPtr[RemainingBytes]);
209       RemainingBytes = 0;
210     }
211     LC.ZeroPadBytes = RemainingBytes;
212     Y->LoadCommands.push_back(std::move(LC));
213   }
214 }
215 
216 void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) {
217   dumpRebaseOpcodes(Y);
218   dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes());
219   dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes,
220                   Obj.getDyldInfoWeakBindOpcodes());
221   dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(),
222                   true);
223   dumpExportTrie(Y);
224   dumpSymbols(Y);
225 }
226 
227 void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) {
228   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
229 
230   auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes();
231   for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end();
232        ++OpCode) {
233     MachOYAML::RebaseOpcode RebaseOp;
234     RebaseOp.Opcode =
235         static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK);
236     RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK;
237 
238     unsigned Count;
239     uint64_t ULEB = 0;
240 
241     switch (RebaseOp.Opcode) {
242     case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
243 
244       ULEB = decodeULEB128(OpCode + 1, &Count);
245       RebaseOp.ExtraData.push_back(ULEB);
246       OpCode += Count;
247     // Intentionally no break here -- This opcode has two ULEB values
248     case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
249     case MachO::REBASE_OPCODE_ADD_ADDR_ULEB:
250     case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
251     case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
252 
253       ULEB = decodeULEB128(OpCode + 1, &Count);
254       RebaseOp.ExtraData.push_back(ULEB);
255       OpCode += Count;
256       break;
257     default:
258       break;
259     }
260 
261     LEData.RebaseOpcodes.push_back(RebaseOp);
262 
263     if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE)
264       break;
265   }
266 }
267 
268 StringRef ReadStringRef(const uint8_t *Start) {
269   const uint8_t *Itr = Start;
270   for (; *Itr; ++Itr)
271     ;
272   return StringRef(reinterpret_cast<const char *>(Start), Itr - Start);
273 }
274 
275 void MachODumper::dumpBindOpcodes(
276     std::vector<MachOYAML::BindOpcode> &BindOpcodes,
277     ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) {
278   for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end();
279        ++OpCode) {
280     MachOYAML::BindOpcode BindOp;
281     BindOp.Opcode =
282         static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK);
283     BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK;
284 
285     unsigned Count;
286     uint64_t ULEB = 0;
287     int64_t SLEB = 0;
288 
289     switch (BindOp.Opcode) {
290     case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
291       ULEB = decodeULEB128(OpCode + 1, &Count);
292       BindOp.ULEBExtraData.push_back(ULEB);
293       OpCode += Count;
294     // Intentionally no break here -- this opcode has two ULEB values
295 
296     case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
297     case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
298     case MachO::BIND_OPCODE_ADD_ADDR_ULEB:
299     case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
300       ULEB = decodeULEB128(OpCode + 1, &Count);
301       BindOp.ULEBExtraData.push_back(ULEB);
302       OpCode += Count;
303       break;
304 
305     case MachO::BIND_OPCODE_SET_ADDEND_SLEB:
306       SLEB = decodeSLEB128(OpCode + 1, &Count);
307       BindOp.SLEBExtraData.push_back(SLEB);
308       OpCode += Count;
309       break;
310 
311     case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
312       BindOp.Symbol = ReadStringRef(OpCode + 1);
313       OpCode += BindOp.Symbol.size() + 1;
314       break;
315     default:
316       break;
317     }
318 
319     BindOpcodes.push_back(BindOp);
320 
321     // Lazy bindings have DONE opcodes between operations, so we need to keep
322     // processing after a DONE.
323     if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE)
324       break;
325   }
326 }
327 
328 /*!
 * \brief Processes a node from the export trie, and its children.
330  *
331  * To my knowledge there is no documentation of the encoded format of this data
332  * other than in the heads of the Apple linker engineers. To that end hopefully
333  * this comment and the implementation below can serve to light the way for
334  * anyone crazy enough to come down this path in the future.
335  *
336  * This function reads and preserves the trie structure of the export trie. To
337  * my knowledge there is no code anywhere else that reads the data and preserves
338  * the Trie. LD64 (sources available at opensource.apple.com) has a similar
339  * implementation that parses the export trie into a vector. That code as well
340  * as LLVM's libObject MachO implementation were the basis for this.
341  *
342  * The export trie is an encoded trie. The node serialization is a bit awkward.
343  * The below pseudo-code is the best description I've come up with for it.
344  *
345  * struct SerializedNode {
346  *   ULEB128 TerminalSize;
347  *   struct TerminalData { <-- This is only present if TerminalSize > 0
348  *     ULEB128 Flags;
 *     ULEB128 Address; <-- Present if ( !(Flags & REEXPORT) )
350  *     ULEB128 Other; <-- Present if ( Flags & REEXPORT ||
351  *                                     Flags & STUB_AND_RESOLVER )
352  *     char[] ImportName; <-- Present if ( Flags & REEXPORT )
353  *   }
354  *   uint8_t ChildrenCount;
355  *   Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount];
356  *   SerializedNode Children[ChildrenCount]
357  * }
358  *
359  * Terminal nodes are nodes that represent actual exports. They can appear
360  * anywhere in the tree other than at the root; they do not need to be leaf
361  * nodes. When reading the data out of the trie this routine reads it in-order,
362  * but it puts the child names and offsets directly into the child nodes. This
363  * results in looping over the children twice during serialization and
364  * de-serialization, but it makes the YAML representation more human readable.
365  *
366  * Below is an example of the graph from a "Hello World" executable:
367  *
368  * -------
369  * | ''  |
370  * -------
371  *    |
372  * -------
373  * | '_' |
374  * -------
375  *    |
376  *    |----------------------------------------|
377  *    |                                        |
378  *  ------------------------      ---------------------
379  *  | '_mh_execute_header' |      | 'main'            |
380  *  | Flags: 0x00000000    |      | Flags: 0x00000000 |
381  *  | Addr:  0x00000000    |      | Addr:  0x00001160 |
382  *  ------------------------      ---------------------
383  *
384  * This graph represents the trie for the exports "__mh_execute_header" and
385  * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are
386  * terminal.
387 */
388 
389 const uint8_t *processExportNode(const uint8_t *CurrPtr,
390                                  const uint8_t *const End,
391                                  MachOYAML::ExportEntry &Entry) {
392   if (CurrPtr >= End)
393     return CurrPtr;
394   unsigned Count = 0;
395   Entry.TerminalSize = decodeULEB128(CurrPtr, &Count);
396   CurrPtr += Count;
397   if (Entry.TerminalSize != 0) {
398     Entry.Flags = decodeULEB128(CurrPtr, &Count);
399     CurrPtr += Count;
400     if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
401       Entry.Address = 0;
402       Entry.Other = decodeULEB128(CurrPtr, &Count);
403       CurrPtr += Count;
404       Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr));
405     } else {
406       Entry.Address = decodeULEB128(CurrPtr, &Count);
407       CurrPtr += Count;
408       if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
409         Entry.Other = decodeULEB128(CurrPtr, &Count);
410         CurrPtr += Count;
411       } else
412         Entry.Other = 0;
413     }
414   }
415   uint8_t childrenCount = *CurrPtr++;
416   if (childrenCount == 0)
417     return CurrPtr;
418 
419   Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount,
420                         MachOYAML::ExportEntry());
421   for (auto &Child : Entry.Children) {
422     Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr));
423     CurrPtr += Child.Name.length() + 1;
424     Child.NodeOffset = decodeULEB128(CurrPtr, &Count);
425     CurrPtr += Count;
426   }
427   for (auto &Child : Entry.Children) {
428     CurrPtr = processExportNode(CurrPtr, End, Child);
429   }
430   return CurrPtr;
431 }
432 
433 void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) {
434   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
435   auto ExportsTrie = Obj.getDyldInfoExportsTrie();
436   processExportNode(ExportsTrie.begin(), ExportsTrie.end(), LEData.ExportTrie);
437 }
438 
439 template <typename nlist_t>
440 MachOYAML::NListEntry constructNameList(const nlist_t &nlist) {
441   MachOYAML::NListEntry NL;
442   NL.n_strx = nlist.n_strx;
443   NL.n_type = nlist.n_type;
444   NL.n_sect = nlist.n_sect;
445   NL.n_desc = nlist.n_desc;
446   NL.n_value = nlist.n_value;
447   return NL;
448 }
449 
450 void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
451   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
452 
453   for (auto Symbol : Obj.symbols()) {
454     MachOYAML::NListEntry NLE =
455         Obj.is64Bit()
456             ? constructNameList<MachO::nlist_64>(
457                   Obj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
458             : constructNameList<MachO::nlist>(
459                   Obj.getSymbolTableEntry(Symbol.getRawDataRefImpl()));
460     LEData.NameList.push_back(NLE);
461   }
462 
463   StringRef RemainingTable = Obj.getStringTableData();
464   while (RemainingTable.size() > 0) {
465     auto SymbolPair = RemainingTable.split('\0');
466     RemainingTable = SymbolPair.second;
467     LEData.StringTable.push_back(SymbolPair.first);
468   }
469 }
470 
471 Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) {
472   MachODumper Dumper(Obj);
473   Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump();
474   if (!YAML)
475     return YAML.takeError();
476 
477   yaml::YamlObjectFile YAMLFile;
478   YAMLFile.MachO = std::move(YAML.get());
479 
480   yaml::Output Yout(Out);
481   Yout << YAMLFile;
482   return Error::success();
483 }
484 
485 Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) {
486   yaml::YamlObjectFile YAMLFile;
487   YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary());
488   MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO;
489   YAML.Header.magic = Obj.getMagic();
490   YAML.Header.nfat_arch = Obj.getNumberOfObjects();
491 
492   for (auto Slice : Obj.objects()) {
493     MachOYAML::FatArch arch;
494     arch.cputype = Slice.getCPUType();
495     arch.cpusubtype = Slice.getCPUSubType();
496     arch.offset = Slice.getOffset();
497     arch.size = Slice.getSize();
498     arch.align = Slice.getAlign();
499     arch.reserved = Slice.getReserved();
500     YAML.FatArchs.push_back(arch);
501 
502     auto SliceObj = Slice.getAsObjectFile();
503     if (!SliceObj)
504       return SliceObj.takeError();
505 
506     MachODumper Dumper(*SliceObj.get());
507     Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump();
508     if (!YAMLObj)
509       return YAMLObj.takeError();
510     YAML.Slices.push_back(*YAMLObj.get());
511   }
512 
513   yaml::Output Yout(Out);
514   Yout << YAML;
515   return Error::success();
516 }
517 
518 std::error_code macho2yaml(raw_ostream &Out, const object::Binary &Binary) {
519   if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary)) {
520     if (auto Err = macho2yaml(Out, *MachOObj)) {
521       return errorToErrorCode(std::move(Err));
522     }
523     return obj2yaml_error::success;
524   }
525 
526   if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary)) {
527     if (auto Err = macho2yaml(Out, *MachOObj)) {
528       return errorToErrorCode(std::move(Err));
529     }
530     return obj2yaml_error::success;
531   }
532 
533   return obj2yaml_error::unsupported_obj_file_format;
534 }
535