1*25d7b4fbSAlexey Lapshin //===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
2*25d7b4fbSAlexey Lapshin //
3*25d7b4fbSAlexey Lapshin // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*25d7b4fbSAlexey Lapshin // See https://llvm.org/LICENSE.txt for license information.
5*25d7b4fbSAlexey Lapshin // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*25d7b4fbSAlexey Lapshin //
7*25d7b4fbSAlexey Lapshin //===----------------------------------------------------------------------===//
8*25d7b4fbSAlexey Lapshin 
9*25d7b4fbSAlexey Lapshin #include "MachOObject.h"
10*25d7b4fbSAlexey Lapshin #include "llvm/ADT/SmallPtrSet.h"
11*25d7b4fbSAlexey Lapshin #include <unordered_set>
12*25d7b4fbSAlexey Lapshin 
13*25d7b4fbSAlexey Lapshin using namespace llvm;
14*25d7b4fbSAlexey Lapshin using namespace llvm::objcopy::macho;
15*25d7b4fbSAlexey Lapshin 
getSymbolByIndex(uint32_t Index) const16*25d7b4fbSAlexey Lapshin const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
17*25d7b4fbSAlexey Lapshin   assert(Index < Symbols.size() && "invalid symbol index");
18*25d7b4fbSAlexey Lapshin   return Symbols[Index].get();
19*25d7b4fbSAlexey Lapshin }
20*25d7b4fbSAlexey Lapshin 
getSymbolByIndex(uint32_t Index)21*25d7b4fbSAlexey Lapshin SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
22*25d7b4fbSAlexey Lapshin   return const_cast<SymbolEntry *>(
23*25d7b4fbSAlexey Lapshin       static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
24*25d7b4fbSAlexey Lapshin }
25*25d7b4fbSAlexey Lapshin 
removeSymbols(function_ref<bool (const std::unique_ptr<SymbolEntry> &)> ToRemove)26*25d7b4fbSAlexey Lapshin void SymbolTable::removeSymbols(
27*25d7b4fbSAlexey Lapshin     function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
28*25d7b4fbSAlexey Lapshin   llvm::erase_if(Symbols, ToRemove);
29*25d7b4fbSAlexey Lapshin }
30*25d7b4fbSAlexey Lapshin 
updateLoadCommandIndexes()31*25d7b4fbSAlexey Lapshin void Object::updateLoadCommandIndexes() {
32*25d7b4fbSAlexey Lapshin   static constexpr char TextSegmentName[] = "__TEXT";
33*25d7b4fbSAlexey Lapshin   // Update indices of special load commands
34*25d7b4fbSAlexey Lapshin   for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
35*25d7b4fbSAlexey Lapshin     LoadCommand &LC = LoadCommands[Index];
36*25d7b4fbSAlexey Lapshin     switch (LC.MachOLoadCommand.load_command_data.cmd) {
37*25d7b4fbSAlexey Lapshin     case MachO::LC_CODE_SIGNATURE:
38*25d7b4fbSAlexey Lapshin       CodeSignatureCommandIndex = Index;
39*25d7b4fbSAlexey Lapshin       break;
40*25d7b4fbSAlexey Lapshin     case MachO::LC_SEGMENT:
41*25d7b4fbSAlexey Lapshin       if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
42*25d7b4fbSAlexey Lapshin           TextSegmentName)
43*25d7b4fbSAlexey Lapshin         TextSegmentCommandIndex = Index;
44*25d7b4fbSAlexey Lapshin       break;
45*25d7b4fbSAlexey Lapshin     case MachO::LC_SEGMENT_64:
46*25d7b4fbSAlexey Lapshin       if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
47*25d7b4fbSAlexey Lapshin           TextSegmentName)
48*25d7b4fbSAlexey Lapshin         TextSegmentCommandIndex = Index;
49*25d7b4fbSAlexey Lapshin       break;
50*25d7b4fbSAlexey Lapshin     case MachO::LC_SYMTAB:
51*25d7b4fbSAlexey Lapshin       SymTabCommandIndex = Index;
52*25d7b4fbSAlexey Lapshin       break;
53*25d7b4fbSAlexey Lapshin     case MachO::LC_DYSYMTAB:
54*25d7b4fbSAlexey Lapshin       DySymTabCommandIndex = Index;
55*25d7b4fbSAlexey Lapshin       break;
56*25d7b4fbSAlexey Lapshin     case MachO::LC_DYLD_INFO:
57*25d7b4fbSAlexey Lapshin     case MachO::LC_DYLD_INFO_ONLY:
58*25d7b4fbSAlexey Lapshin       DyLdInfoCommandIndex = Index;
59*25d7b4fbSAlexey Lapshin       break;
60*25d7b4fbSAlexey Lapshin     case MachO::LC_DATA_IN_CODE:
61*25d7b4fbSAlexey Lapshin       DataInCodeCommandIndex = Index;
62*25d7b4fbSAlexey Lapshin       break;
63*25d7b4fbSAlexey Lapshin     case MachO::LC_LINKER_OPTIMIZATION_HINT:
64*25d7b4fbSAlexey Lapshin       LinkerOptimizationHintCommandIndex = Index;
65*25d7b4fbSAlexey Lapshin       break;
66*25d7b4fbSAlexey Lapshin     case MachO::LC_FUNCTION_STARTS:
67*25d7b4fbSAlexey Lapshin       FunctionStartsCommandIndex = Index;
68*25d7b4fbSAlexey Lapshin       break;
69*25d7b4fbSAlexey Lapshin     case MachO::LC_DYLD_CHAINED_FIXUPS:
70*25d7b4fbSAlexey Lapshin       ChainedFixupsCommandIndex = Index;
71*25d7b4fbSAlexey Lapshin       break;
72*25d7b4fbSAlexey Lapshin     case MachO::LC_DYLD_EXPORTS_TRIE:
73*25d7b4fbSAlexey Lapshin       ExportsTrieCommandIndex = Index;
74*25d7b4fbSAlexey Lapshin       break;
75*25d7b4fbSAlexey Lapshin     }
76*25d7b4fbSAlexey Lapshin   }
77*25d7b4fbSAlexey Lapshin }
78*25d7b4fbSAlexey Lapshin 
removeLoadCommands(function_ref<bool (const LoadCommand &)> ToRemove)79*25d7b4fbSAlexey Lapshin Error Object::removeLoadCommands(
80*25d7b4fbSAlexey Lapshin     function_ref<bool(const LoadCommand &)> ToRemove) {
81*25d7b4fbSAlexey Lapshin   auto It = std::stable_partition(
82*25d7b4fbSAlexey Lapshin       LoadCommands.begin(), LoadCommands.end(),
83*25d7b4fbSAlexey Lapshin       [&](const LoadCommand &LC) { return !ToRemove(LC); });
84*25d7b4fbSAlexey Lapshin   LoadCommands.erase(It, LoadCommands.end());
85*25d7b4fbSAlexey Lapshin 
86*25d7b4fbSAlexey Lapshin   updateLoadCommandIndexes();
87*25d7b4fbSAlexey Lapshin   return Error::success();
88*25d7b4fbSAlexey Lapshin }
89*25d7b4fbSAlexey Lapshin 
removeSections(function_ref<bool (const std::unique_ptr<Section> &)> ToRemove)90*25d7b4fbSAlexey Lapshin Error Object::removeSections(
91*25d7b4fbSAlexey Lapshin     function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
92*25d7b4fbSAlexey Lapshin   DenseMap<uint32_t, const Section *> OldIndexToSection;
93*25d7b4fbSAlexey Lapshin   uint32_t NextSectionIndex = 1;
94*25d7b4fbSAlexey Lapshin   for (LoadCommand &LC : LoadCommands) {
95*25d7b4fbSAlexey Lapshin     auto It = std::stable_partition(
96*25d7b4fbSAlexey Lapshin         std::begin(LC.Sections), std::end(LC.Sections),
97*25d7b4fbSAlexey Lapshin         [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
98*25d7b4fbSAlexey Lapshin     for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
99*25d7b4fbSAlexey Lapshin       OldIndexToSection[(*I)->Index] = I->get();
100*25d7b4fbSAlexey Lapshin       (*I)->Index = NextSectionIndex++;
101*25d7b4fbSAlexey Lapshin     }
102*25d7b4fbSAlexey Lapshin     LC.Sections.erase(It, LC.Sections.end());
103*25d7b4fbSAlexey Lapshin   }
104*25d7b4fbSAlexey Lapshin 
105*25d7b4fbSAlexey Lapshin   auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
106*25d7b4fbSAlexey Lapshin     Optional<uint32_t> Section = S->section();
107*25d7b4fbSAlexey Lapshin     return (Section && !OldIndexToSection.count(*Section));
108*25d7b4fbSAlexey Lapshin   };
109*25d7b4fbSAlexey Lapshin 
110*25d7b4fbSAlexey Lapshin   SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
111*25d7b4fbSAlexey Lapshin   for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
112*25d7b4fbSAlexey Lapshin     if (IsDead(Sym))
113*25d7b4fbSAlexey Lapshin       DeadSymbols.insert(Sym.get());
114*25d7b4fbSAlexey Lapshin 
115*25d7b4fbSAlexey Lapshin   for (const LoadCommand &LC : LoadCommands)
116*25d7b4fbSAlexey Lapshin     for (const std::unique_ptr<Section> &Sec : LC.Sections)
117*25d7b4fbSAlexey Lapshin       for (const RelocationInfo &R : Sec->Relocations)
118*25d7b4fbSAlexey Lapshin         if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
119*25d7b4fbSAlexey Lapshin           return createStringError(std::errc::invalid_argument,
120*25d7b4fbSAlexey Lapshin                                    "symbol '%s' defined in section with index "
121*25d7b4fbSAlexey Lapshin                                    "'%u' cannot be removed because it is "
122*25d7b4fbSAlexey Lapshin                                    "referenced by a relocation in section '%s'",
123*25d7b4fbSAlexey Lapshin                                    (*R.Symbol)->Name.c_str(),
124*25d7b4fbSAlexey Lapshin                                    *((*R.Symbol)->section()),
125*25d7b4fbSAlexey Lapshin                                    Sec->CanonicalName.c_str());
126*25d7b4fbSAlexey Lapshin   SymTable.removeSymbols(IsDead);
127*25d7b4fbSAlexey Lapshin   for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
128*25d7b4fbSAlexey Lapshin     if (S->section())
129*25d7b4fbSAlexey Lapshin       S->n_sect = OldIndexToSection[S->n_sect]->Index;
130*25d7b4fbSAlexey Lapshin   return Error::success();
131*25d7b4fbSAlexey Lapshin }
132*25d7b4fbSAlexey Lapshin 
nextAvailableSegmentAddress() const133*25d7b4fbSAlexey Lapshin uint64_t Object::nextAvailableSegmentAddress() const {
134*25d7b4fbSAlexey Lapshin   uint64_t HeaderSize =
135*25d7b4fbSAlexey Lapshin       is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
136*25d7b4fbSAlexey Lapshin   uint64_t Addr = HeaderSize + Header.SizeOfCmds;
137*25d7b4fbSAlexey Lapshin   for (const LoadCommand &LC : LoadCommands) {
138*25d7b4fbSAlexey Lapshin     const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
139*25d7b4fbSAlexey Lapshin     switch (MLC.load_command_data.cmd) {
140*25d7b4fbSAlexey Lapshin     case MachO::LC_SEGMENT:
141*25d7b4fbSAlexey Lapshin       Addr = std::max(Addr,
142*25d7b4fbSAlexey Lapshin                       static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
143*25d7b4fbSAlexey Lapshin                           MLC.segment_command_data.vmsize);
144*25d7b4fbSAlexey Lapshin       break;
145*25d7b4fbSAlexey Lapshin     case MachO::LC_SEGMENT_64:
146*25d7b4fbSAlexey Lapshin       Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
147*25d7b4fbSAlexey Lapshin                                 MLC.segment_command_64_data.vmsize);
148*25d7b4fbSAlexey Lapshin       break;
149*25d7b4fbSAlexey Lapshin     default:
150*25d7b4fbSAlexey Lapshin       continue;
151*25d7b4fbSAlexey Lapshin     }
152*25d7b4fbSAlexey Lapshin   }
153*25d7b4fbSAlexey Lapshin   return Addr;
154*25d7b4fbSAlexey Lapshin }
155*25d7b4fbSAlexey Lapshin 
156*25d7b4fbSAlexey Lapshin template <typename SegmentType>
157*25d7b4fbSAlexey Lapshin static void
constructSegment(SegmentType & Seg,llvm::MachO::LoadCommandType CmdType,StringRef SegName,uint64_t SegVMAddr,uint64_t SegVMSize)158*25d7b4fbSAlexey Lapshin constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
159*25d7b4fbSAlexey Lapshin                  StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
160*25d7b4fbSAlexey Lapshin   assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
161*25d7b4fbSAlexey Lapshin   memset(&Seg, 0, sizeof(SegmentType));
162*25d7b4fbSAlexey Lapshin   Seg.cmd = CmdType;
163*25d7b4fbSAlexey Lapshin   strncpy(Seg.segname, SegName.data(), SegName.size());
164*25d7b4fbSAlexey Lapshin   Seg.maxprot |=
165*25d7b4fbSAlexey Lapshin       (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
166*25d7b4fbSAlexey Lapshin   Seg.initprot |=
167*25d7b4fbSAlexey Lapshin       (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
168*25d7b4fbSAlexey Lapshin   Seg.vmaddr = SegVMAddr;
169*25d7b4fbSAlexey Lapshin   Seg.vmsize = SegVMSize;
170*25d7b4fbSAlexey Lapshin }
171*25d7b4fbSAlexey Lapshin 
addSegment(StringRef SegName,uint64_t SegVMSize)172*25d7b4fbSAlexey Lapshin LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
173*25d7b4fbSAlexey Lapshin   LoadCommand LC;
174*25d7b4fbSAlexey Lapshin   const uint64_t SegVMAddr = nextAvailableSegmentAddress();
175*25d7b4fbSAlexey Lapshin   if (is64Bit())
176*25d7b4fbSAlexey Lapshin     constructSegment(LC.MachOLoadCommand.segment_command_64_data,
177*25d7b4fbSAlexey Lapshin                      MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
178*25d7b4fbSAlexey Lapshin   else
179*25d7b4fbSAlexey Lapshin     constructSegment(LC.MachOLoadCommand.segment_command_data,
180*25d7b4fbSAlexey Lapshin                      MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
181*25d7b4fbSAlexey Lapshin 
182*25d7b4fbSAlexey Lapshin   LoadCommands.push_back(std::move(LC));
183*25d7b4fbSAlexey Lapshin   return LoadCommands.back();
184*25d7b4fbSAlexey Lapshin }
185*25d7b4fbSAlexey Lapshin 
186*25d7b4fbSAlexey Lapshin /// Extracts a segment name from a string which is possibly non-null-terminated.
extractSegmentName(const char * SegName)187*25d7b4fbSAlexey Lapshin static StringRef extractSegmentName(const char *SegName) {
188*25d7b4fbSAlexey Lapshin   return StringRef(SegName,
189*25d7b4fbSAlexey Lapshin                    strnlen(SegName, sizeof(MachO::segment_command::segname)));
190*25d7b4fbSAlexey Lapshin }
191*25d7b4fbSAlexey Lapshin 
getSegmentName() const192*25d7b4fbSAlexey Lapshin Optional<StringRef> LoadCommand::getSegmentName() const {
193*25d7b4fbSAlexey Lapshin   const MachO::macho_load_command &MLC = MachOLoadCommand;
194*25d7b4fbSAlexey Lapshin   switch (MLC.load_command_data.cmd) {
195*25d7b4fbSAlexey Lapshin   case MachO::LC_SEGMENT:
196*25d7b4fbSAlexey Lapshin     return extractSegmentName(MLC.segment_command_data.segname);
197*25d7b4fbSAlexey Lapshin   case MachO::LC_SEGMENT_64:
198*25d7b4fbSAlexey Lapshin     return extractSegmentName(MLC.segment_command_64_data.segname);
199*25d7b4fbSAlexey Lapshin   default:
200*25d7b4fbSAlexey Lapshin     return None;
201*25d7b4fbSAlexey Lapshin   }
202*25d7b4fbSAlexey Lapshin }
203*25d7b4fbSAlexey Lapshin 
getSegmentVMAddr() const204*25d7b4fbSAlexey Lapshin Optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
205*25d7b4fbSAlexey Lapshin   const MachO::macho_load_command &MLC = MachOLoadCommand;
206*25d7b4fbSAlexey Lapshin   switch (MLC.load_command_data.cmd) {
207*25d7b4fbSAlexey Lapshin   case MachO::LC_SEGMENT:
208*25d7b4fbSAlexey Lapshin     return MLC.segment_command_data.vmaddr;
209*25d7b4fbSAlexey Lapshin   case MachO::LC_SEGMENT_64:
210*25d7b4fbSAlexey Lapshin     return MLC.segment_command_64_data.vmaddr;
211*25d7b4fbSAlexey Lapshin   default:
212*25d7b4fbSAlexey Lapshin     return None;
213*25d7b4fbSAlexey Lapshin   }
214*25d7b4fbSAlexey Lapshin }
215