1*25d7b4fbSAlexey Lapshin //===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
2*25d7b4fbSAlexey Lapshin //
3*25d7b4fbSAlexey Lapshin // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*25d7b4fbSAlexey Lapshin // See https://llvm.org/LICENSE.txt for license information.
5*25d7b4fbSAlexey Lapshin // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*25d7b4fbSAlexey Lapshin //
7*25d7b4fbSAlexey Lapshin //===----------------------------------------------------------------------===//
8*25d7b4fbSAlexey Lapshin
9*25d7b4fbSAlexey Lapshin #include "MachOObject.h"
10*25d7b4fbSAlexey Lapshin #include "llvm/ADT/SmallPtrSet.h"
11*25d7b4fbSAlexey Lapshin #include <unordered_set>
12*25d7b4fbSAlexey Lapshin
13*25d7b4fbSAlexey Lapshin using namespace llvm;
14*25d7b4fbSAlexey Lapshin using namespace llvm::objcopy::macho;
15*25d7b4fbSAlexey Lapshin
getSymbolByIndex(uint32_t Index) const16*25d7b4fbSAlexey Lapshin const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
17*25d7b4fbSAlexey Lapshin assert(Index < Symbols.size() && "invalid symbol index");
18*25d7b4fbSAlexey Lapshin return Symbols[Index].get();
19*25d7b4fbSAlexey Lapshin }
20*25d7b4fbSAlexey Lapshin
getSymbolByIndex(uint32_t Index)21*25d7b4fbSAlexey Lapshin SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
22*25d7b4fbSAlexey Lapshin return const_cast<SymbolEntry *>(
23*25d7b4fbSAlexey Lapshin static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
24*25d7b4fbSAlexey Lapshin }
25*25d7b4fbSAlexey Lapshin
removeSymbols(function_ref<bool (const std::unique_ptr<SymbolEntry> &)> ToRemove)26*25d7b4fbSAlexey Lapshin void SymbolTable::removeSymbols(
27*25d7b4fbSAlexey Lapshin function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
28*25d7b4fbSAlexey Lapshin llvm::erase_if(Symbols, ToRemove);
29*25d7b4fbSAlexey Lapshin }
30*25d7b4fbSAlexey Lapshin
updateLoadCommandIndexes()31*25d7b4fbSAlexey Lapshin void Object::updateLoadCommandIndexes() {
32*25d7b4fbSAlexey Lapshin static constexpr char TextSegmentName[] = "__TEXT";
33*25d7b4fbSAlexey Lapshin // Update indices of special load commands
34*25d7b4fbSAlexey Lapshin for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
35*25d7b4fbSAlexey Lapshin LoadCommand &LC = LoadCommands[Index];
36*25d7b4fbSAlexey Lapshin switch (LC.MachOLoadCommand.load_command_data.cmd) {
37*25d7b4fbSAlexey Lapshin case MachO::LC_CODE_SIGNATURE:
38*25d7b4fbSAlexey Lapshin CodeSignatureCommandIndex = Index;
39*25d7b4fbSAlexey Lapshin break;
40*25d7b4fbSAlexey Lapshin case MachO::LC_SEGMENT:
41*25d7b4fbSAlexey Lapshin if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
42*25d7b4fbSAlexey Lapshin TextSegmentName)
43*25d7b4fbSAlexey Lapshin TextSegmentCommandIndex = Index;
44*25d7b4fbSAlexey Lapshin break;
45*25d7b4fbSAlexey Lapshin case MachO::LC_SEGMENT_64:
46*25d7b4fbSAlexey Lapshin if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
47*25d7b4fbSAlexey Lapshin TextSegmentName)
48*25d7b4fbSAlexey Lapshin TextSegmentCommandIndex = Index;
49*25d7b4fbSAlexey Lapshin break;
50*25d7b4fbSAlexey Lapshin case MachO::LC_SYMTAB:
51*25d7b4fbSAlexey Lapshin SymTabCommandIndex = Index;
52*25d7b4fbSAlexey Lapshin break;
53*25d7b4fbSAlexey Lapshin case MachO::LC_DYSYMTAB:
54*25d7b4fbSAlexey Lapshin DySymTabCommandIndex = Index;
55*25d7b4fbSAlexey Lapshin break;
56*25d7b4fbSAlexey Lapshin case MachO::LC_DYLD_INFO:
57*25d7b4fbSAlexey Lapshin case MachO::LC_DYLD_INFO_ONLY:
58*25d7b4fbSAlexey Lapshin DyLdInfoCommandIndex = Index;
59*25d7b4fbSAlexey Lapshin break;
60*25d7b4fbSAlexey Lapshin case MachO::LC_DATA_IN_CODE:
61*25d7b4fbSAlexey Lapshin DataInCodeCommandIndex = Index;
62*25d7b4fbSAlexey Lapshin break;
63*25d7b4fbSAlexey Lapshin case MachO::LC_LINKER_OPTIMIZATION_HINT:
64*25d7b4fbSAlexey Lapshin LinkerOptimizationHintCommandIndex = Index;
65*25d7b4fbSAlexey Lapshin break;
66*25d7b4fbSAlexey Lapshin case MachO::LC_FUNCTION_STARTS:
67*25d7b4fbSAlexey Lapshin FunctionStartsCommandIndex = Index;
68*25d7b4fbSAlexey Lapshin break;
69*25d7b4fbSAlexey Lapshin case MachO::LC_DYLD_CHAINED_FIXUPS:
70*25d7b4fbSAlexey Lapshin ChainedFixupsCommandIndex = Index;
71*25d7b4fbSAlexey Lapshin break;
72*25d7b4fbSAlexey Lapshin case MachO::LC_DYLD_EXPORTS_TRIE:
73*25d7b4fbSAlexey Lapshin ExportsTrieCommandIndex = Index;
74*25d7b4fbSAlexey Lapshin break;
75*25d7b4fbSAlexey Lapshin }
76*25d7b4fbSAlexey Lapshin }
77*25d7b4fbSAlexey Lapshin }
78*25d7b4fbSAlexey Lapshin
removeLoadCommands(function_ref<bool (const LoadCommand &)> ToRemove)79*25d7b4fbSAlexey Lapshin Error Object::removeLoadCommands(
80*25d7b4fbSAlexey Lapshin function_ref<bool(const LoadCommand &)> ToRemove) {
81*25d7b4fbSAlexey Lapshin auto It = std::stable_partition(
82*25d7b4fbSAlexey Lapshin LoadCommands.begin(), LoadCommands.end(),
83*25d7b4fbSAlexey Lapshin [&](const LoadCommand &LC) { return !ToRemove(LC); });
84*25d7b4fbSAlexey Lapshin LoadCommands.erase(It, LoadCommands.end());
85*25d7b4fbSAlexey Lapshin
86*25d7b4fbSAlexey Lapshin updateLoadCommandIndexes();
87*25d7b4fbSAlexey Lapshin return Error::success();
88*25d7b4fbSAlexey Lapshin }
89*25d7b4fbSAlexey Lapshin
removeSections(function_ref<bool (const std::unique_ptr<Section> &)> ToRemove)90*25d7b4fbSAlexey Lapshin Error Object::removeSections(
91*25d7b4fbSAlexey Lapshin function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
92*25d7b4fbSAlexey Lapshin DenseMap<uint32_t, const Section *> OldIndexToSection;
93*25d7b4fbSAlexey Lapshin uint32_t NextSectionIndex = 1;
94*25d7b4fbSAlexey Lapshin for (LoadCommand &LC : LoadCommands) {
95*25d7b4fbSAlexey Lapshin auto It = std::stable_partition(
96*25d7b4fbSAlexey Lapshin std::begin(LC.Sections), std::end(LC.Sections),
97*25d7b4fbSAlexey Lapshin [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
98*25d7b4fbSAlexey Lapshin for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
99*25d7b4fbSAlexey Lapshin OldIndexToSection[(*I)->Index] = I->get();
100*25d7b4fbSAlexey Lapshin (*I)->Index = NextSectionIndex++;
101*25d7b4fbSAlexey Lapshin }
102*25d7b4fbSAlexey Lapshin LC.Sections.erase(It, LC.Sections.end());
103*25d7b4fbSAlexey Lapshin }
104*25d7b4fbSAlexey Lapshin
105*25d7b4fbSAlexey Lapshin auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
106*25d7b4fbSAlexey Lapshin Optional<uint32_t> Section = S->section();
107*25d7b4fbSAlexey Lapshin return (Section && !OldIndexToSection.count(*Section));
108*25d7b4fbSAlexey Lapshin };
109*25d7b4fbSAlexey Lapshin
110*25d7b4fbSAlexey Lapshin SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
111*25d7b4fbSAlexey Lapshin for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
112*25d7b4fbSAlexey Lapshin if (IsDead(Sym))
113*25d7b4fbSAlexey Lapshin DeadSymbols.insert(Sym.get());
114*25d7b4fbSAlexey Lapshin
115*25d7b4fbSAlexey Lapshin for (const LoadCommand &LC : LoadCommands)
116*25d7b4fbSAlexey Lapshin for (const std::unique_ptr<Section> &Sec : LC.Sections)
117*25d7b4fbSAlexey Lapshin for (const RelocationInfo &R : Sec->Relocations)
118*25d7b4fbSAlexey Lapshin if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
119*25d7b4fbSAlexey Lapshin return createStringError(std::errc::invalid_argument,
120*25d7b4fbSAlexey Lapshin "symbol '%s' defined in section with index "
121*25d7b4fbSAlexey Lapshin "'%u' cannot be removed because it is "
122*25d7b4fbSAlexey Lapshin "referenced by a relocation in section '%s'",
123*25d7b4fbSAlexey Lapshin (*R.Symbol)->Name.c_str(),
124*25d7b4fbSAlexey Lapshin *((*R.Symbol)->section()),
125*25d7b4fbSAlexey Lapshin Sec->CanonicalName.c_str());
126*25d7b4fbSAlexey Lapshin SymTable.removeSymbols(IsDead);
127*25d7b4fbSAlexey Lapshin for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
128*25d7b4fbSAlexey Lapshin if (S->section())
129*25d7b4fbSAlexey Lapshin S->n_sect = OldIndexToSection[S->n_sect]->Index;
130*25d7b4fbSAlexey Lapshin return Error::success();
131*25d7b4fbSAlexey Lapshin }
132*25d7b4fbSAlexey Lapshin
nextAvailableSegmentAddress() const133*25d7b4fbSAlexey Lapshin uint64_t Object::nextAvailableSegmentAddress() const {
134*25d7b4fbSAlexey Lapshin uint64_t HeaderSize =
135*25d7b4fbSAlexey Lapshin is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
136*25d7b4fbSAlexey Lapshin uint64_t Addr = HeaderSize + Header.SizeOfCmds;
137*25d7b4fbSAlexey Lapshin for (const LoadCommand &LC : LoadCommands) {
138*25d7b4fbSAlexey Lapshin const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
139*25d7b4fbSAlexey Lapshin switch (MLC.load_command_data.cmd) {
140*25d7b4fbSAlexey Lapshin case MachO::LC_SEGMENT:
141*25d7b4fbSAlexey Lapshin Addr = std::max(Addr,
142*25d7b4fbSAlexey Lapshin static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
143*25d7b4fbSAlexey Lapshin MLC.segment_command_data.vmsize);
144*25d7b4fbSAlexey Lapshin break;
145*25d7b4fbSAlexey Lapshin case MachO::LC_SEGMENT_64:
146*25d7b4fbSAlexey Lapshin Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
147*25d7b4fbSAlexey Lapshin MLC.segment_command_64_data.vmsize);
148*25d7b4fbSAlexey Lapshin break;
149*25d7b4fbSAlexey Lapshin default:
150*25d7b4fbSAlexey Lapshin continue;
151*25d7b4fbSAlexey Lapshin }
152*25d7b4fbSAlexey Lapshin }
153*25d7b4fbSAlexey Lapshin return Addr;
154*25d7b4fbSAlexey Lapshin }
155*25d7b4fbSAlexey Lapshin
156*25d7b4fbSAlexey Lapshin template <typename SegmentType>
157*25d7b4fbSAlexey Lapshin static void
constructSegment(SegmentType & Seg,llvm::MachO::LoadCommandType CmdType,StringRef SegName,uint64_t SegVMAddr,uint64_t SegVMSize)158*25d7b4fbSAlexey Lapshin constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
159*25d7b4fbSAlexey Lapshin StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
160*25d7b4fbSAlexey Lapshin assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
161*25d7b4fbSAlexey Lapshin memset(&Seg, 0, sizeof(SegmentType));
162*25d7b4fbSAlexey Lapshin Seg.cmd = CmdType;
163*25d7b4fbSAlexey Lapshin strncpy(Seg.segname, SegName.data(), SegName.size());
164*25d7b4fbSAlexey Lapshin Seg.maxprot |=
165*25d7b4fbSAlexey Lapshin (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
166*25d7b4fbSAlexey Lapshin Seg.initprot |=
167*25d7b4fbSAlexey Lapshin (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
168*25d7b4fbSAlexey Lapshin Seg.vmaddr = SegVMAddr;
169*25d7b4fbSAlexey Lapshin Seg.vmsize = SegVMSize;
170*25d7b4fbSAlexey Lapshin }
171*25d7b4fbSAlexey Lapshin
addSegment(StringRef SegName,uint64_t SegVMSize)172*25d7b4fbSAlexey Lapshin LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
173*25d7b4fbSAlexey Lapshin LoadCommand LC;
174*25d7b4fbSAlexey Lapshin const uint64_t SegVMAddr = nextAvailableSegmentAddress();
175*25d7b4fbSAlexey Lapshin if (is64Bit())
176*25d7b4fbSAlexey Lapshin constructSegment(LC.MachOLoadCommand.segment_command_64_data,
177*25d7b4fbSAlexey Lapshin MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
178*25d7b4fbSAlexey Lapshin else
179*25d7b4fbSAlexey Lapshin constructSegment(LC.MachOLoadCommand.segment_command_data,
180*25d7b4fbSAlexey Lapshin MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
181*25d7b4fbSAlexey Lapshin
182*25d7b4fbSAlexey Lapshin LoadCommands.push_back(std::move(LC));
183*25d7b4fbSAlexey Lapshin return LoadCommands.back();
184*25d7b4fbSAlexey Lapshin }
185*25d7b4fbSAlexey Lapshin
186*25d7b4fbSAlexey Lapshin /// Extracts a segment name from a string which is possibly non-null-terminated.
extractSegmentName(const char * SegName)187*25d7b4fbSAlexey Lapshin static StringRef extractSegmentName(const char *SegName) {
188*25d7b4fbSAlexey Lapshin return StringRef(SegName,
189*25d7b4fbSAlexey Lapshin strnlen(SegName, sizeof(MachO::segment_command::segname)));
190*25d7b4fbSAlexey Lapshin }
191*25d7b4fbSAlexey Lapshin
getSegmentName() const192*25d7b4fbSAlexey Lapshin Optional<StringRef> LoadCommand::getSegmentName() const {
193*25d7b4fbSAlexey Lapshin const MachO::macho_load_command &MLC = MachOLoadCommand;
194*25d7b4fbSAlexey Lapshin switch (MLC.load_command_data.cmd) {
195*25d7b4fbSAlexey Lapshin case MachO::LC_SEGMENT:
196*25d7b4fbSAlexey Lapshin return extractSegmentName(MLC.segment_command_data.segname);
197*25d7b4fbSAlexey Lapshin case MachO::LC_SEGMENT_64:
198*25d7b4fbSAlexey Lapshin return extractSegmentName(MLC.segment_command_64_data.segname);
199*25d7b4fbSAlexey Lapshin default:
200*25d7b4fbSAlexey Lapshin return None;
201*25d7b4fbSAlexey Lapshin }
202*25d7b4fbSAlexey Lapshin }
203*25d7b4fbSAlexey Lapshin
getSegmentVMAddr() const204*25d7b4fbSAlexey Lapshin Optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
205*25d7b4fbSAlexey Lapshin const MachO::macho_load_command &MLC = MachOLoadCommand;
206*25d7b4fbSAlexey Lapshin switch (MLC.load_command_data.cmd) {
207*25d7b4fbSAlexey Lapshin case MachO::LC_SEGMENT:
208*25d7b4fbSAlexey Lapshin return MLC.segment_command_data.vmaddr;
209*25d7b4fbSAlexey Lapshin case MachO::LC_SEGMENT_64:
210*25d7b4fbSAlexey Lapshin return MLC.segment_command_64_data.vmaddr;
211*25d7b4fbSAlexey Lapshin default:
212*25d7b4fbSAlexey Lapshin return None;
213*25d7b4fbSAlexey Lapshin }
214*25d7b4fbSAlexey Lapshin }
215