1 //===- MarkLive.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MarkLive.h"
10 #include "Config.h"
11 #include "OutputSegment.h"
12 #include "SymbolTable.h"
13 #include "Symbols.h"
14 #include "UnwindInfoSection.h"
15 #include "mach-o/compact_unwind_encoding.h"
16 #include "llvm/Support/TimeProfiler.h"
17 
18 namespace lld {
19 namespace macho {
20 
21 using namespace llvm;
22 using namespace llvm::MachO;
23 
24 struct WhyLiveEntry {
25   InputSection *isec;
26   // Keep track of the entry that caused us to mark `isec` as live.
27   const WhyLiveEntry *prev;
28 
29   WhyLiveEntry(InputSection *isec, const WhyLiveEntry *prev)
30       : isec(isec), prev(prev) {}
31 };
32 
33 // Type-erased interface to MarkLiveImpl. Used for adding roots to the liveness
34 // graph.
35 class MarkLive {
36 public:
37   virtual void enqueue(InputSection *isec, uint64_t off) = 0;
38   virtual void addSym(Symbol *s) = 0;
39   virtual void markTransitively() = 0;
40   virtual ~MarkLive() = default;
41 };
42 
43 template <bool RecordWhyLive> class MarkLiveImpl : public MarkLive {
44 public:
45   // -why_live is a rarely used option, so we don't want support for that flag
46   // to slow down the main -dead_strip code path. As such, we employ templates
47   // to avoid the usage of WhyLiveEntry in the main code path. This saves us
48   // from needless allocations and pointer indirections.
49   using WorklistEntry =
50       std::conditional_t<RecordWhyLive, WhyLiveEntry, InputSection>;
51 
52   void enqueue(InputSection *isec, uint64_t off) override {
53     enqueue(isec, off, nullptr);
54   }
55   void addSym(Symbol *s) override { addSym(s, nullptr); }
56   void markTransitively() override;
57 
58 private:
59   void enqueue(InputSection *isec, uint64_t off, const WorklistEntry *prev);
60   void addSym(Symbol *s, const WorklistEntry *prev);
61   void printWhyLive(Symbol *s, const WorklistEntry *prev);
62   const InputSection *getInputSection(const WorklistEntry *) const;
63   WorklistEntry *makeEntry(InputSection *, const WorklistEntry *prev) const;
64 
65   // We build up a worklist of sections which have been marked as live. We
66   // only push into the worklist when we discover an unmarked section, and we
67   // mark as we push, so sections never appear twice in the list. Literal
68   // sections cannot contain references to other sections, so we only store
69   // ConcatInputSections in our worklist.
70   SmallVector<WorklistEntry *, 256> worklist;
71 };
72 
73 template <bool RecordWhyLive>
74 void MarkLiveImpl<RecordWhyLive>::enqueue(
75     InputSection *isec, uint64_t off,
76     const typename MarkLiveImpl<RecordWhyLive>::WorklistEntry *prev) {
77   if (isec->isLive(off))
78     return;
79   isec->markLive(off);
80   if (auto s = dyn_cast<ConcatInputSection>(isec)) {
81     assert(!s->isCoalescedWeak());
82     worklist.push_back(makeEntry(s, prev));
83   }
84 }
85 
86 template <bool RecordWhyLive>
87 void MarkLiveImpl<RecordWhyLive>::addSym(
88     Symbol *s,
89     const typename MarkLiveImpl<RecordWhyLive>::WorklistEntry *prev) {
90   if (s->used)
91     return;
92   s->used = true;
93   printWhyLive(s, prev);
94   if (auto *d = dyn_cast<Defined>(s)) {
95     if (d->isec)
96       enqueue(d->isec, d->value, prev);
97     if (d->unwindEntry)
98       enqueue(d->unwindEntry, 0, prev);
99   }
100 }
101 
102 static void printWhyLiveImpl(const Symbol *s, const WhyLiveEntry *prev) {
103   std::string out = toString(*s) + " from " + toString(s->getFile());
104   int indent = 2;
105   for (const WhyLiveEntry *entry = prev; entry;
106        entry = entry->prev, indent += 2) {
107     const TinyPtrVector<Defined *> &symbols = entry->isec->symbols;
108     // With .subsections_with_symbols set, most isecs will have exactly one
109     // entry in their symbols vector, so we just print the first one.
110     if (!symbols.empty())
111       out += "\n" + std::string(indent, ' ') + toString(*symbols.front()) +
112              " from " + toString(symbols.front()->getFile());
113   }
114   message(out);
115 }
116 
117 // NOTE: if/when `constexpr if` becomes available, we can simplify a lot of
118 // the partial template specializations below.
119 
120 template <>
121 void MarkLiveImpl<true>::printWhyLive(Symbol *s, const WhyLiveEntry *prev) {
122   if (!config->whyLive.empty() && config->whyLive.match(s->getName()))
123     printWhyLiveImpl(s, prev);
124 }
125 
126 template <>
127 void MarkLiveImpl<false>::printWhyLive(Symbol *s, const InputSection *prev) {}
128 
129 template <>
130 const InputSection *
131 MarkLiveImpl<true>::getInputSection(const WhyLiveEntry *entry) const {
132   return entry->isec;
133 }
134 
135 template <>
136 const InputSection *
137 MarkLiveImpl<false>::getInputSection(const InputSection *isec) const {
138   return isec;
139 }
140 
141 template <>
142 typename MarkLiveImpl<true>::WorklistEntry *MarkLiveImpl<true>::makeEntry(
143     InputSection *isec, const MarkLiveImpl<true>::WorklistEntry *prev) const {
144   if (!isec) {
145     assert(!prev);
146     return nullptr;
147   }
148   return make<WhyLiveEntry>(isec, prev);
149 }
150 
151 template <>
152 typename MarkLiveImpl<false>::WorklistEntry *MarkLiveImpl<false>::makeEntry(
153     InputSection *isec, const MarkLiveImpl<false>::WorklistEntry *prev) const {
154   return isec;
155 }
156 
157 template <bool RecordWhyLive>
158 void MarkLiveImpl<RecordWhyLive>::markTransitively() {
159   do {
160     // Mark things reachable from GC roots as live.
161     while (!worklist.empty()) {
162       WorklistEntry *entry = worklist.pop_back_val();
163       // Entries that get placed onto the worklist always contain
164       // ConcatInputSections. `WhyLiveEntry::prev` may point to entries that
165       // contain other types of InputSections (due to S_ATTR_LIVE_SUPPORT), but
166       // those entries should never be pushed onto the worklist.
167       auto *isec = cast<ConcatInputSection>(getInputSection(entry));
168       assert(isec->live &&
169              "We mark as live when pushing onto the worklist!");
170 
171       // Mark all symbols listed in the relocation table for this section.
172       for (const Reloc &r : isec->relocs) {
173         if (auto *s = r.referent.dyn_cast<Symbol *>())
174           addSym(s, entry);
175         else
176           enqueue(r.referent.get<InputSection *>(), r.addend, entry);
177       }
178       for (Defined *d : getInputSection(entry)->symbols)
179         addSym(d, entry);
180     }
181 
182     // S_ATTR_LIVE_SUPPORT sections are live if they point _to_ a live
183     // section. Process them in a second pass.
184     for (ConcatInputSection *isec : inputSections) {
185       // FIXME: Check if copying all S_ATTR_LIVE_SUPPORT sections into a
186       // separate vector and only walking that here is faster.
187       if (!(isec->getFlags() & S_ATTR_LIVE_SUPPORT) || isec->live)
188         continue;
189 
190       for (const Reloc &r : isec->relocs) {
191         if (auto *s = r.referent.dyn_cast<Symbol *>()) {
192           if (s->isLive()) {
193             InputSection *referentIsec = nullptr;
194             if (auto *d = dyn_cast<Defined>(s))
195               referentIsec = d->isec;
196             enqueue(isec, 0, makeEntry(referentIsec, nullptr));
197           }
198         } else {
199           auto *referentIsec = r.referent.get<InputSection *>();
200           if (referentIsec->isLive(r.addend))
201             enqueue(isec, 0, makeEntry(referentIsec, nullptr));
202         }
203       }
204     }
205 
206     // S_ATTR_LIVE_SUPPORT could have marked additional sections live,
207     // which in turn could mark additional S_ATTR_LIVE_SUPPORT sections live.
208     // Iterate. In practice, the second iteration won't mark additional
209     // S_ATTR_LIVE_SUPPORT sections live.
210   } while (!worklist.empty());
211 }
212 
213 // Set live bit on for each reachable chunk. Unmarked (unreachable)
214 // InputSections will be ignored by Writer, so they will be excluded
215 // from the final output.
216 void markLive() {
217   TimeTraceScope timeScope("markLive");
218   MarkLive *marker;
219   if (config->whyLive.empty())
220     marker = make<MarkLiveImpl<false>>();
221   else
222     marker = make<MarkLiveImpl<true>>();
223   // Add GC roots.
224   if (config->entry)
225     marker->addSym(config->entry);
226   for (Symbol *sym : symtab->getSymbols()) {
227     if (auto *defined = dyn_cast<Defined>(sym)) {
228       // -exported_symbol(s_list)
229       if (!config->exportedSymbols.empty() &&
230           config->exportedSymbols.match(defined->getName())) {
231         // NOTE: Even though exporting private externs is an ill-defined
232         // operation, we are purposely not checking for privateExtern in
233         // order to follow ld64's behavior of treating all exported private
234         // extern symbols as live, irrespective of whether they are autohide.
235         marker->addSym(defined);
236         continue;
237       }
238 
239       // public symbols explicitly marked .no_dead_strip
240       if (defined->referencedDynamically || defined->noDeadStrip) {
241         marker->addSym(defined);
242         continue;
243       }
244 
245       // FIXME: When we implement these flags, make symbols from them GC
246       // roots:
247       // * -reexported_symbol(s_list)
248       // * -alias(-list)
249       // * -init
250 
251       // In dylibs and bundles and in executables with -export_dynamic,
252       // all external functions are GC roots.
253       bool externsAreRoots =
254           config->outputType != MH_EXECUTE || config->exportDynamic;
255       if (externsAreRoots && !defined->privateExtern) {
256         marker->addSym(defined);
257         continue;
258       }
259     }
260   }
261   // -u symbols
262   for (Symbol *sym : config->explicitUndefineds)
263     marker->addSym(sym);
264   // local symbols explicitly marked .no_dead_strip
265   for (const InputFile *file : inputFiles)
266     if (auto *objFile = dyn_cast<ObjFile>(file))
267       for (Symbol *sym : objFile->symbols)
268         if (auto *defined = dyn_cast_or_null<Defined>(sym))
269           if (!defined->isExternal() && defined->noDeadStrip)
270             marker->addSym(defined);
271   if (auto *stubBinder =
272           dyn_cast_or_null<DylibSymbol>(symtab->find("dyld_stub_binder")))
273     marker->addSym(stubBinder);
274   for (ConcatInputSection *isec : inputSections) {
275     // Sections marked no_dead_strip
276     if (isec->getFlags() & S_ATTR_NO_DEAD_STRIP) {
277       marker->enqueue(isec, 0);
278       continue;
279     }
280 
281     // mod_init_funcs, mod_term_funcs sections
282     if (sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS ||
283         sectionType(isec->getFlags()) == S_MOD_TERM_FUNC_POINTERS) {
284       marker->enqueue(isec, 0);
285       continue;
286     }
287   }
288 
289   marker->markTransitively();
290 }
291 
292 } // namespace macho
293 } // namespace lld
294