xref: /llvm-project-15.0.7/lld/MachO/ICF.cpp (revision 5f26d863)
1 //===- ICF.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ICF.h"
10 #include "ConcatOutputSection.h"
11 #include "InputSection.h"
12 #include "Symbols.h"
13 #include "UnwindInfoSection.h"
14 
15 #include "lld/Common/CommonLinkerContext.h"
16 #include "llvm/Support/Parallel.h"
17 #include "llvm/Support/TimeProfiler.h"
18 #include "llvm/Support/xxhash.h"
19 
20 #include <atomic>
21 
22 using namespace llvm;
23 using namespace lld;
24 using namespace lld::macho;
25 
26 class ICF {
27 public:
28   ICF(std::vector<ConcatInputSection *> &inputs);
29 
30   void run();
31   void segregate(size_t begin, size_t end,
32                  std::function<bool(const ConcatInputSection *,
33                                     const ConcatInputSection *)>
34                      equals);
35   size_t findBoundary(size_t begin, size_t end);
36   void forEachClassRange(size_t begin, size_t end,
37                          std::function<void(size_t, size_t)> func);
38   void forEachClass(std::function<void(size_t, size_t)> func);
39 
40   // ICF needs a copy of the inputs vector because its equivalence-class
41   // segregation algorithm destroys the proper sequence.
42   std::vector<ConcatInputSection *> icfInputs;
43 };
44 
45 ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
46   icfInputs.assign(inputs.begin(), inputs.end());
47 }
48 
49 // ICF = Identical Code Folding
50 //
51 // We only fold __TEXT,__text, so this is really "code" folding, and not
52 // "COMDAT" folding. String and scalar constant literals are deduplicated
53 // elsewhere.
54 //
55 // Summary of segments & sections:
56 //
57 // The __TEXT segment is readonly at the MMU. Some sections are already
58 // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
59 // synthetic and inherently free of duplicates (__TEXT,__stubs &
60 // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
61 // because doing so induces many test failures.
62 //
63 // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
64 // thus ineligible for ICF.
65 //
66 // The __DATA_CONST segment is read/write at the MMU, but is logically const to
67 // the application after dyld applies fixups to pointer data. We currently
68 // fold only the __DATA_CONST,__cfstring section.
69 //
70 // The __DATA segment is read/write at the MMU, and as application-writeable
71 // data, none of its sections are eligible for ICF.
72 //
73 // Please see the large block comment in lld/ELF/ICF.cpp for an explanation
74 // of the segregation algorithm.
75 //
76 // FIXME(gkm): implement keep-unique attributes
77 // FIXME(gkm): implement address-significance tables for MachO object files
78 
79 static unsigned icfPass = 0;
80 static std::atomic<bool> icfRepeat{false};
81 
82 // Compare "non-moving" parts of two ConcatInputSections, namely everything
83 // except references to other ConcatInputSections.
84 static bool equalsConstant(const ConcatInputSection *ia,
85                            const ConcatInputSection *ib) {
86   // We can only fold within the same OutputSection.
87   if (ia->parent != ib->parent)
88     return false;
89   if (ia->data.size() != ib->data.size())
90     return false;
91   if (ia->data != ib->data)
92     return false;
93   if (ia->relocs.size() != ib->relocs.size())
94     return false;
95   auto f = [](const Reloc &ra, const Reloc &rb) {
96     if (ra.type != rb.type)
97       return false;
98     if (ra.pcrel != rb.pcrel)
99       return false;
100     if (ra.length != rb.length)
101       return false;
102     if (ra.offset != rb.offset)
103       return false;
104     if (ra.addend != rb.addend)
105       return false;
106     if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>())
107       return false;
108 
109     InputSection *isecA, *isecB;
110 
111     uint64_t valueA = 0;
112     uint64_t valueB = 0;
113     if (ra.referent.is<Symbol *>()) {
114       const auto *sa = ra.referent.get<Symbol *>();
115       const auto *sb = rb.referent.get<Symbol *>();
116       if (sa->kind() != sb->kind())
117         return false;
118       if (!isa<Defined>(sa)) {
119         // ICF runs before Undefineds are reported.
120         assert(isa<DylibSymbol>(sa) || isa<Undefined>(sa));
121         return sa == sb;
122       }
123       const auto *da = cast<Defined>(sa);
124       const auto *db = cast<Defined>(sb);
125       if (!da->isec || !db->isec) {
126         assert(da->isAbsolute() && db->isAbsolute());
127         return da->value == db->value;
128       }
129       isecA = da->isec;
130       valueA = da->value;
131       isecB = db->isec;
132       valueB = db->value;
133     } else {
134       isecA = ra.referent.get<InputSection *>();
135       isecB = rb.referent.get<InputSection *>();
136     }
137 
138     if (isecA->parent != isecB->parent)
139       return false;
140     // Sections with identical parents should be of the same kind.
141     assert(isecA->kind() == isecB->kind());
142     // We will compare ConcatInputSection contents in equalsVariable.
143     if (isa<ConcatInputSection>(isecA))
144       return true;
145     // Else we have two literal sections. References to them are equal iff their
146     // offsets in the output section are equal.
147     return isecA->getOffset(valueA + ra.addend) ==
148            isecB->getOffset(valueB + rb.addend);
149   };
150   return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
151                     f);
152 }
153 
154 // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not
155 // handled by equalsConstant().
156 static bool equalsVariable(const ConcatInputSection *ia,
157                            const ConcatInputSection *ib) {
158   assert(ia->relocs.size() == ib->relocs.size());
159   auto f = [](const Reloc &ra, const Reloc &rb) {
160     // We already filtered out mismatching values/addends in equalsConstant.
161     if (ra.referent == rb.referent)
162       return true;
163     const ConcatInputSection *isecA, *isecB;
164     if (ra.referent.is<Symbol *>()) {
165       // Matching DylibSymbols are already filtered out by the
166       // identical-referent check above. Non-matching DylibSymbols were filtered
167       // out in equalsConstant(). So we can safely cast to Defined here.
168       const auto *da = cast<Defined>(ra.referent.get<Symbol *>());
169       const auto *db = cast<Defined>(rb.referent.get<Symbol *>());
170       if (da->isAbsolute())
171         return true;
172       isecA = dyn_cast<ConcatInputSection>(da->isec);
173       if (!isecA)
174         return true; // literal sections were checked in equalsConstant.
175       isecB = cast<ConcatInputSection>(db->isec);
176     } else {
177       const auto *sa = ra.referent.get<InputSection *>();
178       const auto *sb = rb.referent.get<InputSection *>();
179       isecA = dyn_cast<ConcatInputSection>(sa);
180       if (!isecA)
181         return true;
182       isecB = cast<ConcatInputSection>(sb);
183     }
184     return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2];
185   };
186   if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f))
187     return false;
188 
189   // If there are symbols with associated unwind info, check that the unwind
190   // info matches. For simplicity, we only handle the case where there are only
191   // symbols at offset zero within the section (which is typically the case with
192   // .subsections_via_symbols.)
193   auto hasCU = [](Defined *d) { return d->unwindEntry != nullptr; };
194   auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasCU);
195   auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasCU);
196   if (itA == ia->symbols.end())
197     return itB == ib->symbols.end();
198   if (itB == ib->symbols.end())
199     return false;
200   const Defined *da = *itA;
201   const Defined *db = *itB;
202   if (da->unwindEntry->icfEqClass[icfPass % 2] !=
203           db->unwindEntry->icfEqClass[icfPass % 2] ||
204       da->value != 0 || db->value != 0)
205     return false;
206   auto isZero = [](Defined *d) { return d->value == 0; };
207   return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) ==
208              ia->symbols.end() &&
209          std::find_if_not(std::next(itB), ib->symbols.end(), isZero) ==
210              ib->symbols.end();
211 }
212 
213 // Find the first InputSection after BEGIN whose equivalence class differs
214 size_t ICF::findBoundary(size_t begin, size_t end) {
215   uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2];
216   for (size_t i = begin + 1; i < end; ++i)
217     if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2])
218       return i;
219   return end;
220 }
221 
222 // Invoke FUNC on subranges with matching equivalence class
223 void ICF::forEachClassRange(size_t begin, size_t end,
224                             std::function<void(size_t, size_t)> func) {
225   while (begin < end) {
226     size_t mid = findBoundary(begin, end);
227     func(begin, mid);
228     begin = mid;
229   }
230 }
231 
232 // Split icfInputs into shards, then parallelize invocation of FUNC on subranges
233 // with matching equivalence class
234 void ICF::forEachClass(std::function<void(size_t, size_t)> func) {
235   // Only use threads when the benefits outweigh the overhead.
236   const size_t threadingThreshold = 1024;
237   if (icfInputs.size() < threadingThreshold) {
238     forEachClassRange(0, icfInputs.size(), func);
239     ++icfPass;
240     return;
241   }
242 
243   // Shard into non-overlapping intervals, and call FUNC in parallel.  The
244   // sharding must be completed before any calls to FUNC are made so that FUNC
245   // can modify the InputSection in its shard without causing data races.
246   const size_t shards = 256;
247   size_t step = icfInputs.size() / shards;
248   size_t boundaries[shards + 1];
249   boundaries[0] = 0;
250   boundaries[shards] = icfInputs.size();
251   parallelForEachN(1, shards, [&](size_t i) {
252     boundaries[i] = findBoundary((i - 1) * step, icfInputs.size());
253   });
254   parallelForEachN(1, shards + 1, [&](size_t i) {
255     if (boundaries[i - 1] < boundaries[i]) {
256       forEachClassRange(boundaries[i - 1], boundaries[i], func);
257     }
258   });
259   ++icfPass;
260 }
261 
262 void ICF::run() {
263   // Into each origin-section hash, combine all reloc referent section hashes.
264   for (icfPass = 0; icfPass < 2; ++icfPass) {
265     parallelForEach(icfInputs, [&](ConcatInputSection *isec) {
266       uint64_t hash = isec->icfEqClass[icfPass % 2];
267       for (const Reloc &r : isec->relocs) {
268         if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
269           if (auto *dylibSym = dyn_cast<DylibSymbol>(sym))
270             hash += dylibSym->stubsHelperIndex;
271           else if (auto *defined = dyn_cast<Defined>(sym)) {
272             if (defined->isec) {
273               if (auto isec = dyn_cast<ConcatInputSection>(defined->isec))
274                 hash += defined->value + isec->icfEqClass[icfPass % 2];
275               else
276                 hash += defined->isec->kind() +
277                         defined->isec->getOffset(defined->value);
278             } else {
279               hash += defined->value;
280             }
281           } else if (!isa<Undefined>(sym)) // ICF runs before Undefined diags.
282             llvm_unreachable("foldIdenticalSections symbol kind");
283         }
284       }
285       // Set MSB to 1 to avoid collisions with non-hashed classes.
286       isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 63);
287     });
288   }
289 
290   llvm::stable_sort(
291       icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {
292         return a->icfEqClass[0] < b->icfEqClass[0];
293       });
294   forEachClass(
295       [&](size_t begin, size_t end) { segregate(begin, end, equalsConstant); });
296 
297   // Split equivalence groups by comparing relocations until convergence
298   do {
299     icfRepeat = false;
300     forEachClass([&](size_t begin, size_t end) {
301       segregate(begin, end, equalsVariable);
302     });
303   } while (icfRepeat);
304   log("ICF needed " + Twine(icfPass) + " iterations");
305 
306   // Fold sections within equivalence classes
307   forEachClass([&](size_t begin, size_t end) {
308     if (end - begin < 2)
309       return;
310     ConcatInputSection *beginIsec = icfInputs[begin];
311     for (size_t i = begin + 1; i < end; ++i)
312       beginIsec->foldIdentical(icfInputs[i]);
313   });
314 }
315 
316 // Split an equivalence class into smaller classes.
317 void ICF::segregate(
318     size_t begin, size_t end,
319     std::function<bool(const ConcatInputSection *, const ConcatInputSection *)>
320         equals) {
321   while (begin < end) {
322     // Divide [begin, end) into two. Let mid be the start index of the
323     // second group.
324     auto bound = std::stable_partition(icfInputs.begin() + begin + 1,
325                                        icfInputs.begin() + end,
326                                        [&](ConcatInputSection *isec) {
327                                          return equals(icfInputs[begin], isec);
328                                        });
329     size_t mid = bound - icfInputs.begin();
330 
331     // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an
332     // equivalence class ID because every group ends with a unique index.
333     for (size_t i = begin; i < mid; ++i)
334       icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid;
335 
336     // If we created a group, we need to iterate the main loop again.
337     if (mid != end)
338       icfRepeat = true;
339 
340     begin = mid;
341   }
342 }
343 
344 void macho::foldIdenticalSections() {
345   TimeTraceScope timeScope("Fold Identical Code Sections");
346   // The ICF equivalence-class segregation algorithm relies on pre-computed
347   // hashes of InputSection::data for the ConcatOutputSection::inputs and all
348   // sections referenced by their relocs. We could recursively traverse the
349   // relocs to find every referenced InputSection, but that precludes easy
350   // parallelization. Therefore, we hash every InputSection here where we have
351   // them all accessible as simple vectors.
352 
353   // If an InputSection is ineligible for ICF, we give it a unique ID to force
354   // it into an unfoldable singleton equivalence class.  Begin the unique-ID
355   // space at inputSections.size(), so that it will never intersect with
356   // equivalence-class IDs which begin at 0. Since hashes & unique IDs never
357   // coexist with equivalence-class IDs, this is not necessary, but might help
358   // someone keep the numbers straight in case we ever need to debug the
359   // ICF::segregate()
360   std::vector<ConcatInputSection *> hashable;
361   uint64_t icfUniqueID = inputSections.size();
362   for (ConcatInputSection *isec : inputSections) {
363     // FIXME: consider non-code __text sections as hashable?
364     bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) &&
365                       !isec->shouldOmitFromOutput() &&
366                       sectionType(isec->getFlags()) == MachO::S_REGULAR;
367     if (isHashable) {
368       hashable.push_back(isec);
369       for (Defined *d : isec->symbols)
370         if (d->unwindEntry)
371           hashable.push_back(d->unwindEntry);
372     } else {
373       isec->icfEqClass[0] = ++icfUniqueID;
374     }
375   }
376   parallelForEach(hashable, [](ConcatInputSection *isec) {
377     assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID!
378     // Turn-on the top bit to guarantee that valid hashes have no collisions
379     // with the small-integer unique IDs for ICF-ineligible sections
380     isec->icfEqClass[0] = xxHash64(isec->data) | (1ull << 63);
381   });
382   // Now that every input section is either hashed or marked as unique, run the
383   // segregation algorithm to detect foldable subsections.
384   ICF(hashable).run();
385 }
386