xref: /llvm-project-15.0.7/lld/MachO/ICF.cpp (revision bbffece3)
1 //===- ICF.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ICF.h"
10 #include "ConcatOutputSection.h"
11 #include "InputSection.h"
12 #include "Symbols.h"
13 #include "UnwindInfoSection.h"
14 
15 #include "lld/Common/CommonLinkerContext.h"
16 #include "llvm/Support/Parallel.h"
17 #include "llvm/Support/TimeProfiler.h"
18 #include "llvm/Support/xxhash.h"
19 
20 #include <atomic>
21 
22 using namespace llvm;
23 using namespace lld;
24 using namespace lld::macho;
25 
26 static constexpr bool verboseDiagnostics = false;
27 
28 class ICF {
29 public:
30   ICF(std::vector<ConcatInputSection *> &inputs);
31   void run();
32 
33   using EqualsFn = bool (ICF::*)(const ConcatInputSection *,
34                                  const ConcatInputSection *);
35   void segregate(size_t begin, size_t end, EqualsFn);
36   size_t findBoundary(size_t begin, size_t end);
37   void forEachClassRange(size_t begin, size_t end,
38                          llvm::function_ref<void(size_t, size_t)> func);
39   void forEachClass(llvm::function_ref<void(size_t, size_t)> func);
40 
41   bool equalsConstant(const ConcatInputSection *ia,
42                       const ConcatInputSection *ib);
43   bool equalsVariable(const ConcatInputSection *ia,
44                       const ConcatInputSection *ib);
45 
46   // ICF needs a copy of the inputs vector because its equivalence-class
47   // segregation algorithm destroys the proper sequence.
48   std::vector<ConcatInputSection *> icfInputs;
49 
50   unsigned icfPass = 0;
51   std::atomic<bool> icfRepeat{false};
52   std::atomic<uint64_t> equalsConstantCount{0};
53   std::atomic<uint64_t> equalsVariableCount{0};
54 };
55 
56 ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
57   icfInputs.assign(inputs.begin(), inputs.end());
58 }
59 
60 // ICF = Identical Code Folding
61 //
62 // We only fold __TEXT,__text, so this is really "code" folding, and not
63 // "COMDAT" folding. String and scalar constant literals are deduplicated
64 // elsewhere.
65 //
66 // Summary of segments & sections:
67 //
68 // The __TEXT segment is readonly at the MMU. Some sections are already
69 // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
70 // synthetic and inherently free of duplicates (__TEXT,__stubs &
71 // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
72 // because doing so induces many test failures.
73 //
74 // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
75 // thus ineligible for ICF.
76 //
77 // The __DATA_CONST segment is read/write at the MMU, but is logically const to
78 // the application after dyld applies fixups to pointer data. We currently
79 // fold only the __DATA_CONST,__cfstring section.
80 //
81 // The __DATA segment is read/write at the MMU, and as application-writeable
82 // data, none of its sections are eligible for ICF.
83 //
84 // Please see the large block comment in lld/ELF/ICF.cpp for an explanation
85 // of the segregation algorithm.
86 //
87 // FIXME(gkm): implement keep-unique attributes
88 // FIXME(gkm): implement address-significance tables for MachO object files
89 
90 // Compare "non-moving" parts of two ConcatInputSections, namely everything
91 // except references to other ConcatInputSections.
92 bool ICF::equalsConstant(const ConcatInputSection *ia,
93                          const ConcatInputSection *ib) {
94   if (verboseDiagnostics)
95     ++equalsConstantCount;
96   // We can only fold within the same OutputSection.
97   if (ia->parent != ib->parent)
98     return false;
99   if (ia->data.size() != ib->data.size())
100     return false;
101   if (ia->data != ib->data)
102     return false;
103   if (ia->relocs.size() != ib->relocs.size())
104     return false;
105   auto f = [](const Reloc &ra, const Reloc &rb) {
106     if (ra.type != rb.type)
107       return false;
108     if (ra.pcrel != rb.pcrel)
109       return false;
110     if (ra.length != rb.length)
111       return false;
112     if (ra.offset != rb.offset)
113       return false;
114     if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>())
115       return false;
116 
117     InputSection *isecA, *isecB;
118 
119     uint64_t valueA = 0;
120     uint64_t valueB = 0;
121     if (ra.referent.is<Symbol *>()) {
122       const auto *sa = ra.referent.get<Symbol *>();
123       const auto *sb = rb.referent.get<Symbol *>();
124       if (sa->kind() != sb->kind())
125         return false;
126       // ICF runs before Undefineds are treated (and potentially converted into
127       // DylibSymbols).
128       if (isa<DylibSymbol>(sa) || isa<Undefined>(sa))
129         return sa == sb && ra.addend == rb.addend;
130       assert(isa<Defined>(sa));
131       const auto *da = cast<Defined>(sa);
132       const auto *db = cast<Defined>(sb);
133       if (!da->isec || !db->isec) {
134         assert(da->isAbsolute() && db->isAbsolute());
135         return da->value + ra.addend == db->value + rb.addend;
136       }
137       isecA = da->isec;
138       valueA = da->value;
139       isecB = db->isec;
140       valueB = db->value;
141     } else {
142       isecA = ra.referent.get<InputSection *>();
143       isecB = rb.referent.get<InputSection *>();
144     }
145 
146     if (isecA->parent != isecB->parent)
147       return false;
148     // Sections with identical parents should be of the same kind.
149     assert(isecA->kind() == isecB->kind());
150     // We will compare ConcatInputSection contents in equalsVariable.
151     if (isa<ConcatInputSection>(isecA))
152       return ra.addend == rb.addend;
153     // Else we have two literal sections. References to them are equal iff their
154     // offsets in the output section are equal.
155     if (ra.referent.is<Symbol *>())
156       // For symbol relocs, we compare the contents at the symbol address. We
157       // don't do `getOffset(value + addend)` because value + addend may not be
158       // a valid offset in the literal section.
159       return isecA->getOffset(valueA) == isecB->getOffset(valueB) &&
160              ra.addend == rb.addend;
161     else {
162       assert(valueA == 0 && valueB == 0);
163       // For section relocs, we compare the content at the section offset.
164       return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend);
165     }
166   };
167   return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
168                     f);
169 }
170 
171 // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not
172 // handled by equalsConstant().
173 bool ICF::equalsVariable(const ConcatInputSection *ia,
174                          const ConcatInputSection *ib) {
175   if (verboseDiagnostics)
176     ++equalsVariableCount;
177   assert(ia->relocs.size() == ib->relocs.size());
178   auto f = [this](const Reloc &ra, const Reloc &rb) {
179     // We already filtered out mismatching values/addends in equalsConstant.
180     if (ra.referent == rb.referent)
181       return true;
182     const ConcatInputSection *isecA, *isecB;
183     if (ra.referent.is<Symbol *>()) {
184       // Matching DylibSymbols are already filtered out by the
185       // identical-referent check above. Non-matching DylibSymbols were filtered
186       // out in equalsConstant(). So we can safely cast to Defined here.
187       const auto *da = cast<Defined>(ra.referent.get<Symbol *>());
188       const auto *db = cast<Defined>(rb.referent.get<Symbol *>());
189       if (da->isAbsolute())
190         return true;
191       isecA = dyn_cast<ConcatInputSection>(da->isec);
192       if (!isecA)
193         return true; // literal sections were checked in equalsConstant.
194       isecB = cast<ConcatInputSection>(db->isec);
195     } else {
196       const auto *sa = ra.referent.get<InputSection *>();
197       const auto *sb = rb.referent.get<InputSection *>();
198       isecA = dyn_cast<ConcatInputSection>(sa);
199       if (!isecA)
200         return true;
201       isecB = cast<ConcatInputSection>(sb);
202     }
203     return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2];
204   };
205   if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f))
206     return false;
207 
208   // If there are symbols with associated unwind info, check that the unwind
209   // info matches. For simplicity, we only handle the case where there are only
210   // symbols at offset zero within the section (which is typically the case with
211   // .subsections_via_symbols.)
212   auto hasCU = [](Defined *d) { return d->unwindEntry != nullptr; };
213   auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasCU);
214   auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasCU);
215   if (itA == ia->symbols.end())
216     return itB == ib->symbols.end();
217   if (itB == ib->symbols.end())
218     return false;
219   const Defined *da = *itA;
220   const Defined *db = *itB;
221   if (da->unwindEntry->icfEqClass[icfPass % 2] !=
222           db->unwindEntry->icfEqClass[icfPass % 2] ||
223       da->value != 0 || db->value != 0)
224     return false;
225   auto isZero = [](Defined *d) { return d->value == 0; };
226   return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) ==
227              ia->symbols.end() &&
228          std::find_if_not(std::next(itB), ib->symbols.end(), isZero) ==
229              ib->symbols.end();
230 }
231 
232 // Find the first InputSection after BEGIN whose equivalence class differs
233 size_t ICF::findBoundary(size_t begin, size_t end) {
234   uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2];
235   for (size_t i = begin + 1; i < end; ++i)
236     if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2])
237       return i;
238   return end;
239 }
240 
241 // Invoke FUNC on subranges with matching equivalence class
242 void ICF::forEachClassRange(size_t begin, size_t end,
243                             llvm::function_ref<void(size_t, size_t)> func) {
244   while (begin < end) {
245     size_t mid = findBoundary(begin, end);
246     func(begin, mid);
247     begin = mid;
248   }
249 }
250 
251 // Split icfInputs into shards, then parallelize invocation of FUNC on subranges
252 // with matching equivalence class
253 void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {
254   // Only use threads when the benefits outweigh the overhead.
255   const size_t threadingThreshold = 1024;
256   if (icfInputs.size() < threadingThreshold) {
257     forEachClassRange(0, icfInputs.size(), func);
258     ++icfPass;
259     return;
260   }
261 
262   // Shard into non-overlapping intervals, and call FUNC in parallel.  The
263   // sharding must be completed before any calls to FUNC are made so that FUNC
264   // can modify the InputSection in its shard without causing data races.
265   const size_t shards = 256;
266   size_t step = icfInputs.size() / shards;
267   size_t boundaries[shards + 1];
268   boundaries[0] = 0;
269   boundaries[shards] = icfInputs.size();
270   parallelForEachN(1, shards, [&](size_t i) {
271     boundaries[i] = findBoundary((i - 1) * step, icfInputs.size());
272   });
273   parallelForEachN(1, shards + 1, [&](size_t i) {
274     if (boundaries[i - 1] < boundaries[i]) {
275       forEachClassRange(boundaries[i - 1], boundaries[i], func);
276     }
277   });
278   ++icfPass;
279 }
280 
281 void ICF::run() {
282   // Into each origin-section hash, combine all reloc referent section hashes.
283   for (icfPass = 0; icfPass < 2; ++icfPass) {
284     parallelForEach(icfInputs, [&](ConcatInputSection *isec) {
285       uint32_t hash = isec->icfEqClass[icfPass % 2];
286       for (const Reloc &r : isec->relocs) {
287         if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
288           if (auto *defined = dyn_cast<Defined>(sym)) {
289             if (defined->isec) {
290               if (auto referentIsec =
291                       dyn_cast<ConcatInputSection>(defined->isec))
292                 hash += defined->value + referentIsec->icfEqClass[icfPass % 2];
293               else
294                 hash += defined->isec->kind() +
295                         defined->isec->getOffset(defined->value);
296             } else {
297               hash += defined->value;
298             }
299           } else {
300             // ICF runs before Undefined diags
301             assert(isa<Undefined>(sym) || isa<DylibSymbol>(sym));
302           }
303         }
304       }
305       // Set MSB to 1 to avoid collisions with non-hashed classes.
306       isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 31);
307     });
308   }
309 
310   llvm::stable_sort(
311       icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {
312         return a->icfEqClass[0] < b->icfEqClass[0];
313       });
314   forEachClass([&](size_t begin, size_t end) {
315     segregate(begin, end, &ICF::equalsConstant);
316   });
317 
318   // Split equivalence groups by comparing relocations until convergence
319   do {
320     icfRepeat = false;
321     forEachClass([&](size_t begin, size_t end) {
322       segregate(begin, end, &ICF::equalsVariable);
323     });
324   } while (icfRepeat);
325   log("ICF needed " + Twine(icfPass) + " iterations");
326   if (verboseDiagnostics) {
327     log("equalsConstant() called " + Twine(equalsConstantCount) + " times");
328     log("equalsVariable() called " + Twine(equalsVariableCount) + " times");
329   }
330 
331   // Fold sections within equivalence classes
332   forEachClass([&](size_t begin, size_t end) {
333     if (end - begin < 2)
334       return;
335     ConcatInputSection *beginIsec = icfInputs[begin];
336     for (size_t i = begin + 1; i < end; ++i)
337       beginIsec->foldIdentical(icfInputs[i]);
338   });
339 }
340 
341 // Split an equivalence class into smaller classes.
342 void ICF::segregate(size_t begin, size_t end, EqualsFn equals) {
343   while (begin < end) {
344     // Divide [begin, end) into two. Let mid be the start index of the
345     // second group.
346     auto bound = std::stable_partition(
347         icfInputs.begin() + begin + 1, icfInputs.begin() + end,
348         [&](ConcatInputSection *isec) {
349           return (this->*equals)(icfInputs[begin], isec);
350         });
351     size_t mid = bound - icfInputs.begin();
352 
353     // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an
354     // equivalence class ID because every group ends with a unique index.
355     for (size_t i = begin; i < mid; ++i)
356       icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid;
357 
358     // If we created a group, we need to iterate the main loop again.
359     if (mid != end)
360       icfRepeat = true;
361 
362     begin = mid;
363   }
364 }
365 
366 void macho::foldIdenticalSections() {
367   TimeTraceScope timeScope("Fold Identical Code Sections");
368   // The ICF equivalence-class segregation algorithm relies on pre-computed
369   // hashes of InputSection::data for the ConcatOutputSection::inputs and all
370   // sections referenced by their relocs. We could recursively traverse the
371   // relocs to find every referenced InputSection, but that precludes easy
372   // parallelization. Therefore, we hash every InputSection here where we have
373   // them all accessible as simple vectors.
374 
375   // If an InputSection is ineligible for ICF, we give it a unique ID to force
376   // it into an unfoldable singleton equivalence class.  Begin the unique-ID
377   // space at inputSections.size(), so that it will never intersect with
378   // equivalence-class IDs which begin at 0. Since hashes & unique IDs never
379   // coexist with equivalence-class IDs, this is not necessary, but might help
380   // someone keep the numbers straight in case we ever need to debug the
381   // ICF::segregate()
382   std::vector<ConcatInputSection *> hashable;
383   uint64_t icfUniqueID = inputSections.size();
384   for (ConcatInputSection *isec : inputSections) {
385     // FIXME: consider non-code __text sections as hashable?
386     bool isHashable = (isCodeSection(isec) || isCfStringSection(isec) ||
387                        isClassRefsSection(isec)) &&
388                       !isec->shouldOmitFromOutput() &&
389                       sectionType(isec->getFlags()) == MachO::S_REGULAR;
390     if (isHashable) {
391       hashable.push_back(isec);
392       for (Defined *d : isec->symbols)
393         if (d->unwindEntry)
394           hashable.push_back(d->unwindEntry);
395 
396       // __cfstring has embedded addends that foil ICF's hashing / equality
397       // checks. (We can ignore embedded addends when doing ICF because the same
398       // information gets recorded in our Reloc structs.) We therefore create a
399       // mutable copy of the CFString and zero out the embedded addends before
400       // performing any hashing / equality checks.
401       if (isCfStringSection(isec) || isClassRefsSection(isec)) {
402         // We have to do this copying serially as the BumpPtrAllocator is not
403         // thread-safe. FIXME: Make a thread-safe allocator.
404         MutableArrayRef<uint8_t> copy = isec->data.copy(bAlloc());
405         for (const Reloc &r : isec->relocs)
406           target->relocateOne(copy.data() + r.offset, r, /*va=*/0,
407                               /*relocVA=*/0);
408         isec->data = copy;
409       }
410     } else {
411       isec->icfEqClass[0] = ++icfUniqueID;
412     }
413   }
414   parallelForEach(hashable, [](ConcatInputSection *isec) {
415     assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID!
416     // Turn-on the top bit to guarantee that valid hashes have no collisions
417     // with the small-integer unique IDs for ICF-ineligible sections
418     isec->icfEqClass[0] = xxHash64(isec->data) | (1ull << 31);
419   });
420   // Now that every input section is either hashed or marked as unique, run the
421   // segregation algorithm to detect foldable subsections.
422   ICF(hashable).run();
423 }
424