1 //===- ICF.cpp ------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ICF.h" 10 #include "ConcatOutputSection.h" 11 #include "InputSection.h" 12 #include "Symbols.h" 13 #include "UnwindInfoSection.h" 14 15 #include "llvm/Support/Parallel.h" 16 #include "llvm/Support/TimeProfiler.h" 17 18 #include <atomic> 19 20 using namespace llvm; 21 using namespace lld; 22 using namespace lld::macho; 23 24 class ICF { 25 public: 26 ICF(std::vector<ConcatInputSection *> &inputs); 27 28 void run(); 29 void segregate(size_t begin, size_t end, 30 std::function<bool(const ConcatInputSection *, 31 const ConcatInputSection *)> 32 equals); 33 size_t findBoundary(size_t begin, size_t end); 34 void forEachClassRange(size_t begin, size_t end, 35 std::function<void(size_t, size_t)> func); 36 void forEachClass(std::function<void(size_t, size_t)> func); 37 38 // ICF needs a copy of the inputs vector because its equivalence-class 39 // segregation algorithm destroys the proper sequence. 40 std::vector<ConcatInputSection *> icfInputs; 41 }; 42 43 ICF::ICF(std::vector<ConcatInputSection *> &inputs) { 44 icfInputs.assign(inputs.begin(), inputs.end()); 45 } 46 47 // ICF = Identical Code Folding 48 // 49 // We only fold __TEXT,__text, so this is really "code" folding, and not 50 // "COMDAT" folding. String and scalar constant literals are deduplicated 51 // elsewhere. 52 // 53 // Summary of segments & sections: 54 // 55 // The __TEXT segment is readonly at the MMU. Some sections are already 56 // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are 57 // synthetic and inherently free of duplicates (__TEXT,__stubs & 58 // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const, 59 // because doing so induces many test failures. 60 // 61 // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and 62 // thus ineligible for ICF. 63 // 64 // The __DATA_CONST segment is read/write at the MMU, but is logically const to 65 // the application after dyld applies fixups to pointer data. We currently 66 // fold only the __DATA_CONST,__cfstring section. 67 // 68 // The __DATA segment is read/write at the MMU, and as application-writeable 69 // data, none of its sections are eligible for ICF. 70 // 71 // Please see the large block comment in lld/ELF/ICF.cpp for an explanation 72 // of the segregation algorithm. 73 // 74 // FIXME(gkm): implement keep-unique attributes 75 // FIXME(gkm): implement address-significance tables for MachO object files 76 77 static unsigned icfPass = 0; 78 static std::atomic<bool> icfRepeat{false}; 79 80 // Compare "non-moving" parts of two ConcatInputSections, namely everything 81 // except references to other ConcatInputSections. 82 static bool equalsConstant(const ConcatInputSection *ia, 83 const ConcatInputSection *ib) { 84 // We can only fold within the same OutputSection. 85 if (ia->parent != ib->parent) 86 return false; 87 if (ia->data.size() != ib->data.size()) 88 return false; 89 if (ia->data != ib->data) 90 return false; 91 if (ia->relocs.size() != ib->relocs.size()) 92 return false; 93 auto f = [](const Reloc &ra, const Reloc &rb) { 94 if (ra.type != rb.type) 95 return false; 96 if (ra.pcrel != rb.pcrel) 97 return false; 98 if (ra.length != rb.length) 99 return false; 100 if (ra.offset != rb.offset) 101 return false; 102 if (ra.addend != rb.addend) 103 return false; 104 if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>()) 105 return false; 106 107 InputSection *isecA, *isecB; 108 109 uint64_t valueA = 0; 110 uint64_t valueB = 0; 111 if (ra.referent.is<Symbol *>()) { 112 const auto *sa = ra.referent.get<Symbol *>(); 113 const auto *sb = rb.referent.get<Symbol *>(); 114 if (sa->kind() != sb->kind()) 115 return false; 116 if (!isa<Defined>(sa)) { 117 assert(isa<DylibSymbol>(sa)); 118 return sa == sb; 119 } 120 const auto *da = cast<Defined>(sa); 121 const auto *db = cast<Defined>(sb); 122 if (!da->isec || !db->isec) { 123 assert(da->isAbsolute() && db->isAbsolute()); 124 return da->value == db->value; 125 } 126 isecA = da->isec; 127 valueA = da->value; 128 isecB = db->isec; 129 valueB = db->value; 130 } else { 131 isecA = ra.referent.get<InputSection *>(); 132 isecB = rb.referent.get<InputSection *>(); 133 } 134 135 if (isecA->parent != isecB->parent) 136 return false; 137 // Sections with identical parents should be of the same kind. 138 assert(isecA->kind() == isecB->kind()); 139 // We will compare ConcatInputSection contents in equalsVariable. 140 if (isa<ConcatInputSection>(isecA)) 141 return true; 142 // Else we have two literal sections. References to them are equal iff their 143 // offsets in the output section are equal. 144 return isecA->getOffset(valueA + ra.addend) == 145 isecB->getOffset(valueB + rb.addend); 146 }; 147 return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), 148 f); 149 } 150 151 // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not 152 // handled by equalsConstant(). 153 static bool equalsVariable(const ConcatInputSection *ia, 154 const ConcatInputSection *ib) { 155 assert(ia->relocs.size() == ib->relocs.size()); 156 auto f = [](const Reloc &ra, const Reloc &rb) { 157 // We already filtered out mismatching values/addends in equalsConstant. 158 if (ra.referent == rb.referent) 159 return true; 160 const ConcatInputSection *isecA, *isecB; 161 if (ra.referent.is<Symbol *>()) { 162 // Matching DylibSymbols are already filtered out by the 163 // identical-referent check above. Non-matching DylibSymbols were filtered 164 // out in equalsConstant(). So we can safely cast to Defined here. 165 const auto *da = cast<Defined>(ra.referent.get<Symbol *>()); 166 const auto *db = cast<Defined>(rb.referent.get<Symbol *>()); 167 if (da->isAbsolute()) 168 return true; 169 isecA = dyn_cast<ConcatInputSection>(da->isec); 170 if (!isecA) 171 return true; // literal sections were checked in equalsConstant. 172 isecB = cast<ConcatInputSection>(db->isec); 173 } else { 174 const auto *sa = ra.referent.get<InputSection *>(); 175 const auto *sb = rb.referent.get<InputSection *>(); 176 isecA = dyn_cast<ConcatInputSection>(sa); 177 if (!isecA) 178 return true; 179 isecB = cast<ConcatInputSection>(sb); 180 } 181 return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2]; 182 }; 183 if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f)) 184 return false; 185 186 // If there are symbols with associated unwind info, check that the unwind 187 // info matches. For simplicity, we only handle the case where there are only 188 // symbols at offset zero within the section (which is typically the case with 189 // .subsections_via_symbols.) 190 auto hasCU = [](Defined *d) { return d->compactUnwind; }; 191 auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasCU); 192 auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasCU); 193 if (itA == ia->symbols.end()) 194 return itB == ib->symbols.end(); 195 if (itB == ib->symbols.end()) 196 return false; 197 const Defined *da = *itA; 198 const Defined *db = *itB; 199 if (da->compactUnwind->icfEqClass[icfPass % 2] != 200 db->compactUnwind->icfEqClass[icfPass % 2] || 201 da->value != 0 || db->value != 0) 202 return false; 203 auto isZero = [](Defined *d) { return d->value == 0; }; 204 return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) == 205 ia->symbols.end() && 206 std::find_if_not(std::next(itB), ib->symbols.end(), isZero) == 207 ib->symbols.end(); 208 } 209 210 // Find the first InputSection after BEGIN whose equivalence class differs 211 size_t ICF::findBoundary(size_t begin, size_t end) { 212 uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2]; 213 for (size_t i = begin + 1; i < end; ++i) 214 if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2]) 215 return i; 216 return end; 217 } 218 219 // Invoke FUNC on subranges with matching equivalence class 220 void ICF::forEachClassRange(size_t begin, size_t end, 221 std::function<void(size_t, size_t)> func) { 222 while (begin < end) { 223 size_t mid = findBoundary(begin, end); 224 func(begin, mid); 225 begin = mid; 226 } 227 } 228 229 // Split icfInputs into shards, then parallelize invocation of FUNC on subranges 230 // with matching equivalence class 231 void ICF::forEachClass(std::function<void(size_t, size_t)> func) { 232 // Only use threads when the benefits outweigh the overhead. 233 const size_t threadingThreshold = 1024; 234 if (icfInputs.size() < threadingThreshold) { 235 forEachClassRange(0, icfInputs.size(), func); 236 ++icfPass; 237 return; 238 } 239 240 // Shard into non-overlapping intervals, and call FUNC in parallel. The 241 // sharding must be completed before any calls to FUNC are made so that FUNC 242 // can modify the InputSection in its shard without causing data races. 243 const size_t shards = 256; 244 size_t step = icfInputs.size() / shards; 245 size_t boundaries[shards + 1]; 246 boundaries[0] = 0; 247 boundaries[shards] = icfInputs.size(); 248 parallelForEachN(1, shards, [&](size_t i) { 249 boundaries[i] = findBoundary((i - 1) * step, icfInputs.size()); 250 }); 251 parallelForEachN(1, shards + 1, [&](size_t i) { 252 if (boundaries[i - 1] < boundaries[i]) { 253 forEachClassRange(boundaries[i - 1], boundaries[i], func); 254 } 255 }); 256 ++icfPass; 257 } 258 259 void ICF::run() { 260 // Into each origin-section hash, combine all reloc referent section hashes. 261 for (icfPass = 0; icfPass < 2; ++icfPass) { 262 parallelForEach(icfInputs, [&](ConcatInputSection *isec) { 263 uint64_t hash = isec->icfEqClass[icfPass % 2]; 264 for (const Reloc &r : isec->relocs) { 265 if (auto *sym = r.referent.dyn_cast<Symbol *>()) { 266 if (auto *dylibSym = dyn_cast<DylibSymbol>(sym)) 267 hash += dylibSym->stubsHelperIndex; 268 else if (auto *defined = dyn_cast<Defined>(sym)) { 269 if (defined->isec) { 270 if (auto isec = dyn_cast<ConcatInputSection>(defined->isec)) 271 hash += defined->value + isec->icfEqClass[icfPass % 2]; 272 else 273 hash += defined->isec->kind() + 274 defined->isec->getOffset(defined->value); 275 } else { 276 hash += defined->value; 277 } 278 } else if (!isa<Undefined>(sym)) 279 llvm_unreachable("foldIdenticalSections symbol kind"); 280 } 281 } 282 // Set MSB to 1 to avoid collisions with non-hashed classes. 283 isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 63); 284 }); 285 } 286 287 llvm::stable_sort( 288 icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) { 289 return a->icfEqClass[0] < b->icfEqClass[0]; 290 }); 291 forEachClass( 292 [&](size_t begin, size_t end) { segregate(begin, end, equalsConstant); }); 293 294 // Split equivalence groups by comparing relocations until convergence 295 do { 296 icfRepeat = false; 297 forEachClass([&](size_t begin, size_t end) { 298 segregate(begin, end, equalsVariable); 299 }); 300 } while (icfRepeat); 301 log("ICF needed " + Twine(icfPass) + " iterations"); 302 303 // Fold sections within equivalence classes 304 forEachClass([&](size_t begin, size_t end) { 305 if (end - begin < 2) 306 return; 307 ConcatInputSection *beginIsec = icfInputs[begin]; 308 for (size_t i = begin + 1; i < end; ++i) 309 beginIsec->foldIdentical(icfInputs[i]); 310 }); 311 } 312 313 // Split an equivalence class into smaller classes. 314 void ICF::segregate( 315 size_t begin, size_t end, 316 std::function<bool(const ConcatInputSection *, const ConcatInputSection *)> 317 equals) { 318 while (begin < end) { 319 // Divide [begin, end) into two. Let mid be the start index of the 320 // second group. 321 auto bound = std::stable_partition(icfInputs.begin() + begin + 1, 322 icfInputs.begin() + end, 323 [&](ConcatInputSection *isec) { 324 return equals(icfInputs[begin], isec); 325 }); 326 size_t mid = bound - icfInputs.begin(); 327 328 // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an 329 // equivalence class ID because every group ends with a unique index. 330 for (size_t i = begin; i < mid; ++i) 331 icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid; 332 333 // If we created a group, we need to iterate the main loop again. 334 if (mid != end) 335 icfRepeat = true; 336 337 begin = mid; 338 } 339 } 340 341 void macho::foldIdenticalSections() { 342 TimeTraceScope timeScope("Fold Identical Code Sections"); 343 // The ICF equivalence-class segregation algorithm relies on pre-computed 344 // hashes of InputSection::data for the ConcatOutputSection::inputs and all 345 // sections referenced by their relocs. We could recursively traverse the 346 // relocs to find every referenced InputSection, but that precludes easy 347 // parallelization. Therefore, we hash every InputSection here where we have 348 // them all accessible as simple vectors. 349 350 // If an InputSection is ineligible for ICF, we give it a unique ID to force 351 // it into an unfoldable singleton equivalence class. Begin the unique-ID 352 // space at inputSections.size(), so that it will never intersect with 353 // equivalence-class IDs which begin at 0. Since hashes & unique IDs never 354 // coexist with equivalence-class IDs, this is not necessary, but might help 355 // someone keep the numbers straight in case we ever need to debug the 356 // ICF::segregate() 357 std::vector<ConcatInputSection *> hashable; 358 uint64_t icfUniqueID = inputSections.size(); 359 for (ConcatInputSection *isec : inputSections) { 360 // FIXME: consider non-code __text sections as hashable? 361 bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) && 362 !isec->shouldOmitFromOutput() && isec->isHashableForICF(); 363 if (isHashable) { 364 hashable.push_back(isec); 365 for (Defined *d : isec->symbols) 366 if (d->compactUnwind) 367 hashable.push_back(d->compactUnwind); 368 } else { 369 isec->icfEqClass[0] = ++icfUniqueID; 370 } 371 } 372 parallelForEach(hashable, 373 [](ConcatInputSection *isec) { isec->hashForICF(); }); 374 // Now that every input section is either hashed or marked as unique, run the 375 // segregation algorithm to detect foldable subsections. 376 ICF(hashable).run(); 377 } 378