1 //===- ICF.cpp ------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ICF.h" 10 #include "ConcatOutputSection.h" 11 #include "InputSection.h" 12 #include "Symbols.h" 13 #include "UnwindInfoSection.h" 14 15 #include "lld/Common/CommonLinkerContext.h" 16 #include "llvm/Support/Parallel.h" 17 #include "llvm/Support/TimeProfiler.h" 18 #include "llvm/Support/xxhash.h" 19 20 #include <atomic> 21 22 using namespace llvm; 23 using namespace lld; 24 using namespace lld::macho; 25 26 static constexpr bool verboseDiagnostics = false; 27 28 class ICF { 29 public: 30 ICF(std::vector<ConcatInputSection *> &inputs); 31 void run(); 32 33 using EqualsFn = bool (ICF::*)(const ConcatInputSection *, 34 const ConcatInputSection *); 35 void segregate(size_t begin, size_t end, EqualsFn); 36 size_t findBoundary(size_t begin, size_t end); 37 void forEachClassRange(size_t begin, size_t end, 38 llvm::function_ref<void(size_t, size_t)> func); 39 void forEachClass(llvm::function_ref<void(size_t, size_t)> func); 40 41 bool equalsConstant(const ConcatInputSection *ia, 42 const ConcatInputSection *ib); 43 bool equalsVariable(const ConcatInputSection *ia, 44 const ConcatInputSection *ib); 45 46 // ICF needs a copy of the inputs vector because its equivalence-class 47 // segregation algorithm destroys the proper sequence. 48 std::vector<ConcatInputSection *> icfInputs; 49 50 unsigned icfPass = 0; 51 std::atomic<bool> icfRepeat{false}; 52 std::atomic<uint64_t> equalsConstantCount{0}; 53 std::atomic<uint64_t> equalsVariableCount{0}; 54 }; 55 56 ICF::ICF(std::vector<ConcatInputSection *> &inputs) { 57 icfInputs.assign(inputs.begin(), inputs.end()); 58 } 59 60 // ICF = Identical Code Folding 61 // 62 // We only fold __TEXT,__text, so this is really "code" folding, and not 63 // "COMDAT" folding. String and scalar constant literals are deduplicated 64 // elsewhere. 65 // 66 // Summary of segments & sections: 67 // 68 // The __TEXT segment is readonly at the MMU. Some sections are already 69 // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are 70 // synthetic and inherently free of duplicates (__TEXT,__stubs & 71 // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const, 72 // because doing so induces many test failures. 73 // 74 // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and 75 // thus ineligible for ICF. 76 // 77 // The __DATA_CONST segment is read/write at the MMU, but is logically const to 78 // the application after dyld applies fixups to pointer data. We currently 79 // fold only the __DATA_CONST,__cfstring section. 80 // 81 // The __DATA segment is read/write at the MMU, and as application-writeable 82 // data, none of its sections are eligible for ICF. 83 // 84 // Please see the large block comment in lld/ELF/ICF.cpp for an explanation 85 // of the segregation algorithm. 86 // 87 // FIXME(gkm): implement keep-unique attributes 88 // FIXME(gkm): implement address-significance tables for MachO object files 89 90 // Compare "non-moving" parts of two ConcatInputSections, namely everything 91 // except references to other ConcatInputSections. 92 bool ICF::equalsConstant(const ConcatInputSection *ia, 93 const ConcatInputSection *ib) { 94 if (verboseDiagnostics) 95 ++equalsConstantCount; 96 // We can only fold within the same OutputSection. 97 if (ia->parent != ib->parent) 98 return false; 99 if (ia->data.size() != ib->data.size()) 100 return false; 101 if (ia->data != ib->data) 102 return false; 103 if (ia->relocs.size() != ib->relocs.size()) 104 return false; 105 auto f = [](const Reloc &ra, const Reloc &rb) { 106 if (ra.type != rb.type) 107 return false; 108 if (ra.pcrel != rb.pcrel) 109 return false; 110 if (ra.length != rb.length) 111 return false; 112 if (ra.offset != rb.offset) 113 return false; 114 if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>()) 115 return false; 116 117 InputSection *isecA, *isecB; 118 119 uint64_t valueA = 0; 120 uint64_t valueB = 0; 121 if (ra.referent.is<Symbol *>()) { 122 const auto *sa = ra.referent.get<Symbol *>(); 123 const auto *sb = rb.referent.get<Symbol *>(); 124 if (sa->kind() != sb->kind()) 125 return false; 126 // ICF runs before Undefineds are treated (and potentially converted into 127 // DylibSymbols). 128 if (isa<DylibSymbol>(sa) || isa<Undefined>(sa)) 129 return sa == sb && ra.addend == rb.addend; 130 assert(isa<Defined>(sa)); 131 const auto *da = cast<Defined>(sa); 132 const auto *db = cast<Defined>(sb); 133 if (!da->isec || !db->isec) { 134 assert(da->isAbsolute() && db->isAbsolute()); 135 return da->value + ra.addend == db->value + rb.addend; 136 } 137 isecA = da->isec; 138 valueA = da->value; 139 isecB = db->isec; 140 valueB = db->value; 141 } else { 142 isecA = ra.referent.get<InputSection *>(); 143 isecB = rb.referent.get<InputSection *>(); 144 } 145 146 if (isecA->parent != isecB->parent) 147 return false; 148 // Sections with identical parents should be of the same kind. 149 assert(isecA->kind() == isecB->kind()); 150 // We will compare ConcatInputSection contents in equalsVariable. 151 if (isa<ConcatInputSection>(isecA)) 152 return ra.addend == rb.addend; 153 // Else we have two literal sections. References to them are equal iff their 154 // offsets in the output section are equal. 155 if (ra.referent.is<Symbol *>()) 156 // For symbol relocs, we compare the contents at the symbol address. We 157 // don't do `getOffset(value + addend)` because value + addend may not be 158 // a valid offset in the literal section. 159 return isecA->getOffset(valueA) == isecB->getOffset(valueB) && 160 ra.addend == rb.addend; 161 else { 162 assert(valueA == 0 && valueB == 0); 163 // For section relocs, we compare the content at the section offset. 164 return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend); 165 } 166 }; 167 return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), 168 f); 169 } 170 171 // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not 172 // handled by equalsConstant(). 173 bool ICF::equalsVariable(const ConcatInputSection *ia, 174 const ConcatInputSection *ib) { 175 if (verboseDiagnostics) 176 ++equalsVariableCount; 177 assert(ia->relocs.size() == ib->relocs.size()); 178 auto f = [this](const Reloc &ra, const Reloc &rb) { 179 // We already filtered out mismatching values/addends in equalsConstant. 180 if (ra.referent == rb.referent) 181 return true; 182 const ConcatInputSection *isecA, *isecB; 183 if (ra.referent.is<Symbol *>()) { 184 // Matching DylibSymbols are already filtered out by the 185 // identical-referent check above. Non-matching DylibSymbols were filtered 186 // out in equalsConstant(). So we can safely cast to Defined here. 187 const auto *da = cast<Defined>(ra.referent.get<Symbol *>()); 188 const auto *db = cast<Defined>(rb.referent.get<Symbol *>()); 189 if (da->isAbsolute()) 190 return true; 191 isecA = dyn_cast<ConcatInputSection>(da->isec); 192 if (!isecA) 193 return true; // literal sections were checked in equalsConstant. 194 isecB = cast<ConcatInputSection>(db->isec); 195 } else { 196 const auto *sa = ra.referent.get<InputSection *>(); 197 const auto *sb = rb.referent.get<InputSection *>(); 198 isecA = dyn_cast<ConcatInputSection>(sa); 199 if (!isecA) 200 return true; 201 isecB = cast<ConcatInputSection>(sb); 202 } 203 return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2]; 204 }; 205 if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f)) 206 return false; 207 208 // If there are symbols with associated unwind info, check that the unwind 209 // info matches. For simplicity, we only handle the case where there are only 210 // symbols at offset zero within the section (which is typically the case with 211 // .subsections_via_symbols.) 212 auto hasCU = [](Defined *d) { return d->unwindEntry != nullptr; }; 213 auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasCU); 214 auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasCU); 215 if (itA == ia->symbols.end()) 216 return itB == ib->symbols.end(); 217 if (itB == ib->symbols.end()) 218 return false; 219 const Defined *da = *itA; 220 const Defined *db = *itB; 221 if (da->unwindEntry->icfEqClass[icfPass % 2] != 222 db->unwindEntry->icfEqClass[icfPass % 2] || 223 da->value != 0 || db->value != 0) 224 return false; 225 auto isZero = [](Defined *d) { return d->value == 0; }; 226 return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) == 227 ia->symbols.end() && 228 std::find_if_not(std::next(itB), ib->symbols.end(), isZero) == 229 ib->symbols.end(); 230 } 231 232 // Find the first InputSection after BEGIN whose equivalence class differs 233 size_t ICF::findBoundary(size_t begin, size_t end) { 234 uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2]; 235 for (size_t i = begin + 1; i < end; ++i) 236 if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2]) 237 return i; 238 return end; 239 } 240 241 // Invoke FUNC on subranges with matching equivalence class 242 void ICF::forEachClassRange(size_t begin, size_t end, 243 llvm::function_ref<void(size_t, size_t)> func) { 244 while (begin < end) { 245 size_t mid = findBoundary(begin, end); 246 func(begin, mid); 247 begin = mid; 248 } 249 } 250 251 // Split icfInputs into shards, then parallelize invocation of FUNC on subranges 252 // with matching equivalence class 253 void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) { 254 // Only use threads when the benefits outweigh the overhead. 255 const size_t threadingThreshold = 1024; 256 if (icfInputs.size() < threadingThreshold) { 257 forEachClassRange(0, icfInputs.size(), func); 258 ++icfPass; 259 return; 260 } 261 262 // Shard into non-overlapping intervals, and call FUNC in parallel. The 263 // sharding must be completed before any calls to FUNC are made so that FUNC 264 // can modify the InputSection in its shard without causing data races. 265 const size_t shards = 256; 266 size_t step = icfInputs.size() / shards; 267 size_t boundaries[shards + 1]; 268 boundaries[0] = 0; 269 boundaries[shards] = icfInputs.size(); 270 parallelForEachN(1, shards, [&](size_t i) { 271 boundaries[i] = findBoundary((i - 1) * step, icfInputs.size()); 272 }); 273 parallelForEachN(1, shards + 1, [&](size_t i) { 274 if (boundaries[i - 1] < boundaries[i]) { 275 forEachClassRange(boundaries[i - 1], boundaries[i], func); 276 } 277 }); 278 ++icfPass; 279 } 280 281 void ICF::run() { 282 // Into each origin-section hash, combine all reloc referent section hashes. 283 for (icfPass = 0; icfPass < 2; ++icfPass) { 284 parallelForEach(icfInputs, [&](ConcatInputSection *isec) { 285 uint32_t hash = isec->icfEqClass[icfPass % 2]; 286 for (const Reloc &r : isec->relocs) { 287 if (auto *sym = r.referent.dyn_cast<Symbol *>()) { 288 if (auto *defined = dyn_cast<Defined>(sym)) { 289 if (defined->isec) { 290 if (auto referentIsec = 291 dyn_cast<ConcatInputSection>(defined->isec)) 292 hash += defined->value + referentIsec->icfEqClass[icfPass % 2]; 293 else 294 hash += defined->isec->kind() + 295 defined->isec->getOffset(defined->value); 296 } else { 297 hash += defined->value; 298 } 299 } else { 300 // ICF runs before Undefined diags 301 assert(isa<Undefined>(sym) || isa<DylibSymbol>(sym)); 302 } 303 } 304 } 305 // Set MSB to 1 to avoid collisions with non-hashed classes. 306 isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 31); 307 }); 308 } 309 310 llvm::stable_sort( 311 icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) { 312 return a->icfEqClass[0] < b->icfEqClass[0]; 313 }); 314 forEachClass([&](size_t begin, size_t end) { 315 segregate(begin, end, &ICF::equalsConstant); 316 }); 317 318 // Split equivalence groups by comparing relocations until convergence 319 do { 320 icfRepeat = false; 321 forEachClass([&](size_t begin, size_t end) { 322 segregate(begin, end, &ICF::equalsVariable); 323 }); 324 } while (icfRepeat); 325 log("ICF needed " + Twine(icfPass) + " iterations"); 326 if (verboseDiagnostics) { 327 log("equalsConstant() called " + Twine(equalsConstantCount) + " times"); 328 log("equalsVariable() called " + Twine(equalsVariableCount) + " times"); 329 } 330 331 // Fold sections within equivalence classes 332 forEachClass([&](size_t begin, size_t end) { 333 if (end - begin < 2) 334 return; 335 ConcatInputSection *beginIsec = icfInputs[begin]; 336 for (size_t i = begin + 1; i < end; ++i) 337 beginIsec->foldIdentical(icfInputs[i]); 338 }); 339 } 340 341 // Split an equivalence class into smaller classes. 342 void ICF::segregate(size_t begin, size_t end, EqualsFn equals) { 343 while (begin < end) { 344 // Divide [begin, end) into two. Let mid be the start index of the 345 // second group. 346 auto bound = std::stable_partition( 347 icfInputs.begin() + begin + 1, icfInputs.begin() + end, 348 [&](ConcatInputSection *isec) { 349 return (this->*equals)(icfInputs[begin], isec); 350 }); 351 size_t mid = bound - icfInputs.begin(); 352 353 // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an 354 // equivalence class ID because every group ends with a unique index. 355 for (size_t i = begin; i < mid; ++i) 356 icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid; 357 358 // If we created a group, we need to iterate the main loop again. 359 if (mid != end) 360 icfRepeat = true; 361 362 begin = mid; 363 } 364 } 365 366 void macho::foldIdenticalSections() { 367 TimeTraceScope timeScope("Fold Identical Code Sections"); 368 // The ICF equivalence-class segregation algorithm relies on pre-computed 369 // hashes of InputSection::data for the ConcatOutputSection::inputs and all 370 // sections referenced by their relocs. We could recursively traverse the 371 // relocs to find every referenced InputSection, but that precludes easy 372 // parallelization. Therefore, we hash every InputSection here where we have 373 // them all accessible as simple vectors. 374 375 // If an InputSection is ineligible for ICF, we give it a unique ID to force 376 // it into an unfoldable singleton equivalence class. Begin the unique-ID 377 // space at inputSections.size(), so that it will never intersect with 378 // equivalence-class IDs which begin at 0. Since hashes & unique IDs never 379 // coexist with equivalence-class IDs, this is not necessary, but might help 380 // someone keep the numbers straight in case we ever need to debug the 381 // ICF::segregate() 382 std::vector<ConcatInputSection *> hashable; 383 uint64_t icfUniqueID = inputSections.size(); 384 for (ConcatInputSection *isec : inputSections) { 385 // FIXME: consider non-code __text sections as hashable? 386 bool isHashable = (isCodeSection(isec) || isCfStringSection(isec) || 387 isClassRefsSection(isec)) && 388 !isec->shouldOmitFromOutput() && 389 sectionType(isec->getFlags()) == MachO::S_REGULAR; 390 if (isHashable) { 391 hashable.push_back(isec); 392 for (Defined *d : isec->symbols) 393 if (d->unwindEntry) 394 hashable.push_back(d->unwindEntry); 395 396 // __cfstring has embedded addends that foil ICF's hashing / equality 397 // checks. (We can ignore embedded addends when doing ICF because the same 398 // information gets recorded in our Reloc structs.) We therefore create a 399 // mutable copy of the CFString and zero out the embedded addends before 400 // performing any hashing / equality checks. 401 if (isCfStringSection(isec) || isClassRefsSection(isec)) { 402 // We have to do this copying serially as the BumpPtrAllocator is not 403 // thread-safe. FIXME: Make a thread-safe allocator. 404 MutableArrayRef<uint8_t> copy = isec->data.copy(bAlloc()); 405 for (const Reloc &r : isec->relocs) 406 target->relocateOne(copy.data() + r.offset, r, /*va=*/0, 407 /*relocVA=*/0); 408 isec->data = copy; 409 } 410 } else { 411 isec->icfEqClass[0] = ++icfUniqueID; 412 } 413 } 414 parallelForEach(hashable, [](ConcatInputSection *isec) { 415 assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID! 416 // Turn-on the top bit to guarantee that valid hashes have no collisions 417 // with the small-integer unique IDs for ICF-ineligible sections 418 isec->icfEqClass[0] = xxHash64(isec->data) | (1ull << 31); 419 }); 420 // Now that every input section is either hashed or marked as unique, run the 421 // segregation algorithm to detect foldable subsections. 422 ICF(hashable).run(); 423 } 424