1 //===- ICF.cpp ------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ICF.h" 10 #include "ConcatOutputSection.h" 11 #include "InputSection.h" 12 #include "Symbols.h" 13 #include "UnwindInfoSection.h" 14 15 #include "lld/Common/CommonLinkerContext.h" 16 #include "llvm/Support/Parallel.h" 17 #include "llvm/Support/TimeProfiler.h" 18 #include "llvm/Support/xxhash.h" 19 20 #include <atomic> 21 22 using namespace llvm; 23 using namespace lld; 24 using namespace lld::macho; 25 26 class ICF { 27 public: 28 ICF(std::vector<ConcatInputSection *> &inputs); 29 30 void run(); 31 void segregate(size_t begin, size_t end, 32 std::function<bool(const ConcatInputSection *, 33 const ConcatInputSection *)> 34 equals); 35 size_t findBoundary(size_t begin, size_t end); 36 void forEachClassRange(size_t begin, size_t end, 37 std::function<void(size_t, size_t)> func); 38 void forEachClass(std::function<void(size_t, size_t)> func); 39 40 // ICF needs a copy of the inputs vector because its equivalence-class 41 // segregation algorithm destroys the proper sequence. 42 std::vector<ConcatInputSection *> icfInputs; 43 }; 44 45 ICF::ICF(std::vector<ConcatInputSection *> &inputs) { 46 icfInputs.assign(inputs.begin(), inputs.end()); 47 } 48 49 // ICF = Identical Code Folding 50 // 51 // We only fold __TEXT,__text, so this is really "code" folding, and not 52 // "COMDAT" folding. String and scalar constant literals are deduplicated 53 // elsewhere. 54 // 55 // Summary of segments & sections: 56 // 57 // The __TEXT segment is readonly at the MMU. Some sections are already 58 // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are 59 // synthetic and inherently free of duplicates (__TEXT,__stubs & 60 // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const, 61 // because doing so induces many test failures. 62 // 63 // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and 64 // thus ineligible for ICF. 65 // 66 // The __DATA_CONST segment is read/write at the MMU, but is logically const to 67 // the application after dyld applies fixups to pointer data. We currently 68 // fold only the __DATA_CONST,__cfstring section. 69 // 70 // The __DATA segment is read/write at the MMU, and as application-writeable 71 // data, none of its sections are eligible for ICF. 72 // 73 // Please see the large block comment in lld/ELF/ICF.cpp for an explanation 74 // of the segregation algorithm. 75 // 76 // FIXME(gkm): implement keep-unique attributes 77 // FIXME(gkm): implement address-significance tables for MachO object files 78 79 static unsigned icfPass = 0; 80 static std::atomic<bool> icfRepeat{false}; 81 82 // Compare "non-moving" parts of two ConcatInputSections, namely everything 83 // except references to other ConcatInputSections. 84 static bool equalsConstant(const ConcatInputSection *ia, 85 const ConcatInputSection *ib) { 86 // We can only fold within the same OutputSection. 87 if (ia->parent != ib->parent) 88 return false; 89 if (ia->data.size() != ib->data.size()) 90 return false; 91 if (ia->data != ib->data) 92 return false; 93 if (ia->relocs.size() != ib->relocs.size()) 94 return false; 95 auto f = [](const Reloc &ra, const Reloc &rb) { 96 if (ra.type != rb.type) 97 return false; 98 if (ra.pcrel != rb.pcrel) 99 return false; 100 if (ra.length != rb.length) 101 return false; 102 if (ra.offset != rb.offset) 103 return false; 104 if (ra.addend != rb.addend) 105 return false; 106 if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>()) 107 return false; 108 109 InputSection *isecA, *isecB; 110 111 uint64_t valueA = 0; 112 uint64_t valueB = 0; 113 if (ra.referent.is<Symbol *>()) { 114 const auto *sa = ra.referent.get<Symbol *>(); 115 const auto *sb = rb.referent.get<Symbol *>(); 116 if (sa->kind() != sb->kind()) 117 return false; 118 if (!isa<Defined>(sa)) { 119 // ICF runs before Undefineds are reported. 120 assert(isa<DylibSymbol>(sa) || isa<Undefined>(sa)); 121 return sa == sb; 122 } 123 const auto *da = cast<Defined>(sa); 124 const auto *db = cast<Defined>(sb); 125 if (!da->isec || !db->isec) { 126 assert(da->isAbsolute() && db->isAbsolute()); 127 return da->value == db->value; 128 } 129 isecA = da->isec; 130 valueA = da->value; 131 isecB = db->isec; 132 valueB = db->value; 133 } else { 134 isecA = ra.referent.get<InputSection *>(); 135 isecB = rb.referent.get<InputSection *>(); 136 } 137 138 if (isecA->parent != isecB->parent) 139 return false; 140 // Sections with identical parents should be of the same kind. 141 assert(isecA->kind() == isecB->kind()); 142 // We will compare ConcatInputSection contents in equalsVariable. 143 if (isa<ConcatInputSection>(isecA)) 144 return true; 145 // Else we have two literal sections. References to them are equal iff their 146 // offsets in the output section are equal. 147 return isecA->getOffset(valueA + ra.addend) == 148 isecB->getOffset(valueB + rb.addend); 149 }; 150 return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), 151 f); 152 } 153 154 // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not 155 // handled by equalsConstant(). 156 static bool equalsVariable(const ConcatInputSection *ia, 157 const ConcatInputSection *ib) { 158 assert(ia->relocs.size() == ib->relocs.size()); 159 auto f = [](const Reloc &ra, const Reloc &rb) { 160 // We already filtered out mismatching values/addends in equalsConstant. 161 if (ra.referent == rb.referent) 162 return true; 163 const ConcatInputSection *isecA, *isecB; 164 if (ra.referent.is<Symbol *>()) { 165 // Matching DylibSymbols are already filtered out by the 166 // identical-referent check above. Non-matching DylibSymbols were filtered 167 // out in equalsConstant(). So we can safely cast to Defined here. 168 const auto *da = cast<Defined>(ra.referent.get<Symbol *>()); 169 const auto *db = cast<Defined>(rb.referent.get<Symbol *>()); 170 if (da->isAbsolute()) 171 return true; 172 isecA = dyn_cast<ConcatInputSection>(da->isec); 173 if (!isecA) 174 return true; // literal sections were checked in equalsConstant. 175 isecB = cast<ConcatInputSection>(db->isec); 176 } else { 177 const auto *sa = ra.referent.get<InputSection *>(); 178 const auto *sb = rb.referent.get<InputSection *>(); 179 isecA = dyn_cast<ConcatInputSection>(sa); 180 if (!isecA) 181 return true; 182 isecB = cast<ConcatInputSection>(sb); 183 } 184 return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2]; 185 }; 186 if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f)) 187 return false; 188 189 // If there are symbols with associated unwind info, check that the unwind 190 // info matches. For simplicity, we only handle the case where there are only 191 // symbols at offset zero within the section (which is typically the case with 192 // .subsections_via_symbols.) 193 auto hasCU = [](Defined *d) { return d->unwindEntry != nullptr; }; 194 auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasCU); 195 auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasCU); 196 if (itA == ia->symbols.end()) 197 return itB == ib->symbols.end(); 198 if (itB == ib->symbols.end()) 199 return false; 200 const Defined *da = *itA; 201 const Defined *db = *itB; 202 if (da->unwindEntry->icfEqClass[icfPass % 2] != 203 db->unwindEntry->icfEqClass[icfPass % 2] || 204 da->value != 0 || db->value != 0) 205 return false; 206 auto isZero = [](Defined *d) { return d->value == 0; }; 207 return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) == 208 ia->symbols.end() && 209 std::find_if_not(std::next(itB), ib->symbols.end(), isZero) == 210 ib->symbols.end(); 211 } 212 213 // Find the first InputSection after BEGIN whose equivalence class differs 214 size_t ICF::findBoundary(size_t begin, size_t end) { 215 uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2]; 216 for (size_t i = begin + 1; i < end; ++i) 217 if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2]) 218 return i; 219 return end; 220 } 221 222 // Invoke FUNC on subranges with matching equivalence class 223 void ICF::forEachClassRange(size_t begin, size_t end, 224 std::function<void(size_t, size_t)> func) { 225 while (begin < end) { 226 size_t mid = findBoundary(begin, end); 227 func(begin, mid); 228 begin = mid; 229 } 230 } 231 232 // Split icfInputs into shards, then parallelize invocation of FUNC on subranges 233 // with matching equivalence class 234 void ICF::forEachClass(std::function<void(size_t, size_t)> func) { 235 // Only use threads when the benefits outweigh the overhead. 236 const size_t threadingThreshold = 1024; 237 if (icfInputs.size() < threadingThreshold) { 238 forEachClassRange(0, icfInputs.size(), func); 239 ++icfPass; 240 return; 241 } 242 243 // Shard into non-overlapping intervals, and call FUNC in parallel. The 244 // sharding must be completed before any calls to FUNC are made so that FUNC 245 // can modify the InputSection in its shard without causing data races. 246 const size_t shards = 256; 247 size_t step = icfInputs.size() / shards; 248 size_t boundaries[shards + 1]; 249 boundaries[0] = 0; 250 boundaries[shards] = icfInputs.size(); 251 parallelForEachN(1, shards, [&](size_t i) { 252 boundaries[i] = findBoundary((i - 1) * step, icfInputs.size()); 253 }); 254 parallelForEachN(1, shards + 1, [&](size_t i) { 255 if (boundaries[i - 1] < boundaries[i]) { 256 forEachClassRange(boundaries[i - 1], boundaries[i], func); 257 } 258 }); 259 ++icfPass; 260 } 261 262 void ICF::run() { 263 // Into each origin-section hash, combine all reloc referent section hashes. 264 for (icfPass = 0; icfPass < 2; ++icfPass) { 265 parallelForEach(icfInputs, [&](ConcatInputSection *isec) { 266 uint64_t hash = isec->icfEqClass[icfPass % 2]; 267 for (const Reloc &r : isec->relocs) { 268 if (auto *sym = r.referent.dyn_cast<Symbol *>()) { 269 if (auto *dylibSym = dyn_cast<DylibSymbol>(sym)) 270 hash += dylibSym->stubsHelperIndex; 271 else if (auto *defined = dyn_cast<Defined>(sym)) { 272 if (defined->isec) { 273 if (auto isec = dyn_cast<ConcatInputSection>(defined->isec)) 274 hash += defined->value + isec->icfEqClass[icfPass % 2]; 275 else 276 hash += defined->isec->kind() + 277 defined->isec->getOffset(defined->value); 278 } else { 279 hash += defined->value; 280 } 281 } else if (!isa<Undefined>(sym)) // ICF runs before Undefined diags. 282 llvm_unreachable("foldIdenticalSections symbol kind"); 283 } 284 } 285 // Set MSB to 1 to avoid collisions with non-hashed classes. 286 isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 63); 287 }); 288 } 289 290 llvm::stable_sort( 291 icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) { 292 return a->icfEqClass[0] < b->icfEqClass[0]; 293 }); 294 forEachClass( 295 [&](size_t begin, size_t end) { segregate(begin, end, equalsConstant); }); 296 297 // Split equivalence groups by comparing relocations until convergence 298 do { 299 icfRepeat = false; 300 forEachClass([&](size_t begin, size_t end) { 301 segregate(begin, end, equalsVariable); 302 }); 303 } while (icfRepeat); 304 log("ICF needed " + Twine(icfPass) + " iterations"); 305 306 // Fold sections within equivalence classes 307 forEachClass([&](size_t begin, size_t end) { 308 if (end - begin < 2) 309 return; 310 ConcatInputSection *beginIsec = icfInputs[begin]; 311 for (size_t i = begin + 1; i < end; ++i) 312 beginIsec->foldIdentical(icfInputs[i]); 313 }); 314 } 315 316 // Split an equivalence class into smaller classes. 317 void ICF::segregate( 318 size_t begin, size_t end, 319 std::function<bool(const ConcatInputSection *, const ConcatInputSection *)> 320 equals) { 321 while (begin < end) { 322 // Divide [begin, end) into two. Let mid be the start index of the 323 // second group. 324 auto bound = std::stable_partition(icfInputs.begin() + begin + 1, 325 icfInputs.begin() + end, 326 [&](ConcatInputSection *isec) { 327 return equals(icfInputs[begin], isec); 328 }); 329 size_t mid = bound - icfInputs.begin(); 330 331 // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an 332 // equivalence class ID because every group ends with a unique index. 333 for (size_t i = begin; i < mid; ++i) 334 icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid; 335 336 // If we created a group, we need to iterate the main loop again. 337 if (mid != end) 338 icfRepeat = true; 339 340 begin = mid; 341 } 342 } 343 344 void macho::foldIdenticalSections() { 345 TimeTraceScope timeScope("Fold Identical Code Sections"); 346 // The ICF equivalence-class segregation algorithm relies on pre-computed 347 // hashes of InputSection::data for the ConcatOutputSection::inputs and all 348 // sections referenced by their relocs. We could recursively traverse the 349 // relocs to find every referenced InputSection, but that precludes easy 350 // parallelization. Therefore, we hash every InputSection here where we have 351 // them all accessible as simple vectors. 352 353 // If an InputSection is ineligible for ICF, we give it a unique ID to force 354 // it into an unfoldable singleton equivalence class. Begin the unique-ID 355 // space at inputSections.size(), so that it will never intersect with 356 // equivalence-class IDs which begin at 0. Since hashes & unique IDs never 357 // coexist with equivalence-class IDs, this is not necessary, but might help 358 // someone keep the numbers straight in case we ever need to debug the 359 // ICF::segregate() 360 std::vector<ConcatInputSection *> hashable; 361 uint64_t icfUniqueID = inputSections.size(); 362 for (ConcatInputSection *isec : inputSections) { 363 // FIXME: consider non-code __text sections as hashable? 364 bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) && 365 !isec->shouldOmitFromOutput() && 366 sectionType(isec->getFlags()) == MachO::S_REGULAR; 367 if (isHashable) { 368 hashable.push_back(isec); 369 for (Defined *d : isec->symbols) 370 if (d->unwindEntry) 371 hashable.push_back(d->unwindEntry); 372 } else { 373 isec->icfEqClass[0] = ++icfUniqueID; 374 } 375 } 376 parallelForEach(hashable, [](ConcatInputSection *isec) { 377 assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID! 378 // Turn-on the top bit to guarantee that valid hashes have no collisions 379 // with the small-integer unique IDs for ICF-ineligible sections 380 isec->icfEqClass[0] = xxHash64(isec->data) | (1ull << 63); 381 }); 382 // Now that every input section is either hashed or marked as unique, run the 383 // segregation algorithm to detect foldable subsections. 384 ICF(hashable).run(); 385 } 386