1 //===- ICF.cpp ------------------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // ICF is short for Identical Code Folding. This is a size optimization to 11 // identify and merge two or more read-only sections (typically functions) 12 // that happened to have the same contents. It usually reduces output size 13 // by a few percent. 14 // 15 // In ICF, two sections are considered identical if they have the same 16 // section flags, section data, and relocations. Relocations are tricky, 17 // because two relocations are considered the same if they have the same 18 // relocation types, values, and if they point to the same sections *in 19 // terms of ICF*. 20 // 21 // Here is an example. If foo and bar defined below are compiled to the 22 // same machine instructions, ICF can and should merge the two, although 23 // their relocations point to each other. 24 // 25 // void foo() { bar(); } 26 // void bar() { foo(); } 27 // 28 // If you merge the two, their relocations point to the same section and 29 // thus you know they are mergeable, but how do you know they are 30 // mergeable in the first place? This is not an easy problem to solve. 31 // 32 // What we are doing in LLD is to partition sections into equivalence 33 // classes. Sections in the same equivalence class when the algorithm 34 // terminates are considered identical. Here are details: 35 // 36 // 1. First, we partition sections using their hash values as keys. Hash 37 // values contain section types, section contents and numbers of 38 // relocations. During this step, relocation targets are not taken into 39 // account. We just put sections that apparently differ into different 40 // equivalence classes. 41 // 42 // 2. Next, for each equivalence class, we visit sections to compare 43 // relocation targets. Relocation targets are considered equivalent if 44 // their targets are in the same equivalence class. Sections with 45 // different relocation targets are put into different equivalence 46 // clases. 47 // 48 // 3. If we split an equivalence class in step 2, two relocations 49 // previously target the same equivalence class may now target 50 // different equivalence classes. Therefore, we repeat step 2 until a 51 // convergence is obtained. 52 // 53 // 4. For each equivalence class C, pick an arbitrary section in C, and 54 // merge all the other sections in C with it. 55 // 56 // For small programs, this algorithm needs 3-5 iterations. For large 57 // programs such as Chromium, it takes more than 20 iterations. 58 // 59 // This algorithm was mentioned as an "optimistic algorithm" in [1], 60 // though gold implements a different algorithm than this. 61 // 62 // We parallelize each step so that multiple threads can work on different 63 // equivalence classes concurrently. That gave us a large performance 64 // boost when applying ICF on large programs. For example, MSVC link.exe 65 // or GNU gold takes 10-20 seconds to apply ICF on Chromium, whose output 66 // size is about 1.5 GB, but LLD can finish it in less than 2 seconds on a 67 // 2.8 GHz 40 core machine. Even without threading, LLD's ICF is still 68 // faster than MSVC or gold though. 69 // 70 // [1] Safe ICF: Pointer Safe and Unwinding aware Identical Code Folding 71 // in the Gold Linker 72 // http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36912.pdf 73 // 74 //===----------------------------------------------------------------------===// 75 76 #include "ICF.h" 77 #include "Config.h" 78 #include "SymbolTable.h" 79 #include "Symbols.h" 80 #include "SyntheticSections.h" 81 #include "Writer.h" 82 #include "lld/Common/Threads.h" 83 #include "llvm/ADT/StringExtras.h" 84 #include "llvm/BinaryFormat/ELF.h" 85 #include "llvm/Object/ELF.h" 86 #include "llvm/Support/xxhash.h" 87 #include <algorithm> 88 #include <atomic> 89 90 using namespace lld; 91 using namespace lld::elf; 92 using namespace llvm; 93 using namespace llvm::ELF; 94 using namespace llvm::object; 95 96 namespace { 97 template <class ELFT> class ICF { 98 public: 99 void run(); 100 101 private: 102 void segregate(size_t Begin, size_t End, bool Constant); 103 104 template <class RelTy> 105 bool constantEq(const InputSection *A, ArrayRef<RelTy> RelsA, 106 const InputSection *B, ArrayRef<RelTy> RelsB); 107 108 template <class RelTy> 109 bool variableEq(const InputSection *A, ArrayRef<RelTy> RelsA, 110 const InputSection *B, ArrayRef<RelTy> RelsB); 111 112 bool equalsConstant(const InputSection *A, const InputSection *B); 113 bool equalsVariable(const InputSection *A, const InputSection *B); 114 115 size_t findBoundary(size_t Begin, size_t End); 116 117 void forEachClassRange(size_t Begin, size_t End, 118 llvm::function_ref<void(size_t, size_t)> Fn); 119 120 void forEachClass(llvm::function_ref<void(size_t, size_t)> Fn); 121 122 std::vector<InputSection *> Sections; 123 124 // We repeat the main loop while `Repeat` is true. 125 std::atomic<bool> Repeat; 126 127 // The main loop counter. 128 int Cnt = 0; 129 130 // We have two locations for equivalence classes. On the first iteration 131 // of the main loop, Class[0] has a valid value, and Class[1] contains 132 // garbage. We read equivalence classes from slot 0 and write to slot 1. 133 // So, Class[0] represents the current class, and Class[1] represents 134 // the next class. On each iteration, we switch their roles and use them 135 // alternately. 136 // 137 // Why are we doing this? Recall that other threads may be working on 138 // other equivalence classes in parallel. They may read sections that we 139 // are updating. We cannot update equivalence classes in place because 140 // it breaks the invariance that all possibly-identical sections must be 141 // in the same equivalence class at any moment. In other words, the for 142 // loop to update equivalence classes is not atomic, and that is 143 // observable from other threads. By writing new classes to other 144 // places, we can keep the invariance. 145 // 146 // Below, `Current` has the index of the current class, and `Next` has 147 // the index of the next class. If threading is enabled, they are either 148 // (0, 1) or (1, 0). 149 // 150 // Note on single-thread: if that's the case, they are always (0, 0) 151 // because we can safely read the next class without worrying about race 152 // conditions. Using the same location makes this algorithm converge 153 // faster because it uses results of the same iteration earlier. 154 int Current = 0; 155 int Next = 0; 156 }; 157 } 158 159 // Returns true if section S is subject of ICF. 160 static bool isEligible(InputSection *S) { 161 if (!S->Live || S->KeepUnique || !(S->Flags & SHF_ALLOC)) 162 return false; 163 164 // Don't merge writable sections. .data.rel.ro sections are marked as writable 165 // but are semantically read-only. 166 if ((S->Flags & SHF_WRITE) && S->Name != ".data.rel.ro" && 167 !S->Name.startswith(".data.rel.ro.")) 168 return false; 169 170 // SHF_LINK_ORDER sections are ICF'd as a unit with their dependent sections, 171 // so we don't consider them for ICF individually. 172 if (S->Flags & SHF_LINK_ORDER) 173 return false; 174 175 // Don't merge synthetic sections as their Data member is not valid and empty. 176 // The Data member needs to be valid for ICF as it is used by ICF to determine 177 // the equality of section contents. 178 if (isa<SyntheticSection>(S)) 179 return false; 180 181 // .init and .fini contains instructions that must be executed to initialize 182 // and finalize the process. They cannot and should not be merged. 183 if (S->Name == ".init" || S->Name == ".fini") 184 return false; 185 186 // A user program may enumerate sections named with a C identifier using 187 // __start_* and __stop_* symbols. We cannot ICF any such sections because 188 // that could change program semantics. 189 if (isValidCIdentifier(S->Name)) 190 return false; 191 192 return true; 193 } 194 195 // Split an equivalence class into smaller classes. 196 template <class ELFT> 197 void ICF<ELFT>::segregate(size_t Begin, size_t End, bool Constant) { 198 // This loop rearranges sections in [Begin, End) so that all sections 199 // that are equal in terms of equals{Constant,Variable} are contiguous 200 // in [Begin, End). 201 // 202 // The algorithm is quadratic in the worst case, but that is not an 203 // issue in practice because the number of the distinct sections in 204 // each range is usually very small. 205 206 while (Begin < End) { 207 // Divide [Begin, End) into two. Let Mid be the start index of the 208 // second group. 209 auto Bound = 210 std::stable_partition(Sections.begin() + Begin + 1, 211 Sections.begin() + End, [&](InputSection *S) { 212 if (Constant) 213 return equalsConstant(Sections[Begin], S); 214 return equalsVariable(Sections[Begin], S); 215 }); 216 size_t Mid = Bound - Sections.begin(); 217 218 // Now we split [Begin, End) into [Begin, Mid) and [Mid, End) by 219 // updating the sections in [Begin, Mid). We use Mid as an equivalence 220 // class ID because every group ends with a unique index. 221 for (size_t I = Begin; I < Mid; ++I) 222 Sections[I]->Class[Next] = Mid; 223 224 // If we created a group, we need to iterate the main loop again. 225 if (Mid != End) 226 Repeat = true; 227 228 Begin = Mid; 229 } 230 } 231 232 // Compare two lists of relocations. 233 template <class ELFT> 234 template <class RelTy> 235 bool ICF<ELFT>::constantEq(const InputSection *SecA, ArrayRef<RelTy> RA, 236 const InputSection *SecB, ArrayRef<RelTy> RB) { 237 for (size_t I = 0; I < RA.size(); ++I) { 238 if (RA[I].r_offset != RB[I].r_offset || 239 RA[I].getType(Config->IsMips64EL) != RB[I].getType(Config->IsMips64EL)) 240 return false; 241 242 uint64_t AddA = getAddend<ELFT>(RA[I]); 243 uint64_t AddB = getAddend<ELFT>(RB[I]); 244 245 Symbol &SA = SecA->template getFile<ELFT>()->getRelocTargetSym(RA[I]); 246 Symbol &SB = SecB->template getFile<ELFT>()->getRelocTargetSym(RB[I]); 247 if (&SA == &SB) { 248 if (AddA == AddB) 249 continue; 250 return false; 251 } 252 253 auto *DA = dyn_cast<Defined>(&SA); 254 auto *DB = dyn_cast<Defined>(&SB); 255 256 // Placeholder symbols generated by linker scripts look the same now but 257 // may have different values later. 258 if (!DA || !DB || DA->ScriptDefined || DB->ScriptDefined) 259 return false; 260 261 // Relocations referring to absolute symbols are constant-equal if their 262 // values are equal. 263 if (!DA->Section && !DB->Section && DA->Value + AddA == DB->Value + AddB) 264 continue; 265 if (!DA->Section || !DB->Section) 266 return false; 267 268 if (DA->Section->kind() != DB->Section->kind()) 269 return false; 270 271 // Relocations referring to InputSections are constant-equal if their 272 // section offsets are equal. 273 if (isa<InputSection>(DA->Section)) { 274 if (DA->Value + AddA == DB->Value + AddB) 275 continue; 276 return false; 277 } 278 279 // Relocations referring to MergeInputSections are constant-equal if their 280 // offsets in the output section are equal. 281 auto *X = dyn_cast<MergeInputSection>(DA->Section); 282 if (!X) 283 return false; 284 auto *Y = cast<MergeInputSection>(DB->Section); 285 if (X->getParent() != Y->getParent()) 286 return false; 287 288 uint64_t OffsetA = 289 SA.isSection() ? X->getOffset(AddA) : X->getOffset(DA->Value) + AddA; 290 uint64_t OffsetB = 291 SB.isSection() ? Y->getOffset(AddB) : Y->getOffset(DB->Value) + AddB; 292 if (OffsetA != OffsetB) 293 return false; 294 } 295 296 return true; 297 } 298 299 // Compare "non-moving" part of two InputSections, namely everything 300 // except relocation targets. 301 template <class ELFT> 302 bool ICF<ELFT>::equalsConstant(const InputSection *A, const InputSection *B) { 303 if (A->NumRelocations != B->NumRelocations || A->Flags != B->Flags || 304 A->getSize() != B->getSize() || A->data() != B->data()) 305 return false; 306 307 // If two sections have different output sections, we cannot merge them. 308 // FIXME: This doesn't do the right thing in the case where there is a linker 309 // script. We probably need to move output section assignment before ICF to 310 // get the correct behaviour here. 311 if (getOutputSectionName(A) != getOutputSectionName(B)) 312 return false; 313 314 if (A->AreRelocsRela) 315 return constantEq(A, A->template relas<ELFT>(), B, 316 B->template relas<ELFT>()); 317 return constantEq(A, A->template rels<ELFT>(), B, B->template rels<ELFT>()); 318 } 319 320 // Compare two lists of relocations. Returns true if all pairs of 321 // relocations point to the same section in terms of ICF. 322 template <class ELFT> 323 template <class RelTy> 324 bool ICF<ELFT>::variableEq(const InputSection *SecA, ArrayRef<RelTy> RA, 325 const InputSection *SecB, ArrayRef<RelTy> RB) { 326 assert(RA.size() == RB.size()); 327 328 for (size_t I = 0; I < RA.size(); ++I) { 329 // The two sections must be identical. 330 Symbol &SA = SecA->template getFile<ELFT>()->getRelocTargetSym(RA[I]); 331 Symbol &SB = SecB->template getFile<ELFT>()->getRelocTargetSym(RB[I]); 332 if (&SA == &SB) 333 continue; 334 335 auto *DA = cast<Defined>(&SA); 336 auto *DB = cast<Defined>(&SB); 337 338 // We already dealt with absolute and non-InputSection symbols in 339 // constantEq, and for InputSections we have already checked everything 340 // except the equivalence class. 341 if (!DA->Section) 342 continue; 343 auto *X = dyn_cast<InputSection>(DA->Section); 344 if (!X) 345 continue; 346 auto *Y = cast<InputSection>(DB->Section); 347 348 // Ineligible sections are in the special equivalence class 0. 349 // They can never be the same in terms of the equivalence class. 350 if (X->Class[Current] == 0) 351 return false; 352 if (X->Class[Current] != Y->Class[Current]) 353 return false; 354 }; 355 356 return true; 357 } 358 359 // Compare "moving" part of two InputSections, namely relocation targets. 360 template <class ELFT> 361 bool ICF<ELFT>::equalsVariable(const InputSection *A, const InputSection *B) { 362 if (A->AreRelocsRela) 363 return variableEq(A, A->template relas<ELFT>(), B, 364 B->template relas<ELFT>()); 365 return variableEq(A, A->template rels<ELFT>(), B, B->template rels<ELFT>()); 366 } 367 368 template <class ELFT> size_t ICF<ELFT>::findBoundary(size_t Begin, size_t End) { 369 uint32_t Class = Sections[Begin]->Class[Current]; 370 for (size_t I = Begin + 1; I < End; ++I) 371 if (Class != Sections[I]->Class[Current]) 372 return I; 373 return End; 374 } 375 376 // Sections in the same equivalence class are contiguous in Sections 377 // vector. Therefore, Sections vector can be considered as contiguous 378 // groups of sections, grouped by the class. 379 // 380 // This function calls Fn on every group within [Begin, End). 381 template <class ELFT> 382 void ICF<ELFT>::forEachClassRange(size_t Begin, size_t End, 383 llvm::function_ref<void(size_t, size_t)> Fn) { 384 while (Begin < End) { 385 size_t Mid = findBoundary(Begin, End); 386 Fn(Begin, Mid); 387 Begin = Mid; 388 } 389 } 390 391 // Call Fn on each equivalence class. 392 template <class ELFT> 393 void ICF<ELFT>::forEachClass(llvm::function_ref<void(size_t, size_t)> Fn) { 394 // If threading is disabled or the number of sections are 395 // too small to use threading, call Fn sequentially. 396 if (!ThreadsEnabled || Sections.size() < 1024) { 397 forEachClassRange(0, Sections.size(), Fn); 398 ++Cnt; 399 return; 400 } 401 402 Current = Cnt % 2; 403 Next = (Cnt + 1) % 2; 404 405 // Shard into non-overlapping intervals, and call Fn in parallel. 406 // The sharding must be completed before any calls to Fn are made 407 // so that Fn can modify the Chunks in its shard without causing data 408 // races. 409 const size_t NumShards = 256; 410 size_t Step = Sections.size() / NumShards; 411 size_t Boundaries[NumShards + 1]; 412 Boundaries[0] = 0; 413 Boundaries[NumShards] = Sections.size(); 414 415 parallelForEachN(1, NumShards, [&](size_t I) { 416 Boundaries[I] = findBoundary((I - 1) * Step, Sections.size()); 417 }); 418 419 parallelForEachN(1, NumShards + 1, [&](size_t I) { 420 if (Boundaries[I - 1] < Boundaries[I]) 421 forEachClassRange(Boundaries[I - 1], Boundaries[I], Fn); 422 }); 423 ++Cnt; 424 } 425 426 static void print(const Twine &S) { 427 if (Config->PrintIcfSections) 428 message(S); 429 } 430 431 // The main function of ICF. 432 template <class ELFT> void ICF<ELFT>::run() { 433 // Collect sections to merge. 434 for (InputSectionBase *Sec : InputSections) 435 if (auto *S = dyn_cast<InputSection>(Sec)) 436 if (isEligible(S)) 437 Sections.push_back(S); 438 439 // Initially, we use hash values to partition sections. 440 parallelForEach(Sections, [&](InputSection *S) { 441 // Set MSB to 1 to avoid collisions with non-hash IDs. 442 S->Class[0] = xxHash64(S->data()) | (1U << 31); 443 }); 444 445 // From now on, sections in Sections vector are ordered so that sections 446 // in the same equivalence class are consecutive in the vector. 447 std::stable_sort(Sections.begin(), Sections.end(), 448 [](InputSection *A, InputSection *B) { 449 return A->Class[0] < B->Class[0]; 450 }); 451 452 // Compare static contents and assign unique IDs for each static content. 453 forEachClass([&](size_t Begin, size_t End) { segregate(Begin, End, true); }); 454 455 // Split groups by comparing relocations until convergence is obtained. 456 do { 457 Repeat = false; 458 forEachClass( 459 [&](size_t Begin, size_t End) { segregate(Begin, End, false); }); 460 } while (Repeat); 461 462 log("ICF needed " + Twine(Cnt) + " iterations"); 463 464 // Merge sections by the equivalence class. 465 forEachClassRange(0, Sections.size(), [&](size_t Begin, size_t End) { 466 if (End - Begin == 1) 467 return; 468 print("selected section " + toString(Sections[Begin])); 469 for (size_t I = Begin + 1; I < End; ++I) { 470 print(" removing identical section " + toString(Sections[I])); 471 Sections[Begin]->replace(Sections[I]); 472 473 // At this point we know sections merged are fully identical and hence 474 // we want to remove duplicate implicit dependencies such as link order 475 // and relocation sections. 476 for (InputSection *IS : Sections[I]->DependentSections) 477 IS->Live = false; 478 } 479 }); 480 } 481 482 // ICF entry point function. 483 template <class ELFT> void elf::doIcf() { ICF<ELFT>().run(); } 484 485 template void elf::doIcf<ELF32LE>(); 486 template void elf::doIcf<ELF32BE>(); 487 template void elf::doIcf<ELF64LE>(); 488 template void elf::doIcf<ELF64BE>(); 489