1 //===- ICF.cpp ------------------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Identical COMDAT Folding is a feature to merge COMDAT sections not by 11 // name (which is regular COMDAT handling) but by contents. If two COMDAT 12 // sections have the same data, relocations, attributes, etc., then the two 13 // are considered identical and merged by the linker. This optimization 14 // makes outputs smaller. 15 // 16 // ICF is theoretically a problem of reducing graphs by merging as many 17 // identical subgraphs as possible, if we consider sections as vertices and 18 // relocations as edges. This may be a bit more complicated problem than you 19 // might think. The order of processing sections matters since merging two 20 // sections can make other sections, whose relocations now point to the same 21 // section, mergeable. Graphs may contain cycles, which is common in COFF. 22 // We need a sophisticated algorithm to do this properly and efficiently. 23 // 24 // What we do in this file is this. We split sections into groups. Sections 25 // in the same group are considered identical. 26 // 27 // First, all sections are grouped by their "constant" values. Constant 28 // values are values that are never changed by ICF, such as section contents, 29 // section name, number of relocations, type and offset of each relocation, 30 // etc. Because we do not care about some relocation targets in this step, 31 // two sections in the same group may not be identical, but at least two 32 // sections in different groups can never be identical. 33 // 34 // Then, we try to split each group by relocation targets. Relocations are 35 // considered identical if and only if the relocation targets are in the 36 // same group. Splitting a group may make more groups to be splittable, 37 // because two relocations that were previously considered identical might 38 // now point to different groups. We repeat this step until the convergence 39 // is obtained. 40 // 41 // This algorithm is so-called "optimistic" algorithm described in 42 // http://research.google.com/pubs/pub36912.html. 43 // 44 //===----------------------------------------------------------------------===// 45 46 #include "Chunks.h" 47 #include "Symbols.h" 48 #include "lld/Core/Parallel.h" 49 #include "llvm/ADT/Hashing.h" 50 #include "llvm/Support/Debug.h" 51 #include "llvm/Support/raw_ostream.h" 52 #include <algorithm> 53 #include <atomic> 54 #include <vector> 55 56 using namespace llvm; 57 58 namespace lld { 59 namespace coff { 60 61 typedef std::vector<SectionChunk *>::iterator ChunkIterator; 62 typedef bool (*Comparator)(const SectionChunk *, const SectionChunk *); 63 64 class ICF { 65 public: 66 void run(const std::vector<Chunk *> &V); 67 68 private: 69 static uint64_t getHash(SectionChunk *C); 70 static bool equalsConstant(const SectionChunk *A, const SectionChunk *B); 71 static bool equalsVariable(const SectionChunk *A, const SectionChunk *B); 72 bool forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq); 73 bool partition(ChunkIterator Begin, ChunkIterator End, Comparator Eq); 74 75 std::atomic<uint64_t> NextID = { 1 }; 76 }; 77 78 // Entry point to ICF. 79 void doICF(const std::vector<Chunk *> &Chunks) { 80 ICF().run(Chunks); 81 } 82 83 uint64_t ICF::getHash(SectionChunk *C) { 84 return hash_combine(C->getPermissions(), 85 hash_value(C->SectionName), 86 C->NumRelocs, 87 C->getAlign(), 88 uint32_t(C->Header->SizeOfRawData), 89 C->Checksum); 90 } 91 92 bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) { 93 if (A->AssocChildren.size() != B->AssocChildren.size() || 94 A->NumRelocs != B->NumRelocs) { 95 return false; 96 } 97 98 // Compare associative sections. 99 for (size_t I = 0, E = A->AssocChildren.size(); I != E; ++I) 100 if (A->AssocChildren[I]->GroupID != B->AssocChildren[I]->GroupID) 101 return false; 102 103 // Compare relocations. 104 auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) { 105 if (R1.Type != R2.Type || 106 R1.VirtualAddress != R2.VirtualAddress) { 107 return false; 108 } 109 SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl(); 110 SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl(); 111 if (B1 == B2) 112 return true; 113 if (auto *D1 = dyn_cast<DefinedRegular>(B1)) 114 if (auto *D2 = dyn_cast<DefinedRegular>(B2)) 115 return D1->getValue() == D2->getValue() && 116 D1->getChunk()->GroupID == D2->getChunk()->GroupID; 117 return false; 118 }; 119 if (!std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq)) 120 return false; 121 122 // Compare section attributes and contents. 123 return A->getPermissions() == B->getPermissions() && 124 A->SectionName == B->SectionName && 125 A->getAlign() == B->getAlign() && 126 A->Header->SizeOfRawData == B->Header->SizeOfRawData && 127 A->Checksum == B->Checksum && 128 A->getContents() == B->getContents(); 129 } 130 131 bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) { 132 // Compare associative sections. 133 for (size_t I = 0, E = A->AssocChildren.size(); I != E; ++I) 134 if (A->AssocChildren[I]->GroupID != B->AssocChildren[I]->GroupID) 135 return false; 136 137 // Compare relocations. 138 auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) { 139 SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl(); 140 SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl(); 141 if (B1 == B2) 142 return true; 143 if (auto *D1 = dyn_cast<DefinedRegular>(B1)) 144 if (auto *D2 = dyn_cast<DefinedRegular>(B2)) 145 return D1->getChunk()->GroupID == D2->getChunk()->GroupID; 146 return false; 147 }; 148 return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq); 149 } 150 151 bool ICF::partition(ChunkIterator Begin, ChunkIterator End, Comparator Eq) { 152 bool R = false; 153 for (auto It = Begin;;) { 154 SectionChunk *Head = *It; 155 auto Bound = std::partition(It + 1, End, [&](SectionChunk *SC) { 156 return Eq(Head, SC); 157 }); 158 if (Bound == End) 159 return R; 160 uint64_t ID = NextID++; 161 std::for_each(It, Bound, [&](SectionChunk *SC) { SC->GroupID = ID; }); 162 It = Bound; 163 R = true; 164 } 165 } 166 167 bool ICF::forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq) { 168 bool R = false; 169 for (auto It = Chunks.begin(), End = Chunks.end(); It != End;) { 170 SectionChunk *Head = *It; 171 auto Bound = std::find_if(It + 1, End, [&](SectionChunk *SC) { 172 return SC->GroupID != Head->GroupID; 173 }); 174 if (partition(It, Bound, Eq)) 175 R = true; 176 It = Bound; 177 } 178 return R; 179 } 180 181 // Merge identical COMDAT sections. 182 // Two sections are considered the same if their section headers, 183 // contents and relocations are all the same. 184 void ICF::run(const std::vector<Chunk *> &Vec) { 185 // Collect only mergeable sections and group by hash value. 186 parallel_for_each(Vec.begin(), Vec.end(), [&](Chunk *C) { 187 if (auto *SC = dyn_cast<SectionChunk>(C)) { 188 bool Global = SC->Sym && SC->Sym->isExternal(); 189 bool Writable = SC->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE; 190 if (SC->isCOMDAT() && SC->isLive() && Global && !Writable) 191 SC->GroupID = getHash(SC) | (uint64_t(1) << 63); 192 } 193 }); 194 std::vector<SectionChunk *> Chunks; 195 for (Chunk *C : Vec) { 196 if (auto *SC = dyn_cast<SectionChunk>(C)) { 197 if (SC->GroupID) { 198 Chunks.push_back(SC); 199 } else { 200 SC->GroupID = NextID++; 201 } 202 } 203 } 204 205 // From now on, sections in Chunks are ordered so that sections in 206 // the same group are consecutive in the vector. 207 std::sort(Chunks.begin(), Chunks.end(), 208 [](SectionChunk *A, SectionChunk *B) { 209 return A->GroupID < B->GroupID; 210 }); 211 212 // Split groups until we get a convergence. 213 int Cnt = 1; 214 forEachGroup(Chunks, equalsConstant); 215 216 for (;;) { 217 if (!forEachGroup(Chunks, equalsVariable)) 218 break; 219 ++Cnt; 220 } 221 if (Config->Verbose) 222 llvm::outs() << "\nICF needed " << Cnt << " iterations.\n"; 223 224 // Merge sections in the same group. 225 for (auto It = Chunks.begin(), End = Chunks.end(); It != End;) { 226 SectionChunk *Head = *It++; 227 auto Bound = std::find_if(It, End, [&](SectionChunk *SC) { 228 return Head->GroupID != SC->GroupID; 229 }); 230 if (It == Bound) 231 continue; 232 if (Config->Verbose) 233 llvm::outs() << "Selected " << Head->getDebugName() << "\n"; 234 while (It != Bound) { 235 SectionChunk *SC = *It++; 236 if (Config->Verbose) 237 llvm::outs() << " Removed " << SC->getDebugName() << "\n"; 238 Head->replace(SC); 239 } 240 } 241 } 242 243 } // namespace coff 244 } // namespace lld 245