1 //===- MarkLive.cpp -------------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements --gc-sections, which is a feature to remove unused 11 // sections from output. Unused sections are sections that are not reachable 12 // from known GC-root symbols or sections. Naturally the feature is 13 // implemented as a mark-sweep garbage collector. 14 // 15 // Here's how it works. Each InputSectionBase has a "Live" bit. The bit is off 16 // by default. Starting with GC-root symbols or sections, markLive function 17 // defined in this file visits all reachable sections to set their Live 18 // bits. Writer will then ignore sections whose Live bits are off, so that 19 // such sections are not included into output. 20 // 21 //===----------------------------------------------------------------------===// 22 23 #include "InputSection.h" 24 #include "LinkerScript.h" 25 #include "OutputSections.h" 26 #include "Strings.h" 27 #include "SymbolTable.h" 28 #include "Symbols.h" 29 #include "Target.h" 30 #include "Writer.h" 31 #include "llvm/ADT/STLExtras.h" 32 #include "llvm/Object/ELF.h" 33 #include <functional> 34 #include <vector> 35 36 using namespace llvm; 37 using namespace llvm::ELF; 38 using namespace llvm::object; 39 using namespace llvm::support::endian; 40 41 using namespace lld; 42 using namespace lld::elf; 43 44 namespace { 45 // A resolved relocation. The Sec and Offset fields are set if the relocation 46 // was resolved to an offset within a section. 47 template <class ELFT> 48 struct ResolvedReloc { 49 InputSectionBase<ELFT> *Sec; 50 typename ELFT::uint Offset; 51 }; 52 } // end anonymous namespace 53 54 template <class ELFT> 55 static typename ELFT::uint getAddend(InputSectionBase<ELFT> &Sec, 56 const typename ELFT::Rel &Rel) { 57 return Target->getImplicitAddend(Sec.Data.begin() + Rel.r_offset, 58 Rel.getType(Config->Mips64EL)); 59 } 60 61 template <class ELFT> 62 static typename ELFT::uint getAddend(InputSectionBase<ELFT> &Sec, 63 const typename ELFT::Rela &Rel) { 64 return Rel.r_addend; 65 } 66 67 template <class ELFT, class RelT> 68 static ResolvedReloc<ELFT> resolveReloc(InputSectionBase<ELFT> &Sec, 69 RelT &Rel) { 70 SymbolBody &B = Sec.getFile()->getRelocTargetSym(Rel); 71 auto *D = dyn_cast<DefinedRegular<ELFT>>(&B); 72 if (!D || !D->Section) 73 return {nullptr, 0}; 74 typename ELFT::uint Offset = D->Value; 75 if (D->isSection()) 76 Offset += getAddend(Sec, Rel); 77 return {D->Section->Repl, Offset}; 78 } 79 80 // Calls Fn for each section that Sec refers to via relocations. 81 template <class ELFT> 82 static void forEachSuccessor(InputSection<ELFT> &Sec, 83 std::function<void(ResolvedReloc<ELFT>)> Fn) { 84 ELFFile<ELFT> &Obj = Sec.getFile()->getObj(); 85 for (const typename ELFT::Shdr *RelSec : Sec.RelocSections) { 86 if (RelSec->sh_type == SHT_RELA) { 87 for (const typename ELFT::Rela &Rel : Obj.relas(RelSec)) 88 Fn(resolveReloc(Sec, Rel)); 89 } else { 90 for (const typename ELFT::Rel &Rel : Obj.rels(RelSec)) 91 Fn(resolveReloc(Sec, Rel)); 92 } 93 } 94 if (Sec.DependentSection) 95 Fn({Sec.DependentSection, 0}); 96 } 97 98 // The .eh_frame section is an unfortunate special case. 99 // The section is divided in CIEs and FDEs and the relocations it can have are 100 // * CIEs can refer to a personality function. 101 // * FDEs can refer to a LSDA 102 // * FDEs refer to the function they contain information about 103 // The last kind of relocation cannot keep the referred section alive, or they 104 // would keep everything alive in a common object file. In fact, each FDE is 105 // alive if the section it refers to is alive. 106 // To keep things simple, in here we just ignore the last relocation kind. The 107 // other two keep the referred section alive. 108 // 109 // A possible improvement would be to fully process .eh_frame in the middle of 110 // the gc pass. With that we would be able to also gc some sections holding 111 // LSDAs and personality functions if we found that they were unused. 112 template <class ELFT, class RelTy> 113 static void 114 scanEhFrameSection(EhInputSection<ELFT> &EH, ArrayRef<RelTy> Rels, 115 std::function<void(ResolvedReloc<ELFT>)> Enqueue) { 116 const endianness E = ELFT::TargetEndianness; 117 for (unsigned I = 0, N = EH.Pieces.size(); I < N; ++I) { 118 EhSectionPiece &Piece = EH.Pieces[I]; 119 unsigned FirstRelI = Piece.FirstRelocation; 120 if (FirstRelI == (unsigned)-1) 121 continue; 122 if (read32<E>(Piece.data().data() + 4) == 0) { 123 // This is a CIE, we only need to worry about the first relocation. It is 124 // known to point to the personality function. 125 Enqueue(resolveReloc(EH, Rels[FirstRelI])); 126 continue; 127 } 128 // This is a FDE. The relocations point to the described function or to 129 // a LSDA. We only need to keep the LSDA alive, so ignore anything that 130 // points to executable sections. 131 typename ELFT::uint PieceEnd = Piece.InputOff + Piece.size(); 132 for (unsigned I2 = FirstRelI, N2 = Rels.size(); I2 < N2; ++I2) { 133 const RelTy &Rel = Rels[I2]; 134 if (Rel.r_offset >= PieceEnd) 135 break; 136 ResolvedReloc<ELFT> R = resolveReloc(EH, Rels[I2]); 137 if (!R.Sec || R.Sec == &InputSection<ELFT>::Discarded) 138 continue; 139 if (R.Sec->getSectionHdr()->sh_flags & SHF_EXECINSTR) 140 continue; 141 Enqueue({R.Sec, 0}); 142 } 143 } 144 } 145 146 template <class ELFT> 147 static void 148 scanEhFrameSection(EhInputSection<ELFT> &EH, 149 std::function<void(ResolvedReloc<ELFT>)> Enqueue) { 150 if (!EH.RelocSection) 151 return; 152 153 // Unfortunately we need to split .eh_frame early since some relocations in 154 // .eh_frame keep other section alive and some don't. 155 EH.split(); 156 157 ELFFile<ELFT> &EObj = EH.getFile()->getObj(); 158 if (EH.RelocSection->sh_type == SHT_RELA) 159 scanEhFrameSection(EH, EObj.relas(EH.RelocSection), Enqueue); 160 else 161 scanEhFrameSection(EH, EObj.rels(EH.RelocSection), Enqueue); 162 } 163 164 // We do not garbage-collect two types of sections: 165 // 1) Sections used by the loader (.init, .fini, .ctors, .dtors or .jcr) 166 // 2) Non-allocatable sections which typically contain debugging information 167 template <class ELFT> static bool isReserved(InputSectionBase<ELFT> *Sec) { 168 switch (Sec->getSectionHdr()->sh_type) { 169 case SHT_FINI_ARRAY: 170 case SHT_INIT_ARRAY: 171 case SHT_NOTE: 172 case SHT_PREINIT_ARRAY: 173 return true; 174 default: 175 if (!(Sec->getSectionHdr()->sh_flags & SHF_ALLOC)) 176 return true; 177 178 // We do not want to reclaim sections if they can be referred 179 // by __start_* and __stop_* symbols. 180 StringRef S = Sec->Name; 181 if (isValidCIdentifier(S)) 182 return true; 183 184 return S.startswith(".ctors") || S.startswith(".dtors") || 185 S.startswith(".init") || S.startswith(".fini") || 186 S.startswith(".jcr"); 187 } 188 } 189 190 // This is the main function of the garbage collector. 191 // Starting from GC-root sections, this function visits all reachable 192 // sections to set their "Live" bits. 193 template <class ELFT> void elf::markLive() { 194 SmallVector<InputSection<ELFT> *, 256> Q; 195 196 auto Enqueue = [&](ResolvedReloc<ELFT> R) { 197 // Skip over discarded sections. This in theory shouldn't happen, because 198 // the ELF spec doesn't allow a relocation to point to a deduplicated 199 // COMDAT section directly. Unfortunately this happens in practice (e.g. 200 // .eh_frame) so we need to add a check. 201 if (!R.Sec || R.Sec == &InputSection<ELFT>::Discarded) 202 return; 203 204 // Usually, a whole section is marked as live or dead, but in mergeable 205 // (splittable) sections, each piece of data has independent liveness bit. 206 // So we explicitly tell it which offset is in use. 207 if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(R.Sec)) 208 MS->markLiveAt(R.Offset); 209 210 if (R.Sec->Live) 211 return; 212 R.Sec->Live = true; 213 // Add input section to the queue. We don't want to consider relocations 214 // from non-allocatable input sections, because we can bring those 215 // allocatable sections to living which otherwise would be dead. 216 if (InputSection<ELFT> *S = dyn_cast<InputSection<ELFT>>(R.Sec)) 217 if (S->getSectionHdr()->sh_flags & SHF_ALLOC) 218 Q.push_back(S); 219 }; 220 221 auto MarkSymbol = [&](const SymbolBody *Sym) { 222 if (auto *D = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym)) 223 Enqueue({D->Section, D->Value}); 224 }; 225 226 // Add GC root symbols. 227 if (Config->EntrySym) 228 MarkSymbol(Config->EntrySym->body()); 229 MarkSymbol(Symtab<ELFT>::X->find(Config->Init)); 230 MarkSymbol(Symtab<ELFT>::X->find(Config->Fini)); 231 for (StringRef S : Config->Undefined) 232 MarkSymbol(Symtab<ELFT>::X->find(S)); 233 234 // Preserve externally-visible symbols if the symbols defined by this 235 // file can interrupt other ELF file's symbols at runtime. 236 for (const Symbol *S : Symtab<ELFT>::X->getSymbols()) 237 if (S->includeInDynsym()) 238 MarkSymbol(S->body()); 239 240 // Preserve special sections and those which are specified in linker 241 // script KEEP command. 242 for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) { 243 for (InputSectionBase<ELFT> *Sec : F->getSections()) { 244 if (!Sec || Sec == &InputSection<ELFT>::Discarded) 245 continue; 246 // .eh_frame is always marked as live now, but also it can reference to 247 // sections that contain personality. We preserve all non-text sections 248 // referred by .eh_frame here. 249 if (auto *EH = dyn_cast_or_null<EhInputSection<ELFT>>(Sec)) 250 scanEhFrameSection<ELFT>(*EH, Enqueue); 251 if (isReserved(Sec) || Script<ELFT>::X->shouldKeep(Sec)) 252 Enqueue({Sec, 0}); 253 } 254 } 255 256 // Mark all reachable sections. 257 while (!Q.empty()) 258 forEachSuccessor<ELFT>(*Q.pop_back_val(), Enqueue); 259 } 260 261 template void elf::markLive<ELF32LE>(); 262 template void elf::markLive<ELF32BE>(); 263 template void elf::markLive<ELF64LE>(); 264 template void elf::markLive<ELF64BE>(); 265