1 //===- Chunks.h -------------------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLD_COFF_CHUNKS_H 11 #define LLD_COFF_CHUNKS_H 12 13 #include "Config.h" 14 #include "InputFiles.h" 15 #include "lld/Common/LLVM.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/iterator.h" 18 #include "llvm/ADT/iterator_range.h" 19 #include "llvm/MC/StringTableBuilder.h" 20 #include "llvm/Object/COFF.h" 21 #include <utility> 22 #include <vector> 23 24 namespace lld { 25 namespace coff { 26 27 using llvm::COFF::ImportDirectoryTableEntry; 28 using llvm::object::COFFSymbolRef; 29 using llvm::object::SectionRef; 30 using llvm::object::coff_relocation; 31 using llvm::object::coff_section; 32 33 class Baserel; 34 class Defined; 35 class DefinedImportData; 36 class DefinedRegular; 37 class ObjFile; 38 class OutputSection; 39 class RuntimePseudoReloc; 40 class Symbol; 41 42 // Mask for permissions (discardable, writable, readable, executable, etc). 43 const uint32_t PermMask = 0xFE000000; 44 45 // Mask for section types (code, data, bss). 46 const uint32_t TypeMask = 0x000000E0; 47 48 // A Chunk represents a chunk of data that will occupy space in the 49 // output (if the resolver chose that). It may or may not be backed by 50 // a section of an input file. It could be linker-created data, or 51 // doesn't even have actual data (if common or bss). 52 class Chunk { 53 public: 54 enum Kind { SectionKind, OtherKind }; kind()55 Kind kind() const { return ChunkKind; } 56 virtual ~Chunk() = default; 57 58 // Returns the size of this chunk (even if this is a common or BSS.) 59 virtual size_t getSize() const = 0; 60 61 // Write this chunk to a mmap'ed file, assuming Buf is pointing to 62 // beginning of the file. Because this function may use RVA values 63 // of other chunks for relocations, you need to set them properly 64 // before calling this function. writeTo(uint8_t * Buf)65 virtual void writeTo(uint8_t *Buf) const {} 66 67 // Called by the writer once before assigning addresses and writing 68 // the output. readRelocTargets()69 virtual void readRelocTargets() {} 70 71 // Called if restarting thunk addition. resetRelocTargets()72 virtual void resetRelocTargets() {} 73 74 // Called by the writer after an RVA is assigned, but before calling 75 // getSize(). finalizeContents()76 virtual void finalizeContents() {} 77 78 // The writer sets and uses the addresses. getRVA()79 uint64_t getRVA() const { return RVA; } setRVA(uint64_t V)80 void setRVA(uint64_t V) { RVA = V; } 81 82 // Returns true if this has non-zero data. BSS chunks return 83 // false. If false is returned, the space occupied by this chunk 84 // will be filled with zeros. hasData()85 virtual bool hasData() const { return true; } 86 87 // Returns readable/writable/executable bits. getOutputCharacteristics()88 virtual uint32_t getOutputCharacteristics() const { return 0; } 89 90 // Returns the section name if this is a section chunk. 91 // It is illegal to call this function on non-section chunks. getSectionName()92 virtual StringRef getSectionName() const { 93 llvm_unreachable("unimplemented getSectionName"); 94 } 95 96 // An output section has pointers to chunks in the section, and each 97 // chunk has a back pointer to an output section. setOutputSection(OutputSection * O)98 void setOutputSection(OutputSection *O) { Out = O; } getOutputSection()99 OutputSection *getOutputSection() const { return Out; } 100 101 // Windows-specific. 102 // Collect all locations that contain absolute addresses for base relocations. getBaserels(std::vector<Baserel> * Res)103 virtual void getBaserels(std::vector<Baserel> *Res) {} 104 105 // Returns a human-readable name of this chunk. Chunks are unnamed chunks of 106 // bytes, so this is used only for logging or debugging. getDebugName()107 virtual StringRef getDebugName() { return ""; } 108 109 // The alignment of this chunk. The writer uses the value. 110 uint32_t Alignment = 1; 111 112 protected: ChunkKind(K)113 Chunk(Kind K = OtherKind) : ChunkKind(K) {} 114 const Kind ChunkKind; 115 116 // The RVA of this chunk in the output. The writer sets a value. 117 uint64_t RVA = 0; 118 119 // The output section for this chunk. 120 OutputSection *Out = nullptr; 121 122 public: 123 // The offset from beginning of the output section. The writer sets a value. 124 uint64_t OutputSectionOff = 0; 125 126 // Whether this section needs to be kept distinct from other sections during 127 // ICF. This is set by the driver using address-significance tables. 128 bool KeepUnique = false; 129 }; 130 131 // A chunk corresponding a section of an input file. 132 class SectionChunk final : public Chunk { 133 // Identical COMDAT Folding feature accesses section internal data. 134 friend class ICF; 135 136 public: 137 class symbol_iterator : public llvm::iterator_adaptor_base< 138 symbol_iterator, const coff_relocation *, 139 std::random_access_iterator_tag, Symbol *> { 140 friend SectionChunk; 141 142 ObjFile *File; 143 symbol_iterator(ObjFile * File,const coff_relocation * I)144 symbol_iterator(ObjFile *File, const coff_relocation *I) 145 : symbol_iterator::iterator_adaptor_base(I), File(File) {} 146 147 public: 148 symbol_iterator() = default; 149 150 Symbol *operator*() const { return File->getSymbol(I->SymbolTableIndex); } 151 }; 152 153 SectionChunk(ObjFile *File, const coff_section *Header); classof(const Chunk * C)154 static bool classof(const Chunk *C) { return C->kind() == SectionKind; } 155 void readRelocTargets() override; 156 void resetRelocTargets() override; getSize()157 size_t getSize() const override { return Header->SizeOfRawData; } 158 ArrayRef<uint8_t> getContents() const; 159 void writeTo(uint8_t *Buf) const override; 160 bool hasData() const override; 161 uint32_t getOutputCharacteristics() const override; getSectionName()162 StringRef getSectionName() const override { return SectionName; } 163 void getBaserels(std::vector<Baserel> *Res) override; 164 bool isCOMDAT() const; 165 void applyRelX64(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S, 166 uint64_t P) const; 167 void applyRelX86(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S, 168 uint64_t P) const; 169 void applyRelARM(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S, 170 uint64_t P) const; 171 void applyRelARM64(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S, 172 uint64_t P) const; 173 174 void getRuntimePseudoRelocs(std::vector<RuntimePseudoReloc> &Res); 175 176 // Called if the garbage collector decides to not include this chunk 177 // in a final output. It's supposed to print out a log message to stdout. 178 void printDiscardedMessage() const; 179 180 // Adds COMDAT associative sections to this COMDAT section. A chunk 181 // and its children are treated as a group by the garbage collector. 182 void addAssociative(SectionChunk *Child); 183 184 StringRef getDebugName() override; 185 186 // True if this is a codeview debug info chunk. These will not be laid out in 187 // the image. Instead they will end up in the PDB, if one is requested. isCodeView()188 bool isCodeView() const { 189 return SectionName == ".debug" || SectionName.startswith(".debug$"); 190 } 191 192 // True if this is a DWARF debug info or exception handling chunk. isDWARF()193 bool isDWARF() const { 194 return SectionName.startswith(".debug_") || SectionName == ".eh_frame"; 195 } 196 197 // Allow iteration over the bodies of this chunk's relocated symbols. symbols()198 llvm::iterator_range<symbol_iterator> symbols() const { 199 return llvm::make_range(symbol_iterator(File, Relocs.begin()), 200 symbol_iterator(File, Relocs.end())); 201 } 202 203 // Allow iteration over the associated child chunks for this section. children()204 ArrayRef<SectionChunk *> children() const { return AssocChildren; } 205 206 // The section ID this chunk belongs to in its Obj. 207 uint32_t getSectionNumber() const; 208 209 // A pointer pointing to a replacement for this chunk. 210 // Initially it points to "this" object. If this chunk is merged 211 // with other chunk by ICF, it points to another chunk, 212 // and this chunk is considered as dead. 213 SectionChunk *Repl; 214 215 // The CRC of the contents as described in the COFF spec 4.5.5. 216 // Auxiliary Format 5: Section Definitions. Used for ICF. 217 uint32_t Checksum = 0; 218 219 const coff_section *Header; 220 221 // The file that this chunk was created from. 222 ObjFile *File; 223 224 // The COMDAT leader symbol if this is a COMDAT chunk. 225 DefinedRegular *Sym = nullptr; 226 227 ArrayRef<coff_relocation> Relocs; 228 229 // Used by the garbage collector. 230 bool Live; 231 232 // When inserting a thunk, we need to adjust a relocation to point to 233 // the thunk instead of the actual original target Symbol. 234 std::vector<Symbol *> RelocTargets; 235 236 private: 237 StringRef SectionName; 238 std::vector<SectionChunk *> AssocChildren; 239 240 // Used for ICF (Identical COMDAT Folding) 241 void replace(SectionChunk *Other); 242 uint32_t Class[2] = {0, 0}; 243 }; 244 245 // This class is used to implement an lld-specific feature (not implemented in 246 // MSVC) that minimizes the output size by finding string literals sharing tail 247 // parts and merging them. 248 // 249 // If string tail merging is enabled and a section is identified as containing a 250 // string literal, it is added to a MergeChunk with an appropriate alignment. 251 // The MergeChunk then tail merges the strings using the StringTableBuilder 252 // class and assigns RVAs and section offsets to each of the member chunks based 253 // on the offsets assigned by the StringTableBuilder. 254 class MergeChunk : public Chunk { 255 public: 256 MergeChunk(uint32_t Alignment); 257 static void addSection(SectionChunk *C); 258 void finalizeContents() override; 259 260 uint32_t getOutputCharacteristics() const override; getSectionName()261 StringRef getSectionName() const override { return ".rdata"; } 262 size_t getSize() const override; 263 void writeTo(uint8_t *Buf) const override; 264 265 static std::map<uint32_t, MergeChunk *> Instances; 266 std::vector<SectionChunk *> Sections; 267 268 private: 269 llvm::StringTableBuilder Builder; 270 bool Finalized = false; 271 }; 272 273 // A chunk for common symbols. Common chunks don't have actual data. 274 class CommonChunk : public Chunk { 275 public: 276 CommonChunk(const COFFSymbolRef Sym); getSize()277 size_t getSize() const override { return Sym.getValue(); } hasData()278 bool hasData() const override { return false; } 279 uint32_t getOutputCharacteristics() const override; getSectionName()280 StringRef getSectionName() const override { return ".bss"; } 281 282 private: 283 const COFFSymbolRef Sym; 284 }; 285 286 // A chunk for linker-created strings. 287 class StringChunk : public Chunk { 288 public: StringChunk(StringRef S)289 explicit StringChunk(StringRef S) : Str(S) {} getSize()290 size_t getSize() const override { return Str.size() + 1; } 291 void writeTo(uint8_t *Buf) const override; 292 293 private: 294 StringRef Str; 295 }; 296 297 static const uint8_t ImportThunkX86[] = { 298 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // JMP *0x0 299 }; 300 301 static const uint8_t ImportThunkARM[] = { 302 0x40, 0xf2, 0x00, 0x0c, // mov.w ip, #0 303 0xc0, 0xf2, 0x00, 0x0c, // mov.t ip, #0 304 0xdc, 0xf8, 0x00, 0xf0, // ldr.w pc, [ip] 305 }; 306 307 static const uint8_t ImportThunkARM64[] = { 308 0x10, 0x00, 0x00, 0x90, // adrp x16, #0 309 0x10, 0x02, 0x40, 0xf9, // ldr x16, [x16] 310 0x00, 0x02, 0x1f, 0xd6, // br x16 311 }; 312 313 // Windows-specific. 314 // A chunk for DLL import jump table entry. In a final output, its 315 // contents will be a JMP instruction to some __imp_ symbol. 316 class ImportThunkChunkX64 : public Chunk { 317 public: 318 explicit ImportThunkChunkX64(Defined *S); getSize()319 size_t getSize() const override { return sizeof(ImportThunkX86); } 320 void writeTo(uint8_t *Buf) const override; 321 322 private: 323 Defined *ImpSymbol; 324 }; 325 326 class ImportThunkChunkX86 : public Chunk { 327 public: ImportThunkChunkX86(Defined * S)328 explicit ImportThunkChunkX86(Defined *S) : ImpSymbol(S) {} getSize()329 size_t getSize() const override { return sizeof(ImportThunkX86); } 330 void getBaserels(std::vector<Baserel> *Res) override; 331 void writeTo(uint8_t *Buf) const override; 332 333 private: 334 Defined *ImpSymbol; 335 }; 336 337 class ImportThunkChunkARM : public Chunk { 338 public: ImportThunkChunkARM(Defined * S)339 explicit ImportThunkChunkARM(Defined *S) : ImpSymbol(S) {} getSize()340 size_t getSize() const override { return sizeof(ImportThunkARM); } 341 void getBaserels(std::vector<Baserel> *Res) override; 342 void writeTo(uint8_t *Buf) const override; 343 344 private: 345 Defined *ImpSymbol; 346 }; 347 348 class ImportThunkChunkARM64 : public Chunk { 349 public: ImportThunkChunkARM64(Defined * S)350 explicit ImportThunkChunkARM64(Defined *S) : ImpSymbol(S) {} getSize()351 size_t getSize() const override { return sizeof(ImportThunkARM64); } 352 void writeTo(uint8_t *Buf) const override; 353 354 private: 355 Defined *ImpSymbol; 356 }; 357 358 class RangeExtensionThunkARM : public Chunk { 359 public: RangeExtensionThunkARM(Defined * T)360 explicit RangeExtensionThunkARM(Defined *T) : Target(T) {} 361 size_t getSize() const override; 362 void writeTo(uint8_t *Buf) const override; 363 364 Defined *Target; 365 }; 366 367 class RangeExtensionThunkARM64 : public Chunk { 368 public: RangeExtensionThunkARM64(Defined * T)369 explicit RangeExtensionThunkARM64(Defined *T) : Target(T) {} 370 size_t getSize() const override; 371 void writeTo(uint8_t *Buf) const override; 372 373 Defined *Target; 374 }; 375 376 // Windows-specific. 377 // See comments for DefinedLocalImport class. 378 class LocalImportChunk : public Chunk { 379 public: LocalImportChunk(Defined * S)380 explicit LocalImportChunk(Defined *S) : Sym(S) { 381 Alignment = Config->Wordsize; 382 } 383 size_t getSize() const override; 384 void getBaserels(std::vector<Baserel> *Res) override; 385 void writeTo(uint8_t *Buf) const override; 386 387 private: 388 Defined *Sym; 389 }; 390 391 // Duplicate RVAs are not allowed in RVA tables, so unique symbols by chunk and 392 // offset into the chunk. Order does not matter as the RVA table will be sorted 393 // later. 394 struct ChunkAndOffset { 395 Chunk *InputChunk; 396 uint32_t Offset; 397 398 struct DenseMapInfo { getEmptyKeyChunkAndOffset::DenseMapInfo399 static ChunkAndOffset getEmptyKey() { 400 return {llvm::DenseMapInfo<Chunk *>::getEmptyKey(), 0}; 401 } getTombstoneKeyChunkAndOffset::DenseMapInfo402 static ChunkAndOffset getTombstoneKey() { 403 return {llvm::DenseMapInfo<Chunk *>::getTombstoneKey(), 0}; 404 } getHashValueChunkAndOffset::DenseMapInfo405 static unsigned getHashValue(const ChunkAndOffset &CO) { 406 return llvm::DenseMapInfo<std::pair<Chunk *, uint32_t>>::getHashValue( 407 {CO.InputChunk, CO.Offset}); 408 } isEqualChunkAndOffset::DenseMapInfo409 static bool isEqual(const ChunkAndOffset &LHS, const ChunkAndOffset &RHS) { 410 return LHS.InputChunk == RHS.InputChunk && LHS.Offset == RHS.Offset; 411 } 412 }; 413 }; 414 415 using SymbolRVASet = llvm::DenseSet<ChunkAndOffset>; 416 417 // Table which contains symbol RVAs. Used for /safeseh and /guard:cf. 418 class RVATableChunk : public Chunk { 419 public: RVATableChunk(SymbolRVASet S)420 explicit RVATableChunk(SymbolRVASet S) : Syms(std::move(S)) {} getSize()421 size_t getSize() const override { return Syms.size() * 4; } 422 void writeTo(uint8_t *Buf) const override; 423 424 private: 425 SymbolRVASet Syms; 426 }; 427 428 // Windows-specific. 429 // This class represents a block in .reloc section. 430 // See the PE/COFF spec 5.6 for details. 431 class BaserelChunk : public Chunk { 432 public: 433 BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End); getSize()434 size_t getSize() const override { return Data.size(); } 435 void writeTo(uint8_t *Buf) const override; 436 437 private: 438 std::vector<uint8_t> Data; 439 }; 440 441 class Baserel { 442 public: Baserel(uint32_t V,uint8_t Ty)443 Baserel(uint32_t V, uint8_t Ty) : RVA(V), Type(Ty) {} Baserel(uint32_t V)444 explicit Baserel(uint32_t V) : Baserel(V, getDefaultType()) {} 445 uint8_t getDefaultType(); 446 447 uint32_t RVA; 448 uint8_t Type; 449 }; 450 451 // This is a placeholder Chunk, to allow attaching a DefinedSynthetic to a 452 // specific place in a section, without any data. This is used for the MinGW 453 // specific symbol __RUNTIME_PSEUDO_RELOC_LIST_END__, even though the concept 454 // of an empty chunk isn't MinGW specific. 455 class EmptyChunk : public Chunk { 456 public: EmptyChunk()457 EmptyChunk() {} getSize()458 size_t getSize() const override { return 0; } writeTo(uint8_t * Buf)459 void writeTo(uint8_t *Buf) const override {} 460 }; 461 462 // MinGW specific, for the "automatic import of variables from DLLs" feature. 463 // This provides the table of runtime pseudo relocations, for variable 464 // references that turned out to need to be imported from a DLL even though 465 // the reference didn't use the dllimport attribute. The MinGW runtime will 466 // process this table after loading, before handling control over to user 467 // code. 468 class PseudoRelocTableChunk : public Chunk { 469 public: PseudoRelocTableChunk(std::vector<RuntimePseudoReloc> & Relocs)470 PseudoRelocTableChunk(std::vector<RuntimePseudoReloc> &Relocs) 471 : Relocs(std::move(Relocs)) { 472 Alignment = 4; 473 } 474 size_t getSize() const override; 475 void writeTo(uint8_t *Buf) const override; 476 477 private: 478 std::vector<RuntimePseudoReloc> Relocs; 479 }; 480 481 // MinGW specific; information about one individual location in the image 482 // that needs to be fixed up at runtime after loading. This represents 483 // one individual element in the PseudoRelocTableChunk table. 484 class RuntimePseudoReloc { 485 public: RuntimePseudoReloc(Defined * Sym,SectionChunk * Target,uint32_t TargetOffset,int Flags)486 RuntimePseudoReloc(Defined *Sym, SectionChunk *Target, uint32_t TargetOffset, 487 int Flags) 488 : Sym(Sym), Target(Target), TargetOffset(TargetOffset), Flags(Flags) {} 489 490 Defined *Sym; 491 SectionChunk *Target; 492 uint32_t TargetOffset; 493 // The Flags field contains the size of the relocation, in bits. No other 494 // flags are currently defined. 495 int Flags; 496 }; 497 498 // MinGW specific. A Chunk that contains one pointer-sized absolute value. 499 class AbsolutePointerChunk : public Chunk { 500 public: AbsolutePointerChunk(uint64_t Value)501 AbsolutePointerChunk(uint64_t Value) : Value(Value) { 502 Alignment = getSize(); 503 } 504 size_t getSize() const override; 505 void writeTo(uint8_t *Buf) const override; 506 507 private: 508 uint64_t Value; 509 }; 510 511 void applyMOV32T(uint8_t *Off, uint32_t V); 512 void applyBranch24T(uint8_t *Off, int32_t V); 513 514 void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P, int Shift); 515 void applyArm64Imm(uint8_t *Off, uint64_t Imm, uint32_t RangeLimit); 516 void applyArm64Branch26(uint8_t *Off, int64_t V); 517 518 } // namespace coff 519 } // namespace lld 520 521 namespace llvm { 522 template <> 523 struct DenseMapInfo<lld::coff::ChunkAndOffset> 524 : lld::coff::ChunkAndOffset::DenseMapInfo {}; 525 } 526 527 #endif 528