//===- Writer.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Writer.h"
#include "ConcatOutputSection.h"
#include "Config.h"
#include "ICF.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "MapFile.h"
#include "OutputSection.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "UnwindInfoSection.h"

#include "lld/Common/Arrays.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/xxhash.h"

#include <algorithm>

using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::sys;
using namespace lld;
using namespace lld::macho;

namespace {
class LCUuid;

// Holds the state shared by the individual output-writing passes declared
// below: the running address / file-offset cursors and pointers to the
// sections and load commands that later passes need to refer back to.
class Writer {
public:
  // Binds to the error handler's output buffer rather than owning one.
  Writer() : buffer(errorHandler().outputBuffer) {}

  void scanRelocations();
  void scanSymbols();
  template <class LP> void createOutputSections();
  template <class LP> void createLoadCommands();
  void foldIdenticalSections();
  void finalizeAddresses();
  void finalizeLinkEditSegment();
  void assignAddresses(OutputSegment *);

  void openFile();
  void writeSections();
  void writeUuid();
  void writeCodeSignature();
  void writeOutputFile();

  template <class LP> void run();

  std::unique_ptr<FileOutputBuffer> &buffer;
  // Cursors advanced by assignAddresses() / finalizeAddresses() as sections
  // and segments are laid out.
  uint64_t addr = 0;
  uint64_t fileOff = 0;
  MachHeaderSection *header = nullptr;
  // The following sections are created in createOutputSections(). Those that
  // are created only under a config flag stay null otherwise.
  StringTableSection *stringTableSection = nullptr;
  SymtabSection *symtabSection = nullptr;
  IndirectSymtabSection *indirectSymtabSection = nullptr;
  CodeSignatureSection *codeSignatureSection = nullptr;
  DataInCodeSection *dataInCodeSection = nullptr;
  FunctionStartsSection *functionStartsSection = nullptr;

  // Set in createLoadCommands().
  LCUuid *uuidCommand = nullptr;
  // Set at the end of createOutputSections().
  OutputSegment *linkEditSegment = nullptr;
  // Output sections built by concatenating input sections, keyed by their
  // (segment name, section name) pair after any renaming.
  DenseMap<NamePair, ConcatOutputSection *> concatOutputSections;
};

// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
class LCDyldInfo final : public LoadCommand {
public:
  LCDyldInfo(RebaseSection *rebaseSection, BindingSection *bindingSection,
             WeakBindingSection *weakBindingSection,
             LazyBindingSection *lazyBindingSection,
             ExportSection *exportSection)
      : rebaseSection(rebaseSection), bindingSection(bindingSection),
        weakBindingSection(weakBindingSection),
        lazyBindingSection(lazyBindingSection), exportSection(exportSection) {}

  uint32_t getSize() const override { return sizeof(dyld_info_command); }

  // Writes the command header, then the offset/size pair of every section
  // that reports isNeeded(). Fields belonging to sections that are not needed
  // are not written by this function.
  void writeTo(uint8_t *buf) const override {
    auto *c = reinterpret_cast<dyld_info_command *>(buf);
    c->cmd = LC_DYLD_INFO_ONLY;
    c->cmdsize = getSize();
    if (rebaseSection->isNeeded()) {
      c->rebase_off = rebaseSection->fileOff;
      c->rebase_size = rebaseSection->getFileSize();
    }
    if (bindingSection->isNeeded()) {
      c->bind_off = bindingSection->fileOff;
      c->bind_size = bindingSection->getFileSize();
    }
    if (weakBindingSection->isNeeded()) {
      c->weak_bind_off = weakBindingSection->fileOff;
      c->weak_bind_size = weakBindingSection->getFileSize();
    }
    if (lazyBindingSection->isNeeded()) {
      c->lazy_bind_off = lazyBindingSection->fileOff;
      c->lazy_bind_size = lazyBindingSection->getFileSize();
    }
    if (exportSection->isNeeded()) {
      c->export_off = exportSection->fileOff;
      c->export_size = exportSection->getFileSize();
    }
  }

  RebaseSection *rebaseSection;
  BindingSection *bindingSection;
  WeakBindingSection *weakBindingSection;
  LazyBindingSection *lazyBindingSection;
  ExportSection *exportSection;
};

// LC_FUNCTION_STARTS: a linkedit_data_command recording the file offset and
// file size of the function-starts section.
class LCFunctionStarts final : public LoadCommand {
public:
  explicit LCFunctionStarts(FunctionStartsSection *functionStartsSection)
      : functionStartsSection(functionStartsSection) {}

  uint32_t getSize() const override { return sizeof(linkedit_data_command); }

  void writeTo(uint8_t *buf) const override {
    auto *c = reinterpret_cast<linkedit_data_command *>(buf);
    c->cmd = LC_FUNCTION_STARTS;
    c->cmdsize = getSize();
    c->dataoff = functionStartsSection->fileOff;
    c->datasize = functionStartsSection->getFileSize();
  }

private:
  FunctionStartsSection *functionStartsSection;
};

// LC_DATA_IN_CODE: a linkedit_data_command recording the file offset and file
// size of the data-in-code section.
class LCDataInCode final : public LoadCommand {
public:
  explicit LCDataInCode(DataInCodeSection *dataInCodeSection)
      : dataInCodeSection(dataInCodeSection) {}

  uint32_t getSize() const override { return sizeof(linkedit_data_command); }

  void writeTo(uint8_t *buf) const override {
    auto *c = reinterpret_cast<linkedit_data_command *>(buf);
    c->cmd = LC_DATA_IN_CODE;
    c->cmdsize = getSize();
    c->dataoff = dataInCodeSection->fileOff;
    c->datasize = dataInCodeSection->getFileSize();
  }

private:
  DataInCodeSection *dataInCodeSection;
};

// LC_DYSYMTAB: records how the symbol table is partitioned (locals, then
// external definitions, then undefined symbols) and where the indirect symbol
// table lives.
class LCDysymtab final : public LoadCommand {
public:
  LCDysymtab(SymtabSection *symtabSection,
             IndirectSymtabSection *indirectSymtabSection)
      : symtabSection(symtabSection),
        indirectSymtabSection(indirectSymtabSection) {}

  uint32_t getSize() const override { return sizeof(dysymtab_command); }

  void writeTo(uint8_t *buf) const override {
    auto *c = reinterpret_cast<dysymtab_command *>(buf);
    c->cmd = LC_DYSYMTAB;
    c->cmdsize = getSize();

    // Each group starts where the previous one ends: locals begin at index 0,
    // external definitions follow the locals, undefined symbols come last.
    c->ilocalsym = 0;
    c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols();
    c->nextdefsym = symtabSection->getNumExternalSymbols();
    c->iundefsym = c->iextdefsym + c->nextdefsym;
    c->nundefsym = symtabSection->getNumUndefinedSymbols();

    c->indirectsymoff = indirectSymtabSection->fileOff;
    c->nindirectsyms = indirectSymtabSection->getNumSymbols();
  }

  SymtabSection *symtabSection;
  IndirectSymtabSection *indirectSymtabSection;
};

// Segment load command (the concrete command type comes from
// LP::segmentLCType), followed by one section header per non-hidden output
// section of the segment.
template <class LP> class LCSegment final : public LoadCommand {
public:
  LCSegment(StringRef name, OutputSegment *seg) : name(name), seg(seg) {}

  // Size of the segment command plus one section header for each non-hidden
  // section.
  uint32_t getSize() const override {
    return sizeof(typename LP::segment_command) +
           seg->numNonHiddenSections() * sizeof(typename LP::section);
  }

  void writeTo(uint8_t *buf) const override {
    using SegmentCommand = typename LP::segment_command;
    using Section = typename LP::section;

    auto *c = reinterpret_cast<SegmentCommand *>(buf);
    buf += sizeof(SegmentCommand);

    c->cmd = LP::segmentLCType;
    c->cmdsize = getSize();
    // NOTE(review): copies name.size() bytes without a bounds check or a
    // terminator; assumes the name fits in the fixed-size segname field --
    // confirm against callers.
    memcpy(c->segname, name.data(), name.size());
    c->fileoff = seg->fileOff;
    c->maxprot = seg->maxProt;
    c->initprot = seg->initProt;

    // A segment without sections gets no address/size fields and no section
    // headers written; firstSection() below is only used past this point.
    if (seg->getSections().empty())
      return;

    c->vmaddr = seg->firstSection()->addr;
    c->vmsize = seg->vmSize;
    c->filesize = seg->fileSize;
    c->nsects = seg->numNonHiddenSections();

    for (const OutputSection *osec : seg->getSections()) {
      // Hidden sections get no header; this matches the count used by
      // getSize() and nsects above.
      if (osec->isHidden())
        continue;

      auto *sectHdr = reinterpret_cast<Section *>(buf);
      buf += sizeof(Section);

      memcpy(sectHdr->sectname, osec->name.data(), osec->name.size());
      memcpy(sectHdr->segname, name.data(), name.size());

      sectHdr->addr = osec->addr;
      sectHdr->offset = osec->fileOff;
      // The header stores the alignment as a power-of-two exponent.
      sectHdr->align = Log2_32(osec->align);
      sectHdr->flags = osec->flags;
      sectHdr->size = osec->getSize();
      sectHdr->reserved1 = osec->reserved1;
      sectHdr->reserved2 = osec->reserved2;
    }
  }

private:
  StringRef name;
  OutputSegment *seg;
};

// LC_MAIN: records the entry point (config->entry) as an offset.
class LCMain final : public LoadCommand {
  uint32_t getSize() const override {
    return sizeof(structs::entry_point_command);
  }

  void writeTo(uint8_t *buf) const override {
    auto *c = reinterpret_cast<structs::entry_point_command *>(buf);
    c->cmd = LC_MAIN;
    c->cmdsize = getSize();

    // If the entry symbol is reached through a stub, point at that stub's
    // slot within the stubs section; otherwise use the symbol's address
    // relative to the header's address.
    if (config->entry->isInStubs())
      c->entryoff =
          in.stubs->fileOff + config->entry->stubsIndex * target->stubSize;
    else
      c->entryoff = config->entry->getVA() - in.header->addr;

    c->stacksize = 0;
  }
};

// LC_SYMTAB: records the location and size of the symbol table and of the
// string table.
class LCSymtab final : public LoadCommand {
public:
  LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection)
      : symtabSection(symtabSection), stringTableSection(stringTableSection) {}

  uint32_t getSize() const override { return sizeof(symtab_command); }

  void writeTo(uint8_t *buf) const override {
    auto *c = reinterpret_cast<symtab_command *>(buf);
    c->cmd = LC_SYMTAB;
    c->cmdsize = getSize();
    c->symoff = symtabSection->fileOff;
    c->nsyms = symtabSection->getNumSymbols();
    c->stroff = stringTableSection->fileOff;
    c->strsize = stringTableSection->getFileSize();
  }

  SymtabSection *symtabSection = nullptr;
  StringTableSection *stringTableSection = nullptr;
};

// There are several dylib load commands that share the same structure:
//   * LC_LOAD_DYLIB
//   * LC_ID_DYLIB
//   * LC_REEXPORT_DYLIB
class LCDylib final : public LoadCommand {
public:
  // Every constructed LCDylib bumps instanceCount, which
  // Writer::createLoadCommands() reads through getInstanceCount() when sizing
  // the header padding.
  LCDylib(LoadCommandType type, StringRef path,
          uint32_t compatibilityVersion = 0, uint32_t currentVersion = 0)
      : type(type), path(path), compatibilityVersion(compatibilityVersion),
        currentVersion(currentVersion) {
    instanceCount++;
  }

  // Fixed command, the path and its NUL terminator, rounded up to a multiple
  // of 8 bytes.
  uint32_t getSize() const override {
    return alignTo(sizeof(dylib_command) + path.size() + 1, 8);
  }

  void writeTo(uint8_t *buf) const override {
    auto *c = reinterpret_cast<dylib_command *>(buf);
    buf += sizeof(dylib_command);

    c->cmd = type;
    c->cmdsize = getSize();
    // The name field holds the offset of the path string from the start of
    // the command; the string is written immediately after the fixed part.
    c->dylib.name = sizeof(dylib_command);
    c->dylib.timestamp = 0;
    c->dylib.compatibility_version = compatibilityVersion;
    c->dylib.current_version = currentVersion;

    memcpy(buf, path.data(), path.size());
    buf[path.size()] = '\0';
  }

  static uint32_t getInstanceCount() { return instanceCount; }

private:
  LoadCommandType type;
  StringRef path;
  uint32_t compatibilityVersion;
  uint32_t currentVersion;
  static uint32_t instanceCount;
};

uint32_t LCDylib::instanceCount = 0;

// LC_LOAD_DYLINKER: names the dynamic linker via a fixed path.
class LCLoadDylinker final : public LoadCommand {
public:
  // Fixed command, the path and its NUL terminator, rounded up to a multiple
  // of 8 bytes.
  uint32_t getSize() const override {
    return alignTo(sizeof(dylinker_command) + path.size() + 1, 8);
  }

  void writeTo(uint8_t *buf) const override {
    auto *c = reinterpret_cast<dylinker_command *>(buf);
    buf += sizeof(dylinker_command);

    c->cmd = LC_LOAD_DYLINKER;
    c->cmdsize = getSize();
    // Offset of the path string from the start of the command.
    c->name = sizeof(dylinker_command);

    memcpy(buf, path.data(), path.size());
    buf[path.size()] = '\0';
  }

private:
  // Recent versions of Darwin won't run any binary that has dyld at a
  // different location.
  const StringRef path = "/usr/lib/dyld";
};

// LC_RPATH: one runtime search path.
class LCRPath final : public LoadCommand {
public:
  explicit LCRPath(StringRef path) : path(path) {}

  // Unlike LCDylib / LCLoadDylinker, the size is rounded up to the target's
  // word size rather than to a fixed 8 bytes.
  uint32_t getSize() const override {
    return alignTo(sizeof(rpath_command) + path.size() + 1, target->wordSize);
  }

  void writeTo(uint8_t *buf) const override {
    auto *c = reinterpret_cast<rpath_command *>(buf);
    buf += sizeof(rpath_command);

    c->cmd = LC_RPATH;
    c->cmdsize = getSize();
    // Offset of the path string from the start of the command.
    c->path = sizeof(rpath_command);

    memcpy(buf, path.data(), path.size());
    buf[path.size()] = '\0';
  }

private:
  StringRef path;
};

// One of the LC_VERSION_MIN_* commands, selected by target platform.
// Writer::createLoadCommands() emits this instead of LCBuildVersion when
// useLCBuildVersion() returns false.
class LCMinVersion final : public LoadCommand {
public:
  explicit LCMinVersion(const PlatformInfo &platformInfo)
      : platformInfo(platformInfo) {}

  uint32_t getSize() const override { return sizeof(version_min_command); }

  void writeTo(uint8_t *buf) const override {
    auto *c = reinterpret_cast<version_min_command *>(buf);
    // Device and simulator variants of a platform share one command type.
    switch (platformInfo.target.Platform) {
    case PlatformKind::macOS:
      c->cmd = LC_VERSION_MIN_MACOSX;
      break;
    case PlatformKind::iOS:
    case PlatformKind::iOSSimulator:
      c->cmd = LC_VERSION_MIN_IPHONEOS;
      break;
    case PlatformKind::tvOS:
    case PlatformKind::tvOSSimulator:
      c->cmd = LC_VERSION_MIN_TVOS;
      break;
    case PlatformKind::watchOS:
    case PlatformKind::watchOSSimulator:
      c->cmd = LC_VERSION_MIN_WATCHOS;
      break;
    default:
      llvm_unreachable("invalid platform");
      break;
    }
    c->cmdsize = getSize();
    c->version = encodeVersion(platformInfo.minimum);
    c->sdk = encodeVersion(platformInfo.sdk);
  }

private:
  const PlatformInfo &platformInfo;
};

// LC_BUILD_VERSION: platform, minimum OS and SDK versions, followed by a
// single build_tool_version entry identifying the linker.
class LCBuildVersion final : public LoadCommand {
public:
  explicit LCBuildVersion(const PlatformInfo &platformInfo)
      : platformInfo(platformInfo) {}

  // Number of build_tool_version entries appended to the command.
  const int ntools = 1;

  uint32_t getSize() const override {
    return sizeof(build_version_command) + ntools * sizeof(build_tool_version);
  }

  void writeTo(uint8_t *buf) const override {
    auto *c = reinterpret_cast<build_version_command *>(buf);
    c->cmd = LC_BUILD_VERSION;
    c->cmdsize = getSize();
    c->platform = static_cast<uint32_t>(platformInfo.target.Platform);
    c->minos = encodeVersion(platformInfo.minimum);
    c->sdk = encodeVersion(platformInfo.sdk);
    c->ntools = ntools;
    // The tool entry sits directly after the fixed-size command.
    auto *t = reinterpret_cast<build_tool_version *>(&c[1]);
    t->tool = TOOL_LD;
    t->version = encodeVersion(llvm::VersionTuple(
        LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH));
  }

private:
  const PlatformInfo &platformInfo;
};

// Stores a unique identifier for the output file based on a hash of its
// contents (writeUuid() below receives a 64-bit digest). In order to hash the
// contents, we must first write them, but LC_UUID itself must be part of the
// written contents in order for all the offsets to be calculated correctly.
// We resolve this circular paradox by first writing the LC_UUID command
// without its UUID payload, then filling in the UUID with its real value
// later.
class LCUuid final : public LoadCommand {
public:
  uint32_t getSize() const override { return sizeof(uuid_command); }

  // Writes only the command header and remembers where the UUID bytes live in
  // the output buffer so that writeUuid() can fill them in afterwards.
  void writeTo(uint8_t *buf) const override {
    auto *c = reinterpret_cast<uuid_command *>(buf);
    c->cmd = LC_UUID;
    c->cmdsize = getSize();
    uuidBuf = c->uuid;
  }

  // Fills in the 16 UUID bytes at the location recorded by writeTo(), which
  // must therefore have been called first.
  void writeUuid(uint64_t digest) const {
    // xxhash only gives us 8 bytes, so put some fixed data in the other half.
    static_assert(sizeof(uuid_command::uuid) == 16, "unexpected uuid size");
    memcpy(uuidBuf, "LLD\xa1UU1D", 8);
    memcpy(uuidBuf + 8, &digest, 8);

    // RFC 4122 conformance. We need to fix 4 bits in byte 6 and 2 bits in
    // byte 8. Byte 6 is already fine due to the fixed data we put in. We don't
    // want to lose bits of the digest in byte 8, so swap that with a byte of
    // fixed data that happens to have the right bits set.
    std::swap(uuidBuf[3], uuidBuf[8]);

    // Claim that this is an MD5-based hash. It isn't, but this signals that
    // this is not a time-based and not a random hash. MD5 seems like the least
    // bad lie we can put here.
    assert((uuidBuf[6] & 0xf0) == 0x30 && "See RFC 4122 Sections 4.2.2, 4.1.3");
    assert((uuidBuf[8] & 0xc0) == 0x80 && "See RFC 4122 Section 4.2.2");
  }

  // Points into the output buffer; mutable because it is set from the const
  // writeTo().
  mutable uint8_t *uuidBuf;
};

// Encryption-info load command (the concrete command type comes from
// LP::encryptionInfoLCType). The range it describes starts right after the
// header and runs to the end of the text segment's file contents.
template <class LP> class LCEncryptionInfo final : public LoadCommand {
public:
  uint32_t getSize() const override {
    return sizeof(typename LP::encryption_info_command);
  }

  void writeTo(uint8_t *buf) const override {
    using EncryptionInfo = typename LP::encryption_info_command;
    auto *c = reinterpret_cast<EncryptionInfo *>(buf);
    buf += sizeof(EncryptionInfo);
    c->cmd = LP::encryptionInfoLCType;
    c->cmdsize = getSize();
    c->cryptoff = in.header->getSize();
    // The text segment is required to exist (asserted below).
    auto it = find_if(outputSegments, [](const OutputSegment *seg) {
      return seg->name == segment_names::text;
    });
    assert(it != outputSegments.end());
    c->cryptsize = (*it)->fileSize - c->cryptoff;
  }
};

// LC_CODE_SIGNATURE: a linkedit_data_command recording the file offset and
// size of the code signature section.
class LCCodeSignature final : public LoadCommand {
public:
  LCCodeSignature(CodeSignatureSection *section) : section(section) {}

  uint32_t getSize() const override { return sizeof(linkedit_data_command); }

  void writeTo(uint8_t *buf) const override {
    auto *c = reinterpret_cast<linkedit_data_command *>(buf);
    c->cmd = LC_CODE_SIGNATURE;
    c->cmdsize = getSize();
    c->dataoff = static_cast<uint32_t>(section->fileOff);
    c->datasize = section->getSize();
  }

  CodeSignatureSection *section;
};

} // namespace

// Add stubs and bindings where necessary (e.g. if the symbol is a
// DylibSymbol.)
533 static void prepareBranchTarget(Symbol *sym) { 534 if (auto *dysym = dyn_cast<DylibSymbol>(sym)) { 535 if (in.stubs->addEntry(dysym)) { 536 if (sym->isWeakDef()) { 537 in.binding->addEntry(dysym, in.lazyPointers->isec, 538 sym->stubsIndex * target->wordSize); 539 in.weakBinding->addEntry(sym, in.lazyPointers->isec, 540 sym->stubsIndex * target->wordSize); 541 } else { 542 in.lazyBinding->addEntry(dysym); 543 } 544 } 545 } else if (auto *defined = dyn_cast<Defined>(sym)) { 546 if (defined->isExternalWeakDef()) { 547 if (in.stubs->addEntry(sym)) { 548 in.rebase->addEntry(in.lazyPointers->isec, 549 sym->stubsIndex * target->wordSize); 550 in.weakBinding->addEntry(sym, in.lazyPointers->isec, 551 sym->stubsIndex * target->wordSize); 552 } 553 } 554 } else { 555 llvm_unreachable("invalid branch target symbol type"); 556 } 557 } 558 559 // Can a symbol's address can only be resolved at runtime? 560 static bool needsBinding(const Symbol *sym) { 561 if (isa<DylibSymbol>(sym)) 562 return true; 563 if (const auto *defined = dyn_cast<Defined>(sym)) 564 return defined->isExternalWeakDef(); 565 return false; 566 } 567 568 static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec, 569 const Reloc &r) { 570 const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type); 571 572 if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) { 573 prepareBranchTarget(sym); 574 } else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) { 575 if (relocAttrs.hasAttr(RelocAttrBits::POINTER) || needsBinding(sym)) 576 in.got->addEntry(sym); 577 } else if (relocAttrs.hasAttr(RelocAttrBits::TLV)) { 578 if (needsBinding(sym)) 579 in.tlvPointers->addEntry(sym); 580 } else if (relocAttrs.hasAttr(RelocAttrBits::UNSIGNED)) { 581 // References from thread-local variable sections are treated as offsets 582 // relative to the start of the referent section, and therefore have no 583 // need of rebase opcodes. 
584 if (!(isThreadLocalVariables(isec->flags) && isa<Defined>(sym))) 585 addNonLazyBindingEntries(sym, isec, r.offset, r.addend); 586 } 587 } 588 589 void Writer::scanRelocations() { 590 TimeTraceScope timeScope("Scan relocations"); 591 for (InputSection *isec : inputSections) { 592 if (!isa<ConcatInputSection>(isec)) 593 continue; 594 auto concatIsec = cast<ConcatInputSection>(isec); 595 596 if (concatIsec->shouldOmitFromOutput()) 597 continue; 598 599 if (concatIsec->segname == segment_names::ld) { 600 in.unwindInfo->prepareRelocations(concatIsec); 601 continue; 602 } 603 604 for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) { 605 Reloc &r = *it; 606 if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) { 607 // Skip over the following UNSIGNED relocation -- it's just there as the 608 // minuend, and doesn't have the usual UNSIGNED semantics. We don't want 609 // to emit rebase opcodes for it. 610 it++; 611 continue; 612 } 613 if (auto *sym = r.referent.dyn_cast<Symbol *>()) { 614 if (auto *undefined = dyn_cast<Undefined>(sym)) 615 treatUndefinedSymbol(*undefined); 616 // treatUndefinedSymbol() can replace sym with a DylibSymbol; re-check. 617 if (!isa<Undefined>(sym) && validateSymbolRelocation(sym, isec, r)) 618 prepareSymbolRelocation(sym, isec, r); 619 } else { 620 assert(r.referent.is<InputSection *>()); 621 if (!r.pcrel) 622 in.rebase->addEntry(isec, r.offset); 623 } 624 } 625 } 626 } 627 628 void Writer::scanSymbols() { 629 TimeTraceScope timeScope("Scan symbols"); 630 for (const Symbol *sym : symtab->getSymbols()) { 631 if (const auto *defined = dyn_cast<Defined>(sym)) { 632 if (defined->overridesWeakDef && defined->isLive()) 633 in.weakBinding->addNonWeakDefinition(defined); 634 } else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) { 635 // This branch intentionally doesn't check isLive(). 
636 if (dysym->isDynamicLookup()) 637 continue; 638 dysym->getFile()->refState = 639 std::max(dysym->getFile()->refState, dysym->getRefState()); 640 } 641 } 642 } 643 644 // TODO: ld64 enforces the old load commands in a few other cases. 645 static bool useLCBuildVersion(const PlatformInfo &platformInfo) { 646 static const std::map<PlatformKind, llvm::VersionTuple> minVersion = { 647 {PlatformKind::macOS, llvm::VersionTuple(10, 14)}, 648 {PlatformKind::iOS, llvm::VersionTuple(12, 0)}, 649 {PlatformKind::iOSSimulator, llvm::VersionTuple(13, 0)}, 650 {PlatformKind::tvOS, llvm::VersionTuple(12, 0)}, 651 {PlatformKind::tvOSSimulator, llvm::VersionTuple(13, 0)}, 652 {PlatformKind::watchOS, llvm::VersionTuple(5, 0)}, 653 {PlatformKind::watchOSSimulator, llvm::VersionTuple(6, 0)}}; 654 auto it = minVersion.find(platformInfo.target.Platform); 655 return it == minVersion.end() ? true : platformInfo.minimum >= it->second; 656 } 657 658 template <class LP> void Writer::createLoadCommands() { 659 uint8_t segIndex = 0; 660 for (OutputSegment *seg : outputSegments) { 661 in.header->addLoadCommand(make<LCSegment<LP>>(seg->name, seg)); 662 seg->index = segIndex++; 663 } 664 665 in.header->addLoadCommand(make<LCDyldInfo>( 666 in.rebase, in.binding, in.weakBinding, in.lazyBinding, in.exports)); 667 in.header->addLoadCommand(make<LCSymtab>(symtabSection, stringTableSection)); 668 in.header->addLoadCommand( 669 make<LCDysymtab>(symtabSection, indirectSymtabSection)); 670 if (functionStartsSection) 671 in.header->addLoadCommand(make<LCFunctionStarts>(functionStartsSection)); 672 if (dataInCodeSection) 673 in.header->addLoadCommand(make<LCDataInCode>(dataInCodeSection)); 674 if (config->emitEncryptionInfo) 675 in.header->addLoadCommand(make<LCEncryptionInfo<LP>>()); 676 for (StringRef path : config->runtimePaths) 677 in.header->addLoadCommand(make<LCRPath>(path)); 678 679 switch (config->outputType) { 680 case MH_EXECUTE: 681 in.header->addLoadCommand(make<LCLoadDylinker>()); 682 
in.header->addLoadCommand(make<LCMain>()); 683 break; 684 case MH_DYLIB: 685 in.header->addLoadCommand(make<LCDylib>(LC_ID_DYLIB, config->installName, 686 config->dylibCompatibilityVersion, 687 config->dylibCurrentVersion)); 688 break; 689 case MH_BUNDLE: 690 break; 691 default: 692 llvm_unreachable("unhandled output file type"); 693 } 694 695 uuidCommand = make<LCUuid>(); 696 in.header->addLoadCommand(uuidCommand); 697 698 if (useLCBuildVersion(config->platformInfo)) 699 in.header->addLoadCommand(make<LCBuildVersion>(config->platformInfo)); 700 else 701 in.header->addLoadCommand(make<LCMinVersion>(config->platformInfo)); 702 703 int64_t dylibOrdinal = 1; 704 DenseMap<StringRef, int64_t> ordinalForInstallName; 705 for (InputFile *file : inputFiles) { 706 if (auto *dylibFile = dyn_cast<DylibFile>(file)) { 707 if (dylibFile->isBundleLoader) { 708 dylibFile->ordinal = BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE; 709 // Shortcut since bundle-loader does not re-export the symbols. 710 711 dylibFile->reexport = false; 712 continue; 713 } 714 715 // Don't emit load commands for a dylib that is not referenced if: 716 // - it was added implicitly (via a reexport, an LC_LOAD_DYLINKER -- 717 // if it's on the linker command line, it's explicit) 718 // - or it's marked MH_DEAD_STRIPPABLE_DYLIB 719 // - or the flag -dead_strip_dylibs is used 720 // FIXME: `isReferenced()` is currently computed before dead code 721 // stripping, so references from dead code keep a dylib alive. This 722 // matches ld64, but it's something we should do better. 723 if (!dylibFile->isReferenced() && !dylibFile->forceNeeded && 724 (!dylibFile->explicitlyLinked || dylibFile->deadStrippable || 725 config->deadStripDylibs)) 726 continue; 727 728 // Several DylibFiles can have the same installName. Only emit a single 729 // load command for that installName and give all these DylibFiles the 730 // same ordinal. 
731 // This can happen in several cases: 732 // - a new framework could change its installName to an older 733 // framework name via an $ld$ symbol depending on platform_version 734 // - symlinks (for example, libpthread.tbd is a symlink to libSystem.tbd; 735 // Foo.framework/Foo.tbd is usually a symlink to 736 // Foo.framework/Versions/Current/Foo.tbd, where 737 // Foo.framework/Versions/Current is usually a symlink to 738 // Foo.framework/Versions/A) 739 // - a framework can be linked both explicitly on the linker 740 // command line and implicitly as a reexport from a different 741 // framework. The re-export will usually point to the tbd file 742 // in Foo.framework/Versions/A/Foo.tbd, while the explicit link will 743 // usually find Foo.framework/Foo.tbd. These are usually symlinks, 744 // but in a --reproduce archive they will be identical but distinct 745 // files. 746 // In the first case, *semantically distinct* DylibFiles will have the 747 // same installName. 748 int64_t &ordinal = ordinalForInstallName[dylibFile->installName]; 749 if (ordinal) { 750 dylibFile->ordinal = ordinal; 751 continue; 752 } 753 754 ordinal = dylibFile->ordinal = dylibOrdinal++; 755 LoadCommandType lcType = 756 dylibFile->forceWeakImport || dylibFile->refState == RefState::Weak 757 ? LC_LOAD_WEAK_DYLIB 758 : LC_LOAD_DYLIB; 759 in.header->addLoadCommand(make<LCDylib>(lcType, dylibFile->installName, 760 dylibFile->compatibilityVersion, 761 dylibFile->currentVersion)); 762 763 if (dylibFile->reexport) 764 in.header->addLoadCommand( 765 make<LCDylib>(LC_REEXPORT_DYLIB, dylibFile->installName)); 766 } 767 } 768 769 if (codeSignatureSection) 770 in.header->addLoadCommand(make<LCCodeSignature>(codeSignatureSection)); 771 772 const uint32_t MACOS_MAXPATHLEN = 1024; 773 config->headerPad = std::max( 774 config->headerPad, (config->headerPadMaxInstallNames 775 ? 
LCDylib::getInstanceCount() * MACOS_MAXPATHLEN 776 : 0)); 777 } 778 779 static size_t getSymbolPriority(const SymbolPriorityEntry &entry, 780 const InputFile *f) { 781 // We don't use toString(InputFile *) here because it returns the full path 782 // for object files, and we only want the basename. 783 StringRef filename; 784 if (f->archiveName.empty()) 785 filename = path::filename(f->getName()); 786 else 787 filename = saver.save(path::filename(f->archiveName) + "(" + 788 path::filename(f->getName()) + ")"); 789 return std::max(entry.objectFiles.lookup(filename), entry.anyObjectFile); 790 } 791 792 // Each section gets assigned the priority of the highest-priority symbol it 793 // contains. 794 static DenseMap<const InputSection *, size_t> buildInputSectionPriorities() { 795 DenseMap<const InputSection *, size_t> sectionPriorities; 796 797 if (config->priorities.empty()) 798 return sectionPriorities; 799 800 auto addSym = [&](Defined &sym) { 801 auto it = config->priorities.find(sym.getName()); 802 if (it == config->priorities.end()) 803 return; 804 805 SymbolPriorityEntry &entry = it->second; 806 size_t &priority = sectionPriorities[sym.isec]; 807 priority = std::max(priority, getSymbolPriority(entry, sym.isec->file)); 808 }; 809 810 // TODO: Make sure this handles weak symbols correctly. 811 for (const InputFile *file : inputFiles) { 812 if (isa<ObjFile>(file)) 813 for (Symbol *sym : file->symbols) 814 if (auto *d = dyn_cast_or_null<Defined>(sym)) 815 addSym(*d); 816 } 817 818 return sectionPriorities; 819 } 820 821 // Sorting only can happen once all outputs have been collected. Here we sort 822 // segments, output sections within each segment, and input sections within each 823 // output segment. 
static void sortSegmentsAndSections() {
  TimeTraceScope timeScope("Sort segments and sections");
  sortOutputSegments();

  // Empty when no symbol priorities are configured, in which case the input
  // sections keep their existing order.
  DenseMap<const InputSection *, size_t> isecPriorities =
      buildInputSectionPriorities();

  uint32_t sectionIndex = 0;
  for (OutputSegment *seg : outputSegments) {
    seg->sortOutputSections();
    for (OutputSection *osec : seg->getSections()) {
      // Now that the output sections are sorted, assign the final
      // output section indices. Indices start at 1 and skip hidden sections.
      if (!osec->isHidden())
        osec->index = ++sectionIndex;
      // Remember the first thread-local data section in output order.
      if (!firstTLVDataSection && isThreadLocalData(osec->flags))
        firstTLVDataSection = osec;

      if (!isecPriorities.empty()) {
        if (auto *merged = dyn_cast<ConcatOutputSection>(osec)) {
          // Higher priority first; the stable sort keeps the existing order
          // among inputs of equal priority. Inputs missing from the map
          // compare as priority 0.
          llvm::stable_sort(merged->inputs,
                            [&](InputSection *a, InputSection *b) {
                              return isecPriorities[a] > isecPriorities[b];
                            });
        }
      }
    }
  }
}

// Applies the configured rename maps to a (segment name, section name) pair.
// An exact match in sectionRenameMap takes precedence; otherwise only the
// segment name is replaced if segmentRenameMap has an entry for it; otherwise
// the pair is returned unchanged.
static NamePair maybeRenameSection(NamePair key) {
  auto newNames = config->sectionRenameMap.find(key);
  if (newNames != config->sectionRenameMap.end())
    return newNames->second;
  auto newName = config->segmentRenameMap.find(key.first);
  if (newName != config->segmentRenameMap.end())
    return std::make_pair(newName->second, key.second);
  return key;
}

// Creates the synthetic sections owned by the Writer, distributes every input
// section to an output section, and attaches the output sections to their
// output segments.
template <class LP> void Writer::createOutputSections() {
  TimeTraceScope timeScope("Create output sections");
  // First, create hidden sections
  stringTableSection = make<StringTableSection>();
  symtabSection = makeSymtabSection<LP>(*stringTableSection);
  indirectSymtabSection = make<IndirectSymtabSection>();
  if (config->adhocCodesign)
    codeSignatureSection = make<CodeSignatureSection>();
  if (config->emitDataInCodeInfo)
    dataInCodeSection = make<DataInCodeSection>();
  if (config->emitFunctionStarts)
    functionStartsSection = make<FunctionStartsSection>();
  if (config->emitBitcodeBundle)
    make<BitcodeBundleSection>();

  // Only executables get a PageZeroSection.
  switch (config->outputType) {
  case MH_EXECUTE:
    make<PageZeroSection>();
    break;
  case MH_DYLIB:
  case MH_BUNDLE:
    break;
  default:
    llvm_unreachable("unhandled output file type");
  }

  // Then add input sections to output sections.
  for (const auto &p : enumerate(inputSections)) {
    InputSection *isec = p.value();
    OutputSection *osec;
    if (auto *concatIsec = dyn_cast<ConcatInputSection>(isec)) {
      if (concatIsec->shouldOmitFromOutput())
        continue;
      // Concat sections are grouped by their (possibly renamed) name pair;
      // the output section is created on first use.
      NamePair names = maybeRenameSection({isec->segname, isec->name});
      ConcatOutputSection *&concatOsec = concatOutputSections[names];
      if (concatOsec == nullptr)
        concatOsec = make<ConcatOutputSection>(names.second);
      concatOsec->addInput(concatIsec);
      osec = concatOsec;
    } else if (auto *cStringIsec = dyn_cast<CStringInputSection>(isec)) {
      in.cStringSection->addInput(cStringIsec);
      osec = in.cStringSection;
    } else if (auto *litIsec = dyn_cast<WordLiteralInputSection>(isec)) {
      in.wordLiteralSection->addInput(litIsec);
      osec = in.wordLiteralSection;
    } else {
      llvm_unreachable("unhandled InputSection type");
    }
    // Track the smallest input index that contributed to this output section.
    osec->inputOrder = std::min(osec->inputOrder, static_cast<int>(p.index()));
  }

  // Once all the inputs are added, we can finalize the output section
  // properties and create the corresponding output segments.
  for (const auto &it : concatOutputSections) {
    StringRef segname = it.first.first;
    ConcatOutputSection *osec = it.second;
    if (segname == segment_names::ld) {
      // The `ld` segment is not emitted as a segment; its only expected
      // section is compact unwind, which is handed to the unwind-info section.
      assert(osec->name == section_names::compactUnwind);
      in.unwindInfo->setCompactUnwindSection(osec);
    } else {
      getOrCreateOutputSegment(segname)->addOutputSection(osec);
    }
  }

  // Add the needed synthetic sections. It is a fatal error for an input
  // section to use the same segment,section name as a needed synthetic one.
  for (SyntheticSection *ssec : syntheticSections) {
    auto it = concatOutputSections.find({ssec->segname, ssec->name});
    if (ssec->isNeeded()) {
      if (it == concatOutputSections.end()) {
        getOrCreateOutputSegment(ssec->segname)->addOutputSection(ssec);
      } else {
        fatal("section from " + toString(it->second->firstSection()->file) +
              " conflicts with synthetic section " + ssec->segname + "," +
              ssec->name);
      }
    }
  }

  // dyld requires __LINKEDIT segment to always exist (even if empty).
  linkEditSegment = getOrCreateOutputSegment(segment_names::linkEdit);
}

// Runs identical code folding over the (possibly renamed) text output
// section. Does nothing when ICF is disabled or when there is no such
// section.
void Writer::foldIdenticalSections() {
  if (config->icfLevel == ICFLevel::none)
    return;
  ConcatOutputSection *textOutputSection = concatOutputSections.lookup(
      maybeRenameSection({segment_names::text, section_names::text}));
  if (textOutputSection == nullptr)
    return;

  TimeTraceScope timeScope("Fold Identical Code Sections");
  // The ICF equivalence-class segregation algorithm relies on pre-computed
  // hashes of InputSection::data for the ConcatOutputSection::inputs and all
  // sections referenced by their relocs. We could recursively traverse the
  // relocs to find every referenced InputSection, but that precludes easy
  // parallelization. Therefore, we hash every InputSection here where we have
  // them all accessible as a simple vector.
  std::vector<ConcatInputSection *> hashable;
  // If an InputSection is ineligible for ICF, we give it a unique ID to force
  // it into an unfoldable singleton equivalence class. Begin the unique-ID
  // space at inputSections.size(), so that it will never intersect with
  // equivalence-class IDs which begin at 0. Since hashes & unique IDs never
  // coexist with equivalence-class IDs, this is not necessary, but might help
  // someone keep the numbers straight in case we ever need to debug the
  // ICF::segregate().
  uint64_t icfUniqueID = inputSections.size();
  for (InputSection *isec : inputSections) {
    if (auto *concatIsec = dyn_cast<ConcatInputSection>(isec)) {
      if (concatIsec->isHashableForICF(isec->parent == textOutputSection))
        hashable.push_back(concatIsec);
      else
        concatIsec->icfEqClass[0] = ++icfUniqueID;
    }
    // FIXME: hash literal sections here?
  }
  parallelForEach(hashable,
                  [](ConcatInputSection *isec) { isec->hashForICF(); });
  // Now that every input section is either hashed or marked as unique,
  // run the segregation algorithm to detect foldable subsections
  ICF(textOutputSection->inputs).run();
  // Drop the inputs that are now omitted and log how many were removed.
  size_t oldSize = textOutputSection->inputs.size();
  textOutputSection->eraseOmittedInputSections();
  size_t newSize = textOutputSection->inputs.size();
  log("ICF kept " + Twine(newSize) + " removed " + Twine(oldSize - newSize) +
      " of " + Twine(oldSize));
}

// Assigns addresses and file offsets to every segment except __LINKEDIT, and
// computes each of those segments' vmSize / fileSize. __LINKEDIT is handled
// by finalizeLinkEditSegment().
void Writer::finalizeAddresses() {
  TimeTraceScope timeScope("Finalize addresses");
  uint64_t pageSize = target->getPageSize();
  // Ensure that segments (and the sections they contain) are allocated
  // addresses in ascending order, which dyld requires.
  //
  // Note that at this point, __LINKEDIT sections are empty, but we need to
  // determine addresses of other segments/sections before generating its
  // contents.
  for (OutputSegment *seg : outputSegments) {
    if (seg == linkEditSegment)
      continue;
    assignAddresses(seg);
    // codesign / libstuff checks for segment ordering by verifying that
    // `fileOff + fileSize == next segment fileOff`. So we call alignTo() before
    // (instead of after) computing fileSize to ensure that the segments are
    // contiguous. We handle addr / vmSize similarly for the same reason.
    fileOff = alignTo(fileOff, pageSize);
    addr = alignTo(addr, pageSize);
    seg->vmSize = addr - seg->firstSection()->addr;
    seg->fileSize = fileOff - seg->fileOff;
  }
}

// Generates the contents of the __LINKEDIT sections and then lays out the
// __LINKEDIT segment itself.
void Writer::finalizeLinkEditSegment() {
  TimeTraceScope timeScope("Finalize __LINKEDIT segment");
  // Fill __LINKEDIT contents.
  // dataInCodeSection and functionStartsSection are null when not enabled,
  // hence the null check in the lambda below.
  std::vector<LinkEditSection *> linkEditSections{
      in.rebase,
      in.binding,
      in.weakBinding,
      in.lazyBinding,
      in.exports,
      symtabSection,
      indirectSymtabSection,
      dataInCodeSection,
      functionStartsSection,
  };
  parallelForEach(linkEditSections, [](LinkEditSection *osec) {
    if (osec)
      osec->finalizeContents();
  });

  // Now that __LINKEDIT is filled out, do a proper calculation of its
  // addresses and offsets.
  assignAddresses(linkEditSegment);
  // No need to page-align fileOff / addr here since this is the last segment.
  linkEditSegment->vmSize = addr - linkEditSegment->firstSection()->addr;
  linkEditSegment->fileSize = fileOff - linkEditSegment->fileOff;
}

void Writer::assignAddresses(OutputSegment *seg) {
  seg->fileOff = fileOff;

  for (OutputSection *osec : seg->getSections()) {
    if (!osec->isNeeded())
      continue;
    addr = alignTo(addr, osec->align);
    fileOff = alignTo(fileOff, osec->align);
    osec->addr = addr;
    osec->fileOff = isZeroFill(osec->flags) ?
0 : fileOff; 1051 osec->finalize(); 1052 1053 addr += osec->getSize(); 1054 fileOff += osec->getFileSize(); 1055 } 1056 } 1057 1058 void Writer::openFile() { 1059 Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr = 1060 FileOutputBuffer::create(config->outputFile, fileOff, 1061 FileOutputBuffer::F_executable); 1062 1063 if (!bufferOrErr) 1064 error("failed to open " + config->outputFile + ": " + 1065 llvm::toString(bufferOrErr.takeError())); 1066 else 1067 buffer = std::move(*bufferOrErr); 1068 } 1069 1070 void Writer::writeSections() { 1071 uint8_t *buf = buffer->getBufferStart(); 1072 for (const OutputSegment *seg : outputSegments) 1073 for (const OutputSection *osec : seg->getSections()) 1074 osec->writeTo(buf + osec->fileOff); 1075 } 1076 1077 // In order to utilize multiple cores, we first split the buffer into chunks, 1078 // compute a hash for each chunk, and then compute a hash value of the hash 1079 // values. 1080 void Writer::writeUuid() { 1081 TimeTraceScope timeScope("Computing UUID"); 1082 ArrayRef<uint8_t> data{buffer->getBufferStart(), buffer->getBufferEnd()}; 1083 unsigned chunkCount = parallel::strategy.compute_thread_count() * 10; 1084 // Round-up integer division 1085 size_t chunkSize = (data.size() + chunkCount - 1) / chunkCount; 1086 std::vector<ArrayRef<uint8_t>> chunks = split(data, chunkSize); 1087 std::vector<uint64_t> hashes(chunks.size()); 1088 parallelForEachN(0, chunks.size(), 1089 [&](size_t i) { hashes[i] = xxHash64(chunks[i]); }); 1090 uint64_t digest = xxHash64({reinterpret_cast<uint8_t *>(hashes.data()), 1091 hashes.size() * sizeof(uint64_t)}); 1092 uuidCommand->writeUuid(digest); 1093 } 1094 1095 void Writer::writeCodeSignature() { 1096 if (codeSignatureSection) 1097 codeSignatureSection->writeHashes(buffer->getBufferStart()); 1098 } 1099 1100 void Writer::writeOutputFile() { 1101 TimeTraceScope timeScope("Write output file"); 1102 openFile(); 1103 if (errorCount()) 1104 return; 1105 writeSections(); 1106 writeUuid(); 1107 
writeCodeSignature(); 1108 1109 if (auto e = buffer->commit()) 1110 error("failed to write to the output file: " + toString(std::move(e))); 1111 } 1112 1113 template <class LP> void Writer::run() { 1114 if (config->entry && !isa<Undefined>(config->entry)) 1115 prepareBranchTarget(config->entry); 1116 scanRelocations(); 1117 if (in.stubHelper->isNeeded()) 1118 in.stubHelper->setup(); 1119 scanSymbols(); 1120 createOutputSections<LP>(); 1121 foldIdenticalSections(); 1122 // After this point, we create no new segments; HOWEVER, we might 1123 // yet create branch-range extension thunks for architectures whose 1124 // hardware call instructions have limited range, e.g., ARM(64). 1125 // The thunks are created as InputSections interspersed among 1126 // the ordinary __TEXT,_text InputSections. 1127 sortSegmentsAndSections(); 1128 createLoadCommands<LP>(); 1129 finalizeAddresses(); 1130 finalizeLinkEditSegment(); 1131 writeMapFile(); 1132 writeOutputFile(); 1133 } 1134 1135 template <class LP> void macho::writeResult() { Writer().run<LP>(); } 1136 1137 void macho::createSyntheticSections() { 1138 in.header = make<MachHeaderSection>(); 1139 in.cStringSection = config->dedupLiterals ? make<CStringSection>() : nullptr; 1140 in.wordLiteralSection = 1141 config->dedupLiterals ? make<WordLiteralSection>() : nullptr; 1142 in.rebase = make<RebaseSection>(); 1143 in.binding = make<BindingSection>(); 1144 in.weakBinding = make<WeakBindingSection>(); 1145 in.lazyBinding = make<LazyBindingSection>(); 1146 in.exports = make<ExportSection>(); 1147 in.got = make<GotSection>(); 1148 in.tlvPointers = make<TlvPointerSection>(); 1149 in.lazyPointers = make<LazyPointerSection>(); 1150 in.stubs = make<StubsSection>(); 1151 in.stubHelper = make<StubHelperSection>(); 1152 in.unwindInfo = makeUnwindInfoSection(); 1153 1154 // This section contains space for just a single word, and will be used by 1155 // dyld to cache an address to the image loader it uses. 
1156 uint8_t *arr = bAlloc.Allocate<uint8_t>(target->wordSize); 1157 memset(arr, 0, target->wordSize); 1158 in.imageLoaderCache = make<ConcatInputSection>( 1159 segment_names::data, section_names::data, /*file=*/nullptr, 1160 ArrayRef<uint8_t>{arr, target->wordSize}, 1161 /*align=*/target->wordSize, /*flags=*/S_REGULAR); 1162 // References from dyld are not visible to us, so ensure this section is 1163 // always treated as live. 1164 in.imageLoaderCache->live = true; 1165 } 1166 1167 OutputSection *macho::firstTLVDataSection = nullptr; 1168 1169 template void macho::writeResult<LP64>(); 1170 template void macho::writeResult<ILP32>(); 1171