1 //===- Writer.cpp ---------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Writer.h" 10 #include "ConcatOutputSection.h" 11 #include "Config.h" 12 #include "InputFiles.h" 13 #include "InputSection.h" 14 #include "MapFile.h" 15 #include "OutputSection.h" 16 #include "OutputSegment.h" 17 #include "SymbolTable.h" 18 #include "Symbols.h" 19 #include "SyntheticSections.h" 20 #include "Target.h" 21 #include "UnwindInfoSection.h" 22 23 #include "lld/Common/Arrays.h" 24 #include "lld/Common/ErrorHandler.h" 25 #include "lld/Common/Memory.h" 26 #include "llvm/BinaryFormat/MachO.h" 27 #include "llvm/Config/llvm-config.h" 28 #include "llvm/Support/LEB128.h" 29 #include "llvm/Support/MathExtras.h" 30 #include "llvm/Support/Parallel.h" 31 #include "llvm/Support/Path.h" 32 #include "llvm/Support/TimeProfiler.h" 33 #include "llvm/Support/xxhash.h" 34 35 #include <algorithm> 36 37 using namespace llvm; 38 using namespace llvm::MachO; 39 using namespace llvm::sys; 40 using namespace lld; 41 using namespace lld::macho; 42 43 namespace { 44 class LCUuid; 45 46 class Writer { 47 public: 48 Writer() : buffer(errorHandler().outputBuffer) {} 49 50 void scanRelocations(); 51 void scanSymbols(); 52 template <class LP> void createOutputSections(); 53 template <class LP> void createLoadCommands(); 54 void finalizeAddresses(); 55 void finalizeLinkEditSegment(); 56 void assignAddresses(OutputSegment *); 57 58 void openFile(); 59 void writeSections(); 60 void writeUuid(); 61 void writeCodeSignature(); 62 void writeOutputFile(); 63 64 template <class LP> void run(); 65 66 std::unique_ptr<FileOutputBuffer> &buffer; 67 uint64_t addr = 0; 68 uint64_t fileOff = 0; 69 MachHeaderSection *header = nullptr; 70 StringTableSection *stringTableSection = nullptr; 71 SymtabSection *symtabSection = nullptr; 72 IndirectSymtabSection *indirectSymtabSection = nullptr; 73 CodeSignatureSection *codeSignatureSection = nullptr; 74 DataInCodeSection *dataInCodeSection = nullptr; 75 FunctionStartsSection *functionStartsSection = nullptr; 76 77 LCUuid *uuidCommand = nullptr; 78 OutputSegment *linkEditSegment = nullptr; 79 80 // Output sections are added to output segments in iteration order 81 // of ConcatOutputSection, so must have deterministic iteration order. 82 MapVector<NamePair, ConcatOutputSection *> concatOutputSections; 83 }; 84 85 // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information. 86 class LCDyldInfo final : public LoadCommand { 87 public: 88 LCDyldInfo(RebaseSection *rebaseSection, BindingSection *bindingSection, 89 WeakBindingSection *weakBindingSection, 90 LazyBindingSection *lazyBindingSection, 91 ExportSection *exportSection) 92 : rebaseSection(rebaseSection), bindingSection(bindingSection), 93 weakBindingSection(weakBindingSection), 94 lazyBindingSection(lazyBindingSection), exportSection(exportSection) {} 95 96 uint32_t getSize() const override { return sizeof(dyld_info_command); } 97 98 void writeTo(uint8_t *buf) const override { 99 auto *c = reinterpret_cast<dyld_info_command *>(buf); 100 c->cmd = LC_DYLD_INFO_ONLY; 101 c->cmdsize = getSize(); 102 if (rebaseSection->isNeeded()) { 103 c->rebase_off = rebaseSection->fileOff; 104 c->rebase_size = rebaseSection->getFileSize(); 105 } 106 if (bindingSection->isNeeded()) { 107 c->bind_off = bindingSection->fileOff; 108 c->bind_size = bindingSection->getFileSize(); 109 } 110 if (weakBindingSection->isNeeded()) { 111 c->weak_bind_off = weakBindingSection->fileOff; 112 c->weak_bind_size = weakBindingSection->getFileSize(); 113 } 114 if (lazyBindingSection->isNeeded()) { 115 c->lazy_bind_off = lazyBindingSection->fileOff; 116 c->lazy_bind_size = lazyBindingSection->getFileSize(); 117 } 118 if (exportSection->isNeeded()) { 119 c->export_off = exportSection->fileOff; 120 c->export_size = exportSection->getFileSize(); 121 } 122 } 123 124 RebaseSection *rebaseSection; 125 BindingSection *bindingSection; 126 WeakBindingSection *weakBindingSection; 127 LazyBindingSection *lazyBindingSection; 128 ExportSection *exportSection; 129 }; 130 131 class LCSubFramework final : public LoadCommand { 132 public: 133 LCSubFramework(StringRef umbrella) : umbrella(umbrella) {} 134 135 uint32_t getSize() const override { 136 return alignTo(sizeof(sub_framework_command) + umbrella.size() + 1, 137 target->wordSize); 138 } 139 140 void writeTo(uint8_t *buf) const override { 141 auto *c = reinterpret_cast<sub_framework_command *>(buf); 142 buf += sizeof(sub_framework_command); 143 144 c->cmd = LC_SUB_FRAMEWORK; 145 c->cmdsize = getSize(); 146 c->umbrella = sizeof(sub_framework_command); 147 148 memcpy(buf, umbrella.data(), umbrella.size()); 149 buf[umbrella.size()] = '\0'; 150 } 151 152 private: 153 const StringRef umbrella; 154 }; 155 156 class LCFunctionStarts final : public LoadCommand { 157 public: 158 explicit LCFunctionStarts(FunctionStartsSection *functionStartsSection) 159 : functionStartsSection(functionStartsSection) {} 160 161 uint32_t getSize() const override { return sizeof(linkedit_data_command); } 162 163 void writeTo(uint8_t *buf) const override { 164 auto *c = reinterpret_cast<linkedit_data_command *>(buf); 165 c->cmd = LC_FUNCTION_STARTS; 166 c->cmdsize = getSize(); 167 c->dataoff = functionStartsSection->fileOff; 168 c->datasize = functionStartsSection->getFileSize(); 169 } 170 171 private: 172 FunctionStartsSection *functionStartsSection; 173 }; 174 175 class LCDataInCode final : public LoadCommand { 176 public: 177 explicit LCDataInCode(DataInCodeSection *dataInCodeSection) 178 : dataInCodeSection(dataInCodeSection) {} 179 180 uint32_t getSize() const override { return sizeof(linkedit_data_command); } 181 182 void writeTo(uint8_t *buf) const override { 183 auto *c = reinterpret_cast<linkedit_data_command *>(buf); 184 c->cmd = LC_DATA_IN_CODE; 185 c->cmdsize = getSize(); 186 c->dataoff = dataInCodeSection->fileOff; 187 c->datasize = dataInCodeSection->getFileSize(); 188 } 189 190 private: 191 DataInCodeSection *dataInCodeSection; 192 }; 193 194 class LCDysymtab final : public LoadCommand { 195 public: 196 LCDysymtab(SymtabSection *symtabSection, 197 IndirectSymtabSection *indirectSymtabSection) 198 : symtabSection(symtabSection), 199 indirectSymtabSection(indirectSymtabSection) {} 200 201 uint32_t getSize() const override { return sizeof(dysymtab_command); } 202 203 void writeTo(uint8_t *buf) const override { 204 auto *c = reinterpret_cast<dysymtab_command *>(buf); 205 c->cmd = LC_DYSYMTAB; 206 c->cmdsize = getSize(); 207 208 c->ilocalsym = 0; 209 c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols(); 210 c->nextdefsym = symtabSection->getNumExternalSymbols(); 211 c->iundefsym = c->iextdefsym + c->nextdefsym; 212 c->nundefsym = symtabSection->getNumUndefinedSymbols(); 213 214 c->indirectsymoff = indirectSymtabSection->fileOff; 215 c->nindirectsyms = indirectSymtabSection->getNumSymbols(); 216 } 217 218 SymtabSection *symtabSection; 219 IndirectSymtabSection *indirectSymtabSection; 220 }; 221 222 template <class LP> class LCSegment final : public LoadCommand { 223 public: 224 LCSegment(StringRef name, OutputSegment *seg) : name(name), seg(seg) {} 225 226 uint32_t getSize() const override { 227 return sizeof(typename LP::segment_command) + 228 seg->numNonHiddenSections() * sizeof(typename LP::section); 229 } 230 231 void writeTo(uint8_t *buf) const override { 232 using SegmentCommand = typename LP::segment_command; 233 using Section = typename LP::section; 234 235 auto *c = reinterpret_cast<SegmentCommand *>(buf); 236 buf += sizeof(SegmentCommand); 237 238 c->cmd = LP::segmentLCType; 239 c->cmdsize = getSize(); 240 memcpy(c->segname, name.data(), name.size()); 241 c->fileoff = seg->fileOff; 242 c->maxprot = seg->maxProt; 243 c->initprot = seg->initProt; 244 245 if (seg->getSections().empty()) 246 return; 247 248 c->vmaddr = seg->firstSection()->addr; 249 c->vmsize = seg->vmSize; 250 c->filesize = seg->fileSize; 251 c->nsects = seg->numNonHiddenSections(); 252 253 for (const OutputSection *osec : seg->getSections()) { 254 if (osec->isHidden()) 255 continue; 256 257 auto *sectHdr = reinterpret_cast<Section *>(buf); 258 buf += sizeof(Section); 259 260 memcpy(sectHdr->sectname, osec->name.data(), osec->name.size()); 261 memcpy(sectHdr->segname, name.data(), name.size()); 262 263 sectHdr->addr = osec->addr; 264 sectHdr->offset = osec->fileOff; 265 sectHdr->align = Log2_32(osec->align); 266 sectHdr->flags = osec->flags; 267 sectHdr->size = osec->getSize(); 268 sectHdr->reserved1 = osec->reserved1; 269 sectHdr->reserved2 = osec->reserved2; 270 } 271 } 272 273 private: 274 StringRef name; 275 OutputSegment *seg; 276 }; 277 278 class LCMain final : public LoadCommand { 279 uint32_t getSize() const override { 280 return sizeof(structs::entry_point_command); 281 } 282 283 void writeTo(uint8_t *buf) const override { 284 auto *c = reinterpret_cast<structs::entry_point_command *>(buf); 285 c->cmd = LC_MAIN; 286 c->cmdsize = getSize(); 287 288 if (config->entry->isInStubs()) 289 c->entryoff = 290 in.stubs->fileOff + config->entry->stubsIndex * target->stubSize; 291 else 292 c->entryoff = config->entry->getVA() - in.header->addr; 293 294 c->stacksize = 0; 295 } 296 }; 297 298 class LCSymtab final : public LoadCommand { 299 public: 300 LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection) 301 : symtabSection(symtabSection), stringTableSection(stringTableSection) {} 302 303 uint32_t getSize() const override { return sizeof(symtab_command); } 304 305 void writeTo(uint8_t *buf) const override { 306 auto *c = reinterpret_cast<symtab_command *>(buf); 307 c->cmd = LC_SYMTAB; 308 c->cmdsize = getSize(); 309 c->symoff = symtabSection->fileOff; 310 c->nsyms = symtabSection->getNumSymbols(); 311 c->stroff = stringTableSection->fileOff; 312 c->strsize = stringTableSection->getFileSize(); 313 } 314 315 SymtabSection *symtabSection = nullptr; 316 StringTableSection *stringTableSection = nullptr; 317 }; 318 319 // There are several dylib load commands that share the same structure: 320 // * LC_LOAD_DYLIB 321 // * LC_ID_DYLIB 322 // * LC_REEXPORT_DYLIB 323 class LCDylib final : public LoadCommand { 324 public: 325 LCDylib(LoadCommandType type, StringRef path, 326 uint32_t compatibilityVersion = 0, uint32_t currentVersion = 0) 327 : type(type), path(path), compatibilityVersion(compatibilityVersion), 328 currentVersion(currentVersion) { 329 instanceCount++; 330 } 331 332 uint32_t getSize() const override { 333 return alignTo(sizeof(dylib_command) + path.size() + 1, 8); 334 } 335 336 void writeTo(uint8_t *buf) const override { 337 auto *c = reinterpret_cast<dylib_command *>(buf); 338 buf += sizeof(dylib_command); 339 340 c->cmd = type; 341 c->cmdsize = getSize(); 342 c->dylib.name = sizeof(dylib_command); 343 c->dylib.timestamp = 0; 344 c->dylib.compatibility_version = compatibilityVersion; 345 c->dylib.current_version = currentVersion; 346 347 memcpy(buf, path.data(), path.size()); 348 buf[path.size()] = '\0'; 349 } 350 351 static uint32_t getInstanceCount() { return instanceCount; } 352 353 private: 354 LoadCommandType type; 355 StringRef path; 356 uint32_t compatibilityVersion; 357 uint32_t currentVersion; 358 static uint32_t instanceCount; 359 }; 360 361 uint32_t LCDylib::instanceCount = 0; 362 363 class LCLoadDylinker final : public LoadCommand { 364 public: 365 uint32_t getSize() const override { 366 return alignTo(sizeof(dylinker_command) + path.size() + 1, 8); 367 } 368 369 void writeTo(uint8_t *buf) const override { 370 auto *c = reinterpret_cast<dylinker_command *>(buf); 371 buf += sizeof(dylinker_command); 372 373 c->cmd = LC_LOAD_DYLINKER; 374 c->cmdsize = getSize(); 375 c->name = sizeof(dylinker_command); 376 377 memcpy(buf, path.data(), path.size()); 378 buf[path.size()] = '\0'; 379 } 380 381 private: 382 // Recent versions of Darwin won't run any binary that has dyld at a 383 // different location. 384 const StringRef path = "/usr/lib/dyld"; 385 }; 386 387 class LCRPath final : public LoadCommand { 388 public: 389 explicit LCRPath(StringRef path) : path(path) {} 390 391 uint32_t getSize() const override { 392 return alignTo(sizeof(rpath_command) + path.size() + 1, target->wordSize); 393 } 394 395 void writeTo(uint8_t *buf) const override { 396 auto *c = reinterpret_cast<rpath_command *>(buf); 397 buf += sizeof(rpath_command); 398 399 c->cmd = LC_RPATH; 400 c->cmdsize = getSize(); 401 c->path = sizeof(rpath_command); 402 403 memcpy(buf, path.data(), path.size()); 404 buf[path.size()] = '\0'; 405 } 406 407 private: 408 StringRef path; 409 }; 410 411 class LCMinVersion final : public LoadCommand { 412 public: 413 explicit LCMinVersion(const PlatformInfo &platformInfo) 414 : platformInfo(platformInfo) {} 415 416 uint32_t getSize() const override { return sizeof(version_min_command); } 417 418 void writeTo(uint8_t *buf) const override { 419 auto *c = reinterpret_cast<version_min_command *>(buf); 420 switch (platformInfo.target.Platform) { 421 case PlatformKind::macOS: 422 c->cmd = LC_VERSION_MIN_MACOSX; 423 break; 424 case PlatformKind::iOS: 425 case PlatformKind::iOSSimulator: 426 c->cmd = LC_VERSION_MIN_IPHONEOS; 427 break; 428 case PlatformKind::tvOS: 429 case PlatformKind::tvOSSimulator: 430 c->cmd = LC_VERSION_MIN_TVOS; 431 break; 432 case PlatformKind::watchOS: 433 case PlatformKind::watchOSSimulator: 434 c->cmd = LC_VERSION_MIN_WATCHOS; 435 break; 436 default: 437 llvm_unreachable("invalid platform"); 438 break; 439 } 440 c->cmdsize = getSize(); 441 c->version = encodeVersion(platformInfo.minimum); 442 c->sdk = encodeVersion(platformInfo.sdk); 443 } 444 445 private: 446 const PlatformInfo &platformInfo; 447 }; 448 449 class LCBuildVersion final : public LoadCommand { 450 public: 451 explicit LCBuildVersion(const PlatformInfo &platformInfo) 452 : platformInfo(platformInfo) {} 453 454 const int ntools = 1; 455 456 uint32_t getSize() const override { 457 return sizeof(build_version_command) + ntools * sizeof(build_tool_version); 458 } 459 460 void writeTo(uint8_t *buf) const override { 461 auto *c = reinterpret_cast<build_version_command *>(buf); 462 c->cmd = LC_BUILD_VERSION; 463 c->cmdsize = getSize(); 464 c->platform = static_cast<uint32_t>(platformInfo.target.Platform); 465 c->minos = encodeVersion(platformInfo.minimum); 466 c->sdk = encodeVersion(platformInfo.sdk); 467 c->ntools = ntools; 468 auto *t = reinterpret_cast<build_tool_version *>(&c[1]); 469 t->tool = TOOL_LD; 470 t->version = encodeVersion(llvm::VersionTuple( 471 LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH)); 472 } 473 474 private: 475 const PlatformInfo &platformInfo; 476 }; 477 478 // Stores a unique identifier for the output file based on an MD5 hash of its 479 // contents. In order to hash the contents, we must first write them, but 480 // LC_UUID itself must be part of the written contents in order for all the 481 // offsets to be calculated correctly. We resolve this circular paradox by 482 // first writing an LC_UUID with an all-zero UUID, then updating the UUID with 483 // its real value later. 484 class LCUuid final : public LoadCommand { 485 public: 486 uint32_t getSize() const override { return sizeof(uuid_command); } 487 488 void writeTo(uint8_t *buf) const override { 489 auto *c = reinterpret_cast<uuid_command *>(buf); 490 c->cmd = LC_UUID; 491 c->cmdsize = getSize(); 492 uuidBuf = c->uuid; 493 } 494 495 void writeUuid(uint64_t digest) const { 496 // xxhash only gives us 8 bytes, so put some fixed data in the other half. 497 static_assert(sizeof(uuid_command::uuid) == 16, "unexpected uuid size"); 498 memcpy(uuidBuf, "LLD\xa1UU1D", 8); 499 memcpy(uuidBuf + 8, &digest, 8); 500 501 // RFC 4122 conformance. We need to fix 4 bits in byte 6 and 2 bits in 502 // byte 8. Byte 6 is already fine due to the fixed data we put in. We don't 503 // want to lose bits of the digest in byte 8, so swap that with a byte of 504 // fixed data that happens to have the right bits set. 505 std::swap(uuidBuf[3], uuidBuf[8]); 506 507 // Claim that this is an MD5-based hash. It isn't, but this signals that 508 // this is not a time-based and not a random hash. MD5 seems like the least 509 // bad lie we can put here. 510 assert((uuidBuf[6] & 0xf0) == 0x30 && "See RFC 4122 Sections 4.2.2, 4.1.3"); 511 assert((uuidBuf[8] & 0xc0) == 0x80 && "See RFC 4122 Section 4.2.2"); 512 } 513 514 mutable uint8_t *uuidBuf; 515 }; 516 517 template <class LP> class LCEncryptionInfo final : public LoadCommand { 518 public: 519 uint32_t getSize() const override { 520 return sizeof(typename LP::encryption_info_command); 521 } 522 523 void writeTo(uint8_t *buf) const override { 524 using EncryptionInfo = typename LP::encryption_info_command; 525 auto *c = reinterpret_cast<EncryptionInfo *>(buf); 526 buf += sizeof(EncryptionInfo); 527 c->cmd = LP::encryptionInfoLCType; 528 c->cmdsize = getSize(); 529 c->cryptoff = in.header->getSize(); 530 auto it = find_if(outputSegments, [](const OutputSegment *seg) { 531 return seg->name == segment_names::text; 532 }); 533 assert(it != outputSegments.end()); 534 c->cryptsize = (*it)->fileSize - c->cryptoff; 535 } 536 }; 537 538 class LCCodeSignature final : public LoadCommand { 539 public: 540 LCCodeSignature(CodeSignatureSection *section) : section(section) {} 541 542 uint32_t getSize() const override { return sizeof(linkedit_data_command); } 543 544 void writeTo(uint8_t *buf) const override { 545 auto *c = reinterpret_cast<linkedit_data_command *>(buf); 546 c->cmd = LC_CODE_SIGNATURE; 547 c->cmdsize = getSize(); 548 c->dataoff = static_cast<uint32_t>(section->fileOff); 549 c->datasize = section->getSize(); 550 } 551 552 CodeSignatureSection *section; 553 }; 554 555 } // namespace 556 557 // Add stubs and bindings where necessary (e.g. if the symbol is a 558 // DylibSymbol.) 559 static void prepareBranchTarget(Symbol *sym) { 560 if (auto *dysym = dyn_cast<DylibSymbol>(sym)) { 561 if (in.stubs->addEntry(dysym)) { 562 if (sym->isWeakDef()) { 563 in.binding->addEntry(dysym, in.lazyPointers->isec, 564 sym->stubsIndex * target->wordSize); 565 in.weakBinding->addEntry(sym, in.lazyPointers->isec, 566 sym->stubsIndex * target->wordSize); 567 } else { 568 in.lazyBinding->addEntry(dysym); 569 } 570 } 571 } else if (auto *defined = dyn_cast<Defined>(sym)) { 572 if (defined->isExternalWeakDef()) { 573 if (in.stubs->addEntry(sym)) { 574 in.rebase->addEntry(in.lazyPointers->isec, 575 sym->stubsIndex * target->wordSize); 576 in.weakBinding->addEntry(sym, in.lazyPointers->isec, 577 sym->stubsIndex * target->wordSize); 578 } 579 } 580 } else { 581 llvm_unreachable("invalid branch target symbol type"); 582 } 583 } 584 585 // Can a symbol's address can only be resolved at runtime? 586 static bool needsBinding(const Symbol *sym) { 587 if (isa<DylibSymbol>(sym)) 588 return true; 589 if (const auto *defined = dyn_cast<Defined>(sym)) 590 return defined->isExternalWeakDef(); 591 return false; 592 } 593 594 static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec, 595 const Reloc &r) { 596 const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type); 597 598 if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) { 599 prepareBranchTarget(sym); 600 } else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) { 601 if (relocAttrs.hasAttr(RelocAttrBits::POINTER) || needsBinding(sym)) 602 in.got->addEntry(sym); 603 } else if (relocAttrs.hasAttr(RelocAttrBits::TLV)) { 604 if (needsBinding(sym)) 605 in.tlvPointers->addEntry(sym); 606 } else if (relocAttrs.hasAttr(RelocAttrBits::UNSIGNED)) { 607 // References from thread-local variable sections are treated as offsets 608 // relative to the start of the referent section, and therefore have no 609 // need of rebase opcodes. 610 if (!(isThreadLocalVariables(isec->getFlags()) && isa<Defined>(sym))) 611 addNonLazyBindingEntries(sym, isec, r.offset, r.addend); 612 } 613 } 614 615 void Writer::scanRelocations() { 616 TimeTraceScope timeScope("Scan relocations"); 617 for (ConcatInputSection *isec : inputSections) { 618 if (isec->shouldOmitFromOutput()) 619 continue; 620 621 for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) { 622 Reloc &r = *it; 623 if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) { 624 // Skip over the following UNSIGNED relocation -- it's just there as the 625 // minuend, and doesn't have the usual UNSIGNED semantics. We don't want 626 // to emit rebase opcodes for it. 627 it++; 628 continue; 629 } 630 if (auto *sym = r.referent.dyn_cast<Symbol *>()) { 631 if (auto *undefined = dyn_cast<Undefined>(sym)) 632 treatUndefinedSymbol(*undefined); 633 // treatUndefinedSymbol() can replace sym with a DylibSymbol; re-check. 634 if (!isa<Undefined>(sym) && validateSymbolRelocation(sym, isec, r)) 635 prepareSymbolRelocation(sym, isec, r); 636 } else { 637 // Canonicalize the referent so that later accesses in Writer won't 638 // have to worry about it. Perhaps we should do this for Defined::isec 639 // too... 640 auto *referentIsec = r.referent.get<InputSection *>(); 641 r.referent = referentIsec->canonical(); 642 if (!r.pcrel) 643 in.rebase->addEntry(isec, r.offset); 644 } 645 } 646 } 647 648 in.unwindInfo->prepareRelocations(); 649 } 650 651 void Writer::scanSymbols() { 652 TimeTraceScope timeScope("Scan symbols"); 653 for (const Symbol *sym : symtab->getSymbols()) { 654 if (const auto *defined = dyn_cast<Defined>(sym)) { 655 if (defined->overridesWeakDef && defined->isLive()) 656 in.weakBinding->addNonWeakDefinition(defined); 657 } else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) { 658 // This branch intentionally doesn't check isLive(). 659 if (dysym->isDynamicLookup()) 660 continue; 661 dysym->getFile()->refState = 662 std::max(dysym->getFile()->refState, dysym->getRefState()); 663 } 664 } 665 } 666 667 // TODO: ld64 enforces the old load commands in a few other cases. 668 static bool useLCBuildVersion(const PlatformInfo &platformInfo) { 669 static const std::map<PlatformKind, llvm::VersionTuple> minVersion = { 670 {PlatformKind::macOS, llvm::VersionTuple(10, 14)}, 671 {PlatformKind::iOS, llvm::VersionTuple(12, 0)}, 672 {PlatformKind::iOSSimulator, llvm::VersionTuple(13, 0)}, 673 {PlatformKind::tvOS, llvm::VersionTuple(12, 0)}, 674 {PlatformKind::tvOSSimulator, llvm::VersionTuple(13, 0)}, 675 {PlatformKind::watchOS, llvm::VersionTuple(5, 0)}, 676 {PlatformKind::watchOSSimulator, llvm::VersionTuple(6, 0)}}; 677 auto it = minVersion.find(platformInfo.target.Platform); 678 return it == minVersion.end() ? true : platformInfo.minimum >= it->second; 679 } 680 681 template <class LP> void Writer::createLoadCommands() { 682 uint8_t segIndex = 0; 683 for (OutputSegment *seg : outputSegments) { 684 in.header->addLoadCommand(make<LCSegment<LP>>(seg->name, seg)); 685 seg->index = segIndex++; 686 } 687 688 in.header->addLoadCommand(make<LCDyldInfo>( 689 in.rebase, in.binding, in.weakBinding, in.lazyBinding, in.exports)); 690 in.header->addLoadCommand(make<LCSymtab>(symtabSection, stringTableSection)); 691 in.header->addLoadCommand( 692 make<LCDysymtab>(symtabSection, indirectSymtabSection)); 693 if (!config->umbrella.empty()) 694 in.header->addLoadCommand(make<LCSubFramework>(config->umbrella)); 695 if (functionStartsSection) 696 in.header->addLoadCommand(make<LCFunctionStarts>(functionStartsSection)); 697 if (dataInCodeSection) 698 in.header->addLoadCommand(make<LCDataInCode>(dataInCodeSection)); 699 if (config->emitEncryptionInfo) 700 in.header->addLoadCommand(make<LCEncryptionInfo<LP>>()); 701 for (StringRef path : config->runtimePaths) 702 in.header->addLoadCommand(make<LCRPath>(path)); 703 704 switch (config->outputType) { 705 case MH_EXECUTE: 706 in.header->addLoadCommand(make<LCLoadDylinker>()); 707 in.header->addLoadCommand(make<LCMain>()); 708 break; 709 case MH_DYLIB: 710 in.header->addLoadCommand(make<LCDylib>(LC_ID_DYLIB, config->installName, 711 config->dylibCompatibilityVersion, 712 config->dylibCurrentVersion)); 713 break; 714 case MH_BUNDLE: 715 break; 716 default: 717 llvm_unreachable("unhandled output file type"); 718 } 719 720 uuidCommand = make<LCUuid>(); 721 in.header->addLoadCommand(uuidCommand); 722 723 if (useLCBuildVersion(config->platformInfo)) 724 in.header->addLoadCommand(make<LCBuildVersion>(config->platformInfo)); 725 else 726 in.header->addLoadCommand(make<LCMinVersion>(config->platformInfo)); 727 728 int64_t dylibOrdinal = 1; 729 DenseMap<StringRef, int64_t> ordinalForInstallName; 730 for (InputFile *file : inputFiles) { 731 if (auto *dylibFile = dyn_cast<DylibFile>(file)) { 732 if (dylibFile->isBundleLoader) { 733 dylibFile->ordinal = BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE; 734 // Shortcut since bundle-loader does not re-export the symbols. 735 736 dylibFile->reexport = false; 737 continue; 738 } 739 740 // Don't emit load commands for a dylib that is not referenced if: 741 // - it was added implicitly (via a reexport, an LC_LOAD_DYLINKER -- 742 // if it's on the linker command line, it's explicit) 743 // - or it's marked MH_DEAD_STRIPPABLE_DYLIB 744 // - or the flag -dead_strip_dylibs is used 745 // FIXME: `isReferenced()` is currently computed before dead code 746 // stripping, so references from dead code keep a dylib alive. This 747 // matches ld64, but it's something we should do better. 748 if (!dylibFile->isReferenced() && !dylibFile->forceNeeded && 749 (!dylibFile->explicitlyLinked || dylibFile->deadStrippable || 750 config->deadStripDylibs)) 751 continue; 752 753 // Several DylibFiles can have the same installName. Only emit a single 754 // load command for that installName and give all these DylibFiles the 755 // same ordinal. 756 // This can happen in several cases: 757 // - a new framework could change its installName to an older 758 // framework name via an $ld$ symbol depending on platform_version 759 // - symlinks (for example, libpthread.tbd is a symlink to libSystem.tbd; 760 // Foo.framework/Foo.tbd is usually a symlink to 761 // Foo.framework/Versions/Current/Foo.tbd, where 762 // Foo.framework/Versions/Current is usually a symlink to 763 // Foo.framework/Versions/A) 764 // - a framework can be linked both explicitly on the linker 765 // command line and implicitly as a reexport from a different 766 // framework. The re-export will usually point to the tbd file 767 // in Foo.framework/Versions/A/Foo.tbd, while the explicit link will 768 // usually find Foo.framework/Foo.tbd. These are usually symlinks, 769 // but in a --reproduce archive they will be identical but distinct 770 // files. 771 // In the first case, *semantically distinct* DylibFiles will have the 772 // same installName. 773 int64_t &ordinal = ordinalForInstallName[dylibFile->installName]; 774 if (ordinal) { 775 dylibFile->ordinal = ordinal; 776 continue; 777 } 778 779 ordinal = dylibFile->ordinal = dylibOrdinal++; 780 LoadCommandType lcType = 781 dylibFile->forceWeakImport || dylibFile->refState == RefState::Weak 782 ? LC_LOAD_WEAK_DYLIB 783 : LC_LOAD_DYLIB; 784 in.header->addLoadCommand(make<LCDylib>(lcType, dylibFile->installName, 785 dylibFile->compatibilityVersion, 786 dylibFile->currentVersion)); 787 788 if (dylibFile->reexport) 789 in.header->addLoadCommand( 790 make<LCDylib>(LC_REEXPORT_DYLIB, dylibFile->installName)); 791 } 792 } 793 794 if (codeSignatureSection) 795 in.header->addLoadCommand(make<LCCodeSignature>(codeSignatureSection)); 796 797 const uint32_t MACOS_MAXPATHLEN = 1024; 798 config->headerPad = std::max( 799 config->headerPad, (config->headerPadMaxInstallNames 800 ? LCDylib::getInstanceCount() * MACOS_MAXPATHLEN 801 : 0)); 802 } 803 804 static size_t getSymbolPriority(const SymbolPriorityEntry &entry, 805 const InputFile *f) { 806 // We don't use toString(InputFile *) here because it returns the full path 807 // for object files, and we only want the basename. 808 StringRef filename; 809 if (f->archiveName.empty()) 810 filename = path::filename(f->getName()); 811 else 812 filename = saver.save(path::filename(f->archiveName) + "(" + 813 path::filename(f->getName()) + ")"); 814 return std::max(entry.objectFiles.lookup(filename), entry.anyObjectFile); 815 } 816 817 // Each section gets assigned the priority of the highest-priority symbol it 818 // contains. 819 static DenseMap<const InputSection *, size_t> buildInputSectionPriorities() { 820 DenseMap<const InputSection *, size_t> sectionPriorities; 821 822 if (config->priorities.empty()) 823 return sectionPriorities; 824 825 auto addSym = [&](Defined &sym) { 826 auto it = config->priorities.find(sym.getName()); 827 if (it == config->priorities.end()) 828 return; 829 830 SymbolPriorityEntry &entry = it->second; 831 size_t &priority = sectionPriorities[sym.isec]; 832 priority = 833 std::max(priority, getSymbolPriority(entry, sym.isec->getFile())); 834 }; 835 836 // TODO: Make sure this handles weak symbols correctly. 837 for (const InputFile *file : inputFiles) { 838 if (isa<ObjFile>(file)) 839 for (Symbol *sym : file->symbols) 840 if (auto *d = dyn_cast_or_null<Defined>(sym)) 841 addSym(*d); 842 } 843 844 return sectionPriorities; 845 } 846 847 // Sorting only can happen once all outputs have been collected. Here we sort 848 // segments, output sections within each segment, and input sections within each 849 // output segment. 850 static void sortSegmentsAndSections() { 851 TimeTraceScope timeScope("Sort segments and sections"); 852 sortOutputSegments(); 853 854 DenseMap<const InputSection *, size_t> isecPriorities = 855 buildInputSectionPriorities(); 856 857 uint32_t sectionIndex = 0; 858 for (OutputSegment *seg : outputSegments) { 859 seg->sortOutputSections(); 860 for (OutputSection *osec : seg->getSections()) { 861 // Now that the output sections are sorted, assign the final 862 // output section indices. 863 if (!osec->isHidden()) 864 osec->index = ++sectionIndex; 865 if (!firstTLVDataSection && isThreadLocalData(osec->flags)) 866 firstTLVDataSection = osec; 867 868 if (!isecPriorities.empty()) { 869 if (auto *merged = dyn_cast<ConcatOutputSection>(osec)) { 870 llvm::stable_sort(merged->inputs, 871 [&](InputSection *a, InputSection *b) { 872 return isecPriorities[a] > isecPriorities[b]; 873 }); 874 } 875 } 876 } 877 } 878 } 879 880 NamePair macho::maybeRenameSection(NamePair key) { 881 auto newNames = config->sectionRenameMap.find(key); 882 if (newNames != config->sectionRenameMap.end()) 883 return newNames->second; 884 auto newName = config->segmentRenameMap.find(key.first); 885 if (newName != config->segmentRenameMap.end()) 886 return std::make_pair(newName->second, key.second); 887 return key; 888 } 889 890 template <class LP> void Writer::createOutputSections() { 891 TimeTraceScope timeScope("Create output sections"); 892 // First, create hidden sections 893 stringTableSection = make<StringTableSection>(); 894 symtabSection = makeSymtabSection<LP>(*stringTableSection); 895 indirectSymtabSection = make<IndirectSymtabSection>(); 896 if (config->adhocCodesign) 897 codeSignatureSection = make<CodeSignatureSection>(); 898 if (config->emitDataInCodeInfo) 899 dataInCodeSection = make<DataInCodeSection>(); 900 if (config->emitFunctionStarts) 901 functionStartsSection = make<FunctionStartsSection>(); 902 if (config->emitBitcodeBundle) 903 make<BitcodeBundleSection>(); 904 905 switch (config->outputType) { 906 case MH_EXECUTE: 907 make<PageZeroSection>(); 908 break; 909 case MH_DYLIB: 910 case MH_BUNDLE: 911 break; 912 default: 913 llvm_unreachable("unhandled output file type"); 914 } 915 916 // Then add input sections to output sections. 917 for (ConcatInputSection *isec : inputSections) { 918 if (isec->shouldOmitFromOutput()) 919 continue; 920 NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()}); 921 ConcatOutputSection *&osec = concatOutputSections[names]; 922 if (!osec) 923 osec = make<ConcatOutputSection>(names.second); 924 osec->addInput(isec); 925 osec->inputOrder = 926 std::min(osec->inputOrder, static_cast<int>(isec->outSecOff)); 927 } 928 929 // Once all the inputs are added, we can finalize the output section 930 // properties and create the corresponding output segments. 931 for (const auto &it : concatOutputSections) { 932 StringRef segname = it.first.first; 933 ConcatOutputSection *osec = it.second; 934 assert(segname != segment_names::ld); 935 getOrCreateOutputSegment(segname)->addOutputSection(osec); 936 } 937 938 for (SyntheticSection *ssec : syntheticSections) { 939 auto it = concatOutputSections.find({ssec->segname, ssec->name}); 940 if (ssec->isNeeded()) { 941 if (it == concatOutputSections.end()) { 942 getOrCreateOutputSegment(ssec->segname)->addOutputSection(ssec); 943 } else { 944 fatal("section from " + 945 toString(it->second->firstSection()->getFile()) + 946 " conflicts with synthetic section " + ssec->segname + "," + 947 ssec->name); 948 } 949 } 950 } 951 952 // dyld requires __LINKEDIT segment to always exist (even if empty). 953 linkEditSegment = getOrCreateOutputSegment(segment_names::linkEdit); 954 } 955 956 void Writer::finalizeAddresses() { 957 TimeTraceScope timeScope("Finalize addresses"); 958 uint64_t pageSize = target->getPageSize(); 959 // Ensure that segments (and the sections they contain) are allocated 960 // addresses in ascending order, which dyld requires. 961 // 962 // Note that at this point, __LINKEDIT sections are empty, but we need to 963 // determine addresses of other segments/sections before generating its 964 // contents. 965 for (OutputSegment *seg : outputSegments) { 966 if (seg == linkEditSegment) 967 continue; 968 assignAddresses(seg); 969 // codesign / libstuff checks for segment ordering by verifying that 970 // `fileOff + fileSize == next segment fileOff`. So we call alignTo() before 971 // (instead of after) computing fileSize to ensure that the segments are 972 // contiguous. We handle addr / vmSize similarly for the same reason. 973 fileOff = alignTo(fileOff, pageSize); 974 addr = alignTo(addr, pageSize); 975 seg->vmSize = addr - seg->firstSection()->addr; 976 seg->fileSize = fileOff - seg->fileOff; 977 } 978 } 979 980 void Writer::finalizeLinkEditSegment() { 981 TimeTraceScope timeScope("Finalize __LINKEDIT segment"); 982 // Fill __LINKEDIT contents. 983 std::vector<LinkEditSection *> linkEditSections{ 984 in.rebase, 985 in.binding, 986 in.weakBinding, 987 in.lazyBinding, 988 in.exports, 989 symtabSection, 990 indirectSymtabSection, 991 dataInCodeSection, 992 functionStartsSection, 993 }; 994 parallelForEach(linkEditSections, [](LinkEditSection *osec) { 995 if (osec) 996 osec->finalizeContents(); 997 }); 998 999 // Now that __LINKEDIT is filled out, do a proper calculation of its 1000 // addresses and offsets. 1001 assignAddresses(linkEditSegment); 1002 // No need to page-align fileOff / addr here since this is the last segment. 1003 linkEditSegment->vmSize = addr - linkEditSegment->firstSection()->addr; 1004 linkEditSegment->fileSize = fileOff - linkEditSegment->fileOff; 1005 } 1006 1007 void Writer::assignAddresses(OutputSegment *seg) { 1008 seg->fileOff = fileOff; 1009 1010 for (OutputSection *osec : seg->getSections()) { 1011 if (!osec->isNeeded()) 1012 continue; 1013 addr = alignTo(addr, osec->align); 1014 fileOff = alignTo(fileOff, osec->align); 1015 osec->addr = addr; 1016 osec->fileOff = isZeroFill(osec->flags) ? 0 : fileOff; 1017 osec->finalize(); 1018 1019 addr += osec->getSize(); 1020 fileOff += osec->getFileSize(); 1021 } 1022 } 1023 1024 void Writer::openFile() { 1025 Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr = 1026 FileOutputBuffer::create(config->outputFile, fileOff, 1027 FileOutputBuffer::F_executable); 1028 1029 if (!bufferOrErr) 1030 error("failed to open " + config->outputFile + ": " + 1031 llvm::toString(bufferOrErr.takeError())); 1032 else 1033 buffer = std::move(*bufferOrErr); 1034 } 1035 1036 void Writer::writeSections() { 1037 uint8_t *buf = buffer->getBufferStart(); 1038 for (const OutputSegment *seg : outputSegments) 1039 for (const OutputSection *osec : seg->getSections()) 1040 osec->writeTo(buf + osec->fileOff); 1041 } 1042 1043 // In order to utilize multiple cores, we first split the buffer into chunks, 1044 // compute a hash for each chunk, and then compute a hash value of the hash 1045 // values. 1046 void Writer::writeUuid() { 1047 TimeTraceScope timeScope("Computing UUID"); 1048 ArrayRef<uint8_t> data{buffer->getBufferStart(), buffer->getBufferEnd()}; 1049 unsigned chunkCount = parallel::strategy.compute_thread_count() * 10; 1050 // Round-up integer division 1051 size_t chunkSize = (data.size() + chunkCount - 1) / chunkCount; 1052 std::vector<ArrayRef<uint8_t>> chunks = split(data, chunkSize); 1053 std::vector<uint64_t> hashes(chunks.size()); 1054 parallelForEachN(0, chunks.size(), 1055 [&](size_t i) { hashes[i] = xxHash64(chunks[i]); }); 1056 uint64_t digest = xxHash64({reinterpret_cast<uint8_t *>(hashes.data()), 1057 hashes.size() * sizeof(uint64_t)}); 1058 uuidCommand->writeUuid(digest); 1059 } 1060 1061 void Writer::writeCodeSignature() { 1062 if (codeSignatureSection) 1063 codeSignatureSection->writeHashes(buffer->getBufferStart()); 1064 } 1065 1066 void Writer::writeOutputFile() { 1067 TimeTraceScope timeScope("Write output file"); 1068 openFile(); 1069 if (errorCount()) 1070 return; 1071 writeSections(); 1072 writeUuid(); 1073 writeCodeSignature(); 1074 1075 if (auto e = buffer->commit()) 1076 error("failed to write to the output file: " + toString(std::move(e))); 1077 } 1078 1079 template <class LP> void Writer::run() { 1080 if (config->entry && !isa<Undefined>(config->entry)) 1081 prepareBranchTarget(config->entry); 1082 scanRelocations(); 1083 if (in.stubHelper->isNeeded()) 1084 in.stubHelper->setup(); 1085 scanSymbols(); 1086 createOutputSections<LP>(); 1087 // After this point, we create no new segments; HOWEVER, we might 1088 // yet create branch-range extension thunks for architectures whose 1089 // hardware call instructions have limited range, e.g., ARM(64). 1090 // The thunks are created as InputSections interspersed among 1091 // the ordinary __TEXT,_text InputSections. 1092 sortSegmentsAndSections(); 1093 createLoadCommands<LP>(); 1094 finalizeAddresses(); 1095 finalizeLinkEditSegment(); 1096 writeMapFile(); 1097 writeOutputFile(); 1098 } 1099 1100 template <class LP> void macho::writeResult() { Writer().run<LP>(); } 1101 1102 void macho::createSyntheticSections() { 1103 in.header = make<MachHeaderSection>(); 1104 if (config->dedupLiterals) { 1105 in.cStringSection = make<DeduplicatedCStringSection>(); 1106 } else { 1107 in.cStringSection = make<CStringSection>(); 1108 } 1109 in.wordLiteralSection = 1110 config->dedupLiterals ? make<WordLiteralSection>() : nullptr; 1111 in.rebase = make<RebaseSection>(); 1112 in.binding = make<BindingSection>(); 1113 in.weakBinding = make<WeakBindingSection>(); 1114 in.lazyBinding = make<LazyBindingSection>(); 1115 in.exports = make<ExportSection>(); 1116 in.got = make<GotSection>(); 1117 in.tlvPointers = make<TlvPointerSection>(); 1118 in.lazyPointers = make<LazyPointerSection>(); 1119 in.stubs = make<StubsSection>(); 1120 in.stubHelper = make<StubHelperSection>(); 1121 in.unwindInfo = makeUnwindInfoSection(); 1122 1123 // This section contains space for just a single word, and will be used by 1124 // dyld to cache an address to the image loader it uses. 1125 uint8_t *arr = bAlloc.Allocate<uint8_t>(target->wordSize); 1126 memset(arr, 0, target->wordSize); 1127 in.imageLoaderCache = make<ConcatInputSection>( 1128 segment_names::data, section_names::data, /*file=*/nullptr, 1129 ArrayRef<uint8_t>{arr, target->wordSize}, 1130 /*align=*/target->wordSize, /*flags=*/S_REGULAR); 1131 // References from dyld are not visible to us, so ensure this section is 1132 // always treated as live. 1133 in.imageLoaderCache->live = true; 1134 } 1135 1136 OutputSection *macho::firstTLVDataSection = nullptr; 1137 1138 template void macho::writeResult<LP64>(); 1139 template void macho::writeResult<ILP32>(); 1140