1 //===- SyntheticSections.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "SyntheticSections.h" 10 #include "ConcatOutputSection.h" 11 #include "Config.h" 12 #include "ExportTrie.h" 13 #include "InputFiles.h" 14 #include "MachOStructs.h" 15 #include "OutputSegment.h" 16 #include "SymbolTable.h" 17 #include "Symbols.h" 18 19 #include "lld/Common/CommonLinkerContext.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/Config/llvm-config.h" 22 #include "llvm/Support/EndianStream.h" 23 #include "llvm/Support/FileSystem.h" 24 #include "llvm/Support/LEB128.h" 25 #include "llvm/Support/Path.h" 26 #include "llvm/Support/SHA256.h" 27 28 #if defined(__APPLE__) 29 #include <sys/mman.h> 30 #endif 31 32 #ifdef LLVM_HAVE_LIBXAR 33 #include <fcntl.h> 34 extern "C" { 35 #include <xar/xar.h> 36 } 37 #endif 38 39 using namespace llvm; 40 using namespace llvm::MachO; 41 using namespace llvm::support; 42 using namespace llvm::support::endian; 43 using namespace lld; 44 using namespace lld::macho; 45 46 InStruct macho::in; 47 std::vector<SyntheticSection *> macho::syntheticSections; 48 49 SyntheticSection::SyntheticSection(const char *segname, const char *name) 50 : OutputSection(SyntheticKind, name) { 51 std::tie(this->segname, this->name) = maybeRenameSection({segname, name}); 52 isec = makeSyntheticInputSection(segname, name); 53 isec->parent = this; 54 syntheticSections.push_back(this); 55 } 56 57 // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts 58 // from the beginning of the file (i.e. the header). 59 MachHeaderSection::MachHeaderSection() 60 : SyntheticSection(segment_names::text, section_names::header) { 61 // XXX: This is a hack. (See D97007) 62 // Setting the index to 1 to pretend that this section is the text 63 // section. 64 index = 1; 65 isec->isFinal = true; 66 } 67 68 void MachHeaderSection::addLoadCommand(LoadCommand *lc) { 69 loadCommands.push_back(lc); 70 sizeOfCmds += lc->getSize(); 71 } 72 73 uint64_t MachHeaderSection::getSize() const { 74 uint64_t size = target->headerSize + sizeOfCmds + config->headerPad; 75 // If we are emitting an encryptable binary, our load commands must have a 76 // separate (non-encrypted) page to themselves. 77 if (config->emitEncryptionInfo) 78 size = alignTo(size, target->getPageSize()); 79 return size; 80 } 81 82 static uint32_t cpuSubtype() { 83 uint32_t subtype = target->cpuSubtype; 84 85 if (config->outputType == MH_EXECUTE && !config->staticLink && 86 target->cpuSubtype == CPU_SUBTYPE_X86_64_ALL && 87 config->platform() == PLATFORM_MACOS && 88 config->platformInfo.minimum >= VersionTuple(10, 5)) 89 subtype |= CPU_SUBTYPE_LIB64; 90 91 return subtype; 92 } 93 94 void MachHeaderSection::writeTo(uint8_t *buf) const { 95 auto *hdr = reinterpret_cast<mach_header *>(buf); 96 hdr->magic = target->magic; 97 hdr->cputype = target->cpuType; 98 hdr->cpusubtype = cpuSubtype(); 99 hdr->filetype = config->outputType; 100 hdr->ncmds = loadCommands.size(); 101 hdr->sizeofcmds = sizeOfCmds; 102 hdr->flags = MH_DYLDLINK; 103 104 if (config->namespaceKind == NamespaceKind::twolevel) 105 hdr->flags |= MH_NOUNDEFS | MH_TWOLEVEL; 106 107 if (config->outputType == MH_DYLIB && !config->hasReexports) 108 hdr->flags |= MH_NO_REEXPORTED_DYLIBS; 109 110 if (config->markDeadStrippableDylib) 111 hdr->flags |= MH_DEAD_STRIPPABLE_DYLIB; 112 113 if (config->outputType == MH_EXECUTE && config->isPic) 114 hdr->flags |= MH_PIE; 115 116 if (config->outputType == MH_DYLIB && config->applicationExtension) 117 hdr->flags |= MH_APP_EXTENSION_SAFE; 118 119 if (in.exports->hasWeakSymbol || in.weakBinding->hasNonWeakDefinition()) 120 hdr->flags |= MH_WEAK_DEFINES; 121 122 if (in.exports->hasWeakSymbol || in.weakBinding->hasEntry()) 123 hdr->flags |= MH_BINDS_TO_WEAK; 124 125 for (const OutputSegment *seg : outputSegments) { 126 for (const OutputSection *osec : seg->getSections()) { 127 if (isThreadLocalVariables(osec->flags)) { 128 hdr->flags |= MH_HAS_TLV_DESCRIPTORS; 129 break; 130 } 131 } 132 } 133 134 uint8_t *p = reinterpret_cast<uint8_t *>(hdr) + target->headerSize; 135 for (const LoadCommand *lc : loadCommands) { 136 lc->writeTo(p); 137 p += lc->getSize(); 138 } 139 } 140 141 PageZeroSection::PageZeroSection() 142 : SyntheticSection(segment_names::pageZero, section_names::pageZero) {} 143 144 RebaseSection::RebaseSection() 145 : LinkEditSection(segment_names::linkEdit, section_names::rebase) {} 146 147 namespace { 148 struct Rebase { 149 OutputSegment *segment = nullptr; 150 uint64_t offset = 0; 151 uint64_t consecutiveCount = 0; 152 }; 153 } // namespace 154 155 // Rebase opcodes allow us to describe a contiguous sequence of rebase location 156 // using a single DO_REBASE opcode. To take advantage of it, we delay emitting 157 // `DO_REBASE` until we have reached the end of a contiguous sequence. 158 static void encodeDoRebase(Rebase &rebase, raw_svector_ostream &os) { 159 assert(rebase.consecutiveCount != 0); 160 if (rebase.consecutiveCount <= REBASE_IMMEDIATE_MASK) { 161 os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 162 rebase.consecutiveCount); 163 } else { 164 os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES); 165 encodeULEB128(rebase.consecutiveCount, os); 166 } 167 rebase.consecutiveCount = 0; 168 } 169 170 static void encodeRebase(const OutputSection *osec, uint64_t outSecOff, 171 Rebase &lastRebase, raw_svector_ostream &os) { 172 OutputSegment *seg = osec->parent; 173 uint64_t offset = osec->getSegmentOffset() + outSecOff; 174 if (lastRebase.segment != seg || lastRebase.offset != offset) { 175 if (lastRebase.consecutiveCount != 0) 176 encodeDoRebase(lastRebase, os); 177 178 if (lastRebase.segment != seg) { 179 os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 180 seg->index); 181 encodeULEB128(offset, os); 182 lastRebase.segment = seg; 183 lastRebase.offset = offset; 184 } else { 185 assert(lastRebase.offset != offset); 186 os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB); 187 encodeULEB128(offset - lastRebase.offset, os); 188 lastRebase.offset = offset; 189 } 190 } 191 ++lastRebase.consecutiveCount; 192 // DO_REBASE causes dyld to both perform the binding and increment the offset 193 lastRebase.offset += target->wordSize; 194 } 195 196 void RebaseSection::finalizeContents() { 197 if (locations.empty()) 198 return; 199 200 raw_svector_ostream os{contents}; 201 Rebase lastRebase; 202 203 os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER); 204 205 llvm::sort(locations, [](const Location &a, const Location &b) { 206 return a.isec->getVA(a.offset) < b.isec->getVA(b.offset); 207 }); 208 for (const Location &loc : locations) 209 encodeRebase(loc.isec->parent, loc.isec->getOffset(loc.offset), lastRebase, 210 os); 211 if (lastRebase.consecutiveCount != 0) 212 encodeDoRebase(lastRebase, os); 213 214 os << static_cast<uint8_t>(REBASE_OPCODE_DONE); 215 } 216 217 void RebaseSection::writeTo(uint8_t *buf) const { 218 memcpy(buf, contents.data(), contents.size()); 219 } 220 221 NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname, 222 const char *name) 223 : SyntheticSection(segname, name) { 224 align = target->wordSize; 225 } 226 227 void macho::addNonLazyBindingEntries(const Symbol *sym, 228 const InputSection *isec, uint64_t offset, 229 int64_t addend) { 230 if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) { 231 in.binding->addEntry(dysym, isec, offset, addend); 232 if (dysym->isWeakDef()) 233 in.weakBinding->addEntry(sym, isec, offset, addend); 234 } else if (const auto *defined = dyn_cast<Defined>(sym)) { 235 in.rebase->addEntry(isec, offset); 236 if (defined->isExternalWeakDef()) 237 in.weakBinding->addEntry(sym, isec, offset, addend); 238 else if (defined->interposable) 239 in.binding->addEntry(sym, isec, offset, addend); 240 } else { 241 // Undefined symbols are filtered out in scanRelocations(); we should never 242 // get here 243 llvm_unreachable("cannot bind to an undefined symbol"); 244 } 245 } 246 247 void NonLazyPointerSectionBase::addEntry(Symbol *sym) { 248 if (entries.insert(sym)) { 249 assert(!sym->isInGot()); 250 sym->gotIndex = entries.size() - 1; 251 252 addNonLazyBindingEntries(sym, isec, sym->gotIndex * target->wordSize); 253 } 254 } 255 256 void NonLazyPointerSectionBase::writeTo(uint8_t *buf) const { 257 for (size_t i = 0, n = entries.size(); i < n; ++i) 258 if (auto *defined = dyn_cast<Defined>(entries[i])) 259 write64le(&buf[i * target->wordSize], defined->getVA()); 260 } 261 262 GotSection::GotSection() 263 : NonLazyPointerSectionBase(segment_names::data, section_names::got) { 264 flags = S_NON_LAZY_SYMBOL_POINTERS; 265 } 266 267 TlvPointerSection::TlvPointerSection() 268 : NonLazyPointerSectionBase(segment_names::data, 269 section_names::threadPtrs) { 270 flags = S_THREAD_LOCAL_VARIABLE_POINTERS; 271 } 272 273 BindingSection::BindingSection() 274 : LinkEditSection(segment_names::linkEdit, section_names::binding) {} 275 276 namespace { 277 struct Binding { 278 OutputSegment *segment = nullptr; 279 uint64_t offset = 0; 280 int64_t addend = 0; 281 }; 282 struct BindIR { 283 // Default value of 0xF0 is not valid opcode and should make the program 284 // scream instead of accidentally writing "valid" values. 285 uint8_t opcode = 0xF0; 286 uint64_t data = 0; 287 uint64_t consecutiveCount = 0; 288 }; 289 } // namespace 290 291 // Encode a sequence of opcodes that tell dyld to write the address of symbol + 292 // addend at osec->addr + outSecOff. 293 // 294 // The bind opcode "interpreter" remembers the values of each binding field, so 295 // we only need to encode the differences between bindings. Hence the use of 296 // lastBinding. 297 static void encodeBinding(const OutputSection *osec, uint64_t outSecOff, 298 int64_t addend, Binding &lastBinding, 299 std::vector<BindIR> &opcodes) { 300 OutputSegment *seg = osec->parent; 301 uint64_t offset = osec->getSegmentOffset() + outSecOff; 302 if (lastBinding.segment != seg) { 303 opcodes.push_back( 304 {static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 305 seg->index), 306 offset}); 307 lastBinding.segment = seg; 308 lastBinding.offset = offset; 309 } else if (lastBinding.offset != offset) { 310 opcodes.push_back({BIND_OPCODE_ADD_ADDR_ULEB, offset - lastBinding.offset}); 311 lastBinding.offset = offset; 312 } 313 314 if (lastBinding.addend != addend) { 315 opcodes.push_back( 316 {BIND_OPCODE_SET_ADDEND_SLEB, static_cast<uint64_t>(addend)}); 317 lastBinding.addend = addend; 318 } 319 320 opcodes.push_back({BIND_OPCODE_DO_BIND, 0}); 321 // DO_BIND causes dyld to both perform the binding and increment the offset 322 lastBinding.offset += target->wordSize; 323 } 324 325 static void optimizeOpcodes(std::vector<BindIR> &opcodes) { 326 // Pass 1: Combine bind/add pairs 327 size_t i; 328 int pWrite = 0; 329 for (i = 1; i < opcodes.size(); ++i, ++pWrite) { 330 if ((opcodes[i].opcode == BIND_OPCODE_ADD_ADDR_ULEB) && 331 (opcodes[i - 1].opcode == BIND_OPCODE_DO_BIND)) { 332 opcodes[pWrite].opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB; 333 opcodes[pWrite].data = opcodes[i].data; 334 ++i; 335 } else { 336 opcodes[pWrite] = opcodes[i - 1]; 337 } 338 } 339 if (i == opcodes.size()) 340 opcodes[pWrite] = opcodes[i - 1]; 341 opcodes.resize(pWrite + 1); 342 343 // Pass 2: Compress two or more bind_add opcodes 344 pWrite = 0; 345 for (i = 1; i < opcodes.size(); ++i, ++pWrite) { 346 if ((opcodes[i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) && 347 (opcodes[i - 1].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) && 348 (opcodes[i].data == opcodes[i - 1].data)) { 349 opcodes[pWrite].opcode = BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB; 350 opcodes[pWrite].consecutiveCount = 2; 351 opcodes[pWrite].data = opcodes[i].data; 352 ++i; 353 while (i < opcodes.size() && 354 (opcodes[i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) && 355 (opcodes[i].data == opcodes[i - 1].data)) { 356 opcodes[pWrite].consecutiveCount++; 357 ++i; 358 } 359 } else { 360 opcodes[pWrite] = opcodes[i - 1]; 361 } 362 } 363 if (i == opcodes.size()) 364 opcodes[pWrite] = opcodes[i - 1]; 365 opcodes.resize(pWrite + 1); 366 367 // Pass 3: Use immediate encodings 368 // Every binding is the size of one pointer. If the next binding is a 369 // multiple of wordSize away that is within BIND_IMMEDIATE_MASK, the 370 // opcode can be scaled by wordSize into a single byte and dyld will 371 // expand it to the correct address. 372 for (auto &p : opcodes) { 373 // It's unclear why the check needs to be less than BIND_IMMEDIATE_MASK, 374 // but ld64 currently does this. This could be a potential bug, but 375 // for now, perform the same behavior to prevent mysterious bugs. 376 if ((p.opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) && 377 ((p.data / target->wordSize) < BIND_IMMEDIATE_MASK) && 378 ((p.data % target->wordSize) == 0)) { 379 p.opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED; 380 p.data /= target->wordSize; 381 } 382 } 383 } 384 385 static void flushOpcodes(const BindIR &op, raw_svector_ostream &os) { 386 uint8_t opcode = op.opcode & BIND_OPCODE_MASK; 387 switch (opcode) { 388 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: 389 case BIND_OPCODE_ADD_ADDR_ULEB: 390 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: 391 os << op.opcode; 392 encodeULEB128(op.data, os); 393 break; 394 case BIND_OPCODE_SET_ADDEND_SLEB: 395 os << op.opcode; 396 encodeSLEB128(static_cast<int64_t>(op.data), os); 397 break; 398 case BIND_OPCODE_DO_BIND: 399 os << op.opcode; 400 break; 401 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: 402 os << op.opcode; 403 encodeULEB128(op.consecutiveCount, os); 404 encodeULEB128(op.data, os); 405 break; 406 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: 407 os << static_cast<uint8_t>(op.opcode | op.data); 408 break; 409 default: 410 llvm_unreachable("cannot bind to an unrecognized symbol"); 411 } 412 } 413 414 // Non-weak bindings need to have their dylib ordinal encoded as well. 415 static int16_t ordinalForDylibSymbol(const DylibSymbol &dysym) { 416 if (config->namespaceKind == NamespaceKind::flat || dysym.isDynamicLookup()) 417 return static_cast<int16_t>(BIND_SPECIAL_DYLIB_FLAT_LOOKUP); 418 assert(dysym.getFile()->isReferenced()); 419 return dysym.getFile()->ordinal; 420 } 421 422 static int16_t ordinalForSymbol(const Symbol &sym) { 423 if (const auto *dysym = dyn_cast<DylibSymbol>(&sym)) 424 return ordinalForDylibSymbol(*dysym); 425 assert(cast<Defined>(&sym)->interposable); 426 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP; 427 } 428 429 static void encodeDylibOrdinal(int16_t ordinal, raw_svector_ostream &os) { 430 if (ordinal <= 0) { 431 os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | 432 (ordinal & BIND_IMMEDIATE_MASK)); 433 } else if (ordinal <= BIND_IMMEDIATE_MASK) { 434 os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | ordinal); 435 } else { 436 os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); 437 encodeULEB128(ordinal, os); 438 } 439 } 440 441 static void encodeWeakOverride(const Defined *defined, 442 raw_svector_ostream &os) { 443 os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 444 BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) 445 << defined->getName() << '\0'; 446 } 447 448 // Organize the bindings so we can encoded them with fewer opcodes. 449 // 450 // First, all bindings for a given symbol should be grouped together. 451 // BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM is the largest opcode (since it 452 // has an associated symbol string), so we only want to emit it once per symbol. 453 // 454 // Within each group, we sort the bindings by address. Since bindings are 455 // delta-encoded, sorting them allows for a more compact result. Note that 456 // sorting by address alone ensures that bindings for the same segment / section 457 // are located together, minimizing the number of times we have to emit 458 // BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB. 459 // 460 // Finally, we sort the symbols by the address of their first binding, again 461 // to facilitate the delta-encoding process. 462 template <class Sym> 463 std::vector<std::pair<const Sym *, std::vector<BindingEntry>>> 464 sortBindings(const BindingsMap<const Sym *> &bindingsMap) { 465 std::vector<std::pair<const Sym *, std::vector<BindingEntry>>> bindingsVec( 466 bindingsMap.begin(), bindingsMap.end()); 467 for (auto &p : bindingsVec) { 468 std::vector<BindingEntry> &bindings = p.second; 469 llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) { 470 return a.target.getVA() < b.target.getVA(); 471 }); 472 } 473 llvm::sort(bindingsVec, [](const auto &a, const auto &b) { 474 return a.second[0].target.getVA() < b.second[0].target.getVA(); 475 }); 476 return bindingsVec; 477 } 478 479 // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld 480 // interprets to update a record with the following fields: 481 // * segment index (of the segment to write the symbol addresses to, typically 482 // the __DATA_CONST segment which contains the GOT) 483 // * offset within the segment, indicating the next location to write a binding 484 // * symbol type 485 // * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command) 486 // * symbol name 487 // * addend 488 // When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind 489 // a symbol in the GOT, and increments the segment offset to point to the next 490 // entry. It does *not* clear the record state after doing the bind, so 491 // subsequent opcodes only need to encode the differences between bindings. 492 void BindingSection::finalizeContents() { 493 raw_svector_ostream os{contents}; 494 Binding lastBinding; 495 int16_t lastOrdinal = 0; 496 497 for (auto &p : sortBindings(bindingsMap)) { 498 const Symbol *sym = p.first; 499 std::vector<BindingEntry> &bindings = p.second; 500 uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; 501 if (sym->isWeakRef()) 502 flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT; 503 os << flags << sym->getName() << '\0' 504 << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER); 505 int16_t ordinal = ordinalForSymbol(*sym); 506 if (ordinal != lastOrdinal) { 507 encodeDylibOrdinal(ordinal, os); 508 lastOrdinal = ordinal; 509 } 510 std::vector<BindIR> opcodes; 511 for (const BindingEntry &b : bindings) 512 encodeBinding(b.target.isec->parent, 513 b.target.isec->getOffset(b.target.offset), b.addend, 514 lastBinding, opcodes); 515 if (config->optimize > 1) 516 optimizeOpcodes(opcodes); 517 for (const auto &op : opcodes) 518 flushOpcodes(op, os); 519 } 520 if (!bindingsMap.empty()) 521 os << static_cast<uint8_t>(BIND_OPCODE_DONE); 522 } 523 524 void BindingSection::writeTo(uint8_t *buf) const { 525 memcpy(buf, contents.data(), contents.size()); 526 } 527 528 WeakBindingSection::WeakBindingSection() 529 : LinkEditSection(segment_names::linkEdit, section_names::weakBinding) {} 530 531 void WeakBindingSection::finalizeContents() { 532 raw_svector_ostream os{contents}; 533 Binding lastBinding; 534 535 for (const Defined *defined : definitions) 536 encodeWeakOverride(defined, os); 537 538 for (auto &p : sortBindings(bindingsMap)) { 539 const Symbol *sym = p.first; 540 std::vector<BindingEntry> &bindings = p.second; 541 os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) 542 << sym->getName() << '\0' 543 << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER); 544 std::vector<BindIR> opcodes; 545 for (const BindingEntry &b : bindings) 546 encodeBinding(b.target.isec->parent, 547 b.target.isec->getOffset(b.target.offset), b.addend, 548 lastBinding, opcodes); 549 if (config->optimize > 1) 550 optimizeOpcodes(opcodes); 551 for (const auto &op : opcodes) 552 flushOpcodes(op, os); 553 } 554 if (!bindingsMap.empty() || !definitions.empty()) 555 os << static_cast<uint8_t>(BIND_OPCODE_DONE); 556 } 557 558 void WeakBindingSection::writeTo(uint8_t *buf) const { 559 memcpy(buf, contents.data(), contents.size()); 560 } 561 562 StubsSection::StubsSection() 563 : SyntheticSection(segment_names::text, section_names::stubs) { 564 flags = S_SYMBOL_STUBS | S_ATTR_SOME_INSTRUCTIONS | S_ATTR_PURE_INSTRUCTIONS; 565 // The stubs section comprises machine instructions, which are aligned to 566 // 4 bytes on the archs we care about. 567 align = 4; 568 reserved2 = target->stubSize; 569 } 570 571 uint64_t StubsSection::getSize() const { 572 return entries.size() * target->stubSize; 573 } 574 575 void StubsSection::writeTo(uint8_t *buf) const { 576 size_t off = 0; 577 for (const Symbol *sym : entries) { 578 target->writeStub(buf + off, *sym); 579 off += target->stubSize; 580 } 581 } 582 583 void StubsSection::finalize() { isFinal = true; } 584 585 bool StubsSection::addEntry(Symbol *sym) { 586 bool inserted = entries.insert(sym); 587 if (inserted) 588 sym->stubsIndex = entries.size() - 1; 589 return inserted; 590 } 591 592 StubHelperSection::StubHelperSection() 593 : SyntheticSection(segment_names::text, section_names::stubHelper) { 594 flags = S_ATTR_SOME_INSTRUCTIONS | S_ATTR_PURE_INSTRUCTIONS; 595 align = 4; // This section comprises machine instructions 596 } 597 598 uint64_t StubHelperSection::getSize() const { 599 return target->stubHelperHeaderSize + 600 in.lazyBinding->getEntries().size() * target->stubHelperEntrySize; 601 } 602 603 bool StubHelperSection::isNeeded() const { return in.lazyBinding->isNeeded(); } 604 605 void StubHelperSection::writeTo(uint8_t *buf) const { 606 target->writeStubHelperHeader(buf); 607 size_t off = target->stubHelperHeaderSize; 608 for (const Symbol *sym : in.lazyBinding->getEntries()) { 609 target->writeStubHelperEntry(buf + off, *sym, addr + off); 610 off += target->stubHelperEntrySize; 611 } 612 } 613 614 void StubHelperSection::setup() { 615 Symbol *binder = symtab->addUndefined("dyld_stub_binder", /*file=*/nullptr, 616 /*isWeakRef=*/false); 617 if (auto *undefined = dyn_cast<Undefined>(binder)) 618 treatUndefinedSymbol(*undefined, 619 "lazy binding (normally in libSystem.dylib)"); 620 621 // treatUndefinedSymbol() can replace binder with a DylibSymbol; re-check. 622 stubBinder = dyn_cast_or_null<DylibSymbol>(binder); 623 if (stubBinder == nullptr) 624 return; 625 626 in.got->addEntry(stubBinder); 627 628 in.imageLoaderCache->parent = 629 ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache); 630 inputSections.push_back(in.imageLoaderCache); 631 // Since this isn't in the symbol table or in any input file, the noDeadStrip 632 // argument doesn't matter. 633 dyldPrivate = 634 make<Defined>("__dyld_private", nullptr, in.imageLoaderCache, 0, 0, 635 /*isWeakDef=*/false, 636 /*isExternal=*/false, /*isPrivateExtern=*/false, 637 /*includeInSymtab=*/true, 638 /*isThumb=*/false, /*isReferencedDynamically=*/false, 639 /*noDeadStrip=*/false); 640 dyldPrivate->used = true; 641 } 642 643 LazyPointerSection::LazyPointerSection() 644 : SyntheticSection(segment_names::data, section_names::lazySymbolPtr) { 645 align = target->wordSize; 646 flags = S_LAZY_SYMBOL_POINTERS; 647 } 648 649 uint64_t LazyPointerSection::getSize() const { 650 return in.stubs->getEntries().size() * target->wordSize; 651 } 652 653 bool LazyPointerSection::isNeeded() const { 654 return !in.stubs->getEntries().empty(); 655 } 656 657 void LazyPointerSection::writeTo(uint8_t *buf) const { 658 size_t off = 0; 659 for (const Symbol *sym : in.stubs->getEntries()) { 660 if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) { 661 if (dysym->hasStubsHelper()) { 662 uint64_t stubHelperOffset = 663 target->stubHelperHeaderSize + 664 dysym->stubsHelperIndex * target->stubHelperEntrySize; 665 write64le(buf + off, in.stubHelper->addr + stubHelperOffset); 666 } 667 } else { 668 write64le(buf + off, sym->getVA()); 669 } 670 off += target->wordSize; 671 } 672 } 673 674 LazyBindingSection::LazyBindingSection() 675 : LinkEditSection(segment_names::linkEdit, section_names::lazyBinding) {} 676 677 void LazyBindingSection::finalizeContents() { 678 // TODO: Just precompute output size here instead of writing to a temporary 679 // buffer 680 for (Symbol *sym : entries) 681 sym->lazyBindOffset = encode(*sym); 682 } 683 684 void LazyBindingSection::writeTo(uint8_t *buf) const { 685 memcpy(buf, contents.data(), contents.size()); 686 } 687 688 void LazyBindingSection::addEntry(Symbol *sym) { 689 if (entries.insert(sym)) { 690 sym->stubsHelperIndex = entries.size() - 1; 691 in.rebase->addEntry(in.lazyPointers->isec, 692 sym->stubsIndex * target->wordSize); 693 } 694 } 695 696 // Unlike the non-lazy binding section, the bind opcodes in this section aren't 697 // interpreted all at once. Rather, dyld will start interpreting opcodes at a 698 // given offset, typically only binding a single symbol before it finds a 699 // BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case, 700 // we cannot encode just the differences between symbols; we have to emit the 701 // complete bind information for each symbol. 702 uint32_t LazyBindingSection::encode(const Symbol &sym) { 703 uint32_t opstreamOffset = contents.size(); 704 OutputSegment *dataSeg = in.lazyPointers->parent; 705 os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 706 dataSeg->index); 707 uint64_t offset = 708 in.lazyPointers->addr - dataSeg->addr + sym.stubsIndex * target->wordSize; 709 encodeULEB128(offset, os); 710 encodeDylibOrdinal(ordinalForSymbol(sym), os); 711 712 uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; 713 if (sym.isWeakRef()) 714 flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT; 715 716 os << flags << sym.getName() << '\0' 717 << static_cast<uint8_t>(BIND_OPCODE_DO_BIND) 718 << static_cast<uint8_t>(BIND_OPCODE_DONE); 719 return opstreamOffset; 720 } 721 722 ExportSection::ExportSection() 723 : LinkEditSection(segment_names::linkEdit, section_names::export_) {} 724 725 void ExportSection::finalizeContents() { 726 trieBuilder.setImageBase(in.header->addr); 727 for (const Symbol *sym : symtab->getSymbols()) { 728 if (const auto *defined = dyn_cast<Defined>(sym)) { 729 if (defined->privateExtern || !defined->isLive()) 730 continue; 731 trieBuilder.addSymbol(*defined); 732 hasWeakSymbol = hasWeakSymbol || sym->isWeakDef(); 733 } 734 } 735 size = trieBuilder.build(); 736 } 737 738 void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); } 739 740 DataInCodeSection::DataInCodeSection() 741 : LinkEditSection(segment_names::linkEdit, section_names::dataInCode) {} 742 743 template <class LP> 744 static std::vector<MachO::data_in_code_entry> collectDataInCodeEntries() { 745 std::vector<MachO::data_in_code_entry> dataInCodeEntries; 746 for (const InputFile *inputFile : inputFiles) { 747 if (!isa<ObjFile>(inputFile)) 748 continue; 749 const ObjFile *objFile = cast<ObjFile>(inputFile); 750 ArrayRef<MachO::data_in_code_entry> entries = objFile->getDataInCode(); 751 if (entries.empty()) 752 continue; 753 754 assert(is_sorted(dataInCodeEntries, [](const data_in_code_entry &lhs, 755 const data_in_code_entry &rhs) { 756 return lhs.offset < rhs.offset; 757 })); 758 // For each code subsection find 'data in code' entries residing in it. 759 // Compute the new offset values as 760 // <offset within subsection> + <subsection address> - <__TEXT address>. 761 for (const Section *section : objFile->sections) { 762 for (const Subsection &subsec : section->subsections) { 763 const InputSection *isec = subsec.isec; 764 if (!isCodeSection(isec)) 765 continue; 766 if (cast<ConcatInputSection>(isec)->shouldOmitFromOutput()) 767 continue; 768 const uint64_t beginAddr = section->addr + subsec.offset; 769 auto it = llvm::lower_bound( 770 entries, beginAddr, 771 [](const MachO::data_in_code_entry &entry, uint64_t addr) { 772 return entry.offset < addr; 773 }); 774 const uint64_t endAddr = beginAddr + isec->getSize(); 775 for (const auto end = entries.end(); 776 it != end && it->offset + it->length <= endAddr; ++it) 777 dataInCodeEntries.push_back( 778 {static_cast<uint32_t>(isec->getVA(it->offset - beginAddr) - 779 in.header->addr), 780 it->length, it->kind}); 781 } 782 } 783 } 784 return dataInCodeEntries; 785 } 786 787 void DataInCodeSection::finalizeContents() { 788 entries = target->wordSize == 8 ? collectDataInCodeEntries<LP64>() 789 : collectDataInCodeEntries<ILP32>(); 790 } 791 792 void DataInCodeSection::writeTo(uint8_t *buf) const { 793 if (!entries.empty()) 794 memcpy(buf, entries.data(), getRawSize()); 795 } 796 797 FunctionStartsSection::FunctionStartsSection() 798 : LinkEditSection(segment_names::linkEdit, section_names::functionStarts) {} 799 800 void FunctionStartsSection::finalizeContents() { 801 raw_svector_ostream os{contents}; 802 std::vector<uint64_t> addrs; 803 for (const InputFile *file : inputFiles) { 804 if (auto *objFile = dyn_cast<ObjFile>(file)) { 805 for (const Symbol *sym : objFile->symbols) { 806 if (const auto *defined = dyn_cast_or_null<Defined>(sym)) { 807 if (!defined->isec || !isCodeSection(defined->isec) || 808 !defined->isLive()) 809 continue; 810 // TODO: Add support for thumbs, in that case 811 // the lowest bit of nextAddr needs to be set to 1. 812 addrs.push_back(defined->getVA()); 813 } 814 } 815 } 816 } 817 llvm::sort(addrs); 818 uint64_t addr = in.header->addr; 819 for (uint64_t nextAddr : addrs) { 820 uint64_t delta = nextAddr - addr; 821 if (delta == 0) 822 continue; 823 encodeULEB128(delta, os); 824 addr = nextAddr; 825 } 826 os << '\0'; 827 } 828 829 void FunctionStartsSection::writeTo(uint8_t *buf) const { 830 memcpy(buf, contents.data(), contents.size()); 831 } 832 833 SymtabSection::SymtabSection(StringTableSection &stringTableSection) 834 : LinkEditSection(segment_names::linkEdit, section_names::symbolTable), 835 stringTableSection(stringTableSection) {} 836 837 void SymtabSection::emitBeginSourceStab(DWARFUnit *compileUnit) { 838 StabsEntry stab(N_SO); 839 SmallString<261> dir(compileUnit->getCompilationDir()); 840 StringRef sep = sys::path::get_separator(); 841 // We don't use `path::append` here because we want an empty `dir` to result 842 // in an absolute path. `append` would give us a relative path for that case. 843 if (!dir.endswith(sep)) 844 dir += sep; 845 stab.strx = stringTableSection.addString( 846 saver().save(dir + compileUnit->getUnitDIE().getShortName())); 847 stabs.emplace_back(std::move(stab)); 848 } 849 850 void SymtabSection::emitEndSourceStab() { 851 StabsEntry stab(N_SO); 852 stab.sect = 1; 853 stabs.emplace_back(std::move(stab)); 854 } 855 856 void SymtabSection::emitObjectFileStab(ObjFile *file) { 857 StabsEntry stab(N_OSO); 858 stab.sect = target->cpuSubtype; 859 SmallString<261> path(!file->archiveName.empty() ? file->archiveName 860 : file->getName()); 861 std::error_code ec = sys::fs::make_absolute(path); 862 if (ec) 863 fatal("failed to get absolute path for " + path); 864 865 if (!file->archiveName.empty()) 866 path.append({"(", file->getName(), ")"}); 867 868 StringRef adjustedPath = saver().save(path.str()); 869 adjustedPath.consume_front(config->osoPrefix); 870 871 stab.strx = stringTableSection.addString(adjustedPath); 872 stab.desc = 1; 873 stab.value = file->modTime; 874 stabs.emplace_back(std::move(stab)); 875 } 876 877 void SymtabSection::emitEndFunStab(Defined *defined) { 878 StabsEntry stab(N_FUN); 879 stab.value = defined->size; 880 stabs.emplace_back(std::move(stab)); 881 } 882 883 void SymtabSection::emitStabs() { 884 if (config->omitDebugInfo) 885 return; 886 887 for (const std::string &s : config->astPaths) { 888 StabsEntry astStab(N_AST); 889 astStab.strx = stringTableSection.addString(s); 890 stabs.emplace_back(std::move(astStab)); 891 } 892 893 // Cache the file ID for each symbol in an std::pair for faster sorting. 894 using SortingPair = std::pair<Defined *, int>; 895 std::vector<SortingPair> symbolsNeedingStabs; 896 for (const SymtabEntry &entry : 897 concat<SymtabEntry>(localSymbols, externalSymbols)) { 898 Symbol *sym = entry.sym; 899 assert(sym->isLive() && 900 "dead symbols should not be in localSymbols, externalSymbols"); 901 if (auto *defined = dyn_cast<Defined>(sym)) { 902 // Excluded symbols should have been filtered out in finalizeContents(). 903 assert(defined->includeInSymtab); 904 905 if (defined->isAbsolute()) 906 continue; 907 908 // Constant-folded symbols go in the executable's symbol table, but don't 909 // get a stabs entry. 910 if (defined->wasIdenticalCodeFolded) 911 continue; 912 913 InputSection *isec = defined->isec; 914 ObjFile *file = dyn_cast_or_null<ObjFile>(isec->getFile()); 915 if (!file || !file->compileUnit) 916 continue; 917 918 symbolsNeedingStabs.emplace_back(defined, defined->isec->getFile()->id); 919 } 920 } 921 922 llvm::stable_sort(symbolsNeedingStabs, 923 [&](const SortingPair &a, const SortingPair &b) { 924 return a.second < b.second; 925 }); 926 927 // Emit STABS symbols so that dsymutil and/or the debugger can map address 928 // regions in the final binary to the source and object files from which they 929 // originated. 930 InputFile *lastFile = nullptr; 931 for (SortingPair &pair : symbolsNeedingStabs) { 932 Defined *defined = pair.first; 933 InputSection *isec = defined->isec; 934 ObjFile *file = cast<ObjFile>(isec->getFile()); 935 936 if (lastFile == nullptr || lastFile != file) { 937 if (lastFile != nullptr) 938 emitEndSourceStab(); 939 lastFile = file; 940 941 emitBeginSourceStab(file->compileUnit); 942 emitObjectFileStab(file); 943 } 944 945 StabsEntry symStab; 946 symStab.sect = defined->isec->parent->index; 947 symStab.strx = stringTableSection.addString(defined->getName()); 948 symStab.value = defined->getVA(); 949 950 if (isCodeSection(isec)) { 951 symStab.type = N_FUN; 952 stabs.emplace_back(std::move(symStab)); 953 emitEndFunStab(defined); 954 } else { 955 symStab.type = defined->isExternal() ? N_GSYM : N_STSYM; 956 stabs.emplace_back(std::move(symStab)); 957 } 958 } 959 960 if (!stabs.empty()) 961 emitEndSourceStab(); 962 } 963 964 void SymtabSection::finalizeContents() { 965 auto addSymbol = [&](std::vector<SymtabEntry> &symbols, Symbol *sym) { 966 uint32_t strx = stringTableSection.addString(sym->getName()); 967 symbols.push_back({sym, strx}); 968 }; 969 970 std::function<void(Symbol *)> localSymbolsHandler; 971 switch (config->localSymbolsPresence) { 972 case SymtabPresence::All: 973 localSymbolsHandler = [&](Symbol *sym) { addSymbol(localSymbols, sym); }; 974 break; 975 case SymtabPresence::None: 976 localSymbolsHandler = [&](Symbol *) { /* Do nothing*/ }; 977 break; 978 case SymtabPresence::SelectivelyIncluded: 979 localSymbolsHandler = [&](Symbol *sym) { 980 if (config->localSymbolPatterns.match(sym->getName())) 981 addSymbol(localSymbols, sym); 982 }; 983 break; 984 case SymtabPresence::SelectivelyExcluded: 985 localSymbolsHandler = [&](Symbol *sym) { 986 if (!config->localSymbolPatterns.match(sym->getName())) 987 addSymbol(localSymbols, sym); 988 }; 989 break; 990 } 991 992 // Local symbols aren't in the SymbolTable, so we walk the list of object 993 // files to gather them. 994 // But if `-x` is set, then we don't need to. localSymbolsHandler() will do 995 // the right thing regardless, but this check is a perf optimization because 996 // iterating through all the input files and their symbols is expensive. 997 if (config->localSymbolsPresence != SymtabPresence::None) { 998 for (const InputFile *file : inputFiles) { 999 if (auto *objFile = dyn_cast<ObjFile>(file)) { 1000 for (Symbol *sym : objFile->symbols) { 1001 if (auto *defined = dyn_cast_or_null<Defined>(sym)) { 1002 if (defined->isExternal() || !defined->isLive() || 1003 !defined->includeInSymtab) 1004 continue; 1005 localSymbolsHandler(sym); 1006 } 1007 } 1008 } 1009 } 1010 } 1011 1012 // __dyld_private is a local symbol too. It's linker-created and doesn't 1013 // exist in any object file. 1014 if (Defined *dyldPrivate = in.stubHelper->dyldPrivate) 1015 localSymbolsHandler(dyldPrivate); 1016 1017 for (Symbol *sym : symtab->getSymbols()) { 1018 if (!sym->isLive()) 1019 continue; 1020 if (auto *defined = dyn_cast<Defined>(sym)) { 1021 if (!defined->includeInSymtab) 1022 continue; 1023 assert(defined->isExternal()); 1024 if (defined->privateExtern) 1025 localSymbolsHandler(defined); 1026 else 1027 addSymbol(externalSymbols, defined); 1028 } else if (auto *dysym = dyn_cast<DylibSymbol>(sym)) { 1029 if (dysym->isReferenced()) 1030 addSymbol(undefinedSymbols, sym); 1031 } 1032 } 1033 1034 emitStabs(); 1035 uint32_t symtabIndex = stabs.size(); 1036 for (const SymtabEntry &entry : 1037 concat<SymtabEntry>(localSymbols, externalSymbols, undefinedSymbols)) { 1038 entry.sym->symtabIndex = symtabIndex++; 1039 } 1040 } 1041 1042 uint32_t SymtabSection::getNumSymbols() const { 1043 return stabs.size() + localSymbols.size() + externalSymbols.size() + 1044 undefinedSymbols.size(); 1045 } 1046 1047 // This serves to hide (type-erase) the template parameter from SymtabSection. 1048 template <class LP> class SymtabSectionImpl final : public SymtabSection { 1049 public: 1050 SymtabSectionImpl(StringTableSection &stringTableSection) 1051 : SymtabSection(stringTableSection) {} 1052 uint64_t getRawSize() const override; 1053 void writeTo(uint8_t *buf) const override; 1054 }; 1055 1056 template <class LP> uint64_t SymtabSectionImpl<LP>::getRawSize() const { 1057 return getNumSymbols() * sizeof(typename LP::nlist); 1058 } 1059 1060 template <class LP> void SymtabSectionImpl<LP>::writeTo(uint8_t *buf) const { 1061 auto *nList = reinterpret_cast<typename LP::nlist *>(buf); 1062 // Emit the stabs entries before the "real" symbols. We cannot emit them 1063 // after as that would render Symbol::symtabIndex inaccurate. 1064 for (const StabsEntry &entry : stabs) { 1065 nList->n_strx = entry.strx; 1066 nList->n_type = entry.type; 1067 nList->n_sect = entry.sect; 1068 nList->n_desc = entry.desc; 1069 nList->n_value = entry.value; 1070 ++nList; 1071 } 1072 1073 for (const SymtabEntry &entry : concat<const SymtabEntry>( 1074 localSymbols, externalSymbols, undefinedSymbols)) { 1075 nList->n_strx = entry.strx; 1076 // TODO populate n_desc with more flags 1077 if (auto *defined = dyn_cast<Defined>(entry.sym)) { 1078 uint8_t scope = 0; 1079 if (defined->privateExtern) { 1080 // Private external -- dylib scoped symbol. 1081 // Promote to non-external at link time. 1082 scope = N_PEXT; 1083 } else if (defined->isExternal()) { 1084 // Normal global symbol. 1085 scope = N_EXT; 1086 } else { 1087 // TU-local symbol from localSymbols. 1088 scope = 0; 1089 } 1090 1091 if (defined->isAbsolute()) { 1092 nList->n_type = scope | N_ABS; 1093 nList->n_sect = NO_SECT; 1094 nList->n_value = defined->value; 1095 } else { 1096 nList->n_type = scope | N_SECT; 1097 nList->n_sect = defined->isec->parent->index; 1098 // For the N_SECT symbol type, n_value is the address of the symbol 1099 nList->n_value = defined->getVA(); 1100 } 1101 nList->n_desc |= defined->thumb ? N_ARM_THUMB_DEF : 0; 1102 nList->n_desc |= defined->isExternalWeakDef() ? N_WEAK_DEF : 0; 1103 nList->n_desc |= 1104 defined->referencedDynamically ? REFERENCED_DYNAMICALLY : 0; 1105 } else if (auto *dysym = dyn_cast<DylibSymbol>(entry.sym)) { 1106 uint16_t n_desc = nList->n_desc; 1107 int16_t ordinal = ordinalForDylibSymbol(*dysym); 1108 if (ordinal == BIND_SPECIAL_DYLIB_FLAT_LOOKUP) 1109 SET_LIBRARY_ORDINAL(n_desc, DYNAMIC_LOOKUP_ORDINAL); 1110 else if (ordinal == BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE) 1111 SET_LIBRARY_ORDINAL(n_desc, EXECUTABLE_ORDINAL); 1112 else { 1113 assert(ordinal > 0); 1114 SET_LIBRARY_ORDINAL(n_desc, static_cast<uint8_t>(ordinal)); 1115 } 1116 1117 nList->n_type = N_EXT; 1118 n_desc |= dysym->isWeakDef() ? N_WEAK_DEF : 0; 1119 n_desc |= dysym->isWeakRef() ? N_WEAK_REF : 0; 1120 nList->n_desc = n_desc; 1121 } 1122 ++nList; 1123 } 1124 } 1125 1126 template <class LP> 1127 SymtabSection * 1128 macho::makeSymtabSection(StringTableSection &stringTableSection) { 1129 return make<SymtabSectionImpl<LP>>(stringTableSection); 1130 } 1131 1132 IndirectSymtabSection::IndirectSymtabSection() 1133 : LinkEditSection(segment_names::linkEdit, 1134 section_names::indirectSymbolTable) {} 1135 1136 uint32_t IndirectSymtabSection::getNumSymbols() const { 1137 return in.got->getEntries().size() + in.tlvPointers->getEntries().size() + 1138 2 * in.stubs->getEntries().size(); 1139 } 1140 1141 bool IndirectSymtabSection::isNeeded() const { 1142 return in.got->isNeeded() || in.tlvPointers->isNeeded() || 1143 in.stubs->isNeeded(); 1144 } 1145 1146 void IndirectSymtabSection::finalizeContents() { 1147 uint32_t off = 0; 1148 in.got->reserved1 = off; 1149 off += in.got->getEntries().size(); 1150 in.tlvPointers->reserved1 = off; 1151 off += in.tlvPointers->getEntries().size(); 1152 in.stubs->reserved1 = off; 1153 off += in.stubs->getEntries().size(); 1154 in.lazyPointers->reserved1 = off; 1155 } 1156 1157 static uint32_t indirectValue(const Symbol *sym) { 1158 if (sym->symtabIndex == UINT32_MAX) 1159 return INDIRECT_SYMBOL_LOCAL; 1160 if (auto *defined = dyn_cast<Defined>(sym)) 1161 if (defined->privateExtern) 1162 return INDIRECT_SYMBOL_LOCAL; 1163 return sym->symtabIndex; 1164 } 1165 1166 void IndirectSymtabSection::writeTo(uint8_t *buf) const { 1167 uint32_t off = 0; 1168 for (const Symbol *sym : in.got->getEntries()) { 1169 write32le(buf + off * sizeof(uint32_t), indirectValue(sym)); 1170 ++off; 1171 } 1172 for (const Symbol *sym : in.tlvPointers->getEntries()) { 1173 write32le(buf + off * sizeof(uint32_t), indirectValue(sym)); 1174 ++off; 1175 } 1176 for (const Symbol *sym : in.stubs->getEntries()) { 1177 write32le(buf + off * sizeof(uint32_t), indirectValue(sym)); 1178 ++off; 1179 } 1180 // There is a 1:1 correspondence between stubs and LazyPointerSection 1181 // entries. But giving __stubs and __la_symbol_ptr the same reserved1 1182 // (the offset into the indirect symbol table) so that they both refer 1183 // to the same range of offsets confuses `strip`, so write the stubs 1184 // symbol table offsets a second time. 1185 for (const Symbol *sym : in.stubs->getEntries()) { 1186 write32le(buf + off * sizeof(uint32_t), indirectValue(sym)); 1187 ++off; 1188 } 1189 } 1190 1191 StringTableSection::StringTableSection() 1192 : LinkEditSection(segment_names::linkEdit, section_names::stringTable) {} 1193 1194 uint32_t StringTableSection::addString(StringRef str) { 1195 uint32_t strx = size; 1196 strings.push_back(str); // TODO: consider deduplicating strings 1197 size += str.size() + 1; // account for null terminator 1198 return strx; 1199 } 1200 1201 void StringTableSection::writeTo(uint8_t *buf) const { 1202 uint32_t off = 0; 1203 for (StringRef str : strings) { 1204 memcpy(buf + off, str.data(), str.size()); 1205 off += str.size() + 1; // account for null terminator 1206 } 1207 } 1208 1209 static_assert((CodeSignatureSection::blobHeadersSize % 8) == 0, ""); 1210 static_assert((CodeSignatureSection::fixedHeadersSize % 8) == 0, ""); 1211 1212 CodeSignatureSection::CodeSignatureSection() 1213 : LinkEditSection(segment_names::linkEdit, section_names::codeSignature) { 1214 align = 16; // required by libstuff 1215 // FIXME: Consider using finalOutput instead of outputFile. 1216 fileName = config->outputFile; 1217 size_t slashIndex = fileName.rfind("/"); 1218 if (slashIndex != std::string::npos) 1219 fileName = fileName.drop_front(slashIndex + 1); 1220 1221 // NOTE: Any changes to these calculations should be repeated 1222 // in llvm-objcopy's MachOLayoutBuilder::layoutTail. 1223 allHeadersSize = alignTo<16>(fixedHeadersSize + fileName.size() + 1); 1224 fileNamePad = allHeadersSize - fixedHeadersSize - fileName.size(); 1225 } 1226 1227 uint32_t CodeSignatureSection::getBlockCount() const { 1228 return (fileOff + blockSize - 1) / blockSize; 1229 } 1230 1231 uint64_t CodeSignatureSection::getRawSize() const { 1232 return allHeadersSize + getBlockCount() * hashSize; 1233 } 1234 1235 void CodeSignatureSection::writeHashes(uint8_t *buf) const { 1236 // NOTE: Changes to this functionality should be repeated in llvm-objcopy's 1237 // MachOWriter::writeSignatureData. 1238 uint8_t *code = buf; 1239 uint8_t *codeEnd = buf + fileOff; 1240 uint8_t *hashes = codeEnd + allHeadersSize; 1241 while (code < codeEnd) { 1242 StringRef block(reinterpret_cast<char *>(code), 1243 std::min(codeEnd - code, static_cast<ssize_t>(blockSize))); 1244 SHA256 hasher; 1245 hasher.update(block); 1246 std::array<uint8_t, 32> hash = hasher.final(); 1247 assert(hash.size() == hashSize); 1248 memcpy(hashes, hash.data(), hashSize); 1249 code += blockSize; 1250 hashes += hashSize; 1251 } 1252 #if defined(__APPLE__) 1253 // This is macOS-specific work-around and makes no sense for any 1254 // other host OS. See https://openradar.appspot.com/FB8914231 1255 // 1256 // The macOS kernel maintains a signature-verification cache to 1257 // quickly validate applications at time of execve(2). The trouble 1258 // is that for the kernel creates the cache entry at the time of the 1259 // mmap(2) call, before we have a chance to write either the code to 1260 // sign or the signature header+hashes. The fix is to invalidate 1261 // all cached data associated with the output file, thus discarding 1262 // the bogus prematurely-cached signature. 1263 msync(buf, fileOff + getSize(), MS_INVALIDATE); 1264 #endif 1265 } 1266 1267 void CodeSignatureSection::writeTo(uint8_t *buf) const { 1268 // NOTE: Changes to this functionality should be repeated in llvm-objcopy's 1269 // MachOWriter::writeSignatureData. 1270 uint32_t signatureSize = static_cast<uint32_t>(getSize()); 1271 auto *superBlob = reinterpret_cast<CS_SuperBlob *>(buf); 1272 write32be(&superBlob->magic, CSMAGIC_EMBEDDED_SIGNATURE); 1273 write32be(&superBlob->length, signatureSize); 1274 write32be(&superBlob->count, 1); 1275 auto *blobIndex = reinterpret_cast<CS_BlobIndex *>(&superBlob[1]); 1276 write32be(&blobIndex->type, CSSLOT_CODEDIRECTORY); 1277 write32be(&blobIndex->offset, blobHeadersSize); 1278 auto *codeDirectory = 1279 reinterpret_cast<CS_CodeDirectory *>(buf + blobHeadersSize); 1280 write32be(&codeDirectory->magic, CSMAGIC_CODEDIRECTORY); 1281 write32be(&codeDirectory->length, signatureSize - blobHeadersSize); 1282 write32be(&codeDirectory->version, CS_SUPPORTSEXECSEG); 1283 write32be(&codeDirectory->flags, CS_ADHOC | CS_LINKER_SIGNED); 1284 write32be(&codeDirectory->hashOffset, 1285 sizeof(CS_CodeDirectory) + fileName.size() + fileNamePad); 1286 write32be(&codeDirectory->identOffset, sizeof(CS_CodeDirectory)); 1287 codeDirectory->nSpecialSlots = 0; 1288 write32be(&codeDirectory->nCodeSlots, getBlockCount()); 1289 write32be(&codeDirectory->codeLimit, fileOff); 1290 codeDirectory->hashSize = static_cast<uint8_t>(hashSize); 1291 codeDirectory->hashType = kSecCodeSignatureHashSHA256; 1292 codeDirectory->platform = 0; 1293 codeDirectory->pageSize = blockSizeShift; 1294 codeDirectory->spare2 = 0; 1295 codeDirectory->scatterOffset = 0; 1296 codeDirectory->teamOffset = 0; 1297 codeDirectory->spare3 = 0; 1298 codeDirectory->codeLimit64 = 0; 1299 OutputSegment *textSeg = getOrCreateOutputSegment(segment_names::text); 1300 write64be(&codeDirectory->execSegBase, textSeg->fileOff); 1301 write64be(&codeDirectory->execSegLimit, textSeg->fileSize); 1302 write64be(&codeDirectory->execSegFlags, 1303 config->outputType == MH_EXECUTE ? CS_EXECSEG_MAIN_BINARY : 0); 1304 auto *id = reinterpret_cast<char *>(&codeDirectory[1]); 1305 memcpy(id, fileName.begin(), fileName.size()); 1306 memset(id + fileName.size(), 0, fileNamePad); 1307 } 1308 1309 BitcodeBundleSection::BitcodeBundleSection() 1310 : SyntheticSection(segment_names::llvm, section_names::bitcodeBundle) {} 1311 1312 class ErrorCodeWrapper { 1313 public: 1314 explicit ErrorCodeWrapper(std::error_code ec) : errorCode(ec.value()) {} 1315 explicit ErrorCodeWrapper(int ec) : errorCode(ec) {} 1316 operator int() const { return errorCode; } 1317 1318 private: 1319 int errorCode; 1320 }; 1321 1322 #define CHECK_EC(exp) \ 1323 do { \ 1324 ErrorCodeWrapper ec(exp); \ 1325 if (ec) \ 1326 fatal(Twine("operation failed with error code ") + Twine(ec) + ": " + \ 1327 #exp); \ 1328 } while (0); 1329 1330 void BitcodeBundleSection::finalize() { 1331 #ifdef LLVM_HAVE_LIBXAR 1332 using namespace llvm::sys::fs; 1333 CHECK_EC(createTemporaryFile("bitcode-bundle", "xar", xarPath)); 1334 1335 #pragma clang diagnostic push 1336 #pragma clang diagnostic ignored "-Wdeprecated-declarations" 1337 xar_t xar(xar_open(xarPath.data(), O_RDWR)); 1338 #pragma clang diagnostic pop 1339 if (!xar) 1340 fatal("failed to open XAR temporary file at " + xarPath); 1341 CHECK_EC(xar_opt_set(xar, XAR_OPT_COMPRESSION, XAR_OPT_VAL_NONE)); 1342 // FIXME: add more data to XAR 1343 CHECK_EC(xar_close(xar)); 1344 1345 file_size(xarPath, xarSize); 1346 #endif // defined(LLVM_HAVE_LIBXAR) 1347 } 1348 1349 void BitcodeBundleSection::writeTo(uint8_t *buf) const { 1350 using namespace llvm::sys::fs; 1351 file_t handle = 1352 CHECK(openNativeFile(xarPath, CD_OpenExisting, FA_Read, OF_None), 1353 "failed to open XAR file"); 1354 std::error_code ec; 1355 mapped_file_region xarMap(handle, mapped_file_region::mapmode::readonly, 1356 xarSize, 0, ec); 1357 if (ec) 1358 fatal("failed to map XAR file"); 1359 memcpy(buf, xarMap.const_data(), xarSize); 1360 1361 closeFile(handle); 1362 remove(xarPath); 1363 } 1364 1365 CStringSection::CStringSection() 1366 : SyntheticSection(segment_names::text, section_names::cString) { 1367 flags = S_CSTRING_LITERALS; 1368 } 1369 1370 void CStringSection::addInput(CStringInputSection *isec) { 1371 isec->parent = this; 1372 inputs.push_back(isec); 1373 if (isec->align > align) 1374 align = isec->align; 1375 } 1376 1377 void CStringSection::writeTo(uint8_t *buf) const { 1378 for (const CStringInputSection *isec : inputs) { 1379 for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) { 1380 if (!isec->pieces[i].live) 1381 continue; 1382 StringRef string = isec->getStringRef(i); 1383 memcpy(buf + isec->pieces[i].outSecOff, string.data(), string.size()); 1384 } 1385 } 1386 } 1387 1388 void CStringSection::finalizeContents() { 1389 uint64_t offset = 0; 1390 for (CStringInputSection *isec : inputs) { 1391 for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) { 1392 if (!isec->pieces[i].live) 1393 continue; 1394 // See comment above DeduplicatedCStringSection for how alignment is 1395 // handled. 1396 uint32_t pieceAlign = 1397 1 << countTrailingZeros(isec->align | isec->pieces[i].inSecOff); 1398 offset = alignTo(offset, pieceAlign); 1399 isec->pieces[i].outSecOff = offset; 1400 isec->isFinal = true; 1401 StringRef string = isec->getStringRef(i); 1402 offset += string.size(); 1403 } 1404 } 1405 size = offset; 1406 } 1407 1408 // Mergeable cstring literals are found under the __TEXT,__cstring section. In 1409 // contrast to ELF, which puts strings that need different alignments into 1410 // different sections, clang's Mach-O backend puts them all in one section. 1411 // Strings that need to be aligned have the .p2align directive emitted before 1412 // them, which simply translates into zero padding in the object file. In other 1413 // words, we have to infer the desired alignment of these cstrings from their 1414 // addresses. 1415 // 1416 // We differ slightly from ld64 in how we've chosen to align these cstrings. 1417 // Both LLD and ld64 preserve the number of trailing zeros in each cstring's 1418 // address in the input object files. When deduplicating identical cstrings, 1419 // both linkers pick the cstring whose address has more trailing zeros, and 1420 // preserve the alignment of that address in the final binary. However, ld64 1421 // goes a step further and also preserves the offset of the cstring from the 1422 // last section-aligned address. I.e. if a cstring is at offset 18 in the 1423 // input, with a section alignment of 16, then both LLD and ld64 will ensure the 1424 // final address is 2-byte aligned (since 18 == 16 + 2). But ld64 will also 1425 // ensure that the final address is of the form 16 * k + 2 for some k. 1426 // 1427 // Note that ld64's heuristic means that a dedup'ed cstring's final address is 1428 // dependent on the order of the input object files. E.g. if in addition to the 1429 // cstring at offset 18 above, we have a duplicate one in another file with a 1430 // `.cstring` section alignment of 2 and an offset of zero, then ld64 will pick 1431 // the cstring from the object file earlier on the command line (since both have 1432 // the same number of trailing zeros in their address). So the final cstring may 1433 // either be at some address `16 * k + 2` or at some address `2 * k`. 1434 // 1435 // I've opted not to follow this behavior primarily for implementation 1436 // simplicity, and secondarily to save a few more bytes. It's not clear to me 1437 // that preserving the section alignment + offset is ever necessary, and there 1438 // are many cases that are clearly redundant. In particular, if an x86_64 object 1439 // file contains some strings that are accessed via SIMD instructions, then the 1440 // .cstring section in the object file will be 16-byte-aligned (since SIMD 1441 // requires its operand addresses to be 16-byte aligned). However, there will 1442 // typically also be other cstrings in the same file that aren't used via SIMD 1443 // and don't need this alignment. They will be emitted at some arbitrary address 1444 // `A`, but ld64 will treat them as being 16-byte aligned with an offset of `16 1445 // % A`. 1446 void DeduplicatedCStringSection::finalizeContents() { 1447 // Find the largest alignment required for each string. 1448 for (const CStringInputSection *isec : inputs) { 1449 for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) { 1450 const StringPiece &piece = isec->pieces[i]; 1451 if (!piece.live) 1452 continue; 1453 auto s = isec->getCachedHashStringRef(i); 1454 assert(isec->align != 0); 1455 uint8_t trailingZeros = countTrailingZeros(isec->align | piece.inSecOff); 1456 auto it = stringOffsetMap.insert( 1457 std::make_pair(s, StringOffset(trailingZeros))); 1458 if (!it.second && it.first->second.trailingZeros < trailingZeros) 1459 it.first->second.trailingZeros = trailingZeros; 1460 } 1461 } 1462 1463 // Assign an offset for each string and save it to the corresponding 1464 // StringPieces for easy access. 1465 for (CStringInputSection *isec : inputs) { 1466 for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) { 1467 if (!isec->pieces[i].live) 1468 continue; 1469 auto s = isec->getCachedHashStringRef(i); 1470 auto it = stringOffsetMap.find(s); 1471 assert(it != stringOffsetMap.end()); 1472 StringOffset &offsetInfo = it->second; 1473 if (offsetInfo.outSecOff == UINT64_MAX) { 1474 offsetInfo.outSecOff = alignTo(size, 1ULL << offsetInfo.trailingZeros); 1475 size = offsetInfo.outSecOff + s.size(); 1476 } 1477 isec->pieces[i].outSecOff = offsetInfo.outSecOff; 1478 } 1479 isec->isFinal = true; 1480 } 1481 } 1482 1483 void DeduplicatedCStringSection::writeTo(uint8_t *buf) const { 1484 for (const auto &p : stringOffsetMap) { 1485 StringRef data = p.first.val(); 1486 uint64_t off = p.second.outSecOff; 1487 if (!data.empty()) 1488 memcpy(buf + off, data.data(), data.size()); 1489 } 1490 } 1491 1492 // This section is actually emitted as __TEXT,__const by ld64, but clang may 1493 // emit input sections of that name, and LLD doesn't currently support mixing 1494 // synthetic and concat-type OutputSections. To work around this, I've given 1495 // our merged-literals section a different name. 1496 WordLiteralSection::WordLiteralSection() 1497 : SyntheticSection(segment_names::text, section_names::literals) { 1498 align = 16; 1499 } 1500 1501 void WordLiteralSection::addInput(WordLiteralInputSection *isec) { 1502 isec->parent = this; 1503 inputs.push_back(isec); 1504 } 1505 1506 void WordLiteralSection::finalizeContents() { 1507 for (WordLiteralInputSection *isec : inputs) { 1508 // We do all processing of the InputSection here, so it will be effectively 1509 // finalized. 1510 isec->isFinal = true; 1511 const uint8_t *buf = isec->data.data(); 1512 switch (sectionType(isec->getFlags())) { 1513 case S_4BYTE_LITERALS: { 1514 for (size_t off = 0, e = isec->data.size(); off < e; off += 4) { 1515 if (!isec->isLive(off)) 1516 continue; 1517 uint32_t value = *reinterpret_cast<const uint32_t *>(buf + off); 1518 literal4Map.emplace(value, literal4Map.size()); 1519 } 1520 break; 1521 } 1522 case S_8BYTE_LITERALS: { 1523 for (size_t off = 0, e = isec->data.size(); off < e; off += 8) { 1524 if (!isec->isLive(off)) 1525 continue; 1526 uint64_t value = *reinterpret_cast<const uint64_t *>(buf + off); 1527 literal8Map.emplace(value, literal8Map.size()); 1528 } 1529 break; 1530 } 1531 case S_16BYTE_LITERALS: { 1532 for (size_t off = 0, e = isec->data.size(); off < e; off += 16) { 1533 if (!isec->isLive(off)) 1534 continue; 1535 UInt128 value = *reinterpret_cast<const UInt128 *>(buf + off); 1536 literal16Map.emplace(value, literal16Map.size()); 1537 } 1538 break; 1539 } 1540 default: 1541 llvm_unreachable("invalid literal section type"); 1542 } 1543 } 1544 } 1545 1546 void WordLiteralSection::writeTo(uint8_t *buf) const { 1547 // Note that we don't attempt to do any endianness conversion in addInput(), 1548 // so we don't do it here either -- just write out the original value, 1549 // byte-for-byte. 1550 for (const auto &p : literal16Map) 1551 memcpy(buf + p.second * 16, &p.first, 16); 1552 buf += literal16Map.size() * 16; 1553 1554 for (const auto &p : literal8Map) 1555 memcpy(buf + p.second * 8, &p.first, 8); 1556 buf += literal8Map.size() * 8; 1557 1558 for (const auto &p : literal4Map) 1559 memcpy(buf + p.second * 4, &p.first, 4); 1560 } 1561 1562 void macho::createSyntheticSymbols() { 1563 auto addHeaderSymbol = [](const char *name) { 1564 symtab->addSynthetic(name, in.header->isec, /*value=*/0, 1565 /*isPrivateExtern=*/true, /*includeInSymtab=*/false, 1566 /*referencedDynamically=*/false); 1567 }; 1568 1569 switch (config->outputType) { 1570 // FIXME: Assign the right address value for these symbols 1571 // (rather than 0). But we need to do that after assignAddresses(). 1572 case MH_EXECUTE: 1573 // If linking PIE, __mh_execute_header is a defined symbol in 1574 // __TEXT, __text) 1575 // Otherwise, it's an absolute symbol. 1576 if (config->isPic) 1577 symtab->addSynthetic("__mh_execute_header", in.header->isec, /*value=*/0, 1578 /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 1579 /*referencedDynamically=*/true); 1580 else 1581 symtab->addSynthetic("__mh_execute_header", /*isec=*/nullptr, /*value=*/0, 1582 /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 1583 /*referencedDynamically=*/true); 1584 break; 1585 1586 // The following symbols are N_SECT symbols, even though the header is not 1587 // part of any section and that they are private to the bundle/dylib/object 1588 // they are part of. 1589 case MH_BUNDLE: 1590 addHeaderSymbol("__mh_bundle_header"); 1591 break; 1592 case MH_DYLIB: 1593 addHeaderSymbol("__mh_dylib_header"); 1594 break; 1595 case MH_DYLINKER: 1596 addHeaderSymbol("__mh_dylinker_header"); 1597 break; 1598 case MH_OBJECT: 1599 addHeaderSymbol("__mh_object_header"); 1600 break; 1601 default: 1602 llvm_unreachable("unexpected outputType"); 1603 break; 1604 } 1605 1606 // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit 1607 // which does e.g. cleanup of static global variables. The ABI document 1608 // says that the pointer can point to any address in one of the dylib's 1609 // segments, but in practice ld64 seems to set it to point to the header, 1610 // so that's what's implemented here. 1611 addHeaderSymbol("___dso_handle"); 1612 } 1613 1614 template SymtabSection *macho::makeSymtabSection<LP64>(StringTableSection &); 1615 template SymtabSection *macho::makeSymtabSection<ILP32>(StringTableSection &); 1616