1 //===- SyntheticSections.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "SyntheticSections.h" 10 #include "ConcatOutputSection.h" 11 #include "Config.h" 12 #include "ExportTrie.h" 13 #include "InputFiles.h" 14 #include "MachOStructs.h" 15 #include "OutputSegment.h" 16 #include "SymbolTable.h" 17 #include "Symbols.h" 18 19 #include "lld/Common/CommonLinkerContext.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/Config/llvm-config.h" 22 #include "llvm/Support/EndianStream.h" 23 #include "llvm/Support/FileSystem.h" 24 #include "llvm/Support/LEB128.h" 25 #include "llvm/Support/Path.h" 26 #include "llvm/Support/SHA256.h" 27 28 #if defined(__APPLE__) 29 #include <sys/mman.h> 30 #endif 31 32 #ifdef LLVM_HAVE_LIBXAR 33 #include <fcntl.h> 34 extern "C" { 35 #include <xar/xar.h> 36 } 37 #endif 38 39 using namespace llvm; 40 using namespace llvm::MachO; 41 using namespace llvm::support; 42 using namespace llvm::support::endian; 43 using namespace lld; 44 using namespace lld::macho; 45 46 InStruct macho::in; 47 std::vector<SyntheticSection *> macho::syntheticSections; 48 49 SyntheticSection::SyntheticSection(const char *segname, const char *name) 50 : OutputSection(SyntheticKind, name) { 51 std::tie(this->segname, this->name) = maybeRenameSection({segname, name}); 52 isec = makeSyntheticInputSection(segname, name); 53 isec->parent = this; 54 syntheticSections.push_back(this); 55 } 56 57 // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts 58 // from the beginning of the file (i.e. the header). 59 MachHeaderSection::MachHeaderSection() 60 : SyntheticSection(segment_names::text, section_names::header) { 61 // XXX: This is a hack. (See D97007) 62 // Setting the index to 1 to pretend that this section is the text 63 // section. 64 index = 1; 65 isec->isFinal = true; 66 } 67 68 void MachHeaderSection::addLoadCommand(LoadCommand *lc) { 69 loadCommands.push_back(lc); 70 sizeOfCmds += lc->getSize(); 71 } 72 73 uint64_t MachHeaderSection::getSize() const { 74 uint64_t size = target->headerSize + sizeOfCmds + config->headerPad; 75 // If we are emitting an encryptable binary, our load commands must have a 76 // separate (non-encrypted) page to themselves. 77 if (config->emitEncryptionInfo) 78 size = alignTo(size, target->getPageSize()); 79 return size; 80 } 81 82 static uint32_t cpuSubtype() { 83 uint32_t subtype = target->cpuSubtype; 84 85 if (config->outputType == MH_EXECUTE && !config->staticLink && 86 target->cpuSubtype == CPU_SUBTYPE_X86_64_ALL && 87 config->platform() == PLATFORM_MACOS && 88 config->platformInfo.minimum >= VersionTuple(10, 5)) 89 subtype |= CPU_SUBTYPE_LIB64; 90 91 return subtype; 92 } 93 94 void MachHeaderSection::writeTo(uint8_t *buf) const { 95 auto *hdr = reinterpret_cast<mach_header *>(buf); 96 hdr->magic = target->magic; 97 hdr->cputype = target->cpuType; 98 hdr->cpusubtype = cpuSubtype(); 99 hdr->filetype = config->outputType; 100 hdr->ncmds = loadCommands.size(); 101 hdr->sizeofcmds = sizeOfCmds; 102 hdr->flags = MH_DYLDLINK; 103 104 if (config->namespaceKind == NamespaceKind::twolevel) 105 hdr->flags |= MH_NOUNDEFS | MH_TWOLEVEL; 106 107 if (config->outputType == MH_DYLIB && !config->hasReexports) 108 hdr->flags |= MH_NO_REEXPORTED_DYLIBS; 109 110 if (config->markDeadStrippableDylib) 111 hdr->flags |= MH_DEAD_STRIPPABLE_DYLIB; 112 113 if (config->outputType == MH_EXECUTE && config->isPic) 114 hdr->flags |= MH_PIE; 115 116 if (config->outputType == MH_DYLIB && config->applicationExtension) 117 hdr->flags |= MH_APP_EXTENSION_SAFE; 118 119 if (in.exports->hasWeakSymbol || in.weakBinding->hasNonWeakDefinition()) 120 hdr->flags |= MH_WEAK_DEFINES; 121 122 if (in.exports->hasWeakSymbol || in.weakBinding->hasEntry()) 123 hdr->flags |= MH_BINDS_TO_WEAK; 124 125 for (const OutputSegment *seg : outputSegments) { 126 for (const OutputSection *osec : seg->getSections()) { 127 if (isThreadLocalVariables(osec->flags)) { 128 hdr->flags |= MH_HAS_TLV_DESCRIPTORS; 129 break; 130 } 131 } 132 } 133 134 uint8_t *p = reinterpret_cast<uint8_t *>(hdr) + target->headerSize; 135 for (const LoadCommand *lc : loadCommands) { 136 lc->writeTo(p); 137 p += lc->getSize(); 138 } 139 } 140 141 PageZeroSection::PageZeroSection() 142 : SyntheticSection(segment_names::pageZero, section_names::pageZero) {} 143 144 RebaseSection::RebaseSection() 145 : LinkEditSection(segment_names::linkEdit, section_names::rebase) {} 146 147 namespace { 148 struct Rebase { 149 OutputSegment *segment = nullptr; 150 uint64_t offset = 0; 151 uint64_t consecutiveCount = 0; 152 }; 153 } // namespace 154 155 // Rebase opcodes allow us to describe a contiguous sequence of rebase location 156 // using a single DO_REBASE opcode. To take advantage of it, we delay emitting 157 // `DO_REBASE` until we have reached the end of a contiguous sequence. 158 static void encodeDoRebase(Rebase &rebase, raw_svector_ostream &os) { 159 assert(rebase.consecutiveCount != 0); 160 if (rebase.consecutiveCount <= REBASE_IMMEDIATE_MASK) { 161 os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 162 rebase.consecutiveCount); 163 } else { 164 os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES); 165 encodeULEB128(rebase.consecutiveCount, os); 166 } 167 rebase.consecutiveCount = 0; 168 } 169 170 static void encodeRebase(const OutputSection *osec, uint64_t outSecOff, 171 Rebase &lastRebase, raw_svector_ostream &os) { 172 OutputSegment *seg = osec->parent; 173 uint64_t offset = osec->getSegmentOffset() + outSecOff; 174 if (lastRebase.segment != seg || lastRebase.offset != offset) { 175 if (lastRebase.consecutiveCount != 0) 176 encodeDoRebase(lastRebase, os); 177 178 if (lastRebase.segment != seg) { 179 os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 180 seg->index); 181 encodeULEB128(offset, os); 182 lastRebase.segment = seg; 183 lastRebase.offset = offset; 184 } else { 185 assert(lastRebase.offset != offset); 186 os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB); 187 encodeULEB128(offset - lastRebase.offset, os); 188 lastRebase.offset = offset; 189 } 190 } 191 ++lastRebase.consecutiveCount; 192 // DO_REBASE causes dyld to both perform the binding and increment the offset 193 lastRebase.offset += target->wordSize; 194 } 195 196 void RebaseSection::finalizeContents() { 197 if (locations.empty()) 198 return; 199 200 raw_svector_ostream os{contents}; 201 Rebase lastRebase; 202 203 os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER); 204 205 llvm::sort(locations, [](const Location &a, const Location &b) { 206 return a.isec->getVA(a.offset) < b.isec->getVA(b.offset); 207 }); 208 for (const Location &loc : locations) 209 encodeRebase(loc.isec->parent, loc.isec->getOffset(loc.offset), lastRebase, 210 os); 211 if (lastRebase.consecutiveCount != 0) 212 encodeDoRebase(lastRebase, os); 213 214 os << static_cast<uint8_t>(REBASE_OPCODE_DONE); 215 } 216 217 void RebaseSection::writeTo(uint8_t *buf) const { 218 memcpy(buf, contents.data(), contents.size()); 219 } 220 221 NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname, 222 const char *name) 223 : SyntheticSection(segname, name) { 224 align = target->wordSize; 225 } 226 227 void macho::addNonLazyBindingEntries(const Symbol *sym, 228 const InputSection *isec, uint64_t offset, 229 int64_t addend) { 230 if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) { 231 in.binding->addEntry(dysym, isec, offset, addend); 232 if (dysym->isWeakDef()) 233 in.weakBinding->addEntry(sym, isec, offset, addend); 234 } else if (const auto *defined = dyn_cast<Defined>(sym)) { 235 in.rebase->addEntry(isec, offset); 236 if (defined->isExternalWeakDef()) 237 in.weakBinding->addEntry(sym, isec, offset, addend); 238 else if (defined->interposable) 239 in.binding->addEntry(sym, isec, offset, addend); 240 } else { 241 // Undefined symbols are filtered out in scanRelocations(); we should never 242 // get here 243 llvm_unreachable("cannot bind to an undefined symbol"); 244 } 245 } 246 247 void NonLazyPointerSectionBase::addEntry(Symbol *sym) { 248 if (entries.insert(sym)) { 249 assert(!sym->isInGot()); 250 sym->gotIndex = entries.size() - 1; 251 252 addNonLazyBindingEntries(sym, isec, sym->gotIndex * target->wordSize); 253 } 254 } 255 256 void NonLazyPointerSectionBase::writeTo(uint8_t *buf) const { 257 for (size_t i = 0, n = entries.size(); i < n; ++i) 258 if (auto *defined = dyn_cast<Defined>(entries[i])) 259 write64le(&buf[i * target->wordSize], defined->getVA()); 260 } 261 262 GotSection::GotSection() 263 : NonLazyPointerSectionBase(segment_names::data, section_names::got) { 264 flags = S_NON_LAZY_SYMBOL_POINTERS; 265 } 266 267 TlvPointerSection::TlvPointerSection() 268 : NonLazyPointerSectionBase(segment_names::data, 269 section_names::threadPtrs) { 270 flags = S_THREAD_LOCAL_VARIABLE_POINTERS; 271 } 272 273 BindingSection::BindingSection() 274 : LinkEditSection(segment_names::linkEdit, section_names::binding) {} 275 276 namespace { 277 struct Binding { 278 OutputSegment *segment = nullptr; 279 uint64_t offset = 0; 280 int64_t addend = 0; 281 }; 282 struct BindIR { 283 // Default value of 0xF0 is not valid opcode and should make the program 284 // scream instead of accidentally writing "valid" values. 285 uint8_t opcode = 0xF0; 286 uint64_t data = 0; 287 uint64_t consecutiveCount = 0; 288 }; 289 } // namespace 290 291 // Encode a sequence of opcodes that tell dyld to write the address of symbol + 292 // addend at osec->addr + outSecOff. 293 // 294 // The bind opcode "interpreter" remembers the values of each binding field, so 295 // we only need to encode the differences between bindings. Hence the use of 296 // lastBinding. 297 static void encodeBinding(const OutputSection *osec, uint64_t outSecOff, 298 int64_t addend, Binding &lastBinding, 299 std::vector<BindIR> &opcodes) { 300 OutputSegment *seg = osec->parent; 301 uint64_t offset = osec->getSegmentOffset() + outSecOff; 302 if (lastBinding.segment != seg) { 303 opcodes.push_back( 304 {static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 305 seg->index), 306 offset}); 307 lastBinding.segment = seg; 308 lastBinding.offset = offset; 309 } else if (lastBinding.offset != offset) { 310 opcodes.push_back({BIND_OPCODE_ADD_ADDR_ULEB, offset - lastBinding.offset}); 311 lastBinding.offset = offset; 312 } 313 314 if (lastBinding.addend != addend) { 315 opcodes.push_back( 316 {BIND_OPCODE_SET_ADDEND_SLEB, static_cast<uint64_t>(addend)}); 317 lastBinding.addend = addend; 318 } 319 320 opcodes.push_back({BIND_OPCODE_DO_BIND, 0}); 321 // DO_BIND causes dyld to both perform the binding and increment the offset 322 lastBinding.offset += target->wordSize; 323 } 324 325 static void optimizeOpcodes(std::vector<BindIR> &opcodes) { 326 // Pass 1: Combine bind/add pairs 327 size_t i; 328 int pWrite = 0; 329 for (i = 1; i < opcodes.size(); ++i, ++pWrite) { 330 if ((opcodes[i].opcode == BIND_OPCODE_ADD_ADDR_ULEB) && 331 (opcodes[i - 1].opcode == BIND_OPCODE_DO_BIND)) { 332 opcodes[pWrite].opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB; 333 opcodes[pWrite].data = opcodes[i].data; 334 ++i; 335 } else { 336 opcodes[pWrite] = opcodes[i - 1]; 337 } 338 } 339 if (i == opcodes.size()) 340 opcodes[pWrite] = opcodes[i - 1]; 341 opcodes.resize(pWrite + 1); 342 343 // Pass 2: Compress two or more bind_add opcodes 344 pWrite = 0; 345 for (i = 1; i < opcodes.size(); ++i, ++pWrite) { 346 if ((opcodes[i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) && 347 (opcodes[i - 1].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) && 348 (opcodes[i].data == opcodes[i - 1].data)) { 349 opcodes[pWrite].opcode = BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB; 350 opcodes[pWrite].consecutiveCount = 2; 351 opcodes[pWrite].data = opcodes[i].data; 352 ++i; 353 while (i < opcodes.size() && 354 (opcodes[i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) && 355 (opcodes[i].data == opcodes[i - 1].data)) { 356 opcodes[pWrite].consecutiveCount++; 357 ++i; 358 } 359 } else { 360 opcodes[pWrite] = opcodes[i - 1]; 361 } 362 } 363 if (i == opcodes.size()) 364 opcodes[pWrite] = opcodes[i - 1]; 365 opcodes.resize(pWrite + 1); 366 367 // Pass 3: Use immediate encodings 368 // Every binding is the size of one pointer. If the next binding is a 369 // multiple of wordSize away that is within BIND_IMMEDIATE_MASK, the 370 // opcode can be scaled by wordSize into a single byte and dyld will 371 // expand it to the correct address. 372 for (auto &p : opcodes) { 373 // It's unclear why the check needs to be less than BIND_IMMEDIATE_MASK, 374 // but ld64 currently does this. This could be a potential bug, but 375 // for now, perform the same behavior to prevent mysterious bugs. 376 if ((p.opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) && 377 ((p.data / target->wordSize) < BIND_IMMEDIATE_MASK) && 378 ((p.data % target->wordSize) == 0)) { 379 p.opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED; 380 p.data /= target->wordSize; 381 } 382 } 383 } 384 385 static void flushOpcodes(const BindIR &op, raw_svector_ostream &os) { 386 uint8_t opcode = op.opcode & BIND_OPCODE_MASK; 387 switch (opcode) { 388 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: 389 case BIND_OPCODE_ADD_ADDR_ULEB: 390 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: 391 os << op.opcode; 392 encodeULEB128(op.data, os); 393 break; 394 case BIND_OPCODE_SET_ADDEND_SLEB: 395 os << op.opcode; 396 encodeSLEB128(static_cast<int64_t>(op.data), os); 397 break; 398 case BIND_OPCODE_DO_BIND: 399 os << op.opcode; 400 break; 401 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: 402 os << op.opcode; 403 encodeULEB128(op.consecutiveCount, os); 404 encodeULEB128(op.data, os); 405 break; 406 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: 407 os << static_cast<uint8_t>(op.opcode | op.data); 408 break; 409 default: 410 llvm_unreachable("cannot bind to an unrecognized symbol"); 411 } 412 } 413 414 // Non-weak bindings need to have their dylib ordinal encoded as well. 415 static int16_t ordinalForDylibSymbol(const DylibSymbol &dysym) { 416 if (config->namespaceKind == NamespaceKind::flat || dysym.isDynamicLookup()) 417 return static_cast<int16_t>(BIND_SPECIAL_DYLIB_FLAT_LOOKUP); 418 assert(dysym.getFile()->isReferenced()); 419 return dysym.getFile()->ordinal; 420 } 421 422 static int16_t ordinalForSymbol(const Symbol &sym) { 423 if (const auto *dysym = dyn_cast<DylibSymbol>(&sym)) 424 return ordinalForDylibSymbol(*dysym); 425 assert(cast<Defined>(&sym)->interposable); 426 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP; 427 } 428 429 static void encodeDylibOrdinal(int16_t ordinal, raw_svector_ostream &os) { 430 if (ordinal <= 0) { 431 os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | 432 (ordinal & BIND_IMMEDIATE_MASK)); 433 } else if (ordinal <= BIND_IMMEDIATE_MASK) { 434 os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | ordinal); 435 } else { 436 os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); 437 encodeULEB128(ordinal, os); 438 } 439 } 440 441 static void encodeWeakOverride(const Defined *defined, 442 raw_svector_ostream &os) { 443 os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 444 BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) 445 << defined->getName() << '\0'; 446 } 447 448 // Organize the bindings so we can encoded them with fewer opcodes. 449 // 450 // First, all bindings for a given symbol should be grouped together. 451 // BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM is the largest opcode (since it 452 // has an associated symbol string), so we only want to emit it once per symbol. 453 // 454 // Within each group, we sort the bindings by address. Since bindings are 455 // delta-encoded, sorting them allows for a more compact result. Note that 456 // sorting by address alone ensures that bindings for the same segment / section 457 // are located together, minimizing the number of times we have to emit 458 // BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB. 459 // 460 // Finally, we sort the symbols by the address of their first binding, again 461 // to facilitate the delta-encoding process. 462 template <class Sym> 463 std::vector<std::pair<const Sym *, std::vector<BindingEntry>>> 464 sortBindings(const BindingsMap<const Sym *> &bindingsMap) { 465 std::vector<std::pair<const Sym *, std::vector<BindingEntry>>> bindingsVec( 466 bindingsMap.begin(), bindingsMap.end()); 467 for (auto &p : bindingsVec) { 468 std::vector<BindingEntry> &bindings = p.second; 469 llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) { 470 return a.target.getVA() < b.target.getVA(); 471 }); 472 } 473 llvm::sort(bindingsVec, [](const auto &a, const auto &b) { 474 return a.second[0].target.getVA() < b.second[0].target.getVA(); 475 }); 476 return bindingsVec; 477 } 478 479 // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld 480 // interprets to update a record with the following fields: 481 // * segment index (of the segment to write the symbol addresses to, typically 482 // the __DATA_CONST segment which contains the GOT) 483 // * offset within the segment, indicating the next location to write a binding 484 // * symbol type 485 // * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command) 486 // * symbol name 487 // * addend 488 // When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind 489 // a symbol in the GOT, and increments the segment offset to point to the next 490 // entry. It does *not* clear the record state after doing the bind, so 491 // subsequent opcodes only need to encode the differences between bindings. 492 void BindingSection::finalizeContents() { 493 raw_svector_ostream os{contents}; 494 Binding lastBinding; 495 int16_t lastOrdinal = 0; 496 497 for (auto &p : sortBindings(bindingsMap)) { 498 const Symbol *sym = p.first; 499 std::vector<BindingEntry> &bindings = p.second; 500 uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; 501 if (sym->isWeakRef()) 502 flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT; 503 os << flags << sym->getName() << '\0' 504 << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER); 505 int16_t ordinal = ordinalForSymbol(*sym); 506 if (ordinal != lastOrdinal) { 507 encodeDylibOrdinal(ordinal, os); 508 lastOrdinal = ordinal; 509 } 510 std::vector<BindIR> opcodes; 511 for (const BindingEntry &b : bindings) 512 encodeBinding(b.target.isec->parent, 513 b.target.isec->getOffset(b.target.offset), b.addend, 514 lastBinding, opcodes); 515 if (config->optimize > 1) 516 optimizeOpcodes(opcodes); 517 for (const auto &op : opcodes) 518 flushOpcodes(op, os); 519 } 520 if (!bindingsMap.empty()) 521 os << static_cast<uint8_t>(BIND_OPCODE_DONE); 522 } 523 524 void BindingSection::writeTo(uint8_t *buf) const { 525 memcpy(buf, contents.data(), contents.size()); 526 } 527 528 WeakBindingSection::WeakBindingSection() 529 : LinkEditSection(segment_names::linkEdit, section_names::weakBinding) {} 530 531 void WeakBindingSection::finalizeContents() { 532 raw_svector_ostream os{contents}; 533 Binding lastBinding; 534 535 for (const Defined *defined : definitions) 536 encodeWeakOverride(defined, os); 537 538 for (auto &p : sortBindings(bindingsMap)) { 539 const Symbol *sym = p.first; 540 std::vector<BindingEntry> &bindings = p.second; 541 os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) 542 << sym->getName() << '\0' 543 << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER); 544 std::vector<BindIR> opcodes; 545 for (const BindingEntry &b : bindings) 546 encodeBinding(b.target.isec->parent, 547 b.target.isec->getOffset(b.target.offset), b.addend, 548 lastBinding, opcodes); 549 if (config->optimize > 1) 550 optimizeOpcodes(opcodes); 551 for (const auto &op : opcodes) 552 flushOpcodes(op, os); 553 } 554 if (!bindingsMap.empty() || !definitions.empty()) 555 os << static_cast<uint8_t>(BIND_OPCODE_DONE); 556 } 557 558 void WeakBindingSection::writeTo(uint8_t *buf) const { 559 memcpy(buf, contents.data(), contents.size()); 560 } 561 562 StubsSection::StubsSection() 563 : SyntheticSection(segment_names::text, section_names::stubs) { 564 flags = S_SYMBOL_STUBS | S_ATTR_SOME_INSTRUCTIONS | S_ATTR_PURE_INSTRUCTIONS; 565 // The stubs section comprises machine instructions, which are aligned to 566 // 4 bytes on the archs we care about. 567 align = 4; 568 reserved2 = target->stubSize; 569 } 570 571 uint64_t StubsSection::getSize() const { 572 return entries.size() * target->stubSize; 573 } 574 575 void StubsSection::writeTo(uint8_t *buf) const { 576 size_t off = 0; 577 for (const Symbol *sym : entries) { 578 target->writeStub(buf + off, *sym); 579 off += target->stubSize; 580 } 581 } 582 583 void StubsSection::finalize() { isFinal = true; } 584 585 bool StubsSection::addEntry(Symbol *sym) { 586 bool inserted = entries.insert(sym); 587 if (inserted) 588 sym->stubsIndex = entries.size() - 1; 589 return inserted; 590 } 591 592 StubHelperSection::StubHelperSection() 593 : SyntheticSection(segment_names::text, section_names::stubHelper) { 594 flags = S_ATTR_SOME_INSTRUCTIONS | S_ATTR_PURE_INSTRUCTIONS; 595 align = 4; // This section comprises machine instructions 596 } 597 598 uint64_t StubHelperSection::getSize() const { 599 return target->stubHelperHeaderSize + 600 in.lazyBinding->getEntries().size() * target->stubHelperEntrySize; 601 } 602 603 bool StubHelperSection::isNeeded() const { return in.lazyBinding->isNeeded(); } 604 605 void StubHelperSection::writeTo(uint8_t *buf) const { 606 target->writeStubHelperHeader(buf); 607 size_t off = target->stubHelperHeaderSize; 608 for (const Symbol *sym : in.lazyBinding->getEntries()) { 609 target->writeStubHelperEntry(buf + off, *sym, addr + off); 610 off += target->stubHelperEntrySize; 611 } 612 } 613 614 void StubHelperSection::setup() { 615 Symbol *binder = symtab->addUndefined("dyld_stub_binder", /*file=*/nullptr, 616 /*isWeakRef=*/false); 617 if (auto *undefined = dyn_cast<Undefined>(binder)) 618 treatUndefinedSymbol(*undefined, 619 "lazy binding (normally in libSystem.dylib)"); 620 621 // treatUndefinedSymbol() can replace binder with a DylibSymbol; re-check. 622 stubBinder = dyn_cast_or_null<DylibSymbol>(binder); 623 if (stubBinder == nullptr) 624 return; 625 626 in.got->addEntry(stubBinder); 627 628 in.imageLoaderCache->parent = 629 ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache); 630 inputSections.push_back(in.imageLoaderCache); 631 // Since this isn't in the symbol table or in any input file, the noDeadStrip 632 // argument doesn't matter. 633 dyldPrivate = 634 make<Defined>("__dyld_private", nullptr, in.imageLoaderCache, 0, 0, 635 /*isWeakDef=*/false, 636 /*isExternal=*/false, /*isPrivateExtern=*/false, 637 /*includeInSymtab=*/true, 638 /*isThumb=*/false, /*isReferencedDynamically=*/false, 639 /*noDeadStrip=*/false); 640 dyldPrivate->used = true; 641 } 642 643 LazyPointerSection::LazyPointerSection() 644 : SyntheticSection(segment_names::data, section_names::lazySymbolPtr) { 645 align = target->wordSize; 646 flags = S_LAZY_SYMBOL_POINTERS; 647 } 648 649 uint64_t LazyPointerSection::getSize() const { 650 return in.stubs->getEntries().size() * target->wordSize; 651 } 652 653 bool LazyPointerSection::isNeeded() const { 654 return !in.stubs->getEntries().empty(); 655 } 656 657 void LazyPointerSection::writeTo(uint8_t *buf) const { 658 size_t off = 0; 659 for (const Symbol *sym : in.stubs->getEntries()) { 660 if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) { 661 if (dysym->hasStubsHelper()) { 662 uint64_t stubHelperOffset = 663 target->stubHelperHeaderSize + 664 dysym->stubsHelperIndex * target->stubHelperEntrySize; 665 write64le(buf + off, in.stubHelper->addr + stubHelperOffset); 666 } 667 } else { 668 write64le(buf + off, sym->getVA()); 669 } 670 off += target->wordSize; 671 } 672 } 673 674 LazyBindingSection::LazyBindingSection() 675 : LinkEditSection(segment_names::linkEdit, section_names::lazyBinding) {} 676 677 void LazyBindingSection::finalizeContents() { 678 // TODO: Just precompute output size here instead of writing to a temporary 679 // buffer 680 for (Symbol *sym : entries) 681 sym->lazyBindOffset = encode(*sym); 682 } 683 684 void LazyBindingSection::writeTo(uint8_t *buf) const { 685 memcpy(buf, contents.data(), contents.size()); 686 } 687 688 void LazyBindingSection::addEntry(Symbol *sym) { 689 if (entries.insert(sym)) { 690 sym->stubsHelperIndex = entries.size() - 1; 691 in.rebase->addEntry(in.lazyPointers->isec, 692 sym->stubsIndex * target->wordSize); 693 } 694 } 695 696 // Unlike the non-lazy binding section, the bind opcodes in this section aren't 697 // interpreted all at once. Rather, dyld will start interpreting opcodes at a 698 // given offset, typically only binding a single symbol before it finds a 699 // BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case, 700 // we cannot encode just the differences between symbols; we have to emit the 701 // complete bind information for each symbol. 702 uint32_t LazyBindingSection::encode(const Symbol &sym) { 703 uint32_t opstreamOffset = contents.size(); 704 OutputSegment *dataSeg = in.lazyPointers->parent; 705 os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 706 dataSeg->index); 707 uint64_t offset = in.lazyPointers->addr - dataSeg->addr + 708 sym.stubsIndex * target->wordSize; 709 encodeULEB128(offset, os); 710 encodeDylibOrdinal(ordinalForSymbol(sym), os); 711 712 uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; 713 if (sym.isWeakRef()) 714 flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT; 715 716 os << flags << sym.getName() << '\0' 717 << static_cast<uint8_t>(BIND_OPCODE_DO_BIND) 718 << static_cast<uint8_t>(BIND_OPCODE_DONE); 719 return opstreamOffset; 720 } 721 722 ExportSection::ExportSection() 723 : LinkEditSection(segment_names::linkEdit, section_names::export_) {} 724 725 void ExportSection::finalizeContents() { 726 trieBuilder.setImageBase(in.header->addr); 727 for (const Symbol *sym : symtab->getSymbols()) { 728 if (const auto *defined = dyn_cast<Defined>(sym)) { 729 if (defined->privateExtern || !defined->isLive()) 730 continue; 731 trieBuilder.addSymbol(*defined); 732 hasWeakSymbol = hasWeakSymbol || sym->isWeakDef(); 733 } 734 } 735 size = trieBuilder.build(); 736 } 737 738 void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); } 739 740 DataInCodeSection::DataInCodeSection() 741 : LinkEditSection(segment_names::linkEdit, section_names::dataInCode) {} 742 743 template <class LP> 744 static std::vector<MachO::data_in_code_entry> collectDataInCodeEntries() { 745 std::vector<MachO::data_in_code_entry> dataInCodeEntries; 746 for (const InputFile *inputFile : inputFiles) { 747 if (!isa<ObjFile>(inputFile)) 748 continue; 749 const ObjFile *objFile = cast<ObjFile>(inputFile); 750 ArrayRef<MachO::data_in_code_entry> entries = objFile->getDataInCode(); 751 if (entries.empty()) 752 continue; 753 754 assert(is_sorted(dataInCodeEntries, [](const data_in_code_entry &lhs, 755 const data_in_code_entry &rhs) { 756 return lhs.offset < rhs.offset; 757 })); 758 // For each code subsection find 'data in code' entries residing in it. 759 // Compute the new offset values as 760 // <offset within subsection> + <subsection address> - <__TEXT address>. 761 for (const Section *section : objFile->sections) { 762 for (const Subsection &subsec : section->subsections) { 763 const InputSection *isec = subsec.isec; 764 if (!isCodeSection(isec)) 765 continue; 766 if (cast<ConcatInputSection>(isec)->shouldOmitFromOutput()) 767 continue; 768 const uint64_t beginAddr = section->addr + subsec.offset; 769 auto it = llvm::lower_bound( 770 entries, beginAddr, 771 [](const MachO::data_in_code_entry &entry, uint64_t addr) { 772 return entry.offset < addr; 773 }); 774 const uint64_t endAddr = beginAddr + isec->getSize(); 775 for (const auto end = entries.end(); 776 it != end && it->offset + it->length <= endAddr; ++it) 777 dataInCodeEntries.push_back( 778 {static_cast<uint32_t>(isec->getVA(it->offset - beginAddr) - 779 in.header->addr), 780 it->length, it->kind}); 781 } 782 } 783 } 784 return dataInCodeEntries; 785 } 786 787 void DataInCodeSection::finalizeContents() { 788 entries = target->wordSize == 8 ? collectDataInCodeEntries<LP64>() 789 : collectDataInCodeEntries<ILP32>(); 790 } 791 792 void DataInCodeSection::writeTo(uint8_t *buf) const { 793 if (!entries.empty()) 794 memcpy(buf, entries.data(), getRawSize()); 795 } 796 797 FunctionStartsSection::FunctionStartsSection() 798 : LinkEditSection(segment_names::linkEdit, section_names::functionStarts) {} 799 800 void FunctionStartsSection::finalizeContents() { 801 raw_svector_ostream os{contents}; 802 std::vector<uint64_t> addrs; 803 for (const InputFile *file : inputFiles) { 804 if (auto *objFile = dyn_cast<ObjFile>(file)) { 805 for (const Symbol *sym : objFile->symbols) { 806 if (const auto *defined = dyn_cast_or_null<Defined>(sym)) { 807 if (!defined->isec || !isCodeSection(defined->isec) || 808 !defined->isLive()) 809 continue; 810 // TODO: Add support for thumbs, in that case 811 // the lowest bit of nextAddr needs to be set to 1. 812 addrs.push_back(defined->getVA()); 813 } 814 } 815 } 816 } 817 llvm::sort(addrs); 818 uint64_t addr = in.header->addr; 819 for (uint64_t nextAddr : addrs) { 820 uint64_t delta = nextAddr - addr; 821 if (delta == 0) 822 continue; 823 encodeULEB128(delta, os); 824 addr = nextAddr; 825 } 826 os << '\0'; 827 } 828 829 void FunctionStartsSection::writeTo(uint8_t *buf) const { 830 memcpy(buf, contents.data(), contents.size()); 831 } 832 833 SymtabSection::SymtabSection(StringTableSection &stringTableSection) 834 : LinkEditSection(segment_names::linkEdit, section_names::symbolTable), 835 stringTableSection(stringTableSection) {} 836 837 void SymtabSection::emitBeginSourceStab(DWARFUnit *compileUnit) { 838 StabsEntry stab(N_SO); 839 SmallString<261> dir(compileUnit->getCompilationDir()); 840 StringRef sep = sys::path::get_separator(); 841 // We don't use `path::append` here because we want an empty `dir` to result 842 // in an absolute path. `append` would give us a relative path for that case. 843 if (!dir.endswith(sep)) 844 dir += sep; 845 stab.strx = stringTableSection.addString( 846 saver().save(dir + compileUnit->getUnitDIE().getShortName())); 847 stabs.emplace_back(std::move(stab)); 848 } 849 850 void SymtabSection::emitEndSourceStab() { 851 StabsEntry stab(N_SO); 852 stab.sect = 1; 853 stabs.emplace_back(std::move(stab)); 854 } 855 856 void SymtabSection::emitObjectFileStab(ObjFile *file) { 857 StabsEntry stab(N_OSO); 858 stab.sect = target->cpuSubtype; 859 SmallString<261> path(!file->archiveName.empty() ? file->archiveName 860 : file->getName()); 861 std::error_code ec = sys::fs::make_absolute(path); 862 if (ec) 863 fatal("failed to get absolute path for " + path); 864 865 if (!file->archiveName.empty()) 866 path.append({"(", file->getName(), ")"}); 867 868 StringRef adjustedPath = saver().save(path.str()); 869 adjustedPath.consume_front(config->osoPrefix); 870 871 stab.strx = stringTableSection.addString(adjustedPath); 872 stab.desc = 1; 873 stab.value = file->modTime; 874 stabs.emplace_back(std::move(stab)); 875 } 876 877 void SymtabSection::emitEndFunStab(Defined *defined) { 878 StabsEntry stab(N_FUN); 879 stab.value = defined->size; 880 stabs.emplace_back(std::move(stab)); 881 } 882 883 void SymtabSection::emitStabs() { 884 if (config->omitDebugInfo) 885 return; 886 887 for (const std::string &s : config->astPaths) { 888 StabsEntry astStab(N_AST); 889 astStab.strx = stringTableSection.addString(s); 890 stabs.emplace_back(std::move(astStab)); 891 } 892 893 std::vector<Defined *> symbolsNeedingStabs; 894 for (const SymtabEntry &entry : 895 concat<SymtabEntry>(localSymbols, externalSymbols)) { 896 Symbol *sym = entry.sym; 897 assert(sym->isLive() && 898 "dead symbols should not be in localSymbols, externalSymbols"); 899 if (auto *defined = dyn_cast<Defined>(sym)) { 900 // Excluded symbols should have been filtered out in finalizeContents(). 901 assert(defined->includeInSymtab); 902 903 if (defined->isAbsolute()) 904 continue; 905 906 // Constant-folded symbols go in the executable's symbol table, but don't 907 // get a stabs entry. 908 if (defined->wasIdenticalCodeFolded) 909 continue; 910 911 InputSection *isec = defined->isec; 912 ObjFile *file = dyn_cast_or_null<ObjFile>(isec->getFile()); 913 if (!file || !file->compileUnit) 914 continue; 915 916 symbolsNeedingStabs.push_back(defined); 917 } 918 } 919 920 llvm::stable_sort(symbolsNeedingStabs, [&](Defined *a, Defined *b) { 921 return a->isec->getFile()->id < b->isec->getFile()->id; 922 }); 923 924 // Emit STABS symbols so that dsymutil and/or the debugger can map address 925 // regions in the final binary to the source and object files from which they 926 // originated. 927 InputFile *lastFile = nullptr; 928 for (Defined *defined : symbolsNeedingStabs) { 929 InputSection *isec = defined->isec; 930 ObjFile *file = cast<ObjFile>(isec->getFile()); 931 932 if (lastFile == nullptr || lastFile != file) { 933 if (lastFile != nullptr) 934 emitEndSourceStab(); 935 lastFile = file; 936 937 emitBeginSourceStab(file->compileUnit); 938 emitObjectFileStab(file); 939 } 940 941 StabsEntry symStab; 942 symStab.sect = defined->isec->parent->index; 943 symStab.strx = stringTableSection.addString(defined->getName()); 944 symStab.value = defined->getVA(); 945 946 if (isCodeSection(isec)) { 947 symStab.type = N_FUN; 948 stabs.emplace_back(std::move(symStab)); 949 emitEndFunStab(defined); 950 } else { 951 symStab.type = defined->isExternal() ? N_GSYM : N_STSYM; 952 stabs.emplace_back(std::move(symStab)); 953 } 954 } 955 956 if (!stabs.empty()) 957 emitEndSourceStab(); 958 } 959 960 void SymtabSection::finalizeContents() { 961 auto addSymbol = [&](std::vector<SymtabEntry> &symbols, Symbol *sym) { 962 uint32_t strx = stringTableSection.addString(sym->getName()); 963 symbols.push_back({sym, strx}); 964 }; 965 966 // Local symbols aren't in the SymbolTable, so we walk the list of object 967 // files to gather them. 968 for (const InputFile *file : inputFiles) { 969 if (auto *objFile = dyn_cast<ObjFile>(file)) { 970 for (Symbol *sym : objFile->symbols) { 971 if (auto *defined = dyn_cast_or_null<Defined>(sym)) { 972 if (defined->isExternal() || !defined->isLive() || 973 !defined->includeInSymtab) 974 continue; 975 addSymbol(localSymbols, sym); 976 } 977 } 978 } 979 } 980 981 // __dyld_private is a local symbol too. It's linker-created and doesn't 982 // exist in any object file. 983 if (Defined *dyldPrivate = in.stubHelper->dyldPrivate) 984 addSymbol(localSymbols, dyldPrivate); 985 986 for (Symbol *sym : symtab->getSymbols()) { 987 if (!sym->isLive()) 988 continue; 989 if (auto *defined = dyn_cast<Defined>(sym)) { 990 if (!defined->includeInSymtab) 991 continue; 992 assert(defined->isExternal()); 993 if (defined->privateExtern) 994 addSymbol(localSymbols, defined); 995 else 996 addSymbol(externalSymbols, defined); 997 } else if (auto *dysym = dyn_cast<DylibSymbol>(sym)) { 998 if (dysym->isReferenced()) 999 addSymbol(undefinedSymbols, sym); 1000 } 1001 } 1002 1003 emitStabs(); 1004 uint32_t symtabIndex = stabs.size(); 1005 for (const SymtabEntry &entry : 1006 concat<SymtabEntry>(localSymbols, externalSymbols, undefinedSymbols)) { 1007 entry.sym->symtabIndex = symtabIndex++; 1008 } 1009 } 1010 1011 uint32_t SymtabSection::getNumSymbols() const { 1012 return stabs.size() + localSymbols.size() + externalSymbols.size() + 1013 undefinedSymbols.size(); 1014 } 1015 1016 // This serves to hide (type-erase) the template parameter from SymtabSection. 1017 template <class LP> class SymtabSectionImpl final : public SymtabSection { 1018 public: 1019 SymtabSectionImpl(StringTableSection &stringTableSection) 1020 : SymtabSection(stringTableSection) {} 1021 uint64_t getRawSize() const override; 1022 void writeTo(uint8_t *buf) const override; 1023 }; 1024 1025 template <class LP> uint64_t SymtabSectionImpl<LP>::getRawSize() const { 1026 return getNumSymbols() * sizeof(typename LP::nlist); 1027 } 1028 1029 template <class LP> void SymtabSectionImpl<LP>::writeTo(uint8_t *buf) const { 1030 auto *nList = reinterpret_cast<typename LP::nlist *>(buf); 1031 // Emit the stabs entries before the "real" symbols. We cannot emit them 1032 // after as that would render Symbol::symtabIndex inaccurate. 1033 for (const StabsEntry &entry : stabs) { 1034 nList->n_strx = entry.strx; 1035 nList->n_type = entry.type; 1036 nList->n_sect = entry.sect; 1037 nList->n_desc = entry.desc; 1038 nList->n_value = entry.value; 1039 ++nList; 1040 } 1041 1042 for (const SymtabEntry &entry : concat<const SymtabEntry>( 1043 localSymbols, externalSymbols, undefinedSymbols)) { 1044 nList->n_strx = entry.strx; 1045 // TODO populate n_desc with more flags 1046 if (auto *defined = dyn_cast<Defined>(entry.sym)) { 1047 uint8_t scope = 0; 1048 if (defined->privateExtern) { 1049 // Private external -- dylib scoped symbol. 1050 // Promote to non-external at link time. 1051 scope = N_PEXT; 1052 } else if (defined->isExternal()) { 1053 // Normal global symbol. 1054 scope = N_EXT; 1055 } else { 1056 // TU-local symbol from localSymbols. 1057 scope = 0; 1058 } 1059 1060 if (defined->isAbsolute()) { 1061 nList->n_type = scope | N_ABS; 1062 nList->n_sect = NO_SECT; 1063 nList->n_value = defined->value; 1064 } else { 1065 nList->n_type = scope | N_SECT; 1066 nList->n_sect = defined->isec->parent->index; 1067 // For the N_SECT symbol type, n_value is the address of the symbol 1068 nList->n_value = defined->getVA(); 1069 } 1070 nList->n_desc |= defined->thumb ? N_ARM_THUMB_DEF : 0; 1071 nList->n_desc |= defined->isExternalWeakDef() ? N_WEAK_DEF : 0; 1072 nList->n_desc |= 1073 defined->referencedDynamically ? REFERENCED_DYNAMICALLY : 0; 1074 } else if (auto *dysym = dyn_cast<DylibSymbol>(entry.sym)) { 1075 uint16_t n_desc = nList->n_desc; 1076 int16_t ordinal = ordinalForDylibSymbol(*dysym); 1077 if (ordinal == BIND_SPECIAL_DYLIB_FLAT_LOOKUP) 1078 SET_LIBRARY_ORDINAL(n_desc, DYNAMIC_LOOKUP_ORDINAL); 1079 else if (ordinal == BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE) 1080 SET_LIBRARY_ORDINAL(n_desc, EXECUTABLE_ORDINAL); 1081 else { 1082 assert(ordinal > 0); 1083 SET_LIBRARY_ORDINAL(n_desc, static_cast<uint8_t>(ordinal)); 1084 } 1085 1086 nList->n_type = N_EXT; 1087 n_desc |= dysym->isWeakDef() ? N_WEAK_DEF : 0; 1088 n_desc |= dysym->isWeakRef() ? N_WEAK_REF : 0; 1089 nList->n_desc = n_desc; 1090 } 1091 ++nList; 1092 } 1093 } 1094 1095 template <class LP> 1096 SymtabSection * 1097 macho::makeSymtabSection(StringTableSection &stringTableSection) { 1098 return make<SymtabSectionImpl<LP>>(stringTableSection); 1099 } 1100 1101 IndirectSymtabSection::IndirectSymtabSection() 1102 : LinkEditSection(segment_names::linkEdit, 1103 section_names::indirectSymbolTable) {} 1104 1105 uint32_t IndirectSymtabSection::getNumSymbols() const { 1106 return in.got->getEntries().size() + in.tlvPointers->getEntries().size() + 1107 2 * in.stubs->getEntries().size(); 1108 } 1109 1110 bool IndirectSymtabSection::isNeeded() const { 1111 return in.got->isNeeded() || in.tlvPointers->isNeeded() || 1112 in.stubs->isNeeded(); 1113 } 1114 1115 void IndirectSymtabSection::finalizeContents() { 1116 uint32_t off = 0; 1117 in.got->reserved1 = off; 1118 off += in.got->getEntries().size(); 1119 in.tlvPointers->reserved1 = off; 1120 off += in.tlvPointers->getEntries().size(); 1121 in.stubs->reserved1 = off; 1122 off += in.stubs->getEntries().size(); 1123 in.lazyPointers->reserved1 = off; 1124 } 1125 1126 static uint32_t indirectValue(const Symbol *sym) { 1127 if (sym->symtabIndex == UINT32_MAX) 1128 return INDIRECT_SYMBOL_LOCAL; 1129 if (auto *defined = dyn_cast<Defined>(sym)) 1130 if (defined->privateExtern) 1131 return INDIRECT_SYMBOL_LOCAL; 1132 return sym->symtabIndex; 1133 } 1134 1135 void IndirectSymtabSection::writeTo(uint8_t *buf) const { 1136 uint32_t off = 0; 1137 for (const Symbol *sym : in.got->getEntries()) { 1138 write32le(buf + off * sizeof(uint32_t), indirectValue(sym)); 1139 ++off; 1140 } 1141 for (const Symbol *sym : in.tlvPointers->getEntries()) { 1142 write32le(buf + off * sizeof(uint32_t), indirectValue(sym)); 1143 ++off; 1144 } 1145 for (const Symbol *sym : in.stubs->getEntries()) { 1146 write32le(buf + off * sizeof(uint32_t), indirectValue(sym)); 1147 ++off; 1148 } 1149 // There is a 1:1 correspondence between stubs and LazyPointerSection 1150 // entries. But giving __stubs and __la_symbol_ptr the same reserved1 1151 // (the offset into the indirect symbol table) so that they both refer 1152 // to the same range of offsets confuses `strip`, so write the stubs 1153 // symbol table offsets a second time. 1154 for (const Symbol *sym : in.stubs->getEntries()) { 1155 write32le(buf + off * sizeof(uint32_t), indirectValue(sym)); 1156 ++off; 1157 } 1158 } 1159 1160 StringTableSection::StringTableSection() 1161 : LinkEditSection(segment_names::linkEdit, section_names::stringTable) {} 1162 1163 uint32_t StringTableSection::addString(StringRef str) { 1164 uint32_t strx = size; 1165 strings.push_back(str); // TODO: consider deduplicating strings 1166 size += str.size() + 1; // account for null terminator 1167 return strx; 1168 } 1169 1170 void StringTableSection::writeTo(uint8_t *buf) const { 1171 uint32_t off = 0; 1172 for (StringRef str : strings) { 1173 memcpy(buf + off, str.data(), str.size()); 1174 off += str.size() + 1; // account for null terminator 1175 } 1176 } 1177 1178 static_assert((CodeSignatureSection::blobHeadersSize % 8) == 0, ""); 1179 static_assert((CodeSignatureSection::fixedHeadersSize % 8) == 0, ""); 1180 1181 CodeSignatureSection::CodeSignatureSection() 1182 : LinkEditSection(segment_names::linkEdit, section_names::codeSignature) { 1183 align = 16; // required by libstuff 1184 // FIXME: Consider using finalOutput instead of outputFile. 1185 fileName = config->outputFile; 1186 size_t slashIndex = fileName.rfind("/"); 1187 if (slashIndex != std::string::npos) 1188 fileName = fileName.drop_front(slashIndex + 1); 1189 1190 // NOTE: Any changes to these calculations should be repeated 1191 // in llvm-objcopy's MachOLayoutBuilder::layoutTail. 1192 allHeadersSize = alignTo<16>(fixedHeadersSize + fileName.size() + 1); 1193 fileNamePad = allHeadersSize - fixedHeadersSize - fileName.size(); 1194 } 1195 1196 uint32_t CodeSignatureSection::getBlockCount() const { 1197 return (fileOff + blockSize - 1) / blockSize; 1198 } 1199 1200 uint64_t CodeSignatureSection::getRawSize() const { 1201 return allHeadersSize + getBlockCount() * hashSize; 1202 } 1203 1204 void CodeSignatureSection::writeHashes(uint8_t *buf) const { 1205 // NOTE: Changes to this functionality should be repeated in llvm-objcopy's 1206 // MachOWriter::writeSignatureData. 1207 uint8_t *code = buf; 1208 uint8_t *codeEnd = buf + fileOff; 1209 uint8_t *hashes = codeEnd + allHeadersSize; 1210 while (code < codeEnd) { 1211 StringRef block(reinterpret_cast<char *>(code), 1212 std::min(codeEnd - code, static_cast<ssize_t>(blockSize))); 1213 SHA256 hasher; 1214 hasher.update(block); 1215 std::array<uint8_t, 32> hash = hasher.final(); 1216 assert(hash.size() == hashSize); 1217 memcpy(hashes, hash.data(), hashSize); 1218 code += blockSize; 1219 hashes += hashSize; 1220 } 1221 #if defined(__APPLE__) 1222 // This is macOS-specific work-around and makes no sense for any 1223 // other host OS. See https://openradar.appspot.com/FB8914231 1224 // 1225 // The macOS kernel maintains a signature-verification cache to 1226 // quickly validate applications at time of execve(2). The trouble 1227 // is that for the kernel creates the cache entry at the time of the 1228 // mmap(2) call, before we have a chance to write either the code to 1229 // sign or the signature header+hashes. The fix is to invalidate 1230 // all cached data associated with the output file, thus discarding 1231 // the bogus prematurely-cached signature. 1232 msync(buf, fileOff + getSize(), MS_INVALIDATE); 1233 #endif 1234 } 1235 1236 void CodeSignatureSection::writeTo(uint8_t *buf) const { 1237 // NOTE: Changes to this functionality should be repeated in llvm-objcopy's 1238 // MachOWriter::writeSignatureData. 1239 uint32_t signatureSize = static_cast<uint32_t>(getSize()); 1240 auto *superBlob = reinterpret_cast<CS_SuperBlob *>(buf); 1241 write32be(&superBlob->magic, CSMAGIC_EMBEDDED_SIGNATURE); 1242 write32be(&superBlob->length, signatureSize); 1243 write32be(&superBlob->count, 1); 1244 auto *blobIndex = reinterpret_cast<CS_BlobIndex *>(&superBlob[1]); 1245 write32be(&blobIndex->type, CSSLOT_CODEDIRECTORY); 1246 write32be(&blobIndex->offset, blobHeadersSize); 1247 auto *codeDirectory = 1248 reinterpret_cast<CS_CodeDirectory *>(buf + blobHeadersSize); 1249 write32be(&codeDirectory->magic, CSMAGIC_CODEDIRECTORY); 1250 write32be(&codeDirectory->length, signatureSize - blobHeadersSize); 1251 write32be(&codeDirectory->version, CS_SUPPORTSEXECSEG); 1252 write32be(&codeDirectory->flags, CS_ADHOC | CS_LINKER_SIGNED); 1253 write32be(&codeDirectory->hashOffset, 1254 sizeof(CS_CodeDirectory) + fileName.size() + fileNamePad); 1255 write32be(&codeDirectory->identOffset, sizeof(CS_CodeDirectory)); 1256 codeDirectory->nSpecialSlots = 0; 1257 write32be(&codeDirectory->nCodeSlots, getBlockCount()); 1258 write32be(&codeDirectory->codeLimit, fileOff); 1259 codeDirectory->hashSize = static_cast<uint8_t>(hashSize); 1260 codeDirectory->hashType = kSecCodeSignatureHashSHA256; 1261 codeDirectory->platform = 0; 1262 codeDirectory->pageSize = blockSizeShift; 1263 codeDirectory->spare2 = 0; 1264 codeDirectory->scatterOffset = 0; 1265 codeDirectory->teamOffset = 0; 1266 codeDirectory->spare3 = 0; 1267 codeDirectory->codeLimit64 = 0; 1268 OutputSegment *textSeg = getOrCreateOutputSegment(segment_names::text); 1269 write64be(&codeDirectory->execSegBase, textSeg->fileOff); 1270 write64be(&codeDirectory->execSegLimit, textSeg->fileSize); 1271 write64be(&codeDirectory->execSegFlags, 1272 config->outputType == MH_EXECUTE ? CS_EXECSEG_MAIN_BINARY : 0); 1273 auto *id = reinterpret_cast<char *>(&codeDirectory[1]); 1274 memcpy(id, fileName.begin(), fileName.size()); 1275 memset(id + fileName.size(), 0, fileNamePad); 1276 } 1277 1278 BitcodeBundleSection::BitcodeBundleSection() 1279 : SyntheticSection(segment_names::llvm, section_names::bitcodeBundle) {} 1280 1281 class ErrorCodeWrapper { 1282 public: 1283 explicit ErrorCodeWrapper(std::error_code ec) : errorCode(ec.value()) {} 1284 explicit ErrorCodeWrapper(int ec) : errorCode(ec) {} 1285 operator int() const { return errorCode; } 1286 1287 private: 1288 int errorCode; 1289 }; 1290 1291 #define CHECK_EC(exp) \ 1292 do { \ 1293 ErrorCodeWrapper ec(exp); \ 1294 if (ec) \ 1295 fatal(Twine("operation failed with error code ") + Twine(ec) + ": " + \ 1296 #exp); \ 1297 } while (0); 1298 1299 void BitcodeBundleSection::finalize() { 1300 #ifdef LLVM_HAVE_LIBXAR 1301 using namespace llvm::sys::fs; 1302 CHECK_EC(createTemporaryFile("bitcode-bundle", "xar", xarPath)); 1303 1304 #pragma clang diagnostic push 1305 #pragma clang diagnostic ignored "-Wdeprecated-declarations" 1306 xar_t xar(xar_open(xarPath.data(), O_RDWR)); 1307 #pragma clang diagnostic pop 1308 if (!xar) 1309 fatal("failed to open XAR temporary file at " + xarPath); 1310 CHECK_EC(xar_opt_set(xar, XAR_OPT_COMPRESSION, XAR_OPT_VAL_NONE)); 1311 // FIXME: add more data to XAR 1312 CHECK_EC(xar_close(xar)); 1313 1314 file_size(xarPath, xarSize); 1315 #endif // defined(LLVM_HAVE_LIBXAR) 1316 } 1317 1318 void BitcodeBundleSection::writeTo(uint8_t *buf) const { 1319 using namespace llvm::sys::fs; 1320 file_t handle = 1321 CHECK(openNativeFile(xarPath, CD_OpenExisting, FA_Read, OF_None), 1322 "failed to open XAR file"); 1323 std::error_code ec; 1324 mapped_file_region xarMap(handle, mapped_file_region::mapmode::readonly, 1325 xarSize, 0, ec); 1326 if (ec) 1327 fatal("failed to map XAR file"); 1328 memcpy(buf, xarMap.const_data(), xarSize); 1329 1330 closeFile(handle); 1331 remove(xarPath); 1332 } 1333 1334 CStringSection::CStringSection() 1335 : SyntheticSection(segment_names::text, section_names::cString) { 1336 flags = S_CSTRING_LITERALS; 1337 } 1338 1339 void CStringSection::addInput(CStringInputSection *isec) { 1340 isec->parent = this; 1341 inputs.push_back(isec); 1342 if (isec->align > align) 1343 align = isec->align; 1344 } 1345 1346 void CStringSection::writeTo(uint8_t *buf) const { 1347 for (const CStringInputSection *isec : inputs) { 1348 for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) { 1349 if (!isec->pieces[i].live) 1350 continue; 1351 StringRef string = isec->getStringRef(i); 1352 memcpy(buf + isec->pieces[i].outSecOff, string.data(), string.size()); 1353 } 1354 } 1355 } 1356 1357 void CStringSection::finalizeContents() { 1358 uint64_t offset = 0; 1359 for (CStringInputSection *isec : inputs) { 1360 for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) { 1361 if (!isec->pieces[i].live) 1362 continue; 1363 // See comment above DeduplicatedCStringSection for how alignment is 1364 // handled. 1365 uint32_t pieceAlign = 1366 1 << countTrailingZeros(isec->align | isec->pieces[i].inSecOff); 1367 offset = alignTo(offset, pieceAlign); 1368 isec->pieces[i].outSecOff = offset; 1369 isec->isFinal = true; 1370 StringRef string = isec->getStringRef(i); 1371 offset += string.size(); 1372 } 1373 } 1374 size = offset; 1375 } 1376 1377 // Mergeable cstring literals are found under the __TEXT,__cstring section. In 1378 // contrast to ELF, which puts strings that need different alignments into 1379 // different sections, clang's Mach-O backend puts them all in one section. 1380 // Strings that need to be aligned have the .p2align directive emitted before 1381 // them, which simply translates into zero padding in the object file. In other 1382 // words, we have to infer the desired alignment of these cstrings from their 1383 // addresses. 1384 // 1385 // We differ slightly from ld64 in how we've chosen to align these cstrings. 1386 // Both LLD and ld64 preserve the number of trailing zeros in each cstring's 1387 // address in the input object files. When deduplicating identical cstrings, 1388 // both linkers pick the cstring whose address has more trailing zeros, and 1389 // preserve the alignment of that address in the final binary. However, ld64 1390 // goes a step further and also preserves the offset of the cstring from the 1391 // last section-aligned address. I.e. if a cstring is at offset 18 in the 1392 // input, with a section alignment of 16, then both LLD and ld64 will ensure the 1393 // final address is 2-byte aligned (since 18 == 16 + 2). But ld64 will also 1394 // ensure that the final address is of the form 16 * k + 2 for some k. 1395 // 1396 // Note that ld64's heuristic means that a dedup'ed cstring's final address is 1397 // dependent on the order of the input object files. E.g. if in addition to the 1398 // cstring at offset 18 above, we have a duplicate one in another file with a 1399 // `.cstring` section alignment of 2 and an offset of zero, then ld64 will pick 1400 // the cstring from the object file earlier on the command line (since both have 1401 // the same number of trailing zeros in their address). So the final cstring may 1402 // either be at some address `16 * k + 2` or at some address `2 * k`. 1403 // 1404 // I've opted not to follow this behavior primarily for implementation 1405 // simplicity, and secondarily to save a few more bytes. It's not clear to me 1406 // that preserving the section alignment + offset is ever necessary, and there 1407 // are many cases that are clearly redundant. In particular, if an x86_64 object 1408 // file contains some strings that are accessed via SIMD instructions, then the 1409 // .cstring section in the object file will be 16-byte-aligned (since SIMD 1410 // requires its operand addresses to be 16-byte aligned). However, there will 1411 // typically also be other cstrings in the same file that aren't used via SIMD 1412 // and don't need this alignment. They will be emitted at some arbitrary address 1413 // `A`, but ld64 will treat them as being 16-byte aligned with an offset of `16 1414 // % A`. 1415 void DeduplicatedCStringSection::finalizeContents() { 1416 // Find the largest alignment required for each string. 1417 for (const CStringInputSection *isec : inputs) { 1418 for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) { 1419 const StringPiece &piece = isec->pieces[i]; 1420 if (!piece.live) 1421 continue; 1422 auto s = isec->getCachedHashStringRef(i); 1423 assert(isec->align != 0); 1424 uint8_t trailingZeros = countTrailingZeros(isec->align | piece.inSecOff); 1425 auto it = stringOffsetMap.insert( 1426 std::make_pair(s, StringOffset(trailingZeros))); 1427 if (!it.second && it.first->second.trailingZeros < trailingZeros) 1428 it.first->second.trailingZeros = trailingZeros; 1429 } 1430 } 1431 1432 // Assign an offset for each string and save it to the corresponding 1433 // StringPieces for easy access. 1434 for (CStringInputSection *isec : inputs) { 1435 for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) { 1436 if (!isec->pieces[i].live) 1437 continue; 1438 auto s = isec->getCachedHashStringRef(i); 1439 auto it = stringOffsetMap.find(s); 1440 assert(it != stringOffsetMap.end()); 1441 StringOffset &offsetInfo = it->second; 1442 if (offsetInfo.outSecOff == UINT64_MAX) { 1443 offsetInfo.outSecOff = alignTo(size, 1ULL << offsetInfo.trailingZeros); 1444 size = offsetInfo.outSecOff + s.size(); 1445 } 1446 isec->pieces[i].outSecOff = offsetInfo.outSecOff; 1447 } 1448 isec->isFinal = true; 1449 } 1450 } 1451 1452 void DeduplicatedCStringSection::writeTo(uint8_t *buf) const { 1453 for (const auto &p : stringOffsetMap) { 1454 StringRef data = p.first.val(); 1455 uint64_t off = p.second.outSecOff; 1456 if (!data.empty()) 1457 memcpy(buf + off, data.data(), data.size()); 1458 } 1459 } 1460 1461 // This section is actually emitted as __TEXT,__const by ld64, but clang may 1462 // emit input sections of that name, and LLD doesn't currently support mixing 1463 // synthetic and concat-type OutputSections. To work around this, I've given 1464 // our merged-literals section a different name. 1465 WordLiteralSection::WordLiteralSection() 1466 : SyntheticSection(segment_names::text, section_names::literals) { 1467 align = 16; 1468 } 1469 1470 void WordLiteralSection::addInput(WordLiteralInputSection *isec) { 1471 isec->parent = this; 1472 inputs.push_back(isec); 1473 } 1474 1475 void WordLiteralSection::finalizeContents() { 1476 for (WordLiteralInputSection *isec : inputs) { 1477 // We do all processing of the InputSection here, so it will be effectively 1478 // finalized. 1479 isec->isFinal = true; 1480 const uint8_t *buf = isec->data.data(); 1481 switch (sectionType(isec->getFlags())) { 1482 case S_4BYTE_LITERALS: { 1483 for (size_t off = 0, e = isec->data.size(); off < e; off += 4) { 1484 if (!isec->isLive(off)) 1485 continue; 1486 uint32_t value = *reinterpret_cast<const uint32_t *>(buf + off); 1487 literal4Map.emplace(value, literal4Map.size()); 1488 } 1489 break; 1490 } 1491 case S_8BYTE_LITERALS: { 1492 for (size_t off = 0, e = isec->data.size(); off < e; off += 8) { 1493 if (!isec->isLive(off)) 1494 continue; 1495 uint64_t value = *reinterpret_cast<const uint64_t *>(buf + off); 1496 literal8Map.emplace(value, literal8Map.size()); 1497 } 1498 break; 1499 } 1500 case S_16BYTE_LITERALS: { 1501 for (size_t off = 0, e = isec->data.size(); off < e; off += 16) { 1502 if (!isec->isLive(off)) 1503 continue; 1504 UInt128 value = *reinterpret_cast<const UInt128 *>(buf + off); 1505 literal16Map.emplace(value, literal16Map.size()); 1506 } 1507 break; 1508 } 1509 default: 1510 llvm_unreachable("invalid literal section type"); 1511 } 1512 } 1513 } 1514 1515 void WordLiteralSection::writeTo(uint8_t *buf) const { 1516 // Note that we don't attempt to do any endianness conversion in addInput(), 1517 // so we don't do it here either -- just write out the original value, 1518 // byte-for-byte. 1519 for (const auto &p : literal16Map) 1520 memcpy(buf + p.second * 16, &p.first, 16); 1521 buf += literal16Map.size() * 16; 1522 1523 for (const auto &p : literal8Map) 1524 memcpy(buf + p.second * 8, &p.first, 8); 1525 buf += literal8Map.size() * 8; 1526 1527 for (const auto &p : literal4Map) 1528 memcpy(buf + p.second * 4, &p.first, 4); 1529 } 1530 1531 void macho::createSyntheticSymbols() { 1532 auto addHeaderSymbol = [](const char *name) { 1533 symtab->addSynthetic(name, in.header->isec, /*value=*/0, 1534 /*isPrivateExtern=*/true, /*includeInSymtab=*/false, 1535 /*referencedDynamically=*/false); 1536 }; 1537 1538 switch (config->outputType) { 1539 // FIXME: Assign the right address value for these symbols 1540 // (rather than 0). But we need to do that after assignAddresses(). 1541 case MH_EXECUTE: 1542 // If linking PIE, __mh_execute_header is a defined symbol in 1543 // __TEXT, __text) 1544 // Otherwise, it's an absolute symbol. 1545 if (config->isPic) 1546 symtab->addSynthetic("__mh_execute_header", in.header->isec, /*value=*/0, 1547 /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 1548 /*referencedDynamically=*/true); 1549 else 1550 symtab->addSynthetic("__mh_execute_header", /*isec=*/nullptr, /*value=*/0, 1551 /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 1552 /*referencedDynamically=*/true); 1553 break; 1554 1555 // The following symbols are N_SECT symbols, even though the header is not 1556 // part of any section and that they are private to the bundle/dylib/object 1557 // they are part of. 1558 case MH_BUNDLE: 1559 addHeaderSymbol("__mh_bundle_header"); 1560 break; 1561 case MH_DYLIB: 1562 addHeaderSymbol("__mh_dylib_header"); 1563 break; 1564 case MH_DYLINKER: 1565 addHeaderSymbol("__mh_dylinker_header"); 1566 break; 1567 case MH_OBJECT: 1568 addHeaderSymbol("__mh_object_header"); 1569 break; 1570 default: 1571 llvm_unreachable("unexpected outputType"); 1572 break; 1573 } 1574 1575 // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit 1576 // which does e.g. cleanup of static global variables. The ABI document 1577 // says that the pointer can point to any address in one of the dylib's 1578 // segments, but in practice ld64 seems to set it to point to the header, 1579 // so that's what's implemented here. 1580 addHeaderSymbol("___dso_handle"); 1581 } 1582 1583 template SymtabSection *macho::makeSymtabSection<LP64>(StringTableSection &); 1584 template SymtabSection *macho::makeSymtabSection<ILP32>(StringTableSection &); 1585