1 //===- SyntheticSections.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "SyntheticSections.h" 10 #include "ConcatOutputSection.h" 11 #include "Config.h" 12 #include "ExportTrie.h" 13 #include "InputFiles.h" 14 #include "MachOStructs.h" 15 #include "OutputSegment.h" 16 #include "SymbolTable.h" 17 #include "Symbols.h" 18 19 #include "lld/Common/CommonLinkerContext.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/Config/llvm-config.h" 22 #include "llvm/Support/EndianStream.h" 23 #include "llvm/Support/FileSystem.h" 24 #include "llvm/Support/LEB128.h" 25 #include "llvm/Support/Path.h" 26 #include "llvm/Support/SHA256.h" 27 28 #if defined(__APPLE__) 29 #include <sys/mman.h> 30 #endif 31 32 #ifdef LLVM_HAVE_LIBXAR 33 #include <fcntl.h> 34 extern "C" { 35 #include <xar/xar.h> 36 } 37 #endif 38 39 using namespace llvm; 40 using namespace llvm::MachO; 41 using namespace llvm::support; 42 using namespace llvm::support::endian; 43 using namespace lld; 44 using namespace lld::macho; 45 46 InStruct macho::in; 47 std::vector<SyntheticSection *> macho::syntheticSections; 48 49 SyntheticSection::SyntheticSection(const char *segname, const char *name) 50 : OutputSection(SyntheticKind, name) { 51 std::tie(this->segname, this->name) = maybeRenameSection({segname, name}); 52 isec = makeSyntheticInputSection(segname, name); 53 isec->parent = this; 54 syntheticSections.push_back(this); 55 } 56 57 // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts 58 // from the beginning of the file (i.e. the header). 59 MachHeaderSection::MachHeaderSection() 60 : SyntheticSection(segment_names::text, section_names::header) { 61 // XXX: This is a hack. (See D97007) 62 // Setting the index to 1 to pretend that this section is the text 63 // section. 64 index = 1; 65 isec->isFinal = true; 66 } 67 68 void MachHeaderSection::addLoadCommand(LoadCommand *lc) { 69 loadCommands.push_back(lc); 70 sizeOfCmds += lc->getSize(); 71 } 72 73 uint64_t MachHeaderSection::getSize() const { 74 uint64_t size = target->headerSize + sizeOfCmds + config->headerPad; 75 // If we are emitting an encryptable binary, our load commands must have a 76 // separate (non-encrypted) page to themselves. 77 if (config->emitEncryptionInfo) 78 size = alignTo(size, target->getPageSize()); 79 return size; 80 } 81 82 static uint32_t cpuSubtype() { 83 uint32_t subtype = target->cpuSubtype; 84 85 if (config->outputType == MH_EXECUTE && !config->staticLink && 86 target->cpuSubtype == CPU_SUBTYPE_X86_64_ALL && 87 config->platform() == PLATFORM_MACOS && 88 config->platformInfo.minimum >= VersionTuple(10, 5)) 89 subtype |= CPU_SUBTYPE_LIB64; 90 91 return subtype; 92 } 93 94 void MachHeaderSection::writeTo(uint8_t *buf) const { 95 auto *hdr = reinterpret_cast<mach_header *>(buf); 96 hdr->magic = target->magic; 97 hdr->cputype = target->cpuType; 98 hdr->cpusubtype = cpuSubtype(); 99 hdr->filetype = config->outputType; 100 hdr->ncmds = loadCommands.size(); 101 hdr->sizeofcmds = sizeOfCmds; 102 hdr->flags = MH_DYLDLINK; 103 104 if (config->namespaceKind == NamespaceKind::twolevel) 105 hdr->flags |= MH_NOUNDEFS | MH_TWOLEVEL; 106 107 if (config->outputType == MH_DYLIB && !config->hasReexports) 108 hdr->flags |= MH_NO_REEXPORTED_DYLIBS; 109 110 if (config->markDeadStrippableDylib) 111 hdr->flags |= MH_DEAD_STRIPPABLE_DYLIB; 112 113 if (config->outputType == MH_EXECUTE && config->isPic) 114 hdr->flags |= MH_PIE; 115 116 if (config->outputType == MH_DYLIB && config->applicationExtension) 117 hdr->flags |= MH_APP_EXTENSION_SAFE; 118 119 if (in.exports->hasWeakSymbol || in.weakBinding->hasNonWeakDefinition()) 120 hdr->flags |= MH_WEAK_DEFINES; 121 122 if (in.exports->hasWeakSymbol || in.weakBinding->hasEntry()) 123 hdr->flags |= MH_BINDS_TO_WEAK; 124 125 for (const OutputSegment *seg : outputSegments) { 126 for (const OutputSection *osec : seg->getSections()) { 127 if (isThreadLocalVariables(osec->flags)) { 128 hdr->flags |= MH_HAS_TLV_DESCRIPTORS; 129 break; 130 } 131 } 132 } 133 134 uint8_t *p = reinterpret_cast<uint8_t *>(hdr) + target->headerSize; 135 for (const LoadCommand *lc : loadCommands) { 136 lc->writeTo(p); 137 p += lc->getSize(); 138 } 139 } 140 141 PageZeroSection::PageZeroSection() 142 : SyntheticSection(segment_names::pageZero, section_names::pageZero) {} 143 144 RebaseSection::RebaseSection() 145 : LinkEditSection(segment_names::linkEdit, section_names::rebase) {} 146 147 namespace { 148 struct Rebase { 149 OutputSegment *segment = nullptr; 150 uint64_t offset = 0; 151 uint64_t consecutiveCount = 0; 152 }; 153 } // namespace 154 155 // Rebase opcodes allow us to describe a contiguous sequence of rebase location 156 // using a single DO_REBASE opcode. To take advantage of it, we delay emitting 157 // `DO_REBASE` until we have reached the end of a contiguous sequence. 158 static void encodeDoRebase(Rebase &rebase, raw_svector_ostream &os) { 159 assert(rebase.consecutiveCount != 0); 160 if (rebase.consecutiveCount <= REBASE_IMMEDIATE_MASK) { 161 os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 162 rebase.consecutiveCount); 163 } else { 164 os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES); 165 encodeULEB128(rebase.consecutiveCount, os); 166 } 167 rebase.consecutiveCount = 0; 168 } 169 170 static void encodeRebase(const OutputSection *osec, uint64_t outSecOff, 171 Rebase &lastRebase, raw_svector_ostream &os) { 172 OutputSegment *seg = osec->parent; 173 uint64_t offset = osec->getSegmentOffset() + outSecOff; 174 if (lastRebase.segment != seg || lastRebase.offset != offset) { 175 if (lastRebase.consecutiveCount != 0) 176 encodeDoRebase(lastRebase, os); 177 178 if (lastRebase.segment != seg) { 179 os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 180 seg->index); 181 encodeULEB128(offset, os); 182 lastRebase.segment = seg; 183 lastRebase.offset = offset; 184 } else { 185 assert(lastRebase.offset != offset); 186 os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB); 187 encodeULEB128(offset - lastRebase.offset, os); 188 lastRebase.offset = offset; 189 } 190 } 191 ++lastRebase.consecutiveCount; 192 // DO_REBASE causes dyld to both perform the binding and increment the offset 193 lastRebase.offset += target->wordSize; 194 } 195 196 void RebaseSection::finalizeContents() { 197 if (locations.empty()) 198 return; 199 200 raw_svector_ostream os{contents}; 201 Rebase lastRebase; 202 203 os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER); 204 205 llvm::sort(locations, [](const Location &a, const Location &b) { 206 return a.isec->getVA(a.offset) < b.isec->getVA(b.offset); 207 }); 208 for (const Location &loc : locations) 209 encodeRebase(loc.isec->parent, loc.isec->getOffset(loc.offset), lastRebase, 210 os); 211 if (lastRebase.consecutiveCount != 0) 212 encodeDoRebase(lastRebase, os); 213 214 os << static_cast<uint8_t>(REBASE_OPCODE_DONE); 215 } 216 217 void RebaseSection::writeTo(uint8_t *buf) const { 218 memcpy(buf, contents.data(), contents.size()); 219 } 220 221 NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname, 222 const char *name) 223 : SyntheticSection(segname, name) { 224 align = target->wordSize; 225 } 226 227 void macho::addNonLazyBindingEntries(const Symbol *sym, 228 const InputSection *isec, uint64_t offset, 229 int64_t addend) { 230 if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) { 231 in.binding->addEntry(dysym, isec, offset, addend); 232 if (dysym->isWeakDef()) 233 in.weakBinding->addEntry(sym, isec, offset, addend); 234 } else if (const auto *defined = dyn_cast<Defined>(sym)) { 235 in.rebase->addEntry(isec, offset); 236 if (defined->isExternalWeakDef()) 237 in.weakBinding->addEntry(sym, isec, offset, addend); 238 else if (defined->interposable) 239 in.binding->addEntry(sym, isec, offset, addend); 240 } else { 241 // Undefined symbols are filtered out in scanRelocations(); we should never 242 // get here 243 llvm_unreachable("cannot bind to an undefined symbol"); 244 } 245 } 246 247 void NonLazyPointerSectionBase::addEntry(Symbol *sym) { 248 if (entries.insert(sym)) { 249 assert(!sym->isInGot()); 250 sym->gotIndex = entries.size() - 1; 251 252 addNonLazyBindingEntries(sym, isec, sym->gotIndex * target->wordSize); 253 } 254 } 255 256 void NonLazyPointerSectionBase::writeTo(uint8_t *buf) const { 257 for (size_t i = 0, n = entries.size(); i < n; ++i) 258 if (auto *defined = dyn_cast<Defined>(entries[i])) 259 write64le(&buf[i * target->wordSize], defined->getVA()); 260 } 261 262 GotSection::GotSection() 263 : NonLazyPointerSectionBase(segment_names::data, section_names::got) { 264 flags = S_NON_LAZY_SYMBOL_POINTERS; 265 } 266 267 TlvPointerSection::TlvPointerSection() 268 : NonLazyPointerSectionBase(segment_names::data, 269 section_names::threadPtrs) { 270 flags = S_THREAD_LOCAL_VARIABLE_POINTERS; 271 } 272 273 BindingSection::BindingSection() 274 : LinkEditSection(segment_names::linkEdit, section_names::binding) {} 275 276 namespace { 277 struct Binding { 278 OutputSegment *segment = nullptr; 279 uint64_t offset = 0; 280 int64_t addend = 0; 281 }; 282 struct BindIR { 283 // Default value of 0xF0 is not valid opcode and should make the program 284 // scream instead of accidentally writing "valid" values. 285 uint8_t opcode = 0xF0; 286 uint64_t data = 0; 287 uint64_t consecutiveCount = 0; 288 }; 289 } // namespace 290 291 // Encode a sequence of opcodes that tell dyld to write the address of symbol + 292 // addend at osec->addr + outSecOff. 293 // 294 // The bind opcode "interpreter" remembers the values of each binding field, so 295 // we only need to encode the differences between bindings. Hence the use of 296 // lastBinding. 297 static void encodeBinding(const OutputSection *osec, uint64_t outSecOff, 298 int64_t addend, Binding &lastBinding, 299 std::vector<BindIR> &opcodes) { 300 OutputSegment *seg = osec->parent; 301 uint64_t offset = osec->getSegmentOffset() + outSecOff; 302 if (lastBinding.segment != seg) { 303 opcodes.push_back( 304 {static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 305 seg->index), 306 offset}); 307 lastBinding.segment = seg; 308 lastBinding.offset = offset; 309 } else if (lastBinding.offset != offset) { 310 opcodes.push_back({BIND_OPCODE_ADD_ADDR_ULEB, offset - lastBinding.offset}); 311 lastBinding.offset = offset; 312 } 313 314 if (lastBinding.addend != addend) { 315 opcodes.push_back( 316 {BIND_OPCODE_SET_ADDEND_SLEB, static_cast<uint64_t>(addend)}); 317 lastBinding.addend = addend; 318 } 319 320 opcodes.push_back({BIND_OPCODE_DO_BIND, 0}); 321 // DO_BIND causes dyld to both perform the binding and increment the offset 322 lastBinding.offset += target->wordSize; 323 } 324 325 static void optimizeOpcodes(std::vector<BindIR> &opcodes) { 326 // Pass 1: Combine bind/add pairs 327 size_t i; 328 int pWrite = 0; 329 for (i = 1; i < opcodes.size(); ++i, ++pWrite) { 330 if ((opcodes[i].opcode == BIND_OPCODE_ADD_ADDR_ULEB) && 331 (opcodes[i - 1].opcode == BIND_OPCODE_DO_BIND)) { 332 opcodes[pWrite].opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB; 333 opcodes[pWrite].data = opcodes[i].data; 334 ++i; 335 } else { 336 opcodes[pWrite] = opcodes[i - 1]; 337 } 338 } 339 if (i == opcodes.size()) 340 opcodes[pWrite] = opcodes[i - 1]; 341 opcodes.resize(pWrite + 1); 342 343 // Pass 2: Compress two or more bind_add opcodes 344 pWrite = 0; 345 for (i = 1; i < opcodes.size(); ++i, ++pWrite) { 346 if ((opcodes[i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) && 347 (opcodes[i - 1].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) && 348 (opcodes[i].data == opcodes[i - 1].data)) { 349 opcodes[pWrite].opcode = BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB; 350 opcodes[pWrite].consecutiveCount = 2; 351 opcodes[pWrite].data = opcodes[i].data; 352 ++i; 353 while (i < opcodes.size() && 354 (opcodes[i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) && 355 (opcodes[i].data == opcodes[i - 1].data)) { 356 opcodes[pWrite].consecutiveCount++; 357 ++i; 358 } 359 } else { 360 opcodes[pWrite] = opcodes[i - 1]; 361 } 362 } 363 if (i == opcodes.size()) 364 opcodes[pWrite] = opcodes[i - 1]; 365 opcodes.resize(pWrite + 1); 366 367 // Pass 3: Use immediate encodings 368 // Every binding is the size of one pointer. If the next binding is a 369 // multiple of wordSize away that is within BIND_IMMEDIATE_MASK, the 370 // opcode can be scaled by wordSize into a single byte and dyld will 371 // expand it to the correct address. 372 for (auto &p : opcodes) { 373 // It's unclear why the check needs to be less than BIND_IMMEDIATE_MASK, 374 // but ld64 currently does this. This could be a potential bug, but 375 // for now, perform the same behavior to prevent mysterious bugs. 376 if ((p.opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) && 377 ((p.data / target->wordSize) < BIND_IMMEDIATE_MASK) && 378 ((p.data % target->wordSize) == 0)) { 379 p.opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED; 380 p.data /= target->wordSize; 381 } 382 } 383 } 384 385 static void flushOpcodes(const BindIR &op, raw_svector_ostream &os) { 386 uint8_t opcode = op.opcode & BIND_OPCODE_MASK; 387 switch (opcode) { 388 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: 389 case BIND_OPCODE_ADD_ADDR_ULEB: 390 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: 391 os << op.opcode; 392 encodeULEB128(op.data, os); 393 break; 394 case BIND_OPCODE_SET_ADDEND_SLEB: 395 os << op.opcode; 396 encodeSLEB128(static_cast<int64_t>(op.data), os); 397 break; 398 case BIND_OPCODE_DO_BIND: 399 os << op.opcode; 400 break; 401 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: 402 os << op.opcode; 403 encodeULEB128(op.consecutiveCount, os); 404 encodeULEB128(op.data, os); 405 break; 406 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: 407 os << static_cast<uint8_t>(op.opcode | op.data); 408 break; 409 default: 410 llvm_unreachable("cannot bind to an unrecognized symbol"); 411 } 412 } 413 414 // Non-weak bindings need to have their dylib ordinal encoded as well. 415 static int16_t ordinalForDylibSymbol(const DylibSymbol &dysym) { 416 if (config->namespaceKind == NamespaceKind::flat || dysym.isDynamicLookup()) 417 return static_cast<int16_t>(BIND_SPECIAL_DYLIB_FLAT_LOOKUP); 418 assert(dysym.getFile()->isReferenced()); 419 return dysym.getFile()->ordinal; 420 } 421 422 static int16_t ordinalForSymbol(const Symbol &sym) { 423 if (const auto *dysym = dyn_cast<DylibSymbol>(&sym)) 424 return ordinalForDylibSymbol(*dysym); 425 assert(cast<Defined>(&sym)->interposable); 426 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP; 427 } 428 429 static void encodeDylibOrdinal(int16_t ordinal, raw_svector_ostream &os) { 430 if (ordinal <= 0) { 431 os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | 432 (ordinal & BIND_IMMEDIATE_MASK)); 433 } else if (ordinal <= BIND_IMMEDIATE_MASK) { 434 os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | ordinal); 435 } else { 436 os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); 437 encodeULEB128(ordinal, os); 438 } 439 } 440 441 static void encodeWeakOverride(const Defined *defined, 442 raw_svector_ostream &os) { 443 os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 444 BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) 445 << defined->getName() << '\0'; 446 } 447 448 // Organize the bindings so we can encoded them with fewer opcodes. 449 // 450 // First, all bindings for a given symbol should be grouped together. 451 // BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM is the largest opcode (since it 452 // has an associated symbol string), so we only want to emit it once per symbol. 453 // 454 // Within each group, we sort the bindings by address. Since bindings are 455 // delta-encoded, sorting them allows for a more compact result. Note that 456 // sorting by address alone ensures that bindings for the same segment / section 457 // are located together, minimizing the number of times we have to emit 458 // BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB. 459 // 460 // Finally, we sort the symbols by the address of their first binding, again 461 // to facilitate the delta-encoding process. 462 template <class Sym> 463 std::vector<std::pair<const Sym *, std::vector<BindingEntry>>> 464 sortBindings(const BindingsMap<const Sym *> &bindingsMap) { 465 std::vector<std::pair<const Sym *, std::vector<BindingEntry>>> bindingsVec( 466 bindingsMap.begin(), bindingsMap.end()); 467 for (auto &p : bindingsVec) { 468 std::vector<BindingEntry> &bindings = p.second; 469 llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) { 470 return a.target.getVA() < b.target.getVA(); 471 }); 472 } 473 llvm::sort(bindingsVec, [](const auto &a, const auto &b) { 474 return a.second[0].target.getVA() < b.second[0].target.getVA(); 475 }); 476 return bindingsVec; 477 } 478 479 // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld 480 // interprets to update a record with the following fields: 481 // * segment index (of the segment to write the symbol addresses to, typically 482 // the __DATA_CONST segment which contains the GOT) 483 // * offset within the segment, indicating the next location to write a binding 484 // * symbol type 485 // * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command) 486 // * symbol name 487 // * addend 488 // When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind 489 // a symbol in the GOT, and increments the segment offset to point to the next 490 // entry. It does *not* clear the record state after doing the bind, so 491 // subsequent opcodes only need to encode the differences between bindings. 492 void BindingSection::finalizeContents() { 493 raw_svector_ostream os{contents}; 494 Binding lastBinding; 495 int16_t lastOrdinal = 0; 496 497 for (auto &p : sortBindings(bindingsMap)) { 498 const Symbol *sym = p.first; 499 std::vector<BindingEntry> &bindings = p.second; 500 uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; 501 if (sym->isWeakRef()) 502 flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT; 503 os << flags << sym->getName() << '\0' 504 << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER); 505 int16_t ordinal = ordinalForSymbol(*sym); 506 if (ordinal != lastOrdinal) { 507 encodeDylibOrdinal(ordinal, os); 508 lastOrdinal = ordinal; 509 } 510 std::vector<BindIR> opcodes; 511 for (const BindingEntry &b : bindings) 512 encodeBinding(b.target.isec->parent, 513 b.target.isec->getOffset(b.target.offset), b.addend, 514 lastBinding, opcodes); 515 if (config->optimize > 1) 516 optimizeOpcodes(opcodes); 517 for (const auto &op : opcodes) 518 flushOpcodes(op, os); 519 } 520 if (!bindingsMap.empty()) 521 os << static_cast<uint8_t>(BIND_OPCODE_DONE); 522 } 523 524 void BindingSection::writeTo(uint8_t *buf) const { 525 memcpy(buf, contents.data(), contents.size()); 526 } 527 528 WeakBindingSection::WeakBindingSection() 529 : LinkEditSection(segment_names::linkEdit, section_names::weakBinding) {} 530 531 void WeakBindingSection::finalizeContents() { 532 raw_svector_ostream os{contents}; 533 Binding lastBinding; 534 535 for (const Defined *defined : definitions) 536 encodeWeakOverride(defined, os); 537 538 for (auto &p : sortBindings(bindingsMap)) { 539 const Symbol *sym = p.first; 540 std::vector<BindingEntry> &bindings = p.second; 541 os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) 542 << sym->getName() << '\0' 543 << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER); 544 std::vector<BindIR> opcodes; 545 for (const BindingEntry &b : bindings) 546 encodeBinding(b.target.isec->parent, 547 b.target.isec->getOffset(b.target.offset), b.addend, 548 lastBinding, opcodes); 549 if (config->optimize > 1) 550 optimizeOpcodes(opcodes); 551 for (const auto &op : opcodes) 552 flushOpcodes(op, os); 553 } 554 if (!bindingsMap.empty() || !definitions.empty()) 555 os << static_cast<uint8_t>(BIND_OPCODE_DONE); 556 } 557 558 void WeakBindingSection::writeTo(uint8_t *buf) const { 559 memcpy(buf, contents.data(), contents.size()); 560 } 561 562 StubsSection::StubsSection() 563 : SyntheticSection(segment_names::text, section_names::stubs) { 564 flags = S_SYMBOL_STUBS | S_ATTR_SOME_INSTRUCTIONS | S_ATTR_PURE_INSTRUCTIONS; 565 // The stubs section comprises machine instructions, which are aligned to 566 // 4 bytes on the archs we care about. 567 align = 4; 568 reserved2 = target->stubSize; 569 } 570 571 uint64_t StubsSection::getSize() const { 572 return entries.size() * target->stubSize; 573 } 574 575 void StubsSection::writeTo(uint8_t *buf) const { 576 size_t off = 0; 577 for (const Symbol *sym : entries) { 578 target->writeStub(buf + off, *sym); 579 off += target->stubSize; 580 } 581 } 582 583 void StubsSection::finalize() { isFinal = true; } 584 585 bool StubsSection::addEntry(Symbol *sym) { 586 bool inserted = entries.insert(sym); 587 if (inserted) 588 sym->stubsIndex = entries.size() - 1; 589 return inserted; 590 } 591 592 StubHelperSection::StubHelperSection() 593 : SyntheticSection(segment_names::text, section_names::stubHelper) { 594 flags = S_ATTR_SOME_INSTRUCTIONS | S_ATTR_PURE_INSTRUCTIONS; 595 align = 4; // This section comprises machine instructions 596 } 597 598 uint64_t StubHelperSection::getSize() const { 599 return target->stubHelperHeaderSize + 600 in.lazyBinding->getEntries().size() * target->stubHelperEntrySize; 601 } 602 603 bool StubHelperSection::isNeeded() const { return in.lazyBinding->isNeeded(); } 604 605 void StubHelperSection::writeTo(uint8_t *buf) const { 606 target->writeStubHelperHeader(buf); 607 size_t off = target->stubHelperHeaderSize; 608 for (const Symbol *sym : in.lazyBinding->getEntries()) { 609 target->writeStubHelperEntry(buf + off, *sym, addr + off); 610 off += target->stubHelperEntrySize; 611 } 612 } 613 614 void StubHelperSection::setup() { 615 Symbol *binder = symtab->addUndefined("dyld_stub_binder", /*file=*/nullptr, 616 /*isWeakRef=*/false); 617 if (auto *undefined = dyn_cast<Undefined>(binder)) 618 treatUndefinedSymbol(*undefined, 619 "lazy binding (normally in libSystem.dylib)"); 620 621 // treatUndefinedSymbol() can replace binder with a DylibSymbol; re-check. 622 stubBinder = dyn_cast_or_null<DylibSymbol>(binder); 623 if (stubBinder == nullptr) 624 return; 625 626 in.got->addEntry(stubBinder); 627 628 in.imageLoaderCache->parent = 629 ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache); 630 inputSections.push_back(in.imageLoaderCache); 631 // Since this isn't in the symbol table or in any input file, the noDeadStrip 632 // argument doesn't matter. 633 dyldPrivate = 634 make<Defined>("__dyld_private", nullptr, in.imageLoaderCache, 0, 0, 635 /*isWeakDef=*/false, 636 /*isExternal=*/false, /*isPrivateExtern=*/false, 637 /*isThumb=*/false, /*isReferencedDynamically=*/false, 638 /*noDeadStrip=*/false); 639 dyldPrivate->used = true; 640 } 641 642 LazyPointerSection::LazyPointerSection() 643 : SyntheticSection(segment_names::data, section_names::lazySymbolPtr) { 644 align = target->wordSize; 645 flags = S_LAZY_SYMBOL_POINTERS; 646 } 647 648 uint64_t LazyPointerSection::getSize() const { 649 return in.stubs->getEntries().size() * target->wordSize; 650 } 651 652 bool LazyPointerSection::isNeeded() const { 653 return !in.stubs->getEntries().empty(); 654 } 655 656 void LazyPointerSection::writeTo(uint8_t *buf) const { 657 size_t off = 0; 658 for (const Symbol *sym : in.stubs->getEntries()) { 659 if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) { 660 if (dysym->hasStubsHelper()) { 661 uint64_t stubHelperOffset = 662 target->stubHelperHeaderSize + 663 dysym->stubsHelperIndex * target->stubHelperEntrySize; 664 write64le(buf + off, in.stubHelper->addr + stubHelperOffset); 665 } 666 } else { 667 write64le(buf + off, sym->getVA()); 668 } 669 off += target->wordSize; 670 } 671 } 672 673 LazyBindingSection::LazyBindingSection() 674 : LinkEditSection(segment_names::linkEdit, section_names::lazyBinding) {} 675 676 void LazyBindingSection::finalizeContents() { 677 // TODO: Just precompute output size here instead of writing to a temporary 678 // buffer 679 for (Symbol *sym : entries) 680 sym->lazyBindOffset = encode(*sym); 681 } 682 683 void LazyBindingSection::writeTo(uint8_t *buf) const { 684 memcpy(buf, contents.data(), contents.size()); 685 } 686 687 void LazyBindingSection::addEntry(Symbol *sym) { 688 if (entries.insert(sym)) { 689 sym->stubsHelperIndex = entries.size() - 1; 690 in.rebase->addEntry(in.lazyPointers->isec, 691 sym->stubsIndex * target->wordSize); 692 } 693 } 694 695 // Unlike the non-lazy binding section, the bind opcodes in this section aren't 696 // interpreted all at once. Rather, dyld will start interpreting opcodes at a 697 // given offset, typically only binding a single symbol before it finds a 698 // BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case, 699 // we cannot encode just the differences between symbols; we have to emit the 700 // complete bind information for each symbol. 701 uint32_t LazyBindingSection::encode(const Symbol &sym) { 702 uint32_t opstreamOffset = contents.size(); 703 OutputSegment *dataSeg = in.lazyPointers->parent; 704 os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 705 dataSeg->index); 706 uint64_t offset = in.lazyPointers->addr - dataSeg->addr + 707 sym.stubsIndex * target->wordSize; 708 encodeULEB128(offset, os); 709 encodeDylibOrdinal(ordinalForSymbol(sym), os); 710 711 uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; 712 if (sym.isWeakRef()) 713 flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT; 714 715 os << flags << sym.getName() << '\0' 716 << static_cast<uint8_t>(BIND_OPCODE_DO_BIND) 717 << static_cast<uint8_t>(BIND_OPCODE_DONE); 718 return opstreamOffset; 719 } 720 721 ExportSection::ExportSection() 722 : LinkEditSection(segment_names::linkEdit, section_names::export_) {} 723 724 void ExportSection::finalizeContents() { 725 trieBuilder.setImageBase(in.header->addr); 726 for (const Symbol *sym : symtab->getSymbols()) { 727 if (const auto *defined = dyn_cast<Defined>(sym)) { 728 if (defined->privateExtern || !defined->isLive()) 729 continue; 730 trieBuilder.addSymbol(*defined); 731 hasWeakSymbol = hasWeakSymbol || sym->isWeakDef(); 732 } 733 } 734 size = trieBuilder.build(); 735 } 736 737 void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); } 738 739 DataInCodeSection::DataInCodeSection() 740 : LinkEditSection(segment_names::linkEdit, section_names::dataInCode) {} 741 742 template <class LP> 743 static std::vector<MachO::data_in_code_entry> collectDataInCodeEntries() { 744 std::vector<MachO::data_in_code_entry> dataInCodeEntries; 745 for (const InputFile *inputFile : inputFiles) { 746 if (!isa<ObjFile>(inputFile)) 747 continue; 748 const ObjFile *objFile = cast<ObjFile>(inputFile); 749 ArrayRef<MachO::data_in_code_entry> entries = objFile->getDataInCode(); 750 if (entries.empty()) 751 continue; 752 753 assert(is_sorted(dataInCodeEntries, [](const data_in_code_entry &lhs, 754 const data_in_code_entry &rhs) { 755 return lhs.offset < rhs.offset; 756 })); 757 // For each code subsection find 'data in code' entries residing in it. 758 // Compute the new offset values as 759 // <offset within subsection> + <subsection address> - <__TEXT address>. 760 for (const Section *section : objFile->sections) { 761 for (const Subsection &subsec : section->subsections) { 762 const InputSection *isec = subsec.isec; 763 if (!isCodeSection(isec)) 764 continue; 765 if (cast<ConcatInputSection>(isec)->shouldOmitFromOutput()) 766 continue; 767 const uint64_t beginAddr = section->addr + subsec.offset; 768 auto it = llvm::lower_bound( 769 entries, beginAddr, 770 [](const MachO::data_in_code_entry &entry, uint64_t addr) { 771 return entry.offset < addr; 772 }); 773 const uint64_t endAddr = beginAddr + isec->getFileSize(); 774 for (const auto end = entries.end(); 775 it != end && it->offset + it->length <= endAddr; ++it) 776 dataInCodeEntries.push_back( 777 {static_cast<uint32_t>(isec->getVA(it->offset - beginAddr) - 778 in.header->addr), 779 it->length, it->kind}); 780 } 781 } 782 } 783 return dataInCodeEntries; 784 } 785 786 void DataInCodeSection::finalizeContents() { 787 entries = target->wordSize == 8 ? collectDataInCodeEntries<LP64>() 788 : collectDataInCodeEntries<ILP32>(); 789 } 790 791 void DataInCodeSection::writeTo(uint8_t *buf) const { 792 if (!entries.empty()) 793 memcpy(buf, entries.data(), getRawSize()); 794 } 795 796 FunctionStartsSection::FunctionStartsSection() 797 : LinkEditSection(segment_names::linkEdit, section_names::functionStarts) {} 798 799 void FunctionStartsSection::finalizeContents() { 800 raw_svector_ostream os{contents}; 801 std::vector<uint64_t> addrs; 802 for (const InputFile *file : inputFiles) { 803 if (auto *objFile = dyn_cast<ObjFile>(file)) { 804 for (const Symbol *sym : objFile->symbols) { 805 if (const auto *defined = dyn_cast_or_null<Defined>(sym)) { 806 if (!defined->isec || !isCodeSection(defined->isec) || 807 !defined->isLive()) 808 continue; 809 // TODO: Add support for thumbs, in that case 810 // the lowest bit of nextAddr needs to be set to 1. 811 addrs.push_back(defined->getVA()); 812 } 813 } 814 } 815 } 816 llvm::sort(addrs); 817 uint64_t addr = in.header->addr; 818 for (uint64_t nextAddr : addrs) { 819 uint64_t delta = nextAddr - addr; 820 if (delta == 0) 821 continue; 822 encodeULEB128(delta, os); 823 addr = nextAddr; 824 } 825 os << '\0'; 826 } 827 828 void FunctionStartsSection::writeTo(uint8_t *buf) const { 829 memcpy(buf, contents.data(), contents.size()); 830 } 831 832 SymtabSection::SymtabSection(StringTableSection &stringTableSection) 833 : LinkEditSection(segment_names::linkEdit, section_names::symbolTable), 834 stringTableSection(stringTableSection) {} 835 836 void SymtabSection::emitBeginSourceStab(DWARFUnit *compileUnit) { 837 StabsEntry stab(N_SO); 838 SmallString<261> dir(compileUnit->getCompilationDir()); 839 StringRef sep = sys::path::get_separator(); 840 // We don't use `path::append` here because we want an empty `dir` to result 841 // in an absolute path. `append` would give us a relative path for that case. 842 if (!dir.endswith(sep)) 843 dir += sep; 844 stab.strx = stringTableSection.addString( 845 saver().save(dir + compileUnit->getUnitDIE().getShortName())); 846 stabs.emplace_back(std::move(stab)); 847 } 848 849 void SymtabSection::emitEndSourceStab() { 850 StabsEntry stab(N_SO); 851 stab.sect = 1; 852 stabs.emplace_back(std::move(stab)); 853 } 854 855 void SymtabSection::emitObjectFileStab(ObjFile *file) { 856 StabsEntry stab(N_OSO); 857 stab.sect = target->cpuSubtype; 858 SmallString<261> path(!file->archiveName.empty() ? file->archiveName 859 : file->getName()); 860 std::error_code ec = sys::fs::make_absolute(path); 861 if (ec) 862 fatal("failed to get absolute path for " + path); 863 864 if (!file->archiveName.empty()) 865 path.append({"(", file->getName(), ")"}); 866 867 StringRef adjustedPath = saver().save(path.str()); 868 adjustedPath.consume_front(config->osoPrefix); 869 870 stab.strx = stringTableSection.addString(adjustedPath); 871 stab.desc = 1; 872 stab.value = file->modTime; 873 stabs.emplace_back(std::move(stab)); 874 } 875 876 void SymtabSection::emitEndFunStab(Defined *defined) { 877 StabsEntry stab(N_FUN); 878 stab.value = defined->size; 879 stabs.emplace_back(std::move(stab)); 880 } 881 882 void SymtabSection::emitStabs() { 883 if (config->omitDebugInfo) 884 return; 885 886 for (const std::string &s : config->astPaths) { 887 StabsEntry astStab(N_AST); 888 astStab.strx = stringTableSection.addString(s); 889 stabs.emplace_back(std::move(astStab)); 890 } 891 892 std::vector<Defined *> symbolsNeedingStabs; 893 for (const SymtabEntry &entry : 894 concat<SymtabEntry>(localSymbols, externalSymbols)) { 895 Symbol *sym = entry.sym; 896 assert(sym->isLive() && 897 "dead symbols should not be in localSymbols, externalSymbols"); 898 if (auto *defined = dyn_cast<Defined>(sym)) { 899 if (defined->isAbsolute()) 900 continue; 901 InputSection *isec = defined->isec; 902 ObjFile *file = dyn_cast_or_null<ObjFile>(isec->getFile()); 903 if (!file || !file->compileUnit) 904 continue; 905 symbolsNeedingStabs.push_back(defined); 906 } 907 } 908 909 llvm::stable_sort(symbolsNeedingStabs, [&](Defined *a, Defined *b) { 910 return a->isec->getFile()->id < b->isec->getFile()->id; 911 }); 912 913 // Emit STABS symbols so that dsymutil and/or the debugger can map address 914 // regions in the final binary to the source and object files from which they 915 // originated. 916 InputFile *lastFile = nullptr; 917 for (Defined *defined : symbolsNeedingStabs) { 918 InputSection *isec = defined->isec; 919 ObjFile *file = cast<ObjFile>(isec->getFile()); 920 921 if (lastFile == nullptr || lastFile != file) { 922 if (lastFile != nullptr) 923 emitEndSourceStab(); 924 lastFile = file; 925 926 emitBeginSourceStab(file->compileUnit); 927 emitObjectFileStab(file); 928 } 929 930 StabsEntry symStab; 931 symStab.sect = defined->isec->parent->index; 932 symStab.strx = stringTableSection.addString(defined->getName()); 933 symStab.value = defined->getVA(); 934 935 if (isCodeSection(isec)) { 936 symStab.type = N_FUN; 937 stabs.emplace_back(std::move(symStab)); 938 emitEndFunStab(defined); 939 } else { 940 symStab.type = defined->isExternal() ? N_GSYM : N_STSYM; 941 stabs.emplace_back(std::move(symStab)); 942 } 943 } 944 945 if (!stabs.empty()) 946 emitEndSourceStab(); 947 } 948 949 void SymtabSection::finalizeContents() { 950 auto addSymbol = [&](std::vector<SymtabEntry> &symbols, Symbol *sym) { 951 uint32_t strx = stringTableSection.addString(sym->getName()); 952 symbols.push_back({sym, strx}); 953 }; 954 955 // Local symbols aren't in the SymbolTable, so we walk the list of object 956 // files to gather them. 957 for (const InputFile *file : inputFiles) { 958 if (auto *objFile = dyn_cast<ObjFile>(file)) { 959 for (Symbol *sym : objFile->symbols) { 960 if (auto *defined = dyn_cast_or_null<Defined>(sym)) { 961 if (!defined->isExternal() && defined->isLive()) { 962 StringRef name = defined->getName(); 963 if (!name.startswith("l") && !name.startswith("L")) 964 addSymbol(localSymbols, sym); 965 } 966 } 967 } 968 } 969 } 970 971 // __dyld_private is a local symbol too. It's linker-created and doesn't 972 // exist in any object file. 973 if (Defined *dyldPrivate = in.stubHelper->dyldPrivate) 974 addSymbol(localSymbols, dyldPrivate); 975 976 for (Symbol *sym : symtab->getSymbols()) { 977 if (!sym->isLive()) 978 continue; 979 if (auto *defined = dyn_cast<Defined>(sym)) { 980 if (!defined->includeInSymtab) 981 continue; 982 assert(defined->isExternal()); 983 if (defined->privateExtern) 984 addSymbol(localSymbols, defined); 985 else 986 addSymbol(externalSymbols, defined); 987 } else if (auto *dysym = dyn_cast<DylibSymbol>(sym)) { 988 if (dysym->isReferenced()) 989 addSymbol(undefinedSymbols, sym); 990 } 991 } 992 993 emitStabs(); 994 uint32_t symtabIndex = stabs.size(); 995 for (const SymtabEntry &entry : 996 concat<SymtabEntry>(localSymbols, externalSymbols, undefinedSymbols)) { 997 entry.sym->symtabIndex = symtabIndex++; 998 } 999 } 1000 1001 uint32_t SymtabSection::getNumSymbols() const { 1002 return stabs.size() + localSymbols.size() + externalSymbols.size() + 1003 undefinedSymbols.size(); 1004 } 1005 1006 // This serves to hide (type-erase) the template parameter from SymtabSection. 1007 template <class LP> class SymtabSectionImpl final : public SymtabSection { 1008 public: 1009 SymtabSectionImpl(StringTableSection &stringTableSection) 1010 : SymtabSection(stringTableSection) {} 1011 uint64_t getRawSize() const override; 1012 void writeTo(uint8_t *buf) const override; 1013 }; 1014 1015 template <class LP> uint64_t SymtabSectionImpl<LP>::getRawSize() const { 1016 return getNumSymbols() * sizeof(typename LP::nlist); 1017 } 1018 1019 template <class LP> void SymtabSectionImpl<LP>::writeTo(uint8_t *buf) const { 1020 auto *nList = reinterpret_cast<typename LP::nlist *>(buf); 1021 // Emit the stabs entries before the "real" symbols. We cannot emit them 1022 // after as that would render Symbol::symtabIndex inaccurate. 1023 for (const StabsEntry &entry : stabs) { 1024 nList->n_strx = entry.strx; 1025 nList->n_type = entry.type; 1026 nList->n_sect = entry.sect; 1027 nList->n_desc = entry.desc; 1028 nList->n_value = entry.value; 1029 ++nList; 1030 } 1031 1032 for (const SymtabEntry &entry : concat<const SymtabEntry>( 1033 localSymbols, externalSymbols, undefinedSymbols)) { 1034 nList->n_strx = entry.strx; 1035 // TODO populate n_desc with more flags 1036 if (auto *defined = dyn_cast<Defined>(entry.sym)) { 1037 uint8_t scope = 0; 1038 if (defined->privateExtern) { 1039 // Private external -- dylib scoped symbol. 1040 // Promote to non-external at link time. 1041 scope = N_PEXT; 1042 } else if (defined->isExternal()) { 1043 // Normal global symbol. 1044 scope = N_EXT; 1045 } else { 1046 // TU-local symbol from localSymbols. 1047 scope = 0; 1048 } 1049 1050 if (defined->isAbsolute()) { 1051 nList->n_type = scope | N_ABS; 1052 nList->n_sect = NO_SECT; 1053 nList->n_value = defined->value; 1054 } else { 1055 nList->n_type = scope | N_SECT; 1056 nList->n_sect = defined->isec->parent->index; 1057 // For the N_SECT symbol type, n_value is the address of the symbol 1058 nList->n_value = defined->getVA(); 1059 } 1060 nList->n_desc |= defined->thumb ? N_ARM_THUMB_DEF : 0; 1061 nList->n_desc |= defined->isExternalWeakDef() ? N_WEAK_DEF : 0; 1062 nList->n_desc |= 1063 defined->referencedDynamically ? REFERENCED_DYNAMICALLY : 0; 1064 } else if (auto *dysym = dyn_cast<DylibSymbol>(entry.sym)) { 1065 uint16_t n_desc = nList->n_desc; 1066 int16_t ordinal = ordinalForDylibSymbol(*dysym); 1067 if (ordinal == BIND_SPECIAL_DYLIB_FLAT_LOOKUP) 1068 SET_LIBRARY_ORDINAL(n_desc, DYNAMIC_LOOKUP_ORDINAL); 1069 else if (ordinal == BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE) 1070 SET_LIBRARY_ORDINAL(n_desc, EXECUTABLE_ORDINAL); 1071 else { 1072 assert(ordinal > 0); 1073 SET_LIBRARY_ORDINAL(n_desc, static_cast<uint8_t>(ordinal)); 1074 } 1075 1076 nList->n_type = N_EXT; 1077 n_desc |= dysym->isWeakDef() ? N_WEAK_DEF : 0; 1078 n_desc |= dysym->isWeakRef() ? N_WEAK_REF : 0; 1079 nList->n_desc = n_desc; 1080 } 1081 ++nList; 1082 } 1083 } 1084 1085 template <class LP> 1086 SymtabSection * 1087 macho::makeSymtabSection(StringTableSection &stringTableSection) { 1088 return make<SymtabSectionImpl<LP>>(stringTableSection); 1089 } 1090 1091 IndirectSymtabSection::IndirectSymtabSection() 1092 : LinkEditSection(segment_names::linkEdit, 1093 section_names::indirectSymbolTable) {} 1094 1095 uint32_t IndirectSymtabSection::getNumSymbols() const { 1096 return in.got->getEntries().size() + in.tlvPointers->getEntries().size() + 1097 2 * in.stubs->getEntries().size(); 1098 } 1099 1100 bool IndirectSymtabSection::isNeeded() const { 1101 return in.got->isNeeded() || in.tlvPointers->isNeeded() || 1102 in.stubs->isNeeded(); 1103 } 1104 1105 void IndirectSymtabSection::finalizeContents() { 1106 uint32_t off = 0; 1107 in.got->reserved1 = off; 1108 off += in.got->getEntries().size(); 1109 in.tlvPointers->reserved1 = off; 1110 off += in.tlvPointers->getEntries().size(); 1111 in.stubs->reserved1 = off; 1112 off += in.stubs->getEntries().size(); 1113 in.lazyPointers->reserved1 = off; 1114 } 1115 1116 static uint32_t indirectValue(const Symbol *sym) { 1117 if (sym->symtabIndex == UINT32_MAX) 1118 return INDIRECT_SYMBOL_LOCAL; 1119 if (auto *defined = dyn_cast<Defined>(sym)) 1120 if (defined->privateExtern) 1121 return INDIRECT_SYMBOL_LOCAL; 1122 return sym->symtabIndex; 1123 } 1124 1125 void IndirectSymtabSection::writeTo(uint8_t *buf) const { 1126 uint32_t off = 0; 1127 for (const Symbol *sym : in.got->getEntries()) { 1128 write32le(buf + off * sizeof(uint32_t), indirectValue(sym)); 1129 ++off; 1130 } 1131 for (const Symbol *sym : in.tlvPointers->getEntries()) { 1132 write32le(buf + off * sizeof(uint32_t), indirectValue(sym)); 1133 ++off; 1134 } 1135 for (const Symbol *sym : in.stubs->getEntries()) { 1136 write32le(buf + off * sizeof(uint32_t), indirectValue(sym)); 1137 ++off; 1138 } 1139 // There is a 1:1 correspondence between stubs and LazyPointerSection 1140 // entries. But giving __stubs and __la_symbol_ptr the same reserved1 1141 // (the offset into the indirect symbol table) so that they both refer 1142 // to the same range of offsets confuses `strip`, so write the stubs 1143 // symbol table offsets a second time. 1144 for (const Symbol *sym : in.stubs->getEntries()) { 1145 write32le(buf + off * sizeof(uint32_t), indirectValue(sym)); 1146 ++off; 1147 } 1148 } 1149 1150 StringTableSection::StringTableSection() 1151 : LinkEditSection(segment_names::linkEdit, section_names::stringTable) {} 1152 1153 uint32_t StringTableSection::addString(StringRef str) { 1154 uint32_t strx = size; 1155 strings.push_back(str); // TODO: consider deduplicating strings 1156 size += str.size() + 1; // account for null terminator 1157 return strx; 1158 } 1159 1160 void StringTableSection::writeTo(uint8_t *buf) const { 1161 uint32_t off = 0; 1162 for (StringRef str : strings) { 1163 memcpy(buf + off, str.data(), str.size()); 1164 off += str.size() + 1; // account for null terminator 1165 } 1166 } 1167 1168 static_assert((CodeSignatureSection::blobHeadersSize % 8) == 0, ""); 1169 static_assert((CodeSignatureSection::fixedHeadersSize % 8) == 0, ""); 1170 1171 CodeSignatureSection::CodeSignatureSection() 1172 : LinkEditSection(segment_names::linkEdit, section_names::codeSignature) { 1173 align = 16; // required by libstuff 1174 // FIXME: Consider using finalOutput instead of outputFile. 1175 fileName = config->outputFile; 1176 size_t slashIndex = fileName.rfind("/"); 1177 if (slashIndex != std::string::npos) 1178 fileName = fileName.drop_front(slashIndex + 1); 1179 1180 // NOTE: Any changes to these calculations should be repeated 1181 // in llvm-objcopy's MachOLayoutBuilder::layoutTail. 1182 allHeadersSize = alignTo<16>(fixedHeadersSize + fileName.size() + 1); 1183 fileNamePad = allHeadersSize - fixedHeadersSize - fileName.size(); 1184 } 1185 1186 uint32_t CodeSignatureSection::getBlockCount() const { 1187 return (fileOff + blockSize - 1) / blockSize; 1188 } 1189 1190 uint64_t CodeSignatureSection::getRawSize() const { 1191 return allHeadersSize + getBlockCount() * hashSize; 1192 } 1193 1194 void CodeSignatureSection::writeHashes(uint8_t *buf) const { 1195 // NOTE: Changes to this functionality should be repeated in llvm-objcopy's 1196 // MachOWriter::writeSignatureData. 1197 uint8_t *code = buf; 1198 uint8_t *codeEnd = buf + fileOff; 1199 uint8_t *hashes = codeEnd + allHeadersSize; 1200 while (code < codeEnd) { 1201 StringRef block(reinterpret_cast<char *>(code), 1202 std::min(codeEnd - code, static_cast<ssize_t>(blockSize))); 1203 SHA256 hasher; 1204 hasher.update(block); 1205 StringRef hash = hasher.final(); 1206 assert(hash.size() == hashSize); 1207 memcpy(hashes, hash.data(), hashSize); 1208 code += blockSize; 1209 hashes += hashSize; 1210 } 1211 #if defined(__APPLE__) 1212 // This is macOS-specific work-around and makes no sense for any 1213 // other host OS. See https://openradar.appspot.com/FB8914231 1214 // 1215 // The macOS kernel maintains a signature-verification cache to 1216 // quickly validate applications at time of execve(2). The trouble 1217 // is that for the kernel creates the cache entry at the time of the 1218 // mmap(2) call, before we have a chance to write either the code to 1219 // sign or the signature header+hashes. The fix is to invalidate 1220 // all cached data associated with the output file, thus discarding 1221 // the bogus prematurely-cached signature. 1222 msync(buf, fileOff + getSize(), MS_INVALIDATE); 1223 #endif 1224 } 1225 1226 void CodeSignatureSection::writeTo(uint8_t *buf) const { 1227 // NOTE: Changes to this functionality should be repeated in llvm-objcopy's 1228 // MachOWriter::writeSignatureData. 1229 uint32_t signatureSize = static_cast<uint32_t>(getSize()); 1230 auto *superBlob = reinterpret_cast<CS_SuperBlob *>(buf); 1231 write32be(&superBlob->magic, CSMAGIC_EMBEDDED_SIGNATURE); 1232 write32be(&superBlob->length, signatureSize); 1233 write32be(&superBlob->count, 1); 1234 auto *blobIndex = reinterpret_cast<CS_BlobIndex *>(&superBlob[1]); 1235 write32be(&blobIndex->type, CSSLOT_CODEDIRECTORY); 1236 write32be(&blobIndex->offset, blobHeadersSize); 1237 auto *codeDirectory = 1238 reinterpret_cast<CS_CodeDirectory *>(buf + blobHeadersSize); 1239 write32be(&codeDirectory->magic, CSMAGIC_CODEDIRECTORY); 1240 write32be(&codeDirectory->length, signatureSize - blobHeadersSize); 1241 write32be(&codeDirectory->version, CS_SUPPORTSEXECSEG); 1242 write32be(&codeDirectory->flags, CS_ADHOC | CS_LINKER_SIGNED); 1243 write32be(&codeDirectory->hashOffset, 1244 sizeof(CS_CodeDirectory) + fileName.size() + fileNamePad); 1245 write32be(&codeDirectory->identOffset, sizeof(CS_CodeDirectory)); 1246 codeDirectory->nSpecialSlots = 0; 1247 write32be(&codeDirectory->nCodeSlots, getBlockCount()); 1248 write32be(&codeDirectory->codeLimit, fileOff); 1249 codeDirectory->hashSize = static_cast<uint8_t>(hashSize); 1250 codeDirectory->hashType = kSecCodeSignatureHashSHA256; 1251 codeDirectory->platform = 0; 1252 codeDirectory->pageSize = blockSizeShift; 1253 codeDirectory->spare2 = 0; 1254 codeDirectory->scatterOffset = 0; 1255 codeDirectory->teamOffset = 0; 1256 codeDirectory->spare3 = 0; 1257 codeDirectory->codeLimit64 = 0; 1258 OutputSegment *textSeg = getOrCreateOutputSegment(segment_names::text); 1259 write64be(&codeDirectory->execSegBase, textSeg->fileOff); 1260 write64be(&codeDirectory->execSegLimit, textSeg->fileSize); 1261 write64be(&codeDirectory->execSegFlags, 1262 config->outputType == MH_EXECUTE ? CS_EXECSEG_MAIN_BINARY : 0); 1263 auto *id = reinterpret_cast<char *>(&codeDirectory[1]); 1264 memcpy(id, fileName.begin(), fileName.size()); 1265 memset(id + fileName.size(), 0, fileNamePad); 1266 } 1267 1268 BitcodeBundleSection::BitcodeBundleSection() 1269 : SyntheticSection(segment_names::llvm, section_names::bitcodeBundle) {} 1270 1271 class ErrorCodeWrapper { 1272 public: 1273 explicit ErrorCodeWrapper(std::error_code ec) : errorCode(ec.value()) {} 1274 explicit ErrorCodeWrapper(int ec) : errorCode(ec) {} 1275 operator int() const { return errorCode; } 1276 1277 private: 1278 int errorCode; 1279 }; 1280 1281 #define CHECK_EC(exp) \ 1282 do { \ 1283 ErrorCodeWrapper ec(exp); \ 1284 if (ec) \ 1285 fatal(Twine("operation failed with error code ") + Twine(ec) + ": " + \ 1286 #exp); \ 1287 } while (0); 1288 1289 void BitcodeBundleSection::finalize() { 1290 #ifdef LLVM_HAVE_LIBXAR 1291 using namespace llvm::sys::fs; 1292 CHECK_EC(createTemporaryFile("bitcode-bundle", "xar", xarPath)); 1293 1294 #pragma clang diagnostic push 1295 #pragma clang diagnostic ignored "-Wdeprecated-declarations" 1296 xar_t xar(xar_open(xarPath.data(), O_RDWR)); 1297 #pragma clang diagnostic pop 1298 if (!xar) 1299 fatal("failed to open XAR temporary file at " + xarPath); 1300 CHECK_EC(xar_opt_set(xar, XAR_OPT_COMPRESSION, XAR_OPT_VAL_NONE)); 1301 // FIXME: add more data to XAR 1302 CHECK_EC(xar_close(xar)); 1303 1304 file_size(xarPath, xarSize); 1305 #endif // defined(LLVM_HAVE_LIBXAR) 1306 } 1307 1308 void BitcodeBundleSection::writeTo(uint8_t *buf) const { 1309 using namespace llvm::sys::fs; 1310 file_t handle = 1311 CHECK(openNativeFile(xarPath, CD_OpenExisting, FA_Read, OF_None), 1312 "failed to open XAR file"); 1313 std::error_code ec; 1314 mapped_file_region xarMap(handle, mapped_file_region::mapmode::readonly, 1315 xarSize, 0, ec); 1316 if (ec) 1317 fatal("failed to map XAR file"); 1318 memcpy(buf, xarMap.const_data(), xarSize); 1319 1320 closeFile(handle); 1321 remove(xarPath); 1322 } 1323 1324 CStringSection::CStringSection() 1325 : SyntheticSection(segment_names::text, section_names::cString) { 1326 flags = S_CSTRING_LITERALS; 1327 } 1328 1329 void CStringSection::addInput(CStringInputSection *isec) { 1330 isec->parent = this; 1331 inputs.push_back(isec); 1332 if (isec->align > align) 1333 align = isec->align; 1334 } 1335 1336 void CStringSection::writeTo(uint8_t *buf) const { 1337 for (const CStringInputSection *isec : inputs) { 1338 for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) { 1339 if (!isec->pieces[i].live) 1340 continue; 1341 StringRef string = isec->getStringRef(i); 1342 memcpy(buf + isec->pieces[i].outSecOff, string.data(), string.size()); 1343 } 1344 } 1345 } 1346 1347 void CStringSection::finalizeContents() { 1348 uint64_t offset = 0; 1349 for (CStringInputSection *isec : inputs) { 1350 for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) { 1351 if (!isec->pieces[i].live) 1352 continue; 1353 // See comment above DeduplicatedCStringSection for how alignment is 1354 // handled. 1355 uint32_t pieceAlign = 1356 1 << countTrailingZeros(isec->align | isec->pieces[i].inSecOff); 1357 offset = alignTo(offset, pieceAlign); 1358 isec->pieces[i].outSecOff = offset; 1359 isec->isFinal = true; 1360 StringRef string = isec->getStringRef(i); 1361 offset += string.size(); 1362 } 1363 } 1364 size = offset; 1365 } 1366 1367 // Mergeable cstring literals are found under the __TEXT,__cstring section. In 1368 // contrast to ELF, which puts strings that need different alignments into 1369 // different sections, clang's Mach-O backend puts them all in one section. 1370 // Strings that need to be aligned have the .p2align directive emitted before 1371 // them, which simply translates into zero padding in the object file. In other 1372 // words, we have to infer the desired alignment of these cstrings from their 1373 // addresses. 1374 // 1375 // We differ slightly from ld64 in how we've chosen to align these cstrings. 1376 // Both LLD and ld64 preserve the number of trailing zeros in each cstring's 1377 // address in the input object files. When deduplicating identical cstrings, 1378 // both linkers pick the cstring whose address has more trailing zeros, and 1379 // preserve the alignment of that address in the final binary. However, ld64 1380 // goes a step further and also preserves the offset of the cstring from the 1381 // last section-aligned address. I.e. if a cstring is at offset 18 in the 1382 // input, with a section alignment of 16, then both LLD and ld64 will ensure the 1383 // final address is 2-byte aligned (since 18 == 16 + 2). But ld64 will also 1384 // ensure that the final address is of the form 16 * k + 2 for some k. 1385 // 1386 // Note that ld64's heuristic means that a dedup'ed cstring's final address is 1387 // dependent on the order of the input object files. E.g. if in addition to the 1388 // cstring at offset 18 above, we have a duplicate one in another file with a 1389 // `.cstring` section alignment of 2 and an offset of zero, then ld64 will pick 1390 // the cstring from the object file earlier on the command line (since both have 1391 // the same number of trailing zeros in their address). So the final cstring may 1392 // either be at some address `16 * k + 2` or at some address `2 * k`. 1393 // 1394 // I've opted not to follow this behavior primarily for implementation 1395 // simplicity, and secondarily to save a few more bytes. It's not clear to me 1396 // that preserving the section alignment + offset is ever necessary, and there 1397 // are many cases that are clearly redundant. In particular, if an x86_64 object 1398 // file contains some strings that are accessed via SIMD instructions, then the 1399 // .cstring section in the object file will be 16-byte-aligned (since SIMD 1400 // requires its operand addresses to be 16-byte aligned). However, there will 1401 // typically also be other cstrings in the same file that aren't used via SIMD 1402 // and don't need this alignment. They will be emitted at some arbitrary address 1403 // `A`, but ld64 will treat them as being 16-byte aligned with an offset of `16 1404 // % A`. 1405 void DeduplicatedCStringSection::finalizeContents() { 1406 // Find the largest alignment required for each string. 1407 for (const CStringInputSection *isec : inputs) { 1408 for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) { 1409 const StringPiece &piece = isec->pieces[i]; 1410 if (!piece.live) 1411 continue; 1412 auto s = isec->getCachedHashStringRef(i); 1413 assert(isec->align != 0); 1414 uint8_t trailingZeros = countTrailingZeros(isec->align | piece.inSecOff); 1415 auto it = stringOffsetMap.insert( 1416 std::make_pair(s, StringOffset(trailingZeros))); 1417 if (!it.second && it.first->second.trailingZeros < trailingZeros) 1418 it.first->second.trailingZeros = trailingZeros; 1419 } 1420 } 1421 1422 // Assign an offset for each string and save it to the corresponding 1423 // StringPieces for easy access. 1424 for (CStringInputSection *isec : inputs) { 1425 for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) { 1426 if (!isec->pieces[i].live) 1427 continue; 1428 auto s = isec->getCachedHashStringRef(i); 1429 auto it = stringOffsetMap.find(s); 1430 assert(it != stringOffsetMap.end()); 1431 StringOffset &offsetInfo = it->second; 1432 if (offsetInfo.outSecOff == UINT64_MAX) { 1433 offsetInfo.outSecOff = alignTo(size, 1 << offsetInfo.trailingZeros); 1434 size = offsetInfo.outSecOff + s.size(); 1435 } 1436 isec->pieces[i].outSecOff = offsetInfo.outSecOff; 1437 } 1438 isec->isFinal = true; 1439 } 1440 } 1441 1442 void DeduplicatedCStringSection::writeTo(uint8_t *buf) const { 1443 for (const auto &p : stringOffsetMap) { 1444 StringRef data = p.first.val(); 1445 uint64_t off = p.second.outSecOff; 1446 if (!data.empty()) 1447 memcpy(buf + off, data.data(), data.size()); 1448 } 1449 } 1450 1451 // This section is actually emitted as __TEXT,__const by ld64, but clang may 1452 // emit input sections of that name, and LLD doesn't currently support mixing 1453 // synthetic and concat-type OutputSections. To work around this, I've given 1454 // our merged-literals section a different name. 1455 WordLiteralSection::WordLiteralSection() 1456 : SyntheticSection(segment_names::text, section_names::literals) { 1457 align = 16; 1458 } 1459 1460 void WordLiteralSection::addInput(WordLiteralInputSection *isec) { 1461 isec->parent = this; 1462 inputs.push_back(isec); 1463 } 1464 1465 void WordLiteralSection::finalizeContents() { 1466 for (WordLiteralInputSection *isec : inputs) { 1467 // We do all processing of the InputSection here, so it will be effectively 1468 // finalized. 1469 isec->isFinal = true; 1470 const uint8_t *buf = isec->data.data(); 1471 switch (sectionType(isec->getFlags())) { 1472 case S_4BYTE_LITERALS: { 1473 for (size_t off = 0, e = isec->data.size(); off < e; off += 4) { 1474 if (!isec->isLive(off)) 1475 continue; 1476 uint32_t value = *reinterpret_cast<const uint32_t *>(buf + off); 1477 literal4Map.emplace(value, literal4Map.size()); 1478 } 1479 break; 1480 } 1481 case S_8BYTE_LITERALS: { 1482 for (size_t off = 0, e = isec->data.size(); off < e; off += 8) { 1483 if (!isec->isLive(off)) 1484 continue; 1485 uint64_t value = *reinterpret_cast<const uint64_t *>(buf + off); 1486 literal8Map.emplace(value, literal8Map.size()); 1487 } 1488 break; 1489 } 1490 case S_16BYTE_LITERALS: { 1491 for (size_t off = 0, e = isec->data.size(); off < e; off += 16) { 1492 if (!isec->isLive(off)) 1493 continue; 1494 UInt128 value = *reinterpret_cast<const UInt128 *>(buf + off); 1495 literal16Map.emplace(value, literal16Map.size()); 1496 } 1497 break; 1498 } 1499 default: 1500 llvm_unreachable("invalid literal section type"); 1501 } 1502 } 1503 } 1504 1505 void WordLiteralSection::writeTo(uint8_t *buf) const { 1506 // Note that we don't attempt to do any endianness conversion in addInput(), 1507 // so we don't do it here either -- just write out the original value, 1508 // byte-for-byte. 1509 for (const auto &p : literal16Map) 1510 memcpy(buf + p.second * 16, &p.first, 16); 1511 buf += literal16Map.size() * 16; 1512 1513 for (const auto &p : literal8Map) 1514 memcpy(buf + p.second * 8, &p.first, 8); 1515 buf += literal8Map.size() * 8; 1516 1517 for (const auto &p : literal4Map) 1518 memcpy(buf + p.second * 4, &p.first, 4); 1519 } 1520 1521 void macho::createSyntheticSymbols() { 1522 auto addHeaderSymbol = [](const char *name) { 1523 symtab->addSynthetic(name, in.header->isec, /*value=*/0, 1524 /*isPrivateExtern=*/true, /*includeInSymtab=*/false, 1525 /*referencedDynamically=*/false); 1526 }; 1527 1528 switch (config->outputType) { 1529 // FIXME: Assign the right address value for these symbols 1530 // (rather than 0). But we need to do that after assignAddresses(). 1531 case MH_EXECUTE: 1532 // If linking PIE, __mh_execute_header is a defined symbol in 1533 // __TEXT, __text) 1534 // Otherwise, it's an absolute symbol. 1535 if (config->isPic) 1536 symtab->addSynthetic("__mh_execute_header", in.header->isec, /*value=*/0, 1537 /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 1538 /*referencedDynamically=*/true); 1539 else 1540 symtab->addSynthetic("__mh_execute_header", /*isec=*/nullptr, /*value=*/0, 1541 /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 1542 /*referencedDynamically=*/true); 1543 break; 1544 1545 // The following symbols are N_SECT symbols, even though the header is not 1546 // part of any section and that they are private to the bundle/dylib/object 1547 // they are part of. 1548 case MH_BUNDLE: 1549 addHeaderSymbol("__mh_bundle_header"); 1550 break; 1551 case MH_DYLIB: 1552 addHeaderSymbol("__mh_dylib_header"); 1553 break; 1554 case MH_DYLINKER: 1555 addHeaderSymbol("__mh_dylinker_header"); 1556 break; 1557 case MH_OBJECT: 1558 addHeaderSymbol("__mh_object_header"); 1559 break; 1560 default: 1561 llvm_unreachable("unexpected outputType"); 1562 break; 1563 } 1564 1565 // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit 1566 // which does e.g. cleanup of static global variables. The ABI document 1567 // says that the pointer can point to any address in one of the dylib's 1568 // segments, but in practice ld64 seems to set it to point to the header, 1569 // so that's what's implemented here. 1570 addHeaderSymbol("___dso_handle"); 1571 } 1572 1573 template SymtabSection *macho::makeSymtabSection<LP64>(StringTableSection &); 1574 template SymtabSection *macho::makeSymtabSection<ILP32>(StringTableSection &); 1575