1 //===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp ---------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 /// 11 /// \file For mach-o object files, this implementation converts normalized 12 /// mach-o in memory to mach-o binary on disk. 13 /// 14 /// +---------------+ 15 /// | binary mach-o | 16 /// +---------------+ 17 /// ^ 18 /// | 19 /// | 20 /// +------------+ 21 /// | normalized | 22 /// +------------+ 23 24 #include "MachONormalizedFile.h" 25 #include "MachONormalizedFileBinaryUtils.h" 26 #include "lld/Core/Error.h" 27 #include "lld/Core/LLVM.h" 28 #include "llvm/ADT/ilist.h" 29 #include "llvm/ADT/ilist_node.h" 30 #include "llvm/ADT/SmallString.h" 31 #include "llvm/ADT/SmallVector.h" 32 #include "llvm/ADT/StringRef.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/Errc.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/FileOutputBuffer.h" 38 #include "llvm/Support/Format.h" 39 #include "llvm/Support/Host.h" 40 #include "llvm/Support/MachO.h" 41 #include "llvm/Support/MemoryBuffer.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include <functional> 44 #include <list> 45 #include <map> 46 #include <system_error> 47 48 using namespace llvm::MachO; 49 50 namespace lld { 51 namespace mach_o { 52 namespace normalized { 53 54 struct TrieNode; // Forward declaration. 55 56 struct TrieEdge : public llvm::ilist_node<TrieEdge> { 57 TrieEdge(StringRef s, TrieNode *node) : _subString(s), _child(node) {} 58 59 StringRef _subString; 60 struct TrieNode *_child; 61 }; 62 63 } // namespace normalized 64 } // namespace mach_o 65 } // namespace lld 66 67 68 namespace llvm { 69 using lld::mach_o::normalized::TrieEdge; 70 template <> 71 struct ilist_traits<TrieEdge> 72 : public ilist_default_traits<TrieEdge> { 73 private: 74 mutable ilist_half_node<TrieEdge> Sentinel; 75 public: 76 TrieEdge *createSentinel() const { 77 return static_cast<TrieEdge*>(&Sentinel); 78 } 79 void destroySentinel(TrieEdge *) const {} 80 81 TrieEdge *provideInitialHead() const { return createSentinel(); } 82 TrieEdge *ensureHead(TrieEdge*) const { return createSentinel(); } 83 static void noteHead(TrieEdge*, TrieEdge*) {} 84 void deleteNode(TrieEdge *N) {} 85 86 private: 87 void createNode(const TrieEdge &); 88 }; 89 } // namespace llvm 90 91 92 namespace lld { 93 namespace mach_o { 94 namespace normalized { 95 96 struct TrieNode { 97 typedef llvm::ilist<TrieEdge> TrieEdgeList; 98 99 TrieNode(StringRef s) 100 : _cummulativeString(s), _address(0), _flags(0), _other(0), 101 _trieOffset(0), _hasExportInfo(false) {} 102 ~TrieNode() = default; 103 104 void addSymbol(const Export &entry, BumpPtrAllocator &allocator, 105 std::vector<TrieNode *> &allNodes); 106 bool updateOffset(uint32_t &offset); 107 void appendToByteBuffer(ByteBuffer &out); 108 109 private: 110 StringRef _cummulativeString; 111 TrieEdgeList _children; 112 uint64_t _address; 113 uint64_t _flags; 114 uint64_t _other; 115 StringRef _importedName; 116 uint32_t _trieOffset; 117 bool _hasExportInfo; 118 }; 119 120 /// Utility class for writing a mach-o binary file given an in-memory 121 /// normalized file. 122 class MachOFileLayout { 123 public: 124 /// All layout computation is done in the constructor. 125 MachOFileLayout(const NormalizedFile &file); 126 127 /// Returns the final file size as computed in the constructor. 128 size_t size() const; 129 130 // Returns size of the mach_header and load commands. 131 size_t headerAndLoadCommandsSize() const; 132 133 /// Writes the normalized file as a binary mach-o file to the specified 134 /// path. This does not have a stream interface because the generated 135 /// file may need the 'x' bit set. 136 llvm::Error writeBinary(StringRef path); 137 138 private: 139 uint32_t loadCommandsSize(uint32_t &count); 140 void buildFileOffsets(); 141 void writeMachHeader(); 142 llvm::Error writeLoadCommands(); 143 void writeSectionContent(); 144 void writeRelocations(); 145 void writeSymbolTable(); 146 void writeRebaseInfo(); 147 void writeBindingInfo(); 148 void writeLazyBindingInfo(); 149 void writeExportInfo(); 150 void writeFunctionStartsInfo(); 151 void writeDataInCodeInfo(); 152 void writeLinkEditContent(); 153 void buildLinkEditInfo(); 154 void buildRebaseInfo(); 155 void buildBindInfo(); 156 void buildLazyBindInfo(); 157 void buildExportTrie(); 158 void computeFunctionStartsSize(); 159 void computeDataInCodeSize(); 160 void computeSymbolTableSizes(); 161 void buildSectionRelocations(); 162 void appendSymbols(const std::vector<Symbol> &symbols, 163 uint32_t &symOffset, uint32_t &strOffset); 164 uint32_t indirectSymbolIndex(const Section §, uint32_t &index); 165 uint32_t indirectSymbolElementSize(const Section §); 166 167 // For use as template parameter to load command methods. 168 struct MachO64Trait { 169 typedef llvm::MachO::segment_command_64 command; 170 typedef llvm::MachO::section_64 section; 171 enum { LC = llvm::MachO::LC_SEGMENT_64 }; 172 }; 173 174 // For use as template parameter to load command methods. 175 struct MachO32Trait { 176 typedef llvm::MachO::segment_command command; 177 typedef llvm::MachO::section section; 178 enum { LC = llvm::MachO::LC_SEGMENT }; 179 }; 180 181 template <typename T> 182 llvm::Error writeSingleSegmentLoadCommand(uint8_t *&lc); 183 template <typename T> llvm::Error writeSegmentLoadCommands(uint8_t *&lc); 184 185 uint32_t pointerAlign(uint32_t value); 186 static StringRef dyldPath(); 187 188 struct SegExtraInfo { 189 uint32_t fileOffset; 190 uint32_t fileSize; 191 std::vector<const Section*> sections; 192 }; 193 typedef std::map<const Segment*, SegExtraInfo> SegMap; 194 struct SectionExtraInfo { 195 uint32_t fileOffset; 196 }; 197 typedef std::map<const Section*, SectionExtraInfo> SectionMap; 198 199 const NormalizedFile &_file; 200 std::error_code _ec; 201 uint8_t *_buffer; 202 const bool _is64; 203 const bool _swap; 204 const bool _bigEndianArch; 205 uint64_t _seg1addr; 206 uint32_t _startOfLoadCommands; 207 uint32_t _countOfLoadCommands; 208 uint32_t _endOfLoadCommands; 209 uint32_t _startOfRelocations; 210 uint32_t _startOfFunctionStarts; 211 uint32_t _startOfDataInCode; 212 uint32_t _startOfSymbols; 213 uint32_t _startOfIndirectSymbols; 214 uint32_t _startOfSymbolStrings; 215 uint32_t _endOfSymbolStrings; 216 uint32_t _symbolTableLocalsStartIndex; 217 uint32_t _symbolTableGlobalsStartIndex; 218 uint32_t _symbolTableUndefinesStartIndex; 219 uint32_t _symbolStringPoolSize; 220 uint32_t _symbolTableSize; 221 uint32_t _functionStartsSize; 222 uint32_t _dataInCodeSize; 223 uint32_t _indirectSymbolTableCount; 224 // Used in object file creation only 225 uint32_t _startOfSectionsContent; 226 uint32_t _endOfSectionsContent; 227 // Used in final linked image only 228 uint32_t _startOfLinkEdit; 229 uint32_t _startOfRebaseInfo; 230 uint32_t _endOfRebaseInfo; 231 uint32_t _startOfBindingInfo; 232 uint32_t _endOfBindingInfo; 233 uint32_t _startOfLazyBindingInfo; 234 uint32_t _endOfLazyBindingInfo; 235 uint32_t _startOfExportTrie; 236 uint32_t _endOfExportTrie; 237 uint32_t _endOfLinkEdit; 238 uint64_t _addressOfLinkEdit; 239 SegMap _segInfo; 240 SectionMap _sectInfo; 241 ByteBuffer _rebaseInfo; 242 ByteBuffer _bindingInfo; 243 ByteBuffer _lazyBindingInfo; 244 ByteBuffer _weakBindingInfo; 245 ByteBuffer _exportTrie; 246 }; 247 248 size_t headerAndLoadCommandsSize(const NormalizedFile &file) { 249 MachOFileLayout layout(file); 250 return layout.headerAndLoadCommandsSize(); 251 } 252 253 StringRef MachOFileLayout::dyldPath() { 254 return "/usr/lib/dyld"; 255 } 256 257 uint32_t MachOFileLayout::pointerAlign(uint32_t value) { 258 return llvm::alignTo(value, _is64 ? 8 : 4); 259 } 260 261 262 size_t MachOFileLayout::headerAndLoadCommandsSize() const { 263 return _endOfLoadCommands; 264 } 265 266 MachOFileLayout::MachOFileLayout(const NormalizedFile &file) 267 : _file(file), 268 _is64(MachOLinkingContext::is64Bit(file.arch)), 269 _swap(!MachOLinkingContext::isHostEndian(file.arch)), 270 _bigEndianArch(MachOLinkingContext::isBigEndian(file.arch)), 271 _seg1addr(INT64_MAX) { 272 _startOfLoadCommands = _is64 ? sizeof(mach_header_64) : sizeof(mach_header); 273 const size_t segCommandBaseSize = 274 (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); 275 const size_t sectsSize = (_is64 ? sizeof(section_64) : sizeof(section)); 276 if (file.fileType == llvm::MachO::MH_OBJECT) { 277 // object files have just one segment load command containing all sections 278 _endOfLoadCommands = _startOfLoadCommands 279 + segCommandBaseSize 280 + file.sections.size() * sectsSize 281 + sizeof(symtab_command); 282 _countOfLoadCommands = 2; 283 if (file.hasMinVersionLoadCommand) { 284 _endOfLoadCommands += sizeof(version_min_command); 285 _countOfLoadCommands++; 286 } 287 if (!_file.functionStarts.empty()) { 288 _endOfLoadCommands += sizeof(linkedit_data_command); 289 _countOfLoadCommands++; 290 } 291 if (_file.generateDataInCodeLoadCommand) { 292 _endOfLoadCommands += sizeof(linkedit_data_command); 293 _countOfLoadCommands++; 294 } 295 // Assign file offsets to each section. 296 _startOfSectionsContent = _endOfLoadCommands; 297 unsigned relocCount = 0; 298 uint64_t offset = _startOfSectionsContent; 299 for (const Section § : file.sections) { 300 if (isZeroFillSection(sect.type)) 301 _sectInfo[§].fileOffset = 0; 302 else { 303 offset = llvm::alignTo(offset, sect.alignment); 304 _sectInfo[§].fileOffset = offset; 305 offset += sect.content.size(); 306 } 307 relocCount += sect.relocations.size(); 308 } 309 _endOfSectionsContent = offset; 310 311 computeSymbolTableSizes(); 312 computeFunctionStartsSize(); 313 computeDataInCodeSize(); 314 315 // Align start of relocations. 316 _startOfRelocations = pointerAlign(_endOfSectionsContent); 317 _startOfFunctionStarts = _startOfRelocations + relocCount * 8; 318 _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize; 319 _startOfSymbols = _startOfDataInCode + _dataInCodeSize; 320 // Add Indirect symbol table. 321 _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; 322 // Align start of symbol table and symbol strings. 323 _startOfSymbolStrings = _startOfIndirectSymbols 324 + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); 325 _endOfSymbolStrings = _startOfSymbolStrings 326 + pointerAlign(_symbolStringPoolSize); 327 _endOfLinkEdit = _endOfSymbolStrings; 328 DEBUG_WITH_TYPE("MachOFileLayout", 329 llvm::dbgs() << "MachOFileLayout()\n" 330 << " startOfLoadCommands=" << _startOfLoadCommands << "\n" 331 << " countOfLoadCommands=" << _countOfLoadCommands << "\n" 332 << " endOfLoadCommands=" << _endOfLoadCommands << "\n" 333 << " startOfRelocations=" << _startOfRelocations << "\n" 334 << " startOfSymbols=" << _startOfSymbols << "\n" 335 << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" 336 << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" 337 << " startOfSectionsContent=" << _startOfSectionsContent << "\n" 338 << " endOfSectionsContent=" << _endOfSectionsContent << "\n"); 339 } else { 340 // Final linked images have one load command per segment. 341 _endOfLoadCommands = _startOfLoadCommands 342 + loadCommandsSize(_countOfLoadCommands); 343 344 // Assign section file offsets. 345 buildFileOffsets(); 346 buildLinkEditInfo(); 347 348 // LINKEDIT of final linked images has in order: 349 // rebase info, binding info, lazy binding info, weak binding info, 350 // data-in-code, symbol table, indirect symbol table, symbol table strings. 351 _startOfRebaseInfo = _startOfLinkEdit; 352 _endOfRebaseInfo = _startOfRebaseInfo + _rebaseInfo.size(); 353 _startOfBindingInfo = _endOfRebaseInfo; 354 _endOfBindingInfo = _startOfBindingInfo + _bindingInfo.size(); 355 _startOfLazyBindingInfo = _endOfBindingInfo; 356 _endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size(); 357 _startOfExportTrie = _endOfLazyBindingInfo; 358 _endOfExportTrie = _startOfExportTrie + _exportTrie.size(); 359 _startOfFunctionStarts = _endOfExportTrie; 360 _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize; 361 _startOfSymbols = _startOfDataInCode + _dataInCodeSize; 362 _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; 363 _startOfSymbolStrings = _startOfIndirectSymbols 364 + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); 365 _endOfSymbolStrings = _startOfSymbolStrings 366 + pointerAlign(_symbolStringPoolSize); 367 _endOfLinkEdit = _endOfSymbolStrings; 368 DEBUG_WITH_TYPE("MachOFileLayout", 369 llvm::dbgs() << "MachOFileLayout()\n" 370 << " startOfLoadCommands=" << _startOfLoadCommands << "\n" 371 << " countOfLoadCommands=" << _countOfLoadCommands << "\n" 372 << " endOfLoadCommands=" << _endOfLoadCommands << "\n" 373 << " startOfLinkEdit=" << _startOfLinkEdit << "\n" 374 << " startOfRebaseInfo=" << _startOfRebaseInfo << "\n" 375 << " endOfRebaseInfo=" << _endOfRebaseInfo << "\n" 376 << " startOfBindingInfo=" << _startOfBindingInfo << "\n" 377 << " endOfBindingInfo=" << _endOfBindingInfo << "\n" 378 << " startOfLazyBindingInfo=" << _startOfLazyBindingInfo << "\n" 379 << " endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n" 380 << " startOfExportTrie=" << _startOfExportTrie << "\n" 381 << " endOfExportTrie=" << _endOfExportTrie << "\n" 382 << " startOfFunctionStarts=" << _startOfFunctionStarts << "\n" 383 << " startOfDataInCode=" << _startOfDataInCode << "\n" 384 << " startOfSymbols=" << _startOfSymbols << "\n" 385 << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" 386 << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" 387 << " addressOfLinkEdit=" << _addressOfLinkEdit << "\n"); 388 } 389 } 390 391 uint32_t MachOFileLayout::loadCommandsSize(uint32_t &count) { 392 uint32_t size = 0; 393 count = 0; 394 395 const size_t segCommandSize = 396 (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); 397 const size_t sectionSize = (_is64 ? sizeof(section_64) : sizeof(section)); 398 399 // Add LC_SEGMENT for each segment. 400 size += _file.segments.size() * segCommandSize; 401 count += _file.segments.size(); 402 // Add section record for each section. 403 size += _file.sections.size() * sectionSize; 404 405 // If creating a dylib, add LC_ID_DYLIB. 406 if (_file.fileType == llvm::MachO::MH_DYLIB) { 407 size += sizeof(dylib_command) + pointerAlign(_file.installName.size() + 1); 408 ++count; 409 } 410 411 // Add LC_DYLD_INFO 412 size += sizeof(dyld_info_command); 413 ++count; 414 415 // Add LC_SYMTAB 416 size += sizeof(symtab_command); 417 ++count; 418 419 // Add LC_DYSYMTAB 420 if (_file.fileType != llvm::MachO::MH_PRELOAD) { 421 size += sizeof(dysymtab_command); 422 ++count; 423 } 424 425 // If main executable add LC_LOAD_DYLINKER 426 if (_file.fileType == llvm::MachO::MH_EXECUTE) { 427 size += pointerAlign(sizeof(dylinker_command) + dyldPath().size()+1); 428 ++count; 429 } 430 431 // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS, 432 // LC_VERSION_MIN_TVOS 433 if (_file.hasMinVersionLoadCommand) { 434 size += sizeof(version_min_command); 435 ++count; 436 } 437 438 // Add LC_SOURCE_VERSION 439 size += sizeof(source_version_command); 440 ++count; 441 442 // If main executable add LC_MAIN 443 if (_file.fileType == llvm::MachO::MH_EXECUTE) { 444 size += sizeof(entry_point_command); 445 ++count; 446 } 447 448 // Add LC_LOAD_DYLIB for each dependent dylib. 449 for (const DependentDylib &dep : _file.dependentDylibs) { 450 size += sizeof(dylib_command) + pointerAlign(dep.path.size()+1); 451 ++count; 452 } 453 454 // Add LC_RPATH 455 for (const StringRef &path : _file.rpaths) { 456 size += pointerAlign(sizeof(rpath_command) + path.size() + 1); 457 ++count; 458 } 459 460 // Add LC_FUNCTION_STARTS if needed 461 if (!_file.functionStarts.empty()) { 462 size += sizeof(linkedit_data_command); 463 ++count; 464 } 465 466 // Add LC_DATA_IN_CODE if requested. Note, we do encode zero length entries. 467 // FIXME: Zero length entries is only to match ld64. Should we change this? 468 if (_file.generateDataInCodeLoadCommand) { 469 size += sizeof(linkedit_data_command); 470 ++count; 471 } 472 473 return size; 474 } 475 476 static bool overlaps(const Segment &s1, const Segment &s2) { 477 if (s2.address >= s1.address+s1.size) 478 return false; 479 if (s1.address >= s2.address+s2.size) 480 return false; 481 return true; 482 } 483 484 static bool overlaps(const Section &s1, const Section &s2) { 485 if (s2.address >= s1.address+s1.content.size()) 486 return false; 487 if (s1.address >= s2.address+s2.content.size()) 488 return false; 489 return true; 490 } 491 492 void MachOFileLayout::buildFileOffsets() { 493 // Verify no segments overlap 494 for (const Segment &sg1 : _file.segments) { 495 for (const Segment &sg2 : _file.segments) { 496 if (&sg1 == &sg2) 497 continue; 498 if (overlaps(sg1,sg2)) { 499 _ec = make_error_code(llvm::errc::executable_format_error); 500 return; 501 } 502 } 503 } 504 505 // Verify no sections overlap 506 for (const Section &s1 : _file.sections) { 507 for (const Section &s2 : _file.sections) { 508 if (&s1 == &s2) 509 continue; 510 if (overlaps(s1,s2)) { 511 _ec = make_error_code(llvm::errc::executable_format_error); 512 return; 513 } 514 } 515 } 516 517 // Build side table of extra info about segments and sections. 518 SegExtraInfo t; 519 t.fileOffset = 0; 520 for (const Segment &sg : _file.segments) { 521 _segInfo[&sg] = t; 522 } 523 SectionExtraInfo t2; 524 t2.fileOffset = 0; 525 // Assign sections to segments. 526 for (const Section &s : _file.sections) { 527 _sectInfo[&s] = t2; 528 bool foundSegment = false; 529 for (const Segment &sg : _file.segments) { 530 if (sg.name.equals(s.segmentName)) { 531 if ((s.address >= sg.address) 532 && (s.address+s.content.size() <= sg.address+sg.size)) { 533 _segInfo[&sg].sections.push_back(&s); 534 foundSegment = true; 535 break; 536 } 537 } 538 } 539 if (!foundSegment) { 540 _ec = make_error_code(llvm::errc::executable_format_error); 541 return; 542 } 543 } 544 545 // Assign file offsets. 546 uint32_t fileOffset = 0; 547 DEBUG_WITH_TYPE("MachOFileLayout", 548 llvm::dbgs() << "buildFileOffsets()\n"); 549 for (const Segment &sg : _file.segments) { 550 _segInfo[&sg].fileOffset = fileOffset; 551 if ((_seg1addr == INT64_MAX) && sg.init_access) 552 _seg1addr = sg.address; 553 DEBUG_WITH_TYPE("MachOFileLayout", 554 llvm::dbgs() << " segment=" << sg.name 555 << ", fileOffset=" << _segInfo[&sg].fileOffset << "\n"); 556 557 uint32_t segFileSize = 0; 558 // A segment that is not zero-fill must use a least one page of disk space. 559 if (sg.init_access) 560 segFileSize = _file.pageSize; 561 for (const Section *s : _segInfo[&sg].sections) { 562 uint32_t sectOffset = s->address - sg.address; 563 uint32_t sectFileSize = 564 isZeroFillSection(s->type) ? 0 : s->content.size(); 565 segFileSize = std::max(segFileSize, sectOffset + sectFileSize); 566 567 _sectInfo[s].fileOffset = _segInfo[&sg].fileOffset + sectOffset; 568 DEBUG_WITH_TYPE("MachOFileLayout", 569 llvm::dbgs() << " section=" << s->sectionName 570 << ", fileOffset=" << fileOffset << "\n"); 571 } 572 573 // round up all segments to page aligned, except __LINKEDIT 574 if (!sg.name.equals("__LINKEDIT")) { 575 _segInfo[&sg].fileSize = llvm::alignTo(segFileSize, _file.pageSize); 576 fileOffset = llvm::alignTo(fileOffset + segFileSize, _file.pageSize); 577 } 578 _addressOfLinkEdit = sg.address + sg.size; 579 } 580 _startOfLinkEdit = fileOffset; 581 } 582 583 size_t MachOFileLayout::size() const { 584 return _endOfSymbolStrings; 585 } 586 587 void MachOFileLayout::writeMachHeader() { 588 auto cpusubtype = MachOLinkingContext::cpuSubtypeFromArch(_file.arch); 589 // dynamic x86 executables on newer OS version should also set the 590 // CPU_SUBTYPE_LIB64 mask in the CPU subtype. 591 // FIXME: Check that this is a dynamic executable, not a static one. 592 if (_file.fileType == llvm::MachO::MH_EXECUTE && 593 cpusubtype == CPU_SUBTYPE_X86_64_ALL && 594 _file.os == MachOLinkingContext::OS::macOSX) { 595 uint32_t version; 596 bool failed = MachOLinkingContext::parsePackedVersion("10.5", version); 597 if (!failed && _file.minOSverson >= version) 598 cpusubtype |= CPU_SUBTYPE_LIB64; 599 } 600 601 mach_header *mh = reinterpret_cast<mach_header*>(_buffer); 602 mh->magic = _is64 ? llvm::MachO::MH_MAGIC_64 : llvm::MachO::MH_MAGIC; 603 mh->cputype = MachOLinkingContext::cpuTypeFromArch(_file.arch); 604 mh->cpusubtype = cpusubtype; 605 mh->filetype = _file.fileType; 606 mh->ncmds = _countOfLoadCommands; 607 mh->sizeofcmds = _endOfLoadCommands - _startOfLoadCommands; 608 mh->flags = _file.flags; 609 if (_swap) 610 swapStruct(*mh); 611 } 612 613 uint32_t MachOFileLayout::indirectSymbolIndex(const Section §, 614 uint32_t &index) { 615 if (sect.indirectSymbols.empty()) 616 return 0; 617 uint32_t result = index; 618 index += sect.indirectSymbols.size(); 619 return result; 620 } 621 622 uint32_t MachOFileLayout::indirectSymbolElementSize(const Section §) { 623 if (sect.indirectSymbols.empty()) 624 return 0; 625 if (sect.type != S_SYMBOL_STUBS) 626 return 0; 627 return sect.content.size() / sect.indirectSymbols.size(); 628 } 629 630 template <typename T> 631 llvm::Error MachOFileLayout::writeSingleSegmentLoadCommand(uint8_t *&lc) { 632 typename T::command* seg = reinterpret_cast<typename T::command*>(lc); 633 seg->cmd = T::LC; 634 seg->cmdsize = sizeof(typename T::command) 635 + _file.sections.size() * sizeof(typename T::section); 636 uint8_t *next = lc + seg->cmdsize; 637 memset(seg->segname, 0, 16); 638 seg->vmaddr = 0; 639 seg->vmsize = _file.sections.back().address 640 + _file.sections.back().content.size(); 641 seg->fileoff = _endOfLoadCommands; 642 seg->filesize = seg->vmsize; 643 seg->maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; 644 seg->initprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; 645 seg->nsects = _file.sections.size(); 646 seg->flags = 0; 647 if (_swap) 648 swapStruct(*seg); 649 typename T::section *sout = reinterpret_cast<typename T::section*> 650 (lc+sizeof(typename T::command)); 651 uint32_t relOffset = _startOfRelocations; 652 uint32_t indirectSymRunningIndex = 0; 653 for (const Section &sin : _file.sections) { 654 setString16(sin.sectionName, sout->sectname); 655 setString16(sin.segmentName, sout->segname); 656 sout->addr = sin.address; 657 sout->size = sin.content.size(); 658 sout->offset = _sectInfo[&sin].fileOffset; 659 sout->align = llvm::Log2_32(sin.alignment); 660 sout->reloff = sin.relocations.empty() ? 0 : relOffset; 661 sout->nreloc = sin.relocations.size(); 662 sout->flags = sin.type | sin.attributes; 663 sout->reserved1 = indirectSymbolIndex(sin, indirectSymRunningIndex); 664 sout->reserved2 = indirectSymbolElementSize(sin); 665 relOffset += sin.relocations.size() * sizeof(any_relocation_info); 666 if (_swap) 667 swapStruct(*sout); 668 ++sout; 669 } 670 lc = next; 671 return llvm::Error(); 672 } 673 674 template <typename T> 675 llvm::Error MachOFileLayout::writeSegmentLoadCommands(uint8_t *&lc) { 676 uint32_t indirectSymRunningIndex = 0; 677 for (const Segment &seg : _file.segments) { 678 // Link edit has no sections and a custom range of address, so handle it 679 // specially. 680 SegExtraInfo &segInfo = _segInfo[&seg]; 681 if (seg.name.equals("__LINKEDIT")) { 682 size_t linkeditSize = _endOfLinkEdit - _startOfLinkEdit; 683 typename T::command* cmd = reinterpret_cast<typename T::command*>(lc); 684 cmd->cmd = T::LC; 685 cmd->cmdsize = sizeof(typename T::command); 686 uint8_t *next = lc + cmd->cmdsize; 687 setString16("__LINKEDIT", cmd->segname); 688 cmd->vmaddr = _addressOfLinkEdit; 689 cmd->vmsize = llvm::alignTo(linkeditSize, _file.pageSize); 690 cmd->fileoff = _startOfLinkEdit; 691 cmd->filesize = linkeditSize; 692 cmd->initprot = seg.init_access; 693 cmd->maxprot = seg.max_access; 694 cmd->nsects = 0; 695 cmd->flags = 0; 696 if (_swap) 697 swapStruct(*cmd); 698 lc = next; 699 continue; 700 } 701 // Write segment command with trailing sections. 702 typename T::command* cmd = reinterpret_cast<typename T::command*>(lc); 703 cmd->cmd = T::LC; 704 cmd->cmdsize = sizeof(typename T::command) 705 + segInfo.sections.size() * sizeof(typename T::section); 706 uint8_t *next = lc + cmd->cmdsize; 707 setString16(seg.name, cmd->segname); 708 cmd->vmaddr = seg.address; 709 cmd->vmsize = seg.size; 710 cmd->fileoff = segInfo.fileOffset; 711 cmd->filesize = segInfo.fileSize; 712 cmd->initprot = seg.init_access; 713 cmd->maxprot = seg.max_access; 714 cmd->nsects = segInfo.sections.size(); 715 cmd->flags = 0; 716 if (_swap) 717 swapStruct(*cmd); 718 typename T::section *sect = reinterpret_cast<typename T::section*> 719 (lc+sizeof(typename T::command)); 720 for (const Section *section : segInfo.sections) { 721 setString16(section->sectionName, sect->sectname); 722 setString16(section->segmentName, sect->segname); 723 sect->addr = section->address; 724 sect->size = section->content.size(); 725 if (isZeroFillSection(section->type)) 726 sect->offset = 0; 727 else 728 sect->offset = section->address - seg.address + segInfo.fileOffset; 729 sect->align = llvm::Log2_32(section->alignment); 730 sect->reloff = 0; 731 sect->nreloc = 0; 732 sect->flags = section->type | section->attributes; 733 sect->reserved1 = indirectSymbolIndex(*section, indirectSymRunningIndex); 734 sect->reserved2 = indirectSymbolElementSize(*section); 735 if (_swap) 736 swapStruct(*sect); 737 ++sect; 738 } 739 lc = reinterpret_cast<uint8_t*>(next); 740 } 741 return llvm::Error(); 742 } 743 744 static void writeVersionMinLoadCommand(const NormalizedFile &_file, 745 bool _swap, 746 uint8_t *&lc) { 747 if (!_file.hasMinVersionLoadCommand) 748 return; 749 version_min_command *vm = reinterpret_cast<version_min_command*>(lc); 750 switch (_file.os) { 751 case MachOLinkingContext::OS::unknown: 752 vm->cmd = _file.minOSVersionKind; 753 vm->cmdsize = sizeof(version_min_command); 754 vm->version = _file.minOSverson; 755 vm->sdk = 0; 756 break; 757 case MachOLinkingContext::OS::macOSX: 758 vm->cmd = LC_VERSION_MIN_MACOSX; 759 vm->cmdsize = sizeof(version_min_command); 760 vm->version = _file.minOSverson; 761 vm->sdk = _file.sdkVersion; 762 break; 763 case MachOLinkingContext::OS::iOS: 764 case MachOLinkingContext::OS::iOS_simulator: 765 vm->cmd = LC_VERSION_MIN_IPHONEOS; 766 vm->cmdsize = sizeof(version_min_command); 767 vm->version = _file.minOSverson; 768 vm->sdk = _file.sdkVersion; 769 break; 770 } 771 if (_swap) 772 swapStruct(*vm); 773 lc += sizeof(version_min_command); 774 } 775 776 llvm::Error MachOFileLayout::writeLoadCommands() { 777 uint8_t *lc = &_buffer[_startOfLoadCommands]; 778 if (_file.fileType == llvm::MachO::MH_OBJECT) { 779 // Object files have one unnamed segment which holds all sections. 780 if (_is64) { 781 if (auto ec = writeSingleSegmentLoadCommand<MachO64Trait>(lc)) 782 return ec; 783 } else { 784 if (auto ec = writeSingleSegmentLoadCommand<MachO32Trait>(lc)) 785 return ec; 786 } 787 // Add LC_SYMTAB with symbol table info 788 symtab_command* st = reinterpret_cast<symtab_command*>(lc); 789 st->cmd = LC_SYMTAB; 790 st->cmdsize = sizeof(symtab_command); 791 st->symoff = _startOfSymbols; 792 st->nsyms = _file.localSymbols.size() + _file.globalSymbols.size() 793 + _file.undefinedSymbols.size(); 794 st->stroff = _startOfSymbolStrings; 795 st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; 796 if (_swap) 797 swapStruct(*st); 798 lc += sizeof(symtab_command); 799 800 // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, 801 // LC_VERSION_MIN_WATCHOS, LC_VERSION_MIN_TVOS 802 writeVersionMinLoadCommand(_file, _swap, lc); 803 804 // Add LC_FUNCTION_STARTS if needed. 805 if (_functionStartsSize != 0) { 806 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); 807 dl->cmd = LC_FUNCTION_STARTS; 808 dl->cmdsize = sizeof(linkedit_data_command); 809 dl->dataoff = _startOfFunctionStarts; 810 dl->datasize = _functionStartsSize; 811 if (_swap) 812 swapStruct(*dl); 813 lc += sizeof(linkedit_data_command); 814 } 815 816 // Add LC_DATA_IN_CODE if requested. 817 if (_file.generateDataInCodeLoadCommand) { 818 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); 819 dl->cmd = LC_DATA_IN_CODE; 820 dl->cmdsize = sizeof(linkedit_data_command); 821 dl->dataoff = _startOfDataInCode; 822 dl->datasize = _dataInCodeSize; 823 if (_swap) 824 swapStruct(*dl); 825 lc += sizeof(linkedit_data_command); 826 } 827 } else { 828 // Final linked images have sections under segments. 829 if (_is64) { 830 if (auto ec = writeSegmentLoadCommands<MachO64Trait>(lc)) 831 return ec; 832 } else { 833 if (auto ec = writeSegmentLoadCommands<MachO32Trait>(lc)) 834 return ec; 835 } 836 837 // Add LC_ID_DYLIB command for dynamic libraries. 838 if (_file.fileType == llvm::MachO::MH_DYLIB) { 839 dylib_command *dc = reinterpret_cast<dylib_command*>(lc); 840 StringRef path = _file.installName; 841 uint32_t size = sizeof(dylib_command) + pointerAlign(path.size() + 1); 842 dc->cmd = LC_ID_DYLIB; 843 dc->cmdsize = size; 844 dc->dylib.name = sizeof(dylib_command); // offset 845 // needs to be some constant value different than the one in LC_LOAD_DYLIB 846 dc->dylib.timestamp = 1; 847 dc->dylib.current_version = _file.currentVersion; 848 dc->dylib.compatibility_version = _file.compatVersion; 849 if (_swap) 850 swapStruct(*dc); 851 memcpy(lc + sizeof(dylib_command), path.begin(), path.size()); 852 lc[sizeof(dylib_command) + path.size()] = '\0'; 853 lc += size; 854 } 855 856 // Add LC_DYLD_INFO_ONLY. 857 dyld_info_command* di = reinterpret_cast<dyld_info_command*>(lc); 858 di->cmd = LC_DYLD_INFO_ONLY; 859 di->cmdsize = sizeof(dyld_info_command); 860 di->rebase_off = _rebaseInfo.size() ? _startOfRebaseInfo : 0; 861 di->rebase_size = _rebaseInfo.size(); 862 di->bind_off = _bindingInfo.size() ? _startOfBindingInfo : 0; 863 di->bind_size = _bindingInfo.size(); 864 di->weak_bind_off = 0; 865 di->weak_bind_size = 0; 866 di->lazy_bind_off = _lazyBindingInfo.size() ? _startOfLazyBindingInfo : 0; 867 di->lazy_bind_size = _lazyBindingInfo.size(); 868 di->export_off = _exportTrie.size() ? _startOfExportTrie : 0; 869 di->export_size = _exportTrie.size(); 870 if (_swap) 871 swapStruct(*di); 872 lc += sizeof(dyld_info_command); 873 874 // Add LC_SYMTAB with symbol table info. 875 symtab_command* st = reinterpret_cast<symtab_command*>(lc); 876 st->cmd = LC_SYMTAB; 877 st->cmdsize = sizeof(symtab_command); 878 st->symoff = _startOfSymbols; 879 st->nsyms = _file.localSymbols.size() + _file.globalSymbols.size() 880 + _file.undefinedSymbols.size(); 881 st->stroff = _startOfSymbolStrings; 882 st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; 883 if (_swap) 884 swapStruct(*st); 885 lc += sizeof(symtab_command); 886 887 // Add LC_DYSYMTAB 888 if (_file.fileType != llvm::MachO::MH_PRELOAD) { 889 dysymtab_command* dst = reinterpret_cast<dysymtab_command*>(lc); 890 dst->cmd = LC_DYSYMTAB; 891 dst->cmdsize = sizeof(dysymtab_command); 892 dst->ilocalsym = _symbolTableLocalsStartIndex; 893 dst->nlocalsym = _file.localSymbols.size(); 894 dst->iextdefsym = _symbolTableGlobalsStartIndex; 895 dst->nextdefsym = _file.globalSymbols.size(); 896 dst->iundefsym = _symbolTableUndefinesStartIndex; 897 dst->nundefsym = _file.undefinedSymbols.size(); 898 dst->tocoff = 0; 899 dst->ntoc = 0; 900 dst->modtaboff = 0; 901 dst->nmodtab = 0; 902 dst->extrefsymoff = 0; 903 dst->nextrefsyms = 0; 904 dst->indirectsymoff = _startOfIndirectSymbols; 905 dst->nindirectsyms = _indirectSymbolTableCount; 906 dst->extreloff = 0; 907 dst->nextrel = 0; 908 dst->locreloff = 0; 909 dst->nlocrel = 0; 910 if (_swap) 911 swapStruct(*dst); 912 lc += sizeof(dysymtab_command); 913 } 914 915 // If main executable, add LC_LOAD_DYLINKER 916 if (_file.fileType == llvm::MachO::MH_EXECUTE) { 917 // Build LC_LOAD_DYLINKER load command. 918 uint32_t size=pointerAlign(sizeof(dylinker_command)+dyldPath().size()+1); 919 dylinker_command* dl = reinterpret_cast<dylinker_command*>(lc); 920 dl->cmd = LC_LOAD_DYLINKER; 921 dl->cmdsize = size; 922 dl->name = sizeof(dylinker_command); // offset 923 if (_swap) 924 swapStruct(*dl); 925 memcpy(lc+sizeof(dylinker_command), dyldPath().data(), dyldPath().size()); 926 lc[sizeof(dylinker_command)+dyldPath().size()] = '\0'; 927 lc += size; 928 } 929 930 // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS, 931 // LC_VERSION_MIN_TVOS 932 writeVersionMinLoadCommand(_file, _swap, lc); 933 934 // Add LC_SOURCE_VERSION 935 { 936 // Note, using a temporary here to appease UB as we may not be aligned 937 // enough for a struct containing a uint64_t when emitting a 32-bit binary 938 source_version_command sv; 939 sv.cmd = LC_SOURCE_VERSION; 940 sv.cmdsize = sizeof(source_version_command); 941 sv.version = _file.sourceVersion; 942 if (_swap) 943 swapStruct(sv); 944 memcpy(lc, &sv, sizeof(source_version_command)); 945 lc += sizeof(source_version_command); 946 } 947 948 // If main executable, add LC_MAIN. 949 if (_file.fileType == llvm::MachO::MH_EXECUTE) { 950 // Build LC_MAIN load command. 951 // Note, using a temporary here to appease UB as we may not be aligned 952 // enough for a struct containing a uint64_t when emitting a 32-bit binary 953 entry_point_command ep; 954 ep.cmd = LC_MAIN; 955 ep.cmdsize = sizeof(entry_point_command); 956 ep.entryoff = _file.entryAddress - _seg1addr; 957 ep.stacksize = _file.stackSize; 958 if (_swap) 959 swapStruct(ep); 960 memcpy(lc, &ep, sizeof(entry_point_command)); 961 lc += sizeof(entry_point_command); 962 } 963 964 // Add LC_LOAD_DYLIB commands 965 for (const DependentDylib &dep : _file.dependentDylibs) { 966 dylib_command* dc = reinterpret_cast<dylib_command*>(lc); 967 uint32_t size = sizeof(dylib_command) + pointerAlign(dep.path.size()+1); 968 dc->cmd = dep.kind; 969 dc->cmdsize = size; 970 dc->dylib.name = sizeof(dylib_command); // offset 971 // needs to be some constant value different than the one in LC_ID_DYLIB 972 dc->dylib.timestamp = 2; 973 dc->dylib.current_version = dep.currentVersion; 974 dc->dylib.compatibility_version = dep.compatVersion; 975 if (_swap) 976 swapStruct(*dc); 977 memcpy(lc+sizeof(dylib_command), dep.path.begin(), dep.path.size()); 978 lc[sizeof(dylib_command)+dep.path.size()] = '\0'; 979 lc += size; 980 } 981 982 // Add LC_RPATH 983 for (const StringRef &path : _file.rpaths) { 984 rpath_command *rpc = reinterpret_cast<rpath_command *>(lc); 985 uint32_t size = pointerAlign(sizeof(rpath_command) + path.size() + 1); 986 rpc->cmd = LC_RPATH; 987 rpc->cmdsize = size; 988 rpc->path = sizeof(rpath_command); // offset 989 if (_swap) 990 swapStruct(*rpc); 991 memcpy(lc+sizeof(rpath_command), path.begin(), path.size()); 992 lc[sizeof(rpath_command)+path.size()] = '\0'; 993 lc += size; 994 } 995 996 // Add LC_FUNCTION_STARTS if needed. 997 if (_functionStartsSize != 0) { 998 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); 999 dl->cmd = LC_FUNCTION_STARTS; 1000 dl->cmdsize = sizeof(linkedit_data_command); 1001 dl->dataoff = _startOfFunctionStarts; 1002 dl->datasize = _functionStartsSize; 1003 if (_swap) 1004 swapStruct(*dl); 1005 lc += sizeof(linkedit_data_command); 1006 } 1007 1008 // Add LC_DATA_IN_CODE if requested. 1009 if (_file.generateDataInCodeLoadCommand) { 1010 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); 1011 dl->cmd = LC_DATA_IN_CODE; 1012 dl->cmdsize = sizeof(linkedit_data_command); 1013 dl->dataoff = _startOfDataInCode; 1014 dl->datasize = _dataInCodeSize; 1015 if (_swap) 1016 swapStruct(*dl); 1017 lc += sizeof(linkedit_data_command); 1018 } 1019 } 1020 return llvm::Error(); 1021 } 1022 1023 void MachOFileLayout::writeSectionContent() { 1024 for (const Section &s : _file.sections) { 1025 // Copy all section content to output buffer. 1026 if (isZeroFillSection(s.type)) 1027 continue; 1028 if (s.content.empty()) 1029 continue; 1030 uint32_t offset = _sectInfo[&s].fileOffset; 1031 uint8_t *p = &_buffer[offset]; 1032 memcpy(p, &s.content[0], s.content.size()); 1033 p += s.content.size(); 1034 } 1035 } 1036 1037 void MachOFileLayout::writeRelocations() { 1038 uint32_t relOffset = _startOfRelocations; 1039 for (Section sect : _file.sections) { 1040 for (Relocation r : sect.relocations) { 1041 any_relocation_info* rb = reinterpret_cast<any_relocation_info*>( 1042 &_buffer[relOffset]); 1043 *rb = packRelocation(r, _swap, _bigEndianArch); 1044 relOffset += sizeof(any_relocation_info); 1045 } 1046 } 1047 } 1048 1049 void MachOFileLayout::appendSymbols(const std::vector<Symbol> &symbols, 1050 uint32_t &symOffset, uint32_t &strOffset) { 1051 for (const Symbol &sym : symbols) { 1052 if (_is64) { 1053 nlist_64* nb = reinterpret_cast<nlist_64*>(&_buffer[symOffset]); 1054 nb->n_strx = strOffset - _startOfSymbolStrings; 1055 nb->n_type = sym.type | sym.scope; 1056 nb->n_sect = sym.sect; 1057 nb->n_desc = sym.desc; 1058 nb->n_value = sym.value; 1059 if (_swap) 1060 swapStruct(*nb); 1061 symOffset += sizeof(nlist_64); 1062 } else { 1063 nlist* nb = reinterpret_cast<nlist*>(&_buffer[symOffset]); 1064 nb->n_strx = strOffset - _startOfSymbolStrings; 1065 nb->n_type = sym.type | sym.scope; 1066 nb->n_sect = sym.sect; 1067 nb->n_desc = sym.desc; 1068 nb->n_value = sym.value; 1069 if (_swap) 1070 swapStruct(*nb); 1071 symOffset += sizeof(nlist); 1072 } 1073 memcpy(&_buffer[strOffset], sym.name.begin(), sym.name.size()); 1074 strOffset += sym.name.size(); 1075 _buffer[strOffset++] ='\0'; // Strings in table have nul terminator. 1076 } 1077 } 1078 1079 void MachOFileLayout::writeFunctionStartsInfo() { 1080 if (!_functionStartsSize) 1081 return; 1082 memcpy(&_buffer[_startOfFunctionStarts], _file.functionStarts.data(), 1083 _functionStartsSize); 1084 } 1085 1086 void MachOFileLayout::writeDataInCodeInfo() { 1087 uint32_t offset = _startOfDataInCode; 1088 for (const DataInCode &entry : _file.dataInCode) { 1089 data_in_code_entry *dst = reinterpret_cast<data_in_code_entry*>( 1090 &_buffer[offset]); 1091 dst->offset = entry.offset; 1092 dst->length = entry.length; 1093 dst->kind = entry.kind; 1094 if (_swap) 1095 swapStruct(*dst); 1096 offset += sizeof(data_in_code_entry); 1097 } 1098 } 1099 1100 void MachOFileLayout::writeSymbolTable() { 1101 // Write symbol table and symbol strings in parallel. 1102 uint32_t symOffset = _startOfSymbols; 1103 uint32_t strOffset = _startOfSymbolStrings; 1104 _buffer[strOffset++] = '\0'; // Reserve n_strx offset of zero to mean no name. 1105 appendSymbols(_file.localSymbols, symOffset, strOffset); 1106 appendSymbols(_file.globalSymbols, symOffset, strOffset); 1107 appendSymbols(_file.undefinedSymbols, symOffset, strOffset); 1108 // Write indirect symbol table array. 1109 uint32_t *indirects = reinterpret_cast<uint32_t*> 1110 (&_buffer[_startOfIndirectSymbols]); 1111 if (_file.fileType == llvm::MachO::MH_OBJECT) { 1112 // Object files have sections in same order as input normalized file. 1113 for (const Section §ion : _file.sections) { 1114 for (uint32_t index : section.indirectSymbols) { 1115 if (_swap) 1116 *indirects++ = llvm::sys::getSwappedBytes(index); 1117 else 1118 *indirects++ = index; 1119 } 1120 } 1121 } else { 1122 // Final linked images must sort sections from normalized file. 1123 for (const Segment &seg : _file.segments) { 1124 SegExtraInfo &segInfo = _segInfo[&seg]; 1125 for (const Section *section : segInfo.sections) { 1126 for (uint32_t index : section->indirectSymbols) { 1127 if (_swap) 1128 *indirects++ = llvm::sys::getSwappedBytes(index); 1129 else 1130 *indirects++ = index; 1131 } 1132 } 1133 } 1134 } 1135 } 1136 1137 void MachOFileLayout::writeRebaseInfo() { 1138 memcpy(&_buffer[_startOfRebaseInfo], _rebaseInfo.bytes(), _rebaseInfo.size()); 1139 } 1140 1141 void MachOFileLayout::writeBindingInfo() { 1142 memcpy(&_buffer[_startOfBindingInfo], 1143 _bindingInfo.bytes(), _bindingInfo.size()); 1144 } 1145 1146 void MachOFileLayout::writeLazyBindingInfo() { 1147 memcpy(&_buffer[_startOfLazyBindingInfo], 1148 _lazyBindingInfo.bytes(), _lazyBindingInfo.size()); 1149 } 1150 1151 void MachOFileLayout::writeExportInfo() { 1152 memcpy(&_buffer[_startOfExportTrie], _exportTrie.bytes(), _exportTrie.size()); 1153 } 1154 1155 void MachOFileLayout::buildLinkEditInfo() { 1156 buildRebaseInfo(); 1157 buildBindInfo(); 1158 buildLazyBindInfo(); 1159 buildExportTrie(); 1160 computeSymbolTableSizes(); 1161 computeFunctionStartsSize(); 1162 computeDataInCodeSize(); 1163 } 1164 1165 void MachOFileLayout::buildSectionRelocations() { 1166 1167 } 1168 1169 void MachOFileLayout::buildRebaseInfo() { 1170 // TODO: compress rebasing info. 1171 for (const RebaseLocation& entry : _file.rebasingInfo) { 1172 _rebaseInfo.append_byte(REBASE_OPCODE_SET_TYPE_IMM | entry.kind); 1173 _rebaseInfo.append_byte(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 1174 | entry.segIndex); 1175 _rebaseInfo.append_uleb128(entry.segOffset); 1176 _rebaseInfo.append_uleb128(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1); 1177 } 1178 _rebaseInfo.append_byte(REBASE_OPCODE_DONE); 1179 _rebaseInfo.align(_is64 ? 8 : 4); 1180 } 1181 1182 void MachOFileLayout::buildBindInfo() { 1183 // TODO: compress bind info. 1184 uint64_t lastAddend = 0; 1185 for (const BindLocation& entry : _file.bindingInfo) { 1186 _bindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind); 1187 _bindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 1188 | entry.segIndex); 1189 _bindingInfo.append_uleb128(entry.segOffset); 1190 if (entry.ordinal > 0) 1191 _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1192 (entry.ordinal & 0xF)); 1193 else 1194 _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | 1195 (entry.ordinal & 0xF)); 1196 _bindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); 1197 _bindingInfo.append_string(entry.symbolName); 1198 if (entry.addend != lastAddend) { 1199 _bindingInfo.append_byte(BIND_OPCODE_SET_ADDEND_SLEB); 1200 _bindingInfo.append_sleb128(entry.addend); 1201 lastAddend = entry.addend; 1202 } 1203 _bindingInfo.append_byte(BIND_OPCODE_DO_BIND); 1204 } 1205 _bindingInfo.append_byte(BIND_OPCODE_DONE); 1206 _bindingInfo.align(_is64 ? 8 : 4); 1207 } 1208 1209 void MachOFileLayout::buildLazyBindInfo() { 1210 for (const BindLocation& entry : _file.lazyBindingInfo) { 1211 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind); 1212 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 1213 | entry.segIndex); 1214 _lazyBindingInfo.append_uleb128Fixed(entry.segOffset, 5); 1215 if (entry.ordinal > 0) 1216 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1217 (entry.ordinal & 0xF)); 1218 else 1219 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | 1220 (entry.ordinal & 0xF)); 1221 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); 1222 _lazyBindingInfo.append_string(entry.symbolName); 1223 _lazyBindingInfo.append_byte(BIND_OPCODE_DO_BIND); 1224 _lazyBindingInfo.append_byte(BIND_OPCODE_DONE); 1225 } 1226 _lazyBindingInfo.append_byte(BIND_OPCODE_DONE); 1227 _lazyBindingInfo.align(_is64 ? 8 : 4); 1228 } 1229 1230 void TrieNode::addSymbol(const Export& entry, 1231 BumpPtrAllocator &allocator, 1232 std::vector<TrieNode*> &allNodes) { 1233 StringRef partialStr = entry.name.drop_front(_cummulativeString.size()); 1234 for (TrieEdge &edge : _children) { 1235 StringRef edgeStr = edge._subString; 1236 if (partialStr.startswith(edgeStr)) { 1237 // Already have matching edge, go down that path. 1238 edge._child->addSymbol(entry, allocator, allNodes); 1239 return; 1240 } 1241 // See if string has commmon prefix with existing edge. 1242 for (int n=edgeStr.size()-1; n > 0; --n) { 1243 if (partialStr.substr(0, n).equals(edgeStr.substr(0, n))) { 1244 // Splice in new node: was A -> C, now A -> B -> C 1245 StringRef bNodeStr = edge._child->_cummulativeString; 1246 bNodeStr = bNodeStr.drop_back(edgeStr.size()-n).copy(allocator); 1247 auto *bNode = new (allocator) TrieNode(bNodeStr); 1248 allNodes.push_back(bNode); 1249 TrieNode* cNode = edge._child; 1250 StringRef abEdgeStr = edgeStr.substr(0,n).copy(allocator); 1251 StringRef bcEdgeStr = edgeStr.substr(n).copy(allocator); 1252 DEBUG_WITH_TYPE("trie-builder", llvm::dbgs() 1253 << "splice in TrieNode('" << bNodeStr 1254 << "') between edge '" 1255 << abEdgeStr << "' and edge='" 1256 << bcEdgeStr<< "'\n"); 1257 TrieEdge& abEdge = edge; 1258 abEdge._subString = abEdgeStr; 1259 abEdge._child = bNode; 1260 auto *bcEdge = new (allocator) TrieEdge(bcEdgeStr, cNode); 1261 bNode->_children.insert(bNode->_children.end(), bcEdge); 1262 bNode->addSymbol(entry, allocator, allNodes); 1263 return; 1264 } 1265 } 1266 } 1267 if (entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { 1268 assert(entry.otherOffset != 0); 1269 } 1270 if (entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { 1271 assert(entry.otherOffset != 0); 1272 } 1273 // No commonality with any existing child, make a new edge. 1274 auto *newNode = new (allocator) TrieNode(entry.name.copy(allocator)); 1275 auto *newEdge = new (allocator) TrieEdge(partialStr, newNode); 1276 _children.insert(_children.end(), newEdge); 1277 DEBUG_WITH_TYPE("trie-builder", llvm::dbgs() 1278 << "new TrieNode('" << entry.name << "') with edge '" 1279 << partialStr << "' from node='" 1280 << _cummulativeString << "'\n"); 1281 newNode->_address = entry.offset; 1282 newNode->_flags = entry.flags | entry.kind; 1283 newNode->_other = entry.otherOffset; 1284 if ((entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && !entry.otherName.empty()) 1285 newNode->_importedName = entry.otherName.copy(allocator); 1286 newNode->_hasExportInfo = true; 1287 allNodes.push_back(newNode); 1288 } 1289 1290 bool TrieNode::updateOffset(uint32_t& offset) { 1291 uint32_t nodeSize = 1; // Length when no export info 1292 if (_hasExportInfo) { 1293 if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { 1294 nodeSize = llvm::getULEB128Size(_flags); 1295 nodeSize += llvm::getULEB128Size(_other); // Other contains ordinal. 1296 nodeSize += _importedName.size(); 1297 ++nodeSize; // Trailing zero in imported name. 1298 } else { 1299 nodeSize = llvm::getULEB128Size(_flags) + llvm::getULEB128Size(_address); 1300 if (_flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) 1301 nodeSize += llvm::getULEB128Size(_other); 1302 } 1303 // Overall node size so far is uleb128 of export info + actual export info. 1304 nodeSize += llvm::getULEB128Size(nodeSize); 1305 } 1306 // Compute size of all child edges. 1307 ++nodeSize; // Byte for number of chidren. 1308 for (TrieEdge &edge : _children) { 1309 nodeSize += edge._subString.size() + 1 // String length. 1310 + llvm::getULEB128Size(edge._child->_trieOffset); // Offset len. 1311 } 1312 // On input, 'offset' is new prefered location for this node. 1313 bool result = (_trieOffset != offset); 1314 // Store new location in node object for use by parents. 1315 _trieOffset = offset; 1316 // Update offset for next iteration. 1317 offset += nodeSize; 1318 // Return true if _trieOffset was changed. 1319 return result; 1320 } 1321 1322 void TrieNode::appendToByteBuffer(ByteBuffer &out) { 1323 if (_hasExportInfo) { 1324 if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { 1325 if (!_importedName.empty()) { 1326 // nodes with re-export info: size, flags, ordinal, import-name 1327 uint32_t nodeSize = llvm::getULEB128Size(_flags) 1328 + llvm::getULEB128Size(_other) 1329 + _importedName.size() + 1; 1330 assert(nodeSize < 256); 1331 out.append_byte(nodeSize); 1332 out.append_uleb128(_flags); 1333 out.append_uleb128(_other); 1334 out.append_string(_importedName); 1335 } else { 1336 // nodes without re-export info: size, flags, ordinal, empty-string 1337 uint32_t nodeSize = llvm::getULEB128Size(_flags) 1338 + llvm::getULEB128Size(_other) + 1; 1339 assert(nodeSize < 256); 1340 out.append_byte(nodeSize); 1341 out.append_uleb128(_flags); 1342 out.append_uleb128(_other); 1343 out.append_byte(0); 1344 } 1345 } else if ( _flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) { 1346 // Nodes with export info: size, flags, address, other 1347 uint32_t nodeSize = llvm::getULEB128Size(_flags) 1348 + llvm::getULEB128Size(_address) 1349 + llvm::getULEB128Size(_other); 1350 assert(nodeSize < 256); 1351 out.append_byte(nodeSize); 1352 out.append_uleb128(_flags); 1353 out.append_uleb128(_address); 1354 out.append_uleb128(_other); 1355 } else { 1356 // Nodes with export info: size, flags, address 1357 uint32_t nodeSize = llvm::getULEB128Size(_flags) 1358 + llvm::getULEB128Size(_address); 1359 assert(nodeSize < 256); 1360 out.append_byte(nodeSize); 1361 out.append_uleb128(_flags); 1362 out.append_uleb128(_address); 1363 } 1364 } else { 1365 // Node with no export info. 1366 uint32_t nodeSize = 0; 1367 out.append_byte(nodeSize); 1368 } 1369 // Add number of children. 1370 assert(_children.size() < 256); 1371 out.append_byte(_children.size()); 1372 // Append each child edge substring and node offset. 1373 for (TrieEdge &edge : _children) { 1374 out.append_string(edge._subString); 1375 out.append_uleb128(edge._child->_trieOffset); 1376 } 1377 } 1378 1379 void MachOFileLayout::buildExportTrie() { 1380 if (_file.exportInfo.empty()) 1381 return; 1382 1383 // For all temporary strings and objects used building trie. 1384 BumpPtrAllocator allocator; 1385 1386 // Build trie of all exported symbols. 1387 auto *rootNode = new (allocator) TrieNode(StringRef()); 1388 std::vector<TrieNode*> allNodes; 1389 allNodes.reserve(_file.exportInfo.size()*2); 1390 allNodes.push_back(rootNode); 1391 for (const Export& entry : _file.exportInfo) { 1392 rootNode->addSymbol(entry, allocator, allNodes); 1393 } 1394 1395 // Assign each node in the vector an offset in the trie stream, iterating 1396 // until all uleb128 sizes have stabilized. 1397 bool more; 1398 do { 1399 uint32_t offset = 0; 1400 more = false; 1401 for (TrieNode* node : allNodes) { 1402 if (node->updateOffset(offset)) 1403 more = true; 1404 } 1405 } while (more); 1406 1407 // Serialize trie to ByteBuffer. 1408 for (TrieNode* node : allNodes) { 1409 node->appendToByteBuffer(_exportTrie); 1410 } 1411 _exportTrie.align(_is64 ? 8 : 4); 1412 } 1413 1414 void MachOFileLayout::computeSymbolTableSizes() { 1415 // MachO symbol tables have three ranges: locals, globals, and undefines 1416 const size_t nlistSize = (_is64 ? sizeof(nlist_64) : sizeof(nlist)); 1417 _symbolTableSize = nlistSize * (_file.localSymbols.size() 1418 + _file.globalSymbols.size() 1419 + _file.undefinedSymbols.size()); 1420 _symbolStringPoolSize = 1; // Always reserve 1-byte for the empty string. 1421 for (const Symbol &sym : _file.localSymbols) { 1422 _symbolStringPoolSize += (sym.name.size()+1); 1423 } 1424 for (const Symbol &sym : _file.globalSymbols) { 1425 _symbolStringPoolSize += (sym.name.size()+1); 1426 } 1427 for (const Symbol &sym : _file.undefinedSymbols) { 1428 _symbolStringPoolSize += (sym.name.size()+1); 1429 } 1430 _symbolTableLocalsStartIndex = 0; 1431 _symbolTableGlobalsStartIndex = _file.localSymbols.size(); 1432 _symbolTableUndefinesStartIndex = _symbolTableGlobalsStartIndex 1433 + _file.globalSymbols.size(); 1434 1435 _indirectSymbolTableCount = 0; 1436 for (const Section § : _file.sections) { 1437 _indirectSymbolTableCount += sect.indirectSymbols.size(); 1438 } 1439 } 1440 1441 void MachOFileLayout::computeFunctionStartsSize() { 1442 _functionStartsSize = _file.functionStarts.size(); 1443 } 1444 1445 void MachOFileLayout::computeDataInCodeSize() { 1446 _dataInCodeSize = _file.dataInCode.size() * sizeof(data_in_code_entry); 1447 } 1448 1449 void MachOFileLayout::writeLinkEditContent() { 1450 if (_file.fileType == llvm::MachO::MH_OBJECT) { 1451 writeRelocations(); 1452 writeFunctionStartsInfo(); 1453 writeDataInCodeInfo(); 1454 writeSymbolTable(); 1455 } else { 1456 writeRebaseInfo(); 1457 writeBindingInfo(); 1458 writeLazyBindingInfo(); 1459 // TODO: add weak binding info 1460 writeExportInfo(); 1461 writeFunctionStartsInfo(); 1462 writeDataInCodeInfo(); 1463 writeSymbolTable(); 1464 } 1465 } 1466 1467 llvm::Error MachOFileLayout::writeBinary(StringRef path) { 1468 // Check for pending error from constructor. 1469 if (_ec) 1470 return llvm::errorCodeToError(_ec); 1471 // Create FileOutputBuffer with calculated size. 1472 unsigned flags = 0; 1473 if (_file.fileType != llvm::MachO::MH_OBJECT) 1474 flags = llvm::FileOutputBuffer::F_executable; 1475 ErrorOr<std::unique_ptr<llvm::FileOutputBuffer>> fobOrErr = 1476 llvm::FileOutputBuffer::create(path, size(), flags); 1477 if (std::error_code ec = fobOrErr.getError()) 1478 return llvm::errorCodeToError(ec); 1479 std::unique_ptr<llvm::FileOutputBuffer> &fob = *fobOrErr; 1480 // Write content. 1481 _buffer = fob->getBufferStart(); 1482 writeMachHeader(); 1483 if (auto ec = writeLoadCommands()) 1484 return ec; 1485 writeSectionContent(); 1486 writeLinkEditContent(); 1487 fob->commit(); 1488 1489 return llvm::Error(); 1490 } 1491 1492 /// Takes in-memory normalized view and writes a mach-o object file. 1493 llvm::Error writeBinary(const NormalizedFile &file, StringRef path) { 1494 MachOFileLayout layout(file); 1495 return layout.writeBinary(path); 1496 } 1497 1498 } // namespace normalized 1499 } // namespace mach_o 1500 } // namespace lld 1501