1 //===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp ---------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 /// 11 /// \file For mach-o object files, this implementation converts normalized 12 /// mach-o in memory to mach-o binary on disk. 13 /// 14 /// +---------------+ 15 /// | binary mach-o | 16 /// +---------------+ 17 /// ^ 18 /// | 19 /// | 20 /// +------------+ 21 /// | normalized | 22 /// +------------+ 23 24 #include "MachONormalizedFile.h" 25 #include "MachONormalizedFileBinaryUtils.h" 26 #include "lld/Common/LLVM.h" 27 #include "lld/Core/Error.h" 28 #include "llvm/ADT/SmallString.h" 29 #include "llvm/ADT/SmallVector.h" 30 #include "llvm/ADT/StringRef.h" 31 #include "llvm/ADT/ilist.h" 32 #include "llvm/ADT/ilist_node.h" 33 #include "llvm/BinaryFormat/MachO.h" 34 #include "llvm/Support/Casting.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/Errc.h" 37 #include "llvm/Support/ErrorHandling.h" 38 #include "llvm/Support/FileOutputBuffer.h" 39 #include "llvm/Support/Format.h" 40 #include "llvm/Support/Host.h" 41 #include "llvm/Support/MemoryBuffer.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include <functional> 44 #include <list> 45 #include <map> 46 #include <system_error> 47 48 using namespace llvm::MachO; 49 50 namespace lld { 51 namespace mach_o { 52 namespace normalized { 53 54 struct TrieNode; // Forward declaration. 55 56 struct TrieEdge : public llvm::ilist_node<TrieEdge> { 57 TrieEdge(StringRef s, TrieNode *node) : _subString(s), _child(node) {} 58 59 StringRef _subString; 60 struct TrieNode *_child; 61 }; 62 63 } // namespace normalized 64 } // namespace mach_o 65 } // namespace lld 66 67 68 namespace llvm { 69 using lld::mach_o::normalized::TrieEdge; 70 template <> 71 struct ilist_alloc_traits<TrieEdge> : ilist_noalloc_traits<TrieEdge> {}; 72 } // namespace llvm 73 74 75 namespace lld { 76 namespace mach_o { 77 namespace normalized { 78 79 struct TrieNode { 80 typedef llvm::ilist<TrieEdge> TrieEdgeList; 81 82 TrieNode(StringRef s) 83 : _cummulativeString(s), _address(0), _flags(0), _other(0), 84 _trieOffset(0), _hasExportInfo(false) {} 85 ~TrieNode() = default; 86 87 void addSymbol(const Export &entry, BumpPtrAllocator &allocator, 88 std::vector<TrieNode *> &allNodes); 89 90 void addOrderedNodes(const Export &entry, 91 std::vector<TrieNode *> &allNodes); 92 bool updateOffset(uint32_t &offset); 93 void appendToByteBuffer(ByteBuffer &out); 94 95 private: 96 StringRef _cummulativeString; 97 TrieEdgeList _children; 98 uint64_t _address; 99 uint64_t _flags; 100 uint64_t _other; 101 StringRef _importedName; 102 uint32_t _trieOffset; 103 bool _hasExportInfo; 104 bool _ordered = false; 105 }; 106 107 /// Utility class for writing a mach-o binary file given an in-memory 108 /// normalized file. 109 class MachOFileLayout { 110 public: 111 /// All layout computation is done in the constructor. 112 MachOFileLayout(const NormalizedFile &file); 113 114 /// Returns the final file size as computed in the constructor. 115 size_t size() const; 116 117 // Returns size of the mach_header and load commands. 118 size_t headerAndLoadCommandsSize() const; 119 120 /// Writes the normalized file as a binary mach-o file to the specified 121 /// path. This does not have a stream interface because the generated 122 /// file may need the 'x' bit set. 123 llvm::Error writeBinary(StringRef path); 124 125 private: 126 uint32_t loadCommandsSize(uint32_t &count); 127 void buildFileOffsets(); 128 void writeMachHeader(); 129 llvm::Error writeLoadCommands(); 130 void writeSectionContent(); 131 void writeRelocations(); 132 void writeSymbolTable(); 133 void writeRebaseInfo(); 134 void writeBindingInfo(); 135 void writeLazyBindingInfo(); 136 void writeExportInfo(); 137 void writeFunctionStartsInfo(); 138 void writeDataInCodeInfo(); 139 void writeLinkEditContent(); 140 void buildLinkEditInfo(); 141 void buildRebaseInfo(); 142 void buildBindInfo(); 143 void buildLazyBindInfo(); 144 void buildExportTrie(); 145 void computeFunctionStartsSize(); 146 void computeDataInCodeSize(); 147 void computeSymbolTableSizes(); 148 void buildSectionRelocations(); 149 void appendSymbols(const std::vector<Symbol> &symbols, 150 uint32_t &symOffset, uint32_t &strOffset); 151 uint32_t indirectSymbolIndex(const Section §, uint32_t &index); 152 uint32_t indirectSymbolElementSize(const Section §); 153 154 // For use as template parameter to load command methods. 155 struct MachO64Trait { 156 typedef llvm::MachO::segment_command_64 command; 157 typedef llvm::MachO::section_64 section; 158 enum { LC = llvm::MachO::LC_SEGMENT_64 }; 159 }; 160 161 // For use as template parameter to load command methods. 162 struct MachO32Trait { 163 typedef llvm::MachO::segment_command command; 164 typedef llvm::MachO::section section; 165 enum { LC = llvm::MachO::LC_SEGMENT }; 166 }; 167 168 template <typename T> 169 llvm::Error writeSingleSegmentLoadCommand(uint8_t *&lc); 170 template <typename T> llvm::Error writeSegmentLoadCommands(uint8_t *&lc); 171 172 uint32_t pointerAlign(uint32_t value); 173 static StringRef dyldPath(); 174 175 struct SegExtraInfo { 176 uint32_t fileOffset; 177 uint32_t fileSize; 178 std::vector<const Section*> sections; 179 }; 180 typedef std::map<const Segment*, SegExtraInfo> SegMap; 181 struct SectionExtraInfo { 182 uint32_t fileOffset; 183 }; 184 typedef std::map<const Section*, SectionExtraInfo> SectionMap; 185 186 const NormalizedFile &_file; 187 std::error_code _ec; 188 uint8_t *_buffer; 189 const bool _is64; 190 const bool _swap; 191 const bool _bigEndianArch; 192 uint64_t _seg1addr; 193 uint32_t _startOfLoadCommands; 194 uint32_t _countOfLoadCommands; 195 uint32_t _endOfLoadCommands; 196 uint32_t _startOfRelocations; 197 uint32_t _startOfFunctionStarts; 198 uint32_t _startOfDataInCode; 199 uint32_t _startOfSymbols; 200 uint32_t _startOfIndirectSymbols; 201 uint32_t _startOfSymbolStrings; 202 uint32_t _endOfSymbolStrings; 203 uint32_t _symbolTableLocalsStartIndex; 204 uint32_t _symbolTableGlobalsStartIndex; 205 uint32_t _symbolTableUndefinesStartIndex; 206 uint32_t _symbolStringPoolSize; 207 uint32_t _symbolTableSize; 208 uint32_t _functionStartsSize; 209 uint32_t _dataInCodeSize; 210 uint32_t _indirectSymbolTableCount; 211 // Used in object file creation only 212 uint32_t _startOfSectionsContent; 213 uint32_t _endOfSectionsContent; 214 // Used in final linked image only 215 uint32_t _startOfLinkEdit; 216 uint32_t _startOfRebaseInfo; 217 uint32_t _endOfRebaseInfo; 218 uint32_t _startOfBindingInfo; 219 uint32_t _endOfBindingInfo; 220 uint32_t _startOfLazyBindingInfo; 221 uint32_t _endOfLazyBindingInfo; 222 uint32_t _startOfExportTrie; 223 uint32_t _endOfExportTrie; 224 uint32_t _endOfLinkEdit; 225 uint64_t _addressOfLinkEdit; 226 SegMap _segInfo; 227 SectionMap _sectInfo; 228 ByteBuffer _rebaseInfo; 229 ByteBuffer _bindingInfo; 230 ByteBuffer _lazyBindingInfo; 231 ByteBuffer _weakBindingInfo; 232 ByteBuffer _exportTrie; 233 }; 234 235 size_t headerAndLoadCommandsSize(const NormalizedFile &file) { 236 MachOFileLayout layout(file); 237 return layout.headerAndLoadCommandsSize(); 238 } 239 240 StringRef MachOFileLayout::dyldPath() { 241 return "/usr/lib/dyld"; 242 } 243 244 uint32_t MachOFileLayout::pointerAlign(uint32_t value) { 245 return llvm::alignTo(value, _is64 ? 8 : 4); 246 } 247 248 249 size_t MachOFileLayout::headerAndLoadCommandsSize() const { 250 return _endOfLoadCommands; 251 } 252 253 MachOFileLayout::MachOFileLayout(const NormalizedFile &file) 254 : _file(file), 255 _is64(MachOLinkingContext::is64Bit(file.arch)), 256 _swap(!MachOLinkingContext::isHostEndian(file.arch)), 257 _bigEndianArch(MachOLinkingContext::isBigEndian(file.arch)), 258 _seg1addr(INT64_MAX) { 259 _startOfLoadCommands = _is64 ? sizeof(mach_header_64) : sizeof(mach_header); 260 const size_t segCommandBaseSize = 261 (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); 262 const size_t sectsSize = (_is64 ? sizeof(section_64) : sizeof(section)); 263 if (file.fileType == llvm::MachO::MH_OBJECT) { 264 // object files have just one segment load command containing all sections 265 _endOfLoadCommands = _startOfLoadCommands 266 + segCommandBaseSize 267 + file.sections.size() * sectsSize 268 + sizeof(symtab_command); 269 _countOfLoadCommands = 2; 270 if (file.hasMinVersionLoadCommand) { 271 _endOfLoadCommands += sizeof(version_min_command); 272 _countOfLoadCommands++; 273 } 274 if (!_file.functionStarts.empty()) { 275 _endOfLoadCommands += sizeof(linkedit_data_command); 276 _countOfLoadCommands++; 277 } 278 if (_file.generateDataInCodeLoadCommand) { 279 _endOfLoadCommands += sizeof(linkedit_data_command); 280 _countOfLoadCommands++; 281 } 282 // Assign file offsets to each section. 283 _startOfSectionsContent = _endOfLoadCommands; 284 unsigned relocCount = 0; 285 uint64_t offset = _startOfSectionsContent; 286 for (const Section § : file.sections) { 287 if (isZeroFillSection(sect.type)) 288 _sectInfo[§].fileOffset = 0; 289 else { 290 offset = llvm::alignTo(offset, sect.alignment); 291 _sectInfo[§].fileOffset = offset; 292 offset += sect.content.size(); 293 } 294 relocCount += sect.relocations.size(); 295 } 296 _endOfSectionsContent = offset; 297 298 computeSymbolTableSizes(); 299 computeFunctionStartsSize(); 300 computeDataInCodeSize(); 301 302 // Align start of relocations. 303 _startOfRelocations = pointerAlign(_endOfSectionsContent); 304 _startOfFunctionStarts = _startOfRelocations + relocCount * 8; 305 _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize; 306 _startOfSymbols = _startOfDataInCode + _dataInCodeSize; 307 // Add Indirect symbol table. 308 _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; 309 // Align start of symbol table and symbol strings. 310 _startOfSymbolStrings = _startOfIndirectSymbols 311 + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); 312 _endOfSymbolStrings = _startOfSymbolStrings 313 + pointerAlign(_symbolStringPoolSize); 314 _endOfLinkEdit = _endOfSymbolStrings; 315 DEBUG_WITH_TYPE("MachOFileLayout", 316 llvm::dbgs() << "MachOFileLayout()\n" 317 << " startOfLoadCommands=" << _startOfLoadCommands << "\n" 318 << " countOfLoadCommands=" << _countOfLoadCommands << "\n" 319 << " endOfLoadCommands=" << _endOfLoadCommands << "\n" 320 << " startOfRelocations=" << _startOfRelocations << "\n" 321 << " startOfSymbols=" << _startOfSymbols << "\n" 322 << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" 323 << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" 324 << " startOfSectionsContent=" << _startOfSectionsContent << "\n" 325 << " endOfSectionsContent=" << _endOfSectionsContent << "\n"); 326 } else { 327 // Final linked images have one load command per segment. 328 _endOfLoadCommands = _startOfLoadCommands 329 + loadCommandsSize(_countOfLoadCommands); 330 331 // Assign section file offsets. 332 buildFileOffsets(); 333 buildLinkEditInfo(); 334 335 // LINKEDIT of final linked images has in order: 336 // rebase info, binding info, lazy binding info, weak binding info, 337 // data-in-code, symbol table, indirect symbol table, symbol table strings. 338 _startOfRebaseInfo = _startOfLinkEdit; 339 _endOfRebaseInfo = _startOfRebaseInfo + _rebaseInfo.size(); 340 _startOfBindingInfo = _endOfRebaseInfo; 341 _endOfBindingInfo = _startOfBindingInfo + _bindingInfo.size(); 342 _startOfLazyBindingInfo = _endOfBindingInfo; 343 _endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size(); 344 _startOfExportTrie = _endOfLazyBindingInfo; 345 _endOfExportTrie = _startOfExportTrie + _exportTrie.size(); 346 _startOfFunctionStarts = _endOfExportTrie; 347 _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize; 348 _startOfSymbols = _startOfDataInCode + _dataInCodeSize; 349 _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; 350 _startOfSymbolStrings = _startOfIndirectSymbols 351 + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); 352 _endOfSymbolStrings = _startOfSymbolStrings 353 + pointerAlign(_symbolStringPoolSize); 354 _endOfLinkEdit = _endOfSymbolStrings; 355 DEBUG_WITH_TYPE("MachOFileLayout", 356 llvm::dbgs() << "MachOFileLayout()\n" 357 << " startOfLoadCommands=" << _startOfLoadCommands << "\n" 358 << " countOfLoadCommands=" << _countOfLoadCommands << "\n" 359 << " endOfLoadCommands=" << _endOfLoadCommands << "\n" 360 << " startOfLinkEdit=" << _startOfLinkEdit << "\n" 361 << " startOfRebaseInfo=" << _startOfRebaseInfo << "\n" 362 << " endOfRebaseInfo=" << _endOfRebaseInfo << "\n" 363 << " startOfBindingInfo=" << _startOfBindingInfo << "\n" 364 << " endOfBindingInfo=" << _endOfBindingInfo << "\n" 365 << " startOfLazyBindingInfo=" << _startOfLazyBindingInfo << "\n" 366 << " endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n" 367 << " startOfExportTrie=" << _startOfExportTrie << "\n" 368 << " endOfExportTrie=" << _endOfExportTrie << "\n" 369 << " startOfFunctionStarts=" << _startOfFunctionStarts << "\n" 370 << " startOfDataInCode=" << _startOfDataInCode << "\n" 371 << " startOfSymbols=" << _startOfSymbols << "\n" 372 << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" 373 << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" 374 << " addressOfLinkEdit=" << _addressOfLinkEdit << "\n"); 375 } 376 } 377 378 uint32_t MachOFileLayout::loadCommandsSize(uint32_t &count) { 379 uint32_t size = 0; 380 count = 0; 381 382 const size_t segCommandSize = 383 (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); 384 const size_t sectionSize = (_is64 ? sizeof(section_64) : sizeof(section)); 385 386 // Add LC_SEGMENT for each segment. 387 size += _file.segments.size() * segCommandSize; 388 count += _file.segments.size(); 389 // Add section record for each section. 390 size += _file.sections.size() * sectionSize; 391 392 // If creating a dylib, add LC_ID_DYLIB. 393 if (_file.fileType == llvm::MachO::MH_DYLIB) { 394 size += sizeof(dylib_command) + pointerAlign(_file.installName.size() + 1); 395 ++count; 396 } 397 398 // Add LC_DYLD_INFO 399 size += sizeof(dyld_info_command); 400 ++count; 401 402 // Add LC_SYMTAB 403 size += sizeof(symtab_command); 404 ++count; 405 406 // Add LC_DYSYMTAB 407 if (_file.fileType != llvm::MachO::MH_PRELOAD) { 408 size += sizeof(dysymtab_command); 409 ++count; 410 } 411 412 // If main executable add LC_LOAD_DYLINKER 413 if (_file.fileType == llvm::MachO::MH_EXECUTE) { 414 size += pointerAlign(sizeof(dylinker_command) + dyldPath().size()+1); 415 ++count; 416 } 417 418 // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS, 419 // LC_VERSION_MIN_TVOS 420 if (_file.hasMinVersionLoadCommand) { 421 size += sizeof(version_min_command); 422 ++count; 423 } 424 425 // Add LC_SOURCE_VERSION 426 size += sizeof(source_version_command); 427 ++count; 428 429 // If main executable add LC_MAIN 430 if (_file.fileType == llvm::MachO::MH_EXECUTE) { 431 size += sizeof(entry_point_command); 432 ++count; 433 } 434 435 // Add LC_LOAD_DYLIB for each dependent dylib. 436 for (const DependentDylib &dep : _file.dependentDylibs) { 437 size += sizeof(dylib_command) + pointerAlign(dep.path.size()+1); 438 ++count; 439 } 440 441 // Add LC_RPATH 442 for (const StringRef &path : _file.rpaths) { 443 size += pointerAlign(sizeof(rpath_command) + path.size() + 1); 444 ++count; 445 } 446 447 // Add LC_FUNCTION_STARTS if needed 448 if (!_file.functionStarts.empty()) { 449 size += sizeof(linkedit_data_command); 450 ++count; 451 } 452 453 // Add LC_DATA_IN_CODE if requested. Note, we do encode zero length entries. 454 // FIXME: Zero length entries is only to match ld64. Should we change this? 455 if (_file.generateDataInCodeLoadCommand) { 456 size += sizeof(linkedit_data_command); 457 ++count; 458 } 459 460 return size; 461 } 462 463 static bool overlaps(const Segment &s1, const Segment &s2) { 464 if (s2.address >= s1.address+s1.size) 465 return false; 466 if (s1.address >= s2.address+s2.size) 467 return false; 468 return true; 469 } 470 471 static bool overlaps(const Section &s1, const Section &s2) { 472 if (s2.address >= s1.address+s1.content.size()) 473 return false; 474 if (s1.address >= s2.address+s2.content.size()) 475 return false; 476 return true; 477 } 478 479 void MachOFileLayout::buildFileOffsets() { 480 // Verify no segments overlap 481 for (const Segment &sg1 : _file.segments) { 482 for (const Segment &sg2 : _file.segments) { 483 if (&sg1 == &sg2) 484 continue; 485 if (overlaps(sg1,sg2)) { 486 _ec = make_error_code(llvm::errc::executable_format_error); 487 return; 488 } 489 } 490 } 491 492 // Verify no sections overlap 493 for (const Section &s1 : _file.sections) { 494 for (const Section &s2 : _file.sections) { 495 if (&s1 == &s2) 496 continue; 497 if (overlaps(s1,s2)) { 498 _ec = make_error_code(llvm::errc::executable_format_error); 499 return; 500 } 501 } 502 } 503 504 // Build side table of extra info about segments and sections. 505 SegExtraInfo t; 506 t.fileOffset = 0; 507 for (const Segment &sg : _file.segments) { 508 _segInfo[&sg] = t; 509 } 510 SectionExtraInfo t2; 511 t2.fileOffset = 0; 512 // Assign sections to segments. 513 for (const Section &s : _file.sections) { 514 _sectInfo[&s] = t2; 515 bool foundSegment = false; 516 for (const Segment &sg : _file.segments) { 517 if (sg.name.equals(s.segmentName)) { 518 if ((s.address >= sg.address) 519 && (s.address+s.content.size() <= sg.address+sg.size)) { 520 _segInfo[&sg].sections.push_back(&s); 521 foundSegment = true; 522 break; 523 } 524 } 525 } 526 if (!foundSegment) { 527 _ec = make_error_code(llvm::errc::executable_format_error); 528 return; 529 } 530 } 531 532 // Assign file offsets. 533 uint32_t fileOffset = 0; 534 DEBUG_WITH_TYPE("MachOFileLayout", 535 llvm::dbgs() << "buildFileOffsets()\n"); 536 for (const Segment &sg : _file.segments) { 537 _segInfo[&sg].fileOffset = fileOffset; 538 if ((_seg1addr == INT64_MAX) && sg.init_access) 539 _seg1addr = sg.address; 540 DEBUG_WITH_TYPE("MachOFileLayout", 541 llvm::dbgs() << " segment=" << sg.name 542 << ", fileOffset=" << _segInfo[&sg].fileOffset << "\n"); 543 544 uint32_t segFileSize = 0; 545 // A segment that is not zero-fill must use a least one page of disk space. 546 if (sg.init_access) 547 segFileSize = _file.pageSize; 548 for (const Section *s : _segInfo[&sg].sections) { 549 uint32_t sectOffset = s->address - sg.address; 550 uint32_t sectFileSize = 551 isZeroFillSection(s->type) ? 0 : s->content.size(); 552 segFileSize = std::max(segFileSize, sectOffset + sectFileSize); 553 554 _sectInfo[s].fileOffset = _segInfo[&sg].fileOffset + sectOffset; 555 DEBUG_WITH_TYPE("MachOFileLayout", 556 llvm::dbgs() << " section=" << s->sectionName 557 << ", fileOffset=" << fileOffset << "\n"); 558 } 559 560 // round up all segments to page aligned, except __LINKEDIT 561 if (!sg.name.equals("__LINKEDIT")) { 562 _segInfo[&sg].fileSize = llvm::alignTo(segFileSize, _file.pageSize); 563 fileOffset = llvm::alignTo(fileOffset + segFileSize, _file.pageSize); 564 } 565 _addressOfLinkEdit = sg.address + sg.size; 566 } 567 _startOfLinkEdit = fileOffset; 568 } 569 570 size_t MachOFileLayout::size() const { 571 return _endOfSymbolStrings; 572 } 573 574 void MachOFileLayout::writeMachHeader() { 575 auto cpusubtype = MachOLinkingContext::cpuSubtypeFromArch(_file.arch); 576 // dynamic x86 executables on newer OS version should also set the 577 // CPU_SUBTYPE_LIB64 mask in the CPU subtype. 578 // FIXME: Check that this is a dynamic executable, not a static one. 579 if (_file.fileType == llvm::MachO::MH_EXECUTE && 580 cpusubtype == CPU_SUBTYPE_X86_64_ALL && 581 _file.os == MachOLinkingContext::OS::macOSX) { 582 uint32_t version; 583 bool failed = MachOLinkingContext::parsePackedVersion("10.5", version); 584 if (!failed && _file.minOSverson >= version) 585 cpusubtype |= CPU_SUBTYPE_LIB64; 586 } 587 588 mach_header *mh = reinterpret_cast<mach_header*>(_buffer); 589 mh->magic = _is64 ? llvm::MachO::MH_MAGIC_64 : llvm::MachO::MH_MAGIC; 590 mh->cputype = MachOLinkingContext::cpuTypeFromArch(_file.arch); 591 mh->cpusubtype = cpusubtype; 592 mh->filetype = _file.fileType; 593 mh->ncmds = _countOfLoadCommands; 594 mh->sizeofcmds = _endOfLoadCommands - _startOfLoadCommands; 595 mh->flags = _file.flags; 596 if (_swap) 597 swapStruct(*mh); 598 } 599 600 uint32_t MachOFileLayout::indirectSymbolIndex(const Section §, 601 uint32_t &index) { 602 if (sect.indirectSymbols.empty()) 603 return 0; 604 uint32_t result = index; 605 index += sect.indirectSymbols.size(); 606 return result; 607 } 608 609 uint32_t MachOFileLayout::indirectSymbolElementSize(const Section §) { 610 if (sect.indirectSymbols.empty()) 611 return 0; 612 if (sect.type != S_SYMBOL_STUBS) 613 return 0; 614 return sect.content.size() / sect.indirectSymbols.size(); 615 } 616 617 template <typename T> 618 llvm::Error MachOFileLayout::writeSingleSegmentLoadCommand(uint8_t *&lc) { 619 typename T::command* seg = reinterpret_cast<typename T::command*>(lc); 620 seg->cmd = T::LC; 621 seg->cmdsize = sizeof(typename T::command) 622 + _file.sections.size() * sizeof(typename T::section); 623 uint8_t *next = lc + seg->cmdsize; 624 memset(seg->segname, 0, 16); 625 seg->vmaddr = 0; 626 seg->vmsize = _file.sections.back().address 627 + _file.sections.back().content.size(); 628 seg->fileoff = _endOfLoadCommands; 629 seg->filesize = _sectInfo[&_file.sections.back()].fileOffset + 630 _file.sections.back().content.size() - 631 _sectInfo[&_file.sections.front()].fileOffset; 632 seg->maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; 633 seg->initprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; 634 seg->nsects = _file.sections.size(); 635 seg->flags = 0; 636 if (_swap) 637 swapStruct(*seg); 638 typename T::section *sout = reinterpret_cast<typename T::section*> 639 (lc+sizeof(typename T::command)); 640 uint32_t relOffset = _startOfRelocations; 641 uint32_t indirectSymRunningIndex = 0; 642 for (const Section &sin : _file.sections) { 643 setString16(sin.sectionName, sout->sectname); 644 setString16(sin.segmentName, sout->segname); 645 sout->addr = sin.address; 646 sout->size = sin.content.size(); 647 sout->offset = _sectInfo[&sin].fileOffset; 648 sout->align = llvm::Log2_32(sin.alignment); 649 sout->reloff = sin.relocations.empty() ? 0 : relOffset; 650 sout->nreloc = sin.relocations.size(); 651 sout->flags = sin.type | sin.attributes; 652 sout->reserved1 = indirectSymbolIndex(sin, indirectSymRunningIndex); 653 sout->reserved2 = indirectSymbolElementSize(sin); 654 relOffset += sin.relocations.size() * sizeof(any_relocation_info); 655 if (_swap) 656 swapStruct(*sout); 657 ++sout; 658 } 659 lc = next; 660 return llvm::Error::success(); 661 } 662 663 template <typename T> 664 llvm::Error MachOFileLayout::writeSegmentLoadCommands(uint8_t *&lc) { 665 uint32_t indirectSymRunningIndex = 0; 666 for (const Segment &seg : _file.segments) { 667 // Link edit has no sections and a custom range of address, so handle it 668 // specially. 669 SegExtraInfo &segInfo = _segInfo[&seg]; 670 if (seg.name.equals("__LINKEDIT")) { 671 size_t linkeditSize = _endOfLinkEdit - _startOfLinkEdit; 672 typename T::command* cmd = reinterpret_cast<typename T::command*>(lc); 673 cmd->cmd = T::LC; 674 cmd->cmdsize = sizeof(typename T::command); 675 uint8_t *next = lc + cmd->cmdsize; 676 setString16("__LINKEDIT", cmd->segname); 677 cmd->vmaddr = _addressOfLinkEdit; 678 cmd->vmsize = llvm::alignTo(linkeditSize, _file.pageSize); 679 cmd->fileoff = _startOfLinkEdit; 680 cmd->filesize = linkeditSize; 681 cmd->initprot = seg.init_access; 682 cmd->maxprot = seg.max_access; 683 cmd->nsects = 0; 684 cmd->flags = 0; 685 if (_swap) 686 swapStruct(*cmd); 687 lc = next; 688 continue; 689 } 690 // Write segment command with trailing sections. 691 typename T::command* cmd = reinterpret_cast<typename T::command*>(lc); 692 cmd->cmd = T::LC; 693 cmd->cmdsize = sizeof(typename T::command) 694 + segInfo.sections.size() * sizeof(typename T::section); 695 uint8_t *next = lc + cmd->cmdsize; 696 setString16(seg.name, cmd->segname); 697 cmd->vmaddr = seg.address; 698 cmd->vmsize = seg.size; 699 cmd->fileoff = segInfo.fileOffset; 700 cmd->filesize = segInfo.fileSize; 701 cmd->initprot = seg.init_access; 702 cmd->maxprot = seg.max_access; 703 cmd->nsects = segInfo.sections.size(); 704 cmd->flags = 0; 705 if (_swap) 706 swapStruct(*cmd); 707 typename T::section *sect = reinterpret_cast<typename T::section*> 708 (lc+sizeof(typename T::command)); 709 for (const Section *section : segInfo.sections) { 710 setString16(section->sectionName, sect->sectname); 711 setString16(section->segmentName, sect->segname); 712 sect->addr = section->address; 713 sect->size = section->content.size(); 714 if (isZeroFillSection(section->type)) 715 sect->offset = 0; 716 else 717 sect->offset = section->address - seg.address + segInfo.fileOffset; 718 sect->align = llvm::Log2_32(section->alignment); 719 sect->reloff = 0; 720 sect->nreloc = 0; 721 sect->flags = section->type | section->attributes; 722 sect->reserved1 = indirectSymbolIndex(*section, indirectSymRunningIndex); 723 sect->reserved2 = indirectSymbolElementSize(*section); 724 if (_swap) 725 swapStruct(*sect); 726 ++sect; 727 } 728 lc = reinterpret_cast<uint8_t*>(next); 729 } 730 return llvm::Error::success(); 731 } 732 733 static void writeVersionMinLoadCommand(const NormalizedFile &_file, 734 bool _swap, 735 uint8_t *&lc) { 736 if (!_file.hasMinVersionLoadCommand) 737 return; 738 version_min_command *vm = reinterpret_cast<version_min_command*>(lc); 739 switch (_file.os) { 740 case MachOLinkingContext::OS::unknown: 741 vm->cmd = _file.minOSVersionKind; 742 vm->cmdsize = sizeof(version_min_command); 743 vm->version = _file.minOSverson; 744 vm->sdk = 0; 745 break; 746 case MachOLinkingContext::OS::macOSX: 747 vm->cmd = LC_VERSION_MIN_MACOSX; 748 vm->cmdsize = sizeof(version_min_command); 749 vm->version = _file.minOSverson; 750 vm->sdk = _file.sdkVersion; 751 break; 752 case MachOLinkingContext::OS::iOS: 753 case MachOLinkingContext::OS::iOS_simulator: 754 vm->cmd = LC_VERSION_MIN_IPHONEOS; 755 vm->cmdsize = sizeof(version_min_command); 756 vm->version = _file.minOSverson; 757 vm->sdk = _file.sdkVersion; 758 break; 759 } 760 if (_swap) 761 swapStruct(*vm); 762 lc += sizeof(version_min_command); 763 } 764 765 llvm::Error MachOFileLayout::writeLoadCommands() { 766 uint8_t *lc = &_buffer[_startOfLoadCommands]; 767 if (_file.fileType == llvm::MachO::MH_OBJECT) { 768 // Object files have one unnamed segment which holds all sections. 769 if (_is64) { 770 if (auto ec = writeSingleSegmentLoadCommand<MachO64Trait>(lc)) 771 return ec; 772 } else { 773 if (auto ec = writeSingleSegmentLoadCommand<MachO32Trait>(lc)) 774 return ec; 775 } 776 // Add LC_SYMTAB with symbol table info 777 symtab_command* st = reinterpret_cast<symtab_command*>(lc); 778 st->cmd = LC_SYMTAB; 779 st->cmdsize = sizeof(symtab_command); 780 st->symoff = _startOfSymbols; 781 st->nsyms = _file.stabsSymbols.size() + _file.localSymbols.size() + 782 _file.globalSymbols.size() + _file.undefinedSymbols.size(); 783 st->stroff = _startOfSymbolStrings; 784 st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; 785 if (_swap) 786 swapStruct(*st); 787 lc += sizeof(symtab_command); 788 789 // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, 790 // LC_VERSION_MIN_WATCHOS, LC_VERSION_MIN_TVOS 791 writeVersionMinLoadCommand(_file, _swap, lc); 792 793 // Add LC_FUNCTION_STARTS if needed. 794 if (_functionStartsSize != 0) { 795 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); 796 dl->cmd = LC_FUNCTION_STARTS; 797 dl->cmdsize = sizeof(linkedit_data_command); 798 dl->dataoff = _startOfFunctionStarts; 799 dl->datasize = _functionStartsSize; 800 if (_swap) 801 swapStruct(*dl); 802 lc += sizeof(linkedit_data_command); 803 } 804 805 // Add LC_DATA_IN_CODE if requested. 806 if (_file.generateDataInCodeLoadCommand) { 807 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); 808 dl->cmd = LC_DATA_IN_CODE; 809 dl->cmdsize = sizeof(linkedit_data_command); 810 dl->dataoff = _startOfDataInCode; 811 dl->datasize = _dataInCodeSize; 812 if (_swap) 813 swapStruct(*dl); 814 lc += sizeof(linkedit_data_command); 815 } 816 } else { 817 // Final linked images have sections under segments. 818 if (_is64) { 819 if (auto ec = writeSegmentLoadCommands<MachO64Trait>(lc)) 820 return ec; 821 } else { 822 if (auto ec = writeSegmentLoadCommands<MachO32Trait>(lc)) 823 return ec; 824 } 825 826 // Add LC_ID_DYLIB command for dynamic libraries. 827 if (_file.fileType == llvm::MachO::MH_DYLIB) { 828 dylib_command *dc = reinterpret_cast<dylib_command*>(lc); 829 StringRef path = _file.installName; 830 uint32_t size = sizeof(dylib_command) + pointerAlign(path.size() + 1); 831 dc->cmd = LC_ID_DYLIB; 832 dc->cmdsize = size; 833 dc->dylib.name = sizeof(dylib_command); // offset 834 // needs to be some constant value different than the one in LC_LOAD_DYLIB 835 dc->dylib.timestamp = 1; 836 dc->dylib.current_version = _file.currentVersion; 837 dc->dylib.compatibility_version = _file.compatVersion; 838 if (_swap) 839 swapStruct(*dc); 840 memcpy(lc + sizeof(dylib_command), path.begin(), path.size()); 841 lc[sizeof(dylib_command) + path.size()] = '\0'; 842 lc += size; 843 } 844 845 // Add LC_DYLD_INFO_ONLY. 846 dyld_info_command* di = reinterpret_cast<dyld_info_command*>(lc); 847 di->cmd = LC_DYLD_INFO_ONLY; 848 di->cmdsize = sizeof(dyld_info_command); 849 di->rebase_off = _rebaseInfo.size() ? _startOfRebaseInfo : 0; 850 di->rebase_size = _rebaseInfo.size(); 851 di->bind_off = _bindingInfo.size() ? _startOfBindingInfo : 0; 852 di->bind_size = _bindingInfo.size(); 853 di->weak_bind_off = 0; 854 di->weak_bind_size = 0; 855 di->lazy_bind_off = _lazyBindingInfo.size() ? _startOfLazyBindingInfo : 0; 856 di->lazy_bind_size = _lazyBindingInfo.size(); 857 di->export_off = _exportTrie.size() ? _startOfExportTrie : 0; 858 di->export_size = _exportTrie.size(); 859 if (_swap) 860 swapStruct(*di); 861 lc += sizeof(dyld_info_command); 862 863 // Add LC_SYMTAB with symbol table info. 864 symtab_command* st = reinterpret_cast<symtab_command*>(lc); 865 st->cmd = LC_SYMTAB; 866 st->cmdsize = sizeof(symtab_command); 867 st->symoff = _startOfSymbols; 868 st->nsyms = _file.stabsSymbols.size() + _file.localSymbols.size() + 869 _file.globalSymbols.size() + _file.undefinedSymbols.size(); 870 st->stroff = _startOfSymbolStrings; 871 st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; 872 if (_swap) 873 swapStruct(*st); 874 lc += sizeof(symtab_command); 875 876 // Add LC_DYSYMTAB 877 if (_file.fileType != llvm::MachO::MH_PRELOAD) { 878 dysymtab_command* dst = reinterpret_cast<dysymtab_command*>(lc); 879 dst->cmd = LC_DYSYMTAB; 880 dst->cmdsize = sizeof(dysymtab_command); 881 dst->ilocalsym = _symbolTableLocalsStartIndex; 882 dst->nlocalsym = _file.stabsSymbols.size() + 883 _file.localSymbols.size(); 884 dst->iextdefsym = _symbolTableGlobalsStartIndex; 885 dst->nextdefsym = _file.globalSymbols.size(); 886 dst->iundefsym = _symbolTableUndefinesStartIndex; 887 dst->nundefsym = _file.undefinedSymbols.size(); 888 dst->tocoff = 0; 889 dst->ntoc = 0; 890 dst->modtaboff = 0; 891 dst->nmodtab = 0; 892 dst->extrefsymoff = 0; 893 dst->nextrefsyms = 0; 894 dst->indirectsymoff = _startOfIndirectSymbols; 895 dst->nindirectsyms = _indirectSymbolTableCount; 896 dst->extreloff = 0; 897 dst->nextrel = 0; 898 dst->locreloff = 0; 899 dst->nlocrel = 0; 900 if (_swap) 901 swapStruct(*dst); 902 lc += sizeof(dysymtab_command); 903 } 904 905 // If main executable, add LC_LOAD_DYLINKER 906 if (_file.fileType == llvm::MachO::MH_EXECUTE) { 907 // Build LC_LOAD_DYLINKER load command. 908 uint32_t size=pointerAlign(sizeof(dylinker_command)+dyldPath().size()+1); 909 dylinker_command* dl = reinterpret_cast<dylinker_command*>(lc); 910 dl->cmd = LC_LOAD_DYLINKER; 911 dl->cmdsize = size; 912 dl->name = sizeof(dylinker_command); // offset 913 if (_swap) 914 swapStruct(*dl); 915 memcpy(lc+sizeof(dylinker_command), dyldPath().data(), dyldPath().size()); 916 lc[sizeof(dylinker_command)+dyldPath().size()] = '\0'; 917 lc += size; 918 } 919 920 // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS, 921 // LC_VERSION_MIN_TVOS 922 writeVersionMinLoadCommand(_file, _swap, lc); 923 924 // Add LC_SOURCE_VERSION 925 { 926 // Note, using a temporary here to appease UB as we may not be aligned 927 // enough for a struct containing a uint64_t when emitting a 32-bit binary 928 source_version_command sv; 929 sv.cmd = LC_SOURCE_VERSION; 930 sv.cmdsize = sizeof(source_version_command); 931 sv.version = _file.sourceVersion; 932 if (_swap) 933 swapStruct(sv); 934 memcpy(lc, &sv, sizeof(source_version_command)); 935 lc += sizeof(source_version_command); 936 } 937 938 // If main executable, add LC_MAIN. 939 if (_file.fileType == llvm::MachO::MH_EXECUTE) { 940 // Build LC_MAIN load command. 941 // Note, using a temporary here to appease UB as we may not be aligned 942 // enough for a struct containing a uint64_t when emitting a 32-bit binary 943 entry_point_command ep; 944 ep.cmd = LC_MAIN; 945 ep.cmdsize = sizeof(entry_point_command); 946 ep.entryoff = _file.entryAddress - _seg1addr; 947 ep.stacksize = _file.stackSize; 948 if (_swap) 949 swapStruct(ep); 950 memcpy(lc, &ep, sizeof(entry_point_command)); 951 lc += sizeof(entry_point_command); 952 } 953 954 // Add LC_LOAD_DYLIB commands 955 for (const DependentDylib &dep : _file.dependentDylibs) { 956 dylib_command* dc = reinterpret_cast<dylib_command*>(lc); 957 uint32_t size = sizeof(dylib_command) + pointerAlign(dep.path.size()+1); 958 dc->cmd = dep.kind; 959 dc->cmdsize = size; 960 dc->dylib.name = sizeof(dylib_command); // offset 961 // needs to be some constant value different than the one in LC_ID_DYLIB 962 dc->dylib.timestamp = 2; 963 dc->dylib.current_version = dep.currentVersion; 964 dc->dylib.compatibility_version = dep.compatVersion; 965 if (_swap) 966 swapStruct(*dc); 967 memcpy(lc+sizeof(dylib_command), dep.path.begin(), dep.path.size()); 968 lc[sizeof(dylib_command)+dep.path.size()] = '\0'; 969 lc += size; 970 } 971 972 // Add LC_RPATH 973 for (const StringRef &path : _file.rpaths) { 974 rpath_command *rpc = reinterpret_cast<rpath_command *>(lc); 975 uint32_t size = pointerAlign(sizeof(rpath_command) + path.size() + 1); 976 rpc->cmd = LC_RPATH; 977 rpc->cmdsize = size; 978 rpc->path = sizeof(rpath_command); // offset 979 if (_swap) 980 swapStruct(*rpc); 981 memcpy(lc+sizeof(rpath_command), path.begin(), path.size()); 982 lc[sizeof(rpath_command)+path.size()] = '\0'; 983 lc += size; 984 } 985 986 // Add LC_FUNCTION_STARTS if needed. 987 if (_functionStartsSize != 0) { 988 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); 989 dl->cmd = LC_FUNCTION_STARTS; 990 dl->cmdsize = sizeof(linkedit_data_command); 991 dl->dataoff = _startOfFunctionStarts; 992 dl->datasize = _functionStartsSize; 993 if (_swap) 994 swapStruct(*dl); 995 lc += sizeof(linkedit_data_command); 996 } 997 998 // Add LC_DATA_IN_CODE if requested. 999 if (_file.generateDataInCodeLoadCommand) { 1000 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); 1001 dl->cmd = LC_DATA_IN_CODE; 1002 dl->cmdsize = sizeof(linkedit_data_command); 1003 dl->dataoff = _startOfDataInCode; 1004 dl->datasize = _dataInCodeSize; 1005 if (_swap) 1006 swapStruct(*dl); 1007 lc += sizeof(linkedit_data_command); 1008 } 1009 } 1010 return llvm::Error::success(); 1011 } 1012 1013 void MachOFileLayout::writeSectionContent() { 1014 for (const Section &s : _file.sections) { 1015 // Copy all section content to output buffer. 1016 if (isZeroFillSection(s.type)) 1017 continue; 1018 if (s.content.empty()) 1019 continue; 1020 uint32_t offset = _sectInfo[&s].fileOffset; 1021 uint8_t *p = &_buffer[offset]; 1022 memcpy(p, &s.content[0], s.content.size()); 1023 p += s.content.size(); 1024 } 1025 } 1026 1027 void MachOFileLayout::writeRelocations() { 1028 uint32_t relOffset = _startOfRelocations; 1029 for (Section sect : _file.sections) { 1030 for (Relocation r : sect.relocations) { 1031 any_relocation_info* rb = reinterpret_cast<any_relocation_info*>( 1032 &_buffer[relOffset]); 1033 *rb = packRelocation(r, _swap, _bigEndianArch); 1034 relOffset += sizeof(any_relocation_info); 1035 } 1036 } 1037 } 1038 1039 void MachOFileLayout::appendSymbols(const std::vector<Symbol> &symbols, 1040 uint32_t &symOffset, uint32_t &strOffset) { 1041 for (const Symbol &sym : symbols) { 1042 if (_is64) { 1043 nlist_64* nb = reinterpret_cast<nlist_64*>(&_buffer[symOffset]); 1044 nb->n_strx = strOffset - _startOfSymbolStrings; 1045 nb->n_type = sym.type | sym.scope; 1046 nb->n_sect = sym.sect; 1047 nb->n_desc = sym.desc; 1048 nb->n_value = sym.value; 1049 if (_swap) 1050 swapStruct(*nb); 1051 symOffset += sizeof(nlist_64); 1052 } else { 1053 nlist* nb = reinterpret_cast<nlist*>(&_buffer[symOffset]); 1054 nb->n_strx = strOffset - _startOfSymbolStrings; 1055 nb->n_type = sym.type | sym.scope; 1056 nb->n_sect = sym.sect; 1057 nb->n_desc = sym.desc; 1058 nb->n_value = sym.value; 1059 if (_swap) 1060 swapStruct(*nb); 1061 symOffset += sizeof(nlist); 1062 } 1063 memcpy(&_buffer[strOffset], sym.name.begin(), sym.name.size()); 1064 strOffset += sym.name.size(); 1065 _buffer[strOffset++] ='\0'; // Strings in table have nul terminator. 1066 } 1067 } 1068 1069 void MachOFileLayout::writeFunctionStartsInfo() { 1070 if (!_functionStartsSize) 1071 return; 1072 memcpy(&_buffer[_startOfFunctionStarts], _file.functionStarts.data(), 1073 _functionStartsSize); 1074 } 1075 1076 void MachOFileLayout::writeDataInCodeInfo() { 1077 uint32_t offset = _startOfDataInCode; 1078 for (const DataInCode &entry : _file.dataInCode) { 1079 data_in_code_entry *dst = reinterpret_cast<data_in_code_entry*>( 1080 &_buffer[offset]); 1081 dst->offset = entry.offset; 1082 dst->length = entry.length; 1083 dst->kind = entry.kind; 1084 if (_swap) 1085 swapStruct(*dst); 1086 offset += sizeof(data_in_code_entry); 1087 } 1088 } 1089 1090 void MachOFileLayout::writeSymbolTable() { 1091 // Write symbol table and symbol strings in parallel. 1092 uint32_t symOffset = _startOfSymbols; 1093 uint32_t strOffset = _startOfSymbolStrings; 1094 // Reserve n_strx offset of zero to mean no name. 1095 _buffer[strOffset++] = ' '; 1096 _buffer[strOffset++] = '\0'; 1097 appendSymbols(_file.stabsSymbols, symOffset, strOffset); 1098 appendSymbols(_file.localSymbols, symOffset, strOffset); 1099 appendSymbols(_file.globalSymbols, symOffset, strOffset); 1100 appendSymbols(_file.undefinedSymbols, symOffset, strOffset); 1101 // Write indirect symbol table array. 1102 uint32_t *indirects = reinterpret_cast<uint32_t*> 1103 (&_buffer[_startOfIndirectSymbols]); 1104 if (_file.fileType == llvm::MachO::MH_OBJECT) { 1105 // Object files have sections in same order as input normalized file. 1106 for (const Section §ion : _file.sections) { 1107 for (uint32_t index : section.indirectSymbols) { 1108 if (_swap) 1109 *indirects++ = llvm::sys::getSwappedBytes(index); 1110 else 1111 *indirects++ = index; 1112 } 1113 } 1114 } else { 1115 // Final linked images must sort sections from normalized file. 1116 for (const Segment &seg : _file.segments) { 1117 SegExtraInfo &segInfo = _segInfo[&seg]; 1118 for (const Section *section : segInfo.sections) { 1119 for (uint32_t index : section->indirectSymbols) { 1120 if (_swap) 1121 *indirects++ = llvm::sys::getSwappedBytes(index); 1122 else 1123 *indirects++ = index; 1124 } 1125 } 1126 } 1127 } 1128 } 1129 1130 void MachOFileLayout::writeRebaseInfo() { 1131 memcpy(&_buffer[_startOfRebaseInfo], _rebaseInfo.bytes(), _rebaseInfo.size()); 1132 } 1133 1134 void MachOFileLayout::writeBindingInfo() { 1135 memcpy(&_buffer[_startOfBindingInfo], 1136 _bindingInfo.bytes(), _bindingInfo.size()); 1137 } 1138 1139 void MachOFileLayout::writeLazyBindingInfo() { 1140 memcpy(&_buffer[_startOfLazyBindingInfo], 1141 _lazyBindingInfo.bytes(), _lazyBindingInfo.size()); 1142 } 1143 1144 void MachOFileLayout::writeExportInfo() { 1145 memcpy(&_buffer[_startOfExportTrie], _exportTrie.bytes(), _exportTrie.size()); 1146 } 1147 1148 void MachOFileLayout::buildLinkEditInfo() { 1149 buildRebaseInfo(); 1150 buildBindInfo(); 1151 buildLazyBindInfo(); 1152 buildExportTrie(); 1153 computeSymbolTableSizes(); 1154 computeFunctionStartsSize(); 1155 computeDataInCodeSize(); 1156 } 1157 1158 void MachOFileLayout::buildSectionRelocations() { 1159 1160 } 1161 1162 void MachOFileLayout::buildRebaseInfo() { 1163 // TODO: compress rebasing info. 1164 for (const RebaseLocation& entry : _file.rebasingInfo) { 1165 _rebaseInfo.append_byte(REBASE_OPCODE_SET_TYPE_IMM | entry.kind); 1166 _rebaseInfo.append_byte(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 1167 | entry.segIndex); 1168 _rebaseInfo.append_uleb128(entry.segOffset); 1169 _rebaseInfo.append_uleb128(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1); 1170 } 1171 _rebaseInfo.append_byte(REBASE_OPCODE_DONE); 1172 _rebaseInfo.align(_is64 ? 8 : 4); 1173 } 1174 1175 void MachOFileLayout::buildBindInfo() { 1176 // TODO: compress bind info. 1177 uint64_t lastAddend = 0; 1178 int lastOrdinal = 0x80000000; 1179 StringRef lastSymbolName; 1180 BindType lastType = (BindType)0; 1181 Hex32 lastSegOffset = ~0U; 1182 uint8_t lastSegIndex = (uint8_t)~0U; 1183 for (const BindLocation& entry : _file.bindingInfo) { 1184 if (entry.ordinal != lastOrdinal) { 1185 if (entry.ordinal <= 0) 1186 _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | 1187 (entry.ordinal & BIND_IMMEDIATE_MASK)); 1188 else if (entry.ordinal <= BIND_IMMEDIATE_MASK) 1189 _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1190 entry.ordinal); 1191 else { 1192 _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); 1193 _bindingInfo.append_uleb128(entry.ordinal); 1194 } 1195 lastOrdinal = entry.ordinal; 1196 } 1197 1198 if (lastSymbolName != entry.symbolName) { 1199 _bindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); 1200 _bindingInfo.append_string(entry.symbolName); 1201 lastSymbolName = entry.symbolName; 1202 } 1203 1204 if (lastType != entry.kind) { 1205 _bindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind); 1206 lastType = entry.kind; 1207 } 1208 1209 if (lastSegIndex != entry.segIndex || lastSegOffset != entry.segOffset) { 1210 _bindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 1211 | entry.segIndex); 1212 _bindingInfo.append_uleb128(entry.segOffset); 1213 lastSegIndex = entry.segIndex; 1214 lastSegOffset = entry.segOffset; 1215 } 1216 if (entry.addend != lastAddend) { 1217 _bindingInfo.append_byte(BIND_OPCODE_SET_ADDEND_SLEB); 1218 _bindingInfo.append_sleb128(entry.addend); 1219 lastAddend = entry.addend; 1220 } 1221 _bindingInfo.append_byte(BIND_OPCODE_DO_BIND); 1222 } 1223 _bindingInfo.append_byte(BIND_OPCODE_DONE); 1224 _bindingInfo.align(_is64 ? 8 : 4); 1225 } 1226 1227 void MachOFileLayout::buildLazyBindInfo() { 1228 for (const BindLocation& entry : _file.lazyBindingInfo) { 1229 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 1230 | entry.segIndex); 1231 _lazyBindingInfo.append_uleb128(entry.segOffset); 1232 if (entry.ordinal <= 0) 1233 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | 1234 (entry.ordinal & BIND_IMMEDIATE_MASK)); 1235 else if (entry.ordinal <= BIND_IMMEDIATE_MASK) 1236 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1237 entry.ordinal); 1238 else { 1239 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); 1240 _lazyBindingInfo.append_uleb128(entry.ordinal); 1241 } 1242 // FIXME: We need to | the opcode here with flags. 1243 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); 1244 _lazyBindingInfo.append_string(entry.symbolName); 1245 _lazyBindingInfo.append_byte(BIND_OPCODE_DO_BIND); 1246 _lazyBindingInfo.append_byte(BIND_OPCODE_DONE); 1247 } 1248 _lazyBindingInfo.align(_is64 ? 8 : 4); 1249 } 1250 1251 void TrieNode::addSymbol(const Export& entry, 1252 BumpPtrAllocator &allocator, 1253 std::vector<TrieNode*> &allNodes) { 1254 StringRef partialStr = entry.name.drop_front(_cummulativeString.size()); 1255 for (TrieEdge &edge : _children) { 1256 StringRef edgeStr = edge._subString; 1257 if (partialStr.startswith(edgeStr)) { 1258 // Already have matching edge, go down that path. 1259 edge._child->addSymbol(entry, allocator, allNodes); 1260 return; 1261 } 1262 // See if string has commmon prefix with existing edge. 1263 for (int n=edgeStr.size()-1; n > 0; --n) { 1264 if (partialStr.substr(0, n).equals(edgeStr.substr(0, n))) { 1265 // Splice in new node: was A -> C, now A -> B -> C 1266 StringRef bNodeStr = edge._child->_cummulativeString; 1267 bNodeStr = bNodeStr.drop_back(edgeStr.size()-n).copy(allocator); 1268 auto *bNode = new (allocator) TrieNode(bNodeStr); 1269 allNodes.push_back(bNode); 1270 TrieNode* cNode = edge._child; 1271 StringRef abEdgeStr = edgeStr.substr(0,n).copy(allocator); 1272 StringRef bcEdgeStr = edgeStr.substr(n).copy(allocator); 1273 DEBUG_WITH_TYPE("trie-builder", llvm::dbgs() 1274 << "splice in TrieNode('" << bNodeStr 1275 << "') between edge '" 1276 << abEdgeStr << "' and edge='" 1277 << bcEdgeStr<< "'\n"); 1278 TrieEdge& abEdge = edge; 1279 abEdge._subString = abEdgeStr; 1280 abEdge._child = bNode; 1281 auto *bcEdge = new (allocator) TrieEdge(bcEdgeStr, cNode); 1282 bNode->_children.insert(bNode->_children.end(), bcEdge); 1283 bNode->addSymbol(entry, allocator, allNodes); 1284 return; 1285 } 1286 } 1287 } 1288 if (entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { 1289 assert(entry.otherOffset != 0); 1290 } 1291 if (entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { 1292 assert(entry.otherOffset != 0); 1293 } 1294 // No commonality with any existing child, make a new edge. 1295 auto *newNode = new (allocator) TrieNode(entry.name.copy(allocator)); 1296 auto *newEdge = new (allocator) TrieEdge(partialStr, newNode); 1297 _children.insert(_children.end(), newEdge); 1298 DEBUG_WITH_TYPE("trie-builder", llvm::dbgs() 1299 << "new TrieNode('" << entry.name << "') with edge '" 1300 << partialStr << "' from node='" 1301 << _cummulativeString << "'\n"); 1302 newNode->_address = entry.offset; 1303 newNode->_flags = entry.flags | entry.kind; 1304 newNode->_other = entry.otherOffset; 1305 if ((entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && !entry.otherName.empty()) 1306 newNode->_importedName = entry.otherName.copy(allocator); 1307 newNode->_hasExportInfo = true; 1308 allNodes.push_back(newNode); 1309 } 1310 1311 void TrieNode::addOrderedNodes(const Export& entry, 1312 std::vector<TrieNode*> &orderedNodes) { 1313 if (!_ordered) { 1314 orderedNodes.push_back(this); 1315 _ordered = true; 1316 } 1317 1318 StringRef partialStr = entry.name.drop_front(_cummulativeString.size()); 1319 for (TrieEdge &edge : _children) { 1320 StringRef edgeStr = edge._subString; 1321 if (partialStr.startswith(edgeStr)) { 1322 // Already have matching edge, go down that path. 1323 edge._child->addOrderedNodes(entry, orderedNodes); 1324 return; 1325 } 1326 } 1327 } 1328 1329 bool TrieNode::updateOffset(uint32_t& offset) { 1330 uint32_t nodeSize = 1; // Length when no export info 1331 if (_hasExportInfo) { 1332 if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { 1333 nodeSize = llvm::getULEB128Size(_flags); 1334 nodeSize += llvm::getULEB128Size(_other); // Other contains ordinal. 1335 nodeSize += _importedName.size(); 1336 ++nodeSize; // Trailing zero in imported name. 1337 } else { 1338 nodeSize = llvm::getULEB128Size(_flags) + llvm::getULEB128Size(_address); 1339 if (_flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) 1340 nodeSize += llvm::getULEB128Size(_other); 1341 } 1342 // Overall node size so far is uleb128 of export info + actual export info. 1343 nodeSize += llvm::getULEB128Size(nodeSize); 1344 } 1345 // Compute size of all child edges. 1346 ++nodeSize; // Byte for number of chidren. 1347 for (TrieEdge &edge : _children) { 1348 nodeSize += edge._subString.size() + 1 // String length. 1349 + llvm::getULEB128Size(edge._child->_trieOffset); // Offset len. 1350 } 1351 // On input, 'offset' is new prefered location for this node. 1352 bool result = (_trieOffset != offset); 1353 // Store new location in node object for use by parents. 1354 _trieOffset = offset; 1355 // Update offset for next iteration. 1356 offset += nodeSize; 1357 // Return true if _trieOffset was changed. 1358 return result; 1359 } 1360 1361 void TrieNode::appendToByteBuffer(ByteBuffer &out) { 1362 if (_hasExportInfo) { 1363 if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { 1364 if (!_importedName.empty()) { 1365 // nodes with re-export info: size, flags, ordinal, import-name 1366 uint32_t nodeSize = llvm::getULEB128Size(_flags) 1367 + llvm::getULEB128Size(_other) 1368 + _importedName.size() + 1; 1369 assert(nodeSize < 256); 1370 out.append_byte(nodeSize); 1371 out.append_uleb128(_flags); 1372 out.append_uleb128(_other); 1373 out.append_string(_importedName); 1374 } else { 1375 // nodes without re-export info: size, flags, ordinal, empty-string 1376 uint32_t nodeSize = llvm::getULEB128Size(_flags) 1377 + llvm::getULEB128Size(_other) + 1; 1378 assert(nodeSize < 256); 1379 out.append_byte(nodeSize); 1380 out.append_uleb128(_flags); 1381 out.append_uleb128(_other); 1382 out.append_byte(0); 1383 } 1384 } else if ( _flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) { 1385 // Nodes with export info: size, flags, address, other 1386 uint32_t nodeSize = llvm::getULEB128Size(_flags) 1387 + llvm::getULEB128Size(_address) 1388 + llvm::getULEB128Size(_other); 1389 assert(nodeSize < 256); 1390 out.append_byte(nodeSize); 1391 out.append_uleb128(_flags); 1392 out.append_uleb128(_address); 1393 out.append_uleb128(_other); 1394 } else { 1395 // Nodes with export info: size, flags, address 1396 uint32_t nodeSize = llvm::getULEB128Size(_flags) 1397 + llvm::getULEB128Size(_address); 1398 assert(nodeSize < 256); 1399 out.append_byte(nodeSize); 1400 out.append_uleb128(_flags); 1401 out.append_uleb128(_address); 1402 } 1403 } else { 1404 // Node with no export info. 1405 uint32_t nodeSize = 0; 1406 out.append_byte(nodeSize); 1407 } 1408 // Add number of children. 1409 assert(_children.size() < 256); 1410 out.append_byte(_children.size()); 1411 // Append each child edge substring and node offset. 1412 for (TrieEdge &edge : _children) { 1413 out.append_string(edge._subString); 1414 out.append_uleb128(edge._child->_trieOffset); 1415 } 1416 } 1417 1418 void MachOFileLayout::buildExportTrie() { 1419 if (_file.exportInfo.empty()) 1420 return; 1421 1422 // For all temporary strings and objects used building trie. 1423 BumpPtrAllocator allocator; 1424 1425 // Build trie of all exported symbols. 1426 auto *rootNode = new (allocator) TrieNode(StringRef()); 1427 std::vector<TrieNode*> allNodes; 1428 allNodes.reserve(_file.exportInfo.size()*2); 1429 allNodes.push_back(rootNode); 1430 for (const Export& entry : _file.exportInfo) { 1431 rootNode->addSymbol(entry, allocator, allNodes); 1432 } 1433 1434 std::vector<TrieNode*> orderedNodes; 1435 orderedNodes.reserve(allNodes.size()); 1436 1437 for (const Export& entry : _file.exportInfo) 1438 rootNode->addOrderedNodes(entry, orderedNodes); 1439 1440 // Assign each node in the vector an offset in the trie stream, iterating 1441 // until all uleb128 sizes have stabilized. 1442 bool more; 1443 do { 1444 uint32_t offset = 0; 1445 more = false; 1446 for (TrieNode* node : orderedNodes) { 1447 if (node->updateOffset(offset)) 1448 more = true; 1449 } 1450 } while (more); 1451 1452 // Serialize trie to ByteBuffer. 1453 for (TrieNode* node : orderedNodes) { 1454 node->appendToByteBuffer(_exportTrie); 1455 } 1456 _exportTrie.align(_is64 ? 8 : 4); 1457 } 1458 1459 void MachOFileLayout::computeSymbolTableSizes() { 1460 // MachO symbol tables have three ranges: locals, globals, and undefines 1461 const size_t nlistSize = (_is64 ? sizeof(nlist_64) : sizeof(nlist)); 1462 _symbolTableSize = nlistSize * (_file.stabsSymbols.size() 1463 + _file.localSymbols.size() 1464 + _file.globalSymbols.size() 1465 + _file.undefinedSymbols.size()); 1466 // Always reserve 1-byte for the empty string and 1-byte for its terminator. 1467 _symbolStringPoolSize = 2; 1468 for (const Symbol &sym : _file.stabsSymbols) { 1469 _symbolStringPoolSize += (sym.name.size()+1); 1470 } 1471 for (const Symbol &sym : _file.localSymbols) { 1472 _symbolStringPoolSize += (sym.name.size()+1); 1473 } 1474 for (const Symbol &sym : _file.globalSymbols) { 1475 _symbolStringPoolSize += (sym.name.size()+1); 1476 } 1477 for (const Symbol &sym : _file.undefinedSymbols) { 1478 _symbolStringPoolSize += (sym.name.size()+1); 1479 } 1480 _symbolTableLocalsStartIndex = 0; 1481 _symbolTableGlobalsStartIndex = _file.stabsSymbols.size() + 1482 _file.localSymbols.size(); 1483 _symbolTableUndefinesStartIndex = _symbolTableGlobalsStartIndex 1484 + _file.globalSymbols.size(); 1485 1486 _indirectSymbolTableCount = 0; 1487 for (const Section § : _file.sections) { 1488 _indirectSymbolTableCount += sect.indirectSymbols.size(); 1489 } 1490 } 1491 1492 void MachOFileLayout::computeFunctionStartsSize() { 1493 _functionStartsSize = _file.functionStarts.size(); 1494 } 1495 1496 void MachOFileLayout::computeDataInCodeSize() { 1497 _dataInCodeSize = _file.dataInCode.size() * sizeof(data_in_code_entry); 1498 } 1499 1500 void MachOFileLayout::writeLinkEditContent() { 1501 if (_file.fileType == llvm::MachO::MH_OBJECT) { 1502 writeRelocations(); 1503 writeFunctionStartsInfo(); 1504 writeDataInCodeInfo(); 1505 writeSymbolTable(); 1506 } else { 1507 writeRebaseInfo(); 1508 writeBindingInfo(); 1509 writeLazyBindingInfo(); 1510 // TODO: add weak binding info 1511 writeExportInfo(); 1512 writeFunctionStartsInfo(); 1513 writeDataInCodeInfo(); 1514 writeSymbolTable(); 1515 } 1516 } 1517 1518 llvm::Error MachOFileLayout::writeBinary(StringRef path) { 1519 // Check for pending error from constructor. 1520 if (_ec) 1521 return llvm::errorCodeToError(_ec); 1522 // Create FileOutputBuffer with calculated size. 1523 unsigned flags = 0; 1524 if (_file.fileType != llvm::MachO::MH_OBJECT) 1525 flags = llvm::FileOutputBuffer::F_executable; 1526 Expected<std::unique_ptr<llvm::FileOutputBuffer>> fobOrErr = 1527 llvm::FileOutputBuffer::create(path, size(), flags); 1528 if (Error E = fobOrErr.takeError()) 1529 return E; 1530 std::unique_ptr<llvm::FileOutputBuffer> &fob = *fobOrErr; 1531 // Write content. 1532 _buffer = fob->getBufferStart(); 1533 writeMachHeader(); 1534 if (auto ec = writeLoadCommands()) 1535 return ec; 1536 writeSectionContent(); 1537 writeLinkEditContent(); 1538 if (Error E = fob->commit()) 1539 return E; 1540 1541 return llvm::Error::success(); 1542 } 1543 1544 /// Takes in-memory normalized view and writes a mach-o object file. 1545 llvm::Error writeBinary(const NormalizedFile &file, StringRef path) { 1546 MachOFileLayout layout(file); 1547 return layout.writeBinary(path); 1548 } 1549 1550 } // namespace normalized 1551 } // namespace mach_o 1552 } // namespace lld 1553