1 //===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp ---------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 /// 10 /// \file For mach-o object files, this implementation converts normalized 11 /// mach-o in memory to mach-o binary on disk. 12 /// 13 /// +---------------+ 14 /// | binary mach-o | 15 /// +---------------+ 16 /// ^ 17 /// | 18 /// | 19 /// +------------+ 20 /// | normalized | 21 /// +------------+ 22 23 #include "MachONormalizedFile.h" 24 #include "MachONormalizedFileBinaryUtils.h" 25 #include "lld/Common/LLVM.h" 26 #include "lld/Core/Error.h" 27 #include "llvm/ADT/SmallString.h" 28 #include "llvm/ADT/SmallVector.h" 29 #include "llvm/ADT/StringRef.h" 30 #include "llvm/ADT/ilist.h" 31 #include "llvm/ADT/ilist_node.h" 32 #include "llvm/BinaryFormat/MachO.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/Errc.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/FileOutputBuffer.h" 38 #include "llvm/Support/Format.h" 39 #include "llvm/Support/Host.h" 40 #include "llvm/Support/MemoryBuffer.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include <functional> 43 #include <list> 44 #include <map> 45 #include <system_error> 46 47 using namespace llvm::MachO; 48 49 namespace lld { 50 namespace mach_o { 51 namespace normalized { 52 53 struct TrieNode; // Forward declaration. 54 55 struct TrieEdge : public llvm::ilist_node<TrieEdge> { 56 TrieEdge(StringRef s, TrieNode *node) : _subString(s), _child(node) {} 57 58 StringRef _subString; 59 struct TrieNode *_child; 60 }; 61 62 } // namespace normalized 63 } // namespace mach_o 64 } // namespace lld 65 66 67 namespace llvm { 68 using lld::mach_o::normalized::TrieEdge; 69 template <> 70 struct ilist_alloc_traits<TrieEdge> : ilist_noalloc_traits<TrieEdge> {}; 71 } // namespace llvm 72 73 74 namespace lld { 75 namespace mach_o { 76 namespace normalized { 77 78 struct TrieNode { 79 typedef llvm::ilist<TrieEdge> TrieEdgeList; 80 81 TrieNode(StringRef s) 82 : _cummulativeString(s), _address(0), _flags(0), _other(0), 83 _trieOffset(0), _hasExportInfo(false) {} 84 ~TrieNode() = default; 85 86 void addSymbol(const Export &entry, BumpPtrAllocator &allocator, 87 std::vector<TrieNode *> &allNodes); 88 89 void addOrderedNodes(const Export &entry, 90 std::vector<TrieNode *> &allNodes); 91 bool updateOffset(uint32_t &offset); 92 void appendToByteBuffer(ByteBuffer &out); 93 94 private: 95 StringRef _cummulativeString; 96 TrieEdgeList _children; 97 uint64_t _address; 98 uint64_t _flags; 99 uint64_t _other; 100 StringRef _importedName; 101 uint32_t _trieOffset; 102 bool _hasExportInfo; 103 bool _ordered = false; 104 }; 105 106 /// Utility class for writing a mach-o binary file given an in-memory 107 /// normalized file. 108 class MachOFileLayout { 109 public: 110 /// All layout computation is done in the constructor. 111 MachOFileLayout(const NormalizedFile &file, bool alwaysIncludeFunctionStarts); 112 113 /// Returns the final file size as computed in the constructor. 114 size_t size() const; 115 116 // Returns size of the mach_header and load commands. 117 size_t headerAndLoadCommandsSize() const; 118 119 /// Writes the normalized file as a binary mach-o file to the specified 120 /// path. This does not have a stream interface because the generated 121 /// file may need the 'x' bit set. 122 llvm::Error writeBinary(StringRef path); 123 124 private: 125 uint32_t loadCommandsSize(uint32_t &count, 126 bool alwaysIncludeFunctionStarts); 127 void buildFileOffsets(); 128 void writeMachHeader(); 129 llvm::Error writeLoadCommands(); 130 void writeSectionContent(); 131 void writeRelocations(); 132 void writeSymbolTable(); 133 void writeRebaseInfo(); 134 void writeBindingInfo(); 135 void writeLazyBindingInfo(); 136 void writeExportInfo(); 137 void writeFunctionStartsInfo(); 138 void writeDataInCodeInfo(); 139 void writeLinkEditContent(); 140 void buildLinkEditInfo(); 141 void buildRebaseInfo(); 142 void buildBindInfo(); 143 void buildLazyBindInfo(); 144 void buildExportTrie(); 145 void computeFunctionStartsSize(); 146 void computeDataInCodeSize(); 147 void computeSymbolTableSizes(); 148 void buildSectionRelocations(); 149 void appendSymbols(const std::vector<Symbol> &symbols, 150 uint32_t &symOffset, uint32_t &strOffset); 151 uint32_t indirectSymbolIndex(const Section §, uint32_t &index); 152 uint32_t indirectSymbolElementSize(const Section §); 153 154 // For use as template parameter to load command methods. 155 struct MachO64Trait { 156 typedef llvm::MachO::segment_command_64 command; 157 typedef llvm::MachO::section_64 section; 158 enum { LC = llvm::MachO::LC_SEGMENT_64 }; 159 }; 160 161 // For use as template parameter to load command methods. 162 struct MachO32Trait { 163 typedef llvm::MachO::segment_command command; 164 typedef llvm::MachO::section section; 165 enum { LC = llvm::MachO::LC_SEGMENT }; 166 }; 167 168 template <typename T> 169 llvm::Error writeSingleSegmentLoadCommand(uint8_t *&lc); 170 template <typename T> llvm::Error writeSegmentLoadCommands(uint8_t *&lc); 171 172 uint32_t pointerAlign(uint32_t value); 173 static StringRef dyldPath(); 174 175 struct SegExtraInfo { 176 uint32_t fileOffset; 177 uint32_t fileSize; 178 std::vector<const Section*> sections; 179 }; 180 typedef std::map<const Segment*, SegExtraInfo> SegMap; 181 struct SectionExtraInfo { 182 uint32_t fileOffset; 183 }; 184 typedef std::map<const Section*, SectionExtraInfo> SectionMap; 185 186 const NormalizedFile &_file; 187 std::error_code _ec; 188 uint8_t *_buffer; 189 const bool _is64; 190 const bool _swap; 191 const bool _bigEndianArch; 192 uint64_t _seg1addr; 193 uint32_t _startOfLoadCommands; 194 uint32_t _countOfLoadCommands; 195 uint32_t _endOfLoadCommands; 196 uint32_t _startOfRelocations; 197 uint32_t _startOfFunctionStarts; 198 uint32_t _startOfDataInCode; 199 uint32_t _startOfSymbols; 200 uint32_t _startOfIndirectSymbols; 201 uint32_t _startOfSymbolStrings; 202 uint32_t _endOfSymbolStrings; 203 uint32_t _symbolTableLocalsStartIndex; 204 uint32_t _symbolTableGlobalsStartIndex; 205 uint32_t _symbolTableUndefinesStartIndex; 206 uint32_t _symbolStringPoolSize; 207 uint32_t _symbolTableSize; 208 uint32_t _functionStartsSize; 209 uint32_t _dataInCodeSize; 210 uint32_t _indirectSymbolTableCount; 211 // Used in object file creation only 212 uint32_t _startOfSectionsContent; 213 uint32_t _endOfSectionsContent; 214 // Used in final linked image only 215 uint32_t _startOfLinkEdit; 216 uint32_t _startOfRebaseInfo; 217 uint32_t _endOfRebaseInfo; 218 uint32_t _startOfBindingInfo; 219 uint32_t _endOfBindingInfo; 220 uint32_t _startOfLazyBindingInfo; 221 uint32_t _endOfLazyBindingInfo; 222 uint32_t _startOfExportTrie; 223 uint32_t _endOfExportTrie; 224 uint32_t _endOfLinkEdit; 225 uint64_t _addressOfLinkEdit; 226 SegMap _segInfo; 227 SectionMap _sectInfo; 228 ByteBuffer _rebaseInfo; 229 ByteBuffer _bindingInfo; 230 ByteBuffer _lazyBindingInfo; 231 ByteBuffer _weakBindingInfo; 232 ByteBuffer _exportTrie; 233 }; 234 235 size_t headerAndLoadCommandsSize(const NormalizedFile &file, 236 bool includeFunctionStarts) { 237 MachOFileLayout layout(file, includeFunctionStarts); 238 return layout.headerAndLoadCommandsSize(); 239 } 240 241 StringRef MachOFileLayout::dyldPath() { 242 return "/usr/lib/dyld"; 243 } 244 245 uint32_t MachOFileLayout::pointerAlign(uint32_t value) { 246 return llvm::alignTo(value, _is64 ? 8 : 4); 247 } 248 249 250 size_t MachOFileLayout::headerAndLoadCommandsSize() const { 251 return _endOfLoadCommands; 252 } 253 254 MachOFileLayout::MachOFileLayout(const NormalizedFile &file, 255 bool alwaysIncludeFunctionStarts) 256 : _file(file), 257 _is64(MachOLinkingContext::is64Bit(file.arch)), 258 _swap(!MachOLinkingContext::isHostEndian(file.arch)), 259 _bigEndianArch(MachOLinkingContext::isBigEndian(file.arch)), 260 _seg1addr(INT64_MAX) { 261 _startOfLoadCommands = _is64 ? sizeof(mach_header_64) : sizeof(mach_header); 262 const size_t segCommandBaseSize = 263 (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); 264 const size_t sectsSize = (_is64 ? sizeof(section_64) : sizeof(section)); 265 if (file.fileType == llvm::MachO::MH_OBJECT) { 266 // object files have just one segment load command containing all sections 267 _endOfLoadCommands = _startOfLoadCommands 268 + segCommandBaseSize 269 + file.sections.size() * sectsSize 270 + sizeof(symtab_command); 271 _countOfLoadCommands = 2; 272 if (file.hasMinVersionLoadCommand) { 273 _endOfLoadCommands += sizeof(version_min_command); 274 _countOfLoadCommands++; 275 } 276 if (!_file.functionStarts.empty() || alwaysIncludeFunctionStarts) { 277 _endOfLoadCommands += sizeof(linkedit_data_command); 278 _countOfLoadCommands++; 279 } 280 if (_file.generateDataInCodeLoadCommand) { 281 _endOfLoadCommands += sizeof(linkedit_data_command); 282 _countOfLoadCommands++; 283 } 284 // Assign file offsets to each section. 285 _startOfSectionsContent = _endOfLoadCommands; 286 unsigned relocCount = 0; 287 uint64_t offset = _startOfSectionsContent; 288 for (const Section § : file.sections) { 289 if (isZeroFillSection(sect.type)) 290 _sectInfo[§].fileOffset = 0; 291 else { 292 offset = llvm::alignTo(offset, sect.alignment); 293 _sectInfo[§].fileOffset = offset; 294 offset += sect.content.size(); 295 } 296 relocCount += sect.relocations.size(); 297 } 298 _endOfSectionsContent = offset; 299 300 computeSymbolTableSizes(); 301 computeFunctionStartsSize(); 302 computeDataInCodeSize(); 303 304 // Align start of relocations. 305 _startOfRelocations = pointerAlign(_endOfSectionsContent); 306 _startOfFunctionStarts = _startOfRelocations + relocCount * 8; 307 _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize; 308 _startOfSymbols = _startOfDataInCode + _dataInCodeSize; 309 // Add Indirect symbol table. 310 _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; 311 // Align start of symbol table and symbol strings. 312 _startOfSymbolStrings = _startOfIndirectSymbols 313 + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); 314 _endOfSymbolStrings = _startOfSymbolStrings 315 + pointerAlign(_symbolStringPoolSize); 316 _endOfLinkEdit = _endOfSymbolStrings; 317 DEBUG_WITH_TYPE("MachOFileLayout", 318 llvm::dbgs() << "MachOFileLayout()\n" 319 << " startOfLoadCommands=" << _startOfLoadCommands << "\n" 320 << " countOfLoadCommands=" << _countOfLoadCommands << "\n" 321 << " endOfLoadCommands=" << _endOfLoadCommands << "\n" 322 << " startOfRelocations=" << _startOfRelocations << "\n" 323 << " startOfSymbols=" << _startOfSymbols << "\n" 324 << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" 325 << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" 326 << " startOfSectionsContent=" << _startOfSectionsContent << "\n" 327 << " endOfSectionsContent=" << _endOfSectionsContent << "\n"); 328 } else { 329 // Final linked images have one load command per segment. 330 _endOfLoadCommands = _startOfLoadCommands 331 + loadCommandsSize(_countOfLoadCommands, 332 alwaysIncludeFunctionStarts); 333 334 // Assign section file offsets. 335 buildFileOffsets(); 336 buildLinkEditInfo(); 337 338 // LINKEDIT of final linked images has in order: 339 // rebase info, binding info, lazy binding info, weak binding info, 340 // data-in-code, symbol table, indirect symbol table, symbol table strings. 341 _startOfRebaseInfo = _startOfLinkEdit; 342 _endOfRebaseInfo = _startOfRebaseInfo + _rebaseInfo.size(); 343 _startOfBindingInfo = _endOfRebaseInfo; 344 _endOfBindingInfo = _startOfBindingInfo + _bindingInfo.size(); 345 _startOfLazyBindingInfo = _endOfBindingInfo; 346 _endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size(); 347 _startOfExportTrie = _endOfLazyBindingInfo; 348 _endOfExportTrie = _startOfExportTrie + _exportTrie.size(); 349 _startOfFunctionStarts = _endOfExportTrie; 350 _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize; 351 _startOfSymbols = _startOfDataInCode + _dataInCodeSize; 352 _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; 353 _startOfSymbolStrings = _startOfIndirectSymbols 354 + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); 355 _endOfSymbolStrings = _startOfSymbolStrings 356 + pointerAlign(_symbolStringPoolSize); 357 _endOfLinkEdit = _endOfSymbolStrings; 358 DEBUG_WITH_TYPE("MachOFileLayout", 359 llvm::dbgs() << "MachOFileLayout()\n" 360 << " startOfLoadCommands=" << _startOfLoadCommands << "\n" 361 << " countOfLoadCommands=" << _countOfLoadCommands << "\n" 362 << " endOfLoadCommands=" << _endOfLoadCommands << "\n" 363 << " startOfLinkEdit=" << _startOfLinkEdit << "\n" 364 << " startOfRebaseInfo=" << _startOfRebaseInfo << "\n" 365 << " endOfRebaseInfo=" << _endOfRebaseInfo << "\n" 366 << " startOfBindingInfo=" << _startOfBindingInfo << "\n" 367 << " endOfBindingInfo=" << _endOfBindingInfo << "\n" 368 << " startOfLazyBindingInfo=" << _startOfLazyBindingInfo << "\n" 369 << " endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n" 370 << " startOfExportTrie=" << _startOfExportTrie << "\n" 371 << " endOfExportTrie=" << _endOfExportTrie << "\n" 372 << " startOfFunctionStarts=" << _startOfFunctionStarts << "\n" 373 << " startOfDataInCode=" << _startOfDataInCode << "\n" 374 << " startOfSymbols=" << _startOfSymbols << "\n" 375 << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" 376 << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" 377 << " addressOfLinkEdit=" << _addressOfLinkEdit << "\n"); 378 } 379 } 380 381 uint32_t MachOFileLayout::loadCommandsSize(uint32_t &count, 382 bool alwaysIncludeFunctionStarts) { 383 uint32_t size = 0; 384 count = 0; 385 386 const size_t segCommandSize = 387 (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); 388 const size_t sectionSize = (_is64 ? sizeof(section_64) : sizeof(section)); 389 390 // Add LC_SEGMENT for each segment. 391 size += _file.segments.size() * segCommandSize; 392 count += _file.segments.size(); 393 // Add section record for each section. 394 size += _file.sections.size() * sectionSize; 395 396 // If creating a dylib, add LC_ID_DYLIB. 397 if (_file.fileType == llvm::MachO::MH_DYLIB) { 398 size += sizeof(dylib_command) + pointerAlign(_file.installName.size() + 1); 399 ++count; 400 } 401 402 // Add LC_DYLD_INFO 403 size += sizeof(dyld_info_command); 404 ++count; 405 406 // Add LC_SYMTAB 407 size += sizeof(symtab_command); 408 ++count; 409 410 // Add LC_DYSYMTAB 411 if (_file.fileType != llvm::MachO::MH_PRELOAD) { 412 size += sizeof(dysymtab_command); 413 ++count; 414 } 415 416 // If main executable add LC_LOAD_DYLINKER 417 if (_file.fileType == llvm::MachO::MH_EXECUTE) { 418 size += pointerAlign(sizeof(dylinker_command) + dyldPath().size()+1); 419 ++count; 420 } 421 422 // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS, 423 // LC_VERSION_MIN_TVOS 424 if (_file.hasMinVersionLoadCommand) { 425 size += sizeof(version_min_command); 426 ++count; 427 } 428 429 // Add LC_SOURCE_VERSION 430 size += sizeof(source_version_command); 431 ++count; 432 433 // If main executable add LC_MAIN 434 if (_file.fileType == llvm::MachO::MH_EXECUTE) { 435 size += sizeof(entry_point_command); 436 ++count; 437 } 438 439 // Add LC_LOAD_DYLIB for each dependent dylib. 440 for (const DependentDylib &dep : _file.dependentDylibs) { 441 size += sizeof(dylib_command) + pointerAlign(dep.path.size()+1); 442 ++count; 443 } 444 445 // Add LC_RPATH 446 for (const StringRef &path : _file.rpaths) { 447 size += pointerAlign(sizeof(rpath_command) + path.size() + 1); 448 ++count; 449 } 450 451 // Add LC_FUNCTION_STARTS if needed 452 if (!_file.functionStarts.empty() || alwaysIncludeFunctionStarts) { 453 size += sizeof(linkedit_data_command); 454 ++count; 455 } 456 457 // Add LC_DATA_IN_CODE if requested. Note, we do encode zero length entries. 458 // FIXME: Zero length entries is only to match ld64. Should we change this? 459 if (_file.generateDataInCodeLoadCommand) { 460 size += sizeof(linkedit_data_command); 461 ++count; 462 } 463 464 return size; 465 } 466 467 static bool overlaps(const Segment &s1, const Segment &s2) { 468 if (s2.address >= s1.address+s1.size) 469 return false; 470 if (s1.address >= s2.address+s2.size) 471 return false; 472 return true; 473 } 474 475 static bool overlaps(const Section &s1, const Section &s2) { 476 if (s2.address >= s1.address+s1.content.size()) 477 return false; 478 if (s1.address >= s2.address+s2.content.size()) 479 return false; 480 return true; 481 } 482 483 void MachOFileLayout::buildFileOffsets() { 484 // Verify no segments overlap 485 for (const Segment &sg1 : _file.segments) { 486 for (const Segment &sg2 : _file.segments) { 487 if (&sg1 == &sg2) 488 continue; 489 if (overlaps(sg1,sg2)) { 490 _ec = make_error_code(llvm::errc::executable_format_error); 491 return; 492 } 493 } 494 } 495 496 // Verify no sections overlap 497 for (const Section &s1 : _file.sections) { 498 for (const Section &s2 : _file.sections) { 499 if (&s1 == &s2) 500 continue; 501 if (overlaps(s1,s2)) { 502 _ec = make_error_code(llvm::errc::executable_format_error); 503 return; 504 } 505 } 506 } 507 508 // Build side table of extra info about segments and sections. 509 SegExtraInfo t; 510 t.fileOffset = 0; 511 for (const Segment &sg : _file.segments) { 512 _segInfo[&sg] = t; 513 } 514 SectionExtraInfo t2; 515 t2.fileOffset = 0; 516 // Assign sections to segments. 517 for (const Section &s : _file.sections) { 518 _sectInfo[&s] = t2; 519 bool foundSegment = false; 520 for (const Segment &sg : _file.segments) { 521 if (sg.name.equals(s.segmentName)) { 522 if ((s.address >= sg.address) 523 && (s.address+s.content.size() <= sg.address+sg.size)) { 524 _segInfo[&sg].sections.push_back(&s); 525 foundSegment = true; 526 break; 527 } 528 } 529 } 530 if (!foundSegment) { 531 _ec = make_error_code(llvm::errc::executable_format_error); 532 return; 533 } 534 } 535 536 // Assign file offsets. 537 uint32_t fileOffset = 0; 538 DEBUG_WITH_TYPE("MachOFileLayout", 539 llvm::dbgs() << "buildFileOffsets()\n"); 540 for (const Segment &sg : _file.segments) { 541 _segInfo[&sg].fileOffset = fileOffset; 542 if ((_seg1addr == INT64_MAX) && sg.init_access) 543 _seg1addr = sg.address; 544 DEBUG_WITH_TYPE("MachOFileLayout", 545 llvm::dbgs() << " segment=" << sg.name 546 << ", fileOffset=" << _segInfo[&sg].fileOffset << "\n"); 547 548 uint32_t segFileSize = 0; 549 // A segment that is not zero-fill must use a least one page of disk space. 550 if (sg.init_access) 551 segFileSize = _file.pageSize; 552 for (const Section *s : _segInfo[&sg].sections) { 553 uint32_t sectOffset = s->address - sg.address; 554 uint32_t sectFileSize = 555 isZeroFillSection(s->type) ? 0 : s->content.size(); 556 segFileSize = std::max(segFileSize, sectOffset + sectFileSize); 557 558 _sectInfo[s].fileOffset = _segInfo[&sg].fileOffset + sectOffset; 559 DEBUG_WITH_TYPE("MachOFileLayout", 560 llvm::dbgs() << " section=" << s->sectionName 561 << ", fileOffset=" << fileOffset << "\n"); 562 } 563 564 // round up all segments to page aligned, except __LINKEDIT 565 if (!sg.name.equals("__LINKEDIT")) { 566 _segInfo[&sg].fileSize = llvm::alignTo(segFileSize, _file.pageSize); 567 fileOffset = llvm::alignTo(fileOffset + segFileSize, _file.pageSize); 568 } 569 _addressOfLinkEdit = sg.address + sg.size; 570 } 571 _startOfLinkEdit = fileOffset; 572 } 573 574 size_t MachOFileLayout::size() const { 575 return _endOfSymbolStrings; 576 } 577 578 void MachOFileLayout::writeMachHeader() { 579 auto cpusubtype = MachOLinkingContext::cpuSubtypeFromArch(_file.arch); 580 // dynamic x86 executables on newer OS version should also set the 581 // CPU_SUBTYPE_LIB64 mask in the CPU subtype. 582 // FIXME: Check that this is a dynamic executable, not a static one. 583 if (_file.fileType == llvm::MachO::MH_EXECUTE && 584 cpusubtype == CPU_SUBTYPE_X86_64_ALL && 585 _file.os == MachOLinkingContext::OS::macOSX) { 586 uint32_t version; 587 bool failed = MachOLinkingContext::parsePackedVersion("10.5", version); 588 if (!failed && _file.minOSverson >= version) 589 cpusubtype |= CPU_SUBTYPE_LIB64; 590 } 591 592 mach_header *mh = reinterpret_cast<mach_header*>(_buffer); 593 mh->magic = _is64 ? llvm::MachO::MH_MAGIC_64 : llvm::MachO::MH_MAGIC; 594 mh->cputype = MachOLinkingContext::cpuTypeFromArch(_file.arch); 595 mh->cpusubtype = cpusubtype; 596 mh->filetype = _file.fileType; 597 mh->ncmds = _countOfLoadCommands; 598 mh->sizeofcmds = _endOfLoadCommands - _startOfLoadCommands; 599 mh->flags = _file.flags; 600 if (_swap) 601 swapStruct(*mh); 602 } 603 604 uint32_t MachOFileLayout::indirectSymbolIndex(const Section §, 605 uint32_t &index) { 606 if (sect.indirectSymbols.empty()) 607 return 0; 608 uint32_t result = index; 609 index += sect.indirectSymbols.size(); 610 return result; 611 } 612 613 uint32_t MachOFileLayout::indirectSymbolElementSize(const Section §) { 614 if (sect.indirectSymbols.empty()) 615 return 0; 616 if (sect.type != S_SYMBOL_STUBS) 617 return 0; 618 return sect.content.size() / sect.indirectSymbols.size(); 619 } 620 621 template <typename T> 622 llvm::Error MachOFileLayout::writeSingleSegmentLoadCommand(uint8_t *&lc) { 623 typename T::command* seg = reinterpret_cast<typename T::command*>(lc); 624 seg->cmd = T::LC; 625 seg->cmdsize = sizeof(typename T::command) 626 + _file.sections.size() * sizeof(typename T::section); 627 uint8_t *next = lc + seg->cmdsize; 628 memset(seg->segname, 0, 16); 629 seg->vmaddr = 0; 630 seg->vmsize = _file.sections.back().address 631 + _file.sections.back().content.size(); 632 seg->fileoff = _endOfLoadCommands; 633 seg->filesize = _sectInfo[&_file.sections.back()].fileOffset + 634 _file.sections.back().content.size() - 635 _sectInfo[&_file.sections.front()].fileOffset; 636 seg->maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; 637 seg->initprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; 638 seg->nsects = _file.sections.size(); 639 seg->flags = 0; 640 if (_swap) 641 swapStruct(*seg); 642 typename T::section *sout = reinterpret_cast<typename T::section*> 643 (lc+sizeof(typename T::command)); 644 uint32_t relOffset = _startOfRelocations; 645 uint32_t indirectSymRunningIndex = 0; 646 for (const Section &sin : _file.sections) { 647 setString16(sin.sectionName, sout->sectname); 648 setString16(sin.segmentName, sout->segname); 649 sout->addr = sin.address; 650 sout->size = sin.content.size(); 651 sout->offset = _sectInfo[&sin].fileOffset; 652 sout->align = llvm::Log2_32(sin.alignment); 653 sout->reloff = sin.relocations.empty() ? 0 : relOffset; 654 sout->nreloc = sin.relocations.size(); 655 sout->flags = sin.type | sin.attributes; 656 sout->reserved1 = indirectSymbolIndex(sin, indirectSymRunningIndex); 657 sout->reserved2 = indirectSymbolElementSize(sin); 658 relOffset += sin.relocations.size() * sizeof(any_relocation_info); 659 if (_swap) 660 swapStruct(*sout); 661 ++sout; 662 } 663 lc = next; 664 return llvm::Error::success(); 665 } 666 667 template <typename T> 668 llvm::Error MachOFileLayout::writeSegmentLoadCommands(uint8_t *&lc) { 669 uint32_t indirectSymRunningIndex = 0; 670 for (const Segment &seg : _file.segments) { 671 // Link edit has no sections and a custom range of address, so handle it 672 // specially. 673 SegExtraInfo &segInfo = _segInfo[&seg]; 674 if (seg.name.equals("__LINKEDIT")) { 675 size_t linkeditSize = _endOfLinkEdit - _startOfLinkEdit; 676 typename T::command* cmd = reinterpret_cast<typename T::command*>(lc); 677 cmd->cmd = T::LC; 678 cmd->cmdsize = sizeof(typename T::command); 679 uint8_t *next = lc + cmd->cmdsize; 680 setString16("__LINKEDIT", cmd->segname); 681 cmd->vmaddr = _addressOfLinkEdit; 682 cmd->vmsize = llvm::alignTo(linkeditSize, _file.pageSize); 683 cmd->fileoff = _startOfLinkEdit; 684 cmd->filesize = linkeditSize; 685 cmd->initprot = seg.init_access; 686 cmd->maxprot = seg.max_access; 687 cmd->nsects = 0; 688 cmd->flags = 0; 689 if (_swap) 690 swapStruct(*cmd); 691 lc = next; 692 continue; 693 } 694 // Write segment command with trailing sections. 695 typename T::command* cmd = reinterpret_cast<typename T::command*>(lc); 696 cmd->cmd = T::LC; 697 cmd->cmdsize = sizeof(typename T::command) 698 + segInfo.sections.size() * sizeof(typename T::section); 699 uint8_t *next = lc + cmd->cmdsize; 700 setString16(seg.name, cmd->segname); 701 cmd->vmaddr = seg.address; 702 cmd->vmsize = seg.size; 703 cmd->fileoff = segInfo.fileOffset; 704 cmd->filesize = segInfo.fileSize; 705 cmd->initprot = seg.init_access; 706 cmd->maxprot = seg.max_access; 707 cmd->nsects = segInfo.sections.size(); 708 cmd->flags = 0; 709 if (_swap) 710 swapStruct(*cmd); 711 typename T::section *sect = reinterpret_cast<typename T::section*> 712 (lc+sizeof(typename T::command)); 713 for (const Section *section : segInfo.sections) { 714 setString16(section->sectionName, sect->sectname); 715 setString16(section->segmentName, sect->segname); 716 sect->addr = section->address; 717 sect->size = section->content.size(); 718 if (isZeroFillSection(section->type)) 719 sect->offset = 0; 720 else 721 sect->offset = section->address - seg.address + segInfo.fileOffset; 722 sect->align = llvm::Log2_32(section->alignment); 723 sect->reloff = 0; 724 sect->nreloc = 0; 725 sect->flags = section->type | section->attributes; 726 sect->reserved1 = indirectSymbolIndex(*section, indirectSymRunningIndex); 727 sect->reserved2 = indirectSymbolElementSize(*section); 728 if (_swap) 729 swapStruct(*sect); 730 ++sect; 731 } 732 lc = reinterpret_cast<uint8_t*>(next); 733 } 734 return llvm::Error::success(); 735 } 736 737 static void writeVersionMinLoadCommand(const NormalizedFile &_file, 738 bool _swap, 739 uint8_t *&lc) { 740 if (!_file.hasMinVersionLoadCommand) 741 return; 742 version_min_command *vm = reinterpret_cast<version_min_command*>(lc); 743 switch (_file.os) { 744 case MachOLinkingContext::OS::unknown: 745 vm->cmd = _file.minOSVersionKind; 746 vm->cmdsize = sizeof(version_min_command); 747 vm->version = _file.minOSverson; 748 vm->sdk = 0; 749 break; 750 case MachOLinkingContext::OS::macOSX: 751 vm->cmd = LC_VERSION_MIN_MACOSX; 752 vm->cmdsize = sizeof(version_min_command); 753 vm->version = _file.minOSverson; 754 vm->sdk = _file.sdkVersion; 755 break; 756 case MachOLinkingContext::OS::iOS: 757 case MachOLinkingContext::OS::iOS_simulator: 758 vm->cmd = LC_VERSION_MIN_IPHONEOS; 759 vm->cmdsize = sizeof(version_min_command); 760 vm->version = _file.minOSverson; 761 vm->sdk = _file.sdkVersion; 762 break; 763 } 764 if (_swap) 765 swapStruct(*vm); 766 lc += sizeof(version_min_command); 767 } 768 769 llvm::Error MachOFileLayout::writeLoadCommands() { 770 uint8_t *lc = &_buffer[_startOfLoadCommands]; 771 if (_file.fileType == llvm::MachO::MH_OBJECT) { 772 // Object files have one unnamed segment which holds all sections. 773 if (_is64) { 774 if (auto ec = writeSingleSegmentLoadCommand<MachO64Trait>(lc)) 775 return ec; 776 } else { 777 if (auto ec = writeSingleSegmentLoadCommand<MachO32Trait>(lc)) 778 return ec; 779 } 780 // Add LC_SYMTAB with symbol table info 781 symtab_command* st = reinterpret_cast<symtab_command*>(lc); 782 st->cmd = LC_SYMTAB; 783 st->cmdsize = sizeof(symtab_command); 784 st->symoff = _startOfSymbols; 785 st->nsyms = _file.stabsSymbols.size() + _file.localSymbols.size() + 786 _file.globalSymbols.size() + _file.undefinedSymbols.size(); 787 st->stroff = _startOfSymbolStrings; 788 st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; 789 if (_swap) 790 swapStruct(*st); 791 lc += sizeof(symtab_command); 792 793 // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, 794 // LC_VERSION_MIN_WATCHOS, LC_VERSION_MIN_TVOS 795 writeVersionMinLoadCommand(_file, _swap, lc); 796 797 // Add LC_FUNCTION_STARTS if needed. 798 if (_functionStartsSize != 0) { 799 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); 800 dl->cmd = LC_FUNCTION_STARTS; 801 dl->cmdsize = sizeof(linkedit_data_command); 802 dl->dataoff = _startOfFunctionStarts; 803 dl->datasize = _functionStartsSize; 804 if (_swap) 805 swapStruct(*dl); 806 lc += sizeof(linkedit_data_command); 807 } 808 809 // Add LC_DATA_IN_CODE if requested. 810 if (_file.generateDataInCodeLoadCommand) { 811 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); 812 dl->cmd = LC_DATA_IN_CODE; 813 dl->cmdsize = sizeof(linkedit_data_command); 814 dl->dataoff = _startOfDataInCode; 815 dl->datasize = _dataInCodeSize; 816 if (_swap) 817 swapStruct(*dl); 818 lc += sizeof(linkedit_data_command); 819 } 820 } else { 821 // Final linked images have sections under segments. 822 if (_is64) { 823 if (auto ec = writeSegmentLoadCommands<MachO64Trait>(lc)) 824 return ec; 825 } else { 826 if (auto ec = writeSegmentLoadCommands<MachO32Trait>(lc)) 827 return ec; 828 } 829 830 // Add LC_ID_DYLIB command for dynamic libraries. 831 if (_file.fileType == llvm::MachO::MH_DYLIB) { 832 dylib_command *dc = reinterpret_cast<dylib_command*>(lc); 833 StringRef path = _file.installName; 834 uint32_t size = sizeof(dylib_command) + pointerAlign(path.size() + 1); 835 dc->cmd = LC_ID_DYLIB; 836 dc->cmdsize = size; 837 dc->dylib.name = sizeof(dylib_command); // offset 838 // needs to be some constant value different than the one in LC_LOAD_DYLIB 839 dc->dylib.timestamp = 1; 840 dc->dylib.current_version = _file.currentVersion; 841 dc->dylib.compatibility_version = _file.compatVersion; 842 if (_swap) 843 swapStruct(*dc); 844 memcpy(lc + sizeof(dylib_command), path.begin(), path.size()); 845 lc[sizeof(dylib_command) + path.size()] = '\0'; 846 lc += size; 847 } 848 849 // Add LC_DYLD_INFO_ONLY. 850 dyld_info_command* di = reinterpret_cast<dyld_info_command*>(lc); 851 di->cmd = LC_DYLD_INFO_ONLY; 852 di->cmdsize = sizeof(dyld_info_command); 853 di->rebase_off = _rebaseInfo.size() ? _startOfRebaseInfo : 0; 854 di->rebase_size = _rebaseInfo.size(); 855 di->bind_off = _bindingInfo.size() ? _startOfBindingInfo : 0; 856 di->bind_size = _bindingInfo.size(); 857 di->weak_bind_off = 0; 858 di->weak_bind_size = 0; 859 di->lazy_bind_off = _lazyBindingInfo.size() ? _startOfLazyBindingInfo : 0; 860 di->lazy_bind_size = _lazyBindingInfo.size(); 861 di->export_off = _exportTrie.size() ? _startOfExportTrie : 0; 862 di->export_size = _exportTrie.size(); 863 if (_swap) 864 swapStruct(*di); 865 lc += sizeof(dyld_info_command); 866 867 // Add LC_SYMTAB with symbol table info. 868 symtab_command* st = reinterpret_cast<symtab_command*>(lc); 869 st->cmd = LC_SYMTAB; 870 st->cmdsize = sizeof(symtab_command); 871 st->symoff = _startOfSymbols; 872 st->nsyms = _file.stabsSymbols.size() + _file.localSymbols.size() + 873 _file.globalSymbols.size() + _file.undefinedSymbols.size(); 874 st->stroff = _startOfSymbolStrings; 875 st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; 876 if (_swap) 877 swapStruct(*st); 878 lc += sizeof(symtab_command); 879 880 // Add LC_DYSYMTAB 881 if (_file.fileType != llvm::MachO::MH_PRELOAD) { 882 dysymtab_command* dst = reinterpret_cast<dysymtab_command*>(lc); 883 dst->cmd = LC_DYSYMTAB; 884 dst->cmdsize = sizeof(dysymtab_command); 885 dst->ilocalsym = _symbolTableLocalsStartIndex; 886 dst->nlocalsym = _file.stabsSymbols.size() + 887 _file.localSymbols.size(); 888 dst->iextdefsym = _symbolTableGlobalsStartIndex; 889 dst->nextdefsym = _file.globalSymbols.size(); 890 dst->iundefsym = _symbolTableUndefinesStartIndex; 891 dst->nundefsym = _file.undefinedSymbols.size(); 892 dst->tocoff = 0; 893 dst->ntoc = 0; 894 dst->modtaboff = 0; 895 dst->nmodtab = 0; 896 dst->extrefsymoff = 0; 897 dst->nextrefsyms = 0; 898 dst->indirectsymoff = _startOfIndirectSymbols; 899 dst->nindirectsyms = _indirectSymbolTableCount; 900 dst->extreloff = 0; 901 dst->nextrel = 0; 902 dst->locreloff = 0; 903 dst->nlocrel = 0; 904 if (_swap) 905 swapStruct(*dst); 906 lc += sizeof(dysymtab_command); 907 } 908 909 // If main executable, add LC_LOAD_DYLINKER 910 if (_file.fileType == llvm::MachO::MH_EXECUTE) { 911 // Build LC_LOAD_DYLINKER load command. 912 uint32_t size=pointerAlign(sizeof(dylinker_command)+dyldPath().size()+1); 913 dylinker_command* dl = reinterpret_cast<dylinker_command*>(lc); 914 dl->cmd = LC_LOAD_DYLINKER; 915 dl->cmdsize = size; 916 dl->name = sizeof(dylinker_command); // offset 917 if (_swap) 918 swapStruct(*dl); 919 memcpy(lc+sizeof(dylinker_command), dyldPath().data(), dyldPath().size()); 920 lc[sizeof(dylinker_command)+dyldPath().size()] = '\0'; 921 lc += size; 922 } 923 924 // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS, 925 // LC_VERSION_MIN_TVOS 926 writeVersionMinLoadCommand(_file, _swap, lc); 927 928 // Add LC_SOURCE_VERSION 929 { 930 // Note, using a temporary here to appease UB as we may not be aligned 931 // enough for a struct containing a uint64_t when emitting a 32-bit binary 932 source_version_command sv; 933 sv.cmd = LC_SOURCE_VERSION; 934 sv.cmdsize = sizeof(source_version_command); 935 sv.version = _file.sourceVersion; 936 if (_swap) 937 swapStruct(sv); 938 memcpy(lc, &sv, sizeof(source_version_command)); 939 lc += sizeof(source_version_command); 940 } 941 942 // If main executable, add LC_MAIN. 943 if (_file.fileType == llvm::MachO::MH_EXECUTE) { 944 // Build LC_MAIN load command. 945 // Note, using a temporary here to appease UB as we may not be aligned 946 // enough for a struct containing a uint64_t when emitting a 32-bit binary 947 entry_point_command ep; 948 ep.cmd = LC_MAIN; 949 ep.cmdsize = sizeof(entry_point_command); 950 ep.entryoff = _file.entryAddress - _seg1addr; 951 ep.stacksize = _file.stackSize; 952 if (_swap) 953 swapStruct(ep); 954 memcpy(lc, &ep, sizeof(entry_point_command)); 955 lc += sizeof(entry_point_command); 956 } 957 958 // Add LC_LOAD_DYLIB commands 959 for (const DependentDylib &dep : _file.dependentDylibs) { 960 dylib_command* dc = reinterpret_cast<dylib_command*>(lc); 961 uint32_t size = sizeof(dylib_command) + pointerAlign(dep.path.size()+1); 962 dc->cmd = dep.kind; 963 dc->cmdsize = size; 964 dc->dylib.name = sizeof(dylib_command); // offset 965 // needs to be some constant value different than the one in LC_ID_DYLIB 966 dc->dylib.timestamp = 2; 967 dc->dylib.current_version = dep.currentVersion; 968 dc->dylib.compatibility_version = dep.compatVersion; 969 if (_swap) 970 swapStruct(*dc); 971 memcpy(lc+sizeof(dylib_command), dep.path.begin(), dep.path.size()); 972 lc[sizeof(dylib_command)+dep.path.size()] = '\0'; 973 lc += size; 974 } 975 976 // Add LC_RPATH 977 for (const StringRef &path : _file.rpaths) { 978 rpath_command *rpc = reinterpret_cast<rpath_command *>(lc); 979 uint32_t size = pointerAlign(sizeof(rpath_command) + path.size() + 1); 980 rpc->cmd = LC_RPATH; 981 rpc->cmdsize = size; 982 rpc->path = sizeof(rpath_command); // offset 983 if (_swap) 984 swapStruct(*rpc); 985 memcpy(lc+sizeof(rpath_command), path.begin(), path.size()); 986 lc[sizeof(rpath_command)+path.size()] = '\0'; 987 lc += size; 988 } 989 990 // Add LC_FUNCTION_STARTS if needed. 991 if (_functionStartsSize != 0) { 992 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); 993 dl->cmd = LC_FUNCTION_STARTS; 994 dl->cmdsize = sizeof(linkedit_data_command); 995 dl->dataoff = _startOfFunctionStarts; 996 dl->datasize = _functionStartsSize; 997 if (_swap) 998 swapStruct(*dl); 999 lc += sizeof(linkedit_data_command); 1000 } 1001 1002 // Add LC_DATA_IN_CODE if requested. 1003 if (_file.generateDataInCodeLoadCommand) { 1004 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); 1005 dl->cmd = LC_DATA_IN_CODE; 1006 dl->cmdsize = sizeof(linkedit_data_command); 1007 dl->dataoff = _startOfDataInCode; 1008 dl->datasize = _dataInCodeSize; 1009 if (_swap) 1010 swapStruct(*dl); 1011 lc += sizeof(linkedit_data_command); 1012 } 1013 } 1014 assert(lc == &_buffer[_endOfLoadCommands]); 1015 return llvm::Error::success(); 1016 } 1017 1018 void MachOFileLayout::writeSectionContent() { 1019 for (const Section &s : _file.sections) { 1020 // Copy all section content to output buffer. 1021 if (isZeroFillSection(s.type)) 1022 continue; 1023 if (s.content.empty()) 1024 continue; 1025 uint32_t offset = _sectInfo[&s].fileOffset; 1026 assert(offset >= _endOfLoadCommands); 1027 uint8_t *p = &_buffer[offset]; 1028 memcpy(p, &s.content[0], s.content.size()); 1029 p += s.content.size(); 1030 } 1031 } 1032 1033 void MachOFileLayout::writeRelocations() { 1034 uint32_t relOffset = _startOfRelocations; 1035 for (Section sect : _file.sections) { 1036 for (Relocation r : sect.relocations) { 1037 any_relocation_info* rb = reinterpret_cast<any_relocation_info*>( 1038 &_buffer[relOffset]); 1039 *rb = packRelocation(r, _swap, _bigEndianArch); 1040 relOffset += sizeof(any_relocation_info); 1041 } 1042 } 1043 } 1044 1045 void MachOFileLayout::appendSymbols(const std::vector<Symbol> &symbols, 1046 uint32_t &symOffset, uint32_t &strOffset) { 1047 for (const Symbol &sym : symbols) { 1048 if (_is64) { 1049 nlist_64* nb = reinterpret_cast<nlist_64*>(&_buffer[symOffset]); 1050 nb->n_strx = strOffset - _startOfSymbolStrings; 1051 nb->n_type = sym.type | sym.scope; 1052 nb->n_sect = sym.sect; 1053 nb->n_desc = sym.desc; 1054 nb->n_value = sym.value; 1055 if (_swap) 1056 swapStruct(*nb); 1057 symOffset += sizeof(nlist_64); 1058 } else { 1059 nlist* nb = reinterpret_cast<nlist*>(&_buffer[symOffset]); 1060 nb->n_strx = strOffset - _startOfSymbolStrings; 1061 nb->n_type = sym.type | sym.scope; 1062 nb->n_sect = sym.sect; 1063 nb->n_desc = sym.desc; 1064 nb->n_value = sym.value; 1065 if (_swap) 1066 swapStruct(*nb); 1067 symOffset += sizeof(nlist); 1068 } 1069 memcpy(&_buffer[strOffset], sym.name.begin(), sym.name.size()); 1070 strOffset += sym.name.size(); 1071 _buffer[strOffset++] ='\0'; // Strings in table have nul terminator. 1072 } 1073 } 1074 1075 void MachOFileLayout::writeFunctionStartsInfo() { 1076 if (!_functionStartsSize) 1077 return; 1078 memcpy(&_buffer[_startOfFunctionStarts], _file.functionStarts.data(), 1079 _functionStartsSize); 1080 } 1081 1082 void MachOFileLayout::writeDataInCodeInfo() { 1083 uint32_t offset = _startOfDataInCode; 1084 for (const DataInCode &entry : _file.dataInCode) { 1085 data_in_code_entry *dst = reinterpret_cast<data_in_code_entry*>( 1086 &_buffer[offset]); 1087 dst->offset = entry.offset; 1088 dst->length = entry.length; 1089 dst->kind = entry.kind; 1090 if (_swap) 1091 swapStruct(*dst); 1092 offset += sizeof(data_in_code_entry); 1093 } 1094 } 1095 1096 void MachOFileLayout::writeSymbolTable() { 1097 // Write symbol table and symbol strings in parallel. 1098 uint32_t symOffset = _startOfSymbols; 1099 uint32_t strOffset = _startOfSymbolStrings; 1100 // Reserve n_strx offset of zero to mean no name. 1101 _buffer[strOffset++] = ' '; 1102 _buffer[strOffset++] = '\0'; 1103 appendSymbols(_file.stabsSymbols, symOffset, strOffset); 1104 appendSymbols(_file.localSymbols, symOffset, strOffset); 1105 appendSymbols(_file.globalSymbols, symOffset, strOffset); 1106 appendSymbols(_file.undefinedSymbols, symOffset, strOffset); 1107 // Write indirect symbol table array. 1108 uint32_t *indirects = reinterpret_cast<uint32_t*> 1109 (&_buffer[_startOfIndirectSymbols]); 1110 if (_file.fileType == llvm::MachO::MH_OBJECT) { 1111 // Object files have sections in same order as input normalized file. 1112 for (const Section §ion : _file.sections) { 1113 for (uint32_t index : section.indirectSymbols) { 1114 if (_swap) 1115 *indirects++ = llvm::sys::getSwappedBytes(index); 1116 else 1117 *indirects++ = index; 1118 } 1119 } 1120 } else { 1121 // Final linked images must sort sections from normalized file. 1122 for (const Segment &seg : _file.segments) { 1123 SegExtraInfo &segInfo = _segInfo[&seg]; 1124 for (const Section *section : segInfo.sections) { 1125 for (uint32_t index : section->indirectSymbols) { 1126 if (_swap) 1127 *indirects++ = llvm::sys::getSwappedBytes(index); 1128 else 1129 *indirects++ = index; 1130 } 1131 } 1132 } 1133 } 1134 } 1135 1136 void MachOFileLayout::writeRebaseInfo() { 1137 memcpy(&_buffer[_startOfRebaseInfo], _rebaseInfo.bytes(), _rebaseInfo.size()); 1138 } 1139 1140 void MachOFileLayout::writeBindingInfo() { 1141 memcpy(&_buffer[_startOfBindingInfo], 1142 _bindingInfo.bytes(), _bindingInfo.size()); 1143 } 1144 1145 void MachOFileLayout::writeLazyBindingInfo() { 1146 memcpy(&_buffer[_startOfLazyBindingInfo], 1147 _lazyBindingInfo.bytes(), _lazyBindingInfo.size()); 1148 } 1149 1150 void MachOFileLayout::writeExportInfo() { 1151 memcpy(&_buffer[_startOfExportTrie], _exportTrie.bytes(), _exportTrie.size()); 1152 } 1153 1154 void MachOFileLayout::buildLinkEditInfo() { 1155 buildRebaseInfo(); 1156 buildBindInfo(); 1157 buildLazyBindInfo(); 1158 buildExportTrie(); 1159 computeSymbolTableSizes(); 1160 computeFunctionStartsSize(); 1161 computeDataInCodeSize(); 1162 } 1163 1164 void MachOFileLayout::buildSectionRelocations() { 1165 1166 } 1167 1168 void MachOFileLayout::buildRebaseInfo() { 1169 // TODO: compress rebasing info. 1170 for (const RebaseLocation& entry : _file.rebasingInfo) { 1171 _rebaseInfo.append_byte(REBASE_OPCODE_SET_TYPE_IMM | entry.kind); 1172 _rebaseInfo.append_byte(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 1173 | entry.segIndex); 1174 _rebaseInfo.append_uleb128(entry.segOffset); 1175 _rebaseInfo.append_uleb128(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1); 1176 } 1177 _rebaseInfo.append_byte(REBASE_OPCODE_DONE); 1178 _rebaseInfo.align(_is64 ? 8 : 4); 1179 } 1180 1181 void MachOFileLayout::buildBindInfo() { 1182 // TODO: compress bind info. 1183 uint64_t lastAddend = 0; 1184 int lastOrdinal = 0x80000000; 1185 StringRef lastSymbolName; 1186 BindType lastType = (BindType)0; 1187 Hex32 lastSegOffset = ~0U; 1188 uint8_t lastSegIndex = (uint8_t)~0U; 1189 for (const BindLocation& entry : _file.bindingInfo) { 1190 if (entry.ordinal != lastOrdinal) { 1191 if (entry.ordinal <= 0) 1192 _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | 1193 (entry.ordinal & BIND_IMMEDIATE_MASK)); 1194 else if (entry.ordinal <= BIND_IMMEDIATE_MASK) 1195 _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1196 entry.ordinal); 1197 else { 1198 _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); 1199 _bindingInfo.append_uleb128(entry.ordinal); 1200 } 1201 lastOrdinal = entry.ordinal; 1202 } 1203 1204 if (lastSymbolName != entry.symbolName) { 1205 _bindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); 1206 _bindingInfo.append_string(entry.symbolName); 1207 lastSymbolName = entry.symbolName; 1208 } 1209 1210 if (lastType != entry.kind) { 1211 _bindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind); 1212 lastType = entry.kind; 1213 } 1214 1215 if (lastSegIndex != entry.segIndex || lastSegOffset != entry.segOffset) { 1216 _bindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 1217 | entry.segIndex); 1218 _bindingInfo.append_uleb128(entry.segOffset); 1219 lastSegIndex = entry.segIndex; 1220 lastSegOffset = entry.segOffset; 1221 } 1222 if (entry.addend != lastAddend) { 1223 _bindingInfo.append_byte(BIND_OPCODE_SET_ADDEND_SLEB); 1224 _bindingInfo.append_sleb128(entry.addend); 1225 lastAddend = entry.addend; 1226 } 1227 _bindingInfo.append_byte(BIND_OPCODE_DO_BIND); 1228 } 1229 _bindingInfo.append_byte(BIND_OPCODE_DONE); 1230 _bindingInfo.align(_is64 ? 8 : 4); 1231 } 1232 1233 void MachOFileLayout::buildLazyBindInfo() { 1234 for (const BindLocation& entry : _file.lazyBindingInfo) { 1235 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 1236 | entry.segIndex); 1237 _lazyBindingInfo.append_uleb128(entry.segOffset); 1238 if (entry.ordinal <= 0) 1239 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | 1240 (entry.ordinal & BIND_IMMEDIATE_MASK)); 1241 else if (entry.ordinal <= BIND_IMMEDIATE_MASK) 1242 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1243 entry.ordinal); 1244 else { 1245 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); 1246 _lazyBindingInfo.append_uleb128(entry.ordinal); 1247 } 1248 // FIXME: We need to | the opcode here with flags. 1249 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); 1250 _lazyBindingInfo.append_string(entry.symbolName); 1251 _lazyBindingInfo.append_byte(BIND_OPCODE_DO_BIND); 1252 _lazyBindingInfo.append_byte(BIND_OPCODE_DONE); 1253 } 1254 _lazyBindingInfo.align(_is64 ? 8 : 4); 1255 } 1256 1257 void TrieNode::addSymbol(const Export& entry, 1258 BumpPtrAllocator &allocator, 1259 std::vector<TrieNode*> &allNodes) { 1260 StringRef partialStr = entry.name.drop_front(_cummulativeString.size()); 1261 for (TrieEdge &edge : _children) { 1262 StringRef edgeStr = edge._subString; 1263 if (partialStr.startswith(edgeStr)) { 1264 // Already have matching edge, go down that path. 1265 edge._child->addSymbol(entry, allocator, allNodes); 1266 return; 1267 } 1268 // See if string has commmon prefix with existing edge. 1269 for (int n=edgeStr.size()-1; n > 0; --n) { 1270 if (partialStr.substr(0, n).equals(edgeStr.substr(0, n))) { 1271 // Splice in new node: was A -> C, now A -> B -> C 1272 StringRef bNodeStr = edge._child->_cummulativeString; 1273 bNodeStr = bNodeStr.drop_back(edgeStr.size()-n).copy(allocator); 1274 auto *bNode = new (allocator) TrieNode(bNodeStr); 1275 allNodes.push_back(bNode); 1276 TrieNode* cNode = edge._child; 1277 StringRef abEdgeStr = edgeStr.substr(0,n).copy(allocator); 1278 StringRef bcEdgeStr = edgeStr.substr(n).copy(allocator); 1279 DEBUG_WITH_TYPE("trie-builder", llvm::dbgs() 1280 << "splice in TrieNode('" << bNodeStr 1281 << "') between edge '" 1282 << abEdgeStr << "' and edge='" 1283 << bcEdgeStr<< "'\n"); 1284 TrieEdge& abEdge = edge; 1285 abEdge._subString = abEdgeStr; 1286 abEdge._child = bNode; 1287 auto *bcEdge = new (allocator) TrieEdge(bcEdgeStr, cNode); 1288 bNode->_children.insert(bNode->_children.end(), bcEdge); 1289 bNode->addSymbol(entry, allocator, allNodes); 1290 return; 1291 } 1292 } 1293 } 1294 if (entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { 1295 assert(entry.otherOffset != 0); 1296 } 1297 if (entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { 1298 assert(entry.otherOffset != 0); 1299 } 1300 // No commonality with any existing child, make a new edge. 1301 auto *newNode = new (allocator) TrieNode(entry.name.copy(allocator)); 1302 auto *newEdge = new (allocator) TrieEdge(partialStr, newNode); 1303 _children.insert(_children.end(), newEdge); 1304 DEBUG_WITH_TYPE("trie-builder", llvm::dbgs() 1305 << "new TrieNode('" << entry.name << "') with edge '" 1306 << partialStr << "' from node='" 1307 << _cummulativeString << "'\n"); 1308 newNode->_address = entry.offset; 1309 newNode->_flags = entry.flags | entry.kind; 1310 newNode->_other = entry.otherOffset; 1311 if ((entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && !entry.otherName.empty()) 1312 newNode->_importedName = entry.otherName.copy(allocator); 1313 newNode->_hasExportInfo = true; 1314 allNodes.push_back(newNode); 1315 } 1316 1317 void TrieNode::addOrderedNodes(const Export& entry, 1318 std::vector<TrieNode*> &orderedNodes) { 1319 if (!_ordered) { 1320 orderedNodes.push_back(this); 1321 _ordered = true; 1322 } 1323 1324 StringRef partialStr = entry.name.drop_front(_cummulativeString.size()); 1325 for (TrieEdge &edge : _children) { 1326 StringRef edgeStr = edge._subString; 1327 if (partialStr.startswith(edgeStr)) { 1328 // Already have matching edge, go down that path. 1329 edge._child->addOrderedNodes(entry, orderedNodes); 1330 return; 1331 } 1332 } 1333 } 1334 1335 bool TrieNode::updateOffset(uint32_t& offset) { 1336 uint32_t nodeSize = 1; // Length when no export info 1337 if (_hasExportInfo) { 1338 if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { 1339 nodeSize = llvm::getULEB128Size(_flags); 1340 nodeSize += llvm::getULEB128Size(_other); // Other contains ordinal. 1341 nodeSize += _importedName.size(); 1342 ++nodeSize; // Trailing zero in imported name. 1343 } else { 1344 nodeSize = llvm::getULEB128Size(_flags) + llvm::getULEB128Size(_address); 1345 if (_flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) 1346 nodeSize += llvm::getULEB128Size(_other); 1347 } 1348 // Overall node size so far is uleb128 of export info + actual export info. 1349 nodeSize += llvm::getULEB128Size(nodeSize); 1350 } 1351 // Compute size of all child edges. 1352 ++nodeSize; // Byte for number of chidren. 1353 for (TrieEdge &edge : _children) { 1354 nodeSize += edge._subString.size() + 1 // String length. 1355 + llvm::getULEB128Size(edge._child->_trieOffset); // Offset len. 1356 } 1357 // On input, 'offset' is new prefered location for this node. 1358 bool result = (_trieOffset != offset); 1359 // Store new location in node object for use by parents. 1360 _trieOffset = offset; 1361 // Update offset for next iteration. 1362 offset += nodeSize; 1363 // Return true if _trieOffset was changed. 1364 return result; 1365 } 1366 1367 void TrieNode::appendToByteBuffer(ByteBuffer &out) { 1368 if (_hasExportInfo) { 1369 if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { 1370 if (!_importedName.empty()) { 1371 // nodes with re-export info: size, flags, ordinal, import-name 1372 uint32_t nodeSize = llvm::getULEB128Size(_flags) 1373 + llvm::getULEB128Size(_other) 1374 + _importedName.size() + 1; 1375 assert(nodeSize < 256); 1376 out.append_byte(nodeSize); 1377 out.append_uleb128(_flags); 1378 out.append_uleb128(_other); 1379 out.append_string(_importedName); 1380 } else { 1381 // nodes without re-export info: size, flags, ordinal, empty-string 1382 uint32_t nodeSize = llvm::getULEB128Size(_flags) 1383 + llvm::getULEB128Size(_other) + 1; 1384 assert(nodeSize < 256); 1385 out.append_byte(nodeSize); 1386 out.append_uleb128(_flags); 1387 out.append_uleb128(_other); 1388 out.append_byte(0); 1389 } 1390 } else if ( _flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) { 1391 // Nodes with export info: size, flags, address, other 1392 uint32_t nodeSize = llvm::getULEB128Size(_flags) 1393 + llvm::getULEB128Size(_address) 1394 + llvm::getULEB128Size(_other); 1395 assert(nodeSize < 256); 1396 out.append_byte(nodeSize); 1397 out.append_uleb128(_flags); 1398 out.append_uleb128(_address); 1399 out.append_uleb128(_other); 1400 } else { 1401 // Nodes with export info: size, flags, address 1402 uint32_t nodeSize = llvm::getULEB128Size(_flags) 1403 + llvm::getULEB128Size(_address); 1404 assert(nodeSize < 256); 1405 out.append_byte(nodeSize); 1406 out.append_uleb128(_flags); 1407 out.append_uleb128(_address); 1408 } 1409 } else { 1410 // Node with no export info. 1411 uint32_t nodeSize = 0; 1412 out.append_byte(nodeSize); 1413 } 1414 // Add number of children. 1415 assert(_children.size() < 256); 1416 out.append_byte(_children.size()); 1417 // Append each child edge substring and node offset. 1418 for (TrieEdge &edge : _children) { 1419 out.append_string(edge._subString); 1420 out.append_uleb128(edge._child->_trieOffset); 1421 } 1422 } 1423 1424 void MachOFileLayout::buildExportTrie() { 1425 if (_file.exportInfo.empty()) 1426 return; 1427 1428 // For all temporary strings and objects used building trie. 1429 BumpPtrAllocator allocator; 1430 1431 // Build trie of all exported symbols. 1432 auto *rootNode = new (allocator) TrieNode(StringRef()); 1433 std::vector<TrieNode*> allNodes; 1434 allNodes.reserve(_file.exportInfo.size()*2); 1435 allNodes.push_back(rootNode); 1436 for (const Export& entry : _file.exportInfo) { 1437 rootNode->addSymbol(entry, allocator, allNodes); 1438 } 1439 1440 std::vector<TrieNode*> orderedNodes; 1441 orderedNodes.reserve(allNodes.size()); 1442 1443 for (const Export& entry : _file.exportInfo) 1444 rootNode->addOrderedNodes(entry, orderedNodes); 1445 1446 // Assign each node in the vector an offset in the trie stream, iterating 1447 // until all uleb128 sizes have stabilized. 1448 bool more; 1449 do { 1450 uint32_t offset = 0; 1451 more = false; 1452 for (TrieNode* node : orderedNodes) { 1453 if (node->updateOffset(offset)) 1454 more = true; 1455 } 1456 } while (more); 1457 1458 // Serialize trie to ByteBuffer. 1459 for (TrieNode* node : orderedNodes) { 1460 node->appendToByteBuffer(_exportTrie); 1461 } 1462 _exportTrie.align(_is64 ? 8 : 4); 1463 } 1464 1465 void MachOFileLayout::computeSymbolTableSizes() { 1466 // MachO symbol tables have three ranges: locals, globals, and undefines 1467 const size_t nlistSize = (_is64 ? sizeof(nlist_64) : sizeof(nlist)); 1468 _symbolTableSize = nlistSize * (_file.stabsSymbols.size() 1469 + _file.localSymbols.size() 1470 + _file.globalSymbols.size() 1471 + _file.undefinedSymbols.size()); 1472 // Always reserve 1-byte for the empty string and 1-byte for its terminator. 1473 _symbolStringPoolSize = 2; 1474 for (const Symbol &sym : _file.stabsSymbols) { 1475 _symbolStringPoolSize += (sym.name.size()+1); 1476 } 1477 for (const Symbol &sym : _file.localSymbols) { 1478 _symbolStringPoolSize += (sym.name.size()+1); 1479 } 1480 for (const Symbol &sym : _file.globalSymbols) { 1481 _symbolStringPoolSize += (sym.name.size()+1); 1482 } 1483 for (const Symbol &sym : _file.undefinedSymbols) { 1484 _symbolStringPoolSize += (sym.name.size()+1); 1485 } 1486 _symbolTableLocalsStartIndex = 0; 1487 _symbolTableGlobalsStartIndex = _file.stabsSymbols.size() + 1488 _file.localSymbols.size(); 1489 _symbolTableUndefinesStartIndex = _symbolTableGlobalsStartIndex 1490 + _file.globalSymbols.size(); 1491 1492 _indirectSymbolTableCount = 0; 1493 for (const Section § : _file.sections) { 1494 _indirectSymbolTableCount += sect.indirectSymbols.size(); 1495 } 1496 } 1497 1498 void MachOFileLayout::computeFunctionStartsSize() { 1499 _functionStartsSize = _file.functionStarts.size(); 1500 } 1501 1502 void MachOFileLayout::computeDataInCodeSize() { 1503 _dataInCodeSize = _file.dataInCode.size() * sizeof(data_in_code_entry); 1504 } 1505 1506 void MachOFileLayout::writeLinkEditContent() { 1507 if (_file.fileType == llvm::MachO::MH_OBJECT) { 1508 writeRelocations(); 1509 writeFunctionStartsInfo(); 1510 writeDataInCodeInfo(); 1511 writeSymbolTable(); 1512 } else { 1513 writeRebaseInfo(); 1514 writeBindingInfo(); 1515 writeLazyBindingInfo(); 1516 // TODO: add weak binding info 1517 writeExportInfo(); 1518 writeFunctionStartsInfo(); 1519 writeDataInCodeInfo(); 1520 writeSymbolTable(); 1521 } 1522 } 1523 1524 llvm::Error MachOFileLayout::writeBinary(StringRef path) { 1525 // Check for pending error from constructor. 1526 if (_ec) 1527 return llvm::errorCodeToError(_ec); 1528 // Create FileOutputBuffer with calculated size. 1529 unsigned flags = 0; 1530 if (_file.fileType != llvm::MachO::MH_OBJECT) 1531 flags = llvm::FileOutputBuffer::F_executable; 1532 Expected<std::unique_ptr<llvm::FileOutputBuffer>> fobOrErr = 1533 llvm::FileOutputBuffer::create(path, size(), flags); 1534 if (Error E = fobOrErr.takeError()) 1535 return E; 1536 std::unique_ptr<llvm::FileOutputBuffer> &fob = *fobOrErr; 1537 // Write content. 1538 _buffer = fob->getBufferStart(); 1539 writeMachHeader(); 1540 if (auto ec = writeLoadCommands()) 1541 return ec; 1542 writeSectionContent(); 1543 writeLinkEditContent(); 1544 if (Error E = fob->commit()) 1545 return E; 1546 1547 return llvm::Error::success(); 1548 } 1549 1550 /// Takes in-memory normalized view and writes a mach-o object file. 1551 llvm::Error writeBinary(const NormalizedFile &file, StringRef path) { 1552 MachOFileLayout layout(file, false); 1553 return layout.writeBinary(path); 1554 } 1555 1556 } // namespace normalized 1557 } // namespace mach_o 1558 } // namespace lld 1559