1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/MC/MCMachObjectWriter.h" 11 #include "llvm/ADT/StringMap.h" 12 #include "llvm/ADT/Twine.h" 13 #include "llvm/MC/MCAsmBackend.h" 14 #include "llvm/MC/MCAsmLayout.h" 15 #include "llvm/MC/MCAssembler.h" 16 #include "llvm/MC/MCExpr.h" 17 #include "llvm/MC/MCFixupKindInfo.h" 18 #include "llvm/MC/MCMachOSymbolFlags.h" 19 #include "llvm/MC/MCObjectWriter.h" 20 #include "llvm/MC/MCSectionMachO.h" 21 #include "llvm/MC/MCSymbol.h" 22 #include "llvm/MC/MCValue.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/ErrorHandling.h" 25 #include "llvm/Support/MachO.h" 26 #include "llvm/Support/raw_ostream.h" 27 #include <vector> 28 using namespace llvm; 29 30 #define DEBUG_TYPE "mc" 31 32 void MachObjectWriter::reset() { 33 Relocations.clear(); 34 IndirectSymBase.clear(); 35 StringTable.clear(); 36 LocalSymbolData.clear(); 37 ExternalSymbolData.clear(); 38 UndefinedSymbolData.clear(); 39 MCObjectWriter::reset(); 40 } 41 42 bool MachObjectWriter:: 43 doesSymbolRequireExternRelocation(const MCSymbolData *SD) { 44 // Undefined symbols are always extern. 45 if (SD->getSymbol().isUndefined()) 46 return true; 47 48 // References to weak definitions require external relocation entries; the 49 // definition may not always be the one in the same object file. 50 if (SD->getFlags() & SF_WeakDefinition) 51 return true; 52 53 // Otherwise, we can use an internal relocation. 54 return false; 55 } 56 57 bool MachObjectWriter:: 58 MachSymbolData::operator<(const MachSymbolData &RHS) const { 59 return SymbolData->getSymbol().getName() < 60 RHS.SymbolData->getSymbol().getName(); 61 } 62 63 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { 64 const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo( 65 (MCFixupKind) Kind); 66 67 return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; 68 } 69 70 uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment, 71 const MCAsmLayout &Layout) const { 72 return getSectionAddress(Fragment->getParent()) + 73 Layout.getFragmentOffset(Fragment); 74 } 75 76 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbolData* SD, 77 const MCAsmLayout &Layout) const { 78 const MCSymbol &S = SD->getSymbol(); 79 80 // If this is a variable, then recursively evaluate now. 81 if (S.isVariable()) { 82 if (const MCConstantExpr *C = 83 dyn_cast<const MCConstantExpr>(S.getVariableValue())) 84 return C->getValue(); 85 86 87 MCValue Target; 88 if (!S.getVariableValue()->EvaluateAsRelocatable(Target, &Layout, nullptr)) 89 report_fatal_error("unable to evaluate offset for variable '" + 90 S.getName() + "'"); 91 92 // Verify that any used symbols are defined. 93 if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined()) 94 report_fatal_error("unable to evaluate offset to undefined symbol '" + 95 Target.getSymA()->getSymbol().getName() + "'"); 96 if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined()) 97 report_fatal_error("unable to evaluate offset to undefined symbol '" + 98 Target.getSymB()->getSymbol().getName() + "'"); 99 100 uint64_t Address = Target.getConstant(); 101 if (Target.getSymA()) 102 Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( 103 Target.getSymA()->getSymbol()), Layout); 104 if (Target.getSymB()) 105 Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( 106 Target.getSymB()->getSymbol()), Layout); 107 return Address; 108 } 109 110 return getSectionAddress(SD->getFragment()->getParent()) + 111 Layout.getSymbolOffset(SD); 112 } 113 114 uint64_t MachObjectWriter::getPaddingSize(const MCSectionData *SD, 115 const MCAsmLayout &Layout) const { 116 uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD); 117 unsigned Next = SD->getLayoutOrder() + 1; 118 if (Next >= Layout.getSectionOrder().size()) 119 return 0; 120 121 const MCSectionData &NextSD = *Layout.getSectionOrder()[Next]; 122 if (NextSD.getSection().isVirtualSection()) 123 return 0; 124 return OffsetToAlignment(EndAddr, NextSD.getAlignment()); 125 } 126 127 void MachObjectWriter::WriteHeader(unsigned NumLoadCommands, 128 unsigned LoadCommandsSize, 129 bool SubsectionsViaSymbols) { 130 uint32_t Flags = 0; 131 132 if (SubsectionsViaSymbols) 133 Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS; 134 135 // struct mach_header (28 bytes) or 136 // struct mach_header_64 (32 bytes) 137 138 uint64_t Start = OS.tell(); 139 (void) Start; 140 141 Write32(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC); 142 143 Write32(TargetObjectWriter->getCPUType()); 144 Write32(TargetObjectWriter->getCPUSubtype()); 145 146 Write32(MachO::MH_OBJECT); 147 Write32(NumLoadCommands); 148 Write32(LoadCommandsSize); 149 Write32(Flags); 150 if (is64Bit()) 151 Write32(0); // reserved 152 153 assert(OS.tell() - Start == 154 (is64Bit()?sizeof(MachO::mach_header_64): sizeof(MachO::mach_header))); 155 } 156 157 /// WriteSegmentLoadCommand - Write a segment load command. 158 /// 159 /// \param NumSections The number of sections in this segment. 160 /// \param SectionDataSize The total size of the sections. 161 void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections, 162 uint64_t VMSize, 163 uint64_t SectionDataStartOffset, 164 uint64_t SectionDataSize) { 165 // struct segment_command (56 bytes) or 166 // struct segment_command_64 (72 bytes) 167 168 uint64_t Start = OS.tell(); 169 (void) Start; 170 171 unsigned SegmentLoadCommandSize = 172 is64Bit() ? sizeof(MachO::segment_command_64): 173 sizeof(MachO::segment_command); 174 Write32(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT); 175 Write32(SegmentLoadCommandSize + 176 NumSections * (is64Bit() ? sizeof(MachO::section_64) : 177 sizeof(MachO::section))); 178 179 WriteBytes("", 16); 180 if (is64Bit()) { 181 Write64(0); // vmaddr 182 Write64(VMSize); // vmsize 183 Write64(SectionDataStartOffset); // file offset 184 Write64(SectionDataSize); // file size 185 } else { 186 Write32(0); // vmaddr 187 Write32(VMSize); // vmsize 188 Write32(SectionDataStartOffset); // file offset 189 Write32(SectionDataSize); // file size 190 } 191 // maxprot 192 Write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); 193 // initprot 194 Write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); 195 Write32(NumSections); 196 Write32(0); // flags 197 198 assert(OS.tell() - Start == SegmentLoadCommandSize); 199 } 200 201 void MachObjectWriter::WriteSection(const MCAssembler &Asm, 202 const MCAsmLayout &Layout, 203 const MCSectionData &SD, 204 uint64_t FileOffset, 205 uint64_t RelocationsStart, 206 unsigned NumRelocations) { 207 uint64_t SectionSize = Layout.getSectionAddressSize(&SD); 208 209 // The offset is unused for virtual sections. 210 if (SD.getSection().isVirtualSection()) { 211 assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!"); 212 FileOffset = 0; 213 } 214 215 // struct section (68 bytes) or 216 // struct section_64 (80 bytes) 217 218 uint64_t Start = OS.tell(); 219 (void) Start; 220 221 const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection()); 222 WriteBytes(Section.getSectionName(), 16); 223 WriteBytes(Section.getSegmentName(), 16); 224 if (is64Bit()) { 225 Write64(getSectionAddress(&SD)); // address 226 Write64(SectionSize); // size 227 } else { 228 Write32(getSectionAddress(&SD)); // address 229 Write32(SectionSize); // size 230 } 231 Write32(FileOffset); 232 233 unsigned Flags = Section.getTypeAndAttributes(); 234 if (SD.hasInstructions()) 235 Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS; 236 237 assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); 238 Write32(Log2_32(SD.getAlignment())); 239 Write32(NumRelocations ? RelocationsStart : 0); 240 Write32(NumRelocations); 241 Write32(Flags); 242 Write32(IndirectSymBase.lookup(&SD)); // reserved1 243 Write32(Section.getStubSize()); // reserved2 244 if (is64Bit()) 245 Write32(0); // reserved3 246 247 assert(OS.tell() - Start == (is64Bit() ? sizeof(MachO::section_64) : 248 sizeof(MachO::section))); 249 } 250 251 void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset, 252 uint32_t NumSymbols, 253 uint32_t StringTableOffset, 254 uint32_t StringTableSize) { 255 // struct symtab_command (24 bytes) 256 257 uint64_t Start = OS.tell(); 258 (void) Start; 259 260 Write32(MachO::LC_SYMTAB); 261 Write32(sizeof(MachO::symtab_command)); 262 Write32(SymbolOffset); 263 Write32(NumSymbols); 264 Write32(StringTableOffset); 265 Write32(StringTableSize); 266 267 assert(OS.tell() - Start == sizeof(MachO::symtab_command)); 268 } 269 270 void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, 271 uint32_t NumLocalSymbols, 272 uint32_t FirstExternalSymbol, 273 uint32_t NumExternalSymbols, 274 uint32_t FirstUndefinedSymbol, 275 uint32_t NumUndefinedSymbols, 276 uint32_t IndirectSymbolOffset, 277 uint32_t NumIndirectSymbols) { 278 // struct dysymtab_command (80 bytes) 279 280 uint64_t Start = OS.tell(); 281 (void) Start; 282 283 Write32(MachO::LC_DYSYMTAB); 284 Write32(sizeof(MachO::dysymtab_command)); 285 Write32(FirstLocalSymbol); 286 Write32(NumLocalSymbols); 287 Write32(FirstExternalSymbol); 288 Write32(NumExternalSymbols); 289 Write32(FirstUndefinedSymbol); 290 Write32(NumUndefinedSymbols); 291 Write32(0); // tocoff 292 Write32(0); // ntoc 293 Write32(0); // modtaboff 294 Write32(0); // nmodtab 295 Write32(0); // extrefsymoff 296 Write32(0); // nextrefsyms 297 Write32(IndirectSymbolOffset); 298 Write32(NumIndirectSymbols); 299 Write32(0); // extreloff 300 Write32(0); // nextrel 301 Write32(0); // locreloff 302 Write32(0); // nlocrel 303 304 assert(OS.tell() - Start == sizeof(MachO::dysymtab_command)); 305 } 306 307 MachObjectWriter::MachSymbolData * 308 MachObjectWriter::findSymbolData(const MCSymbol &Sym) { 309 for (auto &Entry : LocalSymbolData) 310 if (&Entry.SymbolData->getSymbol() == &Sym) 311 return &Entry; 312 313 for (auto &Entry : ExternalSymbolData) 314 if (&Entry.SymbolData->getSymbol() == &Sym) 315 return &Entry; 316 317 for (auto &Entry : UndefinedSymbolData) 318 if (&Entry.SymbolData->getSymbol() == &Sym) 319 return &Entry; 320 321 return nullptr; 322 } 323 324 const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const { 325 const MCSymbol *S = &Sym; 326 while (S->isVariable()) { 327 const MCExpr *Value = S->getVariableValue(); 328 const auto *Ref = dyn_cast<MCSymbolRefExpr>(Value); 329 if (!Ref) 330 return *S; 331 S = &Ref->getSymbol(); 332 } 333 return *S; 334 } 335 336 void MachObjectWriter::WriteNlist(MachSymbolData &MSD, 337 const MCAsmLayout &Layout) { 338 MCSymbolData &Data = *MSD.SymbolData; 339 const MCSymbol *Symbol = &Data.getSymbol(); 340 const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol); 341 uint8_t SectionIndex = MSD.SectionIndex; 342 uint8_t Type = 0; 343 uint16_t Flags = Data.getFlags(); 344 uint64_t Address = 0; 345 bool IsAlias = Symbol != AliasedSymbol; 346 347 MachSymbolData *AliaseeInfo; 348 if (IsAlias) { 349 AliaseeInfo = findSymbolData(*AliasedSymbol); 350 if (AliaseeInfo) 351 SectionIndex = AliaseeInfo->SectionIndex; 352 Symbol = AliasedSymbol; 353 } 354 355 // Set the N_TYPE bits. See <mach-o/nlist.h>. 356 // 357 // FIXME: Are the prebound or indirect fields possible here? 358 if (IsAlias && Symbol->isUndefined()) 359 Type = MachO::N_INDR; 360 else if (Symbol->isUndefined()) 361 Type = MachO::N_UNDF; 362 else if (Symbol->isAbsolute()) 363 Type = MachO::N_ABS; 364 else 365 Type = MachO::N_SECT; 366 367 // FIXME: Set STAB bits. 368 369 if (Data.isPrivateExtern()) 370 Type |= MachO::N_PEXT; 371 372 // Set external bit. 373 if (Data.isExternal() || (!IsAlias && Symbol->isUndefined())) 374 Type |= MachO::N_EXT; 375 376 // Compute the symbol address. 377 if (IsAlias && Symbol->isUndefined()) 378 Address = AliaseeInfo->StringIndex; 379 else if (Symbol->isDefined()) 380 Address = getSymbolAddress(&Data, Layout); 381 else if (Data.isCommon()) { 382 // Common symbols are encoded with the size in the address 383 // field, and their alignment in the flags. 384 Address = Data.getCommonSize(); 385 386 // Common alignment is packed into the 'desc' bits. 387 if (unsigned Align = Data.getCommonAlignment()) { 388 unsigned Log2Size = Log2_32(Align); 389 assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); 390 if (Log2Size > 15) 391 report_fatal_error("invalid 'common' alignment '" + 392 Twine(Align) + "' for '" + Symbol->getName() + "'", 393 false); 394 // FIXME: Keep this mask with the SymbolFlags enumeration. 395 Flags = (Flags & 0xF0FF) | (Log2Size << 8); 396 } 397 } 398 399 if (Layout.getAssembler().isThumbFunc(Symbol)) 400 Flags |= SF_ThumbFunc; 401 402 // struct nlist (12 bytes) 403 404 Write32(MSD.StringIndex); 405 Write8(Type); 406 Write8(SectionIndex); 407 408 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' 409 // value. 410 Write16(Flags); 411 if (is64Bit()) 412 Write64(Address); 413 else 414 Write32(Address); 415 } 416 417 void MachObjectWriter::WriteLinkeditLoadCommand(uint32_t Type, 418 uint32_t DataOffset, 419 uint32_t DataSize) { 420 uint64_t Start = OS.tell(); 421 (void) Start; 422 423 Write32(Type); 424 Write32(sizeof(MachO::linkedit_data_command)); 425 Write32(DataOffset); 426 Write32(DataSize); 427 428 assert(OS.tell() - Start == sizeof(MachO::linkedit_data_command)); 429 } 430 431 static unsigned ComputeLinkerOptionsLoadCommandSize( 432 const std::vector<std::string> &Options, bool is64Bit) 433 { 434 unsigned Size = sizeof(MachO::linker_option_command); 435 for (unsigned i = 0, e = Options.size(); i != e; ++i) 436 Size += Options[i].size() + 1; 437 return RoundUpToAlignment(Size, is64Bit ? 8 : 4); 438 } 439 440 void MachObjectWriter::WriteLinkerOptionsLoadCommand( 441 const std::vector<std::string> &Options) 442 { 443 unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit()); 444 uint64_t Start = OS.tell(); 445 (void) Start; 446 447 Write32(MachO::LC_LINKER_OPTION); 448 Write32(Size); 449 Write32(Options.size()); 450 uint64_t BytesWritten = sizeof(MachO::linker_option_command); 451 for (unsigned i = 0, e = Options.size(); i != e; ++i) { 452 // Write each string, including the null byte. 453 const std::string &Option = Options[i]; 454 WriteBytes(Option.c_str(), Option.size() + 1); 455 BytesWritten += Option.size() + 1; 456 } 457 458 // Pad to a multiple of the pointer size. 459 WriteBytes("", OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4)); 460 461 assert(OS.tell() - Start == Size); 462 } 463 464 void MachObjectWriter::RecordRelocation(MCAssembler &Asm, 465 const MCAsmLayout &Layout, 466 const MCFragment *Fragment, 467 const MCFixup &Fixup, MCValue Target, 468 bool &IsPCRel, uint64_t &FixedValue) { 469 TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup, 470 Target, FixedValue); 471 } 472 473 void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) { 474 // This is the point where 'as' creates actual symbols for indirect symbols 475 // (in the following two passes). It would be easier for us to do this sooner 476 // when we see the attribute, but that makes getting the order in the symbol 477 // table much more complicated than it is worth. 478 // 479 // FIXME: Revisit this when the dust settles. 480 481 // Report errors for use of .indirect_symbol not in a symbol pointer section 482 // or stub section. 483 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 484 ie = Asm.indirect_symbol_end(); it != ie; ++it) { 485 const MCSectionMachO &Section = 486 cast<MCSectionMachO>(it->SectionData->getSection()); 487 488 if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS && 489 Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && 490 Section.getType() != MachO::S_SYMBOL_STUBS) { 491 MCSymbol &Symbol = *it->Symbol; 492 report_fatal_error("indirect symbol '" + Symbol.getName() + 493 "' not in a symbol pointer or stub section"); 494 } 495 } 496 497 // Bind non-lazy symbol pointers first. 498 unsigned IndirectIndex = 0; 499 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 500 ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { 501 const MCSectionMachO &Section = 502 cast<MCSectionMachO>(it->SectionData->getSection()); 503 504 if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS) 505 continue; 506 507 // Initialize the section indirect symbol base, if necessary. 508 IndirectSymBase.insert(std::make_pair(it->SectionData, IndirectIndex)); 509 510 Asm.getOrCreateSymbolData(*it->Symbol); 511 } 512 513 // Then lazy symbol pointers and symbol stubs. 514 IndirectIndex = 0; 515 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 516 ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { 517 const MCSectionMachO &Section = 518 cast<MCSectionMachO>(it->SectionData->getSection()); 519 520 if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && 521 Section.getType() != MachO::S_SYMBOL_STUBS) 522 continue; 523 524 // Initialize the section indirect symbol base, if necessary. 525 IndirectSymBase.insert(std::make_pair(it->SectionData, IndirectIndex)); 526 527 // Set the symbol type to undefined lazy, but only on construction. 528 // 529 // FIXME: Do not hardcode. 530 bool Created; 531 MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); 532 if (Created) 533 Entry.setFlags(Entry.getFlags() | 0x0001); 534 } 535 } 536 537 /// ComputeSymbolTable - Compute the symbol table data 538 void MachObjectWriter::ComputeSymbolTable( 539 MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData, 540 std::vector<MachSymbolData> &ExternalSymbolData, 541 std::vector<MachSymbolData> &UndefinedSymbolData) { 542 // Build section lookup table. 543 DenseMap<const MCSection*, uint8_t> SectionIndexMap; 544 unsigned Index = 1; 545 for (MCAssembler::iterator it = Asm.begin(), 546 ie = Asm.end(); it != ie; ++it, ++Index) 547 SectionIndexMap[&it->getSection()] = Index; 548 assert(Index <= 256 && "Too many sections!"); 549 550 // Build the string table. 551 for (const MCSymbol &Symbol : Asm.symbols()) { 552 if (!Asm.isSymbolLinkerVisible(Symbol)) 553 continue; 554 555 StringTable.add(Symbol.getName()); 556 } 557 StringTable.finalize(StringTableBuilder::MachO); 558 559 // Build the symbol arrays but only for non-local symbols. 560 // 561 // The particular order that we collect and then sort the symbols is chosen to 562 // match 'as'. Even though it doesn't matter for correctness, this is 563 // important for letting us diff .o files. 564 for (const MCSymbol &Symbol : Asm.symbols()) { 565 MCSymbolData &SD = Symbol.getData(); 566 567 // Ignore non-linker visible symbols. 568 if (!Asm.isSymbolLinkerVisible(Symbol)) 569 continue; 570 571 if (!SD.isExternal() && !Symbol.isUndefined()) 572 continue; 573 574 MachSymbolData MSD; 575 MSD.SymbolData = &SD; 576 MSD.StringIndex = StringTable.getOffset(Symbol.getName()); 577 578 if (Symbol.isUndefined()) { 579 MSD.SectionIndex = 0; 580 UndefinedSymbolData.push_back(MSD); 581 } else if (Symbol.isAbsolute()) { 582 MSD.SectionIndex = 0; 583 ExternalSymbolData.push_back(MSD); 584 } else { 585 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); 586 assert(MSD.SectionIndex && "Invalid section index!"); 587 ExternalSymbolData.push_back(MSD); 588 } 589 } 590 591 // Now add the data for local symbols. 592 for (const MCSymbol &Symbol : Asm.symbols()) { 593 MCSymbolData &SD = Symbol.getData(); 594 595 // Ignore non-linker visible symbols. 596 if (!Asm.isSymbolLinkerVisible(Symbol)) 597 continue; 598 599 if (SD.isExternal() || Symbol.isUndefined()) 600 continue; 601 602 MachSymbolData MSD; 603 MSD.SymbolData = &SD; 604 MSD.StringIndex = StringTable.getOffset(Symbol.getName()); 605 606 if (Symbol.isAbsolute()) { 607 MSD.SectionIndex = 0; 608 LocalSymbolData.push_back(MSD); 609 } else { 610 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); 611 assert(MSD.SectionIndex && "Invalid section index!"); 612 LocalSymbolData.push_back(MSD); 613 } 614 } 615 616 // External and undefined symbols are required to be in lexicographic order. 617 std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); 618 std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); 619 620 // Set the symbol indices. 621 Index = 0; 622 for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) 623 LocalSymbolData[i].SymbolData->setIndex(Index++); 624 for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) 625 ExternalSymbolData[i].SymbolData->setIndex(Index++); 626 for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) 627 UndefinedSymbolData[i].SymbolData->setIndex(Index++); 628 629 for (const MCSectionData &SD : Asm) { 630 std::vector<RelAndSymbol> &Relocs = Relocations[&SD]; 631 for (RelAndSymbol &Rel : Relocs) { 632 if (!Rel.Sym) 633 continue; 634 635 // Set the Index and the IsExtern bit. 636 unsigned Index = Rel.Sym->getData().getIndex(); 637 assert(isInt<24>(Index)); 638 if (IsLittleEndian) 639 Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27); 640 else 641 Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4); 642 } 643 } 644 } 645 646 void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm, 647 const MCAsmLayout &Layout) { 648 uint64_t StartAddress = 0; 649 const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder(); 650 for (int i = 0, n = Order.size(); i != n ; ++i) { 651 const MCSectionData *SD = Order[i]; 652 StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment()); 653 SectionAddress[SD] = StartAddress; 654 StartAddress += Layout.getSectionAddressSize(SD); 655 656 // Explicitly pad the section to match the alignment requirements of the 657 // following one. This is for 'gas' compatibility, it shouldn't 658 /// strictly be necessary. 659 StartAddress += getPaddingSize(SD, Layout); 660 } 661 } 662 663 void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, 664 const MCAsmLayout &Layout) { 665 computeSectionAddresses(Asm, Layout); 666 667 // Create symbol data for any indirect symbols. 668 BindIndirectSymbols(Asm); 669 } 670 671 bool MachObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( 672 const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB, 673 bool InSet, bool IsPCRel) const { 674 if (InSet) 675 return true; 676 677 // The effective address is 678 // addr(atom(A)) + offset(A) 679 // - addr(atom(B)) - offset(B) 680 // and the offsets are not relocatable, so the fixup is fully resolved when 681 // addr(atom(A)) - addr(atom(B)) == 0. 682 const MCSymbol &SA = findAliasedSymbol(SymA); 683 const MCSection &SecA = SA.getSection(); 684 const MCSection &SecB = FB.getParent()->getSection(); 685 686 if (IsPCRel) { 687 // The simple (Darwin, except on x86_64) way of dealing with this was to 688 // assume that any reference to a temporary symbol *must* be a temporary 689 // symbol in the same atom, unless the sections differ. Therefore, any PCrel 690 // relocation to a temporary symbol (in the same section) is fully 691 // resolved. This also works in conjunction with absolutized .set, which 692 // requires the compiler to use .set to absolutize the differences between 693 // symbols which the compiler knows to be assembly time constants, so we 694 // don't need to worry about considering symbol differences fully resolved. 695 // 696 // If the file isn't using sub-sections-via-symbols, we can make the 697 // same assumptions about any symbol that we normally make about 698 // assembler locals. 699 700 bool hasReliableSymbolDifference = isX86_64(); 701 if (!hasReliableSymbolDifference) { 702 if (!SA.isInSection() || &SecA != &SecB || 703 (!SA.isTemporary() && 704 FB.getAtom() != Asm.getSymbolData(SA).getFragment()->getAtom() && 705 Asm.getSubsectionsViaSymbols())) 706 return false; 707 return true; 708 } 709 // For Darwin x86_64, there is one special case when the reference IsPCRel. 710 // If the fragment with the reference does not have a base symbol but meets 711 // the simple way of dealing with this, in that it is a temporary symbol in 712 // the same atom then it is assumed to be fully resolved. This is needed so 713 // a relocation entry is not created and so the static linker does not 714 // mess up the reference later. 715 else if(!FB.getAtom() && 716 SA.isTemporary() && SA.isInSection() && &SecA == &SecB){ 717 return true; 718 } 719 } else { 720 if (!TargetObjectWriter->useAggressiveSymbolFolding()) 721 return false; 722 } 723 724 // If they are not in the same section, we can't compute the diff. 725 if (&SecA != &SecB) 726 return false; 727 728 const MCFragment *FA = Asm.getSymbolData(SA).getFragment(); 729 730 // Bail if the symbol has no fragment. 731 if (!FA) 732 return false; 733 734 // If the atoms are the same, they are guaranteed to have the same address. 735 if (FA->getAtom() == FB.getAtom()) 736 return true; 737 738 // Otherwise, we can't prove this is fully resolved. 739 return false; 740 } 741 742 void MachObjectWriter::WriteObject(MCAssembler &Asm, 743 const MCAsmLayout &Layout) { 744 // Compute symbol table information and bind symbol indices. 745 ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData, 746 UndefinedSymbolData); 747 748 unsigned NumSections = Asm.size(); 749 const MCAssembler::VersionMinInfoType &VersionInfo = 750 Layout.getAssembler().getVersionMinInfo(); 751 752 // The section data starts after the header, the segment load command (and 753 // section headers) and the symbol table. 754 unsigned NumLoadCommands = 1; 755 uint64_t LoadCommandsSize = is64Bit() ? 756 sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64): 757 sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section); 758 759 // Add the deployment target version info load command size, if used. 760 if (VersionInfo.Major != 0) { 761 ++NumLoadCommands; 762 LoadCommandsSize += sizeof(MachO::version_min_command); 763 } 764 765 // Add the data-in-code load command size, if used. 766 unsigned NumDataRegions = Asm.getDataRegions().size(); 767 if (NumDataRegions) { 768 ++NumLoadCommands; 769 LoadCommandsSize += sizeof(MachO::linkedit_data_command); 770 } 771 772 // Add the loh load command size, if used. 773 uint64_t LOHRawSize = Asm.getLOHContainer().getEmitSize(*this, Layout); 774 uint64_t LOHSize = RoundUpToAlignment(LOHRawSize, is64Bit() ? 8 : 4); 775 if (LOHSize) { 776 ++NumLoadCommands; 777 LoadCommandsSize += sizeof(MachO::linkedit_data_command); 778 } 779 780 // Add the symbol table load command sizes, if used. 781 unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + 782 UndefinedSymbolData.size(); 783 if (NumSymbols) { 784 NumLoadCommands += 2; 785 LoadCommandsSize += (sizeof(MachO::symtab_command) + 786 sizeof(MachO::dysymtab_command)); 787 } 788 789 // Add the linker option load commands sizes. 790 const std::vector<std::vector<std::string> > &LinkerOptions = 791 Asm.getLinkerOptions(); 792 for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) { 793 ++NumLoadCommands; 794 LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(LinkerOptions[i], 795 is64Bit()); 796 } 797 798 // Compute the total size of the section data, as well as its file size and vm 799 // size. 800 uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) : 801 sizeof(MachO::mach_header)) + LoadCommandsSize; 802 uint64_t SectionDataSize = 0; 803 uint64_t SectionDataFileSize = 0; 804 uint64_t VMSize = 0; 805 for (MCAssembler::const_iterator it = Asm.begin(), 806 ie = Asm.end(); it != ie; ++it) { 807 const MCSectionData &SD = *it; 808 uint64_t Address = getSectionAddress(&SD); 809 uint64_t Size = Layout.getSectionAddressSize(&SD); 810 uint64_t FileSize = Layout.getSectionFileSize(&SD); 811 FileSize += getPaddingSize(&SD, Layout); 812 813 VMSize = std::max(VMSize, Address + Size); 814 815 if (SD.getSection().isVirtualSection()) 816 continue; 817 818 SectionDataSize = std::max(SectionDataSize, Address + Size); 819 SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize); 820 } 821 822 // The section data is padded to 4 bytes. 823 // 824 // FIXME: Is this machine dependent? 825 unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); 826 SectionDataFileSize += SectionDataPadding; 827 828 // Write the prolog, starting with the header and load command... 829 WriteHeader(NumLoadCommands, LoadCommandsSize, 830 Asm.getSubsectionsViaSymbols()); 831 WriteSegmentLoadCommand(NumSections, VMSize, 832 SectionDataStart, SectionDataSize); 833 834 // ... and then the section headers. 835 uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; 836 for (MCAssembler::const_iterator it = Asm.begin(), 837 ie = Asm.end(); it != ie; ++it) { 838 std::vector<RelAndSymbol> &Relocs = Relocations[it]; 839 unsigned NumRelocs = Relocs.size(); 840 uint64_t SectionStart = SectionDataStart + getSectionAddress(it); 841 WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs); 842 RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info); 843 } 844 845 // Write out the deployment target information, if it's available. 846 if (VersionInfo.Major != 0) { 847 assert(VersionInfo.Update < 256 && "unencodable update target version"); 848 assert(VersionInfo.Minor < 256 && "unencodable minor target version"); 849 assert(VersionInfo.Major < 65536 && "unencodable major target version"); 850 uint32_t EncodedVersion = VersionInfo.Update | (VersionInfo.Minor << 8) | 851 (VersionInfo.Major << 16); 852 Write32(VersionInfo.Kind == MCVM_OSXVersionMin ? MachO::LC_VERSION_MIN_MACOSX : 853 MachO::LC_VERSION_MIN_IPHONEOS); 854 Write32(sizeof(MachO::version_min_command)); 855 Write32(EncodedVersion); 856 Write32(0); // reserved. 857 } 858 859 // Write the data-in-code load command, if used. 860 uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8; 861 if (NumDataRegions) { 862 uint64_t DataRegionsOffset = RelocTableEnd; 863 uint64_t DataRegionsSize = NumDataRegions * 8; 864 WriteLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset, 865 DataRegionsSize); 866 } 867 868 // Write the loh load command, if used. 869 uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize; 870 if (LOHSize) 871 WriteLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT, 872 DataInCodeTableEnd, LOHSize); 873 874 // Write the symbol table load command, if used. 875 if (NumSymbols) { 876 unsigned FirstLocalSymbol = 0; 877 unsigned NumLocalSymbols = LocalSymbolData.size(); 878 unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; 879 unsigned NumExternalSymbols = ExternalSymbolData.size(); 880 unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; 881 unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); 882 unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); 883 unsigned NumSymTabSymbols = 884 NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; 885 uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; 886 uint64_t IndirectSymbolOffset = 0; 887 888 // If used, the indirect symbols are written after the section data. 889 if (NumIndirectSymbols) 890 IndirectSymbolOffset = LOHTableEnd; 891 892 // The symbol table is written after the indirect symbol data. 893 uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize; 894 895 // The string table is written after symbol table. 896 uint64_t StringTableOffset = 897 SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? 898 sizeof(MachO::nlist_64) : 899 sizeof(MachO::nlist)); 900 WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, 901 StringTableOffset, StringTable.data().size()); 902 903 WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, 904 FirstExternalSymbol, NumExternalSymbols, 905 FirstUndefinedSymbol, NumUndefinedSymbols, 906 IndirectSymbolOffset, NumIndirectSymbols); 907 } 908 909 // Write the linker options load commands. 910 for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) { 911 WriteLinkerOptionsLoadCommand(LinkerOptions[i]); 912 } 913 914 // Write the actual section data. 915 for (MCAssembler::const_iterator it = Asm.begin(), 916 ie = Asm.end(); it != ie; ++it) { 917 Asm.writeSectionData(it, Layout); 918 919 uint64_t Pad = getPaddingSize(it, Layout); 920 WriteZeros(Pad); 921 } 922 923 // Write the extra padding. 924 WriteZeros(SectionDataPadding); 925 926 // Write the relocation entries. 927 for (MCAssembler::const_iterator it = Asm.begin(), 928 ie = Asm.end(); it != ie; ++it) { 929 // Write the section relocation entries, in reverse order to match 'as' 930 // (approximately, the exact algorithm is more complicated than this). 931 std::vector<RelAndSymbol> &Relocs = Relocations[it]; 932 for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { 933 Write32(Relocs[e - i - 1].MRE.r_word0); 934 Write32(Relocs[e - i - 1].MRE.r_word1); 935 } 936 } 937 938 // Write out the data-in-code region payload, if there is one. 939 for (MCAssembler::const_data_region_iterator 940 it = Asm.data_region_begin(), ie = Asm.data_region_end(); 941 it != ie; ++it) { 942 const DataRegionData *Data = &(*it); 943 uint64_t Start = 944 getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->Start), 945 Layout); 946 uint64_t End = 947 getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->End), 948 Layout); 949 DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind 950 << " start: " << Start << "(" << Data->Start->getName() << ")" 951 << " end: " << End << "(" << Data->End->getName() << ")" 952 << " size: " << End - Start 953 << "\n"); 954 Write32(Start); 955 Write16(End - Start); 956 Write16(Data->Kind); 957 } 958 959 // Write out the loh commands, if there is one. 960 if (LOHSize) { 961 #ifndef NDEBUG 962 unsigned Start = OS.tell(); 963 #endif 964 Asm.getLOHContainer().Emit(*this, Layout); 965 // Pad to a multiple of the pointer size. 966 WriteBytes("", OffsetToAlignment(LOHRawSize, is64Bit() ? 8 : 4)); 967 assert(OS.tell() - Start == LOHSize); 968 } 969 970 // Write the symbol table data, if used. 971 if (NumSymbols) { 972 // Write the indirect symbol entries. 973 for (MCAssembler::const_indirect_symbol_iterator 974 it = Asm.indirect_symbol_begin(), 975 ie = Asm.indirect_symbol_end(); it != ie; ++it) { 976 // Indirect symbols in the non-lazy symbol pointer section have some 977 // special handling. 978 const MCSectionMachO &Section = 979 static_cast<const MCSectionMachO&>(it->SectionData->getSection()); 980 if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) { 981 // If this symbol is defined and internal, mark it as such. 982 if (it->Symbol->isDefined() && 983 !Asm.getSymbolData(*it->Symbol).isExternal()) { 984 uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL; 985 if (it->Symbol->isAbsolute()) 986 Flags |= MachO::INDIRECT_SYMBOL_ABS; 987 Write32(Flags); 988 continue; 989 } 990 } 991 992 Write32(Asm.getSymbolData(*it->Symbol).getIndex()); 993 } 994 995 // FIXME: Check that offsets match computed ones. 996 997 // Write the symbol table entries. 998 for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) 999 WriteNlist(LocalSymbolData[i], Layout); 1000 for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) 1001 WriteNlist(ExternalSymbolData[i], Layout); 1002 for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) 1003 WriteNlist(UndefinedSymbolData[i], Layout); 1004 1005 // Write the string table. 1006 OS << StringTable.data(); 1007 } 1008 } 1009 1010 MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW, 1011 raw_pwrite_stream &OS, 1012 bool IsLittleEndian) { 1013 return new MachObjectWriter(MOTW, OS, IsLittleEndian); 1014 } 1015