1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/MC/MCMachObjectWriter.h" 11 #include "llvm/ADT/StringMap.h" 12 #include "llvm/ADT/Twine.h" 13 #include "llvm/MC/MCAsmBackend.h" 14 #include "llvm/MC/MCAsmLayout.h" 15 #include "llvm/MC/MCAssembler.h" 16 #include "llvm/MC/MCExpr.h" 17 #include "llvm/MC/MCFixupKindInfo.h" 18 #include "llvm/MC/MCMachOSymbolFlags.h" 19 #include "llvm/MC/MCObjectWriter.h" 20 #include "llvm/MC/MCSectionMachO.h" 21 #include "llvm/MC/MCSymbol.h" 22 #include "llvm/MC/MCValue.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/ErrorHandling.h" 25 #include "llvm/Support/MachO.h" 26 #include "llvm/Support/raw_ostream.h" 27 #include <vector> 28 using namespace llvm; 29 30 #define DEBUG_TYPE "mc" 31 32 void MachObjectWriter::reset() { 33 Relocations.clear(); 34 IndirectSymBase.clear(); 35 StringTable.clear(); 36 LocalSymbolData.clear(); 37 ExternalSymbolData.clear(); 38 UndefinedSymbolData.clear(); 39 MCObjectWriter::reset(); 40 } 41 42 bool MachObjectWriter:: 43 doesSymbolRequireExternRelocation(const MCSymbolData *SD) { 44 // Undefined symbols are always extern. 45 if (SD->getSymbol().isUndefined()) 46 return true; 47 48 // References to weak definitions require external relocation entries; the 49 // definition may not always be the one in the same object file. 50 if (SD->getFlags() & SF_WeakDefinition) 51 return true; 52 53 // Otherwise, we can use an internal relocation. 
54 return false; 55 } 56 57 bool MachObjectWriter:: 58 MachSymbolData::operator<(const MachSymbolData &RHS) const { 59 return SymbolData->getSymbol().getName() < 60 RHS.SymbolData->getSymbol().getName(); 61 } 62 63 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { 64 const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo( 65 (MCFixupKind) Kind); 66 67 return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; 68 } 69 70 uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment, 71 const MCAsmLayout &Layout) const { 72 return getSectionAddress(Fragment->getParent()) + 73 Layout.getFragmentOffset(Fragment); 74 } 75 76 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbolData* SD, 77 const MCAsmLayout &Layout) const { 78 const MCSymbol &S = SD->getSymbol(); 79 80 // If this is a variable, then recursively evaluate now. 81 if (S.isVariable()) { 82 if (const MCConstantExpr *C = 83 dyn_cast<const MCConstantExpr>(S.getVariableValue())) 84 return C->getValue(); 85 86 87 MCValue Target; 88 if (!S.getVariableValue()->EvaluateAsRelocatable(Target, &Layout, nullptr)) 89 report_fatal_error("unable to evaluate offset for variable '" + 90 S.getName() + "'"); 91 92 // Verify that any used symbols are defined. 
93 if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined()) 94 report_fatal_error("unable to evaluate offset to undefined symbol '" + 95 Target.getSymA()->getSymbol().getName() + "'"); 96 if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined()) 97 report_fatal_error("unable to evaluate offset to undefined symbol '" + 98 Target.getSymB()->getSymbol().getName() + "'"); 99 100 uint64_t Address = Target.getConstant(); 101 if (Target.getSymA()) 102 Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( 103 Target.getSymA()->getSymbol()), Layout); 104 if (Target.getSymB()) 105 Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( 106 Target.getSymB()->getSymbol()), Layout); 107 return Address; 108 } 109 110 return getSectionAddress(SD->getFragment()->getParent()) + 111 Layout.getSymbolOffset(SD); 112 } 113 114 uint64_t MachObjectWriter::getPaddingSize(const MCSectionData *SD, 115 const MCAsmLayout &Layout) const { 116 uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD); 117 unsigned Next = SD->getLayoutOrder() + 1; 118 if (Next >= Layout.getSectionOrder().size()) 119 return 0; 120 121 const MCSectionData &NextSD = *Layout.getSectionOrder()[Next]; 122 if (NextSD.getSection().isVirtualSection()) 123 return 0; 124 return OffsetToAlignment(EndAddr, NextSD.getAlignment()); 125 } 126 127 void MachObjectWriter::WriteHeader(unsigned NumLoadCommands, 128 unsigned LoadCommandsSize, 129 bool SubsectionsViaSymbols) { 130 uint32_t Flags = 0; 131 132 if (SubsectionsViaSymbols) 133 Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS; 134 135 // struct mach_header (28 bytes) or 136 // struct mach_header_64 (32 bytes) 137 138 uint64_t Start = OS.tell(); 139 (void) Start; 140 141 Write32(is64Bit() ? 
MachO::MH_MAGIC_64 : MachO::MH_MAGIC); 142 143 Write32(TargetObjectWriter->getCPUType()); 144 Write32(TargetObjectWriter->getCPUSubtype()); 145 146 Write32(MachO::MH_OBJECT); 147 Write32(NumLoadCommands); 148 Write32(LoadCommandsSize); 149 Write32(Flags); 150 if (is64Bit()) 151 Write32(0); // reserved 152 153 assert(OS.tell() - Start == 154 (is64Bit()?sizeof(MachO::mach_header_64): sizeof(MachO::mach_header))); 155 } 156 157 /// WriteSegmentLoadCommand - Write a segment load command. 158 /// 159 /// \param NumSections The number of sections in this segment. 160 /// \param SectionDataSize The total size of the sections. 161 void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections, 162 uint64_t VMSize, 163 uint64_t SectionDataStartOffset, 164 uint64_t SectionDataSize) { 165 // struct segment_command (56 bytes) or 166 // struct segment_command_64 (72 bytes) 167 168 uint64_t Start = OS.tell(); 169 (void) Start; 170 171 unsigned SegmentLoadCommandSize = 172 is64Bit() ? sizeof(MachO::segment_command_64): 173 sizeof(MachO::segment_command); 174 Write32(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT); 175 Write32(SegmentLoadCommandSize + 176 NumSections * (is64Bit() ? 
sizeof(MachO::section_64) : 177 sizeof(MachO::section))); 178 179 WriteBytes("", 16); 180 if (is64Bit()) { 181 Write64(0); // vmaddr 182 Write64(VMSize); // vmsize 183 Write64(SectionDataStartOffset); // file offset 184 Write64(SectionDataSize); // file size 185 } else { 186 Write32(0); // vmaddr 187 Write32(VMSize); // vmsize 188 Write32(SectionDataStartOffset); // file offset 189 Write32(SectionDataSize); // file size 190 } 191 // maxprot 192 Write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); 193 // initprot 194 Write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); 195 Write32(NumSections); 196 Write32(0); // flags 197 198 assert(OS.tell() - Start == SegmentLoadCommandSize); 199 } 200 201 void MachObjectWriter::WriteSection(const MCAssembler &Asm, 202 const MCAsmLayout &Layout, 203 const MCSectionData &SD, 204 uint64_t FileOffset, 205 uint64_t RelocationsStart, 206 unsigned NumRelocations) { 207 uint64_t SectionSize = Layout.getSectionAddressSize(&SD); 208 209 // The offset is unused for virtual sections. 
210 if (SD.getSection().isVirtualSection()) { 211 assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!"); 212 FileOffset = 0; 213 } 214 215 // struct section (68 bytes) or 216 // struct section_64 (80 bytes) 217 218 uint64_t Start = OS.tell(); 219 (void) Start; 220 221 const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection()); 222 WriteBytes(Section.getSectionName(), 16); 223 WriteBytes(Section.getSegmentName(), 16); 224 if (is64Bit()) { 225 Write64(getSectionAddress(&SD)); // address 226 Write64(SectionSize); // size 227 } else { 228 Write32(getSectionAddress(&SD)); // address 229 Write32(SectionSize); // size 230 } 231 Write32(FileOffset); 232 233 unsigned Flags = Section.getTypeAndAttributes(); 234 if (SD.hasInstructions()) 235 Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS; 236 237 assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); 238 Write32(Log2_32(SD.getAlignment())); 239 Write32(NumRelocations ? RelocationsStart : 0); 240 Write32(NumRelocations); 241 Write32(Flags); 242 Write32(IndirectSymBase.lookup(&SD)); // reserved1 243 Write32(Section.getStubSize()); // reserved2 244 if (is64Bit()) 245 Write32(0); // reserved3 246 247 assert(OS.tell() - Start == (is64Bit() ? 
sizeof(MachO::section_64) : 248 sizeof(MachO::section))); 249 } 250 251 void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset, 252 uint32_t NumSymbols, 253 uint32_t StringTableOffset, 254 uint32_t StringTableSize) { 255 // struct symtab_command (24 bytes) 256 257 uint64_t Start = OS.tell(); 258 (void) Start; 259 260 Write32(MachO::LC_SYMTAB); 261 Write32(sizeof(MachO::symtab_command)); 262 Write32(SymbolOffset); 263 Write32(NumSymbols); 264 Write32(StringTableOffset); 265 Write32(StringTableSize); 266 267 assert(OS.tell() - Start == sizeof(MachO::symtab_command)); 268 } 269 270 void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, 271 uint32_t NumLocalSymbols, 272 uint32_t FirstExternalSymbol, 273 uint32_t NumExternalSymbols, 274 uint32_t FirstUndefinedSymbol, 275 uint32_t NumUndefinedSymbols, 276 uint32_t IndirectSymbolOffset, 277 uint32_t NumIndirectSymbols) { 278 // struct dysymtab_command (80 bytes) 279 280 uint64_t Start = OS.tell(); 281 (void) Start; 282 283 Write32(MachO::LC_DYSYMTAB); 284 Write32(sizeof(MachO::dysymtab_command)); 285 Write32(FirstLocalSymbol); 286 Write32(NumLocalSymbols); 287 Write32(FirstExternalSymbol); 288 Write32(NumExternalSymbols); 289 Write32(FirstUndefinedSymbol); 290 Write32(NumUndefinedSymbols); 291 Write32(0); // tocoff 292 Write32(0); // ntoc 293 Write32(0); // modtaboff 294 Write32(0); // nmodtab 295 Write32(0); // extrefsymoff 296 Write32(0); // nextrefsyms 297 Write32(IndirectSymbolOffset); 298 Write32(NumIndirectSymbols); 299 Write32(0); // extreloff 300 Write32(0); // nextrel 301 Write32(0); // locreloff 302 Write32(0); // nlocrel 303 304 assert(OS.tell() - Start == sizeof(MachO::dysymtab_command)); 305 } 306 307 MachObjectWriter::MachSymbolData * 308 MachObjectWriter::findSymbolData(const MCSymbol &Sym) { 309 for (auto &Entry : LocalSymbolData) 310 if (&Entry.SymbolData->getSymbol() == &Sym) 311 return &Entry; 312 313 for (auto &Entry : ExternalSymbolData) 314 if 
(&Entry.SymbolData->getSymbol() == &Sym) 315 return &Entry; 316 317 for (auto &Entry : UndefinedSymbolData) 318 if (&Entry.SymbolData->getSymbol() == &Sym) 319 return &Entry; 320 321 return nullptr; 322 } 323 324 const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const { 325 const MCSymbol *S = &Sym; 326 while (S->isVariable()) { 327 const MCExpr *Value = S->getVariableValue(); 328 const auto *Ref = dyn_cast<MCSymbolRefExpr>(Value); 329 if (!Ref) 330 return *S; 331 S = &Ref->getSymbol(); 332 } 333 return *S; 334 } 335 336 void MachObjectWriter::WriteNlist(MachSymbolData &MSD, 337 const MCAsmLayout &Layout) { 338 MCSymbolData &Data = *MSD.SymbolData; 339 const MCSymbol *Symbol = &Data.getSymbol(); 340 const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol); 341 uint8_t SectionIndex = MSD.SectionIndex; 342 uint8_t Type = 0; 343 uint16_t Flags = Data.getFlags(); 344 uint64_t Address = 0; 345 bool IsAlias = Symbol != AliasedSymbol; 346 347 MachSymbolData *AliaseeInfo; 348 if (IsAlias) { 349 AliaseeInfo = findSymbolData(*AliasedSymbol); 350 if (AliaseeInfo) 351 SectionIndex = AliaseeInfo->SectionIndex; 352 Symbol = AliasedSymbol; 353 } 354 355 // Set the N_TYPE bits. See <mach-o/nlist.h>. 356 // 357 // FIXME: Are the prebound or indirect fields possible here? 358 if (IsAlias && Symbol->isUndefined()) 359 Type = MachO::N_INDR; 360 else if (Symbol->isUndefined()) 361 Type = MachO::N_UNDF; 362 else if (Symbol->isAbsolute()) 363 Type = MachO::N_ABS; 364 else 365 Type = MachO::N_SECT; 366 367 // FIXME: Set STAB bits. 368 369 if (Data.isPrivateExtern()) 370 Type |= MachO::N_PEXT; 371 372 // Set external bit. 373 if (Data.isExternal() || (!IsAlias && Symbol->isUndefined())) 374 Type |= MachO::N_EXT; 375 376 // Compute the symbol address. 
377 if (IsAlias && Symbol->isUndefined()) 378 Address = AliaseeInfo->StringIndex; 379 else if (Symbol->isDefined()) 380 Address = getSymbolAddress(&Data, Layout); 381 else if (Data.isCommon()) { 382 // Common symbols are encoded with the size in the address 383 // field, and their alignment in the flags. 384 Address = Data.getCommonSize(); 385 386 // Common alignment is packed into the 'desc' bits. 387 if (unsigned Align = Data.getCommonAlignment()) { 388 unsigned Log2Size = Log2_32(Align); 389 assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); 390 if (Log2Size > 15) 391 report_fatal_error("invalid 'common' alignment '" + 392 Twine(Align) + "' for '" + Symbol->getName() + "'", 393 false); 394 // FIXME: Keep this mask with the SymbolFlags enumeration. 395 Flags = (Flags & 0xF0FF) | (Log2Size << 8); 396 } 397 } 398 399 if (Layout.getAssembler().isThumbFunc(Symbol)) 400 Flags |= SF_ThumbFunc; 401 402 // struct nlist (12 bytes) 403 404 Write32(MSD.StringIndex); 405 Write8(Type); 406 Write8(SectionIndex); 407 408 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' 409 // value. 410 Write16(Flags); 411 if (is64Bit()) 412 Write64(Address); 413 else 414 Write32(Address); 415 } 416 417 void MachObjectWriter::WriteLinkeditLoadCommand(uint32_t Type, 418 uint32_t DataOffset, 419 uint32_t DataSize) { 420 uint64_t Start = OS.tell(); 421 (void) Start; 422 423 Write32(Type); 424 Write32(sizeof(MachO::linkedit_data_command)); 425 Write32(DataOffset); 426 Write32(DataSize); 427 428 assert(OS.tell() - Start == sizeof(MachO::linkedit_data_command)); 429 } 430 431 static unsigned ComputeLinkerOptionsLoadCommandSize( 432 const std::vector<std::string> &Options, bool is64Bit) 433 { 434 unsigned Size = sizeof(MachO::linker_option_command); 435 for (unsigned i = 0, e = Options.size(); i != e; ++i) 436 Size += Options[i].size() + 1; 437 return RoundUpToAlignment(Size, is64Bit ? 
8 : 4); 438 } 439 440 void MachObjectWriter::WriteLinkerOptionsLoadCommand( 441 const std::vector<std::string> &Options) 442 { 443 unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit()); 444 uint64_t Start = OS.tell(); 445 (void) Start; 446 447 Write32(MachO::LC_LINKER_OPTION); 448 Write32(Size); 449 Write32(Options.size()); 450 uint64_t BytesWritten = sizeof(MachO::linker_option_command); 451 for (unsigned i = 0, e = Options.size(); i != e; ++i) { 452 // Write each string, including the null byte. 453 const std::string &Option = Options[i]; 454 WriteBytes(Option.c_str(), Option.size() + 1); 455 BytesWritten += Option.size() + 1; 456 } 457 458 // Pad to a multiple of the pointer size. 459 WriteBytes("", OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4)); 460 461 assert(OS.tell() - Start == Size); 462 } 463 464 void MachObjectWriter::RecordRelocation(MCAssembler &Asm, 465 const MCAsmLayout &Layout, 466 const MCFragment *Fragment, 467 const MCFixup &Fixup, MCValue Target, 468 bool &IsPCRel, uint64_t &FixedValue) { 469 TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup, 470 Target, FixedValue); 471 } 472 473 void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) { 474 // This is the point where 'as' creates actual symbols for indirect symbols 475 // (in the following two passes). It would be easier for us to do this sooner 476 // when we see the attribute, but that makes getting the order in the symbol 477 // table much more complicated than it is worth. 478 // 479 // FIXME: Revisit this when the dust settles. 480 481 // Report errors for use of .indirect_symbol not in a symbol pointer section 482 // or stub section. 
483 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 484 ie = Asm.indirect_symbol_end(); it != ie; ++it) { 485 const MCSectionMachO &Section = 486 cast<MCSectionMachO>(it->SectionData->getSection()); 487 488 if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS && 489 Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && 490 Section.getType() != MachO::S_SYMBOL_STUBS) { 491 MCSymbol &Symbol = *it->Symbol; 492 report_fatal_error("indirect symbol '" + Symbol.getName() + 493 "' not in a symbol pointer or stub section"); 494 } 495 } 496 497 // Bind non-lazy symbol pointers first. 498 unsigned IndirectIndex = 0; 499 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 500 ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { 501 const MCSectionMachO &Section = 502 cast<MCSectionMachO>(it->SectionData->getSection()); 503 504 if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS) 505 continue; 506 507 // Initialize the section indirect symbol base, if necessary. 508 IndirectSymBase.insert(std::make_pair(it->SectionData, IndirectIndex)); 509 510 Asm.getOrCreateSymbolData(*it->Symbol); 511 } 512 513 // Then lazy symbol pointers and symbol stubs. 514 IndirectIndex = 0; 515 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 516 ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { 517 const MCSectionMachO &Section = 518 cast<MCSectionMachO>(it->SectionData->getSection()); 519 520 if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && 521 Section.getType() != MachO::S_SYMBOL_STUBS) 522 continue; 523 524 // Initialize the section indirect symbol base, if necessary. 525 IndirectSymBase.insert(std::make_pair(it->SectionData, IndirectIndex)); 526 527 // Set the symbol type to undefined lazy, but only on construction. 528 // 529 // FIXME: Do not hardcode. 
530 bool Created; 531 MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); 532 if (Created) 533 Entry.setFlags(Entry.getFlags() | 0x0001); 534 } 535 } 536 537 /// ComputeSymbolTable - Compute the symbol table data 538 void MachObjectWriter::ComputeSymbolTable( 539 MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData, 540 std::vector<MachSymbolData> &ExternalSymbolData, 541 std::vector<MachSymbolData> &UndefinedSymbolData) { 542 // Build section lookup table. 543 DenseMap<const MCSection*, uint8_t> SectionIndexMap; 544 unsigned Index = 1; 545 for (MCAssembler::iterator it = Asm.begin(), 546 ie = Asm.end(); it != ie; ++it, ++Index) 547 SectionIndexMap[&it->getSection()] = Index; 548 assert(Index <= 256 && "Too many sections!"); 549 550 // Build the string table. 551 for (MCSymbolData &SD : Asm.symbols()) { 552 const MCSymbol &Symbol = SD.getSymbol(); 553 if (!Asm.isSymbolLinkerVisible(Symbol)) 554 continue; 555 556 StringTable.add(Symbol.getName()); 557 } 558 StringTable.finalize(StringTableBuilder::MachO); 559 560 // Build the symbol arrays but only for non-local symbols. 561 // 562 // The particular order that we collect and then sort the symbols is chosen to 563 // match 'as'. Even though it doesn't matter for correctness, this is 564 // important for letting us diff .o files. 565 for (MCSymbolData &SD : Asm.symbols()) { 566 const MCSymbol &Symbol = SD.getSymbol(); 567 568 // Ignore non-linker visible symbols. 
569 if (!Asm.isSymbolLinkerVisible(Symbol)) 570 continue; 571 572 if (!SD.isExternal() && !Symbol.isUndefined()) 573 continue; 574 575 MachSymbolData MSD; 576 MSD.SymbolData = &SD; 577 MSD.StringIndex = StringTable.getOffset(Symbol.getName()); 578 579 if (Symbol.isUndefined()) { 580 MSD.SectionIndex = 0; 581 UndefinedSymbolData.push_back(MSD); 582 } else if (Symbol.isAbsolute()) { 583 MSD.SectionIndex = 0; 584 ExternalSymbolData.push_back(MSD); 585 } else { 586 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); 587 assert(MSD.SectionIndex && "Invalid section index!"); 588 ExternalSymbolData.push_back(MSD); 589 } 590 } 591 592 // Now add the data for local symbols. 593 for (MCSymbolData &SD : Asm.symbols()) { 594 const MCSymbol &Symbol = SD.getSymbol(); 595 596 // Ignore non-linker visible symbols. 597 if (!Asm.isSymbolLinkerVisible(Symbol)) 598 continue; 599 600 if (SD.isExternal() || Symbol.isUndefined()) 601 continue; 602 603 MachSymbolData MSD; 604 MSD.SymbolData = &SD; 605 MSD.StringIndex = StringTable.getOffset(Symbol.getName()); 606 607 if (Symbol.isAbsolute()) { 608 MSD.SectionIndex = 0; 609 LocalSymbolData.push_back(MSD); 610 } else { 611 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); 612 assert(MSD.SectionIndex && "Invalid section index!"); 613 LocalSymbolData.push_back(MSD); 614 } 615 } 616 617 // External and undefined symbols are required to be in lexicographic order. 618 std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); 619 std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); 620 621 // Set the symbol indices. 
622 Index = 0; 623 for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) 624 LocalSymbolData[i].SymbolData->setIndex(Index++); 625 for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) 626 ExternalSymbolData[i].SymbolData->setIndex(Index++); 627 for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) 628 UndefinedSymbolData[i].SymbolData->setIndex(Index++); 629 630 for (const MCSectionData &SD : Asm) { 631 std::vector<RelAndSymbol> &Relocs = Relocations[&SD]; 632 for (RelAndSymbol &Rel : Relocs) { 633 if (!Rel.Sym) 634 continue; 635 636 // Set the Index and the IsExtern bit. 637 unsigned Index = Rel.Sym->getIndex(); 638 assert(isInt<24>(Index)); 639 if (IsLittleEndian) 640 Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (-1 << 24)) | Index | (1 << 27); 641 else 642 Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4); 643 } 644 } 645 } 646 647 void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm, 648 const MCAsmLayout &Layout) { 649 uint64_t StartAddress = 0; 650 const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder(); 651 for (int i = 0, n = Order.size(); i != n ; ++i) { 652 const MCSectionData *SD = Order[i]; 653 StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment()); 654 SectionAddress[SD] = StartAddress; 655 StartAddress += Layout.getSectionAddressSize(SD); 656 657 // Explicitly pad the section to match the alignment requirements of the 658 // following one. This is for 'gas' compatibility, it shouldn't 659 /// strictly be necessary. 660 StartAddress += getPaddingSize(SD, Layout); 661 } 662 } 663 664 void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, 665 const MCAsmLayout &Layout) { 666 computeSectionAddresses(Asm, Layout); 667 668 // Create symbol data for any indirect symbols. 
669 BindIndirectSymbols(Asm); 670 } 671 672 bool MachObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( 673 const MCAssembler &Asm, const MCSymbolData &DataA, const MCFragment &FB, 674 bool InSet, bool IsPCRel) const { 675 if (InSet) 676 return true; 677 678 // The effective address is 679 // addr(atom(A)) + offset(A) 680 // - addr(atom(B)) - offset(B) 681 // and the offsets are not relocatable, so the fixup is fully resolved when 682 // addr(atom(A)) - addr(atom(B)) == 0. 683 const MCSymbolData *A_Base = nullptr, *B_Base = nullptr; 684 685 const MCSymbol &SA = findAliasedSymbol(DataA.getSymbol()); 686 const MCSection &SecA = SA.getSection(); 687 const MCSection &SecB = FB.getParent()->getSection(); 688 689 if (IsPCRel) { 690 // The simple (Darwin, except on x86_64) way of dealing with this was to 691 // assume that any reference to a temporary symbol *must* be a temporary 692 // symbol in the same atom, unless the sections differ. Therefore, any PCrel 693 // relocation to a temporary symbol (in the same section) is fully 694 // resolved. This also works in conjunction with absolutized .set, which 695 // requires the compiler to use .set to absolutize the differences between 696 // symbols which the compiler knows to be assembly time constants, so we 697 // don't need to worry about considering symbol differences fully resolved. 698 // 699 // If the file isn't using sub-sections-via-symbols, we can make the 700 // same assumptions about any symbol that we normally make about 701 // assembler locals. 702 703 bool hasReliableSymbolDifference = isX86_64(); 704 if (!hasReliableSymbolDifference) { 705 if (!SA.isInSection() || &SecA != &SecB || 706 (!SA.isTemporary() && 707 FB.getAtom() != Asm.getSymbolData(SA).getFragment()->getAtom() && 708 Asm.getSubsectionsViaSymbols())) 709 return false; 710 return true; 711 } 712 // For Darwin x86_64, there is one special case when the reference IsPCRel. 
713 // If the fragment with the reference does not have a base symbol but meets 714 // the simple way of dealing with this, in that it is a temporary symbol in 715 // the same atom then it is assumed to be fully resolved. This is needed so 716 // a relocation entry is not created and so the static linker does not 717 // mess up the reference later. 718 else if(!FB.getAtom() && 719 SA.isTemporary() && SA.isInSection() && &SecA == &SecB){ 720 return true; 721 } 722 } else { 723 if (!TargetObjectWriter->useAggressiveSymbolFolding()) 724 return false; 725 } 726 727 // If they are not in the same section, we can't compute the diff. 728 if (&SecA != &SecB) 729 return false; 730 731 const MCFragment *FA = Asm.getSymbolData(SA).getFragment(); 732 733 // Bail if the symbol has no fragment. 734 if (!FA) 735 return false; 736 737 A_Base = FA->getAtom(); 738 B_Base = FB.getAtom(); 739 740 // If the atoms are the same, they are guaranteed to have the same address. 741 if (A_Base == B_Base) 742 return true; 743 744 // Otherwise, we can't prove this is fully resolved. 745 return false; 746 } 747 748 void MachObjectWriter::WriteObject(MCAssembler &Asm, 749 const MCAsmLayout &Layout) { 750 // Compute symbol table information and bind symbol indices. 751 ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData, 752 UndefinedSymbolData); 753 754 unsigned NumSections = Asm.size(); 755 const MCAssembler::VersionMinInfoType &VersionInfo = 756 Layout.getAssembler().getVersionMinInfo(); 757 758 // The section data starts after the header, the segment load command (and 759 // section headers) and the symbol table. 760 unsigned NumLoadCommands = 1; 761 uint64_t LoadCommandsSize = is64Bit() ? 762 sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64): 763 sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section); 764 765 // Add the deployment target version info load command size, if used. 
766 if (VersionInfo.Major != 0) { 767 ++NumLoadCommands; 768 LoadCommandsSize += sizeof(MachO::version_min_command); 769 } 770 771 // Add the data-in-code load command size, if used. 772 unsigned NumDataRegions = Asm.getDataRegions().size(); 773 if (NumDataRegions) { 774 ++NumLoadCommands; 775 LoadCommandsSize += sizeof(MachO::linkedit_data_command); 776 } 777 778 // Add the loh load command size, if used. 779 uint64_t LOHRawSize = Asm.getLOHContainer().getEmitSize(*this, Layout); 780 uint64_t LOHSize = RoundUpToAlignment(LOHRawSize, is64Bit() ? 8 : 4); 781 if (LOHSize) { 782 ++NumLoadCommands; 783 LoadCommandsSize += sizeof(MachO::linkedit_data_command); 784 } 785 786 // Add the symbol table load command sizes, if used. 787 unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + 788 UndefinedSymbolData.size(); 789 if (NumSymbols) { 790 NumLoadCommands += 2; 791 LoadCommandsSize += (sizeof(MachO::symtab_command) + 792 sizeof(MachO::dysymtab_command)); 793 } 794 795 // Add the linker option load commands sizes. 796 const std::vector<std::vector<std::string> > &LinkerOptions = 797 Asm.getLinkerOptions(); 798 for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) { 799 ++NumLoadCommands; 800 LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(LinkerOptions[i], 801 is64Bit()); 802 } 803 804 // Compute the total size of the section data, as well as its file size and vm 805 // size. 806 uint64_t SectionDataStart = (is64Bit() ? 
sizeof(MachO::mach_header_64) : 807 sizeof(MachO::mach_header)) + LoadCommandsSize; 808 uint64_t SectionDataSize = 0; 809 uint64_t SectionDataFileSize = 0; 810 uint64_t VMSize = 0; 811 for (MCAssembler::const_iterator it = Asm.begin(), 812 ie = Asm.end(); it != ie; ++it) { 813 const MCSectionData &SD = *it; 814 uint64_t Address = getSectionAddress(&SD); 815 uint64_t Size = Layout.getSectionAddressSize(&SD); 816 uint64_t FileSize = Layout.getSectionFileSize(&SD); 817 FileSize += getPaddingSize(&SD, Layout); 818 819 VMSize = std::max(VMSize, Address + Size); 820 821 if (SD.getSection().isVirtualSection()) 822 continue; 823 824 SectionDataSize = std::max(SectionDataSize, Address + Size); 825 SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize); 826 } 827 828 // The section data is padded to 4 bytes. 829 // 830 // FIXME: Is this machine dependent? 831 unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); 832 SectionDataFileSize += SectionDataPadding; 833 834 // Write the prolog, starting with the header and load command... 835 WriteHeader(NumLoadCommands, LoadCommandsSize, 836 Asm.getSubsectionsViaSymbols()); 837 WriteSegmentLoadCommand(NumSections, VMSize, 838 SectionDataStart, SectionDataSize); 839 840 // ... and then the section headers. 841 uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; 842 for (MCAssembler::const_iterator it = Asm.begin(), 843 ie = Asm.end(); it != ie; ++it) { 844 std::vector<RelAndSymbol> &Relocs = Relocations[it]; 845 unsigned NumRelocs = Relocs.size(); 846 uint64_t SectionStart = SectionDataStart + getSectionAddress(it); 847 WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs); 848 RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info); 849 } 850 851 // Write out the deployment target information, if it's available. 
852 if (VersionInfo.Major != 0) { 853 assert(VersionInfo.Update < 256 && "unencodable update target version"); 854 assert(VersionInfo.Minor < 256 && "unencodable minor target version"); 855 assert(VersionInfo.Major < 65536 && "unencodable major target version"); 856 uint32_t EncodedVersion = VersionInfo.Update | (VersionInfo.Minor << 8) | 857 (VersionInfo.Major << 16); 858 Write32(VersionInfo.Kind == MCVM_OSXVersionMin ? MachO::LC_VERSION_MIN_MACOSX : 859 MachO::LC_VERSION_MIN_IPHONEOS); 860 Write32(sizeof(MachO::version_min_command)); 861 Write32(EncodedVersion); 862 Write32(0); // reserved. 863 } 864 865 // Write the data-in-code load command, if used. 866 uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8; 867 if (NumDataRegions) { 868 uint64_t DataRegionsOffset = RelocTableEnd; 869 uint64_t DataRegionsSize = NumDataRegions * 8; 870 WriteLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset, 871 DataRegionsSize); 872 } 873 874 // Write the loh load command, if used. 875 uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize; 876 if (LOHSize) 877 WriteLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT, 878 DataInCodeTableEnd, LOHSize); 879 880 // Write the symbol table load command, if used. 881 if (NumSymbols) { 882 unsigned FirstLocalSymbol = 0; 883 unsigned NumLocalSymbols = LocalSymbolData.size(); 884 unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; 885 unsigned NumExternalSymbols = ExternalSymbolData.size(); 886 unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; 887 unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); 888 unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); 889 unsigned NumSymTabSymbols = 890 NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; 891 uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; 892 uint64_t IndirectSymbolOffset = 0; 893 894 // If used, the indirect symbols are written after the section data. 
895 if (NumIndirectSymbols) 896 IndirectSymbolOffset = LOHTableEnd; 897 898 // The symbol table is written after the indirect symbol data. 899 uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize; 900 901 // The string table is written after symbol table. 902 uint64_t StringTableOffset = 903 SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? 904 sizeof(MachO::nlist_64) : 905 sizeof(MachO::nlist)); 906 WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, 907 StringTableOffset, StringTable.data().size()); 908 909 WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, 910 FirstExternalSymbol, NumExternalSymbols, 911 FirstUndefinedSymbol, NumUndefinedSymbols, 912 IndirectSymbolOffset, NumIndirectSymbols); 913 } 914 915 // Write the linker options load commands. 916 for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) { 917 WriteLinkerOptionsLoadCommand(LinkerOptions[i]); 918 } 919 920 // Write the actual section data. 921 for (MCAssembler::const_iterator it = Asm.begin(), 922 ie = Asm.end(); it != ie; ++it) { 923 Asm.writeSectionData(it, Layout); 924 925 uint64_t Pad = getPaddingSize(it, Layout); 926 WriteZeros(Pad); 927 } 928 929 // Write the extra padding. 930 WriteZeros(SectionDataPadding); 931 932 // Write the relocation entries. 933 for (MCAssembler::const_iterator it = Asm.begin(), 934 ie = Asm.end(); it != ie; ++it) { 935 // Write the section relocation entries, in reverse order to match 'as' 936 // (approximately, the exact algorithm is more complicated than this). 937 std::vector<RelAndSymbol> &Relocs = Relocations[it]; 938 for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { 939 Write32(Relocs[e - i - 1].MRE.r_word0); 940 Write32(Relocs[e - i - 1].MRE.r_word1); 941 } 942 } 943 944 // Write out the data-in-code region payload, if there is one. 
945 for (MCAssembler::const_data_region_iterator 946 it = Asm.data_region_begin(), ie = Asm.data_region_end(); 947 it != ie; ++it) { 948 const DataRegionData *Data = &(*it); 949 uint64_t Start = 950 getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->Start), 951 Layout); 952 uint64_t End = 953 getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->End), 954 Layout); 955 DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind 956 << " start: " << Start << "(" << Data->Start->getName() << ")" 957 << " end: " << End << "(" << Data->End->getName() << ")" 958 << " size: " << End - Start 959 << "\n"); 960 Write32(Start); 961 Write16(End - Start); 962 Write16(Data->Kind); 963 } 964 965 // Write out the loh commands, if there is one. 966 if (LOHSize) { 967 #ifndef NDEBUG 968 unsigned Start = OS.tell(); 969 #endif 970 Asm.getLOHContainer().Emit(*this, Layout); 971 // Pad to a multiple of the pointer size. 972 WriteBytes("", OffsetToAlignment(LOHRawSize, is64Bit() ? 8 : 4)); 973 assert(OS.tell() - Start == LOHSize); 974 } 975 976 // Write the symbol table data, if used. 977 if (NumSymbols) { 978 // Write the indirect symbol entries. 979 for (MCAssembler::const_indirect_symbol_iterator 980 it = Asm.indirect_symbol_begin(), 981 ie = Asm.indirect_symbol_end(); it != ie; ++it) { 982 // Indirect symbols in the non-lazy symbol pointer section have some 983 // special handling. 984 const MCSectionMachO &Section = 985 static_cast<const MCSectionMachO&>(it->SectionData->getSection()); 986 if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) { 987 // If this symbol is defined and internal, mark it as such. 
if (it->Symbol->isDefined() &&
            !Asm.getSymbolData(*it->Symbol).isExternal()) {
          // Emit the INDIRECT_SYMBOL_LOCAL marker (plus INDIRECT_SYMBOL_ABS
          // when the symbol is absolute) in place of a symbol index, then
          // move on to the next indirect symbol.
          uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL;
          if (it->Symbol->isAbsolute())
            Flags |= MachO::INDIRECT_SYMBOL_ABS;
          Write32(Flags);
          continue;
        }
      }

      // Every other indirect symbol is written as the index stored in its
      // symbol data.
      Write32(Asm.getSymbolData(*it->Symbol).getIndex());
    }

    // FIXME: Check that offsets match computed ones.

    // Write the symbol table entries.
    // The order (local, external, undefined) is the same order in which the
    // First*Symbol ranges are computed for the dysymtab load command earlier
    // in this function.
    for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
      WriteNlist(LocalSymbolData[i], Layout);
    for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
      WriteNlist(ExternalSymbolData[i], Layout);
    for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
      WriteNlist(UndefinedSymbolData[i], Layout);

    // Write the string table.
    OS << StringTable.data();
  }
}

/// Create a Mach-O object writer.
///
/// Forwards \p MOTW, \p OS and \p IsLittleEndian unchanged to the
/// MachObjectWriter constructor.
///
/// \returns a MachObjectWriter allocated with `new`, as an MCObjectWriter
/// pointer. Nothing in this function releases it, so the caller is
/// presumably responsible for deleting it.
///
/// NOTE(review): whether the returned writer takes ownership of \p MOTW is
/// not visible from this function -- confirm against the MachObjectWriter
/// constructor.
MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
                                             raw_pwrite_stream &OS,
                                             bool IsLittleEndian) {
  return new MachObjectWriter(MOTW, OS, IsLittleEndian);
}
