1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/MC/MCMachObjectWriter.h" 11 #include "llvm/ADT/OwningPtr.h" 12 #include "llvm/ADT/StringMap.h" 13 #include "llvm/ADT/Twine.h" 14 #include "llvm/MC/MCAssembler.h" 15 #include "llvm/MC/MCAsmLayout.h" 16 #include "llvm/MC/MCExpr.h" 17 #include "llvm/MC/MCObjectWriter.h" 18 #include "llvm/MC/MCSectionMachO.h" 19 #include "llvm/MC/MCSymbol.h" 20 #include "llvm/MC/MCMachOSymbolFlags.h" 21 #include "llvm/MC/MCValue.h" 22 #include "llvm/Object/MachOFormat.h" 23 #include "llvm/Support/ErrorHandling.h" 24 #include "llvm/Target/TargetAsmBackend.h" 25 26 // FIXME: Gross. 27 #include "../Target/ARM/ARMFixupKinds.h" 28 #include "../Target/X86/X86FixupKinds.h" 29 30 #include <vector> 31 using namespace llvm; 32 using namespace llvm::object; 33 34 // FIXME: this has been copied from (or to) X86AsmBackend.cpp 35 static unsigned getFixupKindLog2Size(unsigned Kind) { 36 switch (Kind) { 37 default: 38 llvm_unreachable("invalid fixup kind!"); 39 case FK_PCRel_1: 40 case FK_Data_1: return 0; 41 case FK_PCRel_2: 42 case FK_Data_2: return 1; 43 case FK_PCRel_4: 44 // FIXME: Remove these!!! 45 case X86::reloc_riprel_4byte: 46 case X86::reloc_riprel_4byte_movq_load: 47 case X86::reloc_signed_4byte: 48 case FK_Data_4: return 2; 49 case FK_Data_8: return 3; 50 } 51 } 52 53 static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) { 54 // Undefined symbols are always extern. 55 if (SD->Symbol->isUndefined()) 56 return true; 57 58 // References to weak definitions require external relocation entries; the 59 // definition may not always be the one in the same object file. 60 if (SD->getFlags() & SF_WeakDefinition) 61 return true; 62 63 // Otherwise, we can use an internal relocation. 64 return false; 65 } 66 67 namespace { 68 69 class MachObjectWriter : public MCObjectWriter { 70 /// MachSymbolData - Helper struct for containing some precomputed information 71 /// on symbols. 72 struct MachSymbolData { 73 MCSymbolData *SymbolData; 74 uint64_t StringIndex; 75 uint8_t SectionIndex; 76 77 // Support lexicographic sorting. 78 bool operator<(const MachSymbolData &RHS) const { 79 return SymbolData->getSymbol().getName() < 80 RHS.SymbolData->getSymbol().getName(); 81 } 82 }; 83 84 /// The target specific Mach-O writer instance. 85 llvm::OwningPtr<MCMachObjectTargetWriter> TargetObjectWriter; 86 87 /// @name Relocation Data 88 /// @{ 89 90 llvm::DenseMap<const MCSectionData*, 91 std::vector<macho::RelocationEntry> > Relocations; 92 llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase; 93 94 /// @} 95 /// @name Symbol Table Data 96 /// @{ 97 98 SmallString<256> StringTable; 99 std::vector<MachSymbolData> LocalSymbolData; 100 std::vector<MachSymbolData> ExternalSymbolData; 101 std::vector<MachSymbolData> UndefinedSymbolData; 102 103 /// @} 104 105 private: 106 /// @name Utility Methods 107 /// @{ 108 109 bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { 110 const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo( 111 (MCFixupKind) Kind); 112 113 return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; 114 } 115 116 /// @} 117 118 SectionAddrMap SectionAddress; 119 uint64_t getSectionAddress(const MCSectionData* SD) const { 120 return SectionAddress.lookup(SD); 121 } 122 uint64_t getSymbolAddress(const MCSymbolData* SD, 123 const MCAsmLayout &Layout) const { 124 return getSectionAddress(SD->getFragment()->getParent()) + 125 Layout.getSymbolOffset(SD); 126 } 127 uint64_t getFragmentAddress(const MCFragment *Fragment, 128 const MCAsmLayout &Layout) const { 129 return getSectionAddress(Fragment->getParent()) + 130 Layout.getFragmentOffset(Fragment); 131 } 132 133 uint64_t getPaddingSize(const MCSectionData *SD, 134 const MCAsmLayout &Layout) const { 135 uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD); 136 unsigned Next = SD->getLayoutOrder() + 1; 137 if (Next >= Layout.getSectionOrder().size()) 138 return 0; 139 140 const MCSectionData &NextSD = *Layout.getSectionOrder()[Next]; 141 if (NextSD.getSection().isVirtualSection()) 142 return 0; 143 return OffsetToAlignment(EndAddr, NextSD.getAlignment()); 144 } 145 146 public: 147 MachObjectWriter(MCMachObjectTargetWriter *MOTW, raw_ostream &_OS, 148 bool _IsLittleEndian) 149 : MCObjectWriter(_OS, _IsLittleEndian), TargetObjectWriter(MOTW) { 150 } 151 152 /// @name Target Writer Proxy Accessors 153 /// @{ 154 155 bool is64Bit() const { return TargetObjectWriter->is64Bit(); } 156 bool isARM() const { 157 uint32_t CPUType = TargetObjectWriter->getCPUType() & ~mach::CTFM_ArchMask; 158 return CPUType == mach::CTM_ARM; 159 } 160 161 /// @} 162 163 void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize, 164 bool SubsectionsViaSymbols) { 165 uint32_t Flags = 0; 166 167 if (SubsectionsViaSymbols) 168 Flags |= macho::HF_SubsectionsViaSymbols; 169 170 // struct mach_header (28 bytes) or 171 // struct mach_header_64 (32 bytes) 172 173 uint64_t Start = OS.tell(); 174 (void) Start; 175 176 Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32); 177 178 Write32(TargetObjectWriter->getCPUType()); 179 Write32(TargetObjectWriter->getCPUSubtype()); 180 181 Write32(macho::HFT_Object); 182 Write32(NumLoadCommands); 183 Write32(LoadCommandsSize); 184 Write32(Flags); 185 if (is64Bit()) 186 Write32(0); // reserved 187 188 assert(OS.tell() - Start == 189 (is64Bit() ? macho::Header64Size : macho::Header32Size)); 190 } 191 192 /// WriteSegmentLoadCommand - Write a segment load command. 193 /// 194 /// \arg NumSections - The number of sections in this segment. 195 /// \arg SectionDataSize - The total size of the sections. 196 void WriteSegmentLoadCommand(unsigned NumSections, 197 uint64_t VMSize, 198 uint64_t SectionDataStartOffset, 199 uint64_t SectionDataSize) { 200 // struct segment_command (56 bytes) or 201 // struct segment_command_64 (72 bytes) 202 203 uint64_t Start = OS.tell(); 204 (void) Start; 205 206 unsigned SegmentLoadCommandSize = 207 is64Bit() ? macho::SegmentLoadCommand64Size: 208 macho::SegmentLoadCommand32Size; 209 Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment); 210 Write32(SegmentLoadCommandSize + 211 NumSections * (is64Bit() ? macho::Section64Size : 212 macho::Section32Size)); 213 214 WriteBytes("", 16); 215 if (is64Bit()) { 216 Write64(0); // vmaddr 217 Write64(VMSize); // vmsize 218 Write64(SectionDataStartOffset); // file offset 219 Write64(SectionDataSize); // file size 220 } else { 221 Write32(0); // vmaddr 222 Write32(VMSize); // vmsize 223 Write32(SectionDataStartOffset); // file offset 224 Write32(SectionDataSize); // file size 225 } 226 Write32(0x7); // maxprot 227 Write32(0x7); // initprot 228 Write32(NumSections); 229 Write32(0); // flags 230 231 assert(OS.tell() - Start == SegmentLoadCommandSize); 232 } 233 234 void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout, 235 const MCSectionData &SD, uint64_t FileOffset, 236 uint64_t RelocationsStart, unsigned NumRelocations) { 237 uint64_t SectionSize = Layout.getSectionAddressSize(&SD); 238 239 // The offset is unused for virtual sections. 240 if (SD.getSection().isVirtualSection()) { 241 assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!"); 242 FileOffset = 0; 243 } 244 245 // struct section (68 bytes) or 246 // struct section_64 (80 bytes) 247 248 uint64_t Start = OS.tell(); 249 (void) Start; 250 251 const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection()); 252 WriteBytes(Section.getSectionName(), 16); 253 WriteBytes(Section.getSegmentName(), 16); 254 if (is64Bit()) { 255 Write64(getSectionAddress(&SD)); // address 256 Write64(SectionSize); // size 257 } else { 258 Write32(getSectionAddress(&SD)); // address 259 Write32(SectionSize); // size 260 } 261 Write32(FileOffset); 262 263 unsigned Flags = Section.getTypeAndAttributes(); 264 if (SD.hasInstructions()) 265 Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS; 266 267 assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); 268 Write32(Log2_32(SD.getAlignment())); 269 Write32(NumRelocations ? RelocationsStart : 0); 270 Write32(NumRelocations); 271 Write32(Flags); 272 Write32(IndirectSymBase.lookup(&SD)); // reserved1 273 Write32(Section.getStubSize()); // reserved2 274 if (is64Bit()) 275 Write32(0); // reserved3 276 277 assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size : 278 macho::Section32Size)); 279 } 280 281 void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, 282 uint32_t StringTableOffset, 283 uint32_t StringTableSize) { 284 // struct symtab_command (24 bytes) 285 286 uint64_t Start = OS.tell(); 287 (void) Start; 288 289 Write32(macho::LCT_Symtab); 290 Write32(macho::SymtabLoadCommandSize); 291 Write32(SymbolOffset); 292 Write32(NumSymbols); 293 Write32(StringTableOffset); 294 Write32(StringTableSize); 295 296 assert(OS.tell() - Start == macho::SymtabLoadCommandSize); 297 } 298 299 void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, 300 uint32_t NumLocalSymbols, 301 uint32_t FirstExternalSymbol, 302 uint32_t NumExternalSymbols, 303 uint32_t FirstUndefinedSymbol, 304 uint32_t NumUndefinedSymbols, 305 uint32_t IndirectSymbolOffset, 306 uint32_t NumIndirectSymbols) { 307 // struct dysymtab_command (80 bytes) 308 309 uint64_t Start = OS.tell(); 310 (void) Start; 311 312 Write32(macho::LCT_Dysymtab); 313 Write32(macho::DysymtabLoadCommandSize); 314 Write32(FirstLocalSymbol); 315 Write32(NumLocalSymbols); 316 Write32(FirstExternalSymbol); 317 Write32(NumExternalSymbols); 318 Write32(FirstUndefinedSymbol); 319 Write32(NumUndefinedSymbols); 320 Write32(0); // tocoff 321 Write32(0); // ntoc 322 Write32(0); // modtaboff 323 Write32(0); // nmodtab 324 Write32(0); // extrefsymoff 325 Write32(0); // nextrefsyms 326 Write32(IndirectSymbolOffset); 327 Write32(NumIndirectSymbols); 328 Write32(0); // extreloff 329 Write32(0); // nextrel 330 Write32(0); // locreloff 331 Write32(0); // nlocrel 332 333 assert(OS.tell() - Start == macho::DysymtabLoadCommandSize); 334 } 335 336 void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) { 337 MCSymbolData &Data = *MSD.SymbolData; 338 const MCSymbol &Symbol = Data.getSymbol(); 339 uint8_t Type = 0; 340 uint16_t Flags = Data.getFlags(); 341 uint32_t Address = 0; 342 343 // Set the N_TYPE bits. See <mach-o/nlist.h>. 344 // 345 // FIXME: Are the prebound or indirect fields possible here? 346 if (Symbol.isUndefined()) 347 Type = macho::STT_Undefined; 348 else if (Symbol.isAbsolute()) 349 Type = macho::STT_Absolute; 350 else 351 Type = macho::STT_Section; 352 353 // FIXME: Set STAB bits. 354 355 if (Data.isPrivateExtern()) 356 Type |= macho::STF_PrivateExtern; 357 358 // Set external bit. 359 if (Data.isExternal() || Symbol.isUndefined()) 360 Type |= macho::STF_External; 361 362 // Compute the symbol address. 363 if (Symbol.isDefined()) { 364 if (Symbol.isAbsolute()) { 365 Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue(); 366 } else { 367 Address = getSymbolAddress(&Data, Layout); 368 } 369 } else if (Data.isCommon()) { 370 // Common symbols are encoded with the size in the address 371 // field, and their alignment in the flags. 372 Address = Data.getCommonSize(); 373 374 // Common alignment is packed into the 'desc' bits. 375 if (unsigned Align = Data.getCommonAlignment()) { 376 unsigned Log2Size = Log2_32(Align); 377 assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); 378 if (Log2Size > 15) 379 report_fatal_error("invalid 'common' alignment '" + 380 Twine(Align) + "'"); 381 // FIXME: Keep this mask with the SymbolFlags enumeration. 382 Flags = (Flags & 0xF0FF) | (Log2Size << 8); 383 } 384 } 385 386 // struct nlist (12 bytes) 387 388 Write32(MSD.StringIndex); 389 Write8(Type); 390 Write8(MSD.SectionIndex); 391 392 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' 393 // value. 394 Write16(Flags); 395 if (is64Bit()) 396 Write64(Address); 397 else 398 Write32(Address); 399 } 400 401 // FIXME: We really need to improve the relocation validation. Basically, we 402 // want to implement a separate computation which evaluates the relocation 403 // entry as the linker would, and verifies that the resultant fixup value is 404 // exactly what the encoder wanted. This will catch several classes of 405 // problems: 406 // 407 // - Relocation entry bugs, the two algorithms are unlikely to have the same 408 // exact bug. 409 // 410 // - Relaxation issues, where we forget to relax something. 411 // 412 // - Input errors, where something cannot be correctly encoded. 'as' allows 413 // these through in many cases. 414 415 static bool isFixupKindRIPRel(unsigned Kind) { 416 return Kind == X86::reloc_riprel_4byte || 417 Kind == X86::reloc_riprel_4byte_movq_load; 418 } 419 void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout, 420 const MCFragment *Fragment, 421 const MCFixup &Fixup, MCValue Target, 422 uint64_t &FixedValue) { 423 unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); 424 unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind()); 425 unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); 426 427 // See <reloc.h>. 428 uint32_t FixupOffset = 429 Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); 430 uint32_t FixupAddress = 431 getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); 432 int64_t Value = 0; 433 unsigned Index = 0; 434 unsigned IsExtern = 0; 435 unsigned Type = 0; 436 437 Value = Target.getConstant(); 438 439 if (IsPCRel) { 440 // Compensate for the relocation offset, Darwin x86_64 relocations only 441 // have the addend and appear to have attempted to define it to be the 442 // actual expression addend without the PCrel bias. However, instructions 443 // with data following the relocation are not accommodated for (see comment 444 // below regarding SIGNED{1,2,4}), so it isn't exactly that either. 445 Value += 1LL << Log2Size; 446 } 447 448 if (Target.isAbsolute()) { // constant 449 // SymbolNum of 0 indicates the absolute section. 450 Type = macho::RIT_X86_64_Unsigned; 451 Index = 0; 452 453 // FIXME: I believe this is broken, I don't think the linker can 454 // understand it. I think it would require a local relocation, but I'm not 455 // sure if that would work either. The official way to get an absolute 456 // PCrel relocation is to use an absolute symbol (which we don't support 457 // yet). 458 if (IsPCRel) { 459 IsExtern = 1; 460 Type = macho::RIT_X86_64_Branch; 461 } 462 } else if (Target.getSymB()) { // A - B + constant 463 const MCSymbol *A = &Target.getSymA()->getSymbol(); 464 MCSymbolData &A_SD = Asm.getSymbolData(*A); 465 const MCSymbolData *A_Base = Asm.getAtom(&A_SD); 466 467 const MCSymbol *B = &Target.getSymB()->getSymbol(); 468 MCSymbolData &B_SD = Asm.getSymbolData(*B); 469 const MCSymbolData *B_Base = Asm.getAtom(&B_SD); 470 471 // Neither symbol can be modified. 472 if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || 473 Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) 474 report_fatal_error("unsupported relocation of modified symbol"); 475 476 // We don't support PCrel relocations of differences. Darwin 'as' doesn't 477 // implement most of these correctly. 478 if (IsPCRel) 479 report_fatal_error("unsupported pc-relative relocation of difference"); 480 481 // The support for the situation where one or both of the symbols would 482 // require a local relocation is handled just like if the symbols were 483 // external. This is certainly used in the case of debug sections where 484 // the section has only temporary symbols and thus the symbols don't have 485 // base symbols. This is encoded using the section ordinal and 486 // non-extern relocation entries. 487 488 // Darwin 'as' doesn't emit correct relocations for this (it ends up with 489 // a single SIGNED relocation); reject it for now. Except the case where 490 // both symbols don't have a base, equal but both NULL. 491 if (A_Base == B_Base && A_Base) 492 report_fatal_error("unsupported relocation with identical base"); 493 494 Value += getSymbolAddress(&A_SD, Layout) - 495 (A_Base == NULL ? 0 : getSymbolAddress(A_Base, Layout)); 496 Value -= getSymbolAddress(&B_SD, Layout) - 497 (B_Base == NULL ? 0 : getSymbolAddress(B_Base, Layout)); 498 499 if (A_Base) { 500 Index = A_Base->getIndex(); 501 IsExtern = 1; 502 } 503 else { 504 Index = A_SD.getFragment()->getParent()->getOrdinal() + 1; 505 IsExtern = 0; 506 } 507 Type = macho::RIT_X86_64_Unsigned; 508 509 macho::RelocationEntry MRE; 510 MRE.Word0 = FixupOffset; 511 MRE.Word1 = ((Index << 0) | 512 (IsPCRel << 24) | 513 (Log2Size << 25) | 514 (IsExtern << 27) | 515 (Type << 28)); 516 Relocations[Fragment->getParent()].push_back(MRE); 517 518 if (B_Base) { 519 Index = B_Base->getIndex(); 520 IsExtern = 1; 521 } 522 else { 523 Index = B_SD.getFragment()->getParent()->getOrdinal() + 1; 524 IsExtern = 0; 525 } 526 Type = macho::RIT_X86_64_Subtractor; 527 } else { 528 const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); 529 MCSymbolData &SD = Asm.getSymbolData(*Symbol); 530 const MCSymbolData *Base = Asm.getAtom(&SD); 531 532 // Relocations inside debug sections always use local relocations when 533 // possible. This seems to be done because the debugger doesn't fully 534 // understand x86_64 relocation entries, and expects to find values that 535 // have already been fixed up. 536 if (Symbol->isInSection()) { 537 const MCSectionMachO &Section = static_cast<const MCSectionMachO&>( 538 Fragment->getParent()->getSection()); 539 if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG)) 540 Base = 0; 541 } 542 543 // x86_64 almost always uses external relocations, except when there is no 544 // symbol to use as a base address (a local symbol with no preceding 545 // non-local symbol). 546 if (Base) { 547 Index = Base->getIndex(); 548 IsExtern = 1; 549 550 // Add the local offset, if needed. 551 if (Base != &SD) 552 Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); 553 } else if (Symbol->isInSection()) { 554 // The index is the section ordinal (1-based). 555 Index = SD.getFragment()->getParent()->getOrdinal() + 1; 556 IsExtern = 0; 557 Value += getSymbolAddress(&SD, Layout); 558 559 if (IsPCRel) 560 Value -= FixupAddress + (1 << Log2Size); 561 } else if (Symbol->isVariable()) { 562 const MCExpr *Value = Symbol->getVariableValue(); 563 int64_t Res; 564 bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, SectionAddress); 565 if (isAbs) { 566 FixedValue = Res; 567 return; 568 } else { 569 report_fatal_error("unsupported relocation of variable '" + 570 Symbol->getName() + "'"); 571 } 572 } else { 573 report_fatal_error("unsupported relocation of undefined symbol '" + 574 Symbol->getName() + "'"); 575 } 576 577 MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); 578 if (IsPCRel) { 579 if (IsRIPRel) { 580 if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { 581 // x86_64 distinguishes movq foo@GOTPCREL so that the linker can 582 // rewrite the movq to an leaq at link time if the symbol ends up in 583 // the same linkage unit. 584 if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) 585 Type = macho::RIT_X86_64_GOTLoad; 586 else 587 Type = macho::RIT_X86_64_GOT; 588 } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { 589 Type = macho::RIT_X86_64_TLV; 590 } else if (Modifier != MCSymbolRefExpr::VK_None) { 591 report_fatal_error("unsupported symbol modifier in relocation"); 592 } else { 593 Type = macho::RIT_X86_64_Signed; 594 595 // The Darwin x86_64 relocation format has a problem where it cannot 596 // encode an address (L<foo> + <constant>) which is outside the atom 597 // containing L<foo>. Generally, this shouldn't occur but it does 598 // happen when we have a RIPrel instruction with data following the 599 // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel 600 // adjustment Darwin x86_64 uses, the offset is still negative and 601 // the linker has no way to recognize this. 602 // 603 // To work around this, Darwin uses several special relocation types 604 // to indicate the offsets. However, the specification or 605 // implementation of these seems to also be incomplete; they should 606 // adjust the addend as well based on the actual encoded instruction 607 // (the additional bias), but instead appear to just look at the 608 // final offset. 609 switch (-(Target.getConstant() + (1LL << Log2Size))) { 610 case 1: Type = macho::RIT_X86_64_Signed1; break; 611 case 2: Type = macho::RIT_X86_64_Signed2; break; 612 case 4: Type = macho::RIT_X86_64_Signed4; break; 613 } 614 } 615 } else { 616 if (Modifier != MCSymbolRefExpr::VK_None) 617 report_fatal_error("unsupported symbol modifier in branch " 618 "relocation"); 619 620 Type = macho::RIT_X86_64_Branch; 621 } 622 } else { 623 if (Modifier == MCSymbolRefExpr::VK_GOT) { 624 Type = macho::RIT_X86_64_GOT; 625 } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { 626 // GOTPCREL is allowed as a modifier on non-PCrel instructions, in 627 // which case all we do is set the PCrel bit in the relocation entry; 628 // this is used with exception handling, for example. The source is 629 // required to include any necessary offset directly. 630 Type = macho::RIT_X86_64_GOT; 631 IsPCRel = 1; 632 } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { 633 report_fatal_error("TLVP symbol modifier should have been rip-rel"); 634 } else if (Modifier != MCSymbolRefExpr::VK_None) 635 report_fatal_error("unsupported symbol modifier in relocation"); 636 else 637 Type = macho::RIT_X86_64_Unsigned; 638 } 639 } 640 641 // x86_64 always writes custom values into the fixups. 642 FixedValue = Value; 643 644 // struct relocation_info (8 bytes) 645 macho::RelocationEntry MRE; 646 MRE.Word0 = FixupOffset; 647 MRE.Word1 = ((Index << 0) | 648 (IsPCRel << 24) | 649 (Log2Size << 25) | 650 (IsExtern << 27) | 651 (Type << 28)); 652 Relocations[Fragment->getParent()].push_back(MRE); 653 } 654 655 void RecordScatteredRelocation(const MCAssembler &Asm, 656 const MCAsmLayout &Layout, 657 const MCFragment *Fragment, 658 const MCFixup &Fixup, MCValue Target, 659 unsigned Log2Size, 660 uint64_t &FixedValue) { 661 uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); 662 unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); 663 unsigned Type = macho::RIT_Vanilla; 664 665 // See <reloc.h>. 666 const MCSymbol *A = &Target.getSymA()->getSymbol(); 667 MCSymbolData *A_SD = &Asm.getSymbolData(*A); 668 669 if (!A_SD->getFragment()) 670 report_fatal_error("symbol '" + A->getName() + 671 "' can not be undefined in a subtraction expression"); 672 673 uint32_t Value = getSymbolAddress(A_SD, Layout); 674 uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent()); 675 FixedValue += SecAddr; 676 uint32_t Value2 = 0; 677 678 if (const MCSymbolRefExpr *B = Target.getSymB()) { 679 MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); 680 681 if (!B_SD->getFragment()) 682 report_fatal_error("symbol '" + B->getSymbol().getName() + 683 "' can not be undefined in a subtraction expression"); 684 685 // Select the appropriate difference relocation type. 686 // 687 // Note that there is no longer any semantic difference between these two 688 // relocation types from the linkers point of view, this is done solely 689 // for pedantic compatibility with 'as'. 690 Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference : 691 (unsigned)macho::RIT_Generic_LocalDifference; 692 Value2 = getSymbolAddress(B_SD, Layout); 693 FixedValue -= getSectionAddress(B_SD->getFragment()->getParent()); 694 } 695 696 // Relocations are written out in reverse order, so the PAIR comes first. 697 if (Type == macho::RIT_Difference || 698 Type == macho::RIT_Generic_LocalDifference) { 699 macho::RelocationEntry MRE; 700 MRE.Word0 = ((0 << 0) | 701 (macho::RIT_Pair << 24) | 702 (Log2Size << 28) | 703 (IsPCRel << 30) | 704 macho::RF_Scattered); 705 MRE.Word1 = Value2; 706 Relocations[Fragment->getParent()].push_back(MRE); 707 } 708 709 macho::RelocationEntry MRE; 710 MRE.Word0 = ((FixupOffset << 0) | 711 (Type << 24) | 712 (Log2Size << 28) | 713 (IsPCRel << 30) | 714 macho::RF_Scattered); 715 MRE.Word1 = Value; 716 Relocations[Fragment->getParent()].push_back(MRE); 717 } 718 719 void RecordARMScatteredRelocation(const MCAssembler &Asm, 720 const MCAsmLayout &Layout, 721 const MCFragment *Fragment, 722 const MCFixup &Fixup, MCValue Target, 723 unsigned Log2Size, 724 uint64_t &FixedValue) { 725 uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); 726 unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); 727 unsigned Type = macho::RIT_Vanilla; 728 729 // See <reloc.h>. 730 const MCSymbol *A = &Target.getSymA()->getSymbol(); 731 MCSymbolData *A_SD = &Asm.getSymbolData(*A); 732 733 if (!A_SD->getFragment()) 734 report_fatal_error("symbol '" + A->getName() + 735 "' can not be undefined in a subtraction expression"); 736 737 uint32_t Value = getSymbolAddress(A_SD, Layout); 738 uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent()); 739 FixedValue += SecAddr; 740 uint32_t Value2 = 0; 741 742 if (const MCSymbolRefExpr *B = Target.getSymB()) { 743 MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); 744 745 if (!B_SD->getFragment()) 746 report_fatal_error("symbol '" + B->getSymbol().getName() + 747 "' can not be undefined in a subtraction expression"); 748 749 // Select the appropriate difference relocation type. 750 Type = macho::RIT_Difference; 751 Value2 = getSymbolAddress(B_SD, Layout); 752 FixedValue -= getSectionAddress(B_SD->getFragment()->getParent()); 753 } 754 755 // Relocations are written out in reverse order, so the PAIR comes first. 756 if (Type == macho::RIT_Difference || 757 Type == macho::RIT_Generic_LocalDifference) { 758 macho::RelocationEntry MRE; 759 MRE.Word0 = ((0 << 0) | 760 (macho::RIT_Pair << 24) | 761 (Log2Size << 28) | 762 (IsPCRel << 30) | 763 macho::RF_Scattered); 764 MRE.Word1 = Value2; 765 Relocations[Fragment->getParent()].push_back(MRE); 766 } 767 768 macho::RelocationEntry MRE; 769 MRE.Word0 = ((FixupOffset << 0) | 770 (Type << 24) | 771 (Log2Size << 28) | 772 (IsPCRel << 30) | 773 macho::RF_Scattered); 774 MRE.Word1 = Value; 775 Relocations[Fragment->getParent()].push_back(MRE); 776 } 777 778 void RecordARMMovwMovtRelocation(const MCAssembler &Asm, 779 const MCAsmLayout &Layout, 780 const MCFragment *Fragment, 781 const MCFixup &Fixup, MCValue Target, 782 uint64_t &FixedValue) { 783 uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); 784 unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); 785 unsigned Type = macho::RIT_ARM_Half; 786 787 // See <reloc.h>. 788 const MCSymbol *A = &Target.getSymA()->getSymbol(); 789 MCSymbolData *A_SD = &Asm.getSymbolData(*A); 790 791 if (!A_SD->getFragment()) 792 report_fatal_error("symbol '" + A->getName() + 793 "' can not be undefined in a subtraction expression"); 794 795 uint32_t Value = getSymbolAddress(A_SD, Layout); 796 uint32_t Value2 = 0; 797 uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent()); 798 FixedValue += SecAddr; 799 800 if (const MCSymbolRefExpr *B = Target.getSymB()) { 801 MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); 802 803 if (!B_SD->getFragment()) 804 report_fatal_error("symbol '" + B->getSymbol().getName() + 805 "' can not be undefined in a subtraction expression"); 806 807 // Select the appropriate difference relocation type. 808 Type = macho::RIT_ARM_HalfDifference; 809 Value2 = getSymbolAddress(B_SD, Layout); 810 FixedValue -= getSectionAddress(B_SD->getFragment()->getParent()); 811 } 812 813 // Relocations are written out in reverse order, so the PAIR comes first. 814 // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field: 815 // 816 // For these two r_type relocations they always have a pair following them 817 // and the r_length bits are used differently. The encoding of the 818 // r_length is as follows: 819 // low bit of r_length: 820 // 0 - :lower16: for movw instructions 821 // 1 - :upper16: for movt instructions 822 // high bit of r_length: 823 // 0 - arm instructions 824 // 1 - thumb instructions 825 // the other half of the relocated expression is in the following pair 826 // relocation entry in the the low 16 bits of r_address field. 827 unsigned ThumbBit = 0; 828 unsigned MovtBit = 0; 829 switch ((unsigned)Fixup.getKind()) { 830 default: break; 831 case ARM::fixup_arm_movt_hi16: 832 case ARM::fixup_arm_movt_hi16_pcrel: 833 MovtBit = 1; 834 break; 835 case ARM::fixup_t2_movt_hi16: 836 case ARM::fixup_t2_movt_hi16_pcrel: 837 MovtBit = 1; 838 // Fallthrough 839 case ARM::fixup_t2_movw_lo16: 840 case ARM::fixup_t2_movw_lo16_pcrel: 841 ThumbBit = 1; 842 break; 843 } 844 845 846 if (Type == macho::RIT_ARM_HalfDifference) { 847 uint32_t OtherHalf = MovtBit 848 ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16); 849 850 macho::RelocationEntry MRE; 851 MRE.Word0 = ((OtherHalf << 0) | 852 (macho::RIT_Pair << 24) | 853 (MovtBit << 28) | 854 (ThumbBit << 29) | 855 (IsPCRel << 30) | 856 macho::RF_Scattered); 857 MRE.Word1 = Value2; 858 Relocations[Fragment->getParent()].push_back(MRE); 859 } 860 861 macho::RelocationEntry MRE; 862 MRE.Word0 = ((FixupOffset << 0) | 863 (Type << 24) | 864 (MovtBit << 28) | 865 (ThumbBit << 29) | 866 (IsPCRel << 30) | 867 macho::RF_Scattered); 868 MRE.Word1 = Value; 869 Relocations[Fragment->getParent()].push_back(MRE); 870 } 871 872 void RecordTLVPRelocation(const MCAssembler &Asm, 873 const MCAsmLayout &Layout, 874 const MCFragment *Fragment, 875 const MCFixup &Fixup, MCValue Target, 876 uint64_t &FixedValue) { 877 assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP && 878 !is64Bit() && 879 "Should only be called with a 32-bit TLVP relocation!"); 880 881 unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); 882 uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); 883 unsigned IsPCRel = 0; 884 885 // Get the symbol data. 886 MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol()); 887 unsigned Index = SD_A->getIndex(); 888 889 // We're only going to have a second symbol in pic mode and it'll be a 890 // subtraction from the picbase. For 32-bit pic the addend is the difference 891 // between the picbase and the next address. For 32-bit static the addend 892 // is zero. 893 if (Target.getSymB()) { 894 // If this is a subtraction then we're pcrel. 895 uint32_t FixupAddress = 896 getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); 897 MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol()); 898 IsPCRel = 1; 899 FixedValue = (FixupAddress - getSymbolAddress(SD_B, Layout) + 900 Target.getConstant()); 901 FixedValue += 1ULL << Log2Size; 902 } else { 903 FixedValue = 0; 904 } 905 906 // struct relocation_info (8 bytes) 907 macho::RelocationEntry MRE; 908 MRE.Word0 = Value; 909 MRE.Word1 = ((Index << 0) | 910 (IsPCRel << 24) | 911 (Log2Size << 25) | 912 (1 << 27) | // Extern 913 (macho::RIT_Generic_TLV << 28)); // Type 914 Relocations[Fragment->getParent()].push_back(MRE); 915 } 916 917 static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, 918 unsigned &Log2Size) { 919 RelocType = unsigned(macho::RIT_Vanilla); 920 Log2Size = ~0U; 921 922 switch (Kind) { 923 default: 924 return false; 925 926 case FK_Data_1: 927 Log2Size = llvm::Log2_32(1); 928 return true; 929 case FK_Data_2: 930 Log2Size = llvm::Log2_32(2); 931 return true; 932 case FK_Data_4: 933 Log2Size = llvm::Log2_32(4); 934 return true; 935 case FK_Data_8: 936 Log2Size = llvm::Log2_32(8); 937 return true; 938 939 // Handle 24-bit branch kinds. 940 case ARM::fixup_arm_ldst_pcrel_12: 941 case ARM::fixup_arm_pcrel_10: 942 case ARM::fixup_arm_adr_pcrel_12: 943 case ARM::fixup_arm_condbranch: 944 case ARM::fixup_arm_uncondbranch: 945 RelocType = unsigned(macho::RIT_ARM_Branch24Bit); 946 // Report as 'long', even though that is not quite accurate. 947 Log2Size = llvm::Log2_32(4); 948 return true; 949 950 // Handle Thumb branches. 951 case ARM::fixup_arm_thumb_br: 952 RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); 953 Log2Size = llvm::Log2_32(2); 954 return true; 955 956 case ARM::fixup_arm_thumb_bl: 957 case ARM::fixup_arm_thumb_blx: 958 RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); 959 Log2Size = llvm::Log2_32(4); 960 return true; 961 962 case ARM::fixup_arm_movt_hi16: 963 case ARM::fixup_arm_movt_hi16_pcrel: 964 case ARM::fixup_t2_movt_hi16: 965 case ARM::fixup_t2_movt_hi16_pcrel: 966 RelocType = unsigned(macho::RIT_ARM_HalfDifference); 967 // Report as 'long', even though that is not quite accurate. 968 Log2Size = llvm::Log2_32(4); 969 return true; 970 971 case ARM::fixup_arm_movw_lo16: 972 case ARM::fixup_arm_movw_lo16_pcrel: 973 case ARM::fixup_t2_movw_lo16: 974 case ARM::fixup_t2_movw_lo16_pcrel: 975 RelocType = unsigned(macho::RIT_ARM_Half); 976 // Report as 'long', even though that is not quite accurate. 977 Log2Size = llvm::Log2_32(4); 978 return true; 979 } 980 } 981 void RecordARMRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, 982 const MCFragment *Fragment, const MCFixup &Fixup, 983 MCValue Target, uint64_t &FixedValue) { 984 unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); 985 unsigned Log2Size; 986 unsigned RelocType = macho::RIT_Vanilla; 987 if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) { 988 report_fatal_error("unknown ARM fixup kind!"); 989 return; 990 } 991 992 // If this is a difference or a defined symbol plus an offset, then we need 993 // a scattered relocation entry. Differences always require scattered 994 // relocations. 995 if (Target.getSymB()) { 996 if (RelocType == macho::RIT_ARM_Half || 997 RelocType == macho::RIT_ARM_HalfDifference) 998 return RecordARMMovwMovtRelocation(Asm, Layout, Fragment, Fixup, 999 Target, FixedValue); 1000 return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, 1001 Target, Log2Size, FixedValue); 1002 } 1003 1004 // Get the symbol data, if any. 1005 MCSymbolData *SD = 0; 1006 if (Target.getSymA()) 1007 SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); 1008 1009 // FIXME: For other platforms, we need to use scattered relocations for 1010 // internal relocations with offsets. If this is an internal relocation 1011 // with an offset, it also needs a scattered relocation entry. 1012 // 1013 // Is this right for ARM? 1014 uint32_t Offset = Target.getConstant(); 1015 if (IsPCRel && RelocType == macho::RIT_Vanilla) 1016 Offset += 1 << Log2Size; 1017 if (Offset && SD && !doesSymbolRequireExternRelocation(SD)) 1018 return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, Target, 1019 Log2Size, FixedValue); 1020 1021 // See <reloc.h>. 1022 uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); 1023 unsigned Index = 0; 1024 unsigned IsExtern = 0; 1025 unsigned Type = 0; 1026 1027 if (Target.isAbsolute()) { // constant 1028 // FIXME! 1029 report_fatal_error("FIXME: relocations to absolute targets " 1030 "not yet implemented"); 1031 } else if (SD->getSymbol().isVariable()) { 1032 int64_t Res; 1033 if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( 1034 Res, Layout, SectionAddress)) { 1035 FixedValue = Res; 1036 return; 1037 } 1038 1039 report_fatal_error("unsupported relocation of variable '" + 1040 SD->getSymbol().getName() + "'"); 1041 } else { 1042 // Check whether we need an external or internal relocation. 1043 if (doesSymbolRequireExternRelocation(SD)) { 1044 IsExtern = 1; 1045 Index = SD->getIndex(); 1046 // For external relocations, make sure to offset the fixup value to 1047 // compensate for the addend of the symbol address, if it was 1048 // undefined. This occurs with weak definitions, for example. 1049 if (!SD->Symbol->isUndefined()) 1050 FixedValue -= Layout.getSymbolOffset(SD); 1051 } else { 1052 // The index is the section ordinal (1-based). 1053 Index = SD->getFragment()->getParent()->getOrdinal() + 1; 1054 FixedValue += getSectionAddress(SD->getFragment()->getParent()); 1055 } 1056 if (IsPCRel) 1057 FixedValue -= getSectionAddress(Fragment->getParent()); 1058 1059 // The type is determined by the fixup kind. 1060 Type = RelocType; 1061 } 1062 1063 // struct relocation_info (8 bytes) 1064 macho::RelocationEntry MRE; 1065 MRE.Word0 = FixupOffset; 1066 MRE.Word1 = ((Index << 0) | 1067 (IsPCRel << 24) | 1068 (Log2Size << 25) | 1069 (IsExtern << 27) | 1070 (Type << 28)); 1071 Relocations[Fragment->getParent()].push_back(MRE); 1072 } 1073 1074 void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, 1075 const MCFragment *Fragment, const MCFixup &Fixup, 1076 MCValue Target, uint64_t &FixedValue) { 1077 // FIXME: These needs to be factored into the target Mach-O writer. 1078 if (isARM()) { 1079 RecordARMRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); 1080 return; 1081 } 1082 if (is64Bit()) { 1083 RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); 1084 return; 1085 } 1086 1087 unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); 1088 unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); 1089 1090 // If this is a 32-bit TLVP reloc it's handled a bit differently. 1091 if (Target.getSymA() && 1092 Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) { 1093 RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); 1094 return; 1095 } 1096 1097 // If this is a difference or a defined symbol plus an offset, then we need 1098 // a scattered relocation entry. 1099 // Differences always require scattered relocations. 1100 if (Target.getSymB()) 1101 return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, 1102 Target, Log2Size, FixedValue); 1103 1104 // Get the symbol data, if any. 1105 MCSymbolData *SD = 0; 1106 if (Target.getSymA()) 1107 SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); 1108 1109 // If this is an internal relocation with an offset, it also needs a 1110 // scattered relocation entry. 1111 uint32_t Offset = Target.getConstant(); 1112 if (IsPCRel) 1113 Offset += 1 << Log2Size; 1114 if (Offset && SD && !doesSymbolRequireExternRelocation(SD)) 1115 return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, 1116 Target, Log2Size, FixedValue); 1117 1118 // See <reloc.h>. 1119 uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); 1120 unsigned Index = 0; 1121 unsigned IsExtern = 0; 1122 unsigned Type = 0; 1123 1124 if (Target.isAbsolute()) { // constant 1125 // SymbolNum of 0 indicates the absolute section. 1126 // 1127 // FIXME: Currently, these are never generated (see code below). I cannot 1128 // find a case where they are actually emitted. 1129 Type = macho::RIT_Vanilla; 1130 } else if (SD->getSymbol().isVariable()) { 1131 int64_t Res; 1132 if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( 1133 Res, Layout, SectionAddress)) { 1134 FixedValue = Res; 1135 return; 1136 } 1137 1138 report_fatal_error("unsupported relocation of variable '" + 1139 SD->getSymbol().getName() + "'"); 1140 } else { 1141 // Check whether we need an external or internal relocation. 1142 if (doesSymbolRequireExternRelocation(SD)) { 1143 IsExtern = 1; 1144 Index = SD->getIndex(); 1145 // For external relocations, make sure to offset the fixup value to 1146 // compensate for the addend of the symbol address, if it was 1147 // undefined. This occurs with weak definitions, for example. 1148 if (!SD->Symbol->isUndefined()) 1149 FixedValue -= Layout.getSymbolOffset(SD); 1150 } else { 1151 // The index is the section ordinal (1-based). 1152 Index = SD->getFragment()->getParent()->getOrdinal() + 1; 1153 FixedValue += getSectionAddress(SD->getFragment()->getParent()); 1154 } 1155 if (IsPCRel) 1156 FixedValue -= getSectionAddress(Fragment->getParent()); 1157 1158 Type = macho::RIT_Vanilla; 1159 } 1160 1161 // struct relocation_info (8 bytes) 1162 macho::RelocationEntry MRE; 1163 MRE.Word0 = FixupOffset; 1164 MRE.Word1 = ((Index << 0) | 1165 (IsPCRel << 24) | 1166 (Log2Size << 25) | 1167 (IsExtern << 27) | 1168 (Type << 28)); 1169 Relocations[Fragment->getParent()].push_back(MRE); 1170 } 1171 1172 void BindIndirectSymbols(MCAssembler &Asm) { 1173 // This is the point where 'as' creates actual symbols for indirect symbols 1174 // (in the following two passes). It would be easier for us to do this 1175 // sooner when we see the attribute, but that makes getting the order in the 1176 // symbol table much more complicated than it is worth. 1177 // 1178 // FIXME: Revisit this when the dust settles. 1179 1180 // Bind non lazy symbol pointers first. 1181 unsigned IndirectIndex = 0; 1182 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 1183 ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { 1184 const MCSectionMachO &Section = 1185 cast<MCSectionMachO>(it->SectionData->getSection()); 1186 1187 if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) 1188 continue; 1189 1190 // Initialize the section indirect symbol base, if necessary. 1191 if (!IndirectSymBase.count(it->SectionData)) 1192 IndirectSymBase[it->SectionData] = IndirectIndex; 1193 1194 Asm.getOrCreateSymbolData(*it->Symbol); 1195 } 1196 1197 // Then lazy symbol pointers and symbol stubs. 1198 IndirectIndex = 0; 1199 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 1200 ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { 1201 const MCSectionMachO &Section = 1202 cast<MCSectionMachO>(it->SectionData->getSection()); 1203 1204 if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS && 1205 Section.getType() != MCSectionMachO::S_SYMBOL_STUBS) 1206 continue; 1207 1208 // Initialize the section indirect symbol base, if necessary. 1209 if (!IndirectSymBase.count(it->SectionData)) 1210 IndirectSymBase[it->SectionData] = IndirectIndex; 1211 1212 // Set the symbol type to undefined lazy, but only on construction. 1213 // 1214 // FIXME: Do not hardcode. 1215 bool Created; 1216 MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); 1217 if (Created) 1218 Entry.setFlags(Entry.getFlags() | 0x0001); 1219 } 1220 } 1221 1222 /// ComputeSymbolTable - Compute the symbol table data 1223 /// 1224 /// \param StringTable [out] - The string table data. 1225 /// \param StringIndexMap [out] - Map from symbol names to offsets in the 1226 /// string table. 1227 void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, 1228 std::vector<MachSymbolData> &LocalSymbolData, 1229 std::vector<MachSymbolData> &ExternalSymbolData, 1230 std::vector<MachSymbolData> &UndefinedSymbolData) { 1231 // Build section lookup table. 1232 DenseMap<const MCSection*, uint8_t> SectionIndexMap; 1233 unsigned Index = 1; 1234 for (MCAssembler::iterator it = Asm.begin(), 1235 ie = Asm.end(); it != ie; ++it, ++Index) 1236 SectionIndexMap[&it->getSection()] = Index; 1237 assert(Index <= 256 && "Too many sections!"); 1238 1239 // Index 0 is always the empty string. 1240 StringMap<uint64_t> StringIndexMap; 1241 StringTable += '\x00'; 1242 1243 // Build the symbol arrays and the string table, but only for non-local 1244 // symbols. 1245 // 1246 // The particular order that we collect the symbols and create the string 1247 // table, then sort the symbols is chosen to match 'as'. Even though it 1248 // doesn't matter for correctness, this is important for letting us diff .o 1249 // files. 1250 for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), 1251 ie = Asm.symbol_end(); it != ie; ++it) { 1252 const MCSymbol &Symbol = it->getSymbol(); 1253 1254 // Ignore non-linker visible symbols. 1255 if (!Asm.isSymbolLinkerVisible(it->getSymbol())) 1256 continue; 1257 1258 if (!it->isExternal() && !Symbol.isUndefined()) 1259 continue; 1260 1261 uint64_t &Entry = StringIndexMap[Symbol.getName()]; 1262 if (!Entry) { 1263 Entry = StringTable.size(); 1264 StringTable += Symbol.getName(); 1265 StringTable += '\x00'; 1266 } 1267 1268 MachSymbolData MSD; 1269 MSD.SymbolData = it; 1270 MSD.StringIndex = Entry; 1271 1272 if (Symbol.isUndefined()) { 1273 MSD.SectionIndex = 0; 1274 UndefinedSymbolData.push_back(MSD); 1275 } else if (Symbol.isAbsolute()) { 1276 MSD.SectionIndex = 0; 1277 ExternalSymbolData.push_back(MSD); 1278 } else { 1279 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); 1280 assert(MSD.SectionIndex && "Invalid section index!"); 1281 ExternalSymbolData.push_back(MSD); 1282 } 1283 } 1284 1285 // Now add the data for local symbols. 1286 for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), 1287 ie = Asm.symbol_end(); it != ie; ++it) { 1288 const MCSymbol &Symbol = it->getSymbol(); 1289 1290 // Ignore non-linker visible symbols. 1291 if (!Asm.isSymbolLinkerVisible(it->getSymbol())) 1292 continue; 1293 1294 if (it->isExternal() || Symbol.isUndefined()) 1295 continue; 1296 1297 uint64_t &Entry = StringIndexMap[Symbol.getName()]; 1298 if (!Entry) { 1299 Entry = StringTable.size(); 1300 StringTable += Symbol.getName(); 1301 StringTable += '\x00'; 1302 } 1303 1304 MachSymbolData MSD; 1305 MSD.SymbolData = it; 1306 MSD.StringIndex = Entry; 1307 1308 if (Symbol.isAbsolute()) { 1309 MSD.SectionIndex = 0; 1310 LocalSymbolData.push_back(MSD); 1311 } else { 1312 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); 1313 assert(MSD.SectionIndex && "Invalid section index!"); 1314 LocalSymbolData.push_back(MSD); 1315 } 1316 } 1317 1318 // External and undefined symbols are required to be in lexicographic order. 1319 std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); 1320 std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); 1321 1322 // Set the symbol indices. 1323 Index = 0; 1324 for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) 1325 LocalSymbolData[i].SymbolData->setIndex(Index++); 1326 for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) 1327 ExternalSymbolData[i].SymbolData->setIndex(Index++); 1328 for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) 1329 UndefinedSymbolData[i].SymbolData->setIndex(Index++); 1330 1331 // The string table is padded to a multiple of 4. 1332 while (StringTable.size() % 4) 1333 StringTable += '\x00'; 1334 } 1335 1336 void computeSectionAddresses(const MCAssembler &Asm, 1337 const MCAsmLayout &Layout) { 1338 uint64_t StartAddress = 0; 1339 const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder(); 1340 for (int i = 0, n = Order.size(); i != n ; ++i) { 1341 const MCSectionData *SD = Order[i]; 1342 StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment()); 1343 SectionAddress[SD] = StartAddress; 1344 StartAddress += Layout.getSectionAddressSize(SD); 1345 // Explicitly pad the section to match the alignment requirements of the 1346 // following one. This is for 'gas' compatibility, it shouldn't 1347 /// strictly be necessary. 1348 StartAddress += getPaddingSize(SD, Layout); 1349 } 1350 } 1351 1352 void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) { 1353 computeSectionAddresses(Asm, Layout); 1354 1355 // Create symbol data for any indirect symbols. 1356 BindIndirectSymbols(Asm); 1357 1358 // Compute symbol table information and bind symbol indices. 1359 ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData, 1360 UndefinedSymbolData); 1361 } 1362 1363 virtual bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, 1364 const MCSymbolData &DataA, 1365 const MCFragment &FB, 1366 bool InSet, 1367 bool IsPCRel) const { 1368 if (InSet) 1369 return true; 1370 1371 // The effective address is 1372 // addr(atom(A)) + offset(A) 1373 // - addr(atom(B)) - offset(B) 1374 // and the offsets are not relocatable, so the fixup is fully resolved when 1375 // addr(atom(A)) - addr(atom(B)) == 0. 1376 const MCSymbolData *A_Base = 0, *B_Base = 0; 1377 1378 const MCSymbol &SA = DataA.getSymbol().AliasedSymbol(); 1379 const MCSection &SecA = SA.getSection(); 1380 const MCSection &SecB = FB.getParent()->getSection(); 1381 1382 if (IsPCRel) { 1383 // The simple (Darwin, except on x86_64) way of dealing with this was to 1384 // assume that any reference to a temporary symbol *must* be a temporary 1385 // symbol in the same atom, unless the sections differ. Therefore, any 1386 // PCrel relocation to a temporary symbol (in the same section) is fully 1387 // resolved. This also works in conjunction with absolutized .set, which 1388 // requires the compiler to use .set to absolutize the differences between 1389 // symbols which the compiler knows to be assembly time constants, so we 1390 // don't need to worry about considering symbol differences fully 1391 // resolved. 1392 1393 if (!Asm.getBackend().hasReliableSymbolDifference()) { 1394 if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB) 1395 return false; 1396 return true; 1397 } 1398 } else { 1399 if (!TargetObjectWriter->useAggressiveSymbolFolding()) 1400 return false; 1401 } 1402 1403 const MCFragment &FA = *Asm.getSymbolData(SA).getFragment(); 1404 1405 A_Base = FA.getAtom(); 1406 if (!A_Base) 1407 return false; 1408 1409 B_Base = FB.getAtom(); 1410 if (!B_Base) 1411 return false; 1412 1413 // If the atoms are the same, they are guaranteed to have the same address. 1414 if (A_Base == B_Base) 1415 return true; 1416 1417 // Otherwise, we can't prove this is fully resolved. 1418 return false; 1419 } 1420 1421 void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) { 1422 unsigned NumSections = Asm.size(); 1423 1424 // The section data starts after the header, the segment load command (and 1425 // section headers) and the symbol table. 1426 unsigned NumLoadCommands = 1; 1427 uint64_t LoadCommandsSize = is64Bit() ? 1428 macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size : 1429 macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size; 1430 1431 // Add the symbol table load command sizes, if used. 1432 unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + 1433 UndefinedSymbolData.size(); 1434 if (NumSymbols) { 1435 NumLoadCommands += 2; 1436 LoadCommandsSize += (macho::SymtabLoadCommandSize + 1437 macho::DysymtabLoadCommandSize); 1438 } 1439 1440 // Compute the total size of the section data, as well as its file size and 1441 // vm size. 1442 uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size : 1443 macho::Header32Size) + LoadCommandsSize; 1444 uint64_t SectionDataSize = 0; 1445 uint64_t SectionDataFileSize = 0; 1446 uint64_t VMSize = 0; 1447 for (MCAssembler::const_iterator it = Asm.begin(), 1448 ie = Asm.end(); it != ie; ++it) { 1449 const MCSectionData &SD = *it; 1450 uint64_t Address = getSectionAddress(&SD); 1451 uint64_t Size = Layout.getSectionAddressSize(&SD); 1452 uint64_t FileSize = Layout.getSectionFileSize(&SD); 1453 FileSize += getPaddingSize(&SD, Layout); 1454 1455 VMSize = std::max(VMSize, Address + Size); 1456 1457 if (SD.getSection().isVirtualSection()) 1458 continue; 1459 1460 SectionDataSize = std::max(SectionDataSize, Address + Size); 1461 SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize); 1462 } 1463 1464 // The section data is padded to 4 bytes. 1465 // 1466 // FIXME: Is this machine dependent? 1467 unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); 1468 SectionDataFileSize += SectionDataPadding; 1469 1470 // Write the prolog, starting with the header and load command... 1471 WriteHeader(NumLoadCommands, LoadCommandsSize, 1472 Asm.getSubsectionsViaSymbols()); 1473 WriteSegmentLoadCommand(NumSections, VMSize, 1474 SectionDataStart, SectionDataSize); 1475 1476 // ... and then the section headers. 1477 uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; 1478 for (MCAssembler::const_iterator it = Asm.begin(), 1479 ie = Asm.end(); it != ie; ++it) { 1480 std::vector<macho::RelocationEntry> &Relocs = Relocations[it]; 1481 unsigned NumRelocs = Relocs.size(); 1482 uint64_t SectionStart = SectionDataStart + getSectionAddress(it); 1483 WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs); 1484 RelocTableEnd += NumRelocs * macho::RelocationInfoSize; 1485 } 1486 1487 // Write the symbol table load command, if used. 1488 if (NumSymbols) { 1489 unsigned FirstLocalSymbol = 0; 1490 unsigned NumLocalSymbols = LocalSymbolData.size(); 1491 unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; 1492 unsigned NumExternalSymbols = ExternalSymbolData.size(); 1493 unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; 1494 unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); 1495 unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); 1496 unsigned NumSymTabSymbols = 1497 NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; 1498 uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; 1499 uint64_t IndirectSymbolOffset = 0; 1500 1501 // If used, the indirect symbols are written after the section data. 1502 if (NumIndirectSymbols) 1503 IndirectSymbolOffset = RelocTableEnd; 1504 1505 // The symbol table is written after the indirect symbol data. 1506 uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize; 1507 1508 // The string table is written after symbol table. 1509 uint64_t StringTableOffset = 1510 SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size : 1511 macho::Nlist32Size); 1512 WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, 1513 StringTableOffset, StringTable.size()); 1514 1515 WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, 1516 FirstExternalSymbol, NumExternalSymbols, 1517 FirstUndefinedSymbol, NumUndefinedSymbols, 1518 IndirectSymbolOffset, NumIndirectSymbols); 1519 } 1520 1521 // Write the actual section data. 1522 for (MCAssembler::const_iterator it = Asm.begin(), 1523 ie = Asm.end(); it != ie; ++it) { 1524 Asm.WriteSectionData(it, Layout); 1525 1526 uint64_t Pad = getPaddingSize(it, Layout); 1527 for (unsigned int i = 0; i < Pad; ++i) 1528 Write8(0); 1529 } 1530 1531 // Write the extra padding. 1532 WriteZeros(SectionDataPadding); 1533 1534 // Write the relocation entries. 1535 for (MCAssembler::const_iterator it = Asm.begin(), 1536 ie = Asm.end(); it != ie; ++it) { 1537 // Write the section relocation entries, in reverse order to match 'as' 1538 // (approximately, the exact algorithm is more complicated than this). 1539 std::vector<macho::RelocationEntry> &Relocs = Relocations[it]; 1540 for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { 1541 Write32(Relocs[e - i - 1].Word0); 1542 Write32(Relocs[e - i - 1].Word1); 1543 } 1544 } 1545 1546 // Write the symbol table data, if used. 1547 if (NumSymbols) { 1548 // Write the indirect symbol entries. 1549 for (MCAssembler::const_indirect_symbol_iterator 1550 it = Asm.indirect_symbol_begin(), 1551 ie = Asm.indirect_symbol_end(); it != ie; ++it) { 1552 // Indirect symbols in the non lazy symbol pointer section have some 1553 // special handling. 1554 const MCSectionMachO &Section = 1555 static_cast<const MCSectionMachO&>(it->SectionData->getSection()); 1556 if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) { 1557 // If this symbol is defined and internal, mark it as such. 1558 if (it->Symbol->isDefined() && 1559 !Asm.getSymbolData(*it->Symbol).isExternal()) { 1560 uint32_t Flags = macho::ISF_Local; 1561 if (it->Symbol->isAbsolute()) 1562 Flags |= macho::ISF_Absolute; 1563 Write32(Flags); 1564 continue; 1565 } 1566 } 1567 1568 Write32(Asm.getSymbolData(*it->Symbol).getIndex()); 1569 } 1570 1571 // FIXME: Check that offsets match computed ones. 1572 1573 // Write the symbol table entries. 1574 for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) 1575 WriteNlist(LocalSymbolData[i], Layout); 1576 for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) 1577 WriteNlist(ExternalSymbolData[i], Layout); 1578 for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) 1579 WriteNlist(UndefinedSymbolData[i], Layout); 1580 1581 // Write the string table. 1582 OS << StringTable.str(); 1583 } 1584 } 1585 }; 1586 1587 } 1588 1589 MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW, 1590 raw_ostream &OS, 1591 bool IsLittleEndian) { 1592 return new MachObjectWriter(MOTW, OS, IsLittleEndian); 1593 } 1594