1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/MC/MachObjectWriter.h" 11 #include "llvm/ADT/StringMap.h" 12 #include "llvm/ADT/Twine.h" 13 #include "llvm/MC/MCAssembler.h" 14 #include "llvm/MC/MCAsmLayout.h" 15 #include "llvm/MC/MCExpr.h" 16 #include "llvm/MC/MCObjectWriter.h" 17 #include "llvm/MC/MCSectionMachO.h" 18 #include "llvm/MC/MCSymbol.h" 19 #include "llvm/MC/MCValue.h" 20 #include "llvm/Support/ErrorHandling.h" 21 #include "llvm/Support/MachO.h" 22 #include "llvm/Target/TargetAsmBackend.h" 23 24 // FIXME: Gross. 25 #include "../Target/X86/X86FixupKinds.h" 26 27 #include <vector> 28 using namespace llvm; 29 30 static unsigned getFixupKindLog2Size(unsigned Kind) { 31 switch (Kind) { 32 default: llvm_unreachable("invalid fixup kind!"); 33 case X86::reloc_pcrel_1byte: 34 case FK_Data_1: return 0; 35 case FK_Data_2: return 1; 36 case X86::reloc_pcrel_4byte: 37 case X86::reloc_riprel_4byte: 38 case X86::reloc_riprel_4byte_movq_load: 39 case FK_Data_4: return 2; 40 case FK_Data_8: return 3; 41 } 42 } 43 44 static bool isFixupKindPCRel(unsigned Kind) { 45 switch (Kind) { 46 default: 47 return false; 48 case X86::reloc_pcrel_1byte: 49 case X86::reloc_pcrel_4byte: 50 case X86::reloc_riprel_4byte: 51 case X86::reloc_riprel_4byte_movq_load: 52 return true; 53 } 54 } 55 56 static bool isFixupKindRIPRel(unsigned Kind) { 57 return Kind == X86::reloc_riprel_4byte || 58 Kind == X86::reloc_riprel_4byte_movq_load; 59 } 60 61 namespace { 62 63 class MachObjectWriterImpl { 64 // See <mach-o/loader.h>. 
// Magic numbers written as the first word of the file header; WriteHeader
// picks one based on Is64Bit.
enum {
  Header_Magic32 = 0xFEEDFACE,
  Header_Magic64 = 0xFEEDFACF
};

// Sizes, in bytes, of the on-disk structures emitted by this writer. The
// Write* methods assert that they emit exactly this many bytes, and
// WriteObject uses them to compute file offsets.
enum {
  Header32Size = 28,
  Header64Size = 32,
  SegmentLoadCommand32Size = 56,
  SegmentLoadCommand64Size = 72,
  Section32Size = 68,
  Section64Size = 80,
  SymtabLoadCommandSize = 24,
  DysymtabLoadCommandSize = 80,
  Nlist32Size = 12,
  Nlist64Size = 16,
  RelocationInfoSize = 8
};

// Value written to the header's file type field.
enum HeaderFileType {
  HFT_Object = 0x1
};

// Bits OR'ed into the header's flags field.
enum HeaderFlags {
  HF_SubsectionsViaSymbols = 0x2000
};

// Command identifiers written as the first word of each load command.
enum LoadCommandType {
  LCT_Segment = 0x1,
  LCT_Symtab = 0x2,
  LCT_Dysymtab = 0xb,
  LCT_Segment64 = 0x19
};

// See <mach-o/nlist.h>.
//
// Base values for the nlist type byte; WriteNlist selects one of these and
// then ORs in the SymbolTypeFlags bits below.
enum SymbolTypeType {
  STT_Undefined = 0x00,
  STT_Absolute = 0x02,
  STT_Section = 0x0e
};

enum SymbolTypeFlags {
  // If any of these bits are set, then the entry is a stab entry number (see
  // <mach-o/stab.h>). Otherwise the other masks apply.
  STF_StabsEntryMask = 0xe0,

  STF_TypeMask = 0x0e,
  STF_External = 0x01,
  STF_PrivateExtern = 0x10
};

/// IndirectSymbolFlags - Flags for encoding special values in the indirect
/// symbol entry.
enum IndirectSymbolFlags {
  ISF_Local = 0x80000000,
  ISF_Absolute = 0x40000000
};

/// RelocationFlags - Special flags for addresses.
enum RelocationFlags {
  RF_Scattered = 0x80000000
};

// Relocation types used by the non-x86_64 (32-bit) relocation code.
enum RelocationInfoType {
  RIT_Vanilla = 0,
  RIT_Pair = 1,
  RIT_Difference = 2,
  RIT_PreboundLazyPointer = 3,
  RIT_LocalDifference = 4
};

/// X86_64 uses its own relocation types.
137 enum RelocationInfoTypeX86_64 { 138 RIT_X86_64_Unsigned = 0, 139 RIT_X86_64_Signed = 1, 140 RIT_X86_64_Branch = 2, 141 RIT_X86_64_GOTLoad = 3, 142 RIT_X86_64_GOT = 4, 143 RIT_X86_64_Subtractor = 5, 144 RIT_X86_64_Signed1 = 6, 145 RIT_X86_64_Signed2 = 7, 146 RIT_X86_64_Signed4 = 8 147 }; 148 149 /// MachSymbolData - Helper struct for containing some precomputed information 150 /// on symbols. 151 struct MachSymbolData { 152 MCSymbolData *SymbolData; 153 uint64_t StringIndex; 154 uint8_t SectionIndex; 155 156 // Support lexicographic sorting. 157 bool operator<(const MachSymbolData &RHS) const { 158 const std::string &Name = SymbolData->getSymbol().getName(); 159 return Name < RHS.SymbolData->getSymbol().getName(); 160 } 161 }; 162 163 /// @name Relocation Data 164 /// @{ 165 166 struct MachRelocationEntry { 167 uint32_t Word0; 168 uint32_t Word1; 169 }; 170 171 llvm::DenseMap<const MCSectionData*, 172 std::vector<MachRelocationEntry> > Relocations; 173 174 /// @} 175 /// @name Symbol Table Data 176 /// @{ 177 178 SmallString<256> StringTable; 179 std::vector<MachSymbolData> LocalSymbolData; 180 std::vector<MachSymbolData> ExternalSymbolData; 181 std::vector<MachSymbolData> UndefinedSymbolData; 182 183 /// @} 184 185 MachObjectWriter *Writer; 186 187 raw_ostream &OS; 188 189 unsigned Is64Bit : 1; 190 191 public: 192 MachObjectWriterImpl(MachObjectWriter *_Writer, bool _Is64Bit) 193 : Writer(_Writer), OS(Writer->getStream()), Is64Bit(_Is64Bit) { 194 } 195 196 void Write8(uint8_t Value) { Writer->Write8(Value); } 197 void Write16(uint16_t Value) { Writer->Write16(Value); } 198 void Write32(uint32_t Value) { Writer->Write32(Value); } 199 void Write64(uint64_t Value) { Writer->Write64(Value); } 200 void WriteZeros(unsigned N) { Writer->WriteZeros(N); } 201 void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) { 202 Writer->WriteBytes(Str, ZeroFillSize); 203 } 204 205 void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize, 206 bool 
SubsectionsViaSymbols) { 207 uint32_t Flags = 0; 208 209 if (SubsectionsViaSymbols) 210 Flags |= HF_SubsectionsViaSymbols; 211 212 // struct mach_header (28 bytes) or 213 // struct mach_header_64 (32 bytes) 214 215 uint64_t Start = OS.tell(); 216 (void) Start; 217 218 Write32(Is64Bit ? Header_Magic64 : Header_Magic32); 219 220 // FIXME: Support cputype. 221 Write32(Is64Bit ? MachO::CPUTypeX86_64 : MachO::CPUTypeI386); 222 // FIXME: Support cpusubtype. 223 Write32(MachO::CPUSubType_I386_ALL); 224 Write32(HFT_Object); 225 Write32(NumLoadCommands); // Object files have a single load command, the 226 // segment. 227 Write32(LoadCommandsSize); 228 Write32(Flags); 229 if (Is64Bit) 230 Write32(0); // reserved 231 232 assert(OS.tell() - Start == Is64Bit ? Header64Size : Header32Size); 233 } 234 235 /// WriteSegmentLoadCommand - Write a segment load command. 236 /// 237 /// \arg NumSections - The number of sections in this segment. 238 /// \arg SectionDataSize - The total size of the sections. 239 void WriteSegmentLoadCommand(unsigned NumSections, 240 uint64_t VMSize, 241 uint64_t SectionDataStartOffset, 242 uint64_t SectionDataSize) { 243 // struct segment_command (56 bytes) or 244 // struct segment_command_64 (72 bytes) 245 246 uint64_t Start = OS.tell(); 247 (void) Start; 248 249 unsigned SegmentLoadCommandSize = Is64Bit ? SegmentLoadCommand64Size : 250 SegmentLoadCommand32Size; 251 Write32(Is64Bit ? LCT_Segment64 : LCT_Segment); 252 Write32(SegmentLoadCommandSize + 253 NumSections * (Is64Bit ? 
Section64Size : Section32Size)); 254 255 WriteBytes("", 16); 256 if (Is64Bit) { 257 Write64(0); // vmaddr 258 Write64(VMSize); // vmsize 259 Write64(SectionDataStartOffset); // file offset 260 Write64(SectionDataSize); // file size 261 } else { 262 Write32(0); // vmaddr 263 Write32(VMSize); // vmsize 264 Write32(SectionDataStartOffset); // file offset 265 Write32(SectionDataSize); // file size 266 } 267 Write32(0x7); // maxprot 268 Write32(0x7); // initprot 269 Write32(NumSections); 270 Write32(0); // flags 271 272 assert(OS.tell() - Start == SegmentLoadCommandSize); 273 } 274 275 void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout, 276 const MCSectionData &SD, uint64_t FileOffset, 277 uint64_t RelocationsStart, unsigned NumRelocations) { 278 uint64_t SectionSize = Layout.getSectionSize(&SD); 279 280 // The offset is unused for virtual sections. 281 if (Asm.getBackend().isVirtualSection(SD.getSection())) { 282 assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!"); 283 FileOffset = 0; 284 } 285 286 // struct section (68 bytes) or 287 // struct section_64 (80 bytes) 288 289 uint64_t Start = OS.tell(); 290 (void) Start; 291 292 // FIXME: cast<> support! 293 const MCSectionMachO &Section = 294 static_cast<const MCSectionMachO&>(SD.getSection()); 295 WriteBytes(Section.getSectionName(), 16); 296 WriteBytes(Section.getSegmentName(), 16); 297 if (Is64Bit) { 298 Write64(Layout.getSectionAddress(&SD)); // address 299 Write64(SectionSize); // size 300 } else { 301 Write32(Layout.getSectionAddress(&SD)); // address 302 Write32(SectionSize); // size 303 } 304 Write32(FileOffset); 305 306 unsigned Flags = Section.getTypeAndAttributes(); 307 if (SD.hasInstructions()) 308 Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS; 309 310 assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); 311 Write32(Log2_32(SD.getAlignment())); 312 Write32(NumRelocations ? 
RelocationsStart : 0); 313 Write32(NumRelocations); 314 Write32(Flags); 315 Write32(0); // reserved1 316 Write32(Section.getStubSize()); // reserved2 317 if (Is64Bit) 318 Write32(0); // reserved3 319 320 assert(OS.tell() - Start == Is64Bit ? Section64Size : Section32Size); 321 } 322 323 void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, 324 uint32_t StringTableOffset, 325 uint32_t StringTableSize) { 326 // struct symtab_command (24 bytes) 327 328 uint64_t Start = OS.tell(); 329 (void) Start; 330 331 Write32(LCT_Symtab); 332 Write32(SymtabLoadCommandSize); 333 Write32(SymbolOffset); 334 Write32(NumSymbols); 335 Write32(StringTableOffset); 336 Write32(StringTableSize); 337 338 assert(OS.tell() - Start == SymtabLoadCommandSize); 339 } 340 341 void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, 342 uint32_t NumLocalSymbols, 343 uint32_t FirstExternalSymbol, 344 uint32_t NumExternalSymbols, 345 uint32_t FirstUndefinedSymbol, 346 uint32_t NumUndefinedSymbols, 347 uint32_t IndirectSymbolOffset, 348 uint32_t NumIndirectSymbols) { 349 // struct dysymtab_command (80 bytes) 350 351 uint64_t Start = OS.tell(); 352 (void) Start; 353 354 Write32(LCT_Dysymtab); 355 Write32(DysymtabLoadCommandSize); 356 Write32(FirstLocalSymbol); 357 Write32(NumLocalSymbols); 358 Write32(FirstExternalSymbol); 359 Write32(NumExternalSymbols); 360 Write32(FirstUndefinedSymbol); 361 Write32(NumUndefinedSymbols); 362 Write32(0); // tocoff 363 Write32(0); // ntoc 364 Write32(0); // modtaboff 365 Write32(0); // nmodtab 366 Write32(0); // extrefsymoff 367 Write32(0); // nextrefsyms 368 Write32(IndirectSymbolOffset); 369 Write32(NumIndirectSymbols); 370 Write32(0); // extreloff 371 Write32(0); // nextrel 372 Write32(0); // locreloff 373 Write32(0); // nlocrel 374 375 assert(OS.tell() - Start == DysymtabLoadCommandSize); 376 } 377 378 void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) { 379 MCSymbolData &Data = *MSD.SymbolData; 380 const MCSymbol &Symbol = 
Data.getSymbol(); 381 uint8_t Type = 0; 382 uint16_t Flags = Data.getFlags(); 383 uint32_t Address = 0; 384 385 // Set the N_TYPE bits. See <mach-o/nlist.h>. 386 // 387 // FIXME: Are the prebound or indirect fields possible here? 388 if (Symbol.isUndefined()) 389 Type = STT_Undefined; 390 else if (Symbol.isAbsolute()) 391 Type = STT_Absolute; 392 else 393 Type = STT_Section; 394 395 // FIXME: Set STAB bits. 396 397 if (Data.isPrivateExtern()) 398 Type |= STF_PrivateExtern; 399 400 // Set external bit. 401 if (Data.isExternal() || Symbol.isUndefined()) 402 Type |= STF_External; 403 404 // Compute the symbol address. 405 if (Symbol.isDefined()) { 406 if (Symbol.isAbsolute()) { 407 Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue(); 408 } else { 409 Address = Layout.getSymbolAddress(&Data); 410 } 411 } else if (Data.isCommon()) { 412 // Common symbols are encoded with the size in the address 413 // field, and their alignment in the flags. 414 Address = Data.getCommonSize(); 415 416 // Common alignment is packed into the 'desc' bits. 417 if (unsigned Align = Data.getCommonAlignment()) { 418 unsigned Log2Size = Log2_32(Align); 419 assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); 420 if (Log2Size > 15) 421 report_fatal_error("invalid 'common' alignment '" + 422 Twine(Align) + "'"); 423 // FIXME: Keep this mask with the SymbolFlags enumeration. 424 Flags = (Flags & 0xF0FF) | (Log2Size << 8); 425 } 426 } 427 428 // struct nlist (12 bytes) 429 430 Write32(MSD.StringIndex); 431 Write8(Type); 432 Write8(MSD.SectionIndex); 433 434 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' 435 // value. 436 Write16(Flags); 437 if (Is64Bit) 438 Write64(Address); 439 else 440 Write32(Address); 441 } 442 443 // FIXME: We really need to improve the relocation validation. 
// Basically, we want to implement a separate computation which evaluates the
// relocation entry as the linker would, and verifies that the resultant fixup
// value is exactly what the encoder wanted. This will catch several classes of
// problems:
//
//  - Relocation entry bugs, the two algorithms are unlikely to have the same
//    exact bug.
//
//  - Relaxation issues, where we forget to relax something.
//
//  - Input errors, where something cannot be correctly encoded. 'as' allows
//    these through in many cases.

/// RecordX86_64Relocation - Record the relocation entry (two entries for a
/// symbol difference) for \arg Fixup in the 64-bit case, and store the value
/// to be encoded at the fixup location into \arg FixedValue.
///
/// Unsupported combinations are rejected with report_fatal_error().
void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
                            const MCFragment *Fragment,
                            const MCAsmFixup &Fixup, MCValue Target,
                            uint64_t &FixedValue) {
  unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
  unsigned IsRIPRel = isFixupKindRIPRel(Fixup.Kind);
  unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);

  // See <reloc.h>.
  uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset;
  int64_t Value = 0;
  unsigned Index = 0;
  unsigned IsExtern = 0;
  unsigned Type = 0;

  Value = Target.getConstant();

  if (IsPCRel) {
    // Compensate for the relocation offset, Darwin x86_64 relocations only
    // have the addend and appear to have attempted to define it to be the
    // actual expression addend without the PCrel bias. However, instructions
    // with data following the relocation are not accommodated for (see comment
    // below regarding SIGNED{1,2,4}), so it isn't exactly that either.
    Value += 1LL << Log2Size;
  }

  if (Target.isAbsolute()) { // constant
    // SymbolNum of 0 indicates the absolute section.
    Type = RIT_X86_64_Unsigned;
    Index = 0;

    // FIXME: I believe this is broken, I don't think the linker can
    // understand it. I think it would require a local relocation, but I'm not
    // sure if that would work either. The official way to get an absolute
    // PCrel relocation is to use an absolute symbol (which we don't support
    // yet).
    if (IsPCRel) {
      IsExtern = 1;
      Type = RIT_X86_64_Branch;
    }
  } else if (Target.getSymB()) { // A - B + constant
    const MCSymbol *A = &Target.getSymA()->getSymbol();
    MCSymbolData &A_SD = Asm.getSymbolData(*A);
    const MCSymbolData *A_Base = Asm.getAtom(Layout, &A_SD);

    const MCSymbol *B = &Target.getSymB()->getSymbol();
    MCSymbolData &B_SD = Asm.getSymbolData(*B);
    const MCSymbolData *B_Base = Asm.getAtom(Layout, &B_SD);

    // Neither symbol can be modified.
    if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
        Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
      report_fatal_error("unsupported relocation of modified symbol");

    // We don't support PCrel relocations of differences. Darwin 'as' doesn't
    // implement most of these correctly.
    if (IsPCRel)
      report_fatal_error("unsupported pc-relative relocation of difference");

    // We don't currently support any situation where one or both of the
    // symbols would require a local relocation. This is almost certainly
    // unused and may not be possible to encode correctly.
    if (!A_Base || !B_Base)
      report_fatal_error("unsupported local relocations in difference");

    // Darwin 'as' doesn't emit correct relocations for this (it ends up with
    // a single SIGNED relocation); reject it for now.
    if (A_Base == B_Base)
      report_fatal_error("unsupported relocation with identical base");

    // Fold each symbol's offset from its atom base into the addend.
    Value += Layout.getSymbolAddress(&A_SD) - Layout.getSymbolAddress(A_Base);
    Value -= Layout.getSymbolAddress(&B_SD) - Layout.getSymbolAddress(B_Base);

    // Record the UNSIGNED entry for A here; the SUBTRACTOR entry for B is
    // set up below and recorded by the common code at the end of the function.
    Index = A_Base->getIndex();
    IsExtern = 1;
    Type = RIT_X86_64_Unsigned;

    MachRelocationEntry MRE;
    MRE.Word0 = Address;
    MRE.Word1 = ((Index     <<  0) |
                 (IsPCRel   << 24) |
                 (Log2Size  << 25) |
                 (IsExtern  << 27) |
                 (Type      << 28));
    Relocations[Fragment->getParent()].push_back(MRE);

    Index = B_Base->getIndex();
    IsExtern = 1;
    Type = RIT_X86_64_Subtractor;
  } else {
    const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
    MCSymbolData &SD = Asm.getSymbolData(*Symbol);
    const MCSymbolData *Base = Asm.getAtom(Layout, &SD);

    // Relocations inside debug sections always use local relocations when
    // possible. This seems to be done because the debugger doesn't fully
    // understand x86_64 relocation entries, and expects to find values that
    // have already been fixed up.
    if (Symbol->isInSection()) {
      const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
        Fragment->getParent()->getSection());
      if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
        Base = 0;
    }

    // x86_64 almost always uses external relocations, except when there is no
    // symbol to use as a base address (a local symbol with no preceding
    // non-local symbol).
    if (Base) {
      Index = Base->getIndex();
      IsExtern = 1;

      // Add the local offset, if needed.
      if (Base != &SD)
        Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base);
    } else {
      // The index is the section ordinal (1-based).
      Index = SD.getFragment()->getParent()->getOrdinal() + 1;
      IsExtern = 0;
      Value += Layout.getSymbolAddress(&SD);

      if (IsPCRel)
        Value -= Address + (1 << Log2Size);
    }

    MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
    if (IsPCRel) {
      if (IsRIPRel) {
        if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
          // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
          // rewrite the movq to an leaq at link time if the symbol ends up in
          // the same linkage unit.
          if (unsigned(Fixup.Kind) == X86::reloc_riprel_4byte_movq_load)
            Type = RIT_X86_64_GOTLoad;
          else
            Type = RIT_X86_64_GOT;
        } else if (Modifier != MCSymbolRefExpr::VK_None)
          report_fatal_error("unsupported symbol modifier in relocation");
        else
          Type = RIT_X86_64_Signed;
      } else {
        if (Modifier != MCSymbolRefExpr::VK_None)
          report_fatal_error("unsupported symbol modifier in branch "
                             "relocation");

        Type = RIT_X86_64_Branch;
      }

      // The Darwin x86_64 relocation format has a problem where it cannot
      // encode an address (L<foo> + <constant>) which is outside the atom
      // containing L<foo>. Generally, this shouldn't occur but it does happen
      // when we have a RIPrel instruction with data following the relocation
      // entry (e.g., movb $012, L0(%rip)). Even with the PCrel adjustment
      // Darwin x86_64 uses, the offset is still negative and the linker has
      // no way to recognize this.
      //
      // To work around this, Darwin uses several special relocation types to
      // indicate the offsets. However, the specification or implementation of
      // these seems to also be incomplete; they should adjust the addend as
      // well based on the actual encoded instruction (the additional bias),
      // but instead appear to just look at the final offset.
      if (IsRIPRel) {
        switch (-(Target.getConstant() + (1LL << Log2Size))) {
        case 1: Type = RIT_X86_64_Signed1; break;
        case 2: Type = RIT_X86_64_Signed2; break;
        case 4: Type = RIT_X86_64_Signed4; break;
        }
      }
    } else {
      if (Modifier == MCSymbolRefExpr::VK_GOT) {
        Type = RIT_X86_64_GOT;
      } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
        // GOTPCREL is allowed as a modifier on non-PCrel instructions, in
        // which case all we do is set the PCrel bit in the relocation entry;
        // this is used with exception handling, for example. The source is
        // required to include any necessary offset directly.
        Type = RIT_X86_64_GOT;
        IsPCRel = 1;
      } else if (Modifier != MCSymbolRefExpr::VK_None)
        report_fatal_error("unsupported symbol modifier in relocation");
      else
        Type = RIT_X86_64_Unsigned;
    }
  }

  // x86_64 always writes custom values into the fixups.
  FixedValue = Value;

  // struct relocation_info (8 bytes)
  //
  // Word1 packs the fields at bit offsets 0 (Index), 24 (IsPCRel),
  // 25 (Log2Size), 27 (IsExtern) and 28 (Type).
  MachRelocationEntry MRE;
  MRE.Word0 = Address;
  MRE.Word1 = ((Index     <<  0) |
               (IsPCRel   << 24) |
               (Log2Size  << 25) |
               (IsExtern  << 27) |
               (Type      << 28));
  Relocations[Fragment->getParent()].push_back(MRE);
}

/// RecordScatteredRelocation - Record a scattered relocation entry (one with
/// RF_Scattered set in its first word) for \arg Fixup.
///
/// For a difference (A - B) an additional RIT_Pair entry carrying the address
/// of B is recorded first. Both symbols must have a fragment, otherwise a
/// fatal error is reported. \arg FixedValue is not modified here.
void RecordScatteredRelocation(const MCAssembler &Asm,
                               const MCAsmLayout &Layout,
                               const MCFragment *Fragment,
                               const MCAsmFixup &Fixup, MCValue Target,
                               uint64_t &FixedValue) {
  uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset;
  unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
  unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
  unsigned Type = RIT_Vanilla;

  // See <reloc.h>.
  const MCSymbol *A = &Target.getSymA()->getSymbol();
  MCSymbolData *A_SD = &Asm.getSymbolData(*A);

  if (!A_SD->getFragment())
    report_fatal_error("symbol '" + A->getName() +
                       "' can not be undefined in a subtraction expression");

  uint32_t Value = Layout.getSymbolAddress(A_SD);
  uint32_t Value2 = 0;

  if (const MCSymbolRefExpr *B = Target.getSymB()) {
    MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());

    if (!B_SD->getFragment())
      report_fatal_error("symbol '" + B->getSymbol().getName() +
                         "' can not be undefined in a subtraction expression");

    // Select the appropriate difference relocation type.
    //
    // Note that there is no longer any semantic difference between these two
    // relocation types from the linker's point of view, this is done solely
    // for pedantic compatibility with 'as'.
    Type = A_SD->isExternal() ? RIT_Difference : RIT_LocalDifference;
    Value2 = Layout.getSymbolAddress(B_SD);
  }

  // Relocations are written out in reverse order, so the PAIR comes first.
  if (Type == RIT_Difference || Type == RIT_LocalDifference) {
    MachRelocationEntry MRE;
    MRE.Word0 = ((0         <<  0) |
                 (RIT_Pair  << 24) |
                 (Log2Size  << 28) |
                 (IsPCRel   << 30) |
                 RF_Scattered);
    MRE.Word1 = Value2;
    Relocations[Fragment->getParent()].push_back(MRE);
  }

  // Word0 packs the fields at bit offsets 0 (Address), 24 (Type),
  // 28 (Log2Size) and 30 (IsPCRel), plus the RF_Scattered flag; Word1 holds
  // the address of symbol A.
  MachRelocationEntry MRE;
  MRE.Word0 = ((Address   <<  0) |
               (Type      << 24) |
               (Log2Size  << 28) |
               (IsPCRel   << 30) |
               RF_Scattered);
  MRE.Word1 = Value;
  Relocations[Fragment->getParent()].push_back(MRE);
}

/// RecordRelocation - Record the relocation entry for \arg Fixup.
///
/// The 64-bit case is delegated entirely to RecordX86_64Relocation(). In the
/// 32-bit case, differences and defined-symbol-plus-offset targets go to
/// RecordScatteredRelocation(); everything else gets a plain RIT_Vanilla
/// entry. \arg FixedValue is only passed through to those helpers.
void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
                      const MCFragment *Fragment, const MCAsmFixup &Fixup,
                      MCValue Target, uint64_t &FixedValue) {
  if (Is64Bit) {
    RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
    return;
  }

  unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
  unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);

  // If this is a difference or a defined symbol plus an offset, then we need
  // a scattered relocation entry.
  uint32_t Offset = Target.getConstant();
  if (IsPCRel)
    Offset += 1 << Log2Size;
  if (Target.getSymB() ||
      (Target.getSymA() && !Target.getSymA()->getSymbol().isUndefined() &&
       Offset)) {
    RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,Target,FixedValue);
    return;
  }

  // See <reloc.h>.
  uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset;
  // NOTE(review): Value is assigned below but never read in this function.
  uint32_t Value = 0;
  unsigned Index = 0;
  unsigned IsExtern = 0;
  unsigned Type = 0;

  if (Target.isAbsolute()) { // constant
    // SymbolNum of 0 indicates the absolute section.
    //
    // FIXME: Currently, these are never generated (see code below). I cannot
    // find a case where they are actually emitted.
    Type = RIT_Vanilla;
    Value = 0;
  } else {
    const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
    MCSymbolData *SD = &Asm.getSymbolData(*Symbol);

    if (Symbol->isUndefined()) {
      // Undefined symbols are referenced by their symbol table index.
      IsExtern = 1;
      Index = SD->getIndex();
      Value = 0;
    } else {
      // The index is the section ordinal (1-based).
      Index = SD->getFragment()->getParent()->getOrdinal() + 1;
      Value = Layout.getSymbolAddress(SD);
    }

    Type = RIT_Vanilla;
  }

  // struct relocation_info (8 bytes)
  MachRelocationEntry MRE;
  MRE.Word0 = Address;
  MRE.Word1 = ((Index     <<  0) |
               (IsPCRel   << 24) |
               (Log2Size  << 25) |
               (IsExtern  << 27) |
               (Type      << 28));
  Relocations[Fragment->getParent()].push_back(MRE);
}

/// BindIndirectSymbols - Make sure symbol data exists for every indirect
/// symbol, in two passes: first those in non lazy symbol pointer sections,
/// then those in lazy symbol pointer and symbol stub sections.
void BindIndirectSymbols(MCAssembler &Asm) {
  // This is the point where 'as' creates actual symbols for indirect symbols
  // (in the following two passes). It would be easier for us to do this
  // sooner when we see the attribute, but that makes getting the order in the
  // symbol table much more complicated than it is worth.
  //
  // FIXME: Revisit this when the dust settles.

  // Bind non lazy symbol pointers first.
  for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
         ie = Asm.indirect_symbol_end(); it != ie; ++it) {
    // FIXME: cast<> support!
    const MCSectionMachO &Section =
      static_cast<const MCSectionMachO&>(it->SectionData->getSection());

    if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
      continue;

    Asm.getOrCreateSymbolData(*it->Symbol);
  }

  // Then lazy symbol pointers and symbol stubs.
  for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
         ie = Asm.indirect_symbol_end(); it != ie; ++it) {
    // FIXME: cast<> support!
    const MCSectionMachO &Section =
      static_cast<const MCSectionMachO&>(it->SectionData->getSection());

    if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
        Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
      continue;

    // Set the symbol type to undefined lazy, but only on construction.
    //
    // FIXME: Do not hardcode.
    bool Created;
    MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
    if (Created)
      Entry.setFlags(Entry.getFlags() | 0x0001);
  }
}

/// ComputeSymbolTable - Compute the symbol table data
///
/// \param StringTable [out] - The string table data.
/// \param LocalSymbolData [out] - Entries for the linker visible symbols that
/// are neither external nor undefined, in assembler symbol order.
/// \param ExternalSymbolData [out] - Entries for the defined external
/// symbols, sorted by name.
/// \param UndefinedSymbolData [out] - Entries for the undefined symbols,
/// sorted by name.
///
/// Every collected symbol also has its index set, numbering the locals first,
/// then the externals, then the undefined symbols.
void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
                        std::vector<MachSymbolData> &LocalSymbolData,
                        std::vector<MachSymbolData> &ExternalSymbolData,
                        std::vector<MachSymbolData> &UndefinedSymbolData) {
  // Build section lookup table.
  DenseMap<const MCSection*, uint8_t> SectionIndexMap;
  unsigned Index = 1;
  for (MCAssembler::iterator it = Asm.begin(),
         ie = Asm.end(); it != ie; ++it, ++Index)
    SectionIndexMap[&it->getSection()] = Index;
  assert(Index <= 256 && "Too many sections!");

  // Index 0 is always the empty string.
  //
  // StringIndexMap maps each symbol name to its offset in the string table; a
  // zero entry means the name has not been added yet.
  StringMap<uint64_t> StringIndexMap;
  StringTable += '\x00';

  // Build the symbol arrays and the string table, but only for non-local
  // symbols.
  //
  // The particular order that we collect the symbols and create the string
  // table, then sort the symbols is chosen to match 'as'. Even though it
  // doesn't matter for correctness, this is important for letting us diff .o
  // files.
  for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
         ie = Asm.symbol_end(); it != ie; ++it) {
    const MCSymbol &Symbol = it->getSymbol();

    // Ignore non-linker visible symbols.
    if (!Asm.isSymbolLinkerVisible(it))
      continue;

    if (!it->isExternal() && !Symbol.isUndefined())
      continue;

    uint64_t &Entry = StringIndexMap[Symbol.getName()];
    if (!Entry) {
      Entry = StringTable.size();
      StringTable += Symbol.getName();
      StringTable += '\x00';
    }

    MachSymbolData MSD;
    MSD.SymbolData = it;
    MSD.StringIndex = Entry;

    if (Symbol.isUndefined()) {
      MSD.SectionIndex = 0;
      UndefinedSymbolData.push_back(MSD);
    } else if (Symbol.isAbsolute()) {
      MSD.SectionIndex = 0;
      ExternalSymbolData.push_back(MSD);
    } else {
      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
      assert(MSD.SectionIndex && "Invalid section index!");
      ExternalSymbolData.push_back(MSD);
    }
  }

  // Now add the data for local symbols.
  for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
         ie = Asm.symbol_end(); it != ie; ++it) {
    const MCSymbol &Symbol = it->getSymbol();

    // Ignore non-linker visible symbols.
    if (!Asm.isSymbolLinkerVisible(it))
      continue;

    if (it->isExternal() || Symbol.isUndefined())
      continue;

    uint64_t &Entry = StringIndexMap[Symbol.getName()];
    if (!Entry) {
      Entry = StringTable.size();
      StringTable += Symbol.getName();
      StringTable += '\x00';
    }

    MachSymbolData MSD;
    MSD.SymbolData = it;
    MSD.StringIndex = Entry;

    if (Symbol.isAbsolute()) {
      MSD.SectionIndex = 0;
      LocalSymbolData.push_back(MSD);
    } else {
      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
      assert(MSD.SectionIndex && "Invalid section index!");
      LocalSymbolData.push_back(MSD);
    }
  }

  // External and undefined symbols are required to be in lexicographic order.
  std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
  std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());

  // Set the symbol indices.
  Index = 0;
  for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
    LocalSymbolData[i].SymbolData->setIndex(Index++);
  for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
    ExternalSymbolData[i].SymbolData->setIndex(Index++);
  for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
    UndefinedSymbolData[i].SymbolData->setIndex(Index++);

  // The string table is padded to a multiple of 4.
  while (StringTable.size() % 4)
    StringTable += '\x00';
}

/// ExecutePostLayoutBinding - Bind the indirect symbols and then compute the
/// symbol table data into this object's StringTable and symbol vectors.
void ExecutePostLayoutBinding(MCAssembler &Asm) {
  // Create symbol data for any indirect symbols.
  BindIndirectSymbols(Asm);

  // Compute symbol table information and bind symbol indices.
  ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
                     UndefinedSymbolData);
}

/// WriteObject - Write the object file for \arg Asm: the header and load
/// commands, the section headers, the section data (padded to 4 bytes), the
/// relocation entries and then the symbol table data.
void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout) {
  unsigned NumSections = Asm.size();

  // The section data starts after the header, the segment load command (and
  // section headers) and the symbol table.
  unsigned NumLoadCommands = 1;
  uint64_t LoadCommandsSize = Is64Bit ?
    SegmentLoadCommand64Size + NumSections * Section64Size :
    SegmentLoadCommand32Size + NumSections * Section32Size;

  // Add the symbol table load command sizes, if used.
  unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
    UndefinedSymbolData.size();
  if (NumSymbols) {
    NumLoadCommands += 2;
    LoadCommandsSize += SymtabLoadCommandSize + DysymtabLoadCommandSize;
  }

  // Compute the total size of the section data, as well as its file size and
  // vm size.
  uint64_t SectionDataStart = (Is64Bit ? Header64Size : Header32Size)
    + LoadCommandsSize;
  uint64_t SectionDataSize = 0;
  uint64_t SectionDataFileSize = 0;
  uint64_t VMSize = 0;
  for (MCAssembler::const_iterator it = Asm.begin(),
         ie = Asm.end(); it != ie; ++it) {
    const MCSectionData &SD = *it;
    uint64_t Address = Layout.getSectionAddress(&SD);
    uint64_t Size = Layout.getSectionSize(&SD);
    uint64_t FileSize = Layout.getSectionFileSize(&SD);

    VMSize = std::max(VMSize, Address + Size);

    // Virtual sections count towards the vm size only.
    if (Asm.getBackend().isVirtualSection(SD.getSection()))
      continue;

    SectionDataSize = std::max(SectionDataSize, Address + Size);
    SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
  }

  // The section data is padded to 4 bytes.
  //
  // FIXME: Is this machine dependent?
  unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
  SectionDataFileSize += SectionDataPadding;

  // Write the prolog, starting with the header and load command...
  WriteHeader(NumLoadCommands, LoadCommandsSize,
              Asm.getSubsectionsViaSymbols());
  WriteSegmentLoadCommand(NumSections, VMSize,
                          SectionDataStart, SectionDataSize);

  // ... and then the section headers.
  //
  // RelocTableEnd tracks where the next section's relocation entries start;
  // after the loop it is the end of all relocation data.
  uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
  for (MCAssembler::const_iterator it = Asm.begin(),
         ie = Asm.end(); it != ie; ++it) {
    std::vector<MachRelocationEntry> &Relocs = Relocations[it];
    unsigned NumRelocs = Relocs.size();
    uint64_t SectionStart = SectionDataStart + Layout.getSectionAddress(it);
    WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
    RelocTableEnd += NumRelocs * RelocationInfoSize;
  }

  // Write the symbol table load command, if used.
  if (NumSymbols) {
    unsigned FirstLocalSymbol = 0;
    unsigned NumLocalSymbols = LocalSymbolData.size();
    unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
    unsigned NumExternalSymbols = ExternalSymbolData.size();
    unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
    unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
    unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
    unsigned NumSymTabSymbols =
      NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
    uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
    uint64_t IndirectSymbolOffset = 0;

    // If used, the indirect symbols are written after the section data.
    if (NumIndirectSymbols)
      IndirectSymbolOffset = RelocTableEnd;

    // The symbol table is written after the indirect symbol data.
    uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;

    // The string table is written after symbol table.
    uint64_t StringTableOffset =
      SymbolTableOffset + NumSymTabSymbols * (Is64Bit ? Nlist64Size :
                                              Nlist32Size);
    WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
                           StringTableOffset, StringTable.size());

    WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
                             FirstExternalSymbol, NumExternalSymbols,
                             FirstUndefinedSymbol, NumUndefinedSymbols,
                             IndirectSymbolOffset, NumIndirectSymbols);
  }

  // Write the actual section data.
  for (MCAssembler::const_iterator it = Asm.begin(),
         ie = Asm.end(); it != ie; ++it)
    Asm.WriteSectionData(it, Layout, Writer);

  // Write the extra padding.
  WriteZeros(SectionDataPadding);

  // Write the relocation entries.
  for (MCAssembler::const_iterator it = Asm.begin(),
         ie = Asm.end(); it != ie; ++it) {
    // Write the section relocation entries, in reverse order to match 'as'
    // (approximately, the exact algorithm is more complicated than this).
    std::vector<MachRelocationEntry> &Relocs = Relocations[it];
    for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
      Write32(Relocs[e - i - 1].Word0);
      Write32(Relocs[e - i - 1].Word1);
    }
  }

  // Write the symbol table data, if used.
  if (NumSymbols) {
    // Write the indirect symbol entries.
    for (MCAssembler::const_indirect_symbol_iterator
           it = Asm.indirect_symbol_begin(),
           ie = Asm.indirect_symbol_end(); it != ie; ++it) {
      // Indirect symbols in the non lazy symbol pointer section have some
      // special handling.
      const MCSectionMachO &Section =
        static_cast<const MCSectionMachO&>(it->SectionData->getSection());
      if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
        // If this symbol is defined and internal, mark it as such.
        if (it->Symbol->isDefined() &&
            !Asm.getSymbolData(*it->Symbol).isExternal()) {
          uint32_t Flags = ISF_Local;
          if (it->Symbol->isAbsolute())
            Flags |= ISF_Absolute;
          Write32(Flags);
          continue;
        }
      }

      // Everything else is written as the symbol's symbol table index.
      Write32(Asm.getSymbolData(*it->Symbol).getIndex());
    }

    // FIXME: Check that offsets match computed ones.

    // Write the symbol table entries.
    for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
      WriteNlist(LocalSymbolData[i], Layout);
    for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
      WriteNlist(ExternalSymbolData[i], Layout);
    for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
      WriteNlist(UndefinedSymbolData[i], Layout);

    // Write the string table.
1099 OS << StringTable.str(); 1100 } 1101 } 1102 }; 1103 1104 } 1105 1106 MachObjectWriter::MachObjectWriter(raw_ostream &OS, 1107 bool Is64Bit, 1108 bool IsLittleEndian) 1109 : MCObjectWriter(OS, IsLittleEndian) 1110 { 1111 Impl = new MachObjectWriterImpl(this, Is64Bit); 1112 } 1113 1114 MachObjectWriter::~MachObjectWriter() { 1115 delete (MachObjectWriterImpl*) Impl; 1116 } 1117 1118 void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) { 1119 ((MachObjectWriterImpl*) Impl)->ExecutePostLayoutBinding(Asm); 1120 } 1121 1122 void MachObjectWriter::RecordRelocation(const MCAssembler &Asm, 1123 const MCAsmLayout &Layout, 1124 const MCFragment *Fragment, 1125 const MCAsmFixup &Fixup, MCValue Target, 1126 uint64_t &FixedValue) { 1127 ((MachObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup, 1128 Target, FixedValue); 1129 } 1130 1131 void MachObjectWriter::WriteObject(const MCAssembler &Asm, 1132 const MCAsmLayout &Layout) { 1133 ((MachObjectWriterImpl*) Impl)->WriteObject(Asm, Layout); 1134 } 1135