1 //===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 /// 11 /// \file Converts from in-memory normalized mach-o to in-memory Atoms. 12 /// 13 /// +------------+ 14 /// | normalized | 15 /// +------------+ 16 /// | 17 /// | 18 /// v 19 /// +-------+ 20 /// | Atoms | 21 /// +-------+ 22 23 #include "MachONormalizedFile.h" 24 #include "ArchHandler.h" 25 #include "Atoms.h" 26 #include "File.h" 27 #include "MachONormalizedFileBinaryUtils.h" 28 #include "lld/Core/Error.h" 29 #include "lld/Core/LLVM.h" 30 #include "llvm/Support/Debug.h" 31 #include "llvm/Support/Format.h" 32 #include "llvm/Support/MachO.h" 33 #include "llvm/Support/LEB128.h" 34 #include "llvm/Support/raw_ostream.h" 35 36 using namespace llvm::MachO; 37 using namespace lld::mach_o::normalized; 38 39 #define DEBUG_TYPE "normalized-file-to-atoms" 40 41 namespace lld { 42 namespace mach_o { 43 44 45 namespace { // anonymous 46 47 48 #define ENTRY(seg, sect, type, atomType) \ 49 {seg, sect, type, DefinedAtom::atomType } 50 51 struct MachORelocatableSectionToAtomType { 52 StringRef segmentName; 53 StringRef sectionName; 54 SectionType sectionType; 55 DefinedAtom::ContentType atomType; 56 }; 57 58 const MachORelocatableSectionToAtomType sectsToAtomType[] = { 59 ENTRY("__TEXT", "__text", S_REGULAR, typeCode), 60 ENTRY("__TEXT", "__text", S_REGULAR, typeResolver), 61 ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), 62 ENTRY("", "", S_CSTRING_LITERALS, typeCString), 63 ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String), 64 ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), 65 ENTRY("__TEXT", "__const_coal", S_COALESCED, typeConstant), 66 ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), 67 ENTRY("__TEXT", "__eh_frame", S_REGULAR, typeCFI), 68 ENTRY("__TEXT", "__literal4", S_4BYTE_LITERALS, typeLiteral4), 69 ENTRY("__TEXT", "__literal8", S_8BYTE_LITERALS, typeLiteral8), 70 ENTRY("__TEXT", "__literal16", S_16BYTE_LITERALS, typeLiteral16), 71 ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), 72 ENTRY("__DATA", "__data", S_REGULAR, typeData), 73 ENTRY("__DATA", "__datacoal_nt", S_COALESCED, typeData), 74 ENTRY("__DATA", "__const", S_REGULAR, typeConstData), 75 ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), 76 ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, 77 typeInitializerPtr), 78 ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, 79 typeTerminatorPtr), 80 ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, 81 typeGOT), 82 ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), 83 ENTRY("", "", S_NON_LAZY_SYMBOL_POINTERS, 84 typeGOT), 85 ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), 86 ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES, 87 typeThunkTLV), 88 ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData), 89 ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL, 90 typeTLVInitialZeroFill), 91 ENTRY("__DATA", "__objc_imageinfo", S_REGULAR, typeObjCImageInfo), 92 ENTRY("__DATA", "__objc_catlist", S_REGULAR, typeObjC2CategoryList), 93 ENTRY("", "", S_INTERPOSING, typeInterposingTuples), 94 ENTRY("__LD", "__compact_unwind", S_REGULAR, 95 typeCompactUnwindInfo), 96 ENTRY("", "", S_REGULAR, typeUnknown) 97 }; 98 #undef ENTRY 99 100 101 /// Figures out ContentType of a mach-o section. 102 DefinedAtom::ContentType atomTypeFromSection(const Section §ion, 103 bool &customSectionName) { 104 // First look for match of name and type. Empty names in table are wildcards. 105 customSectionName = false; 106 for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; 107 p->atomType != DefinedAtom::typeUnknown; ++p) { 108 if (p->sectionType != section.type) 109 continue; 110 if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty()) 111 continue; 112 if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty()) 113 continue; 114 customSectionName = p->segmentName.empty() && p->sectionName.empty(); 115 return p->atomType; 116 } 117 // Look for code denoted by section attributes 118 if (section.attributes & S_ATTR_PURE_INSTRUCTIONS) 119 return DefinedAtom::typeCode; 120 121 return DefinedAtom::typeUnknown; 122 } 123 124 enum AtomizeModel { 125 atomizeAtSymbols, 126 atomizeFixedSize, 127 atomizePointerSize, 128 atomizeUTF8, 129 atomizeUTF16, 130 atomizeCFI, 131 atomizeCU, 132 atomizeCFString 133 }; 134 135 /// Returns info on how to atomize a section of the specified ContentType. 136 void sectionParseInfo(DefinedAtom::ContentType atomType, 137 unsigned int &sizeMultiple, 138 DefinedAtom::Scope &scope, 139 DefinedAtom::Merge &merge, 140 AtomizeModel &atomizeModel) { 141 struct ParseInfo { 142 DefinedAtom::ContentType atomType; 143 unsigned int sizeMultiple; 144 DefinedAtom::Scope scope; 145 DefinedAtom::Merge merge; 146 AtomizeModel atomizeModel; 147 }; 148 149 #define ENTRY(type, size, scope, merge, model) \ 150 {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model } 151 152 static const ParseInfo parseInfo[] = { 153 ENTRY(typeCode, 1, scopeGlobal, mergeNo, 154 atomizeAtSymbols), 155 ENTRY(typeData, 1, scopeGlobal, mergeNo, 156 atomizeAtSymbols), 157 ENTRY(typeConstData, 1, scopeGlobal, mergeNo, 158 atomizeAtSymbols), 159 ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo, 160 atomizeAtSymbols), 161 ENTRY(typeConstant, 1, scopeGlobal, mergeNo, 162 atomizeAtSymbols), 163 ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent, 164 atomizeUTF8), 165 ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent, 166 atomizeUTF16), 167 ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo, 168 atomizeCFI), 169 ENTRY(typeLiteral4, 4, scopeLinkageUnit, mergeByContent, 170 atomizeFixedSize), 171 ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent, 172 atomizeFixedSize), 173 ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent, 174 atomizeFixedSize), 175 ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent, 176 atomizeCFString), 177 ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo, 178 atomizePointerSize), 179 ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo, 180 atomizePointerSize), 181 ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo, 182 atomizeCU), 183 ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent, 184 atomizePointerSize), 185 ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent, 186 atomizePointerSize), 187 ENTRY(typeUnknown, 1, scopeGlobal, mergeNo, 188 atomizeAtSymbols) 189 }; 190 #undef ENTRY 191 const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo); 192 for (int i=0; i < tableLen; ++i) { 193 if (parseInfo[i].atomType == atomType) { 194 sizeMultiple = parseInfo[i].sizeMultiple; 195 scope = parseInfo[i].scope; 196 merge = parseInfo[i].merge; 197 atomizeModel = parseInfo[i].atomizeModel; 198 return; 199 } 200 } 201 202 // Unknown type is atomized by symbols. 203 sizeMultiple = 1; 204 scope = DefinedAtom::scopeGlobal; 205 merge = DefinedAtom::mergeNo; 206 atomizeModel = atomizeAtSymbols; 207 } 208 209 210 Atom::Scope atomScope(uint8_t scope) { 211 switch (scope) { 212 case N_EXT: 213 return Atom::scopeGlobal; 214 case N_PEXT: 215 case N_PEXT | N_EXT: 216 return Atom::scopeLinkageUnit; 217 case 0: 218 return Atom::scopeTranslationUnit; 219 } 220 llvm_unreachable("unknown scope value!"); 221 } 222 223 void appendSymbolsInSection(const std::vector<Symbol> &inSymbols, 224 uint32_t sectionIndex, 225 SmallVector<const Symbol *, 64> &outSyms) { 226 for (const Symbol &sym : inSymbols) { 227 // Only look at definition symbols. 228 if ((sym.type & N_TYPE) != N_SECT) 229 continue; 230 if (sym.sect != sectionIndex) 231 continue; 232 outSyms.push_back(&sym); 233 } 234 } 235 236 void atomFromSymbol(DefinedAtom::ContentType atomType, const Section §ion, 237 MachOFile &file, uint64_t symbolAddr, StringRef symbolName, 238 uint16_t symbolDescFlags, Atom::Scope symbolScope, 239 uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) { 240 // Mach-O symbol table does have size in it. Instead the size is the 241 // difference between this and the next symbol. 242 uint64_t size = nextSymbolAddr - symbolAddr; 243 uint64_t offset = symbolAddr - section.address; 244 bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable; 245 if (isZeroFillSection(section.type)) { 246 file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size, 247 noDeadStrip, copyRefs, §ion); 248 } else { 249 DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF) 250 ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo; 251 bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF); 252 if (atomType == DefinedAtom::typeUnknown) { 253 // Mach-O needs a segment and section name. Concatentate those two 254 // with a / separator (e.g. "seg/sect") to fit into the lld model 255 // of just a section name. 256 std::string segSectName = section.segmentName.str() 257 + "/" + section.sectionName.str(); 258 file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType, 259 merge, thumb, noDeadStrip, offset, 260 size, segSectName, true, §ion); 261 } else { 262 if ((atomType == lld::DefinedAtom::typeCode) && 263 (symbolDescFlags & N_SYMBOL_RESOLVER)) { 264 atomType = lld::DefinedAtom::typeResolver; 265 } 266 file.addDefinedAtom(symbolName, symbolScope, atomType, merge, 267 offset, size, thumb, noDeadStrip, copyRefs, §ion); 268 } 269 } 270 } 271 272 llvm::Error processSymboledSection(DefinedAtom::ContentType atomType, 273 const Section §ion, 274 const NormalizedFile &normalizedFile, 275 MachOFile &file, bool scatterable, 276 bool copyRefs) { 277 // Find section's index. 278 uint32_t sectIndex = 1; 279 for (auto § : normalizedFile.sections) { 280 if (§ == §ion) 281 break; 282 ++sectIndex; 283 } 284 285 // Find all symbols in this section. 286 SmallVector<const Symbol *, 64> symbols; 287 appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols); 288 appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols); 289 290 // Sort symbols. 291 std::sort(symbols.begin(), symbols.end(), 292 [](const Symbol *lhs, const Symbol *rhs) -> bool { 293 if (lhs == rhs) 294 return false; 295 // First by address. 296 uint64_t lhsAddr = lhs->value; 297 uint64_t rhsAddr = rhs->value; 298 if (lhsAddr != rhsAddr) 299 return lhsAddr < rhsAddr; 300 // If same address, one is an alias so sort by scope. 301 Atom::Scope lScope = atomScope(lhs->scope); 302 Atom::Scope rScope = atomScope(rhs->scope); 303 if (lScope != rScope) 304 return lScope < rScope; 305 // If same address and scope, see if one might be better as 306 // the alias. 307 bool lPrivate = (lhs->name.front() == 'l'); 308 bool rPrivate = (rhs->name.front() == 'l'); 309 if (lPrivate != rPrivate) 310 return lPrivate; 311 // If same address and scope, sort by name. 312 return lhs->name < rhs->name; 313 }); 314 315 // Debug logging of symbols. 316 //for (const Symbol *sym : symbols) 317 // llvm::errs() << " sym: " 318 // << llvm::format("0x%08llx ", (uint64_t)sym->value) 319 // << ", " << sym->name << "\n"; 320 321 // If section has no symbols and no content, there are no atoms. 322 if (symbols.empty() && section.content.empty()) 323 return llvm::Error(); 324 325 if (symbols.empty()) { 326 // Section has no symbols, put all content in one anoymous atom. 327 atomFromSymbol(atomType, section, file, section.address, StringRef(), 328 0, Atom::scopeTranslationUnit, 329 section.address + section.content.size(), 330 scatterable, copyRefs); 331 } 332 else if (symbols.front()->value != section.address) { 333 // Section has anonymous content before first symbol. 334 atomFromSymbol(atomType, section, file, section.address, StringRef(), 335 0, Atom::scopeTranslationUnit, symbols.front()->value, 336 scatterable, copyRefs); 337 } 338 339 const Symbol *lastSym = nullptr; 340 for (const Symbol *sym : symbols) { 341 if (lastSym != nullptr) { 342 // Ignore any assembler added "ltmpNNN" symbol at start of section 343 // if there is another symbol at the start. 344 if ((lastSym->value != sym->value) 345 || lastSym->value != section.address 346 || !lastSym->name.startswith("ltmp")) { 347 atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, 348 lastSym->desc, atomScope(lastSym->scope), sym->value, 349 scatterable, copyRefs); 350 } 351 } 352 lastSym = sym; 353 } 354 if (lastSym != nullptr) { 355 atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, 356 lastSym->desc, atomScope(lastSym->scope), 357 section.address + section.content.size(), 358 scatterable, copyRefs); 359 } 360 361 // If object built without .subsections_via_symbols, add reference chain. 362 if (!scatterable) { 363 MachODefinedAtom *prevAtom = nullptr; 364 file.eachAtomInSection(section, 365 [&](MachODefinedAtom *atom, uint64_t offset)->void { 366 if (prevAtom) 367 prevAtom->addReference(Reference::KindNamespace::all, 368 Reference::KindArch::all, 369 Reference::kindLayoutAfter, 0, atom, 0); 370 prevAtom = atom; 371 }); 372 } 373 374 return llvm::Error(); 375 } 376 377 llvm::Error processSection(DefinedAtom::ContentType atomType, 378 const Section §ion, 379 bool customSectionName, 380 const NormalizedFile &normalizedFile, 381 MachOFile &file, bool scatterable, 382 bool copyRefs) { 383 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); 384 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 385 386 // Get info on how to atomize section. 387 unsigned int sizeMultiple; 388 DefinedAtom::Scope scope; 389 DefinedAtom::Merge merge; 390 AtomizeModel atomizeModel; 391 sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel); 392 393 // Validate section size. 394 if ((section.content.size() % sizeMultiple) != 0) 395 return llvm::make_error<GenericError>(Twine("Section ") 396 + section.segmentName 397 + "/" + section.sectionName 398 + " has size (" 399 + Twine(section.content.size()) 400 + ") which is not a multiple of " 401 + Twine(sizeMultiple)); 402 403 if (atomizeModel == atomizeAtSymbols) { 404 // Break section up into atoms each with a fixed size. 405 return processSymboledSection(atomType, section, normalizedFile, file, 406 scatterable, copyRefs); 407 } else { 408 unsigned int size; 409 for (unsigned int offset = 0, e = section.content.size(); offset != e;) { 410 switch (atomizeModel) { 411 case atomizeFixedSize: 412 // Break section up into atoms each with a fixed size. 413 size = sizeMultiple; 414 break; 415 case atomizePointerSize: 416 // Break section up into atoms each the size of a pointer. 417 size = is64 ? 8 : 4; 418 break; 419 case atomizeUTF8: 420 // Break section up into zero terminated c-strings. 421 size = 0; 422 for (unsigned int i = offset; i < e; ++i) { 423 if (section.content[i] == 0) { 424 size = i + 1 - offset; 425 break; 426 } 427 } 428 break; 429 case atomizeUTF16: 430 // Break section up into zero terminated UTF16 strings. 431 size = 0; 432 for (unsigned int i = offset; i < e; i += 2) { 433 if ((section.content[i] == 0) && (section.content[i + 1] == 0)) { 434 size = i + 2 - offset; 435 break; 436 } 437 } 438 break; 439 case atomizeCFI: 440 // Break section up into dwarf unwind CFIs (FDE or CIE). 441 size = read32(§ion.content[offset], isBig) + 4; 442 if (offset+size > section.content.size()) { 443 return llvm::make_error<GenericError>(Twine("Section ") 444 + section.segmentName 445 + "/" + section.sectionName 446 + " is malformed. Size of CFI " 447 "starting at offset (" 448 + Twine(offset) 449 + ") is past end of section."); 450 } 451 break; 452 case atomizeCU: 453 // Break section up into compact unwind entries. 454 size = is64 ? 32 : 20; 455 break; 456 case atomizeCFString: 457 // Break section up into NS/CFString objects. 458 size = is64 ? 32 : 16; 459 break; 460 case atomizeAtSymbols: 461 break; 462 } 463 if (size == 0) { 464 return llvm::make_error<GenericError>(Twine("Section ") 465 + section.segmentName 466 + "/" + section.sectionName 467 + " is malformed. The last atom " 468 "is not zero terminated."); 469 } 470 if (customSectionName) { 471 // Mach-O needs a segment and section name. Concatentate those two 472 // with a / separator (e.g. "seg/sect") to fit into the lld model 473 // of just a section name. 474 std::string segSectName = section.segmentName.str() 475 + "/" + section.sectionName.str(); 476 file.addDefinedAtomInCustomSection(StringRef(), scope, atomType, 477 merge, false, false, offset, 478 size, segSectName, true, §ion); 479 } else { 480 file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size, 481 false, false, copyRefs, §ion); 482 } 483 offset += size; 484 } 485 } 486 return llvm::Error(); 487 } 488 489 const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile, 490 uint64_t address) { 491 for (const Section &s : normalizedFile.sections) { 492 uint64_t sAddr = s.address; 493 if ((sAddr <= address) && (address < sAddr+s.content.size())) { 494 return &s; 495 } 496 } 497 return nullptr; 498 } 499 500 const MachODefinedAtom * 501 findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file, 502 uint64_t addr, Reference::Addend *addend) { 503 const Section *sect = nullptr; 504 sect = findSectionCoveringAddress(normalizedFile, addr); 505 if (!sect) 506 return nullptr; 507 508 uint32_t offsetInTarget; 509 uint64_t offsetInSect = addr - sect->address; 510 auto atom = 511 file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); 512 *addend = offsetInTarget; 513 return atom; 514 } 515 516 // Walks all relocations for a section in a normalized .o file and 517 // creates corresponding lld::Reference objects. 518 llvm::Error convertRelocs(const Section §ion, 519 const NormalizedFile &normalizedFile, 520 bool scatterable, 521 MachOFile &file, 522 ArchHandler &handler) { 523 // Utility function for ArchHandler to find atom by its address. 524 auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr, 525 const lld::Atom **atom, Reference::Addend *addend) 526 -> llvm::Error { 527 if (sectIndex > normalizedFile.sections.size()) 528 return llvm::make_error<GenericError>(Twine("out of range section " 529 "index (") + Twine(sectIndex) + ")"); 530 const Section *sect = nullptr; 531 if (sectIndex == 0) { 532 sect = findSectionCoveringAddress(normalizedFile, addr); 533 if (!sect) 534 return llvm::make_error<GenericError>(Twine("address (" + Twine(addr) 535 + ") is not in any section")); 536 } else { 537 sect = &normalizedFile.sections[sectIndex-1]; 538 } 539 uint32_t offsetInTarget; 540 uint64_t offsetInSect = addr - sect->address; 541 *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); 542 *addend = offsetInTarget; 543 return llvm::Error(); 544 }; 545 546 // Utility function for ArchHandler to find atom by its symbol index. 547 auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result) 548 -> llvm::Error { 549 // Find symbol from index. 550 const Symbol *sym = nullptr; 551 uint32_t numLocal = normalizedFile.localSymbols.size(); 552 uint32_t numGlobal = normalizedFile.globalSymbols.size(); 553 uint32_t numUndef = normalizedFile.undefinedSymbols.size(); 554 if (symbolIndex < numLocal) { 555 sym = &normalizedFile.localSymbols[symbolIndex]; 556 } else if (symbolIndex < numLocal+numGlobal) { 557 sym = &normalizedFile.globalSymbols[symbolIndex-numLocal]; 558 } else if (symbolIndex < numLocal+numGlobal+numUndef) { 559 sym = &normalizedFile.undefinedSymbols[symbolIndex-numLocal-numGlobal]; 560 } else { 561 return llvm::make_error<GenericError>(Twine("symbol index (") 562 + Twine(symbolIndex) + ") out of range"); 563 } 564 // Find atom from symbol. 565 if ((sym->type & N_TYPE) == N_SECT) { 566 if (sym->sect > normalizedFile.sections.size()) 567 return llvm::make_error<GenericError>(Twine("symbol section index (") 568 + Twine(sym->sect) + ") out of range "); 569 const Section &symSection = normalizedFile.sections[sym->sect-1]; 570 uint64_t targetOffsetInSect = sym->value - symSection.address; 571 MachODefinedAtom *target = file.findAtomCoveringAddress(symSection, 572 targetOffsetInSect); 573 if (target) { 574 *result = target; 575 return llvm::Error(); 576 } 577 return llvm::make_error<GenericError>("no atom found for defined symbol"); 578 } else if ((sym->type & N_TYPE) == N_UNDF) { 579 const lld::Atom *target = file.findUndefAtom(sym->name); 580 if (target) { 581 *result = target; 582 return llvm::Error(); 583 } 584 return llvm::make_error<GenericError>("no undefined atom found for sym"); 585 } else { 586 // Search undefs 587 return llvm::make_error<GenericError>("no atom found for symbol"); 588 } 589 }; 590 591 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 592 // Use old-school iterator so that paired relocations can be grouped. 593 for (auto it=section.relocations.begin(), e=section.relocations.end(); 594 it != e; ++it) { 595 const Relocation &reloc = *it; 596 // Find atom this relocation is in. 597 if (reloc.offset > section.content.size()) 598 return llvm::make_error<GenericError>( 599 Twine("r_address (") + Twine(reloc.offset) 600 + ") is larger than section size (" 601 + Twine(section.content.size()) + ")"); 602 uint32_t offsetInAtom; 603 MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section, 604 reloc.offset, 605 &offsetInAtom); 606 assert(inAtom && "r_address in range, should have found atom"); 607 uint64_t fixupAddress = section.address + reloc.offset; 608 609 const lld::Atom *target = nullptr; 610 Reference::Addend addend = 0; 611 Reference::KindValue kind; 612 if (handler.isPairedReloc(reloc)) { 613 // Handle paired relocations together. 614 const Relocation &reloc2 = *++it; 615 auto relocErr = handler.getPairReferenceInfo( 616 reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable, 617 atomByAddr, atomBySymbol, &kind, &target, &addend); 618 if (relocErr) { 619 return handleErrors(std::move(relocErr), 620 [&](std::unique_ptr<GenericError> GE) { 621 return llvm::make_error<GenericError>( 622 Twine("bad relocation (") + GE->getMessage() 623 + ") in section " 624 + section.segmentName + "/" + section.sectionName 625 + " (r1_address=" + Twine::utohexstr(reloc.offset) 626 + ", r1_type=" + Twine(reloc.type) 627 + ", r1_extern=" + Twine(reloc.isExtern) 628 + ", r1_length=" + Twine((int)reloc.length) 629 + ", r1_pcrel=" + Twine(reloc.pcRel) 630 + (!reloc.scattered ? (Twine(", r1_symbolnum=") 631 + Twine(reloc.symbol)) 632 : (Twine(", r1_scattered=1, r1_value=") 633 + Twine(reloc.value))) 634 + ")" 635 + ", (r2_address=" + Twine::utohexstr(reloc2.offset) 636 + ", r2_type=" + Twine(reloc2.type) 637 + ", r2_extern=" + Twine(reloc2.isExtern) 638 + ", r2_length=" + Twine((int)reloc2.length) 639 + ", r2_pcrel=" + Twine(reloc2.pcRel) 640 + (!reloc2.scattered ? (Twine(", r2_symbolnum=") 641 + Twine(reloc2.symbol)) 642 : (Twine(", r2_scattered=1, r2_value=") 643 + Twine(reloc2.value))) 644 + ")" ); 645 }); 646 } 647 } 648 else { 649 // Use ArchHandler to convert relocation record into information 650 // needed to instantiate an lld::Reference object. 651 auto relocErr = handler.getReferenceInfo( 652 reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr, 653 atomBySymbol, &kind, &target, &addend); 654 if (relocErr) { 655 return handleErrors(std::move(relocErr), 656 [&](std::unique_ptr<GenericError> GE) { 657 return llvm::make_error<GenericError>( 658 Twine("bad relocation (") + GE->getMessage() 659 + ") in section " 660 + section.segmentName + "/" + section.sectionName 661 + " (r_address=" + Twine::utohexstr(reloc.offset) 662 + ", r_type=" + Twine(reloc.type) 663 + ", r_extern=" + Twine(reloc.isExtern) 664 + ", r_length=" + Twine((int)reloc.length) 665 + ", r_pcrel=" + Twine(reloc.pcRel) 666 + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol)) 667 : (Twine(", r_scattered=1, r_value=") 668 + Twine(reloc.value))) 669 + ")" ); 670 }); 671 } 672 } 673 // Instantiate an lld::Reference object and add to its atom. 674 inAtom->addReference(Reference::KindNamespace::mach_o, 675 handler.kindArch(), 676 kind, offsetInAtom, target, addend); 677 } 678 679 return llvm::Error(); 680 } 681 682 bool isDebugInfoSection(const Section §ion) { 683 if ((section.attributes & S_ATTR_DEBUG) == 0) 684 return false; 685 return section.segmentName.equals("__DWARF"); 686 } 687 688 static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) { 689 if (is64) 690 return read64(addr, isBig); 691 692 int32_t res = read32(addr, isBig); 693 return res; 694 } 695 696 /// --- Augmentation String Processing --- 697 698 struct CIEInfo { 699 bool _augmentationDataPresent = false; 700 bool _mayHaveEH = false; 701 uint32_t _offsetOfLSDA = ~0U; 702 uint32_t _offsetOfPersonality = ~0U; 703 uint32_t _offsetOfFDEPointerEncoding = ~0U; 704 uint32_t _augmentationDataLength = ~0U; 705 }; 706 707 typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap; 708 709 static llvm::Error processAugmentationString(const uint8_t *augStr, 710 CIEInfo &cieInfo, 711 unsigned &len) { 712 713 if (augStr[0] == '\0') { 714 len = 1; 715 return llvm::Error(); 716 } 717 718 if (augStr[0] != 'z') 719 return llvm::make_error<GenericError>("expected 'z' at start of " 720 "augmentation string"); 721 722 cieInfo._augmentationDataPresent = true; 723 uint64_t idx = 1; 724 725 uint32_t offsetInAugmentationData = 0; 726 while (augStr[idx] != '\0') { 727 if (augStr[idx] == 'L') { 728 cieInfo._offsetOfLSDA = offsetInAugmentationData; 729 // This adds a single byte to the augmentation data. 730 ++offsetInAugmentationData; 731 ++idx; 732 continue; 733 } 734 if (augStr[idx] == 'P') { 735 cieInfo._offsetOfPersonality = offsetInAugmentationData; 736 // This adds a single byte to the augmentation data for the encoding, 737 // then a number of bytes for the pointer data. 738 // FIXME: We are assuming 4 is correct here for the pointer size as we 739 // always currently use delta32ToGOT. 740 offsetInAugmentationData += 5; 741 ++idx; 742 continue; 743 } 744 if (augStr[idx] == 'R') { 745 cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData; 746 // This adds a single byte to the augmentation data. 747 ++offsetInAugmentationData; 748 ++idx; 749 continue; 750 } 751 if (augStr[idx] == 'e') { 752 if (augStr[idx + 1] != 'h') 753 return llvm::make_error<GenericError>("expected 'eh' in " 754 "augmentation string"); 755 cieInfo._mayHaveEH = true; 756 idx += 2; 757 continue; 758 } 759 ++idx; 760 } 761 762 cieInfo._augmentationDataLength = offsetInAugmentationData; 763 764 len = idx + 1; 765 return llvm::Error(); 766 } 767 768 static llvm::Error processCIE(const NormalizedFile &normalizedFile, 769 MachOFile &file, 770 mach_o::ArchHandler &handler, 771 const Section *ehFrameSection, 772 MachODefinedAtom *atom, 773 uint64_t offset, 774 CIEInfoMap &cieInfos) { 775 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 776 const uint8_t *frameData = atom->rawContent().data(); 777 778 CIEInfo cieInfo; 779 780 uint32_t size = read32(frameData, isBig); 781 uint64_t cieIDField = size == 0xffffffffU 782 ? sizeof(uint32_t) + sizeof(uint64_t) 783 : sizeof(uint32_t); 784 uint64_t versionField = cieIDField + sizeof(uint32_t); 785 uint64_t augmentationStringField = versionField + sizeof(uint8_t); 786 787 unsigned augmentationStringLength = 0; 788 if (auto err = processAugmentationString(frameData + augmentationStringField, 789 cieInfo, augmentationStringLength)) 790 return err; 791 792 if (cieInfo._offsetOfPersonality != ~0U) { 793 // If we have augmentation data for the personality function, then we may 794 // need to implicitly generate its relocation. 795 796 // Parse the EH Data field which is pointer sized. 797 uint64_t EHDataField = augmentationStringField + augmentationStringLength; 798 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); 799 unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0); 800 801 // Parse Code Align Factor which is a ULEB128. 802 uint64_t CodeAlignField = EHDataField + EHDataFieldSize; 803 unsigned lengthFieldSize = 0; 804 llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize); 805 806 // Parse Data Align Factor which is a SLEB128. 807 uint64_t DataAlignField = CodeAlignField + lengthFieldSize; 808 llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize); 809 810 // Parse Return Address Register which is a byte. 811 uint64_t ReturnAddressField = DataAlignField + lengthFieldSize; 812 813 // Parse the augmentation length which is a ULEB128. 814 uint64_t AugmentationLengthField = ReturnAddressField + 1; 815 uint64_t AugmentationLength = 816 llvm::decodeULEB128(frameData + AugmentationLengthField, 817 &lengthFieldSize); 818 819 if (AugmentationLength != cieInfo._augmentationDataLength) 820 return llvm::make_error<GenericError>("CIE augmentation data length " 821 "mismatch"); 822 823 // Get the start address of the augmentation data. 824 uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize; 825 826 // Parse the personality function from the augmentation data. 827 uint64_t PersonalityField = 828 AugmentationDataField + cieInfo._offsetOfPersonality; 829 830 // Parse the personality encoding. 831 // FIXME: Verify that this is a 32-bit pcrel offset. 832 uint64_t PersonalityFunctionField = PersonalityField + 1; 833 834 if (atom->begin() != atom->end()) { 835 // If we have an explicit relocation, then make sure it matches this 836 // offset as this is where we'd expect it to be applied to. 837 DefinedAtom::reference_iterator CurrentRef = atom->begin(); 838 if (CurrentRef->offsetInAtom() != PersonalityFunctionField) 839 return llvm::make_error<GenericError>("CIE personality reloc at " 840 "wrong offset"); 841 842 if (++CurrentRef != atom->end()) 843 return llvm::make_error<GenericError>("CIE contains too many relocs"); 844 } else { 845 // Implicitly generate the personality function reloc. It's assumed to 846 // be a delta32 offset to a GOT entry. 847 // FIXME: Parse the encoding and check this. 848 int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig); 849 uint64_t funcAddress = ehFrameSection->address + offset + 850 PersonalityFunctionField; 851 funcAddress += funcDelta; 852 853 const MachODefinedAtom *func = nullptr; 854 Reference::Addend addend; 855 func = findAtomCoveringAddress(normalizedFile, file, funcAddress, 856 &addend); 857 atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), 858 handler.unwindRefToPersonalityFunctionKind(), 859 PersonalityFunctionField, func, addend); 860 } 861 } else if (atom->begin() != atom->end()) { 862 // Otherwise, we expect there to be no relocations in this atom as the only 863 // relocation would have been to the personality function. 864 return llvm::make_error<GenericError>("unexpected relocation in CIE"); 865 } 866 867 868 cieInfos[atom] = std::move(cieInfo); 869 870 return llvm::Error(); 871 } 872 873 static llvm::Error processFDE(const NormalizedFile &normalizedFile, 874 MachOFile &file, 875 mach_o::ArchHandler &handler, 876 const Section *ehFrameSection, 877 MachODefinedAtom *atom, 878 uint64_t offset, 879 const CIEInfoMap &cieInfos) { 880 881 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 882 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); 883 884 // Compiler wasn't lazy and actually told us what it meant. 885 // Unfortunately, the compiler may not have generated references for all of 886 // [cie, func, lsda] and so we still need to parse the FDE and add references 887 // for any the compiler didn't generate. 888 if (atom->begin() != atom->end()) 889 atom->sortReferences(); 890 891 DefinedAtom::reference_iterator CurrentRef = atom->begin(); 892 893 // This helper returns the reference (if one exists) at the offset we are 894 // currently processing. It automatically increments the ref iterator if we 895 // do return a ref, and throws an error if we pass over a ref without 896 // comsuming it. 897 auto currentRefGetter = [&CurrentRef, 898 &atom](uint64_t Offset)->const Reference* { 899 // If there are no more refs found, then we are done. 900 if (CurrentRef == atom->end()) 901 return nullptr; 902 903 const Reference *Ref = *CurrentRef; 904 905 // If we haven't reached the offset for this reference, then return that 906 // we don't yet have a reference to process. 907 if (Offset < Ref->offsetInAtom()) 908 return nullptr; 909 910 // If the offset is equal, then we want to process this ref. 911 if (Offset == Ref->offsetInAtom()) { 912 ++CurrentRef; 913 return Ref; 914 } 915 916 // The current ref is at an offset which is earlier than the current 917 // offset, then we failed to consume it when we should have. In this case 918 // throw an error. 919 llvm::report_fatal_error("Skipped reference when processing FDE"); 920 }; 921 922 // Helper to either get the reference at this current location, and verify 923 // that it is of the expected type, or add a reference of that type. 924 // Returns the reference target. 925 auto verifyOrAddReference = [&](uint64_t targetAddress, 926 Reference::KindValue refKind, 927 uint64_t refAddress, 928 bool allowsAddend)->const Atom* { 929 if (auto *ref = currentRefGetter(refAddress)) { 930 // The compiler already emitted a relocation for the CIE ref. This should 931 // have been converted to the correct type of reference in 932 // get[Pair]ReferenceInfo(). 933 assert(ref->kindValue() == refKind && 934 "Incorrect EHFrame reference kind"); 935 return ref->target(); 936 } 937 Reference::Addend addend; 938 auto *target = findAtomCoveringAddress(normalizedFile, file, 939 targetAddress, &addend); 940 atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), 941 refKind, refAddress, target, addend); 942 943 if (!allowsAddend) 944 assert(!addend && "EHFrame reference cannot have addend"); 945 return target; 946 }; 947 948 const uint8_t *startFrameData = atom->rawContent().data(); 949 const uint8_t *frameData = startFrameData; 950 951 uint32_t size = read32(frameData, isBig); 952 uint64_t cieFieldInFDE = size == 0xffffffffU 953 ? sizeof(uint32_t) + sizeof(uint64_t) 954 : sizeof(uint32_t); 955 956 // Linker needs to fixup a reference from the FDE to its parent CIE (a 957 // 32-bit byte offset backwards in the __eh_frame section). 958 uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig); 959 uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE; 960 cieAddress -= cieDelta; 961 962 auto *cieRefTarget = verifyOrAddReference(cieAddress, 963 handler.unwindRefToCIEKind(), 964 cieFieldInFDE, false); 965 const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget); 966 assert(cie && cie->contentType() == DefinedAtom::typeCFI && 967 "FDE's CIE field does not point at the start of a CIE."); 968 969 const CIEInfo &cieInfo = cieInfos.find(cie)->second; 970 971 // Linker needs to fixup reference from the FDE to the function it's 972 // describing. FIXME: there are actually different ways to do this, and the 973 // particular method used is specified in the CIE's augmentation fields 974 // (hopefully) 975 uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t); 976 977 int64_t functionFromFDE = readSPtr(is64, isBig, 978 frameData + rangeFieldInFDE); 979 uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE; 980 rangeStart += functionFromFDE; 981 982 verifyOrAddReference(rangeStart, 983 handler.unwindRefToFunctionKind(), 984 rangeFieldInFDE, true); 985 986 // Handle the augmentation data if there is any. 987 if (cieInfo._augmentationDataPresent) { 988 // First process the augmentation data length field. 989 uint64_t augmentationDataLengthFieldInFDE = 990 rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t)); 991 unsigned lengthFieldSize = 0; 992 uint64_t augmentationDataLength = 993 llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE, 994 &lengthFieldSize); 995 996 if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) { 997 998 // Look at the augmentation data field. 999 uint64_t augmentationDataFieldInFDE = 1000 augmentationDataLengthFieldInFDE + lengthFieldSize; 1001 1002 int64_t lsdaFromFDE = readSPtr(is64, isBig, 1003 frameData + augmentationDataFieldInFDE); 1004 uint64_t lsdaStart = 1005 ehFrameSection->address + offset + augmentationDataFieldInFDE + 1006 lsdaFromFDE; 1007 1008 verifyOrAddReference(lsdaStart, 1009 handler.unwindRefToFunctionKind(), 1010 augmentationDataFieldInFDE, true); 1011 } 1012 } 1013 1014 return llvm::Error(); 1015 } 1016 1017 llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile, 1018 MachOFile &file, 1019 mach_o::ArchHandler &handler) { 1020 1021 const Section *ehFrameSection = nullptr; 1022 for (auto §ion : normalizedFile.sections) 1023 if (section.segmentName == "__TEXT" && 1024 section.sectionName == "__eh_frame") { 1025 ehFrameSection = §ion; 1026 break; 1027 } 1028 1029 // No __eh_frame so nothing to do. 1030 if (!ehFrameSection) 1031 return llvm::Error(); 1032 1033 llvm::Error ehFrameErr; 1034 CIEInfoMap cieInfos; 1035 1036 file.eachAtomInSection(*ehFrameSection, 1037 [&](MachODefinedAtom *atom, uint64_t offset) -> void { 1038 assert(atom->contentType() == DefinedAtom::typeCFI); 1039 1040 // Bail out if we've encountered an error. 1041 if (ehFrameErr) 1042 return; 1043 1044 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 1045 if (ArchHandler::isDwarfCIE(isBig, atom)) 1046 ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection, 1047 atom, offset, cieInfos); 1048 else 1049 ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection, 1050 atom, offset, cieInfos); 1051 }); 1052 1053 return ehFrameErr; 1054 } 1055 1056 llvm::Error parseObjCImageInfo(const Section §, 1057 const NormalizedFile &normalizedFile, 1058 MachOFile &file) { 1059 1060 // struct objc_image_info { 1061 // uint32_t version; // initially 0 1062 // uint32_t flags; 1063 // }; 1064 1065 ArrayRef<uint8_t> content = sect.content; 1066 if (content.size() != 8) 1067 return llvm::make_error<GenericError>(sect.segmentName + "/" + 1068 sect.sectionName + 1069 " in file " + file.path() + 1070 " should be 8 bytes in size"); 1071 1072 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 1073 uint32_t version = read32(content.data(), isBig); 1074 if (version) 1075 return llvm::make_error<GenericError>(sect.segmentName + "/" + 1076 sect.sectionName + 1077 " in file " + file.path() + 1078 " should have version=0"); 1079 1080 uint32_t flags = read32(content.data() + 4, isBig); 1081 if (flags & (MachOLinkingContext::objc_supports_gc | 1082 MachOLinkingContext::objc_gc_only)) 1083 return llvm::make_error<GenericError>(sect.segmentName + "/" + 1084 sect.sectionName + 1085 " in file " + file.path() + 1086 " uses GC. This is not supported"); 1087 1088 if (flags & MachOLinkingContext::objc_retainReleaseForSimulator) 1089 file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator); 1090 else 1091 file.setObjcConstraint(MachOLinkingContext::objc_retainRelease); 1092 1093 file.setSwiftVersion((flags >> 8) & 0xFF); 1094 1095 return llvm::Error(); 1096 } 1097 1098 1099 /// Converts normalized mach-o file into an lld::File and lld::Atoms. 1100 llvm::Expected<std::unique_ptr<lld::File>> 1101 objectToAtoms(const NormalizedFile &normalizedFile, StringRef path, 1102 bool copyRefs) { 1103 std::unique_ptr<MachOFile> file(new MachOFile(path)); 1104 if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs)) 1105 return std::move(ec); 1106 return std::unique_ptr<File>(std::move(file)); 1107 } 1108 1109 llvm::Expected<std::unique_ptr<lld::File>> 1110 dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path, 1111 bool copyRefs) { 1112 // Instantiate SharedLibraryFile object. 1113 std::unique_ptr<MachODylibFile> file(new MachODylibFile(path)); 1114 if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs)) 1115 return std::move(ec); 1116 return std::unique_ptr<File>(std::move(file)); 1117 } 1118 1119 } // anonymous namespace 1120 1121 namespace normalized { 1122 1123 static bool isObjCImageInfo(const Section §) { 1124 return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") || 1125 (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo"); 1126 } 1127 1128 llvm::Error 1129 normalizedObjectToAtoms(MachOFile *file, 1130 const NormalizedFile &normalizedFile, 1131 bool copyRefs) { 1132 DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: " 1133 << file->path() << "\n"); 1134 bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0); 1135 1136 // Create atoms from each section. 1137 for (auto § : normalizedFile.sections) { 1138 DEBUG(llvm::dbgs() << "Creating atoms: "; sect.dump()); 1139 if (isDebugInfoSection(sect)) 1140 continue; 1141 1142 1143 // If the file contains an objc_image_info struct, then we should parse the 1144 // ObjC flags and Swift version. 1145 if (isObjCImageInfo(sect)) { 1146 if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file)) 1147 return ec; 1148 // We then skip adding atoms for this section as we use the ObjCPass to 1149 // re-emit this data after it has been aggregated for all files. 1150 continue; 1151 } 1152 1153 bool customSectionName; 1154 DefinedAtom::ContentType atomType = atomTypeFromSection(sect, 1155 customSectionName); 1156 if (auto ec = processSection(atomType, sect, customSectionName, 1157 normalizedFile, *file, scatterable, copyRefs)) 1158 return ec; 1159 } 1160 // Create atoms from undefined symbols. 1161 for (auto &sym : normalizedFile.undefinedSymbols) { 1162 // Undefinded symbols with n_value != 0 are actually tentative definitions. 1163 if (sym.value == Hex64(0)) { 1164 file->addUndefinedAtom(sym.name, copyRefs); 1165 } else { 1166 file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value, 1167 DefinedAtom::Alignment(1 << (sym.desc >> 8)), 1168 copyRefs); 1169 } 1170 } 1171 1172 // Convert mach-o relocations to References 1173 std::unique_ptr<mach_o::ArchHandler> handler 1174 = ArchHandler::create(normalizedFile.arch); 1175 for (auto § : normalizedFile.sections) { 1176 if (isDebugInfoSection(sect)) 1177 continue; 1178 if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable, 1179 *file, *handler)) 1180 return ec; 1181 } 1182 1183 // Add additional arch-specific References 1184 file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void { 1185 handler->addAdditionalReferences(*atom); 1186 }); 1187 1188 // Each __eh_frame section needs references to both __text (the function we're 1189 // providing unwind info for) and itself (FDE -> CIE). These aren't 1190 // represented in the relocations on some architectures, so we have to add 1191 // them back in manually there. 1192 if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler)) 1193 return ec; 1194 1195 // Process mach-o data-in-code regions array. That information is encoded in 1196 // atoms as References at each transition point. 1197 unsigned nextIndex = 0; 1198 for (const DataInCode &entry : normalizedFile.dataInCode) { 1199 ++nextIndex; 1200 const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset); 1201 if (!s) { 1202 return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address (" 1203 + Twine(entry.offset) 1204 + ") is not in any section")); 1205 } 1206 uint64_t offsetInSect = entry.offset - s->address; 1207 uint32_t offsetInAtom; 1208 MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect, 1209 &offsetInAtom); 1210 if (offsetInAtom + entry.length > atom->size()) { 1211 return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry " 1212 "(offset=" 1213 + Twine(entry.offset) 1214 + ", length=" 1215 + Twine(entry.length) 1216 + ") crosses atom boundary.")); 1217 } 1218 // Add reference that marks start of data-in-code. 1219 atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), 1220 handler->dataInCodeTransitionStart(*atom), 1221 offsetInAtom, atom, entry.kind); 1222 1223 // Peek at next entry, if it starts where this one ends, skip ending ref. 1224 if (nextIndex < normalizedFile.dataInCode.size()) { 1225 const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex]; 1226 if (nextEntry.offset == (entry.offset + entry.length)) 1227 continue; 1228 } 1229 1230 // If data goes to end of function, skip ending ref. 1231 if ((offsetInAtom + entry.length) == atom->size()) 1232 continue; 1233 1234 // Add reference that marks end of data-in-code. 1235 atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), 1236 handler->dataInCodeTransitionEnd(*atom), 1237 offsetInAtom+entry.length, atom, 0); 1238 } 1239 1240 // Cache some attributes on the file for use later. 1241 file->setFlags(normalizedFile.flags); 1242 file->setArch(normalizedFile.arch); 1243 file->setOS(normalizedFile.os); 1244 file->setMinVersion(normalizedFile.minOSverson); 1245 file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind); 1246 1247 // Sort references in each atom to their canonical order. 1248 for (const DefinedAtom* defAtom : file->defined()) { 1249 reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences(); 1250 } 1251 return llvm::Error(); 1252 } 1253 1254 llvm::Error 1255 normalizedDylibToAtoms(MachODylibFile *file, 1256 const NormalizedFile &normalizedFile, 1257 bool copyRefs) { 1258 file->setInstallName(normalizedFile.installName); 1259 file->setCompatVersion(normalizedFile.compatVersion); 1260 file->setCurrentVersion(normalizedFile.currentVersion); 1261 1262 // Tell MachODylibFile object about all symbols it exports. 1263 if (!normalizedFile.exportInfo.empty()) { 1264 // If exports trie exists, use it instead of traditional symbol table. 1265 for (const Export &exp : normalizedFile.exportInfo) { 1266 bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); 1267 // StringRefs from export iterator are ephemeral, so force copy. 1268 file->addExportedSymbol(exp.name, weakDef, true); 1269 } 1270 } else { 1271 for (auto &sym : normalizedFile.globalSymbols) { 1272 assert((sym.scope & N_EXT) && "only expect external symbols here"); 1273 bool weakDef = (sym.desc & N_WEAK_DEF); 1274 file->addExportedSymbol(sym.name, weakDef, copyRefs); 1275 } 1276 } 1277 // Tell MachODylibFile object about all dylibs it re-exports. 1278 for (const DependentDylib &dep : normalizedFile.dependentDylibs) { 1279 if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB) 1280 file->addReExportedDylib(dep.path); 1281 } 1282 return llvm::Error(); 1283 } 1284 1285 void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, 1286 StringRef &segmentName, 1287 StringRef §ionName, 1288 SectionType §ionType, 1289 SectionAttr §ionAttrs, 1290 bool &relocsToDefinedCanBeImplicit) { 1291 1292 for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; 1293 p->atomType != DefinedAtom::typeUnknown; ++p) { 1294 if (p->atomType != atomType) 1295 continue; 1296 // Wild carded entries are ignored for reverse lookups. 1297 if (p->segmentName.empty() || p->sectionName.empty()) 1298 continue; 1299 segmentName = p->segmentName; 1300 sectionName = p->sectionName; 1301 sectionType = p->sectionType; 1302 sectionAttrs = 0; 1303 relocsToDefinedCanBeImplicit = false; 1304 if (atomType == DefinedAtom::typeCode) 1305 sectionAttrs = S_ATTR_PURE_INSTRUCTIONS; 1306 if (atomType == DefinedAtom::typeCFI) 1307 relocsToDefinedCanBeImplicit = true; 1308 return; 1309 } 1310 llvm_unreachable("content type not yet supported"); 1311 } 1312 1313 llvm::Expected<std::unique_ptr<lld::File>> 1314 normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, 1315 bool copyRefs) { 1316 switch (normalizedFile.fileType) { 1317 case MH_DYLIB: 1318 case MH_DYLIB_STUB: 1319 return dylibToAtoms(normalizedFile, path, copyRefs); 1320 case MH_OBJECT: 1321 return objectToAtoms(normalizedFile, path, copyRefs); 1322 default: 1323 llvm_unreachable("unhandled MachO file type!"); 1324 } 1325 } 1326 1327 #ifndef NDEBUG 1328 void Section::dump(llvm::raw_ostream &OS) const { 1329 OS << "Section (\"" << segmentName << ", " << sectionName << "\""; 1330 OS << ", addr: " << llvm::format_hex(address, 16, true); 1331 OS << ", size: " << llvm::format_hex(content.size(), 8, true) << ")\n"; 1332 } 1333 #endif 1334 1335 } // namespace normalized 1336 } // namespace mach_o 1337 } // namespace lld 1338