1 //===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 /// 10 /// \file Converts from in-memory normalized mach-o to in-memory Atoms. 11 /// 12 /// +------------+ 13 /// | normalized | 14 /// +------------+ 15 /// | 16 /// | 17 /// v 18 /// +-------+ 19 /// | Atoms | 20 /// +-------+ 21 22 #include "ArchHandler.h" 23 #include "Atoms.h" 24 #include "File.h" 25 #include "MachONormalizedFile.h" 26 #include "MachONormalizedFileBinaryUtils.h" 27 #include "lld/Common/LLVM.h" 28 #include "lld/Core/Error.h" 29 #include "llvm/BinaryFormat/Dwarf.h" 30 #include "llvm/BinaryFormat/MachO.h" 31 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 32 #include "llvm/Support/DataExtractor.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/Error.h" 35 #include "llvm/Support/Format.h" 36 #include "llvm/Support/LEB128.h" 37 #include "llvm/Support/raw_ostream.h" 38 39 using namespace llvm::MachO; 40 using namespace lld::mach_o::normalized; 41 42 #define DEBUG_TYPE "normalized-file-to-atoms" 43 44 namespace lld { 45 namespace mach_o { 46 47 48 namespace { // anonymous 49 50 51 #define ENTRY(seg, sect, type, atomType) \ 52 {seg, sect, type, DefinedAtom::atomType } 53 54 struct MachORelocatableSectionToAtomType { 55 StringRef segmentName; 56 StringRef sectionName; 57 SectionType sectionType; 58 DefinedAtom::ContentType atomType; 59 }; 60 61 const MachORelocatableSectionToAtomType sectsToAtomType[] = { 62 ENTRY("__TEXT", "__text", S_REGULAR, typeCode), 63 ENTRY("__TEXT", "__text", S_REGULAR, typeResolver), 64 ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), 65 ENTRY("", "", S_CSTRING_LITERALS, typeCString), 66 ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String), 67 ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), 68 ENTRY("__TEXT", "__const_coal", S_COALESCED, typeConstant), 69 ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), 70 ENTRY("__TEXT", "__eh_frame", S_REGULAR, typeCFI), 71 ENTRY("__TEXT", "__literal4", S_4BYTE_LITERALS, typeLiteral4), 72 ENTRY("__TEXT", "__literal8", S_8BYTE_LITERALS, typeLiteral8), 73 ENTRY("__TEXT", "__literal16", S_16BYTE_LITERALS, typeLiteral16), 74 ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), 75 ENTRY("__DATA", "__data", S_REGULAR, typeData), 76 ENTRY("__DATA", "__datacoal_nt", S_COALESCED, typeData), 77 ENTRY("__DATA", "__const", S_REGULAR, typeConstData), 78 ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), 79 ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, 80 typeInitializerPtr), 81 ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, 82 typeTerminatorPtr), 83 ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, 84 typeGOT), 85 ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), 86 ENTRY("", "", S_NON_LAZY_SYMBOL_POINTERS, 87 typeGOT), 88 ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), 89 ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES, 90 typeThunkTLV), 91 ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData), 92 ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL, 93 typeTLVInitialZeroFill), 94 ENTRY("__DATA", "__objc_imageinfo", S_REGULAR, typeObjCImageInfo), 95 ENTRY("__DATA", "__objc_catlist", S_REGULAR, typeObjC2CategoryList), 96 ENTRY("", "", S_INTERPOSING, typeInterposingTuples), 97 ENTRY("__LD", "__compact_unwind", S_REGULAR, 98 typeCompactUnwindInfo), 99 ENTRY("", "", S_REGULAR, typeUnknown) 100 }; 101 #undef ENTRY 102 103 104 /// Figures out ContentType of a mach-o section. 105 DefinedAtom::ContentType atomTypeFromSection(const Section §ion, 106 bool &customSectionName) { 107 // First look for match of name and type. Empty names in table are wildcards. 108 customSectionName = false; 109 for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; 110 p->atomType != DefinedAtom::typeUnknown; ++p) { 111 if (p->sectionType != section.type) 112 continue; 113 if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty()) 114 continue; 115 if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty()) 116 continue; 117 customSectionName = p->segmentName.empty() && p->sectionName.empty(); 118 return p->atomType; 119 } 120 // Look for code denoted by section attributes 121 if (section.attributes & S_ATTR_PURE_INSTRUCTIONS) 122 return DefinedAtom::typeCode; 123 124 return DefinedAtom::typeUnknown; 125 } 126 127 enum AtomizeModel { 128 atomizeAtSymbols, 129 atomizeFixedSize, 130 atomizePointerSize, 131 atomizeUTF8, 132 atomizeUTF16, 133 atomizeCFI, 134 atomizeCU, 135 atomizeCFString 136 }; 137 138 /// Returns info on how to atomize a section of the specified ContentType. 139 void sectionParseInfo(DefinedAtom::ContentType atomType, 140 unsigned int &sizeMultiple, 141 DefinedAtom::Scope &scope, 142 DefinedAtom::Merge &merge, 143 AtomizeModel &atomizeModel) { 144 struct ParseInfo { 145 DefinedAtom::ContentType atomType; 146 unsigned int sizeMultiple; 147 DefinedAtom::Scope scope; 148 DefinedAtom::Merge merge; 149 AtomizeModel atomizeModel; 150 }; 151 152 #define ENTRY(type, size, scope, merge, model) \ 153 {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model } 154 155 static const ParseInfo parseInfo[] = { 156 ENTRY(typeCode, 1, scopeGlobal, mergeNo, 157 atomizeAtSymbols), 158 ENTRY(typeData, 1, scopeGlobal, mergeNo, 159 atomizeAtSymbols), 160 ENTRY(typeConstData, 1, scopeGlobal, mergeNo, 161 atomizeAtSymbols), 162 ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo, 163 atomizeAtSymbols), 164 ENTRY(typeConstant, 1, scopeGlobal, mergeNo, 165 atomizeAtSymbols), 166 ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent, 167 atomizeUTF8), 168 ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent, 169 atomizeUTF16), 170 ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo, 171 atomizeCFI), 172 ENTRY(typeLiteral4, 4, scopeLinkageUnit, mergeByContent, 173 atomizeFixedSize), 174 ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent, 175 atomizeFixedSize), 176 ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent, 177 atomizeFixedSize), 178 ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent, 179 atomizeCFString), 180 ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo, 181 atomizePointerSize), 182 ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo, 183 atomizePointerSize), 184 ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo, 185 atomizeCU), 186 ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent, 187 atomizePointerSize), 188 ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent, 189 atomizePointerSize), 190 ENTRY(typeUnknown, 1, scopeGlobal, mergeNo, 191 atomizeAtSymbols) 192 }; 193 #undef ENTRY 194 const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo); 195 for (int i=0; i < tableLen; ++i) { 196 if (parseInfo[i].atomType == atomType) { 197 sizeMultiple = parseInfo[i].sizeMultiple; 198 scope = parseInfo[i].scope; 199 merge = parseInfo[i].merge; 200 atomizeModel = parseInfo[i].atomizeModel; 201 return; 202 } 203 } 204 205 // Unknown type is atomized by symbols. 206 sizeMultiple = 1; 207 scope = DefinedAtom::scopeGlobal; 208 merge = DefinedAtom::mergeNo; 209 atomizeModel = atomizeAtSymbols; 210 } 211 212 213 Atom::Scope atomScope(uint8_t scope) { 214 switch (scope) { 215 case N_EXT: 216 return Atom::scopeGlobal; 217 case N_PEXT: 218 case N_PEXT | N_EXT: 219 return Atom::scopeLinkageUnit; 220 case 0: 221 return Atom::scopeTranslationUnit; 222 } 223 llvm_unreachable("unknown scope value!"); 224 } 225 226 void appendSymbolsInSection( 227 const std::vector<lld::mach_o::normalized::Symbol> &inSymbols, 228 uint32_t sectionIndex, 229 SmallVector<const lld::mach_o::normalized::Symbol *, 64> &outSyms) { 230 for (const lld::mach_o::normalized::Symbol &sym : inSymbols) { 231 // Only look at definition symbols. 232 if ((sym.type & N_TYPE) != N_SECT) 233 continue; 234 if (sym.sect != sectionIndex) 235 continue; 236 outSyms.push_back(&sym); 237 } 238 } 239 240 void atomFromSymbol(DefinedAtom::ContentType atomType, const Section §ion, 241 MachOFile &file, uint64_t symbolAddr, StringRef symbolName, 242 uint16_t symbolDescFlags, Atom::Scope symbolScope, 243 uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) { 244 // Mach-O symbol table does have size in it. Instead the size is the 245 // difference between this and the next symbol. 246 uint64_t size = nextSymbolAddr - symbolAddr; 247 uint64_t offset = symbolAddr - section.address; 248 bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable; 249 if (isZeroFillSection(section.type)) { 250 file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size, 251 noDeadStrip, copyRefs, §ion); 252 } else { 253 DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF) 254 ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo; 255 bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF); 256 if (atomType == DefinedAtom::typeUnknown) { 257 // Mach-O needs a segment and section name. Concatenate those two 258 // with a / separator (e.g. "seg/sect") to fit into the lld model 259 // of just a section name. 260 std::string segSectName = section.segmentName.str() 261 + "/" + section.sectionName.str(); 262 file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType, 263 merge, thumb, noDeadStrip, offset, 264 size, segSectName, true, §ion); 265 } else { 266 if ((atomType == lld::DefinedAtom::typeCode) && 267 (symbolDescFlags & N_SYMBOL_RESOLVER)) { 268 atomType = lld::DefinedAtom::typeResolver; 269 } 270 file.addDefinedAtom(symbolName, symbolScope, atomType, merge, 271 offset, size, thumb, noDeadStrip, copyRefs, §ion); 272 } 273 } 274 } 275 276 llvm::Error processSymboledSection(DefinedAtom::ContentType atomType, 277 const Section §ion, 278 const NormalizedFile &normalizedFile, 279 MachOFile &file, bool scatterable, 280 bool copyRefs) { 281 // Find section's index. 282 uint32_t sectIndex = 1; 283 for (auto § : normalizedFile.sections) { 284 if (§ == §ion) 285 break; 286 ++sectIndex; 287 } 288 289 // Find all symbols in this section. 290 SmallVector<const lld::mach_o::normalized::Symbol *, 64> symbols; 291 appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols); 292 appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols); 293 294 // Sort symbols. 295 std::sort(symbols.begin(), symbols.end(), 296 [](const lld::mach_o::normalized::Symbol *lhs, 297 const lld::mach_o::normalized::Symbol *rhs) -> bool { 298 if (lhs == rhs) 299 return false; 300 // First by address. 301 uint64_t lhsAddr = lhs->value; 302 uint64_t rhsAddr = rhs->value; 303 if (lhsAddr != rhsAddr) 304 return lhsAddr < rhsAddr; 305 // If same address, one is an alias so sort by scope. 306 Atom::Scope lScope = atomScope(lhs->scope); 307 Atom::Scope rScope = atomScope(rhs->scope); 308 if (lScope != rScope) 309 return lScope < rScope; 310 // If same address and scope, see if one might be better as 311 // the alias. 312 bool lPrivate = (lhs->name.front() == 'l'); 313 bool rPrivate = (rhs->name.front() == 'l'); 314 if (lPrivate != rPrivate) 315 return lPrivate; 316 // If same address and scope, sort by name. 317 return lhs->name < rhs->name; 318 }); 319 320 // Debug logging of symbols. 321 // for (const Symbol *sym : symbols) 322 // llvm::errs() << " sym: " 323 // << llvm::format("0x%08llx ", (uint64_t)sym->value) 324 // << ", " << sym->name << "\n"; 325 326 // If section has no symbols and no content, there are no atoms. 327 if (symbols.empty() && section.content.empty()) 328 return llvm::Error::success(); 329 330 if (symbols.empty()) { 331 // Section has no symbols, put all content in one anonymous atom. 332 atomFromSymbol(atomType, section, file, section.address, StringRef(), 333 0, Atom::scopeTranslationUnit, 334 section.address + section.content.size(), 335 scatterable, copyRefs); 336 } 337 else if (symbols.front()->value != section.address) { 338 // Section has anonymous content before first symbol. 339 atomFromSymbol(atomType, section, file, section.address, StringRef(), 340 0, Atom::scopeTranslationUnit, symbols.front()->value, 341 scatterable, copyRefs); 342 } 343 344 const lld::mach_o::normalized::Symbol *lastSym = nullptr; 345 for (const lld::mach_o::normalized::Symbol *sym : symbols) { 346 if (lastSym != nullptr) { 347 // Ignore any assembler added "ltmpNNN" symbol at start of section 348 // if there is another symbol at the start. 349 if ((lastSym->value != sym->value) 350 || lastSym->value != section.address 351 || !lastSym->name.startswith("ltmp")) { 352 atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, 353 lastSym->desc, atomScope(lastSym->scope), sym->value, 354 scatterable, copyRefs); 355 } 356 } 357 lastSym = sym; 358 } 359 if (lastSym != nullptr) { 360 atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, 361 lastSym->desc, atomScope(lastSym->scope), 362 section.address + section.content.size(), 363 scatterable, copyRefs); 364 } 365 366 // If object built without .subsections_via_symbols, add reference chain. 367 if (!scatterable) { 368 MachODefinedAtom *prevAtom = nullptr; 369 file.eachAtomInSection(section, 370 [&](MachODefinedAtom *atom, uint64_t offset)->void { 371 if (prevAtom) 372 prevAtom->addReference(Reference::KindNamespace::all, 373 Reference::KindArch::all, 374 Reference::kindLayoutAfter, 0, atom, 0); 375 prevAtom = atom; 376 }); 377 } 378 379 return llvm::Error::success(); 380 } 381 382 llvm::Error processSection(DefinedAtom::ContentType atomType, 383 const Section §ion, 384 bool customSectionName, 385 const NormalizedFile &normalizedFile, 386 MachOFile &file, bool scatterable, 387 bool copyRefs) { 388 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); 389 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 390 391 // Get info on how to atomize section. 392 unsigned int sizeMultiple; 393 DefinedAtom::Scope scope; 394 DefinedAtom::Merge merge; 395 AtomizeModel atomizeModel; 396 sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel); 397 398 // Validate section size. 399 if ((section.content.size() % sizeMultiple) != 0) 400 return llvm::make_error<GenericError>(Twine("Section ") 401 + section.segmentName 402 + "/" + section.sectionName 403 + " has size (" 404 + Twine(section.content.size()) 405 + ") which is not a multiple of " 406 + Twine(sizeMultiple)); 407 408 if (atomizeModel == atomizeAtSymbols) { 409 // Break section up into atoms each with a fixed size. 410 return processSymboledSection(atomType, section, normalizedFile, file, 411 scatterable, copyRefs); 412 } else { 413 unsigned int size; 414 for (unsigned int offset = 0, e = section.content.size(); offset != e;) { 415 switch (atomizeModel) { 416 case atomizeFixedSize: 417 // Break section up into atoms each with a fixed size. 418 size = sizeMultiple; 419 break; 420 case atomizePointerSize: 421 // Break section up into atoms each the size of a pointer. 422 size = is64 ? 8 : 4; 423 break; 424 case atomizeUTF8: 425 // Break section up into zero terminated c-strings. 426 size = 0; 427 for (unsigned int i = offset; i < e; ++i) { 428 if (section.content[i] == 0) { 429 size = i + 1 - offset; 430 break; 431 } 432 } 433 break; 434 case atomizeUTF16: 435 // Break section up into zero terminated UTF16 strings. 436 size = 0; 437 for (unsigned int i = offset; i < e; i += 2) { 438 if ((section.content[i] == 0) && (section.content[i + 1] == 0)) { 439 size = i + 2 - offset; 440 break; 441 } 442 } 443 break; 444 case atomizeCFI: 445 // Break section up into dwarf unwind CFIs (FDE or CIE). 446 size = read32(§ion.content[offset], isBig) + 4; 447 if (offset+size > section.content.size()) { 448 return llvm::make_error<GenericError>(Twine("Section ") 449 + section.segmentName 450 + "/" + section.sectionName 451 + " is malformed. Size of CFI " 452 "starting at offset (" 453 + Twine(offset) 454 + ") is past end of section."); 455 } 456 break; 457 case atomizeCU: 458 // Break section up into compact unwind entries. 459 size = is64 ? 32 : 20; 460 break; 461 case atomizeCFString: 462 // Break section up into NS/CFString objects. 463 size = is64 ? 32 : 16; 464 break; 465 case atomizeAtSymbols: 466 break; 467 } 468 if (size == 0) { 469 return llvm::make_error<GenericError>(Twine("Section ") 470 + section.segmentName 471 + "/" + section.sectionName 472 + " is malformed. The last atom " 473 "is not zero terminated."); 474 } 475 if (customSectionName) { 476 // Mach-O needs a segment and section name. Concatenate those two 477 // with a / separator (e.g. "seg/sect") to fit into the lld model 478 // of just a section name. 479 std::string segSectName = section.segmentName.str() 480 + "/" + section.sectionName.str(); 481 file.addDefinedAtomInCustomSection(StringRef(), scope, atomType, 482 merge, false, false, offset, 483 size, segSectName, true, §ion); 484 } else { 485 file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size, 486 false, false, copyRefs, §ion); 487 } 488 offset += size; 489 } 490 } 491 return llvm::Error::success(); 492 } 493 494 const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile, 495 uint64_t address) { 496 for (const Section &s : normalizedFile.sections) { 497 uint64_t sAddr = s.address; 498 if ((sAddr <= address) && (address < sAddr+s.content.size())) { 499 return &s; 500 } 501 } 502 return nullptr; 503 } 504 505 const MachODefinedAtom * 506 findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file, 507 uint64_t addr, Reference::Addend &addend) { 508 const Section *sect = nullptr; 509 sect = findSectionCoveringAddress(normalizedFile, addr); 510 if (!sect) 511 return nullptr; 512 513 uint32_t offsetInTarget; 514 uint64_t offsetInSect = addr - sect->address; 515 auto atom = 516 file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); 517 addend = offsetInTarget; 518 return atom; 519 } 520 521 // Walks all relocations for a section in a normalized .o file and 522 // creates corresponding lld::Reference objects. 523 llvm::Error convertRelocs(const Section §ion, 524 const NormalizedFile &normalizedFile, 525 bool scatterable, 526 MachOFile &file, 527 ArchHandler &handler) { 528 // Utility function for ArchHandler to find atom by its address. 529 auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr, 530 const lld::Atom **atom, Reference::Addend *addend) 531 -> llvm::Error { 532 if (sectIndex > normalizedFile.sections.size()) 533 return llvm::make_error<GenericError>(Twine("out of range section " 534 "index (") + Twine(sectIndex) + ")"); 535 const Section *sect = nullptr; 536 if (sectIndex == 0) { 537 sect = findSectionCoveringAddress(normalizedFile, addr); 538 if (!sect) 539 return llvm::make_error<GenericError>(Twine("address (" + Twine(addr) 540 + ") is not in any section")); 541 } else { 542 sect = &normalizedFile.sections[sectIndex-1]; 543 } 544 uint32_t offsetInTarget; 545 uint64_t offsetInSect = addr - sect->address; 546 *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); 547 *addend = offsetInTarget; 548 return llvm::Error::success(); 549 }; 550 551 // Utility function for ArchHandler to find atom by its symbol index. 552 auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result) 553 -> llvm::Error { 554 // Find symbol from index. 555 const lld::mach_o::normalized::Symbol *sym = nullptr; 556 uint32_t numStabs = normalizedFile.stabsSymbols.size(); 557 uint32_t numLocal = normalizedFile.localSymbols.size(); 558 uint32_t numGlobal = normalizedFile.globalSymbols.size(); 559 uint32_t numUndef = normalizedFile.undefinedSymbols.size(); 560 assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?"); 561 if (symbolIndex < numStabs+numLocal) { 562 sym = &normalizedFile.localSymbols[symbolIndex-numStabs]; 563 } else if (symbolIndex < numStabs+numLocal+numGlobal) { 564 sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal]; 565 } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) { 566 sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal- 567 numGlobal]; 568 } else { 569 return llvm::make_error<GenericError>(Twine("symbol index (") 570 + Twine(symbolIndex) + ") out of range"); 571 } 572 573 // Find atom from symbol. 574 if ((sym->type & N_TYPE) == N_SECT) { 575 if (sym->sect > normalizedFile.sections.size()) 576 return llvm::make_error<GenericError>(Twine("symbol section index (") 577 + Twine(sym->sect) + ") out of range "); 578 const Section &symSection = normalizedFile.sections[sym->sect-1]; 579 uint64_t targetOffsetInSect = sym->value - symSection.address; 580 MachODefinedAtom *target = file.findAtomCoveringAddress(symSection, 581 targetOffsetInSect); 582 if (target) { 583 *result = target; 584 return llvm::Error::success(); 585 } 586 return llvm::make_error<GenericError>("no atom found for defined symbol"); 587 } else if ((sym->type & N_TYPE) == N_UNDF) { 588 const lld::Atom *target = file.findUndefAtom(sym->name); 589 if (target) { 590 *result = target; 591 return llvm::Error::success(); 592 } 593 return llvm::make_error<GenericError>("no undefined atom found for sym"); 594 } else { 595 // Search undefs 596 return llvm::make_error<GenericError>("no atom found for symbol"); 597 } 598 }; 599 600 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 601 // Use old-school iterator so that paired relocations can be grouped. 602 for (auto it=section.relocations.begin(), e=section.relocations.end(); 603 it != e; ++it) { 604 const Relocation &reloc = *it; 605 // Find atom this relocation is in. 606 if (reloc.offset > section.content.size()) 607 return llvm::make_error<GenericError>( 608 Twine("r_address (") + Twine(reloc.offset) 609 + ") is larger than section size (" 610 + Twine(section.content.size()) + ")"); 611 uint32_t offsetInAtom; 612 MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section, 613 reloc.offset, 614 &offsetInAtom); 615 assert(inAtom && "r_address in range, should have found atom"); 616 uint64_t fixupAddress = section.address + reloc.offset; 617 618 const lld::Atom *target = nullptr; 619 Reference::Addend addend = 0; 620 Reference::KindValue kind; 621 if (handler.isPairedReloc(reloc)) { 622 // Handle paired relocations together. 623 const Relocation &reloc2 = *++it; 624 auto relocErr = handler.getPairReferenceInfo( 625 reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable, 626 atomByAddr, atomBySymbol, &kind, &target, &addend); 627 if (relocErr) { 628 return handleErrors(std::move(relocErr), 629 [&](std::unique_ptr<GenericError> GE) { 630 return llvm::make_error<GenericError>( 631 Twine("bad relocation (") + GE->getMessage() 632 + ") in section " 633 + section.segmentName + "/" + section.sectionName 634 + " (r1_address=" + Twine::utohexstr(reloc.offset) 635 + ", r1_type=" + Twine(reloc.type) 636 + ", r1_extern=" + Twine(reloc.isExtern) 637 + ", r1_length=" + Twine((int)reloc.length) 638 + ", r1_pcrel=" + Twine(reloc.pcRel) 639 + (!reloc.scattered ? (Twine(", r1_symbolnum=") 640 + Twine(reloc.symbol)) 641 : (Twine(", r1_scattered=1, r1_value=") 642 + Twine(reloc.value))) 643 + ")" 644 + ", (r2_address=" + Twine::utohexstr(reloc2.offset) 645 + ", r2_type=" + Twine(reloc2.type) 646 + ", r2_extern=" + Twine(reloc2.isExtern) 647 + ", r2_length=" + Twine((int)reloc2.length) 648 + ", r2_pcrel=" + Twine(reloc2.pcRel) 649 + (!reloc2.scattered ? (Twine(", r2_symbolnum=") 650 + Twine(reloc2.symbol)) 651 : (Twine(", r2_scattered=1, r2_value=") 652 + Twine(reloc2.value))) 653 + ")" ); 654 }); 655 } 656 } 657 else { 658 // Use ArchHandler to convert relocation record into information 659 // needed to instantiate an lld::Reference object. 660 auto relocErr = handler.getReferenceInfo( 661 reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr, 662 atomBySymbol, &kind, &target, &addend); 663 if (relocErr) { 664 return handleErrors(std::move(relocErr), 665 [&](std::unique_ptr<GenericError> GE) { 666 return llvm::make_error<GenericError>( 667 Twine("bad relocation (") + GE->getMessage() 668 + ") in section " 669 + section.segmentName + "/" + section.sectionName 670 + " (r_address=" + Twine::utohexstr(reloc.offset) 671 + ", r_type=" + Twine(reloc.type) 672 + ", r_extern=" + Twine(reloc.isExtern) 673 + ", r_length=" + Twine((int)reloc.length) 674 + ", r_pcrel=" + Twine(reloc.pcRel) 675 + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol)) 676 : (Twine(", r_scattered=1, r_value=") 677 + Twine(reloc.value))) 678 + ")" ); 679 }); 680 } 681 } 682 // Instantiate an lld::Reference object and add to its atom. 683 inAtom->addReference(Reference::KindNamespace::mach_o, 684 handler.kindArch(), 685 kind, offsetInAtom, target, addend); 686 } 687 688 return llvm::Error::success(); 689 } 690 691 bool isDebugInfoSection(const Section §ion) { 692 if ((section.attributes & S_ATTR_DEBUG) == 0) 693 return false; 694 return section.segmentName.equals("__DWARF"); 695 } 696 697 static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) { 698 std::string strName = name.str(); 699 for (auto *atom : file.defined()) 700 if (atom->name() == strName) 701 return atom; 702 return nullptr; 703 } 704 705 static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) { 706 char *strCopy = alloc.Allocate<char>(str.size() + 1); 707 memcpy(strCopy, str.data(), str.size()); 708 strCopy[str.size()] = '\0'; 709 return strCopy; 710 } 711 712 llvm::Error parseStabs(MachOFile &file, 713 const NormalizedFile &normalizedFile, 714 bool copyRefs) { 715 716 if (normalizedFile.stabsSymbols.empty()) 717 return llvm::Error::success(); 718 719 // FIXME: Kill this off when we can move to sane yaml parsing. 720 std::unique_ptr<BumpPtrAllocator> allocator; 721 if (copyRefs) 722 allocator = std::make_unique<BumpPtrAllocator>(); 723 724 enum { start, inBeginEnd } state = start; 725 726 const Atom *currentAtom = nullptr; 727 uint64_t currentAtomAddress = 0; 728 StabsDebugInfo::StabsList stabsList; 729 for (const auto &stabSym : normalizedFile.stabsSymbols) { 730 Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc, 731 stabSym.value, stabSym.name); 732 switch (state) { 733 case start: 734 switch (static_cast<StabType>(stabSym.type)) { 735 case N_BNSYM: 736 state = inBeginEnd; 737 currentAtomAddress = stabSym.value; 738 Reference::Addend addend; 739 currentAtom = findAtomCoveringAddress(normalizedFile, file, 740 currentAtomAddress, addend); 741 if (addend != 0) 742 return llvm::make_error<GenericError>( 743 "Non-zero addend for BNSYM '" + stabSym.name + "' in " + 744 file.path()); 745 if (currentAtom) 746 stab.atom = currentAtom; 747 else { 748 // FIXME: ld64 just issues a warning here - should we match that? 749 return llvm::make_error<GenericError>( 750 "can't find atom for stabs BNSYM at " + 751 Twine::utohexstr(stabSym.value) + " in " + file.path()); 752 } 753 break; 754 case N_SO: 755 case N_OSO: 756 // Not associated with an atom, just copy. 757 if (copyRefs) 758 stab.str = copyDebugString(stabSym.name, *allocator); 759 else 760 stab.str = stabSym.name; 761 break; 762 case N_GSYM: { 763 auto colonIdx = stabSym.name.find(':'); 764 if (colonIdx != StringRef::npos) { 765 StringRef name = stabSym.name.substr(0, colonIdx); 766 currentAtom = findDefinedAtomByName(file, "_" + name); 767 stab.atom = currentAtom; 768 if (copyRefs) 769 stab.str = copyDebugString(stabSym.name, *allocator); 770 else 771 stab.str = stabSym.name; 772 } else { 773 currentAtom = findDefinedAtomByName(file, stabSym.name); 774 stab.atom = currentAtom; 775 if (copyRefs) 776 stab.str = copyDebugString(stabSym.name, *allocator); 777 else 778 stab.str = stabSym.name; 779 } 780 if (stab.atom == nullptr) 781 return llvm::make_error<GenericError>( 782 "can't find atom for N_GSYM stabs" + stabSym.name + 783 " in " + file.path()); 784 break; 785 } 786 case N_FUN: 787 return llvm::make_error<GenericError>( 788 "old-style N_FUN stab '" + stabSym.name + "' unsupported"); 789 default: 790 return llvm::make_error<GenericError>( 791 "unrecognized stab symbol '" + stabSym.name + "'"); 792 } 793 break; 794 case inBeginEnd: 795 stab.atom = currentAtom; 796 switch (static_cast<StabType>(stabSym.type)) { 797 case N_ENSYM: 798 state = start; 799 currentAtom = nullptr; 800 break; 801 case N_FUN: 802 // Just copy the string. 803 if (copyRefs) 804 stab.str = copyDebugString(stabSym.name, *allocator); 805 else 806 stab.str = stabSym.name; 807 break; 808 default: 809 return llvm::make_error<GenericError>( 810 "unrecognized stab symbol '" + stabSym.name + "'"); 811 } 812 } 813 llvm::dbgs() << "Adding to stabsList: " << stab << "\n"; 814 stabsList.push_back(stab); 815 } 816 817 file.setDebugInfo(std::make_unique<StabsDebugInfo>(std::move(stabsList))); 818 819 // FIXME: Kill this off when we fix YAML memory ownership. 820 file.debugInfo()->setAllocator(std::move(allocator)); 821 822 return llvm::Error::success(); 823 } 824 825 static llvm::DataExtractor 826 dataExtractorFromSection(const NormalizedFile &normalizedFile, 827 const Section &S) { 828 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); 829 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 830 StringRef SecData(reinterpret_cast<const char*>(S.content.data()), 831 S.content.size()); 832 return llvm::DataExtractor(SecData, !isBig, is64 ? 8 : 4); 833 } 834 835 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE 836 // inspection" code if possible. 837 static uint64_t getCUAbbrevOffset(llvm::DataExtractor abbrevData, 838 uint64_t abbrCode) { 839 uint64_t curCode; 840 uint64_t offset = 0; 841 while ((curCode = abbrevData.getULEB128(&offset)) != abbrCode) { 842 // Tag 843 abbrevData.getULEB128(&offset); 844 // DW_CHILDREN 845 abbrevData.getU8(&offset); 846 // Attributes 847 while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset)) 848 ; 849 } 850 return offset; 851 } 852 853 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE 854 // inspection" code if possible. 855 static Expected<const char *> 856 getIndexedString(const NormalizedFile &normalizedFile, 857 llvm::dwarf::Form form, llvm::DataExtractor infoData, 858 uint64_t &infoOffset, const Section &stringsSection) { 859 if (form == llvm::dwarf::DW_FORM_string) 860 return infoData.getCStr(&infoOffset); 861 if (form != llvm::dwarf::DW_FORM_strp) 862 return llvm::make_error<GenericError>( 863 "string field encoded without DW_FORM_strp"); 864 uint64_t stringOffset = infoData.getU32(&infoOffset); 865 llvm::DataExtractor stringsData = 866 dataExtractorFromSection(normalizedFile, stringsSection); 867 return stringsData.getCStr(&stringOffset); 868 } 869 870 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE 871 // inspection" code if possible. 872 static llvm::Expected<TranslationUnitSource> 873 readCompUnit(const NormalizedFile &normalizedFile, 874 const Section &info, 875 const Section &abbrev, 876 const Section &strings, 877 StringRef path) { 878 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE 879 // inspection" code if possible. 880 uint64_t offset = 0; 881 llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32; 882 auto infoData = dataExtractorFromSection(normalizedFile, info); 883 uint32_t length = infoData.getU32(&offset); 884 if (length == llvm::dwarf::DW_LENGTH_DWARF64) { 885 Format = llvm::dwarf::DwarfFormat::DWARF64; 886 infoData.getU64(&offset); 887 } 888 else if (length >= llvm::dwarf::DW_LENGTH_lo_reserved) 889 return llvm::make_error<GenericError>("Malformed DWARF in " + path); 890 891 uint16_t version = infoData.getU16(&offset); 892 893 if (version < 2 || version > 4) 894 return llvm::make_error<GenericError>("Unsupported DWARF version in " + 895 path); 896 897 infoData.getU32(&offset); // Abbrev offset (should be zero) 898 uint8_t addrSize = infoData.getU8(&offset); 899 900 uint32_t abbrCode = infoData.getULEB128(&offset); 901 auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev); 902 uint64_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode); 903 uint64_t tag = abbrevData.getULEB128(&abbrevOffset); 904 if (tag != llvm::dwarf::DW_TAG_compile_unit) 905 return llvm::make_error<GenericError>("top level DIE is not a compile unit"); 906 // DW_CHILDREN 907 abbrevData.getU8(&abbrevOffset); 908 uint32_t name; 909 llvm::dwarf::Form form; 910 llvm::dwarf::FormParams formParams = {version, addrSize, Format}; 911 TranslationUnitSource tu; 912 while ((name = abbrevData.getULEB128(&abbrevOffset)) | 913 (form = static_cast<llvm::dwarf::Form>( 914 abbrevData.getULEB128(&abbrevOffset))) && 915 (name != 0 || form != 0)) { 916 switch (name) { 917 case llvm::dwarf::DW_AT_name: { 918 if (auto eName = getIndexedString(normalizedFile, form, infoData, offset, 919 strings)) 920 tu.name = *eName; 921 else 922 return eName.takeError(); 923 break; 924 } 925 case llvm::dwarf::DW_AT_comp_dir: { 926 if (auto eName = getIndexedString(normalizedFile, form, infoData, offset, 927 strings)) 928 tu.path = *eName; 929 else 930 return eName.takeError(); 931 break; 932 } 933 default: 934 llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams); 935 } 936 } 937 return tu; 938 } 939 940 llvm::Error parseDebugInfo(MachOFile &file, 941 const NormalizedFile &normalizedFile, bool copyRefs) { 942 943 // Find the interesting debug info sections. 944 const Section *debugInfo = nullptr; 945 const Section *debugAbbrev = nullptr; 946 const Section *debugStrings = nullptr; 947 948 for (auto &s : normalizedFile.sections) { 949 if (s.segmentName == "__DWARF") { 950 if (s.sectionName == "__debug_info") 951 debugInfo = &s; 952 else if (s.sectionName == "__debug_abbrev") 953 debugAbbrev = &s; 954 else if (s.sectionName == "__debug_str") 955 debugStrings = &s; 956 } 957 } 958 959 if (!debugInfo) 960 return parseStabs(file, normalizedFile, copyRefs); 961 962 if (debugInfo->content.size() == 0) 963 return llvm::Error::success(); 964 965 if (debugInfo->content.size() < 12) 966 return llvm::make_error<GenericError>("Malformed __debug_info section in " + 967 file.path() + ": too small"); 968 969 if (!debugAbbrev) 970 return llvm::make_error<GenericError>("Missing __dwarf_abbrev section in " + 971 file.path()); 972 973 if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev, 974 *debugStrings, file.path())) { 975 // FIXME: Kill of allocator and code under 'copyRefs' when we fix YAML 976 // memory ownership. 977 std::unique_ptr<BumpPtrAllocator> allocator; 978 if (copyRefs) { 979 allocator = std::make_unique<BumpPtrAllocator>(); 980 tuOrErr->name = copyDebugString(tuOrErr->name, *allocator); 981 tuOrErr->path = copyDebugString(tuOrErr->path, *allocator); 982 } 983 file.setDebugInfo(std::make_unique<DwarfDebugInfo>(std::move(*tuOrErr))); 984 if (copyRefs) 985 file.debugInfo()->setAllocator(std::move(allocator)); 986 } else 987 return tuOrErr.takeError(); 988 989 return llvm::Error::success(); 990 } 991 992 static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) { 993 if (is64) 994 return read64(addr, isBig); 995 996 int32_t res = read32(addr, isBig); 997 return res; 998 } 999 1000 /// --- Augmentation String Processing --- 1001 1002 struct CIEInfo { 1003 bool _augmentationDataPresent = false; 1004 bool _mayHaveEH = false; 1005 uint32_t _offsetOfLSDA = ~0U; 1006 uint32_t _offsetOfPersonality = ~0U; 1007 uint32_t _offsetOfFDEPointerEncoding = ~0U; 1008 uint32_t _augmentationDataLength = ~0U; 1009 }; 1010 1011 typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap; 1012 1013 static llvm::Error processAugmentationString(const uint8_t *augStr, 1014 CIEInfo &cieInfo, 1015 unsigned &len) { 1016 1017 if (augStr[0] == '\0') { 1018 len = 1; 1019 return llvm::Error::success(); 1020 } 1021 1022 if (augStr[0] != 'z') 1023 return llvm::make_error<GenericError>("expected 'z' at start of " 1024 "augmentation string"); 1025 1026 cieInfo._augmentationDataPresent = true; 1027 uint64_t idx = 1; 1028 1029 uint32_t offsetInAugmentationData = 0; 1030 while (augStr[idx] != '\0') { 1031 if (augStr[idx] == 'L') { 1032 cieInfo._offsetOfLSDA = offsetInAugmentationData; 1033 // This adds a single byte to the augmentation data. 1034 ++offsetInAugmentationData; 1035 ++idx; 1036 continue; 1037 } 1038 if (augStr[idx] == 'P') { 1039 cieInfo._offsetOfPersonality = offsetInAugmentationData; 1040 // This adds a single byte to the augmentation data for the encoding, 1041 // then a number of bytes for the pointer data. 1042 // FIXME: We are assuming 4 is correct here for the pointer size as we 1043 // always currently use delta32ToGOT. 1044 offsetInAugmentationData += 5; 1045 ++idx; 1046 continue; 1047 } 1048 if (augStr[idx] == 'R') { 1049 cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData; 1050 // This adds a single byte to the augmentation data. 1051 ++offsetInAugmentationData; 1052 ++idx; 1053 continue; 1054 } 1055 if (augStr[idx] == 'e') { 1056 if (augStr[idx + 1] != 'h') 1057 return llvm::make_error<GenericError>("expected 'eh' in " 1058 "augmentation string"); 1059 cieInfo._mayHaveEH = true; 1060 idx += 2; 1061 continue; 1062 } 1063 ++idx; 1064 } 1065 1066 cieInfo._augmentationDataLength = offsetInAugmentationData; 1067 1068 len = idx + 1; 1069 return llvm::Error::success(); 1070 } 1071 1072 static llvm::Error processCIE(const NormalizedFile &normalizedFile, 1073 MachOFile &file, 1074 mach_o::ArchHandler &handler, 1075 const Section *ehFrameSection, 1076 MachODefinedAtom *atom, 1077 uint64_t offset, 1078 CIEInfoMap &cieInfos) { 1079 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 1080 const uint8_t *frameData = atom->rawContent().data(); 1081 1082 CIEInfo cieInfo; 1083 1084 uint32_t size = read32(frameData, isBig); 1085 uint64_t cieIDField = size == 0xffffffffU 1086 ? sizeof(uint32_t) + sizeof(uint64_t) 1087 : sizeof(uint32_t); 1088 uint64_t versionField = cieIDField + sizeof(uint32_t); 1089 uint64_t augmentationStringField = versionField + sizeof(uint8_t); 1090 1091 unsigned augmentationStringLength = 0; 1092 if (auto err = processAugmentationString(frameData + augmentationStringField, 1093 cieInfo, augmentationStringLength)) 1094 return err; 1095 1096 if (cieInfo._offsetOfPersonality != ~0U) { 1097 // If we have augmentation data for the personality function, then we may 1098 // need to implicitly generate its relocation. 1099 1100 // Parse the EH Data field which is pointer sized. 1101 uint64_t EHDataField = augmentationStringField + augmentationStringLength; 1102 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); 1103 unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0); 1104 1105 // Parse Code Align Factor which is a ULEB128. 1106 uint64_t CodeAlignField = EHDataField + EHDataFieldSize; 1107 unsigned lengthFieldSize = 0; 1108 llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize); 1109 1110 // Parse Data Align Factor which is a SLEB128. 1111 uint64_t DataAlignField = CodeAlignField + lengthFieldSize; 1112 llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize); 1113 1114 // Parse Return Address Register which is a byte. 1115 uint64_t ReturnAddressField = DataAlignField + lengthFieldSize; 1116 1117 // Parse the augmentation length which is a ULEB128. 1118 uint64_t AugmentationLengthField = ReturnAddressField + 1; 1119 uint64_t AugmentationLength = 1120 llvm::decodeULEB128(frameData + AugmentationLengthField, 1121 &lengthFieldSize); 1122 1123 if (AugmentationLength != cieInfo._augmentationDataLength) 1124 return llvm::make_error<GenericError>("CIE augmentation data length " 1125 "mismatch"); 1126 1127 // Get the start address of the augmentation data. 1128 uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize; 1129 1130 // Parse the personality function from the augmentation data. 1131 uint64_t PersonalityField = 1132 AugmentationDataField + cieInfo._offsetOfPersonality; 1133 1134 // Parse the personality encoding. 1135 // FIXME: Verify that this is a 32-bit pcrel offset. 1136 uint64_t PersonalityFunctionField = PersonalityField + 1; 1137 1138 if (atom->begin() != atom->end()) { 1139 // If we have an explicit relocation, then make sure it matches this 1140 // offset as this is where we'd expect it to be applied to. 1141 DefinedAtom::reference_iterator CurrentRef = atom->begin(); 1142 if (CurrentRef->offsetInAtom() != PersonalityFunctionField) 1143 return llvm::make_error<GenericError>("CIE personality reloc at " 1144 "wrong offset"); 1145 1146 if (++CurrentRef != atom->end()) 1147 return llvm::make_error<GenericError>("CIE contains too many relocs"); 1148 } else { 1149 // Implicitly generate the personality function reloc. It's assumed to 1150 // be a delta32 offset to a GOT entry. 1151 // FIXME: Parse the encoding and check this. 1152 int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig); 1153 uint64_t funcAddress = ehFrameSection->address + offset + 1154 PersonalityFunctionField; 1155 funcAddress += funcDelta; 1156 1157 const MachODefinedAtom *func = nullptr; 1158 Reference::Addend addend; 1159 func = findAtomCoveringAddress(normalizedFile, file, funcAddress, 1160 addend); 1161 atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), 1162 handler.unwindRefToPersonalityFunctionKind(), 1163 PersonalityFunctionField, func, addend); 1164 } 1165 } else if (atom->begin() != atom->end()) { 1166 // Otherwise, we expect there to be no relocations in this atom as the only 1167 // relocation would have been to the personality function. 1168 return llvm::make_error<GenericError>("unexpected relocation in CIE"); 1169 } 1170 1171 1172 cieInfos[atom] = std::move(cieInfo); 1173 1174 return llvm::Error::success(); 1175 } 1176 1177 static llvm::Error processFDE(const NormalizedFile &normalizedFile, 1178 MachOFile &file, 1179 mach_o::ArchHandler &handler, 1180 const Section *ehFrameSection, 1181 MachODefinedAtom *atom, 1182 uint64_t offset, 1183 const CIEInfoMap &cieInfos) { 1184 1185 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 1186 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); 1187 1188 // Compiler wasn't lazy and actually told us what it meant. 1189 // Unfortunately, the compiler may not have generated references for all of 1190 // [cie, func, lsda] and so we still need to parse the FDE and add references 1191 // for any the compiler didn't generate. 1192 if (atom->begin() != atom->end()) 1193 atom->sortReferences(); 1194 1195 DefinedAtom::reference_iterator CurrentRef = atom->begin(); 1196 1197 // This helper returns the reference (if one exists) at the offset we are 1198 // currently processing. It automatically increments the ref iterator if we 1199 // do return a ref, and throws an error if we pass over a ref without 1200 // comsuming it. 1201 auto currentRefGetter = [&CurrentRef, 1202 &atom](uint64_t Offset)->const Reference* { 1203 // If there are no more refs found, then we are done. 1204 if (CurrentRef == atom->end()) 1205 return nullptr; 1206 1207 const Reference *Ref = *CurrentRef; 1208 1209 // If we haven't reached the offset for this reference, then return that 1210 // we don't yet have a reference to process. 1211 if (Offset < Ref->offsetInAtom()) 1212 return nullptr; 1213 1214 // If the offset is equal, then we want to process this ref. 1215 if (Offset == Ref->offsetInAtom()) { 1216 ++CurrentRef; 1217 return Ref; 1218 } 1219 1220 // The current ref is at an offset which is earlier than the current 1221 // offset, then we failed to consume it when we should have. In this case 1222 // throw an error. 1223 llvm::report_fatal_error("Skipped reference when processing FDE"); 1224 }; 1225 1226 // Helper to either get the reference at this current location, and verify 1227 // that it is of the expected type, or add a reference of that type. 1228 // Returns the reference target. 1229 auto verifyOrAddReference = [&](uint64_t targetAddress, 1230 Reference::KindValue refKind, 1231 uint64_t refAddress, 1232 bool allowsAddend)->const Atom* { 1233 if (auto *ref = currentRefGetter(refAddress)) { 1234 // The compiler already emitted a relocation for the CIE ref. This should 1235 // have been converted to the correct type of reference in 1236 // get[Pair]ReferenceInfo(). 1237 assert(ref->kindValue() == refKind && 1238 "Incorrect EHFrame reference kind"); 1239 return ref->target(); 1240 } 1241 Reference::Addend addend; 1242 auto *target = findAtomCoveringAddress(normalizedFile, file, 1243 targetAddress, addend); 1244 atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), 1245 refKind, refAddress, target, addend); 1246 1247 if (!allowsAddend) 1248 assert(!addend && "EHFrame reference cannot have addend"); 1249 return target; 1250 }; 1251 1252 const uint8_t *startFrameData = atom->rawContent().data(); 1253 const uint8_t *frameData = startFrameData; 1254 1255 uint32_t size = read32(frameData, isBig); 1256 uint64_t cieFieldInFDE = size == 0xffffffffU 1257 ? sizeof(uint32_t) + sizeof(uint64_t) 1258 : sizeof(uint32_t); 1259 1260 // Linker needs to fixup a reference from the FDE to its parent CIE (a 1261 // 32-bit byte offset backwards in the __eh_frame section). 1262 uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig); 1263 uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE; 1264 cieAddress -= cieDelta; 1265 1266 auto *cieRefTarget = verifyOrAddReference(cieAddress, 1267 handler.unwindRefToCIEKind(), 1268 cieFieldInFDE, false); 1269 const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget); 1270 assert(cie && cie->contentType() == DefinedAtom::typeCFI && 1271 "FDE's CIE field does not point at the start of a CIE."); 1272 1273 const CIEInfo &cieInfo = cieInfos.find(cie)->second; 1274 1275 // Linker needs to fixup reference from the FDE to the function it's 1276 // describing. FIXME: there are actually different ways to do this, and the 1277 // particular method used is specified in the CIE's augmentation fields 1278 // (hopefully) 1279 uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t); 1280 1281 int64_t functionFromFDE = readSPtr(is64, isBig, 1282 frameData + rangeFieldInFDE); 1283 uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE; 1284 rangeStart += functionFromFDE; 1285 1286 verifyOrAddReference(rangeStart, 1287 handler.unwindRefToFunctionKind(), 1288 rangeFieldInFDE, true); 1289 1290 // Handle the augmentation data if there is any. 1291 if (cieInfo._augmentationDataPresent) { 1292 // First process the augmentation data length field. 1293 uint64_t augmentationDataLengthFieldInFDE = 1294 rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t)); 1295 unsigned lengthFieldSize = 0; 1296 uint64_t augmentationDataLength = 1297 llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE, 1298 &lengthFieldSize); 1299 1300 if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) { 1301 1302 // Look at the augmentation data field. 1303 uint64_t augmentationDataFieldInFDE = 1304 augmentationDataLengthFieldInFDE + lengthFieldSize; 1305 1306 int64_t lsdaFromFDE = readSPtr(is64, isBig, 1307 frameData + augmentationDataFieldInFDE); 1308 uint64_t lsdaStart = 1309 ehFrameSection->address + offset + augmentationDataFieldInFDE + 1310 lsdaFromFDE; 1311 1312 verifyOrAddReference(lsdaStart, 1313 handler.unwindRefToFunctionKind(), 1314 augmentationDataFieldInFDE, true); 1315 } 1316 } 1317 1318 return llvm::Error::success(); 1319 } 1320 1321 llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile, 1322 MachOFile &file, 1323 mach_o::ArchHandler &handler) { 1324 1325 const Section *ehFrameSection = nullptr; 1326 for (auto §ion : normalizedFile.sections) 1327 if (section.segmentName == "__TEXT" && 1328 section.sectionName == "__eh_frame") { 1329 ehFrameSection = §ion; 1330 break; 1331 } 1332 1333 // No __eh_frame so nothing to do. 1334 if (!ehFrameSection) 1335 return llvm::Error::success(); 1336 1337 llvm::Error ehFrameErr = llvm::Error::success(); 1338 CIEInfoMap cieInfos; 1339 1340 file.eachAtomInSection(*ehFrameSection, 1341 [&](MachODefinedAtom *atom, uint64_t offset) -> void { 1342 assert(atom->contentType() == DefinedAtom::typeCFI); 1343 1344 // Bail out if we've encountered an error. 1345 if (ehFrameErr) 1346 return; 1347 1348 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 1349 if (ArchHandler::isDwarfCIE(isBig, atom)) 1350 ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection, 1351 atom, offset, cieInfos); 1352 else 1353 ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection, 1354 atom, offset, cieInfos); 1355 }); 1356 1357 return ehFrameErr; 1358 } 1359 1360 llvm::Error parseObjCImageInfo(const Section §, 1361 const NormalizedFile &normalizedFile, 1362 MachOFile &file) { 1363 1364 // struct objc_image_info { 1365 // uint32_t version; // initially 0 1366 // uint32_t flags; 1367 // }; 1368 1369 ArrayRef<uint8_t> content = sect.content; 1370 if (content.size() != 8) 1371 return llvm::make_error<GenericError>(sect.segmentName + "/" + 1372 sect.sectionName + 1373 " in file " + file.path() + 1374 " should be 8 bytes in size"); 1375 1376 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 1377 uint32_t version = read32(content.data(), isBig); 1378 if (version) 1379 return llvm::make_error<GenericError>(sect.segmentName + "/" + 1380 sect.sectionName + 1381 " in file " + file.path() + 1382 " should have version=0"); 1383 1384 uint32_t flags = read32(content.data() + 4, isBig); 1385 if (flags & (MachOLinkingContext::objc_supports_gc | 1386 MachOLinkingContext::objc_gc_only)) 1387 return llvm::make_error<GenericError>(sect.segmentName + "/" + 1388 sect.sectionName + 1389 " in file " + file.path() + 1390 " uses GC. This is not supported"); 1391 1392 if (flags & MachOLinkingContext::objc_retainReleaseForSimulator) 1393 file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator); 1394 else 1395 file.setObjcConstraint(MachOLinkingContext::objc_retainRelease); 1396 1397 file.setSwiftVersion((flags >> 8) & 0xFF); 1398 1399 return llvm::Error::success(); 1400 } 1401 1402 /// Converts normalized mach-o file into an lld::File and lld::Atoms. 1403 llvm::Expected<std::unique_ptr<lld::File>> 1404 objectToAtoms(const NormalizedFile &normalizedFile, StringRef path, 1405 bool copyRefs) { 1406 auto file = std::make_unique<MachOFile>(path); 1407 if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs)) 1408 return std::move(ec); 1409 return std::unique_ptr<File>(std::move(file)); 1410 } 1411 1412 llvm::Expected<std::unique_ptr<lld::File>> 1413 dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path, 1414 bool copyRefs) { 1415 // Instantiate SharedLibraryFile object. 1416 auto file = std::make_unique<MachODylibFile>(path); 1417 if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs)) 1418 return std::move(ec); 1419 return std::unique_ptr<File>(std::move(file)); 1420 } 1421 1422 } // anonymous namespace 1423 1424 namespace normalized { 1425 1426 static bool isObjCImageInfo(const Section §) { 1427 return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") || 1428 (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo"); 1429 } 1430 1431 llvm::Error 1432 normalizedObjectToAtoms(MachOFile *file, 1433 const NormalizedFile &normalizedFile, 1434 bool copyRefs) { 1435 LLVM_DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: " 1436 << file->path() << "\n"); 1437 bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0); 1438 1439 // Create atoms from each section. 1440 for (auto § : normalizedFile.sections) { 1441 1442 // If this is a debug-info section parse it specially. 1443 if (isDebugInfoSection(sect)) 1444 continue; 1445 1446 // If the file contains an objc_image_info struct, then we should parse the 1447 // ObjC flags and Swift version. 1448 if (isObjCImageInfo(sect)) { 1449 if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file)) 1450 return ec; 1451 // We then skip adding atoms for this section as we use the ObjCPass to 1452 // re-emit this data after it has been aggregated for all files. 1453 continue; 1454 } 1455 1456 bool customSectionName; 1457 DefinedAtom::ContentType atomType = atomTypeFromSection(sect, 1458 customSectionName); 1459 if (auto ec = processSection(atomType, sect, customSectionName, 1460 normalizedFile, *file, scatterable, copyRefs)) 1461 return ec; 1462 } 1463 // Create atoms from undefined symbols. 1464 for (auto &sym : normalizedFile.undefinedSymbols) { 1465 // Undefined symbols with n_value != 0 are actually tentative definitions. 1466 if (sym.value == Hex64(0)) { 1467 file->addUndefinedAtom(sym.name, copyRefs); 1468 } else { 1469 file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value, 1470 DefinedAtom::Alignment(1 << (sym.desc >> 8)), 1471 copyRefs); 1472 } 1473 } 1474 1475 // Convert mach-o relocations to References 1476 std::unique_ptr<mach_o::ArchHandler> handler 1477 = ArchHandler::create(normalizedFile.arch); 1478 for (auto § : normalizedFile.sections) { 1479 if (isDebugInfoSection(sect)) 1480 continue; 1481 if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable, 1482 *file, *handler)) 1483 return ec; 1484 } 1485 1486 // Add additional arch-specific References 1487 file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void { 1488 handler->addAdditionalReferences(*atom); 1489 }); 1490 1491 // Each __eh_frame section needs references to both __text (the function we're 1492 // providing unwind info for) and itself (FDE -> CIE). These aren't 1493 // represented in the relocations on some architectures, so we have to add 1494 // them back in manually there. 1495 if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler)) 1496 return ec; 1497 1498 // Process mach-o data-in-code regions array. That information is encoded in 1499 // atoms as References at each transition point. 1500 unsigned nextIndex = 0; 1501 for (const DataInCode &entry : normalizedFile.dataInCode) { 1502 ++nextIndex; 1503 const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset); 1504 if (!s) { 1505 return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address (" 1506 + Twine(entry.offset) 1507 + ") is not in any section")); 1508 } 1509 uint64_t offsetInSect = entry.offset - s->address; 1510 uint32_t offsetInAtom; 1511 MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect, 1512 &offsetInAtom); 1513 if (offsetInAtom + entry.length > atom->size()) { 1514 return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry " 1515 "(offset=" 1516 + Twine(entry.offset) 1517 + ", length=" 1518 + Twine(entry.length) 1519 + ") crosses atom boundary.")); 1520 } 1521 // Add reference that marks start of data-in-code. 1522 atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), 1523 handler->dataInCodeTransitionStart(*atom), 1524 offsetInAtom, atom, entry.kind); 1525 1526 // Peek at next entry, if it starts where this one ends, skip ending ref. 1527 if (nextIndex < normalizedFile.dataInCode.size()) { 1528 const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex]; 1529 if (nextEntry.offset == (entry.offset + entry.length)) 1530 continue; 1531 } 1532 1533 // If data goes to end of function, skip ending ref. 1534 if ((offsetInAtom + entry.length) == atom->size()) 1535 continue; 1536 1537 // Add reference that marks end of data-in-code. 1538 atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), 1539 handler->dataInCodeTransitionEnd(*atom), 1540 offsetInAtom+entry.length, atom, 0); 1541 } 1542 1543 // Cache some attributes on the file for use later. 1544 file->setFlags(normalizedFile.flags); 1545 file->setArch(normalizedFile.arch); 1546 file->setOS(normalizedFile.os); 1547 file->setMinVersion(normalizedFile.minOSverson); 1548 file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind); 1549 1550 // Sort references in each atom to their canonical order. 1551 for (const DefinedAtom* defAtom : file->defined()) { 1552 reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences(); 1553 } 1554 1555 if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs)) 1556 return err; 1557 1558 return llvm::Error::success(); 1559 } 1560 1561 llvm::Error 1562 normalizedDylibToAtoms(MachODylibFile *file, 1563 const NormalizedFile &normalizedFile, 1564 bool copyRefs) { 1565 file->setInstallName(normalizedFile.installName); 1566 file->setCompatVersion(normalizedFile.compatVersion); 1567 file->setCurrentVersion(normalizedFile.currentVersion); 1568 1569 // Tell MachODylibFile object about all symbols it exports. 1570 if (!normalizedFile.exportInfo.empty()) { 1571 // If exports trie exists, use it instead of traditional symbol table. 1572 for (const Export &exp : normalizedFile.exportInfo) { 1573 bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); 1574 // StringRefs from export iterator are ephemeral, so force copy. 1575 file->addExportedSymbol(exp.name, weakDef, true); 1576 } 1577 } else { 1578 for (auto &sym : normalizedFile.globalSymbols) { 1579 assert((sym.scope & N_EXT) && "only expect external symbols here"); 1580 bool weakDef = (sym.desc & N_WEAK_DEF); 1581 file->addExportedSymbol(sym.name, weakDef, copyRefs); 1582 } 1583 } 1584 // Tell MachODylibFile object about all dylibs it re-exports. 1585 for (const DependentDylib &dep : normalizedFile.dependentDylibs) { 1586 if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB) 1587 file->addReExportedDylib(dep.path); 1588 } 1589 return llvm::Error::success(); 1590 } 1591 1592 void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, 1593 StringRef &segmentName, 1594 StringRef §ionName, 1595 SectionType §ionType, 1596 SectionAttr §ionAttrs, 1597 bool &relocsToDefinedCanBeImplicit) { 1598 1599 for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; 1600 p->atomType != DefinedAtom::typeUnknown; ++p) { 1601 if (p->atomType != atomType) 1602 continue; 1603 // Wild carded entries are ignored for reverse lookups. 1604 if (p->segmentName.empty() || p->sectionName.empty()) 1605 continue; 1606 segmentName = p->segmentName; 1607 sectionName = p->sectionName; 1608 sectionType = p->sectionType; 1609 sectionAttrs = 0; 1610 relocsToDefinedCanBeImplicit = false; 1611 if (atomType == DefinedAtom::typeCode) 1612 sectionAttrs = S_ATTR_PURE_INSTRUCTIONS; 1613 if (atomType == DefinedAtom::typeCFI) 1614 relocsToDefinedCanBeImplicit = true; 1615 return; 1616 } 1617 llvm_unreachable("content type not yet supported"); 1618 } 1619 1620 llvm::Expected<std::unique_ptr<lld::File>> 1621 normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, 1622 bool copyRefs) { 1623 switch (normalizedFile.fileType) { 1624 case MH_DYLIB: 1625 case MH_DYLIB_STUB: 1626 return dylibToAtoms(normalizedFile, path, copyRefs); 1627 case MH_OBJECT: 1628 return objectToAtoms(normalizedFile, path, copyRefs); 1629 default: 1630 llvm_unreachable("unhandled MachO file type!"); 1631 } 1632 } 1633 1634 } // namespace normalized 1635 } // namespace mach_o 1636 } // namespace lld 1637