1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/NameResolver.h" 18 #include "bolt/Utils/Utils.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 23 #include "llvm/MC/MCAsmLayout.h" 24 #include "llvm/MC/MCAssembler.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 27 #include "llvm/MC/MCInstPrinter.h" 28 #include "llvm/MC/MCObjectStreamer.h" 29 #include "llvm/MC/MCObjectWriter.h" 30 #include "llvm/MC/MCRegisterInfo.h" 31 #include "llvm/MC/MCSectionELF.h" 32 #include "llvm/MC/MCStreamer.h" 33 #include "llvm/MC/MCSubtargetInfo.h" 34 #include "llvm/MC/MCSymbol.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/Regex.h" 37 #include <algorithm> 38 #include <functional> 39 #include <iterator> 40 #include <unordered_set> 41 42 using namespace llvm; 43 44 #undef DEBUG_TYPE 45 #define DEBUG_TYPE "bolt" 46 47 namespace opts { 48 49 cl::opt<bool> 50 NoHugePages("no-huge-pages", 51 cl::desc("use regular size pages for code alignment"), 52 cl::ZeroOrMore, 53 cl::Hidden, 54 cl::cat(BoltCategory)); 55 56 static cl::opt<bool> 57 PrintDebugInfo("print-debug-info", 58 cl::desc("print debug info when printing functions"), 59 cl::Hidden, 60 cl::ZeroOrMore, 61 cl::cat(BoltCategory)); 62 63 cl::opt<bool> 64 PrintRelocations("print-relocations", 65 cl::desc("print relocations when printing functions/objects"), 66 cl::Hidden, 67 cl::ZeroOrMore, 68 cl::cat(BoltCategory)); 69 70 static cl::opt<bool> 71 PrintMemData("print-mem-data", 72 cl::desc("print memory data annotations when printing functions"), 73 cl::Hidden, 74 cl::ZeroOrMore, 75 cl::cat(BoltCategory)); 76 77 } // namespace opts 78 79 namespace llvm { 80 namespace bolt { 81 82 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 83 std::unique_ptr<DWARFContext> DwCtx, 84 std::unique_ptr<Triple> TheTriple, 85 const Target *TheTarget, std::string TripleName, 86 std::unique_ptr<MCCodeEmitter> MCE, 87 std::unique_ptr<MCObjectFileInfo> MOFI, 88 std::unique_ptr<const MCAsmInfo> AsmInfo, 89 std::unique_ptr<const MCInstrInfo> MII, 90 std::unique_ptr<const MCSubtargetInfo> STI, 91 std::unique_ptr<MCInstPrinter> InstPrinter, 92 std::unique_ptr<const MCInstrAnalysis> MIA, 93 std::unique_ptr<MCPlusBuilder> MIB, 94 std::unique_ptr<const MCRegisterInfo> MRI, 95 std::unique_ptr<MCDisassembler> DisAsm) 96 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 97 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 98 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 99 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 100 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 101 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 102 Relocation::Arch = this->TheTriple->getArch(); 103 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 104 } 105 106 BinaryContext::~BinaryContext() { 107 for (BinarySection *Section : Sections) 108 delete Section; 109 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 110 delete InjectedFunction; 111 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 112 delete JTI.second; 113 clearBinaryData(); 114 } 115 116 /// Create BinaryContext for a given architecture \p ArchName and 117 /// triple \p TripleName. 118 std::unique_ptr<BinaryContext> 119 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 120 std::unique_ptr<DWARFContext> DwCtx) { 121 StringRef ArchName = ""; 122 StringRef FeaturesStr = ""; 123 switch (File->getArch()) { 124 case llvm::Triple::x86_64: 125 ArchName = "x86-64"; 126 FeaturesStr = "+nopl"; 127 break; 128 case llvm::Triple::aarch64: 129 ArchName = "aarch64"; 130 FeaturesStr = "+fp-armv8,+neon,+crypto,+dotprod,+crc,+lse,+ras,+rdm," 131 "+fullfp16,+spe,+fuse-aes,+rcpc"; 132 break; 133 default: 134 errs() << "BOLT-ERROR: Unrecognized machine in ELF file.\n"; 135 return nullptr; 136 } 137 138 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 139 const std::string TripleName = TheTriple->str(); 140 141 std::string Error; 142 const Target *TheTarget = 143 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 144 if (!TheTarget) { 145 errs() << "BOLT-ERROR: " << Error; 146 return nullptr; 147 } 148 149 std::unique_ptr<const MCRegisterInfo> MRI( 150 TheTarget->createMCRegInfo(TripleName)); 151 if (!MRI) { 152 errs() << "BOLT-ERROR: no register info for target " << TripleName << "\n"; 153 return nullptr; 154 } 155 156 // Set up disassembler. 157 std::unique_ptr<const MCAsmInfo> AsmInfo( 158 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 159 if (!AsmInfo) { 160 errs() << "BOLT-ERROR: no assembly info for target " << TripleName << "\n"; 161 return nullptr; 162 } 163 164 std::unique_ptr<const MCSubtargetInfo> STI( 165 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 166 if (!STI) { 167 errs() << "BOLT-ERROR: no subtarget info for target " << TripleName << "\n"; 168 return nullptr; 169 } 170 171 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 172 if (!MII) { 173 errs() << "BOLT-ERROR: no instruction info for target " << TripleName 174 << "\n"; 175 return nullptr; 176 } 177 178 std::unique_ptr<MCContext> Ctx( 179 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 180 std::unique_ptr<MCObjectFileInfo> MOFI( 181 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 182 Ctx->setObjectFileInfo(MOFI.get()); 183 // We do not support X86 Large code model. Change this in the future. 184 bool Large = false; 185 if (TheTriple->getArch() == llvm::Triple::aarch64) 186 Large = true; 187 unsigned LSDAEncoding = 188 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 189 unsigned TTypeEncoding = 190 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 191 if (IsPIC) { 192 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 193 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 194 TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | 195 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 196 } 197 198 std::unique_ptr<MCDisassembler> DisAsm( 199 TheTarget->createMCDisassembler(*STI, *Ctx)); 200 201 if (!DisAsm) { 202 errs() << "BOLT-ERROR: no disassembler for target " << TripleName << "\n"; 203 return nullptr; 204 } 205 206 std::unique_ptr<const MCInstrAnalysis> MIA( 207 TheTarget->createMCInstrAnalysis(MII.get())); 208 if (!MIA) { 209 errs() << "BOLT-ERROR: failed to create instruction analysis for target" 210 << TripleName << "\n"; 211 return nullptr; 212 } 213 214 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 215 std::unique_ptr<MCInstPrinter> InstructionPrinter( 216 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 217 *MII, *MRI)); 218 if (!InstructionPrinter) { 219 errs() << "BOLT-ERROR: no instruction printer for target " << TripleName 220 << '\n'; 221 return nullptr; 222 } 223 InstructionPrinter->setPrintImmHex(true); 224 225 std::unique_ptr<MCCodeEmitter> MCE( 226 TheTarget->createMCCodeEmitter(*MII, *MRI, *Ctx)); 227 228 // Make sure we don't miss any output on core dumps. 229 outs().SetUnbuffered(); 230 errs().SetUnbuffered(); 231 dbgs().SetUnbuffered(); 232 233 auto BC = std::make_unique<BinaryContext>( 234 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 235 std::string(TripleName), std::move(MCE), std::move(MOFI), 236 std::move(AsmInfo), std::move(MII), std::move(STI), 237 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 238 std::move(DisAsm)); 239 240 BC->TTypeEncoding = TTypeEncoding; 241 BC->LSDAEncoding = LSDAEncoding; 242 243 BC->MAB = std::unique_ptr<MCAsmBackend>( 244 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 245 246 BC->setFilename(File->getFileName()); 247 248 BC->HasFixedLoadAddress = !IsPIC; 249 250 return BC; 251 } 252 253 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 254 if (opts::HotText && 255 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 256 return true; 257 258 if (opts::HotData && 259 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 260 return true; 261 262 if (SymbolName == "_end") 263 return true; 264 265 return false; 266 } 267 268 std::unique_ptr<MCObjectWriter> 269 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 270 return MAB->createObjectWriter(OS); 271 } 272 273 bool BinaryContext::validateObjectNesting() const { 274 auto Itr = BinaryDataMap.begin(); 275 auto End = BinaryDataMap.end(); 276 bool Valid = true; 277 while (Itr != End) { 278 auto Next = std::next(Itr); 279 while (Next != End && 280 Itr->second->getSection() == Next->second->getSection() && 281 Itr->second->containsRange(Next->second->getAddress(), 282 Next->second->getSize())) { 283 if (Next->second->Parent != Itr->second) { 284 errs() << "BOLT-WARNING: object nesting incorrect for:\n" 285 << "BOLT-WARNING: " << *Itr->second << "\n" 286 << "BOLT-WARNING: " << *Next->second << "\n"; 287 Valid = false; 288 } 289 ++Next; 290 } 291 Itr = Next; 292 } 293 return Valid; 294 } 295 296 bool BinaryContext::validateHoles() const { 297 bool Valid = true; 298 for (BinarySection &Section : sections()) { 299 for (const Relocation &Rel : Section.relocations()) { 300 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 301 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 302 if (!BD) { 303 errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 304 << " 0x" << Twine::utohexstr(RelAddr) << " in " 305 << Section.getName() << "\n"; 306 Valid = false; 307 } else if (!BD->getAtomicRoot()) { 308 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 309 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 310 << Section.getName() << "\n"; 311 Valid = false; 312 } 313 } 314 } 315 return Valid; 316 } 317 318 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 319 const uint64_t Address = GAI->second->getAddress(); 320 const uint64_t Size = GAI->second->getSize(); 321 322 auto fixParents = [&](BinaryDataMapType::iterator Itr, 323 BinaryData *NewParent) { 324 BinaryData *OldParent = Itr->second->Parent; 325 Itr->second->Parent = NewParent; 326 ++Itr; 327 while (Itr != BinaryDataMap.end() && OldParent && 328 Itr->second->Parent == OldParent) { 329 Itr->second->Parent = NewParent; 330 ++Itr; 331 } 332 }; 333 334 // Check if the previous symbol contains the newly added symbol. 335 if (GAI != BinaryDataMap.begin()) { 336 BinaryData *Prev = std::prev(GAI)->second; 337 while (Prev) { 338 if (Prev->getSection() == GAI->second->getSection() && 339 Prev->containsRange(Address, Size)) { 340 fixParents(GAI, Prev); 341 } else { 342 fixParents(GAI, nullptr); 343 } 344 Prev = Prev->Parent; 345 } 346 } 347 348 // Check if the newly added symbol contains any subsequent symbols. 349 if (Size != 0) { 350 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 351 auto Itr = std::next(GAI); 352 while ( 353 Itr != BinaryDataMap.end() && 354 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 355 Itr->second->Parent = BD; 356 ++Itr; 357 } 358 } 359 } 360 361 iterator_range<BinaryContext::binary_data_iterator> 362 BinaryContext::getSubBinaryData(BinaryData *BD) { 363 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 364 auto End = Start; 365 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 366 ++End; 367 return make_range(Start, End); 368 } 369 370 std::pair<const MCSymbol *, uint64_t> 371 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 372 bool IsPCRel) { 373 uint64_t Addend = 0; 374 375 if (isAArch64()) { 376 // Check if this is an access to a constant island and create bookkeeping 377 // to keep track of it and emit it later as part of this function. 378 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 379 return std::make_pair(IslandSym, Addend); 380 381 // Detect custom code written in assembly that refers to arbitrary 382 // constant islands from other functions. Write this reference so we 383 // can pull this constant island and emit it as part of this function 384 // too. 385 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 386 if (IslandIter != AddressToConstantIslandMap.end()) { 387 if (MCSymbol *IslandSym = 388 IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) { 389 BF.createIslandDependency(IslandSym, IslandIter->second); 390 return std::make_pair(IslandSym, Addend); 391 } 392 } 393 } 394 395 // Note that the address does not necessarily have to reside inside 396 // a section, it could be an absolute address too. 397 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 398 if (Section && Section->isText()) { 399 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 400 if (Address != BF.getAddress()) { 401 // The address could potentially escape. Mark it as another entry 402 // point into the function. 403 if (opts::Verbosity >= 1) { 404 outs() << "BOLT-INFO: potentially escaped address 0x" 405 << Twine::utohexstr(Address) << " in function " << BF << '\n'; 406 } 407 BF.HasInternalLabelReference = true; 408 return std::make_pair( 409 BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend); 410 } 411 } else { 412 BF.InterproceduralReferences.insert(Address); 413 } 414 } 415 416 // With relocations, catch jump table references outside of the basic block 417 // containing the indirect jump. 418 if (HasRelocations) { 419 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 420 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 421 const MCSymbol *Symbol = 422 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 423 424 return std::make_pair(Symbol, Addend); 425 } 426 } 427 428 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 429 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 430 431 // TODO: use DWARF info to get size/alignment here? 432 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 433 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 434 return std::make_pair(TargetSymbol, Addend); 435 } 436 437 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 438 BinaryFunction &BF) { 439 if (!isX86()) 440 return MemoryContentsType::UNKNOWN; 441 442 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 443 if (!Section) { 444 // No section - possibly an absolute address. Since we don't allow 445 // internal function addresses to escape the function scope - we 446 // consider it a tail call. 447 if (opts::Verbosity > 1) { 448 errs() << "BOLT-WARNING: no section for address 0x" 449 << Twine::utohexstr(Address) << " referenced from function " << BF 450 << '\n'; 451 } 452 return MemoryContentsType::UNKNOWN; 453 } 454 455 if (Section->isVirtual()) { 456 // The contents are filled at runtime. 457 return MemoryContentsType::UNKNOWN; 458 } 459 460 // No support for jump tables in code yet. 461 if (Section->isText()) 462 return MemoryContentsType::UNKNOWN; 463 464 // Start with checking for PIC jump table. We expect non-PIC jump tables 465 // to have high 32 bits set to 0. 466 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 467 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 468 469 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 470 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 471 472 return MemoryContentsType::UNKNOWN; 473 } 474 475 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)? 476 bool isPotentialFragmentByName(BinaryFunction &Fragment, 477 BinaryFunction &Parent) { 478 for (StringRef Name : Parent.getNames()) { 479 std::string NamePrefix = Regex::escape(NameResolver::restore(Name)); 480 std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str(); 481 if (Fragment.hasRestoredNameRegex(NameRegex)) 482 return true; 483 } 484 return false; 485 } 486 487 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 488 const JumpTable::JumpTableType Type, 489 BinaryFunction &BF, 490 const uint64_t NextJTAddress, 491 JumpTable::OffsetsType *Offsets) { 492 // Is one of the targets __builtin_unreachable? 493 bool HasUnreachable = false; 494 495 // Number of targets other than __builtin_unreachable. 496 uint64_t NumRealEntries = 0; 497 498 constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max(); 499 auto addOffset = [&](uint64_t Offset) { 500 if (Offsets) 501 Offsets->emplace_back(Offset); 502 }; 503 504 auto doesBelongToFunction = [&](const uint64_t Addr, 505 BinaryFunction *TargetBF) -> bool { 506 if (BF.containsAddress(Addr)) 507 return true; 508 // Nothing to do if we failed to identify the containing function. 509 if (!TargetBF) 510 return false; 511 // Case 1: check if BF is a fragment and TargetBF is its parent. 512 if (BF.isFragment()) { 513 // Parent function may or may not be already registered. 514 // Set parent link based on function name matching heuristic. 515 return registerFragment(BF, *TargetBF); 516 } 517 // Case 2: check if TargetBF is a fragment and BF is its parent. 518 return TargetBF->isFragment() && registerFragment(*TargetBF, BF); 519 }; 520 521 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 522 if (!Section) 523 return false; 524 525 // The upper bound is defined by containing object, section limits, and 526 // the next jump table in memory. 527 uint64_t UpperBound = Section->getEndAddress(); 528 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 529 if (JumpTableBD && JumpTableBD->getSize()) { 530 assert(JumpTableBD->getEndAddress() <= UpperBound && 531 "data object cannot cross a section boundary"); 532 UpperBound = JumpTableBD->getEndAddress(); 533 } 534 if (NextJTAddress) 535 UpperBound = std::min(NextJTAddress, UpperBound); 536 537 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName() 538 << '\n'); 539 const uint64_t EntrySize = getJumpTableEntrySize(Type); 540 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 541 EntryAddress += EntrySize) { 542 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 543 << " -> "); 544 // Check if there's a proper relocation against the jump table entry. 545 if (HasRelocations) { 546 if (Type == JumpTable::JTT_PIC && 547 !DataPCRelocations.count(EntryAddress)) { 548 LLVM_DEBUG( 549 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 550 break; 551 } 552 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 553 LLVM_DEBUG( 554 dbgs() 555 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 556 break; 557 } 558 } 559 560 const uint64_t Value = 561 (Type == JumpTable::JTT_PIC) 562 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 563 : *getPointerAtAddress(EntryAddress); 564 565 // __builtin_unreachable() case. 566 if (Value == BF.getAddress() + BF.getSize()) { 567 addOffset(Value - BF.getAddress()); 568 HasUnreachable = true; 569 LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n"); 570 continue; 571 } 572 573 // Function or one of its fragments. 574 BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 575 576 // We assume that a jump table cannot have function start as an entry. 577 if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) { 578 LLVM_DEBUG({ 579 if (!BF.containsAddress(Value)) { 580 dbgs() << "FAIL: function doesn't contain this address\n"; 581 if (TargetBF) { 582 dbgs() << " ! function containing this address: " 583 << TargetBF->getPrintName() << '\n'; 584 if (TargetBF->isFragment()) 585 dbgs() << " ! is a fragment\n"; 586 for (BinaryFunction *TargetParent : TargetBF->ParentFragments) 587 dbgs() << " ! its parent is " 588 << (TargetParent ? TargetParent->getPrintName() : "(none)") 589 << '\n'; 590 } 591 } 592 if (Value == BF.getAddress()) 593 dbgs() << "FAIL: jump table cannot have function start as an entry\n"; 594 }); 595 break; 596 } 597 598 // Check there's an instruction at this offset. 599 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 600 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 601 LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n"); 602 break; 603 } 604 605 ++NumRealEntries; 606 607 if (TargetBF == &BF) { 608 // Address inside the function. 609 addOffset(Value - TargetBF->getAddress()); 610 LLVM_DEBUG(dbgs() << "OK: real entry\n"); 611 } else { 612 // Address in split fragment. 613 BF.setHasSplitJumpTable(true); 614 // Add invalid offset for proper identification of jump table size. 615 addOffset(INVALID_OFFSET); 616 LLVM_DEBUG(dbgs() << "OK: address in split fragment " 617 << TargetBF->getPrintName() << '\n'); 618 } 619 } 620 621 // It's a jump table if the number of real entries is more than 1, or there's 622 // one real entry and "unreachable" targets. If there are only multiple 623 // "unreachable" targets, then it's not a jump table. 624 return NumRealEntries + HasUnreachable >= 2; 625 } 626 627 void BinaryContext::populateJumpTables() { 628 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 629 << '\n'); 630 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 631 ++JTI) { 632 JumpTable *JT = JTI->second; 633 BinaryFunction &BF = *JT->Parent; 634 635 if (!BF.isSimple()) 636 continue; 637 638 uint64_t NextJTAddress = 0; 639 auto NextJTI = std::next(JTI); 640 if (NextJTI != JTE) 641 NextJTAddress = NextJTI->second->getAddress(); 642 643 const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF, 644 NextJTAddress, &JT->OffsetEntries); 645 if (!Success) { 646 dbgs() << "failed to analyze jump table in function " << BF << '\n'; 647 JT->print(dbgs()); 648 if (NextJTI != JTE) { 649 dbgs() << "next jump table at 0x" 650 << Twine::utohexstr(NextJTI->second->getAddress()) 651 << " belongs to function " << *NextJTI->second->Parent << '\n'; 652 NextJTI->second->print(dbgs()); 653 } 654 llvm_unreachable("jump table heuristic failure"); 655 } 656 657 for (uint64_t EntryOffset : JT->OffsetEntries) { 658 if (EntryOffset == BF.getSize()) 659 BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize()); 660 else 661 BF.registerReferencedOffset(EntryOffset); 662 } 663 664 // In strict mode, erase PC-relative relocation record. Later we check that 665 // all such records are erased and thus have been accounted for. 666 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 667 for (uint64_t Address = JT->getAddress(); 668 Address < JT->getAddress() + JT->getSize(); 669 Address += JT->EntrySize) { 670 DataPCRelocations.erase(DataPCRelocations.find(Address)); 671 } 672 } 673 674 // Mark to skip the function and all its fragments. 675 if (BF.hasSplitJumpTable()) 676 FragmentsToSkip.push_back(&BF); 677 } 678 679 if (opts::StrictMode && DataPCRelocations.size()) { 680 LLVM_DEBUG({ 681 dbgs() << DataPCRelocations.size() 682 << " unclaimed PC-relative relocations left in data:\n"; 683 for (uint64_t Reloc : DataPCRelocations) 684 dbgs() << Twine::utohexstr(Reloc) << '\n'; 685 }); 686 assert(0 && "unclaimed PC-relative relocations left in data\n"); 687 } 688 clearList(DataPCRelocations); 689 } 690 691 void BinaryContext::skipMarkedFragments() { 692 // Unique functions in the vector. 693 std::unordered_set<BinaryFunction *> UniqueFunctions(FragmentsToSkip.begin(), 694 FragmentsToSkip.end()); 695 // Copy the functions back to FragmentsToSkip. 696 FragmentsToSkip.assign(UniqueFunctions.begin(), UniqueFunctions.end()); 697 auto addToWorklist = [&](BinaryFunction *Function) -> void { 698 if (UniqueFunctions.count(Function)) 699 return; 700 FragmentsToSkip.push_back(Function); 701 UniqueFunctions.insert(Function); 702 }; 703 // Functions containing split jump tables need to be skipped with all 704 // fragments (transitively). 705 for (size_t I = 0; I != FragmentsToSkip.size(); I++) { 706 BinaryFunction *BF = FragmentsToSkip[I]; 707 assert(UniqueFunctions.count(BF) && 708 "internal error in traversing function fragments"); 709 if (opts::Verbosity >= 1) 710 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 711 BF->setIgnored(); 712 std::for_each(BF->Fragments.begin(), BF->Fragments.end(), addToWorklist); 713 std::for_each(BF->ParentFragments.begin(), BF->ParentFragments.end(), 714 addToWorklist); 715 } 716 if (!FragmentsToSkip.empty()) 717 errs() << "BOLT-WARNING: ignored " << FragmentsToSkip.size() << " function" 718 << (FragmentsToSkip.size() == 1 ? "" : "s") 719 << " due to cold fragments\n"; 720 FragmentsToSkip.clear(); 721 } 722 723 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 724 uint64_t Size, 725 uint16_t Alignment, 726 unsigned Flags) { 727 auto Itr = BinaryDataMap.find(Address); 728 if (Itr != BinaryDataMap.end()) { 729 assert(Itr->second->getSize() == Size || !Size); 730 return Itr->second->getSymbol(); 731 } 732 733 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 734 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 735 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 736 } 737 738 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 739 return Ctx->getOrCreateSymbol(Name); 740 } 741 742 BinaryFunction *BinaryContext::createBinaryFunction( 743 const std::string &Name, BinarySection &Section, uint64_t Address, 744 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 745 auto Result = BinaryFunctions.emplace( 746 Address, BinaryFunction(Name, Section, Address, Size, *this)); 747 assert(Result.second == true && "unexpected duplicate function"); 748 BinaryFunction *BF = &Result.first->second; 749 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 750 Alignment); 751 setSymbolToFunctionMap(BF->getSymbol(), BF); 752 return BF; 753 } 754 755 const MCSymbol * 756 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 757 JumpTable::JumpTableType Type) { 758 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 759 assert(JT->Type == Type && "jump table types have to match"); 760 assert(JT->Parent == &Function && 761 "cannot re-use jump table of a different function"); 762 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 763 764 return JT->getFirstLabel(); 765 } 766 767 // Re-use the existing symbol if possible. 768 MCSymbol *JTLabel = nullptr; 769 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 770 if (!isInternalSymbolName(Object->getSymbol()->getName())) 771 JTLabel = Object->getSymbol(); 772 } 773 774 const uint64_t EntrySize = getJumpTableEntrySize(Type); 775 if (!JTLabel) { 776 const std::string JumpTableName = generateJumpTableName(Function, Address); 777 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 778 } 779 780 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 781 << " in function " << Function << '\n'); 782 783 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 784 JumpTable::LabelMapType{{0, JTLabel}}, Function, 785 *getSectionForAddress(Address)); 786 JumpTables.emplace(Address, JT); 787 788 // Duplicate the entry for the parent function for easy access. 789 Function.JumpTables.emplace(Address, JT); 790 791 return JTLabel; 792 } 793 794 std::pair<uint64_t, const MCSymbol *> 795 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 796 const MCSymbol *OldLabel) { 797 auto L = scopeLock(); 798 unsigned Offset = 0; 799 bool Found = false; 800 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 801 if (Elmt.second != OldLabel) 802 continue; 803 Offset = Elmt.first; 804 Found = true; 805 break; 806 } 807 assert(Found && "Label not found"); 808 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 809 JumpTable *NewJT = 810 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 811 JumpTable::LabelMapType{{Offset, NewLabel}}, Function, 812 *getSectionForAddress(JT->getAddress())); 813 NewJT->Entries = JT->Entries; 814 NewJT->Counts = JT->Counts; 815 uint64_t JumpTableID = ++DuplicatedJumpTables; 816 // Invert it to differentiate from regular jump tables whose IDs are their 817 // addresses in the input binary memory space 818 JumpTableID = ~JumpTableID; 819 JumpTables.emplace(JumpTableID, NewJT); 820 Function.JumpTables.emplace(JumpTableID, NewJT); 821 return std::make_pair(JumpTableID, NewLabel); 822 } 823 824 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 825 uint64_t Address) { 826 size_t Id; 827 uint64_t Offset = 0; 828 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 829 Offset = Address - JT->getAddress(); 830 auto Itr = JT->Labels.find(Offset); 831 if (Itr != JT->Labels.end()) 832 return std::string(Itr->second->getName()); 833 Id = JumpTableIds.at(JT->getAddress()); 834 } else { 835 Id = JumpTableIds[Address] = BF.JumpTables.size(); 836 } 837 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 838 (Offset ? ("." + std::to_string(Offset)) : "")); 839 } 840 841 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 842 // FIXME: aarch64 support is missing. 843 if (!isX86()) 844 return true; 845 846 if (BF.getSize() == BF.getMaxSize()) 847 return true; 848 849 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 850 assert(FunctionData && "cannot get function as data"); 851 852 uint64_t Offset = BF.getSize(); 853 MCInst Instr; 854 uint64_t InstrSize = 0; 855 uint64_t InstrAddress = BF.getAddress() + Offset; 856 using std::placeholders::_1; 857 858 // Skip instructions that satisfy the predicate condition. 859 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 860 const uint64_t StartOffset = Offset; 861 for (; Offset < BF.getMaxSize(); 862 Offset += InstrSize, InstrAddress += InstrSize) { 863 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 864 InstrAddress, nulls())) 865 break; 866 if (!Predicate(Instr)) 867 break; 868 } 869 870 return Offset - StartOffset; 871 }; 872 873 // Skip a sequence of zero bytes. 874 auto skipZeros = [&]() { 875 const uint64_t StartOffset = Offset; 876 for (; Offset < BF.getMaxSize(); ++Offset) 877 if ((*FunctionData)[Offset] != 0) 878 break; 879 880 return Offset - StartOffset; 881 }; 882 883 // Accept the whole padding area filled with breakpoints. 884 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 885 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 886 return true; 887 888 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 889 890 // Some functions have a jump to the next function or to the padding area 891 // inserted after the body. 892 auto isSkipJump = [&](const MCInst &Instr) { 893 uint64_t TargetAddress = 0; 894 if (MIB->isUnconditionalBranch(Instr) && 895 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 896 if (TargetAddress >= InstrAddress + InstrSize && 897 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 898 return true; 899 } 900 } 901 return false; 902 }; 903 904 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 905 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 906 skipZeros()) 907 ; 908 909 if (Offset == BF.getMaxSize()) 910 return true; 911 912 if (opts::Verbosity >= 1) { 913 errs() << "BOLT-WARNING: bad padding at address 0x" 914 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 915 << " starting at offset " << (Offset - BF.getSize()) 916 << " in function " << BF << '\n' 917 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 918 << '\n'; 919 } 920 921 return false; 922 } 923 924 void BinaryContext::adjustCodePadding() { 925 for (auto &BFI : BinaryFunctions) { 926 BinaryFunction &BF = BFI.second; 927 if (!shouldEmit(BF)) 928 continue; 929 930 if (!hasValidCodePadding(BF)) { 931 if (HasRelocations) { 932 if (opts::Verbosity >= 1) { 933 outs() << "BOLT-INFO: function " << BF 934 << " has invalid padding. Ignoring the function.\n"; 935 } 936 BF.setIgnored(); 937 } else { 938 BF.setMaxSize(BF.getSize()); 939 } 940 } 941 } 942 } 943 944 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 945 uint64_t Size, 946 uint16_t Alignment, 947 unsigned Flags) { 948 // Register the name with MCContext. 949 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 950 951 auto GAI = BinaryDataMap.find(Address); 952 BinaryData *BD; 953 if (GAI == BinaryDataMap.end()) { 954 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 955 BinarySection &Section = 956 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 957 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 958 Section, Flags); 959 GAI = BinaryDataMap.emplace(Address, BD).first; 960 GlobalSymbols[Name] = BD; 961 updateObjectNesting(GAI); 962 } else { 963 BD = GAI->second; 964 if (!BD->hasName(Name)) { 965 GlobalSymbols[Name] = BD; 966 BD->Symbols.push_back(Symbol); 967 } 968 } 969 970 return Symbol; 971 } 972 973 const BinaryData * 974 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 975 auto NI = BinaryDataMap.lower_bound(Address); 976 auto End = BinaryDataMap.end(); 977 if ((NI != End && Address == NI->first) || 978 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 979 if (NI->second->containsAddress(Address)) 980 return NI->second; 981 982 // If this is a sub-symbol, see if a parent data contains the address. 983 const BinaryData *BD = NI->second->getParent(); 984 while (BD) { 985 if (BD->containsAddress(Address)) 986 return BD; 987 BD = BD->getParent(); 988 } 989 } 990 return nullptr; 991 } 992 993 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 994 auto NI = BinaryDataMap.find(Address); 995 assert(NI != BinaryDataMap.end()); 996 if (NI == BinaryDataMap.end()) 997 return false; 998 // TODO: it's possible that a jump table starts at the same address 999 // as a larger blob of private data. When we set the size of the 1000 // jump table, it might be smaller than the total blob size. In this 1001 // case we just leave the original size since (currently) it won't really 1002 // affect anything. 1003 assert((!NI->second->Size || NI->second->Size == Size || 1004 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1005 "can't change the size of a symbol that has already had its " 1006 "size set"); 1007 if (!NI->second->Size) { 1008 NI->second->Size = Size; 1009 updateObjectNesting(NI); 1010 return true; 1011 } 1012 return false; 1013 } 1014 1015 void BinaryContext::generateSymbolHashes() { 1016 auto isPadding = [](const BinaryData &BD) { 1017 StringRef Contents = BD.getSection().getContents(); 1018 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1019 return (BD.getName().startswith("HOLEat") || 1020 SymData.find_first_not_of(0) == StringRef::npos); 1021 }; 1022 1023 uint64_t NumCollisions = 0; 1024 for (auto &Entry : BinaryDataMap) { 1025 BinaryData &BD = *Entry.second; 1026 StringRef Name = BD.getName(); 1027 1028 if (!isInternalSymbolName(Name)) 1029 continue; 1030 1031 // First check if a non-anonymous alias exists and move it to the front. 1032 if (BD.getSymbols().size() > 1) { 1033 auto Itr = std::find_if(BD.getSymbols().begin(), BD.getSymbols().end(), 1034 [&](const MCSymbol *Symbol) { 1035 return !isInternalSymbolName(Symbol->getName()); 1036 }); 1037 if (Itr != BD.getSymbols().end()) { 1038 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1039 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1040 continue; 1041 } 1042 } 1043 1044 // We have to skip 0 size symbols since they will all collide. 1045 if (BD.getSize() == 0) { 1046 continue; 1047 } 1048 1049 const uint64_t Hash = BD.getSection().hash(BD); 1050 const size_t Idx = Name.find("0x"); 1051 std::string NewName = 1052 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1053 if (getBinaryDataByName(NewName)) { 1054 // Ignore collisions for symbols that appear to be padding 1055 // (i.e. all zeros or a "hole") 1056 if (!isPadding(BD)) { 1057 if (opts::Verbosity) { 1058 errs() << "BOLT-WARNING: collision detected when hashing " << BD 1059 << " with new name (" << NewName << "), skipping.\n"; 1060 } 1061 ++NumCollisions; 1062 } 1063 continue; 1064 } 1065 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1066 GlobalSymbols[NewName] = &BD; 1067 } 1068 if (NumCollisions) { 1069 errs() << "BOLT-WARNING: " << NumCollisions 1070 << " collisions detected while hashing binary objects"; 1071 if (!opts::Verbosity) 1072 errs() << ". Use -v=1 to see the list."; 1073 errs() << '\n'; 1074 } 1075 } 1076 1077 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1078 BinaryFunction &Function) const { 1079 if (!isPotentialFragmentByName(TargetFunction, Function)) 1080 return false; 1081 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1082 if (TargetFunction.isParentFragment(&Function)) 1083 return true; 1084 TargetFunction.addParentFragment(Function); 1085 Function.addFragment(TargetFunction); 1086 if (!HasRelocations) { 1087 TargetFunction.setSimple(false); 1088 Function.setSimple(false); 1089 } 1090 if (opts::Verbosity >= 1) { 1091 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 1092 << Function << '\n'; 1093 } 1094 return true; 1095 } 1096 1097 void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) { 1098 for (uint64_t Address : Function.InterproceduralReferences) { 1099 if (!Address) 1100 continue; 1101 1102 BinaryFunction *TargetFunction = 1103 getBinaryFunctionContainingAddress(Address); 1104 if (&Function == TargetFunction) 1105 continue; 1106 1107 if (TargetFunction) { 1108 if (TargetFunction->IsFragment && 1109 !registerFragment(*TargetFunction, Function)) { 1110 errs() << "BOLT-WARNING: interprocedural reference between unrelated " 1111 "fragments: " 1112 << Function.getPrintName() << " and " 1113 << TargetFunction->getPrintName() << '\n'; 1114 } 1115 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1116 TargetFunction->addEntryPointAtOffset(Offset); 1117 1118 continue; 1119 } 1120 1121 // Check if address falls in function padding space - this could be 1122 // unmarked data in code. In this case adjust the padding space size. 1123 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1124 assert(Section && "cannot get section for referenced address"); 1125 1126 if (!Section->isText()) 1127 continue; 1128 1129 // PLT requires special handling and could be ignored in this context. 1130 StringRef SectionName = Section->getName(); 1131 if (SectionName == ".plt" || SectionName == ".plt.got") 1132 continue; 1133 1134 if (opts::processAllFunctions()) { 1135 errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1136 << "object in code at address 0x" << Twine::utohexstr(Address) 1137 << " belonging to section " << SectionName << " in current mode\n"; 1138 exit(1); 1139 } 1140 1141 TargetFunction = getBinaryFunctionContainingAddress(Address, 1142 /*CheckPastEnd=*/false, 1143 /*UseMaxSize=*/true); 1144 // We are not going to overwrite non-simple functions, but for simple 1145 // ones - adjust the padding size. 1146 if (TargetFunction && TargetFunction->isSimple()) { 1147 errs() << "BOLT-WARNING: function " << *TargetFunction 1148 << " has an object detected in a padding region at address 0x" 1149 << Twine::utohexstr(Address) << '\n'; 1150 TargetFunction->setMaxSize(TargetFunction->getSize()); 1151 } 1152 } 1153 1154 clearList(Function.InterproceduralReferences); 1155 } 1156 1157 void BinaryContext::postProcessSymbolTable() { 1158 fixBinaryDataHoles(); 1159 bool Valid = true; 1160 for (auto &Entry : BinaryDataMap) { 1161 BinaryData *BD = Entry.second; 1162 if ((BD->getName().startswith("SYMBOLat") || 1163 BD->getName().startswith("DATAat")) && 1164 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1165 BD->getSection()) { 1166 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1167 Valid = false; 1168 } 1169 } 1170 assert(Valid); 1171 generateSymbolHashes(); 1172 } 1173 1174 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1175 BinaryFunction &ParentBF) { 1176 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1177 "cannot merge functions with multiple entry points"); 1178 1179 std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex, 1180 std::defer_lock); 1181 std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock( 1182 SymbolToFunctionMapMutex, std::defer_lock); 1183 1184 const StringRef ChildName = ChildBF.getOneName(); 1185 1186 // Move symbols over and update bookkeeping info. 1187 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1188 ParentBF.getSymbols().push_back(Symbol); 1189 WriteSymbolMapLock.lock(); 1190 SymbolToFunctionMap[Symbol] = &ParentBF; 1191 WriteSymbolMapLock.unlock(); 1192 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1193 } 1194 ChildBF.getSymbols().clear(); 1195 1196 // Move other names the child function is known under. 1197 std::move(ChildBF.Aliases.begin(), ChildBF.Aliases.end(), 1198 std::back_inserter(ParentBF.Aliases)); 1199 ChildBF.Aliases.clear(); 1200 1201 if (HasRelocations) { 1202 // Merge execution counts of ChildBF into those of ParentBF. 1203 // Without relocations, we cannot reliably merge profiles as both functions 1204 // continue to exist and either one can be executed. 1205 ChildBF.mergeProfileDataInto(ParentBF); 1206 1207 std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex, 1208 std::defer_lock); 1209 std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex, 1210 std::defer_lock); 1211 // Remove ChildBF from the global set of functions in relocs mode. 1212 ReadBfsLock.lock(); 1213 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1214 ReadBfsLock.unlock(); 1215 1216 assert(FI != BinaryFunctions.end() && "function not found"); 1217 assert(&ChildBF == &FI->second && "function mismatch"); 1218 1219 WriteBfsLock.lock(); 1220 ChildBF.clearDisasmState(); 1221 FI = BinaryFunctions.erase(FI); 1222 WriteBfsLock.unlock(); 1223 1224 } else { 1225 // In non-relocation mode we keep the function, but rename it. 1226 std::string NewName = "__ICF_" + ChildName.str(); 1227 1228 WriteCtxLock.lock(); 1229 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1230 WriteCtxLock.unlock(); 1231 1232 ChildBF.setFolded(&ParentBF); 1233 } 1234 } 1235 1236 void BinaryContext::fixBinaryDataHoles() { 1237 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1238 1239 for (BinarySection &Section : allocatableSections()) { 1240 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1241 1242 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1243 BinaryData *BD = Itr->second; 1244 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1245 (BD->getName().startswith("SYMBOLat0x") || 1246 BD->getName().startswith("DATAat0x") || 1247 BD->getName().startswith("ANONYMOUS"))); 1248 return !isHole && BD->getSection() == Section && !BD->getParent(); 1249 }; 1250 1251 auto BDStart = BinaryDataMap.begin(); 1252 auto BDEnd = BinaryDataMap.end(); 1253 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1254 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1255 1256 uint64_t EndAddress = Section.getAddress(); 1257 1258 while (Itr != End) { 1259 if (Itr->second->getAddress() > EndAddress) { 1260 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1261 Holes.emplace_back(EndAddress, Gap); 1262 } 1263 EndAddress = Itr->second->getEndAddress(); 1264 ++Itr; 1265 } 1266 1267 if (EndAddress < Section.getEndAddress()) 1268 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1269 1270 // If there is already a symbol at the start of the hole, grow that symbol 1271 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1272 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1273 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1274 if (BD) { 1275 // BD->getSection() can be != Section if there are sections that 1276 // overlap. In this case it is probably safe to just skip the holes 1277 // since the overlapping section will not(?) have any symbols in it. 1278 if (BD->getSection() == Section) 1279 setBinaryDataSize(Hole.first, Hole.second); 1280 } else { 1281 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1282 } 1283 } 1284 } 1285 1286 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1287 assert(validateHoles() && "top level hole detected in object map"); 1288 } 1289 1290 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1291 const BinarySection *CurrentSection = nullptr; 1292 bool FirstSection = true; 1293 1294 for (auto &Entry : BinaryDataMap) { 1295 const BinaryData *BD = Entry.second; 1296 const BinarySection &Section = BD->getSection(); 1297 if (FirstSection || Section != *CurrentSection) { 1298 uint64_t Address, Size; 1299 StringRef Name = Section.getName(); 1300 if (Section) { 1301 Address = Section.getAddress(); 1302 Size = Section.getSize(); 1303 } else { 1304 Address = BD->getAddress(); 1305 Size = BD->getSize(); 1306 } 1307 OS << "BOLT-INFO: Section " << Name << ", " 1308 << "0x" + Twine::utohexstr(Address) << ":" 1309 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1310 CurrentSection = &Section; 1311 FirstSection = false; 1312 } 1313 1314 OS << "BOLT-INFO: "; 1315 const BinaryData *P = BD->getParent(); 1316 while (P) { 1317 OS << " "; 1318 P = P->getParent(); 1319 } 1320 OS << *BD << "\n"; 1321 } 1322 } 1323 1324 Expected<unsigned> 1325 BinaryContext::getDwarfFile(StringRef Directory, StringRef FileName, 1326 unsigned FileNumber, 1327 Optional<MD5::MD5Result> Checksum, 1328 Optional<StringRef> Source, unsigned CUID) { 1329 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1330 return Table.tryGetFile(Directory, FileName, Checksum, Source, 1331 Ctx->getDwarfVersion(), FileNumber); 1332 } 1333 1334 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1335 const uint32_t SrcCUID, 1336 unsigned FileIndex) { 1337 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1338 const DWARFDebugLine::LineTable *LineTable = 1339 DwCtx->getLineTableForUnit(SrcUnit); 1340 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1341 LineTable->Prologue.FileNames; 1342 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1343 // means empty dir. 1344 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1345 "FileIndex out of range for the compilation unit."); 1346 StringRef Dir = ""; 1347 if (FileNames[FileIndex - 1].DirIdx != 0) { 1348 if (Optional<const char *> DirName = dwarf::toString( 1349 LineTable->Prologue 1350 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1351 Dir = *DirName; 1352 } 1353 } 1354 StringRef FileName = ""; 1355 if (Optional<const char *> FName = 1356 dwarf::toString(FileNames[FileIndex - 1].Name)) 1357 FileName = *FName; 1358 assert(FileName != ""); 1359 return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID)); 1360 } 1361 1362 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1363 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1364 std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), 1365 SortedFunctions.begin(), 1366 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1367 return &BFI.second; 1368 }); 1369 1370 std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(), 1371 [](const BinaryFunction *A, const BinaryFunction *B) { 1372 if (A->hasValidIndex() && B->hasValidIndex()) { 1373 return A->getIndex() < B->getIndex(); 1374 } 1375 return A->hasValidIndex(); 1376 }); 1377 return SortedFunctions; 1378 } 1379 1380 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1381 std::vector<BinaryFunction *> AllFunctions; 1382 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1383 std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), 1384 std::back_inserter(AllFunctions), 1385 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1386 return &BFI.second; 1387 }); 1388 std::copy(InjectedBinaryFunctions.begin(), InjectedBinaryFunctions.end(), 1389 std::back_inserter(AllFunctions)); 1390 1391 return AllFunctions; 1392 } 1393 1394 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1395 auto Iter = DWOCUs.find(DWOId); 1396 if (Iter == DWOCUs.end()) 1397 return None; 1398 1399 return Iter->second; 1400 } 1401 1402 DWARFContext *BinaryContext::getDWOContext() { 1403 if (DWOCUs.empty()) 1404 return nullptr; 1405 return &DWOCUs.begin()->second->getContext(); 1406 } 1407 1408 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1409 void BinaryContext::preprocessDWODebugInfo() { 1410 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1411 DWARFUnit *const DwarfUnit = CU.get(); 1412 if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1413 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1414 if (!DWOCU->isDWOUnit()) { 1415 std::string DWOName = dwarf::toString( 1416 DwarfUnit->getUnitDIE().find( 1417 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1418 ""); 1419 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1420 << DWOName 1421 << " was not retrieved and won't be updated. Please check " 1422 "relative path.\n"; 1423 continue; 1424 } 1425 DWOCUs[*DWOId] = DWOCU; 1426 } 1427 } 1428 } 1429 1430 void BinaryContext::preprocessDebugInfo() { 1431 struct CURange { 1432 uint64_t LowPC; 1433 uint64_t HighPC; 1434 DWARFUnit *Unit; 1435 1436 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1437 }; 1438 1439 // Building a map of address ranges to CUs similar to .debug_aranges and use 1440 // it to assign CU to functions. 1441 std::vector<CURange> AllRanges; 1442 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1443 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1444 Expected<DWARFAddressRangesVector> RangesOrError = 1445 CU->getUnitDIE().getAddressRanges(); 1446 if (!RangesOrError) { 1447 consumeError(RangesOrError.takeError()); 1448 continue; 1449 } 1450 for (DWARFAddressRange &Range : *RangesOrError) { 1451 // Parts of the debug info could be invalidated due to corresponding code 1452 // being removed from the binary by the linker. Hence we check if the 1453 // address is a valid one. 1454 if (containsAddress(Range.LowPC)) 1455 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1456 } 1457 } 1458 1459 std::sort(AllRanges.begin(), AllRanges.end()); 1460 for (auto &KV : BinaryFunctions) { 1461 const uint64_t FunctionAddress = KV.first; 1462 BinaryFunction &Function = KV.second; 1463 1464 auto It = std::partition_point( 1465 AllRanges.begin(), AllRanges.end(), 1466 [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1467 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) { 1468 Function.setDWARFUnit(It->Unit); 1469 } 1470 } 1471 1472 // Discover units with debug info that needs to be updated. 1473 for (const auto &KV : BinaryFunctions) { 1474 const BinaryFunction &BF = KV.second; 1475 if (shouldEmit(BF) && BF.getDWARFUnit()) 1476 ProcessedCUs.insert(BF.getDWARFUnit()); 1477 } 1478 1479 // Clear debug info for functions from units that we are not going to process. 1480 for (auto &KV : BinaryFunctions) { 1481 BinaryFunction &BF = KV.second; 1482 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1483 BF.setDWARFUnit(nullptr); 1484 } 1485 1486 if (opts::Verbosity >= 1) { 1487 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1488 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1489 } 1490 1491 // Populate MCContext with DWARF files from all units. 1492 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1493 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1494 const uint64_t CUID = CU->getOffset(); 1495 getDwarfLineTable(CUID).setLabel(Ctx->getOrCreateSymbol( 1496 GlobalPrefix + "line_table_start" + Twine(CUID))); 1497 1498 if (!ProcessedCUs.count(CU.get())) 1499 continue; 1500 1501 const DWARFDebugLine::LineTable *LineTable = 1502 DwCtx->getLineTableForUnit(CU.get()); 1503 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1504 LineTable->Prologue.FileNames; 1505 1506 // Assign a unique label to every line table, one per CU. 1507 // Make sure empty debug line tables are registered too. 1508 if (FileNames.empty()) { 1509 cantFail(getDwarfFile("", "<unknown>", 0, None, None, CUID)); 1510 continue; 1511 } 1512 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1513 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1514 // means empty dir. 1515 StringRef Dir = ""; 1516 if (FileNames[I].DirIdx != 0) 1517 if (Optional<const char *> DirName = dwarf::toString( 1518 LineTable->Prologue 1519 .IncludeDirectories[FileNames[I].DirIdx - 1])) 1520 Dir = *DirName; 1521 StringRef FileName = ""; 1522 if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name)) 1523 FileName = *FName; 1524 assert(FileName != ""); 1525 cantFail(getDwarfFile(Dir, FileName, 0, None, None, CUID)); 1526 } 1527 } 1528 1529 preprocessDWODebugInfo(); 1530 } 1531 1532 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1533 if (opts::processAllFunctions()) 1534 return true; 1535 1536 if (Function.isIgnored()) 1537 return false; 1538 1539 // In relocation mode we will emit non-simple functions with CFG. 1540 // If the function does not have a CFG it should be marked as ignored. 1541 return HasRelocations || Function.isSimple(); 1542 } 1543 1544 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1545 uint32_t Operation = Inst.getOperation(); 1546 switch (Operation) { 1547 case MCCFIInstruction::OpSameValue: 1548 OS << "OpSameValue Reg" << Inst.getRegister(); 1549 break; 1550 case MCCFIInstruction::OpRememberState: 1551 OS << "OpRememberState"; 1552 break; 1553 case MCCFIInstruction::OpRestoreState: 1554 OS << "OpRestoreState"; 1555 break; 1556 case MCCFIInstruction::OpOffset: 1557 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1558 break; 1559 case MCCFIInstruction::OpDefCfaRegister: 1560 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1561 break; 1562 case MCCFIInstruction::OpDefCfaOffset: 1563 OS << "OpDefCfaOffset " << Inst.getOffset(); 1564 break; 1565 case MCCFIInstruction::OpDefCfa: 1566 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1567 break; 1568 case MCCFIInstruction::OpRelOffset: 1569 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1570 break; 1571 case MCCFIInstruction::OpAdjustCfaOffset: 1572 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1573 break; 1574 case MCCFIInstruction::OpEscape: 1575 OS << "OpEscape"; 1576 break; 1577 case MCCFIInstruction::OpRestore: 1578 OS << "OpRestore Reg" << Inst.getRegister(); 1579 break; 1580 case MCCFIInstruction::OpUndefined: 1581 OS << "OpUndefined Reg" << Inst.getRegister(); 1582 break; 1583 case MCCFIInstruction::OpRegister: 1584 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1585 << Inst.getRegister2(); 1586 break; 1587 case MCCFIInstruction::OpWindowSave: 1588 OS << "OpWindowSave"; 1589 break; 1590 case MCCFIInstruction::OpGnuArgsSize: 1591 OS << "OpGnuArgsSize"; 1592 break; 1593 default: 1594 OS << "Op#" << Operation; 1595 break; 1596 } 1597 } 1598 1599 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1600 uint64_t Offset, 1601 const BinaryFunction *Function, 1602 bool PrintMCInst, bool PrintMemData, 1603 bool PrintRelocations) const { 1604 if (MIB->isEHLabel(Instruction)) { 1605 OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << '\n'; 1606 return; 1607 } 1608 OS << format(" %08" PRIx64 ": ", Offset); 1609 if (MIB->isCFI(Instruction)) { 1610 uint32_t Offset = Instruction.getOperand(0).getImm(); 1611 OS << "\t!CFI\t$" << Offset << "\t; "; 1612 if (Function) 1613 printCFI(OS, *Function->getCFIFor(Instruction)); 1614 OS << "\n"; 1615 return; 1616 } 1617 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1618 if (MIB->isCall(Instruction)) { 1619 if (MIB->isTailCall(Instruction)) 1620 OS << " # TAILCALL "; 1621 if (MIB->isInvoke(Instruction)) { 1622 const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction); 1623 OS << " # handler: "; 1624 if (EHInfo->first) 1625 OS << *EHInfo->first; 1626 else 1627 OS << '0'; 1628 OS << "; action: " << EHInfo->second; 1629 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1630 if (GnuArgsSize >= 0) 1631 OS << "; GNU_args_size = " << GnuArgsSize; 1632 } 1633 } else if (MIB->isIndirectBranch(Instruction)) { 1634 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1635 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1636 } else { 1637 OS << " # UNKNOWN CONTROL FLOW"; 1638 } 1639 } 1640 if (Optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1641 OS << " # Offset: " << *Offset; 1642 1643 MIB->printAnnotations(Instruction, OS); 1644 1645 if (opts::PrintDebugInfo) { 1646 DebugLineTableRowRef RowRef = 1647 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1648 if (RowRef != DebugLineTableRowRef::NULL_ROW) { 1649 const DWARFDebugLine::LineTable *LineTable; 1650 if (Function && Function->getDWARFUnit() && 1651 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1652 LineTable = Function->getDWARFLineTable(); 1653 } else { 1654 LineTable = DwCtx->getLineTableForUnit( 1655 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1656 } 1657 assert(LineTable && 1658 "line table expected for instruction with debug info"); 1659 1660 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1661 StringRef FileName = ""; 1662 if (Optional<const char *> FName = 1663 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1664 FileName = *FName; 1665 OS << " # debug line " << FileName << ":" << Row.Line; 1666 if (Row.Column) 1667 OS << ":" << Row.Column; 1668 if (Row.Discriminator) 1669 OS << " discriminator:" << Row.Discriminator; 1670 } 1671 } 1672 1673 if ((opts::PrintRelocations || PrintRelocations) && Function) { 1674 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1675 Function->printRelocations(OS, Offset, Size); 1676 } 1677 1678 OS << "\n"; 1679 1680 if (PrintMCInst) { 1681 Instruction.dump_pretty(OS, InstPrinter.get()); 1682 OS << "\n"; 1683 } 1684 } 1685 1686 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 1687 auto SI = AddressToSection.upper_bound(Address); 1688 if (SI != AddressToSection.begin()) { 1689 --SI; 1690 uint64_t UpperBound = SI->first + SI->second->getSize(); 1691 if (!SI->second->getSize()) 1692 UpperBound += 1; 1693 if (UpperBound > Address) 1694 return *SI->second; 1695 } 1696 return std::make_error_code(std::errc::bad_address); 1697 } 1698 1699 ErrorOr<StringRef> 1700 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 1701 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 1702 return Section->getName(); 1703 return std::make_error_code(std::errc::bad_address); 1704 } 1705 1706 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 1707 auto Res = Sections.insert(Section); 1708 (void)Res; 1709 assert(Res.second && "can't register the same section twice."); 1710 1711 // Only register allocatable sections in the AddressToSection map. 1712 if (Section->isAllocatable() && Section->getAddress()) 1713 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 1714 NameToSection.insert( 1715 std::make_pair(std::string(Section->getName()), Section)); 1716 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 1717 return *Section; 1718 } 1719 1720 BinarySection &BinaryContext::registerSection(SectionRef Section) { 1721 return registerSection(new BinarySection(*this, Section)); 1722 } 1723 1724 BinarySection & 1725 BinaryContext::registerSection(StringRef SectionName, 1726 const BinarySection &OriginalSection) { 1727 return registerSection( 1728 new BinarySection(*this, SectionName, OriginalSection)); 1729 } 1730 1731 BinarySection & 1732 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType, 1733 unsigned ELFFlags, uint8_t *Data, 1734 uint64_t Size, unsigned Alignment) { 1735 auto NamedSections = getSectionByName(Name); 1736 if (NamedSections.begin() != NamedSections.end()) { 1737 assert(std::next(NamedSections.begin()) == NamedSections.end() && 1738 "can only update unique sections"); 1739 BinarySection *Section = NamedSections.begin()->second; 1740 1741 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 1742 const bool Flag = Section->isAllocatable(); 1743 (void)Flag; 1744 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 1745 LLVM_DEBUG(dbgs() << *Section << "\n"); 1746 // FIXME: Fix section flags/attributes for MachO. 1747 if (isELF()) 1748 assert(Flag == Section->isAllocatable() && 1749 "can't change section allocation status"); 1750 return *Section; 1751 } 1752 1753 return registerSection( 1754 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 1755 } 1756 1757 bool BinaryContext::deregisterSection(BinarySection &Section) { 1758 BinarySection *SectionPtr = &Section; 1759 auto Itr = Sections.find(SectionPtr); 1760 if (Itr != Sections.end()) { 1761 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 1762 while (Range.first != Range.second) { 1763 if (Range.first->second == SectionPtr) { 1764 AddressToSection.erase(Range.first); 1765 break; 1766 } 1767 ++Range.first; 1768 } 1769 1770 auto NameRange = 1771 NameToSection.equal_range(std::string(SectionPtr->getName())); 1772 while (NameRange.first != NameRange.second) { 1773 if (NameRange.first->second == SectionPtr) { 1774 NameToSection.erase(NameRange.first); 1775 break; 1776 } 1777 ++NameRange.first; 1778 } 1779 1780 Sections.erase(Itr); 1781 delete SectionPtr; 1782 return true; 1783 } 1784 return false; 1785 } 1786 1787 void BinaryContext::printSections(raw_ostream &OS) const { 1788 for (BinarySection *const &Section : Sections) 1789 OS << "BOLT-INFO: " << *Section << "\n"; 1790 } 1791 1792 BinarySection &BinaryContext::absoluteSection() { 1793 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 1794 return *Section; 1795 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 1796 } 1797 1798 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 1799 size_t Size) const { 1800 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 1801 if (!Section) 1802 return std::make_error_code(std::errc::bad_address); 1803 1804 if (Section->isVirtual()) 1805 return 0; 1806 1807 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 1808 AsmInfo->getCodePointerSize()); 1809 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 1810 return DE.getUnsigned(&ValueOffset, Size); 1811 } 1812 1813 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 1814 size_t Size) const { 1815 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 1816 if (!Section) 1817 return std::make_error_code(std::errc::bad_address); 1818 1819 if (Section->isVirtual()) 1820 return 0; 1821 1822 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 1823 AsmInfo->getCodePointerSize()); 1824 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 1825 return DE.getSigned(&ValueOffset, Size); 1826 } 1827 1828 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 1829 uint64_t Type, uint64_t Addend, 1830 uint64_t Value) { 1831 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1832 assert(Section && "cannot find section for address"); 1833 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 1834 Value); 1835 } 1836 1837 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 1838 uint64_t Type, uint64_t Addend, 1839 uint64_t Value) { 1840 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1841 assert(Section && "cannot find section for address"); 1842 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 1843 Addend, Value); 1844 } 1845 1846 bool BinaryContext::removeRelocationAt(uint64_t Address) { 1847 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1848 assert(Section && "cannot find section for address"); 1849 return Section->removeRelocationAt(Address - Section->getAddress()); 1850 } 1851 1852 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) { 1853 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1854 if (!Section) 1855 return nullptr; 1856 1857 return Section->getRelocationAt(Address - Section->getAddress()); 1858 } 1859 1860 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) { 1861 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1862 if (!Section) 1863 return nullptr; 1864 1865 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 1866 } 1867 1868 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 1869 const uint64_t Address) { 1870 auto setImmovable = [&](BinaryData &BD) { 1871 BinaryData *Root = BD.getAtomicRoot(); 1872 LLVM_DEBUG(if (Root->isMoveable()) { 1873 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 1874 << "due to ambiguous relocation referencing 0x" 1875 << Twine::utohexstr(Address) << '\n'; 1876 }); 1877 Root->setIsMoveable(false); 1878 }; 1879 1880 if (Address == BD.getAddress()) { 1881 setImmovable(BD); 1882 1883 // Set previous symbol as immovable 1884 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 1885 if (Prev && Prev->getEndAddress() == BD.getAddress()) 1886 setImmovable(*Prev); 1887 } 1888 1889 if (Address == BD.getEndAddress()) { 1890 setImmovable(BD); 1891 1892 // Set next symbol as immovable 1893 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 1894 if (Next && Next->getAddress() == BD.getEndAddress()) 1895 setImmovable(*Next); 1896 } 1897 } 1898 1899 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 1900 uint64_t *EntryDesc) { 1901 std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex); 1902 auto BFI = SymbolToFunctionMap.find(Symbol); 1903 if (BFI == SymbolToFunctionMap.end()) 1904 return nullptr; 1905 1906 BinaryFunction *BF = BFI->second; 1907 if (EntryDesc) 1908 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 1909 1910 return BF; 1911 } 1912 1913 void BinaryContext::exitWithBugReport(StringRef Message, 1914 const BinaryFunction &Function) const { 1915 errs() << "=======================================\n"; 1916 errs() << "BOLT is unable to proceed because it couldn't properly understand " 1917 "this function.\n"; 1918 errs() << "If you are running the most recent version of BOLT, you may " 1919 "want to " 1920 "report this and paste this dump.\nPlease check that there is no " 1921 "sensitive contents being shared in this dump.\n"; 1922 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 1923 ScopedPrinter SP(errs()); 1924 SP.printBinaryBlock("Function contents", *Function.getData()); 1925 errs() << "\n"; 1926 Function.dump(); 1927 errs() << "ERROR: " << Message; 1928 errs() << "\n=======================================\n"; 1929 exit(1); 1930 } 1931 1932 BinaryFunction * 1933 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 1934 bool IsSimple) { 1935 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 1936 BinaryFunction *BF = InjectedBinaryFunctions.back(); 1937 setSymbolToFunctionMap(BF->getSymbol(), BF); 1938 BF->CurrentState = BinaryFunction::State::CFG; 1939 return BF; 1940 } 1941 1942 std::pair<size_t, size_t> 1943 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 1944 // Adjust branch instruction to match the current layout. 1945 if (FixBranches) 1946 BF.fixBranches(); 1947 1948 // Create local MC context to isolate the effect of ephemeral code emission. 1949 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 1950 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 1951 MCAsmBackend *MAB = 1952 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 1953 1954 SmallString<256> Code; 1955 raw_svector_ostream VecOS(Code); 1956 1957 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 1958 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 1959 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 1960 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 1961 /*RelaxAll=*/false, 1962 /*IncrementalLinkerCompatible=*/false, 1963 /*DWARFMustBeAtTheEnd=*/false)); 1964 1965 Streamer->initSections(false, *STI); 1966 1967 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 1968 Section->setHasInstructions(true); 1969 1970 // Create symbols in the LocalCtx so that they get destroyed with it. 1971 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 1972 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 1973 MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol(); 1974 MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol(); 1975 1976 Streamer->SwitchSection(Section); 1977 Streamer->emitLabel(StartLabel); 1978 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false, 1979 /*EmitCodeOnly=*/true); 1980 Streamer->emitLabel(EndLabel); 1981 1982 if (BF.isSplit()) { 1983 MCSectionELF *ColdSection = 1984 LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS, 1985 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 1986 ColdSection->setHasInstructions(true); 1987 1988 Streamer->SwitchSection(ColdSection); 1989 Streamer->emitLabel(ColdStartLabel); 1990 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true, 1991 /*EmitCodeOnly=*/true); 1992 Streamer->emitLabel(ColdEndLabel); 1993 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private 1994 Streamer->emitBytes(StringRef("")); 1995 Streamer->SwitchSection(Section); 1996 } 1997 1998 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 1999 // MCStreamer::Finish(), which does more than we want 2000 Streamer->emitBytes(StringRef("")); 2001 2002 MCAssembler &Assembler = 2003 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2004 MCAsmLayout Layout(Assembler); 2005 Assembler.layout(Layout); 2006 2007 const uint64_t HotSize = 2008 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2009 const uint64_t ColdSize = BF.isSplit() 2010 ? Layout.getSymbolOffset(*ColdEndLabel) - 2011 Layout.getSymbolOffset(*ColdStartLabel) 2012 : 0ULL; 2013 2014 // Clean-up the effect of the code emission. 2015 for (const MCSymbol &Symbol : Assembler.symbols()) { 2016 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2017 MutableSymbol->setUndefined(); 2018 MutableSymbol->setIsRegistered(false); 2019 } 2020 2021 return std::make_pair(HotSize, ColdSize); 2022 } 2023 2024 bool BinaryContext::validateEncoding(const MCInst &Inst, 2025 ArrayRef<uint8_t> InputEncoding) const { 2026 SmallString<256> Code; 2027 SmallVector<MCFixup, 4> Fixups; 2028 raw_svector_ostream VecOS(Code); 2029 2030 MCE->encodeInstruction(Inst, VecOS, Fixups, *STI); 2031 auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2032 if (InputEncoding != EncodedData) { 2033 if (opts::Verbosity > 1) { 2034 errs() << "BOLT-WARNING: mismatched encoding detected\n" 2035 << " input: " << InputEncoding << '\n' 2036 << " output: " << EncodedData << '\n'; 2037 } 2038 return false; 2039 } 2040 2041 return true; 2042 } 2043 2044 uint64_t BinaryContext::getHotThreshold() const { 2045 static uint64_t Threshold = 0; 2046 if (Threshold == 0) { 2047 Threshold = std::max( 2048 (uint64_t)opts::ExecutionCountThreshold, 2049 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2050 } 2051 return Threshold; 2052 } 2053 2054 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2055 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2056 auto FI = BinaryFunctions.upper_bound(Address); 2057 if (FI == BinaryFunctions.begin()) 2058 return nullptr; 2059 --FI; 2060 2061 const uint64_t UsedSize = 2062 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2063 2064 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2065 return nullptr; 2066 2067 return &FI->second; 2068 } 2069 2070 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2071 // First, try to find a function starting at the given address. If the 2072 // function was folded, this will get us the original folded function if it 2073 // wasn't removed from the list, e.g. in non-relocation mode. 2074 auto BFI = BinaryFunctions.find(Address); 2075 if (BFI != BinaryFunctions.end()) 2076 return &BFI->second; 2077 2078 // We might have folded the function matching the object at the given 2079 // address. In such case, we look for a function matching the symbol 2080 // registered at the original address. The new function (the one that the 2081 // original was folded into) will hold the symbol. 2082 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2083 uint64_t EntryID = 0; 2084 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2085 if (BF && EntryID == 0) 2086 return BF; 2087 } 2088 return nullptr; 2089 } 2090 2091 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2092 const DWARFAddressRangesVector &InputRanges) const { 2093 DebugAddressRangesVector OutputRanges; 2094 2095 for (const DWARFAddressRange Range : InputRanges) { 2096 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2097 while (BFI != BinaryFunctions.end()) { 2098 const BinaryFunction &Function = BFI->second; 2099 if (Function.getAddress() >= Range.HighPC) 2100 break; 2101 const DebugAddressRangesVector FunctionRanges = 2102 Function.getOutputAddressRanges(); 2103 std::move(std::begin(FunctionRanges), std::end(FunctionRanges), 2104 std::back_inserter(OutputRanges)); 2105 std::advance(BFI, 1); 2106 } 2107 } 2108 2109 return OutputRanges; 2110 } 2111 2112 } // namespace bolt 2113 } // namespace llvm 2114