1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/NameResolver.h" 18 #include "bolt/Utils/Utils.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 23 #include "llvm/MC/MCAsmLayout.h" 24 #include "llvm/MC/MCAssembler.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 27 #include "llvm/MC/MCInstPrinter.h" 28 #include "llvm/MC/MCObjectStreamer.h" 29 #include "llvm/MC/MCObjectWriter.h" 30 #include "llvm/MC/MCRegisterInfo.h" 31 #include "llvm/MC/MCSectionELF.h" 32 #include "llvm/MC/MCStreamer.h" 33 #include "llvm/MC/MCSubtargetInfo.h" 34 #include "llvm/MC/MCSymbol.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/Error.h" 37 #include "llvm/Support/Regex.h" 38 #include <algorithm> 39 #include <functional> 40 #include <iterator> 41 #include <unordered_set> 42 43 using namespace llvm; 44 45 #undef DEBUG_TYPE 46 #define DEBUG_TYPE "bolt" 47 48 namespace opts { 49 50 cl::opt<bool> NoHugePages("no-huge-pages", 51 cl::desc("use regular size pages for code alignment"), 52 cl::Hidden, cl::cat(BoltCategory)); 53 54 static cl::opt<bool> 55 PrintDebugInfo("print-debug-info", 56 cl::desc("print debug info when printing functions"), 57 cl::Hidden, 58 cl::ZeroOrMore, 59 cl::cat(BoltCategory)); 60 61 cl::opt<bool> PrintRelocations( 62 "print-relocations", 63 cl::desc("print relocations when printing functions/objects"), cl::Hidden, 64 cl::cat(BoltCategory)); 65 66 static cl::opt<bool> 67 PrintMemData("print-mem-data", 68 cl::desc("print memory data annotations when printing functions"), 69 cl::Hidden, 70 cl::ZeroOrMore, 71 cl::cat(BoltCategory)); 72 73 } // namespace opts 74 75 namespace llvm { 76 namespace bolt { 77 78 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 79 std::unique_ptr<DWARFContext> DwCtx, 80 std::unique_ptr<Triple> TheTriple, 81 const Target *TheTarget, std::string TripleName, 82 std::unique_ptr<MCCodeEmitter> MCE, 83 std::unique_ptr<MCObjectFileInfo> MOFI, 84 std::unique_ptr<const MCAsmInfo> AsmInfo, 85 std::unique_ptr<const MCInstrInfo> MII, 86 std::unique_ptr<const MCSubtargetInfo> STI, 87 std::unique_ptr<MCInstPrinter> InstPrinter, 88 std::unique_ptr<const MCInstrAnalysis> MIA, 89 std::unique_ptr<MCPlusBuilder> MIB, 90 std::unique_ptr<const MCRegisterInfo> MRI, 91 std::unique_ptr<MCDisassembler> DisAsm) 92 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 93 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 94 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 95 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 96 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 97 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 98 Relocation::Arch = this->TheTriple->getArch(); 99 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 100 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 101 } 102 103 BinaryContext::~BinaryContext() { 104 for (BinarySection *Section : Sections) 105 delete Section; 106 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 107 delete InjectedFunction; 108 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 109 delete JTI.second; 110 clearBinaryData(); 111 } 112 113 /// Create BinaryContext for a given architecture \p ArchName and 114 /// triple \p TripleName. 115 Expected<std::unique_ptr<BinaryContext>> 116 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 117 std::unique_ptr<DWARFContext> DwCtx) { 118 StringRef ArchName = ""; 119 StringRef FeaturesStr = ""; 120 switch (File->getArch()) { 121 case llvm::Triple::x86_64: 122 ArchName = "x86-64"; 123 FeaturesStr = "+nopl"; 124 break; 125 case llvm::Triple::aarch64: 126 ArchName = "aarch64"; 127 FeaturesStr = "+fp-armv8,+neon,+crypto,+dotprod,+crc,+lse,+ras,+rdm," 128 "+fullfp16,+spe,+fuse-aes,+rcpc"; 129 break; 130 default: 131 return createStringError(std::errc::not_supported, 132 "BOLT-ERROR: Unrecognized machine in ELF file"); 133 } 134 135 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 136 const std::string TripleName = TheTriple->str(); 137 138 std::string Error; 139 const Target *TheTarget = 140 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 141 if (!TheTarget) 142 return createStringError(make_error_code(std::errc::not_supported), 143 Twine("BOLT-ERROR: ", Error)); 144 145 std::unique_ptr<const MCRegisterInfo> MRI( 146 TheTarget->createMCRegInfo(TripleName)); 147 if (!MRI) 148 return createStringError( 149 make_error_code(std::errc::not_supported), 150 Twine("BOLT-ERROR: no register info for target ", TripleName)); 151 152 // Set up disassembler. 153 std::unique_ptr<MCAsmInfo> AsmInfo( 154 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 155 if (!AsmInfo) 156 return createStringError( 157 make_error_code(std::errc::not_supported), 158 Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 159 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 160 // we want to emit such names as using @PLT without double quotes to convey 161 // variant kind to the assembler. BOLT doesn't rely on the linker so we can 162 // override the default AsmInfo behavior to emit names the way we want. 163 AsmInfo->setAllowAtInName(true); 164 165 std::unique_ptr<const MCSubtargetInfo> STI( 166 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 167 if (!STI) 168 return createStringError( 169 make_error_code(std::errc::not_supported), 170 Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 171 172 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 173 if (!MII) 174 return createStringError( 175 make_error_code(std::errc::not_supported), 176 Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 177 178 std::unique_ptr<MCContext> Ctx( 179 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 180 std::unique_ptr<MCObjectFileInfo> MOFI( 181 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 182 Ctx->setObjectFileInfo(MOFI.get()); 183 // We do not support X86 Large code model. Change this in the future. 184 bool Large = false; 185 if (TheTriple->getArch() == llvm::Triple::aarch64) 186 Large = true; 187 unsigned LSDAEncoding = 188 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 189 unsigned TTypeEncoding = 190 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 191 if (IsPIC) { 192 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 193 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 194 TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | 195 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 196 } 197 198 std::unique_ptr<MCDisassembler> DisAsm( 199 TheTarget->createMCDisassembler(*STI, *Ctx)); 200 201 if (!DisAsm) 202 return createStringError( 203 make_error_code(std::errc::not_supported), 204 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 205 206 std::unique_ptr<const MCInstrAnalysis> MIA( 207 TheTarget->createMCInstrAnalysis(MII.get())); 208 if (!MIA) 209 return createStringError( 210 make_error_code(std::errc::not_supported), 211 Twine("BOLT-ERROR: failed to create instruction analysis for target ", 212 TripleName)); 213 214 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 215 std::unique_ptr<MCInstPrinter> InstructionPrinter( 216 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 217 *MII, *MRI)); 218 if (!InstructionPrinter) 219 return createStringError( 220 make_error_code(std::errc::not_supported), 221 Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 222 InstructionPrinter->setPrintImmHex(true); 223 224 std::unique_ptr<MCCodeEmitter> MCE( 225 TheTarget->createMCCodeEmitter(*MII, *Ctx)); 226 227 // Make sure we don't miss any output on core dumps. 228 outs().SetUnbuffered(); 229 errs().SetUnbuffered(); 230 dbgs().SetUnbuffered(); 231 232 auto BC = std::make_unique<BinaryContext>( 233 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 234 std::string(TripleName), std::move(MCE), std::move(MOFI), 235 std::move(AsmInfo), std::move(MII), std::move(STI), 236 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 237 std::move(DisAsm)); 238 239 BC->TTypeEncoding = TTypeEncoding; 240 BC->LSDAEncoding = LSDAEncoding; 241 242 BC->MAB = std::unique_ptr<MCAsmBackend>( 243 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 244 245 BC->setFilename(File->getFileName()); 246 247 BC->HasFixedLoadAddress = !IsPIC; 248 249 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 250 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 251 252 if (!BC->SymbolicDisAsm) 253 return createStringError( 254 make_error_code(std::errc::not_supported), 255 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 256 257 return std::move(BC); 258 } 259 260 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 261 if (opts::HotText && 262 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 263 return true; 264 265 if (opts::HotData && 266 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 267 return true; 268 269 if (SymbolName == "_end") 270 return true; 271 272 return false; 273 } 274 275 std::unique_ptr<MCObjectWriter> 276 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 277 return MAB->createObjectWriter(OS); 278 } 279 280 bool BinaryContext::validateObjectNesting() const { 281 auto Itr = BinaryDataMap.begin(); 282 auto End = BinaryDataMap.end(); 283 bool Valid = true; 284 while (Itr != End) { 285 auto Next = std::next(Itr); 286 while (Next != End && 287 Itr->second->getSection() == Next->second->getSection() && 288 Itr->second->containsRange(Next->second->getAddress(), 289 Next->second->getSize())) { 290 if (Next->second->Parent != Itr->second) { 291 errs() << "BOLT-WARNING: object nesting incorrect for:\n" 292 << "BOLT-WARNING: " << *Itr->second << "\n" 293 << "BOLT-WARNING: " << *Next->second << "\n"; 294 Valid = false; 295 } 296 ++Next; 297 } 298 Itr = Next; 299 } 300 return Valid; 301 } 302 303 bool BinaryContext::validateHoles() const { 304 bool Valid = true; 305 for (BinarySection &Section : sections()) { 306 for (const Relocation &Rel : Section.relocations()) { 307 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 308 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 309 if (!BD) { 310 errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 311 << " 0x" << Twine::utohexstr(RelAddr) << " in " 312 << Section.getName() << "\n"; 313 Valid = false; 314 } else if (!BD->getAtomicRoot()) { 315 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 316 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 317 << Section.getName() << "\n"; 318 Valid = false; 319 } 320 } 321 } 322 return Valid; 323 } 324 325 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 326 const uint64_t Address = GAI->second->getAddress(); 327 const uint64_t Size = GAI->second->getSize(); 328 329 auto fixParents = [&](BinaryDataMapType::iterator Itr, 330 BinaryData *NewParent) { 331 BinaryData *OldParent = Itr->second->Parent; 332 Itr->second->Parent = NewParent; 333 ++Itr; 334 while (Itr != BinaryDataMap.end() && OldParent && 335 Itr->second->Parent == OldParent) { 336 Itr->second->Parent = NewParent; 337 ++Itr; 338 } 339 }; 340 341 // Check if the previous symbol contains the newly added symbol. 342 if (GAI != BinaryDataMap.begin()) { 343 BinaryData *Prev = std::prev(GAI)->second; 344 while (Prev) { 345 if (Prev->getSection() == GAI->second->getSection() && 346 Prev->containsRange(Address, Size)) { 347 fixParents(GAI, Prev); 348 } else { 349 fixParents(GAI, nullptr); 350 } 351 Prev = Prev->Parent; 352 } 353 } 354 355 // Check if the newly added symbol contains any subsequent symbols. 356 if (Size != 0) { 357 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 358 auto Itr = std::next(GAI); 359 while ( 360 Itr != BinaryDataMap.end() && 361 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 362 Itr->second->Parent = BD; 363 ++Itr; 364 } 365 } 366 } 367 368 iterator_range<BinaryContext::binary_data_iterator> 369 BinaryContext::getSubBinaryData(BinaryData *BD) { 370 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 371 auto End = Start; 372 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 373 ++End; 374 return make_range(Start, End); 375 } 376 377 std::pair<const MCSymbol *, uint64_t> 378 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 379 bool IsPCRel) { 380 uint64_t Addend = 0; 381 382 if (isAArch64()) { 383 // Check if this is an access to a constant island and create bookkeeping 384 // to keep track of it and emit it later as part of this function. 385 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 386 return std::make_pair(IslandSym, Addend); 387 388 // Detect custom code written in assembly that refers to arbitrary 389 // constant islands from other functions. Write this reference so we 390 // can pull this constant island and emit it as part of this function 391 // too. 392 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 393 if (IslandIter != AddressToConstantIslandMap.end()) { 394 if (MCSymbol *IslandSym = 395 IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) { 396 BF.createIslandDependency(IslandSym, IslandIter->second); 397 return std::make_pair(IslandSym, Addend); 398 } 399 } 400 } 401 402 // Note that the address does not necessarily have to reside inside 403 // a section, it could be an absolute address too. 404 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 405 if (Section && Section->isText()) { 406 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 407 if (Address != BF.getAddress()) { 408 // The address could potentially escape. Mark it as another entry 409 // point into the function. 410 if (opts::Verbosity >= 1) { 411 outs() << "BOLT-INFO: potentially escaped address 0x" 412 << Twine::utohexstr(Address) << " in function " << BF << '\n'; 413 } 414 BF.HasInternalLabelReference = true; 415 return std::make_pair( 416 BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend); 417 } 418 } else { 419 BF.InterproceduralReferences.insert(Address); 420 } 421 } 422 423 // With relocations, catch jump table references outside of the basic block 424 // containing the indirect jump. 425 if (HasRelocations) { 426 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 427 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 428 const MCSymbol *Symbol = 429 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 430 431 return std::make_pair(Symbol, Addend); 432 } 433 } 434 435 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 436 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 437 438 // TODO: use DWARF info to get size/alignment here? 439 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 440 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 441 return std::make_pair(TargetSymbol, Addend); 442 } 443 444 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 445 BinaryFunction &BF) { 446 if (!isX86()) 447 return MemoryContentsType::UNKNOWN; 448 449 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 450 if (!Section) { 451 // No section - possibly an absolute address. Since we don't allow 452 // internal function addresses to escape the function scope - we 453 // consider it a tail call. 454 if (opts::Verbosity > 1) { 455 errs() << "BOLT-WARNING: no section for address 0x" 456 << Twine::utohexstr(Address) << " referenced from function " << BF 457 << '\n'; 458 } 459 return MemoryContentsType::UNKNOWN; 460 } 461 462 if (Section->isVirtual()) { 463 // The contents are filled at runtime. 464 return MemoryContentsType::UNKNOWN; 465 } 466 467 // No support for jump tables in code yet. 468 if (Section->isText()) 469 return MemoryContentsType::UNKNOWN; 470 471 // Start with checking for PIC jump table. We expect non-PIC jump tables 472 // to have high 32 bits set to 0. 473 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 474 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 475 476 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 477 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 478 479 return MemoryContentsType::UNKNOWN; 480 } 481 482 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)? 483 bool isPotentialFragmentByName(BinaryFunction &Fragment, 484 BinaryFunction &Parent) { 485 for (StringRef Name : Parent.getNames()) { 486 std::string NamePrefix = Regex::escape(NameResolver::restore(Name)); 487 std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str(); 488 if (Fragment.hasRestoredNameRegex(NameRegex)) 489 return true; 490 } 491 return false; 492 } 493 494 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 495 const JumpTable::JumpTableType Type, 496 BinaryFunction &BF, 497 const uint64_t NextJTAddress, 498 JumpTable::OffsetsType *Offsets) { 499 // Is one of the targets __builtin_unreachable? 500 bool HasUnreachable = false; 501 502 // Number of targets other than __builtin_unreachable. 503 uint64_t NumRealEntries = 0; 504 505 constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max(); 506 auto addOffset = [&](uint64_t Offset) { 507 if (Offsets) 508 Offsets->emplace_back(Offset); 509 }; 510 511 auto doesBelongToFunction = [&](const uint64_t Addr, 512 BinaryFunction *TargetBF) -> bool { 513 if (BF.containsAddress(Addr)) 514 return true; 515 // Nothing to do if we failed to identify the containing function. 516 if (!TargetBF) 517 return false; 518 // Case 1: check if BF is a fragment and TargetBF is its parent. 519 if (BF.isFragment()) { 520 // Parent function may or may not be already registered. 521 // Set parent link based on function name matching heuristic. 522 return registerFragment(BF, *TargetBF); 523 } 524 // Case 2: check if TargetBF is a fragment and BF is its parent. 525 return TargetBF->isFragment() && registerFragment(*TargetBF, BF); 526 }; 527 528 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 529 if (!Section) 530 return false; 531 532 // The upper bound is defined by containing object, section limits, and 533 // the next jump table in memory. 534 uint64_t UpperBound = Section->getEndAddress(); 535 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 536 if (JumpTableBD && JumpTableBD->getSize()) { 537 assert(JumpTableBD->getEndAddress() <= UpperBound && 538 "data object cannot cross a section boundary"); 539 UpperBound = JumpTableBD->getEndAddress(); 540 } 541 if (NextJTAddress) 542 UpperBound = std::min(NextJTAddress, UpperBound); 543 544 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName() 545 << '\n'); 546 const uint64_t EntrySize = getJumpTableEntrySize(Type); 547 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 548 EntryAddress += EntrySize) { 549 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 550 << " -> "); 551 // Check if there's a proper relocation against the jump table entry. 552 if (HasRelocations) { 553 if (Type == JumpTable::JTT_PIC && 554 !DataPCRelocations.count(EntryAddress)) { 555 LLVM_DEBUG( 556 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 557 break; 558 } 559 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 560 LLVM_DEBUG( 561 dbgs() 562 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 563 break; 564 } 565 } 566 567 const uint64_t Value = 568 (Type == JumpTable::JTT_PIC) 569 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 570 : *getPointerAtAddress(EntryAddress); 571 572 // __builtin_unreachable() case. 573 if (Value == BF.getAddress() + BF.getSize()) { 574 addOffset(Value - BF.getAddress()); 575 HasUnreachable = true; 576 LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n"); 577 continue; 578 } 579 580 // Function or one of its fragments. 581 BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 582 583 // We assume that a jump table cannot have function start as an entry. 584 if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) { 585 LLVM_DEBUG({ 586 if (!BF.containsAddress(Value)) { 587 dbgs() << "FAIL: function doesn't contain this address\n"; 588 if (TargetBF) { 589 dbgs() << " ! function containing this address: " 590 << TargetBF->getPrintName() << '\n'; 591 if (TargetBF->isFragment()) 592 dbgs() << " ! is a fragment\n"; 593 for (BinaryFunction *TargetParent : TargetBF->ParentFragments) 594 dbgs() << " ! its parent is " 595 << (TargetParent ? TargetParent->getPrintName() : "(none)") 596 << '\n'; 597 } 598 } 599 if (Value == BF.getAddress()) 600 dbgs() << "FAIL: jump table cannot have function start as an entry\n"; 601 }); 602 break; 603 } 604 605 // Check there's an instruction at this offset. 606 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 607 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 608 LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n"); 609 break; 610 } 611 612 ++NumRealEntries; 613 614 if (TargetBF == &BF) { 615 // Address inside the function. 616 addOffset(Value - TargetBF->getAddress()); 617 LLVM_DEBUG(dbgs() << "OK: real entry\n"); 618 } else { 619 // Address in split fragment. 620 BF.setHasSplitJumpTable(true); 621 // Add invalid offset for proper identification of jump table size. 622 addOffset(INVALID_OFFSET); 623 LLVM_DEBUG(dbgs() << "OK: address in split fragment " 624 << TargetBF->getPrintName() << '\n'); 625 } 626 } 627 628 // It's a jump table if the number of real entries is more than 1, or there's 629 // one real entry and "unreachable" targets. If there are only multiple 630 // "unreachable" targets, then it's not a jump table. 631 return NumRealEntries + HasUnreachable >= 2; 632 } 633 634 void BinaryContext::populateJumpTables() { 635 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 636 << '\n'); 637 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 638 ++JTI) { 639 JumpTable *JT = JTI->second; 640 BinaryFunction &BF = *JT->Parent; 641 642 if (!BF.isSimple()) 643 continue; 644 645 uint64_t NextJTAddress = 0; 646 auto NextJTI = std::next(JTI); 647 if (NextJTI != JTE) 648 NextJTAddress = NextJTI->second->getAddress(); 649 650 const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF, 651 NextJTAddress, &JT->OffsetEntries); 652 if (!Success) { 653 dbgs() << "failed to analyze jump table in function " << BF << '\n'; 654 JT->print(dbgs()); 655 if (NextJTI != JTE) { 656 dbgs() << "next jump table at 0x" 657 << Twine::utohexstr(NextJTI->second->getAddress()) 658 << " belongs to function " << *NextJTI->second->Parent << '\n'; 659 NextJTI->second->print(dbgs()); 660 } 661 llvm_unreachable("jump table heuristic failure"); 662 } 663 664 for (uint64_t EntryOffset : JT->OffsetEntries) { 665 if (EntryOffset == BF.getSize()) 666 BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize()); 667 else 668 BF.registerReferencedOffset(EntryOffset); 669 } 670 671 // In strict mode, erase PC-relative relocation record. Later we check that 672 // all such records are erased and thus have been accounted for. 673 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 674 for (uint64_t Address = JT->getAddress(); 675 Address < JT->getAddress() + JT->getSize(); 676 Address += JT->EntrySize) { 677 DataPCRelocations.erase(DataPCRelocations.find(Address)); 678 } 679 } 680 681 // Mark to skip the function and all its fragments. 682 if (BF.hasSplitJumpTable()) 683 FragmentsToSkip.push_back(&BF); 684 } 685 686 if (opts::StrictMode && DataPCRelocations.size()) { 687 LLVM_DEBUG({ 688 dbgs() << DataPCRelocations.size() 689 << " unclaimed PC-relative relocations left in data:\n"; 690 for (uint64_t Reloc : DataPCRelocations) 691 dbgs() << Twine::utohexstr(Reloc) << '\n'; 692 }); 693 assert(0 && "unclaimed PC-relative relocations left in data\n"); 694 } 695 clearList(DataPCRelocations); 696 } 697 698 void BinaryContext::skipMarkedFragments() { 699 // Unique functions in the vector. 700 std::unordered_set<BinaryFunction *> UniqueFunctions(FragmentsToSkip.begin(), 701 FragmentsToSkip.end()); 702 // Copy the functions back to FragmentsToSkip. 703 FragmentsToSkip.assign(UniqueFunctions.begin(), UniqueFunctions.end()); 704 auto addToWorklist = [&](BinaryFunction *Function) -> void { 705 if (UniqueFunctions.count(Function)) 706 return; 707 FragmentsToSkip.push_back(Function); 708 UniqueFunctions.insert(Function); 709 }; 710 // Functions containing split jump tables need to be skipped with all 711 // fragments (transitively). 712 for (size_t I = 0; I != FragmentsToSkip.size(); I++) { 713 BinaryFunction *BF = FragmentsToSkip[I]; 714 assert(UniqueFunctions.count(BF) && 715 "internal error in traversing function fragments"); 716 if (opts::Verbosity >= 1) 717 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 718 BF->setSimple(false); 719 BF->setHasSplitJumpTable(true); 720 721 std::for_each(BF->Fragments.begin(), BF->Fragments.end(), addToWorklist); 722 std::for_each(BF->ParentFragments.begin(), BF->ParentFragments.end(), 723 addToWorklist); 724 } 725 if (!FragmentsToSkip.empty()) 726 errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function" 727 << (FragmentsToSkip.size() == 1 ? "" : "s") 728 << " due to cold fragments\n"; 729 FragmentsToSkip.clear(); 730 } 731 732 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 733 uint64_t Size, 734 uint16_t Alignment, 735 unsigned Flags) { 736 auto Itr = BinaryDataMap.find(Address); 737 if (Itr != BinaryDataMap.end()) { 738 assert(Itr->second->getSize() == Size || !Size); 739 return Itr->second->getSymbol(); 740 } 741 742 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 743 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 744 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 745 } 746 747 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 748 return Ctx->getOrCreateSymbol(Name); 749 } 750 751 BinaryFunction *BinaryContext::createBinaryFunction( 752 const std::string &Name, BinarySection &Section, uint64_t Address, 753 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 754 auto Result = BinaryFunctions.emplace( 755 Address, BinaryFunction(Name, Section, Address, Size, *this)); 756 assert(Result.second == true && "unexpected duplicate function"); 757 BinaryFunction *BF = &Result.first->second; 758 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 759 Alignment); 760 setSymbolToFunctionMap(BF->getSymbol(), BF); 761 return BF; 762 } 763 764 const MCSymbol * 765 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 766 JumpTable::JumpTableType Type) { 767 auto isFragmentOf = [](BinaryFunction *Fragment, BinaryFunction *Parent) { 768 return (Fragment->isFragment() && Fragment->isParentFragment(Parent)); 769 }; 770 771 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 772 assert(JT->Type == Type && "jump table types have to match"); 773 bool HasMultipleParents = isFragmentOf(JT->Parent, &Function) || 774 isFragmentOf(&Function, JT->Parent); 775 assert((JT->Parent == &Function || HasMultipleParents) && 776 "cannot re-use jump table of a different function"); 777 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 778 779 // Flush OffsetEntries with INVALID_OFFSET if multiple parents 780 // Duplicate the entry for the parent function for easy access 781 if (HasMultipleParents) { 782 if (opts::Verbosity > 2) { 783 outs() << "BOLT-WARNING: Multiple fragments access same jump table: " 784 << JT->Parent->getPrintName() << "; " << Function.getPrintName() 785 << "\n"; 786 } 787 constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max(); 788 for (unsigned I = 0; I < JT->OffsetEntries.size(); ++I) 789 JT->OffsetEntries[I] = INVALID_OFFSET; 790 Function.JumpTables.emplace(Address, JT); 791 } 792 return JT->getFirstLabel(); 793 } 794 795 // Re-use the existing symbol if possible. 796 MCSymbol *JTLabel = nullptr; 797 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 798 if (!isInternalSymbolName(Object->getSymbol()->getName())) 799 JTLabel = Object->getSymbol(); 800 } 801 802 const uint64_t EntrySize = getJumpTableEntrySize(Type); 803 if (!JTLabel) { 804 const std::string JumpTableName = generateJumpTableName(Function, Address); 805 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 806 } 807 808 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 809 << " in function " << Function << '\n'); 810 811 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 812 JumpTable::LabelMapType{{0, JTLabel}}, Function, 813 *getSectionForAddress(Address)); 814 JumpTables.emplace(Address, JT); 815 816 // Duplicate the entry for the parent function for easy access. 817 Function.JumpTables.emplace(Address, JT); 818 819 return JTLabel; 820 } 821 822 std::pair<uint64_t, const MCSymbol *> 823 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 824 const MCSymbol *OldLabel) { 825 auto L = scopeLock(); 826 unsigned Offset = 0; 827 bool Found = false; 828 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 829 if (Elmt.second != OldLabel) 830 continue; 831 Offset = Elmt.first; 832 Found = true; 833 break; 834 } 835 assert(Found && "Label not found"); 836 (void)Found; 837 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 838 JumpTable *NewJT = 839 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 840 JumpTable::LabelMapType{{Offset, NewLabel}}, Function, 841 *getSectionForAddress(JT->getAddress())); 842 NewJT->Entries = JT->Entries; 843 NewJT->Counts = JT->Counts; 844 uint64_t JumpTableID = ++DuplicatedJumpTables; 845 // Invert it to differentiate from regular jump tables whose IDs are their 846 // addresses in the input binary memory space 847 JumpTableID = ~JumpTableID; 848 JumpTables.emplace(JumpTableID, NewJT); 849 Function.JumpTables.emplace(JumpTableID, NewJT); 850 return std::make_pair(JumpTableID, NewLabel); 851 } 852 853 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 854 uint64_t Address) { 855 size_t Id; 856 uint64_t Offset = 0; 857 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 858 Offset = Address - JT->getAddress(); 859 auto Itr = JT->Labels.find(Offset); 860 if (Itr != JT->Labels.end()) 861 return std::string(Itr->second->getName()); 862 Id = JumpTableIds.at(JT->getAddress()); 863 } else { 864 Id = JumpTableIds[Address] = BF.JumpTables.size(); 865 } 866 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 867 (Offset ? ("." + std::to_string(Offset)) : "")); 868 } 869 870 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 871 // FIXME: aarch64 support is missing. 872 if (!isX86()) 873 return true; 874 875 if (BF.getSize() == BF.getMaxSize()) 876 return true; 877 878 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 879 assert(FunctionData && "cannot get function as data"); 880 881 uint64_t Offset = BF.getSize(); 882 MCInst Instr; 883 uint64_t InstrSize = 0; 884 uint64_t InstrAddress = BF.getAddress() + Offset; 885 using std::placeholders::_1; 886 887 // Skip instructions that satisfy the predicate condition. 888 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 889 const uint64_t StartOffset = Offset; 890 for (; Offset < BF.getMaxSize(); 891 Offset += InstrSize, InstrAddress += InstrSize) { 892 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 893 InstrAddress, nulls())) 894 break; 895 if (!Predicate(Instr)) 896 break; 897 } 898 899 return Offset - StartOffset; 900 }; 901 902 // Skip a sequence of zero bytes. 903 auto skipZeros = [&]() { 904 const uint64_t StartOffset = Offset; 905 for (; Offset < BF.getMaxSize(); ++Offset) 906 if ((*FunctionData)[Offset] != 0) 907 break; 908 909 return Offset - StartOffset; 910 }; 911 912 // Accept the whole padding area filled with breakpoints. 913 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 914 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 915 return true; 916 917 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 918 919 // Some functions have a jump to the next function or to the padding area 920 // inserted after the body. 921 auto isSkipJump = [&](const MCInst &Instr) { 922 uint64_t TargetAddress = 0; 923 if (MIB->isUnconditionalBranch(Instr) && 924 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 925 if (TargetAddress >= InstrAddress + InstrSize && 926 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 927 return true; 928 } 929 } 930 return false; 931 }; 932 933 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 934 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 935 skipZeros()) 936 ; 937 938 if (Offset == BF.getMaxSize()) 939 return true; 940 941 if (opts::Verbosity >= 1) { 942 errs() << "BOLT-WARNING: bad padding at address 0x" 943 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 944 << " starting at offset " << (Offset - BF.getSize()) 945 << " in function " << BF << '\n' 946 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 947 << '\n'; 948 } 949 950 return false; 951 } 952 953 void BinaryContext::adjustCodePadding() { 954 for (auto &BFI : BinaryFunctions) { 955 BinaryFunction &BF = BFI.second; 956 if (!shouldEmit(BF)) 957 continue; 958 959 if (!hasValidCodePadding(BF)) { 960 if (HasRelocations) { 961 if (opts::Verbosity >= 1) { 962 outs() << "BOLT-INFO: function " << BF 963 << " has invalid padding. Ignoring the function.\n"; 964 } 965 BF.setIgnored(); 966 } else { 967 BF.setMaxSize(BF.getSize()); 968 } 969 } 970 } 971 } 972 973 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 974 uint64_t Size, 975 uint16_t Alignment, 976 unsigned Flags) { 977 // Register the name with MCContext. 978 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 979 980 auto GAI = BinaryDataMap.find(Address); 981 BinaryData *BD; 982 if (GAI == BinaryDataMap.end()) { 983 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 984 BinarySection &Section = 985 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 986 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 987 Section, Flags); 988 GAI = BinaryDataMap.emplace(Address, BD).first; 989 GlobalSymbols[Name] = BD; 990 updateObjectNesting(GAI); 991 } else { 992 BD = GAI->second; 993 if (!BD->hasName(Name)) { 994 GlobalSymbols[Name] = BD; 995 BD->Symbols.push_back(Symbol); 996 } 997 } 998 999 return Symbol; 1000 } 1001 1002 const BinaryData * 1003 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 1004 auto NI = BinaryDataMap.lower_bound(Address); 1005 auto End = BinaryDataMap.end(); 1006 if ((NI != End && Address == NI->first) || 1007 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 1008 if (NI->second->containsAddress(Address)) 1009 return NI->second; 1010 1011 // If this is a sub-symbol, see if a parent data contains the address. 1012 const BinaryData *BD = NI->second->getParent(); 1013 while (BD) { 1014 if (BD->containsAddress(Address)) 1015 return BD; 1016 BD = BD->getParent(); 1017 } 1018 } 1019 return nullptr; 1020 } 1021 1022 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1023 auto NI = BinaryDataMap.find(Address); 1024 assert(NI != BinaryDataMap.end()); 1025 if (NI == BinaryDataMap.end()) 1026 return false; 1027 // TODO: it's possible that a jump table starts at the same address 1028 // as a larger blob of private data. When we set the size of the 1029 // jump table, it might be smaller than the total blob size. In this 1030 // case we just leave the original size since (currently) it won't really 1031 // affect anything. 1032 assert((!NI->second->Size || NI->second->Size == Size || 1033 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1034 "can't change the size of a symbol that has already had its " 1035 "size set"); 1036 if (!NI->second->Size) { 1037 NI->second->Size = Size; 1038 updateObjectNesting(NI); 1039 return true; 1040 } 1041 return false; 1042 } 1043 1044 void BinaryContext::generateSymbolHashes() { 1045 auto isPadding = [](const BinaryData &BD) { 1046 StringRef Contents = BD.getSection().getContents(); 1047 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1048 return (BD.getName().startswith("HOLEat") || 1049 SymData.find_first_not_of(0) == StringRef::npos); 1050 }; 1051 1052 uint64_t NumCollisions = 0; 1053 for (auto &Entry : BinaryDataMap) { 1054 BinaryData &BD = *Entry.second; 1055 StringRef Name = BD.getName(); 1056 1057 if (!isInternalSymbolName(Name)) 1058 continue; 1059 1060 // First check if a non-anonymous alias exists and move it to the front. 1061 if (BD.getSymbols().size() > 1) { 1062 auto Itr = std::find_if(BD.getSymbols().begin(), BD.getSymbols().end(), 1063 [&](const MCSymbol *Symbol) { 1064 return !isInternalSymbolName(Symbol->getName()); 1065 }); 1066 if (Itr != BD.getSymbols().end()) { 1067 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1068 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1069 continue; 1070 } 1071 } 1072 1073 // We have to skip 0 size symbols since they will all collide. 1074 if (BD.getSize() == 0) { 1075 continue; 1076 } 1077 1078 const uint64_t Hash = BD.getSection().hash(BD); 1079 const size_t Idx = Name.find("0x"); 1080 std::string NewName = 1081 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1082 if (getBinaryDataByName(NewName)) { 1083 // Ignore collisions for symbols that appear to be padding 1084 // (i.e. all zeros or a "hole") 1085 if (!isPadding(BD)) { 1086 if (opts::Verbosity) { 1087 errs() << "BOLT-WARNING: collision detected when hashing " << BD 1088 << " with new name (" << NewName << "), skipping.\n"; 1089 } 1090 ++NumCollisions; 1091 } 1092 continue; 1093 } 1094 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1095 GlobalSymbols[NewName] = &BD; 1096 } 1097 if (NumCollisions) { 1098 errs() << "BOLT-WARNING: " << NumCollisions 1099 << " collisions detected while hashing binary objects"; 1100 if (!opts::Verbosity) 1101 errs() << ". Use -v=1 to see the list."; 1102 errs() << '\n'; 1103 } 1104 } 1105 1106 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1107 BinaryFunction &Function) const { 1108 if (!isPotentialFragmentByName(TargetFunction, Function)) 1109 return false; 1110 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1111 if (TargetFunction.isParentFragment(&Function)) 1112 return true; 1113 TargetFunction.addParentFragment(Function); 1114 Function.addFragment(TargetFunction); 1115 if (!HasRelocations) { 1116 TargetFunction.setSimple(false); 1117 Function.setSimple(false); 1118 } 1119 if (opts::Verbosity >= 1) { 1120 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 1121 << Function << '\n'; 1122 } 1123 return true; 1124 } 1125 1126 void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) { 1127 for (uint64_t Address : Function.InterproceduralReferences) { 1128 if (!Address) 1129 continue; 1130 1131 BinaryFunction *TargetFunction = 1132 getBinaryFunctionContainingAddress(Address); 1133 if (&Function == TargetFunction) 1134 continue; 1135 1136 if (TargetFunction) { 1137 if (TargetFunction->IsFragment && 1138 !registerFragment(*TargetFunction, Function)) { 1139 errs() << "BOLT-WARNING: interprocedural reference between unrelated " 1140 "fragments: " 1141 << Function.getPrintName() << " and " 1142 << TargetFunction->getPrintName() << '\n'; 1143 } 1144 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1145 TargetFunction->addEntryPointAtOffset(Offset); 1146 1147 continue; 1148 } 1149 1150 // Check if address falls in function padding space - this could be 1151 // unmarked data in code. In this case adjust the padding space size. 1152 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1153 assert(Section && "cannot get section for referenced address"); 1154 1155 if (!Section->isText()) 1156 continue; 1157 1158 // PLT requires special handling and could be ignored in this context. 1159 StringRef SectionName = Section->getName(); 1160 if (SectionName == ".plt" || SectionName == ".plt.got") 1161 continue; 1162 1163 if (opts::processAllFunctions()) { 1164 errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1165 << "object in code at address 0x" << Twine::utohexstr(Address) 1166 << " belonging to section " << SectionName << " in current mode\n"; 1167 exit(1); 1168 } 1169 1170 TargetFunction = getBinaryFunctionContainingAddress(Address, 1171 /*CheckPastEnd=*/false, 1172 /*UseMaxSize=*/true); 1173 // We are not going to overwrite non-simple functions, but for simple 1174 // ones - adjust the padding size. 1175 if (TargetFunction && TargetFunction->isSimple()) { 1176 errs() << "BOLT-WARNING: function " << *TargetFunction 1177 << " has an object detected in a padding region at address 0x" 1178 << Twine::utohexstr(Address) << '\n'; 1179 TargetFunction->setMaxSize(TargetFunction->getSize()); 1180 } 1181 } 1182 1183 clearList(Function.InterproceduralReferences); 1184 } 1185 1186 void BinaryContext::postProcessSymbolTable() { 1187 fixBinaryDataHoles(); 1188 bool Valid = true; 1189 for (auto &Entry : BinaryDataMap) { 1190 BinaryData *BD = Entry.second; 1191 if ((BD->getName().startswith("SYMBOLat") || 1192 BD->getName().startswith("DATAat")) && 1193 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1194 BD->getSection()) { 1195 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1196 Valid = false; 1197 } 1198 } 1199 assert(Valid); 1200 (void)Valid; 1201 generateSymbolHashes(); 1202 } 1203 1204 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1205 BinaryFunction &ParentBF) { 1206 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1207 "cannot merge functions with multiple entry points"); 1208 1209 std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex, 1210 std::defer_lock); 1211 std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock( 1212 SymbolToFunctionMapMutex, std::defer_lock); 1213 1214 const StringRef ChildName = ChildBF.getOneName(); 1215 1216 // Move symbols over and update bookkeeping info. 1217 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1218 ParentBF.getSymbols().push_back(Symbol); 1219 WriteSymbolMapLock.lock(); 1220 SymbolToFunctionMap[Symbol] = &ParentBF; 1221 WriteSymbolMapLock.unlock(); 1222 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1223 } 1224 ChildBF.getSymbols().clear(); 1225 1226 // Move other names the child function is known under. 1227 std::move(ChildBF.Aliases.begin(), ChildBF.Aliases.end(), 1228 std::back_inserter(ParentBF.Aliases)); 1229 ChildBF.Aliases.clear(); 1230 1231 if (HasRelocations) { 1232 // Merge execution counts of ChildBF into those of ParentBF. 1233 // Without relocations, we cannot reliably merge profiles as both functions 1234 // continue to exist and either one can be executed. 1235 ChildBF.mergeProfileDataInto(ParentBF); 1236 1237 std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex, 1238 std::defer_lock); 1239 std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex, 1240 std::defer_lock); 1241 // Remove ChildBF from the global set of functions in relocs mode. 1242 ReadBfsLock.lock(); 1243 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1244 ReadBfsLock.unlock(); 1245 1246 assert(FI != BinaryFunctions.end() && "function not found"); 1247 assert(&ChildBF == &FI->second && "function mismatch"); 1248 1249 WriteBfsLock.lock(); 1250 ChildBF.clearDisasmState(); 1251 FI = BinaryFunctions.erase(FI); 1252 WriteBfsLock.unlock(); 1253 1254 } else { 1255 // In non-relocation mode we keep the function, but rename it. 1256 std::string NewName = "__ICF_" + ChildName.str(); 1257 1258 WriteCtxLock.lock(); 1259 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1260 WriteCtxLock.unlock(); 1261 1262 ChildBF.setFolded(&ParentBF); 1263 } 1264 } 1265 1266 void BinaryContext::fixBinaryDataHoles() { 1267 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1268 1269 for (BinarySection &Section : allocatableSections()) { 1270 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1271 1272 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1273 BinaryData *BD = Itr->second; 1274 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1275 (BD->getName().startswith("SYMBOLat0x") || 1276 BD->getName().startswith("DATAat0x") || 1277 BD->getName().startswith("ANONYMOUS"))); 1278 return !isHole && BD->getSection() == Section && !BD->getParent(); 1279 }; 1280 1281 auto BDStart = BinaryDataMap.begin(); 1282 auto BDEnd = BinaryDataMap.end(); 1283 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1284 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1285 1286 uint64_t EndAddress = Section.getAddress(); 1287 1288 while (Itr != End) { 1289 if (Itr->second->getAddress() > EndAddress) { 1290 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1291 Holes.emplace_back(EndAddress, Gap); 1292 } 1293 EndAddress = Itr->second->getEndAddress(); 1294 ++Itr; 1295 } 1296 1297 if (EndAddress < Section.getEndAddress()) 1298 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1299 1300 // If there is already a symbol at the start of the hole, grow that symbol 1301 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1302 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1303 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1304 if (BD) { 1305 // BD->getSection() can be != Section if there are sections that 1306 // overlap. In this case it is probably safe to just skip the holes 1307 // since the overlapping section will not(?) have any symbols in it. 1308 if (BD->getSection() == Section) 1309 setBinaryDataSize(Hole.first, Hole.second); 1310 } else { 1311 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1312 } 1313 } 1314 } 1315 1316 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1317 assert(validateHoles() && "top level hole detected in object map"); 1318 } 1319 1320 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1321 const BinarySection *CurrentSection = nullptr; 1322 bool FirstSection = true; 1323 1324 for (auto &Entry : BinaryDataMap) { 1325 const BinaryData *BD = Entry.second; 1326 const BinarySection &Section = BD->getSection(); 1327 if (FirstSection || Section != *CurrentSection) { 1328 uint64_t Address, Size; 1329 StringRef Name = Section.getName(); 1330 if (Section) { 1331 Address = Section.getAddress(); 1332 Size = Section.getSize(); 1333 } else { 1334 Address = BD->getAddress(); 1335 Size = BD->getSize(); 1336 } 1337 OS << "BOLT-INFO: Section " << Name << ", " 1338 << "0x" + Twine::utohexstr(Address) << ":" 1339 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1340 CurrentSection = &Section; 1341 FirstSection = false; 1342 } 1343 1344 OS << "BOLT-INFO: "; 1345 const BinaryData *P = BD->getParent(); 1346 while (P) { 1347 OS << " "; 1348 P = P->getParent(); 1349 } 1350 OS << *BD << "\n"; 1351 } 1352 } 1353 1354 Expected<unsigned> BinaryContext::getDwarfFile( 1355 StringRef Directory, StringRef FileName, unsigned FileNumber, 1356 Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source, 1357 unsigned CUID, unsigned DWARFVersion) { 1358 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1359 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1360 FileNumber); 1361 } 1362 1363 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1364 const uint32_t SrcCUID, 1365 unsigned FileIndex) { 1366 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1367 const DWARFDebugLine::LineTable *LineTable = 1368 DwCtx->getLineTableForUnit(SrcUnit); 1369 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1370 LineTable->Prologue.FileNames; 1371 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1372 // means empty dir. 1373 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1374 "FileIndex out of range for the compilation unit."); 1375 StringRef Dir = ""; 1376 if (FileNames[FileIndex - 1].DirIdx != 0) { 1377 if (Optional<const char *> DirName = dwarf::toString( 1378 LineTable->Prologue 1379 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1380 Dir = *DirName; 1381 } 1382 } 1383 StringRef FileName = ""; 1384 if (Optional<const char *> FName = 1385 dwarf::toString(FileNames[FileIndex - 1].Name)) 1386 FileName = *FName; 1387 assert(FileName != ""); 1388 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1389 return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID, 1390 DstUnit->getVersion())); 1391 } 1392 1393 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1394 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1395 std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), 1396 SortedFunctions.begin(), 1397 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1398 return &BFI.second; 1399 }); 1400 1401 std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(), 1402 [](const BinaryFunction *A, const BinaryFunction *B) { 1403 if (A->hasValidIndex() && B->hasValidIndex()) { 1404 return A->getIndex() < B->getIndex(); 1405 } 1406 return A->hasValidIndex(); 1407 }); 1408 return SortedFunctions; 1409 } 1410 1411 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1412 std::vector<BinaryFunction *> AllFunctions; 1413 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1414 std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), 1415 std::back_inserter(AllFunctions), 1416 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1417 return &BFI.second; 1418 }); 1419 std::copy(InjectedBinaryFunctions.begin(), InjectedBinaryFunctions.end(), 1420 std::back_inserter(AllFunctions)); 1421 1422 return AllFunctions; 1423 } 1424 1425 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1426 auto Iter = DWOCUs.find(DWOId); 1427 if (Iter == DWOCUs.end()) 1428 return None; 1429 1430 return Iter->second; 1431 } 1432 1433 DWARFContext *BinaryContext::getDWOContext() const { 1434 if (DWOCUs.empty()) 1435 return nullptr; 1436 return &DWOCUs.begin()->second->getContext(); 1437 } 1438 1439 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1440 void BinaryContext::preprocessDWODebugInfo() { 1441 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1442 DWARFUnit *const DwarfUnit = CU.get(); 1443 if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1444 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1445 if (!DWOCU->isDWOUnit()) { 1446 std::string DWOName = dwarf::toString( 1447 DwarfUnit->getUnitDIE().find( 1448 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1449 ""); 1450 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1451 << DWOName 1452 << " was not retrieved and won't be updated. Please check " 1453 "relative path.\n"; 1454 continue; 1455 } 1456 DWOCUs[*DWOId] = DWOCU; 1457 } 1458 } 1459 } 1460 1461 void BinaryContext::preprocessDebugInfo() { 1462 struct CURange { 1463 uint64_t LowPC; 1464 uint64_t HighPC; 1465 DWARFUnit *Unit; 1466 1467 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1468 }; 1469 1470 // Building a map of address ranges to CUs similar to .debug_aranges and use 1471 // it to assign CU to functions. 1472 std::vector<CURange> AllRanges; 1473 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1474 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1475 Expected<DWARFAddressRangesVector> RangesOrError = 1476 CU->getUnitDIE().getAddressRanges(); 1477 if (!RangesOrError) { 1478 consumeError(RangesOrError.takeError()); 1479 continue; 1480 } 1481 for (DWARFAddressRange &Range : *RangesOrError) { 1482 // Parts of the debug info could be invalidated due to corresponding code 1483 // being removed from the binary by the linker. Hence we check if the 1484 // address is a valid one. 1485 if (containsAddress(Range.LowPC)) 1486 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1487 } 1488 1489 ContainsDwarf5 |= CU->getVersion() >= 5; 1490 ContainsDwarfLegacy |= CU->getVersion() < 5; 1491 } 1492 1493 if (ContainsDwarf5 && ContainsDwarfLegacy) 1494 llvm::errs() << "BOLT-WARNING: BOLT does not support mix mode binary with " 1495 "DWARF5 and DWARF{2,3,4}.\n"; 1496 1497 std::sort(AllRanges.begin(), AllRanges.end()); 1498 for (auto &KV : BinaryFunctions) { 1499 const uint64_t FunctionAddress = KV.first; 1500 BinaryFunction &Function = KV.second; 1501 1502 auto It = std::partition_point( 1503 AllRanges.begin(), AllRanges.end(), 1504 [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1505 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) { 1506 Function.setDWARFUnit(It->Unit); 1507 } 1508 } 1509 1510 // Discover units with debug info that needs to be updated. 1511 for (const auto &KV : BinaryFunctions) { 1512 const BinaryFunction &BF = KV.second; 1513 if (shouldEmit(BF) && BF.getDWARFUnit()) 1514 ProcessedCUs.insert(BF.getDWARFUnit()); 1515 } 1516 1517 // Clear debug info for functions from units that we are not going to process. 1518 for (auto &KV : BinaryFunctions) { 1519 BinaryFunction &BF = KV.second; 1520 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1521 BF.setDWARFUnit(nullptr); 1522 } 1523 1524 if (opts::Verbosity >= 1) { 1525 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1526 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1527 } 1528 1529 preprocessDWODebugInfo(); 1530 1531 // Populate MCContext with DWARF files from all units. 1532 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1533 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1534 const uint64_t CUID = CU->getOffset(); 1535 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1536 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1537 GlobalPrefix + "line_table_start" + Twine(CUID))); 1538 1539 if (!ProcessedCUs.count(CU.get())) 1540 continue; 1541 1542 const DWARFDebugLine::LineTable *LineTable = 1543 DwCtx->getLineTableForUnit(CU.get()); 1544 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1545 LineTable->Prologue.FileNames; 1546 1547 uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1548 if (DwarfVersion >= 5) { 1549 Optional<MD5::MD5Result> Checksum = None; 1550 if (LineTable->Prologue.ContentTypes.HasMD5) 1551 Checksum = LineTable->Prologue.FileNames[0].Checksum; 1552 Optional<const char *> Name = 1553 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1554 if (Optional<uint64_t> DWOID = CU->getDWOId()) { 1555 auto Iter = DWOCUs.find(*DWOID); 1556 assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1557 Name = dwarf::toString( 1558 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1559 } 1560 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1561 None); 1562 } 1563 1564 BinaryLineTable.setDwarfVersion(DwarfVersion); 1565 1566 // Assign a unique label to every line table, one per CU. 1567 // Make sure empty debug line tables are registered too. 1568 if (FileNames.empty()) { 1569 cantFail( 1570 getDwarfFile("", "<unknown>", 0, None, None, CUID, DwarfVersion)); 1571 continue; 1572 } 1573 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1574 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1575 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1576 // means empty dir. 1577 StringRef Dir = ""; 1578 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 1579 if (Optional<const char *> DirName = dwarf::toString( 1580 LineTable->Prologue 1581 .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1582 Dir = *DirName; 1583 StringRef FileName = ""; 1584 if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name)) 1585 FileName = *FName; 1586 assert(FileName != ""); 1587 Optional<MD5::MD5Result> Checksum = None; 1588 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1589 Checksum = LineTable->Prologue.FileNames[I].Checksum; 1590 cantFail( 1591 getDwarfFile(Dir, FileName, 0, Checksum, None, CUID, DwarfVersion)); 1592 } 1593 } 1594 } 1595 1596 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1597 if (Function.isPseudo()) 1598 return false; 1599 1600 if (opts::processAllFunctions()) 1601 return true; 1602 1603 if (Function.isIgnored()) 1604 return false; 1605 1606 // In relocation mode we will emit non-simple functions with CFG. 1607 // If the function does not have a CFG it should be marked as ignored. 1608 return HasRelocations || Function.isSimple(); 1609 } 1610 1611 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1612 uint32_t Operation = Inst.getOperation(); 1613 switch (Operation) { 1614 case MCCFIInstruction::OpSameValue: 1615 OS << "OpSameValue Reg" << Inst.getRegister(); 1616 break; 1617 case MCCFIInstruction::OpRememberState: 1618 OS << "OpRememberState"; 1619 break; 1620 case MCCFIInstruction::OpRestoreState: 1621 OS << "OpRestoreState"; 1622 break; 1623 case MCCFIInstruction::OpOffset: 1624 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1625 break; 1626 case MCCFIInstruction::OpDefCfaRegister: 1627 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1628 break; 1629 case MCCFIInstruction::OpDefCfaOffset: 1630 OS << "OpDefCfaOffset " << Inst.getOffset(); 1631 break; 1632 case MCCFIInstruction::OpDefCfa: 1633 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1634 break; 1635 case MCCFIInstruction::OpRelOffset: 1636 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1637 break; 1638 case MCCFIInstruction::OpAdjustCfaOffset: 1639 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1640 break; 1641 case MCCFIInstruction::OpEscape: 1642 OS << "OpEscape"; 1643 break; 1644 case MCCFIInstruction::OpRestore: 1645 OS << "OpRestore Reg" << Inst.getRegister(); 1646 break; 1647 case MCCFIInstruction::OpUndefined: 1648 OS << "OpUndefined Reg" << Inst.getRegister(); 1649 break; 1650 case MCCFIInstruction::OpRegister: 1651 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1652 << Inst.getRegister2(); 1653 break; 1654 case MCCFIInstruction::OpWindowSave: 1655 OS << "OpWindowSave"; 1656 break; 1657 case MCCFIInstruction::OpGnuArgsSize: 1658 OS << "OpGnuArgsSize"; 1659 break; 1660 default: 1661 OS << "Op#" << Operation; 1662 break; 1663 } 1664 } 1665 1666 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 1667 // For aarch64, the ABI defines mapping symbols so we identify data in the 1668 // code section (see IHI0056B). $x identifies a symbol starting code or the 1669 // end of a data chunk inside code, $d indentifies start of data. 1670 if (!isAArch64() || ELFSymbolRef(Symbol).getSize()) 1671 return MarkerSymType::NONE; 1672 1673 Expected<StringRef> NameOrError = Symbol.getName(); 1674 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 1675 1676 if (!TypeOrError || !NameOrError) 1677 return MarkerSymType::NONE; 1678 1679 if (*TypeOrError != SymbolRef::ST_Unknown) 1680 return MarkerSymType::NONE; 1681 1682 if (*NameOrError == "$x" || NameOrError->startswith("$x.")) 1683 return MarkerSymType::CODE; 1684 1685 if (*NameOrError == "$d" || NameOrError->startswith("$d.")) 1686 return MarkerSymType::DATA; 1687 1688 return MarkerSymType::NONE; 1689 } 1690 1691 bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 1692 return getMarkerType(Symbol) != MarkerSymType::NONE; 1693 } 1694 1695 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, 1696 const BinaryFunction *Function, 1697 DWARFContext *DwCtx) { 1698 DebugLineTableRowRef RowRef = 1699 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1700 if (RowRef == DebugLineTableRowRef::NULL_ROW) 1701 return; 1702 1703 const DWARFDebugLine::LineTable *LineTable; 1704 if (Function && Function->getDWARFUnit() && 1705 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1706 LineTable = Function->getDWARFLineTable(); 1707 } else { 1708 LineTable = DwCtx->getLineTableForUnit( 1709 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1710 } 1711 assert(LineTable && "line table expected for instruction with debug info"); 1712 1713 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1714 StringRef FileName = ""; 1715 if (Optional<const char *> FName = 1716 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1717 FileName = *FName; 1718 OS << " # debug line " << FileName << ":" << Row.Line; 1719 if (Row.Column) 1720 OS << ":" << Row.Column; 1721 if (Row.Discriminator) 1722 OS << " discriminator:" << Row.Discriminator; 1723 } 1724 1725 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1726 uint64_t Offset, 1727 const BinaryFunction *Function, 1728 bool PrintMCInst, bool PrintMemData, 1729 bool PrintRelocations, 1730 StringRef Endl) const { 1731 if (MIB->isEHLabel(Instruction)) { 1732 OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl; 1733 return; 1734 } 1735 OS << format(" %08" PRIx64 ": ", Offset); 1736 if (MIB->isCFI(Instruction)) { 1737 uint32_t Offset = Instruction.getOperand(0).getImm(); 1738 OS << "\t!CFI\t$" << Offset << "\t; "; 1739 if (Function) 1740 printCFI(OS, *Function->getCFIFor(Instruction)); 1741 OS << Endl; 1742 return; 1743 } 1744 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1745 if (MIB->isCall(Instruction)) { 1746 if (MIB->isTailCall(Instruction)) 1747 OS << " # TAILCALL "; 1748 if (MIB->isInvoke(Instruction)) { 1749 const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction); 1750 OS << " # handler: "; 1751 if (EHInfo->first) 1752 OS << *EHInfo->first; 1753 else 1754 OS << '0'; 1755 OS << "; action: " << EHInfo->second; 1756 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1757 if (GnuArgsSize >= 0) 1758 OS << "; GNU_args_size = " << GnuArgsSize; 1759 } 1760 } else if (MIB->isIndirectBranch(Instruction)) { 1761 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1762 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1763 } else { 1764 OS << " # UNKNOWN CONTROL FLOW"; 1765 } 1766 } 1767 if (Optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1768 OS << " # Offset: " << *Offset; 1769 1770 MIB->printAnnotations(Instruction, OS); 1771 1772 if (opts::PrintDebugInfo) 1773 printDebugInfo(OS, Instruction, Function, DwCtx.get()); 1774 1775 if ((opts::PrintRelocations || PrintRelocations) && Function) { 1776 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1777 Function->printRelocations(OS, Offset, Size); 1778 } 1779 1780 OS << Endl; 1781 1782 if (PrintMCInst) { 1783 Instruction.dump_pretty(OS, InstPrinter.get()); 1784 OS << Endl; 1785 } 1786 } 1787 1788 Optional<uint64_t> 1789 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 1790 uint64_t FileOffset) const { 1791 // Find a segment with a matching file offset. 1792 for (auto &KV : SegmentMapInfo) { 1793 const SegmentInfo &SegInfo = KV.second; 1794 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) { 1795 // Use segment's aligned memory offset to calculate the base address. 1796 const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment); 1797 return MMapAddress - MemOffset; 1798 } 1799 } 1800 1801 return NoneType(); 1802 } 1803 1804 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 1805 auto SI = AddressToSection.upper_bound(Address); 1806 if (SI != AddressToSection.begin()) { 1807 --SI; 1808 uint64_t UpperBound = SI->first + SI->second->getSize(); 1809 if (!SI->second->getSize()) 1810 UpperBound += 1; 1811 if (UpperBound > Address) 1812 return *SI->second; 1813 } 1814 return std::make_error_code(std::errc::bad_address); 1815 } 1816 1817 ErrorOr<StringRef> 1818 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 1819 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 1820 return Section->getName(); 1821 return std::make_error_code(std::errc::bad_address); 1822 } 1823 1824 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 1825 auto Res = Sections.insert(Section); 1826 (void)Res; 1827 assert(Res.second && "can't register the same section twice."); 1828 1829 // Only register allocatable sections in the AddressToSection map. 1830 if (Section->isAllocatable() && Section->getAddress()) 1831 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 1832 NameToSection.insert( 1833 std::make_pair(std::string(Section->getName()), Section)); 1834 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 1835 return *Section; 1836 } 1837 1838 BinarySection &BinaryContext::registerSection(SectionRef Section) { 1839 return registerSection(new BinarySection(*this, Section)); 1840 } 1841 1842 BinarySection & 1843 BinaryContext::registerSection(StringRef SectionName, 1844 const BinarySection &OriginalSection) { 1845 return registerSection( 1846 new BinarySection(*this, SectionName, OriginalSection)); 1847 } 1848 1849 BinarySection & 1850 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType, 1851 unsigned ELFFlags, uint8_t *Data, 1852 uint64_t Size, unsigned Alignment) { 1853 auto NamedSections = getSectionByName(Name); 1854 if (NamedSections.begin() != NamedSections.end()) { 1855 assert(std::next(NamedSections.begin()) == NamedSections.end() && 1856 "can only update unique sections"); 1857 BinarySection *Section = NamedSections.begin()->second; 1858 1859 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 1860 const bool Flag = Section->isAllocatable(); 1861 (void)Flag; 1862 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 1863 LLVM_DEBUG(dbgs() << *Section << "\n"); 1864 // FIXME: Fix section flags/attributes for MachO. 1865 if (isELF()) 1866 assert(Flag == Section->isAllocatable() && 1867 "can't change section allocation status"); 1868 return *Section; 1869 } 1870 1871 return registerSection( 1872 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 1873 } 1874 1875 bool BinaryContext::deregisterSection(BinarySection &Section) { 1876 BinarySection *SectionPtr = &Section; 1877 auto Itr = Sections.find(SectionPtr); 1878 if (Itr != Sections.end()) { 1879 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 1880 while (Range.first != Range.second) { 1881 if (Range.first->second == SectionPtr) { 1882 AddressToSection.erase(Range.first); 1883 break; 1884 } 1885 ++Range.first; 1886 } 1887 1888 auto NameRange = 1889 NameToSection.equal_range(std::string(SectionPtr->getName())); 1890 while (NameRange.first != NameRange.second) { 1891 if (NameRange.first->second == SectionPtr) { 1892 NameToSection.erase(NameRange.first); 1893 break; 1894 } 1895 ++NameRange.first; 1896 } 1897 1898 Sections.erase(Itr); 1899 delete SectionPtr; 1900 return true; 1901 } 1902 return false; 1903 } 1904 1905 void BinaryContext::printSections(raw_ostream &OS) const { 1906 for (BinarySection *const &Section : Sections) 1907 OS << "BOLT-INFO: " << *Section << "\n"; 1908 } 1909 1910 BinarySection &BinaryContext::absoluteSection() { 1911 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 1912 return *Section; 1913 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 1914 } 1915 1916 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 1917 size_t Size) const { 1918 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 1919 if (!Section) 1920 return std::make_error_code(std::errc::bad_address); 1921 1922 if (Section->isVirtual()) 1923 return 0; 1924 1925 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 1926 AsmInfo->getCodePointerSize()); 1927 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 1928 return DE.getUnsigned(&ValueOffset, Size); 1929 } 1930 1931 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 1932 size_t Size) const { 1933 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 1934 if (!Section) 1935 return std::make_error_code(std::errc::bad_address); 1936 1937 if (Section->isVirtual()) 1938 return 0; 1939 1940 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 1941 AsmInfo->getCodePointerSize()); 1942 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 1943 return DE.getSigned(&ValueOffset, Size); 1944 } 1945 1946 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 1947 uint64_t Type, uint64_t Addend, 1948 uint64_t Value) { 1949 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1950 assert(Section && "cannot find section for address"); 1951 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 1952 Value); 1953 } 1954 1955 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 1956 uint64_t Type, uint64_t Addend, 1957 uint64_t Value) { 1958 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1959 assert(Section && "cannot find section for address"); 1960 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 1961 Addend, Value); 1962 } 1963 1964 bool BinaryContext::removeRelocationAt(uint64_t Address) { 1965 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1966 assert(Section && "cannot find section for address"); 1967 return Section->removeRelocationAt(Address - Section->getAddress()); 1968 } 1969 1970 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) { 1971 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1972 if (!Section) 1973 return nullptr; 1974 1975 return Section->getRelocationAt(Address - Section->getAddress()); 1976 } 1977 1978 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) { 1979 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1980 if (!Section) 1981 return nullptr; 1982 1983 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 1984 } 1985 1986 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 1987 const uint64_t Address) { 1988 auto setImmovable = [&](BinaryData &BD) { 1989 BinaryData *Root = BD.getAtomicRoot(); 1990 LLVM_DEBUG(if (Root->isMoveable()) { 1991 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 1992 << "due to ambiguous relocation referencing 0x" 1993 << Twine::utohexstr(Address) << '\n'; 1994 }); 1995 Root->setIsMoveable(false); 1996 }; 1997 1998 if (Address == BD.getAddress()) { 1999 setImmovable(BD); 2000 2001 // Set previous symbol as immovable 2002 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 2003 if (Prev && Prev->getEndAddress() == BD.getAddress()) 2004 setImmovable(*Prev); 2005 } 2006 2007 if (Address == BD.getEndAddress()) { 2008 setImmovable(BD); 2009 2010 // Set next symbol as immovable 2011 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 2012 if (Next && Next->getAddress() == BD.getEndAddress()) 2013 setImmovable(*Next); 2014 } 2015 } 2016 2017 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 2018 uint64_t *EntryDesc) { 2019 std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex); 2020 auto BFI = SymbolToFunctionMap.find(Symbol); 2021 if (BFI == SymbolToFunctionMap.end()) 2022 return nullptr; 2023 2024 BinaryFunction *BF = BFI->second; 2025 if (EntryDesc) 2026 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 2027 2028 return BF; 2029 } 2030 2031 void BinaryContext::exitWithBugReport(StringRef Message, 2032 const BinaryFunction &Function) const { 2033 errs() << "=======================================\n"; 2034 errs() << "BOLT is unable to proceed because it couldn't properly understand " 2035 "this function.\n"; 2036 errs() << "If you are running the most recent version of BOLT, you may " 2037 "want to " 2038 "report this and paste this dump.\nPlease check that there is no " 2039 "sensitive contents being shared in this dump.\n"; 2040 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 2041 ScopedPrinter SP(errs()); 2042 SP.printBinaryBlock("Function contents", *Function.getData()); 2043 errs() << "\n"; 2044 Function.dump(); 2045 errs() << "ERROR: " << Message; 2046 errs() << "\n=======================================\n"; 2047 exit(1); 2048 } 2049 2050 BinaryFunction * 2051 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 2052 bool IsSimple) { 2053 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 2054 BinaryFunction *BF = InjectedBinaryFunctions.back(); 2055 setSymbolToFunctionMap(BF->getSymbol(), BF); 2056 BF->CurrentState = BinaryFunction::State::CFG; 2057 return BF; 2058 } 2059 2060 std::pair<size_t, size_t> 2061 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2062 // Adjust branch instruction to match the current layout. 2063 if (FixBranches) 2064 BF.fixBranches(); 2065 2066 // Create local MC context to isolate the effect of ephemeral code emission. 2067 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2068 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2069 MCAsmBackend *MAB = 2070 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2071 2072 SmallString<256> Code; 2073 raw_svector_ostream VecOS(Code); 2074 2075 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2076 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2077 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2078 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 2079 /*RelaxAll=*/false, 2080 /*IncrementalLinkerCompatible=*/false, 2081 /*DWARFMustBeAtTheEnd=*/false)); 2082 2083 Streamer->initSections(false, *STI); 2084 2085 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2086 Section->setHasInstructions(true); 2087 2088 // Create symbols in the LocalCtx so that they get destroyed with it. 2089 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2090 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2091 MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol(); 2092 MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol(); 2093 2094 Streamer->switchSection(Section); 2095 Streamer->emitLabel(StartLabel); 2096 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false, 2097 /*EmitCodeOnly=*/true); 2098 Streamer->emitLabel(EndLabel); 2099 2100 if (BF.isSplit()) { 2101 MCSectionELF *ColdSection = 2102 LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS, 2103 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2104 ColdSection->setHasInstructions(true); 2105 2106 Streamer->switchSection(ColdSection); 2107 Streamer->emitLabel(ColdStartLabel); 2108 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true, 2109 /*EmitCodeOnly=*/true); 2110 Streamer->emitLabel(ColdEndLabel); 2111 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private 2112 Streamer->emitBytes(StringRef("")); 2113 Streamer->switchSection(Section); 2114 } 2115 2116 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 2117 // MCStreamer::Finish(), which does more than we want 2118 Streamer->emitBytes(StringRef("")); 2119 2120 MCAssembler &Assembler = 2121 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2122 MCAsmLayout Layout(Assembler); 2123 Assembler.layout(Layout); 2124 2125 const uint64_t HotSize = 2126 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2127 const uint64_t ColdSize = BF.isSplit() 2128 ? Layout.getSymbolOffset(*ColdEndLabel) - 2129 Layout.getSymbolOffset(*ColdStartLabel) 2130 : 0ULL; 2131 2132 // Clean-up the effect of the code emission. 2133 for (const MCSymbol &Symbol : Assembler.symbols()) { 2134 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2135 MutableSymbol->setUndefined(); 2136 MutableSymbol->setIsRegistered(false); 2137 } 2138 2139 return std::make_pair(HotSize, ColdSize); 2140 } 2141 2142 bool BinaryContext::validateEncoding(const MCInst &Inst, 2143 ArrayRef<uint8_t> InputEncoding) const { 2144 SmallString<256> Code; 2145 SmallVector<MCFixup, 4> Fixups; 2146 raw_svector_ostream VecOS(Code); 2147 2148 MCE->encodeInstruction(Inst, VecOS, Fixups, *STI); 2149 auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2150 if (InputEncoding != EncodedData) { 2151 if (opts::Verbosity > 1) { 2152 errs() << "BOLT-WARNING: mismatched encoding detected\n" 2153 << " input: " << InputEncoding << '\n' 2154 << " output: " << EncodedData << '\n'; 2155 } 2156 return false; 2157 } 2158 2159 return true; 2160 } 2161 2162 uint64_t BinaryContext::getHotThreshold() const { 2163 static uint64_t Threshold = 0; 2164 if (Threshold == 0) { 2165 Threshold = std::max( 2166 (uint64_t)opts::ExecutionCountThreshold, 2167 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2168 } 2169 return Threshold; 2170 } 2171 2172 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2173 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2174 auto FI = BinaryFunctions.upper_bound(Address); 2175 if (FI == BinaryFunctions.begin()) 2176 return nullptr; 2177 --FI; 2178 2179 const uint64_t UsedSize = 2180 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2181 2182 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2183 return nullptr; 2184 2185 return &FI->second; 2186 } 2187 2188 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2189 // First, try to find a function starting at the given address. If the 2190 // function was folded, this will get us the original folded function if it 2191 // wasn't removed from the list, e.g. in non-relocation mode. 2192 auto BFI = BinaryFunctions.find(Address); 2193 if (BFI != BinaryFunctions.end()) 2194 return &BFI->second; 2195 2196 // We might have folded the function matching the object at the given 2197 // address. In such case, we look for a function matching the symbol 2198 // registered at the original address. The new function (the one that the 2199 // original was folded into) will hold the symbol. 2200 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2201 uint64_t EntryID = 0; 2202 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2203 if (BF && EntryID == 0) 2204 return BF; 2205 } 2206 return nullptr; 2207 } 2208 2209 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2210 const DWARFAddressRangesVector &InputRanges) const { 2211 DebugAddressRangesVector OutputRanges; 2212 2213 for (const DWARFAddressRange Range : InputRanges) { 2214 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2215 while (BFI != BinaryFunctions.end()) { 2216 const BinaryFunction &Function = BFI->second; 2217 if (Function.getAddress() >= Range.HighPC) 2218 break; 2219 const DebugAddressRangesVector FunctionRanges = 2220 Function.getOutputAddressRanges(); 2221 std::move(std::begin(FunctionRanges), std::end(FunctionRanges), 2222 std::back_inserter(OutputRanges)); 2223 std::advance(BFI, 1); 2224 } 2225 } 2226 2227 return OutputRanges; 2228 } 2229 2230 } // namespace bolt 2231 } // namespace llvm 2232