1 //===- bolt/Core/BinaryBasicBlock.cpp - Low-level basic block -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryBasicBlock class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryBasicBlock.h" 14 #include "bolt/Core/BinaryContext.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "llvm/ADT/SmallPtrSet.h" 17 #include "llvm/MC/MCAsmLayout.h" 18 #include "llvm/MC/MCInst.h" 19 #include "llvm/Support/Errc.h" 20 21 #define DEBUG_TYPE "bolt" 22 23 namespace llvm { 24 namespace bolt { 25 26 constexpr uint32_t BinaryBasicBlock::INVALID_OFFSET; 27 28 bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS) { 29 return LHS.Index < RHS.Index; 30 } 31 32 bool BinaryBasicBlock::hasCFG() const { return getParent()->hasCFG(); } 33 34 bool BinaryBasicBlock::isEntryPoint() const { 35 return getParent()->isEntryPoint(*this); 36 } 37 38 bool BinaryBasicBlock::hasInstructions() const { 39 return getParent()->hasInstructions(); 40 } 41 42 bool BinaryBasicBlock::hasJumpTable() const { 43 const MCInst *Inst = getLastNonPseudoInstr(); 44 const JumpTable *JT = Inst ? Function->getJumpTable(*Inst) : nullptr; 45 return (JT != nullptr); 46 } 47 48 void BinaryBasicBlock::adjustNumPseudos(const MCInst &Inst, int Sign) { 49 BinaryContext &BC = Function->getBinaryContext(); 50 if (BC.MIB->isPseudo(Inst)) 51 NumPseudos += Sign; 52 } 53 54 BinaryBasicBlock::iterator BinaryBasicBlock::getFirstNonPseudo() { 55 const BinaryContext &BC = Function->getBinaryContext(); 56 for (auto II = Instructions.begin(), E = Instructions.end(); II != E; ++II) { 57 if (!BC.MIB->isPseudo(*II)) 58 return II; 59 } 60 return end(); 61 } 62 63 BinaryBasicBlock::reverse_iterator BinaryBasicBlock::getLastNonPseudo() { 64 const BinaryContext &BC = Function->getBinaryContext(); 65 for (auto RII = Instructions.rbegin(), E = Instructions.rend(); RII != E; 66 ++RII) { 67 if (!BC.MIB->isPseudo(*RII)) 68 return RII; 69 } 70 return rend(); 71 } 72 73 bool BinaryBasicBlock::validateSuccessorInvariants() { 74 const MCInst *Inst = getLastNonPseudoInstr(); 75 const JumpTable *JT = Inst ? Function->getJumpTable(*Inst) : nullptr; 76 BinaryContext &BC = Function->getBinaryContext(); 77 bool Valid = true; 78 79 if (JT) { 80 // Note: for now we assume that successors do not reference labels from 81 // any overlapping jump tables. We only look at the entries for the jump 82 // table that is referenced at the last instruction. 83 const auto Range = JT->getEntriesForAddress(BC.MIB->getJumpTable(*Inst)); 84 const std::vector<const MCSymbol *> Entries( 85 std::next(JT->Entries.begin(), Range.first), 86 std::next(JT->Entries.begin(), Range.second)); 87 std::set<const MCSymbol *> UniqueSyms(Entries.begin(), Entries.end()); 88 for (BinaryBasicBlock *Succ : Successors) { 89 auto Itr = UniqueSyms.find(Succ->getLabel()); 90 if (Itr != UniqueSyms.end()) { 91 UniqueSyms.erase(Itr); 92 } else { 93 // Work on the assumption that jump table blocks don't 94 // have a conditional successor. 95 Valid = false; 96 errs() << "BOLT-WARNING: Jump table successor " << Succ->getName() 97 << " not contained in the jump table.\n"; 98 } 99 } 100 // If there are any leftover entries in the jump table, they 101 // must be one of the function end labels. 102 if (Valid) { 103 for (const MCSymbol *Sym : UniqueSyms) { 104 Valid &= (Sym == Function->getFunctionEndLabel() || 105 Sym == Function->getFunctionColdEndLabel()); 106 if (!Valid) { 107 errs() << "BOLT-WARNING: Jump table contains illegal entry: " 108 << Sym->getName() << "\n"; 109 } 110 } 111 } 112 } else { 113 // Unknown control flow. 114 if (Inst && BC.MIB->isIndirectBranch(*Inst)) 115 return true; 116 117 const MCSymbol *TBB = nullptr; 118 const MCSymbol *FBB = nullptr; 119 MCInst *CondBranch = nullptr; 120 MCInst *UncondBranch = nullptr; 121 122 if (analyzeBranch(TBB, FBB, CondBranch, UncondBranch)) { 123 switch (Successors.size()) { 124 case 0: 125 Valid = !CondBranch && !UncondBranch; 126 break; 127 case 1: { 128 const bool HasCondBlock = 129 CondBranch && Function->getBasicBlockForLabel( 130 BC.MIB->getTargetSymbol(*CondBranch)); 131 Valid = !CondBranch || !HasCondBlock; 132 break; 133 } 134 case 2: 135 Valid = (CondBranch && 136 (TBB == getConditionalSuccessor(true)->getLabel() && 137 ((!UncondBranch && !FBB) || 138 (UncondBranch && 139 FBB == getConditionalSuccessor(false)->getLabel())))); 140 break; 141 } 142 } 143 } 144 if (!Valid) { 145 errs() << "BOLT-WARNING: CFG invalid in " << *getFunction() << " @ " 146 << getName() << "\n"; 147 if (JT) { 148 errs() << "Jump Table instruction addr = 0x" 149 << Twine::utohexstr(BC.MIB->getJumpTable(*Inst)) << "\n"; 150 JT->print(errs()); 151 } 152 getFunction()->dump(); 153 } 154 return Valid; 155 } 156 157 BinaryBasicBlock *BinaryBasicBlock::getSuccessor(const MCSymbol *Label) const { 158 if (!Label && succ_size() == 1) 159 return *succ_begin(); 160 161 for (BinaryBasicBlock *BB : successors()) 162 if (BB->getLabel() == Label) 163 return BB; 164 165 return nullptr; 166 } 167 168 BinaryBasicBlock *BinaryBasicBlock::getSuccessor(const MCSymbol *Label, 169 BinaryBranchInfo &BI) const { 170 auto BIIter = branch_info_begin(); 171 for (BinaryBasicBlock *BB : successors()) { 172 if (BB->getLabel() == Label) { 173 BI = *BIIter; 174 return BB; 175 } 176 ++BIIter; 177 } 178 179 return nullptr; 180 } 181 182 BinaryBasicBlock *BinaryBasicBlock::getLandingPad(const MCSymbol *Label) const { 183 for (BinaryBasicBlock *BB : landing_pads()) 184 if (BB->getLabel() == Label) 185 return BB; 186 187 return nullptr; 188 } 189 190 int32_t BinaryBasicBlock::getCFIStateAtInstr(const MCInst *Instr) const { 191 assert( 192 getFunction()->getState() >= BinaryFunction::State::CFG && 193 "can only calculate CFI state when function is in or past the CFG state"); 194 195 const BinaryFunction::CFIInstrMapType &FDEProgram = 196 getFunction()->getFDEProgram(); 197 198 // Find the last CFI preceding Instr in this basic block. 199 const MCInst *LastCFI = nullptr; 200 bool InstrSeen = (Instr == nullptr); 201 for (auto RII = Instructions.rbegin(), E = Instructions.rend(); RII != E; 202 ++RII) { 203 if (!InstrSeen) { 204 InstrSeen = (&*RII == Instr); 205 continue; 206 } 207 if (Function->getBinaryContext().MIB->isCFI(*RII)) { 208 LastCFI = &*RII; 209 break; 210 } 211 } 212 213 assert(InstrSeen && "instruction expected in basic block"); 214 215 // CFI state is the same as at basic block entry point. 216 if (!LastCFI) 217 return getCFIState(); 218 219 // Fold all RememberState/RestoreState sequences, such as for: 220 // 221 // [ CFI #(K-1) ] 222 // RememberState (#K) 223 // .... 224 // RestoreState 225 // RememberState 226 // .... 227 // RestoreState 228 // [ GNU_args_size ] 229 // RememberState 230 // .... 231 // RestoreState <- LastCFI 232 // 233 // we return K - the most efficient state to (re-)generate. 234 int64_t State = LastCFI->getOperand(0).getImm(); 235 while (State >= 0 && 236 FDEProgram[State].getOperation() == MCCFIInstruction::OpRestoreState) { 237 int32_t Depth = 1; 238 --State; 239 assert(State >= 0 && "first CFI cannot be RestoreState"); 240 while (Depth && State >= 0) { 241 const MCCFIInstruction &CFIInstr = FDEProgram[State]; 242 if (CFIInstr.getOperation() == MCCFIInstruction::OpRestoreState) 243 ++Depth; 244 else if (CFIInstr.getOperation() == MCCFIInstruction::OpRememberState) 245 --Depth; 246 --State; 247 } 248 assert(Depth == 0 && "unbalanced RememberState/RestoreState stack"); 249 250 // Skip any GNU_args_size. 251 while (State >= 0 && FDEProgram[State].getOperation() == 252 MCCFIInstruction::OpGnuArgsSize) { 253 --State; 254 } 255 } 256 257 assert((State + 1 >= 0) && "miscalculated CFI state"); 258 return State + 1; 259 } 260 261 void BinaryBasicBlock::addSuccessor(BinaryBasicBlock *Succ, uint64_t Count, 262 uint64_t MispredictedCount) { 263 Successors.push_back(Succ); 264 BranchInfo.push_back({Count, MispredictedCount}); 265 Succ->Predecessors.push_back(this); 266 } 267 268 void BinaryBasicBlock::replaceSuccessor(BinaryBasicBlock *Succ, 269 BinaryBasicBlock *NewSucc, 270 uint64_t Count, 271 uint64_t MispredictedCount) { 272 Succ->removePredecessor(this, /*Multiple=*/false); 273 auto I = succ_begin(); 274 auto BI = BranchInfo.begin(); 275 for (; I != succ_end(); ++I) { 276 assert(BI != BranchInfo.end() && "missing BranchInfo entry"); 277 if (*I == Succ) 278 break; 279 ++BI; 280 } 281 assert(I != succ_end() && "no such successor!"); 282 283 *I = NewSucc; 284 *BI = BinaryBranchInfo{Count, MispredictedCount}; 285 NewSucc->addPredecessor(this); 286 } 287 288 void BinaryBasicBlock::removeAllSuccessors() { 289 SmallPtrSet<BinaryBasicBlock *, 2> UniqSuccessors(succ_begin(), succ_end()); 290 for (BinaryBasicBlock *SuccessorBB : UniqSuccessors) 291 SuccessorBB->removePredecessor(this); 292 Successors.clear(); 293 BranchInfo.clear(); 294 } 295 296 void BinaryBasicBlock::removeSuccessor(BinaryBasicBlock *Succ) { 297 Succ->removePredecessor(this, /*Multiple=*/false); 298 auto I = succ_begin(); 299 auto BI = BranchInfo.begin(); 300 for (; I != succ_end(); ++I) { 301 assert(BI != BranchInfo.end() && "missing BranchInfo entry"); 302 if (*I == Succ) 303 break; 304 ++BI; 305 } 306 assert(I != succ_end() && "no such successor!"); 307 308 Successors.erase(I); 309 BranchInfo.erase(BI); 310 } 311 312 void BinaryBasicBlock::addPredecessor(BinaryBasicBlock *Pred) { 313 Predecessors.push_back(Pred); 314 } 315 316 void BinaryBasicBlock::removePredecessor(BinaryBasicBlock *Pred, 317 bool Multiple) { 318 // Note: the predecessor could be listed multiple times. 319 bool Erased = false; 320 for (auto PredI = Predecessors.begin(); PredI != Predecessors.end();) { 321 if (*PredI == Pred) { 322 Erased = true; 323 PredI = Predecessors.erase(PredI); 324 if (!Multiple) 325 return; 326 } else { 327 ++PredI; 328 } 329 } 330 assert(Erased && "Pred is not a predecessor of this block!"); 331 } 332 333 void BinaryBasicBlock::removeDuplicateConditionalSuccessor(MCInst *CondBranch) { 334 assert(succ_size() == 2 && Successors[0] == Successors[1] && 335 "conditional successors expected"); 336 337 BinaryBasicBlock *Succ = Successors[0]; 338 const BinaryBranchInfo CondBI = BranchInfo[0]; 339 const BinaryBranchInfo UncondBI = BranchInfo[1]; 340 341 eraseInstruction(findInstruction(CondBranch)); 342 343 Successors.clear(); 344 BranchInfo.clear(); 345 346 Successors.push_back(Succ); 347 348 uint64_t Count = COUNT_NO_PROFILE; 349 if (CondBI.Count != COUNT_NO_PROFILE && UncondBI.Count != COUNT_NO_PROFILE) 350 Count = CondBI.Count + UncondBI.Count; 351 BranchInfo.push_back({Count, 0}); 352 } 353 354 void BinaryBasicBlock::adjustExecutionCount(double Ratio) { 355 auto adjustedCount = [&](uint64_t Count) -> uint64_t { 356 double NewCount = Count * Ratio; 357 if (!NewCount && Count && (Ratio > 0.0)) 358 NewCount = 1; 359 return NewCount; 360 }; 361 362 setExecutionCount(adjustedCount(getKnownExecutionCount())); 363 for (BinaryBranchInfo &BI : branch_info()) { 364 if (BI.Count != COUNT_NO_PROFILE) 365 BI.Count = adjustedCount(BI.Count); 366 if (BI.MispredictedCount != COUNT_INFERRED) 367 BI.MispredictedCount = adjustedCount(BI.MispredictedCount); 368 } 369 } 370 371 bool BinaryBasicBlock::analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB, 372 MCInst *&CondBranch, 373 MCInst *&UncondBranch) { 374 auto &MIB = Function->getBinaryContext().MIB; 375 return MIB->analyzeBranch(Instructions.begin(), Instructions.end(), TBB, FBB, 376 CondBranch, UncondBranch); 377 } 378 379 bool BinaryBasicBlock::isMacroOpFusionPair(const_iterator I) const { 380 auto &MIB = Function->getBinaryContext().MIB; 381 ArrayRef<MCInst> Insts = Instructions; 382 return MIB->isMacroOpFusionPair(Insts.slice(I - begin())); 383 } 384 385 BinaryBasicBlock::const_iterator 386 BinaryBasicBlock::getMacroOpFusionPair() const { 387 if (!Function->getBinaryContext().isX86()) 388 return end(); 389 390 if (getNumNonPseudos() < 2 || succ_size() != 2) 391 return end(); 392 393 auto RI = getLastNonPseudo(); 394 assert(RI != rend() && "cannot have an empty block with 2 successors"); 395 396 BinaryContext &BC = Function->getBinaryContext(); 397 398 // Skip instruction if it's an unconditional branch following 399 // a conditional one. 400 if (BC.MIB->isUnconditionalBranch(*RI)) 401 ++RI; 402 403 if (!BC.MIB->isConditionalBranch(*RI)) 404 return end(); 405 406 // Start checking with instruction preceding the conditional branch. 407 ++RI; 408 if (RI == rend()) 409 return end(); 410 411 auto II = std::prev(RI.base()); // convert to a forward iterator 412 if (isMacroOpFusionPair(II)) 413 return II; 414 415 return end(); 416 } 417 418 MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) { 419 BinaryContext &BC = Function->getBinaryContext(); 420 auto Itr = rbegin(); 421 bool Check = Pos ? false : true; 422 MCInst *FirstTerminator = nullptr; 423 while (Itr != rend()) { 424 if (!Check) { 425 if (&*Itr == Pos) 426 Check = true; 427 ++Itr; 428 continue; 429 } 430 if (BC.MIB->isTerminator(*Itr)) 431 FirstTerminator = &*Itr; 432 ++Itr; 433 } 434 return FirstTerminator; 435 } 436 437 bool BinaryBasicBlock::hasTerminatorAfter(MCInst *Pos) { 438 BinaryContext &BC = Function->getBinaryContext(); 439 auto Itr = rbegin(); 440 while (Itr != rend()) { 441 if (&*Itr == Pos) 442 return false; 443 if (BC.MIB->isTerminator(*Itr)) 444 return true; 445 ++Itr; 446 } 447 return false; 448 } 449 450 bool BinaryBasicBlock::swapConditionalSuccessors() { 451 if (succ_size() != 2) 452 return false; 453 454 std::swap(Successors[0], Successors[1]); 455 std::swap(BranchInfo[0], BranchInfo[1]); 456 return true; 457 } 458 459 void BinaryBasicBlock::addBranchInstruction(const BinaryBasicBlock *Successor) { 460 assert(isSuccessor(Successor)); 461 BinaryContext &BC = Function->getBinaryContext(); 462 MCInst NewInst; 463 std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex); 464 BC.MIB->createUncondBranch(NewInst, Successor->getLabel(), BC.Ctx.get()); 465 Instructions.emplace_back(std::move(NewInst)); 466 } 467 468 void BinaryBasicBlock::addTailCallInstruction(const MCSymbol *Target) { 469 BinaryContext &BC = Function->getBinaryContext(); 470 MCInst NewInst; 471 BC.MIB->createTailCall(NewInst, Target, BC.Ctx.get()); 472 Instructions.emplace_back(std::move(NewInst)); 473 } 474 475 uint32_t BinaryBasicBlock::getNumCalls() const { 476 uint32_t N = 0; 477 BinaryContext &BC = Function->getBinaryContext(); 478 for (const MCInst &Instr : Instructions) { 479 if (BC.MIB->isCall(Instr)) 480 ++N; 481 } 482 return N; 483 } 484 485 uint32_t BinaryBasicBlock::getNumPseudos() const { 486 #ifndef NDEBUG 487 BinaryContext &BC = Function->getBinaryContext(); 488 uint32_t N = 0; 489 for (const MCInst &Instr : Instructions) 490 if (BC.MIB->isPseudo(Instr)) 491 ++N; 492 493 if (N != NumPseudos) { 494 errs() << "BOLT-ERROR: instructions for basic block " << getName() 495 << " in function " << *Function << ": calculated pseudos " << N 496 << ", set pseudos " << NumPseudos << ", size " << size() << '\n'; 497 llvm_unreachable("pseudos mismatch"); 498 } 499 #endif 500 return NumPseudos; 501 } 502 503 ErrorOr<std::pair<double, double>> 504 BinaryBasicBlock::getBranchStats(const BinaryBasicBlock *Succ) const { 505 if (Function->hasValidProfile()) { 506 uint64_t TotalCount = 0; 507 uint64_t TotalMispreds = 0; 508 for (const BinaryBranchInfo &BI : BranchInfo) { 509 if (BI.Count != COUNT_NO_PROFILE) { 510 TotalCount += BI.Count; 511 TotalMispreds += BI.MispredictedCount; 512 } 513 } 514 515 if (TotalCount > 0) { 516 auto Itr = std::find(Successors.begin(), Successors.end(), Succ); 517 assert(Itr != Successors.end()); 518 const BinaryBranchInfo &BI = BranchInfo[Itr - Successors.begin()]; 519 if (BI.Count && BI.Count != COUNT_NO_PROFILE) { 520 if (TotalMispreds == 0) 521 TotalMispreds = 1; 522 return std::make_pair(double(BI.Count) / TotalCount, 523 double(BI.MispredictedCount) / TotalMispreds); 524 } 525 } 526 } 527 return make_error_code(llvm::errc::result_out_of_range); 528 } 529 530 void BinaryBasicBlock::dump() const { 531 BinaryContext &BC = Function->getBinaryContext(); 532 if (Label) 533 outs() << Label->getName() << ":\n"; 534 BC.printInstructions(outs(), Instructions.begin(), Instructions.end(), 535 getOffset()); 536 outs() << "preds:"; 537 for (auto itr = pred_begin(); itr != pred_end(); ++itr) { 538 outs() << " " << (*itr)->getName(); 539 } 540 outs() << "\nsuccs:"; 541 for (auto itr = succ_begin(); itr != succ_end(); ++itr) { 542 outs() << " " << (*itr)->getName(); 543 } 544 outs() << "\n"; 545 } 546 547 uint64_t BinaryBasicBlock::estimateSize(const MCCodeEmitter *Emitter) const { 548 return Function->getBinaryContext().computeCodeSize(begin(), end(), Emitter); 549 } 550 551 BinaryBasicBlock::BinaryBranchInfo & 552 BinaryBasicBlock::getBranchInfo(const BinaryBasicBlock &Succ) { 553 auto BI = branch_info_begin(); 554 for (BinaryBasicBlock *BB : successors()) { 555 if (&Succ == BB) 556 return *BI; 557 ++BI; 558 } 559 560 llvm_unreachable("Invalid successor"); 561 return *BI; 562 } 563 564 BinaryBasicBlock::BinaryBranchInfo & 565 BinaryBasicBlock::getBranchInfo(const MCSymbol *Label) { 566 auto BI = branch_info_begin(); 567 for (BinaryBasicBlock *BB : successors()) { 568 if (BB->getLabel() == Label) 569 return *BI; 570 ++BI; 571 } 572 573 llvm_unreachable("Invalid successor"); 574 return *BI; 575 } 576 577 BinaryBasicBlock *BinaryBasicBlock::splitAt(iterator II) { 578 assert(II != end() && "expected iterator pointing to instruction"); 579 580 BinaryBasicBlock *NewBlock = getFunction()->addBasicBlock(0); 581 582 // Adjust successors/predecessors and propagate the execution count. 583 moveAllSuccessorsTo(NewBlock); 584 addSuccessor(NewBlock, getExecutionCount(), 0); 585 586 // Set correct CFI state for the new block. 587 NewBlock->setCFIState(getCFIStateAtInstr(&*II)); 588 589 // Move instructions over. 590 adjustNumPseudos(II, end(), -1); 591 NewBlock->addInstructions(II, end()); 592 Instructions.erase(II, end()); 593 594 return NewBlock; 595 } 596 597 void BinaryBasicBlock::updateOutputValues(const MCAsmLayout &Layout) { 598 if (!LocSyms) 599 return; 600 601 const uint64_t BBAddress = getOutputAddressRange().first; 602 const uint64_t BBOffset = Layout.getSymbolOffset(*getLabel()); 603 for (const auto &LocSymKV : *LocSyms) { 604 const uint32_t InputFunctionOffset = LocSymKV.first; 605 const uint32_t OutputOffset = static_cast<uint32_t>( 606 Layout.getSymbolOffset(*LocSymKV.second) - BBOffset); 607 getOffsetTranslationTable().emplace_back( 608 std::make_pair(OutputOffset, InputFunctionOffset)); 609 610 // Update reverse (relative to BAT) address lookup table for function. 611 if (getFunction()->requiresAddressTranslation()) { 612 getFunction()->getInputOffsetToAddressMap().emplace( 613 std::make_pair(InputFunctionOffset, OutputOffset + BBAddress)); 614 } 615 } 616 LocSyms.reset(nullptr); 617 } 618 619 } // namespace bolt 620 } // namespace llvm 621