1 //===- bolt/Core/BinaryBasicBlock.h - Low-level basic block -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Sequence of MC/MCPlus instructions. Call/invoke does not terminate the block. 10 // CFI instructions are part of the instruction list with the initial CFI state 11 // defined at the beginning of the block. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef BOLT_CORE_BINARY_BASIC_BLOCK_H 16 #define BOLT_CORE_BINARY_BASIC_BLOCK_H 17 18 #include "bolt/Core/FunctionLayout.h" 19 #include "bolt/Core/MCPlus.h" 20 #include "llvm/ADT/GraphTraits.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/MC/MCInst.h" 23 #include "llvm/MC/MCSymbol.h" 24 #include "llvm/Support/ErrorOr.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <limits> 27 #include <utility> 28 29 namespace llvm { 30 class MCCodeEmitter; 31 32 namespace bolt { 33 34 class BinaryFunction; 35 class JumpTable; 36 37 class BinaryBasicBlock { 38 public: 39 /// Profile execution information for a given edge in CFG. 40 /// 41 /// If MispredictedCount equals COUNT_INFERRED, then we have a profile 42 /// data for a fall-through edge with a Count representing an inferred 43 /// execution count, i.e. the count we calculated internally, not the one 44 /// coming from profile data. 45 /// 46 /// For all other values of MispredictedCount, Count represents the number of 47 /// branch executions from a profile, and MispredictedCount is the number 48 /// of times the branch was mispredicted according to this profile. 49 struct BinaryBranchInfo { 50 uint64_t Count; 51 uint64_t MispredictedCount; /// number of branches mispredicted 52 53 bool operator<(const BinaryBranchInfo &Other) const { 54 return (Count < Other.Count) || 55 (Count == Other.Count && 56 MispredictedCount < Other.MispredictedCount); 57 } 58 }; 59 60 static constexpr uint32_t INVALID_OFFSET = 61 std::numeric_limits<uint32_t>::max(); 62 63 using BranchInfoType = SmallVector<BinaryBranchInfo, 0>; 64 65 private: 66 /// Vector of all instructions in the block. 67 InstructionListType Instructions; 68 69 /// CFG information. 70 using EdgeListType = SmallVector<BinaryBasicBlock *, 0>; 71 EdgeListType Predecessors; 72 EdgeListType Successors; 73 74 /// Each successor has a corresponding BranchInfo entry in the list. 75 BranchInfoType BranchInfo; 76 77 using ExceptionListType = SmallVector<BinaryBasicBlock *, 0>; 78 79 /// List of blocks that this landing pad is handling. 80 ExceptionListType Throwers; 81 82 /// List of blocks that can catch exceptions thrown by code in this block. 83 ExceptionListType LandingPads; 84 85 /// Function that owns this basic block. 86 BinaryFunction *Function; 87 88 /// Label associated with the block. 89 MCSymbol *Label{nullptr}; 90 91 /// [Begin, End) address range for this block in the output binary. 92 std::pair<uint32_t, uint32_t> OutputAddressRange = {0, 0}; 93 94 /// Original offset range of the basic block in the function. 95 std::pair<uint32_t, uint32_t> InputRange = {INVALID_OFFSET, INVALID_OFFSET}; 96 97 /// Map input offset (from function start) of an instruction to an output 98 /// symbol. Enables writing BOLT address translation tables used for mapping 99 /// control transfer in the output binary back to the original binary. 100 using LocSymsTy = std::vector<std::pair<uint32_t, const MCSymbol *>>; 101 std::unique_ptr<LocSymsTy> LocSyms; 102 103 /// After output/codegen, map output offsets of instructions in this basic 104 /// block to instruction offsets in the original function. Note that the 105 /// output basic block could be different from the input basic block. 106 /// We only map instruction of interest, such as calls, and sdt markers. 107 /// 108 /// We store the offset array in a basic block to facilitate BAT tables 109 /// generation. Otherwise, the mapping could be done at function level. 110 using OffsetTranslationTableTy = std::vector<std::pair<uint32_t, uint32_t>>; 111 std::unique_ptr<OffsetTranslationTableTy> OffsetTranslationTable; 112 113 /// Alignment requirements for the block. 114 uint32_t Alignment{1}; 115 116 /// Maximum number of bytes to use for alignment of the block. 117 uint32_t AlignmentMaxBytes{0}; 118 119 /// Number of times this basic block was executed. 120 uint64_t ExecutionCount{COUNT_NO_PROFILE}; 121 122 static constexpr unsigned InvalidIndex = ~0u; 123 124 /// Index to BasicBlocks vector in BinaryFunction. 125 unsigned Index{InvalidIndex}; 126 127 /// Index in the current layout. 128 mutable unsigned LayoutIndex{InvalidIndex}; 129 130 /// Number of pseudo instructions in this block. 131 uint32_t NumPseudos{0}; 132 133 /// CFI state at the entry to this basic block. 134 int32_t CFIState{-1}; 135 136 /// In cases where the parent function has been split, IsCold == true means 137 /// this BB will be allocated outside its parent function. 138 bool IsCold{false}; 139 140 /// Indicates if the block could be outlined. 141 bool CanOutline{true}; 142 143 /// Flag to indicate whether this block is valid or not. Invalid 144 /// blocks may contain out of date or incorrect information. 145 bool IsValid{true}; 146 147 private: 148 BinaryBasicBlock() = delete; 149 BinaryBasicBlock(const BinaryBasicBlock &) = delete; 150 BinaryBasicBlock(const BinaryBasicBlock &&) = delete; 151 BinaryBasicBlock &operator=(const BinaryBasicBlock &) = delete; 152 BinaryBasicBlock &operator=(const BinaryBasicBlock &&) = delete; 153 BinaryBasicBlock(BinaryFunction * Function,MCSymbol * Label)154 explicit BinaryBasicBlock(BinaryFunction *Function, MCSymbol *Label) 155 : Function(Function), Label(Label) { 156 assert(Function && "Function must be non-null"); 157 } 158 159 // Exclusively managed by BinaryFunction. 160 friend class BinaryFunction; 161 friend bool operator<(const BinaryBasicBlock &LHS, 162 const BinaryBasicBlock &RHS); 163 164 /// Assign new label to the basic block. setLabel(MCSymbol * Symbol)165 void setLabel(MCSymbol *Symbol) { Label = Symbol; } 166 167 public: 168 static constexpr uint64_t COUNT_INFERRED = 169 std::numeric_limits<uint64_t>::max(); 170 static constexpr uint64_t COUNT_NO_PROFILE = 171 std::numeric_limits<uint64_t>::max(); 172 173 // Instructions iterators. 174 using iterator = InstructionListType::iterator; 175 using const_iterator = InstructionListType::const_iterator; 176 using reverse_iterator = std::reverse_iterator<iterator>; 177 using const_reverse_iterator = std::reverse_iterator<const_iterator>; 178 empty()179 bool empty() const { assert(hasInstructions()); 180 return Instructions.empty(); } size()181 size_t size() const { assert(hasInstructions()); 182 return Instructions.size(); } front()183 MCInst &front() { assert(hasInstructions()); 184 return Instructions.front(); } back()185 MCInst &back() { assert(hasInstructions()); 186 return Instructions.back(); } front()187 const MCInst &front() const { assert(hasInstructions()); 188 return Instructions.front(); } back()189 const MCInst &back() const { assert(hasInstructions()); 190 return Instructions.back(); } 191 begin()192 iterator begin() { assert(hasInstructions()); 193 return Instructions.begin(); } begin()194 const_iterator begin() const { assert(hasInstructions()); 195 return Instructions.begin(); } end()196 iterator end () { assert(hasInstructions()); 197 return Instructions.end(); } end()198 const_iterator end () const { assert(hasInstructions()); 199 return Instructions.end(); } rbegin()200 reverse_iterator rbegin() { assert(hasInstructions()); 201 return Instructions.rbegin(); } rbegin()202 const_reverse_iterator rbegin() const { assert(hasInstructions()); 203 return Instructions.rbegin(); } rend()204 reverse_iterator rend () { assert(hasInstructions()); 205 return Instructions.rend(); } rend()206 const_reverse_iterator rend () const { assert(hasInstructions()); 207 return Instructions.rend(); } 208 209 // CFG iterators. 210 using pred_iterator = EdgeListType::iterator; 211 using const_pred_iterator = EdgeListType::const_iterator; 212 using succ_iterator = EdgeListType::iterator; 213 using const_succ_iterator = EdgeListType::const_iterator; 214 using throw_iterator = decltype(Throwers)::iterator; 215 using const_throw_iterator = decltype(Throwers)::const_iterator; 216 using lp_iterator = decltype(LandingPads)::iterator; 217 using const_lp_iterator = decltype(LandingPads)::const_iterator; 218 219 using pred_reverse_iterator = std::reverse_iterator<pred_iterator>; 220 using const_pred_reverse_iterator = 221 std::reverse_iterator<const_pred_iterator>; 222 using succ_reverse_iterator = std::reverse_iterator<succ_iterator>; 223 using const_succ_reverse_iterator = 224 std::reverse_iterator<const_succ_iterator>; 225 pred_begin()226 pred_iterator pred_begin() { return Predecessors.begin(); } pred_begin()227 const_pred_iterator pred_begin() const { return Predecessors.begin(); } pred_end()228 pred_iterator pred_end() { return Predecessors.end(); } pred_end()229 const_pred_iterator pred_end() const { return Predecessors.end(); } pred_rbegin()230 pred_reverse_iterator pred_rbegin() 231 { return Predecessors.rbegin();} pred_rbegin()232 const_pred_reverse_iterator pred_rbegin() const 233 { return Predecessors.rbegin();} pred_rend()234 pred_reverse_iterator pred_rend() 235 { return Predecessors.rend(); } pred_rend()236 const_pred_reverse_iterator pred_rend() const 237 { return Predecessors.rend(); } pred_size()238 size_t pred_size() const { 239 return Predecessors.size(); 240 } pred_empty()241 bool pred_empty() const { return Predecessors.empty(); } 242 succ_begin()243 succ_iterator succ_begin() { return Successors.begin(); } succ_begin()244 const_succ_iterator succ_begin() const { return Successors.begin(); } succ_end()245 succ_iterator succ_end() { return Successors.end(); } succ_end()246 const_succ_iterator succ_end() const { return Successors.end(); } succ_rbegin()247 succ_reverse_iterator succ_rbegin() 248 { return Successors.rbegin(); } succ_rbegin()249 const_succ_reverse_iterator succ_rbegin() const 250 { return Successors.rbegin(); } succ_rend()251 succ_reverse_iterator succ_rend() 252 { return Successors.rend(); } succ_rend()253 const_succ_reverse_iterator succ_rend() const 254 { return Successors.rend(); } succ_size()255 size_t succ_size() const { 256 return Successors.size(); 257 } succ_empty()258 bool succ_empty() const { return Successors.empty(); } 259 throw_begin()260 throw_iterator throw_begin() { return Throwers.begin(); } throw_begin()261 const_throw_iterator throw_begin() const { return Throwers.begin(); } throw_end()262 throw_iterator throw_end() { return Throwers.end(); } throw_end()263 const_throw_iterator throw_end() const { return Throwers.end(); } throw_size()264 size_t throw_size() const { 265 return Throwers.size(); 266 } throw_empty()267 bool throw_empty() const { return Throwers.empty(); } isLandingPad()268 bool isLandingPad() const { return !Throwers.empty(); } 269 lp_begin()270 lp_iterator lp_begin() { return LandingPads.begin(); } lp_begin()271 const_lp_iterator lp_begin() const { return LandingPads.begin(); } lp_end()272 lp_iterator lp_end() { return LandingPads.end(); } lp_end()273 const_lp_iterator lp_end() const { return LandingPads.end(); } lp_size()274 size_t lp_size() const { 275 return LandingPads.size(); 276 } lp_empty()277 bool lp_empty() const { return LandingPads.empty(); } 278 instructions()279 inline iterator_range<iterator> instructions() { 280 assert(hasInstructions()); 281 return iterator_range<iterator>(begin(), end()); 282 } instructions()283 inline iterator_range<const_iterator> instructions() const { 284 assert(hasInstructions()); 285 return iterator_range<const_iterator>(begin(), end()); 286 } predecessors()287 inline iterator_range<pred_iterator> predecessors() { 288 assert(hasCFG()); 289 return iterator_range<pred_iterator>(pred_begin(), pred_end()); 290 } predecessors()291 inline iterator_range<const_pred_iterator> predecessors() const { 292 assert(hasCFG()); 293 return iterator_range<const_pred_iterator>(pred_begin(), pred_end()); 294 } successors()295 inline iterator_range<succ_iterator> successors() { 296 assert(hasCFG()); 297 return iterator_range<succ_iterator>(succ_begin(), succ_end()); 298 } successors()299 inline iterator_range<const_succ_iterator> successors() const { 300 assert(hasCFG()); 301 return iterator_range<const_succ_iterator>(succ_begin(), succ_end()); 302 } throwers()303 inline iterator_range<throw_iterator> throwers() { 304 assert(hasCFG()); 305 return iterator_range<throw_iterator>(throw_begin(), throw_end()); 306 } throwers()307 inline iterator_range<const_throw_iterator> throwers() const { 308 assert(hasCFG()); 309 return iterator_range<const_throw_iterator>(throw_begin(), throw_end()); 310 } landing_pads()311 inline iterator_range<lp_iterator> landing_pads() { 312 assert(hasCFG()); 313 return iterator_range<lp_iterator>(lp_begin(), lp_end()); 314 } landing_pads()315 inline iterator_range<const_lp_iterator> landing_pads() const { 316 assert(hasCFG()); 317 return iterator_range<const_lp_iterator>(lp_begin(), lp_end()); 318 } 319 320 // BranchInfo iterators. 321 using branch_info_iterator = BranchInfoType::iterator; 322 using const_branch_info_iterator = BranchInfoType::const_iterator; 323 using branch_info_reverse_iterator = 324 std::reverse_iterator<branch_info_iterator>; 325 using const_branch_info_reverse_iterator = 326 std::reverse_iterator<const_branch_info_iterator>; 327 branch_info_begin()328 branch_info_iterator branch_info_begin() { return BranchInfo.begin(); } branch_info_end()329 branch_info_iterator branch_info_end() { return BranchInfo.end(); } branch_info_begin()330 const_branch_info_iterator branch_info_begin() const { 331 return BranchInfo.begin(); 332 } branch_info_end()333 const_branch_info_iterator branch_info_end() const { 334 return BranchInfo.end(); 335 } branch_info_rbegin()336 branch_info_reverse_iterator branch_info_rbegin() { 337 return BranchInfo.rbegin(); 338 } branch_info_rend()339 branch_info_reverse_iterator branch_info_rend() { return BranchInfo.rend(); } branch_info_rbegin()340 const_branch_info_reverse_iterator branch_info_rbegin() const { 341 return BranchInfo.rbegin(); 342 } branch_info_rend()343 const_branch_info_reverse_iterator branch_info_rend() const { 344 return BranchInfo.rend(); 345 } 346 branch_info_size()347 size_t branch_info_size() const { return BranchInfo.size(); } branch_info_empty()348 bool branch_info_empty() const { return BranchInfo.empty(); } 349 branch_info()350 inline iterator_range<branch_info_iterator> branch_info() { 351 return iterator_range<branch_info_iterator>(BranchInfo.begin(), 352 BranchInfo.end()); 353 } branch_info()354 inline iterator_range<const_branch_info_iterator> branch_info() const { 355 return iterator_range<const_branch_info_iterator>(BranchInfo.begin(), 356 BranchInfo.end()); 357 } 358 359 /// Get instruction at given index. getInstructionAtIndex(unsigned Index)360 MCInst &getInstructionAtIndex(unsigned Index) { return Instructions[Index]; } 361 getInstructionAtIndex(unsigned Index)362 const MCInst &getInstructionAtIndex(unsigned Index) const { 363 return Instructions[Index]; 364 } 365 366 /// Return symbol marking the start of this basic block. getLabel()367 MCSymbol *getLabel() { return Label; } 368 369 /// Return symbol marking the start of this basic block (const version). getLabel()370 const MCSymbol *getLabel() const { return Label; } 371 372 /// Get successor with given \p Label if \p Label != nullptr. 373 /// Returns nullptr if no such successor is found. 374 /// If the \p Label == nullptr and the block has only one successor then 375 /// return the successor. 376 BinaryBasicBlock *getSuccessor(const MCSymbol *Label = nullptr) const; 377 378 /// Return the related branch info as well as the successor. 379 BinaryBasicBlock *getSuccessor(const MCSymbol *Label, 380 BinaryBranchInfo &BI) const; 381 382 /// If the basic block ends with a conditional branch (possibly followed by 383 /// an unconditional branch) and thus has 2 successors, return a successor 384 /// corresponding to a jump condition which could be true or false. 385 /// Return nullptr if the basic block does not have a conditional jump. getConditionalSuccessor(bool Condition)386 BinaryBasicBlock *getConditionalSuccessor(bool Condition) { 387 if (succ_size() != 2) 388 return nullptr; 389 return Successors[Condition == true ? 0 : 1]; 390 } 391 getConditionalSuccessor(bool Condition)392 const BinaryBasicBlock *getConditionalSuccessor(bool Condition) const { 393 return const_cast<BinaryBasicBlock *>(this)->getConditionalSuccessor( 394 Condition); 395 } 396 397 /// Find the fallthrough successor for a block, or nullptr if there is 398 /// none. getFallthrough()399 BinaryBasicBlock *getFallthrough() { 400 if (succ_size() == 2) 401 return getConditionalSuccessor(false); 402 else 403 return getSuccessor(); 404 } 405 getFallthrough()406 const BinaryBasicBlock *getFallthrough() const { 407 return const_cast<BinaryBasicBlock *>(this)->getFallthrough(); 408 } 409 410 /// Return branch info corresponding to a taken branch. getTakenBranchInfo()411 const BinaryBranchInfo &getTakenBranchInfo() const { 412 assert(BranchInfo.size() == 2 && 413 "could only be called for blocks with 2 successors"); 414 return BranchInfo[0]; 415 }; 416 417 /// Return branch info corresponding to a fall-through branch. getFallthroughBranchInfo()418 const BinaryBranchInfo &getFallthroughBranchInfo() const { 419 assert(BranchInfo.size() == 2 && 420 "could only be called for blocks with 2 successors"); 421 return BranchInfo[1]; 422 }; 423 424 /// Return branch info corresponding to an edge going to \p Succ basic block. 425 BinaryBranchInfo &getBranchInfo(const BinaryBasicBlock &Succ); 426 427 /// Return branch info corresponding to an edge going to a basic block with 428 /// label \p Label. 429 BinaryBranchInfo &getBranchInfo(const MCSymbol *Label); 430 431 /// Set branch information for the outgoing edge to block \p Succ. setSuccessorBranchInfo(const BinaryBasicBlock & Succ,uint64_t Count,uint64_t MispredictedCount)432 void setSuccessorBranchInfo(const BinaryBasicBlock &Succ, uint64_t Count, 433 uint64_t MispredictedCount) { 434 BinaryBranchInfo &BI = getBranchInfo(Succ); 435 BI.Count = Count; 436 BI.MispredictedCount = MispredictedCount; 437 } 438 439 /// Try to compute the taken and misprediction frequencies for the given 440 /// successor. The result is an error if no information can be found. 441 ErrorOr<std::pair<double, double>> 442 getBranchStats(const BinaryBasicBlock *Succ) const; 443 444 /// If the basic block ends with a conditional branch (possibly followed by 445 /// an unconditional branch) and thus has 2 successor, reverse the order of 446 /// its successors in CFG, update branch info, and return true. If the basic 447 /// block does not have 2 successors return false. 448 bool swapConditionalSuccessors(); 449 450 /// Add an instruction with unconditional control transfer to \p Successor 451 /// basic block to the end of this basic block. 452 void addBranchInstruction(const BinaryBasicBlock *Successor); 453 454 /// Add an instruction with tail call control transfer to \p Target 455 /// to the end of this basic block. 456 void addTailCallInstruction(const MCSymbol *Target); 457 458 /// Return the number of call instructions in this basic block. 459 uint32_t getNumCalls() const; 460 461 /// Get landing pad with given label. Returns nullptr if no such 462 /// landing pad is found. 463 BinaryBasicBlock *getLandingPad(const MCSymbol *Label) const; 464 465 /// Return local name for the block. getName()466 StringRef getName() const { return Label->getName(); } 467 468 /// Add instruction at the end of this basic block. 469 /// Returns iterator pointing to the inserted instruction. addInstruction(MCInst && Inst)470 iterator addInstruction(MCInst &&Inst) { 471 adjustNumPseudos(Inst, 1); 472 Instructions.emplace_back(Inst); 473 return std::prev(Instructions.end()); 474 } 475 476 /// Add instruction at the end of this basic block. 477 /// Returns iterator pointing to the inserted instruction. addInstruction(const MCInst & Inst)478 iterator addInstruction(const MCInst &Inst) { 479 adjustNumPseudos(Inst, 1); 480 Instructions.push_back(Inst); 481 return std::prev(Instructions.end()); 482 } 483 484 /// Add a range of instructions to the end of this basic block. addInstructions(Itr Begin,Itr End)485 template <typename Itr> void addInstructions(Itr Begin, Itr End) { 486 while (Begin != End) 487 addInstruction(*Begin++); 488 } 489 490 /// Add a range of instructions to the end of this basic block. addInstructions(RangeTy R)491 template <typename RangeTy> void addInstructions(RangeTy R) { 492 for (auto &I : R) 493 addInstruction(I); 494 } 495 496 /// Add instruction before Pos in this basic block. insertPseudoInstr(Itr Pos,MCInst & Instr)497 template <typename Itr> Itr insertPseudoInstr(Itr Pos, MCInst &Instr) { 498 ++NumPseudos; 499 return Instructions.insert(Pos, Instr); 500 } 501 502 /// Return the number of pseudo instructions in the basic block. 503 uint32_t getNumPseudos() const; 504 505 /// Return the number of emitted instructions for this basic block. getNumNonPseudos()506 uint32_t getNumNonPseudos() const { return size() - getNumPseudos(); } 507 508 /// Return iterator to the first non-pseudo instruction or end() 509 /// if no such instruction was found. 510 iterator getFirstNonPseudo(); 511 512 /// Return a pointer to the first non-pseudo instruction in this basic 513 /// block. Returns nullptr if none exists. getFirstNonPseudoInstr()514 MCInst *getFirstNonPseudoInstr() { 515 auto II = getFirstNonPseudo(); 516 return II == Instructions.end() ? nullptr : &*II; 517 } 518 519 /// Return reverse iterator to the last non-pseudo instruction or rend() 520 /// if no such instruction was found. 521 reverse_iterator getLastNonPseudo(); getLastNonPseudo()522 const_reverse_iterator getLastNonPseudo() const { 523 return const_cast<BinaryBasicBlock *>(this)->getLastNonPseudo(); 524 } 525 526 /// Return a pointer to the last non-pseudo instruction in this basic 527 /// block. Returns nullptr if none exists. getLastNonPseudoInstr()528 MCInst *getLastNonPseudoInstr() { 529 auto RII = getLastNonPseudo(); 530 return RII == Instructions.rend() ? nullptr : &*RII; 531 } getLastNonPseudoInstr()532 const MCInst *getLastNonPseudoInstr() const { 533 auto RII = getLastNonPseudo(); 534 return RII == Instructions.rend() ? nullptr : &*RII; 535 } 536 537 /// Set CFI state at entry to this basic block. setCFIState(int32_t NewCFIState)538 void setCFIState(int32_t NewCFIState) { 539 assert((CFIState == -1 || NewCFIState == CFIState) && 540 "unexpected change of CFI state for basic block"); 541 CFIState = NewCFIState; 542 } 543 544 /// Return CFI state (expected) at entry of this basic block. getCFIState()545 int32_t getCFIState() const { 546 assert(CFIState >= 0 && "unknown CFI state"); 547 return CFIState; 548 } 549 550 /// Calculate and return CFI state right before instruction \p Instr in 551 /// this basic block. If \p Instr is nullptr then return the state at 552 /// the end of the basic block. 553 int32_t getCFIStateAtInstr(const MCInst *Instr) const; 554 555 /// Calculate and return CFI state after execution of this basic block. 556 /// The state depends on CFI state at entry and CFI instructions inside the 557 /// basic block. getCFIStateAtExit()558 int32_t getCFIStateAtExit() const { return getCFIStateAtInstr(nullptr); } 559 560 /// Set minimum alignment for the basic block. setAlignment(uint32_t Align)561 void setAlignment(uint32_t Align) { Alignment = Align; } 562 563 /// Set alignment of the block based on the alignment of its offset. setDerivedAlignment()564 void setDerivedAlignment() { 565 const uint64_t DerivedAlignment = getOffset() & (1 + ~getOffset()); 566 Alignment = std::min(DerivedAlignment, uint64_t(32)); 567 } 568 569 /// Return required alignment for the block. getAlignment()570 uint32_t getAlignment() const { return Alignment; } 571 572 /// Set the maximum number of bytes to use for the block alignment. setAlignmentMaxBytes(uint32_t Value)573 void setAlignmentMaxBytes(uint32_t Value) { AlignmentMaxBytes = Value; } 574 575 /// Return the maximum number of bytes to use for the block alignment. getAlignmentMaxBytes()576 uint32_t getAlignmentMaxBytes() const { return AlignmentMaxBytes; } 577 578 /// Adds block to successor list, and also updates predecessor list for 579 /// successor block. 580 /// Set branch info for this path. 581 void addSuccessor(BinaryBasicBlock *Succ, uint64_t Count = 0, 582 uint64_t MispredictedCount = 0); 583 addSuccessor(BinaryBasicBlock * Succ,const BinaryBranchInfo & BI)584 void addSuccessor(BinaryBasicBlock *Succ, const BinaryBranchInfo &BI) { 585 addSuccessor(Succ, BI.Count, BI.MispredictedCount); 586 } 587 588 /// Add a range of successors. addSuccessors(Itr Begin,Itr End)589 template <typename Itr> void addSuccessors(Itr Begin, Itr End) { 590 while (Begin != End) 591 addSuccessor(*Begin++); 592 } 593 594 /// Add a range of successors with branch info. 595 template <typename Itr, typename BrItr> addSuccessors(Itr Begin,Itr End,BrItr BrBegin,BrItr BrEnd)596 void addSuccessors(Itr Begin, Itr End, BrItr BrBegin, BrItr BrEnd) { 597 assert(std::distance(Begin, End) == std::distance(BrBegin, BrEnd)); 598 while (Begin != End) 599 addSuccessor(*Begin++, *BrBegin++); 600 } 601 602 /// Replace Succ with NewSucc. This routine is helpful for preserving 603 /// the order of conditional successors when editing the CFG. 604 void replaceSuccessor(BinaryBasicBlock *Succ, BinaryBasicBlock *NewSucc, 605 uint64_t Count = 0, uint64_t MispredictedCount = 0); 606 607 /// Move all of this block's successors to a new block, and set the 608 /// execution count of this new block with our execution count. This is 609 /// useful when splitting a block in two. moveAllSuccessorsTo(BinaryBasicBlock * New)610 void moveAllSuccessorsTo(BinaryBasicBlock *New) { 611 New->addSuccessors(successors().begin(), successors().end(), 612 branch_info_begin(), branch_info_end()); 613 removeAllSuccessors(); 614 615 // Update the execution count on the new block. 616 New->setExecutionCount(getExecutionCount()); 617 } 618 619 /// Remove /p Succ basic block from the list of successors. Update the 620 /// list of predecessors of /p Succ and update branch info. 621 void removeSuccessor(BinaryBasicBlock *Succ); 622 623 /// Remove all successors of the basic block, and remove the block 624 /// from respective lists of predecessors. 625 void removeAllSuccessors(); 626 627 /// Remove useless duplicate successors. When the conditional 628 /// successor is the same as the unconditional successor, we can 629 /// remove the conditional successor and branch instruction. 630 void removeDuplicateConditionalSuccessor(MCInst *CondBranch); 631 632 /// Update successors of the basic block based on the jump table instruction. 633 /// The block must end with a jump table instruction. 634 void updateJumpTableSuccessors(); 635 636 /// Test if BB is a predecessor of this block. isPredecessor(const BinaryBasicBlock * BB)637 bool isPredecessor(const BinaryBasicBlock *BB) const { 638 return llvm::is_contained(Predecessors, BB); 639 } 640 641 /// Test if BB is a successor of this block. isSuccessor(const BinaryBasicBlock * BB)642 bool isSuccessor(const BinaryBasicBlock *BB) const { 643 return llvm::is_contained(Successors, BB); 644 } 645 646 /// Test if this BB has a valid execution count. hasProfile()647 bool hasProfile() const { return ExecutionCount != COUNT_NO_PROFILE; } 648 649 /// Return the information about the number of times this basic block was 650 /// executed. 651 /// 652 /// Return COUNT_NO_PROFILE if there's no profile info. getExecutionCount()653 uint64_t getExecutionCount() const { return ExecutionCount; } 654 655 /// Return the execution count for blocks with known profile. 656 /// Return 0 if the block has no profile. getKnownExecutionCount()657 uint64_t getKnownExecutionCount() const { 658 return !hasProfile() ? 0 : ExecutionCount; 659 } 660 661 /// Set the execution count for this block. setExecutionCount(uint64_t Count)662 void setExecutionCount(uint64_t Count) { ExecutionCount = Count; } 663 664 /// Apply a given \p Ratio to the profile information of this basic block. 665 void adjustExecutionCount(double Ratio); 666 667 /// Return true if the basic block is an entry point into the function 668 /// (either primary or secondary). 669 bool isEntryPoint() const; 670 isValid()671 bool isValid() const { return IsValid; } 672 markValid(const bool Valid)673 void markValid(const bool Valid) { IsValid = Valid; } 674 getFragmentNum()675 FragmentNum getFragmentNum() const { 676 return IsCold ? FragmentNum::cold() : FragmentNum::hot(); 677 } 678 isCold()679 bool isCold() const { return IsCold; } 680 setIsCold(const bool Flag)681 void setIsCold(const bool Flag) { IsCold = Flag; } 682 683 /// Return true if the block can be outlined. At the moment we disallow 684 /// outlining of blocks that can potentially throw exceptions or are 685 /// the beginning of a landing pad. The entry basic block also can 686 /// never be outlined. canOutline()687 bool canOutline() const { return CanOutline; } 688 setCanOutline(const bool Flag)689 void setCanOutline(const bool Flag) { CanOutline = Flag; } 690 691 /// Erase pseudo instruction at a given iterator. 692 /// Return iterator following the removed instruction. erasePseudoInstruction(iterator II)693 iterator erasePseudoInstruction(iterator II) { 694 --NumPseudos; 695 return Instructions.erase(II); 696 } 697 698 /// Erase non-pseudo instruction at a given iterator \p II. 699 /// Return iterator following the removed instruction. eraseInstruction(iterator II)700 iterator eraseInstruction(iterator II) { 701 adjustNumPseudos(*II, -1); 702 return Instructions.erase(II); 703 } 704 705 /// Erase non-pseudo instruction at a given \p Index eraseInstructionAtIndex(unsigned Index)706 void eraseInstructionAtIndex(unsigned Index) { 707 eraseInstruction(Instructions.begin() + Index); 708 } 709 710 /// Erase instructions in the specified range. 711 template <typename ItrType> eraseInstructions(ItrType Begin,ItrType End)712 void eraseInstructions(ItrType Begin, ItrType End) { 713 while (End > Begin) 714 eraseInstruction(findInstruction(*--End)); 715 } 716 717 /// Erase all instructions. clear()718 void clear() { 719 Instructions.clear(); 720 NumPseudos = 0; 721 } 722 723 /// Retrieve iterator for \p Inst or return end iterator if instruction is not 724 /// from this basic block. decltype(Instructions)725 decltype(Instructions)::iterator findInstruction(const MCInst *Inst) { 726 if (Instructions.empty()) 727 return Instructions.end(); 728 size_t Index = Inst - &Instructions[0]; 729 return Index >= Instructions.size() ? Instructions.end() 730 : Instructions.begin() + Index; 731 } 732 733 /// Replace instruction referenced by iterator \II with a sequence of 734 /// instructions defined by [\p Begin, \p End] range. 735 /// 736 /// Return iterator pointing to the first inserted instruction. 737 template <typename Itr> replaceInstruction(iterator II,Itr Begin,Itr End)738 iterator replaceInstruction(iterator II, Itr Begin, Itr End) { 739 adjustNumPseudos(*II, -1); 740 adjustNumPseudos(Begin, End, 1); 741 742 auto I = II - Instructions.begin(); 743 Instructions.insert(Instructions.erase(II), Begin, End); 744 return I + Instructions.begin(); 745 } 746 replaceInstruction(iterator II,const InstructionListType & Replacement)747 iterator replaceInstruction(iterator II, 748 const InstructionListType &Replacement) { 749 return replaceInstruction(II, Replacement.begin(), Replacement.end()); 750 } 751 752 /// Insert \p NewInst before \p At, which must be an existing instruction in 753 /// this BB. Return iterator pointing to the newly inserted instruction. insertInstruction(iterator At,MCInst && NewInst)754 iterator insertInstruction(iterator At, MCInst &&NewInst) { 755 adjustNumPseudos(NewInst, 1); 756 return Instructions.emplace(At, std::move(NewInst)); 757 } 758 insertInstruction(iterator At,MCInst & NewInst)759 iterator insertInstruction(iterator At, MCInst &NewInst) { 760 adjustNumPseudos(NewInst, 1); 761 return Instructions.insert(At, NewInst); 762 } 763 764 /// Helper to retrieve any terminators in \p BB before \p Pos. This is used 765 /// to skip CFI instructions and to retrieve the first terminator instruction 766 /// in basic blocks with two terminators (conditional jump and unconditional 767 /// jump). 768 MCInst *getTerminatorBefore(MCInst *Pos); 769 770 /// Used to identify whether an instruction is before a terminator and whether 771 /// moving it to the end of the BB would render it dead code. 772 bool hasTerminatorAfter(MCInst *Pos); 773 774 /// Split apart the instructions in this basic block starting at Inst. 775 /// The instructions following Inst are removed and returned in a vector. splitInstructions(const MCInst * Inst)776 InstructionListType splitInstructions(const MCInst *Inst) { 777 InstructionListType SplitInst; 778 779 assert(!Instructions.empty()); 780 while (&Instructions.back() != Inst) { 781 SplitInst.push_back(Instructions.back()); 782 Instructions.pop_back(); 783 } 784 std::reverse(SplitInst.begin(), SplitInst.end()); 785 NumPseudos = 0; 786 adjustNumPseudos(Instructions.begin(), Instructions.end(), 1); 787 return SplitInst; 788 } 789 790 /// Split basic block at the instruction pointed to by II. 791 /// All iterators pointing after II get invalidated. 792 /// 793 /// Return the new basic block that starts with the instruction 794 /// at the split point. 795 BinaryBasicBlock *splitAt(iterator II); 796 797 /// Set start offset of this basic block in the input binary. setOffset(uint32_t Offset)798 void setOffset(uint32_t Offset) { InputRange.first = Offset; }; 799 800 /// Sets address of the basic block in the output. setOutputStartAddress(uint64_t Address)801 void setOutputStartAddress(uint64_t Address) { 802 OutputAddressRange.first = Address; 803 } 804 805 /// Sets address past the end of the basic block in the output. setOutputEndAddress(uint64_t Address)806 void setOutputEndAddress(uint64_t Address) { 807 OutputAddressRange.second = Address; 808 } 809 810 /// Gets the memory address range of this BB in the input binary. getInputAddressRange()811 std::pair<uint64_t, uint64_t> getInputAddressRange() const { 812 return InputRange; 813 } 814 815 /// Gets the memory address range of this BB in the output binary. getOutputAddressRange()816 std::pair<uint64_t, uint64_t> getOutputAddressRange() const { 817 return OutputAddressRange; 818 } 819 820 /// Update addresses of special instructions inside this basic block. 821 void updateOutputValues(const MCAsmLayout &Layout); 822 823 /// Return mapping of input offsets to symbols in the output. getLocSyms()824 LocSymsTy &getLocSyms() { 825 return LocSyms ? *LocSyms : *(LocSyms = std::make_unique<LocSymsTy>()); 826 } 827 828 /// Return mapping of input offsets to symbols in the output. getLocSyms()829 const LocSymsTy &getLocSyms() const { 830 return const_cast<BinaryBasicBlock *>(this)->getLocSyms(); 831 } 832 833 /// Return offset translation table for the basic block. getOffsetTranslationTable()834 OffsetTranslationTableTy &getOffsetTranslationTable() { 835 return OffsetTranslationTable 836 ? *OffsetTranslationTable 837 : *(OffsetTranslationTable = 838 std::make_unique<OffsetTranslationTableTy>()); 839 } 840 841 /// Return offset translation table for the basic block. getOffsetTranslationTable()842 const OffsetTranslationTableTy &getOffsetTranslationTable() const { 843 return const_cast<BinaryBasicBlock *>(this)->getOffsetTranslationTable(); 844 } 845 846 /// Return size of the basic block in the output binary. getOutputSize()847 uint64_t getOutputSize() const { 848 return OutputAddressRange.second - OutputAddressRange.first; 849 } 850 getFunction()851 BinaryFunction *getFunction() const { return Function; } 852 853 /// Analyze and interpret the terminators of this basic block. TBB must be 854 /// initialized with the original fall-through for this BB. 855 bool analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB, 856 MCInst *&CondBranch, MCInst *&UncondBranch); 857 858 /// Return true if iterator \p I is pointing to the first instruction in 859 /// a pair that could be macro-fused. 860 bool isMacroOpFusionPair(const_iterator I) const; 861 862 /// If the basic block has a pair of instructions suitable for macro-fusion, 863 /// return iterator to the first instruction of the pair. 864 /// Otherwise return end(). 865 const_iterator getMacroOpFusionPair() const; 866 867 /// Printer required for printing dominator trees. 868 void printAsOperand(raw_ostream &OS, bool PrintType = true) { 869 if (PrintType) 870 OS << "basic block "; 871 OS << getName(); 872 } 873 874 /// A simple dump function for debugging. 875 void dump() const; 876 877 /// Validate successor invariants for this BB. 878 bool validateSuccessorInvariants(); 879 880 /// Return offset of the basic block from the function start on input. getInputOffset()881 uint32_t getInputOffset() const { return InputRange.first; } 882 883 /// Return offset from the function start to location immediately past 884 /// the end of the basic block. getEndOffset()885 uint32_t getEndOffset() const { return InputRange.second; } 886 887 /// Return size of the basic block on input. getOriginalSize()888 uint32_t getOriginalSize() const { 889 return InputRange.second - InputRange.first; 890 } 891 892 /// Returns an estimate of size of basic block during run time optionally 893 /// using a user-supplied emitter for lock-free multi-thread work. 894 /// MCCodeEmitter is not thread safe and each thread should operate with its 895 /// own copy of it. 896 uint64_t estimateSize(const MCCodeEmitter *Emitter = nullptr) const; 897 898 /// Return index in the current layout. The user is responsible for 899 /// making sure the indices are up to date, 900 /// e.g. by calling BinaryFunction::updateLayoutIndices(); getLayoutIndex()901 unsigned getLayoutIndex() const { 902 assert(isValid()); 903 return LayoutIndex; 904 } 905 906 /// Set layout index. To be used by BinaryFunction. setLayoutIndex(unsigned Index)907 void setLayoutIndex(unsigned Index) const { LayoutIndex = Index; } 908 909 /// Needed by graph traits. getParent()910 BinaryFunction *getParent() const { return getFunction(); } 911 912 /// Return true if the containing function is in CFG state. 913 bool hasCFG() const; 914 915 /// Return true if the containing function is in a state with instructions. 916 bool hasInstructions() const; 917 918 /// Return offset of the basic block from the function start. getOffset()919 uint32_t getOffset() const { return InputRange.first; } 920 921 /// Get the index of this basic block. getIndex()922 unsigned getIndex() const { 923 assert(isValid()); 924 return Index; 925 } 926 927 /// Return jump table if the block contains a jump table instruction or 928 /// nullptr otherwise. 929 const JumpTable *getJumpTable() const; 930 931 /// Check if the block has a jump table instruction. hasJumpTable()932 bool hasJumpTable() const { return getJumpTable() != nullptr; } 933 934 private: 935 void adjustNumPseudos(const MCInst &Inst, int Sign); 936 adjustNumPseudos(Itr Begin,Itr End,int Sign)937 template <typename Itr> void adjustNumPseudos(Itr Begin, Itr End, int Sign) { 938 while (Begin != End) 939 adjustNumPseudos(*Begin++, Sign); 940 } 941 942 /// Adds predecessor to the BB. Most likely you don't need to call this. 943 void addPredecessor(BinaryBasicBlock *Pred); 944 945 /// Remove predecessor of the basic block. Don't use directly, instead 946 /// use removeSuccessor() function. 947 /// If \p Multiple is set to true, it will remove all predecessors that 948 /// are equal to \p Pred. Otherwise, the first instance of \p Pred found 949 /// will be removed. This only matters in awkward, redundant CFGs. 950 void removePredecessor(BinaryBasicBlock *Pred, bool Multiple = true); 951 952 /// Set end offset of this basic block. setEndOffset(uint32_t Offset)953 void setEndOffset(uint32_t Offset) { InputRange.second = Offset; } 954 955 /// Set the index of this basic block. setIndex(unsigned I)956 void setIndex(unsigned I) { Index = I; } 957 clearList(T & List)958 template <typename T> void clearList(T &List) { 959 T TempList; 960 TempList.swap(List); 961 } 962 963 /// Release memory taken by CFG edges and instructions. releaseCFG()964 void releaseCFG() { 965 clearList(Predecessors); 966 clearList(Successors); 967 clearList(Throwers); 968 clearList(LandingPads); 969 clearList(BranchInfo); 970 clearList(Instructions); 971 } 972 }; 973 974 #if defined(LLVM_ON_UNIX) 975 /// Keep the size of the BinaryBasicBlock within a reasonable size class 976 /// (jemalloc bucket) on Linux 977 static_assert(sizeof(BinaryBasicBlock) <= 256, ""); 978 #endif 979 980 bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS); 981 982 } // namespace bolt 983 984 // GraphTraits specializations for basic block graphs (CFGs) 985 template <> struct GraphTraits<bolt::BinaryBasicBlock *> { 986 using NodeRef = bolt::BinaryBasicBlock *; 987 using ChildIteratorType = bolt::BinaryBasicBlock::succ_iterator; 988 989 static NodeRef getEntryNode(bolt::BinaryBasicBlock *BB) { return BB; } 990 static inline ChildIteratorType child_begin(NodeRef N) { 991 return N->succ_begin(); 992 } 993 static inline ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } 994 }; 995 996 template <> struct GraphTraits<const bolt::BinaryBasicBlock *> { 997 using NodeRef = const bolt::BinaryBasicBlock *; 998 using ChildIteratorType = bolt::BinaryBasicBlock::const_succ_iterator; 999 1000 static NodeRef getEntryNode(const bolt::BinaryBasicBlock *BB) { return BB; } 1001 static inline ChildIteratorType child_begin(NodeRef N) { 1002 return N->succ_begin(); 1003 } 1004 static inline ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } 1005 }; 1006 1007 template <> struct GraphTraits<Inverse<bolt::BinaryBasicBlock *>> { 1008 using NodeRef = bolt::BinaryBasicBlock *; 1009 using ChildIteratorType = bolt::BinaryBasicBlock::pred_iterator; 1010 static NodeRef getEntryNode(Inverse<bolt::BinaryBasicBlock *> G) { 1011 return G.Graph; 1012 } 1013 static inline ChildIteratorType child_begin(NodeRef N) { 1014 return N->pred_begin(); 1015 } 1016 static inline ChildIteratorType child_end(NodeRef N) { return N->pred_end(); } 1017 }; 1018 1019 template <> struct GraphTraits<Inverse<const bolt::BinaryBasicBlock *>> { 1020 using NodeRef = const bolt::BinaryBasicBlock *; 1021 using ChildIteratorType = bolt::BinaryBasicBlock::const_pred_iterator; 1022 static NodeRef getEntryNode(Inverse<const bolt::BinaryBasicBlock *> G) { 1023 return G.Graph; 1024 } 1025 static inline ChildIteratorType child_begin(NodeRef N) { 1026 return N->pred_begin(); 1027 } 1028 static inline ChildIteratorType child_end(NodeRef N) { return N->pred_end(); } 1029 }; 1030 1031 } // namespace llvm 1032 1033 #endif 1034