1 //===- bolt/Core/BinaryBasicBlock.h - Low-level basic block -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Sequence of MC/MCPlus instructions. Call/invoke does not terminate the block.
10 // CFI instructions are part of the instruction list with the initial CFI state
11 // defined at the beginning of the block.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef BOLT_CORE_BINARY_BASIC_BLOCK_H
16 #define BOLT_CORE_BINARY_BASIC_BLOCK_H
17 
18 #include "bolt/Core/FunctionLayout.h"
19 #include "bolt/Core/MCPlus.h"
20 #include "llvm/ADT/GraphTraits.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCSymbol.h"
24 #include "llvm/Support/ErrorOr.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <limits>
27 #include <utility>
28 
29 namespace llvm {
30 class MCCodeEmitter;
31 
32 namespace bolt {
33 
34 class BinaryFunction;
35 class JumpTable;
36 
37 class BinaryBasicBlock {
38 public:
39   /// Profile execution information for a given edge in CFG.
40   ///
41   /// If MispredictedCount equals COUNT_INFERRED, then we have a profile
42   /// data for a fall-through edge with a Count representing an inferred
43   /// execution count, i.e. the count we calculated internally, not the one
44   /// coming from profile data.
45   ///
46   /// For all other values of MispredictedCount, Count represents the number of
47   /// branch executions from a profile, and MispredictedCount is the number
48   /// of times the branch was mispredicted according to this profile.
49   struct BinaryBranchInfo {
50     uint64_t Count;
51     uint64_t MispredictedCount; /// number of branches mispredicted
52 
53     bool operator<(const BinaryBranchInfo &Other) const {
54       return (Count < Other.Count) ||
55              (Count == Other.Count &&
56               MispredictedCount < Other.MispredictedCount);
57     }
58   };
59 
60   static constexpr uint32_t INVALID_OFFSET =
61       std::numeric_limits<uint32_t>::max();
62 
63   using BranchInfoType = SmallVector<BinaryBranchInfo, 0>;
64 
65 private:
66   /// Vector of all instructions in the block.
67   InstructionListType Instructions;
68 
69   /// CFG information.
70   using EdgeListType = SmallVector<BinaryBasicBlock *, 0>;
71   EdgeListType Predecessors;
72   EdgeListType Successors;
73 
74   /// Each successor has a corresponding BranchInfo entry in the list.
75   BranchInfoType BranchInfo;
76 
77   using ExceptionListType = SmallVector<BinaryBasicBlock *, 0>;
78 
79   /// List of blocks that this landing pad is handling.
80   ExceptionListType Throwers;
81 
82   /// List of blocks that can catch exceptions thrown by code in this block.
83   ExceptionListType LandingPads;
84 
85   /// Function that owns this basic block.
86   BinaryFunction *Function;
87 
88   /// Label associated with the block.
89   MCSymbol *Label{nullptr};
90 
91   /// [Begin, End) address range for this block in the output binary.
92   std::pair<uint32_t, uint32_t> OutputAddressRange = {0, 0};
93 
94   /// Original offset range of the basic block in the function.
95   std::pair<uint32_t, uint32_t> InputRange = {INVALID_OFFSET, INVALID_OFFSET};
96 
97   /// Map input offset (from function start) of an instruction to an output
98   /// symbol. Enables writing BOLT address translation tables used for mapping
99   /// control transfer in the output binary back to the original binary.
100   using LocSymsTy = std::vector<std::pair<uint32_t, const MCSymbol *>>;
101   std::unique_ptr<LocSymsTy> LocSyms;
102 
103   /// After output/codegen, map output offsets of instructions in this basic
104   /// block to instruction offsets in the original function. Note that the
105   /// output basic block could be different from the input basic block.
106   /// We only map instruction of interest, such as calls, and sdt markers.
107   ///
108   /// We store the offset array in a basic block to facilitate BAT tables
109   /// generation. Otherwise, the mapping could be done at function level.
110   using OffsetTranslationTableTy = std::vector<std::pair<uint32_t, uint32_t>>;
111   std::unique_ptr<OffsetTranslationTableTy> OffsetTranslationTable;
112 
113   /// Alignment requirements for the block.
114   uint32_t Alignment{1};
115 
116   /// Maximum number of bytes to use for alignment of the block.
117   uint32_t AlignmentMaxBytes{0};
118 
119   /// Number of times this basic block was executed.
120   uint64_t ExecutionCount{COUNT_NO_PROFILE};
121 
122   static constexpr unsigned InvalidIndex = ~0u;
123 
124   /// Index to BasicBlocks vector in BinaryFunction.
125   unsigned Index{InvalidIndex};
126 
127   /// Index in the current layout.
128   mutable unsigned LayoutIndex{InvalidIndex};
129 
130   /// Number of pseudo instructions in this block.
131   uint32_t NumPseudos{0};
132 
133   /// CFI state at the entry to this basic block.
134   int32_t CFIState{-1};
135 
136   /// In cases where the parent function has been split, IsCold == true means
137   /// this BB will be allocated outside its parent function.
138   bool IsCold{false};
139 
140   /// Indicates if the block could be outlined.
141   bool CanOutline{true};
142 
143   /// Flag to indicate whether this block is valid or not.  Invalid
144   /// blocks may contain out of date or incorrect information.
145   bool IsValid{true};
146 
147 private:
148   BinaryBasicBlock() = delete;
149   BinaryBasicBlock(const BinaryBasicBlock &) = delete;
150   BinaryBasicBlock(const BinaryBasicBlock &&) = delete;
151   BinaryBasicBlock &operator=(const BinaryBasicBlock &) = delete;
152   BinaryBasicBlock &operator=(const BinaryBasicBlock &&) = delete;
153 
BinaryBasicBlock(BinaryFunction * Function,MCSymbol * Label)154   explicit BinaryBasicBlock(BinaryFunction *Function, MCSymbol *Label)
155       : Function(Function), Label(Label) {
156     assert(Function && "Function must be non-null");
157   }
158 
159   // Exclusively managed by BinaryFunction.
160   friend class BinaryFunction;
161   friend bool operator<(const BinaryBasicBlock &LHS,
162                         const BinaryBasicBlock &RHS);
163 
164   /// Assign new label to the basic block.
setLabel(MCSymbol * Symbol)165   void setLabel(MCSymbol *Symbol) { Label = Symbol; }
166 
167 public:
168   static constexpr uint64_t COUNT_INFERRED =
169       std::numeric_limits<uint64_t>::max();
170   static constexpr uint64_t COUNT_NO_PROFILE =
171       std::numeric_limits<uint64_t>::max();
172 
173   // Instructions iterators.
174   using iterator = InstructionListType::iterator;
175   using const_iterator = InstructionListType::const_iterator;
176   using reverse_iterator = std::reverse_iterator<iterator>;
177   using const_reverse_iterator = std::reverse_iterator<const_iterator>;
178 
empty()179   bool         empty()            const { assert(hasInstructions());
180                                           return Instructions.empty(); }
size()181   size_t       size()             const { assert(hasInstructions());
182                                           return Instructions.size(); }
front()183   MCInst       &front()                 { assert(hasInstructions());
184                                           return Instructions.front();  }
back()185   MCInst       &back()                  { assert(hasInstructions());
186                                           return Instructions.back();   }
front()187   const MCInst &front()           const { assert(hasInstructions());
188                                           return Instructions.front();  }
back()189   const MCInst &back()            const { assert(hasInstructions());
190                                           return Instructions.back();   }
191 
begin()192   iterator                begin()       { assert(hasInstructions());
193                                           return Instructions.begin();  }
begin()194   const_iterator          begin() const { assert(hasInstructions());
195                                           return Instructions.begin();  }
end()196   iterator                end  ()       { assert(hasInstructions());
197                                           return Instructions.end();    }
end()198   const_iterator          end  () const { assert(hasInstructions());
199                                           return Instructions.end();    }
rbegin()200   reverse_iterator       rbegin()       { assert(hasInstructions());
201                                           return Instructions.rbegin(); }
rbegin()202   const_reverse_iterator rbegin() const { assert(hasInstructions());
203                                           return Instructions.rbegin(); }
rend()204   reverse_iterator       rend  ()       { assert(hasInstructions());
205                                           return Instructions.rend();   }
rend()206   const_reverse_iterator rend  () const { assert(hasInstructions());
207                                           return Instructions.rend();   }
208 
209   // CFG iterators.
210   using pred_iterator = EdgeListType::iterator;
211   using const_pred_iterator = EdgeListType::const_iterator;
212   using succ_iterator = EdgeListType::iterator;
213   using const_succ_iterator = EdgeListType::const_iterator;
214   using throw_iterator = decltype(Throwers)::iterator;
215   using const_throw_iterator = decltype(Throwers)::const_iterator;
216   using lp_iterator = decltype(LandingPads)::iterator;
217   using const_lp_iterator = decltype(LandingPads)::const_iterator;
218 
219   using pred_reverse_iterator = std::reverse_iterator<pred_iterator>;
220   using const_pred_reverse_iterator =
221     std::reverse_iterator<const_pred_iterator>;
222   using succ_reverse_iterator = std::reverse_iterator<succ_iterator>;
223   using const_succ_reverse_iterator =
224     std::reverse_iterator<const_succ_iterator>;
225 
pred_begin()226   pred_iterator        pred_begin()       { return Predecessors.begin(); }
pred_begin()227   const_pred_iterator  pred_begin() const { return Predecessors.begin(); }
pred_end()228   pred_iterator        pred_end()         { return Predecessors.end();   }
pred_end()229   const_pred_iterator  pred_end()   const { return Predecessors.end();   }
pred_rbegin()230   pred_reverse_iterator        pred_rbegin()
231                                           { return Predecessors.rbegin();}
pred_rbegin()232   const_pred_reverse_iterator  pred_rbegin() const
233                                           { return Predecessors.rbegin();}
pred_rend()234   pred_reverse_iterator        pred_rend()
235                                           { return Predecessors.rend();  }
pred_rend()236   const_pred_reverse_iterator  pred_rend()   const
237                                           { return Predecessors.rend();  }
pred_size()238   size_t               pred_size()  const {
239     return Predecessors.size();
240   }
pred_empty()241   bool                 pred_empty() const { return Predecessors.empty(); }
242 
succ_begin()243   succ_iterator        succ_begin()       { return Successors.begin();   }
succ_begin()244   const_succ_iterator  succ_begin() const { return Successors.begin();   }
succ_end()245   succ_iterator        succ_end()         { return Successors.end();     }
succ_end()246   const_succ_iterator  succ_end()   const { return Successors.end();     }
succ_rbegin()247   succ_reverse_iterator        succ_rbegin()
248                                           { return Successors.rbegin();  }
succ_rbegin()249   const_succ_reverse_iterator  succ_rbegin() const
250                                           { return Successors.rbegin();  }
succ_rend()251   succ_reverse_iterator        succ_rend()
252                                           { return Successors.rend();    }
succ_rend()253   const_succ_reverse_iterator  succ_rend()   const
254                                           { return Successors.rend();    }
succ_size()255   size_t               succ_size()  const {
256     return Successors.size();
257   }
succ_empty()258   bool                 succ_empty() const { return Successors.empty();   }
259 
throw_begin()260   throw_iterator        throw_begin()       { return Throwers.begin(); }
throw_begin()261   const_throw_iterator  throw_begin() const { return Throwers.begin(); }
throw_end()262   throw_iterator        throw_end()         { return Throwers.end();   }
throw_end()263   const_throw_iterator  throw_end()   const { return Throwers.end();   }
throw_size()264   size_t                throw_size()  const {
265     return Throwers.size();
266   }
throw_empty()267   bool                  throw_empty() const { return Throwers.empty(); }
isLandingPad()268   bool                  isLandingPad() const { return !Throwers.empty(); }
269 
lp_begin()270   lp_iterator        lp_begin()       { return LandingPads.begin();   }
lp_begin()271   const_lp_iterator  lp_begin() const { return LandingPads.begin();   }
lp_end()272   lp_iterator        lp_end()         { return LandingPads.end();     }
lp_end()273   const_lp_iterator  lp_end()   const { return LandingPads.end();     }
lp_size()274   size_t             lp_size()  const {
275     return LandingPads.size();
276   }
lp_empty()277   bool               lp_empty() const { return LandingPads.empty();   }
278 
instructions()279   inline iterator_range<iterator> instructions() {
280     assert(hasInstructions());
281     return iterator_range<iterator>(begin(), end());
282   }
instructions()283   inline iterator_range<const_iterator> instructions() const {
284     assert(hasInstructions());
285     return iterator_range<const_iterator>(begin(), end());
286   }
predecessors()287   inline iterator_range<pred_iterator> predecessors() {
288     assert(hasCFG());
289     return iterator_range<pred_iterator>(pred_begin(), pred_end());
290   }
predecessors()291   inline iterator_range<const_pred_iterator> predecessors() const {
292     assert(hasCFG());
293     return iterator_range<const_pred_iterator>(pred_begin(), pred_end());
294   }
successors()295   inline iterator_range<succ_iterator> successors() {
296     assert(hasCFG());
297     return iterator_range<succ_iterator>(succ_begin(), succ_end());
298   }
successors()299   inline iterator_range<const_succ_iterator> successors() const {
300     assert(hasCFG());
301     return iterator_range<const_succ_iterator>(succ_begin(), succ_end());
302   }
throwers()303   inline iterator_range<throw_iterator> throwers() {
304     assert(hasCFG());
305     return iterator_range<throw_iterator>(throw_begin(), throw_end());
306   }
throwers()307   inline iterator_range<const_throw_iterator> throwers() const {
308     assert(hasCFG());
309     return iterator_range<const_throw_iterator>(throw_begin(), throw_end());
310   }
landing_pads()311   inline iterator_range<lp_iterator> landing_pads() {
312     assert(hasCFG());
313     return iterator_range<lp_iterator>(lp_begin(), lp_end());
314   }
landing_pads()315   inline iterator_range<const_lp_iterator> landing_pads() const {
316     assert(hasCFG());
317     return iterator_range<const_lp_iterator>(lp_begin(), lp_end());
318   }
319 
320   // BranchInfo iterators.
321   using branch_info_iterator = BranchInfoType::iterator;
322   using const_branch_info_iterator = BranchInfoType::const_iterator;
323   using branch_info_reverse_iterator =
324       std::reverse_iterator<branch_info_iterator>;
325   using const_branch_info_reverse_iterator =
326       std::reverse_iterator<const_branch_info_iterator>;
327 
branch_info_begin()328   branch_info_iterator branch_info_begin() { return BranchInfo.begin(); }
branch_info_end()329   branch_info_iterator branch_info_end() { return BranchInfo.end(); }
branch_info_begin()330   const_branch_info_iterator branch_info_begin() const {
331     return BranchInfo.begin();
332   }
branch_info_end()333   const_branch_info_iterator branch_info_end() const {
334     return BranchInfo.end();
335   }
branch_info_rbegin()336   branch_info_reverse_iterator branch_info_rbegin() {
337     return BranchInfo.rbegin();
338   }
branch_info_rend()339   branch_info_reverse_iterator branch_info_rend() { return BranchInfo.rend(); }
branch_info_rbegin()340   const_branch_info_reverse_iterator branch_info_rbegin() const {
341     return BranchInfo.rbegin();
342   }
branch_info_rend()343   const_branch_info_reverse_iterator branch_info_rend() const {
344     return BranchInfo.rend();
345   }
346 
branch_info_size()347   size_t branch_info_size() const { return BranchInfo.size(); }
branch_info_empty()348   bool branch_info_empty() const { return BranchInfo.empty(); }
349 
branch_info()350   inline iterator_range<branch_info_iterator> branch_info() {
351     return iterator_range<branch_info_iterator>(BranchInfo.begin(),
352                                                 BranchInfo.end());
353   }
branch_info()354   inline iterator_range<const_branch_info_iterator> branch_info() const {
355     return iterator_range<const_branch_info_iterator>(BranchInfo.begin(),
356                                                       BranchInfo.end());
357   }
358 
359   /// Get instruction at given index.
getInstructionAtIndex(unsigned Index)360   MCInst &getInstructionAtIndex(unsigned Index) { return Instructions[Index]; }
361 
getInstructionAtIndex(unsigned Index)362   const MCInst &getInstructionAtIndex(unsigned Index) const {
363     return Instructions[Index];
364   }
365 
366   /// Return symbol marking the start of this basic block.
getLabel()367   MCSymbol *getLabel() { return Label; }
368 
369   /// Return symbol marking the start of this basic block (const version).
getLabel()370   const MCSymbol *getLabel() const { return Label; }
371 
372   /// Get successor with given \p Label if \p Label != nullptr.
373   /// Returns nullptr if no such successor is found.
374   /// If the \p Label == nullptr and the block has only one successor then
375   /// return the successor.
376   BinaryBasicBlock *getSuccessor(const MCSymbol *Label = nullptr) const;
377 
378   /// Return the related branch info as well as the successor.
379   BinaryBasicBlock *getSuccessor(const MCSymbol *Label,
380                                  BinaryBranchInfo &BI) const;
381 
382   /// If the basic block ends with a conditional branch (possibly followed by
383   /// an unconditional branch) and thus has 2 successors, return a successor
384   /// corresponding to a jump condition which could be true or false.
385   /// Return nullptr if the basic block does not have a conditional jump.
getConditionalSuccessor(bool Condition)386   BinaryBasicBlock *getConditionalSuccessor(bool Condition) {
387     if (succ_size() != 2)
388       return nullptr;
389     return Successors[Condition == true ? 0 : 1];
390   }
391 
getConditionalSuccessor(bool Condition)392   const BinaryBasicBlock *getConditionalSuccessor(bool Condition) const {
393     return const_cast<BinaryBasicBlock *>(this)->getConditionalSuccessor(
394         Condition);
395   }
396 
397   /// Find the fallthrough successor for a block, or nullptr if there is
398   /// none.
getFallthrough()399   BinaryBasicBlock *getFallthrough() {
400     if (succ_size() == 2)
401       return getConditionalSuccessor(false);
402     else
403       return getSuccessor();
404   }
405 
getFallthrough()406   const BinaryBasicBlock *getFallthrough() const {
407     return const_cast<BinaryBasicBlock *>(this)->getFallthrough();
408   }
409 
410   /// Return branch info corresponding to a taken branch.
getTakenBranchInfo()411   const BinaryBranchInfo &getTakenBranchInfo() const {
412     assert(BranchInfo.size() == 2 &&
413            "could only be called for blocks with 2 successors");
414     return BranchInfo[0];
415   };
416 
417   /// Return branch info corresponding to a fall-through branch.
getFallthroughBranchInfo()418   const BinaryBranchInfo &getFallthroughBranchInfo() const {
419     assert(BranchInfo.size() == 2 &&
420            "could only be called for blocks with 2 successors");
421     return BranchInfo[1];
422   };
423 
424   /// Return branch info corresponding to an edge going to \p Succ basic block.
425   BinaryBranchInfo &getBranchInfo(const BinaryBasicBlock &Succ);
426 
427   /// Return branch info corresponding to an edge going to a basic block with
428   /// label \p Label.
429   BinaryBranchInfo &getBranchInfo(const MCSymbol *Label);
430 
431   /// Set branch information for the outgoing edge to block \p Succ.
setSuccessorBranchInfo(const BinaryBasicBlock & Succ,uint64_t Count,uint64_t MispredictedCount)432   void setSuccessorBranchInfo(const BinaryBasicBlock &Succ, uint64_t Count,
433                               uint64_t MispredictedCount) {
434     BinaryBranchInfo &BI = getBranchInfo(Succ);
435     BI.Count = Count;
436     BI.MispredictedCount = MispredictedCount;
437   }
438 
439   /// Try to compute the taken and misprediction frequencies for the given
440   /// successor.  The result is an error if no information can be found.
441   ErrorOr<std::pair<double, double>>
442   getBranchStats(const BinaryBasicBlock *Succ) const;
443 
444   /// If the basic block ends with a conditional branch (possibly followed by
445   /// an unconditional branch) and thus has 2 successor, reverse the order of
446   /// its successors in CFG, update branch info, and return true. If the basic
447   /// block does not have 2 successors return false.
448   bool swapConditionalSuccessors();
449 
450   /// Add an instruction with unconditional control transfer to \p Successor
451   /// basic block to the end of this basic block.
452   void addBranchInstruction(const BinaryBasicBlock *Successor);
453 
454   /// Add an instruction with tail call control transfer to \p Target
455   /// to the end of this basic block.
456   void addTailCallInstruction(const MCSymbol *Target);
457 
458   /// Return the number of call instructions in this basic block.
459   uint32_t getNumCalls() const;
460 
461   /// Get landing pad with given label. Returns nullptr if no such
462   /// landing pad is found.
463   BinaryBasicBlock *getLandingPad(const MCSymbol *Label) const;
464 
465   /// Return local name for the block.
getName()466   StringRef getName() const { return Label->getName(); }
467 
468   /// Add instruction at the end of this basic block.
469   /// Returns iterator pointing to the inserted instruction.
addInstruction(MCInst && Inst)470   iterator addInstruction(MCInst &&Inst) {
471     adjustNumPseudos(Inst, 1);
472     Instructions.emplace_back(Inst);
473     return std::prev(Instructions.end());
474   }
475 
476   /// Add instruction at the end of this basic block.
477   /// Returns iterator pointing to the inserted instruction.
addInstruction(const MCInst & Inst)478   iterator addInstruction(const MCInst &Inst) {
479     adjustNumPseudos(Inst, 1);
480     Instructions.push_back(Inst);
481     return std::prev(Instructions.end());
482   }
483 
484   /// Add a range of instructions to the end of this basic block.
addInstructions(Itr Begin,Itr End)485   template <typename Itr> void addInstructions(Itr Begin, Itr End) {
486     while (Begin != End)
487       addInstruction(*Begin++);
488   }
489 
490   /// Add a range of instructions to the end of this basic block.
addInstructions(RangeTy R)491   template <typename RangeTy> void addInstructions(RangeTy R) {
492     for (auto &I : R)
493       addInstruction(I);
494   }
495 
496   /// Add instruction before Pos in this basic block.
insertPseudoInstr(Itr Pos,MCInst & Instr)497   template <typename Itr> Itr insertPseudoInstr(Itr Pos, MCInst &Instr) {
498     ++NumPseudos;
499     return Instructions.insert(Pos, Instr);
500   }
501 
502   /// Return the number of pseudo instructions in the basic block.
503   uint32_t getNumPseudos() const;
504 
505   /// Return the number of emitted instructions for this basic block.
getNumNonPseudos()506   uint32_t getNumNonPseudos() const { return size() - getNumPseudos(); }
507 
508   /// Return iterator to the first non-pseudo instruction or end()
509   /// if no such instruction was found.
510   iterator getFirstNonPseudo();
511 
512   /// Return a pointer to the first non-pseudo instruction in this basic
513   /// block.  Returns nullptr if none exists.
getFirstNonPseudoInstr()514   MCInst *getFirstNonPseudoInstr() {
515     auto II = getFirstNonPseudo();
516     return II == Instructions.end() ? nullptr : &*II;
517   }
518 
519   /// Return reverse iterator to the last non-pseudo instruction or rend()
520   /// if no such instruction was found.
521   reverse_iterator getLastNonPseudo();
getLastNonPseudo()522   const_reverse_iterator getLastNonPseudo() const {
523     return const_cast<BinaryBasicBlock *>(this)->getLastNonPseudo();
524   }
525 
526   /// Return a pointer to the last non-pseudo instruction in this basic
527   /// block.  Returns nullptr if none exists.
getLastNonPseudoInstr()528   MCInst *getLastNonPseudoInstr() {
529     auto RII = getLastNonPseudo();
530     return RII == Instructions.rend() ? nullptr : &*RII;
531   }
getLastNonPseudoInstr()532   const MCInst *getLastNonPseudoInstr() const {
533     auto RII = getLastNonPseudo();
534     return RII == Instructions.rend() ? nullptr : &*RII;
535   }
536 
537   /// Set CFI state at entry to this basic block.
setCFIState(int32_t NewCFIState)538   void setCFIState(int32_t NewCFIState) {
539     assert((CFIState == -1 || NewCFIState == CFIState) &&
540            "unexpected change of CFI state for basic block");
541     CFIState = NewCFIState;
542   }
543 
544   /// Return CFI state (expected) at entry of this basic block.
getCFIState()545   int32_t getCFIState() const {
546     assert(CFIState >= 0 && "unknown CFI state");
547     return CFIState;
548   }
549 
550   /// Calculate and return CFI state right before instruction \p Instr in
551   /// this basic block. If \p Instr is nullptr then return the state at
552   /// the end of the basic block.
553   int32_t getCFIStateAtInstr(const MCInst *Instr) const;
554 
555   /// Calculate and return CFI state after execution of this basic block.
556   /// The state depends on CFI state at entry and CFI instructions inside the
557   /// basic block.
getCFIStateAtExit()558   int32_t getCFIStateAtExit() const { return getCFIStateAtInstr(nullptr); }
559 
560   /// Set minimum alignment for the basic block.
setAlignment(uint32_t Align)561   void setAlignment(uint32_t Align) { Alignment = Align; }
562 
563   /// Set alignment of the block based on the alignment of its offset.
setDerivedAlignment()564   void setDerivedAlignment() {
565     const uint64_t DerivedAlignment = getOffset() & (1 + ~getOffset());
566     Alignment = std::min(DerivedAlignment, uint64_t(32));
567   }
568 
569   /// Return required alignment for the block.
getAlignment()570   uint32_t getAlignment() const { return Alignment; }
571 
572   /// Set the maximum number of bytes to use for the block alignment.
setAlignmentMaxBytes(uint32_t Value)573   void setAlignmentMaxBytes(uint32_t Value) { AlignmentMaxBytes = Value; }
574 
575   /// Return the maximum number of bytes to use for the block alignment.
getAlignmentMaxBytes()576   uint32_t getAlignmentMaxBytes() const { return AlignmentMaxBytes; }
577 
578   /// Adds block to successor list, and also updates predecessor list for
579   /// successor block.
580   /// Set branch info for this path.
581   void addSuccessor(BinaryBasicBlock *Succ, uint64_t Count = 0,
582                     uint64_t MispredictedCount = 0);
583 
addSuccessor(BinaryBasicBlock * Succ,const BinaryBranchInfo & BI)584   void addSuccessor(BinaryBasicBlock *Succ, const BinaryBranchInfo &BI) {
585     addSuccessor(Succ, BI.Count, BI.MispredictedCount);
586   }
587 
588   /// Add a range of successors.
addSuccessors(Itr Begin,Itr End)589   template <typename Itr> void addSuccessors(Itr Begin, Itr End) {
590     while (Begin != End)
591       addSuccessor(*Begin++);
592   }
593 
594   /// Add a range of successors with branch info.
595   template <typename Itr, typename BrItr>
addSuccessors(Itr Begin,Itr End,BrItr BrBegin,BrItr BrEnd)596   void addSuccessors(Itr Begin, Itr End, BrItr BrBegin, BrItr BrEnd) {
597     assert(std::distance(Begin, End) == std::distance(BrBegin, BrEnd));
598     while (Begin != End)
599       addSuccessor(*Begin++, *BrBegin++);
600   }
601 
602   /// Replace Succ with NewSucc.  This routine is helpful for preserving
603   /// the order of conditional successors when editing the CFG.
604   void replaceSuccessor(BinaryBasicBlock *Succ, BinaryBasicBlock *NewSucc,
605                         uint64_t Count = 0, uint64_t MispredictedCount = 0);
606 
607   /// Move all of this block's successors to a new block, and set the
608   /// execution count of this new block with our execution count. This is
609   /// useful when splitting a block in two.
moveAllSuccessorsTo(BinaryBasicBlock * New)610   void moveAllSuccessorsTo(BinaryBasicBlock *New) {
611     New->addSuccessors(successors().begin(), successors().end(),
612                        branch_info_begin(), branch_info_end());
613     removeAllSuccessors();
614 
615     // Update the execution count on the new block.
616     New->setExecutionCount(getExecutionCount());
617   }
618 
619   /// Remove /p Succ basic block from the list of successors. Update the
620   /// list of predecessors of /p Succ and update branch info.
621   void removeSuccessor(BinaryBasicBlock *Succ);
622 
623   /// Remove all successors of the basic block, and remove the block
624   /// from respective lists of predecessors.
625   void removeAllSuccessors();
626 
627   /// Remove useless duplicate successors.  When the conditional
628   /// successor is the same as the unconditional successor, we can
629   /// remove the conditional successor and branch instruction.
630   void removeDuplicateConditionalSuccessor(MCInst *CondBranch);
631 
632   /// Update successors of the basic block based on the jump table instruction.
633   /// The block must end with a jump table instruction.
634   void updateJumpTableSuccessors();
635 
636   /// Test if BB is a predecessor of this block.
isPredecessor(const BinaryBasicBlock * BB)637   bool isPredecessor(const BinaryBasicBlock *BB) const {
638     return llvm::is_contained(Predecessors, BB);
639   }
640 
641   /// Test if BB is a successor of this block.
isSuccessor(const BinaryBasicBlock * BB)642   bool isSuccessor(const BinaryBasicBlock *BB) const {
643     return llvm::is_contained(Successors, BB);
644   }
645 
646   /// Test if this BB has a valid execution count.
hasProfile()647   bool hasProfile() const { return ExecutionCount != COUNT_NO_PROFILE; }
648 
649   /// Return the information about the number of times this basic block was
650   /// executed.
651   ///
652   /// Return COUNT_NO_PROFILE if there's no profile info.
getExecutionCount()653   uint64_t getExecutionCount() const { return ExecutionCount; }
654 
655   /// Return the execution count for blocks with known profile.
656   /// Return 0 if the block has no profile.
getKnownExecutionCount()657   uint64_t getKnownExecutionCount() const {
658     return !hasProfile() ? 0 : ExecutionCount;
659   }
660 
661   /// Set the execution count for this block.
setExecutionCount(uint64_t Count)662   void setExecutionCount(uint64_t Count) { ExecutionCount = Count; }
663 
664   /// Apply a given \p Ratio to the profile information of this basic block.
665   void adjustExecutionCount(double Ratio);
666 
667   /// Return true if the basic block is an entry point into the function
668   /// (either primary or secondary).
669   bool isEntryPoint() const;
670 
isValid()671   bool isValid() const { return IsValid; }
672 
markValid(const bool Valid)673   void markValid(const bool Valid) { IsValid = Valid; }
674 
getFragmentNum()675   FragmentNum getFragmentNum() const {
676     return IsCold ? FragmentNum::cold() : FragmentNum::hot();
677   }
678 
isCold()679   bool isCold() const { return IsCold; }
680 
setIsCold(const bool Flag)681   void setIsCold(const bool Flag) { IsCold = Flag; }
682 
683   /// Return true if the block can be outlined. At the moment we disallow
684   /// outlining of blocks that can potentially throw exceptions or are
685   /// the beginning of a landing pad. The entry basic block also can
686   /// never be outlined.
canOutline()687   bool canOutline() const { return CanOutline; }
688 
setCanOutline(const bool Flag)689   void setCanOutline(const bool Flag) { CanOutline = Flag; }
690 
691   /// Erase pseudo instruction at a given iterator.
692   /// Return iterator following the removed instruction.
erasePseudoInstruction(iterator II)693   iterator erasePseudoInstruction(iterator II) {
694     --NumPseudos;
695     return Instructions.erase(II);
696   }
697 
698   /// Erase non-pseudo instruction at a given iterator \p II.
699   /// Return iterator following the removed instruction.
eraseInstruction(iterator II)700   iterator eraseInstruction(iterator II) {
701     adjustNumPseudos(*II, -1);
702     return Instructions.erase(II);
703   }
704 
705   /// Erase non-pseudo instruction at a given \p Index
eraseInstructionAtIndex(unsigned Index)706   void eraseInstructionAtIndex(unsigned Index) {
707     eraseInstruction(Instructions.begin() + Index);
708   }
709 
710   /// Erase instructions in the specified range.
711   template <typename ItrType>
eraseInstructions(ItrType Begin,ItrType End)712   void eraseInstructions(ItrType Begin, ItrType End) {
713     while (End > Begin)
714       eraseInstruction(findInstruction(*--End));
715   }
716 
717   /// Erase all instructions.
clear()718   void clear() {
719     Instructions.clear();
720     NumPseudos = 0;
721   }
722 
723   /// Retrieve iterator for \p Inst or return end iterator if instruction is not
724   /// from this basic block.
decltype(Instructions)725   decltype(Instructions)::iterator findInstruction(const MCInst *Inst) {
726     if (Instructions.empty())
727       return Instructions.end();
728     size_t Index = Inst - &Instructions[0];
729     return Index >= Instructions.size() ? Instructions.end()
730                                         : Instructions.begin() + Index;
731   }
732 
733   /// Replace instruction referenced by iterator \II with a sequence of
734   /// instructions defined by [\p Begin, \p End] range.
735   ///
736   /// Return iterator pointing to the first inserted instruction.
737   template <typename Itr>
replaceInstruction(iterator II,Itr Begin,Itr End)738   iterator replaceInstruction(iterator II, Itr Begin, Itr End) {
739     adjustNumPseudos(*II, -1);
740     adjustNumPseudos(Begin, End, 1);
741 
742     auto I = II - Instructions.begin();
743     Instructions.insert(Instructions.erase(II), Begin, End);
744     return I + Instructions.begin();
745   }
746 
replaceInstruction(iterator II,const InstructionListType & Replacement)747   iterator replaceInstruction(iterator II,
748                               const InstructionListType &Replacement) {
749     return replaceInstruction(II, Replacement.begin(), Replacement.end());
750   }
751 
752   /// Insert \p NewInst before \p At, which must be an existing instruction in
753   /// this BB. Return iterator pointing to the newly inserted instruction.
insertInstruction(iterator At,MCInst && NewInst)754   iterator insertInstruction(iterator At, MCInst &&NewInst) {
755     adjustNumPseudos(NewInst, 1);
756     return Instructions.emplace(At, std::move(NewInst));
757   }
758 
insertInstruction(iterator At,MCInst & NewInst)759   iterator insertInstruction(iterator At, MCInst &NewInst) {
760     adjustNumPseudos(NewInst, 1);
761     return Instructions.insert(At, NewInst);
762   }
763 
764   /// Helper to retrieve any terminators in \p BB before \p Pos. This is used
765   /// to skip CFI instructions and to retrieve the first terminator instruction
766   /// in basic blocks with two terminators (conditional jump and unconditional
767   /// jump).
768   MCInst *getTerminatorBefore(MCInst *Pos);
769 
770   /// Used to identify whether an instruction is before a terminator and whether
771   /// moving it to the end of the BB would render it dead code.
772   bool hasTerminatorAfter(MCInst *Pos);
773 
774   /// Split apart the instructions in this basic block starting at Inst.
775   /// The instructions following Inst are removed and returned in a vector.
splitInstructions(const MCInst * Inst)776   InstructionListType splitInstructions(const MCInst *Inst) {
777     InstructionListType SplitInst;
778 
779     assert(!Instructions.empty());
780     while (&Instructions.back() != Inst) {
781       SplitInst.push_back(Instructions.back());
782       Instructions.pop_back();
783     }
784     std::reverse(SplitInst.begin(), SplitInst.end());
785     NumPseudos = 0;
786     adjustNumPseudos(Instructions.begin(), Instructions.end(), 1);
787     return SplitInst;
788   }
789 
790   /// Split basic block at the instruction pointed to by II.
791   /// All iterators pointing after II get invalidated.
792   ///
793   /// Return the new basic block that starts with the instruction
794   /// at the split point.
795   BinaryBasicBlock *splitAt(iterator II);
796 
797   /// Set start offset of this basic block in the input binary.
setOffset(uint32_t Offset)798   void setOffset(uint32_t Offset) { InputRange.first = Offset; };
799 
800   /// Sets address of the basic block in the output.
setOutputStartAddress(uint64_t Address)801   void setOutputStartAddress(uint64_t Address) {
802     OutputAddressRange.first = Address;
803   }
804 
805   /// Sets address past the end of the basic block in the output.
setOutputEndAddress(uint64_t Address)806   void setOutputEndAddress(uint64_t Address) {
807     OutputAddressRange.second = Address;
808   }
809 
810   /// Gets the memory address range of this BB in the input binary.
getInputAddressRange()811   std::pair<uint64_t, uint64_t> getInputAddressRange() const {
812     return InputRange;
813   }
814 
815   /// Gets the memory address range of this BB in the output binary.
getOutputAddressRange()816   std::pair<uint64_t, uint64_t> getOutputAddressRange() const {
817     return OutputAddressRange;
818   }
819 
820   /// Update addresses of special instructions inside this basic block.
821   void updateOutputValues(const MCAsmLayout &Layout);
822 
823   /// Return mapping of input offsets to symbols in the output.
getLocSyms()824   LocSymsTy &getLocSyms() {
825     return LocSyms ? *LocSyms : *(LocSyms = std::make_unique<LocSymsTy>());
826   }
827 
828   /// Return mapping of input offsets to symbols in the output.
getLocSyms()829   const LocSymsTy &getLocSyms() const {
830     return const_cast<BinaryBasicBlock *>(this)->getLocSyms();
831   }
832 
833   /// Return offset translation table for the basic block.
getOffsetTranslationTable()834   OffsetTranslationTableTy &getOffsetTranslationTable() {
835     return OffsetTranslationTable
836                ? *OffsetTranslationTable
837                : *(OffsetTranslationTable =
838                        std::make_unique<OffsetTranslationTableTy>());
839   }
840 
841   /// Return offset translation table for the basic block.
getOffsetTranslationTable()842   const OffsetTranslationTableTy &getOffsetTranslationTable() const {
843     return const_cast<BinaryBasicBlock *>(this)->getOffsetTranslationTable();
844   }
845 
846   /// Return size of the basic block in the output binary.
getOutputSize()847   uint64_t getOutputSize() const {
848     return OutputAddressRange.second - OutputAddressRange.first;
849   }
850 
getFunction()851   BinaryFunction *getFunction() const { return Function; }
852 
853   /// Analyze and interpret the terminators of this basic block. TBB must be
854   /// initialized with the original fall-through for this BB.
855   bool analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
856                      MCInst *&CondBranch, MCInst *&UncondBranch);
857 
858   /// Return true if iterator \p I is pointing to the first instruction in
859   /// a pair that could be macro-fused.
860   bool isMacroOpFusionPair(const_iterator I) const;
861 
862   /// If the basic block has a pair of instructions suitable for macro-fusion,
863   /// return iterator to the first instruction of the pair.
864   /// Otherwise return end().
865   const_iterator getMacroOpFusionPair() const;
866 
867   /// Printer required for printing dominator trees.
868   void printAsOperand(raw_ostream &OS, bool PrintType = true) {
869     if (PrintType)
870       OS << "basic block ";
871     OS << getName();
872   }
873 
874   /// A simple dump function for debugging.
875   void dump() const;
876 
877   /// Validate successor invariants for this BB.
878   bool validateSuccessorInvariants();
879 
880   /// Return offset of the basic block from the function start on input.
getInputOffset()881   uint32_t getInputOffset() const { return InputRange.first; }
882 
883   /// Return offset from the function start to location immediately past
884   /// the end of the basic block.
getEndOffset()885   uint32_t getEndOffset() const { return InputRange.second; }
886 
887   /// Return size of the basic block on input.
getOriginalSize()888   uint32_t getOriginalSize() const {
889     return InputRange.second - InputRange.first;
890   }
891 
892   /// Returns an estimate of size of basic block during run time optionally
893   /// using a user-supplied emitter for lock-free multi-thread work.
894   /// MCCodeEmitter is not thread safe and each thread should operate with its
895   /// own copy of it.
896   uint64_t estimateSize(const MCCodeEmitter *Emitter = nullptr) const;
897 
898   /// Return index in the current layout. The user is responsible for
899   /// making sure the indices are up to date,
900   /// e.g. by calling BinaryFunction::updateLayoutIndices();
getLayoutIndex()901   unsigned getLayoutIndex() const {
902     assert(isValid());
903     return LayoutIndex;
904   }
905 
906   /// Set layout index. To be used by BinaryFunction.
setLayoutIndex(unsigned Index)907   void setLayoutIndex(unsigned Index) const { LayoutIndex = Index; }
908 
909   /// Needed by graph traits.
getParent()910   BinaryFunction *getParent() const { return getFunction(); }
911 
912   /// Return true if the containing function is in CFG state.
913   bool hasCFG() const;
914 
915   /// Return true if the containing function is in a state with instructions.
916   bool hasInstructions() const;
917 
918   /// Return offset of the basic block from the function start.
getOffset()919   uint32_t getOffset() const { return InputRange.first; }
920 
921   /// Get the index of this basic block.
getIndex()922   unsigned getIndex() const {
923     assert(isValid());
924     return Index;
925   }
926 
927   /// Return jump table if the block contains a jump table instruction or
928   /// nullptr otherwise.
929   const JumpTable *getJumpTable() const;
930 
931   /// Check if the block has a jump table instruction.
hasJumpTable()932   bool hasJumpTable() const { return getJumpTable() != nullptr; }
933 
934 private:
935   void adjustNumPseudos(const MCInst &Inst, int Sign);
936 
adjustNumPseudos(Itr Begin,Itr End,int Sign)937   template <typename Itr> void adjustNumPseudos(Itr Begin, Itr End, int Sign) {
938     while (Begin != End)
939       adjustNumPseudos(*Begin++, Sign);
940   }
941 
942   /// Adds predecessor to the BB. Most likely you don't need to call this.
943   void addPredecessor(BinaryBasicBlock *Pred);
944 
945   /// Remove predecessor of the basic block. Don't use directly, instead
946   /// use removeSuccessor() function.
947   /// If \p Multiple is set to true, it will remove all predecessors that
948   /// are equal to \p Pred. Otherwise, the first instance of \p Pred found
949   /// will be removed. This only matters in awkward, redundant CFGs.
950   void removePredecessor(BinaryBasicBlock *Pred, bool Multiple = true);
951 
952   /// Set end offset of this basic block.
setEndOffset(uint32_t Offset)953   void setEndOffset(uint32_t Offset) { InputRange.second = Offset; }
954 
955   /// Set the index of this basic block.
setIndex(unsigned I)956   void setIndex(unsigned I) { Index = I; }
957 
clearList(T & List)958   template <typename T> void clearList(T &List) {
959     T TempList;
960     TempList.swap(List);
961   }
962 
963   /// Release memory taken by CFG edges and instructions.
releaseCFG()964   void releaseCFG() {
965     clearList(Predecessors);
966     clearList(Successors);
967     clearList(Throwers);
968     clearList(LandingPads);
969     clearList(BranchInfo);
970     clearList(Instructions);
971   }
972 };
973 
974 #if defined(LLVM_ON_UNIX)
975 /// Keep the size of the BinaryBasicBlock within a reasonable size class
976 /// (jemalloc bucket) on Linux
977 static_assert(sizeof(BinaryBasicBlock) <= 256, "");
978 #endif
979 
980 bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS);
981 
982 } // namespace bolt
983 
984 // GraphTraits specializations for basic block graphs (CFGs)
985 template <> struct GraphTraits<bolt::BinaryBasicBlock *> {
986   using NodeRef = bolt::BinaryBasicBlock *;
987   using ChildIteratorType = bolt::BinaryBasicBlock::succ_iterator;
988 
989   static NodeRef getEntryNode(bolt::BinaryBasicBlock *BB) { return BB; }
990   static inline ChildIteratorType child_begin(NodeRef N) {
991     return N->succ_begin();
992   }
993   static inline ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
994 };
995 
996 template <> struct GraphTraits<const bolt::BinaryBasicBlock *> {
997   using NodeRef = const bolt::BinaryBasicBlock *;
998   using ChildIteratorType = bolt::BinaryBasicBlock::const_succ_iterator;
999 
1000   static NodeRef getEntryNode(const bolt::BinaryBasicBlock *BB) { return BB; }
1001   static inline ChildIteratorType child_begin(NodeRef N) {
1002     return N->succ_begin();
1003   }
1004   static inline ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
1005 };
1006 
1007 template <> struct GraphTraits<Inverse<bolt::BinaryBasicBlock *>> {
1008   using NodeRef = bolt::BinaryBasicBlock *;
1009   using ChildIteratorType = bolt::BinaryBasicBlock::pred_iterator;
1010   static NodeRef getEntryNode(Inverse<bolt::BinaryBasicBlock *> G) {
1011     return G.Graph;
1012   }
1013   static inline ChildIteratorType child_begin(NodeRef N) {
1014     return N->pred_begin();
1015   }
1016   static inline ChildIteratorType child_end(NodeRef N) { return N->pred_end(); }
1017 };
1018 
1019 template <> struct GraphTraits<Inverse<const bolt::BinaryBasicBlock *>> {
1020   using NodeRef = const bolt::BinaryBasicBlock *;
1021   using ChildIteratorType = bolt::BinaryBasicBlock::const_pred_iterator;
1022   static NodeRef getEntryNode(Inverse<const bolt::BinaryBasicBlock *> G) {
1023     return G.Graph;
1024   }
1025   static inline ChildIteratorType child_begin(NodeRef N) {
1026     return N->pred_begin();
1027   }
1028   static inline ChildIteratorType child_end(NodeRef N) { return N->pred_end(); }
1029 };
1030 
1031 } // namespace llvm
1032 
1033 #endif
1034