1 //===- bolt/Passes/CMOVConversion.cpp ------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the CMOV conversion pass. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/CMOVConversion.h" 14 #include "bolt/Core/BinaryBasicBlock.h" 15 #include "bolt/Core/BinaryContext.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "llvm/ADT/PostOrderIterator.h" 18 #include "llvm/Support/CommandLine.h" 19 #include "llvm/Support/ErrorHandling.h" 20 #include <numeric> 21 22 #define DEBUG_TYPE "cmov" 23 24 using namespace llvm; 25 26 namespace opts { 27 28 extern cl::OptionCategory BoltOptCategory; 29 30 static cl::opt<int> BiasThreshold( 31 "cmov-conversion-bias-threshold", 32 cl::desc("minimum condition bias (pct) to perform a CMOV conversion, " 33 "-1 to not account bias"), 34 cl::ReallyHidden, cl::init(1), cl::cat(BoltOptCategory)); 35 36 static cl::opt<int> MispredictionThreshold( 37 "cmov-conversion-misprediction-threshold", 38 cl::desc("minimum misprediction rate (pct) to perform a CMOV conversion, " 39 "-1 to not account misprediction rate"), 40 cl::ReallyHidden, cl::init(5), cl::cat(BoltOptCategory)); 41 42 static cl::opt<bool> ConvertStackMemOperand( 43 "cmov-conversion-convert-stack-mem-operand", 44 cl::desc("convert moves with stack memory operand (potentially unsafe)"), 45 cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory)); 46 47 static cl::opt<bool> ConvertBasePtrStackMemOperand( 48 "cmov-conversion-convert-rbp-stack-mem-operand", 49 cl::desc("convert moves with rbp stack memory operand (unsafe, must be off " 50 "for binaries compiled with -fomit-frame-pointer)"), 51 cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory)); 52 53 } // namespace opts 54 55 namespace llvm { 56 namespace bolt { 57 58 // Return true if the CFG conforms to the following subgraph: 59 // Predecessor 60 // / \ 61 // | RHS 62 // \ / 63 // LHS 64 // Caller guarantees that LHS and RHS share the same predecessor. 65 bool isIfThenSubgraph(const BinaryBasicBlock &LHS, 66 const BinaryBasicBlock &RHS) { 67 if (LHS.pred_size() != 2 || RHS.pred_size() != 1) 68 return false; 69 70 // Sanity check 71 BinaryBasicBlock *Predecessor = *RHS.pred_begin(); 72 assert(Predecessor && LHS.isPredecessor(Predecessor) && "invalid subgraph"); 73 (void)Predecessor; 74 75 if (!LHS.isPredecessor(&RHS)) 76 return false; 77 if (RHS.succ_size() != 1) 78 return false; 79 return true; 80 } 81 82 bool matchCFGSubgraph(BinaryBasicBlock &BB, BinaryBasicBlock *&ConditionalSucc, 83 BinaryBasicBlock *&UnconditionalSucc, 84 bool &IsConditionalTaken) { 85 BinaryBasicBlock *TakenSucc = BB.getConditionalSuccessor(true); 86 BinaryBasicBlock *FallthroughSucc = BB.getConditionalSuccessor(false); 87 bool IsIfThenTaken = isIfThenSubgraph(*FallthroughSucc, *TakenSucc); 88 bool IsIfThenFallthrough = isIfThenSubgraph(*TakenSucc, *FallthroughSucc); 89 if (!IsIfThenFallthrough && !IsIfThenTaken) 90 return false; 91 assert((!IsIfThenFallthrough || !IsIfThenTaken) && "Invalid subgraph"); 92 93 // Output parameters 94 ConditionalSucc = IsIfThenTaken ? TakenSucc : FallthroughSucc; 95 UnconditionalSucc = IsIfThenTaken ? FallthroughSucc : TakenSucc; 96 IsConditionalTaken = IsIfThenTaken; 97 return true; 98 } 99 100 // Return true if basic block instructions can be converted into cmov(s). 101 bool canConvertInstructions(const BinaryContext &BC, const BinaryBasicBlock &BB, 102 unsigned CC) { 103 if (BB.empty()) 104 return false; 105 const MCInst *LastInst = BB.getLastNonPseudoInstr(); 106 // Only pseudo instructions, can't be converted into CMOV 107 if (LastInst == nullptr) 108 return false; 109 for (const MCInst &Inst : BB) { 110 if (BC.MIB->isPseudo(Inst)) 111 continue; 112 // Unconditional branch as a last instruction is OK 113 if (&Inst == LastInst && BC.MIB->isUnconditionalBranch(Inst)) 114 continue; 115 MCInst Cmov(Inst); 116 // GPR move is OK 117 if (!BC.MIB->convertMoveToConditionalMove( 118 Cmov, CC, opts::ConvertStackMemOperand, 119 opts::ConvertBasePtrStackMemOperand)) { 120 LLVM_DEBUG({ 121 dbgs() << BB.getName() << ": can't convert instruction "; 122 BC.printInstruction(dbgs(), Cmov); 123 }); 124 return false; 125 } 126 } 127 return true; 128 } 129 130 void convertMoves(const BinaryContext &BC, BinaryBasicBlock &BB, unsigned CC) { 131 for (auto II = BB.begin(), IE = BB.end(); II != IE; ++II) { 132 if (BC.MIB->isPseudo(*II)) 133 continue; 134 if (BC.MIB->isUnconditionalBranch(*II)) { 135 // XXX: this invalidates II but we return immediately 136 BB.eraseInstruction(II); 137 return; 138 } 139 bool Result = BC.MIB->convertMoveToConditionalMove( 140 *II, CC, opts::ConvertStackMemOperand, 141 opts::ConvertBasePtrStackMemOperand); 142 assert(Result && "unexpected instruction"); 143 (void)Result; 144 } 145 } 146 147 // Returns misprediction rate if the profile data is available, -1 otherwise. 148 std::pair<int, uint64_t> 149 calculateMispredictionRate(const BinaryBasicBlock &BB) { 150 uint64_t TotalExecCount = 0; 151 uint64_t TotalMispredictionCount = 0; 152 for (auto BI : BB.branch_info()) { 153 TotalExecCount += BI.Count; 154 if (BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) 155 TotalMispredictionCount += BI.MispredictedCount; 156 } 157 if (!TotalExecCount) 158 return {-1, TotalMispredictionCount}; 159 return {100.0f * TotalMispredictionCount / TotalExecCount, 160 TotalMispredictionCount}; 161 } 162 163 // Returns conditional succ bias if the profile is available, -1 otherwise. 164 int calculateConditionBias(const BinaryBasicBlock &BB, 165 const BinaryBasicBlock &ConditionalSucc) { 166 if (auto BranchStats = BB.getBranchStats(&ConditionalSucc)) 167 return BranchStats->first; 168 return -1; 169 } 170 171 void CMOVConversion::Stats::dump() { 172 outs() << "converted static " << StaticPerformed << "/" << StaticPossible 173 << formatv(" ({0:P}) ", getStaticRatio()) 174 << "hammock(s) into CMOV sequences, with dynamic execution count " 175 << DynamicPerformed << "/" << DynamicPossible 176 << formatv(" ({0:P}), ", getDynamicRatio()) << "saving " << RemovedMP 177 << "/" << PossibleMP << formatv(" ({0:P}) ", getMPRatio()) 178 << "mispredictions\n"; 179 } 180 181 void CMOVConversion::runOnFunction(BinaryFunction &Function) { 182 BinaryContext &BC = Function.getBinaryContext(); 183 bool Modified = false; 184 // Function-local stats 185 Stats Local; 186 // Traverse blocks in RPO, merging block with a converted cmov with its 187 // successor. 188 for (BinaryBasicBlock *BB : post_order(&Function)) { 189 uint64_t BBExecCount = BB->getKnownExecutionCount(); 190 if (BB->empty() || // The block must have instructions 191 BBExecCount == 0 || // must be hot 192 BB->succ_size() != 2 || // with two successors 193 BB->hasJumpTable()) // no jump table 194 continue; 195 196 assert(BB->isValid() && "traversal internal error"); 197 198 // Check branch instruction 199 auto BranchInstrIter = BB->getLastNonPseudo(); 200 if (BranchInstrIter == BB->rend() || 201 !BC.MIB->isConditionalBranch(*BranchInstrIter)) 202 continue; 203 204 // Check successors 205 BinaryBasicBlock *ConditionalSucc, *UnconditionalSucc; 206 bool IsConditionalTaken; 207 if (!matchCFGSubgraph(*BB, ConditionalSucc, UnconditionalSucc, 208 IsConditionalTaken)) { 209 LLVM_DEBUG(dbgs() << BB->getName() << ": couldn't match hammock\n"); 210 continue; 211 } 212 213 unsigned CC = BC.MIB->getCondCode(*BranchInstrIter); 214 if (!IsConditionalTaken) 215 CC = BC.MIB->getInvertedCondCode(CC); 216 // Check contents of the conditional block 217 if (!canConvertInstructions(BC, *ConditionalSucc, CC)) 218 continue; 219 220 int ConditionBias = calculateConditionBias(*BB, *ConditionalSucc); 221 int MispredictionRate = 0; 222 uint64_t MispredictionCount = 0; 223 std::tie(MispredictionRate, MispredictionCount) = 224 calculateMispredictionRate(*BB); 225 226 Local.StaticPossible++; 227 Local.DynamicPossible += BBExecCount; 228 Local.PossibleMP += MispredictionCount; 229 230 // If the conditional successor is never executed, don't convert it 231 if (ConditionBias < opts::BiasThreshold) { 232 LLVM_DEBUG(dbgs() << BB->getName() << "->" << ConditionalSucc->getName() 233 << " bias = " << ConditionBias 234 << ", less than threshold " << opts::BiasThreshold 235 << '\n'); 236 continue; 237 } 238 239 // Check the misprediction rate of a branch 240 if (MispredictionRate < opts::MispredictionThreshold) { 241 LLVM_DEBUG(dbgs() << BB->getName() << " misprediction rate = " 242 << MispredictionRate << ", less than threshold " 243 << opts::MispredictionThreshold << '\n'); 244 continue; 245 } 246 247 // remove conditional branch 248 BB->eraseInstruction(std::prev(BranchInstrIter.base())); 249 BB->removeAllSuccessors(); 250 // Convert instructions from the conditional successor into cmov's in BB. 251 convertMoves(BC, *ConditionalSucc, CC); 252 BB->addInstructions(ConditionalSucc->begin(), ConditionalSucc->end()); 253 ConditionalSucc->markValid(false); 254 255 // RPO traversal guarantees that the successor is visited and merged if 256 // necessary. Merge the unconditional successor into the current block. 257 BB->addInstructions(UnconditionalSucc->begin(), UnconditionalSucc->end()); 258 UnconditionalSucc->moveAllSuccessorsTo(BB); 259 UnconditionalSucc->markValid(false); 260 Local.StaticPerformed++; 261 Local.DynamicPerformed += BBExecCount; 262 Local.RemovedMP += MispredictionCount; 263 Modified = true; 264 } 265 if (Modified) 266 Function.eraseInvalidBBs(); 267 if (opts::Verbosity > 1) { 268 outs() << "BOLT-INFO: CMOVConversion: " << Function << ", "; 269 Local.dump(); 270 } 271 Global = Global + Local; 272 } 273 274 void CMOVConversion::runOnFunctions(BinaryContext &BC) { 275 for (auto &It : BC.getBinaryFunctions()) { 276 BinaryFunction &Function = It.second; 277 if (!shouldOptimize(Function)) 278 continue; 279 runOnFunction(Function); 280 } 281 282 outs() << "BOLT-INFO: CMOVConversion total: "; 283 Global.dump(); 284 } 285 286 } // end namespace bolt 287 } // end namespace llvm 288