1ca7c307dSSotiris Apostolakis //===--- SelectOptimize.cpp - Convert select to branches if profitable ---===// 2ca7c307dSSotiris Apostolakis // 3ca7c307dSSotiris Apostolakis // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4ca7c307dSSotiris Apostolakis // See https://llvm.org/LICENSE.txt for license information. 5ca7c307dSSotiris Apostolakis // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6ca7c307dSSotiris Apostolakis // 7ca7c307dSSotiris Apostolakis //===----------------------------------------------------------------------===// 8ca7c307dSSotiris Apostolakis // 9ca7c307dSSotiris Apostolakis // This pass converts selects to conditional jumps when profitable. 10ca7c307dSSotiris Apostolakis // 11ca7c307dSSotiris Apostolakis //===----------------------------------------------------------------------===// 12ca7c307dSSotiris Apostolakis 13*d7ebb746SSotiris Apostolakis #include "llvm/ADT/Optional.h" 1497c3ef5cSSotiris Apostolakis #include "llvm/ADT/SmallVector.h" 1597c3ef5cSSotiris Apostolakis #include "llvm/ADT/Statistic.h" 1697c3ef5cSSotiris Apostolakis #include "llvm/Analysis/BlockFrequencyInfo.h" 1797c3ef5cSSotiris Apostolakis #include "llvm/Analysis/BranchProbabilityInfo.h" 1897c3ef5cSSotiris Apostolakis #include "llvm/Analysis/LoopInfo.h" 198b42bc56SSotiris Apostolakis #include "llvm/Analysis/OptimizationRemarkEmitter.h" 208b42bc56SSotiris Apostolakis #include "llvm/Analysis/ProfileSummaryInfo.h" 218b42bc56SSotiris Apostolakis #include "llvm/Analysis/TargetTransformInfo.h" 22ca7c307dSSotiris Apostolakis #include "llvm/CodeGen/Passes.h" 2397c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetLowering.h" 2497c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetPassConfig.h" 2597c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetSchedule.h" 2697c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetSubtargetInfo.h" 2797c3ef5cSSotiris Apostolakis #include "llvm/IR/BasicBlock.h" 288b42bc56SSotiris Apostolakis #include "llvm/IR/Dominators.h" 29ca7c307dSSotiris Apostolakis #include "llvm/IR/Function.h" 3097c3ef5cSSotiris Apostolakis #include "llvm/IR/IRBuilder.h" 3197c3ef5cSSotiris Apostolakis #include "llvm/IR/Instruction.h" 32ca7c307dSSotiris Apostolakis #include "llvm/InitializePasses.h" 33ca7c307dSSotiris Apostolakis #include "llvm/Pass.h" 34*d7ebb746SSotiris Apostolakis #include "llvm/Support/ScaledNumber.h" 3597c3ef5cSSotiris Apostolakis #include "llvm/Target/TargetMachine.h" 368b42bc56SSotiris Apostolakis #include "llvm/Transforms/Utils/SizeOpts.h" 378b42bc56SSotiris Apostolakis #include <algorithm> 388b42bc56SSotiris Apostolakis #include <memory> 398b42bc56SSotiris Apostolakis #include <queue> 408b42bc56SSotiris Apostolakis #include <stack> 418b42bc56SSotiris Apostolakis #include <string> 42ca7c307dSSotiris Apostolakis 43ca7c307dSSotiris Apostolakis using namespace llvm; 44ca7c307dSSotiris Apostolakis 4597c3ef5cSSotiris Apostolakis #define DEBUG_TYPE "select-optimize" 4697c3ef5cSSotiris Apostolakis 478b42bc56SSotiris Apostolakis STATISTIC(NumSelectOptAnalyzed, 488b42bc56SSotiris Apostolakis "Number of select groups considered for conversion to branch"); 498b42bc56SSotiris Apostolakis STATISTIC(NumSelectConvertedExpColdOperand, 508b42bc56SSotiris Apostolakis "Number of select groups converted due to expensive cold operand"); 518b42bc56SSotiris Apostolakis STATISTIC(NumSelectConvertedHighPred, 528b42bc56SSotiris Apostolakis "Number of select groups converted due to high-predictability"); 538b42bc56SSotiris Apostolakis STATISTIC(NumSelectUnPred, 548b42bc56SSotiris Apostolakis "Number of select groups not converted due to unpredictability"); 558b42bc56SSotiris Apostolakis STATISTIC(NumSelectColdBB, 568b42bc56SSotiris Apostolakis "Number of select groups not converted due to cold basic block"); 57*d7ebb746SSotiris Apostolakis STATISTIC(NumSelectConvertedLoop, 58*d7ebb746SSotiris Apostolakis "Number of select groups converted due to loop-level analysis"); 5997c3ef5cSSotiris Apostolakis STATISTIC(NumSelectsConverted, "Number of selects converted"); 6097c3ef5cSSotiris Apostolakis 618b42bc56SSotiris Apostolakis static cl::opt<unsigned> ColdOperandThreshold( 628b42bc56SSotiris Apostolakis "cold-operand-threshold", 638b42bc56SSotiris Apostolakis cl::desc("Maximum frequency of path for an operand to be considered cold."), 648b42bc56SSotiris Apostolakis cl::init(20), cl::Hidden); 658b42bc56SSotiris Apostolakis 668b42bc56SSotiris Apostolakis static cl::opt<unsigned> ColdOperandMaxCostMultiplier( 678b42bc56SSotiris Apostolakis "cold-operand-max-cost-multiplier", 688b42bc56SSotiris Apostolakis cl::desc("Maximum cost multiplier of TCC_expensive for the dependence " 698b42bc56SSotiris Apostolakis "slice of a cold operand to be considered inexpensive."), 708b42bc56SSotiris Apostolakis cl::init(1), cl::Hidden); 718b42bc56SSotiris Apostolakis 72*d7ebb746SSotiris Apostolakis static cl::opt<unsigned> 73*d7ebb746SSotiris Apostolakis GainGradientThreshold("select-opti-loop-gradient-gain-threshold", 74*d7ebb746SSotiris Apostolakis cl::desc("Gradient gain threshold (%)."), 75*d7ebb746SSotiris Apostolakis cl::init(25), cl::Hidden); 76*d7ebb746SSotiris Apostolakis 77*d7ebb746SSotiris Apostolakis static cl::opt<unsigned> 78*d7ebb746SSotiris Apostolakis GainCycleThreshold("select-opti-loop-cycle-gain-threshold", 79*d7ebb746SSotiris Apostolakis cl::desc("Minimum gain per loop (in cycles) threshold."), 80*d7ebb746SSotiris Apostolakis cl::init(4), cl::Hidden); 81*d7ebb746SSotiris Apostolakis 82*d7ebb746SSotiris Apostolakis static cl::opt<unsigned> GainRelativeThreshold( 83*d7ebb746SSotiris Apostolakis "select-opti-loop-relative-gain-threshold", 84*d7ebb746SSotiris Apostolakis cl::desc( 85*d7ebb746SSotiris Apostolakis "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"), 86*d7ebb746SSotiris Apostolakis cl::init(8), cl::Hidden); 87*d7ebb746SSotiris Apostolakis 88*d7ebb746SSotiris Apostolakis static cl::opt<unsigned> MispredictDefaultRate( 89*d7ebb746SSotiris Apostolakis "mispredict-default-rate", cl::Hidden, cl::init(25), 90*d7ebb746SSotiris Apostolakis cl::desc("Default mispredict rate (initialized to 25%).")); 91*d7ebb746SSotiris Apostolakis 92*d7ebb746SSotiris Apostolakis static cl::opt<bool> 93*d7ebb746SSotiris Apostolakis DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden, 94*d7ebb746SSotiris Apostolakis cl::init(false), 95*d7ebb746SSotiris Apostolakis cl::desc("Disable loop-level heuristics.")); 96*d7ebb746SSotiris Apostolakis 97ca7c307dSSotiris Apostolakis namespace { 98ca7c307dSSotiris Apostolakis 99ca7c307dSSotiris Apostolakis class SelectOptimize : public FunctionPass { 10097c3ef5cSSotiris Apostolakis const TargetMachine *TM = nullptr; 10197c3ef5cSSotiris Apostolakis const TargetSubtargetInfo *TSI; 10297c3ef5cSSotiris Apostolakis const TargetLowering *TLI = nullptr; 1038b42bc56SSotiris Apostolakis const TargetTransformInfo *TTI = nullptr; 10497c3ef5cSSotiris Apostolakis const LoopInfo *LI; 1058b42bc56SSotiris Apostolakis DominatorTree *DT; 10697c3ef5cSSotiris Apostolakis std::unique_ptr<BlockFrequencyInfo> BFI; 10797c3ef5cSSotiris Apostolakis std::unique_ptr<BranchProbabilityInfo> BPI; 1088b42bc56SSotiris Apostolakis ProfileSummaryInfo *PSI; 1098b42bc56SSotiris Apostolakis OptimizationRemarkEmitter *ORE; 110*d7ebb746SSotiris Apostolakis TargetSchedModel TSchedModel; 11197c3ef5cSSotiris Apostolakis 112ca7c307dSSotiris Apostolakis public: 113ca7c307dSSotiris Apostolakis static char ID; 1148b42bc56SSotiris Apostolakis 115ca7c307dSSotiris Apostolakis SelectOptimize() : FunctionPass(ID) { 116ca7c307dSSotiris Apostolakis initializeSelectOptimizePass(*PassRegistry::getPassRegistry()); 117ca7c307dSSotiris Apostolakis } 118ca7c307dSSotiris Apostolakis 119ca7c307dSSotiris Apostolakis bool runOnFunction(Function &F) override; 120ca7c307dSSotiris Apostolakis 12197c3ef5cSSotiris Apostolakis void getAnalysisUsage(AnalysisUsage &AU) const override { 1228b42bc56SSotiris Apostolakis AU.addRequired<ProfileSummaryInfoWrapperPass>(); 12397c3ef5cSSotiris Apostolakis AU.addRequired<TargetPassConfig>(); 1248b42bc56SSotiris Apostolakis AU.addRequired<TargetTransformInfoWrapperPass>(); 1258b42bc56SSotiris Apostolakis AU.addRequired<DominatorTreeWrapperPass>(); 12697c3ef5cSSotiris Apostolakis AU.addRequired<LoopInfoWrapperPass>(); 1278b42bc56SSotiris Apostolakis AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 12897c3ef5cSSotiris Apostolakis } 12997c3ef5cSSotiris Apostolakis 13097c3ef5cSSotiris Apostolakis private: 13197c3ef5cSSotiris Apostolakis // Select groups consist of consecutive select instructions with the same 13297c3ef5cSSotiris Apostolakis // condition. 13397c3ef5cSSotiris Apostolakis using SelectGroup = SmallVector<SelectInst *, 2>; 13497c3ef5cSSotiris Apostolakis using SelectGroups = SmallVector<SelectGroup, 2>; 13597c3ef5cSSotiris Apostolakis 136*d7ebb746SSotiris Apostolakis using Scaled64 = ScaledNumber<uint64_t>; 137*d7ebb746SSotiris Apostolakis 138*d7ebb746SSotiris Apostolakis struct CostInfo { 139*d7ebb746SSotiris Apostolakis /// Predicated cost (with selects as conditional moves). 140*d7ebb746SSotiris Apostolakis Scaled64 PredCost; 141*d7ebb746SSotiris Apostolakis /// Non-predicated cost (with selects converted to branches). 142*d7ebb746SSotiris Apostolakis Scaled64 NonPredCost; 143*d7ebb746SSotiris Apostolakis }; 144*d7ebb746SSotiris Apostolakis 1458b42bc56SSotiris Apostolakis // Converts select instructions of a function to conditional jumps when deemed 1468b42bc56SSotiris Apostolakis // profitable. Returns true if at least one select was converted. 14797c3ef5cSSotiris Apostolakis bool optimizeSelects(Function &F); 1488b42bc56SSotiris Apostolakis 1498b42bc56SSotiris Apostolakis // Heuristics for determining which select instructions can be profitably 1508b42bc56SSotiris Apostolakis // conveted to branches. Separate heuristics for selects in inner-most loops 1518b42bc56SSotiris Apostolakis // and the rest of code regions (base heuristics for non-inner-most loop 1528b42bc56SSotiris Apostolakis // regions). 1538b42bc56SSotiris Apostolakis void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups); 1548b42bc56SSotiris Apostolakis void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups); 1558b42bc56SSotiris Apostolakis 1568b42bc56SSotiris Apostolakis // Converts to branches the select groups that were deemed 1578b42bc56SSotiris Apostolakis // profitable-to-convert. 15897c3ef5cSSotiris Apostolakis void convertProfitableSIGroups(SelectGroups &ProfSIGroups); 1598b42bc56SSotiris Apostolakis 1608b42bc56SSotiris Apostolakis // Splits selects of a given basic block into select groups. 16197c3ef5cSSotiris Apostolakis void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups); 1628b42bc56SSotiris Apostolakis 1638b42bc56SSotiris Apostolakis // Determines for which select groups it is profitable converting to branches 164*d7ebb746SSotiris Apostolakis // (base and inner-most-loop heuristics). 1658b42bc56SSotiris Apostolakis void findProfitableSIGroupsBase(SelectGroups &SIGroups, 1668b42bc56SSotiris Apostolakis SelectGroups &ProfSIGroups); 167*d7ebb746SSotiris Apostolakis void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups, 168*d7ebb746SSotiris Apostolakis SelectGroups &ProfSIGroups); 169*d7ebb746SSotiris Apostolakis 1708b42bc56SSotiris Apostolakis // Determines if a select group should be converted to a branch (base 1718b42bc56SSotiris Apostolakis // heuristics). 1728b42bc56SSotiris Apostolakis bool isConvertToBranchProfitableBase(const SmallVector<SelectInst *, 2> &ASI); 1738b42bc56SSotiris Apostolakis 1748b42bc56SSotiris Apostolakis // Returns true if there are expensive instructions in the cold value 1758b42bc56SSotiris Apostolakis // operand's (if any) dependence slice of any of the selects of the given 1768b42bc56SSotiris Apostolakis // group. 1778b42bc56SSotiris Apostolakis bool hasExpensiveColdOperand(const SmallVector<SelectInst *, 2> &ASI); 1788b42bc56SSotiris Apostolakis 1798b42bc56SSotiris Apostolakis // For a given source instruction, collect its backwards dependence slice 1808b42bc56SSotiris Apostolakis // consisting of instructions exclusively computed for producing the operands 1818b42bc56SSotiris Apostolakis // of the source instruction. 1828b42bc56SSotiris Apostolakis void getExclBackwardsSlice(Instruction *I, 1838b42bc56SSotiris Apostolakis SmallVector<Instruction *, 2> &Slice); 1848b42bc56SSotiris Apostolakis 1858b42bc56SSotiris Apostolakis // Returns true if the condition of the select is highly predictable. 1868b42bc56SSotiris Apostolakis bool isSelectHighlyPredictable(const SelectInst *SI); 1878b42bc56SSotiris Apostolakis 188*d7ebb746SSotiris Apostolakis // Loop-level checks to determine if a non-predicated version (with branches) 189*d7ebb746SSotiris Apostolakis // of the given loop is more profitable than its predicated version. 190*d7ebb746SSotiris Apostolakis bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]); 191*d7ebb746SSotiris Apostolakis 192*d7ebb746SSotiris Apostolakis // Computes instruction and loop-critical-path costs for both the predicated 193*d7ebb746SSotiris Apostolakis // and non-predicated version of the given loop. 194*d7ebb746SSotiris Apostolakis bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups, 195*d7ebb746SSotiris Apostolakis DenseMap<const Instruction *, CostInfo> &InstCostMap, 196*d7ebb746SSotiris Apostolakis CostInfo *LoopCost); 197*d7ebb746SSotiris Apostolakis 198*d7ebb746SSotiris Apostolakis // Returns a set of all the select instructions in the given select groups. 199*d7ebb746SSotiris Apostolakis SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups); 200*d7ebb746SSotiris Apostolakis 201*d7ebb746SSotiris Apostolakis // Returns the latency cost of a given instruction. 202*d7ebb746SSotiris Apostolakis Optional<uint64_t> computeInstCost(const Instruction *I); 203*d7ebb746SSotiris Apostolakis 204*d7ebb746SSotiris Apostolakis // Returns the misprediction cost of a given select when converted to branch. 205*d7ebb746SSotiris Apostolakis Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost); 206*d7ebb746SSotiris Apostolakis 207*d7ebb746SSotiris Apostolakis // Returns the cost of a branch when the prediction is correct. 208*d7ebb746SSotiris Apostolakis Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, 209*d7ebb746SSotiris Apostolakis const SelectInst *SI); 210*d7ebb746SSotiris Apostolakis 2118b42bc56SSotiris Apostolakis // Returns true if the target architecture supports lowering a given select. 21297c3ef5cSSotiris Apostolakis bool isSelectKindSupported(SelectInst *SI); 213ca7c307dSSotiris Apostolakis }; 214ca7c307dSSotiris Apostolakis } // namespace 215ca7c307dSSotiris Apostolakis 216ca7c307dSSotiris Apostolakis char SelectOptimize::ID = 0; 21797c3ef5cSSotiris Apostolakis 21897c3ef5cSSotiris Apostolakis INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, 21997c3ef5cSSotiris Apostolakis false) 22097c3ef5cSSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 2218b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 2228b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 22397c3ef5cSSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 2248b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 2258b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 22697c3ef5cSSotiris Apostolakis INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, 227ca7c307dSSotiris Apostolakis false) 228ca7c307dSSotiris Apostolakis 229ca7c307dSSotiris Apostolakis FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); } 230ca7c307dSSotiris Apostolakis 231ca7c307dSSotiris Apostolakis bool SelectOptimize::runOnFunction(Function &F) { 23297c3ef5cSSotiris Apostolakis TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); 23397c3ef5cSSotiris Apostolakis TSI = TM->getSubtargetImpl(F); 23497c3ef5cSSotiris Apostolakis TLI = TSI->getTargetLowering(); 2358b42bc56SSotiris Apostolakis 2368b42bc56SSotiris Apostolakis // If none of the select types is supported then skip this pass. 2378b42bc56SSotiris Apostolakis // This is an optimization pass. Legality issues will be handled by 2388b42bc56SSotiris Apostolakis // instruction selection. 2398b42bc56SSotiris Apostolakis if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && 2408b42bc56SSotiris Apostolakis !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && 2418b42bc56SSotiris Apostolakis !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) 2428b42bc56SSotiris Apostolakis return false; 2438b42bc56SSotiris Apostolakis 2448b42bc56SSotiris Apostolakis TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 2458b42bc56SSotiris Apostolakis DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 24697c3ef5cSSotiris Apostolakis LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 24797c3ef5cSSotiris Apostolakis BPI.reset(new BranchProbabilityInfo(F, *LI)); 24897c3ef5cSSotiris Apostolakis BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); 2498b42bc56SSotiris Apostolakis PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 2508b42bc56SSotiris Apostolakis ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 251*d7ebb746SSotiris Apostolakis TSchedModel.init(TSI); 2528b42bc56SSotiris Apostolakis 2538b42bc56SSotiris Apostolakis // When optimizing for size, selects are preferable over branches. 2548b42bc56SSotiris Apostolakis if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get())) 2558b42bc56SSotiris Apostolakis return false; 25697c3ef5cSSotiris Apostolakis 25797c3ef5cSSotiris Apostolakis return optimizeSelects(F); 25897c3ef5cSSotiris Apostolakis } 25997c3ef5cSSotiris Apostolakis 26097c3ef5cSSotiris Apostolakis bool SelectOptimize::optimizeSelects(Function &F) { 26197c3ef5cSSotiris Apostolakis // Determine for which select groups it is profitable converting to branches. 26297c3ef5cSSotiris Apostolakis SelectGroups ProfSIGroups; 2638b42bc56SSotiris Apostolakis // Base heuristics apply only to non-loops and outer loops. 2648b42bc56SSotiris Apostolakis optimizeSelectsBase(F, ProfSIGroups); 2658b42bc56SSotiris Apostolakis // Separate heuristics for inner-most loops. 2668b42bc56SSotiris Apostolakis optimizeSelectsInnerLoops(F, ProfSIGroups); 26797c3ef5cSSotiris Apostolakis 26897c3ef5cSSotiris Apostolakis // Convert to branches the select groups that were deemed 26997c3ef5cSSotiris Apostolakis // profitable-to-convert. 27097c3ef5cSSotiris Apostolakis convertProfitableSIGroups(ProfSIGroups); 27197c3ef5cSSotiris Apostolakis 27297c3ef5cSSotiris Apostolakis // Code modified if at least one select group was converted. 27397c3ef5cSSotiris Apostolakis return !ProfSIGroups.empty(); 27497c3ef5cSSotiris Apostolakis } 27597c3ef5cSSotiris Apostolakis 2768b42bc56SSotiris Apostolakis void SelectOptimize::optimizeSelectsBase(Function &F, 2778b42bc56SSotiris Apostolakis SelectGroups &ProfSIGroups) { 2788b42bc56SSotiris Apostolakis // Collect all the select groups. 2798b42bc56SSotiris Apostolakis SelectGroups SIGroups; 2808b42bc56SSotiris Apostolakis for (BasicBlock &BB : F) { 2818b42bc56SSotiris Apostolakis // Base heuristics apply only to non-loops and outer loops. 2828b42bc56SSotiris Apostolakis Loop *L = LI->getLoopFor(&BB); 2838b42bc56SSotiris Apostolakis if (L && L->isInnermost()) 2848b42bc56SSotiris Apostolakis continue; 2858b42bc56SSotiris Apostolakis collectSelectGroups(BB, SIGroups); 2868b42bc56SSotiris Apostolakis } 2878b42bc56SSotiris Apostolakis 2888b42bc56SSotiris Apostolakis // Determine for which select groups it is profitable converting to branches. 2898b42bc56SSotiris Apostolakis findProfitableSIGroupsBase(SIGroups, ProfSIGroups); 2908b42bc56SSotiris Apostolakis } 2918b42bc56SSotiris Apostolakis 2928b42bc56SSotiris Apostolakis void SelectOptimize::optimizeSelectsInnerLoops(Function &F, 293*d7ebb746SSotiris Apostolakis SelectGroups &ProfSIGroups) { 294*d7ebb746SSotiris Apostolakis SmallVector<Loop *, 4> Loops(LI->begin(), LI->end()); 295*d7ebb746SSotiris Apostolakis // Need to check size on each iteration as we accumulate child loops. 296*d7ebb746SSotiris Apostolakis for (unsigned long i = 0; i < Loops.size(); ++i) 297*d7ebb746SSotiris Apostolakis for (Loop *ChildL : Loops[i]->getSubLoops()) 298*d7ebb746SSotiris Apostolakis Loops.push_back(ChildL); 299*d7ebb746SSotiris Apostolakis 300*d7ebb746SSotiris Apostolakis for (Loop *L : Loops) { 301*d7ebb746SSotiris Apostolakis if (!L->isInnermost()) 302*d7ebb746SSotiris Apostolakis continue; 303*d7ebb746SSotiris Apostolakis 304*d7ebb746SSotiris Apostolakis SelectGroups SIGroups; 305*d7ebb746SSotiris Apostolakis for (BasicBlock *BB : L->getBlocks()) 306*d7ebb746SSotiris Apostolakis collectSelectGroups(*BB, SIGroups); 307*d7ebb746SSotiris Apostolakis 308*d7ebb746SSotiris Apostolakis findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups); 309*d7ebb746SSotiris Apostolakis } 310*d7ebb746SSotiris Apostolakis } 3118b42bc56SSotiris Apostolakis 31297c3ef5cSSotiris Apostolakis /// If \p isTrue is true, return the true value of \p SI, otherwise return 31397c3ef5cSSotiris Apostolakis /// false value of \p SI. If the true/false value of \p SI is defined by any 31497c3ef5cSSotiris Apostolakis /// select instructions in \p Selects, look through the defining select 31597c3ef5cSSotiris Apostolakis /// instruction until the true/false value is not defined in \p Selects. 31697c3ef5cSSotiris Apostolakis static Value * 31797c3ef5cSSotiris Apostolakis getTrueOrFalseValue(SelectInst *SI, bool isTrue, 31897c3ef5cSSotiris Apostolakis const SmallPtrSet<const Instruction *, 2> &Selects) { 31997c3ef5cSSotiris Apostolakis Value *V = nullptr; 32097c3ef5cSSotiris Apostolakis for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI); 32197c3ef5cSSotiris Apostolakis DefSI = dyn_cast<SelectInst>(V)) { 32297c3ef5cSSotiris Apostolakis assert(DefSI->getCondition() == SI->getCondition() && 32397c3ef5cSSotiris Apostolakis "The condition of DefSI does not match with SI"); 32497c3ef5cSSotiris Apostolakis V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); 32597c3ef5cSSotiris Apostolakis } 32697c3ef5cSSotiris Apostolakis assert(V && "Failed to get select true/false value"); 32797c3ef5cSSotiris Apostolakis return V; 32897c3ef5cSSotiris Apostolakis } 32997c3ef5cSSotiris Apostolakis 33097c3ef5cSSotiris Apostolakis void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { 33197c3ef5cSSotiris Apostolakis for (SelectGroup &ASI : ProfSIGroups) { 33297c3ef5cSSotiris Apostolakis // TODO: eliminate the redundancy of logic transforming selects to branches 33397c3ef5cSSotiris Apostolakis // by removing CodeGenPrepare::optimizeSelectInst and optimizing here 33497c3ef5cSSotiris Apostolakis // selects for all cases (with and without profile information). 33597c3ef5cSSotiris Apostolakis 33697c3ef5cSSotiris Apostolakis // Transform a sequence like this: 33797c3ef5cSSotiris Apostolakis // start: 33897c3ef5cSSotiris Apostolakis // %cmp = cmp uge i32 %a, %b 33997c3ef5cSSotiris Apostolakis // %sel = select i1 %cmp, i32 %c, i32 %d 34097c3ef5cSSotiris Apostolakis // 34197c3ef5cSSotiris Apostolakis // Into: 34297c3ef5cSSotiris Apostolakis // start: 34397c3ef5cSSotiris Apostolakis // %cmp = cmp uge i32 %a, %b 34497c3ef5cSSotiris Apostolakis // %cmp.frozen = freeze %cmp 34597c3ef5cSSotiris Apostolakis // br i1 %cmp.frozen, label %select.end, label %select.false 34697c3ef5cSSotiris Apostolakis // select.false: 34797c3ef5cSSotiris Apostolakis // br label %select.end 34897c3ef5cSSotiris Apostolakis // select.end: 34997c3ef5cSSotiris Apostolakis // %sel = phi i32 [ %c, %start ], [ %d, %select.false ] 35097c3ef5cSSotiris Apostolakis // 35197c3ef5cSSotiris Apostolakis // %cmp should be frozen, otherwise it may introduce undefined behavior. 35297c3ef5cSSotiris Apostolakis 35397c3ef5cSSotiris Apostolakis // We split the block containing the select(s) into two blocks. 35497c3ef5cSSotiris Apostolakis SelectInst *SI = ASI.front(); 35597c3ef5cSSotiris Apostolakis SelectInst *LastSI = ASI.back(); 35697c3ef5cSSotiris Apostolakis BasicBlock *StartBlock = SI->getParent(); 35797c3ef5cSSotiris Apostolakis BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI)); 35897c3ef5cSSotiris Apostolakis BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); 35997c3ef5cSSotiris Apostolakis BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency()); 36097c3ef5cSSotiris Apostolakis // Delete the unconditional branch that was just created by the split. 36197c3ef5cSSotiris Apostolakis StartBlock->getTerminator()->eraseFromParent(); 36297c3ef5cSSotiris Apostolakis 36397c3ef5cSSotiris Apostolakis // Move any debug/pseudo instructions that were in-between the select 36497c3ef5cSSotiris Apostolakis // group to the newly-created end block. 36597c3ef5cSSotiris Apostolakis SmallVector<Instruction *, 2> DebugPseudoINS; 36697c3ef5cSSotiris Apostolakis auto DIt = SI->getIterator(); 36797c3ef5cSSotiris Apostolakis while (&*DIt != LastSI) { 36897c3ef5cSSotiris Apostolakis if (DIt->isDebugOrPseudoInst()) 36997c3ef5cSSotiris Apostolakis DebugPseudoINS.push_back(&*DIt); 37097c3ef5cSSotiris Apostolakis DIt++; 37197c3ef5cSSotiris Apostolakis } 37297c3ef5cSSotiris Apostolakis for (auto DI : DebugPseudoINS) { 37397c3ef5cSSotiris Apostolakis DI->moveBefore(&*EndBlock->getFirstInsertionPt()); 37497c3ef5cSSotiris Apostolakis } 37597c3ef5cSSotiris Apostolakis 37697c3ef5cSSotiris Apostolakis // These are the new basic blocks for the conditional branch. 37797c3ef5cSSotiris Apostolakis // For now, no instruction sinking to the true/false blocks. 37897c3ef5cSSotiris Apostolakis // Thus both True and False blocks will be empty. 37997c3ef5cSSotiris Apostolakis BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr; 38097c3ef5cSSotiris Apostolakis 38197c3ef5cSSotiris Apostolakis // Use the 'false' side for a new input value to the PHI. 38297c3ef5cSSotiris Apostolakis FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", 38397c3ef5cSSotiris Apostolakis EndBlock->getParent(), EndBlock); 38497c3ef5cSSotiris Apostolakis auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); 38597c3ef5cSSotiris Apostolakis FalseBranch->setDebugLoc(SI->getDebugLoc()); 38697c3ef5cSSotiris Apostolakis 38797c3ef5cSSotiris Apostolakis // For the 'true' side the path originates from the start block from the 38897c3ef5cSSotiris Apostolakis // point view of the new PHI. 38997c3ef5cSSotiris Apostolakis TrueBlock = StartBlock; 39097c3ef5cSSotiris Apostolakis 39197c3ef5cSSotiris Apostolakis // Insert the real conditional branch based on the original condition. 39297c3ef5cSSotiris Apostolakis BasicBlock *TT, *FT; 39397c3ef5cSSotiris Apostolakis TT = EndBlock; 39497c3ef5cSSotiris Apostolakis FT = FalseBlock; 39597c3ef5cSSotiris Apostolakis IRBuilder<> IB(SI); 39697c3ef5cSSotiris Apostolakis auto *CondFr = 39797c3ef5cSSotiris Apostolakis IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen"); 39897c3ef5cSSotiris Apostolakis IB.CreateCondBr(CondFr, TT, FT, SI); 39997c3ef5cSSotiris Apostolakis 40097c3ef5cSSotiris Apostolakis SmallPtrSet<const Instruction *, 2> INS; 40197c3ef5cSSotiris Apostolakis INS.insert(ASI.begin(), ASI.end()); 40297c3ef5cSSotiris Apostolakis // Use reverse iterator because later select may use the value of the 40397c3ef5cSSotiris Apostolakis // earlier select, and we need to propagate value through earlier select 40497c3ef5cSSotiris Apostolakis // to get the PHI operand. 40597c3ef5cSSotiris Apostolakis for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) { 40697c3ef5cSSotiris Apostolakis SelectInst *SI = *It; 40797c3ef5cSSotiris Apostolakis // The select itself is replaced with a PHI Node. 40897c3ef5cSSotiris Apostolakis PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); 40997c3ef5cSSotiris Apostolakis PN->takeName(SI); 41097c3ef5cSSotiris Apostolakis PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock); 41197c3ef5cSSotiris Apostolakis PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); 41297c3ef5cSSotiris Apostolakis PN->setDebugLoc(SI->getDebugLoc()); 41397c3ef5cSSotiris Apostolakis 41497c3ef5cSSotiris Apostolakis SI->replaceAllUsesWith(PN); 41597c3ef5cSSotiris Apostolakis SI->eraseFromParent(); 41697c3ef5cSSotiris Apostolakis INS.erase(SI); 41797c3ef5cSSotiris Apostolakis ++NumSelectsConverted; 41897c3ef5cSSotiris Apostolakis } 41997c3ef5cSSotiris Apostolakis } 42097c3ef5cSSotiris Apostolakis } 42197c3ef5cSSotiris Apostolakis 42297c3ef5cSSotiris Apostolakis void SelectOptimize::collectSelectGroups(BasicBlock &BB, 42397c3ef5cSSotiris Apostolakis SelectGroups &SIGroups) { 42497c3ef5cSSotiris Apostolakis BasicBlock::iterator BBIt = BB.begin(); 42597c3ef5cSSotiris Apostolakis while (BBIt != BB.end()) { 42697c3ef5cSSotiris Apostolakis Instruction *I = &*BBIt++; 42797c3ef5cSSotiris Apostolakis if (SelectInst *SI = dyn_cast<SelectInst>(I)) { 42897c3ef5cSSotiris Apostolakis SelectGroup SIGroup; 42997c3ef5cSSotiris Apostolakis SIGroup.push_back(SI); 43097c3ef5cSSotiris Apostolakis while (BBIt != BB.end()) { 43197c3ef5cSSotiris Apostolakis Instruction *NI = &*BBIt; 43297c3ef5cSSotiris Apostolakis SelectInst *NSI = dyn_cast<SelectInst>(NI); 43397c3ef5cSSotiris Apostolakis if (NSI && SI->getCondition() == NSI->getCondition()) { 43497c3ef5cSSotiris Apostolakis SIGroup.push_back(NSI); 43597c3ef5cSSotiris Apostolakis } else if (!NI->isDebugOrPseudoInst()) { 43697c3ef5cSSotiris Apostolakis // Debug/pseudo instructions should be skipped and not prevent the 43797c3ef5cSSotiris Apostolakis // formation of a select group. 43897c3ef5cSSotiris Apostolakis break; 43997c3ef5cSSotiris Apostolakis } 44097c3ef5cSSotiris Apostolakis ++BBIt; 44197c3ef5cSSotiris Apostolakis } 44297c3ef5cSSotiris Apostolakis 44397c3ef5cSSotiris Apostolakis // If the select type is not supported, no point optimizing it. 44497c3ef5cSSotiris Apostolakis // Instruction selection will take care of it. 44597c3ef5cSSotiris Apostolakis if (!isSelectKindSupported(SI)) 44697c3ef5cSSotiris Apostolakis continue; 44797c3ef5cSSotiris Apostolakis 44897c3ef5cSSotiris Apostolakis SIGroups.push_back(SIGroup); 44997c3ef5cSSotiris Apostolakis } 45097c3ef5cSSotiris Apostolakis } 45197c3ef5cSSotiris Apostolakis } 45297c3ef5cSSotiris Apostolakis 4538b42bc56SSotiris Apostolakis void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups, 4548b42bc56SSotiris Apostolakis SelectGroups &ProfSIGroups) { 4558b42bc56SSotiris Apostolakis for (SelectGroup &ASI : SIGroups) { 4568b42bc56SSotiris Apostolakis ++NumSelectOptAnalyzed; 4578b42bc56SSotiris Apostolakis if (isConvertToBranchProfitableBase(ASI)) 4588b42bc56SSotiris Apostolakis ProfSIGroups.push_back(ASI); 4598b42bc56SSotiris Apostolakis } 4608b42bc56SSotiris Apostolakis } 4618b42bc56SSotiris Apostolakis 462*d7ebb746SSotiris Apostolakis void SelectOptimize::findProfitableSIGroupsInnerLoops( 463*d7ebb746SSotiris Apostolakis const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) { 464*d7ebb746SSotiris Apostolakis NumSelectOptAnalyzed += SIGroups.size(); 465*d7ebb746SSotiris Apostolakis // For each select group in an inner-most loop, 466*d7ebb746SSotiris Apostolakis // a branch is more preferable than a select/conditional-move if: 467*d7ebb746SSotiris Apostolakis // i) conversion to branches for all the select groups of the loop satisfies 468*d7ebb746SSotiris Apostolakis // loop-level heuristics including reducing the loop's critical path by 469*d7ebb746SSotiris Apostolakis // some threshold (see SelectOptimize::checkLoopHeuristics); and 470*d7ebb746SSotiris Apostolakis // ii) the total cost of the select group is cheaper with a branch compared 471*d7ebb746SSotiris Apostolakis // to its predicated version. The cost is in terms of latency and the cost 472*d7ebb746SSotiris Apostolakis // of a select group is the cost of its most expensive select instruction 473*d7ebb746SSotiris Apostolakis // (assuming infinite resources and thus fully leveraging available ILP). 474*d7ebb746SSotiris Apostolakis 475*d7ebb746SSotiris Apostolakis DenseMap<const Instruction *, CostInfo> InstCostMap; 476*d7ebb746SSotiris Apostolakis CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()}, 477*d7ebb746SSotiris Apostolakis {Scaled64::getZero(), Scaled64::getZero()}}; 478*d7ebb746SSotiris Apostolakis if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) || 479*d7ebb746SSotiris Apostolakis !checkLoopHeuristics(L, LoopCost)) { 480*d7ebb746SSotiris Apostolakis return; 481*d7ebb746SSotiris Apostolakis } 482*d7ebb746SSotiris Apostolakis 483*d7ebb746SSotiris Apostolakis for (SelectGroup &ASI : SIGroups) { 484*d7ebb746SSotiris Apostolakis // Assuming infinite resources, the cost of a group of instructions is the 485*d7ebb746SSotiris Apostolakis // cost of the most expensive instruction of the group. 486*d7ebb746SSotiris Apostolakis Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero(); 487*d7ebb746SSotiris Apostolakis for (SelectInst *SI : ASI) { 488*d7ebb746SSotiris Apostolakis SelectCost = std::max(SelectCost, InstCostMap[SI].PredCost); 489*d7ebb746SSotiris Apostolakis BranchCost = std::max(BranchCost, InstCostMap[SI].NonPredCost); 490*d7ebb746SSotiris Apostolakis } 491*d7ebb746SSotiris Apostolakis if (BranchCost < SelectCost) { 492*d7ebb746SSotiris Apostolakis OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front()); 493*d7ebb746SSotiris Apostolakis OR << "Profitable to convert to branch (loop analysis). BranchCost=" 494*d7ebb746SSotiris Apostolakis << BranchCost.toString() << ", SelectCost=" << SelectCost.toString() 495*d7ebb746SSotiris Apostolakis << ". "; 496*d7ebb746SSotiris Apostolakis ORE->emit(OR); 497*d7ebb746SSotiris Apostolakis ++NumSelectConvertedLoop; 498*d7ebb746SSotiris Apostolakis ProfSIGroups.push_back(ASI); 499*d7ebb746SSotiris Apostolakis } else { 500*d7ebb746SSotiris Apostolakis OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front()); 501*d7ebb746SSotiris Apostolakis ORmiss << "Select is more profitable (loop analysis). BranchCost=" 502*d7ebb746SSotiris Apostolakis << BranchCost.toString() 503*d7ebb746SSotiris Apostolakis << ", SelectCost=" << SelectCost.toString() << ". "; 504*d7ebb746SSotiris Apostolakis ORE->emit(ORmiss); 505*d7ebb746SSotiris Apostolakis } 506*d7ebb746SSotiris Apostolakis } 507*d7ebb746SSotiris Apostolakis } 508*d7ebb746SSotiris Apostolakis 5098b42bc56SSotiris Apostolakis bool SelectOptimize::isConvertToBranchProfitableBase( 5108b42bc56SSotiris Apostolakis const SmallVector<SelectInst *, 2> &ASI) { 5118b42bc56SSotiris Apostolakis SelectInst *SI = ASI.front(); 5128b42bc56SSotiris Apostolakis OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI); 5138b42bc56SSotiris Apostolakis OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI); 5148b42bc56SSotiris Apostolakis 5158b42bc56SSotiris Apostolakis // Skip cold basic blocks. Better to optimize for size for cold blocks. 5168b42bc56SSotiris Apostolakis if (PSI->isColdBlock(SI->getParent(), BFI.get())) { 5178b42bc56SSotiris Apostolakis ++NumSelectColdBB; 5188b42bc56SSotiris Apostolakis ORmiss << "Not converted to branch because of cold basic block. "; 5198b42bc56SSotiris Apostolakis ORE->emit(ORmiss); 5208b42bc56SSotiris Apostolakis return false; 5218b42bc56SSotiris Apostolakis } 5228b42bc56SSotiris Apostolakis 5238b42bc56SSotiris Apostolakis // If unpredictable, branch form is less profitable. 5248b42bc56SSotiris Apostolakis if (SI->getMetadata(LLVMContext::MD_unpredictable)) { 5258b42bc56SSotiris Apostolakis ++NumSelectUnPred; 5268b42bc56SSotiris Apostolakis ORmiss << "Not converted to branch because of unpredictable branch. "; 5278b42bc56SSotiris Apostolakis ORE->emit(ORmiss); 5288b42bc56SSotiris Apostolakis return false; 5298b42bc56SSotiris Apostolakis } 5308b42bc56SSotiris Apostolakis 5318b42bc56SSotiris Apostolakis // If highly predictable, branch form is more profitable, unless a 5328b42bc56SSotiris Apostolakis // predictable select is inexpensive in the target architecture. 5338b42bc56SSotiris Apostolakis if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) { 5348b42bc56SSotiris Apostolakis ++NumSelectConvertedHighPred; 5358b42bc56SSotiris Apostolakis OR << "Converted to branch because of highly predictable branch. "; 5368b42bc56SSotiris Apostolakis ORE->emit(OR); 5378b42bc56SSotiris Apostolakis return true; 5388b42bc56SSotiris Apostolakis } 5398b42bc56SSotiris Apostolakis 5408b42bc56SSotiris Apostolakis // Look for expensive instructions in the cold operand's (if any) dependence 5418b42bc56SSotiris Apostolakis // slice of any of the selects in the group. 5428b42bc56SSotiris Apostolakis if (hasExpensiveColdOperand(ASI)) { 5438b42bc56SSotiris Apostolakis ++NumSelectConvertedExpColdOperand; 5448b42bc56SSotiris Apostolakis OR << "Converted to branch because of expensive cold operand."; 5458b42bc56SSotiris Apostolakis ORE->emit(OR); 5468b42bc56SSotiris Apostolakis return true; 5478b42bc56SSotiris Apostolakis } 5488b42bc56SSotiris Apostolakis 5498b42bc56SSotiris Apostolakis ORmiss << "Not profitable to convert to branch (base heuristic)."; 5508b42bc56SSotiris Apostolakis ORE->emit(ORmiss); 5518b42bc56SSotiris Apostolakis return false; 5528b42bc56SSotiris Apostolakis } 5538b42bc56SSotiris Apostolakis 5548b42bc56SSotiris Apostolakis static InstructionCost divideNearest(InstructionCost Numerator, 5558b42bc56SSotiris Apostolakis uint64_t Denominator) { 5568b42bc56SSotiris Apostolakis return (Numerator + (Denominator / 2)) / Denominator; 5578b42bc56SSotiris Apostolakis } 5588b42bc56SSotiris Apostolakis 5598b42bc56SSotiris Apostolakis bool SelectOptimize::hasExpensiveColdOperand( 5608b42bc56SSotiris Apostolakis const SmallVector<SelectInst *, 2> &ASI) { 5618b42bc56SSotiris Apostolakis bool ColdOperand = false; 5628b42bc56SSotiris Apostolakis uint64_t TrueWeight, FalseWeight, TotalWeight; 5638b42bc56SSotiris Apostolakis if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) { 5648b42bc56SSotiris Apostolakis uint64_t MinWeight = std::min(TrueWeight, FalseWeight); 5658b42bc56SSotiris Apostolakis TotalWeight = TrueWeight + FalseWeight; 5668b42bc56SSotiris Apostolakis // Is there a path with frequency <ColdOperandThreshold% (default:20%) ? 5678b42bc56SSotiris Apostolakis ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight; 5688b42bc56SSotiris Apostolakis } else if (PSI->hasProfileSummary()) { 5698b42bc56SSotiris Apostolakis OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front()); 5708b42bc56SSotiris Apostolakis ORmiss << "Profile data available but missing branch-weights metadata for " 5718b42bc56SSotiris Apostolakis "select instruction. "; 5728b42bc56SSotiris Apostolakis ORE->emit(ORmiss); 5738b42bc56SSotiris Apostolakis } 5748b42bc56SSotiris Apostolakis if (!ColdOperand) 5758b42bc56SSotiris Apostolakis return false; 5768b42bc56SSotiris Apostolakis // Check if the cold path's dependence slice is expensive for any of the 5778b42bc56SSotiris Apostolakis // selects of the group. 5788b42bc56SSotiris Apostolakis for (SelectInst *SI : ASI) { 5798b42bc56SSotiris Apostolakis Instruction *ColdI = nullptr; 5808b42bc56SSotiris Apostolakis uint64_t HotWeight; 5818b42bc56SSotiris Apostolakis if (TrueWeight < FalseWeight) { 5828b42bc56SSotiris Apostolakis ColdI = dyn_cast<Instruction>(SI->getTrueValue()); 5838b42bc56SSotiris Apostolakis HotWeight = FalseWeight; 5848b42bc56SSotiris Apostolakis } else { 5858b42bc56SSotiris Apostolakis ColdI = dyn_cast<Instruction>(SI->getFalseValue()); 5868b42bc56SSotiris Apostolakis HotWeight = TrueWeight; 5878b42bc56SSotiris Apostolakis } 5888b42bc56SSotiris Apostolakis if (ColdI) { 5898b42bc56SSotiris Apostolakis SmallVector<Instruction *, 2> ColdSlice; 5908b42bc56SSotiris Apostolakis getExclBackwardsSlice(ColdI, ColdSlice); 5918b42bc56SSotiris Apostolakis InstructionCost SliceCost = 0; 5928b42bc56SSotiris Apostolakis for (auto *ColdII : ColdSlice) { 5938b42bc56SSotiris Apostolakis SliceCost += 5948b42bc56SSotiris Apostolakis TTI->getInstructionCost(ColdII, TargetTransformInfo::TCK_Latency); 5958b42bc56SSotiris Apostolakis } 5968b42bc56SSotiris Apostolakis // The colder the cold value operand of the select is the more expensive 5978b42bc56SSotiris Apostolakis // the cmov becomes for computing the cold value operand every time. Thus, 5988b42bc56SSotiris Apostolakis // the colder the cold operand is the more its cost counts. 5998b42bc56SSotiris Apostolakis // Get nearest integer cost adjusted for coldness. 6008b42bc56SSotiris Apostolakis InstructionCost AdjSliceCost = 6018b42bc56SSotiris Apostolakis divideNearest(SliceCost * HotWeight, TotalWeight); 6028b42bc56SSotiris Apostolakis if (AdjSliceCost >= 6038b42bc56SSotiris Apostolakis ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive) 6048b42bc56SSotiris Apostolakis return true; 6058b42bc56SSotiris Apostolakis } 6068b42bc56SSotiris Apostolakis } 6078b42bc56SSotiris Apostolakis return false; 6088b42bc56SSotiris Apostolakis } 6098b42bc56SSotiris Apostolakis 6108b42bc56SSotiris Apostolakis // For a given source instruction, collect its backwards dependence slice 6118b42bc56SSotiris Apostolakis // consisting of instructions exclusively computed for the purpose of producing 6128b42bc56SSotiris Apostolakis // the operands of the source instruction. As an approximation 6138b42bc56SSotiris Apostolakis // (sufficiently-accurate in practice), we populate this set with the 6148b42bc56SSotiris Apostolakis // instructions of the backwards dependence slice that only have one-use and 6158b42bc56SSotiris Apostolakis // form an one-use chain that leads to the source instruction. 6168b42bc56SSotiris Apostolakis void SelectOptimize::getExclBackwardsSlice( 6178b42bc56SSotiris Apostolakis Instruction *I, SmallVector<Instruction *, 2> &Slice) { 6188b42bc56SSotiris Apostolakis SmallPtrSet<Instruction *, 2> Visited; 6198b42bc56SSotiris Apostolakis std::queue<Instruction *> Worklist; 6208b42bc56SSotiris Apostolakis Worklist.push(I); 6218b42bc56SSotiris Apostolakis while (!Worklist.empty()) { 6228b42bc56SSotiris Apostolakis Instruction *II = Worklist.front(); 6238b42bc56SSotiris Apostolakis Worklist.pop(); 6248b42bc56SSotiris Apostolakis 6258b42bc56SSotiris Apostolakis // Avoid cycles. 6268b42bc56SSotiris Apostolakis if (Visited.count(II)) 6278b42bc56SSotiris Apostolakis continue; 6288b42bc56SSotiris Apostolakis Visited.insert(II); 6298b42bc56SSotiris Apostolakis 6308b42bc56SSotiris Apostolakis if (!II->hasOneUse()) 6318b42bc56SSotiris Apostolakis continue; 6328b42bc56SSotiris Apostolakis 6338b42bc56SSotiris Apostolakis // Avoid considering instructions with less frequency than the source 6348b42bc56SSotiris Apostolakis // instruction (i.e., avoid colder code regions of the dependence slice). 6358b42bc56SSotiris Apostolakis if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent())) 6368b42bc56SSotiris Apostolakis continue; 6378b42bc56SSotiris Apostolakis 6388b42bc56SSotiris Apostolakis // Eligible one-use instruction added to the dependence slice. 6398b42bc56SSotiris Apostolakis Slice.push_back(II); 6408b42bc56SSotiris Apostolakis 6418b42bc56SSotiris Apostolakis // Explore all the operands of the current instruction to expand the slice. 6428b42bc56SSotiris Apostolakis for (unsigned k = 0; k < II->getNumOperands(); ++k) 6438b42bc56SSotiris Apostolakis if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k))) 6448b42bc56SSotiris Apostolakis Worklist.push(OpI); 6458b42bc56SSotiris Apostolakis } 6468b42bc56SSotiris Apostolakis } 6478b42bc56SSotiris Apostolakis 6488b42bc56SSotiris Apostolakis bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) { 6498b42bc56SSotiris Apostolakis uint64_t TrueWeight, FalseWeight; 6508b42bc56SSotiris Apostolakis if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { 6518b42bc56SSotiris Apostolakis uint64_t Max = std::max(TrueWeight, FalseWeight); 6528b42bc56SSotiris Apostolakis uint64_t Sum = TrueWeight + FalseWeight; 6538b42bc56SSotiris Apostolakis if (Sum != 0) { 6548b42bc56SSotiris Apostolakis auto Probability = BranchProbability::getBranchProbability(Max, Sum); 6558b42bc56SSotiris Apostolakis if (Probability > TTI->getPredictableBranchThreshold()) 6568b42bc56SSotiris Apostolakis return true; 6578b42bc56SSotiris Apostolakis } 6588b42bc56SSotiris Apostolakis } 6598b42bc56SSotiris Apostolakis return false; 6608b42bc56SSotiris Apostolakis } 6618b42bc56SSotiris Apostolakis 662*d7ebb746SSotiris Apostolakis bool SelectOptimize::checkLoopHeuristics(const Loop *L, 663*d7ebb746SSotiris Apostolakis const CostInfo LoopCost[2]) { 664*d7ebb746SSotiris Apostolakis // Loop-level checks to determine if a non-predicated version (with branches) 665*d7ebb746SSotiris Apostolakis // of the loop is more profitable than its predicated version. 666*d7ebb746SSotiris Apostolakis 667*d7ebb746SSotiris Apostolakis if (DisableLoopLevelHeuristics) 668*d7ebb746SSotiris Apostolakis return true; 669*d7ebb746SSotiris Apostolakis 670*d7ebb746SSotiris Apostolakis OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", 671*d7ebb746SSotiris Apostolakis L->getHeader()->getFirstNonPHI()); 672*d7ebb746SSotiris Apostolakis 673*d7ebb746SSotiris Apostolakis if (LoopCost[0].NonPredCost > LoopCost[0].PredCost || 674*d7ebb746SSotiris Apostolakis LoopCost[1].NonPredCost >= LoopCost[1].PredCost) { 675*d7ebb746SSotiris Apostolakis ORmissL << "No select conversion in the loop due to no reduction of loop's " 676*d7ebb746SSotiris Apostolakis "critical path. "; 677*d7ebb746SSotiris Apostolakis ORE->emit(ORmissL); 678*d7ebb746SSotiris Apostolakis return false; 679*d7ebb746SSotiris Apostolakis } 680*d7ebb746SSotiris Apostolakis 681*d7ebb746SSotiris Apostolakis Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost, 682*d7ebb746SSotiris Apostolakis LoopCost[1].PredCost - LoopCost[1].NonPredCost}; 683*d7ebb746SSotiris Apostolakis 684*d7ebb746SSotiris Apostolakis // Profitably converting to branches need to reduce the loop's critical path 685*d7ebb746SSotiris Apostolakis // by at least some threshold (absolute gain of GainCycleThreshold cycles and 686*d7ebb746SSotiris Apostolakis // relative gain of 12.5%). 687*d7ebb746SSotiris Apostolakis if (Gain[1] < Scaled64::get(GainCycleThreshold) || 688*d7ebb746SSotiris Apostolakis Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) { 689*d7ebb746SSotiris Apostolakis Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost; 690*d7ebb746SSotiris Apostolakis ORmissL << "No select conversion in the loop due to small reduction of " 691*d7ebb746SSotiris Apostolakis "loop's critical path. Gain=" 692*d7ebb746SSotiris Apostolakis << Gain[1].toString() 693*d7ebb746SSotiris Apostolakis << ", RelativeGain=" << RelativeGain.toString() << "%. "; 694*d7ebb746SSotiris Apostolakis ORE->emit(ORmissL); 695*d7ebb746SSotiris Apostolakis return false; 696*d7ebb746SSotiris Apostolakis } 697*d7ebb746SSotiris Apostolakis 698*d7ebb746SSotiris Apostolakis // If the loop's critical path involves loop-carried dependences, the gradient 699*d7ebb746SSotiris Apostolakis // of the gain needs to be at least GainGradientThreshold% (defaults to 25%). 700*d7ebb746SSotiris Apostolakis // This check ensures that the latency reduction for the loop's critical path 701*d7ebb746SSotiris Apostolakis // keeps decreasing with sufficient rate beyond the two analyzed loop 702*d7ebb746SSotiris Apostolakis // iterations. 703*d7ebb746SSotiris Apostolakis if (Gain[1] > Gain[0]) { 704*d7ebb746SSotiris Apostolakis Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) / 705*d7ebb746SSotiris Apostolakis (LoopCost[1].PredCost - LoopCost[0].PredCost); 706*d7ebb746SSotiris Apostolakis if (GradientGain < Scaled64::get(GainGradientThreshold)) { 707*d7ebb746SSotiris Apostolakis ORmissL << "No select conversion in the loop due to small gradient gain. " 708*d7ebb746SSotiris Apostolakis "GradientGain=" 709*d7ebb746SSotiris Apostolakis << GradientGain.toString() << "%. "; 710*d7ebb746SSotiris Apostolakis ORE->emit(ORmissL); 711*d7ebb746SSotiris Apostolakis return false; 712*d7ebb746SSotiris Apostolakis } 713*d7ebb746SSotiris Apostolakis } 714*d7ebb746SSotiris Apostolakis // If the gain decreases it is not profitable to convert. 715*d7ebb746SSotiris Apostolakis else if (Gain[1] < Gain[0]) { 716*d7ebb746SSotiris Apostolakis ORmissL 717*d7ebb746SSotiris Apostolakis << "No select conversion in the loop due to negative gradient gain. "; 718*d7ebb746SSotiris Apostolakis ORE->emit(ORmissL); 719*d7ebb746SSotiris Apostolakis return false; 720*d7ebb746SSotiris Apostolakis } 721*d7ebb746SSotiris Apostolakis 722*d7ebb746SSotiris Apostolakis // Non-predicated version of the loop is more profitable than its 723*d7ebb746SSotiris Apostolakis // predicated version. 724*d7ebb746SSotiris Apostolakis return true; 725*d7ebb746SSotiris Apostolakis } 726*d7ebb746SSotiris Apostolakis 727*d7ebb746SSotiris Apostolakis // Computes instruction and loop-critical-path costs for both the predicated 728*d7ebb746SSotiris Apostolakis // and non-predicated version of the given loop. 729*d7ebb746SSotiris Apostolakis // Returns false if unable to compute these costs due to invalid cost of loop 730*d7ebb746SSotiris Apostolakis // instruction(s). 731*d7ebb746SSotiris Apostolakis bool SelectOptimize::computeLoopCosts( 732*d7ebb746SSotiris Apostolakis const Loop *L, const SelectGroups &SIGroups, 733*d7ebb746SSotiris Apostolakis DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) { 734*d7ebb746SSotiris Apostolakis const auto &SIset = getSIset(SIGroups); 735*d7ebb746SSotiris Apostolakis // Compute instruction and loop-critical-path costs across two iterations for 736*d7ebb746SSotiris Apostolakis // both predicated and non-predicated version. 737*d7ebb746SSotiris Apostolakis const unsigned Iterations = 2; 738*d7ebb746SSotiris Apostolakis for (unsigned Iter = 0; Iter < Iterations; ++Iter) { 739*d7ebb746SSotiris Apostolakis // Cost of the loop's critical path. 740*d7ebb746SSotiris Apostolakis CostInfo &MaxCost = LoopCost[Iter]; 741*d7ebb746SSotiris Apostolakis for (BasicBlock *BB : L->getBlocks()) { 742*d7ebb746SSotiris Apostolakis for (const Instruction &I : *BB) { 743*d7ebb746SSotiris Apostolakis if (I.isDebugOrPseudoInst()) 744*d7ebb746SSotiris Apostolakis continue; 745*d7ebb746SSotiris Apostolakis // Compute the predicated and non-predicated cost of the instruction. 746*d7ebb746SSotiris Apostolakis Scaled64 IPredCost = Scaled64::getZero(), 747*d7ebb746SSotiris Apostolakis INonPredCost = Scaled64::getZero(); 748*d7ebb746SSotiris Apostolakis 749*d7ebb746SSotiris Apostolakis // Assume infinite resources that allow to fully exploit the available 750*d7ebb746SSotiris Apostolakis // instruction-level parallelism. 751*d7ebb746SSotiris Apostolakis // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost) 752*d7ebb746SSotiris Apostolakis for (const Use &U : I.operands()) { 753*d7ebb746SSotiris Apostolakis auto UI = dyn_cast<Instruction>(U.get()); 754*d7ebb746SSotiris Apostolakis if (!UI) 755*d7ebb746SSotiris Apostolakis continue; 756*d7ebb746SSotiris Apostolakis if (InstCostMap.count(UI)) { 757*d7ebb746SSotiris Apostolakis IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost); 758*d7ebb746SSotiris Apostolakis INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost); 759*d7ebb746SSotiris Apostolakis } 760*d7ebb746SSotiris Apostolakis } 761*d7ebb746SSotiris Apostolakis auto ILatency = computeInstCost(&I); 762*d7ebb746SSotiris Apostolakis if (!ILatency.hasValue()) { 763*d7ebb746SSotiris Apostolakis OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I); 764*d7ebb746SSotiris Apostolakis ORmissL << "Invalid instruction cost preventing analysis and " 765*d7ebb746SSotiris Apostolakis "optimization of the inner-most loop containing this " 766*d7ebb746SSotiris Apostolakis "instruction. "; 767*d7ebb746SSotiris Apostolakis ORE->emit(ORmissL); 768*d7ebb746SSotiris Apostolakis return false; 769*d7ebb746SSotiris Apostolakis } 770*d7ebb746SSotiris Apostolakis IPredCost += Scaled64::get(ILatency.getValue()); 771*d7ebb746SSotiris Apostolakis INonPredCost += Scaled64::get(ILatency.getValue()); 772*d7ebb746SSotiris Apostolakis 773*d7ebb746SSotiris Apostolakis // For a select that can be converted to branch, 774*d7ebb746SSotiris Apostolakis // compute its cost as a branch (non-predicated cost). 775*d7ebb746SSotiris Apostolakis // 776*d7ebb746SSotiris Apostolakis // BranchCost = PredictedPathCost + MispredictCost 777*d7ebb746SSotiris Apostolakis // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb 778*d7ebb746SSotiris Apostolakis // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate 779*d7ebb746SSotiris Apostolakis if (SIset.contains(&I)) { 780*d7ebb746SSotiris Apostolakis auto SI = dyn_cast<SelectInst>(&I); 781*d7ebb746SSotiris Apostolakis 782*d7ebb746SSotiris Apostolakis Scaled64 TrueOpCost = Scaled64::getZero(), 783*d7ebb746SSotiris Apostolakis FalseOpCost = Scaled64::getZero(); 784*d7ebb746SSotiris Apostolakis if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) 785*d7ebb746SSotiris Apostolakis if (InstCostMap.count(TI)) 786*d7ebb746SSotiris Apostolakis TrueOpCost = InstCostMap[TI].NonPredCost; 787*d7ebb746SSotiris Apostolakis if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) 788*d7ebb746SSotiris Apostolakis if (InstCostMap.count(FI)) 789*d7ebb746SSotiris Apostolakis FalseOpCost = InstCostMap[FI].NonPredCost; 790*d7ebb746SSotiris Apostolakis Scaled64 PredictedPathCost = 791*d7ebb746SSotiris Apostolakis getPredictedPathCost(TrueOpCost, FalseOpCost, SI); 792*d7ebb746SSotiris Apostolakis 793*d7ebb746SSotiris Apostolakis Scaled64 CondCost = Scaled64::getZero(); 794*d7ebb746SSotiris Apostolakis if (auto *CI = dyn_cast<Instruction>(SI->getCondition())) 795*d7ebb746SSotiris Apostolakis if (InstCostMap.count(CI)) 796*d7ebb746SSotiris Apostolakis CondCost = InstCostMap[CI].NonPredCost; 797*d7ebb746SSotiris Apostolakis Scaled64 MispredictCost = getMispredictionCost(SI, CondCost); 798*d7ebb746SSotiris Apostolakis 799*d7ebb746SSotiris Apostolakis INonPredCost = PredictedPathCost + MispredictCost; 800*d7ebb746SSotiris Apostolakis } 801*d7ebb746SSotiris Apostolakis 802*d7ebb746SSotiris Apostolakis InstCostMap[&I] = {IPredCost, INonPredCost}; 803*d7ebb746SSotiris Apostolakis MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost); 804*d7ebb746SSotiris Apostolakis MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost); 805*d7ebb746SSotiris Apostolakis } 806*d7ebb746SSotiris Apostolakis } 807*d7ebb746SSotiris Apostolakis } 808*d7ebb746SSotiris Apostolakis return true; 809*d7ebb746SSotiris Apostolakis } 810*d7ebb746SSotiris Apostolakis 811*d7ebb746SSotiris Apostolakis SmallPtrSet<const Instruction *, 2> 812*d7ebb746SSotiris Apostolakis SelectOptimize::getSIset(const SelectGroups &SIGroups) { 813*d7ebb746SSotiris Apostolakis SmallPtrSet<const Instruction *, 2> SIset; 814*d7ebb746SSotiris Apostolakis for (const SelectGroup &ASI : SIGroups) 815*d7ebb746SSotiris Apostolakis for (const SelectInst *SI : ASI) 816*d7ebb746SSotiris Apostolakis SIset.insert(SI); 817*d7ebb746SSotiris Apostolakis return SIset; 818*d7ebb746SSotiris Apostolakis } 819*d7ebb746SSotiris Apostolakis 820*d7ebb746SSotiris Apostolakis Optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) { 821*d7ebb746SSotiris Apostolakis InstructionCost ICost = 822*d7ebb746SSotiris Apostolakis TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency); 823*d7ebb746SSotiris Apostolakis if (auto OC = ICost.getValue()) 824*d7ebb746SSotiris Apostolakis return Optional<uint64_t>(OC.getValue()); 825*d7ebb746SSotiris Apostolakis return Optional<uint64_t>(None); 826*d7ebb746SSotiris Apostolakis } 827*d7ebb746SSotiris Apostolakis 828*d7ebb746SSotiris Apostolakis ScaledNumber<uint64_t> 829*d7ebb746SSotiris Apostolakis SelectOptimize::getMispredictionCost(const SelectInst *SI, 830*d7ebb746SSotiris Apostolakis const Scaled64 CondCost) { 831*d7ebb746SSotiris Apostolakis uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty; 832*d7ebb746SSotiris Apostolakis 833*d7ebb746SSotiris Apostolakis // Account for the default misprediction rate when using a branch 834*d7ebb746SSotiris Apostolakis // (conservatively set to 25% by default). 835*d7ebb746SSotiris Apostolakis uint64_t MispredictRate = MispredictDefaultRate; 836*d7ebb746SSotiris Apostolakis // If the select condition is obviously predictable, then the misprediction 837*d7ebb746SSotiris Apostolakis // rate is zero. 838*d7ebb746SSotiris Apostolakis if (isSelectHighlyPredictable(SI)) 839*d7ebb746SSotiris Apostolakis MispredictRate = 0; 840*d7ebb746SSotiris Apostolakis 841*d7ebb746SSotiris Apostolakis // CondCost is included to account for cases where the computation of the 842*d7ebb746SSotiris Apostolakis // condition is part of a long dependence chain (potentially loop-carried) 843*d7ebb746SSotiris Apostolakis // that would delay detection of a misprediction and increase its cost. 844*d7ebb746SSotiris Apostolakis Scaled64 MispredictCost = 845*d7ebb746SSotiris Apostolakis std::max(Scaled64::get(MispredictPenalty), CondCost) * 846*d7ebb746SSotiris Apostolakis Scaled64::get(MispredictRate); 847*d7ebb746SSotiris Apostolakis MispredictCost /= Scaled64::get(100); 848*d7ebb746SSotiris Apostolakis 849*d7ebb746SSotiris Apostolakis return MispredictCost; 850*d7ebb746SSotiris Apostolakis } 851*d7ebb746SSotiris Apostolakis 852*d7ebb746SSotiris Apostolakis // Returns the cost of a branch when the prediction is correct. 853*d7ebb746SSotiris Apostolakis // TrueCost * TrueProbability + FalseCost * FalseProbability. 854*d7ebb746SSotiris Apostolakis ScaledNumber<uint64_t> 855*d7ebb746SSotiris Apostolakis SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, 856*d7ebb746SSotiris Apostolakis const SelectInst *SI) { 857*d7ebb746SSotiris Apostolakis Scaled64 PredPathCost; 858*d7ebb746SSotiris Apostolakis uint64_t TrueWeight, FalseWeight; 859*d7ebb746SSotiris Apostolakis if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { 860*d7ebb746SSotiris Apostolakis uint64_t SumWeight = TrueWeight + FalseWeight; 861*d7ebb746SSotiris Apostolakis if (SumWeight != 0) { 862*d7ebb746SSotiris Apostolakis PredPathCost = TrueCost * Scaled64::get(TrueWeight) + 863*d7ebb746SSotiris Apostolakis FalseCost * Scaled64::get(FalseWeight); 864*d7ebb746SSotiris Apostolakis PredPathCost /= Scaled64::get(SumWeight); 865*d7ebb746SSotiris Apostolakis return PredPathCost; 866*d7ebb746SSotiris Apostolakis } 867*d7ebb746SSotiris Apostolakis } 868*d7ebb746SSotiris Apostolakis // Without branch weight metadata, we assume 75% for the one path and 25% for 869*d7ebb746SSotiris Apostolakis // the other, and pick the result with the biggest cost. 870*d7ebb746SSotiris Apostolakis PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost, 871*d7ebb746SSotiris Apostolakis FalseCost * Scaled64::get(3) + TrueCost); 872*d7ebb746SSotiris Apostolakis PredPathCost /= Scaled64::get(4); 873*d7ebb746SSotiris Apostolakis return PredPathCost; 874*d7ebb746SSotiris Apostolakis } 875*d7ebb746SSotiris Apostolakis 87697c3ef5cSSotiris Apostolakis bool SelectOptimize::isSelectKindSupported(SelectInst *SI) { 87797c3ef5cSSotiris Apostolakis bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); 87897c3ef5cSSotiris Apostolakis if (VectorCond) 87997c3ef5cSSotiris Apostolakis return false; 88097c3ef5cSSotiris Apostolakis TargetLowering::SelectSupportKind SelectKind; 88197c3ef5cSSotiris Apostolakis if (SI->getType()->isVectorTy()) 88297c3ef5cSSotiris Apostolakis SelectKind = TargetLowering::ScalarCondVectorVal; 88397c3ef5cSSotiris Apostolakis else 88497c3ef5cSSotiris Apostolakis SelectKind = TargetLowering::ScalarValSelect; 88597c3ef5cSSotiris Apostolakis return TLI->isSelectSupported(SelectKind); 886ca7c307dSSotiris Apostolakis } 887