1ca7c307dSSotiris Apostolakis //===--- SelectOptimize.cpp - Convert select to branches if profitable ---===// 2ca7c307dSSotiris Apostolakis // 3ca7c307dSSotiris Apostolakis // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4ca7c307dSSotiris Apostolakis // See https://llvm.org/LICENSE.txt for license information. 5ca7c307dSSotiris Apostolakis // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6ca7c307dSSotiris Apostolakis // 7ca7c307dSSotiris Apostolakis //===----------------------------------------------------------------------===// 8ca7c307dSSotiris Apostolakis // 9ca7c307dSSotiris Apostolakis // This pass converts selects to conditional jumps when profitable. 10ca7c307dSSotiris Apostolakis // 11ca7c307dSSotiris Apostolakis //===----------------------------------------------------------------------===// 12ca7c307dSSotiris Apostolakis 13d7ebb746SSotiris Apostolakis #include "llvm/ADT/Optional.h" 1497c3ef5cSSotiris Apostolakis #include "llvm/ADT/SmallVector.h" 1597c3ef5cSSotiris Apostolakis #include "llvm/ADT/Statistic.h" 1697c3ef5cSSotiris Apostolakis #include "llvm/Analysis/BlockFrequencyInfo.h" 1797c3ef5cSSotiris Apostolakis #include "llvm/Analysis/BranchProbabilityInfo.h" 1897c3ef5cSSotiris Apostolakis #include "llvm/Analysis/LoopInfo.h" 198b42bc56SSotiris Apostolakis #include "llvm/Analysis/OptimizationRemarkEmitter.h" 208b42bc56SSotiris Apostolakis #include "llvm/Analysis/ProfileSummaryInfo.h" 218b42bc56SSotiris Apostolakis #include "llvm/Analysis/TargetTransformInfo.h" 22ca7c307dSSotiris Apostolakis #include "llvm/CodeGen/Passes.h" 2397c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetLowering.h" 2497c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetPassConfig.h" 2597c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetSchedule.h" 2697c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetSubtargetInfo.h" 2797c3ef5cSSotiris Apostolakis #include "llvm/IR/BasicBlock.h" 288b42bc56SSotiris Apostolakis #include "llvm/IR/Dominators.h" 29ca7c307dSSotiris Apostolakis #include "llvm/IR/Function.h" 3097c3ef5cSSotiris Apostolakis #include "llvm/IR/IRBuilder.h" 3197c3ef5cSSotiris Apostolakis #include "llvm/IR/Instruction.h" 32ca7c307dSSotiris Apostolakis #include "llvm/InitializePasses.h" 33ca7c307dSSotiris Apostolakis #include "llvm/Pass.h" 34d7ebb746SSotiris Apostolakis #include "llvm/Support/ScaledNumber.h" 3597c3ef5cSSotiris Apostolakis #include "llvm/Target/TargetMachine.h" 368b42bc56SSotiris Apostolakis #include "llvm/Transforms/Utils/SizeOpts.h" 378b42bc56SSotiris Apostolakis #include <algorithm> 388b42bc56SSotiris Apostolakis #include <memory> 398b42bc56SSotiris Apostolakis #include <queue> 408b42bc56SSotiris Apostolakis #include <stack> 418b42bc56SSotiris Apostolakis #include <string> 42ca7c307dSSotiris Apostolakis 43ca7c307dSSotiris Apostolakis using namespace llvm; 44ca7c307dSSotiris Apostolakis 4597c3ef5cSSotiris Apostolakis #define DEBUG_TYPE "select-optimize" 4697c3ef5cSSotiris Apostolakis 478b42bc56SSotiris Apostolakis STATISTIC(NumSelectOptAnalyzed, 488b42bc56SSotiris Apostolakis "Number of select groups considered for conversion to branch"); 498b42bc56SSotiris Apostolakis STATISTIC(NumSelectConvertedExpColdOperand, 508b42bc56SSotiris Apostolakis "Number of select groups converted due to expensive cold operand"); 518b42bc56SSotiris Apostolakis STATISTIC(NumSelectConvertedHighPred, 528b42bc56SSotiris Apostolakis "Number of select groups converted due to high-predictability"); 538b42bc56SSotiris Apostolakis STATISTIC(NumSelectUnPred, 548b42bc56SSotiris Apostolakis "Number of select groups not converted due to unpredictability"); 558b42bc56SSotiris Apostolakis STATISTIC(NumSelectColdBB, 568b42bc56SSotiris Apostolakis "Number of select groups not converted due to cold basic block"); 57d7ebb746SSotiris Apostolakis STATISTIC(NumSelectConvertedLoop, 58d7ebb746SSotiris Apostolakis "Number of select groups converted due to loop-level analysis"); 5997c3ef5cSSotiris Apostolakis STATISTIC(NumSelectsConverted, "Number of selects converted"); 6097c3ef5cSSotiris Apostolakis 618b42bc56SSotiris Apostolakis static cl::opt<unsigned> ColdOperandThreshold( 628b42bc56SSotiris Apostolakis "cold-operand-threshold", 638b42bc56SSotiris Apostolakis cl::desc("Maximum frequency of path for an operand to be considered cold."), 648b42bc56SSotiris Apostolakis cl::init(20), cl::Hidden); 658b42bc56SSotiris Apostolakis 668b42bc56SSotiris Apostolakis static cl::opt<unsigned> ColdOperandMaxCostMultiplier( 678b42bc56SSotiris Apostolakis "cold-operand-max-cost-multiplier", 688b42bc56SSotiris Apostolakis cl::desc("Maximum cost multiplier of TCC_expensive for the dependence " 698b42bc56SSotiris Apostolakis "slice of a cold operand to be considered inexpensive."), 708b42bc56SSotiris Apostolakis cl::init(1), cl::Hidden); 718b42bc56SSotiris Apostolakis 72d7ebb746SSotiris Apostolakis static cl::opt<unsigned> 73d7ebb746SSotiris Apostolakis GainGradientThreshold("select-opti-loop-gradient-gain-threshold", 74d7ebb746SSotiris Apostolakis cl::desc("Gradient gain threshold (%)."), 75d7ebb746SSotiris Apostolakis cl::init(25), cl::Hidden); 76d7ebb746SSotiris Apostolakis 77d7ebb746SSotiris Apostolakis static cl::opt<unsigned> 78d7ebb746SSotiris Apostolakis GainCycleThreshold("select-opti-loop-cycle-gain-threshold", 79d7ebb746SSotiris Apostolakis cl::desc("Minimum gain per loop (in cycles) threshold."), 80d7ebb746SSotiris Apostolakis cl::init(4), cl::Hidden); 81d7ebb746SSotiris Apostolakis 82d7ebb746SSotiris Apostolakis static cl::opt<unsigned> GainRelativeThreshold( 83d7ebb746SSotiris Apostolakis "select-opti-loop-relative-gain-threshold", 84d7ebb746SSotiris Apostolakis cl::desc( 85d7ebb746SSotiris Apostolakis "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"), 86d7ebb746SSotiris Apostolakis cl::init(8), cl::Hidden); 87d7ebb746SSotiris Apostolakis 88d7ebb746SSotiris Apostolakis static cl::opt<unsigned> MispredictDefaultRate( 89d7ebb746SSotiris Apostolakis "mispredict-default-rate", cl::Hidden, cl::init(25), 90d7ebb746SSotiris Apostolakis cl::desc("Default mispredict rate (initialized to 25%).")); 91d7ebb746SSotiris Apostolakis 92d7ebb746SSotiris Apostolakis static cl::opt<bool> 93d7ebb746SSotiris Apostolakis DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden, 94d7ebb746SSotiris Apostolakis cl::init(false), 95d7ebb746SSotiris Apostolakis cl::desc("Disable loop-level heuristics.")); 96d7ebb746SSotiris Apostolakis 97ca7c307dSSotiris Apostolakis namespace { 98ca7c307dSSotiris Apostolakis 99ca7c307dSSotiris Apostolakis class SelectOptimize : public FunctionPass { 10097c3ef5cSSotiris Apostolakis const TargetMachine *TM = nullptr; 10197c3ef5cSSotiris Apostolakis const TargetSubtargetInfo *TSI; 10297c3ef5cSSotiris Apostolakis const TargetLowering *TLI = nullptr; 1038b42bc56SSotiris Apostolakis const TargetTransformInfo *TTI = nullptr; 10497c3ef5cSSotiris Apostolakis const LoopInfo *LI; 1058b42bc56SSotiris Apostolakis DominatorTree *DT; 10697c3ef5cSSotiris Apostolakis std::unique_ptr<BlockFrequencyInfo> BFI; 10797c3ef5cSSotiris Apostolakis std::unique_ptr<BranchProbabilityInfo> BPI; 1088b42bc56SSotiris Apostolakis ProfileSummaryInfo *PSI; 1098b42bc56SSotiris Apostolakis OptimizationRemarkEmitter *ORE; 110d7ebb746SSotiris Apostolakis TargetSchedModel TSchedModel; 11197c3ef5cSSotiris Apostolakis 112ca7c307dSSotiris Apostolakis public: 113ca7c307dSSotiris Apostolakis static char ID; 1148b42bc56SSotiris Apostolakis 115ca7c307dSSotiris Apostolakis SelectOptimize() : FunctionPass(ID) { 116ca7c307dSSotiris Apostolakis initializeSelectOptimizePass(*PassRegistry::getPassRegistry()); 117ca7c307dSSotiris Apostolakis } 118ca7c307dSSotiris Apostolakis 119ca7c307dSSotiris Apostolakis bool runOnFunction(Function &F) override; 120ca7c307dSSotiris Apostolakis 12197c3ef5cSSotiris Apostolakis void getAnalysisUsage(AnalysisUsage &AU) const override { 1228b42bc56SSotiris Apostolakis AU.addRequired<ProfileSummaryInfoWrapperPass>(); 12397c3ef5cSSotiris Apostolakis AU.addRequired<TargetPassConfig>(); 1248b42bc56SSotiris Apostolakis AU.addRequired<TargetTransformInfoWrapperPass>(); 1258b42bc56SSotiris Apostolakis AU.addRequired<DominatorTreeWrapperPass>(); 12697c3ef5cSSotiris Apostolakis AU.addRequired<LoopInfoWrapperPass>(); 1278b42bc56SSotiris Apostolakis AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 12897c3ef5cSSotiris Apostolakis } 12997c3ef5cSSotiris Apostolakis 13097c3ef5cSSotiris Apostolakis private: 13197c3ef5cSSotiris Apostolakis // Select groups consist of consecutive select instructions with the same 13297c3ef5cSSotiris Apostolakis // condition. 13397c3ef5cSSotiris Apostolakis using SelectGroup = SmallVector<SelectInst *, 2>; 13497c3ef5cSSotiris Apostolakis using SelectGroups = SmallVector<SelectGroup, 2>; 13597c3ef5cSSotiris Apostolakis 136d7ebb746SSotiris Apostolakis using Scaled64 = ScaledNumber<uint64_t>; 137d7ebb746SSotiris Apostolakis 138d7ebb746SSotiris Apostolakis struct CostInfo { 139d7ebb746SSotiris Apostolakis /// Predicated cost (with selects as conditional moves). 140d7ebb746SSotiris Apostolakis Scaled64 PredCost; 141d7ebb746SSotiris Apostolakis /// Non-predicated cost (with selects converted to branches). 142d7ebb746SSotiris Apostolakis Scaled64 NonPredCost; 143d7ebb746SSotiris Apostolakis }; 144d7ebb746SSotiris Apostolakis 1458b42bc56SSotiris Apostolakis // Converts select instructions of a function to conditional jumps when deemed 1468b42bc56SSotiris Apostolakis // profitable. Returns true if at least one select was converted. 14797c3ef5cSSotiris Apostolakis bool optimizeSelects(Function &F); 1488b42bc56SSotiris Apostolakis 1498b42bc56SSotiris Apostolakis // Heuristics for determining which select instructions can be profitably 1508b42bc56SSotiris Apostolakis // conveted to branches. Separate heuristics for selects in inner-most loops 1518b42bc56SSotiris Apostolakis // and the rest of code regions (base heuristics for non-inner-most loop 1528b42bc56SSotiris Apostolakis // regions). 1538b42bc56SSotiris Apostolakis void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups); 1548b42bc56SSotiris Apostolakis void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups); 1558b42bc56SSotiris Apostolakis 1568b42bc56SSotiris Apostolakis // Converts to branches the select groups that were deemed 1578b42bc56SSotiris Apostolakis // profitable-to-convert. 15897c3ef5cSSotiris Apostolakis void convertProfitableSIGroups(SelectGroups &ProfSIGroups); 1598b42bc56SSotiris Apostolakis 1608b42bc56SSotiris Apostolakis // Splits selects of a given basic block into select groups. 16197c3ef5cSSotiris Apostolakis void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups); 1628b42bc56SSotiris Apostolakis 1638b42bc56SSotiris Apostolakis // Determines for which select groups it is profitable converting to branches 164d7ebb746SSotiris Apostolakis // (base and inner-most-loop heuristics). 1658b42bc56SSotiris Apostolakis void findProfitableSIGroupsBase(SelectGroups &SIGroups, 1668b42bc56SSotiris Apostolakis SelectGroups &ProfSIGroups); 167d7ebb746SSotiris Apostolakis void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups, 168d7ebb746SSotiris Apostolakis SelectGroups &ProfSIGroups); 169d7ebb746SSotiris Apostolakis 1708b42bc56SSotiris Apostolakis // Determines if a select group should be converted to a branch (base 1718b42bc56SSotiris Apostolakis // heuristics). 1728b42bc56SSotiris Apostolakis bool isConvertToBranchProfitableBase(const SmallVector<SelectInst *, 2> &ASI); 1738b42bc56SSotiris Apostolakis 1748b42bc56SSotiris Apostolakis // Returns true if there are expensive instructions in the cold value 1758b42bc56SSotiris Apostolakis // operand's (if any) dependence slice of any of the selects of the given 1768b42bc56SSotiris Apostolakis // group. 1778b42bc56SSotiris Apostolakis bool hasExpensiveColdOperand(const SmallVector<SelectInst *, 2> &ASI); 1788b42bc56SSotiris Apostolakis 1798b42bc56SSotiris Apostolakis // For a given source instruction, collect its backwards dependence slice 1808b42bc56SSotiris Apostolakis // consisting of instructions exclusively computed for producing the operands 1818b42bc56SSotiris Apostolakis // of the source instruction. 182*67be40dfSSotiris Apostolakis void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice, 183*67be40dfSSotiris Apostolakis bool ForSinking = false); 1848b42bc56SSotiris Apostolakis 1858b42bc56SSotiris Apostolakis // Returns true if the condition of the select is highly predictable. 1868b42bc56SSotiris Apostolakis bool isSelectHighlyPredictable(const SelectInst *SI); 1878b42bc56SSotiris Apostolakis 188d7ebb746SSotiris Apostolakis // Loop-level checks to determine if a non-predicated version (with branches) 189d7ebb746SSotiris Apostolakis // of the given loop is more profitable than its predicated version. 190d7ebb746SSotiris Apostolakis bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]); 191d7ebb746SSotiris Apostolakis 192d7ebb746SSotiris Apostolakis // Computes instruction and loop-critical-path costs for both the predicated 193d7ebb746SSotiris Apostolakis // and non-predicated version of the given loop. 194d7ebb746SSotiris Apostolakis bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups, 195d7ebb746SSotiris Apostolakis DenseMap<const Instruction *, CostInfo> &InstCostMap, 196d7ebb746SSotiris Apostolakis CostInfo *LoopCost); 197d7ebb746SSotiris Apostolakis 198d7ebb746SSotiris Apostolakis // Returns a set of all the select instructions in the given select groups. 199d7ebb746SSotiris Apostolakis SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups); 200d7ebb746SSotiris Apostolakis 201d7ebb746SSotiris Apostolakis // Returns the latency cost of a given instruction. 202d7ebb746SSotiris Apostolakis Optional<uint64_t> computeInstCost(const Instruction *I); 203d7ebb746SSotiris Apostolakis 204d7ebb746SSotiris Apostolakis // Returns the misprediction cost of a given select when converted to branch. 205d7ebb746SSotiris Apostolakis Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost); 206d7ebb746SSotiris Apostolakis 207d7ebb746SSotiris Apostolakis // Returns the cost of a branch when the prediction is correct. 208d7ebb746SSotiris Apostolakis Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, 209d7ebb746SSotiris Apostolakis const SelectInst *SI); 210d7ebb746SSotiris Apostolakis 2118b42bc56SSotiris Apostolakis // Returns true if the target architecture supports lowering a given select. 21297c3ef5cSSotiris Apostolakis bool isSelectKindSupported(SelectInst *SI); 213ca7c307dSSotiris Apostolakis }; 214ca7c307dSSotiris Apostolakis } // namespace 215ca7c307dSSotiris Apostolakis 216ca7c307dSSotiris Apostolakis char SelectOptimize::ID = 0; 21797c3ef5cSSotiris Apostolakis 21897c3ef5cSSotiris Apostolakis INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, 21997c3ef5cSSotiris Apostolakis false) 22097c3ef5cSSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 2218b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 2228b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 22397c3ef5cSSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 2248b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 2258b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 22697c3ef5cSSotiris Apostolakis INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, 227ca7c307dSSotiris Apostolakis false) 228ca7c307dSSotiris Apostolakis 229ca7c307dSSotiris Apostolakis FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); } 230ca7c307dSSotiris Apostolakis 231ca7c307dSSotiris Apostolakis bool SelectOptimize::runOnFunction(Function &F) { 23297c3ef5cSSotiris Apostolakis TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); 23397c3ef5cSSotiris Apostolakis TSI = TM->getSubtargetImpl(F); 23497c3ef5cSSotiris Apostolakis TLI = TSI->getTargetLowering(); 2358b42bc56SSotiris Apostolakis 2368b42bc56SSotiris Apostolakis // If none of the select types is supported then skip this pass. 2378b42bc56SSotiris Apostolakis // This is an optimization pass. Legality issues will be handled by 2388b42bc56SSotiris Apostolakis // instruction selection. 2398b42bc56SSotiris Apostolakis if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && 2408b42bc56SSotiris Apostolakis !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && 2418b42bc56SSotiris Apostolakis !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) 2428b42bc56SSotiris Apostolakis return false; 2438b42bc56SSotiris Apostolakis 2448b42bc56SSotiris Apostolakis TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 2458b42bc56SSotiris Apostolakis DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 24697c3ef5cSSotiris Apostolakis LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 24797c3ef5cSSotiris Apostolakis BPI.reset(new BranchProbabilityInfo(F, *LI)); 24897c3ef5cSSotiris Apostolakis BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); 2498b42bc56SSotiris Apostolakis PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 2508b42bc56SSotiris Apostolakis ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 251d7ebb746SSotiris Apostolakis TSchedModel.init(TSI); 2528b42bc56SSotiris Apostolakis 2538b42bc56SSotiris Apostolakis // When optimizing for size, selects are preferable over branches. 2548b42bc56SSotiris Apostolakis if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get())) 2558b42bc56SSotiris Apostolakis return false; 25697c3ef5cSSotiris Apostolakis 25797c3ef5cSSotiris Apostolakis return optimizeSelects(F); 25897c3ef5cSSotiris Apostolakis } 25997c3ef5cSSotiris Apostolakis 26097c3ef5cSSotiris Apostolakis bool SelectOptimize::optimizeSelects(Function &F) { 26197c3ef5cSSotiris Apostolakis // Determine for which select groups it is profitable converting to branches. 26297c3ef5cSSotiris Apostolakis SelectGroups ProfSIGroups; 2638b42bc56SSotiris Apostolakis // Base heuristics apply only to non-loops and outer loops. 2648b42bc56SSotiris Apostolakis optimizeSelectsBase(F, ProfSIGroups); 2658b42bc56SSotiris Apostolakis // Separate heuristics for inner-most loops. 2668b42bc56SSotiris Apostolakis optimizeSelectsInnerLoops(F, ProfSIGroups); 26797c3ef5cSSotiris Apostolakis 26897c3ef5cSSotiris Apostolakis // Convert to branches the select groups that were deemed 26997c3ef5cSSotiris Apostolakis // profitable-to-convert. 27097c3ef5cSSotiris Apostolakis convertProfitableSIGroups(ProfSIGroups); 27197c3ef5cSSotiris Apostolakis 27297c3ef5cSSotiris Apostolakis // Code modified if at least one select group was converted. 27397c3ef5cSSotiris Apostolakis return !ProfSIGroups.empty(); 27497c3ef5cSSotiris Apostolakis } 27597c3ef5cSSotiris Apostolakis 2768b42bc56SSotiris Apostolakis void SelectOptimize::optimizeSelectsBase(Function &F, 2778b42bc56SSotiris Apostolakis SelectGroups &ProfSIGroups) { 2788b42bc56SSotiris Apostolakis // Collect all the select groups. 2798b42bc56SSotiris Apostolakis SelectGroups SIGroups; 2808b42bc56SSotiris Apostolakis for (BasicBlock &BB : F) { 2818b42bc56SSotiris Apostolakis // Base heuristics apply only to non-loops and outer loops. 2828b42bc56SSotiris Apostolakis Loop *L = LI->getLoopFor(&BB); 2838b42bc56SSotiris Apostolakis if (L && L->isInnermost()) 2848b42bc56SSotiris Apostolakis continue; 2858b42bc56SSotiris Apostolakis collectSelectGroups(BB, SIGroups); 2868b42bc56SSotiris Apostolakis } 2878b42bc56SSotiris Apostolakis 2888b42bc56SSotiris Apostolakis // Determine for which select groups it is profitable converting to branches. 2898b42bc56SSotiris Apostolakis findProfitableSIGroupsBase(SIGroups, ProfSIGroups); 2908b42bc56SSotiris Apostolakis } 2918b42bc56SSotiris Apostolakis 2928b42bc56SSotiris Apostolakis void SelectOptimize::optimizeSelectsInnerLoops(Function &F, 293d7ebb746SSotiris Apostolakis SelectGroups &ProfSIGroups) { 294d7ebb746SSotiris Apostolakis SmallVector<Loop *, 4> Loops(LI->begin(), LI->end()); 295d7ebb746SSotiris Apostolakis // Need to check size on each iteration as we accumulate child loops. 296d7ebb746SSotiris Apostolakis for (unsigned long i = 0; i < Loops.size(); ++i) 297d7ebb746SSotiris Apostolakis for (Loop *ChildL : Loops[i]->getSubLoops()) 298d7ebb746SSotiris Apostolakis Loops.push_back(ChildL); 299d7ebb746SSotiris Apostolakis 300d7ebb746SSotiris Apostolakis for (Loop *L : Loops) { 301d7ebb746SSotiris Apostolakis if (!L->isInnermost()) 302d7ebb746SSotiris Apostolakis continue; 303d7ebb746SSotiris Apostolakis 304d7ebb746SSotiris Apostolakis SelectGroups SIGroups; 305d7ebb746SSotiris Apostolakis for (BasicBlock *BB : L->getBlocks()) 306d7ebb746SSotiris Apostolakis collectSelectGroups(*BB, SIGroups); 307d7ebb746SSotiris Apostolakis 308d7ebb746SSotiris Apostolakis findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups); 309d7ebb746SSotiris Apostolakis } 310d7ebb746SSotiris Apostolakis } 3118b42bc56SSotiris Apostolakis 31297c3ef5cSSotiris Apostolakis /// If \p isTrue is true, return the true value of \p SI, otherwise return 31397c3ef5cSSotiris Apostolakis /// false value of \p SI. If the true/false value of \p SI is defined by any 31497c3ef5cSSotiris Apostolakis /// select instructions in \p Selects, look through the defining select 31597c3ef5cSSotiris Apostolakis /// instruction until the true/false value is not defined in \p Selects. 31697c3ef5cSSotiris Apostolakis static Value * 31797c3ef5cSSotiris Apostolakis getTrueOrFalseValue(SelectInst *SI, bool isTrue, 31897c3ef5cSSotiris Apostolakis const SmallPtrSet<const Instruction *, 2> &Selects) { 31997c3ef5cSSotiris Apostolakis Value *V = nullptr; 32097c3ef5cSSotiris Apostolakis for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI); 32197c3ef5cSSotiris Apostolakis DefSI = dyn_cast<SelectInst>(V)) { 32297c3ef5cSSotiris Apostolakis assert(DefSI->getCondition() == SI->getCondition() && 32397c3ef5cSSotiris Apostolakis "The condition of DefSI does not match with SI"); 32497c3ef5cSSotiris Apostolakis V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); 32597c3ef5cSSotiris Apostolakis } 32697c3ef5cSSotiris Apostolakis assert(V && "Failed to get select true/false value"); 32797c3ef5cSSotiris Apostolakis return V; 32897c3ef5cSSotiris Apostolakis } 32997c3ef5cSSotiris Apostolakis 33097c3ef5cSSotiris Apostolakis void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { 33197c3ef5cSSotiris Apostolakis for (SelectGroup &ASI : ProfSIGroups) { 332*67be40dfSSotiris Apostolakis // The code transformation here is a modified version of the sinking 333*67be40dfSSotiris Apostolakis // transformation in CodeGenPrepare::optimizeSelectInst with a more 334*67be40dfSSotiris Apostolakis // aggressive strategy of which instructions to sink. 335*67be40dfSSotiris Apostolakis // 33697c3ef5cSSotiris Apostolakis // TODO: eliminate the redundancy of logic transforming selects to branches 33797c3ef5cSSotiris Apostolakis // by removing CodeGenPrepare::optimizeSelectInst and optimizing here 33897c3ef5cSSotiris Apostolakis // selects for all cases (with and without profile information). 33997c3ef5cSSotiris Apostolakis 34097c3ef5cSSotiris Apostolakis // Transform a sequence like this: 34197c3ef5cSSotiris Apostolakis // start: 34297c3ef5cSSotiris Apostolakis // %cmp = cmp uge i32 %a, %b 34397c3ef5cSSotiris Apostolakis // %sel = select i1 %cmp, i32 %c, i32 %d 34497c3ef5cSSotiris Apostolakis // 34597c3ef5cSSotiris Apostolakis // Into: 34697c3ef5cSSotiris Apostolakis // start: 34797c3ef5cSSotiris Apostolakis // %cmp = cmp uge i32 %a, %b 34897c3ef5cSSotiris Apostolakis // %cmp.frozen = freeze %cmp 349*67be40dfSSotiris Apostolakis // br i1 %cmp.frozen, label %select.true, label %select.false 350*67be40dfSSotiris Apostolakis // select.true: 351*67be40dfSSotiris Apostolakis // br label %select.end 35297c3ef5cSSotiris Apostolakis // select.false: 35397c3ef5cSSotiris Apostolakis // br label %select.end 35497c3ef5cSSotiris Apostolakis // select.end: 355*67be40dfSSotiris Apostolakis // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] 35697c3ef5cSSotiris Apostolakis // 35797c3ef5cSSotiris Apostolakis // %cmp should be frozen, otherwise it may introduce undefined behavior. 358*67be40dfSSotiris Apostolakis // In addition, we may sink instructions that produce %c or %d into the 359*67be40dfSSotiris Apostolakis // destination(s) of the new branch. 360*67be40dfSSotiris Apostolakis // If the true or false blocks do not contain a sunken instruction, that 361*67be40dfSSotiris Apostolakis // block and its branch may be optimized away. In that case, one side of the 362*67be40dfSSotiris Apostolakis // first branch will point directly to select.end, and the corresponding PHI 363*67be40dfSSotiris Apostolakis // predecessor block will be the start block. 364*67be40dfSSotiris Apostolakis 365*67be40dfSSotiris Apostolakis // Find all the instructions that can be soundly sunk to the true/false 366*67be40dfSSotiris Apostolakis // blocks. These are instructions that are computed solely for producing the 367*67be40dfSSotiris Apostolakis // operands of the select instructions in the group and can be sunk without 368*67be40dfSSotiris Apostolakis // breaking the semantics of the LLVM IR (e.g., cannot sink instructions 369*67be40dfSSotiris Apostolakis // with side effects). 370*67be40dfSSotiris Apostolakis SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices; 371*67be40dfSSotiris Apostolakis typedef std::stack<Instruction *>::size_type StackSizeType; 372*67be40dfSSotiris Apostolakis StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0; 373*67be40dfSSotiris Apostolakis for (SelectInst *SI : ASI) { 374*67be40dfSSotiris Apostolakis // For each select, compute the sinkable dependence chains of the true and 375*67be40dfSSotiris Apostolakis // false operands. 376*67be40dfSSotiris Apostolakis if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) { 377*67be40dfSSotiris Apostolakis std::stack<Instruction *> TrueSlice; 378*67be40dfSSotiris Apostolakis getExclBackwardsSlice(TI, TrueSlice, true); 379*67be40dfSSotiris Apostolakis maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size()); 380*67be40dfSSotiris Apostolakis TrueSlices.push_back(TrueSlice); 381*67be40dfSSotiris Apostolakis } 382*67be40dfSSotiris Apostolakis if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) { 383*67be40dfSSotiris Apostolakis std::stack<Instruction *> FalseSlice; 384*67be40dfSSotiris Apostolakis getExclBackwardsSlice(FI, FalseSlice, true); 385*67be40dfSSotiris Apostolakis maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size()); 386*67be40dfSSotiris Apostolakis FalseSlices.push_back(FalseSlice); 387*67be40dfSSotiris Apostolakis } 388*67be40dfSSotiris Apostolakis } 389*67be40dfSSotiris Apostolakis // In the case of multiple select instructions in the same group, the order 390*67be40dfSSotiris Apostolakis // of non-dependent instructions (instructions of different dependence 391*67be40dfSSotiris Apostolakis // slices) in the true/false blocks appears to affect performance. 392*67be40dfSSotiris Apostolakis // Interleaving the slices seems to experimentally be the optimal approach. 393*67be40dfSSotiris Apostolakis // This interleaving scheduling allows for more ILP (with a natural downside 394*67be40dfSSotiris Apostolakis // of increasing a bit register pressure) compared to a simple ordering of 395*67be40dfSSotiris Apostolakis // one whole chain after another. One would expect that this ordering would 396*67be40dfSSotiris Apostolakis // not matter since the scheduling in the backend of the compiler would 397*67be40dfSSotiris Apostolakis // take care of it, but apparently the scheduler fails to deliver optimal 398*67be40dfSSotiris Apostolakis // ILP with a naive ordering here. 399*67be40dfSSotiris Apostolakis SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved; 400*67be40dfSSotiris Apostolakis for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) { 401*67be40dfSSotiris Apostolakis for (auto &S : TrueSlices) { 402*67be40dfSSotiris Apostolakis if (!S.empty()) { 403*67be40dfSSotiris Apostolakis TrueSlicesInterleaved.push_back(S.top()); 404*67be40dfSSotiris Apostolakis S.pop(); 405*67be40dfSSotiris Apostolakis } 406*67be40dfSSotiris Apostolakis } 407*67be40dfSSotiris Apostolakis } 408*67be40dfSSotiris Apostolakis for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) { 409*67be40dfSSotiris Apostolakis for (auto &S : FalseSlices) { 410*67be40dfSSotiris Apostolakis if (!S.empty()) { 411*67be40dfSSotiris Apostolakis FalseSlicesInterleaved.push_back(S.top()); 412*67be40dfSSotiris Apostolakis S.pop(); 413*67be40dfSSotiris Apostolakis } 414*67be40dfSSotiris Apostolakis } 415*67be40dfSSotiris Apostolakis } 41697c3ef5cSSotiris Apostolakis 41797c3ef5cSSotiris Apostolakis // We split the block containing the select(s) into two blocks. 41897c3ef5cSSotiris Apostolakis SelectInst *SI = ASI.front(); 41997c3ef5cSSotiris Apostolakis SelectInst *LastSI = ASI.back(); 42097c3ef5cSSotiris Apostolakis BasicBlock *StartBlock = SI->getParent(); 42197c3ef5cSSotiris Apostolakis BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI)); 42297c3ef5cSSotiris Apostolakis BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); 42397c3ef5cSSotiris Apostolakis BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency()); 42497c3ef5cSSotiris Apostolakis // Delete the unconditional branch that was just created by the split. 42597c3ef5cSSotiris Apostolakis StartBlock->getTerminator()->eraseFromParent(); 42697c3ef5cSSotiris Apostolakis 42797c3ef5cSSotiris Apostolakis // Move any debug/pseudo instructions that were in-between the select 42897c3ef5cSSotiris Apostolakis // group to the newly-created end block. 42997c3ef5cSSotiris Apostolakis SmallVector<Instruction *, 2> DebugPseudoINS; 43097c3ef5cSSotiris Apostolakis auto DIt = SI->getIterator(); 43197c3ef5cSSotiris Apostolakis while (&*DIt != LastSI) { 43297c3ef5cSSotiris Apostolakis if (DIt->isDebugOrPseudoInst()) 43397c3ef5cSSotiris Apostolakis DebugPseudoINS.push_back(&*DIt); 43497c3ef5cSSotiris Apostolakis DIt++; 43597c3ef5cSSotiris Apostolakis } 43697c3ef5cSSotiris Apostolakis for (auto DI : DebugPseudoINS) { 43797c3ef5cSSotiris Apostolakis DI->moveBefore(&*EndBlock->getFirstInsertionPt()); 43897c3ef5cSSotiris Apostolakis } 43997c3ef5cSSotiris Apostolakis 44097c3ef5cSSotiris Apostolakis // These are the new basic blocks for the conditional branch. 441*67be40dfSSotiris Apostolakis // At least one will become an actual new basic block. 44297c3ef5cSSotiris Apostolakis BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr; 443*67be40dfSSotiris Apostolakis BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr; 444*67be40dfSSotiris Apostolakis if (!TrueSlicesInterleaved.empty()) { 445*67be40dfSSotiris Apostolakis TrueBlock = BasicBlock::Create(LastSI->getContext(), "select.true.sink", 446*67be40dfSSotiris Apostolakis EndBlock->getParent(), EndBlock); 447*67be40dfSSotiris Apostolakis TrueBranch = BranchInst::Create(EndBlock, TrueBlock); 448*67be40dfSSotiris Apostolakis TrueBranch->setDebugLoc(LastSI->getDebugLoc()); 449*67be40dfSSotiris Apostolakis for (Instruction *TrueInst : TrueSlicesInterleaved) 450*67be40dfSSotiris Apostolakis TrueInst->moveBefore(TrueBranch); 451*67be40dfSSotiris Apostolakis } 452*67be40dfSSotiris Apostolakis if (!FalseSlicesInterleaved.empty()) { 453*67be40dfSSotiris Apostolakis FalseBlock = BasicBlock::Create(LastSI->getContext(), "select.false.sink", 454*67be40dfSSotiris Apostolakis EndBlock->getParent(), EndBlock); 455*67be40dfSSotiris Apostolakis FalseBranch = BranchInst::Create(EndBlock, FalseBlock); 456*67be40dfSSotiris Apostolakis FalseBranch->setDebugLoc(LastSI->getDebugLoc()); 457*67be40dfSSotiris Apostolakis for (Instruction *FalseInst : FalseSlicesInterleaved) 458*67be40dfSSotiris Apostolakis FalseInst->moveBefore(FalseBranch); 459*67be40dfSSotiris Apostolakis } 460*67be40dfSSotiris Apostolakis // If there was nothing to sink, then arbitrarily choose the 'false' side 461*67be40dfSSotiris Apostolakis // for a new input value to the PHI. 462*67be40dfSSotiris Apostolakis if (TrueBlock == FalseBlock) { 463*67be40dfSSotiris Apostolakis assert(TrueBlock == nullptr && 464*67be40dfSSotiris Apostolakis "Unexpected basic block transform while optimizing select"); 46597c3ef5cSSotiris Apostolakis 46697c3ef5cSSotiris Apostolakis FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", 46797c3ef5cSSotiris Apostolakis EndBlock->getParent(), EndBlock); 46897c3ef5cSSotiris Apostolakis auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); 46997c3ef5cSSotiris Apostolakis FalseBranch->setDebugLoc(SI->getDebugLoc()); 470*67be40dfSSotiris Apostolakis } 47197c3ef5cSSotiris Apostolakis 47297c3ef5cSSotiris Apostolakis // Insert the real conditional branch based on the original condition. 473*67be40dfSSotiris Apostolakis // If we did not create a new block for one of the 'true' or 'false' paths 474*67be40dfSSotiris Apostolakis // of the condition, it means that side of the branch goes to the end block 475*67be40dfSSotiris Apostolakis // directly and the path originates from the start block from the point of 476*67be40dfSSotiris Apostolakis // view of the new PHI. 47797c3ef5cSSotiris Apostolakis BasicBlock *TT, *FT; 478*67be40dfSSotiris Apostolakis if (TrueBlock == nullptr) { 47997c3ef5cSSotiris Apostolakis TT = EndBlock; 48097c3ef5cSSotiris Apostolakis FT = FalseBlock; 481*67be40dfSSotiris Apostolakis TrueBlock = StartBlock; 482*67be40dfSSotiris Apostolakis } else if (FalseBlock == nullptr) { 483*67be40dfSSotiris Apostolakis TT = TrueBlock; 484*67be40dfSSotiris Apostolakis FT = EndBlock; 485*67be40dfSSotiris Apostolakis FalseBlock = StartBlock; 486*67be40dfSSotiris Apostolakis } else { 487*67be40dfSSotiris Apostolakis TT = TrueBlock; 488*67be40dfSSotiris Apostolakis FT = FalseBlock; 489*67be40dfSSotiris Apostolakis } 49097c3ef5cSSotiris Apostolakis IRBuilder<> IB(SI); 49197c3ef5cSSotiris Apostolakis auto *CondFr = 49297c3ef5cSSotiris Apostolakis IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen"); 49397c3ef5cSSotiris Apostolakis IB.CreateCondBr(CondFr, TT, FT, SI); 49497c3ef5cSSotiris Apostolakis 49597c3ef5cSSotiris Apostolakis SmallPtrSet<const Instruction *, 2> INS; 49697c3ef5cSSotiris Apostolakis INS.insert(ASI.begin(), ASI.end()); 49797c3ef5cSSotiris Apostolakis // Use reverse iterator because later select may use the value of the 49897c3ef5cSSotiris Apostolakis // earlier select, and we need to propagate value through earlier select 49997c3ef5cSSotiris Apostolakis // to get the PHI operand. 50097c3ef5cSSotiris Apostolakis for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) { 50197c3ef5cSSotiris Apostolakis SelectInst *SI = *It; 50297c3ef5cSSotiris Apostolakis // The select itself is replaced with a PHI Node. 50397c3ef5cSSotiris Apostolakis PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); 50497c3ef5cSSotiris Apostolakis PN->takeName(SI); 50597c3ef5cSSotiris Apostolakis PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock); 50697c3ef5cSSotiris Apostolakis PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); 50797c3ef5cSSotiris Apostolakis PN->setDebugLoc(SI->getDebugLoc()); 50897c3ef5cSSotiris Apostolakis 50997c3ef5cSSotiris Apostolakis SI->replaceAllUsesWith(PN); 51097c3ef5cSSotiris Apostolakis SI->eraseFromParent(); 51197c3ef5cSSotiris Apostolakis INS.erase(SI); 51297c3ef5cSSotiris Apostolakis ++NumSelectsConverted; 51397c3ef5cSSotiris Apostolakis } 51497c3ef5cSSotiris Apostolakis } 51597c3ef5cSSotiris Apostolakis } 51697c3ef5cSSotiris Apostolakis 51797c3ef5cSSotiris Apostolakis void SelectOptimize::collectSelectGroups(BasicBlock &BB, 51897c3ef5cSSotiris Apostolakis SelectGroups &SIGroups) { 51997c3ef5cSSotiris Apostolakis BasicBlock::iterator BBIt = BB.begin(); 52097c3ef5cSSotiris Apostolakis while (BBIt != BB.end()) { 52197c3ef5cSSotiris Apostolakis Instruction *I = &*BBIt++; 52297c3ef5cSSotiris Apostolakis if (SelectInst *SI = dyn_cast<SelectInst>(I)) { 52397c3ef5cSSotiris Apostolakis SelectGroup SIGroup; 52497c3ef5cSSotiris Apostolakis SIGroup.push_back(SI); 52597c3ef5cSSotiris Apostolakis while (BBIt != BB.end()) { 52697c3ef5cSSotiris Apostolakis Instruction *NI = &*BBIt; 52797c3ef5cSSotiris Apostolakis SelectInst *NSI = dyn_cast<SelectInst>(NI); 52897c3ef5cSSotiris Apostolakis if (NSI && SI->getCondition() == NSI->getCondition()) { 52997c3ef5cSSotiris Apostolakis SIGroup.push_back(NSI); 53097c3ef5cSSotiris Apostolakis } else if (!NI->isDebugOrPseudoInst()) { 53197c3ef5cSSotiris Apostolakis // Debug/pseudo instructions should be skipped and not prevent the 53297c3ef5cSSotiris Apostolakis // formation of a select group. 53397c3ef5cSSotiris Apostolakis break; 53497c3ef5cSSotiris Apostolakis } 53597c3ef5cSSotiris Apostolakis ++BBIt; 53697c3ef5cSSotiris Apostolakis } 53797c3ef5cSSotiris Apostolakis 53897c3ef5cSSotiris Apostolakis // If the select type is not supported, no point optimizing it. 53997c3ef5cSSotiris Apostolakis // Instruction selection will take care of it. 54097c3ef5cSSotiris Apostolakis if (!isSelectKindSupported(SI)) 54197c3ef5cSSotiris Apostolakis continue; 54297c3ef5cSSotiris Apostolakis 54397c3ef5cSSotiris Apostolakis SIGroups.push_back(SIGroup); 54497c3ef5cSSotiris Apostolakis } 54597c3ef5cSSotiris Apostolakis } 54697c3ef5cSSotiris Apostolakis } 54797c3ef5cSSotiris Apostolakis 5488b42bc56SSotiris Apostolakis void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups, 5498b42bc56SSotiris Apostolakis SelectGroups &ProfSIGroups) { 5508b42bc56SSotiris Apostolakis for (SelectGroup &ASI : SIGroups) { 5518b42bc56SSotiris Apostolakis ++NumSelectOptAnalyzed; 5528b42bc56SSotiris Apostolakis if (isConvertToBranchProfitableBase(ASI)) 5538b42bc56SSotiris Apostolakis ProfSIGroups.push_back(ASI); 5548b42bc56SSotiris Apostolakis } 5558b42bc56SSotiris Apostolakis } 5568b42bc56SSotiris Apostolakis 557d7ebb746SSotiris Apostolakis void SelectOptimize::findProfitableSIGroupsInnerLoops( 558d7ebb746SSotiris Apostolakis const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) { 559d7ebb746SSotiris Apostolakis NumSelectOptAnalyzed += SIGroups.size(); 560d7ebb746SSotiris Apostolakis // For each select group in an inner-most loop, 561d7ebb746SSotiris Apostolakis // a branch is more preferable than a select/conditional-move if: 562d7ebb746SSotiris Apostolakis // i) conversion to branches for all the select groups of the loop satisfies 563d7ebb746SSotiris Apostolakis // loop-level heuristics including reducing the loop's critical path by 564d7ebb746SSotiris Apostolakis // some threshold (see SelectOptimize::checkLoopHeuristics); and 565d7ebb746SSotiris Apostolakis // ii) the total cost of the select group is cheaper with a branch compared 566d7ebb746SSotiris Apostolakis // to its predicated version. The cost is in terms of latency and the cost 567d7ebb746SSotiris Apostolakis // of a select group is the cost of its most expensive select instruction 568d7ebb746SSotiris Apostolakis // (assuming infinite resources and thus fully leveraging available ILP). 569d7ebb746SSotiris Apostolakis 570d7ebb746SSotiris Apostolakis DenseMap<const Instruction *, CostInfo> InstCostMap; 571d7ebb746SSotiris Apostolakis CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()}, 572d7ebb746SSotiris Apostolakis {Scaled64::getZero(), Scaled64::getZero()}}; 573d7ebb746SSotiris Apostolakis if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) || 574d7ebb746SSotiris Apostolakis !checkLoopHeuristics(L, LoopCost)) { 575d7ebb746SSotiris Apostolakis return; 576d7ebb746SSotiris Apostolakis } 577d7ebb746SSotiris Apostolakis 578d7ebb746SSotiris Apostolakis for (SelectGroup &ASI : SIGroups) { 579d7ebb746SSotiris Apostolakis // Assuming infinite resources, the cost of a group of instructions is the 580d7ebb746SSotiris Apostolakis // cost of the most expensive instruction of the group. 581d7ebb746SSotiris Apostolakis Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero(); 582d7ebb746SSotiris Apostolakis for (SelectInst *SI : ASI) { 583d7ebb746SSotiris Apostolakis SelectCost = std::max(SelectCost, InstCostMap[SI].PredCost); 584d7ebb746SSotiris Apostolakis BranchCost = std::max(BranchCost, InstCostMap[SI].NonPredCost); 585d7ebb746SSotiris Apostolakis } 586d7ebb746SSotiris Apostolakis if (BranchCost < SelectCost) { 587d7ebb746SSotiris Apostolakis OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front()); 588d7ebb746SSotiris Apostolakis OR << "Profitable to convert to branch (loop analysis). BranchCost=" 589d7ebb746SSotiris Apostolakis << BranchCost.toString() << ", SelectCost=" << SelectCost.toString() 590d7ebb746SSotiris Apostolakis << ". "; 591d7ebb746SSotiris Apostolakis ORE->emit(OR); 592d7ebb746SSotiris Apostolakis ++NumSelectConvertedLoop; 593d7ebb746SSotiris Apostolakis ProfSIGroups.push_back(ASI); 594d7ebb746SSotiris Apostolakis } else { 595d7ebb746SSotiris Apostolakis OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front()); 596d7ebb746SSotiris Apostolakis ORmiss << "Select is more profitable (loop analysis). BranchCost=" 597d7ebb746SSotiris Apostolakis << BranchCost.toString() 598d7ebb746SSotiris Apostolakis << ", SelectCost=" << SelectCost.toString() << ". "; 599d7ebb746SSotiris Apostolakis ORE->emit(ORmiss); 600d7ebb746SSotiris Apostolakis } 601d7ebb746SSotiris Apostolakis } 602d7ebb746SSotiris Apostolakis } 603d7ebb746SSotiris Apostolakis 6048b42bc56SSotiris Apostolakis bool SelectOptimize::isConvertToBranchProfitableBase( 6058b42bc56SSotiris Apostolakis const SmallVector<SelectInst *, 2> &ASI) { 6068b42bc56SSotiris Apostolakis SelectInst *SI = ASI.front(); 6078b42bc56SSotiris Apostolakis OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI); 6088b42bc56SSotiris Apostolakis OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI); 6098b42bc56SSotiris Apostolakis 6108b42bc56SSotiris Apostolakis // Skip cold basic blocks. Better to optimize for size for cold blocks. 6118b42bc56SSotiris Apostolakis if (PSI->isColdBlock(SI->getParent(), BFI.get())) { 6128b42bc56SSotiris Apostolakis ++NumSelectColdBB; 6138b42bc56SSotiris Apostolakis ORmiss << "Not converted to branch because of cold basic block. "; 6148b42bc56SSotiris Apostolakis ORE->emit(ORmiss); 6158b42bc56SSotiris Apostolakis return false; 6168b42bc56SSotiris Apostolakis } 6178b42bc56SSotiris Apostolakis 6188b42bc56SSotiris Apostolakis // If unpredictable, branch form is less profitable. 6198b42bc56SSotiris Apostolakis if (SI->getMetadata(LLVMContext::MD_unpredictable)) { 6208b42bc56SSotiris Apostolakis ++NumSelectUnPred; 6218b42bc56SSotiris Apostolakis ORmiss << "Not converted to branch because of unpredictable branch. "; 6228b42bc56SSotiris Apostolakis ORE->emit(ORmiss); 6238b42bc56SSotiris Apostolakis return false; 6248b42bc56SSotiris Apostolakis } 6258b42bc56SSotiris Apostolakis 6268b42bc56SSotiris Apostolakis // If highly predictable, branch form is more profitable, unless a 6278b42bc56SSotiris Apostolakis // predictable select is inexpensive in the target architecture. 6288b42bc56SSotiris Apostolakis if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) { 6298b42bc56SSotiris Apostolakis ++NumSelectConvertedHighPred; 6308b42bc56SSotiris Apostolakis OR << "Converted to branch because of highly predictable branch. "; 6318b42bc56SSotiris Apostolakis ORE->emit(OR); 6328b42bc56SSotiris Apostolakis return true; 6338b42bc56SSotiris Apostolakis } 6348b42bc56SSotiris Apostolakis 6358b42bc56SSotiris Apostolakis // Look for expensive instructions in the cold operand's (if any) dependence 6368b42bc56SSotiris Apostolakis // slice of any of the selects in the group. 6378b42bc56SSotiris Apostolakis if (hasExpensiveColdOperand(ASI)) { 6388b42bc56SSotiris Apostolakis ++NumSelectConvertedExpColdOperand; 6398b42bc56SSotiris Apostolakis OR << "Converted to branch because of expensive cold operand."; 6408b42bc56SSotiris Apostolakis ORE->emit(OR); 6418b42bc56SSotiris Apostolakis return true; 6428b42bc56SSotiris Apostolakis } 6438b42bc56SSotiris Apostolakis 6448b42bc56SSotiris Apostolakis ORmiss << "Not profitable to convert to branch (base heuristic)."; 6458b42bc56SSotiris Apostolakis ORE->emit(ORmiss); 6468b42bc56SSotiris Apostolakis return false; 6478b42bc56SSotiris Apostolakis } 6488b42bc56SSotiris Apostolakis 6498b42bc56SSotiris Apostolakis static InstructionCost divideNearest(InstructionCost Numerator, 6508b42bc56SSotiris Apostolakis uint64_t Denominator) { 6518b42bc56SSotiris Apostolakis return (Numerator + (Denominator / 2)) / Denominator; 6528b42bc56SSotiris Apostolakis } 6538b42bc56SSotiris Apostolakis 6548b42bc56SSotiris Apostolakis bool SelectOptimize::hasExpensiveColdOperand( 6558b42bc56SSotiris Apostolakis const SmallVector<SelectInst *, 2> &ASI) { 6568b42bc56SSotiris Apostolakis bool ColdOperand = false; 6578b42bc56SSotiris Apostolakis uint64_t TrueWeight, FalseWeight, TotalWeight; 6588b42bc56SSotiris Apostolakis if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) { 6598b42bc56SSotiris Apostolakis uint64_t MinWeight = std::min(TrueWeight, FalseWeight); 6608b42bc56SSotiris Apostolakis TotalWeight = TrueWeight + FalseWeight; 6618b42bc56SSotiris Apostolakis // Is there a path with frequency <ColdOperandThreshold% (default:20%) ? 6628b42bc56SSotiris Apostolakis ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight; 6638b42bc56SSotiris Apostolakis } else if (PSI->hasProfileSummary()) { 6648b42bc56SSotiris Apostolakis OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front()); 6658b42bc56SSotiris Apostolakis ORmiss << "Profile data available but missing branch-weights metadata for " 6668b42bc56SSotiris Apostolakis "select instruction. "; 6678b42bc56SSotiris Apostolakis ORE->emit(ORmiss); 6688b42bc56SSotiris Apostolakis } 6698b42bc56SSotiris Apostolakis if (!ColdOperand) 6708b42bc56SSotiris Apostolakis return false; 6718b42bc56SSotiris Apostolakis // Check if the cold path's dependence slice is expensive for any of the 6728b42bc56SSotiris Apostolakis // selects of the group. 6738b42bc56SSotiris Apostolakis for (SelectInst *SI : ASI) { 6748b42bc56SSotiris Apostolakis Instruction *ColdI = nullptr; 6758b42bc56SSotiris Apostolakis uint64_t HotWeight; 6768b42bc56SSotiris Apostolakis if (TrueWeight < FalseWeight) { 6778b42bc56SSotiris Apostolakis ColdI = dyn_cast<Instruction>(SI->getTrueValue()); 6788b42bc56SSotiris Apostolakis HotWeight = FalseWeight; 6798b42bc56SSotiris Apostolakis } else { 6808b42bc56SSotiris Apostolakis ColdI = dyn_cast<Instruction>(SI->getFalseValue()); 6818b42bc56SSotiris Apostolakis HotWeight = TrueWeight; 6828b42bc56SSotiris Apostolakis } 6838b42bc56SSotiris Apostolakis if (ColdI) { 684*67be40dfSSotiris Apostolakis std::stack<Instruction *> ColdSlice; 6858b42bc56SSotiris Apostolakis getExclBackwardsSlice(ColdI, ColdSlice); 6868b42bc56SSotiris Apostolakis InstructionCost SliceCost = 0; 687*67be40dfSSotiris Apostolakis while (!ColdSlice.empty()) { 688*67be40dfSSotiris Apostolakis SliceCost += TTI->getInstructionCost(ColdSlice.top(), 689*67be40dfSSotiris Apostolakis TargetTransformInfo::TCK_Latency); 690*67be40dfSSotiris Apostolakis ColdSlice.pop(); 6918b42bc56SSotiris Apostolakis } 6928b42bc56SSotiris Apostolakis // The colder the cold value operand of the select is the more expensive 6938b42bc56SSotiris Apostolakis // the cmov becomes for computing the cold value operand every time. Thus, 6948b42bc56SSotiris Apostolakis // the colder the cold operand is the more its cost counts. 6958b42bc56SSotiris Apostolakis // Get nearest integer cost adjusted for coldness. 6968b42bc56SSotiris Apostolakis InstructionCost AdjSliceCost = 6978b42bc56SSotiris Apostolakis divideNearest(SliceCost * HotWeight, TotalWeight); 6988b42bc56SSotiris Apostolakis if (AdjSliceCost >= 6998b42bc56SSotiris Apostolakis ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive) 7008b42bc56SSotiris Apostolakis return true; 7018b42bc56SSotiris Apostolakis } 7028b42bc56SSotiris Apostolakis } 7038b42bc56SSotiris Apostolakis return false; 7048b42bc56SSotiris Apostolakis } 7058b42bc56SSotiris Apostolakis 7068b42bc56SSotiris Apostolakis // For a given source instruction, collect its backwards dependence slice 7078b42bc56SSotiris Apostolakis // consisting of instructions exclusively computed for the purpose of producing 7088b42bc56SSotiris Apostolakis // the operands of the source instruction. As an approximation 7098b42bc56SSotiris Apostolakis // (sufficiently-accurate in practice), we populate this set with the 7108b42bc56SSotiris Apostolakis // instructions of the backwards dependence slice that only have one-use and 7118b42bc56SSotiris Apostolakis // form an one-use chain that leads to the source instruction. 712*67be40dfSSotiris Apostolakis void SelectOptimize::getExclBackwardsSlice(Instruction *I, 713*67be40dfSSotiris Apostolakis std::stack<Instruction *> &Slice, 714*67be40dfSSotiris Apostolakis bool ForSinking) { 7158b42bc56SSotiris Apostolakis SmallPtrSet<Instruction *, 2> Visited; 7168b42bc56SSotiris Apostolakis std::queue<Instruction *> Worklist; 7178b42bc56SSotiris Apostolakis Worklist.push(I); 7188b42bc56SSotiris Apostolakis while (!Worklist.empty()) { 7198b42bc56SSotiris Apostolakis Instruction *II = Worklist.front(); 7208b42bc56SSotiris Apostolakis Worklist.pop(); 7218b42bc56SSotiris Apostolakis 7228b42bc56SSotiris Apostolakis // Avoid cycles. 7238b42bc56SSotiris Apostolakis if (Visited.count(II)) 7248b42bc56SSotiris Apostolakis continue; 7258b42bc56SSotiris Apostolakis Visited.insert(II); 7268b42bc56SSotiris Apostolakis 7278b42bc56SSotiris Apostolakis if (!II->hasOneUse()) 7288b42bc56SSotiris Apostolakis continue; 7298b42bc56SSotiris Apostolakis 730*67be40dfSSotiris Apostolakis // Cannot soundly sink instructions with side-effects. 731*67be40dfSSotiris Apostolakis // Terminator or phi instructions cannot be sunk. 732*67be40dfSSotiris Apostolakis // Avoid sinking other select instructions (should be handled separetely). 733*67be40dfSSotiris Apostolakis if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() || 734*67be40dfSSotiris Apostolakis isa<SelectInst>(II) || isa<PHINode>(II))) 735*67be40dfSSotiris Apostolakis continue; 736*67be40dfSSotiris Apostolakis 7378b42bc56SSotiris Apostolakis // Avoid considering instructions with less frequency than the source 7388b42bc56SSotiris Apostolakis // instruction (i.e., avoid colder code regions of the dependence slice). 7398b42bc56SSotiris Apostolakis if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent())) 7408b42bc56SSotiris Apostolakis continue; 7418b42bc56SSotiris Apostolakis 7428b42bc56SSotiris Apostolakis // Eligible one-use instruction added to the dependence slice. 743*67be40dfSSotiris Apostolakis Slice.push(II); 7448b42bc56SSotiris Apostolakis 7458b42bc56SSotiris Apostolakis // Explore all the operands of the current instruction to expand the slice. 7468b42bc56SSotiris Apostolakis for (unsigned k = 0; k < II->getNumOperands(); ++k) 7478b42bc56SSotiris Apostolakis if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k))) 7488b42bc56SSotiris Apostolakis Worklist.push(OpI); 7498b42bc56SSotiris Apostolakis } 7508b42bc56SSotiris Apostolakis } 7518b42bc56SSotiris Apostolakis 7528b42bc56SSotiris Apostolakis bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) { 7538b42bc56SSotiris Apostolakis uint64_t TrueWeight, FalseWeight; 7548b42bc56SSotiris Apostolakis if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { 7558b42bc56SSotiris Apostolakis uint64_t Max = std::max(TrueWeight, FalseWeight); 7568b42bc56SSotiris Apostolakis uint64_t Sum = TrueWeight + FalseWeight; 7578b42bc56SSotiris Apostolakis if (Sum != 0) { 7588b42bc56SSotiris Apostolakis auto Probability = BranchProbability::getBranchProbability(Max, Sum); 7598b42bc56SSotiris Apostolakis if (Probability > TTI->getPredictableBranchThreshold()) 7608b42bc56SSotiris Apostolakis return true; 7618b42bc56SSotiris Apostolakis } 7628b42bc56SSotiris Apostolakis } 7638b42bc56SSotiris Apostolakis return false; 7648b42bc56SSotiris Apostolakis } 7658b42bc56SSotiris Apostolakis 766d7ebb746SSotiris Apostolakis bool SelectOptimize::checkLoopHeuristics(const Loop *L, 767d7ebb746SSotiris Apostolakis const CostInfo LoopCost[2]) { 768d7ebb746SSotiris Apostolakis // Loop-level checks to determine if a non-predicated version (with branches) 769d7ebb746SSotiris Apostolakis // of the loop is more profitable than its predicated version. 770d7ebb746SSotiris Apostolakis 771d7ebb746SSotiris Apostolakis if (DisableLoopLevelHeuristics) 772d7ebb746SSotiris Apostolakis return true; 773d7ebb746SSotiris Apostolakis 774d7ebb746SSotiris Apostolakis OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", 775d7ebb746SSotiris Apostolakis L->getHeader()->getFirstNonPHI()); 776d7ebb746SSotiris Apostolakis 777d7ebb746SSotiris Apostolakis if (LoopCost[0].NonPredCost > LoopCost[0].PredCost || 778d7ebb746SSotiris Apostolakis LoopCost[1].NonPredCost >= LoopCost[1].PredCost) { 779d7ebb746SSotiris Apostolakis ORmissL << "No select conversion in the loop due to no reduction of loop's " 780d7ebb746SSotiris Apostolakis "critical path. "; 781d7ebb746SSotiris Apostolakis ORE->emit(ORmissL); 782d7ebb746SSotiris Apostolakis return false; 783d7ebb746SSotiris Apostolakis } 784d7ebb746SSotiris Apostolakis 785d7ebb746SSotiris Apostolakis Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost, 786d7ebb746SSotiris Apostolakis LoopCost[1].PredCost - LoopCost[1].NonPredCost}; 787d7ebb746SSotiris Apostolakis 788d7ebb746SSotiris Apostolakis // Profitably converting to branches need to reduce the loop's critical path 789d7ebb746SSotiris Apostolakis // by at least some threshold (absolute gain of GainCycleThreshold cycles and 790d7ebb746SSotiris Apostolakis // relative gain of 12.5%). 791d7ebb746SSotiris Apostolakis if (Gain[1] < Scaled64::get(GainCycleThreshold) || 792d7ebb746SSotiris Apostolakis Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) { 793d7ebb746SSotiris Apostolakis Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost; 794d7ebb746SSotiris Apostolakis ORmissL << "No select conversion in the loop due to small reduction of " 795d7ebb746SSotiris Apostolakis "loop's critical path. Gain=" 796d7ebb746SSotiris Apostolakis << Gain[1].toString() 797d7ebb746SSotiris Apostolakis << ", RelativeGain=" << RelativeGain.toString() << "%. "; 798d7ebb746SSotiris Apostolakis ORE->emit(ORmissL); 799d7ebb746SSotiris Apostolakis return false; 800d7ebb746SSotiris Apostolakis } 801d7ebb746SSotiris Apostolakis 802d7ebb746SSotiris Apostolakis // If the loop's critical path involves loop-carried dependences, the gradient 803d7ebb746SSotiris Apostolakis // of the gain needs to be at least GainGradientThreshold% (defaults to 25%). 804d7ebb746SSotiris Apostolakis // This check ensures that the latency reduction for the loop's critical path 805d7ebb746SSotiris Apostolakis // keeps decreasing with sufficient rate beyond the two analyzed loop 806d7ebb746SSotiris Apostolakis // iterations. 807d7ebb746SSotiris Apostolakis if (Gain[1] > Gain[0]) { 808d7ebb746SSotiris Apostolakis Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) / 809d7ebb746SSotiris Apostolakis (LoopCost[1].PredCost - LoopCost[0].PredCost); 810d7ebb746SSotiris Apostolakis if (GradientGain < Scaled64::get(GainGradientThreshold)) { 811d7ebb746SSotiris Apostolakis ORmissL << "No select conversion in the loop due to small gradient gain. " 812d7ebb746SSotiris Apostolakis "GradientGain=" 813d7ebb746SSotiris Apostolakis << GradientGain.toString() << "%. "; 814d7ebb746SSotiris Apostolakis ORE->emit(ORmissL); 815d7ebb746SSotiris Apostolakis return false; 816d7ebb746SSotiris Apostolakis } 817d7ebb746SSotiris Apostolakis } 818d7ebb746SSotiris Apostolakis // If the gain decreases it is not profitable to convert. 819d7ebb746SSotiris Apostolakis else if (Gain[1] < Gain[0]) { 820d7ebb746SSotiris Apostolakis ORmissL 821d7ebb746SSotiris Apostolakis << "No select conversion in the loop due to negative gradient gain. "; 822d7ebb746SSotiris Apostolakis ORE->emit(ORmissL); 823d7ebb746SSotiris Apostolakis return false; 824d7ebb746SSotiris Apostolakis } 825d7ebb746SSotiris Apostolakis 826d7ebb746SSotiris Apostolakis // Non-predicated version of the loop is more profitable than its 827d7ebb746SSotiris Apostolakis // predicated version. 828d7ebb746SSotiris Apostolakis return true; 829d7ebb746SSotiris Apostolakis } 830d7ebb746SSotiris Apostolakis 831d7ebb746SSotiris Apostolakis // Computes instruction and loop-critical-path costs for both the predicated 832d7ebb746SSotiris Apostolakis // and non-predicated version of the given loop. 833d7ebb746SSotiris Apostolakis // Returns false if unable to compute these costs due to invalid cost of loop 834d7ebb746SSotiris Apostolakis // instruction(s). 835d7ebb746SSotiris Apostolakis bool SelectOptimize::computeLoopCosts( 836d7ebb746SSotiris Apostolakis const Loop *L, const SelectGroups &SIGroups, 837d7ebb746SSotiris Apostolakis DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) { 838d7ebb746SSotiris Apostolakis const auto &SIset = getSIset(SIGroups); 839d7ebb746SSotiris Apostolakis // Compute instruction and loop-critical-path costs across two iterations for 840d7ebb746SSotiris Apostolakis // both predicated and non-predicated version. 841d7ebb746SSotiris Apostolakis const unsigned Iterations = 2; 842d7ebb746SSotiris Apostolakis for (unsigned Iter = 0; Iter < Iterations; ++Iter) { 843d7ebb746SSotiris Apostolakis // Cost of the loop's critical path. 844d7ebb746SSotiris Apostolakis CostInfo &MaxCost = LoopCost[Iter]; 845d7ebb746SSotiris Apostolakis for (BasicBlock *BB : L->getBlocks()) { 846d7ebb746SSotiris Apostolakis for (const Instruction &I : *BB) { 847d7ebb746SSotiris Apostolakis if (I.isDebugOrPseudoInst()) 848d7ebb746SSotiris Apostolakis continue; 849d7ebb746SSotiris Apostolakis // Compute the predicated and non-predicated cost of the instruction. 850d7ebb746SSotiris Apostolakis Scaled64 IPredCost = Scaled64::getZero(), 851d7ebb746SSotiris Apostolakis INonPredCost = Scaled64::getZero(); 852d7ebb746SSotiris Apostolakis 853d7ebb746SSotiris Apostolakis // Assume infinite resources that allow to fully exploit the available 854d7ebb746SSotiris Apostolakis // instruction-level parallelism. 855d7ebb746SSotiris Apostolakis // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost) 856d7ebb746SSotiris Apostolakis for (const Use &U : I.operands()) { 857d7ebb746SSotiris Apostolakis auto UI = dyn_cast<Instruction>(U.get()); 858d7ebb746SSotiris Apostolakis if (!UI) 859d7ebb746SSotiris Apostolakis continue; 860d7ebb746SSotiris Apostolakis if (InstCostMap.count(UI)) { 861d7ebb746SSotiris Apostolakis IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost); 862d7ebb746SSotiris Apostolakis INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost); 863d7ebb746SSotiris Apostolakis } 864d7ebb746SSotiris Apostolakis } 865d7ebb746SSotiris Apostolakis auto ILatency = computeInstCost(&I); 866d7ebb746SSotiris Apostolakis if (!ILatency.hasValue()) { 867d7ebb746SSotiris Apostolakis OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I); 868d7ebb746SSotiris Apostolakis ORmissL << "Invalid instruction cost preventing analysis and " 869d7ebb746SSotiris Apostolakis "optimization of the inner-most loop containing this " 870d7ebb746SSotiris Apostolakis "instruction. "; 871d7ebb746SSotiris Apostolakis ORE->emit(ORmissL); 872d7ebb746SSotiris Apostolakis return false; 873d7ebb746SSotiris Apostolakis } 874d7ebb746SSotiris Apostolakis IPredCost += Scaled64::get(ILatency.getValue()); 875d7ebb746SSotiris Apostolakis INonPredCost += Scaled64::get(ILatency.getValue()); 876d7ebb746SSotiris Apostolakis 877d7ebb746SSotiris Apostolakis // For a select that can be converted to branch, 878d7ebb746SSotiris Apostolakis // compute its cost as a branch (non-predicated cost). 879d7ebb746SSotiris Apostolakis // 880d7ebb746SSotiris Apostolakis // BranchCost = PredictedPathCost + MispredictCost 881d7ebb746SSotiris Apostolakis // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb 882d7ebb746SSotiris Apostolakis // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate 883d7ebb746SSotiris Apostolakis if (SIset.contains(&I)) { 884d7ebb746SSotiris Apostolakis auto SI = dyn_cast<SelectInst>(&I); 885d7ebb746SSotiris Apostolakis 886d7ebb746SSotiris Apostolakis Scaled64 TrueOpCost = Scaled64::getZero(), 887d7ebb746SSotiris Apostolakis FalseOpCost = Scaled64::getZero(); 888d7ebb746SSotiris Apostolakis if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) 889d7ebb746SSotiris Apostolakis if (InstCostMap.count(TI)) 890d7ebb746SSotiris Apostolakis TrueOpCost = InstCostMap[TI].NonPredCost; 891d7ebb746SSotiris Apostolakis if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) 892d7ebb746SSotiris Apostolakis if (InstCostMap.count(FI)) 893d7ebb746SSotiris Apostolakis FalseOpCost = InstCostMap[FI].NonPredCost; 894d7ebb746SSotiris Apostolakis Scaled64 PredictedPathCost = 895d7ebb746SSotiris Apostolakis getPredictedPathCost(TrueOpCost, FalseOpCost, SI); 896d7ebb746SSotiris Apostolakis 897d7ebb746SSotiris Apostolakis Scaled64 CondCost = Scaled64::getZero(); 898d7ebb746SSotiris Apostolakis if (auto *CI = dyn_cast<Instruction>(SI->getCondition())) 899d7ebb746SSotiris Apostolakis if (InstCostMap.count(CI)) 900d7ebb746SSotiris Apostolakis CondCost = InstCostMap[CI].NonPredCost; 901d7ebb746SSotiris Apostolakis Scaled64 MispredictCost = getMispredictionCost(SI, CondCost); 902d7ebb746SSotiris Apostolakis 903d7ebb746SSotiris Apostolakis INonPredCost = PredictedPathCost + MispredictCost; 904d7ebb746SSotiris Apostolakis } 905d7ebb746SSotiris Apostolakis 906d7ebb746SSotiris Apostolakis InstCostMap[&I] = {IPredCost, INonPredCost}; 907d7ebb746SSotiris Apostolakis MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost); 908d7ebb746SSotiris Apostolakis MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost); 909d7ebb746SSotiris Apostolakis } 910d7ebb746SSotiris Apostolakis } 911d7ebb746SSotiris Apostolakis } 912d7ebb746SSotiris Apostolakis return true; 913d7ebb746SSotiris Apostolakis } 914d7ebb746SSotiris Apostolakis 915d7ebb746SSotiris Apostolakis SmallPtrSet<const Instruction *, 2> 916d7ebb746SSotiris Apostolakis SelectOptimize::getSIset(const SelectGroups &SIGroups) { 917d7ebb746SSotiris Apostolakis SmallPtrSet<const Instruction *, 2> SIset; 918d7ebb746SSotiris Apostolakis for (const SelectGroup &ASI : SIGroups) 919d7ebb746SSotiris Apostolakis for (const SelectInst *SI : ASI) 920d7ebb746SSotiris Apostolakis SIset.insert(SI); 921d7ebb746SSotiris Apostolakis return SIset; 922d7ebb746SSotiris Apostolakis } 923d7ebb746SSotiris Apostolakis 924d7ebb746SSotiris Apostolakis Optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) { 925d7ebb746SSotiris Apostolakis InstructionCost ICost = 926d7ebb746SSotiris Apostolakis TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency); 927d7ebb746SSotiris Apostolakis if (auto OC = ICost.getValue()) 928d7ebb746SSotiris Apostolakis return Optional<uint64_t>(OC.getValue()); 929d7ebb746SSotiris Apostolakis return Optional<uint64_t>(None); 930d7ebb746SSotiris Apostolakis } 931d7ebb746SSotiris Apostolakis 932d7ebb746SSotiris Apostolakis ScaledNumber<uint64_t> 933d7ebb746SSotiris Apostolakis SelectOptimize::getMispredictionCost(const SelectInst *SI, 934d7ebb746SSotiris Apostolakis const Scaled64 CondCost) { 935d7ebb746SSotiris Apostolakis uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty; 936d7ebb746SSotiris Apostolakis 937d7ebb746SSotiris Apostolakis // Account for the default misprediction rate when using a branch 938d7ebb746SSotiris Apostolakis // (conservatively set to 25% by default). 939d7ebb746SSotiris Apostolakis uint64_t MispredictRate = MispredictDefaultRate; 940d7ebb746SSotiris Apostolakis // If the select condition is obviously predictable, then the misprediction 941d7ebb746SSotiris Apostolakis // rate is zero. 942d7ebb746SSotiris Apostolakis if (isSelectHighlyPredictable(SI)) 943d7ebb746SSotiris Apostolakis MispredictRate = 0; 944d7ebb746SSotiris Apostolakis 945d7ebb746SSotiris Apostolakis // CondCost is included to account for cases where the computation of the 946d7ebb746SSotiris Apostolakis // condition is part of a long dependence chain (potentially loop-carried) 947d7ebb746SSotiris Apostolakis // that would delay detection of a misprediction and increase its cost. 948d7ebb746SSotiris Apostolakis Scaled64 MispredictCost = 949d7ebb746SSotiris Apostolakis std::max(Scaled64::get(MispredictPenalty), CondCost) * 950d7ebb746SSotiris Apostolakis Scaled64::get(MispredictRate); 951d7ebb746SSotiris Apostolakis MispredictCost /= Scaled64::get(100); 952d7ebb746SSotiris Apostolakis 953d7ebb746SSotiris Apostolakis return MispredictCost; 954d7ebb746SSotiris Apostolakis } 955d7ebb746SSotiris Apostolakis 956d7ebb746SSotiris Apostolakis // Returns the cost of a branch when the prediction is correct. 957d7ebb746SSotiris Apostolakis // TrueCost * TrueProbability + FalseCost * FalseProbability. 958d7ebb746SSotiris Apostolakis ScaledNumber<uint64_t> 959d7ebb746SSotiris Apostolakis SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, 960d7ebb746SSotiris Apostolakis const SelectInst *SI) { 961d7ebb746SSotiris Apostolakis Scaled64 PredPathCost; 962d7ebb746SSotiris Apostolakis uint64_t TrueWeight, FalseWeight; 963d7ebb746SSotiris Apostolakis if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { 964d7ebb746SSotiris Apostolakis uint64_t SumWeight = TrueWeight + FalseWeight; 965d7ebb746SSotiris Apostolakis if (SumWeight != 0) { 966d7ebb746SSotiris Apostolakis PredPathCost = TrueCost * Scaled64::get(TrueWeight) + 967d7ebb746SSotiris Apostolakis FalseCost * Scaled64::get(FalseWeight); 968d7ebb746SSotiris Apostolakis PredPathCost /= Scaled64::get(SumWeight); 969d7ebb746SSotiris Apostolakis return PredPathCost; 970d7ebb746SSotiris Apostolakis } 971d7ebb746SSotiris Apostolakis } 972d7ebb746SSotiris Apostolakis // Without branch weight metadata, we assume 75% for the one path and 25% for 973d7ebb746SSotiris Apostolakis // the other, and pick the result with the biggest cost. 974d7ebb746SSotiris Apostolakis PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost, 975d7ebb746SSotiris Apostolakis FalseCost * Scaled64::get(3) + TrueCost); 976d7ebb746SSotiris Apostolakis PredPathCost /= Scaled64::get(4); 977d7ebb746SSotiris Apostolakis return PredPathCost; 978d7ebb746SSotiris Apostolakis } 979d7ebb746SSotiris Apostolakis 98097c3ef5cSSotiris Apostolakis bool SelectOptimize::isSelectKindSupported(SelectInst *SI) { 98197c3ef5cSSotiris Apostolakis bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); 98297c3ef5cSSotiris Apostolakis if (VectorCond) 98397c3ef5cSSotiris Apostolakis return false; 98497c3ef5cSSotiris Apostolakis TargetLowering::SelectSupportKind SelectKind; 98597c3ef5cSSotiris Apostolakis if (SI->getType()->isVectorTy()) 98697c3ef5cSSotiris Apostolakis SelectKind = TargetLowering::ScalarCondVectorVal; 98797c3ef5cSSotiris Apostolakis else 98897c3ef5cSSotiris Apostolakis SelectKind = TargetLowering::ScalarValSelect; 98997c3ef5cSSotiris Apostolakis return TLI->isSelectSupported(SelectKind); 990ca7c307dSSotiris Apostolakis } 991