1ca7c307dSSotiris Apostolakis //===--- SelectOptimize.cpp - Convert select to branches if profitable ---===// 2ca7c307dSSotiris Apostolakis // 3ca7c307dSSotiris Apostolakis // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4ca7c307dSSotiris Apostolakis // See https://llvm.org/LICENSE.txt for license information. 5ca7c307dSSotiris Apostolakis // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6ca7c307dSSotiris Apostolakis // 7ca7c307dSSotiris Apostolakis //===----------------------------------------------------------------------===// 8ca7c307dSSotiris Apostolakis // 9ca7c307dSSotiris Apostolakis // This pass converts selects to conditional jumps when profitable. 10ca7c307dSSotiris Apostolakis // 11ca7c307dSSotiris Apostolakis //===----------------------------------------------------------------------===// 12ca7c307dSSotiris Apostolakis 1397c3ef5cSSotiris Apostolakis #include "llvm/ADT/SmallVector.h" 1497c3ef5cSSotiris Apostolakis #include "llvm/ADT/Statistic.h" 1597c3ef5cSSotiris Apostolakis #include "llvm/Analysis/BlockFrequencyInfo.h" 1697c3ef5cSSotiris Apostolakis #include "llvm/Analysis/BranchProbabilityInfo.h" 1797c3ef5cSSotiris Apostolakis #include "llvm/Analysis/LoopInfo.h" 18*8b42bc56SSotiris Apostolakis #include "llvm/Analysis/OptimizationRemarkEmitter.h" 19*8b42bc56SSotiris Apostolakis #include "llvm/Analysis/ProfileSummaryInfo.h" 20*8b42bc56SSotiris Apostolakis #include "llvm/Analysis/TargetTransformInfo.h" 21ca7c307dSSotiris Apostolakis #include "llvm/CodeGen/Passes.h" 2297c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetLowering.h" 2397c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetPassConfig.h" 2497c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetSchedule.h" 2597c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetSubtargetInfo.h" 2697c3ef5cSSotiris Apostolakis #include "llvm/IR/BasicBlock.h" 27*8b42bc56SSotiris Apostolakis #include "llvm/IR/Dominators.h" 28ca7c307dSSotiris Apostolakis #include "llvm/IR/Function.h" 2997c3ef5cSSotiris Apostolakis #include "llvm/IR/IRBuilder.h" 3097c3ef5cSSotiris Apostolakis #include "llvm/IR/Instruction.h" 31ca7c307dSSotiris Apostolakis #include "llvm/InitializePasses.h" 32ca7c307dSSotiris Apostolakis #include "llvm/Pass.h" 3397c3ef5cSSotiris Apostolakis #include "llvm/Target/TargetMachine.h" 34*8b42bc56SSotiris Apostolakis #include "llvm/Transforms/Utils/SizeOpts.h" 35*8b42bc56SSotiris Apostolakis #include <algorithm> 36*8b42bc56SSotiris Apostolakis #include <memory> 37*8b42bc56SSotiris Apostolakis #include <queue> 38*8b42bc56SSotiris Apostolakis #include <stack> 39*8b42bc56SSotiris Apostolakis #include <string> 40ca7c307dSSotiris Apostolakis 41ca7c307dSSotiris Apostolakis using namespace llvm; 42ca7c307dSSotiris Apostolakis 4397c3ef5cSSotiris Apostolakis #define DEBUG_TYPE "select-optimize" 4497c3ef5cSSotiris Apostolakis 45*8b42bc56SSotiris Apostolakis STATISTIC(NumSelectOptAnalyzed, 46*8b42bc56SSotiris Apostolakis "Number of select groups considered for conversion to branch"); 47*8b42bc56SSotiris Apostolakis STATISTIC(NumSelectConvertedExpColdOperand, 48*8b42bc56SSotiris Apostolakis "Number of select groups converted due to expensive cold operand"); 49*8b42bc56SSotiris Apostolakis STATISTIC(NumSelectConvertedHighPred, 50*8b42bc56SSotiris Apostolakis "Number of select groups converted due to high-predictability"); 51*8b42bc56SSotiris Apostolakis STATISTIC(NumSelectUnPred, 52*8b42bc56SSotiris Apostolakis "Number of select groups not converted due to unpredictability"); 53*8b42bc56SSotiris Apostolakis STATISTIC(NumSelectColdBB, 54*8b42bc56SSotiris Apostolakis "Number of select groups not converted due to cold basic block"); 5597c3ef5cSSotiris Apostolakis STATISTIC(NumSelectsConverted, "Number of selects converted"); 5697c3ef5cSSotiris Apostolakis 57*8b42bc56SSotiris Apostolakis static cl::opt<unsigned> ColdOperandThreshold( 58*8b42bc56SSotiris Apostolakis "cold-operand-threshold", 59*8b42bc56SSotiris Apostolakis cl::desc("Maximum frequency of path for an operand to be considered cold."), 60*8b42bc56SSotiris Apostolakis cl::init(20), cl::Hidden); 61*8b42bc56SSotiris Apostolakis 62*8b42bc56SSotiris Apostolakis static cl::opt<unsigned> ColdOperandMaxCostMultiplier( 63*8b42bc56SSotiris Apostolakis "cold-operand-max-cost-multiplier", 64*8b42bc56SSotiris Apostolakis cl::desc("Maximum cost multiplier of TCC_expensive for the dependence " 65*8b42bc56SSotiris Apostolakis "slice of a cold operand to be considered inexpensive."), 66*8b42bc56SSotiris Apostolakis cl::init(1), cl::Hidden); 67*8b42bc56SSotiris Apostolakis 68ca7c307dSSotiris Apostolakis namespace { 69ca7c307dSSotiris Apostolakis 70ca7c307dSSotiris Apostolakis class SelectOptimize : public FunctionPass { 7197c3ef5cSSotiris Apostolakis const TargetMachine *TM = nullptr; 7297c3ef5cSSotiris Apostolakis const TargetSubtargetInfo *TSI; 7397c3ef5cSSotiris Apostolakis const TargetLowering *TLI = nullptr; 74*8b42bc56SSotiris Apostolakis const TargetTransformInfo *TTI = nullptr; 7597c3ef5cSSotiris Apostolakis const LoopInfo *LI; 76*8b42bc56SSotiris Apostolakis DominatorTree *DT; 7797c3ef5cSSotiris Apostolakis std::unique_ptr<BlockFrequencyInfo> BFI; 7897c3ef5cSSotiris Apostolakis std::unique_ptr<BranchProbabilityInfo> BPI; 79*8b42bc56SSotiris Apostolakis ProfileSummaryInfo *PSI; 80*8b42bc56SSotiris Apostolakis OptimizationRemarkEmitter *ORE; 8197c3ef5cSSotiris Apostolakis 82ca7c307dSSotiris Apostolakis public: 83ca7c307dSSotiris Apostolakis static char ID; 84*8b42bc56SSotiris Apostolakis 85ca7c307dSSotiris Apostolakis SelectOptimize() : FunctionPass(ID) { 86ca7c307dSSotiris Apostolakis initializeSelectOptimizePass(*PassRegistry::getPassRegistry()); 87ca7c307dSSotiris Apostolakis } 88ca7c307dSSotiris Apostolakis 89ca7c307dSSotiris Apostolakis bool runOnFunction(Function &F) override; 90ca7c307dSSotiris Apostolakis 9197c3ef5cSSotiris Apostolakis void getAnalysisUsage(AnalysisUsage &AU) const override { 92*8b42bc56SSotiris Apostolakis AU.addRequired<ProfileSummaryInfoWrapperPass>(); 9397c3ef5cSSotiris Apostolakis AU.addRequired<TargetPassConfig>(); 94*8b42bc56SSotiris Apostolakis AU.addRequired<TargetTransformInfoWrapperPass>(); 95*8b42bc56SSotiris Apostolakis AU.addRequired<DominatorTreeWrapperPass>(); 9697c3ef5cSSotiris Apostolakis AU.addRequired<LoopInfoWrapperPass>(); 97*8b42bc56SSotiris Apostolakis AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 9897c3ef5cSSotiris Apostolakis } 9997c3ef5cSSotiris Apostolakis 10097c3ef5cSSotiris Apostolakis private: 10197c3ef5cSSotiris Apostolakis // Select groups consist of consecutive select instructions with the same 10297c3ef5cSSotiris Apostolakis // condition. 10397c3ef5cSSotiris Apostolakis using SelectGroup = SmallVector<SelectInst *, 2>; 10497c3ef5cSSotiris Apostolakis using SelectGroups = SmallVector<SelectGroup, 2>; 10597c3ef5cSSotiris Apostolakis 106*8b42bc56SSotiris Apostolakis // Converts select instructions of a function to conditional jumps when deemed 107*8b42bc56SSotiris Apostolakis // profitable. Returns true if at least one select was converted. 10897c3ef5cSSotiris Apostolakis bool optimizeSelects(Function &F); 109*8b42bc56SSotiris Apostolakis 110*8b42bc56SSotiris Apostolakis // Heuristics for determining which select instructions can be profitably 111*8b42bc56SSotiris Apostolakis // conveted to branches. Separate heuristics for selects in inner-most loops 112*8b42bc56SSotiris Apostolakis // and the rest of code regions (base heuristics for non-inner-most loop 113*8b42bc56SSotiris Apostolakis // regions). 114*8b42bc56SSotiris Apostolakis void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups); 115*8b42bc56SSotiris Apostolakis void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups); 116*8b42bc56SSotiris Apostolakis 117*8b42bc56SSotiris Apostolakis // Converts to branches the select groups that were deemed 118*8b42bc56SSotiris Apostolakis // profitable-to-convert. 11997c3ef5cSSotiris Apostolakis void convertProfitableSIGroups(SelectGroups &ProfSIGroups); 120*8b42bc56SSotiris Apostolakis 121*8b42bc56SSotiris Apostolakis // Splits selects of a given basic block into select groups. 12297c3ef5cSSotiris Apostolakis void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups); 123*8b42bc56SSotiris Apostolakis 124*8b42bc56SSotiris Apostolakis // Determines for which select groups it is profitable converting to branches 125*8b42bc56SSotiris Apostolakis // (base heuristics). 126*8b42bc56SSotiris Apostolakis void findProfitableSIGroupsBase(SelectGroups &SIGroups, 127*8b42bc56SSotiris Apostolakis SelectGroups &ProfSIGroups); 128*8b42bc56SSotiris Apostolakis // Determines if a select group should be converted to a branch (base 129*8b42bc56SSotiris Apostolakis // heuristics). 130*8b42bc56SSotiris Apostolakis bool isConvertToBranchProfitableBase(const SmallVector<SelectInst *, 2> &ASI); 131*8b42bc56SSotiris Apostolakis 132*8b42bc56SSotiris Apostolakis // Returns true if there are expensive instructions in the cold value 133*8b42bc56SSotiris Apostolakis // operand's (if any) dependence slice of any of the selects of the given 134*8b42bc56SSotiris Apostolakis // group. 135*8b42bc56SSotiris Apostolakis bool hasExpensiveColdOperand(const SmallVector<SelectInst *, 2> &ASI); 136*8b42bc56SSotiris Apostolakis 137*8b42bc56SSotiris Apostolakis // For a given source instruction, collect its backwards dependence slice 138*8b42bc56SSotiris Apostolakis // consisting of instructions exclusively computed for producing the operands 139*8b42bc56SSotiris Apostolakis // of the source instruction. 140*8b42bc56SSotiris Apostolakis void getExclBackwardsSlice(Instruction *I, 141*8b42bc56SSotiris Apostolakis SmallVector<Instruction *, 2> &Slice); 142*8b42bc56SSotiris Apostolakis 143*8b42bc56SSotiris Apostolakis // Returns true if the condition of the select is highly predictable. 144*8b42bc56SSotiris Apostolakis bool isSelectHighlyPredictable(const SelectInst *SI); 145*8b42bc56SSotiris Apostolakis 146*8b42bc56SSotiris Apostolakis // Returns true if the target architecture supports lowering a given select. 14797c3ef5cSSotiris Apostolakis bool isSelectKindSupported(SelectInst *SI); 148ca7c307dSSotiris Apostolakis }; 149ca7c307dSSotiris Apostolakis } // namespace 150ca7c307dSSotiris Apostolakis 151ca7c307dSSotiris Apostolakis char SelectOptimize::ID = 0; 15297c3ef5cSSotiris Apostolakis 15397c3ef5cSSotiris Apostolakis INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, 15497c3ef5cSSotiris Apostolakis false) 15597c3ef5cSSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 156*8b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 157*8b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 15897c3ef5cSSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 159*8b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 160*8b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 16197c3ef5cSSotiris Apostolakis INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, 162ca7c307dSSotiris Apostolakis false) 163ca7c307dSSotiris Apostolakis 164ca7c307dSSotiris Apostolakis FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); } 165ca7c307dSSotiris Apostolakis 166ca7c307dSSotiris Apostolakis bool SelectOptimize::runOnFunction(Function &F) { 16797c3ef5cSSotiris Apostolakis TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); 16897c3ef5cSSotiris Apostolakis TSI = TM->getSubtargetImpl(F); 16997c3ef5cSSotiris Apostolakis TLI = TSI->getTargetLowering(); 170*8b42bc56SSotiris Apostolakis 171*8b42bc56SSotiris Apostolakis // If none of the select types is supported then skip this pass. 172*8b42bc56SSotiris Apostolakis // This is an optimization pass. Legality issues will be handled by 173*8b42bc56SSotiris Apostolakis // instruction selection. 174*8b42bc56SSotiris Apostolakis if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && 175*8b42bc56SSotiris Apostolakis !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && 176*8b42bc56SSotiris Apostolakis !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) 177*8b42bc56SSotiris Apostolakis return false; 178*8b42bc56SSotiris Apostolakis 179*8b42bc56SSotiris Apostolakis TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 180*8b42bc56SSotiris Apostolakis DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 18197c3ef5cSSotiris Apostolakis LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 18297c3ef5cSSotiris Apostolakis BPI.reset(new BranchProbabilityInfo(F, *LI)); 18397c3ef5cSSotiris Apostolakis BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); 184*8b42bc56SSotiris Apostolakis PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 185*8b42bc56SSotiris Apostolakis ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 186*8b42bc56SSotiris Apostolakis 187*8b42bc56SSotiris Apostolakis // When optimizing for size, selects are preferable over branches. 188*8b42bc56SSotiris Apostolakis if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get())) 189*8b42bc56SSotiris Apostolakis return false; 19097c3ef5cSSotiris Apostolakis 19197c3ef5cSSotiris Apostolakis return optimizeSelects(F); 19297c3ef5cSSotiris Apostolakis } 19397c3ef5cSSotiris Apostolakis 19497c3ef5cSSotiris Apostolakis bool SelectOptimize::optimizeSelects(Function &F) { 19597c3ef5cSSotiris Apostolakis // Determine for which select groups it is profitable converting to branches. 19697c3ef5cSSotiris Apostolakis SelectGroups ProfSIGroups; 197*8b42bc56SSotiris Apostolakis // Base heuristics apply only to non-loops and outer loops. 198*8b42bc56SSotiris Apostolakis optimizeSelectsBase(F, ProfSIGroups); 199*8b42bc56SSotiris Apostolakis // Separate heuristics for inner-most loops. 200*8b42bc56SSotiris Apostolakis optimizeSelectsInnerLoops(F, ProfSIGroups); 20197c3ef5cSSotiris Apostolakis 20297c3ef5cSSotiris Apostolakis // Convert to branches the select groups that were deemed 20397c3ef5cSSotiris Apostolakis // profitable-to-convert. 20497c3ef5cSSotiris Apostolakis convertProfitableSIGroups(ProfSIGroups); 20597c3ef5cSSotiris Apostolakis 20697c3ef5cSSotiris Apostolakis // Code modified if at least one select group was converted. 20797c3ef5cSSotiris Apostolakis return !ProfSIGroups.empty(); 20897c3ef5cSSotiris Apostolakis } 20997c3ef5cSSotiris Apostolakis 210*8b42bc56SSotiris Apostolakis void SelectOptimize::optimizeSelectsBase(Function &F, 211*8b42bc56SSotiris Apostolakis SelectGroups &ProfSIGroups) { 212*8b42bc56SSotiris Apostolakis // Collect all the select groups. 213*8b42bc56SSotiris Apostolakis SelectGroups SIGroups; 214*8b42bc56SSotiris Apostolakis for (BasicBlock &BB : F) { 215*8b42bc56SSotiris Apostolakis // Base heuristics apply only to non-loops and outer loops. 216*8b42bc56SSotiris Apostolakis Loop *L = LI->getLoopFor(&BB); 217*8b42bc56SSotiris Apostolakis if (L && L->isInnermost()) 218*8b42bc56SSotiris Apostolakis continue; 219*8b42bc56SSotiris Apostolakis collectSelectGroups(BB, SIGroups); 220*8b42bc56SSotiris Apostolakis } 221*8b42bc56SSotiris Apostolakis 222*8b42bc56SSotiris Apostolakis // Determine for which select groups it is profitable converting to branches. 223*8b42bc56SSotiris Apostolakis findProfitableSIGroupsBase(SIGroups, ProfSIGroups); 224*8b42bc56SSotiris Apostolakis } 225*8b42bc56SSotiris Apostolakis 226*8b42bc56SSotiris Apostolakis void SelectOptimize::optimizeSelectsInnerLoops(Function &F, 227*8b42bc56SSotiris Apostolakis SelectGroups &ProfSIGroups) {} 228*8b42bc56SSotiris Apostolakis 22997c3ef5cSSotiris Apostolakis /// If \p isTrue is true, return the true value of \p SI, otherwise return 23097c3ef5cSSotiris Apostolakis /// false value of \p SI. If the true/false value of \p SI is defined by any 23197c3ef5cSSotiris Apostolakis /// select instructions in \p Selects, look through the defining select 23297c3ef5cSSotiris Apostolakis /// instruction until the true/false value is not defined in \p Selects. 23397c3ef5cSSotiris Apostolakis static Value * 23497c3ef5cSSotiris Apostolakis getTrueOrFalseValue(SelectInst *SI, bool isTrue, 23597c3ef5cSSotiris Apostolakis const SmallPtrSet<const Instruction *, 2> &Selects) { 23697c3ef5cSSotiris Apostolakis Value *V = nullptr; 23797c3ef5cSSotiris Apostolakis for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI); 23897c3ef5cSSotiris Apostolakis DefSI = dyn_cast<SelectInst>(V)) { 23997c3ef5cSSotiris Apostolakis assert(DefSI->getCondition() == SI->getCondition() && 24097c3ef5cSSotiris Apostolakis "The condition of DefSI does not match with SI"); 24197c3ef5cSSotiris Apostolakis V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); 24297c3ef5cSSotiris Apostolakis } 24397c3ef5cSSotiris Apostolakis assert(V && "Failed to get select true/false value"); 24497c3ef5cSSotiris Apostolakis return V; 24597c3ef5cSSotiris Apostolakis } 24697c3ef5cSSotiris Apostolakis 24797c3ef5cSSotiris Apostolakis void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { 24897c3ef5cSSotiris Apostolakis for (SelectGroup &ASI : ProfSIGroups) { 24997c3ef5cSSotiris Apostolakis // TODO: eliminate the redundancy of logic transforming selects to branches 25097c3ef5cSSotiris Apostolakis // by removing CodeGenPrepare::optimizeSelectInst and optimizing here 25197c3ef5cSSotiris Apostolakis // selects for all cases (with and without profile information). 25297c3ef5cSSotiris Apostolakis 25397c3ef5cSSotiris Apostolakis // Transform a sequence like this: 25497c3ef5cSSotiris Apostolakis // start: 25597c3ef5cSSotiris Apostolakis // %cmp = cmp uge i32 %a, %b 25697c3ef5cSSotiris Apostolakis // %sel = select i1 %cmp, i32 %c, i32 %d 25797c3ef5cSSotiris Apostolakis // 25897c3ef5cSSotiris Apostolakis // Into: 25997c3ef5cSSotiris Apostolakis // start: 26097c3ef5cSSotiris Apostolakis // %cmp = cmp uge i32 %a, %b 26197c3ef5cSSotiris Apostolakis // %cmp.frozen = freeze %cmp 26297c3ef5cSSotiris Apostolakis // br i1 %cmp.frozen, label %select.end, label %select.false 26397c3ef5cSSotiris Apostolakis // select.false: 26497c3ef5cSSotiris Apostolakis // br label %select.end 26597c3ef5cSSotiris Apostolakis // select.end: 26697c3ef5cSSotiris Apostolakis // %sel = phi i32 [ %c, %start ], [ %d, %select.false ] 26797c3ef5cSSotiris Apostolakis // 26897c3ef5cSSotiris Apostolakis // %cmp should be frozen, otherwise it may introduce undefined behavior. 26997c3ef5cSSotiris Apostolakis 27097c3ef5cSSotiris Apostolakis // We split the block containing the select(s) into two blocks. 27197c3ef5cSSotiris Apostolakis SelectInst *SI = ASI.front(); 27297c3ef5cSSotiris Apostolakis SelectInst *LastSI = ASI.back(); 27397c3ef5cSSotiris Apostolakis BasicBlock *StartBlock = SI->getParent(); 27497c3ef5cSSotiris Apostolakis BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI)); 27597c3ef5cSSotiris Apostolakis BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); 27697c3ef5cSSotiris Apostolakis BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency()); 27797c3ef5cSSotiris Apostolakis // Delete the unconditional branch that was just created by the split. 27897c3ef5cSSotiris Apostolakis StartBlock->getTerminator()->eraseFromParent(); 27997c3ef5cSSotiris Apostolakis 28097c3ef5cSSotiris Apostolakis // Move any debug/pseudo instructions that were in-between the select 28197c3ef5cSSotiris Apostolakis // group to the newly-created end block. 28297c3ef5cSSotiris Apostolakis SmallVector<Instruction *, 2> DebugPseudoINS; 28397c3ef5cSSotiris Apostolakis auto DIt = SI->getIterator(); 28497c3ef5cSSotiris Apostolakis while (&*DIt != LastSI) { 28597c3ef5cSSotiris Apostolakis if (DIt->isDebugOrPseudoInst()) 28697c3ef5cSSotiris Apostolakis DebugPseudoINS.push_back(&*DIt); 28797c3ef5cSSotiris Apostolakis DIt++; 28897c3ef5cSSotiris Apostolakis } 28997c3ef5cSSotiris Apostolakis for (auto DI : DebugPseudoINS) { 29097c3ef5cSSotiris Apostolakis DI->moveBefore(&*EndBlock->getFirstInsertionPt()); 29197c3ef5cSSotiris Apostolakis } 29297c3ef5cSSotiris Apostolakis 29397c3ef5cSSotiris Apostolakis // These are the new basic blocks for the conditional branch. 29497c3ef5cSSotiris Apostolakis // For now, no instruction sinking to the true/false blocks. 29597c3ef5cSSotiris Apostolakis // Thus both True and False blocks will be empty. 29697c3ef5cSSotiris Apostolakis BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr; 29797c3ef5cSSotiris Apostolakis 29897c3ef5cSSotiris Apostolakis // Use the 'false' side for a new input value to the PHI. 29997c3ef5cSSotiris Apostolakis FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", 30097c3ef5cSSotiris Apostolakis EndBlock->getParent(), EndBlock); 30197c3ef5cSSotiris Apostolakis auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); 30297c3ef5cSSotiris Apostolakis FalseBranch->setDebugLoc(SI->getDebugLoc()); 30397c3ef5cSSotiris Apostolakis 30497c3ef5cSSotiris Apostolakis // For the 'true' side the path originates from the start block from the 30597c3ef5cSSotiris Apostolakis // point view of the new PHI. 30697c3ef5cSSotiris Apostolakis TrueBlock = StartBlock; 30797c3ef5cSSotiris Apostolakis 30897c3ef5cSSotiris Apostolakis // Insert the real conditional branch based on the original condition. 30997c3ef5cSSotiris Apostolakis BasicBlock *TT, *FT; 31097c3ef5cSSotiris Apostolakis TT = EndBlock; 31197c3ef5cSSotiris Apostolakis FT = FalseBlock; 31297c3ef5cSSotiris Apostolakis IRBuilder<> IB(SI); 31397c3ef5cSSotiris Apostolakis auto *CondFr = 31497c3ef5cSSotiris Apostolakis IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen"); 31597c3ef5cSSotiris Apostolakis IB.CreateCondBr(CondFr, TT, FT, SI); 31697c3ef5cSSotiris Apostolakis 31797c3ef5cSSotiris Apostolakis SmallPtrSet<const Instruction *, 2> INS; 31897c3ef5cSSotiris Apostolakis INS.insert(ASI.begin(), ASI.end()); 31997c3ef5cSSotiris Apostolakis // Use reverse iterator because later select may use the value of the 32097c3ef5cSSotiris Apostolakis // earlier select, and we need to propagate value through earlier select 32197c3ef5cSSotiris Apostolakis // to get the PHI operand. 32297c3ef5cSSotiris Apostolakis for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) { 32397c3ef5cSSotiris Apostolakis SelectInst *SI = *It; 32497c3ef5cSSotiris Apostolakis // The select itself is replaced with a PHI Node. 32597c3ef5cSSotiris Apostolakis PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); 32697c3ef5cSSotiris Apostolakis PN->takeName(SI); 32797c3ef5cSSotiris Apostolakis PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock); 32897c3ef5cSSotiris Apostolakis PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); 32997c3ef5cSSotiris Apostolakis PN->setDebugLoc(SI->getDebugLoc()); 33097c3ef5cSSotiris Apostolakis 33197c3ef5cSSotiris Apostolakis SI->replaceAllUsesWith(PN); 33297c3ef5cSSotiris Apostolakis SI->eraseFromParent(); 33397c3ef5cSSotiris Apostolakis INS.erase(SI); 33497c3ef5cSSotiris Apostolakis ++NumSelectsConverted; 33597c3ef5cSSotiris Apostolakis } 33697c3ef5cSSotiris Apostolakis } 33797c3ef5cSSotiris Apostolakis } 33897c3ef5cSSotiris Apostolakis 33997c3ef5cSSotiris Apostolakis void SelectOptimize::collectSelectGroups(BasicBlock &BB, 34097c3ef5cSSotiris Apostolakis SelectGroups &SIGroups) { 34197c3ef5cSSotiris Apostolakis BasicBlock::iterator BBIt = BB.begin(); 34297c3ef5cSSotiris Apostolakis while (BBIt != BB.end()) { 34397c3ef5cSSotiris Apostolakis Instruction *I = &*BBIt++; 34497c3ef5cSSotiris Apostolakis if (SelectInst *SI = dyn_cast<SelectInst>(I)) { 34597c3ef5cSSotiris Apostolakis SelectGroup SIGroup; 34697c3ef5cSSotiris Apostolakis SIGroup.push_back(SI); 34797c3ef5cSSotiris Apostolakis while (BBIt != BB.end()) { 34897c3ef5cSSotiris Apostolakis Instruction *NI = &*BBIt; 34997c3ef5cSSotiris Apostolakis SelectInst *NSI = dyn_cast<SelectInst>(NI); 35097c3ef5cSSotiris Apostolakis if (NSI && SI->getCondition() == NSI->getCondition()) { 35197c3ef5cSSotiris Apostolakis SIGroup.push_back(NSI); 35297c3ef5cSSotiris Apostolakis } else if (!NI->isDebugOrPseudoInst()) { 35397c3ef5cSSotiris Apostolakis // Debug/pseudo instructions should be skipped and not prevent the 35497c3ef5cSSotiris Apostolakis // formation of a select group. 35597c3ef5cSSotiris Apostolakis break; 35697c3ef5cSSotiris Apostolakis } 35797c3ef5cSSotiris Apostolakis ++BBIt; 35897c3ef5cSSotiris Apostolakis } 35997c3ef5cSSotiris Apostolakis 36097c3ef5cSSotiris Apostolakis // If the select type is not supported, no point optimizing it. 36197c3ef5cSSotiris Apostolakis // Instruction selection will take care of it. 36297c3ef5cSSotiris Apostolakis if (!isSelectKindSupported(SI)) 36397c3ef5cSSotiris Apostolakis continue; 36497c3ef5cSSotiris Apostolakis 36597c3ef5cSSotiris Apostolakis SIGroups.push_back(SIGroup); 36697c3ef5cSSotiris Apostolakis } 36797c3ef5cSSotiris Apostolakis } 36897c3ef5cSSotiris Apostolakis } 36997c3ef5cSSotiris Apostolakis 370*8b42bc56SSotiris Apostolakis void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups, 371*8b42bc56SSotiris Apostolakis SelectGroups &ProfSIGroups) { 372*8b42bc56SSotiris Apostolakis for (SelectGroup &ASI : SIGroups) { 373*8b42bc56SSotiris Apostolakis ++NumSelectOptAnalyzed; 374*8b42bc56SSotiris Apostolakis if (isConvertToBranchProfitableBase(ASI)) 375*8b42bc56SSotiris Apostolakis ProfSIGroups.push_back(ASI); 376*8b42bc56SSotiris Apostolakis } 377*8b42bc56SSotiris Apostolakis } 378*8b42bc56SSotiris Apostolakis 379*8b42bc56SSotiris Apostolakis bool SelectOptimize::isConvertToBranchProfitableBase( 380*8b42bc56SSotiris Apostolakis const SmallVector<SelectInst *, 2> &ASI) { 381*8b42bc56SSotiris Apostolakis SelectInst *SI = ASI.front(); 382*8b42bc56SSotiris Apostolakis OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI); 383*8b42bc56SSotiris Apostolakis OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI); 384*8b42bc56SSotiris Apostolakis 385*8b42bc56SSotiris Apostolakis // Skip cold basic blocks. Better to optimize for size for cold blocks. 386*8b42bc56SSotiris Apostolakis if (PSI->isColdBlock(SI->getParent(), BFI.get())) { 387*8b42bc56SSotiris Apostolakis ++NumSelectColdBB; 388*8b42bc56SSotiris Apostolakis ORmiss << "Not converted to branch because of cold basic block. "; 389*8b42bc56SSotiris Apostolakis ORE->emit(ORmiss); 390*8b42bc56SSotiris Apostolakis return false; 391*8b42bc56SSotiris Apostolakis } 392*8b42bc56SSotiris Apostolakis 393*8b42bc56SSotiris Apostolakis // If unpredictable, branch form is less profitable. 394*8b42bc56SSotiris Apostolakis if (SI->getMetadata(LLVMContext::MD_unpredictable)) { 395*8b42bc56SSotiris Apostolakis ++NumSelectUnPred; 396*8b42bc56SSotiris Apostolakis ORmiss << "Not converted to branch because of unpredictable branch. "; 397*8b42bc56SSotiris Apostolakis ORE->emit(ORmiss); 398*8b42bc56SSotiris Apostolakis return false; 399*8b42bc56SSotiris Apostolakis } 400*8b42bc56SSotiris Apostolakis 401*8b42bc56SSotiris Apostolakis // If highly predictable, branch form is more profitable, unless a 402*8b42bc56SSotiris Apostolakis // predictable select is inexpensive in the target architecture. 403*8b42bc56SSotiris Apostolakis if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) { 404*8b42bc56SSotiris Apostolakis ++NumSelectConvertedHighPred; 405*8b42bc56SSotiris Apostolakis OR << "Converted to branch because of highly predictable branch. "; 406*8b42bc56SSotiris Apostolakis ORE->emit(OR); 407*8b42bc56SSotiris Apostolakis return true; 408*8b42bc56SSotiris Apostolakis } 409*8b42bc56SSotiris Apostolakis 410*8b42bc56SSotiris Apostolakis // Look for expensive instructions in the cold operand's (if any) dependence 411*8b42bc56SSotiris Apostolakis // slice of any of the selects in the group. 412*8b42bc56SSotiris Apostolakis if (hasExpensiveColdOperand(ASI)) { 413*8b42bc56SSotiris Apostolakis ++NumSelectConvertedExpColdOperand; 414*8b42bc56SSotiris Apostolakis OR << "Converted to branch because of expensive cold operand."; 415*8b42bc56SSotiris Apostolakis ORE->emit(OR); 416*8b42bc56SSotiris Apostolakis return true; 417*8b42bc56SSotiris Apostolakis } 418*8b42bc56SSotiris Apostolakis 419*8b42bc56SSotiris Apostolakis ORmiss << "Not profitable to convert to branch (base heuristic)."; 420*8b42bc56SSotiris Apostolakis ORE->emit(ORmiss); 421*8b42bc56SSotiris Apostolakis return false; 422*8b42bc56SSotiris Apostolakis } 423*8b42bc56SSotiris Apostolakis 424*8b42bc56SSotiris Apostolakis static InstructionCost divideNearest(InstructionCost Numerator, 425*8b42bc56SSotiris Apostolakis uint64_t Denominator) { 426*8b42bc56SSotiris Apostolakis return (Numerator + (Denominator / 2)) / Denominator; 427*8b42bc56SSotiris Apostolakis } 428*8b42bc56SSotiris Apostolakis 429*8b42bc56SSotiris Apostolakis bool SelectOptimize::hasExpensiveColdOperand( 430*8b42bc56SSotiris Apostolakis const SmallVector<SelectInst *, 2> &ASI) { 431*8b42bc56SSotiris Apostolakis bool ColdOperand = false; 432*8b42bc56SSotiris Apostolakis uint64_t TrueWeight, FalseWeight, TotalWeight; 433*8b42bc56SSotiris Apostolakis if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) { 434*8b42bc56SSotiris Apostolakis uint64_t MinWeight = std::min(TrueWeight, FalseWeight); 435*8b42bc56SSotiris Apostolakis TotalWeight = TrueWeight + FalseWeight; 436*8b42bc56SSotiris Apostolakis // Is there a path with frequency <ColdOperandThreshold% (default:20%) ? 437*8b42bc56SSotiris Apostolakis ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight; 438*8b42bc56SSotiris Apostolakis } else if (PSI->hasProfileSummary()) { 439*8b42bc56SSotiris Apostolakis OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front()); 440*8b42bc56SSotiris Apostolakis ORmiss << "Profile data available but missing branch-weights metadata for " 441*8b42bc56SSotiris Apostolakis "select instruction. "; 442*8b42bc56SSotiris Apostolakis ORE->emit(ORmiss); 443*8b42bc56SSotiris Apostolakis } 444*8b42bc56SSotiris Apostolakis if (!ColdOperand) 445*8b42bc56SSotiris Apostolakis return false; 446*8b42bc56SSotiris Apostolakis // Check if the cold path's dependence slice is expensive for any of the 447*8b42bc56SSotiris Apostolakis // selects of the group. 448*8b42bc56SSotiris Apostolakis for (SelectInst *SI : ASI) { 449*8b42bc56SSotiris Apostolakis Instruction *ColdI = nullptr; 450*8b42bc56SSotiris Apostolakis uint64_t HotWeight; 451*8b42bc56SSotiris Apostolakis if (TrueWeight < FalseWeight) { 452*8b42bc56SSotiris Apostolakis ColdI = dyn_cast<Instruction>(SI->getTrueValue()); 453*8b42bc56SSotiris Apostolakis HotWeight = FalseWeight; 454*8b42bc56SSotiris Apostolakis } else { 455*8b42bc56SSotiris Apostolakis ColdI = dyn_cast<Instruction>(SI->getFalseValue()); 456*8b42bc56SSotiris Apostolakis HotWeight = TrueWeight; 457*8b42bc56SSotiris Apostolakis } 458*8b42bc56SSotiris Apostolakis if (ColdI) { 459*8b42bc56SSotiris Apostolakis SmallVector<Instruction *, 2> ColdSlice; 460*8b42bc56SSotiris Apostolakis getExclBackwardsSlice(ColdI, ColdSlice); 461*8b42bc56SSotiris Apostolakis InstructionCost SliceCost = 0; 462*8b42bc56SSotiris Apostolakis for (auto *ColdII : ColdSlice) { 463*8b42bc56SSotiris Apostolakis SliceCost += 464*8b42bc56SSotiris Apostolakis TTI->getInstructionCost(ColdII, TargetTransformInfo::TCK_Latency); 465*8b42bc56SSotiris Apostolakis } 466*8b42bc56SSotiris Apostolakis // The colder the cold value operand of the select is the more expensive 467*8b42bc56SSotiris Apostolakis // the cmov becomes for computing the cold value operand every time. Thus, 468*8b42bc56SSotiris Apostolakis // the colder the cold operand is the more its cost counts. 469*8b42bc56SSotiris Apostolakis // Get nearest integer cost adjusted for coldness. 470*8b42bc56SSotiris Apostolakis InstructionCost AdjSliceCost = 471*8b42bc56SSotiris Apostolakis divideNearest(SliceCost * HotWeight, TotalWeight); 472*8b42bc56SSotiris Apostolakis if (AdjSliceCost >= 473*8b42bc56SSotiris Apostolakis ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive) 474*8b42bc56SSotiris Apostolakis return true; 475*8b42bc56SSotiris Apostolakis } 476*8b42bc56SSotiris Apostolakis } 477*8b42bc56SSotiris Apostolakis return false; 478*8b42bc56SSotiris Apostolakis } 479*8b42bc56SSotiris Apostolakis 480*8b42bc56SSotiris Apostolakis // For a given source instruction, collect its backwards dependence slice 481*8b42bc56SSotiris Apostolakis // consisting of instructions exclusively computed for the purpose of producing 482*8b42bc56SSotiris Apostolakis // the operands of the source instruction. As an approximation 483*8b42bc56SSotiris Apostolakis // (sufficiently-accurate in practice), we populate this set with the 484*8b42bc56SSotiris Apostolakis // instructions of the backwards dependence slice that only have one-use and 485*8b42bc56SSotiris Apostolakis // form an one-use chain that leads to the source instruction. 486*8b42bc56SSotiris Apostolakis void SelectOptimize::getExclBackwardsSlice( 487*8b42bc56SSotiris Apostolakis Instruction *I, SmallVector<Instruction *, 2> &Slice) { 488*8b42bc56SSotiris Apostolakis SmallPtrSet<Instruction *, 2> Visited; 489*8b42bc56SSotiris Apostolakis std::queue<Instruction *> Worklist; 490*8b42bc56SSotiris Apostolakis Worklist.push(I); 491*8b42bc56SSotiris Apostolakis while (!Worklist.empty()) { 492*8b42bc56SSotiris Apostolakis Instruction *II = Worklist.front(); 493*8b42bc56SSotiris Apostolakis Worklist.pop(); 494*8b42bc56SSotiris Apostolakis 495*8b42bc56SSotiris Apostolakis // Avoid cycles. 496*8b42bc56SSotiris Apostolakis if (Visited.count(II)) 497*8b42bc56SSotiris Apostolakis continue; 498*8b42bc56SSotiris Apostolakis Visited.insert(II); 499*8b42bc56SSotiris Apostolakis 500*8b42bc56SSotiris Apostolakis if (!II->hasOneUse()) 501*8b42bc56SSotiris Apostolakis continue; 502*8b42bc56SSotiris Apostolakis 503*8b42bc56SSotiris Apostolakis // Avoid considering instructions with less frequency than the source 504*8b42bc56SSotiris Apostolakis // instruction (i.e., avoid colder code regions of the dependence slice). 505*8b42bc56SSotiris Apostolakis if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent())) 506*8b42bc56SSotiris Apostolakis continue; 507*8b42bc56SSotiris Apostolakis 508*8b42bc56SSotiris Apostolakis // Eligible one-use instruction added to the dependence slice. 509*8b42bc56SSotiris Apostolakis Slice.push_back(II); 510*8b42bc56SSotiris Apostolakis 511*8b42bc56SSotiris Apostolakis // Explore all the operands of the current instruction to expand the slice. 512*8b42bc56SSotiris Apostolakis for (unsigned k = 0; k < II->getNumOperands(); ++k) 513*8b42bc56SSotiris Apostolakis if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k))) 514*8b42bc56SSotiris Apostolakis Worklist.push(OpI); 515*8b42bc56SSotiris Apostolakis } 516*8b42bc56SSotiris Apostolakis } 517*8b42bc56SSotiris Apostolakis 518*8b42bc56SSotiris Apostolakis bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) { 519*8b42bc56SSotiris Apostolakis uint64_t TrueWeight, FalseWeight; 520*8b42bc56SSotiris Apostolakis if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { 521*8b42bc56SSotiris Apostolakis uint64_t Max = std::max(TrueWeight, FalseWeight); 522*8b42bc56SSotiris Apostolakis uint64_t Sum = TrueWeight + FalseWeight; 523*8b42bc56SSotiris Apostolakis if (Sum != 0) { 524*8b42bc56SSotiris Apostolakis auto Probability = BranchProbability::getBranchProbability(Max, Sum); 525*8b42bc56SSotiris Apostolakis if (Probability > TTI->getPredictableBranchThreshold()) 526*8b42bc56SSotiris Apostolakis return true; 527*8b42bc56SSotiris Apostolakis } 528*8b42bc56SSotiris Apostolakis } 529*8b42bc56SSotiris Apostolakis return false; 530*8b42bc56SSotiris Apostolakis } 531*8b42bc56SSotiris Apostolakis 53297c3ef5cSSotiris Apostolakis bool SelectOptimize::isSelectKindSupported(SelectInst *SI) { 53397c3ef5cSSotiris Apostolakis bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); 53497c3ef5cSSotiris Apostolakis if (VectorCond) 53597c3ef5cSSotiris Apostolakis return false; 53697c3ef5cSSotiris Apostolakis TargetLowering::SelectSupportKind SelectKind; 53797c3ef5cSSotiris Apostolakis if (SI->getType()->isVectorTy()) 53897c3ef5cSSotiris Apostolakis SelectKind = TargetLowering::ScalarCondVectorVal; 53997c3ef5cSSotiris Apostolakis else 54097c3ef5cSSotiris Apostolakis SelectKind = TargetLowering::ScalarValSelect; 54197c3ef5cSSotiris Apostolakis return TLI->isSelectSupported(SelectKind); 542ca7c307dSSotiris Apostolakis } 543