1ca7c307dSSotiris Apostolakis //===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
2ca7c307dSSotiris Apostolakis //
3ca7c307dSSotiris Apostolakis // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4ca7c307dSSotiris Apostolakis // See https://llvm.org/LICENSE.txt for license information.
5ca7c307dSSotiris Apostolakis // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6ca7c307dSSotiris Apostolakis //
7ca7c307dSSotiris Apostolakis //===----------------------------------------------------------------------===//
8ca7c307dSSotiris Apostolakis //
9ca7c307dSSotiris Apostolakis // This pass converts selects to conditional jumps when profitable.
10ca7c307dSSotiris Apostolakis //
11ca7c307dSSotiris Apostolakis //===----------------------------------------------------------------------===//
12ca7c307dSSotiris Apostolakis 
1397c3ef5cSSotiris Apostolakis #include "llvm/ADT/SmallVector.h"
1497c3ef5cSSotiris Apostolakis #include "llvm/ADT/Statistic.h"
1597c3ef5cSSotiris Apostolakis #include "llvm/Analysis/BlockFrequencyInfo.h"
1697c3ef5cSSotiris Apostolakis #include "llvm/Analysis/BranchProbabilityInfo.h"
1797c3ef5cSSotiris Apostolakis #include "llvm/Analysis/LoopInfo.h"
18*8b42bc56SSotiris Apostolakis #include "llvm/Analysis/OptimizationRemarkEmitter.h"
19*8b42bc56SSotiris Apostolakis #include "llvm/Analysis/ProfileSummaryInfo.h"
20*8b42bc56SSotiris Apostolakis #include "llvm/Analysis/TargetTransformInfo.h"
21ca7c307dSSotiris Apostolakis #include "llvm/CodeGen/Passes.h"
2297c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetLowering.h"
2397c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetPassConfig.h"
2497c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetSchedule.h"
2597c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetSubtargetInfo.h"
2697c3ef5cSSotiris Apostolakis #include "llvm/IR/BasicBlock.h"
27*8b42bc56SSotiris Apostolakis #include "llvm/IR/Dominators.h"
28ca7c307dSSotiris Apostolakis #include "llvm/IR/Function.h"
2997c3ef5cSSotiris Apostolakis #include "llvm/IR/IRBuilder.h"
3097c3ef5cSSotiris Apostolakis #include "llvm/IR/Instruction.h"
31ca7c307dSSotiris Apostolakis #include "llvm/InitializePasses.h"
32ca7c307dSSotiris Apostolakis #include "llvm/Pass.h"
3397c3ef5cSSotiris Apostolakis #include "llvm/Target/TargetMachine.h"
34*8b42bc56SSotiris Apostolakis #include "llvm/Transforms/Utils/SizeOpts.h"
35*8b42bc56SSotiris Apostolakis #include <algorithm>
36*8b42bc56SSotiris Apostolakis #include <memory>
37*8b42bc56SSotiris Apostolakis #include <queue>
38*8b42bc56SSotiris Apostolakis #include <stack>
39*8b42bc56SSotiris Apostolakis #include <string>
40ca7c307dSSotiris Apostolakis 
41ca7c307dSSotiris Apostolakis using namespace llvm;
42ca7c307dSSotiris Apostolakis 
4397c3ef5cSSotiris Apostolakis #define DEBUG_TYPE "select-optimize"
4497c3ef5cSSotiris Apostolakis 
45*8b42bc56SSotiris Apostolakis STATISTIC(NumSelectOptAnalyzed,
46*8b42bc56SSotiris Apostolakis           "Number of select groups considered for conversion to branch");
47*8b42bc56SSotiris Apostolakis STATISTIC(NumSelectConvertedExpColdOperand,
48*8b42bc56SSotiris Apostolakis           "Number of select groups converted due to expensive cold operand");
49*8b42bc56SSotiris Apostolakis STATISTIC(NumSelectConvertedHighPred,
50*8b42bc56SSotiris Apostolakis           "Number of select groups converted due to high-predictability");
51*8b42bc56SSotiris Apostolakis STATISTIC(NumSelectUnPred,
52*8b42bc56SSotiris Apostolakis           "Number of select groups not converted due to unpredictability");
53*8b42bc56SSotiris Apostolakis STATISTIC(NumSelectColdBB,
54*8b42bc56SSotiris Apostolakis           "Number of select groups not converted due to cold basic block");
5597c3ef5cSSotiris Apostolakis STATISTIC(NumSelectsConverted, "Number of selects converted");
5697c3ef5cSSotiris Apostolakis 
57*8b42bc56SSotiris Apostolakis static cl::opt<unsigned> ColdOperandThreshold(
58*8b42bc56SSotiris Apostolakis     "cold-operand-threshold",
59*8b42bc56SSotiris Apostolakis     cl::desc("Maximum frequency of path for an operand to be considered cold."),
60*8b42bc56SSotiris Apostolakis     cl::init(20), cl::Hidden);
61*8b42bc56SSotiris Apostolakis 
62*8b42bc56SSotiris Apostolakis static cl::opt<unsigned> ColdOperandMaxCostMultiplier(
63*8b42bc56SSotiris Apostolakis     "cold-operand-max-cost-multiplier",
64*8b42bc56SSotiris Apostolakis     cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
65*8b42bc56SSotiris Apostolakis              "slice of a cold operand to be considered inexpensive."),
66*8b42bc56SSotiris Apostolakis     cl::init(1), cl::Hidden);
67*8b42bc56SSotiris Apostolakis 
68ca7c307dSSotiris Apostolakis namespace {
69ca7c307dSSotiris Apostolakis 
70ca7c307dSSotiris Apostolakis class SelectOptimize : public FunctionPass {
7197c3ef5cSSotiris Apostolakis   const TargetMachine *TM = nullptr;
7297c3ef5cSSotiris Apostolakis   const TargetSubtargetInfo *TSI;
7397c3ef5cSSotiris Apostolakis   const TargetLowering *TLI = nullptr;
74*8b42bc56SSotiris Apostolakis   const TargetTransformInfo *TTI = nullptr;
7597c3ef5cSSotiris Apostolakis   const LoopInfo *LI;
76*8b42bc56SSotiris Apostolakis   DominatorTree *DT;
7797c3ef5cSSotiris Apostolakis   std::unique_ptr<BlockFrequencyInfo> BFI;
7897c3ef5cSSotiris Apostolakis   std::unique_ptr<BranchProbabilityInfo> BPI;
79*8b42bc56SSotiris Apostolakis   ProfileSummaryInfo *PSI;
80*8b42bc56SSotiris Apostolakis   OptimizationRemarkEmitter *ORE;
8197c3ef5cSSotiris Apostolakis 
82ca7c307dSSotiris Apostolakis public:
83ca7c307dSSotiris Apostolakis   static char ID;
84*8b42bc56SSotiris Apostolakis 
85ca7c307dSSotiris Apostolakis   SelectOptimize() : FunctionPass(ID) {
86ca7c307dSSotiris Apostolakis     initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
87ca7c307dSSotiris Apostolakis   }
88ca7c307dSSotiris Apostolakis 
89ca7c307dSSotiris Apostolakis   bool runOnFunction(Function &F) override;
90ca7c307dSSotiris Apostolakis 
9197c3ef5cSSotiris Apostolakis   void getAnalysisUsage(AnalysisUsage &AU) const override {
92*8b42bc56SSotiris Apostolakis     AU.addRequired<ProfileSummaryInfoWrapperPass>();
9397c3ef5cSSotiris Apostolakis     AU.addRequired<TargetPassConfig>();
94*8b42bc56SSotiris Apostolakis     AU.addRequired<TargetTransformInfoWrapperPass>();
95*8b42bc56SSotiris Apostolakis     AU.addRequired<DominatorTreeWrapperPass>();
9697c3ef5cSSotiris Apostolakis     AU.addRequired<LoopInfoWrapperPass>();
97*8b42bc56SSotiris Apostolakis     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
9897c3ef5cSSotiris Apostolakis   }
9997c3ef5cSSotiris Apostolakis 
10097c3ef5cSSotiris Apostolakis private:
10197c3ef5cSSotiris Apostolakis   // Select groups consist of consecutive select instructions with the same
10297c3ef5cSSotiris Apostolakis   // condition.
10397c3ef5cSSotiris Apostolakis   using SelectGroup = SmallVector<SelectInst *, 2>;
10497c3ef5cSSotiris Apostolakis   using SelectGroups = SmallVector<SelectGroup, 2>;
10597c3ef5cSSotiris Apostolakis 
106*8b42bc56SSotiris Apostolakis   // Converts select instructions of a function to conditional jumps when deemed
107*8b42bc56SSotiris Apostolakis   // profitable. Returns true if at least one select was converted.
10897c3ef5cSSotiris Apostolakis   bool optimizeSelects(Function &F);
109*8b42bc56SSotiris Apostolakis 
110*8b42bc56SSotiris Apostolakis   // Heuristics for determining which select instructions can be profitably
111*8b42bc56SSotiris Apostolakis   // conveted to branches. Separate heuristics for selects in inner-most loops
112*8b42bc56SSotiris Apostolakis   // and the rest of code regions (base heuristics for non-inner-most loop
113*8b42bc56SSotiris Apostolakis   // regions).
114*8b42bc56SSotiris Apostolakis   void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
115*8b42bc56SSotiris Apostolakis   void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
116*8b42bc56SSotiris Apostolakis 
117*8b42bc56SSotiris Apostolakis   // Converts to branches the select groups that were deemed
118*8b42bc56SSotiris Apostolakis   // profitable-to-convert.
11997c3ef5cSSotiris Apostolakis   void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
120*8b42bc56SSotiris Apostolakis 
121*8b42bc56SSotiris Apostolakis   // Splits selects of a given basic block into select groups.
12297c3ef5cSSotiris Apostolakis   void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
123*8b42bc56SSotiris Apostolakis 
124*8b42bc56SSotiris Apostolakis   // Determines for which select groups it is profitable converting to branches
125*8b42bc56SSotiris Apostolakis   // (base heuristics).
126*8b42bc56SSotiris Apostolakis   void findProfitableSIGroupsBase(SelectGroups &SIGroups,
127*8b42bc56SSotiris Apostolakis                                   SelectGroups &ProfSIGroups);
128*8b42bc56SSotiris Apostolakis   // Determines if a select group should be converted to a branch (base
129*8b42bc56SSotiris Apostolakis   // heuristics).
130*8b42bc56SSotiris Apostolakis   bool isConvertToBranchProfitableBase(const SmallVector<SelectInst *, 2> &ASI);
131*8b42bc56SSotiris Apostolakis 
132*8b42bc56SSotiris Apostolakis   // Returns true if there are expensive instructions in the cold value
133*8b42bc56SSotiris Apostolakis   // operand's (if any) dependence slice of any of the selects of the given
134*8b42bc56SSotiris Apostolakis   // group.
135*8b42bc56SSotiris Apostolakis   bool hasExpensiveColdOperand(const SmallVector<SelectInst *, 2> &ASI);
136*8b42bc56SSotiris Apostolakis 
137*8b42bc56SSotiris Apostolakis   // For a given source instruction, collect its backwards dependence slice
138*8b42bc56SSotiris Apostolakis   // consisting of instructions exclusively computed for producing the operands
139*8b42bc56SSotiris Apostolakis   // of the source instruction.
140*8b42bc56SSotiris Apostolakis   void getExclBackwardsSlice(Instruction *I,
141*8b42bc56SSotiris Apostolakis                              SmallVector<Instruction *, 2> &Slice);
142*8b42bc56SSotiris Apostolakis 
143*8b42bc56SSotiris Apostolakis   // Returns true if the condition of the select is highly predictable.
144*8b42bc56SSotiris Apostolakis   bool isSelectHighlyPredictable(const SelectInst *SI);
145*8b42bc56SSotiris Apostolakis 
146*8b42bc56SSotiris Apostolakis   // Returns true if the target architecture supports lowering a given select.
14797c3ef5cSSotiris Apostolakis   bool isSelectKindSupported(SelectInst *SI);
148ca7c307dSSotiris Apostolakis };
149ca7c307dSSotiris Apostolakis } // namespace
150ca7c307dSSotiris Apostolakis 
151ca7c307dSSotiris Apostolakis char SelectOptimize::ID = 0;
15297c3ef5cSSotiris Apostolakis 
15397c3ef5cSSotiris Apostolakis INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
15497c3ef5cSSotiris Apostolakis                       false)
15597c3ef5cSSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
156*8b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
157*8b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
15897c3ef5cSSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
159*8b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
160*8b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
16197c3ef5cSSotiris Apostolakis INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
162ca7c307dSSotiris Apostolakis                     false)
163ca7c307dSSotiris Apostolakis 
164ca7c307dSSotiris Apostolakis FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
165ca7c307dSSotiris Apostolakis 
166ca7c307dSSotiris Apostolakis bool SelectOptimize::runOnFunction(Function &F) {
16797c3ef5cSSotiris Apostolakis   TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
16897c3ef5cSSotiris Apostolakis   TSI = TM->getSubtargetImpl(F);
16997c3ef5cSSotiris Apostolakis   TLI = TSI->getTargetLowering();
170*8b42bc56SSotiris Apostolakis 
171*8b42bc56SSotiris Apostolakis   // If none of the select types is supported then skip this pass.
172*8b42bc56SSotiris Apostolakis   // This is an optimization pass. Legality issues will be handled by
173*8b42bc56SSotiris Apostolakis   // instruction selection.
174*8b42bc56SSotiris Apostolakis   if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
175*8b42bc56SSotiris Apostolakis       !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
176*8b42bc56SSotiris Apostolakis       !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
177*8b42bc56SSotiris Apostolakis     return false;
178*8b42bc56SSotiris Apostolakis 
179*8b42bc56SSotiris Apostolakis   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
180*8b42bc56SSotiris Apostolakis   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
18197c3ef5cSSotiris Apostolakis   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
18297c3ef5cSSotiris Apostolakis   BPI.reset(new BranchProbabilityInfo(F, *LI));
18397c3ef5cSSotiris Apostolakis   BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
184*8b42bc56SSotiris Apostolakis   PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
185*8b42bc56SSotiris Apostolakis   ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
186*8b42bc56SSotiris Apostolakis 
187*8b42bc56SSotiris Apostolakis   // When optimizing for size, selects are preferable over branches.
188*8b42bc56SSotiris Apostolakis   if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get()))
189*8b42bc56SSotiris Apostolakis     return false;
19097c3ef5cSSotiris Apostolakis 
19197c3ef5cSSotiris Apostolakis   return optimizeSelects(F);
19297c3ef5cSSotiris Apostolakis }
19397c3ef5cSSotiris Apostolakis 
19497c3ef5cSSotiris Apostolakis bool SelectOptimize::optimizeSelects(Function &F) {
19597c3ef5cSSotiris Apostolakis   // Determine for which select groups it is profitable converting to branches.
19697c3ef5cSSotiris Apostolakis   SelectGroups ProfSIGroups;
197*8b42bc56SSotiris Apostolakis   // Base heuristics apply only to non-loops and outer loops.
198*8b42bc56SSotiris Apostolakis   optimizeSelectsBase(F, ProfSIGroups);
199*8b42bc56SSotiris Apostolakis   // Separate heuristics for inner-most loops.
200*8b42bc56SSotiris Apostolakis   optimizeSelectsInnerLoops(F, ProfSIGroups);
20197c3ef5cSSotiris Apostolakis 
20297c3ef5cSSotiris Apostolakis   // Convert to branches the select groups that were deemed
20397c3ef5cSSotiris Apostolakis   // profitable-to-convert.
20497c3ef5cSSotiris Apostolakis   convertProfitableSIGroups(ProfSIGroups);
20597c3ef5cSSotiris Apostolakis 
20697c3ef5cSSotiris Apostolakis   // Code modified if at least one select group was converted.
20797c3ef5cSSotiris Apostolakis   return !ProfSIGroups.empty();
20897c3ef5cSSotiris Apostolakis }
20997c3ef5cSSotiris Apostolakis 
210*8b42bc56SSotiris Apostolakis void SelectOptimize::optimizeSelectsBase(Function &F,
211*8b42bc56SSotiris Apostolakis                                          SelectGroups &ProfSIGroups) {
212*8b42bc56SSotiris Apostolakis   // Collect all the select groups.
213*8b42bc56SSotiris Apostolakis   SelectGroups SIGroups;
214*8b42bc56SSotiris Apostolakis   for (BasicBlock &BB : F) {
215*8b42bc56SSotiris Apostolakis     // Base heuristics apply only to non-loops and outer loops.
216*8b42bc56SSotiris Apostolakis     Loop *L = LI->getLoopFor(&BB);
217*8b42bc56SSotiris Apostolakis     if (L && L->isInnermost())
218*8b42bc56SSotiris Apostolakis       continue;
219*8b42bc56SSotiris Apostolakis     collectSelectGroups(BB, SIGroups);
220*8b42bc56SSotiris Apostolakis   }
221*8b42bc56SSotiris Apostolakis 
222*8b42bc56SSotiris Apostolakis   // Determine for which select groups it is profitable converting to branches.
223*8b42bc56SSotiris Apostolakis   findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
224*8b42bc56SSotiris Apostolakis }
225*8b42bc56SSotiris Apostolakis 
226*8b42bc56SSotiris Apostolakis void SelectOptimize::optimizeSelectsInnerLoops(Function &F,
227*8b42bc56SSotiris Apostolakis                                                SelectGroups &ProfSIGroups) {}
228*8b42bc56SSotiris Apostolakis 
22997c3ef5cSSotiris Apostolakis /// If \p isTrue is true, return the true value of \p SI, otherwise return
23097c3ef5cSSotiris Apostolakis /// false value of \p SI. If the true/false value of \p SI is defined by any
23197c3ef5cSSotiris Apostolakis /// select instructions in \p Selects, look through the defining select
23297c3ef5cSSotiris Apostolakis /// instruction until the true/false value is not defined in \p Selects.
23397c3ef5cSSotiris Apostolakis static Value *
23497c3ef5cSSotiris Apostolakis getTrueOrFalseValue(SelectInst *SI, bool isTrue,
23597c3ef5cSSotiris Apostolakis                     const SmallPtrSet<const Instruction *, 2> &Selects) {
23697c3ef5cSSotiris Apostolakis   Value *V = nullptr;
23797c3ef5cSSotiris Apostolakis   for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
23897c3ef5cSSotiris Apostolakis        DefSI = dyn_cast<SelectInst>(V)) {
23997c3ef5cSSotiris Apostolakis     assert(DefSI->getCondition() == SI->getCondition() &&
24097c3ef5cSSotiris Apostolakis            "The condition of DefSI does not match with SI");
24197c3ef5cSSotiris Apostolakis     V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
24297c3ef5cSSotiris Apostolakis   }
24397c3ef5cSSotiris Apostolakis   assert(V && "Failed to get select true/false value");
24497c3ef5cSSotiris Apostolakis   return V;
24597c3ef5cSSotiris Apostolakis }
24697c3ef5cSSotiris Apostolakis 
24797c3ef5cSSotiris Apostolakis void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
24897c3ef5cSSotiris Apostolakis   for (SelectGroup &ASI : ProfSIGroups) {
24997c3ef5cSSotiris Apostolakis     // TODO: eliminate the redundancy of logic transforming selects to branches
25097c3ef5cSSotiris Apostolakis     // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
25197c3ef5cSSotiris Apostolakis     // selects for all cases (with and without profile information).
25297c3ef5cSSotiris Apostolakis 
25397c3ef5cSSotiris Apostolakis     // Transform a sequence like this:
25497c3ef5cSSotiris Apostolakis     //    start:
25597c3ef5cSSotiris Apostolakis     //       %cmp = cmp uge i32 %a, %b
25697c3ef5cSSotiris Apostolakis     //       %sel = select i1 %cmp, i32 %c, i32 %d
25797c3ef5cSSotiris Apostolakis     //
25897c3ef5cSSotiris Apostolakis     // Into:
25997c3ef5cSSotiris Apostolakis     //    start:
26097c3ef5cSSotiris Apostolakis     //       %cmp = cmp uge i32 %a, %b
26197c3ef5cSSotiris Apostolakis     //       %cmp.frozen = freeze %cmp
26297c3ef5cSSotiris Apostolakis     //       br i1 %cmp.frozen, label %select.end, label %select.false
26397c3ef5cSSotiris Apostolakis     //    select.false:
26497c3ef5cSSotiris Apostolakis     //       br label %select.end
26597c3ef5cSSotiris Apostolakis     //    select.end:
26697c3ef5cSSotiris Apostolakis     //       %sel = phi i32 [ %c, %start ], [ %d, %select.false ]
26797c3ef5cSSotiris Apostolakis     //
26897c3ef5cSSotiris Apostolakis     // %cmp should be frozen, otherwise it may introduce undefined behavior.
26997c3ef5cSSotiris Apostolakis 
27097c3ef5cSSotiris Apostolakis     // We split the block containing the select(s) into two blocks.
27197c3ef5cSSotiris Apostolakis     SelectInst *SI = ASI.front();
27297c3ef5cSSotiris Apostolakis     SelectInst *LastSI = ASI.back();
27397c3ef5cSSotiris Apostolakis     BasicBlock *StartBlock = SI->getParent();
27497c3ef5cSSotiris Apostolakis     BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
27597c3ef5cSSotiris Apostolakis     BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
27697c3ef5cSSotiris Apostolakis     BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
27797c3ef5cSSotiris Apostolakis     // Delete the unconditional branch that was just created by the split.
27897c3ef5cSSotiris Apostolakis     StartBlock->getTerminator()->eraseFromParent();
27997c3ef5cSSotiris Apostolakis 
28097c3ef5cSSotiris Apostolakis     // Move any debug/pseudo instructions that were in-between the select
28197c3ef5cSSotiris Apostolakis     // group to the newly-created end block.
28297c3ef5cSSotiris Apostolakis     SmallVector<Instruction *, 2> DebugPseudoINS;
28397c3ef5cSSotiris Apostolakis     auto DIt = SI->getIterator();
28497c3ef5cSSotiris Apostolakis     while (&*DIt != LastSI) {
28597c3ef5cSSotiris Apostolakis       if (DIt->isDebugOrPseudoInst())
28697c3ef5cSSotiris Apostolakis         DebugPseudoINS.push_back(&*DIt);
28797c3ef5cSSotiris Apostolakis       DIt++;
28897c3ef5cSSotiris Apostolakis     }
28997c3ef5cSSotiris Apostolakis     for (auto DI : DebugPseudoINS) {
29097c3ef5cSSotiris Apostolakis       DI->moveBefore(&*EndBlock->getFirstInsertionPt());
29197c3ef5cSSotiris Apostolakis     }
29297c3ef5cSSotiris Apostolakis 
29397c3ef5cSSotiris Apostolakis     // These are the new basic blocks for the conditional branch.
29497c3ef5cSSotiris Apostolakis     // For now, no instruction sinking to the true/false blocks.
29597c3ef5cSSotiris Apostolakis     // Thus both True and False blocks will be empty.
29697c3ef5cSSotiris Apostolakis     BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
29797c3ef5cSSotiris Apostolakis 
29897c3ef5cSSotiris Apostolakis     // Use the 'false' side for a new input value to the PHI.
29997c3ef5cSSotiris Apostolakis     FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
30097c3ef5cSSotiris Apostolakis                                     EndBlock->getParent(), EndBlock);
30197c3ef5cSSotiris Apostolakis     auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
30297c3ef5cSSotiris Apostolakis     FalseBranch->setDebugLoc(SI->getDebugLoc());
30397c3ef5cSSotiris Apostolakis 
30497c3ef5cSSotiris Apostolakis     // For the 'true' side the path originates from the start block from the
30597c3ef5cSSotiris Apostolakis     // point view of the new PHI.
30697c3ef5cSSotiris Apostolakis     TrueBlock = StartBlock;
30797c3ef5cSSotiris Apostolakis 
30897c3ef5cSSotiris Apostolakis     // Insert the real conditional branch based on the original condition.
30997c3ef5cSSotiris Apostolakis     BasicBlock *TT, *FT;
31097c3ef5cSSotiris Apostolakis     TT = EndBlock;
31197c3ef5cSSotiris Apostolakis     FT = FalseBlock;
31297c3ef5cSSotiris Apostolakis     IRBuilder<> IB(SI);
31397c3ef5cSSotiris Apostolakis     auto *CondFr =
31497c3ef5cSSotiris Apostolakis         IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
31597c3ef5cSSotiris Apostolakis     IB.CreateCondBr(CondFr, TT, FT, SI);
31697c3ef5cSSotiris Apostolakis 
31797c3ef5cSSotiris Apostolakis     SmallPtrSet<const Instruction *, 2> INS;
31897c3ef5cSSotiris Apostolakis     INS.insert(ASI.begin(), ASI.end());
31997c3ef5cSSotiris Apostolakis     // Use reverse iterator because later select may use the value of the
32097c3ef5cSSotiris Apostolakis     // earlier select, and we need to propagate value through earlier select
32197c3ef5cSSotiris Apostolakis     // to get the PHI operand.
32297c3ef5cSSotiris Apostolakis     for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
32397c3ef5cSSotiris Apostolakis       SelectInst *SI = *It;
32497c3ef5cSSotiris Apostolakis       // The select itself is replaced with a PHI Node.
32597c3ef5cSSotiris Apostolakis       PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
32697c3ef5cSSotiris Apostolakis       PN->takeName(SI);
32797c3ef5cSSotiris Apostolakis       PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
32897c3ef5cSSotiris Apostolakis       PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
32997c3ef5cSSotiris Apostolakis       PN->setDebugLoc(SI->getDebugLoc());
33097c3ef5cSSotiris Apostolakis 
33197c3ef5cSSotiris Apostolakis       SI->replaceAllUsesWith(PN);
33297c3ef5cSSotiris Apostolakis       SI->eraseFromParent();
33397c3ef5cSSotiris Apostolakis       INS.erase(SI);
33497c3ef5cSSotiris Apostolakis       ++NumSelectsConverted;
33597c3ef5cSSotiris Apostolakis     }
33697c3ef5cSSotiris Apostolakis   }
33797c3ef5cSSotiris Apostolakis }
33897c3ef5cSSotiris Apostolakis 
33997c3ef5cSSotiris Apostolakis void SelectOptimize::collectSelectGroups(BasicBlock &BB,
34097c3ef5cSSotiris Apostolakis                                          SelectGroups &SIGroups) {
34197c3ef5cSSotiris Apostolakis   BasicBlock::iterator BBIt = BB.begin();
34297c3ef5cSSotiris Apostolakis   while (BBIt != BB.end()) {
34397c3ef5cSSotiris Apostolakis     Instruction *I = &*BBIt++;
34497c3ef5cSSotiris Apostolakis     if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
34597c3ef5cSSotiris Apostolakis       SelectGroup SIGroup;
34697c3ef5cSSotiris Apostolakis       SIGroup.push_back(SI);
34797c3ef5cSSotiris Apostolakis       while (BBIt != BB.end()) {
34897c3ef5cSSotiris Apostolakis         Instruction *NI = &*BBIt;
34997c3ef5cSSotiris Apostolakis         SelectInst *NSI = dyn_cast<SelectInst>(NI);
35097c3ef5cSSotiris Apostolakis         if (NSI && SI->getCondition() == NSI->getCondition()) {
35197c3ef5cSSotiris Apostolakis           SIGroup.push_back(NSI);
35297c3ef5cSSotiris Apostolakis         } else if (!NI->isDebugOrPseudoInst()) {
35397c3ef5cSSotiris Apostolakis           // Debug/pseudo instructions should be skipped and not prevent the
35497c3ef5cSSotiris Apostolakis           // formation of a select group.
35597c3ef5cSSotiris Apostolakis           break;
35697c3ef5cSSotiris Apostolakis         }
35797c3ef5cSSotiris Apostolakis         ++BBIt;
35897c3ef5cSSotiris Apostolakis       }
35997c3ef5cSSotiris Apostolakis 
36097c3ef5cSSotiris Apostolakis       // If the select type is not supported, no point optimizing it.
36197c3ef5cSSotiris Apostolakis       // Instruction selection will take care of it.
36297c3ef5cSSotiris Apostolakis       if (!isSelectKindSupported(SI))
36397c3ef5cSSotiris Apostolakis         continue;
36497c3ef5cSSotiris Apostolakis 
36597c3ef5cSSotiris Apostolakis       SIGroups.push_back(SIGroup);
36697c3ef5cSSotiris Apostolakis     }
36797c3ef5cSSotiris Apostolakis   }
36897c3ef5cSSotiris Apostolakis }
36997c3ef5cSSotiris Apostolakis 
370*8b42bc56SSotiris Apostolakis void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
371*8b42bc56SSotiris Apostolakis                                                 SelectGroups &ProfSIGroups) {
372*8b42bc56SSotiris Apostolakis   for (SelectGroup &ASI : SIGroups) {
373*8b42bc56SSotiris Apostolakis     ++NumSelectOptAnalyzed;
374*8b42bc56SSotiris Apostolakis     if (isConvertToBranchProfitableBase(ASI))
375*8b42bc56SSotiris Apostolakis       ProfSIGroups.push_back(ASI);
376*8b42bc56SSotiris Apostolakis   }
377*8b42bc56SSotiris Apostolakis }
378*8b42bc56SSotiris Apostolakis 
379*8b42bc56SSotiris Apostolakis bool SelectOptimize::isConvertToBranchProfitableBase(
380*8b42bc56SSotiris Apostolakis     const SmallVector<SelectInst *, 2> &ASI) {
381*8b42bc56SSotiris Apostolakis   SelectInst *SI = ASI.front();
382*8b42bc56SSotiris Apostolakis   OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI);
383*8b42bc56SSotiris Apostolakis   OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
384*8b42bc56SSotiris Apostolakis 
385*8b42bc56SSotiris Apostolakis   // Skip cold basic blocks. Better to optimize for size for cold blocks.
386*8b42bc56SSotiris Apostolakis   if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
387*8b42bc56SSotiris Apostolakis     ++NumSelectColdBB;
388*8b42bc56SSotiris Apostolakis     ORmiss << "Not converted to branch because of cold basic block. ";
389*8b42bc56SSotiris Apostolakis     ORE->emit(ORmiss);
390*8b42bc56SSotiris Apostolakis     return false;
391*8b42bc56SSotiris Apostolakis   }
392*8b42bc56SSotiris Apostolakis 
393*8b42bc56SSotiris Apostolakis   // If unpredictable, branch form is less profitable.
394*8b42bc56SSotiris Apostolakis   if (SI->getMetadata(LLVMContext::MD_unpredictable)) {
395*8b42bc56SSotiris Apostolakis     ++NumSelectUnPred;
396*8b42bc56SSotiris Apostolakis     ORmiss << "Not converted to branch because of unpredictable branch. ";
397*8b42bc56SSotiris Apostolakis     ORE->emit(ORmiss);
398*8b42bc56SSotiris Apostolakis     return false;
399*8b42bc56SSotiris Apostolakis   }
400*8b42bc56SSotiris Apostolakis 
401*8b42bc56SSotiris Apostolakis   // If highly predictable, branch form is more profitable, unless a
402*8b42bc56SSotiris Apostolakis   // predictable select is inexpensive in the target architecture.
403*8b42bc56SSotiris Apostolakis   if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
404*8b42bc56SSotiris Apostolakis     ++NumSelectConvertedHighPred;
405*8b42bc56SSotiris Apostolakis     OR << "Converted to branch because of highly predictable branch. ";
406*8b42bc56SSotiris Apostolakis     ORE->emit(OR);
407*8b42bc56SSotiris Apostolakis     return true;
408*8b42bc56SSotiris Apostolakis   }
409*8b42bc56SSotiris Apostolakis 
410*8b42bc56SSotiris Apostolakis   // Look for expensive instructions in the cold operand's (if any) dependence
411*8b42bc56SSotiris Apostolakis   // slice of any of the selects in the group.
412*8b42bc56SSotiris Apostolakis   if (hasExpensiveColdOperand(ASI)) {
413*8b42bc56SSotiris Apostolakis     ++NumSelectConvertedExpColdOperand;
414*8b42bc56SSotiris Apostolakis     OR << "Converted to branch because of expensive cold operand.";
415*8b42bc56SSotiris Apostolakis     ORE->emit(OR);
416*8b42bc56SSotiris Apostolakis     return true;
417*8b42bc56SSotiris Apostolakis   }
418*8b42bc56SSotiris Apostolakis 
419*8b42bc56SSotiris Apostolakis   ORmiss << "Not profitable to convert to branch (base heuristic).";
420*8b42bc56SSotiris Apostolakis   ORE->emit(ORmiss);
421*8b42bc56SSotiris Apostolakis   return false;
422*8b42bc56SSotiris Apostolakis }
423*8b42bc56SSotiris Apostolakis 
424*8b42bc56SSotiris Apostolakis static InstructionCost divideNearest(InstructionCost Numerator,
425*8b42bc56SSotiris Apostolakis                                      uint64_t Denominator) {
426*8b42bc56SSotiris Apostolakis   return (Numerator + (Denominator / 2)) / Denominator;
427*8b42bc56SSotiris Apostolakis }
428*8b42bc56SSotiris Apostolakis 
429*8b42bc56SSotiris Apostolakis bool SelectOptimize::hasExpensiveColdOperand(
430*8b42bc56SSotiris Apostolakis     const SmallVector<SelectInst *, 2> &ASI) {
431*8b42bc56SSotiris Apostolakis   bool ColdOperand = false;
432*8b42bc56SSotiris Apostolakis   uint64_t TrueWeight, FalseWeight, TotalWeight;
433*8b42bc56SSotiris Apostolakis   if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) {
434*8b42bc56SSotiris Apostolakis     uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
435*8b42bc56SSotiris Apostolakis     TotalWeight = TrueWeight + FalseWeight;
436*8b42bc56SSotiris Apostolakis     // Is there a path with frequency <ColdOperandThreshold% (default:20%) ?
437*8b42bc56SSotiris Apostolakis     ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
438*8b42bc56SSotiris Apostolakis   } else if (PSI->hasProfileSummary()) {
439*8b42bc56SSotiris Apostolakis     OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
440*8b42bc56SSotiris Apostolakis     ORmiss << "Profile data available but missing branch-weights metadata for "
441*8b42bc56SSotiris Apostolakis               "select instruction. ";
442*8b42bc56SSotiris Apostolakis     ORE->emit(ORmiss);
443*8b42bc56SSotiris Apostolakis   }
444*8b42bc56SSotiris Apostolakis   if (!ColdOperand)
445*8b42bc56SSotiris Apostolakis     return false;
446*8b42bc56SSotiris Apostolakis   // Check if the cold path's dependence slice is expensive for any of the
447*8b42bc56SSotiris Apostolakis   // selects of the group.
448*8b42bc56SSotiris Apostolakis   for (SelectInst *SI : ASI) {
449*8b42bc56SSotiris Apostolakis     Instruction *ColdI = nullptr;
450*8b42bc56SSotiris Apostolakis     uint64_t HotWeight;
451*8b42bc56SSotiris Apostolakis     if (TrueWeight < FalseWeight) {
452*8b42bc56SSotiris Apostolakis       ColdI = dyn_cast<Instruction>(SI->getTrueValue());
453*8b42bc56SSotiris Apostolakis       HotWeight = FalseWeight;
454*8b42bc56SSotiris Apostolakis     } else {
455*8b42bc56SSotiris Apostolakis       ColdI = dyn_cast<Instruction>(SI->getFalseValue());
456*8b42bc56SSotiris Apostolakis       HotWeight = TrueWeight;
457*8b42bc56SSotiris Apostolakis     }
458*8b42bc56SSotiris Apostolakis     if (ColdI) {
459*8b42bc56SSotiris Apostolakis       SmallVector<Instruction *, 2> ColdSlice;
460*8b42bc56SSotiris Apostolakis       getExclBackwardsSlice(ColdI, ColdSlice);
461*8b42bc56SSotiris Apostolakis       InstructionCost SliceCost = 0;
462*8b42bc56SSotiris Apostolakis       for (auto *ColdII : ColdSlice) {
463*8b42bc56SSotiris Apostolakis         SliceCost +=
464*8b42bc56SSotiris Apostolakis             TTI->getInstructionCost(ColdII, TargetTransformInfo::TCK_Latency);
465*8b42bc56SSotiris Apostolakis       }
466*8b42bc56SSotiris Apostolakis       // The colder the cold value operand of the select is the more expensive
467*8b42bc56SSotiris Apostolakis       // the cmov becomes for computing the cold value operand every time. Thus,
468*8b42bc56SSotiris Apostolakis       // the colder the cold operand is the more its cost counts.
469*8b42bc56SSotiris Apostolakis       // Get nearest integer cost adjusted for coldness.
470*8b42bc56SSotiris Apostolakis       InstructionCost AdjSliceCost =
471*8b42bc56SSotiris Apostolakis           divideNearest(SliceCost * HotWeight, TotalWeight);
472*8b42bc56SSotiris Apostolakis       if (AdjSliceCost >=
473*8b42bc56SSotiris Apostolakis           ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive)
474*8b42bc56SSotiris Apostolakis         return true;
475*8b42bc56SSotiris Apostolakis     }
476*8b42bc56SSotiris Apostolakis   }
477*8b42bc56SSotiris Apostolakis   return false;
478*8b42bc56SSotiris Apostolakis }
479*8b42bc56SSotiris Apostolakis 
480*8b42bc56SSotiris Apostolakis // For a given source instruction, collect its backwards dependence slice
481*8b42bc56SSotiris Apostolakis // consisting of instructions exclusively computed for the purpose of producing
482*8b42bc56SSotiris Apostolakis // the operands of the source instruction. As an approximation
483*8b42bc56SSotiris Apostolakis // (sufficiently-accurate in practice), we populate this set with the
484*8b42bc56SSotiris Apostolakis // instructions of the backwards dependence slice that only have one-use and
485*8b42bc56SSotiris Apostolakis // form an one-use chain that leads to the source instruction.
486*8b42bc56SSotiris Apostolakis void SelectOptimize::getExclBackwardsSlice(
487*8b42bc56SSotiris Apostolakis     Instruction *I, SmallVector<Instruction *, 2> &Slice) {
488*8b42bc56SSotiris Apostolakis   SmallPtrSet<Instruction *, 2> Visited;
489*8b42bc56SSotiris Apostolakis   std::queue<Instruction *> Worklist;
490*8b42bc56SSotiris Apostolakis   Worklist.push(I);
491*8b42bc56SSotiris Apostolakis   while (!Worklist.empty()) {
492*8b42bc56SSotiris Apostolakis     Instruction *II = Worklist.front();
493*8b42bc56SSotiris Apostolakis     Worklist.pop();
494*8b42bc56SSotiris Apostolakis 
495*8b42bc56SSotiris Apostolakis     // Avoid cycles.
496*8b42bc56SSotiris Apostolakis     if (Visited.count(II))
497*8b42bc56SSotiris Apostolakis       continue;
498*8b42bc56SSotiris Apostolakis     Visited.insert(II);
499*8b42bc56SSotiris Apostolakis 
500*8b42bc56SSotiris Apostolakis     if (!II->hasOneUse())
501*8b42bc56SSotiris Apostolakis       continue;
502*8b42bc56SSotiris Apostolakis 
503*8b42bc56SSotiris Apostolakis     // Avoid considering instructions with less frequency than the source
504*8b42bc56SSotiris Apostolakis     // instruction (i.e., avoid colder code regions of the dependence slice).
505*8b42bc56SSotiris Apostolakis     if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
506*8b42bc56SSotiris Apostolakis       continue;
507*8b42bc56SSotiris Apostolakis 
508*8b42bc56SSotiris Apostolakis     // Eligible one-use instruction added to the dependence slice.
509*8b42bc56SSotiris Apostolakis     Slice.push_back(II);
510*8b42bc56SSotiris Apostolakis 
511*8b42bc56SSotiris Apostolakis     // Explore all the operands of the current instruction to expand the slice.
512*8b42bc56SSotiris Apostolakis     for (unsigned k = 0; k < II->getNumOperands(); ++k)
513*8b42bc56SSotiris Apostolakis       if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k)))
514*8b42bc56SSotiris Apostolakis         Worklist.push(OpI);
515*8b42bc56SSotiris Apostolakis   }
516*8b42bc56SSotiris Apostolakis }
517*8b42bc56SSotiris Apostolakis 
518*8b42bc56SSotiris Apostolakis bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
519*8b42bc56SSotiris Apostolakis   uint64_t TrueWeight, FalseWeight;
520*8b42bc56SSotiris Apostolakis   if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
521*8b42bc56SSotiris Apostolakis     uint64_t Max = std::max(TrueWeight, FalseWeight);
522*8b42bc56SSotiris Apostolakis     uint64_t Sum = TrueWeight + FalseWeight;
523*8b42bc56SSotiris Apostolakis     if (Sum != 0) {
524*8b42bc56SSotiris Apostolakis       auto Probability = BranchProbability::getBranchProbability(Max, Sum);
525*8b42bc56SSotiris Apostolakis       if (Probability > TTI->getPredictableBranchThreshold())
526*8b42bc56SSotiris Apostolakis         return true;
527*8b42bc56SSotiris Apostolakis     }
528*8b42bc56SSotiris Apostolakis   }
529*8b42bc56SSotiris Apostolakis   return false;
530*8b42bc56SSotiris Apostolakis }
531*8b42bc56SSotiris Apostolakis 
53297c3ef5cSSotiris Apostolakis bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
53397c3ef5cSSotiris Apostolakis   bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
53497c3ef5cSSotiris Apostolakis   if (VectorCond)
53597c3ef5cSSotiris Apostolakis     return false;
53697c3ef5cSSotiris Apostolakis   TargetLowering::SelectSupportKind SelectKind;
53797c3ef5cSSotiris Apostolakis   if (SI->getType()->isVectorTy())
53897c3ef5cSSotiris Apostolakis     SelectKind = TargetLowering::ScalarCondVectorVal;
53997c3ef5cSSotiris Apostolakis   else
54097c3ef5cSSotiris Apostolakis     SelectKind = TargetLowering::ScalarValSelect;
54197c3ef5cSSotiris Apostolakis   return TLI->isSelectSupported(SelectKind);
542ca7c307dSSotiris Apostolakis }
543