1 //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Transform each threading path to effectively jump thread the DFA. For
11 // example, the CFG below could be transformed as follows, where the cloned
12 // blocks unconditionally branch to the next correct case based on what is
13 // identified in the analysis.
14 //
15 //          sw.bb                        sw.bb
16 //        /   |   \                    /   |   \
17 //   case1  case2  case3          case1  case2  case3
18 //        \   |   /                 |      |      |
19 //       determinator            det.2   det.3  det.1
20 //        br sw.bb                /        |        \
21 //                          sw.bb.2     sw.bb.3     sw.bb.1
22 //                           br case2    br case3    br case1§
23 //
24 // Definitions and Terminology:
25 //
26 // * Threading path:
27 //   a list of basic blocks, the exit state, and the block that determines
28 //   the next state, for which the following notation will be used:
29 //   < path of BBs that form a cycle > [ state, determinator ]
30 //
31 // * Predictable switch:
32 //   The switch variable is always a known constant so that all conditional
33 //   jumps based on switch variable can be converted to unconditional jump.
34 //
35 // * Determinator:
36 //   The basic block that determines the next state of the DFA.
37 //
38 // Representing the optimization in C-like pseudocode: the code pattern on the
39 // left could functionally be transformed to the right pattern if the switch
40 // condition is predictable.
41 //
42 //  X = A                       goto A
43 //  for (...)                   A:
44 //    switch (X)                  ...
45 //      case A                    goto B
46 //        X = B                 B:
47 //      case B                    ...
48 //        X = C                   goto C
49 //
50 // The pass first checks that switch variable X is decided by the control flow
51 // path taken in the loop; for example, in case B, the next value of X is
52 // decided to be C. It then enumerates through all paths in the loop and labels
53 // the basic blocks where the next state is decided.
54 //
55 // Using this information it creates new paths that unconditionally branch to
56 // the next case. This involves cloning code, so it only gets triggered if the
57 // amount of code duplicated is below a threshold.
58 //
59 //===----------------------------------------------------------------------===//
60 
61 #include "llvm/Transforms/Scalar/DFAJumpThreading.h"
62 #include "llvm/ADT/APInt.h"
63 #include "llvm/ADT/DenseMap.h"
64 #include "llvm/ADT/DepthFirstIterator.h"
65 #include "llvm/ADT/SmallSet.h"
66 #include "llvm/ADT/Statistic.h"
67 #include "llvm/Analysis/AssumptionCache.h"
68 #include "llvm/Analysis/CodeMetrics.h"
69 #include "llvm/Analysis/LoopIterator.h"
70 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
71 #include "llvm/Analysis/TargetTransformInfo.h"
72 #include "llvm/IR/CFG.h"
73 #include "llvm/IR/Constants.h"
74 #include "llvm/IR/IntrinsicInst.h"
75 #include "llvm/IR/Verifier.h"
76 #include "llvm/InitializePasses.h"
77 #include "llvm/Pass.h"
78 #include "llvm/Support/CommandLine.h"
79 #include "llvm/Support/Debug.h"
80 #include "llvm/Transforms/Scalar.h"
81 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
82 #include "llvm/Transforms/Utils/Cloning.h"
83 #include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
84 #include "llvm/Transforms/Utils/ValueMapper.h"
85 #include <algorithm>
86 #include <deque>
87 #include <unordered_map>
88 #include <unordered_set>
89 
90 using namespace llvm;
91 
92 #define DEBUG_TYPE "dfa-jump-threading"
93 
94 STATISTIC(NumTransforms, "Number of transformations done");
95 STATISTIC(NumCloned, "Number of blocks cloned");
96 STATISTIC(NumPaths, "Number of individual paths threaded");
97 
98 static cl::opt<bool>
99     ClViewCfgBefore("dfa-jump-view-cfg-before",
100                     cl::desc("View the CFG before DFA Jump Threading"),
101                     cl::Hidden, cl::init(false));
102 
103 static cl::opt<unsigned> MaxPathLength(
104     "dfa-max-path-length",
105     cl::desc("Max number of blocks searched to find a threading path"),
106     cl::Hidden, cl::init(20));
107 
108 static cl::opt<unsigned>
109     CostThreshold("dfa-cost-threshold",
110                   cl::desc("Maximum cost accepted for the transformation"),
111                   cl::Hidden, cl::init(50));
112 
113 namespace {
114 
115 class SelectInstToUnfold {
116   SelectInst *SI;
117   PHINode *SIUse;
118 
119 public:
120   SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {}
121 
122   SelectInst *getInst() { return SI; }
123   PHINode *getUse() { return SIUse; }
124 
125   explicit operator bool() const { return SI && SIUse; }
126 };
127 
128 void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
129             std::vector<SelectInstToUnfold> *NewSIsToUnfold,
130             std::vector<BasicBlock *> *NewBBs);
131 
132 class DFAJumpThreading {
133 public:
134   DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT,
135                    TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE)
136       : AC(AC), DT(DT), TTI(TTI), ORE(ORE) {}
137 
138   bool run(Function &F);
139 
140 private:
141   void
142   unfoldSelectInstrs(DominatorTree *DT,
143                      const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {
144     DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
145     SmallVector<SelectInstToUnfold, 4> Stack;
146     for (SelectInstToUnfold SIToUnfold : SelectInsts)
147       Stack.push_back(SIToUnfold);
148 
149     while (!Stack.empty()) {
150       SelectInstToUnfold SIToUnfold = Stack.back();
151       Stack.pop_back();
152 
153       std::vector<SelectInstToUnfold> NewSIsToUnfold;
154       std::vector<BasicBlock *> NewBBs;
155       unfold(&DTU, SIToUnfold, &NewSIsToUnfold, &NewBBs);
156 
157       // Put newly discovered select instructions into the work list.
158       for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold)
159         Stack.push_back(NewSIToUnfold);
160     }
161   }
162 
163   AssumptionCache *AC;
164   DominatorTree *DT;
165   TargetTransformInfo *TTI;
166   OptimizationRemarkEmitter *ORE;
167 };
168 
169 class DFAJumpThreadingLegacyPass : public FunctionPass {
170 public:
171   static char ID; // Pass identification
172   DFAJumpThreadingLegacyPass() : FunctionPass(ID) {}
173 
174   void getAnalysisUsage(AnalysisUsage &AU) const override {
175     AU.addRequired<AssumptionCacheTracker>();
176     AU.addRequired<DominatorTreeWrapperPass>();
177     AU.addPreserved<DominatorTreeWrapperPass>();
178     AU.addRequired<TargetTransformInfoWrapperPass>();
179     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
180   }
181 
182   bool runOnFunction(Function &F) override {
183     if (skipFunction(F))
184       return false;
185 
186     AssumptionCache *AC =
187         &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
188     DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
189     TargetTransformInfo *TTI =
190         &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
191     OptimizationRemarkEmitter *ORE =
192         &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
193 
194     return DFAJumpThreading(AC, DT, TTI, ORE).run(F);
195   }
196 };
197 } // end anonymous namespace
198 
199 char DFAJumpThreadingLegacyPass::ID = 0;
200 INITIALIZE_PASS_BEGIN(DFAJumpThreadingLegacyPass, "dfa-jump-threading",
201                       "DFA Jump Threading", false, false)
202 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
203 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
204 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
205 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
206 INITIALIZE_PASS_END(DFAJumpThreadingLegacyPass, "dfa-jump-threading",
207                     "DFA Jump Threading", false, false)
208 
209 // Public interface to the DFA Jump Threading pass
210 FunctionPass *llvm::createDFAJumpThreadingPass() {
211   return new DFAJumpThreadingLegacyPass();
212 }
213 
214 namespace {
215 
216 /// Create a new basic block and sink \p SIToSink into it.
217 void createBasicBlockAndSinkSelectInst(
218     DomTreeUpdater *DTU, SelectInst *SI, PHINode *SIUse, SelectInst *SIToSink,
219     BasicBlock *EndBlock, StringRef NewBBName, BasicBlock **NewBlock,
220     BranchInst **NewBranch, std::vector<SelectInstToUnfold> *NewSIsToUnfold,
221     std::vector<BasicBlock *> *NewBBs) {
222   assert(SIToSink->hasOneUse());
223   assert(NewBlock);
224   assert(NewBranch);
225   *NewBlock = BasicBlock::Create(SI->getContext(), NewBBName,
226                                  EndBlock->getParent(), EndBlock);
227   NewBBs->push_back(*NewBlock);
228   *NewBranch = BranchInst::Create(EndBlock, *NewBlock);
229   SIToSink->moveBefore(*NewBranch);
230   NewSIsToUnfold->push_back(SelectInstToUnfold(SIToSink, SIUse));
231   DTU->applyUpdates({{DominatorTree::Insert, *NewBlock, EndBlock}});
232 }
233 
234 /// Unfold the select instruction held in \p SIToUnfold by replacing it with
235 /// control flow.
236 ///
237 /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly
238 /// created basic blocks into \p NewBBs.
239 ///
240 /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
241 void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
242             std::vector<SelectInstToUnfold> *NewSIsToUnfold,
243             std::vector<BasicBlock *> *NewBBs) {
244   SelectInst *SI = SIToUnfold.getInst();
245   PHINode *SIUse = SIToUnfold.getUse();
246   BasicBlock *StartBlock = SI->getParent();
247   BasicBlock *EndBlock = SIUse->getParent();
248   BranchInst *StartBlockTerm =
249       dyn_cast<BranchInst>(StartBlock->getTerminator());
250 
251   assert(StartBlockTerm && StartBlockTerm->isUnconditional());
252   assert(SI->hasOneUse());
253 
254   // These are the new basic blocks for the conditional branch.
255   // At least one will become an actual new basic block.
256   BasicBlock *TrueBlock = nullptr;
257   BasicBlock *FalseBlock = nullptr;
258   BranchInst *TrueBranch = nullptr;
259   BranchInst *FalseBranch = nullptr;
260 
261   // Sink select instructions to be able to unfold them later.
262   if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getTrueValue())) {
263     createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock,
264                                       "si.unfold.true", &TrueBlock, &TrueBranch,
265                                       NewSIsToUnfold, NewBBs);
266   }
267   if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getFalseValue())) {
268     createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock,
269                                       "si.unfold.false", &FalseBlock,
270                                       &FalseBranch, NewSIsToUnfold, NewBBs);
271   }
272 
273   // If there was nothing to sink, then arbitrarily choose the 'false' side
274   // for a new input value to the PHI.
275   if (!TrueBlock && !FalseBlock) {
276     FalseBlock = BasicBlock::Create(SI->getContext(), "si.unfold.false",
277                                     EndBlock->getParent(), EndBlock);
278     NewBBs->push_back(FalseBlock);
279     BranchInst::Create(EndBlock, FalseBlock);
280     DTU->applyUpdates({{DominatorTree::Insert, FalseBlock, EndBlock}});
281   }
282 
283   // Insert the real conditional branch based on the original condition.
284   // If we did not create a new block for one of the 'true' or 'false' paths
285   // of the condition, it means that side of the branch goes to the end block
286   // directly and the path originates from the start block from the point of
287   // view of the new PHI.
288   BasicBlock *TT = EndBlock;
289   BasicBlock *FT = EndBlock;
290   if (TrueBlock && FalseBlock) {
291     // A diamond.
292     TT = TrueBlock;
293     FT = FalseBlock;
294 
295     // Update the phi node of SI.
296     SIUse->removeIncomingValue(StartBlock, /* DeletePHIIfEmpty = */ false);
297     SIUse->addIncoming(SI->getTrueValue(), TrueBlock);
298     SIUse->addIncoming(SI->getFalseValue(), FalseBlock);
299 
300     // Update any other PHI nodes in EndBlock.
301     for (PHINode &Phi : EndBlock->phis()) {
302       if (&Phi != SIUse) {
303         Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), TrueBlock);
304         Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), FalseBlock);
305       }
306     }
307   } else {
308     BasicBlock *NewBlock = nullptr;
309     Value *SIOp1 = SI->getTrueValue();
310     Value *SIOp2 = SI->getFalseValue();
311 
312     // A triangle pointing right.
313     if (!TrueBlock) {
314       NewBlock = FalseBlock;
315       FT = FalseBlock;
316     }
317     // A triangle pointing left.
318     else {
319       NewBlock = TrueBlock;
320       TT = TrueBlock;
321       std::swap(SIOp1, SIOp2);
322     }
323 
324     // Update the phi node of SI.
325     for (unsigned Idx = 0; Idx < SIUse->getNumIncomingValues(); ++Idx) {
326       if (SIUse->getIncomingBlock(Idx) == StartBlock)
327         SIUse->setIncomingValue(Idx, SIOp1);
328     }
329     SIUse->addIncoming(SIOp2, NewBlock);
330 
331     // Update any other PHI nodes in EndBlock.
332     for (auto II = EndBlock->begin(); PHINode *Phi = dyn_cast<PHINode>(II);
333          ++II) {
334       if (Phi != SIUse)
335         Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), NewBlock);
336     }
337   }
338   StartBlockTerm->eraseFromParent();
339   BranchInst::Create(TT, FT, SI->getCondition(), StartBlock);
340   DTU->applyUpdates({{DominatorTree::Insert, StartBlock, TT},
341                      {DominatorTree::Insert, StartBlock, FT}});
342 
343   // The select is now dead.
344   SI->eraseFromParent();
345 }
346 
347 struct ClonedBlock {
348   BasicBlock *BB;
349   uint64_t State; ///< \p State corresponds to the next value of a switch stmnt.
350 };
351 
352 typedef std::deque<BasicBlock *> PathType;
353 typedef std::vector<PathType> PathsType;
354 typedef std::set<const BasicBlock *> VisitedBlocks;
355 typedef std::vector<ClonedBlock> CloneList;
356 
357 // This data structure keeps track of all blocks that have been cloned.  If two
358 // different ThreadingPaths clone the same block for a certain state it should
359 // be reused, and it can be looked up in this map.
360 typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap;
361 
362 // This map keeps track of all the new definitions for an instruction. This
363 // information is needed when restoring SSA form after cloning blocks.
364 typedef DenseMap<Instruction *, std::vector<Instruction *>> DefMap;
365 
366 inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
367   OS << "< ";
368   for (const BasicBlock *BB : Path) {
369     std::string BBName;
370     if (BB->hasName())
371       raw_string_ostream(BBName) << BB->getName();
372     else
373       raw_string_ostream(BBName) << BB;
374     OS << BBName << " ";
375   }
376   OS << ">";
377   return OS;
378 }
379 
380 /// ThreadingPath is a path in the control flow of a loop that can be threaded
381 /// by cloning necessary basic blocks and replacing conditional branches with
382 /// unconditional ones. A threading path includes a list of basic blocks, the
383 /// exit state, and the block that determines the next state.
384 struct ThreadingPath {
385   /// Exit value is DFA's exit state for the given path.
386   uint64_t getExitValue() const { return ExitVal; }
387   void setExitValue(const ConstantInt *V) {
388     ExitVal = V->getZExtValue();
389     IsExitValSet = true;
390   }
391   bool isExitValueSet() const { return IsExitValSet; }
392 
393   /// Determinator is the basic block that determines the next state of the DFA.
394   const BasicBlock *getDeterminatorBB() const { return DBB; }
395   void setDeterminator(const BasicBlock *BB) { DBB = BB; }
396 
397   /// Path is a list of basic blocks.
398   const PathType &getPath() const { return Path; }
399   void setPath(const PathType &NewPath) { Path = NewPath; }
400 
401   void print(raw_ostream &OS) const {
402     OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]";
403   }
404 
405 private:
406   PathType Path;
407   uint64_t ExitVal;
408   const BasicBlock *DBB = nullptr;
409   bool IsExitValSet = false;
410 };
411 
412 #ifndef NDEBUG
413 inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
414   TPath.print(OS);
415   return OS;
416 }
417 #endif
418 
419 struct MainSwitch {
420   MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) {
421     if (isPredictable(SI)) {
422       Instr = SI;
423     } else {
424       ORE->emit([&]() {
425         return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI)
426                << "Switch instruction is not predictable.";
427       });
428     }
429   }
430 
431   virtual ~MainSwitch() = default;
432 
433   SwitchInst *getInstr() const { return Instr; }
434   const SmallVector<SelectInstToUnfold, 4> getSelectInsts() {
435     return SelectInsts;
436   }
437 
438 private:
439   /// Do a use-def chain traversal. Make sure the value of the switch variable
440   /// is always a known constant. This means that all conditional jumps based on
441   /// switch variable can be converted to unconditional jumps.
442   bool isPredictable(const SwitchInst *SI) {
443     std::deque<Instruction *> Q;
444     SmallSet<Value *, 16> SeenValues;
445     SelectInsts.clear();
446 
447     Value *FirstDef = SI->getOperand(0);
448     auto *Inst = dyn_cast<Instruction>(FirstDef);
449 
450     // If this is a function argument or another non-instruction, then give up.
451     // We are interested in loop local variables.
452     if (!Inst)
453       return false;
454 
455     // Require the first definition to be a PHINode
456     if (!isa<PHINode>(Inst))
457       return false;
458 
459     LLVM_DEBUG(dbgs() << "\tisPredictable() FirstDef: " << *Inst << "\n");
460 
461     Q.push_back(Inst);
462     SeenValues.insert(FirstDef);
463 
464     while (!Q.empty()) {
465       Instruction *Current = Q.front();
466       Q.pop_front();
467 
468       if (auto *Phi = dyn_cast<PHINode>(Current)) {
469         for (Value *Incoming : Phi->incoming_values()) {
470           if (!isPredictableValue(Incoming, SeenValues))
471             return false;
472           addInstToQueue(Incoming, Q, SeenValues);
473         }
474         LLVM_DEBUG(dbgs() << "\tisPredictable() phi: " << *Phi << "\n");
475       } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) {
476         if (!isValidSelectInst(SelI))
477           return false;
478         if (!isPredictableValue(SelI->getTrueValue(), SeenValues) ||
479             !isPredictableValue(SelI->getFalseValue(), SeenValues)) {
480           return false;
481         }
482         addInstToQueue(SelI->getTrueValue(), Q, SeenValues);
483         addInstToQueue(SelI->getFalseValue(), Q, SeenValues);
484         LLVM_DEBUG(dbgs() << "\tisPredictable() select: " << *SelI << "\n");
485         if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back()))
486           SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse));
487       } else {
488         // If it is neither a phi nor a select, then we give up.
489         return false;
490       }
491     }
492 
493     return true;
494   }
495 
496   bool isPredictableValue(Value *InpVal, SmallSet<Value *, 16> &SeenValues) {
497     if (SeenValues.find(InpVal) != SeenValues.end())
498       return true;
499 
500     if (isa<ConstantInt>(InpVal))
501       return true;
502 
503     // If this is a function argument or another non-instruction, then give up.
504     if (!isa<Instruction>(InpVal))
505       return false;
506 
507     return true;
508   }
509 
510   void addInstToQueue(Value *Val, std::deque<Instruction *> &Q,
511                       SmallSet<Value *, 16> &SeenValues) {
512     if (SeenValues.find(Val) != SeenValues.end())
513       return;
514     if (Instruction *I = dyn_cast<Instruction>(Val))
515       Q.push_back(I);
516     SeenValues.insert(Val);
517   }
518 
519   bool isValidSelectInst(SelectInst *SI) {
520     if (!SI->hasOneUse())
521       return false;
522 
523     Instruction *SIUse = dyn_cast<Instruction>(SI->user_back());
524     // The use of the select inst should be either a phi or another select.
525     if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse)))
526       return false;
527 
528     BasicBlock *SIBB = SI->getParent();
529 
530     // Currently, we can only expand select instructions in basic blocks with
531     // one successor.
532     BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator());
533     if (!SITerm || !SITerm->isUnconditional())
534       return false;
535 
536     if (isa<PHINode>(SIUse) &&
537         SIBB->getSingleSuccessor() != dyn_cast<Instruction>(SIUse)->getParent())
538       return false;
539 
540     // If select will not be sunk during unfolding, and it is in the same basic
541     // block as another state defining select, then cannot unfold both.
542     for (SelectInstToUnfold SIToUnfold : SelectInsts) {
543       SelectInst *PrevSI = SIToUnfold.getInst();
544       if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI &&
545           PrevSI->getParent() == SI->getParent())
546         return false;
547     }
548 
549     return true;
550   }
551 
552   SwitchInst *Instr = nullptr;
553   SmallVector<SelectInstToUnfold, 4> SelectInsts;
554 };
555 
556 struct AllSwitchPaths {
557   AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE)
558       : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()),
559         ORE(ORE) {}
560 
561   std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; }
562   unsigned getNumThreadingPaths() { return TPaths.size(); }
563   SwitchInst *getSwitchInst() { return Switch; }
564   BasicBlock *getSwitchBlock() { return SwitchBlock; }
565 
566   void run() {
567     VisitedBlocks Visited;
568     PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1);
569     StateDefMap StateDef = getStateDefMap();
570 
571     for (PathType Path : LoopPaths) {
572       ThreadingPath TPath;
573 
574       const BasicBlock *PrevBB = Path.back();
575       for (const BasicBlock *BB : Path) {
576         if (StateDef.count(BB) != 0) {
577           const PHINode *Phi = dyn_cast<PHINode>(StateDef[BB]);
578           assert(Phi && "Expected a state-defining instr to be a phi node.");
579 
580           const Value *V = Phi->getIncomingValueForBlock(PrevBB);
581           if (const ConstantInt *C = dyn_cast<const ConstantInt>(V)) {
582             TPath.setExitValue(C);
583             TPath.setDeterminator(BB);
584             TPath.setPath(Path);
585           }
586         }
587 
588         // Switch block is the determinator, this is the final exit value.
589         if (TPath.isExitValueSet() && BB == Path.front())
590           break;
591 
592         PrevBB = BB;
593       }
594 
595       if (TPath.isExitValueSet())
596         TPaths.push_back(TPath);
597     }
598   }
599 
600 private:
601   // Value: an instruction that defines a switch state;
602   // Key: the parent basic block of that instruction.
603   typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap;
604 
605   PathsType paths(BasicBlock *BB, VisitedBlocks &Visited,
606                   unsigned PathDepth) const {
607     PathsType Res;
608 
609     // Stop exploring paths after visiting MaxPathLength blocks
610     if (PathDepth > MaxPathLength) {
611       ORE->emit([&]() {
612         return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached",
613                                           Switch)
614                << "Exploration stopped after visiting MaxPathLength="
615                << ore::NV("MaxPathLength", MaxPathLength) << " blocks.";
616       });
617       return Res;
618     }
619 
620     Visited.insert(BB);
621 
622     // Some blocks have multiple edges to the same successor, and this set
623     // is used to prevent a duplicate path from being generated
624     SmallSet<BasicBlock *, 4> Successors;
625 
626     for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
627       BasicBlock *Succ = *SI;
628 
629       if (Successors.find(Succ) != Successors.end())
630         continue;
631       Successors.insert(Succ);
632 
633       // Found a cycle through the SwitchBlock
634       if (Succ == SwitchBlock) {
635         Res.push_back({BB});
636         continue;
637       }
638 
639       // We have encountered a cycle, do not get caught in it
640       if (Visited.find(Succ) != Visited.end())
641         continue;
642 
643       PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1);
644       for (PathType Path : SuccPaths) {
645         PathType NewPath(Path);
646         NewPath.push_front(BB);
647         Res.push_back(NewPath);
648       }
649     }
650     // This block could now be visited again from a different predecessor. Note
651     // that this will result in exponential runtime. Subpaths could possibly be
652     // cached but it takes a lot of memory to store them.
653     Visited.erase(BB);
654     return Res;
655   }
656 
657   /// Walk the use-def chain and collect all the state-defining instructions.
658   StateDefMap getStateDefMap() const {
659     StateDefMap Res;
660 
661     Value *FirstDef = Switch->getOperand(0);
662 
663     assert(isa<PHINode>(FirstDef) && "After select unfolding, all state "
664                                      "definitions are expected to be phi "
665                                      "nodes.");
666 
667     SmallVector<PHINode *, 8> Stack;
668     Stack.push_back(dyn_cast<PHINode>(FirstDef));
669     SmallSet<Value *, 16> SeenValues;
670 
671     while (!Stack.empty()) {
672       PHINode *CurPhi = Stack.back();
673       Stack.pop_back();
674 
675       Res[CurPhi->getParent()] = CurPhi;
676       SeenValues.insert(CurPhi);
677 
678       for (Value *Incoming : CurPhi->incoming_values()) {
679         if (Incoming == FirstDef || isa<ConstantInt>(Incoming) ||
680             SeenValues.find(Incoming) != SeenValues.end()) {
681           continue;
682         }
683 
684         assert(isa<PHINode>(Incoming) && "After select unfolding, all state "
685                                          "definitions are expected to be phi "
686                                          "nodes.");
687 
688         Stack.push_back(cast<PHINode>(Incoming));
689       }
690     }
691 
692     return Res;
693   }
694 
695   SwitchInst *Switch;
696   BasicBlock *SwitchBlock;
697   OptimizationRemarkEmitter *ORE;
698   std::vector<ThreadingPath> TPaths;
699 };
700 
701 struct TransformDFA {
702   TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT,
703                AssumptionCache *AC, TargetTransformInfo *TTI,
704                OptimizationRemarkEmitter *ORE,
705                SmallPtrSet<const Value *, 32> EphValues)
706       : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
707         EphValues(EphValues) {}
708 
709   void run() {
710     if (isLegalAndProfitableToTransform()) {
711       createAllExitPaths();
712       NumTransforms++;
713     }
714   }
715 
716 private:
717   /// This function performs both a legality check and profitability check at
718   /// the same time since it is convenient to do so. It iterates through all
719   /// blocks that will be cloned, and keeps track of the duplication cost. It
720   /// also returns false if it is illegal to clone some required block.
721   bool isLegalAndProfitableToTransform() {
722     CodeMetrics Metrics;
723     SwitchInst *Switch = SwitchPaths->getSwitchInst();
724 
725     // Note that DuplicateBlockMap is not being used as intended here. It is
726     // just being used to ensure (BB, State) pairs are only counted once.
727     DuplicateBlockMap DuplicateMap;
728 
729     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
730       PathType PathBBs = TPath.getPath();
731       uint64_t NextState = TPath.getExitValue();
732       const BasicBlock *Determinator = TPath.getDeterminatorBB();
733 
734       // Update Metrics for the Switch block, this is always cloned
735       BasicBlock *BB = SwitchPaths->getSwitchBlock();
736       BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
737       if (!VisitedBB) {
738         Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
739         DuplicateMap[BB].push_back({BB, NextState});
740       }
741 
742       // If the Switch block is the Determinator, then we can continue since
743       // this is the only block that is cloned and we already counted for it.
744       if (PathBBs.front() == Determinator)
745         continue;
746 
747       // Otherwise update Metrics for all blocks that will be cloned. If any
748       // block is already cloned and would be reused, don't double count it.
749       auto DetIt = std::find(PathBBs.begin(), PathBBs.end(), Determinator);
750       for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
751         BB = *BBIt;
752         VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
753         if (VisitedBB)
754           continue;
755         Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
756         DuplicateMap[BB].push_back({BB, NextState});
757       }
758 
759       if (Metrics.notDuplicatable) {
760         LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
761                           << "non-duplicatable instructions.\n");
762         ORE->emit([&]() {
763           return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst",
764                                           Switch)
765                  << "Contains non-duplicatable instructions.";
766         });
767         return false;
768       }
769 
770       if (Metrics.convergent) {
771         LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
772                           << "convergent instructions.\n");
773         ORE->emit([&]() {
774           return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
775                  << "Contains convergent instructions.";
776         });
777         return false;
778       }
779     }
780 
781     unsigned DuplicationCost = 0;
782 
783     unsigned JumpTableSize = 0;
784     TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr,
785                                           nullptr);
786     if (JumpTableSize == 0) {
787       // Factor in the number of conditional branches reduced from jump
788       // threading. Assume that lowering the switch block is implemented by
789       // using binary search, hence the LogBase2().
790       unsigned CondBranches =
791           APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
792       DuplicationCost = Metrics.NumInsts / CondBranches;
793     } else {
794       // Compared with jump tables, the DFA optimizer removes an indirect branch
795       // on each loop iteration, thus making branch prediction more precise. The
796       // more branch targets there are, the more likely it is for the branch
797       // predictor to make a mistake, and the more benefit there is in the DFA
798       // optimizer. Thus, the more branch targets there are, the lower is the
799       // cost of the DFA opt.
800       DuplicationCost = Metrics.NumInsts / JumpTableSize;
801     }
802 
803     LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
804                       << SwitchPaths->getSwitchBlock()->getName()
805                       << " is: " << DuplicationCost << "\n\n");
806 
807     if (DuplicationCost > CostThreshold) {
808       LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the "
809                         << "cost threshold.\n");
810       ORE->emit([&]() {
811         return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
812                << "Duplication cost exceeds the cost threshold (cost="
813                << ore::NV("Cost", DuplicationCost)
814                << ", threshold=" << ore::NV("Threshold", CostThreshold) << ").";
815       });
816       return false;
817     }
818 
819     ORE->emit([&]() {
820       return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch)
821              << "Switch statement jump-threaded.";
822     });
823 
824     return true;
825   }
826 
827   /// Transform each threading path to effectively jump thread the DFA.
828   void createAllExitPaths() {
829     DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager);
830 
831     // Move the switch block to the end of the path, since it will be duplicated
832     BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();
833     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
834       LLVM_DEBUG(dbgs() << TPath << "\n");
835       PathType NewPath(TPath.getPath());
836       NewPath.push_back(SwitchBlock);
837       TPath.setPath(NewPath);
838     }
839 
840     // Transform the ThreadingPaths and keep track of the cloned values
841     DuplicateBlockMap DuplicateMap;
842     DefMap NewDefs;
843 
844     SmallSet<BasicBlock *, 16> BlocksToClean;
845     for (BasicBlock *BB : successors(SwitchBlock))
846       BlocksToClean.insert(BB);
847 
848     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
849       createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);
850       NumPaths++;
851     }
852 
853     // After all paths are cloned, now update the last successor of the cloned
854     // path so it skips over the switch statement
855     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
856       updateLastSuccessor(TPath, DuplicateMap, &DTU);
857 
858     // For each instruction that was cloned and used outside, update its uses
859     updateSSA(NewDefs);
860 
861     // Clean PHI Nodes for the newly created blocks
862     for (BasicBlock *BB : BlocksToClean)
863       cleanPhiNodes(BB);
864   }
865 
866   /// For a specific ThreadingPath \p Path, create an exit path starting from
867   /// the determinator block.
868   ///
869   /// To remember the correct destination, we have to duplicate blocks
870   /// corresponding to each state. Also update the terminating instruction of
871   /// the predecessors, and phis in the successor blocks.
872   void createExitPath(DefMap &NewDefs, ThreadingPath &Path,
873                       DuplicateBlockMap &DuplicateMap,
874                       SmallSet<BasicBlock *, 16> &BlocksToClean,
875                       DomTreeUpdater *DTU) {
876     uint64_t NextState = Path.getExitValue();
877     const BasicBlock *Determinator = Path.getDeterminatorBB();
878     PathType PathBBs = Path.getPath();
879 
880     // Don't select the placeholder block in front
881     if (PathBBs.front() == Determinator)
882       PathBBs.pop_front();
883 
884     auto DetIt = std::find(PathBBs.begin(), PathBBs.end(), Determinator);
885     auto Prev = std::prev(DetIt);
886     BasicBlock *PrevBB = *Prev;
887     for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
888       BasicBlock *BB = *BBIt;
889       BlocksToClean.insert(BB);
890 
891       // We already cloned BB for this NextState, now just update the branch
892       // and continue.
893       BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap);
894       if (NextBB) {
895         updatePredecessor(PrevBB, BB, NextBB, DTU);
896         PrevBB = NextBB;
897         continue;
898       }
899 
900       // Clone the BB and update the successor of Prev to jump to the new block
901       BasicBlock *NewBB = cloneBlockAndUpdatePredecessor(
902           BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU);
903       DuplicateMap[BB].push_back({NewBB, NextState});
904       BlocksToClean.insert(NewBB);
905       PrevBB = NewBB;
906     }
907   }
908 
909   /// Restore SSA form after cloning blocks.
910   ///
911   /// Each cloned block creates new defs for a variable, and the uses need to be
912   /// updated to reflect this. The uses may be replaced with a cloned value, or
913   /// some derived phi instruction. Note that all uses of a value defined in the
914   /// same block were already remapped when cloning the block.
915   void updateSSA(DefMap &NewDefs) {
916     SSAUpdaterBulk SSAUpdate;
917     SmallVector<Use *, 16> UsesToRename;
918 
919     for (auto KV : NewDefs) {
920       Instruction *I = KV.first;
921       BasicBlock *BB = I->getParent();
922       std::vector<Instruction *> Cloned = KV.second;
923 
924       // Scan all uses of this instruction to see if it is used outside of its
925       // block, and if so, record them in UsesToRename.
926       for (Use &U : I->uses()) {
927         Instruction *User = cast<Instruction>(U.getUser());
928         if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
929           if (UserPN->getIncomingBlock(U) == BB)
930             continue;
931         } else if (User->getParent() == BB) {
932           continue;
933         }
934 
935         UsesToRename.push_back(&U);
936       }
937 
938       // If there are no uses outside the block, we're done with this
939       // instruction.
940       if (UsesToRename.empty())
941         continue;
942       LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I
943                         << "\n");
944 
945       // We found a use of I outside of BB.  Rename all uses of I that are
946       // outside its block to be uses of the appropriate PHI node etc.  See
947       // ValuesInBlocks with the values we know.
948       unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType());
949       SSAUpdate.AddAvailableValue(VarNum, BB, I);
950       for (Instruction *New : Cloned)
951         SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New);
952 
953       while (!UsesToRename.empty())
954         SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val());
955 
956       LLVM_DEBUG(dbgs() << "\n");
957     }
958     // SSAUpdater handles phi placement and renaming uses with the appropriate
959     // value.
960     SSAUpdate.RewriteAllUses(DT);
961   }
962 
963   /// Clones a basic block, and adds it to the CFG.
964   ///
965   /// This function also includes updating phi nodes in the successors of the
966   /// BB, and remapping uses that were defined locally in the cloned BB.
967   BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB,
968                                              uint64_t NextState,
969                                              DuplicateBlockMap &DuplicateMap,
970                                              DefMap &NewDefs,
971                                              DomTreeUpdater *DTU) {
972     ValueToValueMapTy VMap;
973     BasicBlock *NewBB = CloneBasicBlock(
974         BB, VMap, ".jt" + std::to_string(NextState), BB->getParent());
975     NewBB->moveAfter(BB);
976     NumCloned++;
977 
978     for (Instruction &I : *NewBB) {
979       // Do not remap operands of PHINode in case a definition in BB is an
980       // incoming value to a phi in the same block. This incoming value will
981       // be renamed later while restoring SSA.
982       if (isa<PHINode>(&I))
983         continue;
984       RemapInstruction(&I, VMap,
985                        RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);
986       if (AssumeInst *II = dyn_cast<AssumeInst>(&I))
987         AC->registerAssumption(II);
988     }
989 
990     updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap);
991     updatePredecessor(PrevBB, BB, NewBB, DTU);
992     updateDefMap(NewDefs, VMap);
993 
994     // Add all successors to the DominatorTree
995     SmallPtrSet<BasicBlock *, 4> SuccSet;
996     for (auto *SuccBB : successors(NewBB)) {
997       if (SuccSet.insert(SuccBB).second)
998         DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}});
999     }
1000     SuccSet.clear();
1001     return NewBB;
1002   }
1003 
1004   /// Update the phi nodes in BB's successors.
1005   ///
1006   /// This means creating a new incoming value from NewBB with the new
1007   /// instruction wherever there is an incoming value from BB.
1008   void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB,
1009                            uint64_t NextState, ValueToValueMapTy &VMap,
1010                            DuplicateBlockMap &DuplicateMap) {
1011     std::vector<BasicBlock *> BlocksToUpdate;
1012 
1013     // If BB is the last block in the path, we can simply update the one case
1014     // successor that will be reached.
1015     if (BB == SwitchPaths->getSwitchBlock()) {
1016       SwitchInst *Switch = SwitchPaths->getSwitchInst();
1017       BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1018       BlocksToUpdate.push_back(NextCase);
1019       BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap);
1020       if (ClonedSucc)
1021         BlocksToUpdate.push_back(ClonedSucc);
1022     }
1023     // Otherwise update phis in all successors.
1024     else {
1025       for (BasicBlock *Succ : successors(BB)) {
1026         BlocksToUpdate.push_back(Succ);
1027 
1028         // Check if a successor has already been cloned for the particular exit
1029         // value. In this case if a successor was already cloned, the phi nodes
1030         // in the cloned block should be updated directly.
1031         BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap);
1032         if (ClonedSucc)
1033           BlocksToUpdate.push_back(ClonedSucc);
1034       }
1035     }
1036 
1037     // If there is a phi with an incoming value from BB, create a new incoming
1038     // value for the new predecessor ClonedBB. The value will either be the same
1039     // value from BB or a cloned value.
1040     for (BasicBlock *Succ : BlocksToUpdate) {
1041       for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II);
1042            ++II) {
1043         Value *Incoming = Phi->getIncomingValueForBlock(BB);
1044         if (Incoming) {
1045           if (isa<Constant>(Incoming)) {
1046             Phi->addIncoming(Incoming, ClonedBB);
1047             continue;
1048           }
1049           Value *ClonedVal = VMap[Incoming];
1050           if (ClonedVal)
1051             Phi->addIncoming(ClonedVal, ClonedBB);
1052           else
1053             Phi->addIncoming(Incoming, ClonedBB);
1054         }
1055       }
1056     }
1057   }
1058 
1059   /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all
1060   /// other successors are kept as well.
1061   void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB,
1062                          BasicBlock *NewBB, DomTreeUpdater *DTU) {
1063     // When a path is reused, there is a chance that predecessors were already
1064     // updated before. Check if the predecessor needs to be updated first.
1065     if (!isPredecessor(OldBB, PrevBB))
1066       return;
1067 
1068     Instruction *PrevTerm = PrevBB->getTerminator();
1069     for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) {
1070       if (PrevTerm->getSuccessor(Idx) == OldBB) {
1071         OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true);
1072         PrevTerm->setSuccessor(Idx, NewBB);
1073       }
1074     }
1075     DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB},
1076                        {DominatorTree::Insert, PrevBB, NewBB}});
1077   }
1078 
1079   /// Add new value mappings to the DefMap to keep track of all new definitions
1080   /// for a particular instruction. These will be used while updating SSA form.
1081   void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) {
1082     for (auto Entry : VMap) {
1083       Instruction *Inst =
1084           dyn_cast<Instruction>(const_cast<Value *>(Entry.first));
1085       if (!Inst || !Entry.second || isa<BranchInst>(Inst) ||
1086           isa<SwitchInst>(Inst)) {
1087         continue;
1088       }
1089 
1090       Instruction *Cloned = dyn_cast<Instruction>(Entry.second);
1091       if (!Cloned)
1092         continue;
1093 
1094       if (NewDefs.find(Inst) == NewDefs.end())
1095         NewDefs[Inst] = {Cloned};
1096       else
1097         NewDefs[Inst].push_back(Cloned);
1098     }
1099   }
1100 
1101   /// Update the last branch of a particular cloned path to point to the correct
1102   /// case successor.
1103   ///
1104   /// Note that this is an optional step and would have been done in later
1105   /// optimizations, but it makes the CFG significantly easier to work with.
1106   void updateLastSuccessor(ThreadingPath &TPath,
1107                            DuplicateBlockMap &DuplicateMap,
1108                            DomTreeUpdater *DTU) {
1109     uint64_t NextState = TPath.getExitValue();
1110     BasicBlock *BB = TPath.getPath().back();
1111     BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap);
1112 
1113     // Note multiple paths can end at the same block so check that it is not
1114     // updated yet
1115     if (!isa<SwitchInst>(LastBlock->getTerminator()))
1116       return;
1117     SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator());
1118     BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1119 
1120     std::vector<DominatorTree::UpdateType> DTUpdates;
1121     SmallPtrSet<BasicBlock *, 4> SuccSet;
1122     for (BasicBlock *Succ : successors(LastBlock)) {
1123       if (Succ != NextCase && SuccSet.insert(Succ).second)
1124         DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ});
1125     }
1126 
1127     Switch->eraseFromParent();
1128     BranchInst::Create(NextCase, LastBlock);
1129 
1130     DTU->applyUpdates(DTUpdates);
1131   }
1132 
1133   /// After cloning blocks, some of the phi nodes have extra incoming values
1134   /// that are no longer used. This function removes them.
1135   void cleanPhiNodes(BasicBlock *BB) {
1136     // If BB is no longer reachable, remove any remaining phi nodes
1137     if (pred_empty(BB)) {
1138       std::vector<PHINode *> PhiToRemove;
1139       for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {
1140         PhiToRemove.push_back(Phi);
1141       }
1142       for (PHINode *PN : PhiToRemove) {
1143         PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
1144         PN->eraseFromParent();
1145       }
1146       return;
1147     }
1148 
1149     // Remove any incoming values that come from an invalid predecessor
1150     for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {
1151       std::vector<BasicBlock *> BlocksToRemove;
1152       for (BasicBlock *IncomingBB : Phi->blocks()) {
1153         if (!isPredecessor(BB, IncomingBB))
1154           BlocksToRemove.push_back(IncomingBB);
1155       }
1156       for (BasicBlock *BB : BlocksToRemove)
1157         Phi->removeIncomingValue(BB);
1158     }
1159   }
1160 
1161   /// Checks if BB was already cloned for a particular next state value. If it
1162   /// was then it returns this cloned block, and otherwise null.
1163   BasicBlock *getClonedBB(BasicBlock *BB, uint64_t NextState,
1164                           DuplicateBlockMap &DuplicateMap) {
1165     CloneList ClonedBBs = DuplicateMap[BB];
1166 
1167     // Find an entry in the CloneList with this NextState. If it exists then
1168     // return the corresponding BB
1169     auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) {
1170       return C.State == NextState;
1171     });
1172     return It != ClonedBBs.end() ? (*It).BB : nullptr;
1173   }
1174 
1175   /// Helper to get the successor corresponding to a particular case value for
1176   /// a switch statement.
1177   BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, uint64_t NextState) {
1178     BasicBlock *NextCase = nullptr;
1179     for (auto Case : Switch->cases()) {
1180       if (Case.getCaseValue()->getZExtValue() == NextState) {
1181         NextCase = Case.getCaseSuccessor();
1182         break;
1183       }
1184     }
1185     if (!NextCase)
1186       NextCase = Switch->getDefaultDest();
1187     return NextCase;
1188   }
1189 
1190   /// Returns true if IncomingBB is a predecessor of BB.
1191   bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {
1192     return llvm::find(predecessors(BB), IncomingBB) != pred_end(BB);
1193   }
1194 
1195   AllSwitchPaths *SwitchPaths;
1196   DominatorTree *DT;
1197   AssumptionCache *AC;
1198   TargetTransformInfo *TTI;
1199   OptimizationRemarkEmitter *ORE;
1200   SmallPtrSet<const Value *, 32> EphValues;
1201   std::vector<ThreadingPath> TPaths;
1202 };
1203 
1204 bool DFAJumpThreading::run(Function &F) {
1205   LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n");
1206 
1207   if (F.hasOptSize()) {
1208     LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n");
1209     return false;
1210   }
1211 
1212   if (ClViewCfgBefore)
1213     F.viewCFG();
1214 
1215   SmallVector<AllSwitchPaths, 2> ThreadableLoops;
1216   bool MadeChanges = false;
1217 
1218   for (BasicBlock &BB : F) {
1219     auto *SI = dyn_cast<SwitchInst>(BB.getTerminator());
1220     if (!SI)
1221       continue;
1222 
1223     LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
1224                       << " is predictable\n");
1225     MainSwitch Switch(SI, ORE);
1226 
1227     if (!Switch.getInstr())
1228       continue;
1229 
1230     LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a "
1231                       << "candidate for jump threading\n");
1232     LLVM_DEBUG(SI->dump());
1233 
1234     unfoldSelectInstrs(DT, Switch.getSelectInsts());
1235     if (!Switch.getSelectInsts().empty())
1236       MadeChanges = true;
1237 
1238     AllSwitchPaths SwitchPaths(&Switch, ORE);
1239     SwitchPaths.run();
1240 
1241     if (SwitchPaths.getNumThreadingPaths() > 0) {
1242       ThreadableLoops.push_back(SwitchPaths);
1243 
1244       // For the time being limit this optimization to occurring once in a
1245       // function since it can change the CFG significantly. This is not a
1246       // strict requirement but it can cause buggy behavior if there is an
1247       // overlap of blocks in different opportunities. There is a lot of room to
1248       // experiment with catching more opportunities here.
1249       break;
1250     }
1251   }
1252 
1253   SmallPtrSet<const Value *, 32> EphValues;
1254   if (ThreadableLoops.size() > 0)
1255     CodeMetrics::collectEphemeralValues(&F, AC, EphValues);
1256 
1257   for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
1258     TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
1259     Transform.run();
1260     MadeChanges = true;
1261   }
1262 
1263 #ifdef EXPENSIVE_CHECKS
1264   assert(DT->verify(DominatorTree::VerificationLevel::Full));
1265   verifyFunction(F, &dbgs());
1266 #endif
1267 
1268   return MadeChanges;
1269 }
1270 
1271 } // end anonymous namespace
1272 
1273 /// Integrate with the new Pass Manager
1274 PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
1275                                             FunctionAnalysisManager &AM) {
1276   AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
1277   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
1278   TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
1279   OptimizationRemarkEmitter ORE(&F);
1280 
1281   if (!DFAJumpThreading(&AC, &DT, &TTI, &ORE).run(F))
1282     return PreservedAnalyses::all();
1283 
1284   PreservedAnalyses PA;
1285   PA.preserve<DominatorTreeAnalysis>();
1286   return PA;
1287 }
1288