1 //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Transform each threading path to effectively jump thread the DFA. For 11 // example, the CFG below could be transformed as follows, where the cloned 12 // blocks unconditionally branch to the next correct case based on what is 13 // identified in the analysis. 14 // 15 // sw.bb sw.bb 16 // / | \ / | \ 17 // case1 case2 case3 case1 case2 case3 18 // \ | / | | | 19 // determinator det.2 det.3 det.1 20 // br sw.bb / | \ 21 // sw.bb.2 sw.bb.3 sw.bb.1 22 // br case2 br case3 br case1§ 23 // 24 // Definitions and Terminology: 25 // 26 // * Threading path: 27 // a list of basic blocks, the exit state, and the block that determines 28 // the next state, for which the following notation will be used: 29 // < path of BBs that form a cycle > [ state, determinator ] 30 // 31 // * Predictable switch: 32 // The switch variable is always a known constant so that all conditional 33 // jumps based on switch variable can be converted to unconditional jump. 34 // 35 // * Determinator: 36 // The basic block that determines the next state of the DFA. 37 // 38 // Representing the optimization in C-like pseudocode: the code pattern on the 39 // left could functionally be transformed to the right pattern if the switch 40 // condition is predictable. 41 // 42 // X = A goto A 43 // for (...) A: 44 // switch (X) ... 45 // case A goto B 46 // X = B B: 47 // case B ... 48 // X = C goto C 49 // 50 // The pass first checks that switch variable X is decided by the control flow 51 // path taken in the loop; for example, in case B, the next value of X is 52 // decided to be C. It then enumerates through all paths in the loop and labels 53 // the basic blocks where the next state is decided. 54 // 55 // Using this information it creates new paths that unconditionally branch to 56 // the next case. This involves cloning code, so it only gets triggered if the 57 // amount of code duplicated is below a threshold. 58 // 59 //===----------------------------------------------------------------------===// 60 61 #include "llvm/Transforms/Scalar/DFAJumpThreading.h" 62 #include "llvm/ADT/APInt.h" 63 #include "llvm/ADT/DenseMap.h" 64 #include "llvm/ADT/DepthFirstIterator.h" 65 #include "llvm/ADT/SmallSet.h" 66 #include "llvm/ADT/Statistic.h" 67 #include "llvm/Analysis/AssumptionCache.h" 68 #include "llvm/Analysis/CodeMetrics.h" 69 #include "llvm/Analysis/LoopIterator.h" 70 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 71 #include "llvm/Analysis/TargetTransformInfo.h" 72 #include "llvm/IR/CFG.h" 73 #include "llvm/IR/Constants.h" 74 #include "llvm/IR/IntrinsicInst.h" 75 #include "llvm/IR/Verifier.h" 76 #include "llvm/InitializePasses.h" 77 #include "llvm/Pass.h" 78 #include "llvm/Support/CommandLine.h" 79 #include "llvm/Support/Debug.h" 80 #include "llvm/Transforms/Scalar.h" 81 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 82 #include "llvm/Transforms/Utils/Cloning.h" 83 #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" 84 #include "llvm/Transforms/Utils/ValueMapper.h" 85 #include <algorithm> 86 #include <deque> 87 #include <unordered_map> 88 #include <unordered_set> 89 90 using namespace llvm; 91 92 #define DEBUG_TYPE "dfa-jump-threading" 93 94 STATISTIC(NumTransforms, "Number of transformations done"); 95 STATISTIC(NumCloned, "Number of blocks cloned"); 96 STATISTIC(NumPaths, "Number of individual paths threaded"); 97 98 static cl::opt<bool> 99 ClViewCfgBefore("dfa-jump-view-cfg-before", 100 cl::desc("View the CFG before DFA Jump Threading"), 101 cl::Hidden, cl::init(false)); 102 103 static cl::opt<unsigned> MaxPathLength( 104 "dfa-max-path-length", 105 cl::desc("Max number of blocks searched to find a threading path"), 106 cl::Hidden, cl::init(20)); 107 108 static cl::opt<unsigned> 109 CostThreshold("dfa-cost-threshold", 110 cl::desc("Maximum cost accepted for the transformation"), 111 cl::Hidden, cl::init(50)); 112 113 namespace { 114 115 class SelectInstToUnfold { 116 SelectInst *SI; 117 PHINode *SIUse; 118 119 public: 120 SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {} 121 122 SelectInst *getInst() { return SI; } 123 PHINode *getUse() { return SIUse; } 124 125 explicit operator bool() const { return SI && SIUse; } 126 }; 127 128 void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, 129 std::vector<SelectInstToUnfold> *NewSIsToUnfold, 130 std::vector<BasicBlock *> *NewBBs); 131 132 class DFAJumpThreading { 133 public: 134 DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, 135 TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE) 136 : AC(AC), DT(DT), TTI(TTI), ORE(ORE) {} 137 138 bool run(Function &F); 139 140 private: 141 void 142 unfoldSelectInstrs(DominatorTree *DT, 143 const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { 144 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 145 SmallVector<SelectInstToUnfold, 4> Stack; 146 for (SelectInstToUnfold SIToUnfold : SelectInsts) 147 Stack.push_back(SIToUnfold); 148 149 while (!Stack.empty()) { 150 SelectInstToUnfold SIToUnfold = Stack.back(); 151 Stack.pop_back(); 152 153 std::vector<SelectInstToUnfold> NewSIsToUnfold; 154 std::vector<BasicBlock *> NewBBs; 155 unfold(&DTU, SIToUnfold, &NewSIsToUnfold, &NewBBs); 156 157 // Put newly discovered select instructions into the work list. 158 for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold) 159 Stack.push_back(NewSIToUnfold); 160 } 161 } 162 163 AssumptionCache *AC; 164 DominatorTree *DT; 165 TargetTransformInfo *TTI; 166 OptimizationRemarkEmitter *ORE; 167 }; 168 169 class DFAJumpThreadingLegacyPass : public FunctionPass { 170 public: 171 static char ID; // Pass identification 172 DFAJumpThreadingLegacyPass() : FunctionPass(ID) {} 173 174 void getAnalysisUsage(AnalysisUsage &AU) const override { 175 AU.addRequired<AssumptionCacheTracker>(); 176 AU.addRequired<DominatorTreeWrapperPass>(); 177 AU.addPreserved<DominatorTreeWrapperPass>(); 178 AU.addRequired<TargetTransformInfoWrapperPass>(); 179 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 180 } 181 182 bool runOnFunction(Function &F) override { 183 if (skipFunction(F)) 184 return false; 185 186 AssumptionCache *AC = 187 &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 188 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 189 TargetTransformInfo *TTI = 190 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 191 OptimizationRemarkEmitter *ORE = 192 &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 193 194 return DFAJumpThreading(AC, DT, TTI, ORE).run(F); 195 } 196 }; 197 } // end anonymous namespace 198 199 char DFAJumpThreadingLegacyPass::ID = 0; 200 INITIALIZE_PASS_BEGIN(DFAJumpThreadingLegacyPass, "dfa-jump-threading", 201 "DFA Jump Threading", false, false) 202 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 203 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 204 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 205 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 206 INITIALIZE_PASS_END(DFAJumpThreadingLegacyPass, "dfa-jump-threading", 207 "DFA Jump Threading", false, false) 208 209 // Public interface to the DFA Jump Threading pass 210 FunctionPass *llvm::createDFAJumpThreadingPass() { 211 return new DFAJumpThreadingLegacyPass(); 212 } 213 214 namespace { 215 216 /// Create a new basic block and sink \p SIToSink into it. 217 void createBasicBlockAndSinkSelectInst( 218 DomTreeUpdater *DTU, SelectInst *SI, PHINode *SIUse, SelectInst *SIToSink, 219 BasicBlock *EndBlock, StringRef NewBBName, BasicBlock **NewBlock, 220 BranchInst **NewBranch, std::vector<SelectInstToUnfold> *NewSIsToUnfold, 221 std::vector<BasicBlock *> *NewBBs) { 222 assert(SIToSink->hasOneUse()); 223 assert(NewBlock); 224 assert(NewBranch); 225 *NewBlock = BasicBlock::Create(SI->getContext(), NewBBName, 226 EndBlock->getParent(), EndBlock); 227 NewBBs->push_back(*NewBlock); 228 *NewBranch = BranchInst::Create(EndBlock, *NewBlock); 229 SIToSink->moveBefore(*NewBranch); 230 NewSIsToUnfold->push_back(SelectInstToUnfold(SIToSink, SIUse)); 231 DTU->applyUpdates({{DominatorTree::Insert, *NewBlock, EndBlock}}); 232 } 233 234 /// Unfold the select instruction held in \p SIToUnfold by replacing it with 235 /// control flow. 236 /// 237 /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly 238 /// created basic blocks into \p NewBBs. 239 /// 240 /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible. 241 void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, 242 std::vector<SelectInstToUnfold> *NewSIsToUnfold, 243 std::vector<BasicBlock *> *NewBBs) { 244 SelectInst *SI = SIToUnfold.getInst(); 245 PHINode *SIUse = SIToUnfold.getUse(); 246 BasicBlock *StartBlock = SI->getParent(); 247 BasicBlock *EndBlock = SIUse->getParent(); 248 BranchInst *StartBlockTerm = 249 dyn_cast<BranchInst>(StartBlock->getTerminator()); 250 251 assert(StartBlockTerm && StartBlockTerm->isUnconditional()); 252 assert(SI->hasOneUse()); 253 254 // These are the new basic blocks for the conditional branch. 255 // At least one will become an actual new basic block. 256 BasicBlock *TrueBlock = nullptr; 257 BasicBlock *FalseBlock = nullptr; 258 BranchInst *TrueBranch = nullptr; 259 BranchInst *FalseBranch = nullptr; 260 261 // Sink select instructions to be able to unfold them later. 262 if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getTrueValue())) { 263 createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock, 264 "si.unfold.true", &TrueBlock, &TrueBranch, 265 NewSIsToUnfold, NewBBs); 266 } 267 if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getFalseValue())) { 268 createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock, 269 "si.unfold.false", &FalseBlock, 270 &FalseBranch, NewSIsToUnfold, NewBBs); 271 } 272 273 // If there was nothing to sink, then arbitrarily choose the 'false' side 274 // for a new input value to the PHI. 275 if (!TrueBlock && !FalseBlock) { 276 FalseBlock = BasicBlock::Create(SI->getContext(), "si.unfold.false", 277 EndBlock->getParent(), EndBlock); 278 NewBBs->push_back(FalseBlock); 279 BranchInst::Create(EndBlock, FalseBlock); 280 DTU->applyUpdates({{DominatorTree::Insert, FalseBlock, EndBlock}}); 281 } 282 283 // Insert the real conditional branch based on the original condition. 284 // If we did not create a new block for one of the 'true' or 'false' paths 285 // of the condition, it means that side of the branch goes to the end block 286 // directly and the path originates from the start block from the point of 287 // view of the new PHI. 288 BasicBlock *TT = EndBlock; 289 BasicBlock *FT = EndBlock; 290 if (TrueBlock && FalseBlock) { 291 // A diamond. 292 TT = TrueBlock; 293 FT = FalseBlock; 294 295 // Update the phi node of SI. 296 SIUse->removeIncomingValue(StartBlock, /* DeletePHIIfEmpty = */ false); 297 SIUse->addIncoming(SI->getTrueValue(), TrueBlock); 298 SIUse->addIncoming(SI->getFalseValue(), FalseBlock); 299 300 // Update any other PHI nodes in EndBlock. 301 for (PHINode &Phi : EndBlock->phis()) { 302 if (&Phi != SIUse) { 303 Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), TrueBlock); 304 Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), FalseBlock); 305 } 306 } 307 } else { 308 BasicBlock *NewBlock = nullptr; 309 Value *SIOp1 = SI->getTrueValue(); 310 Value *SIOp2 = SI->getFalseValue(); 311 312 // A triangle pointing right. 313 if (!TrueBlock) { 314 NewBlock = FalseBlock; 315 FT = FalseBlock; 316 } 317 // A triangle pointing left. 318 else { 319 NewBlock = TrueBlock; 320 TT = TrueBlock; 321 std::swap(SIOp1, SIOp2); 322 } 323 324 // Update the phi node of SI. 325 for (unsigned Idx = 0; Idx < SIUse->getNumIncomingValues(); ++Idx) { 326 if (SIUse->getIncomingBlock(Idx) == StartBlock) 327 SIUse->setIncomingValue(Idx, SIOp1); 328 } 329 SIUse->addIncoming(SIOp2, NewBlock); 330 331 // Update any other PHI nodes in EndBlock. 332 for (auto II = EndBlock->begin(); PHINode *Phi = dyn_cast<PHINode>(II); 333 ++II) { 334 if (Phi != SIUse) 335 Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), NewBlock); 336 } 337 } 338 StartBlockTerm->eraseFromParent(); 339 BranchInst::Create(TT, FT, SI->getCondition(), StartBlock); 340 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, TT}, 341 {DominatorTree::Insert, StartBlock, FT}}); 342 343 // The select is now dead. 344 SI->eraseFromParent(); 345 } 346 347 struct ClonedBlock { 348 BasicBlock *BB; 349 uint64_t State; ///< \p State corresponds to the next value of a switch stmnt. 350 }; 351 352 typedef std::deque<BasicBlock *> PathType; 353 typedef std::vector<PathType> PathsType; 354 typedef std::set<const BasicBlock *> VisitedBlocks; 355 typedef std::vector<ClonedBlock> CloneList; 356 357 // This data structure keeps track of all blocks that have been cloned. If two 358 // different ThreadingPaths clone the same block for a certain state it should 359 // be reused, and it can be looked up in this map. 360 typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap; 361 362 // This map keeps track of all the new definitions for an instruction. This 363 // information is needed when restoring SSA form after cloning blocks. 364 typedef DenseMap<Instruction *, std::vector<Instruction *>> DefMap; 365 366 inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { 367 OS << "< "; 368 for (const BasicBlock *BB : Path) { 369 std::string BBName; 370 if (BB->hasName()) 371 raw_string_ostream(BBName) << BB->getName(); 372 else 373 raw_string_ostream(BBName) << BB; 374 OS << BBName << " "; 375 } 376 OS << ">"; 377 return OS; 378 } 379 380 /// ThreadingPath is a path in the control flow of a loop that can be threaded 381 /// by cloning necessary basic blocks and replacing conditional branches with 382 /// unconditional ones. A threading path includes a list of basic blocks, the 383 /// exit state, and the block that determines the next state. 384 struct ThreadingPath { 385 /// Exit value is DFA's exit state for the given path. 386 uint64_t getExitValue() const { return ExitVal; } 387 void setExitValue(const ConstantInt *V) { 388 ExitVal = V->getZExtValue(); 389 IsExitValSet = true; 390 } 391 bool isExitValueSet() const { return IsExitValSet; } 392 393 /// Determinator is the basic block that determines the next state of the DFA. 394 const BasicBlock *getDeterminatorBB() const { return DBB; } 395 void setDeterminator(const BasicBlock *BB) { DBB = BB; } 396 397 /// Path is a list of basic blocks. 398 const PathType &getPath() const { return Path; } 399 void setPath(const PathType &NewPath) { Path = NewPath; } 400 401 void print(raw_ostream &OS) const { 402 OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]"; 403 } 404 405 private: 406 PathType Path; 407 uint64_t ExitVal; 408 const BasicBlock *DBB = nullptr; 409 bool IsExitValSet = false; 410 }; 411 412 #ifndef NDEBUG 413 inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) { 414 TPath.print(OS); 415 return OS; 416 } 417 #endif 418 419 struct MainSwitch { 420 MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) { 421 if (isPredictable(SI)) { 422 Instr = SI; 423 } else { 424 ORE->emit([&]() { 425 return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI) 426 << "Switch instruction is not predictable."; 427 }); 428 } 429 } 430 431 virtual ~MainSwitch() = default; 432 433 SwitchInst *getInstr() const { return Instr; } 434 const SmallVector<SelectInstToUnfold, 4> getSelectInsts() { 435 return SelectInsts; 436 } 437 438 private: 439 /// Do a use-def chain traversal. Make sure the value of the switch variable 440 /// is always a known constant. This means that all conditional jumps based on 441 /// switch variable can be converted to unconditional jumps. 442 bool isPredictable(const SwitchInst *SI) { 443 std::deque<Instruction *> Q; 444 SmallSet<Value *, 16> SeenValues; 445 SelectInsts.clear(); 446 447 Value *FirstDef = SI->getOperand(0); 448 auto *Inst = dyn_cast<Instruction>(FirstDef); 449 450 // If this is a function argument or another non-instruction, then give up. 451 // We are interested in loop local variables. 452 if (!Inst) 453 return false; 454 455 // Require the first definition to be a PHINode 456 if (!isa<PHINode>(Inst)) 457 return false; 458 459 LLVM_DEBUG(dbgs() << "\tisPredictable() FirstDef: " << *Inst << "\n"); 460 461 Q.push_back(Inst); 462 SeenValues.insert(FirstDef); 463 464 while (!Q.empty()) { 465 Instruction *Current = Q.front(); 466 Q.pop_front(); 467 468 if (auto *Phi = dyn_cast<PHINode>(Current)) { 469 for (Value *Incoming : Phi->incoming_values()) { 470 if (!isPredictableValue(Incoming, SeenValues)) 471 return false; 472 addInstToQueue(Incoming, Q, SeenValues); 473 } 474 LLVM_DEBUG(dbgs() << "\tisPredictable() phi: " << *Phi << "\n"); 475 } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) { 476 if (!isValidSelectInst(SelI)) 477 return false; 478 if (!isPredictableValue(SelI->getTrueValue(), SeenValues) || 479 !isPredictableValue(SelI->getFalseValue(), SeenValues)) { 480 return false; 481 } 482 addInstToQueue(SelI->getTrueValue(), Q, SeenValues); 483 addInstToQueue(SelI->getFalseValue(), Q, SeenValues); 484 LLVM_DEBUG(dbgs() << "\tisPredictable() select: " << *SelI << "\n"); 485 if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back())) 486 SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse)); 487 } else { 488 // If it is neither a phi nor a select, then we give up. 489 return false; 490 } 491 } 492 493 return true; 494 } 495 496 bool isPredictableValue(Value *InpVal, SmallSet<Value *, 16> &SeenValues) { 497 if (SeenValues.find(InpVal) != SeenValues.end()) 498 return true; 499 500 if (isa<ConstantInt>(InpVal)) 501 return true; 502 503 // If this is a function argument or another non-instruction, then give up. 504 if (!isa<Instruction>(InpVal)) 505 return false; 506 507 return true; 508 } 509 510 void addInstToQueue(Value *Val, std::deque<Instruction *> &Q, 511 SmallSet<Value *, 16> &SeenValues) { 512 if (SeenValues.find(Val) != SeenValues.end()) 513 return; 514 if (Instruction *I = dyn_cast<Instruction>(Val)) 515 Q.push_back(I); 516 SeenValues.insert(Val); 517 } 518 519 bool isValidSelectInst(SelectInst *SI) { 520 if (!SI->hasOneUse()) 521 return false; 522 523 Instruction *SIUse = dyn_cast<Instruction>(SI->user_back()); 524 // The use of the select inst should be either a phi or another select. 525 if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse))) 526 return false; 527 528 BasicBlock *SIBB = SI->getParent(); 529 530 // Currently, we can only expand select instructions in basic blocks with 531 // one successor. 532 BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator()); 533 if (!SITerm || !SITerm->isUnconditional()) 534 return false; 535 536 if (isa<PHINode>(SIUse) && 537 SIBB->getSingleSuccessor() != dyn_cast<Instruction>(SIUse)->getParent()) 538 return false; 539 540 // If select will not be sunk during unfolding, and it is in the same basic 541 // block as another state defining select, then cannot unfold both. 542 for (SelectInstToUnfold SIToUnfold : SelectInsts) { 543 SelectInst *PrevSI = SIToUnfold.getInst(); 544 if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI && 545 PrevSI->getParent() == SI->getParent()) 546 return false; 547 } 548 549 return true; 550 } 551 552 SwitchInst *Instr = nullptr; 553 SmallVector<SelectInstToUnfold, 4> SelectInsts; 554 }; 555 556 struct AllSwitchPaths { 557 AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE) 558 : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), 559 ORE(ORE) {} 560 561 std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; } 562 unsigned getNumThreadingPaths() { return TPaths.size(); } 563 SwitchInst *getSwitchInst() { return Switch; } 564 BasicBlock *getSwitchBlock() { return SwitchBlock; } 565 566 void run() { 567 VisitedBlocks Visited; 568 PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1); 569 StateDefMap StateDef = getStateDefMap(); 570 571 for (PathType Path : LoopPaths) { 572 ThreadingPath TPath; 573 574 const BasicBlock *PrevBB = Path.back(); 575 for (const BasicBlock *BB : Path) { 576 if (StateDef.count(BB) != 0) { 577 const PHINode *Phi = dyn_cast<PHINode>(StateDef[BB]); 578 assert(Phi && "Expected a state-defining instr to be a phi node."); 579 580 const Value *V = Phi->getIncomingValueForBlock(PrevBB); 581 if (const ConstantInt *C = dyn_cast<const ConstantInt>(V)) { 582 TPath.setExitValue(C); 583 TPath.setDeterminator(BB); 584 TPath.setPath(Path); 585 } 586 } 587 588 // Switch block is the determinator, this is the final exit value. 589 if (TPath.isExitValueSet() && BB == Path.front()) 590 break; 591 592 PrevBB = BB; 593 } 594 595 if (TPath.isExitValueSet()) 596 TPaths.push_back(TPath); 597 } 598 } 599 600 private: 601 // Value: an instruction that defines a switch state; 602 // Key: the parent basic block of that instruction. 603 typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap; 604 605 PathsType paths(BasicBlock *BB, VisitedBlocks &Visited, 606 unsigned PathDepth) const { 607 PathsType Res; 608 609 // Stop exploring paths after visiting MaxPathLength blocks 610 if (PathDepth > MaxPathLength) { 611 ORE->emit([&]() { 612 return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached", 613 Switch) 614 << "Exploration stopped after visiting MaxPathLength=" 615 << ore::NV("MaxPathLength", MaxPathLength) << " blocks."; 616 }); 617 return Res; 618 } 619 620 Visited.insert(BB); 621 622 // Some blocks have multiple edges to the same successor, and this set 623 // is used to prevent a duplicate path from being generated 624 SmallSet<BasicBlock *, 4> Successors; 625 626 for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) { 627 BasicBlock *Succ = *SI; 628 629 if (Successors.find(Succ) != Successors.end()) 630 continue; 631 Successors.insert(Succ); 632 633 // Found a cycle through the SwitchBlock 634 if (Succ == SwitchBlock) { 635 Res.push_back({BB}); 636 continue; 637 } 638 639 // We have encountered a cycle, do not get caught in it 640 if (Visited.find(Succ) != Visited.end()) 641 continue; 642 643 PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1); 644 for (PathType Path : SuccPaths) { 645 PathType NewPath(Path); 646 NewPath.push_front(BB); 647 Res.push_back(NewPath); 648 } 649 } 650 // This block could now be visited again from a different predecessor. Note 651 // that this will result in exponential runtime. Subpaths could possibly be 652 // cached but it takes a lot of memory to store them. 653 Visited.erase(BB); 654 return Res; 655 } 656 657 /// Walk the use-def chain and collect all the state-defining instructions. 658 StateDefMap getStateDefMap() const { 659 StateDefMap Res; 660 661 Value *FirstDef = Switch->getOperand(0); 662 663 assert(isa<PHINode>(FirstDef) && "After select unfolding, all state " 664 "definitions are expected to be phi " 665 "nodes."); 666 667 SmallVector<PHINode *, 8> Stack; 668 Stack.push_back(dyn_cast<PHINode>(FirstDef)); 669 SmallSet<Value *, 16> SeenValues; 670 671 while (!Stack.empty()) { 672 PHINode *CurPhi = Stack.back(); 673 Stack.pop_back(); 674 675 Res[CurPhi->getParent()] = CurPhi; 676 SeenValues.insert(CurPhi); 677 678 for (Value *Incoming : CurPhi->incoming_values()) { 679 if (Incoming == FirstDef || isa<ConstantInt>(Incoming) || 680 SeenValues.find(Incoming) != SeenValues.end()) { 681 continue; 682 } 683 684 assert(isa<PHINode>(Incoming) && "After select unfolding, all state " 685 "definitions are expected to be phi " 686 "nodes."); 687 688 Stack.push_back(cast<PHINode>(Incoming)); 689 } 690 } 691 692 return Res; 693 } 694 695 SwitchInst *Switch; 696 BasicBlock *SwitchBlock; 697 OptimizationRemarkEmitter *ORE; 698 std::vector<ThreadingPath> TPaths; 699 }; 700 701 struct TransformDFA { 702 TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT, 703 AssumptionCache *AC, TargetTransformInfo *TTI, 704 OptimizationRemarkEmitter *ORE, 705 SmallPtrSet<const Value *, 32> EphValues) 706 : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE), 707 EphValues(EphValues) {} 708 709 void run() { 710 if (isLegalAndProfitableToTransform()) { 711 createAllExitPaths(); 712 NumTransforms++; 713 } 714 } 715 716 private: 717 /// This function performs both a legality check and profitability check at 718 /// the same time since it is convenient to do so. It iterates through all 719 /// blocks that will be cloned, and keeps track of the duplication cost. It 720 /// also returns false if it is illegal to clone some required block. 721 bool isLegalAndProfitableToTransform() { 722 CodeMetrics Metrics; 723 SwitchInst *Switch = SwitchPaths->getSwitchInst(); 724 725 // Note that DuplicateBlockMap is not being used as intended here. It is 726 // just being used to ensure (BB, State) pairs are only counted once. 727 DuplicateBlockMap DuplicateMap; 728 729 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 730 PathType PathBBs = TPath.getPath(); 731 uint64_t NextState = TPath.getExitValue(); 732 const BasicBlock *Determinator = TPath.getDeterminatorBB(); 733 734 // Update Metrics for the Switch block, this is always cloned 735 BasicBlock *BB = SwitchPaths->getSwitchBlock(); 736 BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 737 if (!VisitedBB) { 738 Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 739 DuplicateMap[BB].push_back({BB, NextState}); 740 } 741 742 // If the Switch block is the Determinator, then we can continue since 743 // this is the only block that is cloned and we already counted for it. 744 if (PathBBs.front() == Determinator) 745 continue; 746 747 // Otherwise update Metrics for all blocks that will be cloned. If any 748 // block is already cloned and would be reused, don't double count it. 749 auto DetIt = std::find(PathBBs.begin(), PathBBs.end(), Determinator); 750 for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 751 BB = *BBIt; 752 VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 753 if (VisitedBB) 754 continue; 755 Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 756 DuplicateMap[BB].push_back({BB, NextState}); 757 } 758 759 if (Metrics.notDuplicatable) { 760 LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 761 << "non-duplicatable instructions.\n"); 762 ORE->emit([&]() { 763 return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst", 764 Switch) 765 << "Contains non-duplicatable instructions."; 766 }); 767 return false; 768 } 769 770 if (Metrics.convergent) { 771 LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 772 << "convergent instructions.\n"); 773 ORE->emit([&]() { 774 return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) 775 << "Contains convergent instructions."; 776 }); 777 return false; 778 } 779 } 780 781 unsigned DuplicationCost = 0; 782 783 unsigned JumpTableSize = 0; 784 TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr, 785 nullptr); 786 if (JumpTableSize == 0) { 787 // Factor in the number of conditional branches reduced from jump 788 // threading. Assume that lowering the switch block is implemented by 789 // using binary search, hence the LogBase2(). 790 unsigned CondBranches = 791 APInt(32, Switch->getNumSuccessors()).ceilLogBase2(); 792 DuplicationCost = Metrics.NumInsts / CondBranches; 793 } else { 794 // Compared with jump tables, the DFA optimizer removes an indirect branch 795 // on each loop iteration, thus making branch prediction more precise. The 796 // more branch targets there are, the more likely it is for the branch 797 // predictor to make a mistake, and the more benefit there is in the DFA 798 // optimizer. Thus, the more branch targets there are, the lower is the 799 // cost of the DFA opt. 800 DuplicationCost = Metrics.NumInsts / JumpTableSize; 801 } 802 803 LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block " 804 << SwitchPaths->getSwitchBlock()->getName() 805 << " is: " << DuplicationCost << "\n\n"); 806 807 if (DuplicationCost > CostThreshold) { 808 LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the " 809 << "cost threshold.\n"); 810 ORE->emit([&]() { 811 return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch) 812 << "Duplication cost exceeds the cost threshold (cost=" 813 << ore::NV("Cost", DuplicationCost) 814 << ", threshold=" << ore::NV("Threshold", CostThreshold) << ")."; 815 }); 816 return false; 817 } 818 819 ORE->emit([&]() { 820 return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch) 821 << "Switch statement jump-threaded."; 822 }); 823 824 return true; 825 } 826 827 /// Transform each threading path to effectively jump thread the DFA. 828 void createAllExitPaths() { 829 DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager); 830 831 // Move the switch block to the end of the path, since it will be duplicated 832 BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock(); 833 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 834 LLVM_DEBUG(dbgs() << TPath << "\n"); 835 PathType NewPath(TPath.getPath()); 836 NewPath.push_back(SwitchBlock); 837 TPath.setPath(NewPath); 838 } 839 840 // Transform the ThreadingPaths and keep track of the cloned values 841 DuplicateBlockMap DuplicateMap; 842 DefMap NewDefs; 843 844 SmallSet<BasicBlock *, 16> BlocksToClean; 845 for (BasicBlock *BB : successors(SwitchBlock)) 846 BlocksToClean.insert(BB); 847 848 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 849 createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU); 850 NumPaths++; 851 } 852 853 // After all paths are cloned, now update the last successor of the cloned 854 // path so it skips over the switch statement 855 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) 856 updateLastSuccessor(TPath, DuplicateMap, &DTU); 857 858 // For each instruction that was cloned and used outside, update its uses 859 updateSSA(NewDefs); 860 861 // Clean PHI Nodes for the newly created blocks 862 for (BasicBlock *BB : BlocksToClean) 863 cleanPhiNodes(BB); 864 } 865 866 /// For a specific ThreadingPath \p Path, create an exit path starting from 867 /// the determinator block. 868 /// 869 /// To remember the correct destination, we have to duplicate blocks 870 /// corresponding to each state. Also update the terminating instruction of 871 /// the predecessors, and phis in the successor blocks. 872 void createExitPath(DefMap &NewDefs, ThreadingPath &Path, 873 DuplicateBlockMap &DuplicateMap, 874 SmallSet<BasicBlock *, 16> &BlocksToClean, 875 DomTreeUpdater *DTU) { 876 uint64_t NextState = Path.getExitValue(); 877 const BasicBlock *Determinator = Path.getDeterminatorBB(); 878 PathType PathBBs = Path.getPath(); 879 880 // Don't select the placeholder block in front 881 if (PathBBs.front() == Determinator) 882 PathBBs.pop_front(); 883 884 auto DetIt = std::find(PathBBs.begin(), PathBBs.end(), Determinator); 885 auto Prev = std::prev(DetIt); 886 BasicBlock *PrevBB = *Prev; 887 for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 888 BasicBlock *BB = *BBIt; 889 BlocksToClean.insert(BB); 890 891 // We already cloned BB for this NextState, now just update the branch 892 // and continue. 893 BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap); 894 if (NextBB) { 895 updatePredecessor(PrevBB, BB, NextBB, DTU); 896 PrevBB = NextBB; 897 continue; 898 } 899 900 // Clone the BB and update the successor of Prev to jump to the new block 901 BasicBlock *NewBB = cloneBlockAndUpdatePredecessor( 902 BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU); 903 DuplicateMap[BB].push_back({NewBB, NextState}); 904 BlocksToClean.insert(NewBB); 905 PrevBB = NewBB; 906 } 907 } 908 909 /// Restore SSA form after cloning blocks. 910 /// 911 /// Each cloned block creates new defs for a variable, and the uses need to be 912 /// updated to reflect this. The uses may be replaced with a cloned value, or 913 /// some derived phi instruction. Note that all uses of a value defined in the 914 /// same block were already remapped when cloning the block. 915 void updateSSA(DefMap &NewDefs) { 916 SSAUpdaterBulk SSAUpdate; 917 SmallVector<Use *, 16> UsesToRename; 918 919 for (auto KV : NewDefs) { 920 Instruction *I = KV.first; 921 BasicBlock *BB = I->getParent(); 922 std::vector<Instruction *> Cloned = KV.second; 923 924 // Scan all uses of this instruction to see if it is used outside of its 925 // block, and if so, record them in UsesToRename. 926 for (Use &U : I->uses()) { 927 Instruction *User = cast<Instruction>(U.getUser()); 928 if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 929 if (UserPN->getIncomingBlock(U) == BB) 930 continue; 931 } else if (User->getParent() == BB) { 932 continue; 933 } 934 935 UsesToRename.push_back(&U); 936 } 937 938 // If there are no uses outside the block, we're done with this 939 // instruction. 940 if (UsesToRename.empty()) 941 continue; 942 LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I 943 << "\n"); 944 945 // We found a use of I outside of BB. Rename all uses of I that are 946 // outside its block to be uses of the appropriate PHI node etc. See 947 // ValuesInBlocks with the values we know. 948 unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType()); 949 SSAUpdate.AddAvailableValue(VarNum, BB, I); 950 for (Instruction *New : Cloned) 951 SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New); 952 953 while (!UsesToRename.empty()) 954 SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val()); 955 956 LLVM_DEBUG(dbgs() << "\n"); 957 } 958 // SSAUpdater handles phi placement and renaming uses with the appropriate 959 // value. 960 SSAUpdate.RewriteAllUses(DT); 961 } 962 963 /// Clones a basic block, and adds it to the CFG. 964 /// 965 /// This function also includes updating phi nodes in the successors of the 966 /// BB, and remapping uses that were defined locally in the cloned BB. 967 BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB, 968 uint64_t NextState, 969 DuplicateBlockMap &DuplicateMap, 970 DefMap &NewDefs, 971 DomTreeUpdater *DTU) { 972 ValueToValueMapTy VMap; 973 BasicBlock *NewBB = CloneBasicBlock( 974 BB, VMap, ".jt" + std::to_string(NextState), BB->getParent()); 975 NewBB->moveAfter(BB); 976 NumCloned++; 977 978 for (Instruction &I : *NewBB) { 979 // Do not remap operands of PHINode in case a definition in BB is an 980 // incoming value to a phi in the same block. This incoming value will 981 // be renamed later while restoring SSA. 982 if (isa<PHINode>(&I)) 983 continue; 984 RemapInstruction(&I, VMap, 985 RF_IgnoreMissingLocals | RF_NoModuleLevelChanges); 986 if (AssumeInst *II = dyn_cast<AssumeInst>(&I)) 987 AC->registerAssumption(II); 988 } 989 990 updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap); 991 updatePredecessor(PrevBB, BB, NewBB, DTU); 992 updateDefMap(NewDefs, VMap); 993 994 // Add all successors to the DominatorTree 995 SmallPtrSet<BasicBlock *, 4> SuccSet; 996 for (auto *SuccBB : successors(NewBB)) { 997 if (SuccSet.insert(SuccBB).second) 998 DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}}); 999 } 1000 SuccSet.clear(); 1001 return NewBB; 1002 } 1003 1004 /// Update the phi nodes in BB's successors. 1005 /// 1006 /// This means creating a new incoming value from NewBB with the new 1007 /// instruction wherever there is an incoming value from BB. 1008 void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB, 1009 uint64_t NextState, ValueToValueMapTy &VMap, 1010 DuplicateBlockMap &DuplicateMap) { 1011 std::vector<BasicBlock *> BlocksToUpdate; 1012 1013 // If BB is the last block in the path, we can simply update the one case 1014 // successor that will be reached. 1015 if (BB == SwitchPaths->getSwitchBlock()) { 1016 SwitchInst *Switch = SwitchPaths->getSwitchInst(); 1017 BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 1018 BlocksToUpdate.push_back(NextCase); 1019 BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap); 1020 if (ClonedSucc) 1021 BlocksToUpdate.push_back(ClonedSucc); 1022 } 1023 // Otherwise update phis in all successors. 1024 else { 1025 for (BasicBlock *Succ : successors(BB)) { 1026 BlocksToUpdate.push_back(Succ); 1027 1028 // Check if a successor has already been cloned for the particular exit 1029 // value. In this case if a successor was already cloned, the phi nodes 1030 // in the cloned block should be updated directly. 1031 BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap); 1032 if (ClonedSucc) 1033 BlocksToUpdate.push_back(ClonedSucc); 1034 } 1035 } 1036 1037 // If there is a phi with an incoming value from BB, create a new incoming 1038 // value for the new predecessor ClonedBB. The value will either be the same 1039 // value from BB or a cloned value. 1040 for (BasicBlock *Succ : BlocksToUpdate) { 1041 for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II); 1042 ++II) { 1043 Value *Incoming = Phi->getIncomingValueForBlock(BB); 1044 if (Incoming) { 1045 if (isa<Constant>(Incoming)) { 1046 Phi->addIncoming(Incoming, ClonedBB); 1047 continue; 1048 } 1049 Value *ClonedVal = VMap[Incoming]; 1050 if (ClonedVal) 1051 Phi->addIncoming(ClonedVal, ClonedBB); 1052 else 1053 Phi->addIncoming(Incoming, ClonedBB); 1054 } 1055 } 1056 } 1057 } 1058 1059 /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all 1060 /// other successors are kept as well. 1061 void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB, 1062 BasicBlock *NewBB, DomTreeUpdater *DTU) { 1063 // When a path is reused, there is a chance that predecessors were already 1064 // updated before. Check if the predecessor needs to be updated first. 1065 if (!isPredecessor(OldBB, PrevBB)) 1066 return; 1067 1068 Instruction *PrevTerm = PrevBB->getTerminator(); 1069 for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) { 1070 if (PrevTerm->getSuccessor(Idx) == OldBB) { 1071 OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true); 1072 PrevTerm->setSuccessor(Idx, NewBB); 1073 } 1074 } 1075 DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB}, 1076 {DominatorTree::Insert, PrevBB, NewBB}}); 1077 } 1078 1079 /// Add new value mappings to the DefMap to keep track of all new definitions 1080 /// for a particular instruction. These will be used while updating SSA form. 1081 void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) { 1082 for (auto Entry : VMap) { 1083 Instruction *Inst = 1084 dyn_cast<Instruction>(const_cast<Value *>(Entry.first)); 1085 if (!Inst || !Entry.second || isa<BranchInst>(Inst) || 1086 isa<SwitchInst>(Inst)) { 1087 continue; 1088 } 1089 1090 Instruction *Cloned = dyn_cast<Instruction>(Entry.second); 1091 if (!Cloned) 1092 continue; 1093 1094 if (NewDefs.find(Inst) == NewDefs.end()) 1095 NewDefs[Inst] = {Cloned}; 1096 else 1097 NewDefs[Inst].push_back(Cloned); 1098 } 1099 } 1100 1101 /// Update the last branch of a particular cloned path to point to the correct 1102 /// case successor. 1103 /// 1104 /// Note that this is an optional step and would have been done in later 1105 /// optimizations, but it makes the CFG significantly easier to work with. 1106 void updateLastSuccessor(ThreadingPath &TPath, 1107 DuplicateBlockMap &DuplicateMap, 1108 DomTreeUpdater *DTU) { 1109 uint64_t NextState = TPath.getExitValue(); 1110 BasicBlock *BB = TPath.getPath().back(); 1111 BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap); 1112 1113 // Note multiple paths can end at the same block so check that it is not 1114 // updated yet 1115 if (!isa<SwitchInst>(LastBlock->getTerminator())) 1116 return; 1117 SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator()); 1118 BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 1119 1120 std::vector<DominatorTree::UpdateType> DTUpdates; 1121 SmallPtrSet<BasicBlock *, 4> SuccSet; 1122 for (BasicBlock *Succ : successors(LastBlock)) { 1123 if (Succ != NextCase && SuccSet.insert(Succ).second) 1124 DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ}); 1125 } 1126 1127 Switch->eraseFromParent(); 1128 BranchInst::Create(NextCase, LastBlock); 1129 1130 DTU->applyUpdates(DTUpdates); 1131 } 1132 1133 /// After cloning blocks, some of the phi nodes have extra incoming values 1134 /// that are no longer used. This function removes them. 1135 void cleanPhiNodes(BasicBlock *BB) { 1136 // If BB is no longer reachable, remove any remaining phi nodes 1137 if (pred_empty(BB)) { 1138 std::vector<PHINode *> PhiToRemove; 1139 for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 1140 PhiToRemove.push_back(Phi); 1141 } 1142 for (PHINode *PN : PhiToRemove) { 1143 PN->replaceAllUsesWith(UndefValue::get(PN->getType())); 1144 PN->eraseFromParent(); 1145 } 1146 return; 1147 } 1148 1149 // Remove any incoming values that come from an invalid predecessor 1150 for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 1151 std::vector<BasicBlock *> BlocksToRemove; 1152 for (BasicBlock *IncomingBB : Phi->blocks()) { 1153 if (!isPredecessor(BB, IncomingBB)) 1154 BlocksToRemove.push_back(IncomingBB); 1155 } 1156 for (BasicBlock *BB : BlocksToRemove) 1157 Phi->removeIncomingValue(BB); 1158 } 1159 } 1160 1161 /// Checks if BB was already cloned for a particular next state value. If it 1162 /// was then it returns this cloned block, and otherwise null. 1163 BasicBlock *getClonedBB(BasicBlock *BB, uint64_t NextState, 1164 DuplicateBlockMap &DuplicateMap) { 1165 CloneList ClonedBBs = DuplicateMap[BB]; 1166 1167 // Find an entry in the CloneList with this NextState. If it exists then 1168 // return the corresponding BB 1169 auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) { 1170 return C.State == NextState; 1171 }); 1172 return It != ClonedBBs.end() ? (*It).BB : nullptr; 1173 } 1174 1175 /// Helper to get the successor corresponding to a particular case value for 1176 /// a switch statement. 1177 BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, uint64_t NextState) { 1178 BasicBlock *NextCase = nullptr; 1179 for (auto Case : Switch->cases()) { 1180 if (Case.getCaseValue()->getZExtValue() == NextState) { 1181 NextCase = Case.getCaseSuccessor(); 1182 break; 1183 } 1184 } 1185 if (!NextCase) 1186 NextCase = Switch->getDefaultDest(); 1187 return NextCase; 1188 } 1189 1190 /// Returns true if IncomingBB is a predecessor of BB. 1191 bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) { 1192 return llvm::find(predecessors(BB), IncomingBB) != pred_end(BB); 1193 } 1194 1195 AllSwitchPaths *SwitchPaths; 1196 DominatorTree *DT; 1197 AssumptionCache *AC; 1198 TargetTransformInfo *TTI; 1199 OptimizationRemarkEmitter *ORE; 1200 SmallPtrSet<const Value *, 32> EphValues; 1201 std::vector<ThreadingPath> TPaths; 1202 }; 1203 1204 bool DFAJumpThreading::run(Function &F) { 1205 LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n"); 1206 1207 if (F.hasOptSize()) { 1208 LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n"); 1209 return false; 1210 } 1211 1212 if (ClViewCfgBefore) 1213 F.viewCFG(); 1214 1215 SmallVector<AllSwitchPaths, 2> ThreadableLoops; 1216 bool MadeChanges = false; 1217 1218 for (BasicBlock &BB : F) { 1219 auto *SI = dyn_cast<SwitchInst>(BB.getTerminator()); 1220 if (!SI) 1221 continue; 1222 1223 LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName() 1224 << " is predictable\n"); 1225 MainSwitch Switch(SI, ORE); 1226 1227 if (!Switch.getInstr()) 1228 continue; 1229 1230 LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a " 1231 << "candidate for jump threading\n"); 1232 LLVM_DEBUG(SI->dump()); 1233 1234 unfoldSelectInstrs(DT, Switch.getSelectInsts()); 1235 if (!Switch.getSelectInsts().empty()) 1236 MadeChanges = true; 1237 1238 AllSwitchPaths SwitchPaths(&Switch, ORE); 1239 SwitchPaths.run(); 1240 1241 if (SwitchPaths.getNumThreadingPaths() > 0) { 1242 ThreadableLoops.push_back(SwitchPaths); 1243 1244 // For the time being limit this optimization to occurring once in a 1245 // function since it can change the CFG significantly. This is not a 1246 // strict requirement but it can cause buggy behavior if there is an 1247 // overlap of blocks in different opportunities. There is a lot of room to 1248 // experiment with catching more opportunities here. 1249 break; 1250 } 1251 } 1252 1253 SmallPtrSet<const Value *, 32> EphValues; 1254 if (ThreadableLoops.size() > 0) 1255 CodeMetrics::collectEphemeralValues(&F, AC, EphValues); 1256 1257 for (AllSwitchPaths SwitchPaths : ThreadableLoops) { 1258 TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); 1259 Transform.run(); 1260 MadeChanges = true; 1261 } 1262 1263 #ifdef EXPENSIVE_CHECKS 1264 assert(DT->verify(DominatorTree::VerificationLevel::Full)); 1265 verifyFunction(F, &dbgs()); 1266 #endif 1267 1268 return MadeChanges; 1269 } 1270 1271 } // end anonymous namespace 1272 1273 /// Integrate with the new Pass Manager 1274 PreservedAnalyses DFAJumpThreadingPass::run(Function &F, 1275 FunctionAnalysisManager &AM) { 1276 AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F); 1277 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); 1278 TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); 1279 OptimizationRemarkEmitter ORE(&F); 1280 1281 if (!DFAJumpThreading(&AC, &DT, &TTI, &ORE).run(F)) 1282 return PreservedAnalyses::all(); 1283 1284 PreservedAnalyses PA; 1285 PA.preserve<DominatorTreeAnalysis>(); 1286 return PA; 1287 } 1288