1 //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Transform each threading path to effectively jump thread the DFA. For 11 // example, the CFG below could be transformed as follows, where the cloned 12 // blocks unconditionally branch to the next correct case based on what is 13 // identified in the analysis. 14 // 15 // sw.bb sw.bb 16 // / | \ / | \ 17 // case1 case2 case3 case1 case2 case3 18 // \ | / | | | 19 // determinator det.2 det.3 det.1 20 // br sw.bb / | \ 21 // sw.bb.2 sw.bb.3 sw.bb.1 22 // br case2 br case3 br case1§ 23 // 24 // Definitions and Terminology: 25 // 26 // * Threading path: 27 // a list of basic blocks, the exit state, and the block that determines 28 // the next state, for which the following notation will be used: 29 // < path of BBs that form a cycle > [ state, determinator ] 30 // 31 // * Predictable switch: 32 // The switch variable is always a known constant so that all conditional 33 // jumps based on switch variable can be converted to unconditional jump. 34 // 35 // * Determinator: 36 // The basic block that determines the next state of the DFA. 37 // 38 // Representing the optimization in C-like pseudocode: the code pattern on the 39 // left could functionally be transformed to the right pattern if the switch 40 // condition is predictable. 41 // 42 // X = A goto A 43 // for (...) A: 44 // switch (X) ... 45 // case A goto B 46 // X = B B: 47 // case B ... 48 // X = C goto C 49 // 50 // The pass first checks that switch variable X is decided by the control flow 51 // path taken in the loop; for example, in case B, the next value of X is 52 // decided to be C. It then enumerates through all paths in the loop and labels 53 // the basic blocks where the next state is decided. 54 // 55 // Using this information it creates new paths that unconditionally branch to 56 // the next case. This involves cloning code, so it only gets triggered if the 57 // amount of code duplicated is below a threshold. 58 // 59 //===----------------------------------------------------------------------===// 60 61 #include "llvm/Transforms/Scalar/DFAJumpThreading.h" 62 #include "llvm/ADT/APInt.h" 63 #include "llvm/ADT/DenseMap.h" 64 #include "llvm/ADT/DepthFirstIterator.h" 65 #include "llvm/ADT/SmallSet.h" 66 #include "llvm/ADT/Statistic.h" 67 #include "llvm/Analysis/AssumptionCache.h" 68 #include "llvm/Analysis/CodeMetrics.h" 69 #include "llvm/Analysis/LoopIterator.h" 70 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 71 #include "llvm/Analysis/TargetTransformInfo.h" 72 #include "llvm/IR/CFG.h" 73 #include "llvm/IR/Constants.h" 74 #include "llvm/IR/IntrinsicInst.h" 75 #include "llvm/IR/Verifier.h" 76 #include "llvm/InitializePasses.h" 77 #include "llvm/Pass.h" 78 #include "llvm/Support/CommandLine.h" 79 #include "llvm/Support/Debug.h" 80 #include "llvm/Transforms/Scalar.h" 81 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 82 #include "llvm/Transforms/Utils/Cloning.h" 83 #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" 84 #include "llvm/Transforms/Utils/ValueMapper.h" 85 #include <algorithm> 86 #include <deque> 87 88 using namespace llvm; 89 90 #define DEBUG_TYPE "dfa-jump-threading" 91 92 STATISTIC(NumTransforms, "Number of transformations done"); 93 STATISTIC(NumCloned, "Number of blocks cloned"); 94 STATISTIC(NumPaths, "Number of individual paths threaded"); 95 96 static cl::opt<bool> 97 ClViewCfgBefore("dfa-jump-view-cfg-before", 98 cl::desc("View the CFG before DFA Jump Threading"), 99 cl::Hidden, cl::init(false)); 100 101 static cl::opt<unsigned> MaxPathLength( 102 "dfa-max-path-length", 103 cl::desc("Max number of blocks searched to find a threading path"), 104 cl::Hidden, cl::init(20)); 105 106 static cl::opt<unsigned> 107 CostThreshold("dfa-cost-threshold", 108 cl::desc("Maximum cost accepted for the transformation"), 109 cl::Hidden, cl::init(50)); 110 111 namespace { 112 113 class SelectInstToUnfold { 114 SelectInst *SI; 115 PHINode *SIUse; 116 117 public: 118 SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {} 119 120 SelectInst *getInst() { return SI; } 121 PHINode *getUse() { return SIUse; } 122 123 explicit operator bool() const { return SI && SIUse; } 124 }; 125 126 void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, 127 std::vector<SelectInstToUnfold> *NewSIsToUnfold, 128 std::vector<BasicBlock *> *NewBBs); 129 130 class DFAJumpThreading { 131 public: 132 DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, 133 TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE) 134 : AC(AC), DT(DT), TTI(TTI), ORE(ORE) {} 135 136 bool run(Function &F); 137 138 private: 139 void 140 unfoldSelectInstrs(DominatorTree *DT, 141 const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { 142 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 143 SmallVector<SelectInstToUnfold, 4> Stack; 144 for (SelectInstToUnfold SIToUnfold : SelectInsts) 145 Stack.push_back(SIToUnfold); 146 147 while (!Stack.empty()) { 148 SelectInstToUnfold SIToUnfold = Stack.back(); 149 Stack.pop_back(); 150 151 std::vector<SelectInstToUnfold> NewSIsToUnfold; 152 std::vector<BasicBlock *> NewBBs; 153 unfold(&DTU, SIToUnfold, &NewSIsToUnfold, &NewBBs); 154 155 // Put newly discovered select instructions into the work list. 156 for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold) 157 Stack.push_back(NewSIToUnfold); 158 } 159 } 160 161 AssumptionCache *AC; 162 DominatorTree *DT; 163 TargetTransformInfo *TTI; 164 OptimizationRemarkEmitter *ORE; 165 }; 166 167 class DFAJumpThreadingLegacyPass : public FunctionPass { 168 public: 169 static char ID; // Pass identification 170 DFAJumpThreadingLegacyPass() : FunctionPass(ID) {} 171 172 void getAnalysisUsage(AnalysisUsage &AU) const override { 173 AU.addRequired<AssumptionCacheTracker>(); 174 AU.addRequired<DominatorTreeWrapperPass>(); 175 AU.addPreserved<DominatorTreeWrapperPass>(); 176 AU.addRequired<TargetTransformInfoWrapperPass>(); 177 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 178 } 179 180 bool runOnFunction(Function &F) override { 181 if (skipFunction(F)) 182 return false; 183 184 AssumptionCache *AC = 185 &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 186 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 187 TargetTransformInfo *TTI = 188 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 189 OptimizationRemarkEmitter *ORE = 190 &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 191 192 return DFAJumpThreading(AC, DT, TTI, ORE).run(F); 193 } 194 }; 195 } // end anonymous namespace 196 197 char DFAJumpThreadingLegacyPass::ID = 0; 198 INITIALIZE_PASS_BEGIN(DFAJumpThreadingLegacyPass, "dfa-jump-threading", 199 "DFA Jump Threading", false, false) 200 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 201 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 202 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 203 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 204 INITIALIZE_PASS_END(DFAJumpThreadingLegacyPass, "dfa-jump-threading", 205 "DFA Jump Threading", false, false) 206 207 // Public interface to the DFA Jump Threading pass 208 FunctionPass *llvm::createDFAJumpThreadingPass() { 209 return new DFAJumpThreadingLegacyPass(); 210 } 211 212 namespace { 213 214 /// Create a new basic block and sink \p SIToSink into it. 215 void createBasicBlockAndSinkSelectInst( 216 DomTreeUpdater *DTU, SelectInst *SI, PHINode *SIUse, SelectInst *SIToSink, 217 BasicBlock *EndBlock, StringRef NewBBName, BasicBlock **NewBlock, 218 BranchInst **NewBranch, std::vector<SelectInstToUnfold> *NewSIsToUnfold, 219 std::vector<BasicBlock *> *NewBBs) { 220 assert(SIToSink->hasOneUse()); 221 assert(NewBlock); 222 assert(NewBranch); 223 *NewBlock = BasicBlock::Create(SI->getContext(), NewBBName, 224 EndBlock->getParent(), EndBlock); 225 NewBBs->push_back(*NewBlock); 226 *NewBranch = BranchInst::Create(EndBlock, *NewBlock); 227 SIToSink->moveBefore(*NewBranch); 228 NewSIsToUnfold->push_back(SelectInstToUnfold(SIToSink, SIUse)); 229 DTU->applyUpdates({{DominatorTree::Insert, *NewBlock, EndBlock}}); 230 } 231 232 /// Unfold the select instruction held in \p SIToUnfold by replacing it with 233 /// control flow. 234 /// 235 /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly 236 /// created basic blocks into \p NewBBs. 237 /// 238 /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible. 239 void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, 240 std::vector<SelectInstToUnfold> *NewSIsToUnfold, 241 std::vector<BasicBlock *> *NewBBs) { 242 SelectInst *SI = SIToUnfold.getInst(); 243 PHINode *SIUse = SIToUnfold.getUse(); 244 BasicBlock *StartBlock = SI->getParent(); 245 BasicBlock *EndBlock = SIUse->getParent(); 246 BranchInst *StartBlockTerm = 247 dyn_cast<BranchInst>(StartBlock->getTerminator()); 248 249 assert(StartBlockTerm && StartBlockTerm->isUnconditional()); 250 assert(SI->hasOneUse()); 251 252 // These are the new basic blocks for the conditional branch. 253 // At least one will become an actual new basic block. 254 BasicBlock *TrueBlock = nullptr; 255 BasicBlock *FalseBlock = nullptr; 256 BranchInst *TrueBranch = nullptr; 257 BranchInst *FalseBranch = nullptr; 258 259 // Sink select instructions to be able to unfold them later. 260 if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getTrueValue())) { 261 createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock, 262 "si.unfold.true", &TrueBlock, &TrueBranch, 263 NewSIsToUnfold, NewBBs); 264 } 265 if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getFalseValue())) { 266 createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock, 267 "si.unfold.false", &FalseBlock, 268 &FalseBranch, NewSIsToUnfold, NewBBs); 269 } 270 271 // If there was nothing to sink, then arbitrarily choose the 'false' side 272 // for a new input value to the PHI. 273 if (!TrueBlock && !FalseBlock) { 274 FalseBlock = BasicBlock::Create(SI->getContext(), "si.unfold.false", 275 EndBlock->getParent(), EndBlock); 276 NewBBs->push_back(FalseBlock); 277 BranchInst::Create(EndBlock, FalseBlock); 278 DTU->applyUpdates({{DominatorTree::Insert, FalseBlock, EndBlock}}); 279 } 280 281 // Insert the real conditional branch based on the original condition. 282 // If we did not create a new block for one of the 'true' or 'false' paths 283 // of the condition, it means that side of the branch goes to the end block 284 // directly and the path originates from the start block from the point of 285 // view of the new PHI. 286 BasicBlock *TT = EndBlock; 287 BasicBlock *FT = EndBlock; 288 if (TrueBlock && FalseBlock) { 289 // A diamond. 290 TT = TrueBlock; 291 FT = FalseBlock; 292 293 // Update the phi node of SI. 294 SIUse->removeIncomingValue(StartBlock, /* DeletePHIIfEmpty = */ false); 295 SIUse->addIncoming(SI->getTrueValue(), TrueBlock); 296 SIUse->addIncoming(SI->getFalseValue(), FalseBlock); 297 298 // Update any other PHI nodes in EndBlock. 299 for (PHINode &Phi : EndBlock->phis()) { 300 if (&Phi != SIUse) { 301 Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), TrueBlock); 302 Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), FalseBlock); 303 } 304 } 305 } else { 306 BasicBlock *NewBlock = nullptr; 307 Value *SIOp1 = SI->getTrueValue(); 308 Value *SIOp2 = SI->getFalseValue(); 309 310 // A triangle pointing right. 311 if (!TrueBlock) { 312 NewBlock = FalseBlock; 313 FT = FalseBlock; 314 } 315 // A triangle pointing left. 316 else { 317 NewBlock = TrueBlock; 318 TT = TrueBlock; 319 std::swap(SIOp1, SIOp2); 320 } 321 322 // Update the phi node of SI. 323 for (unsigned Idx = 0; Idx < SIUse->getNumIncomingValues(); ++Idx) { 324 if (SIUse->getIncomingBlock(Idx) == StartBlock) 325 SIUse->setIncomingValue(Idx, SIOp1); 326 } 327 SIUse->addIncoming(SIOp2, NewBlock); 328 329 // Update any other PHI nodes in EndBlock. 330 for (auto II = EndBlock->begin(); PHINode *Phi = dyn_cast<PHINode>(II); 331 ++II) { 332 if (Phi != SIUse) 333 Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), NewBlock); 334 } 335 } 336 StartBlockTerm->eraseFromParent(); 337 BranchInst::Create(TT, FT, SI->getCondition(), StartBlock); 338 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, TT}, 339 {DominatorTree::Insert, StartBlock, FT}}); 340 341 // The select is now dead. 342 SI->eraseFromParent(); 343 } 344 345 struct ClonedBlock { 346 BasicBlock *BB; 347 uint64_t State; ///< \p State corresponds to the next value of a switch stmnt. 348 }; 349 350 typedef std::deque<BasicBlock *> PathType; 351 typedef std::vector<PathType> PathsType; 352 typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks; 353 typedef std::vector<ClonedBlock> CloneList; 354 355 // This data structure keeps track of all blocks that have been cloned. If two 356 // different ThreadingPaths clone the same block for a certain state it should 357 // be reused, and it can be looked up in this map. 358 typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap; 359 360 // This map keeps track of all the new definitions for an instruction. This 361 // information is needed when restoring SSA form after cloning blocks. 362 typedef DenseMap<Instruction *, std::vector<Instruction *>> DefMap; 363 364 inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { 365 OS << "< "; 366 for (const BasicBlock *BB : Path) { 367 std::string BBName; 368 if (BB->hasName()) 369 raw_string_ostream(BBName) << BB->getName(); 370 else 371 raw_string_ostream(BBName) << BB; 372 OS << BBName << " "; 373 } 374 OS << ">"; 375 return OS; 376 } 377 378 /// ThreadingPath is a path in the control flow of a loop that can be threaded 379 /// by cloning necessary basic blocks and replacing conditional branches with 380 /// unconditional ones. A threading path includes a list of basic blocks, the 381 /// exit state, and the block that determines the next state. 382 struct ThreadingPath { 383 /// Exit value is DFA's exit state for the given path. 384 uint64_t getExitValue() const { return ExitVal; } 385 void setExitValue(const ConstantInt *V) { 386 ExitVal = V->getZExtValue(); 387 IsExitValSet = true; 388 } 389 bool isExitValueSet() const { return IsExitValSet; } 390 391 /// Determinator is the basic block that determines the next state of the DFA. 392 const BasicBlock *getDeterminatorBB() const { return DBB; } 393 void setDeterminator(const BasicBlock *BB) { DBB = BB; } 394 395 /// Path is a list of basic blocks. 396 const PathType &getPath() const { return Path; } 397 void setPath(const PathType &NewPath) { Path = NewPath; } 398 399 void print(raw_ostream &OS) const { 400 OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]"; 401 } 402 403 private: 404 PathType Path; 405 uint64_t ExitVal; 406 const BasicBlock *DBB = nullptr; 407 bool IsExitValSet = false; 408 }; 409 410 #ifndef NDEBUG 411 inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) { 412 TPath.print(OS); 413 return OS; 414 } 415 #endif 416 417 struct MainSwitch { 418 MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) { 419 if (isPredictable(SI)) { 420 Instr = SI; 421 } else { 422 ORE->emit([&]() { 423 return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI) 424 << "Switch instruction is not predictable."; 425 }); 426 } 427 } 428 429 virtual ~MainSwitch() = default; 430 431 SwitchInst *getInstr() const { return Instr; } 432 const SmallVector<SelectInstToUnfold, 4> getSelectInsts() { 433 return SelectInsts; 434 } 435 436 private: 437 /// Do a use-def chain traversal. Make sure the value of the switch variable 438 /// is always a known constant. This means that all conditional jumps based on 439 /// switch variable can be converted to unconditional jumps. 440 bool isPredictable(const SwitchInst *SI) { 441 std::deque<Instruction *> Q; 442 SmallSet<Value *, 16> SeenValues; 443 SelectInsts.clear(); 444 445 Value *FirstDef = SI->getOperand(0); 446 auto *Inst = dyn_cast<Instruction>(FirstDef); 447 448 // If this is a function argument or another non-instruction, then give up. 449 // We are interested in loop local variables. 450 if (!Inst) 451 return false; 452 453 // Require the first definition to be a PHINode 454 if (!isa<PHINode>(Inst)) 455 return false; 456 457 LLVM_DEBUG(dbgs() << "\tisPredictable() FirstDef: " << *Inst << "\n"); 458 459 Q.push_back(Inst); 460 SeenValues.insert(FirstDef); 461 462 while (!Q.empty()) { 463 Instruction *Current = Q.front(); 464 Q.pop_front(); 465 466 if (auto *Phi = dyn_cast<PHINode>(Current)) { 467 for (Value *Incoming : Phi->incoming_values()) { 468 if (!isPredictableValue(Incoming, SeenValues)) 469 return false; 470 addInstToQueue(Incoming, Q, SeenValues); 471 } 472 LLVM_DEBUG(dbgs() << "\tisPredictable() phi: " << *Phi << "\n"); 473 } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) { 474 if (!isValidSelectInst(SelI)) 475 return false; 476 if (!isPredictableValue(SelI->getTrueValue(), SeenValues) || 477 !isPredictableValue(SelI->getFalseValue(), SeenValues)) { 478 return false; 479 } 480 addInstToQueue(SelI->getTrueValue(), Q, SeenValues); 481 addInstToQueue(SelI->getFalseValue(), Q, SeenValues); 482 LLVM_DEBUG(dbgs() << "\tisPredictable() select: " << *SelI << "\n"); 483 if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back())) 484 SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse)); 485 } else { 486 // If it is neither a phi nor a select, then we give up. 487 return false; 488 } 489 } 490 491 return true; 492 } 493 494 bool isPredictableValue(Value *InpVal, SmallSet<Value *, 16> &SeenValues) { 495 if (SeenValues.find(InpVal) != SeenValues.end()) 496 return true; 497 498 if (isa<ConstantInt>(InpVal)) 499 return true; 500 501 // If this is a function argument or another non-instruction, then give up. 502 if (!isa<Instruction>(InpVal)) 503 return false; 504 505 return true; 506 } 507 508 void addInstToQueue(Value *Val, std::deque<Instruction *> &Q, 509 SmallSet<Value *, 16> &SeenValues) { 510 if (SeenValues.find(Val) != SeenValues.end()) 511 return; 512 if (Instruction *I = dyn_cast<Instruction>(Val)) 513 Q.push_back(I); 514 SeenValues.insert(Val); 515 } 516 517 bool isValidSelectInst(SelectInst *SI) { 518 if (!SI->hasOneUse()) 519 return false; 520 521 Instruction *SIUse = dyn_cast<Instruction>(SI->user_back()); 522 // The use of the select inst should be either a phi or another select. 523 if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse))) 524 return false; 525 526 BasicBlock *SIBB = SI->getParent(); 527 528 // Currently, we can only expand select instructions in basic blocks with 529 // one successor. 530 BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator()); 531 if (!SITerm || !SITerm->isUnconditional()) 532 return false; 533 534 if (isa<PHINode>(SIUse) && 535 SIBB->getSingleSuccessor() != dyn_cast<Instruction>(SIUse)->getParent()) 536 return false; 537 538 // If select will not be sunk during unfolding, and it is in the same basic 539 // block as another state defining select, then cannot unfold both. 540 for (SelectInstToUnfold SIToUnfold : SelectInsts) { 541 SelectInst *PrevSI = SIToUnfold.getInst(); 542 if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI && 543 PrevSI->getParent() == SI->getParent()) 544 return false; 545 } 546 547 return true; 548 } 549 550 SwitchInst *Instr = nullptr; 551 SmallVector<SelectInstToUnfold, 4> SelectInsts; 552 }; 553 554 struct AllSwitchPaths { 555 AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE) 556 : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), 557 ORE(ORE) {} 558 559 std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; } 560 unsigned getNumThreadingPaths() { return TPaths.size(); } 561 SwitchInst *getSwitchInst() { return Switch; } 562 BasicBlock *getSwitchBlock() { return SwitchBlock; } 563 564 void run() { 565 VisitedBlocks Visited; 566 PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1); 567 StateDefMap StateDef = getStateDefMap(); 568 569 for (PathType Path : LoopPaths) { 570 ThreadingPath TPath; 571 572 const BasicBlock *PrevBB = Path.back(); 573 for (const BasicBlock *BB : Path) { 574 if (StateDef.count(BB) != 0) { 575 const PHINode *Phi = dyn_cast<PHINode>(StateDef[BB]); 576 assert(Phi && "Expected a state-defining instr to be a phi node."); 577 578 const Value *V = Phi->getIncomingValueForBlock(PrevBB); 579 if (const ConstantInt *C = dyn_cast<const ConstantInt>(V)) { 580 TPath.setExitValue(C); 581 TPath.setDeterminator(BB); 582 TPath.setPath(Path); 583 } 584 } 585 586 // Switch block is the determinator, this is the final exit value. 587 if (TPath.isExitValueSet() && BB == Path.front()) 588 break; 589 590 PrevBB = BB; 591 } 592 593 if (TPath.isExitValueSet()) 594 TPaths.push_back(TPath); 595 } 596 } 597 598 private: 599 // Value: an instruction that defines a switch state; 600 // Key: the parent basic block of that instruction. 601 typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap; 602 603 PathsType paths(BasicBlock *BB, VisitedBlocks &Visited, 604 unsigned PathDepth) const { 605 PathsType Res; 606 607 // Stop exploring paths after visiting MaxPathLength blocks 608 if (PathDepth > MaxPathLength) { 609 ORE->emit([&]() { 610 return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached", 611 Switch) 612 << "Exploration stopped after visiting MaxPathLength=" 613 << ore::NV("MaxPathLength", MaxPathLength) << " blocks."; 614 }); 615 return Res; 616 } 617 618 Visited.insert(BB); 619 620 // Some blocks have multiple edges to the same successor, and this set 621 // is used to prevent a duplicate path from being generated 622 SmallSet<BasicBlock *, 4> Successors; 623 for (BasicBlock *Succ : successors(BB)) { 624 if (!Successors.insert(Succ).second) 625 continue; 626 627 // Found a cycle through the SwitchBlock 628 if (Succ == SwitchBlock) { 629 Res.push_back({BB}); 630 continue; 631 } 632 633 // We have encountered a cycle, do not get caught in it 634 if (Visited.contains(Succ)) 635 continue; 636 637 PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1); 638 for (PathType Path : SuccPaths) { 639 PathType NewPath(Path); 640 NewPath.push_front(BB); 641 Res.push_back(NewPath); 642 } 643 } 644 // This block could now be visited again from a different predecessor. Note 645 // that this will result in exponential runtime. Subpaths could possibly be 646 // cached but it takes a lot of memory to store them. 647 Visited.erase(BB); 648 return Res; 649 } 650 651 /// Walk the use-def chain and collect all the state-defining instructions. 652 StateDefMap getStateDefMap() const { 653 StateDefMap Res; 654 655 Value *FirstDef = Switch->getOperand(0); 656 657 assert(isa<PHINode>(FirstDef) && "After select unfolding, all state " 658 "definitions are expected to be phi " 659 "nodes."); 660 661 SmallVector<PHINode *, 8> Stack; 662 Stack.push_back(dyn_cast<PHINode>(FirstDef)); 663 SmallSet<Value *, 16> SeenValues; 664 665 while (!Stack.empty()) { 666 PHINode *CurPhi = Stack.back(); 667 Stack.pop_back(); 668 669 Res[CurPhi->getParent()] = CurPhi; 670 SeenValues.insert(CurPhi); 671 672 for (Value *Incoming : CurPhi->incoming_values()) { 673 if (Incoming == FirstDef || isa<ConstantInt>(Incoming) || 674 SeenValues.find(Incoming) != SeenValues.end()) { 675 continue; 676 } 677 678 assert(isa<PHINode>(Incoming) && "After select unfolding, all state " 679 "definitions are expected to be phi " 680 "nodes."); 681 682 Stack.push_back(cast<PHINode>(Incoming)); 683 } 684 } 685 686 return Res; 687 } 688 689 SwitchInst *Switch; 690 BasicBlock *SwitchBlock; 691 OptimizationRemarkEmitter *ORE; 692 std::vector<ThreadingPath> TPaths; 693 }; 694 695 struct TransformDFA { 696 TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT, 697 AssumptionCache *AC, TargetTransformInfo *TTI, 698 OptimizationRemarkEmitter *ORE, 699 SmallPtrSet<const Value *, 32> EphValues) 700 : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE), 701 EphValues(EphValues) {} 702 703 void run() { 704 if (isLegalAndProfitableToTransform()) { 705 createAllExitPaths(); 706 NumTransforms++; 707 } 708 } 709 710 private: 711 /// This function performs both a legality check and profitability check at 712 /// the same time since it is convenient to do so. It iterates through all 713 /// blocks that will be cloned, and keeps track of the duplication cost. It 714 /// also returns false if it is illegal to clone some required block. 715 bool isLegalAndProfitableToTransform() { 716 CodeMetrics Metrics; 717 SwitchInst *Switch = SwitchPaths->getSwitchInst(); 718 719 // Note that DuplicateBlockMap is not being used as intended here. It is 720 // just being used to ensure (BB, State) pairs are only counted once. 721 DuplicateBlockMap DuplicateMap; 722 723 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 724 PathType PathBBs = TPath.getPath(); 725 uint64_t NextState = TPath.getExitValue(); 726 const BasicBlock *Determinator = TPath.getDeterminatorBB(); 727 728 // Update Metrics for the Switch block, this is always cloned 729 BasicBlock *BB = SwitchPaths->getSwitchBlock(); 730 BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 731 if (!VisitedBB) { 732 Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 733 DuplicateMap[BB].push_back({BB, NextState}); 734 } 735 736 // If the Switch block is the Determinator, then we can continue since 737 // this is the only block that is cloned and we already counted for it. 738 if (PathBBs.front() == Determinator) 739 continue; 740 741 // Otherwise update Metrics for all blocks that will be cloned. If any 742 // block is already cloned and would be reused, don't double count it. 743 auto DetIt = std::find(PathBBs.begin(), PathBBs.end(), Determinator); 744 for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 745 BB = *BBIt; 746 VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 747 if (VisitedBB) 748 continue; 749 Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 750 DuplicateMap[BB].push_back({BB, NextState}); 751 } 752 753 if (Metrics.notDuplicatable) { 754 LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 755 << "non-duplicatable instructions.\n"); 756 ORE->emit([&]() { 757 return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst", 758 Switch) 759 << "Contains non-duplicatable instructions."; 760 }); 761 return false; 762 } 763 764 if (Metrics.convergent) { 765 LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 766 << "convergent instructions.\n"); 767 ORE->emit([&]() { 768 return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) 769 << "Contains convergent instructions."; 770 }); 771 return false; 772 } 773 } 774 775 unsigned DuplicationCost = 0; 776 777 unsigned JumpTableSize = 0; 778 TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr, 779 nullptr); 780 if (JumpTableSize == 0) { 781 // Factor in the number of conditional branches reduced from jump 782 // threading. Assume that lowering the switch block is implemented by 783 // using binary search, hence the LogBase2(). 784 unsigned CondBranches = 785 APInt(32, Switch->getNumSuccessors()).ceilLogBase2(); 786 DuplicationCost = Metrics.NumInsts / CondBranches; 787 } else { 788 // Compared with jump tables, the DFA optimizer removes an indirect branch 789 // on each loop iteration, thus making branch prediction more precise. The 790 // more branch targets there are, the more likely it is for the branch 791 // predictor to make a mistake, and the more benefit there is in the DFA 792 // optimizer. Thus, the more branch targets there are, the lower is the 793 // cost of the DFA opt. 794 DuplicationCost = Metrics.NumInsts / JumpTableSize; 795 } 796 797 LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block " 798 << SwitchPaths->getSwitchBlock()->getName() 799 << " is: " << DuplicationCost << "\n\n"); 800 801 if (DuplicationCost > CostThreshold) { 802 LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the " 803 << "cost threshold.\n"); 804 ORE->emit([&]() { 805 return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch) 806 << "Duplication cost exceeds the cost threshold (cost=" 807 << ore::NV("Cost", DuplicationCost) 808 << ", threshold=" << ore::NV("Threshold", CostThreshold) << ")."; 809 }); 810 return false; 811 } 812 813 ORE->emit([&]() { 814 return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch) 815 << "Switch statement jump-threaded."; 816 }); 817 818 return true; 819 } 820 821 /// Transform each threading path to effectively jump thread the DFA. 822 void createAllExitPaths() { 823 DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager); 824 825 // Move the switch block to the end of the path, since it will be duplicated 826 BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock(); 827 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 828 LLVM_DEBUG(dbgs() << TPath << "\n"); 829 PathType NewPath(TPath.getPath()); 830 NewPath.push_back(SwitchBlock); 831 TPath.setPath(NewPath); 832 } 833 834 // Transform the ThreadingPaths and keep track of the cloned values 835 DuplicateBlockMap DuplicateMap; 836 DefMap NewDefs; 837 838 SmallSet<BasicBlock *, 16> BlocksToClean; 839 for (BasicBlock *BB : successors(SwitchBlock)) 840 BlocksToClean.insert(BB); 841 842 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 843 createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU); 844 NumPaths++; 845 } 846 847 // After all paths are cloned, now update the last successor of the cloned 848 // path so it skips over the switch statement 849 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) 850 updateLastSuccessor(TPath, DuplicateMap, &DTU); 851 852 // For each instruction that was cloned and used outside, update its uses 853 updateSSA(NewDefs); 854 855 // Clean PHI Nodes for the newly created blocks 856 for (BasicBlock *BB : BlocksToClean) 857 cleanPhiNodes(BB); 858 } 859 860 /// For a specific ThreadingPath \p Path, create an exit path starting from 861 /// the determinator block. 862 /// 863 /// To remember the correct destination, we have to duplicate blocks 864 /// corresponding to each state. Also update the terminating instruction of 865 /// the predecessors, and phis in the successor blocks. 866 void createExitPath(DefMap &NewDefs, ThreadingPath &Path, 867 DuplicateBlockMap &DuplicateMap, 868 SmallSet<BasicBlock *, 16> &BlocksToClean, 869 DomTreeUpdater *DTU) { 870 uint64_t NextState = Path.getExitValue(); 871 const BasicBlock *Determinator = Path.getDeterminatorBB(); 872 PathType PathBBs = Path.getPath(); 873 874 // Don't select the placeholder block in front 875 if (PathBBs.front() == Determinator) 876 PathBBs.pop_front(); 877 878 auto DetIt = std::find(PathBBs.begin(), PathBBs.end(), Determinator); 879 auto Prev = std::prev(DetIt); 880 BasicBlock *PrevBB = *Prev; 881 for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 882 BasicBlock *BB = *BBIt; 883 BlocksToClean.insert(BB); 884 885 // We already cloned BB for this NextState, now just update the branch 886 // and continue. 887 BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap); 888 if (NextBB) { 889 updatePredecessor(PrevBB, BB, NextBB, DTU); 890 PrevBB = NextBB; 891 continue; 892 } 893 894 // Clone the BB and update the successor of Prev to jump to the new block 895 BasicBlock *NewBB = cloneBlockAndUpdatePredecessor( 896 BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU); 897 DuplicateMap[BB].push_back({NewBB, NextState}); 898 BlocksToClean.insert(NewBB); 899 PrevBB = NewBB; 900 } 901 } 902 903 /// Restore SSA form after cloning blocks. 904 /// 905 /// Each cloned block creates new defs for a variable, and the uses need to be 906 /// updated to reflect this. The uses may be replaced with a cloned value, or 907 /// some derived phi instruction. Note that all uses of a value defined in the 908 /// same block were already remapped when cloning the block. 909 void updateSSA(DefMap &NewDefs) { 910 SSAUpdaterBulk SSAUpdate; 911 SmallVector<Use *, 16> UsesToRename; 912 913 for (auto KV : NewDefs) { 914 Instruction *I = KV.first; 915 BasicBlock *BB = I->getParent(); 916 std::vector<Instruction *> Cloned = KV.second; 917 918 // Scan all uses of this instruction to see if it is used outside of its 919 // block, and if so, record them in UsesToRename. 920 for (Use &U : I->uses()) { 921 Instruction *User = cast<Instruction>(U.getUser()); 922 if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 923 if (UserPN->getIncomingBlock(U) == BB) 924 continue; 925 } else if (User->getParent() == BB) { 926 continue; 927 } 928 929 UsesToRename.push_back(&U); 930 } 931 932 // If there are no uses outside the block, we're done with this 933 // instruction. 934 if (UsesToRename.empty()) 935 continue; 936 LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I 937 << "\n"); 938 939 // We found a use of I outside of BB. Rename all uses of I that are 940 // outside its block to be uses of the appropriate PHI node etc. See 941 // ValuesInBlocks with the values we know. 942 unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType()); 943 SSAUpdate.AddAvailableValue(VarNum, BB, I); 944 for (Instruction *New : Cloned) 945 SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New); 946 947 while (!UsesToRename.empty()) 948 SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val()); 949 950 LLVM_DEBUG(dbgs() << "\n"); 951 } 952 // SSAUpdater handles phi placement and renaming uses with the appropriate 953 // value. 954 SSAUpdate.RewriteAllUses(DT); 955 } 956 957 /// Clones a basic block, and adds it to the CFG. 958 /// 959 /// This function also includes updating phi nodes in the successors of the 960 /// BB, and remapping uses that were defined locally in the cloned BB. 961 BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB, 962 uint64_t NextState, 963 DuplicateBlockMap &DuplicateMap, 964 DefMap &NewDefs, 965 DomTreeUpdater *DTU) { 966 ValueToValueMapTy VMap; 967 BasicBlock *NewBB = CloneBasicBlock( 968 BB, VMap, ".jt" + std::to_string(NextState), BB->getParent()); 969 NewBB->moveAfter(BB); 970 NumCloned++; 971 972 for (Instruction &I : *NewBB) { 973 // Do not remap operands of PHINode in case a definition in BB is an 974 // incoming value to a phi in the same block. This incoming value will 975 // be renamed later while restoring SSA. 976 if (isa<PHINode>(&I)) 977 continue; 978 RemapInstruction(&I, VMap, 979 RF_IgnoreMissingLocals | RF_NoModuleLevelChanges); 980 if (AssumeInst *II = dyn_cast<AssumeInst>(&I)) 981 AC->registerAssumption(II); 982 } 983 984 updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap); 985 updatePredecessor(PrevBB, BB, NewBB, DTU); 986 updateDefMap(NewDefs, VMap); 987 988 // Add all successors to the DominatorTree 989 SmallPtrSet<BasicBlock *, 4> SuccSet; 990 for (auto *SuccBB : successors(NewBB)) { 991 if (SuccSet.insert(SuccBB).second) 992 DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}}); 993 } 994 SuccSet.clear(); 995 return NewBB; 996 } 997 998 /// Update the phi nodes in BB's successors. 999 /// 1000 /// This means creating a new incoming value from NewBB with the new 1001 /// instruction wherever there is an incoming value from BB. 1002 void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB, 1003 uint64_t NextState, ValueToValueMapTy &VMap, 1004 DuplicateBlockMap &DuplicateMap) { 1005 std::vector<BasicBlock *> BlocksToUpdate; 1006 1007 // If BB is the last block in the path, we can simply update the one case 1008 // successor that will be reached. 1009 if (BB == SwitchPaths->getSwitchBlock()) { 1010 SwitchInst *Switch = SwitchPaths->getSwitchInst(); 1011 BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 1012 BlocksToUpdate.push_back(NextCase); 1013 BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap); 1014 if (ClonedSucc) 1015 BlocksToUpdate.push_back(ClonedSucc); 1016 } 1017 // Otherwise update phis in all successors. 1018 else { 1019 for (BasicBlock *Succ : successors(BB)) { 1020 BlocksToUpdate.push_back(Succ); 1021 1022 // Check if a successor has already been cloned for the particular exit 1023 // value. In this case if a successor was already cloned, the phi nodes 1024 // in the cloned block should be updated directly. 1025 BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap); 1026 if (ClonedSucc) 1027 BlocksToUpdate.push_back(ClonedSucc); 1028 } 1029 } 1030 1031 // If there is a phi with an incoming value from BB, create a new incoming 1032 // value for the new predecessor ClonedBB. The value will either be the same 1033 // value from BB or a cloned value. 1034 for (BasicBlock *Succ : BlocksToUpdate) { 1035 for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II); 1036 ++II) { 1037 Value *Incoming = Phi->getIncomingValueForBlock(BB); 1038 if (Incoming) { 1039 if (isa<Constant>(Incoming)) { 1040 Phi->addIncoming(Incoming, ClonedBB); 1041 continue; 1042 } 1043 Value *ClonedVal = VMap[Incoming]; 1044 if (ClonedVal) 1045 Phi->addIncoming(ClonedVal, ClonedBB); 1046 else 1047 Phi->addIncoming(Incoming, ClonedBB); 1048 } 1049 } 1050 } 1051 } 1052 1053 /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all 1054 /// other successors are kept as well. 1055 void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB, 1056 BasicBlock *NewBB, DomTreeUpdater *DTU) { 1057 // When a path is reused, there is a chance that predecessors were already 1058 // updated before. Check if the predecessor needs to be updated first. 1059 if (!isPredecessor(OldBB, PrevBB)) 1060 return; 1061 1062 Instruction *PrevTerm = PrevBB->getTerminator(); 1063 for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) { 1064 if (PrevTerm->getSuccessor(Idx) == OldBB) { 1065 OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true); 1066 PrevTerm->setSuccessor(Idx, NewBB); 1067 } 1068 } 1069 DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB}, 1070 {DominatorTree::Insert, PrevBB, NewBB}}); 1071 } 1072 1073 /// Add new value mappings to the DefMap to keep track of all new definitions 1074 /// for a particular instruction. These will be used while updating SSA form. 1075 void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) { 1076 for (auto Entry : VMap) { 1077 Instruction *Inst = 1078 dyn_cast<Instruction>(const_cast<Value *>(Entry.first)); 1079 if (!Inst || !Entry.second || isa<BranchInst>(Inst) || 1080 isa<SwitchInst>(Inst)) { 1081 continue; 1082 } 1083 1084 Instruction *Cloned = dyn_cast<Instruction>(Entry.second); 1085 if (!Cloned) 1086 continue; 1087 1088 if (NewDefs.find(Inst) == NewDefs.end()) 1089 NewDefs[Inst] = {Cloned}; 1090 else 1091 NewDefs[Inst].push_back(Cloned); 1092 } 1093 } 1094 1095 /// Update the last branch of a particular cloned path to point to the correct 1096 /// case successor. 1097 /// 1098 /// Note that this is an optional step and would have been done in later 1099 /// optimizations, but it makes the CFG significantly easier to work with. 1100 void updateLastSuccessor(ThreadingPath &TPath, 1101 DuplicateBlockMap &DuplicateMap, 1102 DomTreeUpdater *DTU) { 1103 uint64_t NextState = TPath.getExitValue(); 1104 BasicBlock *BB = TPath.getPath().back(); 1105 BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap); 1106 1107 // Note multiple paths can end at the same block so check that it is not 1108 // updated yet 1109 if (!isa<SwitchInst>(LastBlock->getTerminator())) 1110 return; 1111 SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator()); 1112 BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 1113 1114 std::vector<DominatorTree::UpdateType> DTUpdates; 1115 SmallPtrSet<BasicBlock *, 4> SuccSet; 1116 for (BasicBlock *Succ : successors(LastBlock)) { 1117 if (Succ != NextCase && SuccSet.insert(Succ).second) 1118 DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ}); 1119 } 1120 1121 Switch->eraseFromParent(); 1122 BranchInst::Create(NextCase, LastBlock); 1123 1124 DTU->applyUpdates(DTUpdates); 1125 } 1126 1127 /// After cloning blocks, some of the phi nodes have extra incoming values 1128 /// that are no longer used. This function removes them. 1129 void cleanPhiNodes(BasicBlock *BB) { 1130 // If BB is no longer reachable, remove any remaining phi nodes 1131 if (pred_empty(BB)) { 1132 std::vector<PHINode *> PhiToRemove; 1133 for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 1134 PhiToRemove.push_back(Phi); 1135 } 1136 for (PHINode *PN : PhiToRemove) { 1137 PN->replaceAllUsesWith(UndefValue::get(PN->getType())); 1138 PN->eraseFromParent(); 1139 } 1140 return; 1141 } 1142 1143 // Remove any incoming values that come from an invalid predecessor 1144 for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 1145 std::vector<BasicBlock *> BlocksToRemove; 1146 for (BasicBlock *IncomingBB : Phi->blocks()) { 1147 if (!isPredecessor(BB, IncomingBB)) 1148 BlocksToRemove.push_back(IncomingBB); 1149 } 1150 for (BasicBlock *BB : BlocksToRemove) 1151 Phi->removeIncomingValue(BB); 1152 } 1153 } 1154 1155 /// Checks if BB was already cloned for a particular next state value. If it 1156 /// was then it returns this cloned block, and otherwise null. 1157 BasicBlock *getClonedBB(BasicBlock *BB, uint64_t NextState, 1158 DuplicateBlockMap &DuplicateMap) { 1159 CloneList ClonedBBs = DuplicateMap[BB]; 1160 1161 // Find an entry in the CloneList with this NextState. If it exists then 1162 // return the corresponding BB 1163 auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) { 1164 return C.State == NextState; 1165 }); 1166 return It != ClonedBBs.end() ? (*It).BB : nullptr; 1167 } 1168 1169 /// Helper to get the successor corresponding to a particular case value for 1170 /// a switch statement. 1171 BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, uint64_t NextState) { 1172 BasicBlock *NextCase = nullptr; 1173 for (auto Case : Switch->cases()) { 1174 if (Case.getCaseValue()->getZExtValue() == NextState) { 1175 NextCase = Case.getCaseSuccessor(); 1176 break; 1177 } 1178 } 1179 if (!NextCase) 1180 NextCase = Switch->getDefaultDest(); 1181 return NextCase; 1182 } 1183 1184 /// Returns true if IncomingBB is a predecessor of BB. 1185 bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) { 1186 return llvm::find(predecessors(BB), IncomingBB) != pred_end(BB); 1187 } 1188 1189 AllSwitchPaths *SwitchPaths; 1190 DominatorTree *DT; 1191 AssumptionCache *AC; 1192 TargetTransformInfo *TTI; 1193 OptimizationRemarkEmitter *ORE; 1194 SmallPtrSet<const Value *, 32> EphValues; 1195 std::vector<ThreadingPath> TPaths; 1196 }; 1197 1198 bool DFAJumpThreading::run(Function &F) { 1199 LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n"); 1200 1201 if (F.hasOptSize()) { 1202 LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n"); 1203 return false; 1204 } 1205 1206 if (ClViewCfgBefore) 1207 F.viewCFG(); 1208 1209 SmallVector<AllSwitchPaths, 2> ThreadableLoops; 1210 bool MadeChanges = false; 1211 1212 for (BasicBlock &BB : F) { 1213 auto *SI = dyn_cast<SwitchInst>(BB.getTerminator()); 1214 if (!SI) 1215 continue; 1216 1217 LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName() 1218 << " is predictable\n"); 1219 MainSwitch Switch(SI, ORE); 1220 1221 if (!Switch.getInstr()) 1222 continue; 1223 1224 LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a " 1225 << "candidate for jump threading\n"); 1226 LLVM_DEBUG(SI->dump()); 1227 1228 unfoldSelectInstrs(DT, Switch.getSelectInsts()); 1229 if (!Switch.getSelectInsts().empty()) 1230 MadeChanges = true; 1231 1232 AllSwitchPaths SwitchPaths(&Switch, ORE); 1233 SwitchPaths.run(); 1234 1235 if (SwitchPaths.getNumThreadingPaths() > 0) { 1236 ThreadableLoops.push_back(SwitchPaths); 1237 1238 // For the time being limit this optimization to occurring once in a 1239 // function since it can change the CFG significantly. This is not a 1240 // strict requirement but it can cause buggy behavior if there is an 1241 // overlap of blocks in different opportunities. There is a lot of room to 1242 // experiment with catching more opportunities here. 1243 break; 1244 } 1245 } 1246 1247 SmallPtrSet<const Value *, 32> EphValues; 1248 if (ThreadableLoops.size() > 0) 1249 CodeMetrics::collectEphemeralValues(&F, AC, EphValues); 1250 1251 for (AllSwitchPaths SwitchPaths : ThreadableLoops) { 1252 TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); 1253 Transform.run(); 1254 MadeChanges = true; 1255 } 1256 1257 #ifdef EXPENSIVE_CHECKS 1258 assert(DT->verify(DominatorTree::VerificationLevel::Full)); 1259 verifyFunction(F, &dbgs()); 1260 #endif 1261 1262 return MadeChanges; 1263 } 1264 1265 } // end anonymous namespace 1266 1267 /// Integrate with the new Pass Manager 1268 PreservedAnalyses DFAJumpThreadingPass::run(Function &F, 1269 FunctionAnalysisManager &AM) { 1270 AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F); 1271 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); 1272 TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); 1273 OptimizationRemarkEmitter ORE(&F); 1274 1275 if (!DFAJumpThreading(&AC, &DT, &TTI, &ORE).run(F)) 1276 return PreservedAnalyses::all(); 1277 1278 PreservedAnalyses PA; 1279 PA.preserve<DominatorTreeAnalysis>(); 1280 return PA; 1281 } 1282