1 //===- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass transforms loops that contain branches on loop-invariant conditions 10 // to multiple loops. For example, it turns the left into the right code: 11 // 12 // for (...) if (lic) 13 // A for (...) 14 // if (lic) A; B; C 15 // B else 16 // C for (...) 17 // A; C 18 // 19 // This can increase the size of the code exponentially (doubling it every time 20 // a loop is unswitched) so we only unswitch if the resultant code will be 21 // smaller than a threshold. 22 // 23 // This pass expects LICM to be run before it to hoist invariant conditions out 24 // of the loop, to make the unswitching opportunity obvious. 25 // 26 //===----------------------------------------------------------------------===// 27 28 #include "llvm/ADT/DenseMap.h" 29 #include "llvm/ADT/SmallPtrSet.h" 30 #include "llvm/ADT/SmallVector.h" 31 #include "llvm/ADT/Statistic.h" 32 #include "llvm/Analysis/AssumptionCache.h" 33 #include "llvm/Analysis/CodeMetrics.h" 34 #include "llvm/Analysis/InstructionSimplify.h" 35 #include "llvm/Analysis/LazyBlockFrequencyInfo.h" 36 #include "llvm/Analysis/LegacyDivergenceAnalysis.h" 37 #include "llvm/Analysis/LoopInfo.h" 38 #include "llvm/Analysis/LoopIterator.h" 39 #include "llvm/Analysis/LoopPass.h" 40 #include "llvm/Analysis/MemorySSA.h" 41 #include "llvm/Analysis/MemorySSAUpdater.h" 42 #include "llvm/Analysis/MustExecute.h" 43 #include "llvm/Analysis/ScalarEvolution.h" 44 #include "llvm/Analysis/TargetTransformInfo.h" 45 #include "llvm/IR/Attributes.h" 46 #include "llvm/IR/BasicBlock.h" 47 #include "llvm/IR/Constant.h" 48 #include "llvm/IR/Constants.h" 49 #include "llvm/IR/DerivedTypes.h" 50 #include "llvm/IR/Dominators.h" 51 #include "llvm/IR/Function.h" 52 #include "llvm/IR/IRBuilder.h" 53 #include "llvm/IR/InstrTypes.h" 54 #include "llvm/IR/Instruction.h" 55 #include "llvm/IR/Instructions.h" 56 #include "llvm/IR/IntrinsicInst.h" 57 #include "llvm/IR/Intrinsics.h" 58 #include "llvm/IR/Module.h" 59 #include "llvm/IR/Type.h" 60 #include "llvm/IR/User.h" 61 #include "llvm/IR/Value.h" 62 #include "llvm/IR/ValueHandle.h" 63 #include "llvm/InitializePasses.h" 64 #include "llvm/Pass.h" 65 #include "llvm/Support/Casting.h" 66 #include "llvm/Support/CommandLine.h" 67 #include "llvm/Support/Debug.h" 68 #include "llvm/Support/raw_ostream.h" 69 #include "llvm/Transforms/Scalar.h" 70 #include "llvm/Transforms/Scalar/LoopPassManager.h" 71 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 72 #include "llvm/Transforms/Utils/Cloning.h" 73 #include "llvm/Transforms/Utils/Local.h" 74 #include "llvm/Transforms/Utils/LoopUtils.h" 75 #include "llvm/Transforms/Utils/ValueMapper.h" 76 #include <algorithm> 77 #include <cassert> 78 #include <map> 79 #include <set> 80 #include <tuple> 81 #include <utility> 82 #include <vector> 83 84 using namespace llvm; 85 86 #define DEBUG_TYPE "loop-unswitch" 87 88 STATISTIC(NumBranches, "Number of branches unswitched"); 89 STATISTIC(NumSwitches, "Number of switches unswitched"); 90 STATISTIC(NumGuards, "Number of guards unswitched"); 91 STATISTIC(NumSelects , "Number of selects unswitched"); 92 STATISTIC(NumTrivial , "Number of unswitches that are trivial"); 93 STATISTIC(NumSimplify, "Number of simplifications of unswitched code"); 94 STATISTIC(TotalInsts, "Total number of instructions analyzed"); 95 96 // The specific value of 100 here was chosen based only on intuition and a 97 // few specific examples. 98 static cl::opt<unsigned> 99 Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), 100 cl::init(100), cl::Hidden); 101 102 static cl::opt<unsigned> 103 MSSAThreshold("loop-unswitch-memoryssa-threshold", 104 cl::desc("Max number of memory uses to explore during " 105 "partial unswitching analysis"), 106 cl::init(100), cl::Hidden); 107 108 namespace { 109 110 class LUAnalysisCache { 111 using UnswitchedValsMap = 112 DenseMap<const SwitchInst *, SmallPtrSet<const Value *, 8>>; 113 using UnswitchedValsIt = UnswitchedValsMap::iterator; 114 115 struct LoopProperties { 116 unsigned CanBeUnswitchedCount; 117 unsigned WasUnswitchedCount; 118 unsigned SizeEstimation; 119 UnswitchedValsMap UnswitchedVals; 120 }; 121 122 // Here we use std::map instead of DenseMap, since we need to keep valid 123 // LoopProperties pointer for current loop for better performance. 124 using LoopPropsMap = std::map<const Loop *, LoopProperties>; 125 using LoopPropsMapIt = LoopPropsMap::iterator; 126 127 LoopPropsMap LoopsProperties; 128 UnswitchedValsMap *CurLoopInstructions = nullptr; 129 LoopProperties *CurrentLoopProperties = nullptr; 130 131 // A loop unswitching with an estimated cost above this threshold 132 // is not performed. MaxSize is turned into unswitching quota for 133 // the current loop, and reduced correspondingly, though note that 134 // the quota is returned by releaseMemory() when the loop has been 135 // processed, so that MaxSize will return to its previous 136 // value. So in most cases MaxSize will equal the Threshold flag 137 // when a new loop is processed. An exception to that is that 138 // MaxSize will have a smaller value while processing nested loops 139 // that were introduced due to loop unswitching of an outer loop. 140 // 141 // FIXME: The way that MaxSize works is subtle and depends on the 142 // pass manager processing loops and calling releaseMemory() in a 143 // specific order. It would be good to find a more straightforward 144 // way of doing what MaxSize does. 145 unsigned MaxSize; 146 147 public: 148 LUAnalysisCache() : MaxSize(Threshold) {} 149 150 // Analyze loop. Check its size, calculate is it possible to unswitch 151 // it. Returns true if we can unswitch this loop. 152 bool countLoop(const Loop *L, const TargetTransformInfo &TTI, 153 AssumptionCache *AC); 154 155 // Clean all data related to given loop. 156 void forgetLoop(const Loop *L); 157 158 // Mark case value as unswitched. 159 // Since SI instruction can be partly unswitched, in order to avoid 160 // extra unswitching in cloned loops keep track all unswitched values. 161 void setUnswitched(const SwitchInst *SI, const Value *V); 162 163 // Check was this case value unswitched before or not. 164 bool isUnswitched(const SwitchInst *SI, const Value *V); 165 166 // Returns true if another unswitching could be done within the cost 167 // threshold. 168 bool costAllowsUnswitching(); 169 170 // Clone all loop-unswitch related loop properties. 171 // Redistribute unswitching quotas. 172 // Note, that new loop data is stored inside the VMap. 173 void cloneData(const Loop *NewLoop, const Loop *OldLoop, 174 const ValueToValueMapTy &VMap); 175 }; 176 177 class LoopUnswitch : public LoopPass { 178 LoopInfo *LI; // Loop information 179 LPPassManager *LPM; 180 AssumptionCache *AC; 181 182 // Used to check if second loop needs processing after 183 // rewriteLoopBodyWithConditionConstant rewrites first loop. 184 std::vector<Loop*> LoopProcessWorklist; 185 186 LUAnalysisCache BranchesInfo; 187 188 bool OptimizeForSize; 189 bool RedoLoop = false; 190 191 Loop *CurrentLoop = nullptr; 192 DominatorTree *DT = nullptr; 193 MemorySSA *MSSA = nullptr; 194 AAResults *AA = nullptr; 195 std::unique_ptr<MemorySSAUpdater> MSSAU; 196 BasicBlock *LoopHeader = nullptr; 197 BasicBlock *LoopPreheader = nullptr; 198 199 bool SanitizeMemory; 200 SimpleLoopSafetyInfo SafetyInfo; 201 202 // LoopBlocks contains all of the basic blocks of the loop, including the 203 // preheader of the loop, the body of the loop, and the exit blocks of the 204 // loop, in that order. 205 std::vector<BasicBlock*> LoopBlocks; 206 // NewBlocks contained cloned copy of basic blocks from LoopBlocks. 207 std::vector<BasicBlock*> NewBlocks; 208 209 bool HasBranchDivergence; 210 211 public: 212 static char ID; // Pass ID, replacement for typeid 213 214 explicit LoopUnswitch(bool Os = false, bool HasBranchDivergence = false) 215 : LoopPass(ID), OptimizeForSize(Os), 216 HasBranchDivergence(HasBranchDivergence) { 217 initializeLoopUnswitchPass(*PassRegistry::getPassRegistry()); 218 } 219 220 bool runOnLoop(Loop *L, LPPassManager &LPM) override; 221 bool processCurrentLoop(); 222 bool isUnreachableDueToPreviousUnswitching(BasicBlock *); 223 224 /// This transformation requires natural loop information & requires that 225 /// loop preheaders be inserted into the CFG. 226 /// 227 void getAnalysisUsage(AnalysisUsage &AU) const override { 228 // Lazy BFI and BPI are marked as preserved here so Loop Unswitching 229 // can remain part of the same loop pass as LICM 230 AU.addPreserved<LazyBlockFrequencyInfoPass>(); 231 AU.addPreserved<LazyBranchProbabilityInfoPass>(); 232 AU.addRequired<AssumptionCacheTracker>(); 233 AU.addRequired<TargetTransformInfoWrapperPass>(); 234 if (EnableMSSALoopDependency) { 235 AU.addRequired<MemorySSAWrapperPass>(); 236 AU.addPreserved<MemorySSAWrapperPass>(); 237 } 238 if (HasBranchDivergence) 239 AU.addRequired<LegacyDivergenceAnalysis>(); 240 getLoopAnalysisUsage(AU); 241 } 242 243 private: 244 void releaseMemory() override { BranchesInfo.forgetLoop(CurrentLoop); } 245 246 void initLoopData() { 247 LoopHeader = CurrentLoop->getHeader(); 248 LoopPreheader = CurrentLoop->getLoopPreheader(); 249 } 250 251 /// Split all of the edges from inside the loop to their exit blocks. 252 /// Update the appropriate Phi nodes as we do so. 253 void splitExitEdges(Loop *L, 254 const SmallVectorImpl<BasicBlock *> &ExitBlocks); 255 256 bool tryTrivialLoopUnswitch(bool &Changed); 257 258 bool unswitchIfProfitable(Value *LoopCond, Constant *Val, 259 Instruction *TI = nullptr, 260 ArrayRef<Instruction *> ToDuplicate = {}); 261 void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, 262 BasicBlock *ExitBlock, Instruction *TI); 263 void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L, 264 Instruction *TI, 265 ArrayRef<Instruction *> ToDuplicate = {}); 266 267 void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, 268 Constant *Val, bool IsEqual); 269 270 void 271 emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, 272 BasicBlock *TrueDest, BasicBlock *FalseDest, 273 BranchInst *OldBranch, Instruction *TI, 274 ArrayRef<Instruction *> ToDuplicate = {}); 275 276 void simplifyCode(std::vector<Instruction *> &Worklist, Loop *L); 277 278 /// Given that the Invariant is not equal to Val. Simplify instructions 279 /// in the loop. 280 Value *simplifyInstructionWithNotEqual(Instruction *Inst, Value *Invariant, 281 Constant *Val); 282 }; 283 284 } // end anonymous namespace 285 286 // Analyze loop. Check its size, calculate is it possible to unswitch 287 // it. Returns true if we can unswitch this loop. 288 bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI, 289 AssumptionCache *AC) { 290 LoopPropsMapIt PropsIt; 291 bool Inserted; 292 std::tie(PropsIt, Inserted) = 293 LoopsProperties.insert(std::make_pair(L, LoopProperties())); 294 295 LoopProperties &Props = PropsIt->second; 296 297 if (Inserted) { 298 // New loop. 299 300 // Limit the number of instructions to avoid causing significant code 301 // expansion, and the number of basic blocks, to avoid loops with 302 // large numbers of branches which cause loop unswitching to go crazy. 303 // This is a very ad-hoc heuristic. 304 305 SmallPtrSet<const Value *, 32> EphValues; 306 CodeMetrics::collectEphemeralValues(L, AC, EphValues); 307 308 // FIXME: This is overly conservative because it does not take into 309 // consideration code simplification opportunities and code that can 310 // be shared by the resultant unswitched loops. 311 CodeMetrics Metrics; 312 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; 313 ++I) 314 Metrics.analyzeBasicBlock(*I, TTI, EphValues); 315 316 Props.SizeEstimation = Metrics.NumInsts; 317 Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); 318 Props.WasUnswitchedCount = 0; 319 MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; 320 321 if (Metrics.notDuplicatable) { 322 LLVM_DEBUG(dbgs() << "NOT unswitching loop %" << L->getHeader()->getName() 323 << ", contents cannot be " 324 << "duplicated!\n"); 325 return false; 326 } 327 } 328 329 // Be careful. This links are good only before new loop addition. 330 CurrentLoopProperties = &Props; 331 CurLoopInstructions = &Props.UnswitchedVals; 332 333 return true; 334 } 335 336 // Clean all data related to given loop. 337 void LUAnalysisCache::forgetLoop(const Loop *L) { 338 LoopPropsMapIt LIt = LoopsProperties.find(L); 339 340 if (LIt != LoopsProperties.end()) { 341 LoopProperties &Props = LIt->second; 342 MaxSize += (Props.CanBeUnswitchedCount + Props.WasUnswitchedCount) * 343 Props.SizeEstimation; 344 LoopsProperties.erase(LIt); 345 } 346 347 CurrentLoopProperties = nullptr; 348 CurLoopInstructions = nullptr; 349 } 350 351 // Mark case value as unswitched. 352 // Since SI instruction can be partly unswitched, in order to avoid 353 // extra unswitching in cloned loops keep track all unswitched values. 354 void LUAnalysisCache::setUnswitched(const SwitchInst *SI, const Value *V) { 355 (*CurLoopInstructions)[SI].insert(V); 356 } 357 358 // Check was this case value unswitched before or not. 359 bool LUAnalysisCache::isUnswitched(const SwitchInst *SI, const Value *V) { 360 return (*CurLoopInstructions)[SI].count(V); 361 } 362 363 bool LUAnalysisCache::costAllowsUnswitching() { 364 return CurrentLoopProperties->CanBeUnswitchedCount > 0; 365 } 366 367 // Clone all loop-unswitch related loop properties. 368 // Redistribute unswitching quotas. 369 // Note, that new loop data is stored inside the VMap. 370 void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop, 371 const ValueToValueMapTy &VMap) { 372 LoopProperties &NewLoopProps = LoopsProperties[NewLoop]; 373 LoopProperties &OldLoopProps = *CurrentLoopProperties; 374 UnswitchedValsMap &Insts = OldLoopProps.UnswitchedVals; 375 376 // Reallocate "can-be-unswitched quota" 377 378 --OldLoopProps.CanBeUnswitchedCount; 379 ++OldLoopProps.WasUnswitchedCount; 380 NewLoopProps.WasUnswitchedCount = 0; 381 unsigned Quota = OldLoopProps.CanBeUnswitchedCount; 382 NewLoopProps.CanBeUnswitchedCount = Quota / 2; 383 OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2; 384 385 NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation; 386 387 // Clone unswitched values info: 388 // for new loop switches we clone info about values that was 389 // already unswitched and has redundant successors. 390 for (UnswitchedValsIt I = Insts.begin(); I != Insts.end(); ++I) { 391 const SwitchInst *OldInst = I->first; 392 Value *NewI = VMap.lookup(OldInst); 393 const SwitchInst *NewInst = cast_or_null<SwitchInst>(NewI); 394 assert(NewInst && "All instructions that are in SrcBB must be in VMap."); 395 396 NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst]; 397 } 398 } 399 400 char LoopUnswitch::ID = 0; 401 402 INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops", 403 false, false) 404 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 405 INITIALIZE_PASS_DEPENDENCY(LoopPass) 406 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 407 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) 408 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) 409 INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops", 410 false, false) 411 412 Pass *llvm::createLoopUnswitchPass(bool Os, bool HasBranchDivergence) { 413 return new LoopUnswitch(Os, HasBranchDivergence); 414 } 415 416 /// Operator chain lattice. 417 enum OperatorChain { 418 OC_OpChainNone, ///< There is no operator. 419 OC_OpChainOr, ///< There are only ORs. 420 OC_OpChainAnd, ///< There are only ANDs. 421 OC_OpChainMixed ///< There are ANDs and ORs. 422 }; 423 424 /// Cond is a condition that occurs in L. If it is invariant in the loop, or has 425 /// an invariant piece, return the invariant. Otherwise, return null. 426 // 427 /// NOTE: findLIVLoopCondition will not return a partial LIV by walking up a 428 /// mixed operator chain, as we can not reliably find a value which will 429 /// simplify the operator chain. If the chain is AND-only or OR-only, we can use 430 /// 0 or ~0 to simplify the chain. 431 /// 432 /// NOTE: In case a partial LIV and a mixed operator chain, we may be able to 433 /// simplify the condition itself to a loop variant condition, but at the 434 /// cost of creating an entirely new loop. 435 static Value *findLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, 436 OperatorChain &ParentChain, 437 DenseMap<Value *, Value *> &Cache, 438 MemorySSAUpdater *MSSAU) { 439 auto CacheIt = Cache.find(Cond); 440 if (CacheIt != Cache.end()) 441 return CacheIt->second; 442 443 // We started analyze new instruction, increment scanned instructions counter. 444 ++TotalInsts; 445 446 // We can never unswitch on vector conditions. 447 if (Cond->getType()->isVectorTy()) 448 return nullptr; 449 450 // Constants should be folded, not unswitched on! 451 if (isa<Constant>(Cond)) return nullptr; 452 453 // TODO: Handle: br (VARIANT|INVARIANT). 454 455 // Hoist simple values out. 456 if (L->makeLoopInvariant(Cond, Changed, nullptr, MSSAU)) { 457 Cache[Cond] = Cond; 458 return Cond; 459 } 460 461 // Walk up the operator chain to find partial invariant conditions. 462 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond)) 463 if (BO->getOpcode() == Instruction::And || 464 BO->getOpcode() == Instruction::Or) { 465 // Given the previous operator, compute the current operator chain status. 466 OperatorChain NewChain; 467 switch (ParentChain) { 468 case OC_OpChainNone: 469 NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd : 470 OC_OpChainOr; 471 break; 472 case OC_OpChainOr: 473 NewChain = BO->getOpcode() == Instruction::Or ? OC_OpChainOr : 474 OC_OpChainMixed; 475 break; 476 case OC_OpChainAnd: 477 NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd : 478 OC_OpChainMixed; 479 break; 480 case OC_OpChainMixed: 481 NewChain = OC_OpChainMixed; 482 break; 483 } 484 485 // If we reach a Mixed state, we do not want to keep walking up as we can not 486 // reliably find a value that will simplify the chain. With this check, we 487 // will return null on the first sight of mixed chain and the caller will 488 // either backtrack to find partial LIV in other operand or return null. 489 if (NewChain != OC_OpChainMixed) { 490 // Update the current operator chain type before we search up the chain. 491 ParentChain = NewChain; 492 // If either the left or right side is invariant, we can unswitch on this, 493 // which will cause the branch to go away in one loop and the condition to 494 // simplify in the other one. 495 if (Value *LHS = findLIVLoopCondition(BO->getOperand(0), L, Changed, 496 ParentChain, Cache, MSSAU)) { 497 Cache[Cond] = LHS; 498 return LHS; 499 } 500 // We did not manage to find a partial LIV in operand(0). Backtrack and try 501 // operand(1). 502 ParentChain = NewChain; 503 if (Value *RHS = findLIVLoopCondition(BO->getOperand(1), L, Changed, 504 ParentChain, Cache, MSSAU)) { 505 Cache[Cond] = RHS; 506 return RHS; 507 } 508 } 509 } 510 511 Cache[Cond] = nullptr; 512 return nullptr; 513 } 514 515 /// Cond is a condition that occurs in L. If it is invariant in the loop, or has 516 /// an invariant piece, return the invariant along with the operator chain type. 517 /// Otherwise, return null. 518 static std::pair<Value *, OperatorChain> 519 findLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, 520 MemorySSAUpdater *MSSAU) { 521 DenseMap<Value *, Value *> Cache; 522 OperatorChain OpChain = OC_OpChainNone; 523 Value *FCond = findLIVLoopCondition(Cond, L, Changed, OpChain, Cache, MSSAU); 524 525 // In case we do find a LIV, it can not be obtained by walking up a mixed 526 // operator chain. 527 assert((!FCond || OpChain != OC_OpChainMixed) && 528 "Do not expect a partial LIV with mixed operator chain"); 529 return {FCond, OpChain}; 530 } 531 532 bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) { 533 if (skipLoop(L)) 534 return false; 535 536 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache( 537 *L->getHeader()->getParent()); 538 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 539 LPM = &LPMRef; 540 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 541 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 542 if (EnableMSSALoopDependency) { 543 MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); 544 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); 545 assert(DT && "Cannot update MemorySSA without a valid DomTree."); 546 } 547 CurrentLoop = L; 548 Function *F = CurrentLoop->getHeader()->getParent(); 549 550 SanitizeMemory = F->hasFnAttribute(Attribute::SanitizeMemory); 551 if (SanitizeMemory) 552 SafetyInfo.computeLoopSafetyInfo(L); 553 554 if (MSSA && VerifyMemorySSA) 555 MSSA->verifyMemorySSA(); 556 557 bool Changed = false; 558 do { 559 assert(CurrentLoop->isLCSSAForm(*DT)); 560 if (MSSA && VerifyMemorySSA) 561 MSSA->verifyMemorySSA(); 562 RedoLoop = false; 563 Changed |= processCurrentLoop(); 564 } while (RedoLoop); 565 566 if (MSSA && VerifyMemorySSA) 567 MSSA->verifyMemorySSA(); 568 569 return Changed; 570 } 571 572 // Return true if the BasicBlock BB is unreachable from the loop header. 573 // Return false, otherwise. 574 bool LoopUnswitch::isUnreachableDueToPreviousUnswitching(BasicBlock *BB) { 575 auto *Node = DT->getNode(BB)->getIDom(); 576 BasicBlock *DomBB = Node->getBlock(); 577 while (CurrentLoop->contains(DomBB)) { 578 BranchInst *BInst = dyn_cast<BranchInst>(DomBB->getTerminator()); 579 580 Node = DT->getNode(DomBB)->getIDom(); 581 DomBB = Node->getBlock(); 582 583 if (!BInst || !BInst->isConditional()) 584 continue; 585 586 Value *Cond = BInst->getCondition(); 587 if (!isa<ConstantInt>(Cond)) 588 continue; 589 590 BasicBlock *UnreachableSucc = 591 Cond == ConstantInt::getTrue(Cond->getContext()) 592 ? BInst->getSuccessor(1) 593 : BInst->getSuccessor(0); 594 595 if (DT->dominates(UnreachableSucc, BB)) 596 return true; 597 } 598 return false; 599 } 600 601 /// FIXME: Remove this workaround when freeze related patches are done. 602 /// LoopUnswitch and Equality propagation in GVN have discrepancy about 603 /// whether branch on undef/poison has undefine behavior. Here it is to 604 /// rule out some common cases that we found such discrepancy already 605 /// causing problems. Detail could be found in PR31652. Note if the 606 /// func returns true, it is unsafe. But if it is false, it doesn't mean 607 /// it is necessarily safe. 608 static bool equalityPropUnSafe(Value &LoopCond) { 609 ICmpInst *CI = dyn_cast<ICmpInst>(&LoopCond); 610 if (!CI || !CI->isEquality()) 611 return false; 612 613 Value *LHS = CI->getOperand(0); 614 Value *RHS = CI->getOperand(1); 615 if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) 616 return true; 617 618 auto HasUndefInPHI = [](PHINode &PN) { 619 for (Value *Opd : PN.incoming_values()) { 620 if (isa<UndefValue>(Opd)) 621 return true; 622 } 623 return false; 624 }; 625 PHINode *LPHI = dyn_cast<PHINode>(LHS); 626 PHINode *RPHI = dyn_cast<PHINode>(RHS); 627 if ((LPHI && HasUndefInPHI(*LPHI)) || (RPHI && HasUndefInPHI(*RPHI))) 628 return true; 629 630 auto HasUndefInSelect = [](SelectInst &SI) { 631 if (isa<UndefValue>(SI.getTrueValue()) || 632 isa<UndefValue>(SI.getFalseValue())) 633 return true; 634 return false; 635 }; 636 SelectInst *LSI = dyn_cast<SelectInst>(LHS); 637 SelectInst *RSI = dyn_cast<SelectInst>(RHS); 638 if ((LSI && HasUndefInSelect(*LSI)) || (RSI && HasUndefInSelect(*RSI))) 639 return true; 640 return false; 641 } 642 643 /// Check if the loop header has a conditional branch that is not 644 /// loop-invariant, because it involves load instructions. If all paths from 645 /// either the true or false successor to the header or loop exists do not 646 /// modify the memory feeding the condition, perform 'partial unswitching'. That 647 /// is, duplicate the instructions feeding the condition in the pre-header. Then 648 /// unswitch on the duplicated condition. The condition is now known in the 649 /// unswitched version for the 'invariant' path through the original loop. 650 /// 651 /// If the branch condition of the header is partially invariant, return a pair 652 /// containing the instructions to duplicate and a boolean Constant to update 653 /// the condition in the loops created for the true or false successors. 654 static std::pair<SmallVector<Instruction *, 4>, Constant *> 655 hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) { 656 SmallVector<Instruction *, 4> ToDuplicate; 657 658 auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator()); 659 if (!TI || !TI->isConditional()) 660 return {}; 661 662 auto *CondI = dyn_cast<CmpInst>(TI->getCondition()); 663 // The case with the condition outside the loop should already be handled 664 // earlier. 665 if (!CondI || !L->contains(CondI)) 666 return {}; 667 668 ToDuplicate.push_back(CondI); 669 670 SmallVector<Value *, 4> WorkList; 671 WorkList.append(CondI->op_begin(), CondI->op_end()); 672 673 SmallVector<MemoryAccess *, 4> AccessesToCheck; 674 SmallVector<MemoryLocation, 4> AccessedLocs; 675 while (!WorkList.empty()) { 676 Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val()); 677 if (!I || !L->contains(I)) 678 continue; 679 680 // TODO: support additional instructions. 681 if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I)) 682 return {}; 683 684 // Do not duplicate volatile and atomic loads. 685 if (auto *LI = dyn_cast<LoadInst>(I)) 686 if (LI->isVolatile() || LI->isAtomic()) 687 return {}; 688 689 ToDuplicate.push_back(I); 690 if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) { 691 if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) { 692 // Queue the defining access to check for alias checks. 693 AccessesToCheck.push_back(MemUse->getDefiningAccess()); 694 AccessedLocs.push_back(MemoryLocation::get(I)); 695 } else { 696 // MemoryDefs may clobber the location or may be atomic memory 697 // operations. Bail out. 698 return {}; 699 } 700 } 701 WorkList.append(I->op_begin(), I->op_end()); 702 } 703 704 if (ToDuplicate.size() <= 1) 705 return {}; 706 707 auto HasNoClobbersOnPath = 708 [L, AA, &AccessedLocs](BasicBlock *Succ, BasicBlock *Header, 709 SmallVector<MemoryAccess *, 4> AccessesToCheck) { 710 // First, collect all blocks in the loop that are on a patch from Succ 711 // to the header. 712 SmallVector<BasicBlock *, 4> WorkList; 713 WorkList.push_back(Succ); 714 WorkList.push_back(Header); 715 SmallPtrSet<BasicBlock *, 4> Seen; 716 Seen.insert(Header); 717 while (!WorkList.empty()) { 718 BasicBlock *Current = WorkList.pop_back_val(); 719 if (!L->contains(Current)) 720 continue; 721 const auto &SeenIns = Seen.insert(Current); 722 if (!SeenIns.second) 723 continue; 724 725 WorkList.append(succ_begin(Current), succ_end(Current)); 726 } 727 728 // Require at least 2 blocks on a path through the loop. This skips 729 // paths that directly exit the loop. 730 if (Seen.size() < 2) 731 return false; 732 733 // Next, check if there are any MemoryDefs that are on the path through 734 // the loop (in the Seen set) and they may-alias any of the locations in 735 // AccessedLocs. If that is the case, they may modify the condition and 736 // partial unswitching is not possible. 737 SmallPtrSet<MemoryAccess *, 4> SeenAccesses; 738 while (!AccessesToCheck.empty()) { 739 MemoryAccess *Current = AccessesToCheck.pop_back_val(); 740 auto SeenI = SeenAccesses.insert(Current); 741 if (!SeenI.second || !Seen.contains(Current->getBlock())) 742 continue; 743 744 // Bail out if exceeded the threshold. 745 if (SeenAccesses.size() >= MSSAThreshold) 746 return false; 747 748 // MemoryUse are read-only accesses. 749 if (isa<MemoryUse>(Current)) 750 continue; 751 752 // For a MemoryDef, check if is aliases any of the location feeding 753 // the original condition. 754 if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) { 755 if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) { 756 return isModSet( 757 AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc)); 758 })) 759 return false; 760 } 761 762 for (Use &U : Current->uses()) 763 AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser())); 764 } 765 766 return true; 767 }; 768 769 // If we branch to the same successor, partial unswitching will not be 770 // beneficial. 771 if (TI->getSuccessor(0) == TI->getSuccessor(1)) 772 return {}; 773 774 if (HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(), AccessesToCheck)) 775 return {ToDuplicate, ConstantInt::getTrue(TI->getContext())}; 776 if (HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), AccessesToCheck)) 777 return {ToDuplicate, ConstantInt::getFalse(TI->getContext())}; 778 779 return {}; 780 } 781 782 /// Do actual work and unswitch loop if possible and profitable. 783 bool LoopUnswitch::processCurrentLoop() { 784 bool Changed = false; 785 786 initLoopData(); 787 788 // If LoopSimplify was unable to form a preheader, don't do any unswitching. 789 if (!LoopPreheader) 790 return false; 791 792 // Loops with indirectbr cannot be cloned. 793 if (!CurrentLoop->isSafeToClone()) 794 return false; 795 796 // Without dedicated exits, splitting the exit edge may fail. 797 if (!CurrentLoop->hasDedicatedExits()) 798 return false; 799 800 LLVMContext &Context = LoopHeader->getContext(); 801 802 // Analyze loop cost, and stop unswitching if loop content can not be duplicated. 803 if (!BranchesInfo.countLoop( 804 CurrentLoop, 805 getAnalysis<TargetTransformInfoWrapperPass>().getTTI( 806 *CurrentLoop->getHeader()->getParent()), 807 AC)) 808 return false; 809 810 // Try trivial unswitch first before loop over other basic blocks in the loop. 811 if (tryTrivialLoopUnswitch(Changed)) { 812 return true; 813 } 814 815 // Do not do non-trivial unswitch while optimizing for size. 816 // FIXME: Use Function::hasOptSize(). 817 if (OptimizeForSize || 818 LoopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize)) 819 return Changed; 820 821 // Run through the instructions in the loop, keeping track of three things: 822 // 823 // - That we do not unswitch loops containing convergent operations, as we 824 // might be making them control dependent on the unswitch value when they 825 // were not before. 826 // FIXME: This could be refined to only bail if the convergent operation is 827 // not already control-dependent on the unswitch value. 828 // 829 // - That basic blocks in the loop contain invokes whose predecessor edges we 830 // cannot split. 831 // 832 // - The set of guard intrinsics encountered (these are non terminator 833 // instructions that are also profitable to be unswitched). 834 835 SmallVector<IntrinsicInst *, 4> Guards; 836 837 for (const auto BB : CurrentLoop->blocks()) { 838 for (auto &I : *BB) { 839 auto *CB = dyn_cast<CallBase>(&I); 840 if (!CB) 841 continue; 842 if (CB->isConvergent()) 843 return Changed; 844 if (auto *II = dyn_cast<InvokeInst>(&I)) 845 if (!II->getUnwindDest()->canSplitPredecessors()) 846 return Changed; 847 if (auto *II = dyn_cast<IntrinsicInst>(&I)) 848 if (II->getIntrinsicID() == Intrinsic::experimental_guard) 849 Guards.push_back(II); 850 } 851 } 852 853 for (IntrinsicInst *Guard : Guards) { 854 Value *LoopCond = findLIVLoopCondition(Guard->getOperand(0), CurrentLoop, 855 Changed, MSSAU.get()) 856 .first; 857 if (LoopCond && 858 unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) { 859 // NB! Unswitching (if successful) could have erased some of the 860 // instructions in Guards leaving dangling pointers there. This is fine 861 // because we're returning now, and won't look at Guards again. 862 ++NumGuards; 863 return true; 864 } 865 } 866 867 // Loop over all of the basic blocks in the loop. If we find an interior 868 // block that is branching on a loop-invariant condition, we can unswitch this 869 // loop. 870 for (Loop::block_iterator I = CurrentLoop->block_begin(), 871 E = CurrentLoop->block_end(); 872 I != E; ++I) { 873 Instruction *TI = (*I)->getTerminator(); 874 875 // Unswitching on a potentially uninitialized predicate is not 876 // MSan-friendly. Limit this to the cases when the original predicate is 877 // guaranteed to execute, to avoid creating a use-of-uninitialized-value 878 // in the code that did not have one. 879 // This is a workaround for the discrepancy between LLVM IR and MSan 880 // semantics. See PR28054 for more details. 881 if (SanitizeMemory && 882 !SafetyInfo.isGuaranteedToExecute(*TI, DT, CurrentLoop)) 883 continue; 884 885 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { 886 // Some branches may be rendered unreachable because of previous 887 // unswitching. 888 // Unswitch only those branches that are reachable. 889 if (isUnreachableDueToPreviousUnswitching(*I)) 890 continue; 891 892 // If this isn't branching on an invariant condition, we can't unswitch 893 // it. 894 if (BI->isConditional()) { 895 // See if this, or some part of it, is loop invariant. If so, we can 896 // unswitch on it if we desire. 897 Value *LoopCond = findLIVLoopCondition(BI->getCondition(), CurrentLoop, 898 Changed, MSSAU.get()) 899 .first; 900 if (LoopCond && !equalityPropUnSafe(*LoopCond) && 901 unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) { 902 ++NumBranches; 903 return true; 904 } 905 } 906 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { 907 Value *SC = SI->getCondition(); 908 Value *LoopCond; 909 OperatorChain OpChain; 910 std::tie(LoopCond, OpChain) = 911 findLIVLoopCondition(SC, CurrentLoop, Changed, MSSAU.get()); 912 913 unsigned NumCases = SI->getNumCases(); 914 if (LoopCond && NumCases) { 915 // Find a value to unswitch on: 916 // FIXME: this should chose the most expensive case! 917 // FIXME: scan for a case with a non-critical edge? 918 Constant *UnswitchVal = nullptr; 919 // Find a case value such that at least one case value is unswitched 920 // out. 921 if (OpChain == OC_OpChainAnd) { 922 // If the chain only has ANDs and the switch has a case value of 0. 923 // Dropping in a 0 to the chain will unswitch out the 0-casevalue. 924 auto *AllZero = cast<ConstantInt>(Constant::getNullValue(SC->getType())); 925 if (BranchesInfo.isUnswitched(SI, AllZero)) 926 continue; 927 // We are unswitching 0 out. 928 UnswitchVal = AllZero; 929 } else if (OpChain == OC_OpChainOr) { 930 // If the chain only has ORs and the switch has a case value of ~0. 931 // Dropping in a ~0 to the chain will unswitch out the ~0-casevalue. 932 auto *AllOne = cast<ConstantInt>(Constant::getAllOnesValue(SC->getType())); 933 if (BranchesInfo.isUnswitched(SI, AllOne)) 934 continue; 935 // We are unswitching ~0 out. 936 UnswitchVal = AllOne; 937 } else { 938 assert(OpChain == OC_OpChainNone && 939 "Expect to unswitch on trivial chain"); 940 // Do not process same value again and again. 941 // At this point we have some cases already unswitched and 942 // some not yet unswitched. Let's find the first not yet unswitched one. 943 for (auto Case : SI->cases()) { 944 Constant *UnswitchValCandidate = Case.getCaseValue(); 945 if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) { 946 UnswitchVal = UnswitchValCandidate; 947 break; 948 } 949 } 950 } 951 952 if (!UnswitchVal) 953 continue; 954 955 if (unswitchIfProfitable(LoopCond, UnswitchVal)) { 956 ++NumSwitches; 957 // In case of a full LIV, UnswitchVal is the value we unswitched out. 958 // In case of a partial LIV, we only unswitch when its an AND-chain 959 // or OR-chain. In both cases switch input value simplifies to 960 // UnswitchVal. 961 BranchesInfo.setUnswitched(SI, UnswitchVal); 962 return true; 963 } 964 } 965 } 966 967 // Scan the instructions to check for unswitchable values. 968 for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end(); 969 BBI != E; ++BBI) 970 if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) { 971 Value *LoopCond = findLIVLoopCondition(SI->getCondition(), CurrentLoop, 972 Changed, MSSAU.get()) 973 .first; 974 if (LoopCond && 975 unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) { 976 ++NumSelects; 977 return true; 978 } 979 } 980 } 981 982 // Check if there is a header condition that is invariant along the patch from 983 // either the true or false successors to the header. This allows unswitching 984 // conditions depending on memory accesses, if there's a path not clobbering 985 // the memory locations. Check if this transform has been disabled using 986 // metadata, to avoid unswitching the same loop multiple times. 987 if (MSSA && 988 !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) { 989 auto ToDuplicate = hasPartialIVCondition(CurrentLoop, *MSSA, AA); 990 if (!ToDuplicate.first.empty()) { 991 LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition " 992 << *ToDuplicate.first[0] << "\n"); 993 ++NumBranches; 994 unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second, 995 CurrentLoop->getHeader()->getTerminator(), 996 ToDuplicate.first); 997 998 RedoLoop = false; 999 return true; 1000 } 1001 } 1002 1003 return Changed; 1004 } 1005 1006 /// Check to see if all paths from BB exit the loop with no side effects 1007 /// (including infinite loops). 1008 /// 1009 /// If true, we return true and set ExitBB to the block we 1010 /// exit through. 1011 /// 1012 static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, 1013 BasicBlock *&ExitBB, 1014 std::set<BasicBlock*> &Visited) { 1015 if (!Visited.insert(BB).second) { 1016 // Already visited. Without more analysis, this could indicate an infinite 1017 // loop. 1018 return false; 1019 } 1020 if (!L->contains(BB)) { 1021 // Otherwise, this is a loop exit, this is fine so long as this is the 1022 // first exit. 1023 if (ExitBB) return false; 1024 ExitBB = BB; 1025 return true; 1026 } 1027 1028 // Otherwise, this is an unvisited intra-loop node. Check all successors. 1029 for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) { 1030 // Check to see if the successor is a trivial loop exit. 1031 if (!isTrivialLoopExitBlockHelper(L, *SI, ExitBB, Visited)) 1032 return false; 1033 } 1034 1035 // Okay, everything after this looks good, check to make sure that this block 1036 // doesn't include any side effects. 1037 for (Instruction &I : *BB) 1038 if (I.mayHaveSideEffects()) 1039 return false; 1040 1041 return true; 1042 } 1043 1044 /// Return true if the specified block unconditionally leads to an exit from 1045 /// the specified loop, and has no side-effects in the process. If so, return 1046 /// the block that is exited to, otherwise return null. 1047 static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { 1048 std::set<BasicBlock*> Visited; 1049 Visited.insert(L->getHeader()); // Branches to header make infinite loops. 1050 BasicBlock *ExitBB = nullptr; 1051 if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited)) 1052 return ExitBB; 1053 return nullptr; 1054 } 1055 1056 /// We have found that we can unswitch CurrentLoop when LoopCond == Val to 1057 /// simplify the loop. If we decide that this is profitable, 1058 /// unswitch the loop, reprocess the pieces, then return true. 1059 bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val, 1060 Instruction *TI, 1061 ArrayRef<Instruction *> ToDuplicate) { 1062 // Check to see if it would be profitable to unswitch current loop. 1063 if (!BranchesInfo.costAllowsUnswitching()) { 1064 LLVM_DEBUG(dbgs() << "NOT unswitching loop %" 1065 << CurrentLoop->getHeader()->getName() 1066 << " at non-trivial condition '" << *Val 1067 << "' == " << *LoopCond << "\n" 1068 << ". Cost too high.\n"); 1069 return false; 1070 } 1071 if (HasBranchDivergence && 1072 getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) { 1073 LLVM_DEBUG(dbgs() << "NOT unswitching loop %" 1074 << CurrentLoop->getHeader()->getName() 1075 << " at non-trivial condition '" << *Val 1076 << "' == " << *LoopCond << "\n" 1077 << ". Condition is divergent.\n"); 1078 return false; 1079 } 1080 1081 unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate); 1082 return true; 1083 } 1084 1085 /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst, 1086 /// otherwise branch to FalseDest. Insert the code immediately before OldBranch 1087 /// and remove (but not erase!) it from the function. 1088 void LoopUnswitch::emitPreheaderBranchOnCondition( 1089 Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, 1090 BranchInst *OldBranch, Instruction *TI, 1091 ArrayRef<Instruction *> ToDuplicate) { 1092 assert(OldBranch->isUnconditional() && "Preheader is not split correctly"); 1093 assert(TrueDest != FalseDest && "Branch targets should be different"); 1094 1095 // Insert a conditional branch on LIC to the two preheaders. The original 1096 // code is the true version and the new code is the false version. 1097 Value *BranchVal = LIC; 1098 bool Swapped = false; 1099 1100 if (!ToDuplicate.empty()) { 1101 ValueToValueMapTy Old2New; 1102 for (Instruction *I : reverse(ToDuplicate)) { 1103 auto *New = I->clone(); 1104 New->insertBefore(OldBranch); 1105 RemapInstruction(New, Old2New, 1106 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1107 Old2New[I] = New; 1108 1109 if (MSSAU) { 1110 MemorySSA *MSSA = MSSAU->getMemorySSA(); 1111 auto *MemA = dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(I)); 1112 if (!MemA) 1113 continue; 1114 1115 Loop *L = LI->getLoopFor(I->getParent()); 1116 auto *DefiningAccess = MemA->getDefiningAccess(); 1117 // Get the first defining access before the loop. 1118 while (L->contains(DefiningAccess->getBlock())) { 1119 // If the defining access is a MemoryPhi, get the incoming 1120 // value for the pre-header as defining access. 1121 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) { 1122 DefiningAccess = 1123 MemPhi->getIncomingValueForBlock(L->getLoopPreheader()); 1124 } else { 1125 DefiningAccess = 1126 cast<MemoryDef>(DefiningAccess)->getDefiningAccess(); 1127 } 1128 } 1129 MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(), 1130 MemorySSA::BeforeTerminator); 1131 } 1132 } 1133 BranchVal = Old2New[ToDuplicate[0]]; 1134 } else { 1135 1136 if (!isa<ConstantInt>(Val) || 1137 Val->getType() != Type::getInt1Ty(LIC->getContext())) 1138 BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val); 1139 else if (Val != ConstantInt::getTrue(Val->getContext())) { 1140 // We want to enter the new loop when the condition is true. 1141 std::swap(TrueDest, FalseDest); 1142 Swapped = true; 1143 } 1144 } 1145 1146 // Old branch will be removed, so save its parent and successor to update the 1147 // DomTree. 1148 auto *OldBranchSucc = OldBranch->getSuccessor(0); 1149 auto *OldBranchParent = OldBranch->getParent(); 1150 1151 // Insert the new branch. 1152 BranchInst *BI = 1153 IRBuilder<>(OldBranch).CreateCondBr(BranchVal, TrueDest, FalseDest, TI); 1154 if (Swapped) 1155 BI->swapProfMetadata(); 1156 1157 // Remove the old branch so there is only one branch at the end. This is 1158 // needed to perform DomTree's internal DFS walk on the function's CFG. 1159 OldBranch->removeFromParent(); 1160 1161 // Inform the DT about the new branch. 1162 if (DT) { 1163 // First, add both successors. 1164 SmallVector<DominatorTree::UpdateType, 3> Updates; 1165 if (TrueDest != OldBranchSucc) 1166 Updates.push_back({DominatorTree::Insert, OldBranchParent, TrueDest}); 1167 if (FalseDest != OldBranchSucc) 1168 Updates.push_back({DominatorTree::Insert, OldBranchParent, FalseDest}); 1169 // If both of the new successors are different from the old one, inform the 1170 // DT that the edge was deleted. 1171 if (OldBranchSucc != TrueDest && OldBranchSucc != FalseDest) { 1172 Updates.push_back({DominatorTree::Delete, OldBranchParent, OldBranchSucc}); 1173 } 1174 1175 if (MSSAU) 1176 MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true); 1177 else 1178 DT->applyUpdates(Updates); 1179 } 1180 1181 // If either edge is critical, split it. This helps preserve LoopSimplify 1182 // form for enclosing loops. 1183 auto Options = 1184 CriticalEdgeSplittingOptions(DT, LI, MSSAU.get()).setPreserveLCSSA(); 1185 SplitCriticalEdge(BI, 0, Options); 1186 SplitCriticalEdge(BI, 1, Options); 1187 } 1188 1189 /// Given a loop that has a trivial unswitchable condition in it (a cond branch 1190 /// from its header block to its latch block, where the path through the loop 1191 /// that doesn't execute its body has no side-effects), unswitch it. This 1192 /// doesn't involve any code duplication, just moving the conditional branch 1193 /// outside of the loop and updating loop info. 1194 void LoopUnswitch::unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, 1195 BasicBlock *ExitBlock, 1196 Instruction *TI) { 1197 LLVM_DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %" 1198 << LoopHeader->getName() << " [" << L->getBlocks().size() 1199 << " blocks] in Function " 1200 << L->getHeader()->getParent()->getName() 1201 << " on cond: " << *Val << " == " << *Cond << "\n"); 1202 // We are going to make essential changes to CFG. This may invalidate cached 1203 // information for L or one of its parent loops in SCEV. 1204 if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>()) 1205 SEWP->getSE().forgetTopmostLoop(L); 1206 1207 // First step, split the preheader, so that we know that there is a safe place 1208 // to insert the conditional branch. We will change LoopPreheader to have a 1209 // conditional branch on Cond. 1210 BasicBlock *NewPH = SplitEdge(LoopPreheader, LoopHeader, DT, LI, MSSAU.get()); 1211 1212 // Now that we have a place to insert the conditional branch, create a place 1213 // to branch to: this is the exit block out of the loop that we should 1214 // short-circuit to. 1215 1216 // Split this block now, so that the loop maintains its exit block, and so 1217 // that the jump from the preheader can execute the contents of the exit block 1218 // without actually branching to it (the exit block should be dominated by the 1219 // loop header, not the preheader). 1220 assert(!L->contains(ExitBlock) && "Exit block is in the loop?"); 1221 BasicBlock *NewExit = 1222 SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI, MSSAU.get()); 1223 1224 // Okay, now we have a position to branch from and a position to branch to, 1225 // insert the new conditional branch. 1226 auto *OldBranch = dyn_cast<BranchInst>(LoopPreheader->getTerminator()); 1227 assert(OldBranch && "Failed to split the preheader"); 1228 emitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, OldBranch, TI); 1229 1230 // emitPreheaderBranchOnCondition removed the OldBranch from the function. 1231 // Delete it, as it is no longer needed. 1232 delete OldBranch; 1233 1234 // We need to reprocess this loop, it could be unswitched again. 1235 RedoLoop = true; 1236 1237 // Now that we know that the loop is never entered when this condition is a 1238 // particular value, rewrite the loop with this info. We know that this will 1239 // at least eliminate the old branch. 1240 rewriteLoopBodyWithConditionConstant(L, Cond, Val, /*IsEqual=*/false); 1241 1242 ++NumTrivial; 1243 } 1244 1245 /// Check if the first non-constant condition starting from the loop header is 1246 /// a trivial unswitch condition: that is, a condition controls whether or not 1247 /// the loop does anything at all. If it is a trivial condition, unswitching 1248 /// produces no code duplications (equivalently, it produces a simpler loop and 1249 /// a new empty loop, which gets deleted). Therefore always unswitch trivial 1250 /// condition. 1251 bool LoopUnswitch::tryTrivialLoopUnswitch(bool &Changed) { 1252 BasicBlock *CurrentBB = CurrentLoop->getHeader(); 1253 Instruction *CurrentTerm = CurrentBB->getTerminator(); 1254 LLVMContext &Context = CurrentBB->getContext(); 1255 1256 // If loop header has only one reachable successor (currently via an 1257 // unconditional branch or constant foldable conditional branch, but 1258 // should also consider adding constant foldable switch instruction in 1259 // future), we should keep looking for trivial condition candidates in 1260 // the successor as well. An alternative is to constant fold conditions 1261 // and merge successors into loop header (then we only need to check header's 1262 // terminator). The reason for not doing this in LoopUnswitch pass is that 1263 // it could potentially break LoopPassManager's invariants. Folding dead 1264 // branches could either eliminate the current loop or make other loops 1265 // unreachable. LCSSA form might also not be preserved after deleting 1266 // branches. The following code keeps traversing loop header's successors 1267 // until it finds the trivial condition candidate (condition that is not a 1268 // constant). Since unswitching generates branches with constant conditions, 1269 // this scenario could be very common in practice. 1270 SmallPtrSet<BasicBlock*, 8> Visited; 1271 1272 while (true) { 1273 // If we exit loop or reach a previous visited block, then 1274 // we can not reach any trivial condition candidates (unfoldable 1275 // branch instructions or switch instructions) and no unswitch 1276 // can happen. Exit and return false. 1277 if (!CurrentLoop->contains(CurrentBB) || !Visited.insert(CurrentBB).second) 1278 return false; 1279 1280 // Check if this loop will execute any side-effecting instructions (e.g. 1281 // stores, calls, volatile loads) in the part of the loop that the code 1282 // *would* execute. Check the header first. 1283 for (Instruction &I : *CurrentBB) 1284 if (I.mayHaveSideEffects()) 1285 return false; 1286 1287 if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) { 1288 if (BI->isUnconditional()) { 1289 CurrentBB = BI->getSuccessor(0); 1290 } else if (BI->getCondition() == ConstantInt::getTrue(Context)) { 1291 CurrentBB = BI->getSuccessor(0); 1292 } else if (BI->getCondition() == ConstantInt::getFalse(Context)) { 1293 CurrentBB = BI->getSuccessor(1); 1294 } else { 1295 // Found a trivial condition candidate: non-foldable conditional branch. 1296 break; 1297 } 1298 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) { 1299 // At this point, any constant-foldable instructions should have probably 1300 // been folded. 1301 ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); 1302 if (!Cond) 1303 break; 1304 // Find the target block we are definitely going to. 1305 CurrentBB = SI->findCaseValue(Cond)->getCaseSuccessor(); 1306 } else { 1307 // We do not understand these terminator instructions. 1308 break; 1309 } 1310 1311 CurrentTerm = CurrentBB->getTerminator(); 1312 } 1313 1314 // CondVal is the condition that controls the trivial condition. 1315 // LoopExitBB is the BasicBlock that loop exits when meets trivial condition. 1316 Constant *CondVal = nullptr; 1317 BasicBlock *LoopExitBB = nullptr; 1318 1319 if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) { 1320 // If this isn't branching on an invariant condition, we can't unswitch it. 1321 if (!BI->isConditional()) 1322 return false; 1323 1324 Value *LoopCond = findLIVLoopCondition(BI->getCondition(), CurrentLoop, 1325 Changed, MSSAU.get()) 1326 .first; 1327 1328 // Unswitch only if the trivial condition itself is an LIV (not 1329 // partial LIV which could occur in and/or) 1330 if (!LoopCond || LoopCond != BI->getCondition()) 1331 return false; 1332 1333 // Check to see if a successor of the branch is guaranteed to 1334 // exit through a unique exit block without having any 1335 // side-effects. If so, determine the value of Cond that causes 1336 // it to do this. 1337 if ((LoopExitBB = 1338 isTrivialLoopExitBlock(CurrentLoop, BI->getSuccessor(0)))) { 1339 CondVal = ConstantInt::getTrue(Context); 1340 } else if ((LoopExitBB = 1341 isTrivialLoopExitBlock(CurrentLoop, BI->getSuccessor(1)))) { 1342 CondVal = ConstantInt::getFalse(Context); 1343 } 1344 1345 // If we didn't find a single unique LoopExit block, or if the loop exit 1346 // block contains phi nodes, this isn't trivial. 1347 if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin())) 1348 return false; // Can't handle this. 1349 1350 if (equalityPropUnSafe(*LoopCond)) 1351 return false; 1352 1353 unswitchTrivialCondition(CurrentLoop, LoopCond, CondVal, LoopExitBB, 1354 CurrentTerm); 1355 ++NumBranches; 1356 return true; 1357 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) { 1358 // If this isn't switching on an invariant condition, we can't unswitch it. 1359 Value *LoopCond = findLIVLoopCondition(SI->getCondition(), CurrentLoop, 1360 Changed, MSSAU.get()) 1361 .first; 1362 1363 // Unswitch only if the trivial condition itself is an LIV (not 1364 // partial LIV which could occur in and/or) 1365 if (!LoopCond || LoopCond != SI->getCondition()) 1366 return false; 1367 1368 // Check to see if a successor of the switch is guaranteed to go to the 1369 // latch block or exit through a one exit block without having any 1370 // side-effects. If so, determine the value of Cond that causes it to do 1371 // this. 1372 // Note that we can't trivially unswitch on the default case or 1373 // on already unswitched cases. 1374 for (auto Case : SI->cases()) { 1375 BasicBlock *LoopExitCandidate; 1376 if ((LoopExitCandidate = 1377 isTrivialLoopExitBlock(CurrentLoop, Case.getCaseSuccessor()))) { 1378 // Okay, we found a trivial case, remember the value that is trivial. 1379 ConstantInt *CaseVal = Case.getCaseValue(); 1380 1381 // Check that it was not unswitched before, since already unswitched 1382 // trivial vals are looks trivial too. 1383 if (BranchesInfo.isUnswitched(SI, CaseVal)) 1384 continue; 1385 LoopExitBB = LoopExitCandidate; 1386 CondVal = CaseVal; 1387 break; 1388 } 1389 } 1390 1391 // If we didn't find a single unique LoopExit block, or if the loop exit 1392 // block contains phi nodes, this isn't trivial. 1393 if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin())) 1394 return false; // Can't handle this. 1395 1396 unswitchTrivialCondition(CurrentLoop, LoopCond, CondVal, LoopExitBB, 1397 nullptr); 1398 1399 // We are only unswitching full LIV. 1400 BranchesInfo.setUnswitched(SI, CondVal); 1401 ++NumSwitches; 1402 return true; 1403 } 1404 return false; 1405 } 1406 1407 /// Split all of the edges from inside the loop to their exit blocks. 1408 /// Update the appropriate Phi nodes as we do so. 1409 void LoopUnswitch::splitExitEdges( 1410 Loop *L, const SmallVectorImpl<BasicBlock *> &ExitBlocks) { 1411 1412 for (unsigned I = 0, E = ExitBlocks.size(); I != E; ++I) { 1413 BasicBlock *ExitBlock = ExitBlocks[I]; 1414 SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock), 1415 pred_end(ExitBlock)); 1416 1417 // Although SplitBlockPredecessors doesn't preserve loop-simplify in 1418 // general, if we call it on all predecessors of all exits then it does. 1419 SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI, MSSAU.get(), 1420 /*PreserveLCSSA*/ true); 1421 } 1422 } 1423 1424 /// We determined that the loop is profitable to unswitch when LIC equal Val. 1425 /// Split it into loop versions and test the condition outside of either loop. 1426 /// Return the loops created as Out1/Out2. 1427 void LoopUnswitch::unswitchNontrivialCondition( 1428 Value *LIC, Constant *Val, Loop *L, Instruction *TI, 1429 ArrayRef<Instruction *> ToDuplicate) { 1430 Function *F = LoopHeader->getParent(); 1431 LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" 1432 << LoopHeader->getName() << " [" << L->getBlocks().size() 1433 << " blocks] in Function " << F->getName() << " when '" 1434 << *Val << "' == " << *LIC << "\n"); 1435 1436 // We are going to make essential changes to CFG. This may invalidate cached 1437 // information for L or one of its parent loops in SCEV. 1438 if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>()) 1439 SEWP->getSE().forgetTopmostLoop(L); 1440 1441 LoopBlocks.clear(); 1442 NewBlocks.clear(); 1443 1444 if (MSSAU && VerifyMemorySSA) 1445 MSSA->verifyMemorySSA(); 1446 1447 // First step, split the preheader and exit blocks, and add these blocks to 1448 // the LoopBlocks list. 1449 BasicBlock *NewPreheader = 1450 SplitEdge(LoopPreheader, LoopHeader, DT, LI, MSSAU.get()); 1451 LoopBlocks.push_back(NewPreheader); 1452 1453 // We want the loop to come after the preheader, but before the exit blocks. 1454 llvm::append_range(LoopBlocks, L->blocks()); 1455 1456 SmallVector<BasicBlock*, 8> ExitBlocks; 1457 L->getUniqueExitBlocks(ExitBlocks); 1458 1459 // Split all of the edges from inside the loop to their exit blocks. Update 1460 // the appropriate Phi nodes as we do so. 1461 splitExitEdges(L, ExitBlocks); 1462 1463 // The exit blocks may have been changed due to edge splitting, recompute. 1464 ExitBlocks.clear(); 1465 L->getUniqueExitBlocks(ExitBlocks); 1466 1467 // Add exit blocks to the loop blocks. 1468 llvm::append_range(LoopBlocks, ExitBlocks); 1469 1470 // Next step, clone all of the basic blocks that make up the loop (including 1471 // the loop preheader and exit blocks), keeping track of the mapping between 1472 // the instructions and blocks. 1473 NewBlocks.reserve(LoopBlocks.size()); 1474 ValueToValueMapTy VMap; 1475 for (unsigned I = 0, E = LoopBlocks.size(); I != E; ++I) { 1476 BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[I], VMap, ".us", F); 1477 1478 NewBlocks.push_back(NewBB); 1479 VMap[LoopBlocks[I]] = NewBB; // Keep the BB mapping. 1480 } 1481 1482 // Splice the newly inserted blocks into the function right before the 1483 // original preheader. 1484 F->getBasicBlockList().splice(NewPreheader->getIterator(), 1485 F->getBasicBlockList(), 1486 NewBlocks[0]->getIterator(), F->end()); 1487 1488 // Now we create the new Loop object for the versioned loop. 1489 Loop *NewLoop = cloneLoop(L, L->getParentLoop(), VMap, LI, LPM); 1490 1491 // Recalculate unswitching quota, inherit simplified switches info for NewBB, 1492 // Probably clone more loop-unswitch related loop properties. 1493 BranchesInfo.cloneData(NewLoop, L, VMap); 1494 1495 Loop *ParentLoop = L->getParentLoop(); 1496 if (ParentLoop) { 1497 // Make sure to add the cloned preheader and exit blocks to the parent loop 1498 // as well. 1499 ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI); 1500 } 1501 1502 for (unsigned EBI = 0, EBE = ExitBlocks.size(); EBI != EBE; ++EBI) { 1503 BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[EBI]]); 1504 // The new exit block should be in the same loop as the old one. 1505 if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[EBI])) 1506 ExitBBLoop->addBasicBlockToLoop(NewExit, *LI); 1507 1508 assert(NewExit->getTerminator()->getNumSuccessors() == 1 && 1509 "Exit block should have been split to have one successor!"); 1510 BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0); 1511 1512 // If the successor of the exit block had PHI nodes, add an entry for 1513 // NewExit. 1514 for (PHINode &PN : ExitSucc->phis()) { 1515 Value *V = PN.getIncomingValueForBlock(ExitBlocks[EBI]); 1516 ValueToValueMapTy::iterator It = VMap.find(V); 1517 if (It != VMap.end()) V = It->second; 1518 PN.addIncoming(V, NewExit); 1519 } 1520 1521 if (LandingPadInst *LPad = NewExit->getLandingPadInst()) { 1522 PHINode *PN = PHINode::Create(LPad->getType(), 0, "", 1523 &*ExitSucc->getFirstInsertionPt()); 1524 1525 for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc); 1526 I != E; ++I) { 1527 BasicBlock *BB = *I; 1528 LandingPadInst *LPI = BB->getLandingPadInst(); 1529 LPI->replaceAllUsesWith(PN); 1530 PN->addIncoming(LPI, BB); 1531 } 1532 } 1533 } 1534 1535 // Rewrite the code to refer to itself. 1536 for (unsigned NBI = 0, NBE = NewBlocks.size(); NBI != NBE; ++NBI) { 1537 for (Instruction &I : *NewBlocks[NBI]) { 1538 RemapInstruction(&I, VMap, 1539 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1540 if (auto *II = dyn_cast<IntrinsicInst>(&I)) 1541 if (II->getIntrinsicID() == Intrinsic::assume) 1542 AC->registerAssumption(II); 1543 } 1544 } 1545 1546 // Rewrite the original preheader to select between versions of the loop. 1547 BranchInst *OldBR = cast<BranchInst>(LoopPreheader->getTerminator()); 1548 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] && 1549 "Preheader splitting did not work correctly!"); 1550 1551 if (MSSAU) { 1552 // Update MemorySSA after cloning, and before splitting to unreachables, 1553 // since that invalidates the 1:1 mapping of clones in VMap. 1554 LoopBlocksRPO LBRPO(L); 1555 LBRPO.perform(LI); 1556 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, VMap); 1557 } 1558 1559 // Emit the new branch that selects between the two versions of this loop. 1560 emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR, 1561 TI, ToDuplicate); 1562 if (MSSAU) { 1563 // Update MemoryPhis in Exit blocks. 1564 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT); 1565 if (VerifyMemorySSA) 1566 MSSA->verifyMemorySSA(); 1567 } 1568 1569 // The OldBr was replaced by a new one and removed (but not erased) by 1570 // emitPreheaderBranchOnCondition. It is no longer needed, so delete it. 1571 delete OldBR; 1572 1573 LoopProcessWorklist.push_back(NewLoop); 1574 RedoLoop = true; 1575 1576 // Keep a WeakTrackingVH holding onto LIC. If the first call to 1577 // RewriteLoopBody 1578 // deletes the instruction (for example by simplifying a PHI that feeds into 1579 // the condition that we're unswitching on), we don't rewrite the second 1580 // iteration. 1581 WeakTrackingVH LICHandle(LIC); 1582 1583 if (ToDuplicate.empty()) { 1584 // Now we rewrite the original code to know that the condition is true and 1585 // the new code to know that the condition is false. 1586 rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false); 1587 1588 // It's possible that simplifying one loop could cause the other to be 1589 // changed to another value or a constant. If its a constant, don't 1590 // simplify it. 1591 if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && 1592 LICHandle && !isa<Constant>(LICHandle)) 1593 rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, 1594 /*IsEqual=*/true); 1595 } else { 1596 // Partial unswitching. Update the condition in the right loop with the 1597 // constant. 1598 auto *CC = cast<ConstantInt>(Val); 1599 if (CC->isOneValue()) { 1600 rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val, 1601 /*IsEqual=*/true); 1602 } else 1603 rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true); 1604 1605 // Mark the new loop as partially unswitched, to avoid unswitching on the 1606 // same condition again. 1607 auto &Context = NewLoop->getHeader()->getContext(); 1608 MDNode *DisableUnswitchMD = MDNode::get( 1609 Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable")); 1610 MDNode *NewLoopID = makePostTransformationMetadata( 1611 Context, L->getLoopID(), {"llvm.loop.unswitch.partial"}, 1612 {DisableUnswitchMD}); 1613 NewLoop->setLoopID(NewLoopID); 1614 } 1615 1616 if (MSSA && VerifyMemorySSA) 1617 MSSA->verifyMemorySSA(); 1618 } 1619 1620 /// Remove all instances of I from the worklist vector specified. 1621 static void removeFromWorklist(Instruction *I, 1622 std::vector<Instruction *> &Worklist) { 1623 llvm::erase_value(Worklist, I); 1624 } 1625 1626 /// When we find that I really equals V, remove I from the 1627 /// program, replacing all uses with V and update the worklist. 1628 static void replaceUsesOfWith(Instruction *I, Value *V, 1629 std::vector<Instruction *> &Worklist, Loop *L, 1630 LPPassManager *LPM, MemorySSAUpdater *MSSAU) { 1631 LLVM_DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n"); 1632 1633 // Add uses to the worklist, which may be dead now. 1634 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) 1635 if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i))) 1636 Worklist.push_back(Use); 1637 1638 // Add users to the worklist which may be simplified now. 1639 for (User *U : I->users()) 1640 Worklist.push_back(cast<Instruction>(U)); 1641 removeFromWorklist(I, Worklist); 1642 I->replaceAllUsesWith(V); 1643 if (!I->mayHaveSideEffects()) { 1644 if (MSSAU) 1645 MSSAU->removeMemoryAccess(I); 1646 I->eraseFromParent(); 1647 } 1648 ++NumSimplify; 1649 } 1650 1651 /// We know either that the value LIC has the value specified by Val in the 1652 /// specified loop, or we know it does NOT have that value. 1653 /// Rewrite any uses of LIC or of properties correlated to it. 1654 void LoopUnswitch::rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, 1655 Constant *Val, 1656 bool IsEqual) { 1657 assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?"); 1658 1659 // FIXME: Support correlated properties, like: 1660 // for (...) 1661 // if (li1 < li2) 1662 // ... 1663 // if (li1 > li2) 1664 // ... 1665 1666 // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches, 1667 // selects, switches. 1668 std::vector<Instruction*> Worklist; 1669 LLVMContext &Context = Val->getContext(); 1670 1671 // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC 1672 // in the loop with the appropriate one directly. 1673 if (IsEqual || (isa<ConstantInt>(Val) && 1674 Val->getType()->isIntegerTy(1))) { 1675 Value *Replacement; 1676 if (IsEqual) 1677 Replacement = Val; 1678 else 1679 Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()), 1680 !cast<ConstantInt>(Val)->getZExtValue()); 1681 1682 for (User *U : LIC->users()) { 1683 Instruction *UI = dyn_cast<Instruction>(U); 1684 if (!UI || !L->contains(UI)) 1685 continue; 1686 Worklist.push_back(UI); 1687 } 1688 1689 for (Instruction *UI : Worklist) 1690 UI->replaceUsesOfWith(LIC, Replacement); 1691 1692 simplifyCode(Worklist, L); 1693 return; 1694 } 1695 1696 // Otherwise, we don't know the precise value of LIC, but we do know that it 1697 // is certainly NOT "Val". As such, simplify any uses in the loop that we 1698 // can. This case occurs when we unswitch switch statements. 1699 for (User *U : LIC->users()) { 1700 Instruction *UI = dyn_cast<Instruction>(U); 1701 if (!UI || !L->contains(UI)) 1702 continue; 1703 1704 // At this point, we know LIC is definitely not Val. Try to use some simple 1705 // logic to simplify the user w.r.t. to the context. 1706 if (Value *Replacement = simplifyInstructionWithNotEqual(UI, LIC, Val)) { 1707 if (LI->replacementPreservesLCSSAForm(UI, Replacement)) { 1708 // This in-loop instruction has been simplified w.r.t. its context, 1709 // i.e. LIC != Val, make sure we propagate its replacement value to 1710 // all its users. 1711 // 1712 // We can not yet delete UI, the LIC user, yet, because that would invalidate 1713 // the LIC->users() iterator !. However, we can make this instruction 1714 // dead by replacing all its users and push it onto the worklist so that 1715 // it can be properly deleted and its operands simplified. 1716 UI->replaceAllUsesWith(Replacement); 1717 } 1718 } 1719 1720 // This is a LIC user, push it into the worklist so that simplifyCode can 1721 // attempt to simplify it. 1722 Worklist.push_back(UI); 1723 1724 // If we know that LIC is not Val, use this info to simplify code. 1725 SwitchInst *SI = dyn_cast<SwitchInst>(UI); 1726 if (!SI || !isa<ConstantInt>(Val)) continue; 1727 1728 // NOTE: if a case value for the switch is unswitched out, we record it 1729 // after the unswitch finishes. We can not record it here as the switch 1730 // is not a direct user of the partial LIV. 1731 SwitchInst::CaseHandle DeadCase = 1732 *SI->findCaseValue(cast<ConstantInt>(Val)); 1733 // Default case is live for multiple values. 1734 if (DeadCase == *SI->case_default()) 1735 continue; 1736 1737 // Found a dead case value. Don't remove PHI nodes in the 1738 // successor if they become single-entry, those PHI nodes may 1739 // be in the Users list. 1740 1741 BasicBlock *Switch = SI->getParent(); 1742 BasicBlock *SISucc = DeadCase.getCaseSuccessor(); 1743 BasicBlock *Latch = L->getLoopLatch(); 1744 1745 if (!SI->findCaseDest(SISucc)) continue; // Edge is critical. 1746 // If the DeadCase successor dominates the loop latch, then the 1747 // transformation isn't safe since it will delete the sole predecessor edge 1748 // to the latch. 1749 if (Latch && DT->dominates(SISucc, Latch)) 1750 continue; 1751 1752 // FIXME: This is a hack. We need to keep the successor around 1753 // and hooked up so as to preserve the loop structure, because 1754 // trying to update it is complicated. So instead we preserve the 1755 // loop structure and put the block on a dead code path. 1756 SplitEdge(Switch, SISucc, DT, LI, MSSAU.get()); 1757 // Compute the successors instead of relying on the return value 1758 // of SplitEdge, since it may have split the switch successor 1759 // after PHI nodes. 1760 BasicBlock *NewSISucc = DeadCase.getCaseSuccessor(); 1761 BasicBlock *OldSISucc = *succ_begin(NewSISucc); 1762 // Create an "unreachable" destination. 1763 BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable", 1764 Switch->getParent(), 1765 OldSISucc); 1766 new UnreachableInst(Context, Abort); 1767 // Force the new case destination to branch to the "unreachable" 1768 // block while maintaining a (dead) CFG edge to the old block. 1769 NewSISucc->getTerminator()->eraseFromParent(); 1770 BranchInst::Create(Abort, OldSISucc, 1771 ConstantInt::getTrue(Context), NewSISucc); 1772 // Release the PHI operands for this edge. 1773 for (PHINode &PN : NewSISucc->phis()) 1774 PN.setIncomingValueForBlock(Switch, UndefValue::get(PN.getType())); 1775 // Tell the domtree about the new block. We don't fully update the 1776 // domtree here -- instead we force it to do a full recomputation 1777 // after the pass is complete -- but we do need to inform it of 1778 // new blocks. 1779 DT->addNewBlock(Abort, NewSISucc); 1780 } 1781 1782 simplifyCode(Worklist, L); 1783 } 1784 1785 /// Now that we have simplified some instructions in the loop, walk over it and 1786 /// constant prop, dce, and fold control flow where possible. Note that this is 1787 /// effectively a very simple loop-structure-aware optimizer. During processing 1788 /// of this loop, L could very well be deleted, so it must not be used. 1789 /// 1790 /// FIXME: When the loop optimizer is more mature, separate this out to a new 1791 /// pass. 1792 /// 1793 void LoopUnswitch::simplifyCode(std::vector<Instruction *> &Worklist, Loop *L) { 1794 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 1795 while (!Worklist.empty()) { 1796 Instruction *I = Worklist.back(); 1797 Worklist.pop_back(); 1798 1799 // Simple DCE. 1800 if (isInstructionTriviallyDead(I)) { 1801 LLVM_DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n"); 1802 1803 // Add uses to the worklist, which may be dead now. 1804 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) 1805 if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i))) 1806 Worklist.push_back(Use); 1807 removeFromWorklist(I, Worklist); 1808 if (MSSAU) 1809 MSSAU->removeMemoryAccess(I); 1810 I->eraseFromParent(); 1811 ++NumSimplify; 1812 continue; 1813 } 1814 1815 // See if instruction simplification can hack this up. This is common for 1816 // things like "select false, X, Y" after unswitching made the condition be 1817 // 'false'. TODO: update the domtree properly so we can pass it here. 1818 if (Value *V = SimplifyInstruction(I, DL)) 1819 if (LI->replacementPreservesLCSSAForm(I, V)) { 1820 replaceUsesOfWith(I, V, Worklist, L, LPM, MSSAU.get()); 1821 continue; 1822 } 1823 1824 // Special case hacks that appear commonly in unswitched code. 1825 if (BranchInst *BI = dyn_cast<BranchInst>(I)) { 1826 if (BI->isUnconditional()) { 1827 // If BI's parent is the only pred of the successor, fold the two blocks 1828 // together. 1829 BasicBlock *Pred = BI->getParent(); 1830 (void)Pred; 1831 BasicBlock *Succ = BI->getSuccessor(0); 1832 BasicBlock *SinglePred = Succ->getSinglePredecessor(); 1833 if (!SinglePred) continue; // Nothing to do. 1834 assert(SinglePred == Pred && "CFG broken"); 1835 1836 // Make the LPM and Worklist updates specific to LoopUnswitch. 1837 removeFromWorklist(BI, Worklist); 1838 auto SuccIt = Succ->begin(); 1839 while (PHINode *PN = dyn_cast<PHINode>(SuccIt++)) { 1840 for (unsigned It = 0, E = PN->getNumOperands(); It != E; ++It) 1841 if (Instruction *Use = dyn_cast<Instruction>(PN->getOperand(It))) 1842 Worklist.push_back(Use); 1843 for (User *U : PN->users()) 1844 Worklist.push_back(cast<Instruction>(U)); 1845 removeFromWorklist(PN, Worklist); 1846 ++NumSimplify; 1847 } 1848 // Merge the block and make the remaining analyses updates. 1849 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 1850 MergeBlockIntoPredecessor(Succ, &DTU, LI, MSSAU.get()); 1851 ++NumSimplify; 1852 continue; 1853 } 1854 1855 continue; 1856 } 1857 } 1858 } 1859 1860 /// Simple simplifications we can do given the information that Cond is 1861 /// definitely not equal to Val. 1862 Value *LoopUnswitch::simplifyInstructionWithNotEqual(Instruction *Inst, 1863 Value *Invariant, 1864 Constant *Val) { 1865 // icmp eq cond, val -> false 1866 ICmpInst *CI = dyn_cast<ICmpInst>(Inst); 1867 if (CI && CI->isEquality()) { 1868 Value *Op0 = CI->getOperand(0); 1869 Value *Op1 = CI->getOperand(1); 1870 if ((Op0 == Invariant && Op1 == Val) || (Op0 == Val && Op1 == Invariant)) { 1871 LLVMContext &Ctx = Inst->getContext(); 1872 if (CI->getPredicate() == CmpInst::ICMP_EQ) 1873 return ConstantInt::getFalse(Ctx); 1874 else 1875 return ConstantInt::getTrue(Ctx); 1876 } 1877 } 1878 1879 // FIXME: there may be other opportunities, e.g. comparison with floating 1880 // point, or Invariant - Val != 0, etc. 1881 return nullptr; 1882 } 1883