176aa662cSKarthik Bhat //===-- LoopUtils.cpp - Loop Utility functions -------------------------===// 276aa662cSKarthik Bhat // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 676aa662cSKarthik Bhat // 776aa662cSKarthik Bhat //===----------------------------------------------------------------------===// 876aa662cSKarthik Bhat // 976aa662cSKarthik Bhat // This file defines common loop utility functions. 1076aa662cSKarthik Bhat // 1176aa662cSKarthik Bhat //===----------------------------------------------------------------------===// 1276aa662cSKarthik Bhat 132f2bd8caSAdam Nemet #include "llvm/Transforms/Utils/LoopUtils.h" 147a87e8f9SFlorian Hahn #include "llvm/ADT/DenseSet.h" 157a87e8f9SFlorian Hahn #include "llvm/ADT/Optional.h" 167a87e8f9SFlorian Hahn #include "llvm/ADT/PriorityWorklist.h" 174a000883SChandler Carruth #include "llvm/ADT/ScopeExit.h" 187a87e8f9SFlorian Hahn #include "llvm/ADT/SetVector.h" 197a87e8f9SFlorian Hahn #include "llvm/ADT/SmallPtrSet.h" 207a87e8f9SFlorian Hahn #include "llvm/ADT/SmallVector.h" 2131088a9dSChandler Carruth #include "llvm/Analysis/AliasAnalysis.h" 2231088a9dSChandler Carruth #include "llvm/Analysis/BasicAliasAnalysis.h" 235f436fc5SRichard Trieu #include "llvm/Analysis/DomTreeUpdater.h" 2431088a9dSChandler Carruth #include "llvm/Analysis/GlobalsModRef.h" 25a21d5f1eSPhilip Reames #include "llvm/Analysis/InstructionSimplify.h" 26616657b3SFlorian Hahn #include "llvm/Analysis/LoopAccessAnalysis.h" 272f2bd8caSAdam Nemet #include "llvm/Analysis/LoopInfo.h" 28c3ccf5d7SIgor Laevsky #include "llvm/Analysis/LoopPass.h" 296da79ce1SAlina Sbirlea #include "llvm/Analysis/MemorySSA.h" 3097468e92SAlina Sbirlea #include "llvm/Analysis/MemorySSAUpdater.h" 3123aed5efSPhilip Reames #include "llvm/Analysis/MustExecute.h" 3245d4cb9aSWeiming Zhao #include "llvm/Analysis/ScalarEvolution.h" 332f2bd8caSAdam Nemet #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 3445d4cb9aSWeiming Zhao #include "llvm/Analysis/ScalarEvolutionExpressions.h" 356bda14b3SChandler Carruth #include "llvm/Analysis/TargetTransformInfo.h" 36a097bc69SChad Rosier #include "llvm/Analysis/ValueTracking.h" 37744c3c32SDavide Italiano #include "llvm/IR/DIBuilder.h" 3831088a9dSChandler Carruth #include "llvm/IR/Dominators.h" 3976aa662cSKarthik Bhat #include "llvm/IR/Instructions.h" 40744c3c32SDavide Italiano #include "llvm/IR/IntrinsicInst.h" 41af7e1588SEvgeniy Brevnov #include "llvm/IR/MDBuilder.h" 4245d4cb9aSWeiming Zhao #include "llvm/IR/Module.h" 437a87e8f9SFlorian Hahn #include "llvm/IR/Operator.h" 4476aa662cSKarthik Bhat #include "llvm/IR/PatternMatch.h" 4576aa662cSKarthik Bhat #include "llvm/IR/ValueHandle.h" 4605da2fe5SReid Kleckner #include "llvm/InitializePasses.h" 4731088a9dSChandler Carruth #include "llvm/Pass.h" 4876aa662cSKarthik Bhat #include "llvm/Support/Debug.h" 49a097bc69SChad Rosier #include "llvm/Support/KnownBits.h" 504a000883SChandler Carruth #include "llvm/Transforms/Utils/BasicBlockUtils.h" 5193175a5cSSjoerd Meijer #include "llvm/Transforms/Utils/Local.h" 52bcbd26bfSFlorian Hahn #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 5376aa662cSKarthik Bhat 5476aa662cSKarthik Bhat using namespace llvm; 5576aa662cSKarthik Bhat using namespace llvm::PatternMatch; 5676aa662cSKarthik Bhat 5776aa662cSKarthik Bhat #define DEBUG_TYPE "loop-utils" 5876aa662cSKarthik Bhat 5972448525SMichael Kruse static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced"; 604f64f1baSTim Corringham static const char *LLVMLoopDisableLICM = "llvm.licm.disable"; 6172448525SMichael Kruse 624a000883SChandler Carruth bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, 6397468e92SAlina Sbirlea MemorySSAUpdater *MSSAU, 644a000883SChandler Carruth bool PreserveLCSSA) { 654a000883SChandler Carruth bool Changed = false; 664a000883SChandler Carruth 674a000883SChandler Carruth // We re-use a vector for the in-loop predecesosrs. 684a000883SChandler Carruth SmallVector<BasicBlock *, 4> InLoopPredecessors; 694a000883SChandler Carruth 704a000883SChandler Carruth auto RewriteExit = [&](BasicBlock *BB) { 714a000883SChandler Carruth assert(InLoopPredecessors.empty() && 724a000883SChandler Carruth "Must start with an empty predecessors list!"); 734a000883SChandler Carruth auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); }); 744a000883SChandler Carruth 754a000883SChandler Carruth // See if there are any non-loop predecessors of this exit block and 764a000883SChandler Carruth // keep track of the in-loop predecessors. 774a000883SChandler Carruth bool IsDedicatedExit = true; 784a000883SChandler Carruth for (auto *PredBB : predecessors(BB)) 794a000883SChandler Carruth if (L->contains(PredBB)) { 804a000883SChandler Carruth if (isa<IndirectBrInst>(PredBB->getTerminator())) 814a000883SChandler Carruth // We cannot rewrite exiting edges from an indirectbr. 824a000883SChandler Carruth return false; 83784929d0SCraig Topper if (isa<CallBrInst>(PredBB->getTerminator())) 84784929d0SCraig Topper // We cannot rewrite exiting edges from a callbr. 85784929d0SCraig Topper return false; 864a000883SChandler Carruth 874a000883SChandler Carruth InLoopPredecessors.push_back(PredBB); 884a000883SChandler Carruth } else { 894a000883SChandler Carruth IsDedicatedExit = false; 904a000883SChandler Carruth } 914a000883SChandler Carruth 924a000883SChandler Carruth assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!"); 934a000883SChandler Carruth 944a000883SChandler Carruth // Nothing to do if this is already a dedicated exit. 954a000883SChandler Carruth if (IsDedicatedExit) 964a000883SChandler Carruth return false; 974a000883SChandler Carruth 984a000883SChandler Carruth auto *NewExitBB = SplitBlockPredecessors( 9997468e92SAlina Sbirlea BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA); 1004a000883SChandler Carruth 1014a000883SChandler Carruth if (!NewExitBB) 102d34e60caSNicola Zaghen LLVM_DEBUG( 103d34e60caSNicola Zaghen dbgs() << "WARNING: Can't create a dedicated exit block for loop: " 1044a000883SChandler Carruth << *L << "\n"); 1054a000883SChandler Carruth else 106d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " 1074a000883SChandler Carruth << NewExitBB->getName() << "\n"); 1084a000883SChandler Carruth return true; 1094a000883SChandler Carruth }; 1104a000883SChandler Carruth 1114a000883SChandler Carruth // Walk the exit blocks directly rather than building up a data structure for 1124a000883SChandler Carruth // them, but only visit each one once. 1134a000883SChandler Carruth SmallPtrSet<BasicBlock *, 4> Visited; 1144a000883SChandler Carruth for (auto *BB : L->blocks()) 1154a000883SChandler Carruth for (auto *SuccBB : successors(BB)) { 1164a000883SChandler Carruth // We're looking for exit blocks so skip in-loop successors. 1174a000883SChandler Carruth if (L->contains(SuccBB)) 1184a000883SChandler Carruth continue; 1194a000883SChandler Carruth 1204a000883SChandler Carruth // Visit each exit block exactly once. 1214a000883SChandler Carruth if (!Visited.insert(SuccBB).second) 1224a000883SChandler Carruth continue; 1234a000883SChandler Carruth 1244a000883SChandler Carruth Changed |= RewriteExit(SuccBB); 1254a000883SChandler Carruth } 1264a000883SChandler Carruth 1274a000883SChandler Carruth return Changed; 1284a000883SChandler Carruth } 1294a000883SChandler Carruth 1305f8f34e4SAdrian Prantl /// Returns the instructions that use values defined in the loop. 131c5b7b555SAshutosh Nema SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { 132c5b7b555SAshutosh Nema SmallVector<Instruction *, 8> UsedOutside; 133c5b7b555SAshutosh Nema 134c5b7b555SAshutosh Nema for (auto *Block : L->getBlocks()) 135c5b7b555SAshutosh Nema // FIXME: I believe that this could use copy_if if the Inst reference could 136c5b7b555SAshutosh Nema // be adapted into a pointer. 137c5b7b555SAshutosh Nema for (auto &Inst : *Block) { 138c5b7b555SAshutosh Nema auto Users = Inst.users(); 1390a16c228SDavid Majnemer if (any_of(Users, [&](User *U) { 140c5b7b555SAshutosh Nema auto *Use = cast<Instruction>(U); 141c5b7b555SAshutosh Nema return !L->contains(Use->getParent()); 142c5b7b555SAshutosh Nema })) 143c5b7b555SAshutosh Nema UsedOutside.push_back(&Inst); 144c5b7b555SAshutosh Nema } 145c5b7b555SAshutosh Nema 146c5b7b555SAshutosh Nema return UsedOutside; 147c5b7b555SAshutosh Nema } 14831088a9dSChandler Carruth 14931088a9dSChandler Carruth void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { 15031088a9dSChandler Carruth // By definition, all loop passes need the LoopInfo analysis and the 15131088a9dSChandler Carruth // Dominator tree it depends on. Because they all participate in the loop 15231088a9dSChandler Carruth // pass manager, they must also preserve these. 15331088a9dSChandler Carruth AU.addRequired<DominatorTreeWrapperPass>(); 15431088a9dSChandler Carruth AU.addPreserved<DominatorTreeWrapperPass>(); 15531088a9dSChandler Carruth AU.addRequired<LoopInfoWrapperPass>(); 15631088a9dSChandler Carruth AU.addPreserved<LoopInfoWrapperPass>(); 15731088a9dSChandler Carruth 15831088a9dSChandler Carruth // We must also preserve LoopSimplify and LCSSA. We locally access their IDs 15931088a9dSChandler Carruth // here because users shouldn't directly get them from this header. 16031088a9dSChandler Carruth extern char &LoopSimplifyID; 16131088a9dSChandler Carruth extern char &LCSSAID; 16231088a9dSChandler Carruth AU.addRequiredID(LoopSimplifyID); 16331088a9dSChandler Carruth AU.addPreservedID(LoopSimplifyID); 16431088a9dSChandler Carruth AU.addRequiredID(LCSSAID); 16531088a9dSChandler Carruth AU.addPreservedID(LCSSAID); 166c3ccf5d7SIgor Laevsky // This is used in the LPPassManager to perform LCSSA verification on passes 167c3ccf5d7SIgor Laevsky // which preserve lcssa form 168c3ccf5d7SIgor Laevsky AU.addRequired<LCSSAVerificationPass>(); 169c3ccf5d7SIgor Laevsky AU.addPreserved<LCSSAVerificationPass>(); 17031088a9dSChandler Carruth 17131088a9dSChandler Carruth // Loop passes are designed to run inside of a loop pass manager which means 17231088a9dSChandler Carruth // that any function analyses they require must be required by the first loop 17331088a9dSChandler Carruth // pass in the manager (so that it is computed before the loop pass manager 17431088a9dSChandler Carruth // runs) and preserved by all loop pasess in the manager. To make this 17531088a9dSChandler Carruth // reasonably robust, the set needed for most loop passes is maintained here. 17631088a9dSChandler Carruth // If your loop pass requires an analysis not listed here, you will need to 17731088a9dSChandler Carruth // carefully audit the loop pass manager nesting structure that results. 17831088a9dSChandler Carruth AU.addRequired<AAResultsWrapperPass>(); 17931088a9dSChandler Carruth AU.addPreserved<AAResultsWrapperPass>(); 18031088a9dSChandler Carruth AU.addPreserved<BasicAAWrapperPass>(); 18131088a9dSChandler Carruth AU.addPreserved<GlobalsAAWrapperPass>(); 18231088a9dSChandler Carruth AU.addPreserved<SCEVAAWrapperPass>(); 18331088a9dSChandler Carruth AU.addRequired<ScalarEvolutionWrapperPass>(); 18431088a9dSChandler Carruth AU.addPreserved<ScalarEvolutionWrapperPass>(); 1856da79ce1SAlina Sbirlea // FIXME: When all loop passes preserve MemorySSA, it can be required and 1866da79ce1SAlina Sbirlea // preserved here instead of the individual handling in each pass. 18731088a9dSChandler Carruth } 18831088a9dSChandler Carruth 18931088a9dSChandler Carruth /// Manually defined generic "LoopPass" dependency initialization. This is used 19031088a9dSChandler Carruth /// to initialize the exact set of passes from above in \c 19131088a9dSChandler Carruth /// getLoopAnalysisUsage. It can be used within a loop pass's initialization 19231088a9dSChandler Carruth /// with: 19331088a9dSChandler Carruth /// 19431088a9dSChandler Carruth /// INITIALIZE_PASS_DEPENDENCY(LoopPass) 19531088a9dSChandler Carruth /// 19631088a9dSChandler Carruth /// As-if "LoopPass" were a pass. 19731088a9dSChandler Carruth void llvm::initializeLoopPassPass(PassRegistry &Registry) { 19831088a9dSChandler Carruth INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 19931088a9dSChandler Carruth INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 20031088a9dSChandler Carruth INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 201e12c487bSEaswaran Raman INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) 20231088a9dSChandler Carruth INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 20331088a9dSChandler Carruth INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) 20431088a9dSChandler Carruth INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 20531088a9dSChandler Carruth INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) 20631088a9dSChandler Carruth INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 2076da79ce1SAlina Sbirlea INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) 20831088a9dSChandler Carruth } 209963341c8SAdam Nemet 2103c3a7652SSerguei Katkov /// Create MDNode for input string. 2113c3a7652SSerguei Katkov static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) { 2123c3a7652SSerguei Katkov LLVMContext &Context = TheLoop->getHeader()->getContext(); 2133c3a7652SSerguei Katkov Metadata *MDs[] = { 2143c3a7652SSerguei Katkov MDString::get(Context, Name), 2153c3a7652SSerguei Katkov ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))}; 2163c3a7652SSerguei Katkov return MDNode::get(Context, MDs); 2173c3a7652SSerguei Katkov } 2183c3a7652SSerguei Katkov 2193c3a7652SSerguei Katkov /// Set input string into loop metadata by keeping other values intact. 2207f8c8095SSerguei Katkov /// If the string is already in loop metadata update value if it is 2217f8c8095SSerguei Katkov /// different. 2227f8c8095SSerguei Katkov void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD, 2233c3a7652SSerguei Katkov unsigned V) { 2243c3a7652SSerguei Katkov SmallVector<Metadata *, 4> MDs(1); 2253c3a7652SSerguei Katkov // If the loop already has metadata, retain it. 2263c3a7652SSerguei Katkov MDNode *LoopID = TheLoop->getLoopID(); 2273c3a7652SSerguei Katkov if (LoopID) { 2283c3a7652SSerguei Katkov for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { 2293c3a7652SSerguei Katkov MDNode *Node = cast<MDNode>(LoopID->getOperand(i)); 2307f8c8095SSerguei Katkov // If it is of form key = value, try to parse it. 2317f8c8095SSerguei Katkov if (Node->getNumOperands() == 2) { 2327f8c8095SSerguei Katkov MDString *S = dyn_cast<MDString>(Node->getOperand(0)); 2337f8c8095SSerguei Katkov if (S && S->getString().equals(StringMD)) { 2347f8c8095SSerguei Katkov ConstantInt *IntMD = 2357f8c8095SSerguei Katkov mdconst::extract_or_null<ConstantInt>(Node->getOperand(1)); 2367f8c8095SSerguei Katkov if (IntMD && IntMD->getSExtValue() == V) 2377f8c8095SSerguei Katkov // It is already in place. Do nothing. 2387f8c8095SSerguei Katkov return; 2397f8c8095SSerguei Katkov // We need to update the value, so just skip it here and it will 2407f8c8095SSerguei Katkov // be added after copying other existed nodes. 2417f8c8095SSerguei Katkov continue; 2427f8c8095SSerguei Katkov } 2437f8c8095SSerguei Katkov } 2443c3a7652SSerguei Katkov MDs.push_back(Node); 2453c3a7652SSerguei Katkov } 2463c3a7652SSerguei Katkov } 2473c3a7652SSerguei Katkov // Add new metadata. 2487f8c8095SSerguei Katkov MDs.push_back(createStringMetadata(TheLoop, StringMD, V)); 2493c3a7652SSerguei Katkov // Replace current metadata node with new one. 2503c3a7652SSerguei Katkov LLVMContext &Context = TheLoop->getHeader()->getContext(); 2513c3a7652SSerguei Katkov MDNode *NewLoopID = MDNode::get(Context, MDs); 2523c3a7652SSerguei Katkov // Set operand 0 to refer to the loop id itself. 2533c3a7652SSerguei Katkov NewLoopID->replaceOperandWith(0, NewLoopID); 2543c3a7652SSerguei Katkov TheLoop->setLoopID(NewLoopID); 2553c3a7652SSerguei Katkov } 2563c3a7652SSerguei Katkov 25771bd59f0SDavid Sherwood Optional<ElementCount> 258ddb3b26aSBardia Mahjour llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) { 25971bd59f0SDavid Sherwood Optional<int> Width = 26071bd59f0SDavid Sherwood getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width"); 26171bd59f0SDavid Sherwood 26271bd59f0SDavid Sherwood if (Width.hasValue()) { 26371bd59f0SDavid Sherwood Optional<int> IsScalable = getOptionalIntLoopAttribute( 26471bd59f0SDavid Sherwood TheLoop, "llvm.loop.vectorize.scalable.enable"); 2658a20e2b3SKazu Hirata return ElementCount::get(*Width, IsScalable.getValueOr(false)); 26671bd59f0SDavid Sherwood } 26771bd59f0SDavid Sherwood 26871bd59f0SDavid Sherwood return None; 26971bd59f0SDavid Sherwood } 27071bd59f0SDavid Sherwood 27172448525SMichael Kruse Optional<MDNode *> llvm::makeFollowupLoopID( 27272448525SMichael Kruse MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions, 27372448525SMichael Kruse const char *InheritOptionsExceptPrefix, bool AlwaysNew) { 27472448525SMichael Kruse if (!OrigLoopID) { 27572448525SMichael Kruse if (AlwaysNew) 27672448525SMichael Kruse return nullptr; 27772448525SMichael Kruse return None; 27872448525SMichael Kruse } 27972448525SMichael Kruse 28072448525SMichael Kruse assert(OrigLoopID->getOperand(0) == OrigLoopID); 28172448525SMichael Kruse 28272448525SMichael Kruse bool InheritAllAttrs = !InheritOptionsExceptPrefix; 28372448525SMichael Kruse bool InheritSomeAttrs = 28472448525SMichael Kruse InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0'; 28572448525SMichael Kruse SmallVector<Metadata *, 8> MDs; 28672448525SMichael Kruse MDs.push_back(nullptr); 28772448525SMichael Kruse 28872448525SMichael Kruse bool Changed = false; 28972448525SMichael Kruse if (InheritAllAttrs || InheritSomeAttrs) { 290dc300bebSKazu Hirata for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) { 29172448525SMichael Kruse MDNode *Op = cast<MDNode>(Existing.get()); 29272448525SMichael Kruse 29372448525SMichael Kruse auto InheritThisAttribute = [InheritSomeAttrs, 29472448525SMichael Kruse InheritOptionsExceptPrefix](MDNode *Op) { 29572448525SMichael Kruse if (!InheritSomeAttrs) 29672448525SMichael Kruse return false; 29772448525SMichael Kruse 29872448525SMichael Kruse // Skip malformatted attribute metadata nodes. 29972448525SMichael Kruse if (Op->getNumOperands() == 0) 30072448525SMichael Kruse return true; 30172448525SMichael Kruse Metadata *NameMD = Op->getOperand(0).get(); 30272448525SMichael Kruse if (!isa<MDString>(NameMD)) 30372448525SMichael Kruse return true; 30472448525SMichael Kruse StringRef AttrName = cast<MDString>(NameMD)->getString(); 30572448525SMichael Kruse 30672448525SMichael Kruse // Do not inherit excluded attributes. 30772448525SMichael Kruse return !AttrName.startswith(InheritOptionsExceptPrefix); 30872448525SMichael Kruse }; 30972448525SMichael Kruse 31072448525SMichael Kruse if (InheritThisAttribute(Op)) 31172448525SMichael Kruse MDs.push_back(Op); 31272448525SMichael Kruse else 31372448525SMichael Kruse Changed = true; 31472448525SMichael Kruse } 31572448525SMichael Kruse } else { 31672448525SMichael Kruse // Modified if we dropped at least one attribute. 31772448525SMichael Kruse Changed = OrigLoopID->getNumOperands() > 1; 31872448525SMichael Kruse } 31972448525SMichael Kruse 32072448525SMichael Kruse bool HasAnyFollowup = false; 32172448525SMichael Kruse for (StringRef OptionName : FollowupOptions) { 322978ba615SMichael Kruse MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName); 32372448525SMichael Kruse if (!FollowupNode) 32472448525SMichael Kruse continue; 32572448525SMichael Kruse 32672448525SMichael Kruse HasAnyFollowup = true; 327dc300bebSKazu Hirata for (const MDOperand &Option : drop_begin(FollowupNode->operands())) { 32872448525SMichael Kruse MDs.push_back(Option.get()); 32972448525SMichael Kruse Changed = true; 33072448525SMichael Kruse } 33172448525SMichael Kruse } 33272448525SMichael Kruse 33372448525SMichael Kruse // Attributes of the followup loop not specified explicity, so signal to the 33472448525SMichael Kruse // transformation pass to add suitable attributes. 33572448525SMichael Kruse if (!AlwaysNew && !HasAnyFollowup) 33672448525SMichael Kruse return None; 33772448525SMichael Kruse 33872448525SMichael Kruse // If no attributes were added or remove, the previous loop Id can be reused. 33972448525SMichael Kruse if (!AlwaysNew && !Changed) 34072448525SMichael Kruse return OrigLoopID; 34172448525SMichael Kruse 34272448525SMichael Kruse // No attributes is equivalent to having no !llvm.loop metadata at all. 34372448525SMichael Kruse if (MDs.size() == 1) 34472448525SMichael Kruse return nullptr; 34572448525SMichael Kruse 34672448525SMichael Kruse // Build the new loop ID. 34772448525SMichael Kruse MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs); 34872448525SMichael Kruse FollowupLoopID->replaceOperandWith(0, FollowupLoopID); 34972448525SMichael Kruse return FollowupLoopID; 35072448525SMichael Kruse } 35172448525SMichael Kruse 35272448525SMichael Kruse bool llvm::hasDisableAllTransformsHint(const Loop *L) { 35372448525SMichael Kruse return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced); 35472448525SMichael Kruse } 35572448525SMichael Kruse 3564f64f1baSTim Corringham bool llvm::hasDisableLICMTransformsHint(const Loop *L) { 3574f64f1baSTim Corringham return getBooleanLoopAttribute(L, LLVMLoopDisableLICM); 3584f64f1baSTim Corringham } 3594f64f1baSTim Corringham 360ddb3b26aSBardia Mahjour TransformationMode llvm::hasUnrollTransformation(const Loop *L) { 36172448525SMichael Kruse if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable")) 36272448525SMichael Kruse return TM_SuppressedByUser; 36372448525SMichael Kruse 36472448525SMichael Kruse Optional<int> Count = 36572448525SMichael Kruse getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count"); 36672448525SMichael Kruse if (Count.hasValue()) 36772448525SMichael Kruse return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser; 36872448525SMichael Kruse 36972448525SMichael Kruse if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")) 37072448525SMichael Kruse return TM_ForcedByUser; 37172448525SMichael Kruse 37272448525SMichael Kruse if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full")) 37372448525SMichael Kruse return TM_ForcedByUser; 37472448525SMichael Kruse 37572448525SMichael Kruse if (hasDisableAllTransformsHint(L)) 37672448525SMichael Kruse return TM_Disable; 37772448525SMichael Kruse 37872448525SMichael Kruse return TM_Unspecified; 37972448525SMichael Kruse } 38072448525SMichael Kruse 381ddb3b26aSBardia Mahjour TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) { 38272448525SMichael Kruse if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable")) 38372448525SMichael Kruse return TM_SuppressedByUser; 38472448525SMichael Kruse 38572448525SMichael Kruse Optional<int> Count = 38672448525SMichael Kruse getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count"); 38772448525SMichael Kruse if (Count.hasValue()) 38872448525SMichael Kruse return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser; 38972448525SMichael Kruse 39072448525SMichael Kruse if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable")) 39172448525SMichael Kruse return TM_ForcedByUser; 39272448525SMichael Kruse 39372448525SMichael Kruse if (hasDisableAllTransformsHint(L)) 39472448525SMichael Kruse return TM_Disable; 39572448525SMichael Kruse 39672448525SMichael Kruse return TM_Unspecified; 39772448525SMichael Kruse } 39872448525SMichael Kruse 399ddb3b26aSBardia Mahjour TransformationMode llvm::hasVectorizeTransformation(const Loop *L) { 40072448525SMichael Kruse Optional<bool> Enable = 40172448525SMichael Kruse getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable"); 40272448525SMichael Kruse 40372448525SMichael Kruse if (Enable == false) 40472448525SMichael Kruse return TM_SuppressedByUser; 40572448525SMichael Kruse 40671bd59f0SDavid Sherwood Optional<ElementCount> VectorizeWidth = 40771bd59f0SDavid Sherwood getOptionalElementCountLoopAttribute(L); 40872448525SMichael Kruse Optional<int> InterleaveCount = 40972448525SMichael Kruse getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count"); 41072448525SMichael Kruse 41172448525SMichael Kruse // 'Forcing' vector width and interleave count to one effectively disables 41272448525SMichael Kruse // this tranformation. 41371bd59f0SDavid Sherwood if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() && 41471bd59f0SDavid Sherwood InterleaveCount == 1) 41572448525SMichael Kruse return TM_SuppressedByUser; 41672448525SMichael Kruse 41772448525SMichael Kruse if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized")) 41872448525SMichael Kruse return TM_Disable; 41972448525SMichael Kruse 42070560a0aSMichael Kruse if (Enable == true) 42170560a0aSMichael Kruse return TM_ForcedByUser; 42270560a0aSMichael Kruse 42371bd59f0SDavid Sherwood if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1) 42472448525SMichael Kruse return TM_Disable; 42572448525SMichael Kruse 42671bd59f0SDavid Sherwood if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1) 42772448525SMichael Kruse return TM_Enable; 42872448525SMichael Kruse 42972448525SMichael Kruse if (hasDisableAllTransformsHint(L)) 43072448525SMichael Kruse return TM_Disable; 43172448525SMichael Kruse 43272448525SMichael Kruse return TM_Unspecified; 43372448525SMichael Kruse } 43472448525SMichael Kruse 435ddb3b26aSBardia Mahjour TransformationMode llvm::hasDistributeTransformation(const Loop *L) { 43672448525SMichael Kruse if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable")) 43772448525SMichael Kruse return TM_ForcedByUser; 43872448525SMichael Kruse 43972448525SMichael Kruse if (hasDisableAllTransformsHint(L)) 44072448525SMichael Kruse return TM_Disable; 44172448525SMichael Kruse 44272448525SMichael Kruse return TM_Unspecified; 44372448525SMichael Kruse } 44472448525SMichael Kruse 445ddb3b26aSBardia Mahjour TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) { 44672448525SMichael Kruse if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable")) 44772448525SMichael Kruse return TM_SuppressedByUser; 44872448525SMichael Kruse 44972448525SMichael Kruse if (hasDisableAllTransformsHint(L)) 45072448525SMichael Kruse return TM_Disable; 45172448525SMichael Kruse 45272448525SMichael Kruse return TM_Unspecified; 453963341c8SAdam Nemet } 454122f984aSEvgeniy Stepanov 4557ed5856aSAlina Sbirlea /// Does a BFS from a given node to all of its children inside a given loop. 4567ed5856aSAlina Sbirlea /// The returned vector of nodes includes the starting point. 4577ed5856aSAlina Sbirlea SmallVector<DomTreeNode *, 16> 4587ed5856aSAlina Sbirlea llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) { 4597ed5856aSAlina Sbirlea SmallVector<DomTreeNode *, 16> Worklist; 4607ed5856aSAlina Sbirlea auto AddRegionToWorklist = [&](DomTreeNode *DTN) { 4617ed5856aSAlina Sbirlea // Only include subregions in the top level loop. 4627ed5856aSAlina Sbirlea BasicBlock *BB = DTN->getBlock(); 4637ed5856aSAlina Sbirlea if (CurLoop->contains(BB)) 4647ed5856aSAlina Sbirlea Worklist.push_back(DTN); 4657ed5856aSAlina Sbirlea }; 4667ed5856aSAlina Sbirlea 4677ed5856aSAlina Sbirlea AddRegionToWorklist(N); 4687ed5856aSAlina Sbirlea 46976c5cb05SNicolai Hähnle for (size_t I = 0; I < Worklist.size(); I++) { 47076c5cb05SNicolai Hähnle for (DomTreeNode *Child : Worklist[I]->children()) 4717ed5856aSAlina Sbirlea AddRegionToWorklist(Child); 47276c5cb05SNicolai Hähnle } 4737ed5856aSAlina Sbirlea 4747ed5856aSAlina Sbirlea return Worklist; 4757ed5856aSAlina Sbirlea } 4767ed5856aSAlina Sbirlea 477efb130fcSAlina Sbirlea void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, 478efb130fcSAlina Sbirlea LoopInfo *LI, MemorySSA *MSSA) { 479899809d5SHans Wennborg assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!"); 480df3e71e0SMarcello Maggioni auto *Preheader = L->getLoopPreheader(); 481df3e71e0SMarcello Maggioni assert(Preheader && "Preheader should exist!"); 482df3e71e0SMarcello Maggioni 483efb130fcSAlina Sbirlea std::unique_ptr<MemorySSAUpdater> MSSAU; 484efb130fcSAlina Sbirlea if (MSSA) 485efb130fcSAlina Sbirlea MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); 486efb130fcSAlina Sbirlea 487df3e71e0SMarcello Maggioni // Now that we know the removal is safe, remove the loop by changing the 488df3e71e0SMarcello Maggioni // branch from the preheader to go to the single exit block. 489df3e71e0SMarcello Maggioni // 490df3e71e0SMarcello Maggioni // Because we're deleting a large chunk of code at once, the sequence in which 491df3e71e0SMarcello Maggioni // we remove things is very important to avoid invalidation issues. 492df3e71e0SMarcello Maggioni 493df3e71e0SMarcello Maggioni // Tell ScalarEvolution that the loop is deleted. Do this before 494df3e71e0SMarcello Maggioni // deleting the loop so that ScalarEvolution can look at the loop 495df3e71e0SMarcello Maggioni // to determine what it needs to clean up. 496df3e71e0SMarcello Maggioni if (SE) 497df3e71e0SMarcello Maggioni SE->forgetLoop(L); 498df3e71e0SMarcello Maggioni 499df3e71e0SMarcello Maggioni auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator()); 500df3e71e0SMarcello Maggioni assert(OldBr && "Preheader must end with a branch"); 501df3e71e0SMarcello Maggioni assert(OldBr->isUnconditional() && "Preheader must have a single successor"); 502df3e71e0SMarcello Maggioni // Connect the preheader to the exit block. Keep the old edge to the header 503df3e71e0SMarcello Maggioni // around to perform the dominator tree update in two separate steps 504df3e71e0SMarcello Maggioni // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge 505df3e71e0SMarcello Maggioni // preheader -> header. 506df3e71e0SMarcello Maggioni // 507df3e71e0SMarcello Maggioni // 508df3e71e0SMarcello Maggioni // 0. Preheader 1. Preheader 2. Preheader 509df3e71e0SMarcello Maggioni // | | | | 510df3e71e0SMarcello Maggioni // V | V | 511df3e71e0SMarcello Maggioni // Header <--\ | Header <--\ | Header <--\ 512df3e71e0SMarcello Maggioni // | | | | | | | | | | | 513df3e71e0SMarcello Maggioni // | V | | | V | | | V | 514df3e71e0SMarcello Maggioni // | Body --/ | | Body --/ | | Body --/ 515df3e71e0SMarcello Maggioni // V V V V V 516df3e71e0SMarcello Maggioni // Exit Exit Exit 517df3e71e0SMarcello Maggioni // 518df3e71e0SMarcello Maggioni // By doing this is two separate steps we can perform the dominator tree 519df3e71e0SMarcello Maggioni // update without using the batch update API. 520df3e71e0SMarcello Maggioni // 521df3e71e0SMarcello Maggioni // Even when the loop is never executed, we cannot remove the edge from the 522df3e71e0SMarcello Maggioni // source block to the exit block. Consider the case where the unexecuted loop 523df3e71e0SMarcello Maggioni // branches back to an outer loop. If we deleted the loop and removed the edge 524df3e71e0SMarcello Maggioni // coming to this inner loop, this will break the outer loop structure (by 525df3e71e0SMarcello Maggioni // deleting the backedge of the outer loop). If the outer loop is indeed a 526df3e71e0SMarcello Maggioni // non-loop, it will be deleted in a future iteration of loop deletion pass. 527df3e71e0SMarcello Maggioni IRBuilder<> Builder(OldBr); 528babc224cSAtmn Patel 529babc224cSAtmn Patel auto *ExitBlock = L->getUniqueExitBlock(); 530f88a7975SAtmn Patel DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 531babc224cSAtmn Patel if (ExitBlock) { 532babc224cSAtmn Patel assert(ExitBlock && "Should have a unique exit block!"); 533babc224cSAtmn Patel assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); 534babc224cSAtmn Patel 535df3e71e0SMarcello Maggioni Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); 536df3e71e0SMarcello Maggioni // Remove the old branch. The conditional branch becomes a new terminator. 537df3e71e0SMarcello Maggioni OldBr->eraseFromParent(); 538df3e71e0SMarcello Maggioni 539df3e71e0SMarcello Maggioni // Rewrite phis in the exit block to get their inputs from the Preheader 540df3e71e0SMarcello Maggioni // instead of the exiting block. 541c7fc81e6SBenjamin Kramer for (PHINode &P : ExitBlock->phis()) { 542df3e71e0SMarcello Maggioni // Set the zero'th element of Phi to be from the preheader and remove all 543df3e71e0SMarcello Maggioni // other incoming values. Given the loop has dedicated exits, all other 544df3e71e0SMarcello Maggioni // incoming values must be from the exiting blocks. 545df3e71e0SMarcello Maggioni int PredIndex = 0; 546c7fc81e6SBenjamin Kramer P.setIncomingBlock(PredIndex, Preheader); 547df3e71e0SMarcello Maggioni // Removes all incoming values from all other exiting blocks (including 548df3e71e0SMarcello Maggioni // duplicate values from an exiting block). 549df3e71e0SMarcello Maggioni // Nuke all entries except the zero'th entry which is the preheader entry. 550df3e71e0SMarcello Maggioni // NOTE! We need to remove Incoming Values in the reverse order as done 551df3e71e0SMarcello Maggioni // below, to keep the indices valid for deletion (removeIncomingValues 552babc224cSAtmn Patel // updates getNumIncomingValues and shifts all values down into the 553babc224cSAtmn Patel // operand being deleted). 554c7fc81e6SBenjamin Kramer for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i) 555c7fc81e6SBenjamin Kramer P.removeIncomingValue(e - i, false); 556df3e71e0SMarcello Maggioni 557c7fc81e6SBenjamin Kramer assert((P.getNumIncomingValues() == 1 && 558c7fc81e6SBenjamin Kramer P.getIncomingBlock(PredIndex) == Preheader) && 559df3e71e0SMarcello Maggioni "Should have exactly one value and that's from the preheader!"); 560df3e71e0SMarcello Maggioni } 561df3e71e0SMarcello Maggioni 562efb130fcSAlina Sbirlea if (DT) { 563efb130fcSAlina Sbirlea DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}); 564efb130fcSAlina Sbirlea if (MSSA) { 565babc224cSAtmn Patel MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}, 566babc224cSAtmn Patel *DT); 567efb130fcSAlina Sbirlea if (VerifyMemorySSA) 568efb130fcSAlina Sbirlea MSSA->verifyMemorySSA(); 569efb130fcSAlina Sbirlea } 570efb130fcSAlina Sbirlea } 571efb130fcSAlina Sbirlea 572df3e71e0SMarcello Maggioni // Disconnect the loop body by branching directly to its exit. 573df3e71e0SMarcello Maggioni Builder.SetInsertPoint(Preheader->getTerminator()); 574df3e71e0SMarcello Maggioni Builder.CreateBr(ExitBlock); 575df3e71e0SMarcello Maggioni // Remove the old branch. 576df3e71e0SMarcello Maggioni Preheader->getTerminator()->eraseFromParent(); 577f88a7975SAtmn Patel } else { 578f88a7975SAtmn Patel assert(L->hasNoExitBlocks() && 579f88a7975SAtmn Patel "Loop should have either zero or one exit blocks."); 580f88a7975SAtmn Patel 581f88a7975SAtmn Patel Builder.SetInsertPoint(OldBr); 582f88a7975SAtmn Patel Builder.CreateUnreachable(); 583f88a7975SAtmn Patel Preheader->getTerminator()->eraseFromParent(); 584f88a7975SAtmn Patel } 585df3e71e0SMarcello Maggioni 586df3e71e0SMarcello Maggioni if (DT) { 587efb130fcSAlina Sbirlea DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}}); 588efb130fcSAlina Sbirlea if (MSSA) { 589f88a7975SAtmn Patel MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}}, 590f88a7975SAtmn Patel *DT); 591efb130fcSAlina Sbirlea SmallSetVector<BasicBlock *, 8> DeadBlockSet(L->block_begin(), 592efb130fcSAlina Sbirlea L->block_end()); 593efb130fcSAlina Sbirlea MSSAU->removeBlocks(DeadBlockSet); 594519b019aSAlina Sbirlea if (VerifyMemorySSA) 595519b019aSAlina Sbirlea MSSA->verifyMemorySSA(); 596efb130fcSAlina Sbirlea } 597df3e71e0SMarcello Maggioni } 598df3e71e0SMarcello Maggioni 599744c3c32SDavide Italiano // Use a map to unique and a vector to guarantee deterministic ordering. 6008ee59ca6SDavide Italiano llvm::SmallDenseSet<std::pair<DIVariable *, DIExpression *>, 4> DeadDebugSet; 601744c3c32SDavide Italiano llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst; 602744c3c32SDavide Italiano 603babc224cSAtmn Patel if (ExitBlock) { 604a757d65cSSerguei Katkov // Given LCSSA form is satisfied, we should not have users of instructions 605a757d65cSSerguei Katkov // within the dead loop outside of the loop. However, LCSSA doesn't take 606a757d65cSSerguei Katkov // unreachable uses into account. We handle them here. 607a757d65cSSerguei Katkov // We could do it after drop all references (in this case all users in the 608a757d65cSSerguei Katkov // loop will be already eliminated and we have less work to do but according 609a757d65cSSerguei Katkov // to API doc of User::dropAllReferences only valid operation after dropping 610a757d65cSSerguei Katkov // references, is deletion. So let's substitute all usages of 611a757d65cSSerguei Katkov // instruction from the loop with undef value of corresponding type first. 612a757d65cSSerguei Katkov for (auto *Block : L->blocks()) 613a757d65cSSerguei Katkov for (Instruction &I : *Block) { 614a757d65cSSerguei Katkov auto *Undef = UndefValue::get(I.getType()); 615babc224cSAtmn Patel for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); 616babc224cSAtmn Patel UI != E;) { 617a757d65cSSerguei Katkov Use &U = *UI; 618a757d65cSSerguei Katkov ++UI; 619a757d65cSSerguei Katkov if (auto *Usr = dyn_cast<Instruction>(U.getUser())) 620a757d65cSSerguei Katkov if (L->contains(Usr->getParent())) 621a757d65cSSerguei Katkov continue; 622a757d65cSSerguei Katkov // If we have a DT then we can check that uses outside a loop only in 623a757d65cSSerguei Katkov // unreachable block. 624a757d65cSSerguei Katkov if (DT) 625a757d65cSSerguei Katkov assert(!DT->isReachableFromEntry(U) && 626a757d65cSSerguei Katkov "Unexpected user in reachable block"); 627a757d65cSSerguei Katkov U.set(Undef); 628a757d65cSSerguei Katkov } 629744c3c32SDavide Italiano auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I); 630744c3c32SDavide Italiano if (!DVI) 631744c3c32SDavide Italiano continue; 632babc224cSAtmn Patel auto Key = 633babc224cSAtmn Patel DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()}); 6348ee59ca6SDavide Italiano if (Key != DeadDebugSet.end()) 635744c3c32SDavide Italiano continue; 6368ee59ca6SDavide Italiano DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()}); 637744c3c32SDavide Italiano DeadDebugInst.push_back(DVI); 638a757d65cSSerguei Katkov } 639a757d65cSSerguei Katkov 640744c3c32SDavide Italiano // After the loop has been deleted all the values defined and modified 641744c3c32SDavide Italiano // inside the loop are going to be unavailable. 642744c3c32SDavide Italiano // Since debug values in the loop have been deleted, inserting an undef 643744c3c32SDavide Italiano // dbg.value truncates the range of any dbg.value before the loop where the 644744c3c32SDavide Italiano // loop used to be. This is particularly important for constant values. 645744c3c32SDavide Italiano DIBuilder DIB(*ExitBlock->getModule()); 646e5be660eSRoman Lebedev Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI(); 647e5be660eSRoman Lebedev assert(InsertDbgValueBefore && 648e5be660eSRoman Lebedev "There should be a non-PHI instruction in exit block, else these " 649e5be660eSRoman Lebedev "instructions will have no parent."); 650744c3c32SDavide Italiano for (auto *DVI : DeadDebugInst) 651e5be660eSRoman Lebedev DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()), 652e5be660eSRoman Lebedev DVI->getVariable(), DVI->getExpression(), 653e5be660eSRoman Lebedev DVI->getDebugLoc(), InsertDbgValueBefore); 654babc224cSAtmn Patel } 655744c3c32SDavide Italiano 656df3e71e0SMarcello Maggioni // Remove the block from the reference counting scheme, so that we can 657df3e71e0SMarcello Maggioni // delete it freely later. 658df3e71e0SMarcello Maggioni for (auto *Block : L->blocks()) 659df3e71e0SMarcello Maggioni Block->dropAllReferences(); 660df3e71e0SMarcello Maggioni 661efb130fcSAlina Sbirlea if (MSSA && VerifyMemorySSA) 662efb130fcSAlina Sbirlea MSSA->verifyMemorySSA(); 663efb130fcSAlina Sbirlea 664df3e71e0SMarcello Maggioni if (LI) { 665df3e71e0SMarcello Maggioni // Erase the instructions and the blocks without having to worry 666df3e71e0SMarcello Maggioni // about ordering because we already dropped the references. 667df3e71e0SMarcello Maggioni // NOTE: This iteration is safe because erasing the block does not remove 668df3e71e0SMarcello Maggioni // its entry from the loop's block list. We do that in the next section. 669df3e71e0SMarcello Maggioni for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end(); 670df3e71e0SMarcello Maggioni LpI != LpE; ++LpI) 671df3e71e0SMarcello Maggioni (*LpI)->eraseFromParent(); 672df3e71e0SMarcello Maggioni 673df3e71e0SMarcello Maggioni // Finally, the blocks from loopinfo. This has to happen late because 674df3e71e0SMarcello Maggioni // otherwise our loop iterators won't work. 675df3e71e0SMarcello Maggioni 676df3e71e0SMarcello Maggioni SmallPtrSet<BasicBlock *, 8> blocks; 677df3e71e0SMarcello Maggioni blocks.insert(L->block_begin(), L->block_end()); 678df3e71e0SMarcello Maggioni for (BasicBlock *BB : blocks) 679df3e71e0SMarcello Maggioni LI->removeBlock(BB); 680df3e71e0SMarcello Maggioni 681df3e71e0SMarcello Maggioni // The last step is to update LoopInfo now that we've eliminated this loop. 6829883d7edSWhitney Tsang // Note: LoopInfo::erase remove the given loop and relink its subloops with 6839883d7edSWhitney Tsang // its parent. While removeLoop/removeChildLoop remove the given loop but 6849883d7edSWhitney Tsang // not relink its subloops, which is what we want. 6859883d7edSWhitney Tsang if (Loop *ParentLoop = L->getParentLoop()) { 6865d6c5b46SWhitney Tsang Loop::iterator I = find(*ParentLoop, L); 6879883d7edSWhitney Tsang assert(I != ParentLoop->end() && "Couldn't find loop"); 6889883d7edSWhitney Tsang ParentLoop->removeChildLoop(I); 6899883d7edSWhitney Tsang } else { 6905d6c5b46SWhitney Tsang Loop::iterator I = find(*LI, L); 6919883d7edSWhitney Tsang assert(I != LI->end() && "Couldn't find loop"); 6929883d7edSWhitney Tsang LI->removeLoop(I); 6939883d7edSWhitney Tsang } 6949883d7edSWhitney Tsang LI->destroy(L); 695df3e71e0SMarcello Maggioni } 696df3e71e0SMarcello Maggioni } 697df3e71e0SMarcello Maggioni 698ef51eed3SPhilip Reames static Loop *getOutermostLoop(Loop *L) { 699ef51eed3SPhilip Reames while (Loop *Parent = L->getParentLoop()) 700ef51eed3SPhilip Reames L = Parent; 701ef51eed3SPhilip Reames return L; 702ef51eed3SPhilip Reames } 703ef51eed3SPhilip Reames 7044739dd67SPhilip Reames void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, 7054739dd67SPhilip Reames LoopInfo &LI, MemorySSA *MSSA) { 7064739dd67SPhilip Reames auto *Latch = L->getLoopLatch(); 7074739dd67SPhilip Reames assert(Latch && "multiple latches not yet supported"); 7084739dd67SPhilip Reames auto *Header = L->getHeader(); 709ef51eed3SPhilip Reames Loop *OutermostLoop = getOutermostLoop(L); 7104739dd67SPhilip Reames 7114739dd67SPhilip Reames SE.forgetLoop(L); 7124739dd67SPhilip Reames 7134739dd67SPhilip Reames std::unique_ptr<MemorySSAUpdater> MSSAU; 7144739dd67SPhilip Reames if (MSSA) 7154739dd67SPhilip Reames MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); 7164739dd67SPhilip Reames 7176a823760SPhilip Reames // Update the CFG and domtree. We chose to special case a couple of 7186a823760SPhilip Reames // of common cases for code quality and test readability reasons. 7196a823760SPhilip Reames [&]() -> void { 7206a823760SPhilip Reames if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) { 7216a823760SPhilip Reames if (!BI->isConditional()) { 7226a823760SPhilip Reames DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager); 7236a823760SPhilip Reames (void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU, 7246a823760SPhilip Reames MSSAU.get()); 7256a823760SPhilip Reames return; 7266a823760SPhilip Reames } 7276a823760SPhilip Reames 7286a823760SPhilip Reames // Conditional latch/exit - note that latch can be shared by inner 7296a823760SPhilip Reames // and outer loop so the other target doesn't need to an exit 7306a823760SPhilip Reames if (L->isLoopExiting(Latch)) { 7316a823760SPhilip Reames // TODO: Generalize ConstantFoldTerminator so that it can be used 732c3b3aa27SPhilip Reames // here without invalidating LCSSA or MemorySSA. (Tricky case for 733c3b3aa27SPhilip Reames // LCSSA: header is an exit block of a preceeding sibling loop w/o 734c3b3aa27SPhilip Reames // dedicated exits.) 7356a823760SPhilip Reames const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0; 7366a823760SPhilip Reames BasicBlock *ExitBB = BI->getSuccessor(ExitIdx); 7376a823760SPhilip Reames 7386a823760SPhilip Reames DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager); 7396a823760SPhilip Reames Header->removePredecessor(Latch, true); 7406a823760SPhilip Reames 7416a823760SPhilip Reames IRBuilder<> Builder(BI); 7426a823760SPhilip Reames auto *NewBI = Builder.CreateBr(ExitBB); 7436a823760SPhilip Reames // Transfer the metadata to the new branch instruction (minus the 7446a823760SPhilip Reames // loop info since this is no longer a loop) 7456a823760SPhilip Reames NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg, 7466a823760SPhilip Reames LLVMContext::MD_annotation}); 7476a823760SPhilip Reames 7486a823760SPhilip Reames BI->eraseFromParent(); 7496a823760SPhilip Reames DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}}); 750c3b3aa27SPhilip Reames if (MSSA) 751c3b3aa27SPhilip Reames MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT); 7526a823760SPhilip Reames return; 7536a823760SPhilip Reames } 7546a823760SPhilip Reames } 7556a823760SPhilip Reames 7566a823760SPhilip Reames // General case. By splitting the backedge, and then explicitly making it 7576a823760SPhilip Reames // unreachable we gracefully handle corner cases such as switch and invoke 7586a823760SPhilip Reames // termiantors. 7594739dd67SPhilip Reames auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get()); 7604739dd67SPhilip Reames 7614739dd67SPhilip Reames DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager); 76225a3130dSJohannes Doerfert (void)changeToUnreachable(BackedgeBB->getTerminator(), 7634739dd67SPhilip Reames /*PreserveLCSSA*/ true, &DTU, MSSAU.get()); 7646a823760SPhilip Reames }(); 7654739dd67SPhilip Reames 7664739dd67SPhilip Reames // Erase (and destroy) this loop instance. Handles relinking sub-loops 7674739dd67SPhilip Reames // and blocks within the loop as needed. 7684739dd67SPhilip Reames LI.erase(L); 769ef51eed3SPhilip Reames 770ef51eed3SPhilip Reames // If the loop we broke had a parent, then changeToUnreachable might have 771ef51eed3SPhilip Reames // caused a block to be removed from the parent loop (see loop_nest_lcssa 772ef51eed3SPhilip Reames // test case in zero-btc.ll for an example), thus changing the parent's 773ef51eed3SPhilip Reames // exit blocks. If that happened, we need to rebuild LCSSA on the outermost 774ef51eed3SPhilip Reames // loop which might have a had a block removed. 775ef51eed3SPhilip Reames if (OutermostLoop != L) 776ef51eed3SPhilip Reames formLCSSARecursively(*OutermostLoop, DT, &LI, &SE); 7774739dd67SPhilip Reames } 7784739dd67SPhilip Reames 7794739dd67SPhilip Reames 780af7e1588SEvgeniy Brevnov /// Checks if \p L has single exit through latch block except possibly 781af7e1588SEvgeniy Brevnov /// "deoptimizing" exits. Returns branch instruction terminating the loop 782af7e1588SEvgeniy Brevnov /// latch if above check is successful, nullptr otherwise. 783af7e1588SEvgeniy Brevnov static BranchInst *getExpectedExitLoopLatchBranch(Loop *L) { 78445c43e7dSSerguei Katkov BasicBlock *Latch = L->getLoopLatch(); 78545c43e7dSSerguei Katkov if (!Latch) 786af7e1588SEvgeniy Brevnov return nullptr; 787af7e1588SEvgeniy Brevnov 78845c43e7dSSerguei Katkov BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator()); 78945c43e7dSSerguei Katkov if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch)) 790af7e1588SEvgeniy Brevnov return nullptr; 79141d72a86SDehao Chen 79241d72a86SDehao Chen assert((LatchBR->getSuccessor(0) == L->getHeader() || 79341d72a86SDehao Chen LatchBR->getSuccessor(1) == L->getHeader()) && 79441d72a86SDehao Chen "At least one edge out of the latch must go to the header"); 79541d72a86SDehao Chen 79645c43e7dSSerguei Katkov SmallVector<BasicBlock *, 4> ExitBlocks; 79745c43e7dSSerguei Katkov L->getUniqueNonLatchExitBlocks(ExitBlocks); 79845c43e7dSSerguei Katkov if (any_of(ExitBlocks, [](const BasicBlock *EB) { 799eae0d2e9SSerguei Katkov return !EB->getTerminatingDeoptimizeCall(); 80045c43e7dSSerguei Katkov })) 801af7e1588SEvgeniy Brevnov return nullptr; 802af7e1588SEvgeniy Brevnov 803af7e1588SEvgeniy Brevnov return LatchBR; 804af7e1588SEvgeniy Brevnov } 805af7e1588SEvgeniy Brevnov 806af7e1588SEvgeniy Brevnov Optional<unsigned> 807af7e1588SEvgeniy Brevnov llvm::getLoopEstimatedTripCount(Loop *L, 808af7e1588SEvgeniy Brevnov unsigned *EstimatedLoopInvocationWeight) { 809af7e1588SEvgeniy Brevnov // Support loops with an exiting latch and other existing exists only 810af7e1588SEvgeniy Brevnov // deoptimize. 811af7e1588SEvgeniy Brevnov BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L); 812af7e1588SEvgeniy Brevnov if (!LatchBranch) 81345c43e7dSSerguei Katkov return None; 81445c43e7dSSerguei Katkov 81541d72a86SDehao Chen // To estimate the number of times the loop body was executed, we want to 81641d72a86SDehao Chen // know the number of times the backedge was taken, vs. the number of times 81741d72a86SDehao Chen // we exited the loop. 818f0abe820SEvgeniy Brevnov uint64_t BackedgeTakenWeight, LatchExitWeight; 819af7e1588SEvgeniy Brevnov if (!LatchBranch->extractProfMetadata(BackedgeTakenWeight, LatchExitWeight)) 82041d72a86SDehao Chen return None; 82141d72a86SDehao Chen 822af7e1588SEvgeniy Brevnov if (LatchBranch->getSuccessor(0) != L->getHeader()) 823f0abe820SEvgeniy Brevnov std::swap(BackedgeTakenWeight, LatchExitWeight); 824f0abe820SEvgeniy Brevnov 82510357e1cSEvgeniy Brevnov if (!LatchExitWeight) 82610357e1cSEvgeniy Brevnov return None; 82741d72a86SDehao Chen 828af7e1588SEvgeniy Brevnov if (EstimatedLoopInvocationWeight) 829af7e1588SEvgeniy Brevnov *EstimatedLoopInvocationWeight = LatchExitWeight; 830af7e1588SEvgeniy Brevnov 83110357e1cSEvgeniy Brevnov // Estimated backedge taken count is a ratio of the backedge taken weight by 832cfe97681SEvgeniy Brevnov // the weight of the edge exiting the loop, rounded to nearest. 83310357e1cSEvgeniy Brevnov uint64_t BackedgeTakenCount = 83410357e1cSEvgeniy Brevnov llvm::divideNearest(BackedgeTakenWeight, LatchExitWeight); 83510357e1cSEvgeniy Brevnov // Estimated trip count is one plus estimated backedge taken count. 83610357e1cSEvgeniy Brevnov return BackedgeTakenCount + 1; 83741d72a86SDehao Chen } 838cf9daa33SAmara Emerson 839af7e1588SEvgeniy Brevnov bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, 840af7e1588SEvgeniy Brevnov unsigned EstimatedloopInvocationWeight) { 841af7e1588SEvgeniy Brevnov // Support loops with an exiting latch and other existing exists only 842af7e1588SEvgeniy Brevnov // deoptimize. 843af7e1588SEvgeniy Brevnov BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L); 844af7e1588SEvgeniy Brevnov if (!LatchBranch) 845af7e1588SEvgeniy Brevnov return false; 846af7e1588SEvgeniy Brevnov 847af7e1588SEvgeniy Brevnov // Calculate taken and exit weights. 848af7e1588SEvgeniy Brevnov unsigned LatchExitWeight = 0; 849af7e1588SEvgeniy Brevnov unsigned BackedgeTakenWeight = 0; 850af7e1588SEvgeniy Brevnov 851af7e1588SEvgeniy Brevnov if (EstimatedTripCount > 0) { 852af7e1588SEvgeniy Brevnov LatchExitWeight = EstimatedloopInvocationWeight; 853af7e1588SEvgeniy Brevnov BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight; 854af7e1588SEvgeniy Brevnov } 855af7e1588SEvgeniy Brevnov 856af7e1588SEvgeniy Brevnov // Make a swap if back edge is taken when condition is "false". 857af7e1588SEvgeniy Brevnov if (LatchBranch->getSuccessor(0) != L->getHeader()) 858af7e1588SEvgeniy Brevnov std::swap(BackedgeTakenWeight, LatchExitWeight); 859af7e1588SEvgeniy Brevnov 860af7e1588SEvgeniy Brevnov MDBuilder MDB(LatchBranch->getContext()); 861af7e1588SEvgeniy Brevnov 862af7e1588SEvgeniy Brevnov // Set/Update profile metadata. 863af7e1588SEvgeniy Brevnov LatchBranch->setMetadata( 864af7e1588SEvgeniy Brevnov LLVMContext::MD_prof, 865af7e1588SEvgeniy Brevnov MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight)); 866af7e1588SEvgeniy Brevnov 867af7e1588SEvgeniy Brevnov return true; 868af7e1588SEvgeniy Brevnov } 869af7e1588SEvgeniy Brevnov 8706cb64787SDavid Green bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop, 871395b80cdSDavid Green ScalarEvolution &SE) { 872395b80cdSDavid Green Loop *OuterL = InnerLoop->getParentLoop(); 873395b80cdSDavid Green if (!OuterL) 874395b80cdSDavid Green return true; 875395b80cdSDavid Green 876395b80cdSDavid Green // Get the backedge taken count for the inner loop 877395b80cdSDavid Green BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); 878395b80cdSDavid Green const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch); 879395b80cdSDavid Green if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) || 880395b80cdSDavid Green !InnerLoopBECountSC->getType()->isIntegerTy()) 881395b80cdSDavid Green return false; 882395b80cdSDavid Green 883395b80cdSDavid Green // Get whether count is invariant to the outer loop 884395b80cdSDavid Green ScalarEvolution::LoopDisposition LD = 885395b80cdSDavid Green SE.getLoopDisposition(InnerLoopBECountSC, OuterL); 886395b80cdSDavid Green if (LD != ScalarEvolution::LoopInvariant) 887395b80cdSDavid Green return false; 888395b80cdSDavid Green 889395b80cdSDavid Green return true; 890395b80cdSDavid Green } 891395b80cdSDavid Green 892*0dcd2b40SSimon Pilgrim CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) { 8936594dc37SVikram TV switch (RK) { 8946594dc37SVikram TV default: 8956594dc37SVikram TV llvm_unreachable("Unknown min/max recurrence kind"); 896c74e8539SSanjay Patel case RecurKind::UMin: 897*0dcd2b40SSimon Pilgrim return CmpInst::ICMP_ULT; 898c74e8539SSanjay Patel case RecurKind::UMax: 899*0dcd2b40SSimon Pilgrim return CmpInst::ICMP_UGT; 900c74e8539SSanjay Patel case RecurKind::SMin: 901*0dcd2b40SSimon Pilgrim return CmpInst::ICMP_SLT; 902c74e8539SSanjay Patel case RecurKind::SMax: 903*0dcd2b40SSimon Pilgrim return CmpInst::ICMP_SGT; 904c74e8539SSanjay Patel case RecurKind::FMin: 905*0dcd2b40SSimon Pilgrim return CmpInst::FCMP_OLT; 906c74e8539SSanjay Patel case RecurKind::FMax: 907*0dcd2b40SSimon Pilgrim return CmpInst::FCMP_OGT; 908*0dcd2b40SSimon Pilgrim } 9096594dc37SVikram TV } 9106594dc37SVikram TV 911*0dcd2b40SSimon Pilgrim Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, 912*0dcd2b40SSimon Pilgrim Value *Right) { 913*0dcd2b40SSimon Pilgrim CmpInst::Predicate Pred = getMinMaxReductionPredicate(RK); 91409b1c563SSanjay Patel Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp"); 9156594dc37SVikram TV Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); 9166594dc37SVikram TV return Select; 9176594dc37SVikram TV } 9186594dc37SVikram TV 91923c2182cSSimon Pilgrim // Helper to generate an ordered reduction. 920c74e8539SSanjay Patel Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, 921c74e8539SSanjay Patel unsigned Op, RecurKind RdxKind, 92223c2182cSSimon Pilgrim ArrayRef<Value *> RedOps) { 9238d11ec66SChristopher Tetreault unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements(); 92423c2182cSSimon Pilgrim 92523c2182cSSimon Pilgrim // Extract and apply reduction ops in ascending order: 92623c2182cSSimon Pilgrim // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1] 92723c2182cSSimon Pilgrim Value *Result = Acc; 92823c2182cSSimon Pilgrim for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) { 92923c2182cSSimon Pilgrim Value *Ext = 93023c2182cSSimon Pilgrim Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx)); 93123c2182cSSimon Pilgrim 93223c2182cSSimon Pilgrim if (Op != Instruction::ICmp && Op != Instruction::FCmp) { 93323c2182cSSimon Pilgrim Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext, 93423c2182cSSimon Pilgrim "bin.rdx"); 93523c2182cSSimon Pilgrim } else { 936c74e8539SSanjay Patel assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) && 93723c2182cSSimon Pilgrim "Invalid min/max"); 938c74e8539SSanjay Patel Result = createMinMaxOp(Builder, RdxKind, Result, Ext); 93923c2182cSSimon Pilgrim } 94023c2182cSSimon Pilgrim 94123c2182cSSimon Pilgrim if (!RedOps.empty()) 94223c2182cSSimon Pilgrim propagateIRFlags(Result, RedOps); 94323c2182cSSimon Pilgrim } 94423c2182cSSimon Pilgrim 94523c2182cSSimon Pilgrim return Result; 94623c2182cSSimon Pilgrim } 94723c2182cSSimon Pilgrim 948cf9daa33SAmara Emerson // Helper to generate a log2 shuffle reduction. 949c74e8539SSanjay Patel Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, 950c74e8539SSanjay Patel unsigned Op, RecurKind RdxKind, 951ad62a3a2SSanjay Patel ArrayRef<Value *> RedOps) { 9528d11ec66SChristopher Tetreault unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements(); 953cf9daa33SAmara Emerson // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles 954cf9daa33SAmara Emerson // and vector ops, reducing the set of values being computed by half each 955cf9daa33SAmara Emerson // round. 956cf9daa33SAmara Emerson assert(isPowerOf2_32(VF) && 957cf9daa33SAmara Emerson "Reduction emission only supported for pow2 vectors!"); 958cf9daa33SAmara Emerson Value *TmpVec = Src; 9596f64dacaSBenjamin Kramer SmallVector<int, 32> ShuffleMask(VF); 960cf9daa33SAmara Emerson for (unsigned i = VF; i != 1; i >>= 1) { 961cf9daa33SAmara Emerson // Move the upper half of the vector to the lower half. 962cf9daa33SAmara Emerson for (unsigned j = 0; j != i / 2; ++j) 9636f64dacaSBenjamin Kramer ShuffleMask[j] = i / 2 + j; 964cf9daa33SAmara Emerson 965cf9daa33SAmara Emerson // Fill the rest of the mask with undef. 9666f64dacaSBenjamin Kramer std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1); 967cf9daa33SAmara Emerson 9689b296102SJuneyoung Lee Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf"); 969cf9daa33SAmara Emerson 970cf9daa33SAmara Emerson if (Op != Instruction::ICmp && Op != Instruction::FCmp) { 971ad62a3a2SSanjay Patel // The builder propagates its fast-math-flags setting. 972ad62a3a2SSanjay Patel TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, 973ad62a3a2SSanjay Patel "bin.rdx"); 974cf9daa33SAmara Emerson } else { 975c74e8539SSanjay Patel assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) && 976cf9daa33SAmara Emerson "Invalid min/max"); 977c74e8539SSanjay Patel TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf); 978cf9daa33SAmara Emerson } 979cf9daa33SAmara Emerson if (!RedOps.empty()) 980cf9daa33SAmara Emerson propagateIRFlags(TmpVec, RedOps); 981bc1148e7SSanjay Patel 982bc1148e7SSanjay Patel // We may compute the reassociated scalar ops in a way that does not 983bc1148e7SSanjay Patel // preserve nsw/nuw etc. Conservatively, drop those flags. 984bc1148e7SSanjay Patel if (auto *ReductionInst = dyn_cast<Instruction>(TmpVec)) 985bc1148e7SSanjay Patel ReductionInst->dropPoisonGeneratingFlags(); 986cf9daa33SAmara Emerson } 987cf9daa33SAmara Emerson // The result is in the first element of the vector. 988cf9daa33SAmara Emerson return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); 989cf9daa33SAmara Emerson } 990cf9daa33SAmara Emerson 991c74e8539SSanjay Patel Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, 992c74e8539SSanjay Patel const TargetTransformInfo *TTI, 99336263a7cSSanjay Patel Value *Src, RecurKind RdxKind, 994cf9daa33SAmara Emerson ArrayRef<Value *> RedOps) { 99597669575SSanjay Patel auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType(); 99636263a7cSSanjay Patel switch (RdxKind) { 99736263a7cSSanjay Patel case RecurKind::Add: 99897669575SSanjay Patel return Builder.CreateAddReduce(Src); 99936263a7cSSanjay Patel case RecurKind::Mul: 100097669575SSanjay Patel return Builder.CreateMulReduce(Src); 100136263a7cSSanjay Patel case RecurKind::And: 100297669575SSanjay Patel return Builder.CreateAndReduce(Src); 100336263a7cSSanjay Patel case RecurKind::Or: 100497669575SSanjay Patel return Builder.CreateOrReduce(Src); 100536263a7cSSanjay Patel case RecurKind::Xor: 100697669575SSanjay Patel return Builder.CreateXorReduce(Src); 100736263a7cSSanjay Patel case RecurKind::FAdd: 100897669575SSanjay Patel return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy), 100997669575SSanjay Patel Src); 101036263a7cSSanjay Patel case RecurKind::FMul: 101197669575SSanjay Patel return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src); 1012c74e8539SSanjay Patel case RecurKind::SMax: 101397669575SSanjay Patel return Builder.CreateIntMaxReduce(Src, true); 1014c74e8539SSanjay Patel case RecurKind::SMin: 101597669575SSanjay Patel return Builder.CreateIntMinReduce(Src, true); 1016c74e8539SSanjay Patel case RecurKind::UMax: 101797669575SSanjay Patel return Builder.CreateIntMaxReduce(Src, false); 1018c74e8539SSanjay Patel case RecurKind::UMin: 101997669575SSanjay Patel return Builder.CreateIntMinReduce(Src, false); 102036263a7cSSanjay Patel case RecurKind::FMax: 102197669575SSanjay Patel return Builder.CreateFPMaxReduce(Src); 102236263a7cSSanjay Patel case RecurKind::FMin: 102397669575SSanjay Patel return Builder.CreateFPMinReduce(Src); 1024cf9daa33SAmara Emerson default: 1025cf9daa33SAmara Emerson llvm_unreachable("Unhandled opcode"); 1026cf9daa33SAmara Emerson } 1027cf9daa33SAmara Emerson } 1028cf9daa33SAmara Emerson 102928ffe38bSNikita Popov Value *llvm::createTargetReduction(IRBuilderBase &B, 1030cf9daa33SAmara Emerson const TargetTransformInfo *TTI, 1031685f1bfdSKrasimir Georgiev const RecurrenceDescriptor &Desc, 1032685f1bfdSKrasimir Georgiev Value *Src) { 1033cf9daa33SAmara Emerson // TODO: Support in-order reductions based on the recurrence descriptor. 1034ad62a3a2SSanjay Patel // All ops in the reduction inherit fast-math-flags from the recurrence 1035ad62a3a2SSanjay Patel // descriptor. 103628ffe38bSNikita Popov IRBuilderBase::FastMathFlagGuard FMFGuard(B); 1037ad62a3a2SSanjay Patel B.setFastMathFlags(Desc.getFastMathFlags()); 1038685f1bfdSKrasimir Georgiev return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind()); 1039cf9daa33SAmara Emerson } 1040cf9daa33SAmara Emerson 10417344f3d3SKerry McLaughlin Value *llvm::createOrderedReduction(IRBuilderBase &B, 10425e6bfb66SSimon Pilgrim const RecurrenceDescriptor &Desc, 10435e6bfb66SSimon Pilgrim Value *Src, Value *Start) { 1044ce4acb01SBenjamin Kramer assert(Desc.getRecurrenceKind() == RecurKind::FAdd && 1045ce4acb01SBenjamin Kramer "Unexpected reduction kind"); 10467344f3d3SKerry McLaughlin assert(Src->getType()->isVectorTy() && "Expected a vector type"); 10477344f3d3SKerry McLaughlin assert(!Start->getType()->isVectorTy() && "Expected a scalar type"); 10487344f3d3SKerry McLaughlin 10497344f3d3SKerry McLaughlin return B.CreateFAddReduce(Start, Src); 10507344f3d3SKerry McLaughlin } 10517344f3d3SKerry McLaughlin 1052a61f4b89SDinar Temirbulatov void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { 1053a61f4b89SDinar Temirbulatov auto *VecOp = dyn_cast<Instruction>(I); 1054a61f4b89SDinar Temirbulatov if (!VecOp) 1055a61f4b89SDinar Temirbulatov return; 1056a61f4b89SDinar Temirbulatov auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0]) 1057a61f4b89SDinar Temirbulatov : dyn_cast<Instruction>(OpValue); 1058a61f4b89SDinar Temirbulatov if (!Intersection) 1059a61f4b89SDinar Temirbulatov return; 1060a61f4b89SDinar Temirbulatov const unsigned Opcode = Intersection->getOpcode(); 1061a61f4b89SDinar Temirbulatov VecOp->copyIRFlags(Intersection); 1062a61f4b89SDinar Temirbulatov for (auto *V : VL) { 1063a61f4b89SDinar Temirbulatov auto *Instr = dyn_cast<Instruction>(V); 1064a61f4b89SDinar Temirbulatov if (!Instr) 1065a61f4b89SDinar Temirbulatov continue; 1066a61f4b89SDinar Temirbulatov if (OpValue == nullptr || Opcode == Instr->getOpcode()) 1067a61f4b89SDinar Temirbulatov VecOp->andIRFlags(V); 1068cf9daa33SAmara Emerson } 1069cf9daa33SAmara Emerson } 1070a78dc4d6SMax Kazantsev 1071a78dc4d6SMax Kazantsev bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L, 1072a78dc4d6SMax Kazantsev ScalarEvolution &SE) { 1073a78dc4d6SMax Kazantsev const SCEV *Zero = SE.getZero(S->getType()); 1074a78dc4d6SMax Kazantsev return SE.isAvailableAtLoopEntry(S, L) && 1075a78dc4d6SMax Kazantsev SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero); 1076a78dc4d6SMax Kazantsev } 1077a78dc4d6SMax Kazantsev 1078a78dc4d6SMax Kazantsev bool llvm::isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, 1079a78dc4d6SMax Kazantsev ScalarEvolution &SE) { 1080a78dc4d6SMax Kazantsev const SCEV *Zero = SE.getZero(S->getType()); 1081a78dc4d6SMax Kazantsev return SE.isAvailableAtLoopEntry(S, L) && 1082a78dc4d6SMax Kazantsev SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero); 1083a78dc4d6SMax Kazantsev } 1084a78dc4d6SMax Kazantsev 1085a78dc4d6SMax Kazantsev bool llvm::cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, 1086a78dc4d6SMax Kazantsev bool Signed) { 1087a78dc4d6SMax Kazantsev unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth(); 1088a78dc4d6SMax Kazantsev APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) : 1089a78dc4d6SMax Kazantsev APInt::getMinValue(BitWidth); 1090a78dc4d6SMax Kazantsev auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; 1091a78dc4d6SMax Kazantsev return SE.isAvailableAtLoopEntry(S, L) && 1092a78dc4d6SMax Kazantsev SE.isLoopEntryGuardedByCond(L, Predicate, S, 1093a78dc4d6SMax Kazantsev SE.getConstant(Min)); 1094a78dc4d6SMax Kazantsev } 1095a78dc4d6SMax Kazantsev 1096a78dc4d6SMax Kazantsev bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, 1097a78dc4d6SMax Kazantsev bool Signed) { 1098a78dc4d6SMax Kazantsev unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth(); 1099a78dc4d6SMax Kazantsev APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) : 1100a78dc4d6SMax Kazantsev APInt::getMaxValue(BitWidth); 1101a78dc4d6SMax Kazantsev auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; 1102a78dc4d6SMax Kazantsev return SE.isAvailableAtLoopEntry(S, L) && 1103a78dc4d6SMax Kazantsev SE.isLoopEntryGuardedByCond(L, Predicate, S, 1104a78dc4d6SMax Kazantsev SE.getConstant(Max)); 1105a78dc4d6SMax Kazantsev } 110693175a5cSSjoerd Meijer 110793175a5cSSjoerd Meijer //===----------------------------------------------------------------------===// 110893175a5cSSjoerd Meijer // rewriteLoopExitValues - Optimize IV users outside the loop. 110993175a5cSSjoerd Meijer // As a side effect, reduces the amount of IV processing within the loop. 111093175a5cSSjoerd Meijer //===----------------------------------------------------------------------===// 111193175a5cSSjoerd Meijer 111293175a5cSSjoerd Meijer static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) { 111393175a5cSSjoerd Meijer SmallPtrSet<const Instruction *, 8> Visited; 111493175a5cSSjoerd Meijer SmallVector<const Instruction *, 8> WorkList; 111593175a5cSSjoerd Meijer Visited.insert(I); 111693175a5cSSjoerd Meijer WorkList.push_back(I); 111793175a5cSSjoerd Meijer while (!WorkList.empty()) { 111893175a5cSSjoerd Meijer const Instruction *Curr = WorkList.pop_back_val(); 111993175a5cSSjoerd Meijer // This use is outside the loop, nothing to do. 112093175a5cSSjoerd Meijer if (!L->contains(Curr)) 112193175a5cSSjoerd Meijer continue; 112293175a5cSSjoerd Meijer // Do we assume it is a "hard" use which will not be eliminated easily? 112393175a5cSSjoerd Meijer if (Curr->mayHaveSideEffects()) 112493175a5cSSjoerd Meijer return true; 112593175a5cSSjoerd Meijer // Otherwise, add all its users to worklist. 112693175a5cSSjoerd Meijer for (auto U : Curr->users()) { 112793175a5cSSjoerd Meijer auto *UI = cast<Instruction>(U); 112893175a5cSSjoerd Meijer if (Visited.insert(UI).second) 112993175a5cSSjoerd Meijer WorkList.push_back(UI); 113093175a5cSSjoerd Meijer } 113193175a5cSSjoerd Meijer } 113293175a5cSSjoerd Meijer return false; 113393175a5cSSjoerd Meijer } 113493175a5cSSjoerd Meijer 113593175a5cSSjoerd Meijer // Collect information about PHI nodes which can be transformed in 113693175a5cSSjoerd Meijer // rewriteLoopExitValues. 113793175a5cSSjoerd Meijer struct RewritePhi { 1138b2df9612SRoman Lebedev PHINode *PN; // For which PHI node is this replacement? 1139b2df9612SRoman Lebedev unsigned Ith; // For which incoming value? 1140b2df9612SRoman Lebedev const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting. 1141b2df9612SRoman Lebedev Instruction *ExpansionPoint; // Where we'd like to expand that SCEV? 1142b2df9612SRoman Lebedev bool HighCost; // Is this expansion a high-cost? 114393175a5cSSjoerd Meijer 1144b2df9612SRoman Lebedev RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt, 1145b2df9612SRoman Lebedev bool H) 1146b2df9612SRoman Lebedev : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt), 1147b2df9612SRoman Lebedev HighCost(H) {} 114893175a5cSSjoerd Meijer }; 114993175a5cSSjoerd Meijer 115093175a5cSSjoerd Meijer // Check whether it is possible to delete the loop after rewriting exit 115193175a5cSSjoerd Meijer // value. If it is possible, ignore ReplaceExitValue and do rewriting 115293175a5cSSjoerd Meijer // aggressively. 115393175a5cSSjoerd Meijer static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) { 115493175a5cSSjoerd Meijer BasicBlock *Preheader = L->getLoopPreheader(); 115593175a5cSSjoerd Meijer // If there is no preheader, the loop will not be deleted. 115693175a5cSSjoerd Meijer if (!Preheader) 115793175a5cSSjoerd Meijer return false; 115893175a5cSSjoerd Meijer 115993175a5cSSjoerd Meijer // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1. 116093175a5cSSjoerd Meijer // We obviate multiple ExitingBlocks case for simplicity. 116193175a5cSSjoerd Meijer // TODO: If we see testcase with multiple ExitingBlocks can be deleted 116293175a5cSSjoerd Meijer // after exit value rewriting, we can enhance the logic here. 116393175a5cSSjoerd Meijer SmallVector<BasicBlock *, 4> ExitingBlocks; 116493175a5cSSjoerd Meijer L->getExitingBlocks(ExitingBlocks); 116593175a5cSSjoerd Meijer SmallVector<BasicBlock *, 8> ExitBlocks; 116693175a5cSSjoerd Meijer L->getUniqueExitBlocks(ExitBlocks); 116793175a5cSSjoerd Meijer if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1) 116893175a5cSSjoerd Meijer return false; 116993175a5cSSjoerd Meijer 117093175a5cSSjoerd Meijer BasicBlock *ExitBlock = ExitBlocks[0]; 117193175a5cSSjoerd Meijer BasicBlock::iterator BI = ExitBlock->begin(); 117293175a5cSSjoerd Meijer while (PHINode *P = dyn_cast<PHINode>(BI)) { 117393175a5cSSjoerd Meijer Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]); 117493175a5cSSjoerd Meijer 117593175a5cSSjoerd Meijer // If the Incoming value of P is found in RewritePhiSet, we know it 117693175a5cSSjoerd Meijer // could be rewritten to use a loop invariant value in transformation 117793175a5cSSjoerd Meijer // phase later. Skip it in the loop invariant check below. 117893175a5cSSjoerd Meijer bool found = false; 117993175a5cSSjoerd Meijer for (const RewritePhi &Phi : RewritePhiSet) { 118093175a5cSSjoerd Meijer unsigned i = Phi.Ith; 118193175a5cSSjoerd Meijer if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) { 118293175a5cSSjoerd Meijer found = true; 118393175a5cSSjoerd Meijer break; 118493175a5cSSjoerd Meijer } 118593175a5cSSjoerd Meijer } 118693175a5cSSjoerd Meijer 118793175a5cSSjoerd Meijer Instruction *I; 118893175a5cSSjoerd Meijer if (!found && (I = dyn_cast<Instruction>(Incoming))) 118993175a5cSSjoerd Meijer if (!L->hasLoopInvariantOperands(I)) 119093175a5cSSjoerd Meijer return false; 119193175a5cSSjoerd Meijer 119293175a5cSSjoerd Meijer ++BI; 119393175a5cSSjoerd Meijer } 119493175a5cSSjoerd Meijer 119593175a5cSSjoerd Meijer for (auto *BB : L->blocks()) 119693175a5cSSjoerd Meijer if (llvm::any_of(*BB, [](Instruction &I) { 119793175a5cSSjoerd Meijer return I.mayHaveSideEffects(); 119893175a5cSSjoerd Meijer })) 119993175a5cSSjoerd Meijer return false; 120093175a5cSSjoerd Meijer 120193175a5cSSjoerd Meijer return true; 120293175a5cSSjoerd Meijer } 120393175a5cSSjoerd Meijer 12040789f280SRoman Lebedev int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, 12050789f280SRoman Lebedev ScalarEvolution *SE, 12060789f280SRoman Lebedev const TargetTransformInfo *TTI, 12070789f280SRoman Lebedev SCEVExpander &Rewriter, DominatorTree *DT, 12080789f280SRoman Lebedev ReplaceExitVal ReplaceExitValue, 120993175a5cSSjoerd Meijer SmallVector<WeakTrackingVH, 16> &DeadInsts) { 121093175a5cSSjoerd Meijer // Check a pre-condition. 121193175a5cSSjoerd Meijer assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 121293175a5cSSjoerd Meijer "Indvars did not preserve LCSSA!"); 121393175a5cSSjoerd Meijer 121493175a5cSSjoerd Meijer SmallVector<BasicBlock*, 8> ExitBlocks; 121593175a5cSSjoerd Meijer L->getUniqueExitBlocks(ExitBlocks); 121693175a5cSSjoerd Meijer 121793175a5cSSjoerd Meijer SmallVector<RewritePhi, 8> RewritePhiSet; 121893175a5cSSjoerd Meijer // Find all values that are computed inside the loop, but used outside of it. 121993175a5cSSjoerd Meijer // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan 122093175a5cSSjoerd Meijer // the exit blocks of the loop to find them. 122193175a5cSSjoerd Meijer for (BasicBlock *ExitBB : ExitBlocks) { 122293175a5cSSjoerd Meijer // If there are no PHI nodes in this exit block, then no values defined 122393175a5cSSjoerd Meijer // inside the loop are used on this path, skip it. 122493175a5cSSjoerd Meijer PHINode *PN = dyn_cast<PHINode>(ExitBB->begin()); 122593175a5cSSjoerd Meijer if (!PN) continue; 122693175a5cSSjoerd Meijer 122793175a5cSSjoerd Meijer unsigned NumPreds = PN->getNumIncomingValues(); 122893175a5cSSjoerd Meijer 122993175a5cSSjoerd Meijer // Iterate over all of the PHI nodes. 123093175a5cSSjoerd Meijer BasicBlock::iterator BBI = ExitBB->begin(); 123193175a5cSSjoerd Meijer while ((PN = dyn_cast<PHINode>(BBI++))) { 123293175a5cSSjoerd Meijer if (PN->use_empty()) 123393175a5cSSjoerd Meijer continue; // dead use, don't replace it 123493175a5cSSjoerd Meijer 123593175a5cSSjoerd Meijer if (!SE->isSCEVable(PN->getType())) 123693175a5cSSjoerd Meijer continue; 123793175a5cSSjoerd Meijer 123893175a5cSSjoerd Meijer // It's necessary to tell ScalarEvolution about this explicitly so that 123993175a5cSSjoerd Meijer // it can walk the def-use list and forget all SCEVs, as it may not be 124093175a5cSSjoerd Meijer // watching the PHI itself. Once the new exit value is in place, there 124193175a5cSSjoerd Meijer // may not be a def-use connection between the loop and every instruction 124293175a5cSSjoerd Meijer // which got a SCEVAddRecExpr for that loop. 124393175a5cSSjoerd Meijer SE->forgetValue(PN); 124493175a5cSSjoerd Meijer 124593175a5cSSjoerd Meijer // Iterate over all of the values in all the PHI nodes. 124693175a5cSSjoerd Meijer for (unsigned i = 0; i != NumPreds; ++i) { 124793175a5cSSjoerd Meijer // If the value being merged in is not integer or is not defined 124893175a5cSSjoerd Meijer // in the loop, skip it. 124993175a5cSSjoerd Meijer Value *InVal = PN->getIncomingValue(i); 125093175a5cSSjoerd Meijer if (!isa<Instruction>(InVal)) 125193175a5cSSjoerd Meijer continue; 125293175a5cSSjoerd Meijer 125393175a5cSSjoerd Meijer // If this pred is for a subloop, not L itself, skip it. 125493175a5cSSjoerd Meijer if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) 125593175a5cSSjoerd Meijer continue; // The Block is in a subloop, skip it. 125693175a5cSSjoerd Meijer 125793175a5cSSjoerd Meijer // Check that InVal is defined in the loop. 125893175a5cSSjoerd Meijer Instruction *Inst = cast<Instruction>(InVal); 125993175a5cSSjoerd Meijer if (!L->contains(Inst)) 126093175a5cSSjoerd Meijer continue; 126193175a5cSSjoerd Meijer 126293175a5cSSjoerd Meijer // Okay, this instruction has a user outside of the current loop 126393175a5cSSjoerd Meijer // and varies predictably *inside* the loop. Evaluate the value it 126493175a5cSSjoerd Meijer // contains when the loop exits, if possible. We prefer to start with 126593175a5cSSjoerd Meijer // expressions which are true for all exits (so as to maximize 126693175a5cSSjoerd Meijer // expression reuse by the SCEVExpander), but resort to per-exit 126793175a5cSSjoerd Meijer // evaluation if that fails. 126893175a5cSSjoerd Meijer const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); 126993175a5cSSjoerd Meijer if (isa<SCEVCouldNotCompute>(ExitValue) || 127093175a5cSSjoerd Meijer !SE->isLoopInvariant(ExitValue, L) || 127193175a5cSSjoerd Meijer !isSafeToExpand(ExitValue, *SE)) { 127293175a5cSSjoerd Meijer // TODO: This should probably be sunk into SCEV in some way; maybe a 127393175a5cSSjoerd Meijer // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for 127493175a5cSSjoerd Meijer // most SCEV expressions and other recurrence types (e.g. shift 127593175a5cSSjoerd Meijer // recurrences). Is there existing code we can reuse? 127693175a5cSSjoerd Meijer const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i)); 127793175a5cSSjoerd Meijer if (isa<SCEVCouldNotCompute>(ExitCount)) 127893175a5cSSjoerd Meijer continue; 127993175a5cSSjoerd Meijer if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst))) 128093175a5cSSjoerd Meijer if (AddRec->getLoop() == L) 128193175a5cSSjoerd Meijer ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE); 128293175a5cSSjoerd Meijer if (isa<SCEVCouldNotCompute>(ExitValue) || 128393175a5cSSjoerd Meijer !SE->isLoopInvariant(ExitValue, L) || 128493175a5cSSjoerd Meijer !isSafeToExpand(ExitValue, *SE)) 128593175a5cSSjoerd Meijer continue; 128693175a5cSSjoerd Meijer } 128793175a5cSSjoerd Meijer 128893175a5cSSjoerd Meijer // Computing the value outside of the loop brings no benefit if it is 128993175a5cSSjoerd Meijer // definitely used inside the loop in a way which can not be optimized 12907d572ef2SRoman Lebedev // away. Avoid doing so unless we know we have a value which computes 12917d572ef2SRoman Lebedev // the ExitValue already. TODO: This should be merged into SCEV 12927d572ef2SRoman Lebedev // expander to leverage its knowledge of existing expressions. 12937d572ef2SRoman Lebedev if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) && 12947d572ef2SRoman Lebedev !isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst)) 129593175a5cSSjoerd Meijer continue; 129693175a5cSSjoerd Meijer 1297b2df9612SRoman Lebedev // Check if expansions of this SCEV would count as being high cost. 12987d572ef2SRoman Lebedev bool HighCost = Rewriter.isHighCostExpansion( 12997d572ef2SRoman Lebedev ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst); 1300b2df9612SRoman Lebedev 1301b2df9612SRoman Lebedev // Note that we must not perform expansions until after 1302b2df9612SRoman Lebedev // we query *all* the costs, because if we perform temporary expansion 1303b2df9612SRoman Lebedev // inbetween, one that we might not intend to keep, said expansion 1304b2df9612SRoman Lebedev // *may* affect cost calculation of the the next SCEV's we'll query, 1305b2df9612SRoman Lebedev // and next SCEV may errneously get smaller cost. 1306b2df9612SRoman Lebedev 1307b2df9612SRoman Lebedev // Collect all the candidate PHINodes to be rewritten. 1308b2df9612SRoman Lebedev RewritePhiSet.emplace_back(PN, i, ExitValue, Inst, HighCost); 1309b2df9612SRoman Lebedev } 1310b2df9612SRoman Lebedev } 1311b2df9612SRoman Lebedev } 1312b2df9612SRoman Lebedev 1313795d142dSRoman Lebedev // TODO: evaluate whether it is beneficial to change how we calculate 1314795d142dSRoman Lebedev // high-cost: if we have SCEV 'A' which we know we will expand, should we 1315795d142dSRoman Lebedev // calculate the cost of other SCEV's after expanding SCEV 'A', thus 1316795d142dSRoman Lebedev // potentially giving cost bonus to those other SCEV's? 131793175a5cSSjoerd Meijer 131893175a5cSSjoerd Meijer bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet); 131993175a5cSSjoerd Meijer int NumReplaced = 0; 132093175a5cSSjoerd Meijer 132193175a5cSSjoerd Meijer // Transformation. 132293175a5cSSjoerd Meijer for (const RewritePhi &Phi : RewritePhiSet) { 132393175a5cSSjoerd Meijer PHINode *PN = Phi.PN; 132493175a5cSSjoerd Meijer 132593175a5cSSjoerd Meijer // Only do the rewrite when the ExitValue can be expanded cheaply. 132693175a5cSSjoerd Meijer // If LoopCanBeDel is true, rewrite exit value aggressively. 1327795d142dSRoman Lebedev if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) 132893175a5cSSjoerd Meijer continue; 1329795d142dSRoman Lebedev 1330795d142dSRoman Lebedev Value *ExitVal = Rewriter.expandCodeFor( 1331795d142dSRoman Lebedev Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint); 1332795d142dSRoman Lebedev 1333795d142dSRoman Lebedev LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal 1334795d142dSRoman Lebedev << '\n' 1335795d142dSRoman Lebedev << " LoopVal = " << *(Phi.ExpansionPoint) << "\n"); 1336795d142dSRoman Lebedev 1337795d142dSRoman Lebedev #ifndef NDEBUG 1338795d142dSRoman Lebedev // If we reuse an instruction from a loop which is neither L nor one of 1339795d142dSRoman Lebedev // its containing loops, we end up breaking LCSSA form for this loop by 1340795d142dSRoman Lebedev // creating a new use of its instruction. 1341795d142dSRoman Lebedev if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal)) 1342795d142dSRoman Lebedev if (auto *EVL = LI->getLoopFor(ExitInsn->getParent())) 1343795d142dSRoman Lebedev if (EVL != L) 1344795d142dSRoman Lebedev assert(EVL->contains(L) && "LCSSA breach detected!"); 1345795d142dSRoman Lebedev #endif 134693175a5cSSjoerd Meijer 134793175a5cSSjoerd Meijer NumReplaced++; 134893175a5cSSjoerd Meijer Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith)); 134993175a5cSSjoerd Meijer PN->setIncomingValue(Phi.Ith, ExitVal); 135093175a5cSSjoerd Meijer 135193175a5cSSjoerd Meijer // If this instruction is dead now, delete it. Don't do it now to avoid 135293175a5cSSjoerd Meijer // invalidating iterators. 135393175a5cSSjoerd Meijer if (isInstructionTriviallyDead(Inst, TLI)) 135493175a5cSSjoerd Meijer DeadInsts.push_back(Inst); 135593175a5cSSjoerd Meijer 135693175a5cSSjoerd Meijer // Replace PN with ExitVal if that is legal and does not break LCSSA. 135793175a5cSSjoerd Meijer if (PN->getNumIncomingValues() == 1 && 135893175a5cSSjoerd Meijer LI->replacementPreservesLCSSAForm(PN, ExitVal)) { 135993175a5cSSjoerd Meijer PN->replaceAllUsesWith(ExitVal); 136093175a5cSSjoerd Meijer PN->eraseFromParent(); 136193175a5cSSjoerd Meijer } 136293175a5cSSjoerd Meijer } 136393175a5cSSjoerd Meijer 136493175a5cSSjoerd Meijer // The insertion point instruction may have been deleted; clear it out 136593175a5cSSjoerd Meijer // so that the rewriter doesn't trip over it later. 136693175a5cSSjoerd Meijer Rewriter.clearInsertPoint(); 136793175a5cSSjoerd Meijer return NumReplaced; 136893175a5cSSjoerd Meijer } 1369af7e1588SEvgeniy Brevnov 1370af7e1588SEvgeniy Brevnov /// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for 1371af7e1588SEvgeniy Brevnov /// \p OrigLoop. 1372af7e1588SEvgeniy Brevnov void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop, 1373af7e1588SEvgeniy Brevnov Loop *RemainderLoop, uint64_t UF) { 1374af7e1588SEvgeniy Brevnov assert(UF > 0 && "Zero unrolled factor is not supported"); 1375af7e1588SEvgeniy Brevnov assert(UnrolledLoop != RemainderLoop && 1376af7e1588SEvgeniy Brevnov "Unrolled and Remainder loops are expected to distinct"); 1377af7e1588SEvgeniy Brevnov 1378af7e1588SEvgeniy Brevnov // Get number of iterations in the original scalar loop. 1379af7e1588SEvgeniy Brevnov unsigned OrigLoopInvocationWeight = 0; 1380af7e1588SEvgeniy Brevnov Optional<unsigned> OrigAverageTripCount = 1381af7e1588SEvgeniy Brevnov getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight); 1382af7e1588SEvgeniy Brevnov if (!OrigAverageTripCount) 1383af7e1588SEvgeniy Brevnov return; 1384af7e1588SEvgeniy Brevnov 1385af7e1588SEvgeniy Brevnov // Calculate number of iterations in unrolled loop. 1386af7e1588SEvgeniy Brevnov unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF; 1387af7e1588SEvgeniy Brevnov // Calculate number of iterations for remainder loop. 1388af7e1588SEvgeniy Brevnov unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF; 1389af7e1588SEvgeniy Brevnov 1390af7e1588SEvgeniy Brevnov setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount, 1391af7e1588SEvgeniy Brevnov OrigLoopInvocationWeight); 1392af7e1588SEvgeniy Brevnov setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount, 1393af7e1588SEvgeniy Brevnov OrigLoopInvocationWeight); 1394af7e1588SEvgeniy Brevnov } 1395388de9dfSAlina Sbirlea 1396388de9dfSAlina Sbirlea /// Utility that implements appending of loops onto a worklist. 1397388de9dfSAlina Sbirlea /// Loops are added in preorder (analogous for reverse postorder for trees), 1398388de9dfSAlina Sbirlea /// and the worklist is processed LIFO. 1399388de9dfSAlina Sbirlea template <typename RangeT> 1400388de9dfSAlina Sbirlea void llvm::appendReversedLoopsToWorklist( 1401388de9dfSAlina Sbirlea RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) { 1402388de9dfSAlina Sbirlea // We use an internal worklist to build up the preorder traversal without 1403388de9dfSAlina Sbirlea // recursion. 1404388de9dfSAlina Sbirlea SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist; 1405388de9dfSAlina Sbirlea 1406388de9dfSAlina Sbirlea // We walk the initial sequence of loops in reverse because we generally want 1407388de9dfSAlina Sbirlea // to visit defs before uses and the worklist is LIFO. 1408388de9dfSAlina Sbirlea for (Loop *RootL : Loops) { 1409388de9dfSAlina Sbirlea assert(PreOrderLoops.empty() && "Must start with an empty preorder walk."); 1410388de9dfSAlina Sbirlea assert(PreOrderWorklist.empty() && 1411388de9dfSAlina Sbirlea "Must start with an empty preorder walk worklist."); 1412388de9dfSAlina Sbirlea PreOrderWorklist.push_back(RootL); 1413388de9dfSAlina Sbirlea do { 1414388de9dfSAlina Sbirlea Loop *L = PreOrderWorklist.pop_back_val(); 1415388de9dfSAlina Sbirlea PreOrderWorklist.append(L->begin(), L->end()); 1416388de9dfSAlina Sbirlea PreOrderLoops.push_back(L); 1417388de9dfSAlina Sbirlea } while (!PreOrderWorklist.empty()); 1418388de9dfSAlina Sbirlea 1419388de9dfSAlina Sbirlea Worklist.insert(std::move(PreOrderLoops)); 1420388de9dfSAlina Sbirlea PreOrderLoops.clear(); 1421388de9dfSAlina Sbirlea } 1422388de9dfSAlina Sbirlea } 1423388de9dfSAlina Sbirlea 1424388de9dfSAlina Sbirlea template <typename RangeT> 1425388de9dfSAlina Sbirlea void llvm::appendLoopsToWorklist(RangeT &&Loops, 1426388de9dfSAlina Sbirlea SmallPriorityWorklist<Loop *, 4> &Worklist) { 1427388de9dfSAlina Sbirlea appendReversedLoopsToWorklist(reverse(Loops), Worklist); 1428388de9dfSAlina Sbirlea } 1429388de9dfSAlina Sbirlea 1430388de9dfSAlina Sbirlea template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>( 1431388de9dfSAlina Sbirlea ArrayRef<Loop *> &Loops, SmallPriorityWorklist<Loop *, 4> &Worklist); 1432388de9dfSAlina Sbirlea 143367904db2SAlina Sbirlea template void 143467904db2SAlina Sbirlea llvm::appendLoopsToWorklist<Loop &>(Loop &L, 143567904db2SAlina Sbirlea SmallPriorityWorklist<Loop *, 4> &Worklist); 143667904db2SAlina Sbirlea 1437388de9dfSAlina Sbirlea void llvm::appendLoopsToWorklist(LoopInfo &LI, 1438388de9dfSAlina Sbirlea SmallPriorityWorklist<Loop *, 4> &Worklist) { 1439388de9dfSAlina Sbirlea appendReversedLoopsToWorklist(LI, Worklist); 1440388de9dfSAlina Sbirlea } 14413dcaf296SArkady Shlykov 14423dcaf296SArkady Shlykov Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, 14433dcaf296SArkady Shlykov LoopInfo *LI, LPPassManager *LPM) { 14443dcaf296SArkady Shlykov Loop &New = *LI->AllocateLoop(); 14453dcaf296SArkady Shlykov if (PL) 14463dcaf296SArkady Shlykov PL->addChildLoop(&New); 14473dcaf296SArkady Shlykov else 14483dcaf296SArkady Shlykov LI->addTopLevelLoop(&New); 14493dcaf296SArkady Shlykov 14503dcaf296SArkady Shlykov if (LPM) 14513dcaf296SArkady Shlykov LPM->addLoop(New); 14523dcaf296SArkady Shlykov 14533dcaf296SArkady Shlykov // Add all of the blocks in L to the new loop. 14543dcaf296SArkady Shlykov for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 14553dcaf296SArkady Shlykov I != E; ++I) 14563dcaf296SArkady Shlykov if (LI->getLoopFor(*I) == L) 14573dcaf296SArkady Shlykov New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI); 14583dcaf296SArkady Shlykov 14593dcaf296SArkady Shlykov // Add all of the subloops to the new loop. 14603dcaf296SArkady Shlykov for (Loop *I : *L) 14613dcaf296SArkady Shlykov cloneLoop(I, &New, VM, LI, LPM); 14623dcaf296SArkady Shlykov 14633dcaf296SArkady Shlykov return &New; 14643dcaf296SArkady Shlykov } 14658528186bSFlorian Hahn 14668528186bSFlorian Hahn /// IR Values for the lower and upper bounds of a pointer evolution. We 14678528186bSFlorian Hahn /// need to use value-handles because SCEV expansion can invalidate previously 14688528186bSFlorian Hahn /// expanded values. Thus expansion of a pointer can invalidate the bounds for 14698528186bSFlorian Hahn /// a previous one. 14708528186bSFlorian Hahn struct PointerBounds { 14718528186bSFlorian Hahn TrackingVH<Value> Start; 14728528186bSFlorian Hahn TrackingVH<Value> End; 14738528186bSFlorian Hahn }; 14748528186bSFlorian Hahn 14758528186bSFlorian Hahn /// Expand code for the lower and upper bound of the pointer group \p CG 14768528186bSFlorian Hahn /// in \p TheLoop. \return the values for the bounds. 14778528186bSFlorian Hahn static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, 14788528186bSFlorian Hahn Loop *TheLoop, Instruction *Loc, 147928410d17SFlorian Hahn SCEVExpander &Exp) { 14808528186bSFlorian Hahn LLVMContext &Ctx = Loc->getContext(); 14816d753b07SFlorian Hahn Type *PtrArithTy = Type::getInt8PtrTy(Ctx, CG->AddressSpace); 14828528186bSFlorian Hahn 14838528186bSFlorian Hahn Value *Start = nullptr, *End = nullptr; 14848528186bSFlorian Hahn LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); 14858528186bSFlorian Hahn Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc); 14868528186bSFlorian Hahn End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); 1487e908e063SMindong Chen LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n"); 14888528186bSFlorian Hahn return {Start, End}; 14898528186bSFlorian Hahn } 14908528186bSFlorian Hahn 14918528186bSFlorian Hahn /// Turns a collection of checks into a collection of expanded upper and 14928528186bSFlorian Hahn /// lower bounds for both pointers in the check. 14938528186bSFlorian Hahn static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> 14948528186bSFlorian Hahn expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L, 149528410d17SFlorian Hahn Instruction *Loc, SCEVExpander &Exp) { 14968528186bSFlorian Hahn SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds; 14978528186bSFlorian Hahn 14988528186bSFlorian Hahn // Here we're relying on the SCEV Expander's cache to only emit code for the 14998528186bSFlorian Hahn // same bounds once. 15008528186bSFlorian Hahn transform(PointerChecks, std::back_inserter(ChecksWithBounds), 15018528186bSFlorian Hahn [&](const RuntimePointerCheck &Check) { 150228410d17SFlorian Hahn PointerBounds First = expandBounds(Check.first, L, Loc, Exp), 150328410d17SFlorian Hahn Second = expandBounds(Check.second, L, Loc, Exp); 15048528186bSFlorian Hahn return std::make_pair(First, Second); 15058528186bSFlorian Hahn }); 15068528186bSFlorian Hahn 15078528186bSFlorian Hahn return ChecksWithBounds; 15088528186bSFlorian Hahn } 15098528186bSFlorian Hahn 15108528186bSFlorian Hahn std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks( 15118528186bSFlorian Hahn Instruction *Loc, Loop *TheLoop, 15128528186bSFlorian Hahn const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, 151328410d17SFlorian Hahn SCEVExpander &Exp) { 15148528186bSFlorian Hahn // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible. 15158528186bSFlorian Hahn // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible 151628410d17SFlorian Hahn auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp); 15178528186bSFlorian Hahn 15188528186bSFlorian Hahn LLVMContext &Ctx = Loc->getContext(); 15198528186bSFlorian Hahn Instruction *FirstInst = nullptr; 15208528186bSFlorian Hahn IRBuilder<> ChkBuilder(Loc); 15218528186bSFlorian Hahn // Our instructions might fold to a constant. 15228528186bSFlorian Hahn Value *MemoryRuntimeCheck = nullptr; 15238528186bSFlorian Hahn 15248528186bSFlorian Hahn // FIXME: this helper is currently a duplicate of the one in 15258528186bSFlorian Hahn // LoopVectorize.cpp. 15268528186bSFlorian Hahn auto GetFirstInst = [](Instruction *FirstInst, Value *V, 15278528186bSFlorian Hahn Instruction *Loc) -> Instruction * { 15288528186bSFlorian Hahn if (FirstInst) 15298528186bSFlorian Hahn return FirstInst; 15308528186bSFlorian Hahn if (Instruction *I = dyn_cast<Instruction>(V)) 15318528186bSFlorian Hahn return I->getParent() == Loc->getParent() ? I : nullptr; 15328528186bSFlorian Hahn return nullptr; 15338528186bSFlorian Hahn }; 15348528186bSFlorian Hahn 15358528186bSFlorian Hahn for (const auto &Check : ExpandedChecks) { 15368528186bSFlorian Hahn const PointerBounds &A = Check.first, &B = Check.second; 15378528186bSFlorian Hahn // Check if two pointers (A and B) conflict where conflict is computed as: 15388528186bSFlorian Hahn // start(A) <= end(B) && start(B) <= end(A) 15398528186bSFlorian Hahn unsigned AS0 = A.Start->getType()->getPointerAddressSpace(); 15408528186bSFlorian Hahn unsigned AS1 = B.Start->getType()->getPointerAddressSpace(); 15418528186bSFlorian Hahn 15428528186bSFlorian Hahn assert((AS0 == B.End->getType()->getPointerAddressSpace()) && 15438528186bSFlorian Hahn (AS1 == A.End->getType()->getPointerAddressSpace()) && 15448528186bSFlorian Hahn "Trying to bounds check pointers with different address spaces"); 15458528186bSFlorian Hahn 15468528186bSFlorian Hahn Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); 15478528186bSFlorian Hahn Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); 15488528186bSFlorian Hahn 15498528186bSFlorian Hahn Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc"); 15508528186bSFlorian Hahn Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc"); 15518528186bSFlorian Hahn Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc"); 15528528186bSFlorian Hahn Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc"); 15538528186bSFlorian Hahn 15548528186bSFlorian Hahn // [A|B].Start points to the first accessed byte under base [A|B]. 15558528186bSFlorian Hahn // [A|B].End points to the last accessed byte, plus one. 15568528186bSFlorian Hahn // There is no conflict when the intervals are disjoint: 15578528186bSFlorian Hahn // NoConflict = (B.Start >= A.End) || (A.Start >= B.End) 15588528186bSFlorian Hahn // 15598528186bSFlorian Hahn // bound0 = (B.Start < A.End) 15608528186bSFlorian Hahn // bound1 = (A.Start < B.End) 15618528186bSFlorian Hahn // IsConflict = bound0 & bound1 15628528186bSFlorian Hahn Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0"); 15638528186bSFlorian Hahn FirstInst = GetFirstInst(FirstInst, Cmp0, Loc); 15648528186bSFlorian Hahn Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1"); 15658528186bSFlorian Hahn FirstInst = GetFirstInst(FirstInst, Cmp1, Loc); 15668528186bSFlorian Hahn Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); 15678528186bSFlorian Hahn FirstInst = GetFirstInst(FirstInst, IsConflict, Loc); 15688528186bSFlorian Hahn if (MemoryRuntimeCheck) { 15698528186bSFlorian Hahn IsConflict = 15708528186bSFlorian Hahn ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx"); 15718528186bSFlorian Hahn FirstInst = GetFirstInst(FirstInst, IsConflict, Loc); 15728528186bSFlorian Hahn } 15738528186bSFlorian Hahn MemoryRuntimeCheck = IsConflict; 15748528186bSFlorian Hahn } 15758528186bSFlorian Hahn 15768528186bSFlorian Hahn if (!MemoryRuntimeCheck) 15778528186bSFlorian Hahn return std::make_pair(nullptr, nullptr); 15788528186bSFlorian Hahn 15798528186bSFlorian Hahn // We have to do this trickery because the IRBuilder might fold the check to a 15808528186bSFlorian Hahn // constant expression in which case there is no Instruction anchored in a 15818528186bSFlorian Hahn // the block. 15828528186bSFlorian Hahn Instruction *Check = 15838528186bSFlorian Hahn BinaryOperator::CreateAnd(MemoryRuntimeCheck, ConstantInt::getTrue(Ctx)); 15848528186bSFlorian Hahn ChkBuilder.Insert(Check, "memcheck.conflict"); 15858528186bSFlorian Hahn FirstInst = GetFirstInst(FirstInst, Check, Loc); 15868528186bSFlorian Hahn return std::make_pair(FirstInst, Check); 15878528186bSFlorian Hahn } 1588cfe87d4eSJingu Kang 1589e4abb641SJingu Kang Optional<IVConditionInfo> llvm::hasPartialIVCondition(Loop &L, 1590cfe87d4eSJingu Kang unsigned MSSAThreshold, 1591e4abb641SJingu Kang MemorySSA &MSSA, 1592e4abb641SJingu Kang AAResults &AA) { 1593e4abb641SJingu Kang auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator()); 1594cfe87d4eSJingu Kang if (!TI || !TI->isConditional()) 1595cfe87d4eSJingu Kang return {}; 1596cfe87d4eSJingu Kang 1597cfe87d4eSJingu Kang auto *CondI = dyn_cast<CmpInst>(TI->getCondition()); 1598cfe87d4eSJingu Kang // The case with the condition outside the loop should already be handled 1599cfe87d4eSJingu Kang // earlier. 1600e4abb641SJingu Kang if (!CondI || !L.contains(CondI)) 1601cfe87d4eSJingu Kang return {}; 1602cfe87d4eSJingu Kang 1603cfe87d4eSJingu Kang SmallVector<Instruction *> InstToDuplicate; 1604cfe87d4eSJingu Kang InstToDuplicate.push_back(CondI); 1605cfe87d4eSJingu Kang 1606cfe87d4eSJingu Kang SmallVector<Value *, 4> WorkList; 1607cfe87d4eSJingu Kang WorkList.append(CondI->op_begin(), CondI->op_end()); 1608cfe87d4eSJingu Kang 1609cfe87d4eSJingu Kang SmallVector<MemoryAccess *, 4> AccessesToCheck; 1610cfe87d4eSJingu Kang SmallVector<MemoryLocation, 4> AccessedLocs; 1611cfe87d4eSJingu Kang while (!WorkList.empty()) { 1612cfe87d4eSJingu Kang Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val()); 1613e4abb641SJingu Kang if (!I || !L.contains(I)) 1614cfe87d4eSJingu Kang continue; 1615cfe87d4eSJingu Kang 1616cfe87d4eSJingu Kang // TODO: support additional instructions. 1617cfe87d4eSJingu Kang if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I)) 1618cfe87d4eSJingu Kang return {}; 1619cfe87d4eSJingu Kang 1620cfe87d4eSJingu Kang // Do not duplicate volatile and atomic loads. 1621cfe87d4eSJingu Kang if (auto *LI = dyn_cast<LoadInst>(I)) 1622cfe87d4eSJingu Kang if (LI->isVolatile() || LI->isAtomic()) 1623cfe87d4eSJingu Kang return {}; 1624cfe87d4eSJingu Kang 1625cfe87d4eSJingu Kang InstToDuplicate.push_back(I); 1626e4abb641SJingu Kang if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) { 1627cfe87d4eSJingu Kang if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) { 1628cfe87d4eSJingu Kang // Queue the defining access to check for alias checks. 1629cfe87d4eSJingu Kang AccessesToCheck.push_back(MemUse->getDefiningAccess()); 1630cfe87d4eSJingu Kang AccessedLocs.push_back(MemoryLocation::get(I)); 1631cfe87d4eSJingu Kang } else { 1632cfe87d4eSJingu Kang // MemoryDefs may clobber the location or may be atomic memory 1633cfe87d4eSJingu Kang // operations. Bail out. 1634cfe87d4eSJingu Kang return {}; 1635cfe87d4eSJingu Kang } 1636cfe87d4eSJingu Kang } 1637cfe87d4eSJingu Kang WorkList.append(I->op_begin(), I->op_end()); 1638cfe87d4eSJingu Kang } 1639cfe87d4eSJingu Kang 1640cfe87d4eSJingu Kang if (InstToDuplicate.empty()) 1641cfe87d4eSJingu Kang return {}; 1642cfe87d4eSJingu Kang 1643cfe87d4eSJingu Kang SmallVector<BasicBlock *, 4> ExitingBlocks; 1644e4abb641SJingu Kang L.getExitingBlocks(ExitingBlocks); 1645cfe87d4eSJingu Kang auto HasNoClobbersOnPath = 1646e4abb641SJingu Kang [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate, 1647cfe87d4eSJingu Kang MSSAThreshold](BasicBlock *Succ, BasicBlock *Header, 1648cfe87d4eSJingu Kang SmallVector<MemoryAccess *, 4> AccessesToCheck) 1649cfe87d4eSJingu Kang -> Optional<IVConditionInfo> { 1650cfe87d4eSJingu Kang IVConditionInfo Info; 1651cfe87d4eSJingu Kang // First, collect all blocks in the loop that are on a patch from Succ 1652cfe87d4eSJingu Kang // to the header. 1653cfe87d4eSJingu Kang SmallVector<BasicBlock *, 4> WorkList; 1654cfe87d4eSJingu Kang WorkList.push_back(Succ); 1655cfe87d4eSJingu Kang WorkList.push_back(Header); 1656cfe87d4eSJingu Kang SmallPtrSet<BasicBlock *, 4> Seen; 1657cfe87d4eSJingu Kang Seen.insert(Header); 1658cfe87d4eSJingu Kang Info.PathIsNoop &= 1659cfe87d4eSJingu Kang all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); }); 1660cfe87d4eSJingu Kang 1661cfe87d4eSJingu Kang while (!WorkList.empty()) { 1662cfe87d4eSJingu Kang BasicBlock *Current = WorkList.pop_back_val(); 1663e4abb641SJingu Kang if (!L.contains(Current)) 1664cfe87d4eSJingu Kang continue; 1665cfe87d4eSJingu Kang const auto &SeenIns = Seen.insert(Current); 1666cfe87d4eSJingu Kang if (!SeenIns.second) 1667cfe87d4eSJingu Kang continue; 1668cfe87d4eSJingu Kang 1669cfe87d4eSJingu Kang Info.PathIsNoop &= all_of( 1670cfe87d4eSJingu Kang *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); }); 1671cfe87d4eSJingu Kang WorkList.append(succ_begin(Current), succ_end(Current)); 1672cfe87d4eSJingu Kang } 1673cfe87d4eSJingu Kang 1674cfe87d4eSJingu Kang // Require at least 2 blocks on a path through the loop. This skips 1675cfe87d4eSJingu Kang // paths that directly exit the loop. 1676cfe87d4eSJingu Kang if (Seen.size() < 2) 1677cfe87d4eSJingu Kang return {}; 1678cfe87d4eSJingu Kang 1679cfe87d4eSJingu Kang // Next, check if there are any MemoryDefs that are on the path through 1680cfe87d4eSJingu Kang // the loop (in the Seen set) and they may-alias any of the locations in 1681cfe87d4eSJingu Kang // AccessedLocs. If that is the case, they may modify the condition and 1682cfe87d4eSJingu Kang // partial unswitching is not possible. 1683cfe87d4eSJingu Kang SmallPtrSet<MemoryAccess *, 4> SeenAccesses; 1684cfe87d4eSJingu Kang while (!AccessesToCheck.empty()) { 1685cfe87d4eSJingu Kang MemoryAccess *Current = AccessesToCheck.pop_back_val(); 1686cfe87d4eSJingu Kang auto SeenI = SeenAccesses.insert(Current); 1687cfe87d4eSJingu Kang if (!SeenI.second || !Seen.contains(Current->getBlock())) 1688cfe87d4eSJingu Kang continue; 1689cfe87d4eSJingu Kang 1690cfe87d4eSJingu Kang // Bail out if exceeded the threshold. 1691cfe87d4eSJingu Kang if (SeenAccesses.size() >= MSSAThreshold) 1692cfe87d4eSJingu Kang return {}; 1693cfe87d4eSJingu Kang 1694cfe87d4eSJingu Kang // MemoryUse are read-only accesses. 1695cfe87d4eSJingu Kang if (isa<MemoryUse>(Current)) 1696cfe87d4eSJingu Kang continue; 1697cfe87d4eSJingu Kang 1698cfe87d4eSJingu Kang // For a MemoryDef, check if is aliases any of the location feeding 1699cfe87d4eSJingu Kang // the original condition. 1700cfe87d4eSJingu Kang if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) { 1701e4abb641SJingu Kang if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) { 1702cfe87d4eSJingu Kang return isModSet( 1703e4abb641SJingu Kang AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc)); 1704cfe87d4eSJingu Kang })) 1705cfe87d4eSJingu Kang return {}; 1706cfe87d4eSJingu Kang } 1707cfe87d4eSJingu Kang 1708cfe87d4eSJingu Kang for (Use &U : Current->uses()) 1709cfe87d4eSJingu Kang AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser())); 1710cfe87d4eSJingu Kang } 1711cfe87d4eSJingu Kang 1712cfe87d4eSJingu Kang // We could also allow loops with known trip counts without mustprogress, 1713cfe87d4eSJingu Kang // but ScalarEvolution may not be available. 17147629b2a0SPhilip Reames Info.PathIsNoop &= isMustProgress(&L); 1715cfe87d4eSJingu Kang 1716cfe87d4eSJingu Kang // If the path is considered a no-op so far, check if it reaches a 1717cfe87d4eSJingu Kang // single exit block without any phis. This ensures no values from the 1718cfe87d4eSJingu Kang // loop are used outside of the loop. 1719cfe87d4eSJingu Kang if (Info.PathIsNoop) { 1720cfe87d4eSJingu Kang for (auto *Exiting : ExitingBlocks) { 1721cfe87d4eSJingu Kang if (!Seen.contains(Exiting)) 1722cfe87d4eSJingu Kang continue; 1723cfe87d4eSJingu Kang for (auto *Succ : successors(Exiting)) { 1724e4abb641SJingu Kang if (L.contains(Succ)) 1725cfe87d4eSJingu Kang continue; 1726cfe87d4eSJingu Kang 1727cfe87d4eSJingu Kang Info.PathIsNoop &= llvm::empty(Succ->phis()) && 1728cfe87d4eSJingu Kang (!Info.ExitForPath || Info.ExitForPath == Succ); 1729cfe87d4eSJingu Kang if (!Info.PathIsNoop) 1730cfe87d4eSJingu Kang break; 1731cfe87d4eSJingu Kang assert((!Info.ExitForPath || Info.ExitForPath == Succ) && 1732cfe87d4eSJingu Kang "cannot have multiple exit blocks"); 1733cfe87d4eSJingu Kang Info.ExitForPath = Succ; 1734cfe87d4eSJingu Kang } 1735cfe87d4eSJingu Kang } 1736cfe87d4eSJingu Kang } 1737cfe87d4eSJingu Kang if (!Info.ExitForPath) 1738cfe87d4eSJingu Kang Info.PathIsNoop = false; 1739cfe87d4eSJingu Kang 1740cfe87d4eSJingu Kang Info.InstToDuplicate = InstToDuplicate; 1741cfe87d4eSJingu Kang return Info; 1742cfe87d4eSJingu Kang }; 1743cfe87d4eSJingu Kang 1744cfe87d4eSJingu Kang // If we branch to the same successor, partial unswitching will not be 1745cfe87d4eSJingu Kang // beneficial. 1746cfe87d4eSJingu Kang if (TI->getSuccessor(0) == TI->getSuccessor(1)) 1747cfe87d4eSJingu Kang return {}; 1748cfe87d4eSJingu Kang 1749e4abb641SJingu Kang if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(), 1750cfe87d4eSJingu Kang AccessesToCheck)) { 1751cfe87d4eSJingu Kang Info->KnownValue = ConstantInt::getTrue(TI->getContext()); 1752cfe87d4eSJingu Kang return Info; 1753cfe87d4eSJingu Kang } 1754e4abb641SJingu Kang if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(), 1755cfe87d4eSJingu Kang AccessesToCheck)) { 1756cfe87d4eSJingu Kang Info->KnownValue = ConstantInt::getFalse(TI->getContext()); 1757cfe87d4eSJingu Kang return Info; 1758cfe87d4eSJingu Kang } 1759cfe87d4eSJingu Kang 1760cfe87d4eSJingu Kang return {}; 1761cfe87d4eSJingu Kang } 1762