176aa662cSKarthik Bhat //===-- LoopUtils.cpp - Loop Utility functions -------------------------===// 276aa662cSKarthik Bhat // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 676aa662cSKarthik Bhat // 776aa662cSKarthik Bhat //===----------------------------------------------------------------------===// 876aa662cSKarthik Bhat // 976aa662cSKarthik Bhat // This file defines common loop utility functions. 1076aa662cSKarthik Bhat // 1176aa662cSKarthik Bhat //===----------------------------------------------------------------------===// 1276aa662cSKarthik Bhat 132f2bd8caSAdam Nemet #include "llvm/Transforms/Utils/LoopUtils.h" 147a87e8f9SFlorian Hahn #include "llvm/ADT/DenseSet.h" 157a87e8f9SFlorian Hahn #include "llvm/ADT/Optional.h" 167a87e8f9SFlorian Hahn #include "llvm/ADT/PriorityWorklist.h" 174a000883SChandler Carruth #include "llvm/ADT/ScopeExit.h" 187a87e8f9SFlorian Hahn #include "llvm/ADT/SetVector.h" 197a87e8f9SFlorian Hahn #include "llvm/ADT/SmallPtrSet.h" 207a87e8f9SFlorian Hahn #include "llvm/ADT/SmallVector.h" 2131088a9dSChandler Carruth #include "llvm/Analysis/AliasAnalysis.h" 2231088a9dSChandler Carruth #include "llvm/Analysis/BasicAliasAnalysis.h" 235f436fc5SRichard Trieu #include "llvm/Analysis/DomTreeUpdater.h" 2431088a9dSChandler Carruth #include "llvm/Analysis/GlobalsModRef.h" 25a21d5f1eSPhilip Reames #include "llvm/Analysis/InstructionSimplify.h" 26616657b3SFlorian Hahn #include "llvm/Analysis/LoopAccessAnalysis.h" 272f2bd8caSAdam Nemet #include "llvm/Analysis/LoopInfo.h" 28c3ccf5d7SIgor Laevsky #include "llvm/Analysis/LoopPass.h" 296da79ce1SAlina Sbirlea #include "llvm/Analysis/MemorySSA.h" 3097468e92SAlina Sbirlea #include "llvm/Analysis/MemorySSAUpdater.h" 3123aed5efSPhilip Reames #include "llvm/Analysis/MustExecute.h" 3245d4cb9aSWeiming Zhao #include "llvm/Analysis/ScalarEvolution.h" 332f2bd8caSAdam Nemet #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 3445d4cb9aSWeiming Zhao #include "llvm/Analysis/ScalarEvolutionExpressions.h" 356bda14b3SChandler Carruth #include "llvm/Analysis/TargetTransformInfo.h" 36a097bc69SChad Rosier #include "llvm/Analysis/ValueTracking.h" 37744c3c32SDavide Italiano #include "llvm/IR/DIBuilder.h" 3831088a9dSChandler Carruth #include "llvm/IR/Dominators.h" 3976aa662cSKarthik Bhat #include "llvm/IR/Instructions.h" 40744c3c32SDavide Italiano #include "llvm/IR/IntrinsicInst.h" 41af7e1588SEvgeniy Brevnov #include "llvm/IR/MDBuilder.h" 4245d4cb9aSWeiming Zhao #include "llvm/IR/Module.h" 437a87e8f9SFlorian Hahn #include "llvm/IR/Operator.h" 4476aa662cSKarthik Bhat #include "llvm/IR/PatternMatch.h" 4576aa662cSKarthik Bhat #include "llvm/IR/ValueHandle.h" 4605da2fe5SReid Kleckner #include "llvm/InitializePasses.h" 4731088a9dSChandler Carruth #include "llvm/Pass.h" 4876aa662cSKarthik Bhat #include "llvm/Support/Debug.h" 49a097bc69SChad Rosier #include "llvm/Support/KnownBits.h" 504a000883SChandler Carruth #include "llvm/Transforms/Utils/BasicBlockUtils.h" 5193175a5cSSjoerd Meijer #include "llvm/Transforms/Utils/Local.h" 52bcbd26bfSFlorian Hahn #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 5376aa662cSKarthik Bhat 5476aa662cSKarthik Bhat using namespace llvm; 5576aa662cSKarthik Bhat using namespace llvm::PatternMatch; 5676aa662cSKarthik Bhat 5776aa662cSKarthik Bhat #define DEBUG_TYPE "loop-utils" 5876aa662cSKarthik Bhat 5972448525SMichael Kruse static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced"; 604f64f1baSTim Corringham static const char *LLVMLoopDisableLICM = "llvm.licm.disable"; 61f88a7975SAtmn Patel static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress"; 6272448525SMichael Kruse 634a000883SChandler Carruth bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, 6497468e92SAlina Sbirlea MemorySSAUpdater *MSSAU, 654a000883SChandler Carruth bool PreserveLCSSA) { 664a000883SChandler Carruth bool Changed = false; 674a000883SChandler Carruth 684a000883SChandler Carruth // We re-use a vector for the in-loop predecesosrs. 694a000883SChandler Carruth SmallVector<BasicBlock *, 4> InLoopPredecessors; 704a000883SChandler Carruth 714a000883SChandler Carruth auto RewriteExit = [&](BasicBlock *BB) { 724a000883SChandler Carruth assert(InLoopPredecessors.empty() && 734a000883SChandler Carruth "Must start with an empty predecessors list!"); 744a000883SChandler Carruth auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); }); 754a000883SChandler Carruth 764a000883SChandler Carruth // See if there are any non-loop predecessors of this exit block and 774a000883SChandler Carruth // keep track of the in-loop predecessors. 784a000883SChandler Carruth bool IsDedicatedExit = true; 794a000883SChandler Carruth for (auto *PredBB : predecessors(BB)) 804a000883SChandler Carruth if (L->contains(PredBB)) { 814a000883SChandler Carruth if (isa<IndirectBrInst>(PredBB->getTerminator())) 824a000883SChandler Carruth // We cannot rewrite exiting edges from an indirectbr. 834a000883SChandler Carruth return false; 84784929d0SCraig Topper if (isa<CallBrInst>(PredBB->getTerminator())) 85784929d0SCraig Topper // We cannot rewrite exiting edges from a callbr. 86784929d0SCraig Topper return false; 874a000883SChandler Carruth 884a000883SChandler Carruth InLoopPredecessors.push_back(PredBB); 894a000883SChandler Carruth } else { 904a000883SChandler Carruth IsDedicatedExit = false; 914a000883SChandler Carruth } 924a000883SChandler Carruth 934a000883SChandler Carruth assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!"); 944a000883SChandler Carruth 954a000883SChandler Carruth // Nothing to do if this is already a dedicated exit. 964a000883SChandler Carruth if (IsDedicatedExit) 974a000883SChandler Carruth return false; 984a000883SChandler Carruth 994a000883SChandler Carruth auto *NewExitBB = SplitBlockPredecessors( 10097468e92SAlina Sbirlea BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA); 1014a000883SChandler Carruth 1024a000883SChandler Carruth if (!NewExitBB) 103d34e60caSNicola Zaghen LLVM_DEBUG( 104d34e60caSNicola Zaghen dbgs() << "WARNING: Can't create a dedicated exit block for loop: " 1054a000883SChandler Carruth << *L << "\n"); 1064a000883SChandler Carruth else 107d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " 1084a000883SChandler Carruth << NewExitBB->getName() << "\n"); 1094a000883SChandler Carruth return true; 1104a000883SChandler Carruth }; 1114a000883SChandler Carruth 1124a000883SChandler Carruth // Walk the exit blocks directly rather than building up a data structure for 1134a000883SChandler Carruth // them, but only visit each one once. 1144a000883SChandler Carruth SmallPtrSet<BasicBlock *, 4> Visited; 1154a000883SChandler Carruth for (auto *BB : L->blocks()) 1164a000883SChandler Carruth for (auto *SuccBB : successors(BB)) { 1174a000883SChandler Carruth // We're looking for exit blocks so skip in-loop successors. 1184a000883SChandler Carruth if (L->contains(SuccBB)) 1194a000883SChandler Carruth continue; 1204a000883SChandler Carruth 1214a000883SChandler Carruth // Visit each exit block exactly once. 1224a000883SChandler Carruth if (!Visited.insert(SuccBB).second) 1234a000883SChandler Carruth continue; 1244a000883SChandler Carruth 1254a000883SChandler Carruth Changed |= RewriteExit(SuccBB); 1264a000883SChandler Carruth } 1274a000883SChandler Carruth 1284a000883SChandler Carruth return Changed; 1294a000883SChandler Carruth } 1304a000883SChandler Carruth 1315f8f34e4SAdrian Prantl /// Returns the instructions that use values defined in the loop. 132c5b7b555SAshutosh Nema SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { 133c5b7b555SAshutosh Nema SmallVector<Instruction *, 8> UsedOutside; 134c5b7b555SAshutosh Nema 135c5b7b555SAshutosh Nema for (auto *Block : L->getBlocks()) 136c5b7b555SAshutosh Nema // FIXME: I believe that this could use copy_if if the Inst reference could 137c5b7b555SAshutosh Nema // be adapted into a pointer. 138c5b7b555SAshutosh Nema for (auto &Inst : *Block) { 139c5b7b555SAshutosh Nema auto Users = Inst.users(); 1400a16c228SDavid Majnemer if (any_of(Users, [&](User *U) { 141c5b7b555SAshutosh Nema auto *Use = cast<Instruction>(U); 142c5b7b555SAshutosh Nema return !L->contains(Use->getParent()); 143c5b7b555SAshutosh Nema })) 144c5b7b555SAshutosh Nema UsedOutside.push_back(&Inst); 145c5b7b555SAshutosh Nema } 146c5b7b555SAshutosh Nema 147c5b7b555SAshutosh Nema return UsedOutside; 148c5b7b555SAshutosh Nema } 14931088a9dSChandler Carruth 15031088a9dSChandler Carruth void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { 15131088a9dSChandler Carruth // By definition, all loop passes need the LoopInfo analysis and the 15231088a9dSChandler Carruth // Dominator tree it depends on. Because they all participate in the loop 15331088a9dSChandler Carruth // pass manager, they must also preserve these. 15431088a9dSChandler Carruth AU.addRequired<DominatorTreeWrapperPass>(); 15531088a9dSChandler Carruth AU.addPreserved<DominatorTreeWrapperPass>(); 15631088a9dSChandler Carruth AU.addRequired<LoopInfoWrapperPass>(); 15731088a9dSChandler Carruth AU.addPreserved<LoopInfoWrapperPass>(); 15831088a9dSChandler Carruth 15931088a9dSChandler Carruth // We must also preserve LoopSimplify and LCSSA. We locally access their IDs 16031088a9dSChandler Carruth // here because users shouldn't directly get them from this header. 16131088a9dSChandler Carruth extern char &LoopSimplifyID; 16231088a9dSChandler Carruth extern char &LCSSAID; 16331088a9dSChandler Carruth AU.addRequiredID(LoopSimplifyID); 16431088a9dSChandler Carruth AU.addPreservedID(LoopSimplifyID); 16531088a9dSChandler Carruth AU.addRequiredID(LCSSAID); 16631088a9dSChandler Carruth AU.addPreservedID(LCSSAID); 167c3ccf5d7SIgor Laevsky // This is used in the LPPassManager to perform LCSSA verification on passes 168c3ccf5d7SIgor Laevsky // which preserve lcssa form 169c3ccf5d7SIgor Laevsky AU.addRequired<LCSSAVerificationPass>(); 170c3ccf5d7SIgor Laevsky AU.addPreserved<LCSSAVerificationPass>(); 17131088a9dSChandler Carruth 17231088a9dSChandler Carruth // Loop passes are designed to run inside of a loop pass manager which means 17331088a9dSChandler Carruth // that any function analyses they require must be required by the first loop 17431088a9dSChandler Carruth // pass in the manager (so that it is computed before the loop pass manager 17531088a9dSChandler Carruth // runs) and preserved by all loop pasess in the manager. To make this 17631088a9dSChandler Carruth // reasonably robust, the set needed for most loop passes is maintained here. 17731088a9dSChandler Carruth // If your loop pass requires an analysis not listed here, you will need to 17831088a9dSChandler Carruth // carefully audit the loop pass manager nesting structure that results. 17931088a9dSChandler Carruth AU.addRequired<AAResultsWrapperPass>(); 18031088a9dSChandler Carruth AU.addPreserved<AAResultsWrapperPass>(); 18131088a9dSChandler Carruth AU.addPreserved<BasicAAWrapperPass>(); 18231088a9dSChandler Carruth AU.addPreserved<GlobalsAAWrapperPass>(); 18331088a9dSChandler Carruth AU.addPreserved<SCEVAAWrapperPass>(); 18431088a9dSChandler Carruth AU.addRequired<ScalarEvolutionWrapperPass>(); 18531088a9dSChandler Carruth AU.addPreserved<ScalarEvolutionWrapperPass>(); 1866da79ce1SAlina Sbirlea // FIXME: When all loop passes preserve MemorySSA, it can be required and 1876da79ce1SAlina Sbirlea // preserved here instead of the individual handling in each pass. 18831088a9dSChandler Carruth } 18931088a9dSChandler Carruth 19031088a9dSChandler Carruth /// Manually defined generic "LoopPass" dependency initialization. This is used 19131088a9dSChandler Carruth /// to initialize the exact set of passes from above in \c 19231088a9dSChandler Carruth /// getLoopAnalysisUsage. It can be used within a loop pass's initialization 19331088a9dSChandler Carruth /// with: 19431088a9dSChandler Carruth /// 19531088a9dSChandler Carruth /// INITIALIZE_PASS_DEPENDENCY(LoopPass) 19631088a9dSChandler Carruth /// 19731088a9dSChandler Carruth /// As-if "LoopPass" were a pass. 19831088a9dSChandler Carruth void llvm::initializeLoopPassPass(PassRegistry &Registry) { 19931088a9dSChandler Carruth INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 20031088a9dSChandler Carruth INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 20131088a9dSChandler Carruth INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 202e12c487bSEaswaran Raman INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) 20331088a9dSChandler Carruth INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 20431088a9dSChandler Carruth INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) 20531088a9dSChandler Carruth INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 20631088a9dSChandler Carruth INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) 20731088a9dSChandler Carruth INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 2086da79ce1SAlina Sbirlea INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) 20931088a9dSChandler Carruth } 210963341c8SAdam Nemet 2113c3a7652SSerguei Katkov /// Create MDNode for input string. 2123c3a7652SSerguei Katkov static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) { 2133c3a7652SSerguei Katkov LLVMContext &Context = TheLoop->getHeader()->getContext(); 2143c3a7652SSerguei Katkov Metadata *MDs[] = { 2153c3a7652SSerguei Katkov MDString::get(Context, Name), 2163c3a7652SSerguei Katkov ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))}; 2173c3a7652SSerguei Katkov return MDNode::get(Context, MDs); 2183c3a7652SSerguei Katkov } 2193c3a7652SSerguei Katkov 2203c3a7652SSerguei Katkov /// Set input string into loop metadata by keeping other values intact. 2217f8c8095SSerguei Katkov /// If the string is already in loop metadata update value if it is 2227f8c8095SSerguei Katkov /// different. 2237f8c8095SSerguei Katkov void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD, 2243c3a7652SSerguei Katkov unsigned V) { 2253c3a7652SSerguei Katkov SmallVector<Metadata *, 4> MDs(1); 2263c3a7652SSerguei Katkov // If the loop already has metadata, retain it. 2273c3a7652SSerguei Katkov MDNode *LoopID = TheLoop->getLoopID(); 2283c3a7652SSerguei Katkov if (LoopID) { 2293c3a7652SSerguei Katkov for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { 2303c3a7652SSerguei Katkov MDNode *Node = cast<MDNode>(LoopID->getOperand(i)); 2317f8c8095SSerguei Katkov // If it is of form key = value, try to parse it. 2327f8c8095SSerguei Katkov if (Node->getNumOperands() == 2) { 2337f8c8095SSerguei Katkov MDString *S = dyn_cast<MDString>(Node->getOperand(0)); 2347f8c8095SSerguei Katkov if (S && S->getString().equals(StringMD)) { 2357f8c8095SSerguei Katkov ConstantInt *IntMD = 2367f8c8095SSerguei Katkov mdconst::extract_or_null<ConstantInt>(Node->getOperand(1)); 2377f8c8095SSerguei Katkov if (IntMD && IntMD->getSExtValue() == V) 2387f8c8095SSerguei Katkov // It is already in place. Do nothing. 2397f8c8095SSerguei Katkov return; 2407f8c8095SSerguei Katkov // We need to update the value, so just skip it here and it will 2417f8c8095SSerguei Katkov // be added after copying other existed nodes. 2427f8c8095SSerguei Katkov continue; 2437f8c8095SSerguei Katkov } 2447f8c8095SSerguei Katkov } 2453c3a7652SSerguei Katkov MDs.push_back(Node); 2463c3a7652SSerguei Katkov } 2473c3a7652SSerguei Katkov } 2483c3a7652SSerguei Katkov // Add new metadata. 2497f8c8095SSerguei Katkov MDs.push_back(createStringMetadata(TheLoop, StringMD, V)); 2503c3a7652SSerguei Katkov // Replace current metadata node with new one. 2513c3a7652SSerguei Katkov LLVMContext &Context = TheLoop->getHeader()->getContext(); 2523c3a7652SSerguei Katkov MDNode *NewLoopID = MDNode::get(Context, MDs); 2533c3a7652SSerguei Katkov // Set operand 0 to refer to the loop id itself. 2543c3a7652SSerguei Katkov NewLoopID->replaceOperandWith(0, NewLoopID); 2553c3a7652SSerguei Katkov TheLoop->setLoopID(NewLoopID); 2563c3a7652SSerguei Katkov } 2573c3a7652SSerguei Katkov 25872448525SMichael Kruse /// Find string metadata for loop 25972448525SMichael Kruse /// 26072448525SMichael Kruse /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an 26172448525SMichael Kruse /// operand or null otherwise. If the string metadata is not found return 26272448525SMichael Kruse /// Optional's not-a-value. 263978ba615SMichael Kruse Optional<const MDOperand *> llvm::findStringMetadataForLoop(const Loop *TheLoop, 26472448525SMichael Kruse StringRef Name) { 265978ba615SMichael Kruse MDNode *MD = findOptionMDForLoop(TheLoop, Name); 26672448525SMichael Kruse if (!MD) 26772448525SMichael Kruse return None; 268fe3def7cSAdam Nemet switch (MD->getNumOperands()) { 269fe3def7cSAdam Nemet case 1: 270fe3def7cSAdam Nemet return nullptr; 271fe3def7cSAdam Nemet case 2: 272fe3def7cSAdam Nemet return &MD->getOperand(1); 273fe3def7cSAdam Nemet default: 274fe3def7cSAdam Nemet llvm_unreachable("loop metadata has 0 or 1 operand"); 275963341c8SAdam Nemet } 276fe3def7cSAdam Nemet } 27772448525SMichael Kruse 27872448525SMichael Kruse static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop, 27972448525SMichael Kruse StringRef Name) { 280978ba615SMichael Kruse MDNode *MD = findOptionMDForLoop(TheLoop, Name); 281978ba615SMichael Kruse if (!MD) 282fe3def7cSAdam Nemet return None; 283978ba615SMichael Kruse switch (MD->getNumOperands()) { 28472448525SMichael Kruse case 1: 28572448525SMichael Kruse // When the value is absent it is interpreted as 'attribute set'. 28672448525SMichael Kruse return true; 28772448525SMichael Kruse case 2: 288f9027e55SAlina Sbirlea if (ConstantInt *IntMD = 289f9027e55SAlina Sbirlea mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get())) 290f9027e55SAlina Sbirlea return IntMD->getZExtValue(); 291f9027e55SAlina Sbirlea return true; 29272448525SMichael Kruse } 29372448525SMichael Kruse llvm_unreachable("unexpected number of options"); 29472448525SMichael Kruse } 29572448525SMichael Kruse 296c7e27538SDavid Green bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) { 29772448525SMichael Kruse return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false); 29872448525SMichael Kruse } 29972448525SMichael Kruse 30071bd59f0SDavid Sherwood Optional<ElementCount> 30171bd59f0SDavid Sherwood llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) { 30271bd59f0SDavid Sherwood Optional<int> Width = 30371bd59f0SDavid Sherwood getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width"); 30471bd59f0SDavid Sherwood 30571bd59f0SDavid Sherwood if (Width.hasValue()) { 30671bd59f0SDavid Sherwood Optional<int> IsScalable = getOptionalIntLoopAttribute( 30771bd59f0SDavid Sherwood TheLoop, "llvm.loop.vectorize.scalable.enable"); 3088a20e2b3SKazu Hirata return ElementCount::get(*Width, IsScalable.getValueOr(false)); 30971bd59f0SDavid Sherwood } 31071bd59f0SDavid Sherwood 31171bd59f0SDavid Sherwood return None; 31271bd59f0SDavid Sherwood } 31371bd59f0SDavid Sherwood 31472448525SMichael Kruse llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop, 31572448525SMichael Kruse StringRef Name) { 31672448525SMichael Kruse const MDOperand *AttrMD = 31772448525SMichael Kruse findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr); 31872448525SMichael Kruse if (!AttrMD) 31972448525SMichael Kruse return None; 32072448525SMichael Kruse 32172448525SMichael Kruse ConstantInt *IntMD = mdconst::extract_or_null<ConstantInt>(AttrMD->get()); 32272448525SMichael Kruse if (!IntMD) 32372448525SMichael Kruse return None; 32472448525SMichael Kruse 32572448525SMichael Kruse return IntMD->getSExtValue(); 32672448525SMichael Kruse } 32772448525SMichael Kruse 32872448525SMichael Kruse Optional<MDNode *> llvm::makeFollowupLoopID( 32972448525SMichael Kruse MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions, 33072448525SMichael Kruse const char *InheritOptionsExceptPrefix, bool AlwaysNew) { 33172448525SMichael Kruse if (!OrigLoopID) { 33272448525SMichael Kruse if (AlwaysNew) 33372448525SMichael Kruse return nullptr; 33472448525SMichael Kruse return None; 33572448525SMichael Kruse } 33672448525SMichael Kruse 33772448525SMichael Kruse assert(OrigLoopID->getOperand(0) == OrigLoopID); 33872448525SMichael Kruse 33972448525SMichael Kruse bool InheritAllAttrs = !InheritOptionsExceptPrefix; 34072448525SMichael Kruse bool InheritSomeAttrs = 34172448525SMichael Kruse InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0'; 34272448525SMichael Kruse SmallVector<Metadata *, 8> MDs; 34372448525SMichael Kruse MDs.push_back(nullptr); 34472448525SMichael Kruse 34572448525SMichael Kruse bool Changed = false; 34672448525SMichael Kruse if (InheritAllAttrs || InheritSomeAttrs) { 347dc300bebSKazu Hirata for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) { 34872448525SMichael Kruse MDNode *Op = cast<MDNode>(Existing.get()); 34972448525SMichael Kruse 35072448525SMichael Kruse auto InheritThisAttribute = [InheritSomeAttrs, 35172448525SMichael Kruse InheritOptionsExceptPrefix](MDNode *Op) { 35272448525SMichael Kruse if (!InheritSomeAttrs) 35372448525SMichael Kruse return false; 35472448525SMichael Kruse 35572448525SMichael Kruse // Skip malformatted attribute metadata nodes. 35672448525SMichael Kruse if (Op->getNumOperands() == 0) 35772448525SMichael Kruse return true; 35872448525SMichael Kruse Metadata *NameMD = Op->getOperand(0).get(); 35972448525SMichael Kruse if (!isa<MDString>(NameMD)) 36072448525SMichael Kruse return true; 36172448525SMichael Kruse StringRef AttrName = cast<MDString>(NameMD)->getString(); 36272448525SMichael Kruse 36372448525SMichael Kruse // Do not inherit excluded attributes. 36472448525SMichael Kruse return !AttrName.startswith(InheritOptionsExceptPrefix); 36572448525SMichael Kruse }; 36672448525SMichael Kruse 36772448525SMichael Kruse if (InheritThisAttribute(Op)) 36872448525SMichael Kruse MDs.push_back(Op); 36972448525SMichael Kruse else 37072448525SMichael Kruse Changed = true; 37172448525SMichael Kruse } 37272448525SMichael Kruse } else { 37372448525SMichael Kruse // Modified if we dropped at least one attribute. 37472448525SMichael Kruse Changed = OrigLoopID->getNumOperands() > 1; 37572448525SMichael Kruse } 37672448525SMichael Kruse 37772448525SMichael Kruse bool HasAnyFollowup = false; 37872448525SMichael Kruse for (StringRef OptionName : FollowupOptions) { 379978ba615SMichael Kruse MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName); 38072448525SMichael Kruse if (!FollowupNode) 38172448525SMichael Kruse continue; 38272448525SMichael Kruse 38372448525SMichael Kruse HasAnyFollowup = true; 384dc300bebSKazu Hirata for (const MDOperand &Option : drop_begin(FollowupNode->operands())) { 38572448525SMichael Kruse MDs.push_back(Option.get()); 38672448525SMichael Kruse Changed = true; 38772448525SMichael Kruse } 38872448525SMichael Kruse } 38972448525SMichael Kruse 39072448525SMichael Kruse // Attributes of the followup loop not specified explicity, so signal to the 39172448525SMichael Kruse // transformation pass to add suitable attributes. 39272448525SMichael Kruse if (!AlwaysNew && !HasAnyFollowup) 39372448525SMichael Kruse return None; 39472448525SMichael Kruse 39572448525SMichael Kruse // If no attributes were added or remove, the previous loop Id can be reused. 39672448525SMichael Kruse if (!AlwaysNew && !Changed) 39772448525SMichael Kruse return OrigLoopID; 39872448525SMichael Kruse 39972448525SMichael Kruse // No attributes is equivalent to having no !llvm.loop metadata at all. 40072448525SMichael Kruse if (MDs.size() == 1) 40172448525SMichael Kruse return nullptr; 40272448525SMichael Kruse 40372448525SMichael Kruse // Build the new loop ID. 40472448525SMichael Kruse MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs); 40572448525SMichael Kruse FollowupLoopID->replaceOperandWith(0, FollowupLoopID); 40672448525SMichael Kruse return FollowupLoopID; 40772448525SMichael Kruse } 40872448525SMichael Kruse 40972448525SMichael Kruse bool llvm::hasDisableAllTransformsHint(const Loop *L) { 41072448525SMichael Kruse return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced); 41172448525SMichael Kruse } 41272448525SMichael Kruse 4134f64f1baSTim Corringham bool llvm::hasDisableLICMTransformsHint(const Loop *L) { 4144f64f1baSTim Corringham return getBooleanLoopAttribute(L, LLVMLoopDisableLICM); 4154f64f1baSTim Corringham } 4164f64f1baSTim Corringham 417f88a7975SAtmn Patel bool llvm::hasMustProgress(const Loop *L) { 418f88a7975SAtmn Patel return getBooleanLoopAttribute(L, LLVMLoopMustProgress); 419f88a7975SAtmn Patel } 420f88a7975SAtmn Patel 42172448525SMichael Kruse TransformationMode llvm::hasUnrollTransformation(Loop *L) { 42272448525SMichael Kruse if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable")) 42372448525SMichael Kruse return TM_SuppressedByUser; 42472448525SMichael Kruse 42572448525SMichael Kruse Optional<int> Count = 42672448525SMichael Kruse getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count"); 42772448525SMichael Kruse if (Count.hasValue()) 42872448525SMichael Kruse return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser; 42972448525SMichael Kruse 43072448525SMichael Kruse if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")) 43172448525SMichael Kruse return TM_ForcedByUser; 43272448525SMichael Kruse 43372448525SMichael Kruse if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full")) 43472448525SMichael Kruse return TM_ForcedByUser; 43572448525SMichael Kruse 43672448525SMichael Kruse if (hasDisableAllTransformsHint(L)) 43772448525SMichael Kruse return TM_Disable; 43872448525SMichael Kruse 43972448525SMichael Kruse return TM_Unspecified; 44072448525SMichael Kruse } 44172448525SMichael Kruse 44272448525SMichael Kruse TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) { 44372448525SMichael Kruse if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable")) 44472448525SMichael Kruse return TM_SuppressedByUser; 44572448525SMichael Kruse 44672448525SMichael Kruse Optional<int> Count = 44772448525SMichael Kruse getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count"); 44872448525SMichael Kruse if (Count.hasValue()) 44972448525SMichael Kruse return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser; 45072448525SMichael Kruse 45172448525SMichael Kruse if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable")) 45272448525SMichael Kruse return TM_ForcedByUser; 45372448525SMichael Kruse 45472448525SMichael Kruse if (hasDisableAllTransformsHint(L)) 45572448525SMichael Kruse return TM_Disable; 45672448525SMichael Kruse 45772448525SMichael Kruse return TM_Unspecified; 45872448525SMichael Kruse } 45972448525SMichael Kruse 46072448525SMichael Kruse TransformationMode llvm::hasVectorizeTransformation(Loop *L) { 46172448525SMichael Kruse Optional<bool> Enable = 46272448525SMichael Kruse getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable"); 46372448525SMichael Kruse 46472448525SMichael Kruse if (Enable == false) 46572448525SMichael Kruse return TM_SuppressedByUser; 46672448525SMichael Kruse 46771bd59f0SDavid Sherwood Optional<ElementCount> VectorizeWidth = 46871bd59f0SDavid Sherwood getOptionalElementCountLoopAttribute(L); 46972448525SMichael Kruse Optional<int> InterleaveCount = 47072448525SMichael Kruse getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count"); 47172448525SMichael Kruse 47272448525SMichael Kruse // 'Forcing' vector width and interleave count to one effectively disables 47372448525SMichael Kruse // this tranformation. 47471bd59f0SDavid Sherwood if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() && 47571bd59f0SDavid Sherwood InterleaveCount == 1) 47672448525SMichael Kruse return TM_SuppressedByUser; 47772448525SMichael Kruse 47872448525SMichael Kruse if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized")) 47972448525SMichael Kruse return TM_Disable; 48072448525SMichael Kruse 48170560a0aSMichael Kruse if (Enable == true) 48270560a0aSMichael Kruse return TM_ForcedByUser; 48370560a0aSMichael Kruse 48471bd59f0SDavid Sherwood if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1) 48572448525SMichael Kruse return TM_Disable; 48672448525SMichael Kruse 48771bd59f0SDavid Sherwood if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1) 48872448525SMichael Kruse return TM_Enable; 48972448525SMichael Kruse 49072448525SMichael Kruse if (hasDisableAllTransformsHint(L)) 49172448525SMichael Kruse return TM_Disable; 49272448525SMichael Kruse 49372448525SMichael Kruse return TM_Unspecified; 49472448525SMichael Kruse } 49572448525SMichael Kruse 49672448525SMichael Kruse TransformationMode llvm::hasDistributeTransformation(Loop *L) { 49772448525SMichael Kruse if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable")) 49872448525SMichael Kruse return TM_ForcedByUser; 49972448525SMichael Kruse 50072448525SMichael Kruse if (hasDisableAllTransformsHint(L)) 50172448525SMichael Kruse return TM_Disable; 50272448525SMichael Kruse 50372448525SMichael Kruse return TM_Unspecified; 50472448525SMichael Kruse } 50572448525SMichael Kruse 50672448525SMichael Kruse TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) { 50772448525SMichael Kruse if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable")) 50872448525SMichael Kruse return TM_SuppressedByUser; 50972448525SMichael Kruse 51072448525SMichael Kruse if (hasDisableAllTransformsHint(L)) 51172448525SMichael Kruse return TM_Disable; 51272448525SMichael Kruse 51372448525SMichael Kruse return TM_Unspecified; 514963341c8SAdam Nemet } 515122f984aSEvgeniy Stepanov 5167ed5856aSAlina Sbirlea /// Does a BFS from a given node to all of its children inside a given loop. 5177ed5856aSAlina Sbirlea /// The returned vector of nodes includes the starting point. 5187ed5856aSAlina Sbirlea SmallVector<DomTreeNode *, 16> 5197ed5856aSAlina Sbirlea llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) { 5207ed5856aSAlina Sbirlea SmallVector<DomTreeNode *, 16> Worklist; 5217ed5856aSAlina Sbirlea auto AddRegionToWorklist = [&](DomTreeNode *DTN) { 5227ed5856aSAlina Sbirlea // Only include subregions in the top level loop. 5237ed5856aSAlina Sbirlea BasicBlock *BB = DTN->getBlock(); 5247ed5856aSAlina Sbirlea if (CurLoop->contains(BB)) 5257ed5856aSAlina Sbirlea Worklist.push_back(DTN); 5267ed5856aSAlina Sbirlea }; 5277ed5856aSAlina Sbirlea 5287ed5856aSAlina Sbirlea AddRegionToWorklist(N); 5297ed5856aSAlina Sbirlea 53076c5cb05SNicolai Hähnle for (size_t I = 0; I < Worklist.size(); I++) { 53176c5cb05SNicolai Hähnle for (DomTreeNode *Child : Worklist[I]->children()) 5327ed5856aSAlina Sbirlea AddRegionToWorklist(Child); 53376c5cb05SNicolai Hähnle } 5347ed5856aSAlina Sbirlea 5357ed5856aSAlina Sbirlea return Worklist; 5367ed5856aSAlina Sbirlea } 5377ed5856aSAlina Sbirlea 538efb130fcSAlina Sbirlea void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, 539efb130fcSAlina Sbirlea LoopInfo *LI, MemorySSA *MSSA) { 540899809d5SHans Wennborg assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!"); 541df3e71e0SMarcello Maggioni auto *Preheader = L->getLoopPreheader(); 542df3e71e0SMarcello Maggioni assert(Preheader && "Preheader should exist!"); 543df3e71e0SMarcello Maggioni 544efb130fcSAlina Sbirlea std::unique_ptr<MemorySSAUpdater> MSSAU; 545efb130fcSAlina Sbirlea if (MSSA) 546efb130fcSAlina Sbirlea MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); 547efb130fcSAlina Sbirlea 548df3e71e0SMarcello Maggioni // Now that we know the removal is safe, remove the loop by changing the 549df3e71e0SMarcello Maggioni // branch from the preheader to go to the single exit block. 550df3e71e0SMarcello Maggioni // 551df3e71e0SMarcello Maggioni // Because we're deleting a large chunk of code at once, the sequence in which 552df3e71e0SMarcello Maggioni // we remove things is very important to avoid invalidation issues. 553df3e71e0SMarcello Maggioni 554df3e71e0SMarcello Maggioni // Tell ScalarEvolution that the loop is deleted. Do this before 555df3e71e0SMarcello Maggioni // deleting the loop so that ScalarEvolution can look at the loop 556df3e71e0SMarcello Maggioni // to determine what it needs to clean up. 557df3e71e0SMarcello Maggioni if (SE) 558df3e71e0SMarcello Maggioni SE->forgetLoop(L); 559df3e71e0SMarcello Maggioni 560df3e71e0SMarcello Maggioni auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator()); 561df3e71e0SMarcello Maggioni assert(OldBr && "Preheader must end with a branch"); 562df3e71e0SMarcello Maggioni assert(OldBr->isUnconditional() && "Preheader must have a single successor"); 563df3e71e0SMarcello Maggioni // Connect the preheader to the exit block. Keep the old edge to the header 564df3e71e0SMarcello Maggioni // around to perform the dominator tree update in two separate steps 565df3e71e0SMarcello Maggioni // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge 566df3e71e0SMarcello Maggioni // preheader -> header. 567df3e71e0SMarcello Maggioni // 568df3e71e0SMarcello Maggioni // 569df3e71e0SMarcello Maggioni // 0. Preheader 1. Preheader 2. Preheader 570df3e71e0SMarcello Maggioni // | | | | 571df3e71e0SMarcello Maggioni // V | V | 572df3e71e0SMarcello Maggioni // Header <--\ | Header <--\ | Header <--\ 573df3e71e0SMarcello Maggioni // | | | | | | | | | | | 574df3e71e0SMarcello Maggioni // | V | | | V | | | V | 575df3e71e0SMarcello Maggioni // | Body --/ | | Body --/ | | Body --/ 576df3e71e0SMarcello Maggioni // V V V V V 577df3e71e0SMarcello Maggioni // Exit Exit Exit 578df3e71e0SMarcello Maggioni // 579df3e71e0SMarcello Maggioni // By doing this is two separate steps we can perform the dominator tree 580df3e71e0SMarcello Maggioni // update without using the batch update API. 581df3e71e0SMarcello Maggioni // 582df3e71e0SMarcello Maggioni // Even when the loop is never executed, we cannot remove the edge from the 583df3e71e0SMarcello Maggioni // source block to the exit block. Consider the case where the unexecuted loop 584df3e71e0SMarcello Maggioni // branches back to an outer loop. If we deleted the loop and removed the edge 585df3e71e0SMarcello Maggioni // coming to this inner loop, this will break the outer loop structure (by 586df3e71e0SMarcello Maggioni // deleting the backedge of the outer loop). If the outer loop is indeed a 587df3e71e0SMarcello Maggioni // non-loop, it will be deleted in a future iteration of loop deletion pass. 588df3e71e0SMarcello Maggioni IRBuilder<> Builder(OldBr); 589babc224cSAtmn Patel 590babc224cSAtmn Patel auto *ExitBlock = L->getUniqueExitBlock(); 591f88a7975SAtmn Patel DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 592babc224cSAtmn Patel if (ExitBlock) { 593babc224cSAtmn Patel assert(ExitBlock && "Should have a unique exit block!"); 594babc224cSAtmn Patel assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); 595babc224cSAtmn Patel 596df3e71e0SMarcello Maggioni Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); 597df3e71e0SMarcello Maggioni // Remove the old branch. The conditional branch becomes a new terminator. 598df3e71e0SMarcello Maggioni OldBr->eraseFromParent(); 599df3e71e0SMarcello Maggioni 600df3e71e0SMarcello Maggioni // Rewrite phis in the exit block to get their inputs from the Preheader 601df3e71e0SMarcello Maggioni // instead of the exiting block. 602c7fc81e6SBenjamin Kramer for (PHINode &P : ExitBlock->phis()) { 603df3e71e0SMarcello Maggioni // Set the zero'th element of Phi to be from the preheader and remove all 604df3e71e0SMarcello Maggioni // other incoming values. Given the loop has dedicated exits, all other 605df3e71e0SMarcello Maggioni // incoming values must be from the exiting blocks. 606df3e71e0SMarcello Maggioni int PredIndex = 0; 607c7fc81e6SBenjamin Kramer P.setIncomingBlock(PredIndex, Preheader); 608df3e71e0SMarcello Maggioni // Removes all incoming values from all other exiting blocks (including 609df3e71e0SMarcello Maggioni // duplicate values from an exiting block). 610df3e71e0SMarcello Maggioni // Nuke all entries except the zero'th entry which is the preheader entry. 611df3e71e0SMarcello Maggioni // NOTE! We need to remove Incoming Values in the reverse order as done 612df3e71e0SMarcello Maggioni // below, to keep the indices valid for deletion (removeIncomingValues 613babc224cSAtmn Patel // updates getNumIncomingValues and shifts all values down into the 614babc224cSAtmn Patel // operand being deleted). 615c7fc81e6SBenjamin Kramer for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i) 616c7fc81e6SBenjamin Kramer P.removeIncomingValue(e - i, false); 617df3e71e0SMarcello Maggioni 618c7fc81e6SBenjamin Kramer assert((P.getNumIncomingValues() == 1 && 619c7fc81e6SBenjamin Kramer P.getIncomingBlock(PredIndex) == Preheader) && 620df3e71e0SMarcello Maggioni "Should have exactly one value and that's from the preheader!"); 621df3e71e0SMarcello Maggioni } 622df3e71e0SMarcello Maggioni 623efb130fcSAlina Sbirlea if (DT) { 624efb130fcSAlina Sbirlea DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}); 625efb130fcSAlina Sbirlea if (MSSA) { 626babc224cSAtmn Patel MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}, 627babc224cSAtmn Patel *DT); 628efb130fcSAlina Sbirlea if (VerifyMemorySSA) 629efb130fcSAlina Sbirlea MSSA->verifyMemorySSA(); 630efb130fcSAlina Sbirlea } 631efb130fcSAlina Sbirlea } 632efb130fcSAlina Sbirlea 633df3e71e0SMarcello Maggioni // Disconnect the loop body by branching directly to its exit. 634df3e71e0SMarcello Maggioni Builder.SetInsertPoint(Preheader->getTerminator()); 635df3e71e0SMarcello Maggioni Builder.CreateBr(ExitBlock); 636df3e71e0SMarcello Maggioni // Remove the old branch. 637df3e71e0SMarcello Maggioni Preheader->getTerminator()->eraseFromParent(); 638f88a7975SAtmn Patel } else { 639f88a7975SAtmn Patel assert(L->hasNoExitBlocks() && 640f88a7975SAtmn Patel "Loop should have either zero or one exit blocks."); 641f88a7975SAtmn Patel 642f88a7975SAtmn Patel Builder.SetInsertPoint(OldBr); 643f88a7975SAtmn Patel Builder.CreateUnreachable(); 644f88a7975SAtmn Patel Preheader->getTerminator()->eraseFromParent(); 645f88a7975SAtmn Patel } 646df3e71e0SMarcello Maggioni 647df3e71e0SMarcello Maggioni if (DT) { 648efb130fcSAlina Sbirlea DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}}); 649efb130fcSAlina Sbirlea if (MSSA) { 650f88a7975SAtmn Patel MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}}, 651f88a7975SAtmn Patel *DT); 652efb130fcSAlina Sbirlea SmallSetVector<BasicBlock *, 8> DeadBlockSet(L->block_begin(), 653efb130fcSAlina Sbirlea L->block_end()); 654efb130fcSAlina Sbirlea MSSAU->removeBlocks(DeadBlockSet); 655519b019aSAlina Sbirlea if (VerifyMemorySSA) 656519b019aSAlina Sbirlea MSSA->verifyMemorySSA(); 657efb130fcSAlina Sbirlea } 658df3e71e0SMarcello Maggioni } 659df3e71e0SMarcello Maggioni 660744c3c32SDavide Italiano // Use a map to unique and a vector to guarantee deterministic ordering. 6618ee59ca6SDavide Italiano llvm::SmallDenseSet<std::pair<DIVariable *, DIExpression *>, 4> DeadDebugSet; 662744c3c32SDavide Italiano llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst; 663744c3c32SDavide Italiano 664babc224cSAtmn Patel if (ExitBlock) { 665a757d65cSSerguei Katkov // Given LCSSA form is satisfied, we should not have users of instructions 666a757d65cSSerguei Katkov // within the dead loop outside of the loop. However, LCSSA doesn't take 667a757d65cSSerguei Katkov // unreachable uses into account. We handle them here. 668a757d65cSSerguei Katkov // We could do it after drop all references (in this case all users in the 669a757d65cSSerguei Katkov // loop will be already eliminated and we have less work to do but according 670a757d65cSSerguei Katkov // to API doc of User::dropAllReferences only valid operation after dropping 671a757d65cSSerguei Katkov // references, is deletion. So let's substitute all usages of 672a757d65cSSerguei Katkov // instruction from the loop with undef value of corresponding type first. 673a757d65cSSerguei Katkov for (auto *Block : L->blocks()) 674a757d65cSSerguei Katkov for (Instruction &I : *Block) { 675a757d65cSSerguei Katkov auto *Undef = UndefValue::get(I.getType()); 676babc224cSAtmn Patel for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); 677babc224cSAtmn Patel UI != E;) { 678a757d65cSSerguei Katkov Use &U = *UI; 679a757d65cSSerguei Katkov ++UI; 680a757d65cSSerguei Katkov if (auto *Usr = dyn_cast<Instruction>(U.getUser())) 681a757d65cSSerguei Katkov if (L->contains(Usr->getParent())) 682a757d65cSSerguei Katkov continue; 683a757d65cSSerguei Katkov // If we have a DT then we can check that uses outside a loop only in 684a757d65cSSerguei Katkov // unreachable block. 685a757d65cSSerguei Katkov if (DT) 686a757d65cSSerguei Katkov assert(!DT->isReachableFromEntry(U) && 687a757d65cSSerguei Katkov "Unexpected user in reachable block"); 688a757d65cSSerguei Katkov U.set(Undef); 689a757d65cSSerguei Katkov } 690744c3c32SDavide Italiano auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I); 691744c3c32SDavide Italiano if (!DVI) 692744c3c32SDavide Italiano continue; 693babc224cSAtmn Patel auto Key = 694babc224cSAtmn Patel DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()}); 6958ee59ca6SDavide Italiano if (Key != DeadDebugSet.end()) 696744c3c32SDavide Italiano continue; 6978ee59ca6SDavide Italiano DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()}); 698744c3c32SDavide Italiano DeadDebugInst.push_back(DVI); 699a757d65cSSerguei Katkov } 700a757d65cSSerguei Katkov 701744c3c32SDavide Italiano // After the loop has been deleted all the values defined and modified 702744c3c32SDavide Italiano // inside the loop are going to be unavailable. 703744c3c32SDavide Italiano // Since debug values in the loop have been deleted, inserting an undef 704744c3c32SDavide Italiano // dbg.value truncates the range of any dbg.value before the loop where the 705744c3c32SDavide Italiano // loop used to be. This is particularly important for constant values. 706744c3c32SDavide Italiano DIBuilder DIB(*ExitBlock->getModule()); 707e5be660eSRoman Lebedev Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI(); 708e5be660eSRoman Lebedev assert(InsertDbgValueBefore && 709e5be660eSRoman Lebedev "There should be a non-PHI instruction in exit block, else these " 710e5be660eSRoman Lebedev "instructions will have no parent."); 711744c3c32SDavide Italiano for (auto *DVI : DeadDebugInst) 712e5be660eSRoman Lebedev DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()), 713e5be660eSRoman Lebedev DVI->getVariable(), DVI->getExpression(), 714e5be660eSRoman Lebedev DVI->getDebugLoc(), InsertDbgValueBefore); 715babc224cSAtmn Patel } 716744c3c32SDavide Italiano 717df3e71e0SMarcello Maggioni // Remove the block from the reference counting scheme, so that we can 718df3e71e0SMarcello Maggioni // delete it freely later. 719df3e71e0SMarcello Maggioni for (auto *Block : L->blocks()) 720df3e71e0SMarcello Maggioni Block->dropAllReferences(); 721df3e71e0SMarcello Maggioni 722efb130fcSAlina Sbirlea if (MSSA && VerifyMemorySSA) 723efb130fcSAlina Sbirlea MSSA->verifyMemorySSA(); 724efb130fcSAlina Sbirlea 725df3e71e0SMarcello Maggioni if (LI) { 726df3e71e0SMarcello Maggioni // Erase the instructions and the blocks without having to worry 727df3e71e0SMarcello Maggioni // about ordering because we already dropped the references. 728df3e71e0SMarcello Maggioni // NOTE: This iteration is safe because erasing the block does not remove 729df3e71e0SMarcello Maggioni // its entry from the loop's block list. We do that in the next section. 730df3e71e0SMarcello Maggioni for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end(); 731df3e71e0SMarcello Maggioni LpI != LpE; ++LpI) 732df3e71e0SMarcello Maggioni (*LpI)->eraseFromParent(); 733df3e71e0SMarcello Maggioni 734df3e71e0SMarcello Maggioni // Finally, the blocks from loopinfo. This has to happen late because 735df3e71e0SMarcello Maggioni // otherwise our loop iterators won't work. 736df3e71e0SMarcello Maggioni 737df3e71e0SMarcello Maggioni SmallPtrSet<BasicBlock *, 8> blocks; 738df3e71e0SMarcello Maggioni blocks.insert(L->block_begin(), L->block_end()); 739df3e71e0SMarcello Maggioni for (BasicBlock *BB : blocks) 740df3e71e0SMarcello Maggioni LI->removeBlock(BB); 741df3e71e0SMarcello Maggioni 742df3e71e0SMarcello Maggioni // The last step is to update LoopInfo now that we've eliminated this loop. 7439883d7edSWhitney Tsang // Note: LoopInfo::erase remove the given loop and relink its subloops with 7449883d7edSWhitney Tsang // its parent. While removeLoop/removeChildLoop remove the given loop but 7459883d7edSWhitney Tsang // not relink its subloops, which is what we want. 7469883d7edSWhitney Tsang if (Loop *ParentLoop = L->getParentLoop()) { 7475d6c5b46SWhitney Tsang Loop::iterator I = find(*ParentLoop, L); 7489883d7edSWhitney Tsang assert(I != ParentLoop->end() && "Couldn't find loop"); 7499883d7edSWhitney Tsang ParentLoop->removeChildLoop(I); 7509883d7edSWhitney Tsang } else { 7515d6c5b46SWhitney Tsang Loop::iterator I = find(*LI, L); 7529883d7edSWhitney Tsang assert(I != LI->end() && "Couldn't find loop"); 7539883d7edSWhitney Tsang LI->removeLoop(I); 7549883d7edSWhitney Tsang } 7559883d7edSWhitney Tsang LI->destroy(L); 756df3e71e0SMarcello Maggioni } 757df3e71e0SMarcello Maggioni } 758df3e71e0SMarcello Maggioni 759ef51eed3SPhilip Reames static Loop *getOutermostLoop(Loop *L) { 760ef51eed3SPhilip Reames while (Loop *Parent = L->getParentLoop()) 761ef51eed3SPhilip Reames L = Parent; 762ef51eed3SPhilip Reames return L; 763ef51eed3SPhilip Reames } 764ef51eed3SPhilip Reames 7654739dd67SPhilip Reames void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, 7664739dd67SPhilip Reames LoopInfo &LI, MemorySSA *MSSA) { 7674739dd67SPhilip Reames auto *Latch = L->getLoopLatch(); 7684739dd67SPhilip Reames assert(Latch && "multiple latches not yet supported"); 7694739dd67SPhilip Reames auto *Header = L->getHeader(); 770ef51eed3SPhilip Reames Loop *OutermostLoop = getOutermostLoop(L); 7714739dd67SPhilip Reames 7724739dd67SPhilip Reames SE.forgetLoop(L); 7734739dd67SPhilip Reames 7744739dd67SPhilip Reames // Note: By splitting the backedge, and then explicitly making it unreachable 7754739dd67SPhilip Reames // we gracefully handle corner cases such as non-bottom tested loops and the 7764739dd67SPhilip Reames // like. We also have the benefit of being able to reuse existing well tested 7774739dd67SPhilip Reames // code. It might be worth special casing the common bottom tested case at 7784739dd67SPhilip Reames // some point to avoid code churn. 7794739dd67SPhilip Reames 7804739dd67SPhilip Reames std::unique_ptr<MemorySSAUpdater> MSSAU; 7814739dd67SPhilip Reames if (MSSA) 7824739dd67SPhilip Reames MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); 7834739dd67SPhilip Reames 7844739dd67SPhilip Reames auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get()); 7854739dd67SPhilip Reames 7864739dd67SPhilip Reames DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager); 7874739dd67SPhilip Reames (void)changeToUnreachable(BackedgeBB->getTerminator(), /*UseTrap*/false, 7884739dd67SPhilip Reames /*PreserveLCSSA*/true, &DTU, MSSAU.get()); 7894739dd67SPhilip Reames 7904739dd67SPhilip Reames // Erase (and destroy) this loop instance. Handles relinking sub-loops 7914739dd67SPhilip Reames // and blocks within the loop as needed. 7924739dd67SPhilip Reames LI.erase(L); 793ef51eed3SPhilip Reames 794ef51eed3SPhilip Reames // If the loop we broke had a parent, then changeToUnreachable might have 795ef51eed3SPhilip Reames // caused a block to be removed from the parent loop (see loop_nest_lcssa 796ef51eed3SPhilip Reames // test case in zero-btc.ll for an example), thus changing the parent's 797ef51eed3SPhilip Reames // exit blocks. If that happened, we need to rebuild LCSSA on the outermost 798ef51eed3SPhilip Reames // loop which might have a had a block removed. 799ef51eed3SPhilip Reames if (OutermostLoop != L) 800ef51eed3SPhilip Reames formLCSSARecursively(*OutermostLoop, DT, &LI, &SE); 8014739dd67SPhilip Reames } 8024739dd67SPhilip Reames 8034739dd67SPhilip Reames 804af7e1588SEvgeniy Brevnov /// Checks if \p L has single exit through latch block except possibly 805af7e1588SEvgeniy Brevnov /// "deoptimizing" exits. Returns branch instruction terminating the loop 806af7e1588SEvgeniy Brevnov /// latch if above check is successful, nullptr otherwise. 807af7e1588SEvgeniy Brevnov static BranchInst *getExpectedExitLoopLatchBranch(Loop *L) { 80845c43e7dSSerguei Katkov BasicBlock *Latch = L->getLoopLatch(); 80945c43e7dSSerguei Katkov if (!Latch) 810af7e1588SEvgeniy Brevnov return nullptr; 811af7e1588SEvgeniy Brevnov 81245c43e7dSSerguei Katkov BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator()); 81345c43e7dSSerguei Katkov if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch)) 814af7e1588SEvgeniy Brevnov return nullptr; 81541d72a86SDehao Chen 81641d72a86SDehao Chen assert((LatchBR->getSuccessor(0) == L->getHeader() || 81741d72a86SDehao Chen LatchBR->getSuccessor(1) == L->getHeader()) && 81841d72a86SDehao Chen "At least one edge out of the latch must go to the header"); 81941d72a86SDehao Chen 82045c43e7dSSerguei Katkov SmallVector<BasicBlock *, 4> ExitBlocks; 82145c43e7dSSerguei Katkov L->getUniqueNonLatchExitBlocks(ExitBlocks); 82245c43e7dSSerguei Katkov if (any_of(ExitBlocks, [](const BasicBlock *EB) { 823eae0d2e9SSerguei Katkov return !EB->getTerminatingDeoptimizeCall(); 82445c43e7dSSerguei Katkov })) 825af7e1588SEvgeniy Brevnov return nullptr; 826af7e1588SEvgeniy Brevnov 827af7e1588SEvgeniy Brevnov return LatchBR; 828af7e1588SEvgeniy Brevnov } 829af7e1588SEvgeniy Brevnov 830af7e1588SEvgeniy Brevnov Optional<unsigned> 831af7e1588SEvgeniy Brevnov llvm::getLoopEstimatedTripCount(Loop *L, 832af7e1588SEvgeniy Brevnov unsigned *EstimatedLoopInvocationWeight) { 833af7e1588SEvgeniy Brevnov // Support loops with an exiting latch and other existing exists only 834af7e1588SEvgeniy Brevnov // deoptimize. 835af7e1588SEvgeniy Brevnov BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L); 836af7e1588SEvgeniy Brevnov if (!LatchBranch) 83745c43e7dSSerguei Katkov return None; 83845c43e7dSSerguei Katkov 83941d72a86SDehao Chen // To estimate the number of times the loop body was executed, we want to 84041d72a86SDehao Chen // know the number of times the backedge was taken, vs. the number of times 84141d72a86SDehao Chen // we exited the loop. 842f0abe820SEvgeniy Brevnov uint64_t BackedgeTakenWeight, LatchExitWeight; 843af7e1588SEvgeniy Brevnov if (!LatchBranch->extractProfMetadata(BackedgeTakenWeight, LatchExitWeight)) 84441d72a86SDehao Chen return None; 84541d72a86SDehao Chen 846af7e1588SEvgeniy Brevnov if (LatchBranch->getSuccessor(0) != L->getHeader()) 847f0abe820SEvgeniy Brevnov std::swap(BackedgeTakenWeight, LatchExitWeight); 848f0abe820SEvgeniy Brevnov 84910357e1cSEvgeniy Brevnov if (!LatchExitWeight) 85010357e1cSEvgeniy Brevnov return None; 85141d72a86SDehao Chen 852af7e1588SEvgeniy Brevnov if (EstimatedLoopInvocationWeight) 853af7e1588SEvgeniy Brevnov *EstimatedLoopInvocationWeight = LatchExitWeight; 854af7e1588SEvgeniy Brevnov 85510357e1cSEvgeniy Brevnov // Estimated backedge taken count is a ratio of the backedge taken weight by 856cfe97681SEvgeniy Brevnov // the weight of the edge exiting the loop, rounded to nearest. 85710357e1cSEvgeniy Brevnov uint64_t BackedgeTakenCount = 85810357e1cSEvgeniy Brevnov llvm::divideNearest(BackedgeTakenWeight, LatchExitWeight); 85910357e1cSEvgeniy Brevnov // Estimated trip count is one plus estimated backedge taken count. 86010357e1cSEvgeniy Brevnov return BackedgeTakenCount + 1; 86141d72a86SDehao Chen } 862cf9daa33SAmara Emerson 863af7e1588SEvgeniy Brevnov bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, 864af7e1588SEvgeniy Brevnov unsigned EstimatedloopInvocationWeight) { 865af7e1588SEvgeniy Brevnov // Support loops with an exiting latch and other existing exists only 866af7e1588SEvgeniy Brevnov // deoptimize. 867af7e1588SEvgeniy Brevnov BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L); 868af7e1588SEvgeniy Brevnov if (!LatchBranch) 869af7e1588SEvgeniy Brevnov return false; 870af7e1588SEvgeniy Brevnov 871af7e1588SEvgeniy Brevnov // Calculate taken and exit weights. 872af7e1588SEvgeniy Brevnov unsigned LatchExitWeight = 0; 873af7e1588SEvgeniy Brevnov unsigned BackedgeTakenWeight = 0; 874af7e1588SEvgeniy Brevnov 875af7e1588SEvgeniy Brevnov if (EstimatedTripCount > 0) { 876af7e1588SEvgeniy Brevnov LatchExitWeight = EstimatedloopInvocationWeight; 877af7e1588SEvgeniy Brevnov BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight; 878af7e1588SEvgeniy Brevnov } 879af7e1588SEvgeniy Brevnov 880af7e1588SEvgeniy Brevnov // Make a swap if back edge is taken when condition is "false". 881af7e1588SEvgeniy Brevnov if (LatchBranch->getSuccessor(0) != L->getHeader()) 882af7e1588SEvgeniy Brevnov std::swap(BackedgeTakenWeight, LatchExitWeight); 883af7e1588SEvgeniy Brevnov 884af7e1588SEvgeniy Brevnov MDBuilder MDB(LatchBranch->getContext()); 885af7e1588SEvgeniy Brevnov 886af7e1588SEvgeniy Brevnov // Set/Update profile metadata. 887af7e1588SEvgeniy Brevnov LatchBranch->setMetadata( 888af7e1588SEvgeniy Brevnov LLVMContext::MD_prof, 889af7e1588SEvgeniy Brevnov MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight)); 890af7e1588SEvgeniy Brevnov 891af7e1588SEvgeniy Brevnov return true; 892af7e1588SEvgeniy Brevnov } 893af7e1588SEvgeniy Brevnov 8946cb64787SDavid Green bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop, 895395b80cdSDavid Green ScalarEvolution &SE) { 896395b80cdSDavid Green Loop *OuterL = InnerLoop->getParentLoop(); 897395b80cdSDavid Green if (!OuterL) 898395b80cdSDavid Green return true; 899395b80cdSDavid Green 900395b80cdSDavid Green // Get the backedge taken count for the inner loop 901395b80cdSDavid Green BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); 902395b80cdSDavid Green const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch); 903395b80cdSDavid Green if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) || 904395b80cdSDavid Green !InnerLoopBECountSC->getType()->isIntegerTy()) 905395b80cdSDavid Green return false; 906395b80cdSDavid Green 907395b80cdSDavid Green // Get whether count is invariant to the outer loop 908395b80cdSDavid Green ScalarEvolution::LoopDisposition LD = 909395b80cdSDavid Green SE.getLoopDisposition(InnerLoopBECountSC, OuterL); 910395b80cdSDavid Green if (LD != ScalarEvolution::LoopInvariant) 911395b80cdSDavid Green return false; 912395b80cdSDavid Green 913395b80cdSDavid Green return true; 914395b80cdSDavid Green } 915395b80cdSDavid Green 916c74e8539SSanjay Patel Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, 917c74e8539SSanjay Patel Value *Right) { 91809b1c563SSanjay Patel CmpInst::Predicate Pred; 9196594dc37SVikram TV switch (RK) { 9206594dc37SVikram TV default: 9216594dc37SVikram TV llvm_unreachable("Unknown min/max recurrence kind"); 922c74e8539SSanjay Patel case RecurKind::UMin: 92309b1c563SSanjay Patel Pred = CmpInst::ICMP_ULT; 9246594dc37SVikram TV break; 925c74e8539SSanjay Patel case RecurKind::UMax: 92609b1c563SSanjay Patel Pred = CmpInst::ICMP_UGT; 9276594dc37SVikram TV break; 928c74e8539SSanjay Patel case RecurKind::SMin: 92909b1c563SSanjay Patel Pred = CmpInst::ICMP_SLT; 9306594dc37SVikram TV break; 931c74e8539SSanjay Patel case RecurKind::SMax: 93209b1c563SSanjay Patel Pred = CmpInst::ICMP_SGT; 9336594dc37SVikram TV break; 934c74e8539SSanjay Patel case RecurKind::FMin: 93509b1c563SSanjay Patel Pred = CmpInst::FCMP_OLT; 9366594dc37SVikram TV break; 937c74e8539SSanjay Patel case RecurKind::FMax: 93809b1c563SSanjay Patel Pred = CmpInst::FCMP_OGT; 9396594dc37SVikram TV break; 9406594dc37SVikram TV } 9416594dc37SVikram TV 94209b1c563SSanjay Patel Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp"); 9436594dc37SVikram TV Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); 9446594dc37SVikram TV return Select; 9456594dc37SVikram TV } 9466594dc37SVikram TV 94723c2182cSSimon Pilgrim // Helper to generate an ordered reduction. 948c74e8539SSanjay Patel Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, 949c74e8539SSanjay Patel unsigned Op, RecurKind RdxKind, 95023c2182cSSimon Pilgrim ArrayRef<Value *> RedOps) { 9518d11ec66SChristopher Tetreault unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements(); 95223c2182cSSimon Pilgrim 95323c2182cSSimon Pilgrim // Extract and apply reduction ops in ascending order: 95423c2182cSSimon Pilgrim // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1] 95523c2182cSSimon Pilgrim Value *Result = Acc; 95623c2182cSSimon Pilgrim for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) { 95723c2182cSSimon Pilgrim Value *Ext = 95823c2182cSSimon Pilgrim Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx)); 95923c2182cSSimon Pilgrim 96023c2182cSSimon Pilgrim if (Op != Instruction::ICmp && Op != Instruction::FCmp) { 96123c2182cSSimon Pilgrim Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext, 96223c2182cSSimon Pilgrim "bin.rdx"); 96323c2182cSSimon Pilgrim } else { 964c74e8539SSanjay Patel assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) && 96523c2182cSSimon Pilgrim "Invalid min/max"); 966c74e8539SSanjay Patel Result = createMinMaxOp(Builder, RdxKind, Result, Ext); 96723c2182cSSimon Pilgrim } 96823c2182cSSimon Pilgrim 96923c2182cSSimon Pilgrim if (!RedOps.empty()) 97023c2182cSSimon Pilgrim propagateIRFlags(Result, RedOps); 97123c2182cSSimon Pilgrim } 97223c2182cSSimon Pilgrim 97323c2182cSSimon Pilgrim return Result; 97423c2182cSSimon Pilgrim } 97523c2182cSSimon Pilgrim 976cf9daa33SAmara Emerson // Helper to generate a log2 shuffle reduction. 977c74e8539SSanjay Patel Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, 978c74e8539SSanjay Patel unsigned Op, RecurKind RdxKind, 979ad62a3a2SSanjay Patel ArrayRef<Value *> RedOps) { 9808d11ec66SChristopher Tetreault unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements(); 981cf9daa33SAmara Emerson // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles 982cf9daa33SAmara Emerson // and vector ops, reducing the set of values being computed by half each 983cf9daa33SAmara Emerson // round. 984cf9daa33SAmara Emerson assert(isPowerOf2_32(VF) && 985cf9daa33SAmara Emerson "Reduction emission only supported for pow2 vectors!"); 986cf9daa33SAmara Emerson Value *TmpVec = Src; 9876f64dacaSBenjamin Kramer SmallVector<int, 32> ShuffleMask(VF); 988cf9daa33SAmara Emerson for (unsigned i = VF; i != 1; i >>= 1) { 989cf9daa33SAmara Emerson // Move the upper half of the vector to the lower half. 990cf9daa33SAmara Emerson for (unsigned j = 0; j != i / 2; ++j) 9916f64dacaSBenjamin Kramer ShuffleMask[j] = i / 2 + j; 992cf9daa33SAmara Emerson 993cf9daa33SAmara Emerson // Fill the rest of the mask with undef. 9946f64dacaSBenjamin Kramer std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1); 995cf9daa33SAmara Emerson 9969b296102SJuneyoung Lee Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf"); 997cf9daa33SAmara Emerson 998cf9daa33SAmara Emerson if (Op != Instruction::ICmp && Op != Instruction::FCmp) { 999ad62a3a2SSanjay Patel // The builder propagates its fast-math-flags setting. 1000ad62a3a2SSanjay Patel TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, 1001ad62a3a2SSanjay Patel "bin.rdx"); 1002cf9daa33SAmara Emerson } else { 1003c74e8539SSanjay Patel assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) && 1004cf9daa33SAmara Emerson "Invalid min/max"); 1005c74e8539SSanjay Patel TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf); 1006cf9daa33SAmara Emerson } 1007cf9daa33SAmara Emerson if (!RedOps.empty()) 1008cf9daa33SAmara Emerson propagateIRFlags(TmpVec, RedOps); 1009bc1148e7SSanjay Patel 1010bc1148e7SSanjay Patel // We may compute the reassociated scalar ops in a way that does not 1011bc1148e7SSanjay Patel // preserve nsw/nuw etc. Conservatively, drop those flags. 1012bc1148e7SSanjay Patel if (auto *ReductionInst = dyn_cast<Instruction>(TmpVec)) 1013bc1148e7SSanjay Patel ReductionInst->dropPoisonGeneratingFlags(); 1014cf9daa33SAmara Emerson } 1015cf9daa33SAmara Emerson // The result is in the first element of the vector. 1016cf9daa33SAmara Emerson return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); 1017cf9daa33SAmara Emerson } 1018cf9daa33SAmara Emerson 1019c74e8539SSanjay Patel Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, 1020c74e8539SSanjay Patel const TargetTransformInfo *TTI, 102136263a7cSSanjay Patel Value *Src, RecurKind RdxKind, 1022cf9daa33SAmara Emerson ArrayRef<Value *> RedOps) { 102358b6c5d9SSanjay Patel TargetTransformInfo::ReductionFlags RdxFlags; 102497669575SSanjay Patel RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || RdxKind == RecurKind::UMax || 102558b6c5d9SSanjay Patel RdxKind == RecurKind::FMax; 102658b6c5d9SSanjay Patel RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin; 102758b6c5d9SSanjay Patel 102897669575SSanjay Patel auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType(); 102936263a7cSSanjay Patel switch (RdxKind) { 103036263a7cSSanjay Patel case RecurKind::Add: 103197669575SSanjay Patel return Builder.CreateAddReduce(Src); 103236263a7cSSanjay Patel case RecurKind::Mul: 103397669575SSanjay Patel return Builder.CreateMulReduce(Src); 103436263a7cSSanjay Patel case RecurKind::And: 103597669575SSanjay Patel return Builder.CreateAndReduce(Src); 103636263a7cSSanjay Patel case RecurKind::Or: 103797669575SSanjay Patel return Builder.CreateOrReduce(Src); 103836263a7cSSanjay Patel case RecurKind::Xor: 103997669575SSanjay Patel return Builder.CreateXorReduce(Src); 104036263a7cSSanjay Patel case RecurKind::FAdd: 104197669575SSanjay Patel return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy), 104297669575SSanjay Patel Src); 104336263a7cSSanjay Patel case RecurKind::FMul: 104497669575SSanjay Patel return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src); 1045c74e8539SSanjay Patel case RecurKind::SMax: 104697669575SSanjay Patel return Builder.CreateIntMaxReduce(Src, true); 1047c74e8539SSanjay Patel case RecurKind::SMin: 104897669575SSanjay Patel return Builder.CreateIntMinReduce(Src, true); 1049c74e8539SSanjay Patel case RecurKind::UMax: 105097669575SSanjay Patel return Builder.CreateIntMaxReduce(Src, false); 1051c74e8539SSanjay Patel case RecurKind::UMin: 105297669575SSanjay Patel return Builder.CreateIntMinReduce(Src, false); 105336263a7cSSanjay Patel case RecurKind::FMax: 105497669575SSanjay Patel return Builder.CreateFPMaxReduce(Src); 105536263a7cSSanjay Patel case RecurKind::FMin: 105697669575SSanjay Patel return Builder.CreateFPMinReduce(Src); 1057cf9daa33SAmara Emerson default: 1058cf9daa33SAmara Emerson llvm_unreachable("Unhandled opcode"); 1059cf9daa33SAmara Emerson } 1060cf9daa33SAmara Emerson } 1061cf9daa33SAmara Emerson 106228ffe38bSNikita Popov Value *llvm::createTargetReduction(IRBuilderBase &B, 1063cf9daa33SAmara Emerson const TargetTransformInfo *TTI, 10648ca60db4SSanjay Patel RecurrenceDescriptor &Desc, Value *Src) { 1065cf9daa33SAmara Emerson // TODO: Support in-order reductions based on the recurrence descriptor. 1066ad62a3a2SSanjay Patel // All ops in the reduction inherit fast-math-flags from the recurrence 1067ad62a3a2SSanjay Patel // descriptor. 106828ffe38bSNikita Popov IRBuilderBase::FastMathFlagGuard FMFGuard(B); 1069ad62a3a2SSanjay Patel B.setFastMathFlags(Desc.getFastMathFlags()); 107036263a7cSSanjay Patel return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind()); 1071cf9daa33SAmara Emerson } 1072cf9daa33SAmara Emerson 1073a61f4b89SDinar Temirbulatov void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { 1074a61f4b89SDinar Temirbulatov auto *VecOp = dyn_cast<Instruction>(I); 1075a61f4b89SDinar Temirbulatov if (!VecOp) 1076a61f4b89SDinar Temirbulatov return; 1077a61f4b89SDinar Temirbulatov auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0]) 1078a61f4b89SDinar Temirbulatov : dyn_cast<Instruction>(OpValue); 1079a61f4b89SDinar Temirbulatov if (!Intersection) 1080a61f4b89SDinar Temirbulatov return; 1081a61f4b89SDinar Temirbulatov const unsigned Opcode = Intersection->getOpcode(); 1082a61f4b89SDinar Temirbulatov VecOp->copyIRFlags(Intersection); 1083a61f4b89SDinar Temirbulatov for (auto *V : VL) { 1084a61f4b89SDinar Temirbulatov auto *Instr = dyn_cast<Instruction>(V); 1085a61f4b89SDinar Temirbulatov if (!Instr) 1086a61f4b89SDinar Temirbulatov continue; 1087a61f4b89SDinar Temirbulatov if (OpValue == nullptr || Opcode == Instr->getOpcode()) 1088a61f4b89SDinar Temirbulatov VecOp->andIRFlags(V); 1089cf9daa33SAmara Emerson } 1090cf9daa33SAmara Emerson } 1091a78dc4d6SMax Kazantsev 1092a78dc4d6SMax Kazantsev bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L, 1093a78dc4d6SMax Kazantsev ScalarEvolution &SE) { 1094a78dc4d6SMax Kazantsev const SCEV *Zero = SE.getZero(S->getType()); 1095a78dc4d6SMax Kazantsev return SE.isAvailableAtLoopEntry(S, L) && 1096a78dc4d6SMax Kazantsev SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero); 1097a78dc4d6SMax Kazantsev } 1098a78dc4d6SMax Kazantsev 1099a78dc4d6SMax Kazantsev bool llvm::isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, 1100a78dc4d6SMax Kazantsev ScalarEvolution &SE) { 1101a78dc4d6SMax Kazantsev const SCEV *Zero = SE.getZero(S->getType()); 1102a78dc4d6SMax Kazantsev return SE.isAvailableAtLoopEntry(S, L) && 1103a78dc4d6SMax Kazantsev SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero); 1104a78dc4d6SMax Kazantsev } 1105a78dc4d6SMax Kazantsev 1106a78dc4d6SMax Kazantsev bool llvm::cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, 1107a78dc4d6SMax Kazantsev bool Signed) { 1108a78dc4d6SMax Kazantsev unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth(); 1109a78dc4d6SMax Kazantsev APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) : 1110a78dc4d6SMax Kazantsev APInt::getMinValue(BitWidth); 1111a78dc4d6SMax Kazantsev auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; 1112a78dc4d6SMax Kazantsev return SE.isAvailableAtLoopEntry(S, L) && 1113a78dc4d6SMax Kazantsev SE.isLoopEntryGuardedByCond(L, Predicate, S, 1114a78dc4d6SMax Kazantsev SE.getConstant(Min)); 1115a78dc4d6SMax Kazantsev } 1116a78dc4d6SMax Kazantsev 1117a78dc4d6SMax Kazantsev bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, 1118a78dc4d6SMax Kazantsev bool Signed) { 1119a78dc4d6SMax Kazantsev unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth(); 1120a78dc4d6SMax Kazantsev APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) : 1121a78dc4d6SMax Kazantsev APInt::getMaxValue(BitWidth); 1122a78dc4d6SMax Kazantsev auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; 1123a78dc4d6SMax Kazantsev return SE.isAvailableAtLoopEntry(S, L) && 1124a78dc4d6SMax Kazantsev SE.isLoopEntryGuardedByCond(L, Predicate, S, 1125a78dc4d6SMax Kazantsev SE.getConstant(Max)); 1126a78dc4d6SMax Kazantsev } 112793175a5cSSjoerd Meijer 112893175a5cSSjoerd Meijer //===----------------------------------------------------------------------===// 112993175a5cSSjoerd Meijer // rewriteLoopExitValues - Optimize IV users outside the loop. 113093175a5cSSjoerd Meijer // As a side effect, reduces the amount of IV processing within the loop. 113193175a5cSSjoerd Meijer //===----------------------------------------------------------------------===// 113293175a5cSSjoerd Meijer 113393175a5cSSjoerd Meijer // Return true if the SCEV expansion generated by the rewriter can replace the 113493175a5cSSjoerd Meijer // original value. SCEV guarantees that it produces the same value, but the way 113593175a5cSSjoerd Meijer // it is produced may be illegal IR. Ideally, this function will only be 113693175a5cSSjoerd Meijer // called for verification. 113793175a5cSSjoerd Meijer static bool isValidRewrite(ScalarEvolution *SE, Value *FromVal, Value *ToVal) { 113893175a5cSSjoerd Meijer // If an SCEV expression subsumed multiple pointers, its expansion could 113993175a5cSSjoerd Meijer // reassociate the GEP changing the base pointer. This is illegal because the 114093175a5cSSjoerd Meijer // final address produced by a GEP chain must be inbounds relative to its 114193175a5cSSjoerd Meijer // underlying object. Otherwise basic alias analysis, among other things, 114293175a5cSSjoerd Meijer // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid 114393175a5cSSjoerd Meijer // producing an expression involving multiple pointers. Until then, we must 114493175a5cSSjoerd Meijer // bail out here. 114593175a5cSSjoerd Meijer // 114689051ebaSVitaly Buka // Retrieve the pointer operand of the GEP. Don't use getUnderlyingObject 114793175a5cSSjoerd Meijer // because it understands lcssa phis while SCEV does not. 114893175a5cSSjoerd Meijer Value *FromPtr = FromVal; 114993175a5cSSjoerd Meijer Value *ToPtr = ToVal; 115093175a5cSSjoerd Meijer if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) 115193175a5cSSjoerd Meijer FromPtr = GEP->getPointerOperand(); 115293175a5cSSjoerd Meijer 115393175a5cSSjoerd Meijer if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) 115493175a5cSSjoerd Meijer ToPtr = GEP->getPointerOperand(); 115593175a5cSSjoerd Meijer 115693175a5cSSjoerd Meijer if (FromPtr != FromVal || ToPtr != ToVal) { 115793175a5cSSjoerd Meijer // Quickly check the common case 115893175a5cSSjoerd Meijer if (FromPtr == ToPtr) 115993175a5cSSjoerd Meijer return true; 116093175a5cSSjoerd Meijer 116193175a5cSSjoerd Meijer // SCEV may have rewritten an expression that produces the GEP's pointer 116293175a5cSSjoerd Meijer // operand. That's ok as long as the pointer operand has the same base 116389051ebaSVitaly Buka // pointer. Unlike getUnderlyingObject(), getPointerBase() will find the 116493175a5cSSjoerd Meijer // base of a recurrence. This handles the case in which SCEV expansion 116593175a5cSSjoerd Meijer // converts a pointer type recurrence into a nonrecurrent pointer base 116693175a5cSSjoerd Meijer // indexed by an integer recurrence. 116793175a5cSSjoerd Meijer 116893175a5cSSjoerd Meijer // If the GEP base pointer is a vector of pointers, abort. 116993175a5cSSjoerd Meijer if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy()) 117093175a5cSSjoerd Meijer return false; 117193175a5cSSjoerd Meijer 117293175a5cSSjoerd Meijer const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); 117393175a5cSSjoerd Meijer const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); 117493175a5cSSjoerd Meijer if (FromBase == ToBase) 117593175a5cSSjoerd Meijer return true; 117693175a5cSSjoerd Meijer 117793175a5cSSjoerd Meijer LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: GEP rewrite bail out " 117893175a5cSSjoerd Meijer << *FromBase << " != " << *ToBase << "\n"); 117993175a5cSSjoerd Meijer 118093175a5cSSjoerd Meijer return false; 118193175a5cSSjoerd Meijer } 118293175a5cSSjoerd Meijer return true; 118393175a5cSSjoerd Meijer } 118493175a5cSSjoerd Meijer 118593175a5cSSjoerd Meijer static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) { 118693175a5cSSjoerd Meijer SmallPtrSet<const Instruction *, 8> Visited; 118793175a5cSSjoerd Meijer SmallVector<const Instruction *, 8> WorkList; 118893175a5cSSjoerd Meijer Visited.insert(I); 118993175a5cSSjoerd Meijer WorkList.push_back(I); 119093175a5cSSjoerd Meijer while (!WorkList.empty()) { 119193175a5cSSjoerd Meijer const Instruction *Curr = WorkList.pop_back_val(); 119293175a5cSSjoerd Meijer // This use is outside the loop, nothing to do. 119393175a5cSSjoerd Meijer if (!L->contains(Curr)) 119493175a5cSSjoerd Meijer continue; 119593175a5cSSjoerd Meijer // Do we assume it is a "hard" use which will not be eliminated easily? 119693175a5cSSjoerd Meijer if (Curr->mayHaveSideEffects()) 119793175a5cSSjoerd Meijer return true; 119893175a5cSSjoerd Meijer // Otherwise, add all its users to worklist. 119993175a5cSSjoerd Meijer for (auto U : Curr->users()) { 120093175a5cSSjoerd Meijer auto *UI = cast<Instruction>(U); 120193175a5cSSjoerd Meijer if (Visited.insert(UI).second) 120293175a5cSSjoerd Meijer WorkList.push_back(UI); 120393175a5cSSjoerd Meijer } 120493175a5cSSjoerd Meijer } 120593175a5cSSjoerd Meijer return false; 120693175a5cSSjoerd Meijer } 120793175a5cSSjoerd Meijer 120893175a5cSSjoerd Meijer // Collect information about PHI nodes which can be transformed in 120993175a5cSSjoerd Meijer // rewriteLoopExitValues. 121093175a5cSSjoerd Meijer struct RewritePhi { 1211b2df9612SRoman Lebedev PHINode *PN; // For which PHI node is this replacement? 1212b2df9612SRoman Lebedev unsigned Ith; // For which incoming value? 1213b2df9612SRoman Lebedev const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting. 1214b2df9612SRoman Lebedev Instruction *ExpansionPoint; // Where we'd like to expand that SCEV? 1215b2df9612SRoman Lebedev bool HighCost; // Is this expansion a high-cost? 121693175a5cSSjoerd Meijer 1217b2df9612SRoman Lebedev Value *Expansion = nullptr; 1218b2df9612SRoman Lebedev bool ValidRewrite = false; 1219b2df9612SRoman Lebedev 1220b2df9612SRoman Lebedev RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt, 1221b2df9612SRoman Lebedev bool H) 1222b2df9612SRoman Lebedev : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt), 1223b2df9612SRoman Lebedev HighCost(H) {} 122493175a5cSSjoerd Meijer }; 122593175a5cSSjoerd Meijer 122693175a5cSSjoerd Meijer // Check whether it is possible to delete the loop after rewriting exit 122793175a5cSSjoerd Meijer // value. If it is possible, ignore ReplaceExitValue and do rewriting 122893175a5cSSjoerd Meijer // aggressively. 122993175a5cSSjoerd Meijer static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) { 123093175a5cSSjoerd Meijer BasicBlock *Preheader = L->getLoopPreheader(); 123193175a5cSSjoerd Meijer // If there is no preheader, the loop will not be deleted. 123293175a5cSSjoerd Meijer if (!Preheader) 123393175a5cSSjoerd Meijer return false; 123493175a5cSSjoerd Meijer 123593175a5cSSjoerd Meijer // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1. 123693175a5cSSjoerd Meijer // We obviate multiple ExitingBlocks case for simplicity. 123793175a5cSSjoerd Meijer // TODO: If we see testcase with multiple ExitingBlocks can be deleted 123893175a5cSSjoerd Meijer // after exit value rewriting, we can enhance the logic here. 123993175a5cSSjoerd Meijer SmallVector<BasicBlock *, 4> ExitingBlocks; 124093175a5cSSjoerd Meijer L->getExitingBlocks(ExitingBlocks); 124193175a5cSSjoerd Meijer SmallVector<BasicBlock *, 8> ExitBlocks; 124293175a5cSSjoerd Meijer L->getUniqueExitBlocks(ExitBlocks); 124393175a5cSSjoerd Meijer if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1) 124493175a5cSSjoerd Meijer return false; 124593175a5cSSjoerd Meijer 124693175a5cSSjoerd Meijer BasicBlock *ExitBlock = ExitBlocks[0]; 124793175a5cSSjoerd Meijer BasicBlock::iterator BI = ExitBlock->begin(); 124893175a5cSSjoerd Meijer while (PHINode *P = dyn_cast<PHINode>(BI)) { 124993175a5cSSjoerd Meijer Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]); 125093175a5cSSjoerd Meijer 125193175a5cSSjoerd Meijer // If the Incoming value of P is found in RewritePhiSet, we know it 125293175a5cSSjoerd Meijer // could be rewritten to use a loop invariant value in transformation 125393175a5cSSjoerd Meijer // phase later. Skip it in the loop invariant check below. 125493175a5cSSjoerd Meijer bool found = false; 125593175a5cSSjoerd Meijer for (const RewritePhi &Phi : RewritePhiSet) { 1256b2df9612SRoman Lebedev if (!Phi.ValidRewrite) 1257b2df9612SRoman Lebedev continue; 125893175a5cSSjoerd Meijer unsigned i = Phi.Ith; 125993175a5cSSjoerd Meijer if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) { 126093175a5cSSjoerd Meijer found = true; 126193175a5cSSjoerd Meijer break; 126293175a5cSSjoerd Meijer } 126393175a5cSSjoerd Meijer } 126493175a5cSSjoerd Meijer 126593175a5cSSjoerd Meijer Instruction *I; 126693175a5cSSjoerd Meijer if (!found && (I = dyn_cast<Instruction>(Incoming))) 126793175a5cSSjoerd Meijer if (!L->hasLoopInvariantOperands(I)) 126893175a5cSSjoerd Meijer return false; 126993175a5cSSjoerd Meijer 127093175a5cSSjoerd Meijer ++BI; 127193175a5cSSjoerd Meijer } 127293175a5cSSjoerd Meijer 127393175a5cSSjoerd Meijer for (auto *BB : L->blocks()) 127493175a5cSSjoerd Meijer if (llvm::any_of(*BB, [](Instruction &I) { 127593175a5cSSjoerd Meijer return I.mayHaveSideEffects(); 127693175a5cSSjoerd Meijer })) 127793175a5cSSjoerd Meijer return false; 127893175a5cSSjoerd Meijer 127993175a5cSSjoerd Meijer return true; 128093175a5cSSjoerd Meijer } 128193175a5cSSjoerd Meijer 12820789f280SRoman Lebedev int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, 12830789f280SRoman Lebedev ScalarEvolution *SE, 12840789f280SRoman Lebedev const TargetTransformInfo *TTI, 12850789f280SRoman Lebedev SCEVExpander &Rewriter, DominatorTree *DT, 12860789f280SRoman Lebedev ReplaceExitVal ReplaceExitValue, 128793175a5cSSjoerd Meijer SmallVector<WeakTrackingVH, 16> &DeadInsts) { 128893175a5cSSjoerd Meijer // Check a pre-condition. 128993175a5cSSjoerd Meijer assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 129093175a5cSSjoerd Meijer "Indvars did not preserve LCSSA!"); 129193175a5cSSjoerd Meijer 129293175a5cSSjoerd Meijer SmallVector<BasicBlock*, 8> ExitBlocks; 129393175a5cSSjoerd Meijer L->getUniqueExitBlocks(ExitBlocks); 129493175a5cSSjoerd Meijer 129593175a5cSSjoerd Meijer SmallVector<RewritePhi, 8> RewritePhiSet; 129693175a5cSSjoerd Meijer // Find all values that are computed inside the loop, but used outside of it. 129793175a5cSSjoerd Meijer // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan 129893175a5cSSjoerd Meijer // the exit blocks of the loop to find them. 129993175a5cSSjoerd Meijer for (BasicBlock *ExitBB : ExitBlocks) { 130093175a5cSSjoerd Meijer // If there are no PHI nodes in this exit block, then no values defined 130193175a5cSSjoerd Meijer // inside the loop are used on this path, skip it. 130293175a5cSSjoerd Meijer PHINode *PN = dyn_cast<PHINode>(ExitBB->begin()); 130393175a5cSSjoerd Meijer if (!PN) continue; 130493175a5cSSjoerd Meijer 130593175a5cSSjoerd Meijer unsigned NumPreds = PN->getNumIncomingValues(); 130693175a5cSSjoerd Meijer 130793175a5cSSjoerd Meijer // Iterate over all of the PHI nodes. 130893175a5cSSjoerd Meijer BasicBlock::iterator BBI = ExitBB->begin(); 130993175a5cSSjoerd Meijer while ((PN = dyn_cast<PHINode>(BBI++))) { 131093175a5cSSjoerd Meijer if (PN->use_empty()) 131193175a5cSSjoerd Meijer continue; // dead use, don't replace it 131293175a5cSSjoerd Meijer 131393175a5cSSjoerd Meijer if (!SE->isSCEVable(PN->getType())) 131493175a5cSSjoerd Meijer continue; 131593175a5cSSjoerd Meijer 131693175a5cSSjoerd Meijer // It's necessary to tell ScalarEvolution about this explicitly so that 131793175a5cSSjoerd Meijer // it can walk the def-use list and forget all SCEVs, as it may not be 131893175a5cSSjoerd Meijer // watching the PHI itself. Once the new exit value is in place, there 131993175a5cSSjoerd Meijer // may not be a def-use connection between the loop and every instruction 132093175a5cSSjoerd Meijer // which got a SCEVAddRecExpr for that loop. 132193175a5cSSjoerd Meijer SE->forgetValue(PN); 132293175a5cSSjoerd Meijer 132393175a5cSSjoerd Meijer // Iterate over all of the values in all the PHI nodes. 132493175a5cSSjoerd Meijer for (unsigned i = 0; i != NumPreds; ++i) { 132593175a5cSSjoerd Meijer // If the value being merged in is not integer or is not defined 132693175a5cSSjoerd Meijer // in the loop, skip it. 132793175a5cSSjoerd Meijer Value *InVal = PN->getIncomingValue(i); 132893175a5cSSjoerd Meijer if (!isa<Instruction>(InVal)) 132993175a5cSSjoerd Meijer continue; 133093175a5cSSjoerd Meijer 133193175a5cSSjoerd Meijer // If this pred is for a subloop, not L itself, skip it. 133293175a5cSSjoerd Meijer if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) 133393175a5cSSjoerd Meijer continue; // The Block is in a subloop, skip it. 133493175a5cSSjoerd Meijer 133593175a5cSSjoerd Meijer // Check that InVal is defined in the loop. 133693175a5cSSjoerd Meijer Instruction *Inst = cast<Instruction>(InVal); 133793175a5cSSjoerd Meijer if (!L->contains(Inst)) 133893175a5cSSjoerd Meijer continue; 133993175a5cSSjoerd Meijer 134093175a5cSSjoerd Meijer // Okay, this instruction has a user outside of the current loop 134193175a5cSSjoerd Meijer // and varies predictably *inside* the loop. Evaluate the value it 134293175a5cSSjoerd Meijer // contains when the loop exits, if possible. We prefer to start with 134393175a5cSSjoerd Meijer // expressions which are true for all exits (so as to maximize 134493175a5cSSjoerd Meijer // expression reuse by the SCEVExpander), but resort to per-exit 134593175a5cSSjoerd Meijer // evaluation if that fails. 134693175a5cSSjoerd Meijer const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); 134793175a5cSSjoerd Meijer if (isa<SCEVCouldNotCompute>(ExitValue) || 134893175a5cSSjoerd Meijer !SE->isLoopInvariant(ExitValue, L) || 134993175a5cSSjoerd Meijer !isSafeToExpand(ExitValue, *SE)) { 135093175a5cSSjoerd Meijer // TODO: This should probably be sunk into SCEV in some way; maybe a 135193175a5cSSjoerd Meijer // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for 135293175a5cSSjoerd Meijer // most SCEV expressions and other recurrence types (e.g. shift 135393175a5cSSjoerd Meijer // recurrences). Is there existing code we can reuse? 135493175a5cSSjoerd Meijer const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i)); 135593175a5cSSjoerd Meijer if (isa<SCEVCouldNotCompute>(ExitCount)) 135693175a5cSSjoerd Meijer continue; 135793175a5cSSjoerd Meijer if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst))) 135893175a5cSSjoerd Meijer if (AddRec->getLoop() == L) 135993175a5cSSjoerd Meijer ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE); 136093175a5cSSjoerd Meijer if (isa<SCEVCouldNotCompute>(ExitValue) || 136193175a5cSSjoerd Meijer !SE->isLoopInvariant(ExitValue, L) || 136293175a5cSSjoerd Meijer !isSafeToExpand(ExitValue, *SE)) 136393175a5cSSjoerd Meijer continue; 136493175a5cSSjoerd Meijer } 136593175a5cSSjoerd Meijer 136693175a5cSSjoerd Meijer // Computing the value outside of the loop brings no benefit if it is 136793175a5cSSjoerd Meijer // definitely used inside the loop in a way which can not be optimized 13687d572ef2SRoman Lebedev // away. Avoid doing so unless we know we have a value which computes 13697d572ef2SRoman Lebedev // the ExitValue already. TODO: This should be merged into SCEV 13707d572ef2SRoman Lebedev // expander to leverage its knowledge of existing expressions. 13717d572ef2SRoman Lebedev if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) && 13727d572ef2SRoman Lebedev !isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst)) 137393175a5cSSjoerd Meijer continue; 137493175a5cSSjoerd Meijer 1375b2df9612SRoman Lebedev // Check if expansions of this SCEV would count as being high cost. 13767d572ef2SRoman Lebedev bool HighCost = Rewriter.isHighCostExpansion( 13777d572ef2SRoman Lebedev ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst); 1378b2df9612SRoman Lebedev 1379b2df9612SRoman Lebedev // Note that we must not perform expansions until after 1380b2df9612SRoman Lebedev // we query *all* the costs, because if we perform temporary expansion 1381b2df9612SRoman Lebedev // inbetween, one that we might not intend to keep, said expansion 1382b2df9612SRoman Lebedev // *may* affect cost calculation of the the next SCEV's we'll query, 1383b2df9612SRoman Lebedev // and next SCEV may errneously get smaller cost. 1384b2df9612SRoman Lebedev 1385b2df9612SRoman Lebedev // Collect all the candidate PHINodes to be rewritten. 1386b2df9612SRoman Lebedev RewritePhiSet.emplace_back(PN, i, ExitValue, Inst, HighCost); 1387b2df9612SRoman Lebedev } 1388b2df9612SRoman Lebedev } 1389b2df9612SRoman Lebedev } 1390b2df9612SRoman Lebedev 1391b2df9612SRoman Lebedev // Now that we've done preliminary filtering and billed all the SCEV's, 1392b2df9612SRoman Lebedev // we can perform the last sanity check - the expansion must be valid. 1393b2df9612SRoman Lebedev for (RewritePhi &Phi : RewritePhiSet) { 1394b2df9612SRoman Lebedev Phi.Expansion = Rewriter.expandCodeFor(Phi.ExpansionSCEV, Phi.PN->getType(), 1395b2df9612SRoman Lebedev Phi.ExpansionPoint); 139693175a5cSSjoerd Meijer 139793175a5cSSjoerd Meijer LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " 1398b2df9612SRoman Lebedev << *(Phi.Expansion) << '\n' 1399b2df9612SRoman Lebedev << " LoopVal = " << *(Phi.ExpansionPoint) << "\n"); 140093175a5cSSjoerd Meijer 1401b2df9612SRoman Lebedev // FIXME: isValidRewrite() is a hack. it should be an assert, eventually. 1402b2df9612SRoman Lebedev Phi.ValidRewrite = isValidRewrite(SE, Phi.ExpansionPoint, Phi.Expansion); 1403b2df9612SRoman Lebedev if (!Phi.ValidRewrite) { 1404b2df9612SRoman Lebedev DeadInsts.push_back(Phi.Expansion); 140593175a5cSSjoerd Meijer continue; 140693175a5cSSjoerd Meijer } 140793175a5cSSjoerd Meijer 140893175a5cSSjoerd Meijer #ifndef NDEBUG 140993175a5cSSjoerd Meijer // If we reuse an instruction from a loop which is neither L nor one of 141093175a5cSSjoerd Meijer // its containing loops, we end up breaking LCSSA form for this loop by 141193175a5cSSjoerd Meijer // creating a new use of its instruction. 1412b2df9612SRoman Lebedev if (auto *ExitInsn = dyn_cast<Instruction>(Phi.Expansion)) 141393175a5cSSjoerd Meijer if (auto *EVL = LI->getLoopFor(ExitInsn->getParent())) 141493175a5cSSjoerd Meijer if (EVL != L) 141593175a5cSSjoerd Meijer assert(EVL->contains(L) && "LCSSA breach detected!"); 141693175a5cSSjoerd Meijer #endif 1417b2df9612SRoman Lebedev } 141893175a5cSSjoerd Meijer 1419b2df9612SRoman Lebedev // TODO: after isValidRewrite() is an assertion, evaluate whether 1420b2df9612SRoman Lebedev // it is beneficial to change how we calculate high-cost: 1421b2df9612SRoman Lebedev // if we have SCEV 'A' which we know we will expand, should we calculate 1422b2df9612SRoman Lebedev // the cost of other SCEV's after expanding SCEV 'A', 1423b2df9612SRoman Lebedev // thus potentially giving cost bonus to those other SCEV's? 142493175a5cSSjoerd Meijer 142593175a5cSSjoerd Meijer bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet); 142693175a5cSSjoerd Meijer int NumReplaced = 0; 142793175a5cSSjoerd Meijer 142893175a5cSSjoerd Meijer // Transformation. 142993175a5cSSjoerd Meijer for (const RewritePhi &Phi : RewritePhiSet) { 1430b2df9612SRoman Lebedev if (!Phi.ValidRewrite) 1431b2df9612SRoman Lebedev continue; 1432b2df9612SRoman Lebedev 143393175a5cSSjoerd Meijer PHINode *PN = Phi.PN; 1434b2df9612SRoman Lebedev Value *ExitVal = Phi.Expansion; 143593175a5cSSjoerd Meijer 143693175a5cSSjoerd Meijer // Only do the rewrite when the ExitValue can be expanded cheaply. 143793175a5cSSjoerd Meijer // If LoopCanBeDel is true, rewrite exit value aggressively. 143893175a5cSSjoerd Meijer if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) { 143993175a5cSSjoerd Meijer DeadInsts.push_back(ExitVal); 144093175a5cSSjoerd Meijer continue; 144193175a5cSSjoerd Meijer } 144293175a5cSSjoerd Meijer 144393175a5cSSjoerd Meijer NumReplaced++; 144493175a5cSSjoerd Meijer Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith)); 144593175a5cSSjoerd Meijer PN->setIncomingValue(Phi.Ith, ExitVal); 144693175a5cSSjoerd Meijer 144793175a5cSSjoerd Meijer // If this instruction is dead now, delete it. Don't do it now to avoid 144893175a5cSSjoerd Meijer // invalidating iterators. 144993175a5cSSjoerd Meijer if (isInstructionTriviallyDead(Inst, TLI)) 145093175a5cSSjoerd Meijer DeadInsts.push_back(Inst); 145193175a5cSSjoerd Meijer 145293175a5cSSjoerd Meijer // Replace PN with ExitVal if that is legal and does not break LCSSA. 145393175a5cSSjoerd Meijer if (PN->getNumIncomingValues() == 1 && 145493175a5cSSjoerd Meijer LI->replacementPreservesLCSSAForm(PN, ExitVal)) { 145593175a5cSSjoerd Meijer PN->replaceAllUsesWith(ExitVal); 145693175a5cSSjoerd Meijer PN->eraseFromParent(); 145793175a5cSSjoerd Meijer } 145893175a5cSSjoerd Meijer } 145993175a5cSSjoerd Meijer 146093175a5cSSjoerd Meijer // The insertion point instruction may have been deleted; clear it out 146193175a5cSSjoerd Meijer // so that the rewriter doesn't trip over it later. 146293175a5cSSjoerd Meijer Rewriter.clearInsertPoint(); 146393175a5cSSjoerd Meijer return NumReplaced; 146493175a5cSSjoerd Meijer } 1465af7e1588SEvgeniy Brevnov 1466af7e1588SEvgeniy Brevnov /// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for 1467af7e1588SEvgeniy Brevnov /// \p OrigLoop. 1468af7e1588SEvgeniy Brevnov void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop, 1469af7e1588SEvgeniy Brevnov Loop *RemainderLoop, uint64_t UF) { 1470af7e1588SEvgeniy Brevnov assert(UF > 0 && "Zero unrolled factor is not supported"); 1471af7e1588SEvgeniy Brevnov assert(UnrolledLoop != RemainderLoop && 1472af7e1588SEvgeniy Brevnov "Unrolled and Remainder loops are expected to distinct"); 1473af7e1588SEvgeniy Brevnov 1474af7e1588SEvgeniy Brevnov // Get number of iterations in the original scalar loop. 1475af7e1588SEvgeniy Brevnov unsigned OrigLoopInvocationWeight = 0; 1476af7e1588SEvgeniy Brevnov Optional<unsigned> OrigAverageTripCount = 1477af7e1588SEvgeniy Brevnov getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight); 1478af7e1588SEvgeniy Brevnov if (!OrigAverageTripCount) 1479af7e1588SEvgeniy Brevnov return; 1480af7e1588SEvgeniy Brevnov 1481af7e1588SEvgeniy Brevnov // Calculate number of iterations in unrolled loop. 1482af7e1588SEvgeniy Brevnov unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF; 1483af7e1588SEvgeniy Brevnov // Calculate number of iterations for remainder loop. 1484af7e1588SEvgeniy Brevnov unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF; 1485af7e1588SEvgeniy Brevnov 1486af7e1588SEvgeniy Brevnov setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount, 1487af7e1588SEvgeniy Brevnov OrigLoopInvocationWeight); 1488af7e1588SEvgeniy Brevnov setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount, 1489af7e1588SEvgeniy Brevnov OrigLoopInvocationWeight); 1490af7e1588SEvgeniy Brevnov } 1491388de9dfSAlina Sbirlea 1492388de9dfSAlina Sbirlea /// Utility that implements appending of loops onto a worklist. 1493388de9dfSAlina Sbirlea /// Loops are added in preorder (analogous for reverse postorder for trees), 1494388de9dfSAlina Sbirlea /// and the worklist is processed LIFO. 1495388de9dfSAlina Sbirlea template <typename RangeT> 1496388de9dfSAlina Sbirlea void llvm::appendReversedLoopsToWorklist( 1497388de9dfSAlina Sbirlea RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) { 1498388de9dfSAlina Sbirlea // We use an internal worklist to build up the preorder traversal without 1499388de9dfSAlina Sbirlea // recursion. 1500388de9dfSAlina Sbirlea SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist; 1501388de9dfSAlina Sbirlea 1502388de9dfSAlina Sbirlea // We walk the initial sequence of loops in reverse because we generally want 1503388de9dfSAlina Sbirlea // to visit defs before uses and the worklist is LIFO. 1504388de9dfSAlina Sbirlea for (Loop *RootL : Loops) { 1505388de9dfSAlina Sbirlea assert(PreOrderLoops.empty() && "Must start with an empty preorder walk."); 1506388de9dfSAlina Sbirlea assert(PreOrderWorklist.empty() && 1507388de9dfSAlina Sbirlea "Must start with an empty preorder walk worklist."); 1508388de9dfSAlina Sbirlea PreOrderWorklist.push_back(RootL); 1509388de9dfSAlina Sbirlea do { 1510388de9dfSAlina Sbirlea Loop *L = PreOrderWorklist.pop_back_val(); 1511388de9dfSAlina Sbirlea PreOrderWorklist.append(L->begin(), L->end()); 1512388de9dfSAlina Sbirlea PreOrderLoops.push_back(L); 1513388de9dfSAlina Sbirlea } while (!PreOrderWorklist.empty()); 1514388de9dfSAlina Sbirlea 1515388de9dfSAlina Sbirlea Worklist.insert(std::move(PreOrderLoops)); 1516388de9dfSAlina Sbirlea PreOrderLoops.clear(); 1517388de9dfSAlina Sbirlea } 1518388de9dfSAlina Sbirlea } 1519388de9dfSAlina Sbirlea 1520388de9dfSAlina Sbirlea template <typename RangeT> 1521388de9dfSAlina Sbirlea void llvm::appendLoopsToWorklist(RangeT &&Loops, 1522388de9dfSAlina Sbirlea SmallPriorityWorklist<Loop *, 4> &Worklist) { 1523388de9dfSAlina Sbirlea appendReversedLoopsToWorklist(reverse(Loops), Worklist); 1524388de9dfSAlina Sbirlea } 1525388de9dfSAlina Sbirlea 1526388de9dfSAlina Sbirlea template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>( 1527388de9dfSAlina Sbirlea ArrayRef<Loop *> &Loops, SmallPriorityWorklist<Loop *, 4> &Worklist); 1528388de9dfSAlina Sbirlea 152967904db2SAlina Sbirlea template void 153067904db2SAlina Sbirlea llvm::appendLoopsToWorklist<Loop &>(Loop &L, 153167904db2SAlina Sbirlea SmallPriorityWorklist<Loop *, 4> &Worklist); 153267904db2SAlina Sbirlea 1533388de9dfSAlina Sbirlea void llvm::appendLoopsToWorklist(LoopInfo &LI, 1534388de9dfSAlina Sbirlea SmallPriorityWorklist<Loop *, 4> &Worklist) { 1535388de9dfSAlina Sbirlea appendReversedLoopsToWorklist(LI, Worklist); 1536388de9dfSAlina Sbirlea } 15373dcaf296SArkady Shlykov 15383dcaf296SArkady Shlykov Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, 15393dcaf296SArkady Shlykov LoopInfo *LI, LPPassManager *LPM) { 15403dcaf296SArkady Shlykov Loop &New = *LI->AllocateLoop(); 15413dcaf296SArkady Shlykov if (PL) 15423dcaf296SArkady Shlykov PL->addChildLoop(&New); 15433dcaf296SArkady Shlykov else 15443dcaf296SArkady Shlykov LI->addTopLevelLoop(&New); 15453dcaf296SArkady Shlykov 15463dcaf296SArkady Shlykov if (LPM) 15473dcaf296SArkady Shlykov LPM->addLoop(New); 15483dcaf296SArkady Shlykov 15493dcaf296SArkady Shlykov // Add all of the blocks in L to the new loop. 15503dcaf296SArkady Shlykov for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 15513dcaf296SArkady Shlykov I != E; ++I) 15523dcaf296SArkady Shlykov if (LI->getLoopFor(*I) == L) 15533dcaf296SArkady Shlykov New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI); 15543dcaf296SArkady Shlykov 15553dcaf296SArkady Shlykov // Add all of the subloops to the new loop. 15563dcaf296SArkady Shlykov for (Loop *I : *L) 15573dcaf296SArkady Shlykov cloneLoop(I, &New, VM, LI, LPM); 15583dcaf296SArkady Shlykov 15593dcaf296SArkady Shlykov return &New; 15603dcaf296SArkady Shlykov } 15618528186bSFlorian Hahn 15628528186bSFlorian Hahn /// IR Values for the lower and upper bounds of a pointer evolution. We 15638528186bSFlorian Hahn /// need to use value-handles because SCEV expansion can invalidate previously 15648528186bSFlorian Hahn /// expanded values. Thus expansion of a pointer can invalidate the bounds for 15658528186bSFlorian Hahn /// a previous one. 15668528186bSFlorian Hahn struct PointerBounds { 15678528186bSFlorian Hahn TrackingVH<Value> Start; 15688528186bSFlorian Hahn TrackingVH<Value> End; 15698528186bSFlorian Hahn }; 15708528186bSFlorian Hahn 15718528186bSFlorian Hahn /// Expand code for the lower and upper bound of the pointer group \p CG 15728528186bSFlorian Hahn /// in \p TheLoop. \return the values for the bounds. 15738528186bSFlorian Hahn static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, 15748528186bSFlorian Hahn Loop *TheLoop, Instruction *Loc, 157528410d17SFlorian Hahn SCEVExpander &Exp) { 157628410d17SFlorian Hahn ScalarEvolution *SE = Exp.getSE(); 15778528186bSFlorian Hahn // TODO: Add helper to retrieve pointers to CG. 15788528186bSFlorian Hahn Value *Ptr = CG->RtCheck.Pointers[CG->Members[0]].PointerValue; 15798528186bSFlorian Hahn const SCEV *Sc = SE->getSCEV(Ptr); 15808528186bSFlorian Hahn 15818528186bSFlorian Hahn unsigned AS = Ptr->getType()->getPointerAddressSpace(); 15828528186bSFlorian Hahn LLVMContext &Ctx = Loc->getContext(); 15838528186bSFlorian Hahn 15848528186bSFlorian Hahn // Use this type for pointer arithmetic. 15858528186bSFlorian Hahn Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); 15868528186bSFlorian Hahn 15878528186bSFlorian Hahn if (SE->isLoopInvariant(Sc, TheLoop)) { 15888528186bSFlorian Hahn LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" 15898528186bSFlorian Hahn << *Ptr << "\n"); 15908528186bSFlorian Hahn // Ptr could be in the loop body. If so, expand a new one at the correct 15918528186bSFlorian Hahn // location. 15928528186bSFlorian Hahn Instruction *Inst = dyn_cast<Instruction>(Ptr); 15938528186bSFlorian Hahn Value *NewPtr = (Inst && TheLoop->contains(Inst)) 15948528186bSFlorian Hahn ? Exp.expandCodeFor(Sc, PtrArithTy, Loc) 15958528186bSFlorian Hahn : Ptr; 15968528186bSFlorian Hahn // We must return a half-open range, which means incrementing Sc. 15978528186bSFlorian Hahn const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy)); 15988528186bSFlorian Hahn Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc); 15998528186bSFlorian Hahn return {NewPtr, NewPtrPlusOne}; 16008528186bSFlorian Hahn } else { 16018528186bSFlorian Hahn Value *Start = nullptr, *End = nullptr; 16028528186bSFlorian Hahn LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); 16038528186bSFlorian Hahn Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc); 16048528186bSFlorian Hahn End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); 16058528186bSFlorian Hahn LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High 16068528186bSFlorian Hahn << "\n"); 16078528186bSFlorian Hahn return {Start, End}; 16088528186bSFlorian Hahn } 16098528186bSFlorian Hahn } 16108528186bSFlorian Hahn 16118528186bSFlorian Hahn /// Turns a collection of checks into a collection of expanded upper and 16128528186bSFlorian Hahn /// lower bounds for both pointers in the check. 16138528186bSFlorian Hahn static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> 16148528186bSFlorian Hahn expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L, 161528410d17SFlorian Hahn Instruction *Loc, SCEVExpander &Exp) { 16168528186bSFlorian Hahn SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds; 16178528186bSFlorian Hahn 16188528186bSFlorian Hahn // Here we're relying on the SCEV Expander's cache to only emit code for the 16198528186bSFlorian Hahn // same bounds once. 16208528186bSFlorian Hahn transform(PointerChecks, std::back_inserter(ChecksWithBounds), 16218528186bSFlorian Hahn [&](const RuntimePointerCheck &Check) { 162228410d17SFlorian Hahn PointerBounds First = expandBounds(Check.first, L, Loc, Exp), 162328410d17SFlorian Hahn Second = expandBounds(Check.second, L, Loc, Exp); 16248528186bSFlorian Hahn return std::make_pair(First, Second); 16258528186bSFlorian Hahn }); 16268528186bSFlorian Hahn 16278528186bSFlorian Hahn return ChecksWithBounds; 16288528186bSFlorian Hahn } 16298528186bSFlorian Hahn 16308528186bSFlorian Hahn std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks( 16318528186bSFlorian Hahn Instruction *Loc, Loop *TheLoop, 16328528186bSFlorian Hahn const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, 163328410d17SFlorian Hahn SCEVExpander &Exp) { 16348528186bSFlorian Hahn // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible. 16358528186bSFlorian Hahn // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible 163628410d17SFlorian Hahn auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp); 16378528186bSFlorian Hahn 16388528186bSFlorian Hahn LLVMContext &Ctx = Loc->getContext(); 16398528186bSFlorian Hahn Instruction *FirstInst = nullptr; 16408528186bSFlorian Hahn IRBuilder<> ChkBuilder(Loc); 16418528186bSFlorian Hahn // Our instructions might fold to a constant. 16428528186bSFlorian Hahn Value *MemoryRuntimeCheck = nullptr; 16438528186bSFlorian Hahn 16448528186bSFlorian Hahn // FIXME: this helper is currently a duplicate of the one in 16458528186bSFlorian Hahn // LoopVectorize.cpp. 16468528186bSFlorian Hahn auto GetFirstInst = [](Instruction *FirstInst, Value *V, 16478528186bSFlorian Hahn Instruction *Loc) -> Instruction * { 16488528186bSFlorian Hahn if (FirstInst) 16498528186bSFlorian Hahn return FirstInst; 16508528186bSFlorian Hahn if (Instruction *I = dyn_cast<Instruction>(V)) 16518528186bSFlorian Hahn return I->getParent() == Loc->getParent() ? I : nullptr; 16528528186bSFlorian Hahn return nullptr; 16538528186bSFlorian Hahn }; 16548528186bSFlorian Hahn 16558528186bSFlorian Hahn for (const auto &Check : ExpandedChecks) { 16568528186bSFlorian Hahn const PointerBounds &A = Check.first, &B = Check.second; 16578528186bSFlorian Hahn // Check if two pointers (A and B) conflict where conflict is computed as: 16588528186bSFlorian Hahn // start(A) <= end(B) && start(B) <= end(A) 16598528186bSFlorian Hahn unsigned AS0 = A.Start->getType()->getPointerAddressSpace(); 16608528186bSFlorian Hahn unsigned AS1 = B.Start->getType()->getPointerAddressSpace(); 16618528186bSFlorian Hahn 16628528186bSFlorian Hahn assert((AS0 == B.End->getType()->getPointerAddressSpace()) && 16638528186bSFlorian Hahn (AS1 == A.End->getType()->getPointerAddressSpace()) && 16648528186bSFlorian Hahn "Trying to bounds check pointers with different address spaces"); 16658528186bSFlorian Hahn 16668528186bSFlorian Hahn Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); 16678528186bSFlorian Hahn Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); 16688528186bSFlorian Hahn 16698528186bSFlorian Hahn Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc"); 16708528186bSFlorian Hahn Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc"); 16718528186bSFlorian Hahn Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc"); 16728528186bSFlorian Hahn Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc"); 16738528186bSFlorian Hahn 16748528186bSFlorian Hahn // [A|B].Start points to the first accessed byte under base [A|B]. 16758528186bSFlorian Hahn // [A|B].End points to the last accessed byte, plus one. 16768528186bSFlorian Hahn // There is no conflict when the intervals are disjoint: 16778528186bSFlorian Hahn // NoConflict = (B.Start >= A.End) || (A.Start >= B.End) 16788528186bSFlorian Hahn // 16798528186bSFlorian Hahn // bound0 = (B.Start < A.End) 16808528186bSFlorian Hahn // bound1 = (A.Start < B.End) 16818528186bSFlorian Hahn // IsConflict = bound0 & bound1 16828528186bSFlorian Hahn Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0"); 16838528186bSFlorian Hahn FirstInst = GetFirstInst(FirstInst, Cmp0, Loc); 16848528186bSFlorian Hahn Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1"); 16858528186bSFlorian Hahn FirstInst = GetFirstInst(FirstInst, Cmp1, Loc); 16868528186bSFlorian Hahn Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); 16878528186bSFlorian Hahn FirstInst = GetFirstInst(FirstInst, IsConflict, Loc); 16888528186bSFlorian Hahn if (MemoryRuntimeCheck) { 16898528186bSFlorian Hahn IsConflict = 16908528186bSFlorian Hahn ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx"); 16918528186bSFlorian Hahn FirstInst = GetFirstInst(FirstInst, IsConflict, Loc); 16928528186bSFlorian Hahn } 16938528186bSFlorian Hahn MemoryRuntimeCheck = IsConflict; 16948528186bSFlorian Hahn } 16958528186bSFlorian Hahn 16968528186bSFlorian Hahn if (!MemoryRuntimeCheck) 16978528186bSFlorian Hahn return std::make_pair(nullptr, nullptr); 16988528186bSFlorian Hahn 16998528186bSFlorian Hahn // We have to do this trickery because the IRBuilder might fold the check to a 17008528186bSFlorian Hahn // constant expression in which case there is no Instruction anchored in a 17018528186bSFlorian Hahn // the block. 17028528186bSFlorian Hahn Instruction *Check = 17038528186bSFlorian Hahn BinaryOperator::CreateAnd(MemoryRuntimeCheck, ConstantInt::getTrue(Ctx)); 17048528186bSFlorian Hahn ChkBuilder.Insert(Check, "memcheck.conflict"); 17058528186bSFlorian Hahn FirstInst = GetFirstInst(FirstInst, Check, Loc); 17068528186bSFlorian Hahn return std::make_pair(FirstInst, Check); 17078528186bSFlorian Hahn } 1708*cfe87d4eSJingu Kang 1709*cfe87d4eSJingu Kang /// Check if the loop header has a conditional branch that is not 1710*cfe87d4eSJingu Kang /// loop-invariant, because it involves load instructions. If all paths from 1711*cfe87d4eSJingu Kang /// either the true or false successor to the header or loop exists do not 1712*cfe87d4eSJingu Kang /// modify the memory feeding the condition, perform 'partial unswitching'. That 1713*cfe87d4eSJingu Kang /// is, duplicate the instructions feeding the condition in the pre-header. Then 1714*cfe87d4eSJingu Kang /// unswitch on the duplicated condition. The condition is now known in the 1715*cfe87d4eSJingu Kang /// unswitched version for the 'invariant' path through the original loop. 1716*cfe87d4eSJingu Kang /// 1717*cfe87d4eSJingu Kang /// If the branch condition of the header is partially invariant, return a pair 1718*cfe87d4eSJingu Kang /// containing the instructions to duplicate and a boolean Constant to update 1719*cfe87d4eSJingu Kang /// the condition in the loops created for the true or false successors. 1720*cfe87d4eSJingu Kang Optional<IVConditionInfo> llvm::hasPartialIVCondition(Loop *L, 1721*cfe87d4eSJingu Kang unsigned MSSAThreshold, 1722*cfe87d4eSJingu Kang MemorySSA *MSSA, 1723*cfe87d4eSJingu Kang AAResults *AA) { 1724*cfe87d4eSJingu Kang auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator()); 1725*cfe87d4eSJingu Kang if (!TI || !TI->isConditional()) 1726*cfe87d4eSJingu Kang return {}; 1727*cfe87d4eSJingu Kang 1728*cfe87d4eSJingu Kang auto *CondI = dyn_cast<CmpInst>(TI->getCondition()); 1729*cfe87d4eSJingu Kang // The case with the condition outside the loop should already be handled 1730*cfe87d4eSJingu Kang // earlier. 1731*cfe87d4eSJingu Kang if (!CondI || !L->contains(CondI)) 1732*cfe87d4eSJingu Kang return {}; 1733*cfe87d4eSJingu Kang 1734*cfe87d4eSJingu Kang SmallVector<Instruction *> InstToDuplicate; 1735*cfe87d4eSJingu Kang InstToDuplicate.push_back(CondI); 1736*cfe87d4eSJingu Kang 1737*cfe87d4eSJingu Kang SmallVector<Value *, 4> WorkList; 1738*cfe87d4eSJingu Kang WorkList.append(CondI->op_begin(), CondI->op_end()); 1739*cfe87d4eSJingu Kang 1740*cfe87d4eSJingu Kang SmallVector<MemoryAccess *, 4> AccessesToCheck; 1741*cfe87d4eSJingu Kang SmallVector<MemoryLocation, 4> AccessedLocs; 1742*cfe87d4eSJingu Kang while (!WorkList.empty()) { 1743*cfe87d4eSJingu Kang Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val()); 1744*cfe87d4eSJingu Kang if (!I || !L->contains(I)) 1745*cfe87d4eSJingu Kang continue; 1746*cfe87d4eSJingu Kang 1747*cfe87d4eSJingu Kang // TODO: support additional instructions. 1748*cfe87d4eSJingu Kang if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I)) 1749*cfe87d4eSJingu Kang return {}; 1750*cfe87d4eSJingu Kang 1751*cfe87d4eSJingu Kang // Do not duplicate volatile and atomic loads. 1752*cfe87d4eSJingu Kang if (auto *LI = dyn_cast<LoadInst>(I)) 1753*cfe87d4eSJingu Kang if (LI->isVolatile() || LI->isAtomic()) 1754*cfe87d4eSJingu Kang return {}; 1755*cfe87d4eSJingu Kang 1756*cfe87d4eSJingu Kang InstToDuplicate.push_back(I); 1757*cfe87d4eSJingu Kang if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) { 1758*cfe87d4eSJingu Kang if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) { 1759*cfe87d4eSJingu Kang // Queue the defining access to check for alias checks. 1760*cfe87d4eSJingu Kang AccessesToCheck.push_back(MemUse->getDefiningAccess()); 1761*cfe87d4eSJingu Kang AccessedLocs.push_back(MemoryLocation::get(I)); 1762*cfe87d4eSJingu Kang } else { 1763*cfe87d4eSJingu Kang // MemoryDefs may clobber the location or may be atomic memory 1764*cfe87d4eSJingu Kang // operations. Bail out. 1765*cfe87d4eSJingu Kang return {}; 1766*cfe87d4eSJingu Kang } 1767*cfe87d4eSJingu Kang } 1768*cfe87d4eSJingu Kang WorkList.append(I->op_begin(), I->op_end()); 1769*cfe87d4eSJingu Kang } 1770*cfe87d4eSJingu Kang 1771*cfe87d4eSJingu Kang if (InstToDuplicate.empty()) 1772*cfe87d4eSJingu Kang return {}; 1773*cfe87d4eSJingu Kang 1774*cfe87d4eSJingu Kang SmallVector<BasicBlock *, 4> ExitingBlocks; 1775*cfe87d4eSJingu Kang L->getExitingBlocks(ExitingBlocks); 1776*cfe87d4eSJingu Kang auto HasNoClobbersOnPath = 1777*cfe87d4eSJingu Kang [L, AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate, 1778*cfe87d4eSJingu Kang MSSAThreshold](BasicBlock *Succ, BasicBlock *Header, 1779*cfe87d4eSJingu Kang SmallVector<MemoryAccess *, 4> AccessesToCheck) 1780*cfe87d4eSJingu Kang -> Optional<IVConditionInfo> { 1781*cfe87d4eSJingu Kang IVConditionInfo Info; 1782*cfe87d4eSJingu Kang // First, collect all blocks in the loop that are on a patch from Succ 1783*cfe87d4eSJingu Kang // to the header. 1784*cfe87d4eSJingu Kang SmallVector<BasicBlock *, 4> WorkList; 1785*cfe87d4eSJingu Kang WorkList.push_back(Succ); 1786*cfe87d4eSJingu Kang WorkList.push_back(Header); 1787*cfe87d4eSJingu Kang SmallPtrSet<BasicBlock *, 4> Seen; 1788*cfe87d4eSJingu Kang Seen.insert(Header); 1789*cfe87d4eSJingu Kang Info.PathIsNoop &= 1790*cfe87d4eSJingu Kang all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); }); 1791*cfe87d4eSJingu Kang 1792*cfe87d4eSJingu Kang while (!WorkList.empty()) { 1793*cfe87d4eSJingu Kang BasicBlock *Current = WorkList.pop_back_val(); 1794*cfe87d4eSJingu Kang if (!L->contains(Current)) 1795*cfe87d4eSJingu Kang continue; 1796*cfe87d4eSJingu Kang const auto &SeenIns = Seen.insert(Current); 1797*cfe87d4eSJingu Kang if (!SeenIns.second) 1798*cfe87d4eSJingu Kang continue; 1799*cfe87d4eSJingu Kang 1800*cfe87d4eSJingu Kang Info.PathIsNoop &= all_of( 1801*cfe87d4eSJingu Kang *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); }); 1802*cfe87d4eSJingu Kang WorkList.append(succ_begin(Current), succ_end(Current)); 1803*cfe87d4eSJingu Kang } 1804*cfe87d4eSJingu Kang 1805*cfe87d4eSJingu Kang // Require at least 2 blocks on a path through the loop. This skips 1806*cfe87d4eSJingu Kang // paths that directly exit the loop. 1807*cfe87d4eSJingu Kang if (Seen.size() < 2) 1808*cfe87d4eSJingu Kang return {}; 1809*cfe87d4eSJingu Kang 1810*cfe87d4eSJingu Kang // Next, check if there are any MemoryDefs that are on the path through 1811*cfe87d4eSJingu Kang // the loop (in the Seen set) and they may-alias any of the locations in 1812*cfe87d4eSJingu Kang // AccessedLocs. If that is the case, they may modify the condition and 1813*cfe87d4eSJingu Kang // partial unswitching is not possible. 1814*cfe87d4eSJingu Kang SmallPtrSet<MemoryAccess *, 4> SeenAccesses; 1815*cfe87d4eSJingu Kang while (!AccessesToCheck.empty()) { 1816*cfe87d4eSJingu Kang MemoryAccess *Current = AccessesToCheck.pop_back_val(); 1817*cfe87d4eSJingu Kang auto SeenI = SeenAccesses.insert(Current); 1818*cfe87d4eSJingu Kang if (!SeenI.second || !Seen.contains(Current->getBlock())) 1819*cfe87d4eSJingu Kang continue; 1820*cfe87d4eSJingu Kang 1821*cfe87d4eSJingu Kang // Bail out if exceeded the threshold. 1822*cfe87d4eSJingu Kang if (SeenAccesses.size() >= MSSAThreshold) 1823*cfe87d4eSJingu Kang return {}; 1824*cfe87d4eSJingu Kang 1825*cfe87d4eSJingu Kang // MemoryUse are read-only accesses. 1826*cfe87d4eSJingu Kang if (isa<MemoryUse>(Current)) 1827*cfe87d4eSJingu Kang continue; 1828*cfe87d4eSJingu Kang 1829*cfe87d4eSJingu Kang // For a MemoryDef, check if is aliases any of the location feeding 1830*cfe87d4eSJingu Kang // the original condition. 1831*cfe87d4eSJingu Kang if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) { 1832*cfe87d4eSJingu Kang if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) { 1833*cfe87d4eSJingu Kang return isModSet( 1834*cfe87d4eSJingu Kang AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc)); 1835*cfe87d4eSJingu Kang })) 1836*cfe87d4eSJingu Kang return {}; 1837*cfe87d4eSJingu Kang } 1838*cfe87d4eSJingu Kang 1839*cfe87d4eSJingu Kang for (Use &U : Current->uses()) 1840*cfe87d4eSJingu Kang AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser())); 1841*cfe87d4eSJingu Kang } 1842*cfe87d4eSJingu Kang 1843*cfe87d4eSJingu Kang // We could also allow loops with known trip counts without mustprogress, 1844*cfe87d4eSJingu Kang // but ScalarEvolution may not be available. 1845*cfe87d4eSJingu Kang Info.PathIsNoop &= 1846*cfe87d4eSJingu Kang L->getHeader()->getParent()->mustProgress() || hasMustProgress(L); 1847*cfe87d4eSJingu Kang 1848*cfe87d4eSJingu Kang // If the path is considered a no-op so far, check if it reaches a 1849*cfe87d4eSJingu Kang // single exit block without any phis. This ensures no values from the 1850*cfe87d4eSJingu Kang // loop are used outside of the loop. 1851*cfe87d4eSJingu Kang if (Info.PathIsNoop) { 1852*cfe87d4eSJingu Kang for (auto *Exiting : ExitingBlocks) { 1853*cfe87d4eSJingu Kang if (!Seen.contains(Exiting)) 1854*cfe87d4eSJingu Kang continue; 1855*cfe87d4eSJingu Kang for (auto *Succ : successors(Exiting)) { 1856*cfe87d4eSJingu Kang if (L->contains(Succ)) 1857*cfe87d4eSJingu Kang continue; 1858*cfe87d4eSJingu Kang 1859*cfe87d4eSJingu Kang Info.PathIsNoop &= llvm::empty(Succ->phis()) && 1860*cfe87d4eSJingu Kang (!Info.ExitForPath || Info.ExitForPath == Succ); 1861*cfe87d4eSJingu Kang if (!Info.PathIsNoop) 1862*cfe87d4eSJingu Kang break; 1863*cfe87d4eSJingu Kang assert((!Info.ExitForPath || Info.ExitForPath == Succ) && 1864*cfe87d4eSJingu Kang "cannot have multiple exit blocks"); 1865*cfe87d4eSJingu Kang Info.ExitForPath = Succ; 1866*cfe87d4eSJingu Kang } 1867*cfe87d4eSJingu Kang } 1868*cfe87d4eSJingu Kang } 1869*cfe87d4eSJingu Kang if (!Info.ExitForPath) 1870*cfe87d4eSJingu Kang Info.PathIsNoop = false; 1871*cfe87d4eSJingu Kang 1872*cfe87d4eSJingu Kang Info.InstToDuplicate = InstToDuplicate; 1873*cfe87d4eSJingu Kang return Info; 1874*cfe87d4eSJingu Kang }; 1875*cfe87d4eSJingu Kang 1876*cfe87d4eSJingu Kang // If we branch to the same successor, partial unswitching will not be 1877*cfe87d4eSJingu Kang // beneficial. 1878*cfe87d4eSJingu Kang if (TI->getSuccessor(0) == TI->getSuccessor(1)) 1879*cfe87d4eSJingu Kang return {}; 1880*cfe87d4eSJingu Kang 1881*cfe87d4eSJingu Kang if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(), 1882*cfe87d4eSJingu Kang AccessesToCheck)) { 1883*cfe87d4eSJingu Kang Info->KnownValue = ConstantInt::getTrue(TI->getContext()); 1884*cfe87d4eSJingu Kang return Info; 1885*cfe87d4eSJingu Kang } 1886*cfe87d4eSJingu Kang if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), 1887*cfe87d4eSJingu Kang AccessesToCheck)) { 1888*cfe87d4eSJingu Kang Info->KnownValue = ConstantInt::getFalse(TI->getContext()); 1889*cfe87d4eSJingu Kang return Info; 1890*cfe87d4eSJingu Kang } 1891*cfe87d4eSJingu Kang 1892*cfe87d4eSJingu Kang return {}; 1893*cfe87d4eSJingu Kang } 1894