10b57cec5SDimitry Andric //===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file defines common loop utility functions.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric
130b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
145ffd83dbSDimitry Andric #include "llvm/ADT/DenseSet.h"
155ffd83dbSDimitry Andric #include "llvm/ADT/PriorityWorklist.h"
160b57cec5SDimitry Andric #include "llvm/ADT/ScopeExit.h"
175ffd83dbSDimitry Andric #include "llvm/ADT/SetVector.h"
185ffd83dbSDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
195ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h"
200b57cec5SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
210b57cec5SDimitry Andric #include "llvm/Analysis/BasicAliasAnalysis.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h"
230b57cec5SDimitry Andric #include "llvm/Analysis/GlobalsModRef.h"
2404eeddc0SDimitry Andric #include "llvm/Analysis/InstSimplifyFolder.h"
255ffd83dbSDimitry Andric #include "llvm/Analysis/LoopAccessAnalysis.h"
260b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
270b57cec5SDimitry Andric #include "llvm/Analysis/LoopPass.h"
288bcb0991SDimitry Andric #include "llvm/Analysis/MemorySSA.h"
290b57cec5SDimitry Andric #include "llvm/Analysis/MemorySSAUpdater.h"
300b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
310b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
320b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h"
330b57cec5SDimitry Andric #include "llvm/IR/DIBuilder.h"
340b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
350b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
360b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
375ffd83dbSDimitry Andric #include "llvm/IR/MDBuilder.h"
380b57cec5SDimitry Andric #include "llvm/IR/Module.h"
390b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h"
40bdd1243dSDimitry Andric #include "llvm/IR/ProfDataUtils.h"
410b57cec5SDimitry Andric #include "llvm/IR/ValueHandle.h"
42480093f4SDimitry Andric #include "llvm/InitializePasses.h"
430b57cec5SDimitry Andric #include "llvm/Pass.h"
440b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
450b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
465ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/Local.h"
475ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
480b57cec5SDimitry Andric
490b57cec5SDimitry Andric using namespace llvm;
500b57cec5SDimitry Andric using namespace llvm::PatternMatch;
510b57cec5SDimitry Andric
520b57cec5SDimitry Andric #define DEBUG_TYPE "loop-utils"
530b57cec5SDimitry Andric
540b57cec5SDimitry Andric static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
558bcb0991SDimitry Andric static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
560b57cec5SDimitry Andric
formDedicatedExitBlocks(Loop * L,DominatorTree * DT,LoopInfo * LI,MemorySSAUpdater * MSSAU,bool PreserveLCSSA)570b57cec5SDimitry Andric bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
580b57cec5SDimitry Andric MemorySSAUpdater *MSSAU,
590b57cec5SDimitry Andric bool PreserveLCSSA) {
600b57cec5SDimitry Andric bool Changed = false;
610b57cec5SDimitry Andric
620b57cec5SDimitry Andric // We re-use a vector for the in-loop predecesosrs.
630b57cec5SDimitry Andric SmallVector<BasicBlock *, 4> InLoopPredecessors;
640b57cec5SDimitry Andric
650b57cec5SDimitry Andric auto RewriteExit = [&](BasicBlock *BB) {
660b57cec5SDimitry Andric assert(InLoopPredecessors.empty() &&
670b57cec5SDimitry Andric "Must start with an empty predecessors list!");
680b57cec5SDimitry Andric auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });
690b57cec5SDimitry Andric
700b57cec5SDimitry Andric // See if there are any non-loop predecessors of this exit block and
710b57cec5SDimitry Andric // keep track of the in-loop predecessors.
720b57cec5SDimitry Andric bool IsDedicatedExit = true;
730b57cec5SDimitry Andric for (auto *PredBB : predecessors(BB))
740b57cec5SDimitry Andric if (L->contains(PredBB)) {
750b57cec5SDimitry Andric if (isa<IndirectBrInst>(PredBB->getTerminator()))
760b57cec5SDimitry Andric // We cannot rewrite exiting edges from an indirectbr.
770b57cec5SDimitry Andric return false;
780b57cec5SDimitry Andric
790b57cec5SDimitry Andric InLoopPredecessors.push_back(PredBB);
800b57cec5SDimitry Andric } else {
810b57cec5SDimitry Andric IsDedicatedExit = false;
820b57cec5SDimitry Andric }
830b57cec5SDimitry Andric
840b57cec5SDimitry Andric assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");
850b57cec5SDimitry Andric
860b57cec5SDimitry Andric // Nothing to do if this is already a dedicated exit.
870b57cec5SDimitry Andric if (IsDedicatedExit)
880b57cec5SDimitry Andric return false;
890b57cec5SDimitry Andric
900b57cec5SDimitry Andric auto *NewExitBB = SplitBlockPredecessors(
910b57cec5SDimitry Andric BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
920b57cec5SDimitry Andric
930b57cec5SDimitry Andric if (!NewExitBB)
940b57cec5SDimitry Andric LLVM_DEBUG(
950b57cec5SDimitry Andric dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
960b57cec5SDimitry Andric << *L << "\n");
970b57cec5SDimitry Andric else
980b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
990b57cec5SDimitry Andric << NewExitBB->getName() << "\n");
1000b57cec5SDimitry Andric return true;
1010b57cec5SDimitry Andric };
1020b57cec5SDimitry Andric
1030b57cec5SDimitry Andric // Walk the exit blocks directly rather than building up a data structure for
1040b57cec5SDimitry Andric // them, but only visit each one once.
1050b57cec5SDimitry Andric SmallPtrSet<BasicBlock *, 4> Visited;
1060b57cec5SDimitry Andric for (auto *BB : L->blocks())
1070b57cec5SDimitry Andric for (auto *SuccBB : successors(BB)) {
1080b57cec5SDimitry Andric // We're looking for exit blocks so skip in-loop successors.
1090b57cec5SDimitry Andric if (L->contains(SuccBB))
1100b57cec5SDimitry Andric continue;
1110b57cec5SDimitry Andric
1120b57cec5SDimitry Andric // Visit each exit block exactly once.
1130b57cec5SDimitry Andric if (!Visited.insert(SuccBB).second)
1140b57cec5SDimitry Andric continue;
1150b57cec5SDimitry Andric
1160b57cec5SDimitry Andric Changed |= RewriteExit(SuccBB);
1170b57cec5SDimitry Andric }
1180b57cec5SDimitry Andric
1190b57cec5SDimitry Andric return Changed;
1200b57cec5SDimitry Andric }
1210b57cec5SDimitry Andric
1220b57cec5SDimitry Andric /// Returns the instructions that use values defined in the loop.
findDefsUsedOutsideOfLoop(Loop * L)1230b57cec5SDimitry Andric SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
1240b57cec5SDimitry Andric SmallVector<Instruction *, 8> UsedOutside;
1250b57cec5SDimitry Andric
1260b57cec5SDimitry Andric for (auto *Block : L->getBlocks())
1270b57cec5SDimitry Andric // FIXME: I believe that this could use copy_if if the Inst reference could
1280b57cec5SDimitry Andric // be adapted into a pointer.
1290b57cec5SDimitry Andric for (auto &Inst : *Block) {
1300b57cec5SDimitry Andric auto Users = Inst.users();
1310b57cec5SDimitry Andric if (any_of(Users, [&](User *U) {
1320b57cec5SDimitry Andric auto *Use = cast<Instruction>(U);
1330b57cec5SDimitry Andric return !L->contains(Use->getParent());
1340b57cec5SDimitry Andric }))
1350b57cec5SDimitry Andric UsedOutside.push_back(&Inst);
1360b57cec5SDimitry Andric }
1370b57cec5SDimitry Andric
1380b57cec5SDimitry Andric return UsedOutside;
1390b57cec5SDimitry Andric }
1400b57cec5SDimitry Andric
getLoopAnalysisUsage(AnalysisUsage & AU)1410b57cec5SDimitry Andric void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
1420b57cec5SDimitry Andric // By definition, all loop passes need the LoopInfo analysis and the
1430b57cec5SDimitry Andric // Dominator tree it depends on. Because they all participate in the loop
1440b57cec5SDimitry Andric // pass manager, they must also preserve these.
1450b57cec5SDimitry Andric AU.addRequired<DominatorTreeWrapperPass>();
1460b57cec5SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>();
1470b57cec5SDimitry Andric AU.addRequired<LoopInfoWrapperPass>();
1480b57cec5SDimitry Andric AU.addPreserved<LoopInfoWrapperPass>();
1490b57cec5SDimitry Andric
1500b57cec5SDimitry Andric // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
1510b57cec5SDimitry Andric // here because users shouldn't directly get them from this header.
1520b57cec5SDimitry Andric extern char &LoopSimplifyID;
1530b57cec5SDimitry Andric extern char &LCSSAID;
1540b57cec5SDimitry Andric AU.addRequiredID(LoopSimplifyID);
1550b57cec5SDimitry Andric AU.addPreservedID(LoopSimplifyID);
1560b57cec5SDimitry Andric AU.addRequiredID(LCSSAID);
1570b57cec5SDimitry Andric AU.addPreservedID(LCSSAID);
1580b57cec5SDimitry Andric // This is used in the LPPassManager to perform LCSSA verification on passes
1590b57cec5SDimitry Andric // which preserve lcssa form
1600b57cec5SDimitry Andric AU.addRequired<LCSSAVerificationPass>();
1610b57cec5SDimitry Andric AU.addPreserved<LCSSAVerificationPass>();
1620b57cec5SDimitry Andric
1630b57cec5SDimitry Andric // Loop passes are designed to run inside of a loop pass manager which means
1640b57cec5SDimitry Andric // that any function analyses they require must be required by the first loop
1650b57cec5SDimitry Andric // pass in the manager (so that it is computed before the loop pass manager
1660b57cec5SDimitry Andric // runs) and preserved by all loop pasess in the manager. To make this
1670b57cec5SDimitry Andric // reasonably robust, the set needed for most loop passes is maintained here.
1680b57cec5SDimitry Andric // If your loop pass requires an analysis not listed here, you will need to
1690b57cec5SDimitry Andric // carefully audit the loop pass manager nesting structure that results.
1700b57cec5SDimitry Andric AU.addRequired<AAResultsWrapperPass>();
1710b57cec5SDimitry Andric AU.addPreserved<AAResultsWrapperPass>();
1720b57cec5SDimitry Andric AU.addPreserved<BasicAAWrapperPass>();
1730b57cec5SDimitry Andric AU.addPreserved<GlobalsAAWrapperPass>();
1740b57cec5SDimitry Andric AU.addPreserved<SCEVAAWrapperPass>();
1750b57cec5SDimitry Andric AU.addRequired<ScalarEvolutionWrapperPass>();
1760b57cec5SDimitry Andric AU.addPreserved<ScalarEvolutionWrapperPass>();
1778bcb0991SDimitry Andric // FIXME: When all loop passes preserve MemorySSA, it can be required and
1788bcb0991SDimitry Andric // preserved here instead of the individual handling in each pass.
1790b57cec5SDimitry Andric }
1800b57cec5SDimitry Andric
1810b57cec5SDimitry Andric /// Manually defined generic "LoopPass" dependency initialization. This is used
1820b57cec5SDimitry Andric /// to initialize the exact set of passes from above in \c
1830b57cec5SDimitry Andric /// getLoopAnalysisUsage. It can be used within a loop pass's initialization
1840b57cec5SDimitry Andric /// with:
1850b57cec5SDimitry Andric ///
1860b57cec5SDimitry Andric /// INITIALIZE_PASS_DEPENDENCY(LoopPass)
1870b57cec5SDimitry Andric ///
1880b57cec5SDimitry Andric /// As-if "LoopPass" were a pass.
initializeLoopPassPass(PassRegistry & Registry)1890b57cec5SDimitry Andric void llvm::initializeLoopPassPass(PassRegistry &Registry) {
1900b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1910b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1920b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
1930b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
1940b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1950b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
1960b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
1970b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
1980b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
1998bcb0991SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
2008bcb0991SDimitry Andric }
2018bcb0991SDimitry Andric
2028bcb0991SDimitry Andric /// Create MDNode for input string.
createStringMetadata(Loop * TheLoop,StringRef Name,unsigned V)2038bcb0991SDimitry Andric static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) {
2048bcb0991SDimitry Andric LLVMContext &Context = TheLoop->getHeader()->getContext();
2058bcb0991SDimitry Andric Metadata *MDs[] = {
2068bcb0991SDimitry Andric MDString::get(Context, Name),
2078bcb0991SDimitry Andric ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
2088bcb0991SDimitry Andric return MDNode::get(Context, MDs);
2098bcb0991SDimitry Andric }
2108bcb0991SDimitry Andric
2118bcb0991SDimitry Andric /// Set input string into loop metadata by keeping other values intact.
2128bcb0991SDimitry Andric /// If the string is already in loop metadata update value if it is
2138bcb0991SDimitry Andric /// different.
addStringMetadataToLoop(Loop * TheLoop,const char * StringMD,unsigned V)2148bcb0991SDimitry Andric void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
2158bcb0991SDimitry Andric unsigned V) {
2168bcb0991SDimitry Andric SmallVector<Metadata *, 4> MDs(1);
2178bcb0991SDimitry Andric // If the loop already has metadata, retain it.
2188bcb0991SDimitry Andric MDNode *LoopID = TheLoop->getLoopID();
2198bcb0991SDimitry Andric if (LoopID) {
2208bcb0991SDimitry Andric for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
2218bcb0991SDimitry Andric MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
2228bcb0991SDimitry Andric // If it is of form key = value, try to parse it.
2238bcb0991SDimitry Andric if (Node->getNumOperands() == 2) {
2248bcb0991SDimitry Andric MDString *S = dyn_cast<MDString>(Node->getOperand(0));
2258bcb0991SDimitry Andric if (S && S->getString().equals(StringMD)) {
2268bcb0991SDimitry Andric ConstantInt *IntMD =
2278bcb0991SDimitry Andric mdconst::extract_or_null<ConstantInt>(Node->getOperand(1));
2288bcb0991SDimitry Andric if (IntMD && IntMD->getSExtValue() == V)
2298bcb0991SDimitry Andric // It is already in place. Do nothing.
2308bcb0991SDimitry Andric return;
2318bcb0991SDimitry Andric // We need to update the value, so just skip it here and it will
2328bcb0991SDimitry Andric // be added after copying other existed nodes.
2338bcb0991SDimitry Andric continue;
2348bcb0991SDimitry Andric }
2358bcb0991SDimitry Andric }
2368bcb0991SDimitry Andric MDs.push_back(Node);
2378bcb0991SDimitry Andric }
2388bcb0991SDimitry Andric }
2398bcb0991SDimitry Andric // Add new metadata.
2408bcb0991SDimitry Andric MDs.push_back(createStringMetadata(TheLoop, StringMD, V));
2418bcb0991SDimitry Andric // Replace current metadata node with new one.
2428bcb0991SDimitry Andric LLVMContext &Context = TheLoop->getHeader()->getContext();
2438bcb0991SDimitry Andric MDNode *NewLoopID = MDNode::get(Context, MDs);
2448bcb0991SDimitry Andric // Set operand 0 to refer to the loop id itself.
2458bcb0991SDimitry Andric NewLoopID->replaceOperandWith(0, NewLoopID);
2468bcb0991SDimitry Andric TheLoop->setLoopID(NewLoopID);
2470b57cec5SDimitry Andric }
2480b57cec5SDimitry Andric
249bdd1243dSDimitry Andric std::optional<ElementCount>
getOptionalElementCountLoopAttribute(const Loop * TheLoop)250fe6060f1SDimitry Andric llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) {
251bdd1243dSDimitry Andric std::optional<int> Width =
252e8d8bef9SDimitry Andric getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
253e8d8bef9SDimitry Andric
25481ad6265SDimitry Andric if (Width) {
255bdd1243dSDimitry Andric std::optional<int> IsScalable = getOptionalIntLoopAttribute(
256e8d8bef9SDimitry Andric TheLoop, "llvm.loop.vectorize.scalable.enable");
25781ad6265SDimitry Andric return ElementCount::get(*Width, IsScalable.value_or(false));
258e8d8bef9SDimitry Andric }
259e8d8bef9SDimitry Andric
260bdd1243dSDimitry Andric return std::nullopt;
261e8d8bef9SDimitry Andric }
262e8d8bef9SDimitry Andric
makeFollowupLoopID(MDNode * OrigLoopID,ArrayRef<StringRef> FollowupOptions,const char * InheritOptionsExceptPrefix,bool AlwaysNew)263bdd1243dSDimitry Andric std::optional<MDNode *> llvm::makeFollowupLoopID(
2640b57cec5SDimitry Andric MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
2650b57cec5SDimitry Andric const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
2660b57cec5SDimitry Andric if (!OrigLoopID) {
2670b57cec5SDimitry Andric if (AlwaysNew)
2680b57cec5SDimitry Andric return nullptr;
269bdd1243dSDimitry Andric return std::nullopt;
2700b57cec5SDimitry Andric }
2710b57cec5SDimitry Andric
2720b57cec5SDimitry Andric assert(OrigLoopID->getOperand(0) == OrigLoopID);
2730b57cec5SDimitry Andric
2740b57cec5SDimitry Andric bool InheritAllAttrs = !InheritOptionsExceptPrefix;
2750b57cec5SDimitry Andric bool InheritSomeAttrs =
2760b57cec5SDimitry Andric InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
2770b57cec5SDimitry Andric SmallVector<Metadata *, 8> MDs;
2780b57cec5SDimitry Andric MDs.push_back(nullptr);
2790b57cec5SDimitry Andric
2800b57cec5SDimitry Andric bool Changed = false;
2810b57cec5SDimitry Andric if (InheritAllAttrs || InheritSomeAttrs) {
282e8d8bef9SDimitry Andric for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) {
2830b57cec5SDimitry Andric MDNode *Op = cast<MDNode>(Existing.get());
2840b57cec5SDimitry Andric
2850b57cec5SDimitry Andric auto InheritThisAttribute = [InheritSomeAttrs,
2860b57cec5SDimitry Andric InheritOptionsExceptPrefix](MDNode *Op) {
2870b57cec5SDimitry Andric if (!InheritSomeAttrs)
2880b57cec5SDimitry Andric return false;
2890b57cec5SDimitry Andric
2900b57cec5SDimitry Andric // Skip malformatted attribute metadata nodes.
2910b57cec5SDimitry Andric if (Op->getNumOperands() == 0)
2920b57cec5SDimitry Andric return true;
2930b57cec5SDimitry Andric Metadata *NameMD = Op->getOperand(0).get();
2940b57cec5SDimitry Andric if (!isa<MDString>(NameMD))
2950b57cec5SDimitry Andric return true;
2960b57cec5SDimitry Andric StringRef AttrName = cast<MDString>(NameMD)->getString();
2970b57cec5SDimitry Andric
2980b57cec5SDimitry Andric // Do not inherit excluded attributes.
299*c9157d92SDimitry Andric return !AttrName.starts_with(InheritOptionsExceptPrefix);
3000b57cec5SDimitry Andric };
3010b57cec5SDimitry Andric
3020b57cec5SDimitry Andric if (InheritThisAttribute(Op))
3030b57cec5SDimitry Andric MDs.push_back(Op);
3040b57cec5SDimitry Andric else
3050b57cec5SDimitry Andric Changed = true;
3060b57cec5SDimitry Andric }
3070b57cec5SDimitry Andric } else {
3080b57cec5SDimitry Andric // Modified if we dropped at least one attribute.
3090b57cec5SDimitry Andric Changed = OrigLoopID->getNumOperands() > 1;
3100b57cec5SDimitry Andric }
3110b57cec5SDimitry Andric
3120b57cec5SDimitry Andric bool HasAnyFollowup = false;
3130b57cec5SDimitry Andric for (StringRef OptionName : FollowupOptions) {
3140b57cec5SDimitry Andric MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
3150b57cec5SDimitry Andric if (!FollowupNode)
3160b57cec5SDimitry Andric continue;
3170b57cec5SDimitry Andric
3180b57cec5SDimitry Andric HasAnyFollowup = true;
319e8d8bef9SDimitry Andric for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
3200b57cec5SDimitry Andric MDs.push_back(Option.get());
3210b57cec5SDimitry Andric Changed = true;
3220b57cec5SDimitry Andric }
3230b57cec5SDimitry Andric }
3240b57cec5SDimitry Andric
3250b57cec5SDimitry Andric // Attributes of the followup loop not specified explicity, so signal to the
3260b57cec5SDimitry Andric // transformation pass to add suitable attributes.
3270b57cec5SDimitry Andric if (!AlwaysNew && !HasAnyFollowup)
328bdd1243dSDimitry Andric return std::nullopt;
3290b57cec5SDimitry Andric
3300b57cec5SDimitry Andric // If no attributes were added or remove, the previous loop Id can be reused.
3310b57cec5SDimitry Andric if (!AlwaysNew && !Changed)
3320b57cec5SDimitry Andric return OrigLoopID;
3330b57cec5SDimitry Andric
3340b57cec5SDimitry Andric // No attributes is equivalent to having no !llvm.loop metadata at all.
3350b57cec5SDimitry Andric if (MDs.size() == 1)
3360b57cec5SDimitry Andric return nullptr;
3370b57cec5SDimitry Andric
3380b57cec5SDimitry Andric // Build the new loop ID.
3390b57cec5SDimitry Andric MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
3400b57cec5SDimitry Andric FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
3410b57cec5SDimitry Andric return FollowupLoopID;
3420b57cec5SDimitry Andric }
3430b57cec5SDimitry Andric
hasDisableAllTransformsHint(const Loop * L)3440b57cec5SDimitry Andric bool llvm::hasDisableAllTransformsHint(const Loop *L) {
3450b57cec5SDimitry Andric return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced);
3460b57cec5SDimitry Andric }
3470b57cec5SDimitry Andric
hasDisableLICMTransformsHint(const Loop * L)3488bcb0991SDimitry Andric bool llvm::hasDisableLICMTransformsHint(const Loop *L) {
3498bcb0991SDimitry Andric return getBooleanLoopAttribute(L, LLVMLoopDisableLICM);
3508bcb0991SDimitry Andric }
3518bcb0991SDimitry Andric
hasUnrollTransformation(const Loop * L)352fe6060f1SDimitry Andric TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
3530b57cec5SDimitry Andric if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
3540b57cec5SDimitry Andric return TM_SuppressedByUser;
3550b57cec5SDimitry Andric
356bdd1243dSDimitry Andric std::optional<int> Count =
3570b57cec5SDimitry Andric getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
35881ad6265SDimitry Andric if (Count)
359bdd1243dSDimitry Andric return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
3600b57cec5SDimitry Andric
3610b57cec5SDimitry Andric if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
3620b57cec5SDimitry Andric return TM_ForcedByUser;
3630b57cec5SDimitry Andric
3640b57cec5SDimitry Andric if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
3650b57cec5SDimitry Andric return TM_ForcedByUser;
3660b57cec5SDimitry Andric
3670b57cec5SDimitry Andric if (hasDisableAllTransformsHint(L))
3680b57cec5SDimitry Andric return TM_Disable;
3690b57cec5SDimitry Andric
3700b57cec5SDimitry Andric return TM_Unspecified;
3710b57cec5SDimitry Andric }
3720b57cec5SDimitry Andric
hasUnrollAndJamTransformation(const Loop * L)373fe6060f1SDimitry Andric TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
3740b57cec5SDimitry Andric if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
3750b57cec5SDimitry Andric return TM_SuppressedByUser;
3760b57cec5SDimitry Andric
377bdd1243dSDimitry Andric std::optional<int> Count =
3780b57cec5SDimitry Andric getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
37981ad6265SDimitry Andric if (Count)
380bdd1243dSDimitry Andric return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
3810b57cec5SDimitry Andric
3820b57cec5SDimitry Andric if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
3830b57cec5SDimitry Andric return TM_ForcedByUser;
3840b57cec5SDimitry Andric
3850b57cec5SDimitry Andric if (hasDisableAllTransformsHint(L))
3860b57cec5SDimitry Andric return TM_Disable;
3870b57cec5SDimitry Andric
3880b57cec5SDimitry Andric return TM_Unspecified;
3890b57cec5SDimitry Andric }
3900b57cec5SDimitry Andric
hasVectorizeTransformation(const Loop * L)391fe6060f1SDimitry Andric TransformationMode llvm::hasVectorizeTransformation(const Loop *L) {
392bdd1243dSDimitry Andric std::optional<bool> Enable =
3930b57cec5SDimitry Andric getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
3940b57cec5SDimitry Andric
3950b57cec5SDimitry Andric if (Enable == false)
3960b57cec5SDimitry Andric return TM_SuppressedByUser;
3970b57cec5SDimitry Andric
398bdd1243dSDimitry Andric std::optional<ElementCount> VectorizeWidth =
399e8d8bef9SDimitry Andric getOptionalElementCountLoopAttribute(L);
400bdd1243dSDimitry Andric std::optional<int> InterleaveCount =
4010b57cec5SDimitry Andric getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
4020b57cec5SDimitry Andric
4030b57cec5SDimitry Andric // 'Forcing' vector width and interleave count to one effectively disables
4040b57cec5SDimitry Andric // this tranformation.
405e8d8bef9SDimitry Andric if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
406e8d8bef9SDimitry Andric InterleaveCount == 1)
4070b57cec5SDimitry Andric return TM_SuppressedByUser;
4080b57cec5SDimitry Andric
4090b57cec5SDimitry Andric if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
4100b57cec5SDimitry Andric return TM_Disable;
4110b57cec5SDimitry Andric
4120b57cec5SDimitry Andric if (Enable == true)
4130b57cec5SDimitry Andric return TM_ForcedByUser;
4140b57cec5SDimitry Andric
415e8d8bef9SDimitry Andric if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
4160b57cec5SDimitry Andric return TM_Disable;
4170b57cec5SDimitry Andric
418e8d8bef9SDimitry Andric if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
4190b57cec5SDimitry Andric return TM_Enable;
4200b57cec5SDimitry Andric
4210b57cec5SDimitry Andric if (hasDisableAllTransformsHint(L))
4220b57cec5SDimitry Andric return TM_Disable;
4230b57cec5SDimitry Andric
4240b57cec5SDimitry Andric return TM_Unspecified;
4250b57cec5SDimitry Andric }
4260b57cec5SDimitry Andric
hasDistributeTransformation(const Loop * L)427fe6060f1SDimitry Andric TransformationMode llvm::hasDistributeTransformation(const Loop *L) {
4280b57cec5SDimitry Andric if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
4290b57cec5SDimitry Andric return TM_ForcedByUser;
4300b57cec5SDimitry Andric
4310b57cec5SDimitry Andric if (hasDisableAllTransformsHint(L))
4320b57cec5SDimitry Andric return TM_Disable;
4330b57cec5SDimitry Andric
4340b57cec5SDimitry Andric return TM_Unspecified;
4350b57cec5SDimitry Andric }
4360b57cec5SDimitry Andric
hasLICMVersioningTransformation(const Loop * L)437fe6060f1SDimitry Andric TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) {
4380b57cec5SDimitry Andric if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
4390b57cec5SDimitry Andric return TM_SuppressedByUser;
4400b57cec5SDimitry Andric
4410b57cec5SDimitry Andric if (hasDisableAllTransformsHint(L))
4420b57cec5SDimitry Andric return TM_Disable;
4430b57cec5SDimitry Andric
4440b57cec5SDimitry Andric return TM_Unspecified;
4450b57cec5SDimitry Andric }
4460b57cec5SDimitry Andric
4470b57cec5SDimitry Andric /// Does a BFS from a given node to all of its children inside a given loop.
4480b57cec5SDimitry Andric /// The returned vector of nodes includes the starting point.
4490b57cec5SDimitry Andric SmallVector<DomTreeNode *, 16>
collectChildrenInLoop(DomTreeNode * N,const Loop * CurLoop)4500b57cec5SDimitry Andric llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
4510b57cec5SDimitry Andric SmallVector<DomTreeNode *, 16> Worklist;
4520b57cec5SDimitry Andric auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
4530b57cec5SDimitry Andric // Only include subregions in the top level loop.
4540b57cec5SDimitry Andric BasicBlock *BB = DTN->getBlock();
4550b57cec5SDimitry Andric if (CurLoop->contains(BB))
4560b57cec5SDimitry Andric Worklist.push_back(DTN);
4570b57cec5SDimitry Andric };
4580b57cec5SDimitry Andric
4590b57cec5SDimitry Andric AddRegionToWorklist(N);
4600b57cec5SDimitry Andric
4615ffd83dbSDimitry Andric for (size_t I = 0; I < Worklist.size(); I++) {
4625ffd83dbSDimitry Andric for (DomTreeNode *Child : Worklist[I]->children())
4630b57cec5SDimitry Andric AddRegionToWorklist(Child);
4645ffd83dbSDimitry Andric }
4650b57cec5SDimitry Andric
4660b57cec5SDimitry Andric return Worklist;
4670b57cec5SDimitry Andric }
4680b57cec5SDimitry Andric
isAlmostDeadIV(PHINode * PN,BasicBlock * LatchBlock,Value * Cond)469fe013be4SDimitry Andric bool llvm::isAlmostDeadIV(PHINode *PN, BasicBlock *LatchBlock, Value *Cond) {
470fe013be4SDimitry Andric int LatchIdx = PN->getBasicBlockIndex(LatchBlock);
471fe013be4SDimitry Andric Value *IncV = PN->getIncomingValue(LatchIdx);
472fe013be4SDimitry Andric
473fe013be4SDimitry Andric for (User *U : PN->users())
474fe013be4SDimitry Andric if (U != Cond && U != IncV) return false;
475fe013be4SDimitry Andric
476fe013be4SDimitry Andric for (User *U : IncV->users())
477fe013be4SDimitry Andric if (U != Cond && U != PN) return false;
478fe013be4SDimitry Andric return true;
479fe013be4SDimitry Andric }
480fe013be4SDimitry Andric
481fe013be4SDimitry Andric
deleteDeadLoop(Loop * L,DominatorTree * DT,ScalarEvolution * SE,LoopInfo * LI,MemorySSA * MSSA)4825ffd83dbSDimitry Andric void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
4835ffd83dbSDimitry Andric LoopInfo *LI, MemorySSA *MSSA) {
4840b57cec5SDimitry Andric assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
4850b57cec5SDimitry Andric auto *Preheader = L->getLoopPreheader();
4860b57cec5SDimitry Andric assert(Preheader && "Preheader should exist!");
4870b57cec5SDimitry Andric
4885ffd83dbSDimitry Andric std::unique_ptr<MemorySSAUpdater> MSSAU;
4895ffd83dbSDimitry Andric if (MSSA)
4905ffd83dbSDimitry Andric MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
4915ffd83dbSDimitry Andric
4920b57cec5SDimitry Andric // Now that we know the removal is safe, remove the loop by changing the
4930b57cec5SDimitry Andric // branch from the preheader to go to the single exit block.
4940b57cec5SDimitry Andric //
4950b57cec5SDimitry Andric // Because we're deleting a large chunk of code at once, the sequence in which
4960b57cec5SDimitry Andric // we remove things is very important to avoid invalidation issues.
4970b57cec5SDimitry Andric
4980b57cec5SDimitry Andric // Tell ScalarEvolution that the loop is deleted. Do this before
4990b57cec5SDimitry Andric // deleting the loop so that ScalarEvolution can look at the loop
5000b57cec5SDimitry Andric // to determine what it needs to clean up.
501bdd1243dSDimitry Andric if (SE) {
5020b57cec5SDimitry Andric SE->forgetLoop(L);
503bdd1243dSDimitry Andric SE->forgetBlockAndLoopDispositions();
504bdd1243dSDimitry Andric }
5050b57cec5SDimitry Andric
50681ad6265SDimitry Andric Instruction *OldTerm = Preheader->getTerminator();
50781ad6265SDimitry Andric assert(!OldTerm->mayHaveSideEffects() &&
50881ad6265SDimitry Andric "Preheader must end with a side-effect-free terminator");
50981ad6265SDimitry Andric assert(OldTerm->getNumSuccessors() == 1 &&
51081ad6265SDimitry Andric "Preheader must have a single successor");
5110b57cec5SDimitry Andric // Connect the preheader to the exit block. Keep the old edge to the header
5120b57cec5SDimitry Andric // around to perform the dominator tree update in two separate steps
5130b57cec5SDimitry Andric // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
5140b57cec5SDimitry Andric // preheader -> header.
5150b57cec5SDimitry Andric //
5160b57cec5SDimitry Andric //
5170b57cec5SDimitry Andric // 0. Preheader 1. Preheader 2. Preheader
5180b57cec5SDimitry Andric // | | | |
5190b57cec5SDimitry Andric // V | V |
5200b57cec5SDimitry Andric // Header <--\ | Header <--\ | Header <--\
5210b57cec5SDimitry Andric // | | | | | | | | | | |
5220b57cec5SDimitry Andric // | V | | | V | | | V |
5230b57cec5SDimitry Andric // | Body --/ | | Body --/ | | Body --/
5240b57cec5SDimitry Andric // V V V V V
5250b57cec5SDimitry Andric // Exit Exit Exit
5260b57cec5SDimitry Andric //
5270b57cec5SDimitry Andric // By doing this is two separate steps we can perform the dominator tree
5280b57cec5SDimitry Andric // update without using the batch update API.
5290b57cec5SDimitry Andric //
5300b57cec5SDimitry Andric // Even when the loop is never executed, we cannot remove the edge from the
5310b57cec5SDimitry Andric // source block to the exit block. Consider the case where the unexecuted loop
5320b57cec5SDimitry Andric // branches back to an outer loop. If we deleted the loop and removed the edge
5330b57cec5SDimitry Andric // coming to this inner loop, this will break the outer loop structure (by
5340b57cec5SDimitry Andric // deleting the backedge of the outer loop). If the outer loop is indeed a
5350b57cec5SDimitry Andric // non-loop, it will be deleted in a future iteration of loop deletion pass.
53681ad6265SDimitry Andric IRBuilder<> Builder(OldTerm);
537e8d8bef9SDimitry Andric
538e8d8bef9SDimitry Andric auto *ExitBlock = L->getUniqueExitBlock();
539e8d8bef9SDimitry Andric DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
540e8d8bef9SDimitry Andric if (ExitBlock) {
541e8d8bef9SDimitry Andric assert(ExitBlock && "Should have a unique exit block!");
542e8d8bef9SDimitry Andric assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
543e8d8bef9SDimitry Andric
5440b57cec5SDimitry Andric Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
5450b57cec5SDimitry Andric // Remove the old branch. The conditional branch becomes a new terminator.
54681ad6265SDimitry Andric OldTerm->eraseFromParent();
5470b57cec5SDimitry Andric
5480b57cec5SDimitry Andric // Rewrite phis in the exit block to get their inputs from the Preheader
5490b57cec5SDimitry Andric // instead of the exiting block.
5500b57cec5SDimitry Andric for (PHINode &P : ExitBlock->phis()) {
5510b57cec5SDimitry Andric // Set the zero'th element of Phi to be from the preheader and remove all
5520b57cec5SDimitry Andric // other incoming values. Given the loop has dedicated exits, all other
5530b57cec5SDimitry Andric // incoming values must be from the exiting blocks.
5540b57cec5SDimitry Andric int PredIndex = 0;
5550b57cec5SDimitry Andric P.setIncomingBlock(PredIndex, Preheader);
5560b57cec5SDimitry Andric // Removes all incoming values from all other exiting blocks (including
5570b57cec5SDimitry Andric // duplicate values from an exiting block).
5580b57cec5SDimitry Andric // Nuke all entries except the zero'th entry which is the preheader entry.
559*c9157d92SDimitry Andric P.removeIncomingValueIf([](unsigned Idx) { return Idx != 0; },
560*c9157d92SDimitry Andric /* DeletePHIIfEmpty */ false);
5610b57cec5SDimitry Andric
5620b57cec5SDimitry Andric assert((P.getNumIncomingValues() == 1 &&
5630b57cec5SDimitry Andric P.getIncomingBlock(PredIndex) == Preheader) &&
5640b57cec5SDimitry Andric "Should have exactly one value and that's from the preheader!");
5650b57cec5SDimitry Andric }
5660b57cec5SDimitry Andric
5675ffd83dbSDimitry Andric if (DT) {
5685ffd83dbSDimitry Andric DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
5695ffd83dbSDimitry Andric if (MSSA) {
570e8d8bef9SDimitry Andric MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}},
571e8d8bef9SDimitry Andric *DT);
5725ffd83dbSDimitry Andric if (VerifyMemorySSA)
5735ffd83dbSDimitry Andric MSSA->verifyMemorySSA();
5745ffd83dbSDimitry Andric }
5755ffd83dbSDimitry Andric }
5765ffd83dbSDimitry Andric
5770b57cec5SDimitry Andric // Disconnect the loop body by branching directly to its exit.
5780b57cec5SDimitry Andric Builder.SetInsertPoint(Preheader->getTerminator());
5790b57cec5SDimitry Andric Builder.CreateBr(ExitBlock);
5800b57cec5SDimitry Andric // Remove the old branch.
5810b57cec5SDimitry Andric Preheader->getTerminator()->eraseFromParent();
582e8d8bef9SDimitry Andric } else {
583e8d8bef9SDimitry Andric assert(L->hasNoExitBlocks() &&
584e8d8bef9SDimitry Andric "Loop should have either zero or one exit blocks.");
585e8d8bef9SDimitry Andric
58681ad6265SDimitry Andric Builder.SetInsertPoint(OldTerm);
587e8d8bef9SDimitry Andric Builder.CreateUnreachable();
588e8d8bef9SDimitry Andric Preheader->getTerminator()->eraseFromParent();
589e8d8bef9SDimitry Andric }
5900b57cec5SDimitry Andric
5910b57cec5SDimitry Andric if (DT) {
5925ffd83dbSDimitry Andric DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});
5935ffd83dbSDimitry Andric if (MSSA) {
5945ffd83dbSDimitry Andric MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}},
5955ffd83dbSDimitry Andric *DT);
5965ffd83dbSDimitry Andric SmallSetVector<BasicBlock *, 8> DeadBlockSet(L->block_begin(),
5975ffd83dbSDimitry Andric L->block_end());
5985ffd83dbSDimitry Andric MSSAU->removeBlocks(DeadBlockSet);
5995ffd83dbSDimitry Andric if (VerifyMemorySSA)
6005ffd83dbSDimitry Andric MSSA->verifyMemorySSA();
6015ffd83dbSDimitry Andric }
6020b57cec5SDimitry Andric }
6030b57cec5SDimitry Andric
6040b57cec5SDimitry Andric // Use a map to unique and a vector to guarantee deterministic ordering.
605bdd1243dSDimitry Andric llvm::SmallDenseSet<DebugVariable, 4> DeadDebugSet;
6060b57cec5SDimitry Andric llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst;
607*c9157d92SDimitry Andric llvm::SmallVector<DPValue *, 4> DeadDPValues;
6080b57cec5SDimitry Andric
609e8d8bef9SDimitry Andric if (ExitBlock) {
6100b57cec5SDimitry Andric // Given LCSSA form is satisfied, we should not have users of instructions
6110b57cec5SDimitry Andric // within the dead loop outside of the loop. However, LCSSA doesn't take
6120b57cec5SDimitry Andric // unreachable uses into account. We handle them here.
6130b57cec5SDimitry Andric // We could do it after drop all references (in this case all users in the
6140b57cec5SDimitry Andric // loop will be already eliminated and we have less work to do but according
6150b57cec5SDimitry Andric // to API doc of User::dropAllReferences only valid operation after dropping
6160b57cec5SDimitry Andric // references, is deletion. So let's substitute all usages of
617fcaf7f86SDimitry Andric // instruction from the loop with poison value of corresponding type first.
6180b57cec5SDimitry Andric for (auto *Block : L->blocks())
6190b57cec5SDimitry Andric for (Instruction &I : *Block) {
620fcaf7f86SDimitry Andric auto *Poison = PoisonValue::get(I.getType());
621349cc55cSDimitry Andric for (Use &U : llvm::make_early_inc_range(I.uses())) {
6220b57cec5SDimitry Andric if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
6230b57cec5SDimitry Andric if (L->contains(Usr->getParent()))
6240b57cec5SDimitry Andric continue;
6250b57cec5SDimitry Andric // If we have a DT then we can check that uses outside a loop only in
6260b57cec5SDimitry Andric // unreachable block.
6270b57cec5SDimitry Andric if (DT)
6280b57cec5SDimitry Andric assert(!DT->isReachableFromEntry(U) &&
6290b57cec5SDimitry Andric "Unexpected user in reachable block");
630fcaf7f86SDimitry Andric U.set(Poison);
6310b57cec5SDimitry Andric }
632*c9157d92SDimitry Andric
633*c9157d92SDimitry Andric // RemoveDIs: do the same as below for DPValues.
634*c9157d92SDimitry Andric if (Block->IsNewDbgInfoFormat) {
635*c9157d92SDimitry Andric for (DPValue &DPV :
636*c9157d92SDimitry Andric llvm::make_early_inc_range(I.getDbgValueRange())) {
637*c9157d92SDimitry Andric DebugVariable Key(DPV.getVariable(), DPV.getExpression(),
638*c9157d92SDimitry Andric DPV.getDebugLoc().get());
639*c9157d92SDimitry Andric if (!DeadDebugSet.insert(Key).second)
640*c9157d92SDimitry Andric continue;
641*c9157d92SDimitry Andric // Unlinks the DPV from it's container, for later insertion.
642*c9157d92SDimitry Andric DPV.removeFromParent();
643*c9157d92SDimitry Andric DeadDPValues.push_back(&DPV);
644*c9157d92SDimitry Andric }
645*c9157d92SDimitry Andric }
646*c9157d92SDimitry Andric
647*c9157d92SDimitry Andric // For one of each variable encountered, preserve a debug intrinsic (set
648*c9157d92SDimitry Andric // to Poison) and transfer it to the loop exit. This terminates any
649*c9157d92SDimitry Andric // variable locations that were set during the loop.
6500b57cec5SDimitry Andric auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
6510b57cec5SDimitry Andric if (!DVI)
6520b57cec5SDimitry Andric continue;
653bdd1243dSDimitry Andric if (!DeadDebugSet.insert(DebugVariable(DVI)).second)
6540b57cec5SDimitry Andric continue;
6550b57cec5SDimitry Andric DeadDebugInst.push_back(DVI);
6560b57cec5SDimitry Andric }
6570b57cec5SDimitry Andric
6580b57cec5SDimitry Andric // After the loop has been deleted all the values defined and modified
659fe013be4SDimitry Andric // inside the loop are going to be unavailable. Values computed in the
660fe013be4SDimitry Andric // loop will have been deleted, automatically causing their debug uses
661fe013be4SDimitry Andric // be be replaced with undef. Loop invariant values will still be available.
662fe013be4SDimitry Andric // Move dbg.values out the loop so that earlier location ranges are still
663fe013be4SDimitry Andric // terminated and loop invariant assignments are preserved.
664*c9157d92SDimitry Andric DIBuilder DIB(*ExitBlock->getModule());
665*c9157d92SDimitry Andric BasicBlock::iterator InsertDbgValueBefore =
666*c9157d92SDimitry Andric ExitBlock->getFirstInsertionPt();
667*c9157d92SDimitry Andric assert(InsertDbgValueBefore != ExitBlock->end() &&
6680b57cec5SDimitry Andric "There should be a non-PHI instruction in exit block, else these "
6690b57cec5SDimitry Andric "instructions will have no parent.");
670*c9157d92SDimitry Andric
671fe013be4SDimitry Andric for (auto *DVI : DeadDebugInst)
672*c9157d92SDimitry Andric DVI->moveBefore(*ExitBlock, InsertDbgValueBefore);
673*c9157d92SDimitry Andric
674*c9157d92SDimitry Andric // Due to the "head" bit in BasicBlock::iterator, we're going to insert
675*c9157d92SDimitry Andric // each DPValue right at the start of the block, wheras dbg.values would be
676*c9157d92SDimitry Andric // repeatedly inserted before the first instruction. To replicate this
677*c9157d92SDimitry Andric // behaviour, do it backwards.
678*c9157d92SDimitry Andric for (DPValue *DPV : llvm::reverse(DeadDPValues))
679*c9157d92SDimitry Andric ExitBlock->insertDPValueBefore(DPV, InsertDbgValueBefore);
680bdd1243dSDimitry Andric }
6810b57cec5SDimitry Andric
6820b57cec5SDimitry Andric // Remove the block from the reference counting scheme, so that we can
6830b57cec5SDimitry Andric // delete it freely later.
6840b57cec5SDimitry Andric for (auto *Block : L->blocks())
6850b57cec5SDimitry Andric Block->dropAllReferences();
6860b57cec5SDimitry Andric
6875ffd83dbSDimitry Andric if (MSSA && VerifyMemorySSA)
6885ffd83dbSDimitry Andric MSSA->verifyMemorySSA();
6895ffd83dbSDimitry Andric
6900b57cec5SDimitry Andric if (LI) {
6910b57cec5SDimitry Andric // Erase the instructions and the blocks without having to worry
6920b57cec5SDimitry Andric // about ordering because we already dropped the references.
6930b57cec5SDimitry Andric // NOTE: This iteration is safe because erasing the block does not remove
6940b57cec5SDimitry Andric // its entry from the loop's block list. We do that in the next section.
6955e801ac6SDimitry Andric for (BasicBlock *BB : L->blocks())
6965e801ac6SDimitry Andric BB->eraseFromParent();
6970b57cec5SDimitry Andric
6980b57cec5SDimitry Andric // Finally, the blocks from loopinfo. This has to happen late because
6990b57cec5SDimitry Andric // otherwise our loop iterators won't work.
7000b57cec5SDimitry Andric
7010b57cec5SDimitry Andric SmallPtrSet<BasicBlock *, 8> blocks;
7020b57cec5SDimitry Andric blocks.insert(L->block_begin(), L->block_end());
7030b57cec5SDimitry Andric for (BasicBlock *BB : blocks)
7040b57cec5SDimitry Andric LI->removeBlock(BB);
7050b57cec5SDimitry Andric
7060b57cec5SDimitry Andric // The last step is to update LoopInfo now that we've eliminated this loop.
707480093f4SDimitry Andric // Note: LoopInfo::erase remove the given loop and relink its subloops with
708480093f4SDimitry Andric // its parent. While removeLoop/removeChildLoop remove the given loop but
709480093f4SDimitry Andric // not relink its subloops, which is what we want.
710480093f4SDimitry Andric if (Loop *ParentLoop = L->getParentLoop()) {
7115ffd83dbSDimitry Andric Loop::iterator I = find(*ParentLoop, L);
712480093f4SDimitry Andric assert(I != ParentLoop->end() && "Couldn't find loop");
713480093f4SDimitry Andric ParentLoop->removeChildLoop(I);
714480093f4SDimitry Andric } else {
7155ffd83dbSDimitry Andric Loop::iterator I = find(*LI, L);
716480093f4SDimitry Andric assert(I != LI->end() && "Couldn't find loop");
717480093f4SDimitry Andric LI->removeLoop(I);
718480093f4SDimitry Andric }
719480093f4SDimitry Andric LI->destroy(L);
7200b57cec5SDimitry Andric }
7210b57cec5SDimitry Andric }
7220b57cec5SDimitry Andric
breakLoopBackedge(Loop * L,DominatorTree & DT,ScalarEvolution & SE,LoopInfo & LI,MemorySSA * MSSA)723e8d8bef9SDimitry Andric void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
724e8d8bef9SDimitry Andric LoopInfo &LI, MemorySSA *MSSA) {
725e8d8bef9SDimitry Andric auto *Latch = L->getLoopLatch();
726e8d8bef9SDimitry Andric assert(Latch && "multiple latches not yet supported");
727e8d8bef9SDimitry Andric auto *Header = L->getHeader();
72881ad6265SDimitry Andric Loop *OutermostLoop = L->getOutermostLoop();
729e8d8bef9SDimitry Andric
730e8d8bef9SDimitry Andric SE.forgetLoop(L);
731bdd1243dSDimitry Andric SE.forgetBlockAndLoopDispositions();
732e8d8bef9SDimitry Andric
733e8d8bef9SDimitry Andric std::unique_ptr<MemorySSAUpdater> MSSAU;
734e8d8bef9SDimitry Andric if (MSSA)
735e8d8bef9SDimitry Andric MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
736e8d8bef9SDimitry Andric
737349cc55cSDimitry Andric // Update the CFG and domtree. We chose to special case a couple of
738349cc55cSDimitry Andric // of common cases for code quality and test readability reasons.
739349cc55cSDimitry Andric [&]() -> void {
740349cc55cSDimitry Andric if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) {
741349cc55cSDimitry Andric if (!BI->isConditional()) {
742349cc55cSDimitry Andric DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
743349cc55cSDimitry Andric (void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU,
744349cc55cSDimitry Andric MSSAU.get());
745349cc55cSDimitry Andric return;
746349cc55cSDimitry Andric }
747349cc55cSDimitry Andric
748349cc55cSDimitry Andric // Conditional latch/exit - note that latch can be shared by inner
749349cc55cSDimitry Andric // and outer loop so the other target doesn't need to an exit
750349cc55cSDimitry Andric if (L->isLoopExiting(Latch)) {
751349cc55cSDimitry Andric // TODO: Generalize ConstantFoldTerminator so that it can be used
752349cc55cSDimitry Andric // here without invalidating LCSSA or MemorySSA. (Tricky case for
753349cc55cSDimitry Andric // LCSSA: header is an exit block of a preceeding sibling loop w/o
754349cc55cSDimitry Andric // dedicated exits.)
755349cc55cSDimitry Andric const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0;
756349cc55cSDimitry Andric BasicBlock *ExitBB = BI->getSuccessor(ExitIdx);
757349cc55cSDimitry Andric
758349cc55cSDimitry Andric DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
759349cc55cSDimitry Andric Header->removePredecessor(Latch, true);
760349cc55cSDimitry Andric
761349cc55cSDimitry Andric IRBuilder<> Builder(BI);
762349cc55cSDimitry Andric auto *NewBI = Builder.CreateBr(ExitBB);
763349cc55cSDimitry Andric // Transfer the metadata to the new branch instruction (minus the
764349cc55cSDimitry Andric // loop info since this is no longer a loop)
765349cc55cSDimitry Andric NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg,
766349cc55cSDimitry Andric LLVMContext::MD_annotation});
767349cc55cSDimitry Andric
768349cc55cSDimitry Andric BI->eraseFromParent();
769349cc55cSDimitry Andric DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}});
770349cc55cSDimitry Andric if (MSSA)
771349cc55cSDimitry Andric MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT);
772349cc55cSDimitry Andric return;
773349cc55cSDimitry Andric }
774349cc55cSDimitry Andric }
775349cc55cSDimitry Andric
776349cc55cSDimitry Andric // General case. By splitting the backedge, and then explicitly making it
777349cc55cSDimitry Andric // unreachable we gracefully handle corner cases such as switch and invoke
778349cc55cSDimitry Andric // termiantors.
779e8d8bef9SDimitry Andric auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
780e8d8bef9SDimitry Andric
781e8d8bef9SDimitry Andric DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
782fe6060f1SDimitry Andric (void)changeToUnreachable(BackedgeBB->getTerminator(),
783e8d8bef9SDimitry Andric /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
784349cc55cSDimitry Andric }();
785e8d8bef9SDimitry Andric
786e8d8bef9SDimitry Andric // Erase (and destroy) this loop instance. Handles relinking sub-loops
787e8d8bef9SDimitry Andric // and blocks within the loop as needed.
788e8d8bef9SDimitry Andric LI.erase(L);
789e8d8bef9SDimitry Andric
790e8d8bef9SDimitry Andric // If the loop we broke had a parent, then changeToUnreachable might have
791e8d8bef9SDimitry Andric // caused a block to be removed from the parent loop (see loop_nest_lcssa
792e8d8bef9SDimitry Andric // test case in zero-btc.ll for an example), thus changing the parent's
793e8d8bef9SDimitry Andric // exit blocks. If that happened, we need to rebuild LCSSA on the outermost
794e8d8bef9SDimitry Andric // loop which might have a had a block removed.
795e8d8bef9SDimitry Andric if (OutermostLoop != L)
796e8d8bef9SDimitry Andric formLCSSARecursively(*OutermostLoop, DT, &LI, &SE);
797e8d8bef9SDimitry Andric }
798e8d8bef9SDimitry Andric
799e8d8bef9SDimitry Andric
8000eae32dcSDimitry Andric /// Checks if \p L has an exiting latch branch. There may also be other
8010eae32dcSDimitry Andric /// exiting blocks. Returns branch instruction terminating the loop
8025ffd83dbSDimitry Andric /// latch if above check is successful, nullptr otherwise.
getExpectedExitLoopLatchBranch(Loop * L)8035ffd83dbSDimitry Andric static BranchInst *getExpectedExitLoopLatchBranch(Loop *L) {
8040b57cec5SDimitry Andric BasicBlock *Latch = L->getLoopLatch();
8050b57cec5SDimitry Andric if (!Latch)
8065ffd83dbSDimitry Andric return nullptr;
8075ffd83dbSDimitry Andric
8080b57cec5SDimitry Andric BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
8090b57cec5SDimitry Andric if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
8105ffd83dbSDimitry Andric return nullptr;
8110b57cec5SDimitry Andric
8120b57cec5SDimitry Andric assert((LatchBR->getSuccessor(0) == L->getHeader() ||
8130b57cec5SDimitry Andric LatchBR->getSuccessor(1) == L->getHeader()) &&
8140b57cec5SDimitry Andric "At least one edge out of the latch must go to the header");
8150b57cec5SDimitry Andric
8165ffd83dbSDimitry Andric return LatchBR;
8175ffd83dbSDimitry Andric }
8185ffd83dbSDimitry Andric
8190eae32dcSDimitry Andric /// Return the estimated trip count for any exiting branch which dominates
8200eae32dcSDimitry Andric /// the loop latch.
getEstimatedTripCount(BranchInst * ExitingBranch,Loop * L,uint64_t & OrigExitWeight)821bdd1243dSDimitry Andric static std::optional<uint64_t> getEstimatedTripCount(BranchInst *ExitingBranch,
822bdd1243dSDimitry Andric Loop *L,
8230eae32dcSDimitry Andric uint64_t &OrigExitWeight) {
8240eae32dcSDimitry Andric // To estimate the number of times the loop body was executed, we want to
8250eae32dcSDimitry Andric // know the number of times the backedge was taken, vs. the number of times
8260eae32dcSDimitry Andric // we exited the loop.
8270eae32dcSDimitry Andric uint64_t LoopWeight, ExitWeight;
828bdd1243dSDimitry Andric if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight))
829bdd1243dSDimitry Andric return std::nullopt;
8300eae32dcSDimitry Andric
8310eae32dcSDimitry Andric if (L->contains(ExitingBranch->getSuccessor(1)))
8320eae32dcSDimitry Andric std::swap(LoopWeight, ExitWeight);
8330eae32dcSDimitry Andric
8340eae32dcSDimitry Andric if (!ExitWeight)
8350eae32dcSDimitry Andric // Don't have a way to return predicated infinite
836bdd1243dSDimitry Andric return std::nullopt;
8370eae32dcSDimitry Andric
8380eae32dcSDimitry Andric OrigExitWeight = ExitWeight;
8390eae32dcSDimitry Andric
8400eae32dcSDimitry Andric // Estimated exit count is a ratio of the loop weight by the weight of the
8410eae32dcSDimitry Andric // edge exiting the loop, rounded to nearest.
8420eae32dcSDimitry Andric uint64_t ExitCount = llvm::divideNearest(LoopWeight, ExitWeight);
8430eae32dcSDimitry Andric // Estimated trip count is one plus estimated exit count.
8440eae32dcSDimitry Andric return ExitCount + 1;
8450eae32dcSDimitry Andric }
8460eae32dcSDimitry Andric
847bdd1243dSDimitry Andric std::optional<unsigned>
getLoopEstimatedTripCount(Loop * L,unsigned * EstimatedLoopInvocationWeight)8485ffd83dbSDimitry Andric llvm::getLoopEstimatedTripCount(Loop *L,
8495ffd83dbSDimitry Andric unsigned *EstimatedLoopInvocationWeight) {
8500eae32dcSDimitry Andric // Currently we take the estimate exit count only from the loop latch,
8510eae32dcSDimitry Andric // ignoring other exiting blocks. This can overestimate the trip count
8520eae32dcSDimitry Andric // if we exit through another exit, but can never underestimate it.
8530eae32dcSDimitry Andric // TODO: incorporate information from other exits
8540eae32dcSDimitry Andric if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) {
8550eae32dcSDimitry Andric uint64_t ExitWeight;
856bdd1243dSDimitry Andric if (std::optional<uint64_t> EstTripCount =
8570eae32dcSDimitry Andric getEstimatedTripCount(LatchBranch, L, ExitWeight)) {
8585ffd83dbSDimitry Andric if (EstimatedLoopInvocationWeight)
8590eae32dcSDimitry Andric *EstimatedLoopInvocationWeight = ExitWeight;
8600eae32dcSDimitry Andric return *EstTripCount;
8610eae32dcSDimitry Andric }
8620eae32dcSDimitry Andric }
863bdd1243dSDimitry Andric return std::nullopt;
8645ffd83dbSDimitry Andric }
8655ffd83dbSDimitry Andric
setLoopEstimatedTripCount(Loop * L,unsigned EstimatedTripCount,unsigned EstimatedloopInvocationWeight)8665ffd83dbSDimitry Andric bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
8675ffd83dbSDimitry Andric unsigned EstimatedloopInvocationWeight) {
8680eae32dcSDimitry Andric // At the moment, we currently support changing the estimate trip count of
8690eae32dcSDimitry Andric // the latch branch only. We could extend this API to manipulate estimated
8700eae32dcSDimitry Andric // trip counts for any exit.
8715ffd83dbSDimitry Andric BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
8725ffd83dbSDimitry Andric if (!LatchBranch)
8735ffd83dbSDimitry Andric return false;
8745ffd83dbSDimitry Andric
8755ffd83dbSDimitry Andric // Calculate taken and exit weights.
8765ffd83dbSDimitry Andric unsigned LatchExitWeight = 0;
8775ffd83dbSDimitry Andric unsigned BackedgeTakenWeight = 0;
8785ffd83dbSDimitry Andric
8795ffd83dbSDimitry Andric if (EstimatedTripCount > 0) {
8805ffd83dbSDimitry Andric LatchExitWeight = EstimatedloopInvocationWeight;
8815ffd83dbSDimitry Andric BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight;
8825ffd83dbSDimitry Andric }
8835ffd83dbSDimitry Andric
8845ffd83dbSDimitry Andric // Make a swap if back edge is taken when condition is "false".
8855ffd83dbSDimitry Andric if (LatchBranch->getSuccessor(0) != L->getHeader())
8865ffd83dbSDimitry Andric std::swap(BackedgeTakenWeight, LatchExitWeight);
8875ffd83dbSDimitry Andric
8885ffd83dbSDimitry Andric MDBuilder MDB(LatchBranch->getContext());
8895ffd83dbSDimitry Andric
8905ffd83dbSDimitry Andric // Set/Update profile metadata.
8915ffd83dbSDimitry Andric LatchBranch->setMetadata(
8925ffd83dbSDimitry Andric LLVMContext::MD_prof,
8935ffd83dbSDimitry Andric MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight));
8945ffd83dbSDimitry Andric
8955ffd83dbSDimitry Andric return true;
8960b57cec5SDimitry Andric }
8970b57cec5SDimitry Andric
hasIterationCountInvariantInParent(Loop * InnerLoop,ScalarEvolution & SE)8980b57cec5SDimitry Andric bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
8990b57cec5SDimitry Andric ScalarEvolution &SE) {
9000b57cec5SDimitry Andric Loop *OuterL = InnerLoop->getParentLoop();
9010b57cec5SDimitry Andric if (!OuterL)
9020b57cec5SDimitry Andric return true;
9030b57cec5SDimitry Andric
9040b57cec5SDimitry Andric // Get the backedge taken count for the inner loop
9050b57cec5SDimitry Andric BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
9060b57cec5SDimitry Andric const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
9070b57cec5SDimitry Andric if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
9080b57cec5SDimitry Andric !InnerLoopBECountSC->getType()->isIntegerTy())
9090b57cec5SDimitry Andric return false;
9100b57cec5SDimitry Andric
9110b57cec5SDimitry Andric // Get whether count is invariant to the outer loop
9120b57cec5SDimitry Andric ScalarEvolution::LoopDisposition LD =
9130b57cec5SDimitry Andric SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
9140b57cec5SDimitry Andric if (LD != ScalarEvolution::LoopInvariant)
9150b57cec5SDimitry Andric return false;
9160b57cec5SDimitry Andric
9170b57cec5SDimitry Andric return true;
9180b57cec5SDimitry Andric }
9190b57cec5SDimitry Andric
getMinMaxReductionIntrinsicOp(RecurKind RK)920fe013be4SDimitry Andric Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
921fe013be4SDimitry Andric switch (RK) {
922fe013be4SDimitry Andric default:
923fe013be4SDimitry Andric llvm_unreachable("Unknown min/max recurrence kind");
924fe013be4SDimitry Andric case RecurKind::UMin:
925fe013be4SDimitry Andric return Intrinsic::umin;
926fe013be4SDimitry Andric case RecurKind::UMax:
927fe013be4SDimitry Andric return Intrinsic::umax;
928fe013be4SDimitry Andric case RecurKind::SMin:
929fe013be4SDimitry Andric return Intrinsic::smin;
930fe013be4SDimitry Andric case RecurKind::SMax:
931fe013be4SDimitry Andric return Intrinsic::smax;
932fe013be4SDimitry Andric case RecurKind::FMin:
933fe013be4SDimitry Andric return Intrinsic::minnum;
934fe013be4SDimitry Andric case RecurKind::FMax:
935fe013be4SDimitry Andric return Intrinsic::maxnum;
936fe013be4SDimitry Andric case RecurKind::FMinimum:
937fe013be4SDimitry Andric return Intrinsic::minimum;
938fe013be4SDimitry Andric case RecurKind::FMaximum:
939fe013be4SDimitry Andric return Intrinsic::maximum;
940fe013be4SDimitry Andric }
941fe013be4SDimitry Andric }
942fe013be4SDimitry Andric
getMinMaxReductionPredicate(RecurKind RK)943349cc55cSDimitry Andric CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
9440b57cec5SDimitry Andric switch (RK) {
9450b57cec5SDimitry Andric default:
9460b57cec5SDimitry Andric llvm_unreachable("Unknown min/max recurrence kind");
947e8d8bef9SDimitry Andric case RecurKind::UMin:
948349cc55cSDimitry Andric return CmpInst::ICMP_ULT;
949e8d8bef9SDimitry Andric case RecurKind::UMax:
950349cc55cSDimitry Andric return CmpInst::ICMP_UGT;
951e8d8bef9SDimitry Andric case RecurKind::SMin:
952349cc55cSDimitry Andric return CmpInst::ICMP_SLT;
953e8d8bef9SDimitry Andric case RecurKind::SMax:
954349cc55cSDimitry Andric return CmpInst::ICMP_SGT;
955e8d8bef9SDimitry Andric case RecurKind::FMin:
956349cc55cSDimitry Andric return CmpInst::FCMP_OLT;
957e8d8bef9SDimitry Andric case RecurKind::FMax:
958349cc55cSDimitry Andric return CmpInst::FCMP_OGT;
959fe013be4SDimitry Andric // We do not add FMinimum/FMaximum recurrence kind here since there is no
960fe013be4SDimitry Andric // equivalent predicate which compares signed zeroes according to the
961fe013be4SDimitry Andric // semantics of the intrinsics (llvm.minimum/maximum).
962349cc55cSDimitry Andric }
9630b57cec5SDimitry Andric }
9640b57cec5SDimitry Andric
createAnyOfOp(IRBuilderBase & Builder,Value * StartVal,RecurKind RK,Value * Left,Value * Right)965*c9157d92SDimitry Andric Value *llvm::createAnyOfOp(IRBuilderBase &Builder, Value *StartVal,
966349cc55cSDimitry Andric RecurKind RK, Value *Left, Value *Right) {
967349cc55cSDimitry Andric if (auto VTy = dyn_cast<VectorType>(Left->getType()))
968349cc55cSDimitry Andric StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
969349cc55cSDimitry Andric Value *Cmp =
970349cc55cSDimitry Andric Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
971349cc55cSDimitry Andric return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
972349cc55cSDimitry Andric }
973349cc55cSDimitry Andric
createMinMaxOp(IRBuilderBase & Builder,RecurKind RK,Value * Left,Value * Right)974349cc55cSDimitry Andric Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
975349cc55cSDimitry Andric Value *Right) {
976fe013be4SDimitry Andric Type *Ty = Left->getType();
977fe013be4SDimitry Andric if (Ty->isIntOrIntVectorTy() ||
978fe013be4SDimitry Andric (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) {
979fe013be4SDimitry Andric // TODO: Add float minnum/maxnum support when FMF nnan is set.
980fe013be4SDimitry Andric Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
981fe013be4SDimitry Andric return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
982fe013be4SDimitry Andric "rdx.minmax");
983fe013be4SDimitry Andric }
984349cc55cSDimitry Andric CmpInst::Predicate Pred = getMinMaxReductionPredicate(RK);
985e8d8bef9SDimitry Andric Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
9860b57cec5SDimitry Andric Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
9870b57cec5SDimitry Andric return Select;
9880b57cec5SDimitry Andric }
9890b57cec5SDimitry Andric
9900b57cec5SDimitry Andric // Helper to generate an ordered reduction.
getOrderedReduction(IRBuilderBase & Builder,Value * Acc,Value * Src,unsigned Op,RecurKind RdxKind)991e8d8bef9SDimitry Andric Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
9920eae32dcSDimitry Andric unsigned Op, RecurKind RdxKind) {
9935ffd83dbSDimitry Andric unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
9940b57cec5SDimitry Andric
9950b57cec5SDimitry Andric // Extract and apply reduction ops in ascending order:
9960b57cec5SDimitry Andric // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1]
9970b57cec5SDimitry Andric Value *Result = Acc;
9980b57cec5SDimitry Andric for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
9990b57cec5SDimitry Andric Value *Ext =
10000b57cec5SDimitry Andric Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));
10010b57cec5SDimitry Andric
10020b57cec5SDimitry Andric if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
10030b57cec5SDimitry Andric Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
10040b57cec5SDimitry Andric "bin.rdx");
10050b57cec5SDimitry Andric } else {
1006e8d8bef9SDimitry Andric assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
10070b57cec5SDimitry Andric "Invalid min/max");
1008e8d8bef9SDimitry Andric Result = createMinMaxOp(Builder, RdxKind, Result, Ext);
10090b57cec5SDimitry Andric }
10100b57cec5SDimitry Andric }
10110b57cec5SDimitry Andric
10120b57cec5SDimitry Andric return Result;
10130b57cec5SDimitry Andric }
10140b57cec5SDimitry Andric
10150b57cec5SDimitry Andric // Helper to generate a log2 shuffle reduction.
getShuffleReduction(IRBuilderBase & Builder,Value * Src,unsigned Op,RecurKind RdxKind)1016e8d8bef9SDimitry Andric Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
10170eae32dcSDimitry Andric unsigned Op, RecurKind RdxKind) {
10185ffd83dbSDimitry Andric unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
10190b57cec5SDimitry Andric // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
10200b57cec5SDimitry Andric // and vector ops, reducing the set of values being computed by half each
10210b57cec5SDimitry Andric // round.
10220b57cec5SDimitry Andric assert(isPowerOf2_32(VF) &&
10230b57cec5SDimitry Andric "Reduction emission only supported for pow2 vectors!");
10240eae32dcSDimitry Andric // Note: fast-math-flags flags are controlled by the builder configuration
10250eae32dcSDimitry Andric // and are assumed to apply to all generated arithmetic instructions. Other
10260eae32dcSDimitry Andric // poison generating flags (nsw/nuw/inbounds/inrange/exact) are not part
10270eae32dcSDimitry Andric // of the builder configuration, and since they're not passed explicitly,
10280eae32dcSDimitry Andric // will never be relevant here. Note that it would be generally unsound to
10290eae32dcSDimitry Andric // propagate these from an intrinsic call to the expansion anyways as we/
10300eae32dcSDimitry Andric // change the order of operations.
10310b57cec5SDimitry Andric Value *TmpVec = Src;
10325ffd83dbSDimitry Andric SmallVector<int, 32> ShuffleMask(VF);
10330b57cec5SDimitry Andric for (unsigned i = VF; i != 1; i >>= 1) {
10340b57cec5SDimitry Andric // Move the upper half of the vector to the lower half.
10350b57cec5SDimitry Andric for (unsigned j = 0; j != i / 2; ++j)
10365ffd83dbSDimitry Andric ShuffleMask[j] = i / 2 + j;
10370b57cec5SDimitry Andric
10380b57cec5SDimitry Andric // Fill the rest of the mask with undef.
10395ffd83dbSDimitry Andric std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
10400b57cec5SDimitry Andric
1041e8d8bef9SDimitry Andric Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");
10420b57cec5SDimitry Andric
10430b57cec5SDimitry Andric if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
10440b57cec5SDimitry Andric TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
10450b57cec5SDimitry Andric "bin.rdx");
10460b57cec5SDimitry Andric } else {
1047e8d8bef9SDimitry Andric assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
10480b57cec5SDimitry Andric "Invalid min/max");
1049e8d8bef9SDimitry Andric TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);
10500b57cec5SDimitry Andric }
10510b57cec5SDimitry Andric }
10520b57cec5SDimitry Andric // The result is in the first element of the vector.
10530b57cec5SDimitry Andric return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
10540b57cec5SDimitry Andric }
10550b57cec5SDimitry Andric
createAnyOfTargetReduction(IRBuilderBase & Builder,Value * Src,const RecurrenceDescriptor & Desc,PHINode * OrigPhi)1056*c9157d92SDimitry Andric Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src,
1057349cc55cSDimitry Andric const RecurrenceDescriptor &Desc,
1058349cc55cSDimitry Andric PHINode *OrigPhi) {
1059*c9157d92SDimitry Andric assert(
1060*c9157d92SDimitry Andric RecurrenceDescriptor::isAnyOfRecurrenceKind(Desc.getRecurrenceKind()) &&
1061349cc55cSDimitry Andric "Unexpected reduction kind");
1062349cc55cSDimitry Andric Value *InitVal = Desc.getRecurrenceStartValue();
1063349cc55cSDimitry Andric Value *NewVal = nullptr;
1064349cc55cSDimitry Andric
1065349cc55cSDimitry Andric // First use the original phi to determine the new value we're trying to
1066349cc55cSDimitry Andric // select from in the loop.
1067349cc55cSDimitry Andric SelectInst *SI = nullptr;
1068349cc55cSDimitry Andric for (auto *U : OrigPhi->users()) {
1069349cc55cSDimitry Andric if ((SI = dyn_cast<SelectInst>(U)))
1070349cc55cSDimitry Andric break;
1071349cc55cSDimitry Andric }
1072349cc55cSDimitry Andric assert(SI && "One user of the original phi should be a select");
1073349cc55cSDimitry Andric
1074349cc55cSDimitry Andric if (SI->getTrueValue() == OrigPhi)
1075349cc55cSDimitry Andric NewVal = SI->getFalseValue();
1076349cc55cSDimitry Andric else {
1077349cc55cSDimitry Andric assert(SI->getFalseValue() == OrigPhi &&
1078349cc55cSDimitry Andric "At least one input to the select should be the original Phi");
1079349cc55cSDimitry Andric NewVal = SI->getTrueValue();
1080349cc55cSDimitry Andric }
1081349cc55cSDimitry Andric
1082349cc55cSDimitry Andric // Create a splat vector with the new value and compare this to the vector
1083349cc55cSDimitry Andric // we want to reduce.
1084349cc55cSDimitry Andric ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
1085349cc55cSDimitry Andric Value *Right = Builder.CreateVectorSplat(EC, InitVal);
1086349cc55cSDimitry Andric Value *Cmp =
1087349cc55cSDimitry Andric Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");
1088349cc55cSDimitry Andric
1089349cc55cSDimitry Andric // If any predicate is true it means that we want to select the new value.
1090349cc55cSDimitry Andric Cmp = Builder.CreateOrReduce(Cmp);
1091349cc55cSDimitry Andric return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
1092349cc55cSDimitry Andric }
1093349cc55cSDimitry Andric
createSimpleTargetReduction(IRBuilderBase & Builder,Value * Src,RecurKind RdxKind)1094*c9157d92SDimitry Andric Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src,
1095*c9157d92SDimitry Andric RecurKind RdxKind) {
1096e8d8bef9SDimitry Andric auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
1097e8d8bef9SDimitry Andric switch (RdxKind) {
1098e8d8bef9SDimitry Andric case RecurKind::Add:
1099e8d8bef9SDimitry Andric return Builder.CreateAddReduce(Src);
1100e8d8bef9SDimitry Andric case RecurKind::Mul:
1101e8d8bef9SDimitry Andric return Builder.CreateMulReduce(Src);
1102e8d8bef9SDimitry Andric case RecurKind::And:
1103e8d8bef9SDimitry Andric return Builder.CreateAndReduce(Src);
1104e8d8bef9SDimitry Andric case RecurKind::Or:
1105e8d8bef9SDimitry Andric return Builder.CreateOrReduce(Src);
1106e8d8bef9SDimitry Andric case RecurKind::Xor:
1107e8d8bef9SDimitry Andric return Builder.CreateXorReduce(Src);
11084824e7fdSDimitry Andric case RecurKind::FMulAdd:
1109e8d8bef9SDimitry Andric case RecurKind::FAdd:
1110e8d8bef9SDimitry Andric return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy),
1111e8d8bef9SDimitry Andric Src);
1112e8d8bef9SDimitry Andric case RecurKind::FMul:
1113e8d8bef9SDimitry Andric return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src);
1114e8d8bef9SDimitry Andric case RecurKind::SMax:
1115e8d8bef9SDimitry Andric return Builder.CreateIntMaxReduce(Src, true);
1116e8d8bef9SDimitry Andric case RecurKind::SMin:
1117e8d8bef9SDimitry Andric return Builder.CreateIntMinReduce(Src, true);
1118e8d8bef9SDimitry Andric case RecurKind::UMax:
1119e8d8bef9SDimitry Andric return Builder.CreateIntMaxReduce(Src, false);
1120e8d8bef9SDimitry Andric case RecurKind::UMin:
1121e8d8bef9SDimitry Andric return Builder.CreateIntMinReduce(Src, false);
1122e8d8bef9SDimitry Andric case RecurKind::FMax:
1123e8d8bef9SDimitry Andric return Builder.CreateFPMaxReduce(Src);
1124e8d8bef9SDimitry Andric case RecurKind::FMin:
1125e8d8bef9SDimitry Andric return Builder.CreateFPMinReduce(Src);
1126fe013be4SDimitry Andric case RecurKind::FMinimum:
1127fe013be4SDimitry Andric return Builder.CreateFPMinimumReduce(Src);
1128fe013be4SDimitry Andric case RecurKind::FMaximum:
1129fe013be4SDimitry Andric return Builder.CreateFPMaximumReduce(Src);
11300b57cec5SDimitry Andric default:
11310b57cec5SDimitry Andric llvm_unreachable("Unhandled opcode");
11320b57cec5SDimitry Andric }
11330b57cec5SDimitry Andric }
11340b57cec5SDimitry Andric
createTargetReduction(IRBuilderBase & B,const RecurrenceDescriptor & Desc,Value * Src,PHINode * OrigPhi)11355ffd83dbSDimitry Andric Value *llvm::createTargetReduction(IRBuilderBase &B,
1136349cc55cSDimitry Andric const RecurrenceDescriptor &Desc, Value *Src,
1137349cc55cSDimitry Andric PHINode *OrigPhi) {
11380b57cec5SDimitry Andric // TODO: Support in-order reductions based on the recurrence descriptor.
11390b57cec5SDimitry Andric // All ops in the reduction inherit fast-math-flags from the recurrence
11400b57cec5SDimitry Andric // descriptor.
11415ffd83dbSDimitry Andric IRBuilderBase::FastMathFlagGuard FMFGuard(B);
11420b57cec5SDimitry Andric B.setFastMathFlags(Desc.getFastMathFlags());
1143349cc55cSDimitry Andric
1144349cc55cSDimitry Andric RecurKind RK = Desc.getRecurrenceKind();
1145*c9157d92SDimitry Andric if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
1146*c9157d92SDimitry Andric return createAnyOfTargetReduction(B, Src, Desc, OrigPhi);
1147349cc55cSDimitry Andric
1148*c9157d92SDimitry Andric return createSimpleTargetReduction(B, Src, RK);
11490b57cec5SDimitry Andric }
11500b57cec5SDimitry Andric
createOrderedReduction(IRBuilderBase & B,const RecurrenceDescriptor & Desc,Value * Src,Value * Start)1151fe6060f1SDimitry Andric Value *llvm::createOrderedReduction(IRBuilderBase &B,
1152fe6060f1SDimitry Andric const RecurrenceDescriptor &Desc,
1153fe6060f1SDimitry Andric Value *Src, Value *Start) {
11544824e7fdSDimitry Andric assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||
11554824e7fdSDimitry Andric Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
1156fe6060f1SDimitry Andric "Unexpected reduction kind");
1157fe6060f1SDimitry Andric assert(Src->getType()->isVectorTy() && "Expected a vector type");
1158fe6060f1SDimitry Andric assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
1159fe6060f1SDimitry Andric
1160fe6060f1SDimitry Andric return B.CreateFAddReduce(Start, Src);
1161fe6060f1SDimitry Andric }
1162fe6060f1SDimitry Andric
propagateIRFlags(Value * I,ArrayRef<Value * > VL,Value * OpValue,bool IncludeWrapFlags)116381ad6265SDimitry Andric void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,
116481ad6265SDimitry Andric bool IncludeWrapFlags) {
11650b57cec5SDimitry Andric auto *VecOp = dyn_cast<Instruction>(I);
11660b57cec5SDimitry Andric if (!VecOp)
11670b57cec5SDimitry Andric return;
11680b57cec5SDimitry Andric auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
11690b57cec5SDimitry Andric : dyn_cast<Instruction>(OpValue);
11700b57cec5SDimitry Andric if (!Intersection)
11710b57cec5SDimitry Andric return;
11720b57cec5SDimitry Andric const unsigned Opcode = Intersection->getOpcode();
117381ad6265SDimitry Andric VecOp->copyIRFlags(Intersection, IncludeWrapFlags);
11740b57cec5SDimitry Andric for (auto *V : VL) {
11750b57cec5SDimitry Andric auto *Instr = dyn_cast<Instruction>(V);
11760b57cec5SDimitry Andric if (!Instr)
11770b57cec5SDimitry Andric continue;
11780b57cec5SDimitry Andric if (OpValue == nullptr || Opcode == Instr->getOpcode())
11790b57cec5SDimitry Andric VecOp->andIRFlags(V);
11800b57cec5SDimitry Andric }
11810b57cec5SDimitry Andric }
11820b57cec5SDimitry Andric
isKnownNegativeInLoop(const SCEV * S,const Loop * L,ScalarEvolution & SE)11830b57cec5SDimitry Andric bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L,
11840b57cec5SDimitry Andric ScalarEvolution &SE) {
11850b57cec5SDimitry Andric const SCEV *Zero = SE.getZero(S->getType());
11860b57cec5SDimitry Andric return SE.isAvailableAtLoopEntry(S, L) &&
11870b57cec5SDimitry Andric SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero);
11880b57cec5SDimitry Andric }
11890b57cec5SDimitry Andric
isKnownNonNegativeInLoop(const SCEV * S,const Loop * L,ScalarEvolution & SE)11900b57cec5SDimitry Andric bool llvm::isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
11910b57cec5SDimitry Andric ScalarEvolution &SE) {
11920b57cec5SDimitry Andric const SCEV *Zero = SE.getZero(S->getType());
11930b57cec5SDimitry Andric return SE.isAvailableAtLoopEntry(S, L) &&
11940b57cec5SDimitry Andric SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);
11950b57cec5SDimitry Andric }
11960b57cec5SDimitry Andric
isKnownPositiveInLoop(const SCEV * S,const Loop * L,ScalarEvolution & SE)1197fe013be4SDimitry Andric bool llvm::isKnownPositiveInLoop(const SCEV *S, const Loop *L,
1198fe013be4SDimitry Andric ScalarEvolution &SE) {
1199fe013be4SDimitry Andric const SCEV *Zero = SE.getZero(S->getType());
1200fe013be4SDimitry Andric return SE.isAvailableAtLoopEntry(S, L) &&
1201fe013be4SDimitry Andric SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, S, Zero);
1202fe013be4SDimitry Andric }
1203fe013be4SDimitry Andric
isKnownNonPositiveInLoop(const SCEV * S,const Loop * L,ScalarEvolution & SE)1204fe013be4SDimitry Andric bool llvm::isKnownNonPositiveInLoop(const SCEV *S, const Loop *L,
1205fe013be4SDimitry Andric ScalarEvolution &SE) {
1206fe013be4SDimitry Andric const SCEV *Zero = SE.getZero(S->getType());
1207fe013be4SDimitry Andric return SE.isAvailableAtLoopEntry(S, L) &&
1208fe013be4SDimitry Andric SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLE, S, Zero);
1209fe013be4SDimitry Andric }
1210fe013be4SDimitry Andric
cannotBeMinInLoop(const SCEV * S,const Loop * L,ScalarEvolution & SE,bool Signed)12110b57cec5SDimitry Andric bool llvm::cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
12120b57cec5SDimitry Andric bool Signed) {
12130b57cec5SDimitry Andric unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
12140b57cec5SDimitry Andric APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :
12150b57cec5SDimitry Andric APInt::getMinValue(BitWidth);
12160b57cec5SDimitry Andric auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
12170b57cec5SDimitry Andric return SE.isAvailableAtLoopEntry(S, L) &&
12180b57cec5SDimitry Andric SE.isLoopEntryGuardedByCond(L, Predicate, S,
12190b57cec5SDimitry Andric SE.getConstant(Min));
12200b57cec5SDimitry Andric }
12210b57cec5SDimitry Andric
cannotBeMaxInLoop(const SCEV * S,const Loop * L,ScalarEvolution & SE,bool Signed)12220b57cec5SDimitry Andric bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
12230b57cec5SDimitry Andric bool Signed) {
12240b57cec5SDimitry Andric unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
12250b57cec5SDimitry Andric APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) :
12260b57cec5SDimitry Andric APInt::getMaxValue(BitWidth);
12270b57cec5SDimitry Andric auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
12280b57cec5SDimitry Andric return SE.isAvailableAtLoopEntry(S, L) &&
12290b57cec5SDimitry Andric SE.isLoopEntryGuardedByCond(L, Predicate, S,
12300b57cec5SDimitry Andric SE.getConstant(Max));
12310b57cec5SDimitry Andric }
12325ffd83dbSDimitry Andric
12335ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
12345ffd83dbSDimitry Andric // rewriteLoopExitValues - Optimize IV users outside the loop.
12355ffd83dbSDimitry Andric // As a side effect, reduces the amount of IV processing within the loop.
12365ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
12375ffd83dbSDimitry Andric
hasHardUserWithinLoop(const Loop * L,const Instruction * I)12385ffd83dbSDimitry Andric static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) {
12395ffd83dbSDimitry Andric SmallPtrSet<const Instruction *, 8> Visited;
12405ffd83dbSDimitry Andric SmallVector<const Instruction *, 8> WorkList;
12415ffd83dbSDimitry Andric Visited.insert(I);
12425ffd83dbSDimitry Andric WorkList.push_back(I);
12435ffd83dbSDimitry Andric while (!WorkList.empty()) {
12445ffd83dbSDimitry Andric const Instruction *Curr = WorkList.pop_back_val();
12455ffd83dbSDimitry Andric // This use is outside the loop, nothing to do.
12465ffd83dbSDimitry Andric if (!L->contains(Curr))
12475ffd83dbSDimitry Andric continue;
12485ffd83dbSDimitry Andric // Do we assume it is a "hard" use which will not be eliminated easily?
12495ffd83dbSDimitry Andric if (Curr->mayHaveSideEffects())
12505ffd83dbSDimitry Andric return true;
12515ffd83dbSDimitry Andric // Otherwise, add all its users to worklist.
1252bdd1243dSDimitry Andric for (const auto *U : Curr->users()) {
12535ffd83dbSDimitry Andric auto *UI = cast<Instruction>(U);
12545ffd83dbSDimitry Andric if (Visited.insert(UI).second)
12555ffd83dbSDimitry Andric WorkList.push_back(UI);
12565ffd83dbSDimitry Andric }
12575ffd83dbSDimitry Andric }
12585ffd83dbSDimitry Andric return false;
12595ffd83dbSDimitry Andric }
12605ffd83dbSDimitry Andric
12615ffd83dbSDimitry Andric // Collect information about PHI nodes which can be transformed in
12625ffd83dbSDimitry Andric // rewriteLoopExitValues.
12635ffd83dbSDimitry Andric struct RewritePhi {
12645ffd83dbSDimitry Andric PHINode *PN; // For which PHI node is this replacement?
12655ffd83dbSDimitry Andric unsigned Ith; // For which incoming value?
12665ffd83dbSDimitry Andric const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting.
12675ffd83dbSDimitry Andric Instruction *ExpansionPoint; // Where we'd like to expand that SCEV?
12685ffd83dbSDimitry Andric bool HighCost; // Is this expansion a high-cost?
12695ffd83dbSDimitry Andric
RewritePhiRewritePhi12705ffd83dbSDimitry Andric RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
12715ffd83dbSDimitry Andric bool H)
12725ffd83dbSDimitry Andric : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
12735ffd83dbSDimitry Andric HighCost(H) {}
12745ffd83dbSDimitry Andric };
12755ffd83dbSDimitry Andric
12765ffd83dbSDimitry Andric // Check whether it is possible to delete the loop after rewriting exit
12775ffd83dbSDimitry Andric // value. If it is possible, ignore ReplaceExitValue and do rewriting
12785ffd83dbSDimitry Andric // aggressively.
canLoopBeDeleted(Loop * L,SmallVector<RewritePhi,8> & RewritePhiSet)12795ffd83dbSDimitry Andric static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
12805ffd83dbSDimitry Andric BasicBlock *Preheader = L->getLoopPreheader();
12815ffd83dbSDimitry Andric // If there is no preheader, the loop will not be deleted.
12825ffd83dbSDimitry Andric if (!Preheader)
12835ffd83dbSDimitry Andric return false;
12845ffd83dbSDimitry Andric
12855ffd83dbSDimitry Andric // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
12865ffd83dbSDimitry Andric // We obviate multiple ExitingBlocks case for simplicity.
12875ffd83dbSDimitry Andric // TODO: If we see testcase with multiple ExitingBlocks can be deleted
12885ffd83dbSDimitry Andric // after exit value rewriting, we can enhance the logic here.
12895ffd83dbSDimitry Andric SmallVector<BasicBlock *, 4> ExitingBlocks;
12905ffd83dbSDimitry Andric L->getExitingBlocks(ExitingBlocks);
12915ffd83dbSDimitry Andric SmallVector<BasicBlock *, 8> ExitBlocks;
12925ffd83dbSDimitry Andric L->getUniqueExitBlocks(ExitBlocks);
12935ffd83dbSDimitry Andric if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
12945ffd83dbSDimitry Andric return false;
12955ffd83dbSDimitry Andric
12965ffd83dbSDimitry Andric BasicBlock *ExitBlock = ExitBlocks[0];
12975ffd83dbSDimitry Andric BasicBlock::iterator BI = ExitBlock->begin();
12985ffd83dbSDimitry Andric while (PHINode *P = dyn_cast<PHINode>(BI)) {
12995ffd83dbSDimitry Andric Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
13005ffd83dbSDimitry Andric
13015ffd83dbSDimitry Andric // If the Incoming value of P is found in RewritePhiSet, we know it
13025ffd83dbSDimitry Andric // could be rewritten to use a loop invariant value in transformation
13035ffd83dbSDimitry Andric // phase later. Skip it in the loop invariant check below.
13045ffd83dbSDimitry Andric bool found = false;
13055ffd83dbSDimitry Andric for (const RewritePhi &Phi : RewritePhiSet) {
13065ffd83dbSDimitry Andric unsigned i = Phi.Ith;
13075ffd83dbSDimitry Andric if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
13085ffd83dbSDimitry Andric found = true;
13095ffd83dbSDimitry Andric break;
13105ffd83dbSDimitry Andric }
13115ffd83dbSDimitry Andric }
13125ffd83dbSDimitry Andric
13135ffd83dbSDimitry Andric Instruction *I;
13145ffd83dbSDimitry Andric if (!found && (I = dyn_cast<Instruction>(Incoming)))
13155ffd83dbSDimitry Andric if (!L->hasLoopInvariantOperands(I))
13165ffd83dbSDimitry Andric return false;
13175ffd83dbSDimitry Andric
13185ffd83dbSDimitry Andric ++BI;
13195ffd83dbSDimitry Andric }
13205ffd83dbSDimitry Andric
13215ffd83dbSDimitry Andric for (auto *BB : L->blocks())
13225ffd83dbSDimitry Andric if (llvm::any_of(*BB, [](Instruction &I) {
13235ffd83dbSDimitry Andric return I.mayHaveSideEffects();
13245ffd83dbSDimitry Andric }))
13255ffd83dbSDimitry Andric return false;
13265ffd83dbSDimitry Andric
13275ffd83dbSDimitry Andric return true;
13285ffd83dbSDimitry Andric }
13295ffd83dbSDimitry Andric
1330753f127fSDimitry Andric /// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi,
1331753f127fSDimitry Andric /// and returns true if this Phi is an induction phi in the loop. When
1332753f127fSDimitry Andric /// isInductionPHI returns true, \p ID will be also be set by isInductionPHI.
checkIsIndPhi(PHINode * Phi,Loop * L,ScalarEvolution * SE,InductionDescriptor & ID)1333753f127fSDimitry Andric static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE,
1334753f127fSDimitry Andric InductionDescriptor &ID) {
1335753f127fSDimitry Andric if (!Phi)
1336753f127fSDimitry Andric return false;
1337753f127fSDimitry Andric if (!L->getLoopPreheader())
1338753f127fSDimitry Andric return false;
1339753f127fSDimitry Andric if (Phi->getParent() != L->getHeader())
1340753f127fSDimitry Andric return false;
1341753f127fSDimitry Andric return InductionDescriptor::isInductionPHI(Phi, L, SE, ID);
1342753f127fSDimitry Andric }
1343753f127fSDimitry Andric
rewriteLoopExitValues(Loop * L,LoopInfo * LI,TargetLibraryInfo * TLI,ScalarEvolution * SE,const TargetTransformInfo * TTI,SCEVExpander & Rewriter,DominatorTree * DT,ReplaceExitVal ReplaceExitValue,SmallVector<WeakTrackingVH,16> & DeadInsts)13445ffd83dbSDimitry Andric int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
13455ffd83dbSDimitry Andric ScalarEvolution *SE,
13465ffd83dbSDimitry Andric const TargetTransformInfo *TTI,
13475ffd83dbSDimitry Andric SCEVExpander &Rewriter, DominatorTree *DT,
13485ffd83dbSDimitry Andric ReplaceExitVal ReplaceExitValue,
13495ffd83dbSDimitry Andric SmallVector<WeakTrackingVH, 16> &DeadInsts) {
13505ffd83dbSDimitry Andric // Check a pre-condition.
13515ffd83dbSDimitry Andric assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
13525ffd83dbSDimitry Andric "Indvars did not preserve LCSSA!");
13535ffd83dbSDimitry Andric
13545ffd83dbSDimitry Andric SmallVector<BasicBlock*, 8> ExitBlocks;
13555ffd83dbSDimitry Andric L->getUniqueExitBlocks(ExitBlocks);
13565ffd83dbSDimitry Andric
13575ffd83dbSDimitry Andric SmallVector<RewritePhi, 8> RewritePhiSet;
13585ffd83dbSDimitry Andric // Find all values that are computed inside the loop, but used outside of it.
13595ffd83dbSDimitry Andric // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
13605ffd83dbSDimitry Andric // the exit blocks of the loop to find them.
13615ffd83dbSDimitry Andric for (BasicBlock *ExitBB : ExitBlocks) {
13625ffd83dbSDimitry Andric // If there are no PHI nodes in this exit block, then no values defined
13635ffd83dbSDimitry Andric // inside the loop are used on this path, skip it.
13645ffd83dbSDimitry Andric PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
13655ffd83dbSDimitry Andric if (!PN) continue;
13665ffd83dbSDimitry Andric
13675ffd83dbSDimitry Andric unsigned NumPreds = PN->getNumIncomingValues();
13685ffd83dbSDimitry Andric
13695ffd83dbSDimitry Andric // Iterate over all of the PHI nodes.
13705ffd83dbSDimitry Andric BasicBlock::iterator BBI = ExitBB->begin();
13715ffd83dbSDimitry Andric while ((PN = dyn_cast<PHINode>(BBI++))) {
13725ffd83dbSDimitry Andric if (PN->use_empty())
13735ffd83dbSDimitry Andric continue; // dead use, don't replace it
13745ffd83dbSDimitry Andric
13755ffd83dbSDimitry Andric if (!SE->isSCEVable(PN->getType()))
13765ffd83dbSDimitry Andric continue;
13775ffd83dbSDimitry Andric
13785ffd83dbSDimitry Andric // Iterate over all of the values in all the PHI nodes.
13795ffd83dbSDimitry Andric for (unsigned i = 0; i != NumPreds; ++i) {
13805ffd83dbSDimitry Andric // If the value being merged in is not integer or is not defined
13815ffd83dbSDimitry Andric // in the loop, skip it.
13825ffd83dbSDimitry Andric Value *InVal = PN->getIncomingValue(i);
13835ffd83dbSDimitry Andric if (!isa<Instruction>(InVal))
13845ffd83dbSDimitry Andric continue;
13855ffd83dbSDimitry Andric
13865ffd83dbSDimitry Andric // If this pred is for a subloop, not L itself, skip it.
13875ffd83dbSDimitry Andric if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
13885ffd83dbSDimitry Andric continue; // The Block is in a subloop, skip it.
13895ffd83dbSDimitry Andric
13905ffd83dbSDimitry Andric // Check that InVal is defined in the loop.
13915ffd83dbSDimitry Andric Instruction *Inst = cast<Instruction>(InVal);
13925ffd83dbSDimitry Andric if (!L->contains(Inst))
13935ffd83dbSDimitry Andric continue;
13945ffd83dbSDimitry Andric
1395753f127fSDimitry Andric // Find exit values which are induction variables in the loop, and are
1396753f127fSDimitry Andric // unused in the loop, with the only use being the exit block PhiNode,
1397753f127fSDimitry Andric // and the induction variable update binary operator.
1398753f127fSDimitry Andric // The exit value can be replaced with the final value when it is cheap
1399753f127fSDimitry Andric // to do so.
1400753f127fSDimitry Andric if (ReplaceExitValue == UnusedIndVarInLoop) {
1401753f127fSDimitry Andric InductionDescriptor ID;
1402753f127fSDimitry Andric PHINode *IndPhi = dyn_cast<PHINode>(Inst);
1403753f127fSDimitry Andric if (IndPhi) {
1404753f127fSDimitry Andric if (!checkIsIndPhi(IndPhi, L, SE, ID))
1405753f127fSDimitry Andric continue;
1406753f127fSDimitry Andric // This is an induction PHI. Check that the only users are PHI
1407753f127fSDimitry Andric // nodes, and induction variable update binary operators.
1408753f127fSDimitry Andric if (llvm::any_of(Inst->users(), [&](User *U) {
1409753f127fSDimitry Andric if (!isa<PHINode>(U) && !isa<BinaryOperator>(U))
1410753f127fSDimitry Andric return true;
1411753f127fSDimitry Andric BinaryOperator *B = dyn_cast<BinaryOperator>(U);
1412753f127fSDimitry Andric if (B && B != ID.getInductionBinOp())
1413753f127fSDimitry Andric return true;
1414753f127fSDimitry Andric return false;
1415753f127fSDimitry Andric }))
1416753f127fSDimitry Andric continue;
1417753f127fSDimitry Andric } else {
1418753f127fSDimitry Andric // If it is not an induction phi, it must be an induction update
1419753f127fSDimitry Andric // binary operator with an induction phi user.
1420753f127fSDimitry Andric BinaryOperator *B = dyn_cast<BinaryOperator>(Inst);
1421753f127fSDimitry Andric if (!B)
1422753f127fSDimitry Andric continue;
1423753f127fSDimitry Andric if (llvm::any_of(Inst->users(), [&](User *U) {
1424753f127fSDimitry Andric PHINode *Phi = dyn_cast<PHINode>(U);
1425753f127fSDimitry Andric if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))
1426753f127fSDimitry Andric return true;
1427753f127fSDimitry Andric return false;
1428753f127fSDimitry Andric }))
1429753f127fSDimitry Andric continue;
1430753f127fSDimitry Andric if (B != ID.getInductionBinOp())
1431753f127fSDimitry Andric continue;
1432753f127fSDimitry Andric }
1433753f127fSDimitry Andric }
1434753f127fSDimitry Andric
14355ffd83dbSDimitry Andric // Okay, this instruction has a user outside of the current loop
14365ffd83dbSDimitry Andric // and varies predictably *inside* the loop. Evaluate the value it
14375ffd83dbSDimitry Andric // contains when the loop exits, if possible. We prefer to start with
14385ffd83dbSDimitry Andric // expressions which are true for all exits (so as to maximize
14395ffd83dbSDimitry Andric // expression reuse by the SCEVExpander), but resort to per-exit
14405ffd83dbSDimitry Andric // evaluation if that fails.
14415ffd83dbSDimitry Andric const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
14425ffd83dbSDimitry Andric if (isa<SCEVCouldNotCompute>(ExitValue) ||
14435ffd83dbSDimitry Andric !SE->isLoopInvariant(ExitValue, L) ||
1444fcaf7f86SDimitry Andric !Rewriter.isSafeToExpand(ExitValue)) {
14455ffd83dbSDimitry Andric // TODO: This should probably be sunk into SCEV in some way; maybe a
14465ffd83dbSDimitry Andric // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
14475ffd83dbSDimitry Andric // most SCEV expressions and other recurrence types (e.g. shift
14485ffd83dbSDimitry Andric // recurrences). Is there existing code we can reuse?
14495ffd83dbSDimitry Andric const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
14505ffd83dbSDimitry Andric if (isa<SCEVCouldNotCompute>(ExitCount))
14515ffd83dbSDimitry Andric continue;
14525ffd83dbSDimitry Andric if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
14535ffd83dbSDimitry Andric if (AddRec->getLoop() == L)
14545ffd83dbSDimitry Andric ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
14555ffd83dbSDimitry Andric if (isa<SCEVCouldNotCompute>(ExitValue) ||
14565ffd83dbSDimitry Andric !SE->isLoopInvariant(ExitValue, L) ||
1457fcaf7f86SDimitry Andric !Rewriter.isSafeToExpand(ExitValue))
14585ffd83dbSDimitry Andric continue;
14595ffd83dbSDimitry Andric }
14605ffd83dbSDimitry Andric
14615ffd83dbSDimitry Andric // Computing the value outside of the loop brings no benefit if it is
14625ffd83dbSDimitry Andric // definitely used inside the loop in a way which can not be optimized
14635ffd83dbSDimitry Andric // away. Avoid doing so unless we know we have a value which computes
14645ffd83dbSDimitry Andric // the ExitValue already. TODO: This should be merged into SCEV
14655ffd83dbSDimitry Andric // expander to leverage its knowledge of existing expressions.
14665ffd83dbSDimitry Andric if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) &&
14675ffd83dbSDimitry Andric !isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst))
14685ffd83dbSDimitry Andric continue;
14695ffd83dbSDimitry Andric
14705ffd83dbSDimitry Andric // Check if expansions of this SCEV would count as being high cost.
14715ffd83dbSDimitry Andric bool HighCost = Rewriter.isHighCostExpansion(
14725ffd83dbSDimitry Andric ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);
14735ffd83dbSDimitry Andric
14745ffd83dbSDimitry Andric // Note that we must not perform expansions until after
14755ffd83dbSDimitry Andric // we query *all* the costs, because if we perform temporary expansion
14765ffd83dbSDimitry Andric // inbetween, one that we might not intend to keep, said expansion
1477*c9157d92SDimitry Andric // *may* affect cost calculation of the next SCEV's we'll query,
14785ffd83dbSDimitry Andric // and next SCEV may errneously get smaller cost.
14795ffd83dbSDimitry Andric
14805ffd83dbSDimitry Andric // Collect all the candidate PHINodes to be rewritten.
1481a4a491e2SDimitry Andric Instruction *InsertPt =
1482a4a491e2SDimitry Andric (isa<PHINode>(Inst) || isa<LandingPadInst>(Inst)) ?
1483a4a491e2SDimitry Andric &*Inst->getParent()->getFirstInsertionPt() : Inst;
1484a4a491e2SDimitry Andric RewritePhiSet.emplace_back(PN, i, ExitValue, InsertPt, HighCost);
14855ffd83dbSDimitry Andric }
14865ffd83dbSDimitry Andric }
14875ffd83dbSDimitry Andric }
14885ffd83dbSDimitry Andric
1489349cc55cSDimitry Andric // TODO: evaluate whether it is beneficial to change how we calculate
1490349cc55cSDimitry Andric // high-cost: if we have SCEV 'A' which we know we will expand, should we
1491349cc55cSDimitry Andric // calculate the cost of other SCEV's after expanding SCEV 'A', thus
1492349cc55cSDimitry Andric // potentially giving cost bonus to those other SCEV's?
14935ffd83dbSDimitry Andric
14945ffd83dbSDimitry Andric bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
14955ffd83dbSDimitry Andric int NumReplaced = 0;
14965ffd83dbSDimitry Andric
14975ffd83dbSDimitry Andric // Transformation.
14985ffd83dbSDimitry Andric for (const RewritePhi &Phi : RewritePhiSet) {
14995ffd83dbSDimitry Andric PHINode *PN = Phi.PN;
15005ffd83dbSDimitry Andric
15015ffd83dbSDimitry Andric // Only do the rewrite when the ExitValue can be expanded cheaply.
15025ffd83dbSDimitry Andric // If LoopCanBeDel is true, rewrite exit value aggressively.
1503753f127fSDimitry Andric if ((ReplaceExitValue == OnlyCheapRepl ||
1504753f127fSDimitry Andric ReplaceExitValue == UnusedIndVarInLoop) &&
1505753f127fSDimitry Andric !LoopCanBeDel && Phi.HighCost)
15065ffd83dbSDimitry Andric continue;
1507349cc55cSDimitry Andric
1508349cc55cSDimitry Andric Value *ExitVal = Rewriter.expandCodeFor(
1509349cc55cSDimitry Andric Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint);
1510349cc55cSDimitry Andric
1511349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
1512349cc55cSDimitry Andric << '\n'
1513349cc55cSDimitry Andric << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
1514349cc55cSDimitry Andric
1515349cc55cSDimitry Andric #ifndef NDEBUG
1516349cc55cSDimitry Andric // If we reuse an instruction from a loop which is neither L nor one of
1517349cc55cSDimitry Andric // its containing loops, we end up breaking LCSSA form for this loop by
1518349cc55cSDimitry Andric // creating a new use of its instruction.
1519349cc55cSDimitry Andric if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
1520349cc55cSDimitry Andric if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
1521349cc55cSDimitry Andric if (EVL != L)
1522349cc55cSDimitry Andric assert(EVL->contains(L) && "LCSSA breach detected!");
1523349cc55cSDimitry Andric #endif
15245ffd83dbSDimitry Andric
15255ffd83dbSDimitry Andric NumReplaced++;
15265ffd83dbSDimitry Andric Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
15275ffd83dbSDimitry Andric PN->setIncomingValue(Phi.Ith, ExitVal);
1528349cc55cSDimitry Andric // It's necessary to tell ScalarEvolution about this explicitly so that
1529349cc55cSDimitry Andric // it can walk the def-use list and forget all SCEVs, as it may not be
1530349cc55cSDimitry Andric // watching the PHI itself. Once the new exit value is in place, there
1531349cc55cSDimitry Andric // may not be a def-use connection between the loop and every instruction
1532349cc55cSDimitry Andric // which got a SCEVAddRecExpr for that loop.
1533349cc55cSDimitry Andric SE->forgetValue(PN);
15345ffd83dbSDimitry Andric
15355ffd83dbSDimitry Andric // If this instruction is dead now, delete it. Don't do it now to avoid
15365ffd83dbSDimitry Andric // invalidating iterators.
15375ffd83dbSDimitry Andric if (isInstructionTriviallyDead(Inst, TLI))
15385ffd83dbSDimitry Andric DeadInsts.push_back(Inst);
15395ffd83dbSDimitry Andric
15405ffd83dbSDimitry Andric // Replace PN with ExitVal if that is legal and does not break LCSSA.
15415ffd83dbSDimitry Andric if (PN->getNumIncomingValues() == 1 &&
15425ffd83dbSDimitry Andric LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
15435ffd83dbSDimitry Andric PN->replaceAllUsesWith(ExitVal);
15445ffd83dbSDimitry Andric PN->eraseFromParent();
15455ffd83dbSDimitry Andric }
15465ffd83dbSDimitry Andric }
15475ffd83dbSDimitry Andric
15485ffd83dbSDimitry Andric // The insertion point instruction may have been deleted; clear it out
15495ffd83dbSDimitry Andric // so that the rewriter doesn't trip over it later.
15505ffd83dbSDimitry Andric Rewriter.clearInsertPoint();
15515ffd83dbSDimitry Andric return NumReplaced;
15525ffd83dbSDimitry Andric }
15535ffd83dbSDimitry Andric
15545ffd83dbSDimitry Andric /// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
15555ffd83dbSDimitry Andric /// \p OrigLoop.
setProfileInfoAfterUnrolling(Loop * OrigLoop,Loop * UnrolledLoop,Loop * RemainderLoop,uint64_t UF)15565ffd83dbSDimitry Andric void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
15575ffd83dbSDimitry Andric Loop *RemainderLoop, uint64_t UF) {
15585ffd83dbSDimitry Andric assert(UF > 0 && "Zero unrolled factor is not supported");
15595ffd83dbSDimitry Andric assert(UnrolledLoop != RemainderLoop &&
15605ffd83dbSDimitry Andric "Unrolled and Remainder loops are expected to distinct");
15615ffd83dbSDimitry Andric
15625ffd83dbSDimitry Andric // Get number of iterations in the original scalar loop.
15635ffd83dbSDimitry Andric unsigned OrigLoopInvocationWeight = 0;
1564bdd1243dSDimitry Andric std::optional<unsigned> OrigAverageTripCount =
15655ffd83dbSDimitry Andric getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);
15665ffd83dbSDimitry Andric if (!OrigAverageTripCount)
15675ffd83dbSDimitry Andric return;
15685ffd83dbSDimitry Andric
15695ffd83dbSDimitry Andric // Calculate number of iterations in unrolled loop.
15705ffd83dbSDimitry Andric unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;
15715ffd83dbSDimitry Andric // Calculate number of iterations for remainder loop.
15725ffd83dbSDimitry Andric unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;
15735ffd83dbSDimitry Andric
15745ffd83dbSDimitry Andric setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount,
15755ffd83dbSDimitry Andric OrigLoopInvocationWeight);
15765ffd83dbSDimitry Andric setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount,
15775ffd83dbSDimitry Andric OrigLoopInvocationWeight);
15785ffd83dbSDimitry Andric }
15795ffd83dbSDimitry Andric
15805ffd83dbSDimitry Andric /// Utility that implements appending of loops onto a worklist.
15815ffd83dbSDimitry Andric /// Loops are added in preorder (analogous for reverse postorder for trees),
15825ffd83dbSDimitry Andric /// and the worklist is processed LIFO.
15835ffd83dbSDimitry Andric template <typename RangeT>
appendReversedLoopsToWorklist(RangeT && Loops,SmallPriorityWorklist<Loop *,4> & Worklist)15845ffd83dbSDimitry Andric void llvm::appendReversedLoopsToWorklist(
15855ffd83dbSDimitry Andric RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) {
15865ffd83dbSDimitry Andric // We use an internal worklist to build up the preorder traversal without
15875ffd83dbSDimitry Andric // recursion.
15885ffd83dbSDimitry Andric SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
15895ffd83dbSDimitry Andric
15905ffd83dbSDimitry Andric // We walk the initial sequence of loops in reverse because we generally want
15915ffd83dbSDimitry Andric // to visit defs before uses and the worklist is LIFO.
15925ffd83dbSDimitry Andric for (Loop *RootL : Loops) {
15935ffd83dbSDimitry Andric assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
15945ffd83dbSDimitry Andric assert(PreOrderWorklist.empty() &&
15955ffd83dbSDimitry Andric "Must start with an empty preorder walk worklist.");
15965ffd83dbSDimitry Andric PreOrderWorklist.push_back(RootL);
15975ffd83dbSDimitry Andric do {
15985ffd83dbSDimitry Andric Loop *L = PreOrderWorklist.pop_back_val();
15995ffd83dbSDimitry Andric PreOrderWorklist.append(L->begin(), L->end());
16005ffd83dbSDimitry Andric PreOrderLoops.push_back(L);
16015ffd83dbSDimitry Andric } while (!PreOrderWorklist.empty());
16025ffd83dbSDimitry Andric
16035ffd83dbSDimitry Andric Worklist.insert(std::move(PreOrderLoops));
16045ffd83dbSDimitry Andric PreOrderLoops.clear();
16055ffd83dbSDimitry Andric }
16065ffd83dbSDimitry Andric }
16075ffd83dbSDimitry Andric
16085ffd83dbSDimitry Andric template <typename RangeT>
appendLoopsToWorklist(RangeT && Loops,SmallPriorityWorklist<Loop *,4> & Worklist)16095ffd83dbSDimitry Andric void llvm::appendLoopsToWorklist(RangeT &&Loops,
16105ffd83dbSDimitry Andric SmallPriorityWorklist<Loop *, 4> &Worklist) {
16115ffd83dbSDimitry Andric appendReversedLoopsToWorklist(reverse(Loops), Worklist);
16125ffd83dbSDimitry Andric }
16135ffd83dbSDimitry Andric
16145ffd83dbSDimitry Andric template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>(
16155ffd83dbSDimitry Andric ArrayRef<Loop *> &Loops, SmallPriorityWorklist<Loop *, 4> &Worklist);
16165ffd83dbSDimitry Andric
16175ffd83dbSDimitry Andric template void
16185ffd83dbSDimitry Andric llvm::appendLoopsToWorklist<Loop &>(Loop &L,
16195ffd83dbSDimitry Andric SmallPriorityWorklist<Loop *, 4> &Worklist);
16205ffd83dbSDimitry Andric
appendLoopsToWorklist(LoopInfo & LI,SmallPriorityWorklist<Loop *,4> & Worklist)16215ffd83dbSDimitry Andric void llvm::appendLoopsToWorklist(LoopInfo &LI,
16225ffd83dbSDimitry Andric SmallPriorityWorklist<Loop *, 4> &Worklist) {
16235ffd83dbSDimitry Andric appendReversedLoopsToWorklist(LI, Worklist);
16245ffd83dbSDimitry Andric }
16255ffd83dbSDimitry Andric
cloneLoop(Loop * L,Loop * PL,ValueToValueMapTy & VM,LoopInfo * LI,LPPassManager * LPM)16265ffd83dbSDimitry Andric Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
16275ffd83dbSDimitry Andric LoopInfo *LI, LPPassManager *LPM) {
16285ffd83dbSDimitry Andric Loop &New = *LI->AllocateLoop();
16295ffd83dbSDimitry Andric if (PL)
16305ffd83dbSDimitry Andric PL->addChildLoop(&New);
16315ffd83dbSDimitry Andric else
16325ffd83dbSDimitry Andric LI->addTopLevelLoop(&New);
16335ffd83dbSDimitry Andric
16345ffd83dbSDimitry Andric if (LPM)
16355ffd83dbSDimitry Andric LPM->addLoop(New);
16365ffd83dbSDimitry Andric
16375ffd83dbSDimitry Andric // Add all of the blocks in L to the new loop.
16385e801ac6SDimitry Andric for (BasicBlock *BB : L->blocks())
16395e801ac6SDimitry Andric if (LI->getLoopFor(BB) == L)
16405e801ac6SDimitry Andric New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), *LI);
16415ffd83dbSDimitry Andric
16425ffd83dbSDimitry Andric // Add all of the subloops to the new loop.
16435ffd83dbSDimitry Andric for (Loop *I : *L)
16445ffd83dbSDimitry Andric cloneLoop(I, &New, VM, LI, LPM);
16455ffd83dbSDimitry Andric
16465ffd83dbSDimitry Andric return &New;
16475ffd83dbSDimitry Andric }
16485ffd83dbSDimitry Andric
16495ffd83dbSDimitry Andric /// IR Values for the lower and upper bounds of a pointer evolution. We
16505ffd83dbSDimitry Andric /// need to use value-handles because SCEV expansion can invalidate previously
16515ffd83dbSDimitry Andric /// expanded values. Thus expansion of a pointer can invalidate the bounds for
16525ffd83dbSDimitry Andric /// a previous one.
16535ffd83dbSDimitry Andric struct PointerBounds {
16545ffd83dbSDimitry Andric TrackingVH<Value> Start;
16555ffd83dbSDimitry Andric TrackingVH<Value> End;
1656*c9157d92SDimitry Andric Value *StrideToCheck;
16575ffd83dbSDimitry Andric };
16585ffd83dbSDimitry Andric
16595ffd83dbSDimitry Andric /// Expand code for the lower and upper bound of the pointer group \p CG
16605ffd83dbSDimitry Andric /// in \p TheLoop. \return the values for the bounds.
expandBounds(const RuntimeCheckingPtrGroup * CG,Loop * TheLoop,Instruction * Loc,SCEVExpander & Exp,bool HoistRuntimeChecks)16615ffd83dbSDimitry Andric static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
16625ffd83dbSDimitry Andric Loop *TheLoop, Instruction *Loc,
1663*c9157d92SDimitry Andric SCEVExpander &Exp, bool HoistRuntimeChecks) {
16645ffd83dbSDimitry Andric LLVMContext &Ctx = Loc->getContext();
1665*c9157d92SDimitry Andric Type *PtrArithTy = PointerType::get(Ctx, CG->AddressSpace);
16665ffd83dbSDimitry Andric
16675ffd83dbSDimitry Andric Value *Start = nullptr, *End = nullptr;
16685ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
1669*c9157d92SDimitry Andric const SCEV *Low = CG->Low, *High = CG->High, *Stride = nullptr;
1670*c9157d92SDimitry Andric
1671*c9157d92SDimitry Andric // If the Low and High values are themselves loop-variant, then we may want
1672*c9157d92SDimitry Andric // to expand the range to include those covered by the outer loop as well.
1673*c9157d92SDimitry Andric // There is a trade-off here with the advantage being that creating checks
1674*c9157d92SDimitry Andric // using the expanded range permits the runtime memory checks to be hoisted
1675*c9157d92SDimitry Andric // out of the outer loop. This reduces the cost of entering the inner loop,
1676*c9157d92SDimitry Andric // which can be significant for low trip counts. The disadvantage is that
1677*c9157d92SDimitry Andric // there is a chance we may now never enter the vectorized inner loop,
1678*c9157d92SDimitry Andric // whereas using a restricted range check could have allowed us to enter at
1679*c9157d92SDimitry Andric // least once. This is why the behaviour is not currently the default and is
1680*c9157d92SDimitry Andric // controlled by the parameter 'HoistRuntimeChecks'.
1681*c9157d92SDimitry Andric if (HoistRuntimeChecks && TheLoop->getParentLoop() &&
1682*c9157d92SDimitry Andric isa<SCEVAddRecExpr>(High) && isa<SCEVAddRecExpr>(Low)) {
1683*c9157d92SDimitry Andric auto *HighAR = cast<SCEVAddRecExpr>(High);
1684*c9157d92SDimitry Andric auto *LowAR = cast<SCEVAddRecExpr>(Low);
1685*c9157d92SDimitry Andric const Loop *OuterLoop = TheLoop->getParentLoop();
1686*c9157d92SDimitry Andric const SCEV *Recur = LowAR->getStepRecurrence(*Exp.getSE());
1687*c9157d92SDimitry Andric if (Recur == HighAR->getStepRecurrence(*Exp.getSE()) &&
1688*c9157d92SDimitry Andric HighAR->getLoop() == OuterLoop && LowAR->getLoop() == OuterLoop) {
1689*c9157d92SDimitry Andric BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1690*c9157d92SDimitry Andric const SCEV *OuterExitCount =
1691*c9157d92SDimitry Andric Exp.getSE()->getExitCount(OuterLoop, OuterLoopLatch);
1692*c9157d92SDimitry Andric if (!isa<SCEVCouldNotCompute>(OuterExitCount) &&
1693*c9157d92SDimitry Andric OuterExitCount->getType()->isIntegerTy()) {
1694*c9157d92SDimitry Andric const SCEV *NewHigh = cast<SCEVAddRecExpr>(High)->evaluateAtIteration(
1695*c9157d92SDimitry Andric OuterExitCount, *Exp.getSE());
1696*c9157d92SDimitry Andric if (!isa<SCEVCouldNotCompute>(NewHigh)) {
1697*c9157d92SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Expanded RT check for range to include "
1698*c9157d92SDimitry Andric "outer loop in order to permit hoisting\n");
1699*c9157d92SDimitry Andric High = NewHigh;
1700*c9157d92SDimitry Andric Low = cast<SCEVAddRecExpr>(Low)->getStart();
1701*c9157d92SDimitry Andric // If there is a possibility that the stride is negative then we have
1702*c9157d92SDimitry Andric // to generate extra checks to ensure the stride is positive.
1703*c9157d92SDimitry Andric if (!Exp.getSE()->isKnownNonNegative(Recur)) {
1704*c9157d92SDimitry Andric Stride = Recur;
1705*c9157d92SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: ... but need to check stride is "
1706*c9157d92SDimitry Andric "positive: "
1707*c9157d92SDimitry Andric << *Stride << '\n');
1708*c9157d92SDimitry Andric }
1709*c9157d92SDimitry Andric }
1710*c9157d92SDimitry Andric }
1711*c9157d92SDimitry Andric }
1712*c9157d92SDimitry Andric }
1713*c9157d92SDimitry Andric
1714*c9157d92SDimitry Andric Start = Exp.expandCodeFor(Low, PtrArithTy, Loc);
1715*c9157d92SDimitry Andric End = Exp.expandCodeFor(High, PtrArithTy, Loc);
171681ad6265SDimitry Andric if (CG->NeedsFreeze) {
171781ad6265SDimitry Andric IRBuilder<> Builder(Loc);
171881ad6265SDimitry Andric Start = Builder.CreateFreeze(Start, Start->getName() + ".fr");
171981ad6265SDimitry Andric End = Builder.CreateFreeze(End, End->getName() + ".fr");
172081ad6265SDimitry Andric }
1721*c9157d92SDimitry Andric Value *StrideVal =
1722*c9157d92SDimitry Andric Stride ? Exp.expandCodeFor(Stride, Stride->getType(), Loc) : nullptr;
1723*c9157d92SDimitry Andric LLVM_DEBUG(dbgs() << "Start: " << *Low << " End: " << *High << "\n");
1724*c9157d92SDimitry Andric return {Start, End, StrideVal};
17255ffd83dbSDimitry Andric }
17265ffd83dbSDimitry Andric
17275ffd83dbSDimitry Andric /// Turns a collection of checks into a collection of expanded upper and
17285ffd83dbSDimitry Andric /// lower bounds for both pointers in the check.
17295ffd83dbSDimitry Andric static SmallVector<std::pair<PointerBounds, PointerBounds>, 4>
expandBounds(const SmallVectorImpl<RuntimePointerCheck> & PointerChecks,Loop * L,Instruction * Loc,SCEVExpander & Exp,bool HoistRuntimeChecks)17305ffd83dbSDimitry Andric expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
1731*c9157d92SDimitry Andric Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks) {
17325ffd83dbSDimitry Andric SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
17335ffd83dbSDimitry Andric
17345ffd83dbSDimitry Andric // Here we're relying on the SCEV Expander's cache to only emit code for the
17355ffd83dbSDimitry Andric // same bounds once.
17365ffd83dbSDimitry Andric transform(PointerChecks, std::back_inserter(ChecksWithBounds),
17375ffd83dbSDimitry Andric [&](const RuntimePointerCheck &Check) {
1738*c9157d92SDimitry Andric PointerBounds First = expandBounds(Check.first, L, Loc, Exp,
1739*c9157d92SDimitry Andric HoistRuntimeChecks),
1740*c9157d92SDimitry Andric Second = expandBounds(Check.second, L, Loc, Exp,
1741*c9157d92SDimitry Andric HoistRuntimeChecks);
17425ffd83dbSDimitry Andric return std::make_pair(First, Second);
17435ffd83dbSDimitry Andric });
17445ffd83dbSDimitry Andric
17455ffd83dbSDimitry Andric return ChecksWithBounds;
17465ffd83dbSDimitry Andric }
17475ffd83dbSDimitry Andric
addRuntimeChecks(Instruction * Loc,Loop * TheLoop,const SmallVectorImpl<RuntimePointerCheck> & PointerChecks,SCEVExpander & Exp,bool HoistRuntimeChecks)1748349cc55cSDimitry Andric Value *llvm::addRuntimeChecks(
17495ffd83dbSDimitry Andric Instruction *Loc, Loop *TheLoop,
17505ffd83dbSDimitry Andric const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
1751*c9157d92SDimitry Andric SCEVExpander &Exp, bool HoistRuntimeChecks) {
17525ffd83dbSDimitry Andric // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
17535ffd83dbSDimitry Andric // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
1754*c9157d92SDimitry Andric auto ExpandedChecks =
1755*c9157d92SDimitry Andric expandBounds(PointerChecks, TheLoop, Loc, Exp, HoistRuntimeChecks);
17565ffd83dbSDimitry Andric
17575ffd83dbSDimitry Andric LLVMContext &Ctx = Loc->getContext();
175804eeddc0SDimitry Andric IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
175904eeddc0SDimitry Andric Loc->getModule()->getDataLayout());
176004eeddc0SDimitry Andric ChkBuilder.SetInsertPoint(Loc);
17615ffd83dbSDimitry Andric // Our instructions might fold to a constant.
17625ffd83dbSDimitry Andric Value *MemoryRuntimeCheck = nullptr;
17635ffd83dbSDimitry Andric
17645ffd83dbSDimitry Andric for (const auto &Check : ExpandedChecks) {
17655ffd83dbSDimitry Andric const PointerBounds &A = Check.first, &B = Check.second;
17665ffd83dbSDimitry Andric // Check if two pointers (A and B) conflict where conflict is computed as:
17675ffd83dbSDimitry Andric // start(A) <= end(B) && start(B) <= end(A)
17685ffd83dbSDimitry Andric
1769*c9157d92SDimitry Andric assert((A.Start->getType()->getPointerAddressSpace() ==
1770*c9157d92SDimitry Andric B.End->getType()->getPointerAddressSpace()) &&
1771*c9157d92SDimitry Andric (B.Start->getType()->getPointerAddressSpace() ==
1772*c9157d92SDimitry Andric A.End->getType()->getPointerAddressSpace()) &&
17735ffd83dbSDimitry Andric "Trying to bounds check pointers with different address spaces");
17745ffd83dbSDimitry Andric
17755ffd83dbSDimitry Andric // [A|B].Start points to the first accessed byte under base [A|B].
17765ffd83dbSDimitry Andric // [A|B].End points to the last accessed byte, plus one.
17775ffd83dbSDimitry Andric // There is no conflict when the intervals are disjoint:
17785ffd83dbSDimitry Andric // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
17795ffd83dbSDimitry Andric //
17805ffd83dbSDimitry Andric // bound0 = (B.Start < A.End)
17815ffd83dbSDimitry Andric // bound1 = (A.Start < B.End)
17825ffd83dbSDimitry Andric // IsConflict = bound0 & bound1
1783*c9157d92SDimitry Andric Value *Cmp0 = ChkBuilder.CreateICmpULT(A.Start, B.End, "bound0");
1784*c9157d92SDimitry Andric Value *Cmp1 = ChkBuilder.CreateICmpULT(B.Start, A.End, "bound1");
17855ffd83dbSDimitry Andric Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
1786*c9157d92SDimitry Andric if (A.StrideToCheck) {
1787*c9157d92SDimitry Andric Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
1788*c9157d92SDimitry Andric A.StrideToCheck, ConstantInt::get(A.StrideToCheck->getType(), 0),
1789*c9157d92SDimitry Andric "stride.check");
1790*c9157d92SDimitry Andric IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
1791*c9157d92SDimitry Andric }
1792*c9157d92SDimitry Andric if (B.StrideToCheck) {
1793*c9157d92SDimitry Andric Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
1794*c9157d92SDimitry Andric B.StrideToCheck, ConstantInt::get(B.StrideToCheck->getType(), 0),
1795*c9157d92SDimitry Andric "stride.check");
1796*c9157d92SDimitry Andric IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
1797*c9157d92SDimitry Andric }
17985ffd83dbSDimitry Andric if (MemoryRuntimeCheck) {
17995ffd83dbSDimitry Andric IsConflict =
18005ffd83dbSDimitry Andric ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
18015ffd83dbSDimitry Andric }
18025ffd83dbSDimitry Andric MemoryRuntimeCheck = IsConflict;
18035ffd83dbSDimitry Andric }
18045ffd83dbSDimitry Andric
1805349cc55cSDimitry Andric return MemoryRuntimeCheck;
18065ffd83dbSDimitry Andric }
1807fe6060f1SDimitry Andric
addDiffRuntimeChecks(Instruction * Loc,ArrayRef<PointerDiffInfo> Checks,SCEVExpander & Expander,function_ref<Value * (IRBuilderBase &,unsigned)> GetVF,unsigned IC)180881ad6265SDimitry Andric Value *llvm::addDiffRuntimeChecks(
1809bdd1243dSDimitry Andric Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
181081ad6265SDimitry Andric function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {
181181ad6265SDimitry Andric
181281ad6265SDimitry Andric LLVMContext &Ctx = Loc->getContext();
181381ad6265SDimitry Andric IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
181481ad6265SDimitry Andric Loc->getModule()->getDataLayout());
181581ad6265SDimitry Andric ChkBuilder.SetInsertPoint(Loc);
181681ad6265SDimitry Andric // Our instructions might fold to a constant.
181781ad6265SDimitry Andric Value *MemoryRuntimeCheck = nullptr;
181881ad6265SDimitry Andric
1819*c9157d92SDimitry Andric auto &SE = *Expander.getSE();
1820*c9157d92SDimitry Andric // Map to keep track of created compares, The key is the pair of operands for
1821*c9157d92SDimitry Andric // the compare, to allow detecting and re-using redundant compares.
1822*c9157d92SDimitry Andric DenseMap<std::pair<Value *, Value *>, Value *> SeenCompares;
1823bdd1243dSDimitry Andric for (const auto &C : Checks) {
182481ad6265SDimitry Andric Type *Ty = C.SinkStart->getType();
182581ad6265SDimitry Andric // Compute VF * IC * AccessSize.
182681ad6265SDimitry Andric auto *VFTimesUFTimesSize =
182781ad6265SDimitry Andric ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
182881ad6265SDimitry Andric ConstantInt::get(Ty, IC * C.AccessSize));
1829*c9157d92SDimitry Andric Value *Diff = Expander.expandCodeFor(
1830*c9157d92SDimitry Andric SE.getMinusSCEV(C.SinkStart, C.SrcStart), Ty, Loc);
183181ad6265SDimitry Andric
1832*c9157d92SDimitry Andric // Check if the same compare has already been created earlier. In that case,
1833*c9157d92SDimitry Andric // there is no need to check it again.
1834*c9157d92SDimitry Andric Value *IsConflict = SeenCompares.lookup({Diff, VFTimesUFTimesSize});
1835*c9157d92SDimitry Andric if (IsConflict)
1836*c9157d92SDimitry Andric continue;
1837*c9157d92SDimitry Andric
1838*c9157d92SDimitry Andric IsConflict =
1839*c9157d92SDimitry Andric ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
1840*c9157d92SDimitry Andric SeenCompares.insert({{Diff, VFTimesUFTimesSize}, IsConflict});
1841*c9157d92SDimitry Andric if (C.NeedsFreeze)
1842*c9157d92SDimitry Andric IsConflict =
1843*c9157d92SDimitry Andric ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr");
184481ad6265SDimitry Andric if (MemoryRuntimeCheck) {
184581ad6265SDimitry Andric IsConflict =
184681ad6265SDimitry Andric ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
184781ad6265SDimitry Andric }
184881ad6265SDimitry Andric MemoryRuntimeCheck = IsConflict;
184981ad6265SDimitry Andric }
185081ad6265SDimitry Andric
185181ad6265SDimitry Andric return MemoryRuntimeCheck;
185281ad6265SDimitry Andric }
185381ad6265SDimitry Andric
1854bdd1243dSDimitry Andric std::optional<IVConditionInfo>
hasPartialIVCondition(const Loop & L,unsigned MSSAThreshold,const MemorySSA & MSSA,AAResults & AA)1855bdd1243dSDimitry Andric llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold,
1856bdd1243dSDimitry Andric const MemorySSA &MSSA, AAResults &AA) {
1857fe6060f1SDimitry Andric auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());
1858fe6060f1SDimitry Andric if (!TI || !TI->isConditional())
1859fe6060f1SDimitry Andric return {};
1860fe6060f1SDimitry Andric
1861fe6060f1SDimitry Andric auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
1862fe6060f1SDimitry Andric // The case with the condition outside the loop should already be handled
1863fe6060f1SDimitry Andric // earlier.
1864fe6060f1SDimitry Andric if (!CondI || !L.contains(CondI))
1865fe6060f1SDimitry Andric return {};
1866fe6060f1SDimitry Andric
1867fe6060f1SDimitry Andric SmallVector<Instruction *> InstToDuplicate;
1868fe6060f1SDimitry Andric InstToDuplicate.push_back(CondI);
1869fe6060f1SDimitry Andric
1870fe6060f1SDimitry Andric SmallVector<Value *, 4> WorkList;
1871fe6060f1SDimitry Andric WorkList.append(CondI->op_begin(), CondI->op_end());
1872fe6060f1SDimitry Andric
1873fe6060f1SDimitry Andric SmallVector<MemoryAccess *, 4> AccessesToCheck;
1874fe6060f1SDimitry Andric SmallVector<MemoryLocation, 4> AccessedLocs;
1875fe6060f1SDimitry Andric while (!WorkList.empty()) {
1876fe6060f1SDimitry Andric Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
1877fe6060f1SDimitry Andric if (!I || !L.contains(I))
1878fe6060f1SDimitry Andric continue;
1879fe6060f1SDimitry Andric
1880fe6060f1SDimitry Andric // TODO: support additional instructions.
1881fe6060f1SDimitry Andric if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
1882fe6060f1SDimitry Andric return {};
1883fe6060f1SDimitry Andric
1884fe6060f1SDimitry Andric // Do not duplicate volatile and atomic loads.
1885fe6060f1SDimitry Andric if (auto *LI = dyn_cast<LoadInst>(I))
1886fe6060f1SDimitry Andric if (LI->isVolatile() || LI->isAtomic())
1887fe6060f1SDimitry Andric return {};
1888fe6060f1SDimitry Andric
1889fe6060f1SDimitry Andric InstToDuplicate.push_back(I);
1890fe6060f1SDimitry Andric if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
1891fe6060f1SDimitry Andric if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
1892fe6060f1SDimitry Andric // Queue the defining access to check for alias checks.
1893fe6060f1SDimitry Andric AccessesToCheck.push_back(MemUse->getDefiningAccess());
1894fe6060f1SDimitry Andric AccessedLocs.push_back(MemoryLocation::get(I));
1895fe6060f1SDimitry Andric } else {
1896fe6060f1SDimitry Andric // MemoryDefs may clobber the location or may be atomic memory
1897fe6060f1SDimitry Andric // operations. Bail out.
1898fe6060f1SDimitry Andric return {};
1899fe6060f1SDimitry Andric }
1900fe6060f1SDimitry Andric }
1901fe6060f1SDimitry Andric WorkList.append(I->op_begin(), I->op_end());
1902fe6060f1SDimitry Andric }
1903fe6060f1SDimitry Andric
1904fe6060f1SDimitry Andric if (InstToDuplicate.empty())
1905fe6060f1SDimitry Andric return {};
1906fe6060f1SDimitry Andric
1907fe6060f1SDimitry Andric SmallVector<BasicBlock *, 4> ExitingBlocks;
1908fe6060f1SDimitry Andric L.getExitingBlocks(ExitingBlocks);
1909fe6060f1SDimitry Andric auto HasNoClobbersOnPath =
1910fe6060f1SDimitry Andric [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
1911fe6060f1SDimitry Andric MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,
1912fe6060f1SDimitry Andric SmallVector<MemoryAccess *, 4> AccessesToCheck)
1913bdd1243dSDimitry Andric -> std::optional<IVConditionInfo> {
1914fe6060f1SDimitry Andric IVConditionInfo Info;
1915fe6060f1SDimitry Andric // First, collect all blocks in the loop that are on a patch from Succ
1916fe6060f1SDimitry Andric // to the header.
1917fe6060f1SDimitry Andric SmallVector<BasicBlock *, 4> WorkList;
1918fe6060f1SDimitry Andric WorkList.push_back(Succ);
1919fe6060f1SDimitry Andric WorkList.push_back(Header);
1920fe6060f1SDimitry Andric SmallPtrSet<BasicBlock *, 4> Seen;
1921fe6060f1SDimitry Andric Seen.insert(Header);
1922fe6060f1SDimitry Andric Info.PathIsNoop &=
1923fe6060f1SDimitry Andric all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
1924fe6060f1SDimitry Andric
1925fe6060f1SDimitry Andric while (!WorkList.empty()) {
1926fe6060f1SDimitry Andric BasicBlock *Current = WorkList.pop_back_val();
1927fe6060f1SDimitry Andric if (!L.contains(Current))
1928fe6060f1SDimitry Andric continue;
1929fe6060f1SDimitry Andric const auto &SeenIns = Seen.insert(Current);
1930fe6060f1SDimitry Andric if (!SeenIns.second)
1931fe6060f1SDimitry Andric continue;
1932fe6060f1SDimitry Andric
1933fe6060f1SDimitry Andric Info.PathIsNoop &= all_of(
1934fe6060f1SDimitry Andric *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
1935fe6060f1SDimitry Andric WorkList.append(succ_begin(Current), succ_end(Current));
1936fe6060f1SDimitry Andric }
1937fe6060f1SDimitry Andric
1938fe6060f1SDimitry Andric // Require at least 2 blocks on a path through the loop. This skips
1939fe6060f1SDimitry Andric // paths that directly exit the loop.
1940fe6060f1SDimitry Andric if (Seen.size() < 2)
1941fe6060f1SDimitry Andric return {};
1942fe6060f1SDimitry Andric
1943fe6060f1SDimitry Andric // Next, check if there are any MemoryDefs that are on the path through
1944fe6060f1SDimitry Andric // the loop (in the Seen set) and they may-alias any of the locations in
1945fe6060f1SDimitry Andric // AccessedLocs. If that is the case, they may modify the condition and
1946fe6060f1SDimitry Andric // partial unswitching is not possible.
1947fe6060f1SDimitry Andric SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
1948fe6060f1SDimitry Andric while (!AccessesToCheck.empty()) {
1949fe6060f1SDimitry Andric MemoryAccess *Current = AccessesToCheck.pop_back_val();
1950fe6060f1SDimitry Andric auto SeenI = SeenAccesses.insert(Current);
1951fe6060f1SDimitry Andric if (!SeenI.second || !Seen.contains(Current->getBlock()))
1952fe6060f1SDimitry Andric continue;
1953fe6060f1SDimitry Andric
1954fe6060f1SDimitry Andric // Bail out if exceeded the threshold.
1955fe6060f1SDimitry Andric if (SeenAccesses.size() >= MSSAThreshold)
1956fe6060f1SDimitry Andric return {};
1957fe6060f1SDimitry Andric
1958fe6060f1SDimitry Andric // MemoryUse are read-only accesses.
1959fe6060f1SDimitry Andric if (isa<MemoryUse>(Current))
1960fe6060f1SDimitry Andric continue;
1961fe6060f1SDimitry Andric
1962fe6060f1SDimitry Andric // For a MemoryDef, check if is aliases any of the location feeding
1963fe6060f1SDimitry Andric // the original condition.
1964fe6060f1SDimitry Andric if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
1965fe6060f1SDimitry Andric if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) {
1966fe6060f1SDimitry Andric return isModSet(
1967fe6060f1SDimitry Andric AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc));
1968fe6060f1SDimitry Andric }))
1969fe6060f1SDimitry Andric return {};
1970fe6060f1SDimitry Andric }
1971fe6060f1SDimitry Andric
1972fe6060f1SDimitry Andric for (Use &U : Current->uses())
1973fe6060f1SDimitry Andric AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
1974fe6060f1SDimitry Andric }
1975fe6060f1SDimitry Andric
1976fe6060f1SDimitry Andric // We could also allow loops with known trip counts without mustprogress,
1977fe6060f1SDimitry Andric // but ScalarEvolution may not be available.
1978fe6060f1SDimitry Andric Info.PathIsNoop &= isMustProgress(&L);
1979fe6060f1SDimitry Andric
1980fe6060f1SDimitry Andric // If the path is considered a no-op so far, check if it reaches a
1981fe6060f1SDimitry Andric // single exit block without any phis. This ensures no values from the
1982fe6060f1SDimitry Andric // loop are used outside of the loop.
1983fe6060f1SDimitry Andric if (Info.PathIsNoop) {
1984fe6060f1SDimitry Andric for (auto *Exiting : ExitingBlocks) {
1985fe6060f1SDimitry Andric if (!Seen.contains(Exiting))
1986fe6060f1SDimitry Andric continue;
1987fe6060f1SDimitry Andric for (auto *Succ : successors(Exiting)) {
1988fe6060f1SDimitry Andric if (L.contains(Succ))
1989fe6060f1SDimitry Andric continue;
1990fe6060f1SDimitry Andric
1991bdd1243dSDimitry Andric Info.PathIsNoop &= Succ->phis().empty() &&
1992fe6060f1SDimitry Andric (!Info.ExitForPath || Info.ExitForPath == Succ);
1993fe6060f1SDimitry Andric if (!Info.PathIsNoop)
1994fe6060f1SDimitry Andric break;
1995fe6060f1SDimitry Andric assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
1996fe6060f1SDimitry Andric "cannot have multiple exit blocks");
1997fe6060f1SDimitry Andric Info.ExitForPath = Succ;
1998fe6060f1SDimitry Andric }
1999fe6060f1SDimitry Andric }
2000fe6060f1SDimitry Andric }
2001fe6060f1SDimitry Andric if (!Info.ExitForPath)
2002fe6060f1SDimitry Andric Info.PathIsNoop = false;
2003fe6060f1SDimitry Andric
2004fe6060f1SDimitry Andric Info.InstToDuplicate = InstToDuplicate;
2005fe6060f1SDimitry Andric return Info;
2006fe6060f1SDimitry Andric };
2007fe6060f1SDimitry Andric
2008fe6060f1SDimitry Andric // If we branch to the same successor, partial unswitching will not be
2009fe6060f1SDimitry Andric // beneficial.
2010fe6060f1SDimitry Andric if (TI->getSuccessor(0) == TI->getSuccessor(1))
2011fe6060f1SDimitry Andric return {};
2012fe6060f1SDimitry Andric
2013fe6060f1SDimitry Andric if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),
2014fe6060f1SDimitry Andric AccessesToCheck)) {
2015fe6060f1SDimitry Andric Info->KnownValue = ConstantInt::getTrue(TI->getContext());
2016fe6060f1SDimitry Andric return Info;
2017fe6060f1SDimitry Andric }
2018fe6060f1SDimitry Andric if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),
2019fe6060f1SDimitry Andric AccessesToCheck)) {
2020fe6060f1SDimitry Andric Info->KnownValue = ConstantInt::getFalse(TI->getContext());
2021fe6060f1SDimitry Andric return Info;
2022fe6060f1SDimitry Andric }
2023fe6060f1SDimitry Andric
2024fe6060f1SDimitry Andric return {};
2025fe6060f1SDimitry Andric }
2026