1dd40f5e7SEugene Zelenko //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
288db86ddSKarthik Bhat //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
688db86ddSKarthik Bhat //
788db86ddSKarthik Bhat //===----------------------------------------------------------------------===//
888db86ddSKarthik Bhat //
988db86ddSKarthik Bhat // This Pass handles loop interchange transform.
1088db86ddSKarthik Bhat // This pass interchanges loops to provide a more cache-friendly memory access
1188db86ddSKarthik Bhat // patterns.
1288db86ddSKarthik Bhat //
1388db86ddSKarthik Bhat //===----------------------------------------------------------------------===//
1488db86ddSKarthik Bhat 
159c21c6c9SArthur Eubanks #include "llvm/Transforms/Scalar/LoopInterchange.h"
16dd40f5e7SEugene Zelenko #include "llvm/ADT/STLExtras.h"
1788db86ddSKarthik Bhat #include "llvm/ADT/SmallVector.h"
186e004336SFlorian Hahn #include "llvm/ADT/Statistic.h"
19dd40f5e7SEugene Zelenko #include "llvm/ADT/StringRef.h"
2088db86ddSKarthik Bhat #include "llvm/Analysis/DependenceAnalysis.h"
21*b941857bSCongzhe Cao #include "llvm/Analysis/LoopCacheAnalysis.h"
2288db86ddSKarthik Bhat #include "llvm/Analysis/LoopInfo.h"
23829c1b64SCongzhe Cao #include "llvm/Analysis/LoopNestAnalysis.h"
248600fee5SFlorian Hahn #include "llvm/Analysis/LoopPass.h"
250965da20SAdam Nemet #include "llvm/Analysis/OptimizationRemarkEmitter.h"
2688db86ddSKarthik Bhat #include "llvm/Analysis/ScalarEvolution.h"
2788db86ddSKarthik Bhat #include "llvm/Analysis/ScalarEvolutionExpressions.h"
28dd40f5e7SEugene Zelenko #include "llvm/IR/BasicBlock.h"
29dd40f5e7SEugene Zelenko #include "llvm/IR/Constants.h"
30dd40f5e7SEugene Zelenko #include "llvm/IR/DiagnosticInfo.h"
31799003bfSBenjamin Kramer #include "llvm/IR/Dominators.h"
3288db86ddSKarthik Bhat #include "llvm/IR/Function.h"
3354cb552bSFlorian Hahn #include "llvm/IR/IRBuilder.h"
34dd40f5e7SEugene Zelenko #include "llvm/IR/InstrTypes.h"
35dd40f5e7SEugene Zelenko #include "llvm/IR/Instruction.h"
36dd40f5e7SEugene Zelenko #include "llvm/IR/Instructions.h"
37dd40f5e7SEugene Zelenko #include "llvm/IR/User.h"
38dd40f5e7SEugene Zelenko #include "llvm/IR/Value.h"
3905da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
4088db86ddSKarthik Bhat #include "llvm/Pass.h"
41dd40f5e7SEugene Zelenko #include "llvm/Support/Casting.h"
42dd40f5e7SEugene Zelenko #include "llvm/Support/CommandLine.h"
4388db86ddSKarthik Bhat #include "llvm/Support/Debug.h"
44dd40f5e7SEugene Zelenko #include "llvm/Support/ErrorHandling.h"
4588db86ddSKarthik Bhat #include "llvm/Support/raw_ostream.h"
46799003bfSBenjamin Kramer #include "llvm/Transforms/Scalar.h"
4788db86ddSKarthik Bhat #include "llvm/Transforms/Utils/BasicBlockUtils.h"
48799003bfSBenjamin Kramer #include "llvm/Transforms/Utils/LoopUtils.h"
49dd40f5e7SEugene Zelenko #include <cassert>
50dd40f5e7SEugene Zelenko #include <utility>
51dd40f5e7SEugene Zelenko #include <vector>
529d8f6f8aSDavide Italiano 
5388db86ddSKarthik Bhat using namespace llvm;
5488db86ddSKarthik Bhat 
5588db86ddSKarthik Bhat #define DEBUG_TYPE "loop-interchange"
5688db86ddSKarthik Bhat 
576e004336SFlorian Hahn STATISTIC(LoopsInterchanged, "Number of loops interchanged");
586e004336SFlorian Hahn 
5972431890SChad Rosier static cl::opt<int> LoopInterchangeCostThreshold(
6072431890SChad Rosier     "loop-interchange-threshold", cl::init(0), cl::Hidden,
6172431890SChad Rosier     cl::desc("Interchange if you gain more than this number"));
6272431890SChad Rosier 
6388db86ddSKarthik Bhat namespace {
6488db86ddSKarthik Bhat 
65dd40f5e7SEugene Zelenko using LoopVector = SmallVector<Loop *, 8>;
6688db86ddSKarthik Bhat 
6788db86ddSKarthik Bhat // TODO: Check if we can use a sparse matrix here.
68dd40f5e7SEugene Zelenko using CharMatrix = std::vector<std::vector<char>>;
69dd40f5e7SEugene Zelenko 
70dd40f5e7SEugene Zelenko } // end anonymous namespace
7188db86ddSKarthik Bhat 
7288db86ddSKarthik Bhat // Maximum number of dependencies that can be handled in the dependency matrix.
7388db86ddSKarthik Bhat static const unsigned MaxMemInstrCount = 100;
7488db86ddSKarthik Bhat 
7588db86ddSKarthik Bhat // Maximum loop depth supported.
7688db86ddSKarthik Bhat static const unsigned MaxLoopNestDepth = 10;
7788db86ddSKarthik Bhat 
7888db86ddSKarthik Bhat #ifdef DUMP_DEP_MATRICIES
printDepMatrix(CharMatrix & DepMatrix)79dd40f5e7SEugene Zelenko static void printDepMatrix(CharMatrix &DepMatrix) {
80f66efd61SFlorian Hahn   for (auto &Row : DepMatrix) {
81f66efd61SFlorian Hahn     for (auto D : Row)
82d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << D << " ");
83d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "\n");
8488db86ddSKarthik Bhat   }
8588db86ddSKarthik Bhat }
8688db86ddSKarthik Bhat #endif
8788db86ddSKarthik Bhat 
populateDependencyMatrix(CharMatrix & DepMatrix,unsigned Level,Loop * L,DependenceInfo * DI)888210fdf2SKarthik Bhat static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
8949c22190SChandler Carruth                                      Loop *L, DependenceInfo *DI) {
90dd40f5e7SEugene Zelenko   using ValueVector = SmallVector<Value *, 16>;
91dd40f5e7SEugene Zelenko 
9288db86ddSKarthik Bhat   ValueVector MemInstr;
9388db86ddSKarthik Bhat 
9488db86ddSKarthik Bhat   // For each block.
95f66efd61SFlorian Hahn   for (BasicBlock *BB : L->blocks()) {
9688db86ddSKarthik Bhat     // Scan the BB and collect legal loads and stores.
97f66efd61SFlorian Hahn     for (Instruction &I : *BB) {
9809c1109bSChad Rosier       if (!isa<Instruction>(I))
9988db86ddSKarthik Bhat         return false;
100f66efd61SFlorian Hahn       if (auto *Ld = dyn_cast<LoadInst>(&I)) {
10109c1109bSChad Rosier         if (!Ld->isSimple())
10288db86ddSKarthik Bhat           return false;
103f66efd61SFlorian Hahn         MemInstr.push_back(&I);
104f66efd61SFlorian Hahn       } else if (auto *St = dyn_cast<StoreInst>(&I)) {
10509c1109bSChad Rosier         if (!St->isSimple())
10609c1109bSChad Rosier           return false;
107f66efd61SFlorian Hahn         MemInstr.push_back(&I);
10809c1109bSChad Rosier       }
10988db86ddSKarthik Bhat     }
11088db86ddSKarthik Bhat   }
11188db86ddSKarthik Bhat 
112d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
11388db86ddSKarthik Bhat                     << " Loads and Stores to analyze\n");
11488db86ddSKarthik Bhat 
11588db86ddSKarthik Bhat   ValueVector::iterator I, IE, J, JE;
11688db86ddSKarthik Bhat 
11788db86ddSKarthik Bhat   for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
11888db86ddSKarthik Bhat     for (J = I, JE = MemInstr.end(); J != JE; ++J) {
11988db86ddSKarthik Bhat       std::vector<char> Dep;
12009c1109bSChad Rosier       Instruction *Src = cast<Instruction>(*I);
12109c1109bSChad Rosier       Instruction *Dst = cast<Instruction>(*J);
12200eb8db3SChad Rosier       // Ignore Input dependencies.
12390bcb917SChad Rosier       if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
12488db86ddSKarthik Bhat         continue;
12500eb8db3SChad Rosier       // Track Output, Flow, and Anti dependencies.
12690bcb917SChad Rosier       if (auto D = DI->depends(Src, Dst, true)) {
12700eb8db3SChad Rosier         assert(D->isOrdered() && "Expected an output, flow or anti dep.");
128d34e60caSNicola Zaghen         LLVM_DEBUG(StringRef DepType =
12900eb8db3SChad Rosier                        D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
13000eb8db3SChad Rosier                    dbgs() << "Found " << DepType
13100eb8db3SChad Rosier                           << " dependency between Src and Dst\n"
13290bcb917SChad Rosier                           << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
13388db86ddSKarthik Bhat         unsigned Levels = D->getLevels();
13488db86ddSKarthik Bhat         char Direction;
13588db86ddSKarthik Bhat         for (unsigned II = 1; II <= Levels; ++II) {
13688db86ddSKarthik Bhat           const SCEV *Distance = D->getDistance(II);
13788db86ddSKarthik Bhat           const SCEVConstant *SCEVConst =
13888db86ddSKarthik Bhat               dyn_cast_or_null<SCEVConstant>(Distance);
13988db86ddSKarthik Bhat           if (SCEVConst) {
14088db86ddSKarthik Bhat             const ConstantInt *CI = SCEVConst->getValue();
14188db86ddSKarthik Bhat             if (CI->isNegative())
14288db86ddSKarthik Bhat               Direction = '<';
14388db86ddSKarthik Bhat             else if (CI->isZero())
14488db86ddSKarthik Bhat               Direction = '=';
14588db86ddSKarthik Bhat             else
14688db86ddSKarthik Bhat               Direction = '>';
14788db86ddSKarthik Bhat             Dep.push_back(Direction);
14888db86ddSKarthik Bhat           } else if (D->isScalar(II)) {
14988db86ddSKarthik Bhat             Direction = 'S';
15088db86ddSKarthik Bhat             Dep.push_back(Direction);
15188db86ddSKarthik Bhat           } else {
15288db86ddSKarthik Bhat             unsigned Dir = D->getDirection(II);
15388db86ddSKarthik Bhat             if (Dir == Dependence::DVEntry::LT ||
15488db86ddSKarthik Bhat                 Dir == Dependence::DVEntry::LE)
15588db86ddSKarthik Bhat               Direction = '<';
15688db86ddSKarthik Bhat             else if (Dir == Dependence::DVEntry::GT ||
15788db86ddSKarthik Bhat                      Dir == Dependence::DVEntry::GE)
15888db86ddSKarthik Bhat               Direction = '>';
15988db86ddSKarthik Bhat             else if (Dir == Dependence::DVEntry::EQ)
16088db86ddSKarthik Bhat               Direction = '=';
16188db86ddSKarthik Bhat             else
16288db86ddSKarthik Bhat               Direction = '*';
16388db86ddSKarthik Bhat             Dep.push_back(Direction);
16488db86ddSKarthik Bhat           }
16588db86ddSKarthik Bhat         }
16688db86ddSKarthik Bhat         while (Dep.size() != Level) {
16788db86ddSKarthik Bhat           Dep.push_back('I');
16888db86ddSKarthik Bhat         }
16988db86ddSKarthik Bhat 
17088db86ddSKarthik Bhat         DepMatrix.push_back(Dep);
17188db86ddSKarthik Bhat         if (DepMatrix.size() > MaxMemInstrCount) {
172d34e60caSNicola Zaghen           LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
17388db86ddSKarthik Bhat                             << " dependencies inside loop\n");
17488db86ddSKarthik Bhat           return false;
17588db86ddSKarthik Bhat         }
17688db86ddSKarthik Bhat       }
17788db86ddSKarthik Bhat     }
17888db86ddSKarthik Bhat   }
17988db86ddSKarthik Bhat 
18088db86ddSKarthik Bhat   return true;
18188db86ddSKarthik Bhat }
18288db86ddSKarthik Bhat 
18388db86ddSKarthik Bhat // A loop is moved from index 'from' to an index 'to'. Update the Dependence
18488db86ddSKarthik Bhat // matrix by exchanging the two columns.
interChangeDependencies(CharMatrix & DepMatrix,unsigned FromIndx,unsigned ToIndx)185d18ea065SChad Rosier static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
18688db86ddSKarthik Bhat                                     unsigned ToIndx) {
187ea1a1ebbSTa-Wei Tu   for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
188ea1a1ebbSTa-Wei Tu     std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
18988db86ddSKarthik Bhat }
19088db86ddSKarthik Bhat 
19188db86ddSKarthik Bhat // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
19288db86ddSKarthik Bhat // '>'
isOuterMostDepPositive(CharMatrix & DepMatrix,unsigned Row,unsigned Column)1938210fdf2SKarthik Bhat static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
19488db86ddSKarthik Bhat                                    unsigned Column) {
19588db86ddSKarthik Bhat   for (unsigned i = 0; i <= Column; ++i) {
19688db86ddSKarthik Bhat     if (DepMatrix[Row][i] == '<')
19788db86ddSKarthik Bhat       return false;
19888db86ddSKarthik Bhat     if (DepMatrix[Row][i] == '>')
19988db86ddSKarthik Bhat       return true;
20088db86ddSKarthik Bhat   }
20188db86ddSKarthik Bhat   // All dependencies were '=','S' or 'I'
20288db86ddSKarthik Bhat   return false;
20388db86ddSKarthik Bhat }
20488db86ddSKarthik Bhat 
20588db86ddSKarthik Bhat // Checks if no dependence exist in the dependency matrix in Row before Column.
containsNoDependence(CharMatrix & DepMatrix,unsigned Row,unsigned Column)2068210fdf2SKarthik Bhat static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
20788db86ddSKarthik Bhat                                  unsigned Column) {
20888db86ddSKarthik Bhat   for (unsigned i = 0; i < Column; ++i) {
209fca1ff0dSChandler Carruth     if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
21088db86ddSKarthik Bhat         DepMatrix[Row][i] != 'I')
21188db86ddSKarthik Bhat       return false;
21288db86ddSKarthik Bhat   }
21388db86ddSKarthik Bhat   return true;
21488db86ddSKarthik Bhat }
21588db86ddSKarthik Bhat 
validDepInterchange(CharMatrix & DepMatrix,unsigned Row,unsigned OuterLoopId,char InnerDep,char OuterDep)2168210fdf2SKarthik Bhat static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
2178210fdf2SKarthik Bhat                                 unsigned OuterLoopId, char InnerDep,
2188210fdf2SKarthik Bhat                                 char OuterDep) {
21988db86ddSKarthik Bhat   if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
22088db86ddSKarthik Bhat     return false;
22188db86ddSKarthik Bhat 
22288db86ddSKarthik Bhat   if (InnerDep == OuterDep)
22388db86ddSKarthik Bhat     return true;
22488db86ddSKarthik Bhat 
22588db86ddSKarthik Bhat   // It is legal to interchange if and only if after interchange no row has a
22688db86ddSKarthik Bhat   // '>' direction as the leftmost non-'='.
22788db86ddSKarthik Bhat 
22888db86ddSKarthik Bhat   if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
22988db86ddSKarthik Bhat     return true;
23088db86ddSKarthik Bhat 
23188db86ddSKarthik Bhat   if (InnerDep == '<')
23288db86ddSKarthik Bhat     return true;
23388db86ddSKarthik Bhat 
23488db86ddSKarthik Bhat   if (InnerDep == '>') {
23588db86ddSKarthik Bhat     // If OuterLoopId represents outermost loop then interchanging will make the
23688db86ddSKarthik Bhat     // 1st dependency as '>'
23788db86ddSKarthik Bhat     if (OuterLoopId == 0)
23888db86ddSKarthik Bhat       return false;
23988db86ddSKarthik Bhat 
24088db86ddSKarthik Bhat     // If all dependencies before OuterloopId are '=','S'or 'I'. Then
24188db86ddSKarthik Bhat     // interchanging will result in this row having an outermost non '='
24288db86ddSKarthik Bhat     // dependency of '>'
24388db86ddSKarthik Bhat     if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
24488db86ddSKarthik Bhat       return true;
24588db86ddSKarthik Bhat   }
24688db86ddSKarthik Bhat 
24788db86ddSKarthik Bhat   return false;
24888db86ddSKarthik Bhat }
24988db86ddSKarthik Bhat 
25088db86ddSKarthik Bhat // Checks if it is legal to interchange 2 loops.
2518210fdf2SKarthik Bhat // [Theorem] A permutation of the loops in a perfect nest is legal if and only
25261683a22SChad Rosier // if the direction matrix, after the same permutation is applied to its
25361683a22SChad Rosier // columns, has no ">" direction as the leftmost non-"=" direction in any row.
isLegalToInterChangeLoops(CharMatrix & DepMatrix,unsigned InnerLoopId,unsigned OuterLoopId)2548210fdf2SKarthik Bhat static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
2558210fdf2SKarthik Bhat                                       unsigned InnerLoopId,
25688db86ddSKarthik Bhat                                       unsigned OuterLoopId) {
25788db86ddSKarthik Bhat   unsigned NumRows = DepMatrix.size();
25888db86ddSKarthik Bhat   // For each row check if it is valid to interchange.
25988db86ddSKarthik Bhat   for (unsigned Row = 0; Row < NumRows; ++Row) {
26088db86ddSKarthik Bhat     char InnerDep = DepMatrix[Row][InnerLoopId];
26188db86ddSKarthik Bhat     char OuterDep = DepMatrix[Row][OuterLoopId];
26288db86ddSKarthik Bhat     if (InnerDep == '*' || OuterDep == '*')
26388db86ddSKarthik Bhat       return false;
26461683a22SChad Rosier     if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
26588db86ddSKarthik Bhat       return false;
26688db86ddSKarthik Bhat   }
26788db86ddSKarthik Bhat   return true;
26888db86ddSKarthik Bhat }
26988db86ddSKarthik Bhat 
populateWorklist(Loop & L,LoopVector & LoopList)270eac34875SCongzhe Cao static void populateWorklist(Loop &L, LoopVector &LoopList) {
271d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
272f5814f56SChad Rosier                     << L.getHeader()->getParent()->getName() << " Loop: %"
273f5814f56SChad Rosier                     << L.getHeader()->getName() << '\n');
274eac34875SCongzhe Cao   assert(LoopList.empty() && "LoopList should initially be empty!");
27588db86ddSKarthik Bhat   Loop *CurrentLoop = &L;
276e448b5beSBenjamin Kramer   const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
277e448b5beSBenjamin Kramer   while (!Vec->empty()) {
27888db86ddSKarthik Bhat     // The current loop has multiple subloops in it hence it is not tightly
27988db86ddSKarthik Bhat     // nested.
28088db86ddSKarthik Bhat     // Discard all loops above it added into Worklist.
281eac34875SCongzhe Cao     if (Vec->size() != 1) {
282eac34875SCongzhe Cao       LoopList = {};
283eac34875SCongzhe Cao       return;
284eac34875SCongzhe Cao     }
2858600fee5SFlorian Hahn 
28688db86ddSKarthik Bhat     LoopList.push_back(CurrentLoop);
287e448b5beSBenjamin Kramer     CurrentLoop = Vec->front();
288e448b5beSBenjamin Kramer     Vec = &CurrentLoop->getSubLoops();
28988db86ddSKarthik Bhat   }
29088db86ddSKarthik Bhat   LoopList.push_back(CurrentLoop);
29188db86ddSKarthik Bhat }
29288db86ddSKarthik Bhat 
293dd40f5e7SEugene Zelenko namespace {
294dd40f5e7SEugene Zelenko 
29588db86ddSKarthik Bhat /// LoopInterchangeLegality checks if it is legal to interchange the loop.
29688db86ddSKarthik Bhat class LoopInterchangeLegality {
29788db86ddSKarthik Bhat public:
LoopInterchangeLegality(Loop * Outer,Loop * Inner,ScalarEvolution * SE,OptimizationRemarkEmitter * ORE)29888db86ddSKarthik Bhat   LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
299ad993521SFlorian Hahn                           OptimizationRemarkEmitter *ORE)
300c51d0882SFlorian Hahn       : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
30188db86ddSKarthik Bhat 
30288db86ddSKarthik Bhat   /// Check if the loops can be interchanged.
30388db86ddSKarthik Bhat   bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
30488db86ddSKarthik Bhat                            CharMatrix &DepMatrix);
305dd40f5e7SEugene Zelenko 
306fa6a2876SCongzhe Cao   /// Discover induction PHIs in the header of \p L. Induction
307fa6a2876SCongzhe Cao   /// PHIs are added to \p Inductions.
308fa6a2876SCongzhe Cao   bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
309fa6a2876SCongzhe Cao 
31088db86ddSKarthik Bhat   /// Check if the loop structure is understood. We do not handle triangular
31188db86ddSKarthik Bhat   /// loops for now.
312fa6a2876SCongzhe Cao   bool isLoopStructureUnderstood();
31388db86ddSKarthik Bhat 
31488db86ddSKarthik Bhat   bool currentLimitations();
31588db86ddSKarthik Bhat 
getOuterInnerReductions() const316a684a994SFlorian Hahn   const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
317a684a994SFlorian Hahn     return OuterInnerReductions;
318a684a994SFlorian Hahn   }
319a684a994SFlorian Hahn 
getInnerLoopInductions() const320fa6a2876SCongzhe Cao   const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const {
321fa6a2876SCongzhe Cao     return InnerLoopInductions;
322fa6a2876SCongzhe Cao   }
323fa6a2876SCongzhe Cao 
32488db86ddSKarthik Bhat private:
32588db86ddSKarthik Bhat   bool tightlyNested(Loop *Outer, Loop *Inner);
326c8bd6ea3SFlorian Hahn   bool containsUnsafeInstructions(BasicBlock *BB);
327a684a994SFlorian Hahn 
328a684a994SFlorian Hahn   /// Discover induction and reduction PHIs in the header of \p L. Induction
329a684a994SFlorian Hahn   /// PHIs are added to \p Inductions, reductions are added to
330a684a994SFlorian Hahn   /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
331a684a994SFlorian Hahn   /// to be passed as \p InnerLoop.
332a684a994SFlorian Hahn   bool findInductionAndReductions(Loop *L,
333a684a994SFlorian Hahn                                   SmallVector<PHINode *, 8> &Inductions,
334a684a994SFlorian Hahn                                   Loop *InnerLoop);
335dd40f5e7SEugene Zelenko 
33688db86ddSKarthik Bhat   Loop *OuterLoop;
33788db86ddSKarthik Bhat   Loop *InnerLoop;
33888db86ddSKarthik Bhat 
33988db86ddSKarthik Bhat   ScalarEvolution *SE;
340dd40f5e7SEugene Zelenko 
341ad993521SFlorian Hahn   /// Interface to emit optimization remarks.
342ad993521SFlorian Hahn   OptimizationRemarkEmitter *ORE;
3438210fdf2SKarthik Bhat 
344a684a994SFlorian Hahn   /// Set of reduction PHIs taking part of a reduction across the inner and
345a684a994SFlorian Hahn   /// outer loop.
346a684a994SFlorian Hahn   SmallPtrSet<PHINode *, 4> OuterInnerReductions;
347fa6a2876SCongzhe Cao 
348fa6a2876SCongzhe Cao   /// Set of inner loop induction PHIs
349fa6a2876SCongzhe Cao   SmallVector<PHINode *, 8> InnerLoopInductions;
35088db86ddSKarthik Bhat };
35188db86ddSKarthik Bhat 
35288db86ddSKarthik Bhat /// LoopInterchangeProfitability checks if it is profitable to interchange the
35388db86ddSKarthik Bhat /// loop.
35488db86ddSKarthik Bhat class LoopInterchangeProfitability {
35588db86ddSKarthik Bhat public:
LoopInterchangeProfitability(Loop * Outer,Loop * Inner,ScalarEvolution * SE,OptimizationRemarkEmitter * ORE)356ad993521SFlorian Hahn   LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
357ad993521SFlorian Hahn                                OptimizationRemarkEmitter *ORE)
358ad993521SFlorian Hahn       : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
35988db86ddSKarthik Bhat 
36074b41114SVikram TV   /// Check if the loop interchange is profitable.
361*b941857bSCongzhe Cao   bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
362*b941857bSCongzhe Cao                     unsigned InnerLoopId, unsigned OuterLoopId,
363*b941857bSCongzhe Cao                     CharMatrix &DepMatrix,
364*b941857bSCongzhe Cao                     const DenseMap<const Loop *, unsigned> &CostMap);
36588db86ddSKarthik Bhat 
36688db86ddSKarthik Bhat private:
36788db86ddSKarthik Bhat   int getInstrOrderCost();
36888db86ddSKarthik Bhat 
36988db86ddSKarthik Bhat   Loop *OuterLoop;
37088db86ddSKarthik Bhat   Loop *InnerLoop;
37188db86ddSKarthik Bhat 
37288db86ddSKarthik Bhat   /// Scev analysis.
37388db86ddSKarthik Bhat   ScalarEvolution *SE;
374dd40f5e7SEugene Zelenko 
375ad993521SFlorian Hahn   /// Interface to emit optimization remarks.
376ad993521SFlorian Hahn   OptimizationRemarkEmitter *ORE;
37788db86ddSKarthik Bhat };
37888db86ddSKarthik Bhat 
37974b41114SVikram TV /// LoopInterchangeTransform interchanges the loop.
38088db86ddSKarthik Bhat class LoopInterchangeTransform {
38188db86ddSKarthik Bhat public:
LoopInterchangeTransform(Loop * Outer,Loop * Inner,ScalarEvolution * SE,LoopInfo * LI,DominatorTree * DT,const LoopInterchangeLegality & LIL)38288db86ddSKarthik Bhat   LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
38388db86ddSKarthik Bhat                            LoopInfo *LI, DominatorTree *DT,
384a684a994SFlorian Hahn                            const LoopInterchangeLegality &LIL)
385ce2db900SCongzhe Cao       : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
38688db86ddSKarthik Bhat 
38788db86ddSKarthik Bhat   /// Interchange OuterLoop and InnerLoop.
38888db86ddSKarthik Bhat   bool transform();
389831a7577SFlorian Hahn   void restructureLoops(Loop *NewInner, Loop *NewOuter,
390831a7577SFlorian Hahn                         BasicBlock *OrigInnerPreHeader,
391831a7577SFlorian Hahn                         BasicBlock *OrigOuterPreHeader);
39288db86ddSKarthik Bhat   void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
39388db86ddSKarthik Bhat 
39488db86ddSKarthik Bhat private:
39588db86ddSKarthik Bhat   bool adjustLoopLinks();
39688db86ddSKarthik Bhat   bool adjustLoopBranches();
39788db86ddSKarthik Bhat 
39888db86ddSKarthik Bhat   Loop *OuterLoop;
39988db86ddSKarthik Bhat   Loop *InnerLoop;
40088db86ddSKarthik Bhat 
40188db86ddSKarthik Bhat   /// Scev analysis.
40288db86ddSKarthik Bhat   ScalarEvolution *SE;
403dd40f5e7SEugene Zelenko 
40488db86ddSKarthik Bhat   LoopInfo *LI;
40588db86ddSKarthik Bhat   DominatorTree *DT;
406a684a994SFlorian Hahn 
407a684a994SFlorian Hahn   const LoopInterchangeLegality &LIL;
40888db86ddSKarthik Bhat };
40988db86ddSKarthik Bhat 
4109c21c6c9SArthur Eubanks struct LoopInterchange {
411dd40f5e7SEugene Zelenko   ScalarEvolution *SE = nullptr;
412dd40f5e7SEugene Zelenko   LoopInfo *LI = nullptr;
413dd40f5e7SEugene Zelenko   DependenceInfo *DI = nullptr;
414dd40f5e7SEugene Zelenko   DominatorTree *DT = nullptr;
415*b941857bSCongzhe Cao   std::unique_ptr<CacheCost> CC = nullptr;
416dd40f5e7SEugene Zelenko 
417ad993521SFlorian Hahn   /// Interface to emit optimization remarks.
418ad993521SFlorian Hahn   OptimizationRemarkEmitter *ORE;
419ad993521SFlorian Hahn 
LoopInterchange__anon401b65a50211::LoopInterchange4209c21c6c9SArthur Eubanks   LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
421*b941857bSCongzhe Cao                   DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
422*b941857bSCongzhe Cao                   OptimizationRemarkEmitter *ORE)
423*b941857bSCongzhe Cao       : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
42488db86ddSKarthik Bhat 
run__anon401b65a50211::LoopInterchange4259c21c6c9SArthur Eubanks   bool run(Loop *L) {
4269c21c6c9SArthur Eubanks     if (L->getParentLoop())
4278d72ecc3SFlorian Hahn       return false;
428eac34875SCongzhe Cao     SmallVector<Loop *, 8> LoopList;
429eac34875SCongzhe Cao     populateWorklist(*L, LoopList);
430eac34875SCongzhe Cao     return processLoopList(LoopList);
43188db86ddSKarthik Bhat   }
43288db86ddSKarthik Bhat 
run__anon401b65a50211::LoopInterchange4330eeaec2aSTa-Wei Tu   bool run(LoopNest &LN) {
434eac34875SCongzhe Cao     SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end());
4350eeaec2aSTa-Wei Tu     for (unsigned I = 1; I < LoopList.size(); ++I)
4360eeaec2aSTa-Wei Tu       if (LoopList[I]->getParentLoop() != LoopList[I - 1])
4370eeaec2aSTa-Wei Tu         return false;
4380eeaec2aSTa-Wei Tu     return processLoopList(LoopList);
4390eeaec2aSTa-Wei Tu   }
4400eeaec2aSTa-Wei Tu 
isComputableLoopNest__anon401b65a50211::LoopInterchange4410eeaec2aSTa-Wei Tu   bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
442135f735aSBenjamin Kramer     for (Loop *L : LoopList) {
44388db86ddSKarthik Bhat       const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
44410ddb927SPhilip Reames       if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
445d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
44688db86ddSKarthik Bhat         return false;
44788db86ddSKarthik Bhat       }
44888db86ddSKarthik Bhat       if (L->getNumBackEdges() != 1) {
449d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
45088db86ddSKarthik Bhat         return false;
45188db86ddSKarthik Bhat       }
45288db86ddSKarthik Bhat       if (!L->getExitingBlock()) {
453d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
45488db86ddSKarthik Bhat         return false;
45588db86ddSKarthik Bhat       }
45688db86ddSKarthik Bhat     }
45788db86ddSKarthik Bhat     return true;
45888db86ddSKarthik Bhat   }
45988db86ddSKarthik Bhat 
selectLoopForInterchange__anon401b65a50211::LoopInterchange4600eeaec2aSTa-Wei Tu   unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
46188db86ddSKarthik Bhat     // TODO: Add a better heuristic to select the loop to be interchanged based
462df005cbeSBenjamin Kramer     // on the dependence matrix. Currently we select the innermost loop.
46388db86ddSKarthik Bhat     return LoopList.size() - 1;
46488db86ddSKarthik Bhat   }
46588db86ddSKarthik Bhat 
processLoopList__anon401b65a50211::LoopInterchange466eac34875SCongzhe Cao   bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
46788db86ddSKarthik Bhat     bool Changed = false;
4687ea0d394SChad Rosier     unsigned LoopNestDepth = LoopList.size();
4697ea0d394SChad Rosier     if (LoopNestDepth < 2) {
470d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
47188db86ddSKarthik Bhat       return false;
47288db86ddSKarthik Bhat     }
4737ea0d394SChad Rosier     if (LoopNestDepth > MaxLoopNestDepth) {
474d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
4757ea0d394SChad Rosier                         << MaxLoopNestDepth << "\n");
4767ea0d394SChad Rosier       return false;
4777ea0d394SChad Rosier     }
47888db86ddSKarthik Bhat     if (!isComputableLoopNest(LoopList)) {
479d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
48088db86ddSKarthik Bhat       return false;
48188db86ddSKarthik Bhat     }
4827ea0d394SChad Rosier 
483d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
484d34e60caSNicola Zaghen                       << "\n");
4857ea0d394SChad Rosier 
4867ea0d394SChad Rosier     CharMatrix DependencyMatrix;
48788db86ddSKarthik Bhat     Loop *OuterMostLoop = *(LoopList.begin());
4887ea0d394SChad Rosier     if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
48949c22190SChandler Carruth                                   OuterMostLoop, DI)) {
490d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
49188db86ddSKarthik Bhat       return false;
49288db86ddSKarthik Bhat     }
49388db86ddSKarthik Bhat #ifdef DUMP_DEP_MATRICIES
494d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
49588db86ddSKarthik Bhat     printDepMatrix(DependencyMatrix);
49688db86ddSKarthik Bhat #endif
49788db86ddSKarthik Bhat 
49888db86ddSKarthik Bhat     // Get the Outermost loop exit.
4991da30c65SFlorian Hahn     BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
5001da30c65SFlorian Hahn     if (!LoopNestExit) {
501d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
5021da30c65SFlorian Hahn       return false;
5031da30c65SFlorian Hahn     }
50488db86ddSKarthik Bhat 
50588db86ddSKarthik Bhat     unsigned SelecLoopId = selectLoopForInterchange(LoopList);
506*b941857bSCongzhe Cao     // Obtain the loop vector returned from loop cache analysis beforehand,
507*b941857bSCongzhe Cao     // and put each <Loop, index> pair into a map for constant time query
508*b941857bSCongzhe Cao     // later. Indices in loop vector reprsent the optimal order of the
509*b941857bSCongzhe Cao     // corresponding loop, e.g., given a loopnest with depth N, index 0
510*b941857bSCongzhe Cao     // indicates the loop should be placed as the outermost loop and index N
511*b941857bSCongzhe Cao     // indicates the loop should be placed as the innermost loop.
512*b941857bSCongzhe Cao     //
513*b941857bSCongzhe Cao     // For the old pass manager CacheCost would be null.
514*b941857bSCongzhe Cao     DenseMap<const Loop *, unsigned> CostMap;
515*b941857bSCongzhe Cao     if (CC != nullptr) {
516*b941857bSCongzhe Cao       const auto &LoopCosts = CC->getLoopCosts();
517*b941857bSCongzhe Cao       for (unsigned i = 0; i < LoopCosts.size(); i++) {
518*b941857bSCongzhe Cao         CostMap[LoopCosts[i].first] = i;
519*b941857bSCongzhe Cao       }
520*b941857bSCongzhe Cao     }
521eac34875SCongzhe Cao     // We try to achieve the globally optimal memory access for the loopnest,
522eac34875SCongzhe Cao     // and do interchange based on a bubble-sort fasion. We start from
523eac34875SCongzhe Cao     // the innermost loop, move it outwards to the best possible position
524eac34875SCongzhe Cao     // and repeat this process.
525eac34875SCongzhe Cao     for (unsigned j = SelecLoopId; j > 0; j--) {
526eac34875SCongzhe Cao       bool ChangedPerIter = false;
527eac34875SCongzhe Cao       for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
528eac34875SCongzhe Cao         bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
529*b941857bSCongzhe Cao                                         DependencyMatrix, CostMap);
53088db86ddSKarthik Bhat         if (!Interchanged)
531eac34875SCongzhe Cao           continue;
532eac34875SCongzhe Cao         // Loops interchanged, update LoopList accordingly.
533eac34875SCongzhe Cao         std::swap(LoopList[i - 1], LoopList[i]);
53488db86ddSKarthik Bhat         // Update the DependencyMatrix
535d18ea065SChad Rosier         interChangeDependencies(DependencyMatrix, i, i - 1);
53688db86ddSKarthik Bhat #ifdef DUMP_DEP_MATRICIES
537d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
53888db86ddSKarthik Bhat         printDepMatrix(DependencyMatrix);
53988db86ddSKarthik Bhat #endif
540eac34875SCongzhe Cao         ChangedPerIter |= Interchanged;
54188db86ddSKarthik Bhat         Changed |= Interchanged;
54288db86ddSKarthik Bhat       }
543eac34875SCongzhe Cao       // Early abort if there was no interchange during an entire round of
544eac34875SCongzhe Cao       // moving loops outwards.
545eac34875SCongzhe Cao       if (!ChangedPerIter)
546eac34875SCongzhe Cao         break;
547eac34875SCongzhe Cao     }
54888db86ddSKarthik Bhat     return Changed;
54988db86ddSKarthik Bhat   }
55088db86ddSKarthik Bhat 
processLoop__anon401b65a50211::LoopInterchange5516b612a7bSTa-Wei Tu   bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
552ce2db900SCongzhe Cao                    unsigned OuterLoopId,
553*b941857bSCongzhe Cao                    std::vector<std::vector<char>> &DependencyMatrix,
554*b941857bSCongzhe Cao                    const DenseMap<const Loop *, unsigned> &CostMap) {
555d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
55688db86ddSKarthik Bhat                       << " and OuterLoopId = " << OuterLoopId << "\n");
557c51d0882SFlorian Hahn     LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
55888db86ddSKarthik Bhat     if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
559d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
56088db86ddSKarthik Bhat       return false;
56188db86ddSKarthik Bhat     }
562d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
563ad993521SFlorian Hahn     LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
564*b941857bSCongzhe Cao     if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
565*b941857bSCongzhe Cao                           DependencyMatrix, CostMap)) {
566d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
56788db86ddSKarthik Bhat       return false;
56888db86ddSKarthik Bhat     }
56988db86ddSKarthik Bhat 
5709590658fSVivek Pandya     ORE->emit([&]() {
5719590658fSVivek Pandya       return OptimizationRemark(DEBUG_TYPE, "Interchanged",
572ad993521SFlorian Hahn                                 InnerLoop->getStartLoc(),
573ad993521SFlorian Hahn                                 InnerLoop->getHeader())
5749590658fSVivek Pandya              << "Loop interchanged with enclosing loop.";
5759590658fSVivek Pandya     });
576ad993521SFlorian Hahn 
577ce2db900SCongzhe Cao     LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
57888db86ddSKarthik Bhat     LIT.transform();
579d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
5806e004336SFlorian Hahn     LoopsInterchanged++;
581f71abec6SAlexey Zhikhartsev 
582f71abec6SAlexey Zhikhartsev     assert(InnerLoop->isLCSSAForm(*DT) &&
583f71abec6SAlexey Zhikhartsev            "Inner loop not left in LCSSA form after loop interchange!");
584f71abec6SAlexey Zhikhartsev     assert(OuterLoop->isLCSSAForm(*DT) &&
585f71abec6SAlexey Zhikhartsev            "Outer loop not left in LCSSA form after loop interchange!");
586f71abec6SAlexey Zhikhartsev 
58788db86ddSKarthik Bhat     return true;
58888db86ddSKarthik Bhat   }
58988db86ddSKarthik Bhat };
59088db86ddSKarthik Bhat 
591dd40f5e7SEugene Zelenko } // end anonymous namespace
592dd40f5e7SEugene Zelenko 
containsUnsafeInstructions(BasicBlock * BB)593c8bd6ea3SFlorian Hahn bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
594c8bd6ea3SFlorian Hahn   return any_of(*BB, [](const Instruction &I) {
595c8bd6ea3SFlorian Hahn     return I.mayHaveSideEffects() || I.mayReadFromMemory();
5968210fdf2SKarthik Bhat   });
5978210fdf2SKarthik Bhat }
59888db86ddSKarthik Bhat 
tightlyNested(Loop * OuterLoop,Loop * InnerLoop)59988db86ddSKarthik Bhat bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
60088db86ddSKarthik Bhat   BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
60188db86ddSKarthik Bhat   BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
60288db86ddSKarthik Bhat   BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
60388db86ddSKarthik Bhat 
6047ff2768bSTa-Wei Tu   LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
6057ff2768bSTa-Wei Tu 
6067ff2768bSTa-Wei Tu   // A perfectly nested loop will not have any branch in between the outer and
6077ff2768bSTa-Wei Tu   // inner block i.e. outer header will branch to either inner preheader and
6087ff2768bSTa-Wei Tu   // outerloop latch.
6097ff2768bSTa-Wei Tu   BranchInst *OuterLoopHeaderBI =
6107ff2768bSTa-Wei Tu       dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
6117ff2768bSTa-Wei Tu   if (!OuterLoopHeaderBI)
6127ff2768bSTa-Wei Tu     return false;
6137ff2768bSTa-Wei Tu 
6147ff2768bSTa-Wei Tu   for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
6157ff2768bSTa-Wei Tu     if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
6167ff2768bSTa-Wei Tu         Succ != OuterLoopLatch)
6177ff2768bSTa-Wei Tu       return false;
6187ff2768bSTa-Wei Tu 
6197ff2768bSTa-Wei Tu   LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
62088db86ddSKarthik Bhat   // We do not have any basic block in between now make sure the outer header
621df005cbeSBenjamin Kramer   // and outer loop latch doesn't contain any unsafe instructions.
622c8bd6ea3SFlorian Hahn   if (containsUnsafeInstructions(OuterLoopHeader) ||
623c8bd6ea3SFlorian Hahn       containsUnsafeInstructions(OuterLoopLatch))
62488db86ddSKarthik Bhat     return false;
62588db86ddSKarthik Bhat 
6268393b9fdSFlorian Hahn   // Also make sure the inner loop preheader does not contain any unsafe
6278393b9fdSFlorian Hahn   // instructions. Note that all instructions in the preheader will be moved to
6288393b9fdSFlorian Hahn   // the outer loop header when interchanging.
6298393b9fdSFlorian Hahn   if (InnerLoopPreHeader != OuterLoopHeader &&
6308393b9fdSFlorian Hahn       containsUnsafeInstructions(InnerLoopPreHeader))
6318393b9fdSFlorian Hahn     return false;
6328393b9fdSFlorian Hahn 
633829c1b64SCongzhe Cao   BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
634829c1b64SCongzhe Cao   // Ensure the inner loop exit block flows to the outer loop latch possibly
635829c1b64SCongzhe Cao   // through empty blocks.
636829c1b64SCongzhe Cao   const BasicBlock &SuccInner =
637829c1b64SCongzhe Cao       LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
638829c1b64SCongzhe Cao   if (&SuccInner != OuterLoopLatch) {
639829c1b64SCongzhe Cao     LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit
640829c1b64SCongzhe Cao                       << " does not lead to the outer loop latch.\n";);
641829c1b64SCongzhe Cao     return false;
642829c1b64SCongzhe Cao   }
643829c1b64SCongzhe Cao   // The inner loop exit block does flow to the outer loop latch and not some
644829c1b64SCongzhe Cao   // other BBs, now make sure it contains safe instructions, since it will be
645829c1b64SCongzhe Cao   // moved into the (new) inner loop after interchange.
646829c1b64SCongzhe Cao   if (containsUnsafeInstructions(InnerLoopExit))
647829c1b64SCongzhe Cao     return false;
648829c1b64SCongzhe Cao 
649d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
65088db86ddSKarthik Bhat   // We have a perfect loop nest.
65188db86ddSKarthik Bhat   return true;
65288db86ddSKarthik Bhat }
65388db86ddSKarthik Bhat 
isLoopStructureUnderstood()654fa6a2876SCongzhe Cao bool LoopInterchangeLegality::isLoopStructureUnderstood() {
65588db86ddSKarthik Bhat   BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
656fa6a2876SCongzhe Cao   for (PHINode *InnerInduction : InnerLoopInductions) {
657fa6a2876SCongzhe Cao     unsigned Num = InnerInduction->getNumOperands();
65888db86ddSKarthik Bhat     for (unsigned i = 0; i < Num; ++i) {
65988db86ddSKarthik Bhat       Value *Val = InnerInduction->getOperand(i);
66088db86ddSKarthik Bhat       if (isa<Constant>(Val))
66188db86ddSKarthik Bhat         continue;
66288db86ddSKarthik Bhat       Instruction *I = dyn_cast<Instruction>(Val);
66388db86ddSKarthik Bhat       if (!I)
66488db86ddSKarthik Bhat         return false;
66588db86ddSKarthik Bhat       // TODO: Handle triangular loops.
66688db86ddSKarthik Bhat       // e.g. for(int i=0;i<N;i++)
66788db86ddSKarthik Bhat       //        for(int j=i;j<N;j++)
66888db86ddSKarthik Bhat       unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
66988db86ddSKarthik Bhat       if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
67088db86ddSKarthik Bhat               InnerLoopPreheader &&
67188db86ddSKarthik Bhat           !OuterLoop->isLoopInvariant(I)) {
67288db86ddSKarthik Bhat         return false;
67388db86ddSKarthik Bhat       }
67488db86ddSKarthik Bhat     }
675fa6a2876SCongzhe Cao   }
67640e3aa39SCongzhe Cao 
67740e3aa39SCongzhe Cao   // TODO: Handle triangular loops of another form.
67840e3aa39SCongzhe Cao   // e.g. for(int i=0;i<N;i++)
67940e3aa39SCongzhe Cao   //        for(int j=0;j<i;j++)
68040e3aa39SCongzhe Cao   // or,
68140e3aa39SCongzhe Cao   //      for(int i=0;i<N;i++)
68240e3aa39SCongzhe Cao   //        for(int j=0;j*i<N;j++)
68340e3aa39SCongzhe Cao   BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
68440e3aa39SCongzhe Cao   BranchInst *InnerLoopLatchBI =
68540e3aa39SCongzhe Cao       dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
68640e3aa39SCongzhe Cao   if (!InnerLoopLatchBI->isConditional())
68740e3aa39SCongzhe Cao     return false;
68840e3aa39SCongzhe Cao   if (CmpInst *InnerLoopCmp =
68940e3aa39SCongzhe Cao           dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
69040e3aa39SCongzhe Cao     Value *Op0 = InnerLoopCmp->getOperand(0);
69140e3aa39SCongzhe Cao     Value *Op1 = InnerLoopCmp->getOperand(1);
69240e3aa39SCongzhe Cao 
69340e3aa39SCongzhe Cao     // LHS and RHS of the inner loop exit condition, e.g.,
69440e3aa39SCongzhe Cao     // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
69540e3aa39SCongzhe Cao     Value *Left = nullptr;
69640e3aa39SCongzhe Cao     Value *Right = nullptr;
69740e3aa39SCongzhe Cao 
69840e3aa39SCongzhe Cao     // Check if V only involves inner loop induction variable.
69940e3aa39SCongzhe Cao     // Return true if V is InnerInduction, or a cast from
70040e3aa39SCongzhe Cao     // InnerInduction, or a binary operator that involves
70140e3aa39SCongzhe Cao     // InnerInduction and a constant.
702fa6a2876SCongzhe Cao     std::function<bool(Value *)> IsPathToInnerIndVar;
703fa6a2876SCongzhe Cao     IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
704fa6a2876SCongzhe Cao       if (llvm::is_contained(InnerLoopInductions, V))
70540e3aa39SCongzhe Cao         return true;
70640e3aa39SCongzhe Cao       if (isa<Constant>(V))
70740e3aa39SCongzhe Cao         return true;
708fa6a2876SCongzhe Cao       const Instruction *I = dyn_cast<Instruction>(V);
70940e3aa39SCongzhe Cao       if (!I)
71040e3aa39SCongzhe Cao         return false;
71140e3aa39SCongzhe Cao       if (isa<CastInst>(I))
712fa6a2876SCongzhe Cao         return IsPathToInnerIndVar(I->getOperand(0));
71340e3aa39SCongzhe Cao       if (isa<BinaryOperator>(I))
714fa6a2876SCongzhe Cao         return IsPathToInnerIndVar(I->getOperand(0)) &&
715fa6a2876SCongzhe Cao                IsPathToInnerIndVar(I->getOperand(1));
71640e3aa39SCongzhe Cao       return false;
71740e3aa39SCongzhe Cao     };
71840e3aa39SCongzhe Cao 
719fa6a2876SCongzhe Cao     // In case of multiple inner loop indvars, it is okay if LHS and RHS
720fa6a2876SCongzhe Cao     // are both inner indvar related variables.
721fa6a2876SCongzhe Cao     if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
722fa6a2876SCongzhe Cao       return true;
723fa6a2876SCongzhe Cao 
724fa6a2876SCongzhe Cao     // Otherwise we check if the cmp instruction compares an inner indvar
725fa6a2876SCongzhe Cao     // related variable (Left) with a outer loop invariant (Right).
726fa6a2876SCongzhe Cao     if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
72740e3aa39SCongzhe Cao       Left = Op0;
72840e3aa39SCongzhe Cao       Right = Op1;
729fa6a2876SCongzhe Cao     } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
73040e3aa39SCongzhe Cao       Left = Op1;
73140e3aa39SCongzhe Cao       Right = Op0;
73240e3aa39SCongzhe Cao     }
73340e3aa39SCongzhe Cao 
73440e3aa39SCongzhe Cao     if (Left == nullptr)
73540e3aa39SCongzhe Cao       return false;
73640e3aa39SCongzhe Cao 
73740e3aa39SCongzhe Cao     const SCEV *S = SE->getSCEV(Right);
73840e3aa39SCongzhe Cao     if (!SE->isLoopInvariant(S, OuterLoop))
73940e3aa39SCongzhe Cao       return false;
74040e3aa39SCongzhe Cao   }
74140e3aa39SCongzhe Cao 
74288db86ddSKarthik Bhat   return true;
74388db86ddSKarthik Bhat }
74488db86ddSKarthik Bhat 
745a684a994SFlorian Hahn // If SV is a LCSSA PHI node with a single incoming value, return the incoming
746a684a994SFlorian Hahn // value.
followLCSSA(Value * SV)747a684a994SFlorian Hahn static Value *followLCSSA(Value *SV) {
748a684a994SFlorian Hahn   PHINode *PHI = dyn_cast<PHINode>(SV);
749a684a994SFlorian Hahn   if (!PHI)
750a684a994SFlorian Hahn     return SV;
751a684a994SFlorian Hahn 
752a684a994SFlorian Hahn   if (PHI->getNumIncomingValues() != 1)
753a684a994SFlorian Hahn     return SV;
754a684a994SFlorian Hahn   return followLCSSA(PHI->getIncomingValue(0));
755a684a994SFlorian Hahn }
756a684a994SFlorian Hahn 
757a684a994SFlorian Hahn // Check V's users to see if it is involved in a reduction in L.
findInnerReductionPhi(Loop * L,Value * V)758a684a994SFlorian Hahn static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
759bfec9148SAnton Rapetov   // Reduction variables cannot be constants.
760bfec9148SAnton Rapetov   if (isa<Constant>(V))
761bfec9148SAnton Rapetov     return nullptr;
762bfec9148SAnton Rapetov 
763a684a994SFlorian Hahn   for (Value *User : V->users()) {
764a684a994SFlorian Hahn     if (PHINode *PHI = dyn_cast<PHINode>(User)) {
765a684a994SFlorian Hahn       if (PHI->getNumIncomingValues() == 1)
766a684a994SFlorian Hahn         continue;
767a684a994SFlorian Hahn       RecurrenceDescriptor RD;
7681ef04326SCongzhe Cao       if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
7691ef04326SCongzhe Cao         // Detect floating point reduction only when it can be reordered.
7701ef04326SCongzhe Cao         if (RD.getExactFPMathInst() != nullptr)
7711ef04326SCongzhe Cao           return nullptr;
772a684a994SFlorian Hahn         return PHI;
7731ef04326SCongzhe Cao       }
774a684a994SFlorian Hahn       return nullptr;
775a684a994SFlorian Hahn     }
776a684a994SFlorian Hahn   }
777a684a994SFlorian Hahn 
778a684a994SFlorian Hahn   return nullptr;
779a684a994SFlorian Hahn }
780a684a994SFlorian Hahn 
findInductionAndReductions(Loop * L,SmallVector<PHINode *,8> & Inductions,Loop * InnerLoop)781a684a994SFlorian Hahn bool LoopInterchangeLegality::findInductionAndReductions(
782a684a994SFlorian Hahn     Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
7838210fdf2SKarthik Bhat   if (!L->getLoopLatch() || !L->getLoopPredecessor())
7848210fdf2SKarthik Bhat     return false;
7855912c667SFlorian Hahn   for (PHINode &PHI : L->getHeader()->phis()) {
7860a91310cSTyler Nowicki     RecurrenceDescriptor RD;
7871bbf15c5SJames Molloy     InductionDescriptor ID;
7885912c667SFlorian Hahn     if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
7895912c667SFlorian Hahn       Inductions.push_back(&PHI);
7908210fdf2SKarthik Bhat     else {
791a684a994SFlorian Hahn       // PHIs in inner loops need to be part of a reduction in the outer loop,
792a684a994SFlorian Hahn       // discovered when checking the PHIs of the outer loop earlier.
793a684a994SFlorian Hahn       if (!InnerLoop) {
7943badd17bSBenjamin Kramer         if (!OuterInnerReductions.count(&PHI)) {
795a684a994SFlorian Hahn           LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
796a684a994SFlorian Hahn                                "across the outer loop.\n");
7978210fdf2SKarthik Bhat           return false;
7988210fdf2SKarthik Bhat         }
799a684a994SFlorian Hahn       } else {
800a684a994SFlorian Hahn         assert(PHI.getNumIncomingValues() == 2 &&
801a684a994SFlorian Hahn                "Phis in loop header should have exactly 2 incoming values");
802a684a994SFlorian Hahn         // Check if we have a PHI node in the outer loop that has a reduction
803a684a994SFlorian Hahn         // result from the inner loop as an incoming value.
804a684a994SFlorian Hahn         Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
805a684a994SFlorian Hahn         PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
806a684a994SFlorian Hahn         if (!InnerRedPhi ||
80743c0e4f6SKazu Hirata             !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
808a684a994SFlorian Hahn           LLVM_DEBUG(
809a684a994SFlorian Hahn               dbgs()
810a684a994SFlorian Hahn               << "Failed to recognize PHI as an induction or reduction.\n");
811a684a994SFlorian Hahn           return false;
812a684a994SFlorian Hahn         }
813a684a994SFlorian Hahn         OuterInnerReductions.insert(&PHI);
814a684a994SFlorian Hahn         OuterInnerReductions.insert(InnerRedPhi);
815a684a994SFlorian Hahn       }
816a684a994SFlorian Hahn     }
8178210fdf2SKarthik Bhat   }
8188210fdf2SKarthik Bhat   return true;
8198210fdf2SKarthik Bhat }
8208210fdf2SKarthik Bhat 
82188db86ddSKarthik Bhat // This function indicates the current limitations in the transform as a result
82288db86ddSKarthik Bhat // of which we do not proceed.
currentLimitations()82388db86ddSKarthik Bhat bool LoopInterchangeLegality::currentLimitations() {
82488db86ddSKarthik Bhat   BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
8251da30c65SFlorian Hahn 
8261da30c65SFlorian Hahn   // transform currently expects the loop latches to also be the exiting
8271da30c65SFlorian Hahn   // blocks.
8281da30c65SFlorian Hahn   if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
8291da30c65SFlorian Hahn       OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
8301da30c65SFlorian Hahn       !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
8311da30c65SFlorian Hahn       !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
832d34e60caSNicola Zaghen     LLVM_DEBUG(
833d34e60caSNicola Zaghen         dbgs() << "Loops where the latch is not the exiting block are not"
8341da30c65SFlorian Hahn                << " supported currently.\n");
8351da30c65SFlorian Hahn     ORE->emit([&]() {
8361da30c65SFlorian Hahn       return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
8371da30c65SFlorian Hahn                                       OuterLoop->getStartLoc(),
8381da30c65SFlorian Hahn                                       OuterLoop->getHeader())
8391da30c65SFlorian Hahn              << "Loops where the latch is not the exiting block cannot be"
8401da30c65SFlorian Hahn                 " interchange currently.";
8411da30c65SFlorian Hahn     });
8421da30c65SFlorian Hahn     return true;
8431da30c65SFlorian Hahn   }
84488db86ddSKarthik Bhat 
8458210fdf2SKarthik Bhat   SmallVector<PHINode *, 8> Inductions;
846a684a994SFlorian Hahn   if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
847a684a994SFlorian Hahn     LLVM_DEBUG(
848a684a994SFlorian Hahn         dbgs() << "Only outer loops with induction or reduction PHI nodes "
849a684a994SFlorian Hahn                << "are supported currently.\n");
850a684a994SFlorian Hahn     ORE->emit([&]() {
851a684a994SFlorian Hahn       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
852a684a994SFlorian Hahn                                       OuterLoop->getStartLoc(),
853a684a994SFlorian Hahn                                       OuterLoop->getHeader())
854a684a994SFlorian Hahn              << "Only outer loops with induction or reduction PHI nodes can be"
855a684a994SFlorian Hahn                 " interchanged currently.";
856a684a994SFlorian Hahn     });
857a684a994SFlorian Hahn     return true;
858a684a994SFlorian Hahn   }
859a684a994SFlorian Hahn 
860a684a994SFlorian Hahn   Inductions.clear();
861a684a994SFlorian Hahn   if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
862d34e60caSNicola Zaghen     LLVM_DEBUG(
863d34e60caSNicola Zaghen         dbgs() << "Only inner loops with induction or reduction PHI nodes "
8644eeff394SFlorian Hahn                << "are supported currently.\n");
8659590658fSVivek Pandya     ORE->emit([&]() {
8669590658fSVivek Pandya       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
867ad993521SFlorian Hahn                                       InnerLoop->getStartLoc(),
868ad993521SFlorian Hahn                                       InnerLoop->getHeader())
869ad993521SFlorian Hahn              << "Only inner loops with induction or reduction PHI nodes can be"
8709590658fSVivek Pandya                 " interchange currently.";
8719590658fSVivek Pandya     });
87288db86ddSKarthik Bhat     return true;
8734eeff394SFlorian Hahn   }
87488db86ddSKarthik Bhat 
87588db86ddSKarthik Bhat   // TODO: Triangular loops are not handled for now.
876fa6a2876SCongzhe Cao   if (!isLoopStructureUnderstood()) {
877d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
8789590658fSVivek Pandya     ORE->emit([&]() {
8799590658fSVivek Pandya       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
880ad993521SFlorian Hahn                                       InnerLoop->getStartLoc(),
881ad993521SFlorian Hahn                                       InnerLoop->getHeader())
8829590658fSVivek Pandya              << "Inner loop structure not understood currently.";
8839590658fSVivek Pandya     });
88488db86ddSKarthik Bhat     return true;
88588db86ddSKarthik Bhat   }
88688db86ddSKarthik Bhat 
88788db86ddSKarthik Bhat   return false;
88888db86ddSKarthik Bhat }
88988db86ddSKarthik Bhat 
findInductions(Loop * L,SmallVectorImpl<PHINode * > & Inductions)890fa6a2876SCongzhe Cao bool LoopInterchangeLegality::findInductions(
891fa6a2876SCongzhe Cao     Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
892fa6a2876SCongzhe Cao   for (PHINode &PHI : L->getHeader()->phis()) {
893fa6a2876SCongzhe Cao     InductionDescriptor ID;
894fa6a2876SCongzhe Cao     if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
895fa6a2876SCongzhe Cao       Inductions.push_back(&PHI);
896fa6a2876SCongzhe Cao   }
897fa6a2876SCongzhe Cao   return !Inductions.empty();
898fa6a2876SCongzhe Cao }
899fa6a2876SCongzhe Cao 
900e8a5c172SFlorian Hahn // We currently only support LCSSA PHI nodes in the inner loop exit, if their
901e8a5c172SFlorian Hahn // users are either reduction PHIs or PHIs outside the outer loop (which means
902e8a5c172SFlorian Hahn // the we are only interested in the final value after the loop).
903e8a5c172SFlorian Hahn static bool
areInnerLoopExitPHIsSupported(Loop * InnerL,Loop * OuterL,SmallPtrSetImpl<PHINode * > & Reductions)904e8a5c172SFlorian Hahn areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
905e8a5c172SFlorian Hahn                               SmallPtrSetImpl<PHINode *> &Reductions) {
906e8a5c172SFlorian Hahn   BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
907e8a5c172SFlorian Hahn   for (PHINode &PHI : InnerExit->phis()) {
908e8a5c172SFlorian Hahn     // Reduction lcssa phi will have only 1 incoming block that from loop latch.
909e8a5c172SFlorian Hahn     if (PHI.getNumIncomingValues() > 1)
910e8a5c172SFlorian Hahn       return false;
911e8a5c172SFlorian Hahn     if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
912e8a5c172SFlorian Hahn           PHINode *PN = dyn_cast<PHINode>(U);
9133badd17bSBenjamin Kramer           return !PN ||
9143badd17bSBenjamin Kramer                  (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
915e8a5c172SFlorian Hahn         })) {
916e8a5c172SFlorian Hahn       return false;
917e8a5c172SFlorian Hahn     }
918e8a5c172SFlorian Hahn   }
919e8a5c172SFlorian Hahn   return true;
920e8a5c172SFlorian Hahn }
921e8a5c172SFlorian Hahn 
922f3fea0f1SFlorian Hahn // We currently support LCSSA PHI nodes in the outer loop exit, if their
923f3fea0f1SFlorian Hahn // incoming values do not come from the outer loop latch or if the
924f3fea0f1SFlorian Hahn // outer loop latch has a single predecessor. In that case, the value will
925f3fea0f1SFlorian Hahn // be available if both the inner and outer loop conditions are true, which
926f3fea0f1SFlorian Hahn // will still be true after interchanging. If we have multiple predecessor,
927f3fea0f1SFlorian Hahn // that may not be the case, e.g. because the outer loop latch may be executed
928f3fea0f1SFlorian Hahn // if the inner loop is not executed.
areOuterLoopExitPHIsSupported(Loop * OuterLoop,Loop * InnerLoop)929e8a5c172SFlorian Hahn static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
930f3fea0f1SFlorian Hahn   BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
931f3fea0f1SFlorian Hahn   for (PHINode &PHI : LoopNestExit->phis()) {
932f3fea0f1SFlorian Hahn     for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
933f3fea0f1SFlorian Hahn       Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
934f3fea0f1SFlorian Hahn       if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
935f3fea0f1SFlorian Hahn         continue;
936f3fea0f1SFlorian Hahn 
937f3fea0f1SFlorian Hahn       // The incoming value is defined in the outer loop latch. Currently we
938f3fea0f1SFlorian Hahn       // only support that in case the outer loop latch has a single predecessor.
939f3fea0f1SFlorian Hahn       // This guarantees that the outer loop latch is executed if and only if
940f3fea0f1SFlorian Hahn       // the inner loop is executed (because tightlyNested() guarantees that the
941f3fea0f1SFlorian Hahn       // outer loop header only branches to the inner loop or the outer loop
942f3fea0f1SFlorian Hahn       // latch).
943f3fea0f1SFlorian Hahn       // FIXME: We could weaken this logic and allow multiple predecessors,
944f3fea0f1SFlorian Hahn       //        if the values are produced outside the loop latch. We would need
945f3fea0f1SFlorian Hahn       //        additional logic to update the PHI nodes in the exit block as
946f3fea0f1SFlorian Hahn       //        well.
947f3fea0f1SFlorian Hahn       if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
948f3fea0f1SFlorian Hahn         return false;
949f3fea0f1SFlorian Hahn     }
950f3fea0f1SFlorian Hahn   }
951f3fea0f1SFlorian Hahn   return true;
952f3fea0f1SFlorian Hahn }
953f3fea0f1SFlorian Hahn 
954a7b7d22dSCongzhe Cao // In case of multi-level nested loops, it may occur that lcssa phis exist in
955a7b7d22dSCongzhe Cao // the latch of InnerLoop, i.e., when defs of the incoming values are further
956a7b7d22dSCongzhe Cao // inside the loopnest. Sometimes those incoming values are not available
957a7b7d22dSCongzhe Cao // after interchange, since the original inner latch will become the new outer
958a7b7d22dSCongzhe Cao // latch which may have predecessor paths that do not include those incoming
959a7b7d22dSCongzhe Cao // values.
960a7b7d22dSCongzhe Cao // TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
961a7b7d22dSCongzhe Cao // multi-level loop nests.
areInnerLoopLatchPHIsSupported(Loop * OuterLoop,Loop * InnerLoop)962a7b7d22dSCongzhe Cao static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
963a7b7d22dSCongzhe Cao   if (InnerLoop->getSubLoops().empty())
964a7b7d22dSCongzhe Cao     return true;
965a7b7d22dSCongzhe Cao   // If the original outer latch has only one predecessor, then values defined
966a7b7d22dSCongzhe Cao   // further inside the looploop, e.g., in the innermost loop, will be available
967a7b7d22dSCongzhe Cao   // at the new outer latch after interchange.
968a7b7d22dSCongzhe Cao   if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
969a7b7d22dSCongzhe Cao     return true;
970a7b7d22dSCongzhe Cao 
971a7b7d22dSCongzhe Cao   // The outer latch has more than one predecessors, i.e., the inner
972a7b7d22dSCongzhe Cao   // exit and the inner header.
973a7b7d22dSCongzhe Cao   // PHI nodes in the inner latch are lcssa phis where the incoming values
974a7b7d22dSCongzhe Cao   // are defined further inside the loopnest. Check if those phis are used
975a7b7d22dSCongzhe Cao   // in the original inner latch. If that is the case then bail out since
976a7b7d22dSCongzhe Cao   // those incoming values may not be available at the new outer latch.
977a7b7d22dSCongzhe Cao   BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
978a7b7d22dSCongzhe Cao   for (PHINode &PHI : InnerLoopLatch->phis()) {
979a7b7d22dSCongzhe Cao     for (auto *U : PHI.users()) {
980a7b7d22dSCongzhe Cao       Instruction *UI = cast<Instruction>(U);
981a7b7d22dSCongzhe Cao       if (InnerLoopLatch == UI->getParent())
982a7b7d22dSCongzhe Cao         return false;
983a7b7d22dSCongzhe Cao     }
984a7b7d22dSCongzhe Cao   }
985a7b7d22dSCongzhe Cao   return true;
986a7b7d22dSCongzhe Cao }
987a7b7d22dSCongzhe Cao 
canInterchangeLoops(unsigned InnerLoopId,unsigned OuterLoopId,CharMatrix & DepMatrix)98888db86ddSKarthik Bhat bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
98988db86ddSKarthik Bhat                                                   unsigned OuterLoopId,
99088db86ddSKarthik Bhat                                                   CharMatrix &DepMatrix) {
99188db86ddSKarthik Bhat   if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
992d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
99388db86ddSKarthik Bhat                       << " and OuterLoopId = " << OuterLoopId
99488db86ddSKarthik Bhat                       << " due to dependence\n");
9959590658fSVivek Pandya     ORE->emit([&]() {
9969590658fSVivek Pandya       return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
997ad993521SFlorian Hahn                                       InnerLoop->getStartLoc(),
998ad993521SFlorian Hahn                                       InnerLoop->getHeader())
9999590658fSVivek Pandya              << "Cannot interchange loops due to dependences.";
10009590658fSVivek Pandya     });
100188db86ddSKarthik Bhat     return false;
100288db86ddSKarthik Bhat   }
10034284049dSFlorian Hahn   // Check if outer and inner loop contain legal instructions only.
10044284049dSFlorian Hahn   for (auto *BB : OuterLoop->blocks())
1005fd2bc112SFlorian Hahn     for (Instruction &I : BB->instructionsWithoutDebug())
10064284049dSFlorian Hahn       if (CallInst *CI = dyn_cast<CallInst>(&I)) {
10074284049dSFlorian Hahn         // readnone functions do not prevent interchanging.
1008c16fd6a3SPhilip Reames         if (CI->onlyWritesMemory())
10094284049dSFlorian Hahn           continue;
1010d34e60caSNicola Zaghen         LLVM_DEBUG(
1011d34e60caSNicola Zaghen             dbgs() << "Loops with call instructions cannot be interchanged "
10124284049dSFlorian Hahn                    << "safely.");
10139467ccf4SFlorian Hahn         ORE->emit([&]() {
10149467ccf4SFlorian Hahn           return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
10159467ccf4SFlorian Hahn                                           CI->getDebugLoc(),
10169467ccf4SFlorian Hahn                                           CI->getParent())
10179467ccf4SFlorian Hahn                  << "Cannot interchange loops due to call instruction.";
10189467ccf4SFlorian Hahn         });
10199467ccf4SFlorian Hahn 
10204284049dSFlorian Hahn         return false;
10214284049dSFlorian Hahn       }
10224284049dSFlorian Hahn 
1023fa6a2876SCongzhe Cao   if (!findInductions(InnerLoop, InnerLoopInductions)) {
1024fa6a2876SCongzhe Cao     LLVM_DEBUG(dbgs() << "Cound not find inner loop induction variables.\n");
1025fa6a2876SCongzhe Cao     return false;
1026fa6a2876SCongzhe Cao   }
1027fa6a2876SCongzhe Cao 
1028a7b7d22dSCongzhe Cao   if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
1029a7b7d22dSCongzhe Cao     LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
1030a7b7d22dSCongzhe Cao     ORE->emit([&]() {
1031a7b7d22dSCongzhe Cao       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",
1032a7b7d22dSCongzhe Cao                                       InnerLoop->getStartLoc(),
1033a7b7d22dSCongzhe Cao                                       InnerLoop->getHeader())
1034a7b7d22dSCongzhe Cao              << "Cannot interchange loops because unsupported PHI nodes found "
1035a7b7d22dSCongzhe Cao                 "in inner loop latch.";
1036a7b7d22dSCongzhe Cao     });
1037a7b7d22dSCongzhe Cao     return false;
1038a7b7d22dSCongzhe Cao   }
1039a7b7d22dSCongzhe Cao 
104088db86ddSKarthik Bhat   // TODO: The loops could not be interchanged due to current limitations in the
104188db86ddSKarthik Bhat   // transform module.
104288db86ddSKarthik Bhat   if (currentLimitations()) {
1043d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
104488db86ddSKarthik Bhat     return false;
104588db86ddSKarthik Bhat   }
104688db86ddSKarthik Bhat 
10478210fdf2SKarthik Bhat   // Check if the loops are tightly nested.
10488210fdf2SKarthik Bhat   if (!tightlyNested(OuterLoop, InnerLoop)) {
1049d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
10509590658fSVivek Pandya     ORE->emit([&]() {
10519590658fSVivek Pandya       return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1052ad993521SFlorian Hahn                                       InnerLoop->getStartLoc(),
1053ad993521SFlorian Hahn                                       InnerLoop->getHeader())
1054ad993521SFlorian Hahn              << "Cannot interchange loops because they are not tightly "
10559590658fSVivek Pandya                 "nested.";
10569590658fSVivek Pandya     });
10578210fdf2SKarthik Bhat     return false;
10588210fdf2SKarthik Bhat   }
10598210fdf2SKarthik Bhat 
1060e8a5c172SFlorian Hahn   if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1061e8a5c172SFlorian Hahn                                      OuterInnerReductions)) {
1062e8a5c172SFlorian Hahn     LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1063e8a5c172SFlorian Hahn     ORE->emit([&]() {
1064e8a5c172SFlorian Hahn       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1065e8a5c172SFlorian Hahn                                       InnerLoop->getStartLoc(),
1066e8a5c172SFlorian Hahn                                       InnerLoop->getHeader())
1067e8a5c172SFlorian Hahn              << "Found unsupported PHI node in loop exit.";
1068e8a5c172SFlorian Hahn     });
1069e8a5c172SFlorian Hahn     return false;
1070e8a5c172SFlorian Hahn   }
1071e8a5c172SFlorian Hahn 
1072e8a5c172SFlorian Hahn   if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1073d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1074f3fea0f1SFlorian Hahn     ORE->emit([&]() {
1075f3fea0f1SFlorian Hahn       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1076f3fea0f1SFlorian Hahn                                       OuterLoop->getStartLoc(),
1077f3fea0f1SFlorian Hahn                                       OuterLoop->getHeader())
1078f3fea0f1SFlorian Hahn              << "Found unsupported PHI node in loop exit.";
1079f3fea0f1SFlorian Hahn     });
1080f3fea0f1SFlorian Hahn     return false;
1081f3fea0f1SFlorian Hahn   }
1082f3fea0f1SFlorian Hahn 
108388db86ddSKarthik Bhat   return true;
108488db86ddSKarthik Bhat }
108588db86ddSKarthik Bhat 
getInstrOrderCost()108688db86ddSKarthik Bhat int LoopInterchangeProfitability::getInstrOrderCost() {
108788db86ddSKarthik Bhat   unsigned GoodOrder, BadOrder;
108888db86ddSKarthik Bhat   BadOrder = GoodOrder = 0;
1089f66efd61SFlorian Hahn   for (BasicBlock *BB : InnerLoop->blocks()) {
1090f66efd61SFlorian Hahn     for (Instruction &Ins : *BB) {
109188db86ddSKarthik Bhat       if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
109288db86ddSKarthik Bhat         unsigned NumOp = GEP->getNumOperands();
109388db86ddSKarthik Bhat         bool FoundInnerInduction = false;
109488db86ddSKarthik Bhat         bool FoundOuterInduction = false;
109588db86ddSKarthik Bhat         for (unsigned i = 0; i < NumOp; ++i) {
1096e8dc17a2SFlorian Hahn           // Skip operands that are not SCEV-able.
1097e8dc17a2SFlorian Hahn           if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1098e8dc17a2SFlorian Hahn             continue;
1099e8dc17a2SFlorian Hahn 
110088db86ddSKarthik Bhat           const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
110188db86ddSKarthik Bhat           const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
110288db86ddSKarthik Bhat           if (!AR)
110388db86ddSKarthik Bhat             continue;
110488db86ddSKarthik Bhat 
110588db86ddSKarthik Bhat           // If we find the inner induction after an outer induction e.g.
110688db86ddSKarthik Bhat           // for(int i=0;i<N;i++)
110788db86ddSKarthik Bhat           //   for(int j=0;j<N;j++)
110888db86ddSKarthik Bhat           //     A[i][j] = A[i-1][j-1]+k;
110988db86ddSKarthik Bhat           // then it is a good order.
111088db86ddSKarthik Bhat           if (AR->getLoop() == InnerLoop) {
111188db86ddSKarthik Bhat             // We found an InnerLoop induction after OuterLoop induction. It is
111288db86ddSKarthik Bhat             // a good order.
111388db86ddSKarthik Bhat             FoundInnerInduction = true;
111488db86ddSKarthik Bhat             if (FoundOuterInduction) {
111588db86ddSKarthik Bhat               GoodOrder++;
111688db86ddSKarthik Bhat               break;
111788db86ddSKarthik Bhat             }
111888db86ddSKarthik Bhat           }
111988db86ddSKarthik Bhat           // If we find the outer induction after an inner induction e.g.
112088db86ddSKarthik Bhat           // for(int i=0;i<N;i++)
112188db86ddSKarthik Bhat           //   for(int j=0;j<N;j++)
112288db86ddSKarthik Bhat           //     A[j][i] = A[j-1][i-1]+k;
112388db86ddSKarthik Bhat           // then it is a bad order.
112488db86ddSKarthik Bhat           if (AR->getLoop() == OuterLoop) {
112588db86ddSKarthik Bhat             // We found an OuterLoop induction after InnerLoop induction. It is
112688db86ddSKarthik Bhat             // a bad order.
112788db86ddSKarthik Bhat             FoundOuterInduction = true;
112888db86ddSKarthik Bhat             if (FoundInnerInduction) {
112988db86ddSKarthik Bhat               BadOrder++;
113088db86ddSKarthik Bhat               break;
113188db86ddSKarthik Bhat             }
113288db86ddSKarthik Bhat           }
113388db86ddSKarthik Bhat         }
113488db86ddSKarthik Bhat       }
113588db86ddSKarthik Bhat     }
113688db86ddSKarthik Bhat   }
113788db86ddSKarthik Bhat   return GoodOrder - BadOrder;
113888db86ddSKarthik Bhat }
113988db86ddSKarthik Bhat 
isProfitableForVectorization(unsigned InnerLoopId,unsigned OuterLoopId,CharMatrix & DepMatrix)1140e6b3a63aSChad Rosier static bool isProfitableForVectorization(unsigned InnerLoopId,
1141f044d3f9SBenjamin Kramer                                          unsigned OuterLoopId,
114288db86ddSKarthik Bhat                                          CharMatrix &DepMatrix) {
114388db86ddSKarthik Bhat   // TODO: Improve this heuristic to catch more cases.
114488db86ddSKarthik Bhat   // If the inner loop is loop independent or doesn't carry any dependency it is
114588db86ddSKarthik Bhat   // profitable to move this to outer position.
1146f66efd61SFlorian Hahn   for (auto &Row : DepMatrix) {
1147f66efd61SFlorian Hahn     if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
114888db86ddSKarthik Bhat       return false;
114988db86ddSKarthik Bhat     // TODO: We need to improve this heuristic.
1150f66efd61SFlorian Hahn     if (Row[OuterLoopId] != '=')
115188db86ddSKarthik Bhat       return false;
115288db86ddSKarthik Bhat   }
115388db86ddSKarthik Bhat   // If outer loop has dependence and inner loop is loop independent then it is
115488db86ddSKarthik Bhat   // profitable to interchange to enable parallelism.
1155ceee7889SFlorian Hahn   // If there are no dependences, interchanging will not improve anything.
1156ceee7889SFlorian Hahn   return !DepMatrix.empty();
115788db86ddSKarthik Bhat }
115888db86ddSKarthik Bhat 
isProfitable(const Loop * InnerLoop,const Loop * OuterLoop,unsigned InnerLoopId,unsigned OuterLoopId,CharMatrix & DepMatrix,const DenseMap<const Loop *,unsigned> & CostMap)1159*b941857bSCongzhe Cao bool LoopInterchangeProfitability::isProfitable(
1160*b941857bSCongzhe Cao     const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1161*b941857bSCongzhe Cao     unsigned OuterLoopId, CharMatrix &DepMatrix,
1162*b941857bSCongzhe Cao     const DenseMap<const Loop *, unsigned> &CostMap) {
1163*b941857bSCongzhe Cao   // TODO: Remove the legacy cost model.
116488db86ddSKarthik Bhat 
1165*b941857bSCongzhe Cao   // This is the new cost model returned from loop cache analysis.
1166*b941857bSCongzhe Cao   // A smaller index means the loop should be placed an outer loop, and vice
1167*b941857bSCongzhe Cao   // versa.
1168*b941857bSCongzhe Cao   if (CostMap.find(InnerLoop) != CostMap.end() &&
1169*b941857bSCongzhe Cao       CostMap.find(OuterLoop) != CostMap.end()) {
1170*b941857bSCongzhe Cao     unsigned InnerIndex = 0, OuterIndex = 0;
1171*b941857bSCongzhe Cao     InnerIndex = CostMap.find(InnerLoop)->second;
1172*b941857bSCongzhe Cao     OuterIndex = CostMap.find(OuterLoop)->second;
1173*b941857bSCongzhe Cao     LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
1174*b941857bSCongzhe Cao                       << ", OuterIndex = " << OuterIndex << "\n");
1175*b941857bSCongzhe Cao     if (InnerIndex < OuterIndex)
1176*b941857bSCongzhe Cao       return true;
1177*b941857bSCongzhe Cao   } else {
1178*b941857bSCongzhe Cao     // Legacy cost model: this is rough cost estimation algorithm. It counts the
1179*b941857bSCongzhe Cao     // good and bad order of induction variables in the instruction and allows
1180*b941857bSCongzhe Cao     // reordering if number of bad orders is more than good.
118172431890SChad Rosier     int Cost = getInstrOrderCost();
1182d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
118372431890SChad Rosier     if (Cost < -LoopInterchangeCostThreshold)
118488db86ddSKarthik Bhat       return true;
1185*b941857bSCongzhe Cao   }
118688db86ddSKarthik Bhat 
1187df005cbeSBenjamin Kramer   // It is not profitable as per current cache profitability model. But check if
118888db86ddSKarthik Bhat   // we can move this loop outside to improve parallelism.
1189ad993521SFlorian Hahn   if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1190ad993521SFlorian Hahn     return true;
1191ad993521SFlorian Hahn 
11929590658fSVivek Pandya   ORE->emit([&]() {
11939590658fSVivek Pandya     return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1194ad993521SFlorian Hahn                                     InnerLoop->getStartLoc(),
1195ad993521SFlorian Hahn                                     InnerLoop->getHeader())
1196*b941857bSCongzhe Cao            << "Interchanging loops is too costly and it does not improve "
1197*b941857bSCongzhe Cao               "parallelism.";
11989590658fSVivek Pandya   });
1199ad993521SFlorian Hahn   return false;
120088db86ddSKarthik Bhat }
120188db86ddSKarthik Bhat 
removeChildLoop(Loop * OuterLoop,Loop * InnerLoop)120288db86ddSKarthik Bhat void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
120388db86ddSKarthik Bhat                                                Loop *InnerLoop) {
12045912c667SFlorian Hahn   for (Loop *L : *OuterLoop)
12055912c667SFlorian Hahn     if (L == InnerLoop) {
12065912c667SFlorian Hahn       OuterLoop->removeChildLoop(L);
120788db86ddSKarthik Bhat       return;
120888db86ddSKarthik Bhat     }
12098ceb323bSBenjamin Kramer   llvm_unreachable("Couldn't find loop");
121088db86ddSKarthik Bhat }
12116adbd7aeSDaniel Jasper 
1212831a7577SFlorian Hahn /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1213831a7577SFlorian Hahn /// new inner and outer loop after interchanging: NewInner is the original
1214831a7577SFlorian Hahn /// outer loop and NewOuter is the original inner loop.
1215831a7577SFlorian Hahn ///
1216831a7577SFlorian Hahn /// Before interchanging, we have the following structure
1217831a7577SFlorian Hahn /// Outer preheader
1218831a7577SFlorian Hahn //  Outer header
1219831a7577SFlorian Hahn //    Inner preheader
1220831a7577SFlorian Hahn //    Inner header
1221831a7577SFlorian Hahn //      Inner body
1222831a7577SFlorian Hahn //      Inner latch
1223831a7577SFlorian Hahn //   outer bbs
1224831a7577SFlorian Hahn //   Outer latch
1225831a7577SFlorian Hahn //
1226831a7577SFlorian Hahn // After interchanging:
1227831a7577SFlorian Hahn // Inner preheader
1228831a7577SFlorian Hahn // Inner header
1229831a7577SFlorian Hahn //   Outer preheader
1230831a7577SFlorian Hahn //   Outer header
1231831a7577SFlorian Hahn //     Inner body
1232831a7577SFlorian Hahn //     outer bbs
1233831a7577SFlorian Hahn //     Outer latch
1234831a7577SFlorian Hahn //   Inner latch
restructureLoops(Loop * NewInner,Loop * NewOuter,BasicBlock * OrigInnerPreHeader,BasicBlock * OrigOuterPreHeader)1235831a7577SFlorian Hahn void LoopInterchangeTransform::restructureLoops(
1236831a7577SFlorian Hahn     Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1237831a7577SFlorian Hahn     BasicBlock *OrigOuterPreHeader) {
123888db86ddSKarthik Bhat   Loop *OuterLoopParent = OuterLoop->getParentLoop();
1239831a7577SFlorian Hahn   // The original inner loop preheader moves from the new inner loop to
1240831a7577SFlorian Hahn   // the parent loop, if there is one.
1241831a7577SFlorian Hahn   NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1242831a7577SFlorian Hahn   LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1243831a7577SFlorian Hahn 
1244831a7577SFlorian Hahn   // Switch the loop levels.
124588db86ddSKarthik Bhat   if (OuterLoopParent) {
124688db86ddSKarthik Bhat     // Remove the loop from its parent loop.
1247831a7577SFlorian Hahn     removeChildLoop(OuterLoopParent, NewInner);
1248831a7577SFlorian Hahn     removeChildLoop(NewInner, NewOuter);
1249831a7577SFlorian Hahn     OuterLoopParent->addChildLoop(NewOuter);
125088db86ddSKarthik Bhat   } else {
1251831a7577SFlorian Hahn     removeChildLoop(NewInner, NewOuter);
1252831a7577SFlorian Hahn     LI->changeTopLevelLoop(NewInner, NewOuter);
1253831a7577SFlorian Hahn   }
125489c1e35fSStefanos Baziotis   while (!NewOuter->isInnermost())
1255831a7577SFlorian Hahn     NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1256831a7577SFlorian Hahn   NewOuter->addChildLoop(NewInner);
1257831a7577SFlorian Hahn 
1258831a7577SFlorian Hahn   // BBs from the original inner loop.
1259831a7577SFlorian Hahn   SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1260831a7577SFlorian Hahn 
1261831a7577SFlorian Hahn   // Add BBs from the original outer loop to the original inner loop (excluding
1262831a7577SFlorian Hahn   // BBs already in inner loop)
1263831a7577SFlorian Hahn   for (BasicBlock *BB : NewInner->blocks())
1264831a7577SFlorian Hahn     if (LI->getLoopFor(BB) == NewInner)
1265831a7577SFlorian Hahn       NewOuter->addBlockEntry(BB);
1266831a7577SFlorian Hahn 
1267831a7577SFlorian Hahn   // Now remove inner loop header and latch from the new inner loop and move
1268831a7577SFlorian Hahn   // other BBs (the loop body) to the new inner loop.
1269831a7577SFlorian Hahn   BasicBlock *OuterHeader = NewOuter->getHeader();
1270831a7577SFlorian Hahn   BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1271831a7577SFlorian Hahn   for (BasicBlock *BB : OrigInnerBBs) {
127274418185SFlorian Hahn     // Nothing will change for BBs in child loops.
127374418185SFlorian Hahn     if (LI->getLoopFor(BB) != NewOuter)
127474418185SFlorian Hahn       continue;
1275831a7577SFlorian Hahn     // Remove the new outer loop header and latch from the new inner loop.
1276831a7577SFlorian Hahn     if (BB == OuterHeader || BB == OuterLatch)
1277831a7577SFlorian Hahn       NewInner->removeBlockFromLoop(BB);
1278831a7577SFlorian Hahn     else
1279831a7577SFlorian Hahn       LI->changeLoopFor(BB, NewInner);
128088db86ddSKarthik Bhat   }
128188db86ddSKarthik Bhat 
1282831a7577SFlorian Hahn   // The preheader of the original outer loop becomes part of the new
1283831a7577SFlorian Hahn   // outer loop.
1284831a7577SFlorian Hahn   NewOuter->addBlockEntry(OrigOuterPreHeader);
1285831a7577SFlorian Hahn   LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
12863afb974aSFlorian Hahn 
12873afb974aSFlorian Hahn   // Tell SE that we move the loops around.
12883afb974aSFlorian Hahn   SE->forgetLoop(NewOuter);
12893afb974aSFlorian Hahn   SE->forgetLoop(NewInner);
129088db86ddSKarthik Bhat }
129188db86ddSKarthik Bhat 
transform()129288db86ddSKarthik Bhat bool LoopInterchangeTransform::transform() {
129388db86ddSKarthik Bhat   bool Transformed = false;
129488db86ddSKarthik Bhat 
1295dd40f5e7SEugene Zelenko   if (InnerLoop->getSubLoops().empty()) {
129688db86ddSKarthik Bhat     BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1297e4961218SFlorian Hahn     LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
1298fa6a2876SCongzhe Cao     auto &InductionPHIs = LIL.getInnerLoopInductions();
1299fa6a2876SCongzhe Cao     if (InductionPHIs.empty()) {
1300d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
130188db86ddSKarthik Bhat       return false;
130288db86ddSKarthik Bhat     }
130388db86ddSKarthik Bhat 
1304fa6a2876SCongzhe Cao     SmallVector<Instruction *, 8> InnerIndexVarList;
1305fa6a2876SCongzhe Cao     for (PHINode *CurInductionPHI : InductionPHIs) {
1306fa6a2876SCongzhe Cao       if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1307fa6a2876SCongzhe Cao         InnerIndexVarList.push_back(
1308fa6a2876SCongzhe Cao             dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
130988db86ddSKarthik Bhat       else
1310fa6a2876SCongzhe Cao         InnerIndexVarList.push_back(
1311fa6a2876SCongzhe Cao             dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1312fa6a2876SCongzhe Cao     }
1313907b60fbSDavid Green 
1314e4961218SFlorian Hahn     // Create a new latch block for the inner loop. We split at the
1315e4961218SFlorian Hahn     // current latch's terminator and then move the condition and all
1316e4961218SFlorian Hahn     // operands that are not either loop-invariant or the induction PHI into the
1317e4961218SFlorian Hahn     // new latch block.
1318e4961218SFlorian Hahn     BasicBlock *NewLatch =
1319e4961218SFlorian Hahn         SplitBlock(InnerLoop->getLoopLatch(),
1320e4961218SFlorian Hahn                    InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1321e4961218SFlorian Hahn 
1322e4961218SFlorian Hahn     SmallSetVector<Instruction *, 4> WorkList;
1323e4961218SFlorian Hahn     unsigned i = 0;
1324fa6a2876SCongzhe Cao     auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1325e4961218SFlorian Hahn       for (; i < WorkList.size(); i++) {
1326e4961218SFlorian Hahn         // Duplicate instruction and move it the new latch. Update uses that
1327e4961218SFlorian Hahn         // have been moved.
1328e4961218SFlorian Hahn         Instruction *NewI = WorkList[i]->clone();
1329e4961218SFlorian Hahn         NewI->insertBefore(NewLatch->getFirstNonPHI());
1330e4961218SFlorian Hahn         assert(!NewI->mayHaveSideEffects() &&
1331e4961218SFlorian Hahn                "Moving instructions with side-effects may change behavior of "
1332e4961218SFlorian Hahn                "the loop nest!");
13335fc9e309SKazu Hirata         for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1334e4961218SFlorian Hahn           Instruction *UserI = cast<Instruction>(U.getUser());
1335e4961218SFlorian Hahn           if (!InnerLoop->contains(UserI->getParent()) ||
1336fa6a2876SCongzhe Cao               UserI->getParent() == NewLatch ||
1337fa6a2876SCongzhe Cao               llvm::is_contained(InductionPHIs, UserI))
1338e4961218SFlorian Hahn             U.set(NewI);
1339e4961218SFlorian Hahn         }
1340e4961218SFlorian Hahn         // Add operands of moved instruction to the worklist, except if they are
1341e4961218SFlorian Hahn         // outside the inner loop or are the induction PHI.
1342e4961218SFlorian Hahn         for (Value *Op : WorkList[i]->operands()) {
1343e4961218SFlorian Hahn           Instruction *OpI = dyn_cast<Instruction>(Op);
1344e4961218SFlorian Hahn           if (!OpI ||
1345e4961218SFlorian Hahn               this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1346fa6a2876SCongzhe Cao               llvm::is_contained(InductionPHIs, OpI))
1347e4961218SFlorian Hahn             continue;
1348e4961218SFlorian Hahn           WorkList.insert(OpI);
1349e4961218SFlorian Hahn         }
1350e4961218SFlorian Hahn       }
1351e4961218SFlorian Hahn     };
1352e4961218SFlorian Hahn 
1353e4961218SFlorian Hahn     // FIXME: Should we interchange when we have a constant condition?
1354e4961218SFlorian Hahn     Instruction *CondI = dyn_cast<Instruction>(
1355e4961218SFlorian Hahn         cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1356e4961218SFlorian Hahn             ->getCondition());
1357e4961218SFlorian Hahn     if (CondI)
1358e4961218SFlorian Hahn       WorkList.insert(CondI);
1359e4961218SFlorian Hahn     MoveInstructions();
1360fa6a2876SCongzhe Cao     for (Instruction *InnerIndexVar : InnerIndexVarList)
1361e4961218SFlorian Hahn       WorkList.insert(cast<Instruction>(InnerIndexVar));
1362e4961218SFlorian Hahn     MoveInstructions();
136388db86ddSKarthik Bhat 
1364df005cbeSBenjamin Kramer     // Splits the inner loops phi nodes out into a separate basic block.
1365d8fcf0deSFlorian Hahn     BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1366d8fcf0deSFlorian Hahn     SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1367d8fcf0deSFlorian Hahn     LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
136888db86ddSKarthik Bhat   }
136988db86ddSKarthik Bhat 
13708393b9fdSFlorian Hahn   // Instructions in the original inner loop preheader may depend on values
13718393b9fdSFlorian Hahn   // defined in the outer loop header. Move them there, because the original
13728393b9fdSFlorian Hahn   // inner loop preheader will become the entry into the interchanged loop nest.
13738393b9fdSFlorian Hahn   // Currently we move all instructions and rely on LICM to move invariant
13748393b9fdSFlorian Hahn   // instructions outside the loop nest.
13758393b9fdSFlorian Hahn   BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
13768393b9fdSFlorian Hahn   BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
13778393b9fdSFlorian Hahn   if (InnerLoopPreHeader != OuterLoopHeader) {
13788393b9fdSFlorian Hahn     SmallPtrSet<Instruction *, 4> NeedsMoving;
13798393b9fdSFlorian Hahn     for (Instruction &I :
13808393b9fdSFlorian Hahn          make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
13818393b9fdSFlorian Hahn                                          std::prev(InnerLoopPreHeader->end()))))
13828393b9fdSFlorian Hahn       I.moveBefore(OuterLoopHeader->getTerminator());
13838393b9fdSFlorian Hahn   }
13848393b9fdSFlorian Hahn 
138588db86ddSKarthik Bhat   Transformed |= adjustLoopLinks();
138688db86ddSKarthik Bhat   if (!Transformed) {
1387d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
138888db86ddSKarthik Bhat     return false;
138988db86ddSKarthik Bhat   }
139088db86ddSKarthik Bhat 
139188db86ddSKarthik Bhat   return true;
139288db86ddSKarthik Bhat }
139388db86ddSKarthik Bhat 
1394d8fcf0deSFlorian Hahn /// \brief Move all instructions except the terminator from FromBB right before
139579442920SBenjamin Kramer /// InsertBefore
moveBBContents(BasicBlock * FromBB,Instruction * InsertBefore)139679442920SBenjamin Kramer static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
139779442920SBenjamin Kramer   auto &ToList = InsertBefore->getParent()->getInstList();
139879442920SBenjamin Kramer   auto &FromList = FromBB->getInstList();
139979442920SBenjamin Kramer 
1400be4d8cbaSDuncan P. N. Exon Smith   ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1401be4d8cbaSDuncan P. N. Exon Smith                 FromBB->getTerminator()->getIterator());
140288db86ddSKarthik Bhat }
140388db86ddSKarthik Bhat 
1404f71abec6SAlexey Zhikhartsev /// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
swapBBContents(BasicBlock * BB1,BasicBlock * BB2)1405f71abec6SAlexey Zhikhartsev static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1406f71abec6SAlexey Zhikhartsev   // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1407f71abec6SAlexey Zhikhartsev   // from BB1 afterwards.
1408f71abec6SAlexey Zhikhartsev   auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1409f71abec6SAlexey Zhikhartsev   SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1410f71abec6SAlexey Zhikhartsev   for (Instruction *I : TempInstrs)
1411f71abec6SAlexey Zhikhartsev     I->removeFromParent();
1412f71abec6SAlexey Zhikhartsev 
1413f71abec6SAlexey Zhikhartsev   // Move instructions from BB2 to BB1.
1414f71abec6SAlexey Zhikhartsev   moveBBContents(BB2, BB1->getTerminator());
1415f71abec6SAlexey Zhikhartsev 
1416f71abec6SAlexey Zhikhartsev   // Move instructions from TempInstrs to BB2.
1417f71abec6SAlexey Zhikhartsev   for (Instruction *I : TempInstrs)
1418f71abec6SAlexey Zhikhartsev     I->insertBefore(BB2->getTerminator());
1419f71abec6SAlexey Zhikhartsev }
1420f71abec6SAlexey Zhikhartsev 
14219a432161SFlorian Hahn // Update BI to jump to NewBB instead of OldBB. Records updates to the
14229a432161SFlorian Hahn // dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
14239a432161SFlorian Hahn // \p OldBB  is exactly once in BI's successor list.
updateSuccessor(BranchInst * BI,BasicBlock * OldBB,BasicBlock * NewBB,std::vector<DominatorTree::UpdateType> & DTUpdates,bool MustUpdateOnce=true)1424c6296feaSFlorian Hahn static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1425c6296feaSFlorian Hahn                             BasicBlock *NewBB,
14269a432161SFlorian Hahn                             std::vector<DominatorTree::UpdateType> &DTUpdates,
14279a432161SFlorian Hahn                             bool MustUpdateOnce = true) {
14289a432161SFlorian Hahn   assert((!MustUpdateOnce ||
14299a432161SFlorian Hahn           llvm::count_if(successors(BI),
14309a432161SFlorian Hahn                          [OldBB](BasicBlock *BB) {
14319a432161SFlorian Hahn                            return BB == OldBB;
14329a432161SFlorian Hahn                          }) == 1) && "BI must jump to OldBB exactly once.");
14339a432161SFlorian Hahn   bool Changed = false;
14349a432161SFlorian Hahn   for (Use &Op : BI->operands())
14359a432161SFlorian Hahn     if (Op == OldBB) {
14369a432161SFlorian Hahn       Op.set(NewBB);
14379a432161SFlorian Hahn       Changed = true;
14389a432161SFlorian Hahn     }
1439c6296feaSFlorian Hahn 
14409a432161SFlorian Hahn   if (Changed) {
1441c6296feaSFlorian Hahn     DTUpdates.push_back(
1442c6296feaSFlorian Hahn         {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1443c6296feaSFlorian Hahn     DTUpdates.push_back(
1444c6296feaSFlorian Hahn         {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1445c6296feaSFlorian Hahn   }
14469a432161SFlorian Hahn   assert(Changed && "Expected a successor to be updated");
1447c6296feaSFlorian Hahn }
1448c6296feaSFlorian Hahn 
14496feb6371SFlorian Hahn // Move Lcssa PHIs to the right place.
moveLCSSAPhis(BasicBlock * InnerExit,BasicBlock * InnerHeader,BasicBlock * InnerLatch,BasicBlock * OuterHeader,BasicBlock * OuterLatch,BasicBlock * OuterExit,Loop * InnerLoop,LoopInfo * LI)145011b2f4feSFlorian Hahn static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
145111b2f4feSFlorian Hahn                           BasicBlock *InnerLatch, BasicBlock *OuterHeader,
14521ee93240SFlorian Hahn                           BasicBlock *OuterLatch, BasicBlock *OuterExit,
14531ee93240SFlorian Hahn                           Loop *InnerLoop, LoopInfo *LI) {
145411b2f4feSFlorian Hahn 
145511b2f4feSFlorian Hahn   // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
145611b2f4feSFlorian Hahn   // defined either in the header or latch. Those blocks will become header and
145711b2f4feSFlorian Hahn   // latch of the new outer loop, and the only possible users can PHI nodes
145811b2f4feSFlorian Hahn   // in the exit block of the loop nest or the outer loop header (reduction
145911b2f4feSFlorian Hahn   // PHIs, in that case, the incoming value must be defined in the inner loop
146011b2f4feSFlorian Hahn   // header). We can just substitute the user with the incoming value and remove
146111b2f4feSFlorian Hahn   // the PHI.
146211b2f4feSFlorian Hahn   for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
146311b2f4feSFlorian Hahn     assert(P.getNumIncomingValues() == 1 &&
146411b2f4feSFlorian Hahn            "Only loops with a single exit are supported!");
146511b2f4feSFlorian Hahn 
146611b2f4feSFlorian Hahn     // Incoming values are guaranteed be instructions currently.
146711b2f4feSFlorian Hahn     auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1468eac34875SCongzhe Cao     // In case of multi-level nested loops, follow LCSSA to find the incoming
1469eac34875SCongzhe Cao     // value defined from the innermost loop.
1470eac34875SCongzhe Cao     auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
147111b2f4feSFlorian Hahn     // Skip phis with incoming values from the inner loop body, excluding the
147211b2f4feSFlorian Hahn     // header and latch.
1473eac34875SCongzhe Cao     if (IncIInnerMost->getParent() != InnerLatch &&
1474eac34875SCongzhe Cao         IncIInnerMost->getParent() != InnerHeader)
147511b2f4feSFlorian Hahn       continue;
147611b2f4feSFlorian Hahn 
147711b2f4feSFlorian Hahn     assert(all_of(P.users(),
147811b2f4feSFlorian Hahn                   [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
147911b2f4feSFlorian Hahn                     return (cast<PHINode>(U)->getParent() == OuterHeader &&
148011b2f4feSFlorian Hahn                             IncI->getParent() == InnerHeader) ||
148111b2f4feSFlorian Hahn                            cast<PHINode>(U)->getParent() == OuterExit;
148211b2f4feSFlorian Hahn                   }) &&
148311b2f4feSFlorian Hahn            "Can only replace phis iff the uses are in the loop nest exit or "
148411b2f4feSFlorian Hahn            "the incoming value is defined in the inner header (it will "
148511b2f4feSFlorian Hahn            "dominate all loop blocks after interchanging)");
148611b2f4feSFlorian Hahn     P.replaceAllUsesWith(IncI);
148711b2f4feSFlorian Hahn     P.eraseFromParent();
148811b2f4feSFlorian Hahn   }
148911b2f4feSFlorian Hahn 
14906feb6371SFlorian Hahn   SmallVector<PHINode *, 8> LcssaInnerExit;
14916feb6371SFlorian Hahn   for (PHINode &P : InnerExit->phis())
14926feb6371SFlorian Hahn     LcssaInnerExit.push_back(&P);
14936feb6371SFlorian Hahn 
14946feb6371SFlorian Hahn   SmallVector<PHINode *, 8> LcssaInnerLatch;
14956feb6371SFlorian Hahn   for (PHINode &P : InnerLatch->phis())
14966feb6371SFlorian Hahn     LcssaInnerLatch.push_back(&P);
14976feb6371SFlorian Hahn 
14986feb6371SFlorian Hahn   // Lcssa PHIs for values used outside the inner loop are in InnerExit.
14996feb6371SFlorian Hahn   // If a PHI node has users outside of InnerExit, it has a use outside the
15006feb6371SFlorian Hahn   // interchanged loop and we have to preserve it. We move these to
15016feb6371SFlorian Hahn   // InnerLatch, which will become the new exit block for the innermost
150211b2f4feSFlorian Hahn   // loop after interchanging.
150311b2f4feSFlorian Hahn   for (PHINode *P : LcssaInnerExit)
15046feb6371SFlorian Hahn     P->moveBefore(InnerLatch->getFirstNonPHI());
15056feb6371SFlorian Hahn 
15066feb6371SFlorian Hahn   // If the inner loop latch contains LCSSA PHIs, those come from a child loop
15076feb6371SFlorian Hahn   // and we have to move them to the new inner latch.
15086feb6371SFlorian Hahn   for (PHINode *P : LcssaInnerLatch)
15096feb6371SFlorian Hahn     P->moveBefore(InnerExit->getFirstNonPHI());
15106feb6371SFlorian Hahn 
151111b2f4feSFlorian Hahn   // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
15121ee93240SFlorian Hahn   // incoming values defined in the outer loop, we have to add a new PHI
151311b2f4feSFlorian Hahn   // in the inner loop latch, which became the exit block of the outer loop,
151411b2f4feSFlorian Hahn   // after interchanging.
151511b2f4feSFlorian Hahn   if (OuterExit) {
151611b2f4feSFlorian Hahn     for (PHINode &P : OuterExit->phis()) {
151711b2f4feSFlorian Hahn       if (P.getNumIncomingValues() != 1)
151811b2f4feSFlorian Hahn         continue;
15191ee93240SFlorian Hahn       // Skip Phis with incoming values defined in the inner loop. Those should
152011b2f4feSFlorian Hahn       // already have been updated.
152111b2f4feSFlorian Hahn       auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
15221ee93240SFlorian Hahn       if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
152311b2f4feSFlorian Hahn         continue;
152411b2f4feSFlorian Hahn 
152511b2f4feSFlorian Hahn       PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
152611b2f4feSFlorian Hahn       NewPhi->setIncomingValue(0, P.getIncomingValue(0));
152711b2f4feSFlorian Hahn       NewPhi->setIncomingBlock(0, OuterLatch);
15283f8be15fSCongzhe Cao       // We might have incoming edges from other BBs, i.e., the original outer
15293f8be15fSCongzhe Cao       // header.
15303f8be15fSCongzhe Cao       for (auto *Pred : predecessors(InnerLatch)) {
15313f8be15fSCongzhe Cao         if (Pred == OuterLatch)
15323f8be15fSCongzhe Cao           continue;
15333f8be15fSCongzhe Cao         NewPhi->addIncoming(P.getIncomingValue(0), Pred);
15343f8be15fSCongzhe Cao       }
153511b2f4feSFlorian Hahn       NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
153611b2f4feSFlorian Hahn       P.setIncomingValue(0, NewPhi);
153711b2f4feSFlorian Hahn     }
153811b2f4feSFlorian Hahn   }
153911b2f4feSFlorian Hahn 
15406feb6371SFlorian Hahn   // Now adjust the incoming blocks for the LCSSA PHIs.
15416feb6371SFlorian Hahn   // For PHIs moved from Inner's exit block, we need to replace Inner's latch
15426feb6371SFlorian Hahn   // with the new latch.
15431a1b9221SRoman Lebedev   InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
15446feb6371SFlorian Hahn }
15456feb6371SFlorian Hahn 
adjustLoopBranches()154688db86ddSKarthik Bhat bool LoopInterchangeTransform::adjustLoopBranches() {
1547d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1548c6296feaSFlorian Hahn   std::vector<DominatorTree::UpdateType> DTUpdates;
1549c6296feaSFlorian Hahn 
1550236f6febSFlorian Hahn   BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1551236f6febSFlorian Hahn   BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1552236f6febSFlorian Hahn 
1553236f6febSFlorian Hahn   assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1554236f6febSFlorian Hahn          InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1555236f6febSFlorian Hahn          InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1556236f6febSFlorian Hahn   // Ensure that both preheaders do not contain PHI nodes and have single
1557236f6febSFlorian Hahn   // predecessors. This allows us to move them easily. We use
1558236f6febSFlorian Hahn   // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1559236f6febSFlorian Hahn   // preheaders do not satisfy those conditions.
1560236f6febSFlorian Hahn   if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1561236f6febSFlorian Hahn       !OuterLoopPreHeader->getUniquePredecessor())
1562f31eba64SAlina Sbirlea     OuterLoopPreHeader =
1563f31eba64SAlina Sbirlea         InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1564236f6febSFlorian Hahn   if (InnerLoopPreHeader == OuterLoop->getHeader())
1565f31eba64SAlina Sbirlea     InnerLoopPreHeader =
1566f31eba64SAlina Sbirlea         InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1567236f6febSFlorian Hahn 
156888db86ddSKarthik Bhat   // Adjust the loop preheader
156988db86ddSKarthik Bhat   BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
157088db86ddSKarthik Bhat   BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
157188db86ddSKarthik Bhat   BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
157288db86ddSKarthik Bhat   BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
157388db86ddSKarthik Bhat   BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
157488db86ddSKarthik Bhat   BasicBlock *InnerLoopLatchPredecessor =
157588db86ddSKarthik Bhat       InnerLoopLatch->getUniquePredecessor();
157688db86ddSKarthik Bhat   BasicBlock *InnerLoopLatchSuccessor;
157788db86ddSKarthik Bhat   BasicBlock *OuterLoopLatchSuccessor;
157888db86ddSKarthik Bhat 
157988db86ddSKarthik Bhat   BranchInst *OuterLoopLatchBI =
158088db86ddSKarthik Bhat       dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
158188db86ddSKarthik Bhat   BranchInst *InnerLoopLatchBI =
158288db86ddSKarthik Bhat       dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
158388db86ddSKarthik Bhat   BranchInst *OuterLoopHeaderBI =
158488db86ddSKarthik Bhat       dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
158588db86ddSKarthik Bhat   BranchInst *InnerLoopHeaderBI =
158688db86ddSKarthik Bhat       dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
158788db86ddSKarthik Bhat 
158888db86ddSKarthik Bhat   if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
158988db86ddSKarthik Bhat       !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
159088db86ddSKarthik Bhat       !InnerLoopHeaderBI)
159188db86ddSKarthik Bhat     return false;
159288db86ddSKarthik Bhat 
159388db86ddSKarthik Bhat   BranchInst *InnerLoopLatchPredecessorBI =
159488db86ddSKarthik Bhat       dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
159588db86ddSKarthik Bhat   BranchInst *OuterLoopPredecessorBI =
159688db86ddSKarthik Bhat       dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
159788db86ddSKarthik Bhat 
159888db86ddSKarthik Bhat   if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
159988db86ddSKarthik Bhat     return false;
1600df005cbeSBenjamin Kramer   BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1601df005cbeSBenjamin Kramer   if (!InnerLoopHeaderSuccessor)
160288db86ddSKarthik Bhat     return false;
160388db86ddSKarthik Bhat 
16049a432161SFlorian Hahn   // Adjust Loop Preheader and headers.
16059a432161SFlorian Hahn   // The branches in the outer loop predecessor and the outer loop header can
16069a432161SFlorian Hahn   // be unconditional branches or conditional branches with duplicates. Consider
16079a432161SFlorian Hahn   // this when updating the successors.
1608c6296feaSFlorian Hahn   updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
16099a432161SFlorian Hahn                   InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
16109a432161SFlorian Hahn   // The outer loop header might or might not branch to the outer latch.
16119a432161SFlorian Hahn   // We are guaranteed to branch to the inner loop preheader.
1612ce2db900SCongzhe Cao   if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1613ce2db900SCongzhe Cao     // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1614ce2db900SCongzhe Cao     updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1615ce2db900SCongzhe Cao                     DTUpdates,
16169a432161SFlorian Hahn                     /*MustUpdateOnce=*/false);
1617ce2db900SCongzhe Cao   }
1618c6296feaSFlorian Hahn   updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
16199a432161SFlorian Hahn                   InnerLoopHeaderSuccessor, DTUpdates,
16209a432161SFlorian Hahn                   /*MustUpdateOnce=*/false);
162188db86ddSKarthik Bhat 
16228210fdf2SKarthik Bhat   // Adjust reduction PHI's now that the incoming block has changed.
16231a1b9221SRoman Lebedev   InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
16248210fdf2SKarthik Bhat                                                OuterLoopHeader);
16258210fdf2SKarthik Bhat 
1626c6296feaSFlorian Hahn   updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1627c6296feaSFlorian Hahn                   OuterLoopPreHeader, DTUpdates);
162888db86ddSKarthik Bhat 
162988db86ddSKarthik Bhat   // -------------Adjust loop latches-----------
163088db86ddSKarthik Bhat   if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
163188db86ddSKarthik Bhat     InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
163288db86ddSKarthik Bhat   else
163388db86ddSKarthik Bhat     InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
163488db86ddSKarthik Bhat 
1635c6296feaSFlorian Hahn   updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1636c6296feaSFlorian Hahn                   InnerLoopLatchSuccessor, DTUpdates);
163788db86ddSKarthik Bhat 
163888db86ddSKarthik Bhat   if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
163988db86ddSKarthik Bhat     OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
164088db86ddSKarthik Bhat   else
164188db86ddSKarthik Bhat     OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
164288db86ddSKarthik Bhat 
1643c6296feaSFlorian Hahn   updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1644c6296feaSFlorian Hahn                   OuterLoopLatchSuccessor, DTUpdates);
1645c6296feaSFlorian Hahn   updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1646c6296feaSFlorian Hahn                   DTUpdates);
164788db86ddSKarthik Bhat 
1648c6296feaSFlorian Hahn   DT->applyUpdates(DTUpdates);
1649831a7577SFlorian Hahn   restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1650831a7577SFlorian Hahn                    OuterLoopPreHeader);
1651831a7577SFlorian Hahn 
165211b2f4feSFlorian Hahn   moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
16531ee93240SFlorian Hahn                 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
16541ee93240SFlorian Hahn                 InnerLoop, LI);
16556feb6371SFlorian Hahn   // For PHIs in the exit block of the outer loop, outer's latch has been
16566feb6371SFlorian Hahn   // replaced by Inners'.
16571a1b9221SRoman Lebedev   OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
16586feb6371SFlorian Hahn 
1659bfefde22SCongzhe Cao   auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1660a684a994SFlorian Hahn   // Now update the reduction PHIs in the inner and outer loop headers.
1661a684a994SFlorian Hahn   SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1662c714da2cSKazu Hirata   for (PHINode &PHI : InnerLoopHeader->phis())
1663c714da2cSKazu Hirata     if (OuterInnerReductions.contains(&PHI))
1664fa6a2876SCongzhe Cao       InnerLoopPHIs.push_back(&PHI);
1665fa6a2876SCongzhe Cao 
1666c714da2cSKazu Hirata   for (PHINode &PHI : OuterLoopHeader->phis())
1667c714da2cSKazu Hirata     if (OuterInnerReductions.contains(&PHI))
166837e34b74SCongzhe Cao       OuterLoopPHIs.push_back(&PHI);
1669a684a994SFlorian Hahn 
1670a684a994SFlorian Hahn   // Now move the remaining reduction PHIs from outer to inner loop header and
1671a684a994SFlorian Hahn   // vice versa. The PHI nodes must be part of a reduction across the inner and
1672a684a994SFlorian Hahn   // outer loop and all the remains to do is and updating the incoming blocks.
1673a684a994SFlorian Hahn   for (PHINode *PHI : OuterLoopPHIs) {
167437e34b74SCongzhe Cao     LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump(););
1675a684a994SFlorian Hahn     PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
16763badd17bSBenjamin Kramer     assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1677a684a994SFlorian Hahn   }
1678a684a994SFlorian Hahn   for (PHINode *PHI : InnerLoopPHIs) {
1679fa6a2876SCongzhe Cao     LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump(););
1680a684a994SFlorian Hahn     PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
16813badd17bSBenjamin Kramer     assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1682a684a994SFlorian Hahn   }
1683d8fcf0deSFlorian Hahn 
1684d8fcf0deSFlorian Hahn   // Update the incoming blocks for moved PHI nodes.
16851a1b9221SRoman Lebedev   OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
16861a1b9221SRoman Lebedev   OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
16871a1b9221SRoman Lebedev   InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
16881a1b9221SRoman Lebedev   InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1689d8fcf0deSFlorian Hahn 
169054cb552bSFlorian Hahn   // Values defined in the outer loop header could be used in the inner loop
169154cb552bSFlorian Hahn   // latch. In that case, we need to create LCSSA phis for them, because after
169254cb552bSFlorian Hahn   // interchanging they will be defined in the new inner loop and used in the
169354cb552bSFlorian Hahn   // new outer loop.
169454cb552bSFlorian Hahn   IRBuilder<> Builder(OuterLoopHeader->getContext());
169554cb552bSFlorian Hahn   SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
169654cb552bSFlorian Hahn   for (Instruction &I :
169754cb552bSFlorian Hahn        make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
169854cb552bSFlorian Hahn     MayNeedLCSSAPhis.push_back(&I);
169954cb552bSFlorian Hahn   formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
170054cb552bSFlorian Hahn 
170188db86ddSKarthik Bhat   return true;
170288db86ddSKarthik Bhat }
170388db86ddSKarthik Bhat 
adjustLoopLinks()170488db86ddSKarthik Bhat bool LoopInterchangeTransform::adjustLoopLinks() {
170588db86ddSKarthik Bhat   // Adjust all branches in the inner and outer loop.
170688db86ddSKarthik Bhat   bool Changed = adjustLoopBranches();
1707f71abec6SAlexey Zhikhartsev   if (Changed) {
1708f71abec6SAlexey Zhikhartsev     // We have interchanged the preheaders so we need to interchange the data in
1709f71abec6SAlexey Zhikhartsev     // the preheaders as well. This is because the content of the inner
1710f71abec6SAlexey Zhikhartsev     // preheader was previously executed inside the outer loop.
1711f71abec6SAlexey Zhikhartsev     BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1712f71abec6SAlexey Zhikhartsev     BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1713f71abec6SAlexey Zhikhartsev     swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1714f71abec6SAlexey Zhikhartsev   }
171588db86ddSKarthik Bhat   return Changed;
171688db86ddSKarthik Bhat }
171788db86ddSKarthik Bhat 
17189b8b1645SBenjamin Kramer namespace {
17199c21c6c9SArthur Eubanks /// Main LoopInterchange Pass.
17209c21c6c9SArthur Eubanks struct LoopInterchangeLegacyPass : public LoopPass {
17219c21c6c9SArthur Eubanks   static char ID;
1722dd40f5e7SEugene Zelenko 
LoopInterchangeLegacyPass__anon401b65a51611::LoopInterchangeLegacyPass17239c21c6c9SArthur Eubanks   LoopInterchangeLegacyPass() : LoopPass(ID) {
17249c21c6c9SArthur Eubanks     initializeLoopInterchangeLegacyPassPass(*PassRegistry::getPassRegistry());
17259c21c6c9SArthur Eubanks   }
17269c21c6c9SArthur Eubanks 
getAnalysisUsage__anon401b65a51611::LoopInterchangeLegacyPass17279c21c6c9SArthur Eubanks   void getAnalysisUsage(AnalysisUsage &AU) const override {
17289c21c6c9SArthur Eubanks     AU.addRequired<DependenceAnalysisWrapperPass>();
17299c21c6c9SArthur Eubanks     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
17309c21c6c9SArthur Eubanks 
17319c21c6c9SArthur Eubanks     getLoopAnalysisUsage(AU);
17329c21c6c9SArthur Eubanks   }
17339c21c6c9SArthur Eubanks 
runOnLoop__anon401b65a51611::LoopInterchangeLegacyPass17349c21c6c9SArthur Eubanks   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
17359c21c6c9SArthur Eubanks     if (skipLoop(L))
17369c21c6c9SArthur Eubanks       return false;
17379c21c6c9SArthur Eubanks 
17389c21c6c9SArthur Eubanks     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
17399c21c6c9SArthur Eubanks     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
17409c21c6c9SArthur Eubanks     auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
17419c21c6c9SArthur Eubanks     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
17429c21c6c9SArthur Eubanks     auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1743*b941857bSCongzhe Cao     std::unique_ptr<CacheCost> CC = nullptr;
1744*b941857bSCongzhe Cao     return LoopInterchange(SE, LI, DI, DT, CC, ORE).run(L);
17459c21c6c9SArthur Eubanks   }
17469c21c6c9SArthur Eubanks };
17479b8b1645SBenjamin Kramer } // namespace
17489c21c6c9SArthur Eubanks 
17499c21c6c9SArthur Eubanks char LoopInterchangeLegacyPass::ID = 0;
17509c21c6c9SArthur Eubanks 
17519c21c6c9SArthur Eubanks INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",
175288db86ddSKarthik Bhat                       "Interchanges loops for cache reuse", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)17538600fee5SFlorian Hahn INITIALIZE_PASS_DEPENDENCY(LoopPass)
175449c22190SChandler Carruth INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
1755ad993521SFlorian Hahn INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
175688db86ddSKarthik Bhat 
17579c21c6c9SArthur Eubanks INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",
175888db86ddSKarthik Bhat                     "Interchanges loops for cache reuse", false, false)
175988db86ddSKarthik Bhat 
17609c21c6c9SArthur Eubanks Pass *llvm::createLoopInterchangePass() {
17619c21c6c9SArthur Eubanks   return new LoopInterchangeLegacyPass();
17629c21c6c9SArthur Eubanks }
17639c21c6c9SArthur Eubanks 
run(LoopNest & LN,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater & U)17640eeaec2aSTa-Wei Tu PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
17650eeaec2aSTa-Wei Tu                                            LoopAnalysisManager &AM,
17669c21c6c9SArthur Eubanks                                            LoopStandardAnalysisResults &AR,
17679c21c6c9SArthur Eubanks                                            LPMUpdater &U) {
17680eeaec2aSTa-Wei Tu   Function &F = *LN.getParent();
17699c21c6c9SArthur Eubanks 
17709c21c6c9SArthur Eubanks   DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1771*b941857bSCongzhe Cao   std::unique_ptr<CacheCost> CC =
1772*b941857bSCongzhe Cao       CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
17739c21c6c9SArthur Eubanks   OptimizationRemarkEmitter ORE(&F);
1774*b941857bSCongzhe Cao   if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
17759c21c6c9SArthur Eubanks     return PreservedAnalyses::all();
17769c21c6c9SArthur Eubanks   return getLoopPassPreservedAnalyses();
17779c21c6c9SArthur Eubanks }
1778