1dd40f5e7SEugene Zelenko //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
288db86ddSKarthik Bhat //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
688db86ddSKarthik Bhat //
788db86ddSKarthik Bhat //===----------------------------------------------------------------------===//
888db86ddSKarthik Bhat //
988db86ddSKarthik Bhat // This Pass handles loop interchange transform.
1088db86ddSKarthik Bhat // This pass interchanges loops to provide a more cache-friendly memory access
1188db86ddSKarthik Bhat // patterns.
1288db86ddSKarthik Bhat //
1388db86ddSKarthik Bhat //===----------------------------------------------------------------------===//
1488db86ddSKarthik Bhat
159c21c6c9SArthur Eubanks #include "llvm/Transforms/Scalar/LoopInterchange.h"
16dd40f5e7SEugene Zelenko #include "llvm/ADT/STLExtras.h"
1788db86ddSKarthik Bhat #include "llvm/ADT/SmallVector.h"
186e004336SFlorian Hahn #include "llvm/ADT/Statistic.h"
19dd40f5e7SEugene Zelenko #include "llvm/ADT/StringRef.h"
2088db86ddSKarthik Bhat #include "llvm/Analysis/DependenceAnalysis.h"
21*b941857bSCongzhe Cao #include "llvm/Analysis/LoopCacheAnalysis.h"
2288db86ddSKarthik Bhat #include "llvm/Analysis/LoopInfo.h"
23829c1b64SCongzhe Cao #include "llvm/Analysis/LoopNestAnalysis.h"
248600fee5SFlorian Hahn #include "llvm/Analysis/LoopPass.h"
250965da20SAdam Nemet #include "llvm/Analysis/OptimizationRemarkEmitter.h"
2688db86ddSKarthik Bhat #include "llvm/Analysis/ScalarEvolution.h"
2788db86ddSKarthik Bhat #include "llvm/Analysis/ScalarEvolutionExpressions.h"
28dd40f5e7SEugene Zelenko #include "llvm/IR/BasicBlock.h"
29dd40f5e7SEugene Zelenko #include "llvm/IR/Constants.h"
30dd40f5e7SEugene Zelenko #include "llvm/IR/DiagnosticInfo.h"
31799003bfSBenjamin Kramer #include "llvm/IR/Dominators.h"
3288db86ddSKarthik Bhat #include "llvm/IR/Function.h"
3354cb552bSFlorian Hahn #include "llvm/IR/IRBuilder.h"
34dd40f5e7SEugene Zelenko #include "llvm/IR/InstrTypes.h"
35dd40f5e7SEugene Zelenko #include "llvm/IR/Instruction.h"
36dd40f5e7SEugene Zelenko #include "llvm/IR/Instructions.h"
37dd40f5e7SEugene Zelenko #include "llvm/IR/User.h"
38dd40f5e7SEugene Zelenko #include "llvm/IR/Value.h"
3905da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
4088db86ddSKarthik Bhat #include "llvm/Pass.h"
41dd40f5e7SEugene Zelenko #include "llvm/Support/Casting.h"
42dd40f5e7SEugene Zelenko #include "llvm/Support/CommandLine.h"
4388db86ddSKarthik Bhat #include "llvm/Support/Debug.h"
44dd40f5e7SEugene Zelenko #include "llvm/Support/ErrorHandling.h"
4588db86ddSKarthik Bhat #include "llvm/Support/raw_ostream.h"
46799003bfSBenjamin Kramer #include "llvm/Transforms/Scalar.h"
4788db86ddSKarthik Bhat #include "llvm/Transforms/Utils/BasicBlockUtils.h"
48799003bfSBenjamin Kramer #include "llvm/Transforms/Utils/LoopUtils.h"
49dd40f5e7SEugene Zelenko #include <cassert>
50dd40f5e7SEugene Zelenko #include <utility>
51dd40f5e7SEugene Zelenko #include <vector>
529d8f6f8aSDavide Italiano
5388db86ddSKarthik Bhat using namespace llvm;
5488db86ddSKarthik Bhat
5588db86ddSKarthik Bhat #define DEBUG_TYPE "loop-interchange"
5688db86ddSKarthik Bhat
576e004336SFlorian Hahn STATISTIC(LoopsInterchanged, "Number of loops interchanged");
586e004336SFlorian Hahn
5972431890SChad Rosier static cl::opt<int> LoopInterchangeCostThreshold(
6072431890SChad Rosier "loop-interchange-threshold", cl::init(0), cl::Hidden,
6172431890SChad Rosier cl::desc("Interchange if you gain more than this number"));
6272431890SChad Rosier
6388db86ddSKarthik Bhat namespace {
6488db86ddSKarthik Bhat
65dd40f5e7SEugene Zelenko using LoopVector = SmallVector<Loop *, 8>;
6688db86ddSKarthik Bhat
6788db86ddSKarthik Bhat // TODO: Check if we can use a sparse matrix here.
68dd40f5e7SEugene Zelenko using CharMatrix = std::vector<std::vector<char>>;
69dd40f5e7SEugene Zelenko
70dd40f5e7SEugene Zelenko } // end anonymous namespace
7188db86ddSKarthik Bhat
7288db86ddSKarthik Bhat // Maximum number of dependencies that can be handled in the dependency matrix.
7388db86ddSKarthik Bhat static const unsigned MaxMemInstrCount = 100;
7488db86ddSKarthik Bhat
7588db86ddSKarthik Bhat // Maximum loop depth supported.
7688db86ddSKarthik Bhat static const unsigned MaxLoopNestDepth = 10;
7788db86ddSKarthik Bhat
7888db86ddSKarthik Bhat #ifdef DUMP_DEP_MATRICIES
printDepMatrix(CharMatrix & DepMatrix)79dd40f5e7SEugene Zelenko static void printDepMatrix(CharMatrix &DepMatrix) {
80f66efd61SFlorian Hahn for (auto &Row : DepMatrix) {
81f66efd61SFlorian Hahn for (auto D : Row)
82d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << D << " ");
83d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "\n");
8488db86ddSKarthik Bhat }
8588db86ddSKarthik Bhat }
8688db86ddSKarthik Bhat #endif
8788db86ddSKarthik Bhat
populateDependencyMatrix(CharMatrix & DepMatrix,unsigned Level,Loop * L,DependenceInfo * DI)888210fdf2SKarthik Bhat static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
8949c22190SChandler Carruth Loop *L, DependenceInfo *DI) {
90dd40f5e7SEugene Zelenko using ValueVector = SmallVector<Value *, 16>;
91dd40f5e7SEugene Zelenko
9288db86ddSKarthik Bhat ValueVector MemInstr;
9388db86ddSKarthik Bhat
9488db86ddSKarthik Bhat // For each block.
95f66efd61SFlorian Hahn for (BasicBlock *BB : L->blocks()) {
9688db86ddSKarthik Bhat // Scan the BB and collect legal loads and stores.
97f66efd61SFlorian Hahn for (Instruction &I : *BB) {
9809c1109bSChad Rosier if (!isa<Instruction>(I))
9988db86ddSKarthik Bhat return false;
100f66efd61SFlorian Hahn if (auto *Ld = dyn_cast<LoadInst>(&I)) {
10109c1109bSChad Rosier if (!Ld->isSimple())
10288db86ddSKarthik Bhat return false;
103f66efd61SFlorian Hahn MemInstr.push_back(&I);
104f66efd61SFlorian Hahn } else if (auto *St = dyn_cast<StoreInst>(&I)) {
10509c1109bSChad Rosier if (!St->isSimple())
10609c1109bSChad Rosier return false;
107f66efd61SFlorian Hahn MemInstr.push_back(&I);
10809c1109bSChad Rosier }
10988db86ddSKarthik Bhat }
11088db86ddSKarthik Bhat }
11188db86ddSKarthik Bhat
112d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
11388db86ddSKarthik Bhat << " Loads and Stores to analyze\n");
11488db86ddSKarthik Bhat
11588db86ddSKarthik Bhat ValueVector::iterator I, IE, J, JE;
11688db86ddSKarthik Bhat
11788db86ddSKarthik Bhat for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
11888db86ddSKarthik Bhat for (J = I, JE = MemInstr.end(); J != JE; ++J) {
11988db86ddSKarthik Bhat std::vector<char> Dep;
12009c1109bSChad Rosier Instruction *Src = cast<Instruction>(*I);
12109c1109bSChad Rosier Instruction *Dst = cast<Instruction>(*J);
12200eb8db3SChad Rosier // Ignore Input dependencies.
12390bcb917SChad Rosier if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
12488db86ddSKarthik Bhat continue;
12500eb8db3SChad Rosier // Track Output, Flow, and Anti dependencies.
12690bcb917SChad Rosier if (auto D = DI->depends(Src, Dst, true)) {
12700eb8db3SChad Rosier assert(D->isOrdered() && "Expected an output, flow or anti dep.");
128d34e60caSNicola Zaghen LLVM_DEBUG(StringRef DepType =
12900eb8db3SChad Rosier D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
13000eb8db3SChad Rosier dbgs() << "Found " << DepType
13100eb8db3SChad Rosier << " dependency between Src and Dst\n"
13290bcb917SChad Rosier << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
13388db86ddSKarthik Bhat unsigned Levels = D->getLevels();
13488db86ddSKarthik Bhat char Direction;
13588db86ddSKarthik Bhat for (unsigned II = 1; II <= Levels; ++II) {
13688db86ddSKarthik Bhat const SCEV *Distance = D->getDistance(II);
13788db86ddSKarthik Bhat const SCEVConstant *SCEVConst =
13888db86ddSKarthik Bhat dyn_cast_or_null<SCEVConstant>(Distance);
13988db86ddSKarthik Bhat if (SCEVConst) {
14088db86ddSKarthik Bhat const ConstantInt *CI = SCEVConst->getValue();
14188db86ddSKarthik Bhat if (CI->isNegative())
14288db86ddSKarthik Bhat Direction = '<';
14388db86ddSKarthik Bhat else if (CI->isZero())
14488db86ddSKarthik Bhat Direction = '=';
14588db86ddSKarthik Bhat else
14688db86ddSKarthik Bhat Direction = '>';
14788db86ddSKarthik Bhat Dep.push_back(Direction);
14888db86ddSKarthik Bhat } else if (D->isScalar(II)) {
14988db86ddSKarthik Bhat Direction = 'S';
15088db86ddSKarthik Bhat Dep.push_back(Direction);
15188db86ddSKarthik Bhat } else {
15288db86ddSKarthik Bhat unsigned Dir = D->getDirection(II);
15388db86ddSKarthik Bhat if (Dir == Dependence::DVEntry::LT ||
15488db86ddSKarthik Bhat Dir == Dependence::DVEntry::LE)
15588db86ddSKarthik Bhat Direction = '<';
15688db86ddSKarthik Bhat else if (Dir == Dependence::DVEntry::GT ||
15788db86ddSKarthik Bhat Dir == Dependence::DVEntry::GE)
15888db86ddSKarthik Bhat Direction = '>';
15988db86ddSKarthik Bhat else if (Dir == Dependence::DVEntry::EQ)
16088db86ddSKarthik Bhat Direction = '=';
16188db86ddSKarthik Bhat else
16288db86ddSKarthik Bhat Direction = '*';
16388db86ddSKarthik Bhat Dep.push_back(Direction);
16488db86ddSKarthik Bhat }
16588db86ddSKarthik Bhat }
16688db86ddSKarthik Bhat while (Dep.size() != Level) {
16788db86ddSKarthik Bhat Dep.push_back('I');
16888db86ddSKarthik Bhat }
16988db86ddSKarthik Bhat
17088db86ddSKarthik Bhat DepMatrix.push_back(Dep);
17188db86ddSKarthik Bhat if (DepMatrix.size() > MaxMemInstrCount) {
172d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
17388db86ddSKarthik Bhat << " dependencies inside loop\n");
17488db86ddSKarthik Bhat return false;
17588db86ddSKarthik Bhat }
17688db86ddSKarthik Bhat }
17788db86ddSKarthik Bhat }
17888db86ddSKarthik Bhat }
17988db86ddSKarthik Bhat
18088db86ddSKarthik Bhat return true;
18188db86ddSKarthik Bhat }
18288db86ddSKarthik Bhat
18388db86ddSKarthik Bhat // A loop is moved from index 'from' to an index 'to'. Update the Dependence
18488db86ddSKarthik Bhat // matrix by exchanging the two columns.
interChangeDependencies(CharMatrix & DepMatrix,unsigned FromIndx,unsigned ToIndx)185d18ea065SChad Rosier static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
18688db86ddSKarthik Bhat unsigned ToIndx) {
187ea1a1ebbSTa-Wei Tu for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
188ea1a1ebbSTa-Wei Tu std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
18988db86ddSKarthik Bhat }
19088db86ddSKarthik Bhat
19188db86ddSKarthik Bhat // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
19288db86ddSKarthik Bhat // '>'
isOuterMostDepPositive(CharMatrix & DepMatrix,unsigned Row,unsigned Column)1938210fdf2SKarthik Bhat static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
19488db86ddSKarthik Bhat unsigned Column) {
19588db86ddSKarthik Bhat for (unsigned i = 0; i <= Column; ++i) {
19688db86ddSKarthik Bhat if (DepMatrix[Row][i] == '<')
19788db86ddSKarthik Bhat return false;
19888db86ddSKarthik Bhat if (DepMatrix[Row][i] == '>')
19988db86ddSKarthik Bhat return true;
20088db86ddSKarthik Bhat }
20188db86ddSKarthik Bhat // All dependencies were '=','S' or 'I'
20288db86ddSKarthik Bhat return false;
20388db86ddSKarthik Bhat }
20488db86ddSKarthik Bhat
20588db86ddSKarthik Bhat // Checks if no dependence exist in the dependency matrix in Row before Column.
containsNoDependence(CharMatrix & DepMatrix,unsigned Row,unsigned Column)2068210fdf2SKarthik Bhat static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
20788db86ddSKarthik Bhat unsigned Column) {
20888db86ddSKarthik Bhat for (unsigned i = 0; i < Column; ++i) {
209fca1ff0dSChandler Carruth if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
21088db86ddSKarthik Bhat DepMatrix[Row][i] != 'I')
21188db86ddSKarthik Bhat return false;
21288db86ddSKarthik Bhat }
21388db86ddSKarthik Bhat return true;
21488db86ddSKarthik Bhat }
21588db86ddSKarthik Bhat
validDepInterchange(CharMatrix & DepMatrix,unsigned Row,unsigned OuterLoopId,char InnerDep,char OuterDep)2168210fdf2SKarthik Bhat static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
2178210fdf2SKarthik Bhat unsigned OuterLoopId, char InnerDep,
2188210fdf2SKarthik Bhat char OuterDep) {
21988db86ddSKarthik Bhat if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
22088db86ddSKarthik Bhat return false;
22188db86ddSKarthik Bhat
22288db86ddSKarthik Bhat if (InnerDep == OuterDep)
22388db86ddSKarthik Bhat return true;
22488db86ddSKarthik Bhat
22588db86ddSKarthik Bhat // It is legal to interchange if and only if after interchange no row has a
22688db86ddSKarthik Bhat // '>' direction as the leftmost non-'='.
22788db86ddSKarthik Bhat
22888db86ddSKarthik Bhat if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
22988db86ddSKarthik Bhat return true;
23088db86ddSKarthik Bhat
23188db86ddSKarthik Bhat if (InnerDep == '<')
23288db86ddSKarthik Bhat return true;
23388db86ddSKarthik Bhat
23488db86ddSKarthik Bhat if (InnerDep == '>') {
23588db86ddSKarthik Bhat // If OuterLoopId represents outermost loop then interchanging will make the
23688db86ddSKarthik Bhat // 1st dependency as '>'
23788db86ddSKarthik Bhat if (OuterLoopId == 0)
23888db86ddSKarthik Bhat return false;
23988db86ddSKarthik Bhat
24088db86ddSKarthik Bhat // If all dependencies before OuterloopId are '=','S'or 'I'. Then
24188db86ddSKarthik Bhat // interchanging will result in this row having an outermost non '='
24288db86ddSKarthik Bhat // dependency of '>'
24388db86ddSKarthik Bhat if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
24488db86ddSKarthik Bhat return true;
24588db86ddSKarthik Bhat }
24688db86ddSKarthik Bhat
24788db86ddSKarthik Bhat return false;
24888db86ddSKarthik Bhat }
24988db86ddSKarthik Bhat
25088db86ddSKarthik Bhat // Checks if it is legal to interchange 2 loops.
2518210fdf2SKarthik Bhat // [Theorem] A permutation of the loops in a perfect nest is legal if and only
25261683a22SChad Rosier // if the direction matrix, after the same permutation is applied to its
25361683a22SChad Rosier // columns, has no ">" direction as the leftmost non-"=" direction in any row.
isLegalToInterChangeLoops(CharMatrix & DepMatrix,unsigned InnerLoopId,unsigned OuterLoopId)2548210fdf2SKarthik Bhat static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
2558210fdf2SKarthik Bhat unsigned InnerLoopId,
25688db86ddSKarthik Bhat unsigned OuterLoopId) {
25788db86ddSKarthik Bhat unsigned NumRows = DepMatrix.size();
25888db86ddSKarthik Bhat // For each row check if it is valid to interchange.
25988db86ddSKarthik Bhat for (unsigned Row = 0; Row < NumRows; ++Row) {
26088db86ddSKarthik Bhat char InnerDep = DepMatrix[Row][InnerLoopId];
26188db86ddSKarthik Bhat char OuterDep = DepMatrix[Row][OuterLoopId];
26288db86ddSKarthik Bhat if (InnerDep == '*' || OuterDep == '*')
26388db86ddSKarthik Bhat return false;
26461683a22SChad Rosier if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
26588db86ddSKarthik Bhat return false;
26688db86ddSKarthik Bhat }
26788db86ddSKarthik Bhat return true;
26888db86ddSKarthik Bhat }
26988db86ddSKarthik Bhat
populateWorklist(Loop & L,LoopVector & LoopList)270eac34875SCongzhe Cao static void populateWorklist(Loop &L, LoopVector &LoopList) {
271d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
272f5814f56SChad Rosier << L.getHeader()->getParent()->getName() << " Loop: %"
273f5814f56SChad Rosier << L.getHeader()->getName() << '\n');
274eac34875SCongzhe Cao assert(LoopList.empty() && "LoopList should initially be empty!");
27588db86ddSKarthik Bhat Loop *CurrentLoop = &L;
276e448b5beSBenjamin Kramer const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
277e448b5beSBenjamin Kramer while (!Vec->empty()) {
27888db86ddSKarthik Bhat // The current loop has multiple subloops in it hence it is not tightly
27988db86ddSKarthik Bhat // nested.
28088db86ddSKarthik Bhat // Discard all loops above it added into Worklist.
281eac34875SCongzhe Cao if (Vec->size() != 1) {
282eac34875SCongzhe Cao LoopList = {};
283eac34875SCongzhe Cao return;
284eac34875SCongzhe Cao }
2858600fee5SFlorian Hahn
28688db86ddSKarthik Bhat LoopList.push_back(CurrentLoop);
287e448b5beSBenjamin Kramer CurrentLoop = Vec->front();
288e448b5beSBenjamin Kramer Vec = &CurrentLoop->getSubLoops();
28988db86ddSKarthik Bhat }
29088db86ddSKarthik Bhat LoopList.push_back(CurrentLoop);
29188db86ddSKarthik Bhat }
29288db86ddSKarthik Bhat
293dd40f5e7SEugene Zelenko namespace {
294dd40f5e7SEugene Zelenko
29588db86ddSKarthik Bhat /// LoopInterchangeLegality checks if it is legal to interchange the loop.
29688db86ddSKarthik Bhat class LoopInterchangeLegality {
29788db86ddSKarthik Bhat public:
LoopInterchangeLegality(Loop * Outer,Loop * Inner,ScalarEvolution * SE,OptimizationRemarkEmitter * ORE)29888db86ddSKarthik Bhat LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
299ad993521SFlorian Hahn OptimizationRemarkEmitter *ORE)
300c51d0882SFlorian Hahn : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
30188db86ddSKarthik Bhat
30288db86ddSKarthik Bhat /// Check if the loops can be interchanged.
30388db86ddSKarthik Bhat bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
30488db86ddSKarthik Bhat CharMatrix &DepMatrix);
305dd40f5e7SEugene Zelenko
306fa6a2876SCongzhe Cao /// Discover induction PHIs in the header of \p L. Induction
307fa6a2876SCongzhe Cao /// PHIs are added to \p Inductions.
308fa6a2876SCongzhe Cao bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
309fa6a2876SCongzhe Cao
31088db86ddSKarthik Bhat /// Check if the loop structure is understood. We do not handle triangular
31188db86ddSKarthik Bhat /// loops for now.
312fa6a2876SCongzhe Cao bool isLoopStructureUnderstood();
31388db86ddSKarthik Bhat
31488db86ddSKarthik Bhat bool currentLimitations();
31588db86ddSKarthik Bhat
getOuterInnerReductions() const316a684a994SFlorian Hahn const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
317a684a994SFlorian Hahn return OuterInnerReductions;
318a684a994SFlorian Hahn }
319a684a994SFlorian Hahn
getInnerLoopInductions() const320fa6a2876SCongzhe Cao const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const {
321fa6a2876SCongzhe Cao return InnerLoopInductions;
322fa6a2876SCongzhe Cao }
323fa6a2876SCongzhe Cao
32488db86ddSKarthik Bhat private:
32588db86ddSKarthik Bhat bool tightlyNested(Loop *Outer, Loop *Inner);
326c8bd6ea3SFlorian Hahn bool containsUnsafeInstructions(BasicBlock *BB);
327a684a994SFlorian Hahn
328a684a994SFlorian Hahn /// Discover induction and reduction PHIs in the header of \p L. Induction
329a684a994SFlorian Hahn /// PHIs are added to \p Inductions, reductions are added to
330a684a994SFlorian Hahn /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
331a684a994SFlorian Hahn /// to be passed as \p InnerLoop.
332a684a994SFlorian Hahn bool findInductionAndReductions(Loop *L,
333a684a994SFlorian Hahn SmallVector<PHINode *, 8> &Inductions,
334a684a994SFlorian Hahn Loop *InnerLoop);
335dd40f5e7SEugene Zelenko
33688db86ddSKarthik Bhat Loop *OuterLoop;
33788db86ddSKarthik Bhat Loop *InnerLoop;
33888db86ddSKarthik Bhat
33988db86ddSKarthik Bhat ScalarEvolution *SE;
340dd40f5e7SEugene Zelenko
341ad993521SFlorian Hahn /// Interface to emit optimization remarks.
342ad993521SFlorian Hahn OptimizationRemarkEmitter *ORE;
3438210fdf2SKarthik Bhat
344a684a994SFlorian Hahn /// Set of reduction PHIs taking part of a reduction across the inner and
345a684a994SFlorian Hahn /// outer loop.
346a684a994SFlorian Hahn SmallPtrSet<PHINode *, 4> OuterInnerReductions;
347fa6a2876SCongzhe Cao
348fa6a2876SCongzhe Cao /// Set of inner loop induction PHIs
349fa6a2876SCongzhe Cao SmallVector<PHINode *, 8> InnerLoopInductions;
35088db86ddSKarthik Bhat };
35188db86ddSKarthik Bhat
35288db86ddSKarthik Bhat /// LoopInterchangeProfitability checks if it is profitable to interchange the
35388db86ddSKarthik Bhat /// loop.
35488db86ddSKarthik Bhat class LoopInterchangeProfitability {
35588db86ddSKarthik Bhat public:
LoopInterchangeProfitability(Loop * Outer,Loop * Inner,ScalarEvolution * SE,OptimizationRemarkEmitter * ORE)356ad993521SFlorian Hahn LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
357ad993521SFlorian Hahn OptimizationRemarkEmitter *ORE)
358ad993521SFlorian Hahn : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
35988db86ddSKarthik Bhat
36074b41114SVikram TV /// Check if the loop interchange is profitable.
361*b941857bSCongzhe Cao bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
362*b941857bSCongzhe Cao unsigned InnerLoopId, unsigned OuterLoopId,
363*b941857bSCongzhe Cao CharMatrix &DepMatrix,
364*b941857bSCongzhe Cao const DenseMap<const Loop *, unsigned> &CostMap);
36588db86ddSKarthik Bhat
36688db86ddSKarthik Bhat private:
36788db86ddSKarthik Bhat int getInstrOrderCost();
36888db86ddSKarthik Bhat
36988db86ddSKarthik Bhat Loop *OuterLoop;
37088db86ddSKarthik Bhat Loop *InnerLoop;
37188db86ddSKarthik Bhat
37288db86ddSKarthik Bhat /// Scev analysis.
37388db86ddSKarthik Bhat ScalarEvolution *SE;
374dd40f5e7SEugene Zelenko
375ad993521SFlorian Hahn /// Interface to emit optimization remarks.
376ad993521SFlorian Hahn OptimizationRemarkEmitter *ORE;
37788db86ddSKarthik Bhat };
37888db86ddSKarthik Bhat
37974b41114SVikram TV /// LoopInterchangeTransform interchanges the loop.
38088db86ddSKarthik Bhat class LoopInterchangeTransform {
38188db86ddSKarthik Bhat public:
LoopInterchangeTransform(Loop * Outer,Loop * Inner,ScalarEvolution * SE,LoopInfo * LI,DominatorTree * DT,const LoopInterchangeLegality & LIL)38288db86ddSKarthik Bhat LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
38388db86ddSKarthik Bhat LoopInfo *LI, DominatorTree *DT,
384a684a994SFlorian Hahn const LoopInterchangeLegality &LIL)
385ce2db900SCongzhe Cao : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
38688db86ddSKarthik Bhat
38788db86ddSKarthik Bhat /// Interchange OuterLoop and InnerLoop.
38888db86ddSKarthik Bhat bool transform();
389831a7577SFlorian Hahn void restructureLoops(Loop *NewInner, Loop *NewOuter,
390831a7577SFlorian Hahn BasicBlock *OrigInnerPreHeader,
391831a7577SFlorian Hahn BasicBlock *OrigOuterPreHeader);
39288db86ddSKarthik Bhat void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
39388db86ddSKarthik Bhat
39488db86ddSKarthik Bhat private:
39588db86ddSKarthik Bhat bool adjustLoopLinks();
39688db86ddSKarthik Bhat bool adjustLoopBranches();
39788db86ddSKarthik Bhat
39888db86ddSKarthik Bhat Loop *OuterLoop;
39988db86ddSKarthik Bhat Loop *InnerLoop;
40088db86ddSKarthik Bhat
40188db86ddSKarthik Bhat /// Scev analysis.
40288db86ddSKarthik Bhat ScalarEvolution *SE;
403dd40f5e7SEugene Zelenko
40488db86ddSKarthik Bhat LoopInfo *LI;
40588db86ddSKarthik Bhat DominatorTree *DT;
406a684a994SFlorian Hahn
407a684a994SFlorian Hahn const LoopInterchangeLegality &LIL;
40888db86ddSKarthik Bhat };
40988db86ddSKarthik Bhat
4109c21c6c9SArthur Eubanks struct LoopInterchange {
411dd40f5e7SEugene Zelenko ScalarEvolution *SE = nullptr;
412dd40f5e7SEugene Zelenko LoopInfo *LI = nullptr;
413dd40f5e7SEugene Zelenko DependenceInfo *DI = nullptr;
414dd40f5e7SEugene Zelenko DominatorTree *DT = nullptr;
415*b941857bSCongzhe Cao std::unique_ptr<CacheCost> CC = nullptr;
416dd40f5e7SEugene Zelenko
417ad993521SFlorian Hahn /// Interface to emit optimization remarks.
418ad993521SFlorian Hahn OptimizationRemarkEmitter *ORE;
419ad993521SFlorian Hahn
LoopInterchange__anon401b65a50211::LoopInterchange4209c21c6c9SArthur Eubanks LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
421*b941857bSCongzhe Cao DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
422*b941857bSCongzhe Cao OptimizationRemarkEmitter *ORE)
423*b941857bSCongzhe Cao : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
42488db86ddSKarthik Bhat
run__anon401b65a50211::LoopInterchange4259c21c6c9SArthur Eubanks bool run(Loop *L) {
4269c21c6c9SArthur Eubanks if (L->getParentLoop())
4278d72ecc3SFlorian Hahn return false;
428eac34875SCongzhe Cao SmallVector<Loop *, 8> LoopList;
429eac34875SCongzhe Cao populateWorklist(*L, LoopList);
430eac34875SCongzhe Cao return processLoopList(LoopList);
43188db86ddSKarthik Bhat }
43288db86ddSKarthik Bhat
run__anon401b65a50211::LoopInterchange4330eeaec2aSTa-Wei Tu bool run(LoopNest &LN) {
434eac34875SCongzhe Cao SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end());
4350eeaec2aSTa-Wei Tu for (unsigned I = 1; I < LoopList.size(); ++I)
4360eeaec2aSTa-Wei Tu if (LoopList[I]->getParentLoop() != LoopList[I - 1])
4370eeaec2aSTa-Wei Tu return false;
4380eeaec2aSTa-Wei Tu return processLoopList(LoopList);
4390eeaec2aSTa-Wei Tu }
4400eeaec2aSTa-Wei Tu
isComputableLoopNest__anon401b65a50211::LoopInterchange4410eeaec2aSTa-Wei Tu bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
442135f735aSBenjamin Kramer for (Loop *L : LoopList) {
44388db86ddSKarthik Bhat const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
44410ddb927SPhilip Reames if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
445d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
44688db86ddSKarthik Bhat return false;
44788db86ddSKarthik Bhat }
44888db86ddSKarthik Bhat if (L->getNumBackEdges() != 1) {
449d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
45088db86ddSKarthik Bhat return false;
45188db86ddSKarthik Bhat }
45288db86ddSKarthik Bhat if (!L->getExitingBlock()) {
453d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
45488db86ddSKarthik Bhat return false;
45588db86ddSKarthik Bhat }
45688db86ddSKarthik Bhat }
45788db86ddSKarthik Bhat return true;
45888db86ddSKarthik Bhat }
45988db86ddSKarthik Bhat
selectLoopForInterchange__anon401b65a50211::LoopInterchange4600eeaec2aSTa-Wei Tu unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
46188db86ddSKarthik Bhat // TODO: Add a better heuristic to select the loop to be interchanged based
462df005cbeSBenjamin Kramer // on the dependence matrix. Currently we select the innermost loop.
46388db86ddSKarthik Bhat return LoopList.size() - 1;
46488db86ddSKarthik Bhat }
46588db86ddSKarthik Bhat
processLoopList__anon401b65a50211::LoopInterchange466eac34875SCongzhe Cao bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
46788db86ddSKarthik Bhat bool Changed = false;
4687ea0d394SChad Rosier unsigned LoopNestDepth = LoopList.size();
4697ea0d394SChad Rosier if (LoopNestDepth < 2) {
470d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
47188db86ddSKarthik Bhat return false;
47288db86ddSKarthik Bhat }
4737ea0d394SChad Rosier if (LoopNestDepth > MaxLoopNestDepth) {
474d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
4757ea0d394SChad Rosier << MaxLoopNestDepth << "\n");
4767ea0d394SChad Rosier return false;
4777ea0d394SChad Rosier }
47888db86ddSKarthik Bhat if (!isComputableLoopNest(LoopList)) {
479d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
48088db86ddSKarthik Bhat return false;
48188db86ddSKarthik Bhat }
4827ea0d394SChad Rosier
483d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
484d34e60caSNicola Zaghen << "\n");
4857ea0d394SChad Rosier
4867ea0d394SChad Rosier CharMatrix DependencyMatrix;
48788db86ddSKarthik Bhat Loop *OuterMostLoop = *(LoopList.begin());
4887ea0d394SChad Rosier if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
48949c22190SChandler Carruth OuterMostLoop, DI)) {
490d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
49188db86ddSKarthik Bhat return false;
49288db86ddSKarthik Bhat }
49388db86ddSKarthik Bhat #ifdef DUMP_DEP_MATRICIES
494d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
49588db86ddSKarthik Bhat printDepMatrix(DependencyMatrix);
49688db86ddSKarthik Bhat #endif
49788db86ddSKarthik Bhat
49888db86ddSKarthik Bhat // Get the Outermost loop exit.
4991da30c65SFlorian Hahn BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
5001da30c65SFlorian Hahn if (!LoopNestExit) {
501d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
5021da30c65SFlorian Hahn return false;
5031da30c65SFlorian Hahn }
50488db86ddSKarthik Bhat
50588db86ddSKarthik Bhat unsigned SelecLoopId = selectLoopForInterchange(LoopList);
506*b941857bSCongzhe Cao // Obtain the loop vector returned from loop cache analysis beforehand,
507*b941857bSCongzhe Cao // and put each <Loop, index> pair into a map for constant time query
508*b941857bSCongzhe Cao // later. Indices in loop vector reprsent the optimal order of the
509*b941857bSCongzhe Cao // corresponding loop, e.g., given a loopnest with depth N, index 0
510*b941857bSCongzhe Cao // indicates the loop should be placed as the outermost loop and index N
511*b941857bSCongzhe Cao // indicates the loop should be placed as the innermost loop.
512*b941857bSCongzhe Cao //
513*b941857bSCongzhe Cao // For the old pass manager CacheCost would be null.
514*b941857bSCongzhe Cao DenseMap<const Loop *, unsigned> CostMap;
515*b941857bSCongzhe Cao if (CC != nullptr) {
516*b941857bSCongzhe Cao const auto &LoopCosts = CC->getLoopCosts();
517*b941857bSCongzhe Cao for (unsigned i = 0; i < LoopCosts.size(); i++) {
518*b941857bSCongzhe Cao CostMap[LoopCosts[i].first] = i;
519*b941857bSCongzhe Cao }
520*b941857bSCongzhe Cao }
521eac34875SCongzhe Cao // We try to achieve the globally optimal memory access for the loopnest,
522eac34875SCongzhe Cao // and do interchange based on a bubble-sort fasion. We start from
523eac34875SCongzhe Cao // the innermost loop, move it outwards to the best possible position
524eac34875SCongzhe Cao // and repeat this process.
525eac34875SCongzhe Cao for (unsigned j = SelecLoopId; j > 0; j--) {
526eac34875SCongzhe Cao bool ChangedPerIter = false;
527eac34875SCongzhe Cao for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
528eac34875SCongzhe Cao bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
529*b941857bSCongzhe Cao DependencyMatrix, CostMap);
53088db86ddSKarthik Bhat if (!Interchanged)
531eac34875SCongzhe Cao continue;
532eac34875SCongzhe Cao // Loops interchanged, update LoopList accordingly.
533eac34875SCongzhe Cao std::swap(LoopList[i - 1], LoopList[i]);
53488db86ddSKarthik Bhat // Update the DependencyMatrix
535d18ea065SChad Rosier interChangeDependencies(DependencyMatrix, i, i - 1);
53688db86ddSKarthik Bhat #ifdef DUMP_DEP_MATRICIES
537d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
53888db86ddSKarthik Bhat printDepMatrix(DependencyMatrix);
53988db86ddSKarthik Bhat #endif
540eac34875SCongzhe Cao ChangedPerIter |= Interchanged;
54188db86ddSKarthik Bhat Changed |= Interchanged;
54288db86ddSKarthik Bhat }
543eac34875SCongzhe Cao // Early abort if there was no interchange during an entire round of
544eac34875SCongzhe Cao // moving loops outwards.
545eac34875SCongzhe Cao if (!ChangedPerIter)
546eac34875SCongzhe Cao break;
547eac34875SCongzhe Cao }
54888db86ddSKarthik Bhat return Changed;
54988db86ddSKarthik Bhat }
55088db86ddSKarthik Bhat
processLoop__anon401b65a50211::LoopInterchange5516b612a7bSTa-Wei Tu bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
552ce2db900SCongzhe Cao unsigned OuterLoopId,
553*b941857bSCongzhe Cao std::vector<std::vector<char>> &DependencyMatrix,
554*b941857bSCongzhe Cao const DenseMap<const Loop *, unsigned> &CostMap) {
555d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
55688db86ddSKarthik Bhat << " and OuterLoopId = " << OuterLoopId << "\n");
557c51d0882SFlorian Hahn LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
55888db86ddSKarthik Bhat if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
559d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
56088db86ddSKarthik Bhat return false;
56188db86ddSKarthik Bhat }
562d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
563ad993521SFlorian Hahn LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
564*b941857bSCongzhe Cao if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
565*b941857bSCongzhe Cao DependencyMatrix, CostMap)) {
566d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
56788db86ddSKarthik Bhat return false;
56888db86ddSKarthik Bhat }
56988db86ddSKarthik Bhat
5709590658fSVivek Pandya ORE->emit([&]() {
5719590658fSVivek Pandya return OptimizationRemark(DEBUG_TYPE, "Interchanged",
572ad993521SFlorian Hahn InnerLoop->getStartLoc(),
573ad993521SFlorian Hahn InnerLoop->getHeader())
5749590658fSVivek Pandya << "Loop interchanged with enclosing loop.";
5759590658fSVivek Pandya });
576ad993521SFlorian Hahn
577ce2db900SCongzhe Cao LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
57888db86ddSKarthik Bhat LIT.transform();
579d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
5806e004336SFlorian Hahn LoopsInterchanged++;
581f71abec6SAlexey Zhikhartsev
582f71abec6SAlexey Zhikhartsev assert(InnerLoop->isLCSSAForm(*DT) &&
583f71abec6SAlexey Zhikhartsev "Inner loop not left in LCSSA form after loop interchange!");
584f71abec6SAlexey Zhikhartsev assert(OuterLoop->isLCSSAForm(*DT) &&
585f71abec6SAlexey Zhikhartsev "Outer loop not left in LCSSA form after loop interchange!");
586f71abec6SAlexey Zhikhartsev
58788db86ddSKarthik Bhat return true;
58888db86ddSKarthik Bhat }
58988db86ddSKarthik Bhat };
59088db86ddSKarthik Bhat
591dd40f5e7SEugene Zelenko } // end anonymous namespace
592dd40f5e7SEugene Zelenko
containsUnsafeInstructions(BasicBlock * BB)593c8bd6ea3SFlorian Hahn bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
594c8bd6ea3SFlorian Hahn return any_of(*BB, [](const Instruction &I) {
595c8bd6ea3SFlorian Hahn return I.mayHaveSideEffects() || I.mayReadFromMemory();
5968210fdf2SKarthik Bhat });
5978210fdf2SKarthik Bhat }
59888db86ddSKarthik Bhat
tightlyNested(Loop * OuterLoop,Loop * InnerLoop)59988db86ddSKarthik Bhat bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
60088db86ddSKarthik Bhat BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
60188db86ddSKarthik Bhat BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
60288db86ddSKarthik Bhat BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
60388db86ddSKarthik Bhat
6047ff2768bSTa-Wei Tu LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
6057ff2768bSTa-Wei Tu
6067ff2768bSTa-Wei Tu // A perfectly nested loop will not have any branch in between the outer and
6077ff2768bSTa-Wei Tu // inner block i.e. outer header will branch to either inner preheader and
6087ff2768bSTa-Wei Tu // outerloop latch.
6097ff2768bSTa-Wei Tu BranchInst *OuterLoopHeaderBI =
6107ff2768bSTa-Wei Tu dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
6117ff2768bSTa-Wei Tu if (!OuterLoopHeaderBI)
6127ff2768bSTa-Wei Tu return false;
6137ff2768bSTa-Wei Tu
6147ff2768bSTa-Wei Tu for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
6157ff2768bSTa-Wei Tu if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
6167ff2768bSTa-Wei Tu Succ != OuterLoopLatch)
6177ff2768bSTa-Wei Tu return false;
6187ff2768bSTa-Wei Tu
6197ff2768bSTa-Wei Tu LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
62088db86ddSKarthik Bhat // We do not have any basic block in between now make sure the outer header
621df005cbeSBenjamin Kramer // and outer loop latch doesn't contain any unsafe instructions.
622c8bd6ea3SFlorian Hahn if (containsUnsafeInstructions(OuterLoopHeader) ||
623c8bd6ea3SFlorian Hahn containsUnsafeInstructions(OuterLoopLatch))
62488db86ddSKarthik Bhat return false;
62588db86ddSKarthik Bhat
6268393b9fdSFlorian Hahn // Also make sure the inner loop preheader does not contain any unsafe
6278393b9fdSFlorian Hahn // instructions. Note that all instructions in the preheader will be moved to
6288393b9fdSFlorian Hahn // the outer loop header when interchanging.
6298393b9fdSFlorian Hahn if (InnerLoopPreHeader != OuterLoopHeader &&
6308393b9fdSFlorian Hahn containsUnsafeInstructions(InnerLoopPreHeader))
6318393b9fdSFlorian Hahn return false;
6328393b9fdSFlorian Hahn
633829c1b64SCongzhe Cao BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
634829c1b64SCongzhe Cao // Ensure the inner loop exit block flows to the outer loop latch possibly
635829c1b64SCongzhe Cao // through empty blocks.
636829c1b64SCongzhe Cao const BasicBlock &SuccInner =
637829c1b64SCongzhe Cao LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
638829c1b64SCongzhe Cao if (&SuccInner != OuterLoopLatch) {
639829c1b64SCongzhe Cao LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit
640829c1b64SCongzhe Cao << " does not lead to the outer loop latch.\n";);
641829c1b64SCongzhe Cao return false;
642829c1b64SCongzhe Cao }
643829c1b64SCongzhe Cao // The inner loop exit block does flow to the outer loop latch and not some
644829c1b64SCongzhe Cao // other BBs, now make sure it contains safe instructions, since it will be
645829c1b64SCongzhe Cao // moved into the (new) inner loop after interchange.
646829c1b64SCongzhe Cao if (containsUnsafeInstructions(InnerLoopExit))
647829c1b64SCongzhe Cao return false;
648829c1b64SCongzhe Cao
649d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
65088db86ddSKarthik Bhat // We have a perfect loop nest.
65188db86ddSKarthik Bhat return true;
65288db86ddSKarthik Bhat }
65388db86ddSKarthik Bhat
isLoopStructureUnderstood()654fa6a2876SCongzhe Cao bool LoopInterchangeLegality::isLoopStructureUnderstood() {
65588db86ddSKarthik Bhat BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
656fa6a2876SCongzhe Cao for (PHINode *InnerInduction : InnerLoopInductions) {
657fa6a2876SCongzhe Cao unsigned Num = InnerInduction->getNumOperands();
65888db86ddSKarthik Bhat for (unsigned i = 0; i < Num; ++i) {
65988db86ddSKarthik Bhat Value *Val = InnerInduction->getOperand(i);
66088db86ddSKarthik Bhat if (isa<Constant>(Val))
66188db86ddSKarthik Bhat continue;
66288db86ddSKarthik Bhat Instruction *I = dyn_cast<Instruction>(Val);
66388db86ddSKarthik Bhat if (!I)
66488db86ddSKarthik Bhat return false;
66588db86ddSKarthik Bhat // TODO: Handle triangular loops.
66688db86ddSKarthik Bhat // e.g. for(int i=0;i<N;i++)
66788db86ddSKarthik Bhat // for(int j=i;j<N;j++)
66888db86ddSKarthik Bhat unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
66988db86ddSKarthik Bhat if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
67088db86ddSKarthik Bhat InnerLoopPreheader &&
67188db86ddSKarthik Bhat !OuterLoop->isLoopInvariant(I)) {
67288db86ddSKarthik Bhat return false;
67388db86ddSKarthik Bhat }
67488db86ddSKarthik Bhat }
675fa6a2876SCongzhe Cao }
67640e3aa39SCongzhe Cao
67740e3aa39SCongzhe Cao // TODO: Handle triangular loops of another form.
67840e3aa39SCongzhe Cao // e.g. for(int i=0;i<N;i++)
67940e3aa39SCongzhe Cao // for(int j=0;j<i;j++)
68040e3aa39SCongzhe Cao // or,
68140e3aa39SCongzhe Cao // for(int i=0;i<N;i++)
68240e3aa39SCongzhe Cao // for(int j=0;j*i<N;j++)
68340e3aa39SCongzhe Cao BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
68440e3aa39SCongzhe Cao BranchInst *InnerLoopLatchBI =
68540e3aa39SCongzhe Cao dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
68640e3aa39SCongzhe Cao if (!InnerLoopLatchBI->isConditional())
68740e3aa39SCongzhe Cao return false;
68840e3aa39SCongzhe Cao if (CmpInst *InnerLoopCmp =
68940e3aa39SCongzhe Cao dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
69040e3aa39SCongzhe Cao Value *Op0 = InnerLoopCmp->getOperand(0);
69140e3aa39SCongzhe Cao Value *Op1 = InnerLoopCmp->getOperand(1);
69240e3aa39SCongzhe Cao
69340e3aa39SCongzhe Cao // LHS and RHS of the inner loop exit condition, e.g.,
69440e3aa39SCongzhe Cao // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
69540e3aa39SCongzhe Cao Value *Left = nullptr;
69640e3aa39SCongzhe Cao Value *Right = nullptr;
69740e3aa39SCongzhe Cao
69840e3aa39SCongzhe Cao // Check if V only involves inner loop induction variable.
69940e3aa39SCongzhe Cao // Return true if V is InnerInduction, or a cast from
70040e3aa39SCongzhe Cao // InnerInduction, or a binary operator that involves
70140e3aa39SCongzhe Cao // InnerInduction and a constant.
702fa6a2876SCongzhe Cao std::function<bool(Value *)> IsPathToInnerIndVar;
703fa6a2876SCongzhe Cao IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
704fa6a2876SCongzhe Cao if (llvm::is_contained(InnerLoopInductions, V))
70540e3aa39SCongzhe Cao return true;
70640e3aa39SCongzhe Cao if (isa<Constant>(V))
70740e3aa39SCongzhe Cao return true;
708fa6a2876SCongzhe Cao const Instruction *I = dyn_cast<Instruction>(V);
70940e3aa39SCongzhe Cao if (!I)
71040e3aa39SCongzhe Cao return false;
71140e3aa39SCongzhe Cao if (isa<CastInst>(I))
712fa6a2876SCongzhe Cao return IsPathToInnerIndVar(I->getOperand(0));
71340e3aa39SCongzhe Cao if (isa<BinaryOperator>(I))
714fa6a2876SCongzhe Cao return IsPathToInnerIndVar(I->getOperand(0)) &&
715fa6a2876SCongzhe Cao IsPathToInnerIndVar(I->getOperand(1));
71640e3aa39SCongzhe Cao return false;
71740e3aa39SCongzhe Cao };
71840e3aa39SCongzhe Cao
719fa6a2876SCongzhe Cao // In case of multiple inner loop indvars, it is okay if LHS and RHS
720fa6a2876SCongzhe Cao // are both inner indvar related variables.
721fa6a2876SCongzhe Cao if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
722fa6a2876SCongzhe Cao return true;
723fa6a2876SCongzhe Cao
724fa6a2876SCongzhe Cao // Otherwise we check if the cmp instruction compares an inner indvar
725fa6a2876SCongzhe Cao // related variable (Left) with a outer loop invariant (Right).
726fa6a2876SCongzhe Cao if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
72740e3aa39SCongzhe Cao Left = Op0;
72840e3aa39SCongzhe Cao Right = Op1;
729fa6a2876SCongzhe Cao } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
73040e3aa39SCongzhe Cao Left = Op1;
73140e3aa39SCongzhe Cao Right = Op0;
73240e3aa39SCongzhe Cao }
73340e3aa39SCongzhe Cao
73440e3aa39SCongzhe Cao if (Left == nullptr)
73540e3aa39SCongzhe Cao return false;
73640e3aa39SCongzhe Cao
73740e3aa39SCongzhe Cao const SCEV *S = SE->getSCEV(Right);
73840e3aa39SCongzhe Cao if (!SE->isLoopInvariant(S, OuterLoop))
73940e3aa39SCongzhe Cao return false;
74040e3aa39SCongzhe Cao }
74140e3aa39SCongzhe Cao
74288db86ddSKarthik Bhat return true;
74388db86ddSKarthik Bhat }
74488db86ddSKarthik Bhat
745a684a994SFlorian Hahn // If SV is a LCSSA PHI node with a single incoming value, return the incoming
746a684a994SFlorian Hahn // value.
followLCSSA(Value * SV)747a684a994SFlorian Hahn static Value *followLCSSA(Value *SV) {
748a684a994SFlorian Hahn PHINode *PHI = dyn_cast<PHINode>(SV);
749a684a994SFlorian Hahn if (!PHI)
750a684a994SFlorian Hahn return SV;
751a684a994SFlorian Hahn
752a684a994SFlorian Hahn if (PHI->getNumIncomingValues() != 1)
753a684a994SFlorian Hahn return SV;
754a684a994SFlorian Hahn return followLCSSA(PHI->getIncomingValue(0));
755a684a994SFlorian Hahn }
756a684a994SFlorian Hahn
757a684a994SFlorian Hahn // Check V's users to see if it is involved in a reduction in L.
findInnerReductionPhi(Loop * L,Value * V)758a684a994SFlorian Hahn static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
759bfec9148SAnton Rapetov // Reduction variables cannot be constants.
760bfec9148SAnton Rapetov if (isa<Constant>(V))
761bfec9148SAnton Rapetov return nullptr;
762bfec9148SAnton Rapetov
763a684a994SFlorian Hahn for (Value *User : V->users()) {
764a684a994SFlorian Hahn if (PHINode *PHI = dyn_cast<PHINode>(User)) {
765a684a994SFlorian Hahn if (PHI->getNumIncomingValues() == 1)
766a684a994SFlorian Hahn continue;
767a684a994SFlorian Hahn RecurrenceDescriptor RD;
7681ef04326SCongzhe Cao if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
7691ef04326SCongzhe Cao // Detect floating point reduction only when it can be reordered.
7701ef04326SCongzhe Cao if (RD.getExactFPMathInst() != nullptr)
7711ef04326SCongzhe Cao return nullptr;
772a684a994SFlorian Hahn return PHI;
7731ef04326SCongzhe Cao }
774a684a994SFlorian Hahn return nullptr;
775a684a994SFlorian Hahn }
776a684a994SFlorian Hahn }
777a684a994SFlorian Hahn
778a684a994SFlorian Hahn return nullptr;
779a684a994SFlorian Hahn }
780a684a994SFlorian Hahn
findInductionAndReductions(Loop * L,SmallVector<PHINode *,8> & Inductions,Loop * InnerLoop)781a684a994SFlorian Hahn bool LoopInterchangeLegality::findInductionAndReductions(
782a684a994SFlorian Hahn Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
7838210fdf2SKarthik Bhat if (!L->getLoopLatch() || !L->getLoopPredecessor())
7848210fdf2SKarthik Bhat return false;
7855912c667SFlorian Hahn for (PHINode &PHI : L->getHeader()->phis()) {
7860a91310cSTyler Nowicki RecurrenceDescriptor RD;
7871bbf15c5SJames Molloy InductionDescriptor ID;
7885912c667SFlorian Hahn if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
7895912c667SFlorian Hahn Inductions.push_back(&PHI);
7908210fdf2SKarthik Bhat else {
791a684a994SFlorian Hahn // PHIs in inner loops need to be part of a reduction in the outer loop,
792a684a994SFlorian Hahn // discovered when checking the PHIs of the outer loop earlier.
793a684a994SFlorian Hahn if (!InnerLoop) {
7943badd17bSBenjamin Kramer if (!OuterInnerReductions.count(&PHI)) {
795a684a994SFlorian Hahn LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
796a684a994SFlorian Hahn "across the outer loop.\n");
7978210fdf2SKarthik Bhat return false;
7988210fdf2SKarthik Bhat }
799a684a994SFlorian Hahn } else {
800a684a994SFlorian Hahn assert(PHI.getNumIncomingValues() == 2 &&
801a684a994SFlorian Hahn "Phis in loop header should have exactly 2 incoming values");
802a684a994SFlorian Hahn // Check if we have a PHI node in the outer loop that has a reduction
803a684a994SFlorian Hahn // result from the inner loop as an incoming value.
804a684a994SFlorian Hahn Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
805a684a994SFlorian Hahn PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
806a684a994SFlorian Hahn if (!InnerRedPhi ||
80743c0e4f6SKazu Hirata !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
808a684a994SFlorian Hahn LLVM_DEBUG(
809a684a994SFlorian Hahn dbgs()
810a684a994SFlorian Hahn << "Failed to recognize PHI as an induction or reduction.\n");
811a684a994SFlorian Hahn return false;
812a684a994SFlorian Hahn }
813a684a994SFlorian Hahn OuterInnerReductions.insert(&PHI);
814a684a994SFlorian Hahn OuterInnerReductions.insert(InnerRedPhi);
815a684a994SFlorian Hahn }
816a684a994SFlorian Hahn }
8178210fdf2SKarthik Bhat }
8188210fdf2SKarthik Bhat return true;
8198210fdf2SKarthik Bhat }
8208210fdf2SKarthik Bhat
82188db86ddSKarthik Bhat // This function indicates the current limitations in the transform as a result
82288db86ddSKarthik Bhat // of which we do not proceed.
currentLimitations()82388db86ddSKarthik Bhat bool LoopInterchangeLegality::currentLimitations() {
82488db86ddSKarthik Bhat BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
8251da30c65SFlorian Hahn
8261da30c65SFlorian Hahn // transform currently expects the loop latches to also be the exiting
8271da30c65SFlorian Hahn // blocks.
8281da30c65SFlorian Hahn if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
8291da30c65SFlorian Hahn OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
8301da30c65SFlorian Hahn !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
8311da30c65SFlorian Hahn !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
832d34e60caSNicola Zaghen LLVM_DEBUG(
833d34e60caSNicola Zaghen dbgs() << "Loops where the latch is not the exiting block are not"
8341da30c65SFlorian Hahn << " supported currently.\n");
8351da30c65SFlorian Hahn ORE->emit([&]() {
8361da30c65SFlorian Hahn return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
8371da30c65SFlorian Hahn OuterLoop->getStartLoc(),
8381da30c65SFlorian Hahn OuterLoop->getHeader())
8391da30c65SFlorian Hahn << "Loops where the latch is not the exiting block cannot be"
8401da30c65SFlorian Hahn " interchange currently.";
8411da30c65SFlorian Hahn });
8421da30c65SFlorian Hahn return true;
8431da30c65SFlorian Hahn }
84488db86ddSKarthik Bhat
8458210fdf2SKarthik Bhat SmallVector<PHINode *, 8> Inductions;
846a684a994SFlorian Hahn if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
847a684a994SFlorian Hahn LLVM_DEBUG(
848a684a994SFlorian Hahn dbgs() << "Only outer loops with induction or reduction PHI nodes "
849a684a994SFlorian Hahn << "are supported currently.\n");
850a684a994SFlorian Hahn ORE->emit([&]() {
851a684a994SFlorian Hahn return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
852a684a994SFlorian Hahn OuterLoop->getStartLoc(),
853a684a994SFlorian Hahn OuterLoop->getHeader())
854a684a994SFlorian Hahn << "Only outer loops with induction or reduction PHI nodes can be"
855a684a994SFlorian Hahn " interchanged currently.";
856a684a994SFlorian Hahn });
857a684a994SFlorian Hahn return true;
858a684a994SFlorian Hahn }
859a684a994SFlorian Hahn
860a684a994SFlorian Hahn Inductions.clear();
861a684a994SFlorian Hahn if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
862d34e60caSNicola Zaghen LLVM_DEBUG(
863d34e60caSNicola Zaghen dbgs() << "Only inner loops with induction or reduction PHI nodes "
8644eeff394SFlorian Hahn << "are supported currently.\n");
8659590658fSVivek Pandya ORE->emit([&]() {
8669590658fSVivek Pandya return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
867ad993521SFlorian Hahn InnerLoop->getStartLoc(),
868ad993521SFlorian Hahn InnerLoop->getHeader())
869ad993521SFlorian Hahn << "Only inner loops with induction or reduction PHI nodes can be"
8709590658fSVivek Pandya " interchange currently.";
8719590658fSVivek Pandya });
87288db86ddSKarthik Bhat return true;
8734eeff394SFlorian Hahn }
87488db86ddSKarthik Bhat
87588db86ddSKarthik Bhat // TODO: Triangular loops are not handled for now.
876fa6a2876SCongzhe Cao if (!isLoopStructureUnderstood()) {
877d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
8789590658fSVivek Pandya ORE->emit([&]() {
8799590658fSVivek Pandya return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
880ad993521SFlorian Hahn InnerLoop->getStartLoc(),
881ad993521SFlorian Hahn InnerLoop->getHeader())
8829590658fSVivek Pandya << "Inner loop structure not understood currently.";
8839590658fSVivek Pandya });
88488db86ddSKarthik Bhat return true;
88588db86ddSKarthik Bhat }
88688db86ddSKarthik Bhat
88788db86ddSKarthik Bhat return false;
88888db86ddSKarthik Bhat }
88988db86ddSKarthik Bhat
findInductions(Loop * L,SmallVectorImpl<PHINode * > & Inductions)890fa6a2876SCongzhe Cao bool LoopInterchangeLegality::findInductions(
891fa6a2876SCongzhe Cao Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
892fa6a2876SCongzhe Cao for (PHINode &PHI : L->getHeader()->phis()) {
893fa6a2876SCongzhe Cao InductionDescriptor ID;
894fa6a2876SCongzhe Cao if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
895fa6a2876SCongzhe Cao Inductions.push_back(&PHI);
896fa6a2876SCongzhe Cao }
897fa6a2876SCongzhe Cao return !Inductions.empty();
898fa6a2876SCongzhe Cao }
899fa6a2876SCongzhe Cao
900e8a5c172SFlorian Hahn // We currently only support LCSSA PHI nodes in the inner loop exit, if their
901e8a5c172SFlorian Hahn // users are either reduction PHIs or PHIs outside the outer loop (which means
902e8a5c172SFlorian Hahn // the we are only interested in the final value after the loop).
903e8a5c172SFlorian Hahn static bool
areInnerLoopExitPHIsSupported(Loop * InnerL,Loop * OuterL,SmallPtrSetImpl<PHINode * > & Reductions)904e8a5c172SFlorian Hahn areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
905e8a5c172SFlorian Hahn SmallPtrSetImpl<PHINode *> &Reductions) {
906e8a5c172SFlorian Hahn BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
907e8a5c172SFlorian Hahn for (PHINode &PHI : InnerExit->phis()) {
908e8a5c172SFlorian Hahn // Reduction lcssa phi will have only 1 incoming block that from loop latch.
909e8a5c172SFlorian Hahn if (PHI.getNumIncomingValues() > 1)
910e8a5c172SFlorian Hahn return false;
911e8a5c172SFlorian Hahn if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
912e8a5c172SFlorian Hahn PHINode *PN = dyn_cast<PHINode>(U);
9133badd17bSBenjamin Kramer return !PN ||
9143badd17bSBenjamin Kramer (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
915e8a5c172SFlorian Hahn })) {
916e8a5c172SFlorian Hahn return false;
917e8a5c172SFlorian Hahn }
918e8a5c172SFlorian Hahn }
919e8a5c172SFlorian Hahn return true;
920e8a5c172SFlorian Hahn }
921e8a5c172SFlorian Hahn
922f3fea0f1SFlorian Hahn // We currently support LCSSA PHI nodes in the outer loop exit, if their
923f3fea0f1SFlorian Hahn // incoming values do not come from the outer loop latch or if the
924f3fea0f1SFlorian Hahn // outer loop latch has a single predecessor. In that case, the value will
925f3fea0f1SFlorian Hahn // be available if both the inner and outer loop conditions are true, which
926f3fea0f1SFlorian Hahn // will still be true after interchanging. If we have multiple predecessor,
927f3fea0f1SFlorian Hahn // that may not be the case, e.g. because the outer loop latch may be executed
928f3fea0f1SFlorian Hahn // if the inner loop is not executed.
areOuterLoopExitPHIsSupported(Loop * OuterLoop,Loop * InnerLoop)929e8a5c172SFlorian Hahn static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
930f3fea0f1SFlorian Hahn BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
931f3fea0f1SFlorian Hahn for (PHINode &PHI : LoopNestExit->phis()) {
932f3fea0f1SFlorian Hahn for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
933f3fea0f1SFlorian Hahn Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
934f3fea0f1SFlorian Hahn if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
935f3fea0f1SFlorian Hahn continue;
936f3fea0f1SFlorian Hahn
937f3fea0f1SFlorian Hahn // The incoming value is defined in the outer loop latch. Currently we
938f3fea0f1SFlorian Hahn // only support that in case the outer loop latch has a single predecessor.
939f3fea0f1SFlorian Hahn // This guarantees that the outer loop latch is executed if and only if
940f3fea0f1SFlorian Hahn // the inner loop is executed (because tightlyNested() guarantees that the
941f3fea0f1SFlorian Hahn // outer loop header only branches to the inner loop or the outer loop
942f3fea0f1SFlorian Hahn // latch).
943f3fea0f1SFlorian Hahn // FIXME: We could weaken this logic and allow multiple predecessors,
944f3fea0f1SFlorian Hahn // if the values are produced outside the loop latch. We would need
945f3fea0f1SFlorian Hahn // additional logic to update the PHI nodes in the exit block as
946f3fea0f1SFlorian Hahn // well.
947f3fea0f1SFlorian Hahn if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
948f3fea0f1SFlorian Hahn return false;
949f3fea0f1SFlorian Hahn }
950f3fea0f1SFlorian Hahn }
951f3fea0f1SFlorian Hahn return true;
952f3fea0f1SFlorian Hahn }
953f3fea0f1SFlorian Hahn
954a7b7d22dSCongzhe Cao // In case of multi-level nested loops, it may occur that lcssa phis exist in
955a7b7d22dSCongzhe Cao // the latch of InnerLoop, i.e., when defs of the incoming values are further
956a7b7d22dSCongzhe Cao // inside the loopnest. Sometimes those incoming values are not available
957a7b7d22dSCongzhe Cao // after interchange, since the original inner latch will become the new outer
958a7b7d22dSCongzhe Cao // latch which may have predecessor paths that do not include those incoming
959a7b7d22dSCongzhe Cao // values.
960a7b7d22dSCongzhe Cao // TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
961a7b7d22dSCongzhe Cao // multi-level loop nests.
areInnerLoopLatchPHIsSupported(Loop * OuterLoop,Loop * InnerLoop)962a7b7d22dSCongzhe Cao static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
963a7b7d22dSCongzhe Cao if (InnerLoop->getSubLoops().empty())
964a7b7d22dSCongzhe Cao return true;
965a7b7d22dSCongzhe Cao // If the original outer latch has only one predecessor, then values defined
966a7b7d22dSCongzhe Cao // further inside the looploop, e.g., in the innermost loop, will be available
967a7b7d22dSCongzhe Cao // at the new outer latch after interchange.
968a7b7d22dSCongzhe Cao if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
969a7b7d22dSCongzhe Cao return true;
970a7b7d22dSCongzhe Cao
971a7b7d22dSCongzhe Cao // The outer latch has more than one predecessors, i.e., the inner
972a7b7d22dSCongzhe Cao // exit and the inner header.
973a7b7d22dSCongzhe Cao // PHI nodes in the inner latch are lcssa phis where the incoming values
974a7b7d22dSCongzhe Cao // are defined further inside the loopnest. Check if those phis are used
975a7b7d22dSCongzhe Cao // in the original inner latch. If that is the case then bail out since
976a7b7d22dSCongzhe Cao // those incoming values may not be available at the new outer latch.
977a7b7d22dSCongzhe Cao BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
978a7b7d22dSCongzhe Cao for (PHINode &PHI : InnerLoopLatch->phis()) {
979a7b7d22dSCongzhe Cao for (auto *U : PHI.users()) {
980a7b7d22dSCongzhe Cao Instruction *UI = cast<Instruction>(U);
981a7b7d22dSCongzhe Cao if (InnerLoopLatch == UI->getParent())
982a7b7d22dSCongzhe Cao return false;
983a7b7d22dSCongzhe Cao }
984a7b7d22dSCongzhe Cao }
985a7b7d22dSCongzhe Cao return true;
986a7b7d22dSCongzhe Cao }
987a7b7d22dSCongzhe Cao
canInterchangeLoops(unsigned InnerLoopId,unsigned OuterLoopId,CharMatrix & DepMatrix)98888db86ddSKarthik Bhat bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
98988db86ddSKarthik Bhat unsigned OuterLoopId,
99088db86ddSKarthik Bhat CharMatrix &DepMatrix) {
99188db86ddSKarthik Bhat if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
992d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
99388db86ddSKarthik Bhat << " and OuterLoopId = " << OuterLoopId
99488db86ddSKarthik Bhat << " due to dependence\n");
9959590658fSVivek Pandya ORE->emit([&]() {
9969590658fSVivek Pandya return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
997ad993521SFlorian Hahn InnerLoop->getStartLoc(),
998ad993521SFlorian Hahn InnerLoop->getHeader())
9999590658fSVivek Pandya << "Cannot interchange loops due to dependences.";
10009590658fSVivek Pandya });
100188db86ddSKarthik Bhat return false;
100288db86ddSKarthik Bhat }
10034284049dSFlorian Hahn // Check if outer and inner loop contain legal instructions only.
10044284049dSFlorian Hahn for (auto *BB : OuterLoop->blocks())
1005fd2bc112SFlorian Hahn for (Instruction &I : BB->instructionsWithoutDebug())
10064284049dSFlorian Hahn if (CallInst *CI = dyn_cast<CallInst>(&I)) {
10074284049dSFlorian Hahn // readnone functions do not prevent interchanging.
1008c16fd6a3SPhilip Reames if (CI->onlyWritesMemory())
10094284049dSFlorian Hahn continue;
1010d34e60caSNicola Zaghen LLVM_DEBUG(
1011d34e60caSNicola Zaghen dbgs() << "Loops with call instructions cannot be interchanged "
10124284049dSFlorian Hahn << "safely.");
10139467ccf4SFlorian Hahn ORE->emit([&]() {
10149467ccf4SFlorian Hahn return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
10159467ccf4SFlorian Hahn CI->getDebugLoc(),
10169467ccf4SFlorian Hahn CI->getParent())
10179467ccf4SFlorian Hahn << "Cannot interchange loops due to call instruction.";
10189467ccf4SFlorian Hahn });
10199467ccf4SFlorian Hahn
10204284049dSFlorian Hahn return false;
10214284049dSFlorian Hahn }
10224284049dSFlorian Hahn
1023fa6a2876SCongzhe Cao if (!findInductions(InnerLoop, InnerLoopInductions)) {
1024fa6a2876SCongzhe Cao LLVM_DEBUG(dbgs() << "Cound not find inner loop induction variables.\n");
1025fa6a2876SCongzhe Cao return false;
1026fa6a2876SCongzhe Cao }
1027fa6a2876SCongzhe Cao
1028a7b7d22dSCongzhe Cao if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
1029a7b7d22dSCongzhe Cao LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
1030a7b7d22dSCongzhe Cao ORE->emit([&]() {
1031a7b7d22dSCongzhe Cao return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",
1032a7b7d22dSCongzhe Cao InnerLoop->getStartLoc(),
1033a7b7d22dSCongzhe Cao InnerLoop->getHeader())
1034a7b7d22dSCongzhe Cao << "Cannot interchange loops because unsupported PHI nodes found "
1035a7b7d22dSCongzhe Cao "in inner loop latch.";
1036a7b7d22dSCongzhe Cao });
1037a7b7d22dSCongzhe Cao return false;
1038a7b7d22dSCongzhe Cao }
1039a7b7d22dSCongzhe Cao
104088db86ddSKarthik Bhat // TODO: The loops could not be interchanged due to current limitations in the
104188db86ddSKarthik Bhat // transform module.
104288db86ddSKarthik Bhat if (currentLimitations()) {
1043d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
104488db86ddSKarthik Bhat return false;
104588db86ddSKarthik Bhat }
104688db86ddSKarthik Bhat
10478210fdf2SKarthik Bhat // Check if the loops are tightly nested.
10488210fdf2SKarthik Bhat if (!tightlyNested(OuterLoop, InnerLoop)) {
1049d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
10509590658fSVivek Pandya ORE->emit([&]() {
10519590658fSVivek Pandya return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1052ad993521SFlorian Hahn InnerLoop->getStartLoc(),
1053ad993521SFlorian Hahn InnerLoop->getHeader())
1054ad993521SFlorian Hahn << "Cannot interchange loops because they are not tightly "
10559590658fSVivek Pandya "nested.";
10569590658fSVivek Pandya });
10578210fdf2SKarthik Bhat return false;
10588210fdf2SKarthik Bhat }
10598210fdf2SKarthik Bhat
1060e8a5c172SFlorian Hahn if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1061e8a5c172SFlorian Hahn OuterInnerReductions)) {
1062e8a5c172SFlorian Hahn LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1063e8a5c172SFlorian Hahn ORE->emit([&]() {
1064e8a5c172SFlorian Hahn return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1065e8a5c172SFlorian Hahn InnerLoop->getStartLoc(),
1066e8a5c172SFlorian Hahn InnerLoop->getHeader())
1067e8a5c172SFlorian Hahn << "Found unsupported PHI node in loop exit.";
1068e8a5c172SFlorian Hahn });
1069e8a5c172SFlorian Hahn return false;
1070e8a5c172SFlorian Hahn }
1071e8a5c172SFlorian Hahn
1072e8a5c172SFlorian Hahn if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1073d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1074f3fea0f1SFlorian Hahn ORE->emit([&]() {
1075f3fea0f1SFlorian Hahn return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1076f3fea0f1SFlorian Hahn OuterLoop->getStartLoc(),
1077f3fea0f1SFlorian Hahn OuterLoop->getHeader())
1078f3fea0f1SFlorian Hahn << "Found unsupported PHI node in loop exit.";
1079f3fea0f1SFlorian Hahn });
1080f3fea0f1SFlorian Hahn return false;
1081f3fea0f1SFlorian Hahn }
1082f3fea0f1SFlorian Hahn
108388db86ddSKarthik Bhat return true;
108488db86ddSKarthik Bhat }
108588db86ddSKarthik Bhat
getInstrOrderCost()108688db86ddSKarthik Bhat int LoopInterchangeProfitability::getInstrOrderCost() {
108788db86ddSKarthik Bhat unsigned GoodOrder, BadOrder;
108888db86ddSKarthik Bhat BadOrder = GoodOrder = 0;
1089f66efd61SFlorian Hahn for (BasicBlock *BB : InnerLoop->blocks()) {
1090f66efd61SFlorian Hahn for (Instruction &Ins : *BB) {
109188db86ddSKarthik Bhat if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
109288db86ddSKarthik Bhat unsigned NumOp = GEP->getNumOperands();
109388db86ddSKarthik Bhat bool FoundInnerInduction = false;
109488db86ddSKarthik Bhat bool FoundOuterInduction = false;
109588db86ddSKarthik Bhat for (unsigned i = 0; i < NumOp; ++i) {
1096e8dc17a2SFlorian Hahn // Skip operands that are not SCEV-able.
1097e8dc17a2SFlorian Hahn if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1098e8dc17a2SFlorian Hahn continue;
1099e8dc17a2SFlorian Hahn
110088db86ddSKarthik Bhat const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
110188db86ddSKarthik Bhat const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
110288db86ddSKarthik Bhat if (!AR)
110388db86ddSKarthik Bhat continue;
110488db86ddSKarthik Bhat
110588db86ddSKarthik Bhat // If we find the inner induction after an outer induction e.g.
110688db86ddSKarthik Bhat // for(int i=0;i<N;i++)
110788db86ddSKarthik Bhat // for(int j=0;j<N;j++)
110888db86ddSKarthik Bhat // A[i][j] = A[i-1][j-1]+k;
110988db86ddSKarthik Bhat // then it is a good order.
111088db86ddSKarthik Bhat if (AR->getLoop() == InnerLoop) {
111188db86ddSKarthik Bhat // We found an InnerLoop induction after OuterLoop induction. It is
111288db86ddSKarthik Bhat // a good order.
111388db86ddSKarthik Bhat FoundInnerInduction = true;
111488db86ddSKarthik Bhat if (FoundOuterInduction) {
111588db86ddSKarthik Bhat GoodOrder++;
111688db86ddSKarthik Bhat break;
111788db86ddSKarthik Bhat }
111888db86ddSKarthik Bhat }
111988db86ddSKarthik Bhat // If we find the outer induction after an inner induction e.g.
112088db86ddSKarthik Bhat // for(int i=0;i<N;i++)
112188db86ddSKarthik Bhat // for(int j=0;j<N;j++)
112288db86ddSKarthik Bhat // A[j][i] = A[j-1][i-1]+k;
112388db86ddSKarthik Bhat // then it is a bad order.
112488db86ddSKarthik Bhat if (AR->getLoop() == OuterLoop) {
112588db86ddSKarthik Bhat // We found an OuterLoop induction after InnerLoop induction. It is
112688db86ddSKarthik Bhat // a bad order.
112788db86ddSKarthik Bhat FoundOuterInduction = true;
112888db86ddSKarthik Bhat if (FoundInnerInduction) {
112988db86ddSKarthik Bhat BadOrder++;
113088db86ddSKarthik Bhat break;
113188db86ddSKarthik Bhat }
113288db86ddSKarthik Bhat }
113388db86ddSKarthik Bhat }
113488db86ddSKarthik Bhat }
113588db86ddSKarthik Bhat }
113688db86ddSKarthik Bhat }
113788db86ddSKarthik Bhat return GoodOrder - BadOrder;
113888db86ddSKarthik Bhat }
113988db86ddSKarthik Bhat
isProfitableForVectorization(unsigned InnerLoopId,unsigned OuterLoopId,CharMatrix & DepMatrix)1140e6b3a63aSChad Rosier static bool isProfitableForVectorization(unsigned InnerLoopId,
1141f044d3f9SBenjamin Kramer unsigned OuterLoopId,
114288db86ddSKarthik Bhat CharMatrix &DepMatrix) {
114388db86ddSKarthik Bhat // TODO: Improve this heuristic to catch more cases.
114488db86ddSKarthik Bhat // If the inner loop is loop independent or doesn't carry any dependency it is
114588db86ddSKarthik Bhat // profitable to move this to outer position.
1146f66efd61SFlorian Hahn for (auto &Row : DepMatrix) {
1147f66efd61SFlorian Hahn if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
114888db86ddSKarthik Bhat return false;
114988db86ddSKarthik Bhat // TODO: We need to improve this heuristic.
1150f66efd61SFlorian Hahn if (Row[OuterLoopId] != '=')
115188db86ddSKarthik Bhat return false;
115288db86ddSKarthik Bhat }
115388db86ddSKarthik Bhat // If outer loop has dependence and inner loop is loop independent then it is
115488db86ddSKarthik Bhat // profitable to interchange to enable parallelism.
1155ceee7889SFlorian Hahn // If there are no dependences, interchanging will not improve anything.
1156ceee7889SFlorian Hahn return !DepMatrix.empty();
115788db86ddSKarthik Bhat }
115888db86ddSKarthik Bhat
isProfitable(const Loop * InnerLoop,const Loop * OuterLoop,unsigned InnerLoopId,unsigned OuterLoopId,CharMatrix & DepMatrix,const DenseMap<const Loop *,unsigned> & CostMap)1159*b941857bSCongzhe Cao bool LoopInterchangeProfitability::isProfitable(
1160*b941857bSCongzhe Cao const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1161*b941857bSCongzhe Cao unsigned OuterLoopId, CharMatrix &DepMatrix,
1162*b941857bSCongzhe Cao const DenseMap<const Loop *, unsigned> &CostMap) {
1163*b941857bSCongzhe Cao // TODO: Remove the legacy cost model.
116488db86ddSKarthik Bhat
1165*b941857bSCongzhe Cao // This is the new cost model returned from loop cache analysis.
1166*b941857bSCongzhe Cao // A smaller index means the loop should be placed an outer loop, and vice
1167*b941857bSCongzhe Cao // versa.
1168*b941857bSCongzhe Cao if (CostMap.find(InnerLoop) != CostMap.end() &&
1169*b941857bSCongzhe Cao CostMap.find(OuterLoop) != CostMap.end()) {
1170*b941857bSCongzhe Cao unsigned InnerIndex = 0, OuterIndex = 0;
1171*b941857bSCongzhe Cao InnerIndex = CostMap.find(InnerLoop)->second;
1172*b941857bSCongzhe Cao OuterIndex = CostMap.find(OuterLoop)->second;
1173*b941857bSCongzhe Cao LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
1174*b941857bSCongzhe Cao << ", OuterIndex = " << OuterIndex << "\n");
1175*b941857bSCongzhe Cao if (InnerIndex < OuterIndex)
1176*b941857bSCongzhe Cao return true;
1177*b941857bSCongzhe Cao } else {
1178*b941857bSCongzhe Cao // Legacy cost model: this is rough cost estimation algorithm. It counts the
1179*b941857bSCongzhe Cao // good and bad order of induction variables in the instruction and allows
1180*b941857bSCongzhe Cao // reordering if number of bad orders is more than good.
118172431890SChad Rosier int Cost = getInstrOrderCost();
1182d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
118372431890SChad Rosier if (Cost < -LoopInterchangeCostThreshold)
118488db86ddSKarthik Bhat return true;
1185*b941857bSCongzhe Cao }
118688db86ddSKarthik Bhat
1187df005cbeSBenjamin Kramer // It is not profitable as per current cache profitability model. But check if
118888db86ddSKarthik Bhat // we can move this loop outside to improve parallelism.
1189ad993521SFlorian Hahn if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1190ad993521SFlorian Hahn return true;
1191ad993521SFlorian Hahn
11929590658fSVivek Pandya ORE->emit([&]() {
11939590658fSVivek Pandya return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1194ad993521SFlorian Hahn InnerLoop->getStartLoc(),
1195ad993521SFlorian Hahn InnerLoop->getHeader())
1196*b941857bSCongzhe Cao << "Interchanging loops is too costly and it does not improve "
1197*b941857bSCongzhe Cao "parallelism.";
11989590658fSVivek Pandya });
1199ad993521SFlorian Hahn return false;
120088db86ddSKarthik Bhat }
120188db86ddSKarthik Bhat
removeChildLoop(Loop * OuterLoop,Loop * InnerLoop)120288db86ddSKarthik Bhat void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
120388db86ddSKarthik Bhat Loop *InnerLoop) {
12045912c667SFlorian Hahn for (Loop *L : *OuterLoop)
12055912c667SFlorian Hahn if (L == InnerLoop) {
12065912c667SFlorian Hahn OuterLoop->removeChildLoop(L);
120788db86ddSKarthik Bhat return;
120888db86ddSKarthik Bhat }
12098ceb323bSBenjamin Kramer llvm_unreachable("Couldn't find loop");
121088db86ddSKarthik Bhat }
12116adbd7aeSDaniel Jasper
1212831a7577SFlorian Hahn /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1213831a7577SFlorian Hahn /// new inner and outer loop after interchanging: NewInner is the original
1214831a7577SFlorian Hahn /// outer loop and NewOuter is the original inner loop.
1215831a7577SFlorian Hahn ///
1216831a7577SFlorian Hahn /// Before interchanging, we have the following structure
1217831a7577SFlorian Hahn /// Outer preheader
1218831a7577SFlorian Hahn // Outer header
1219831a7577SFlorian Hahn // Inner preheader
1220831a7577SFlorian Hahn // Inner header
1221831a7577SFlorian Hahn // Inner body
1222831a7577SFlorian Hahn // Inner latch
1223831a7577SFlorian Hahn // outer bbs
1224831a7577SFlorian Hahn // Outer latch
1225831a7577SFlorian Hahn //
1226831a7577SFlorian Hahn // After interchanging:
1227831a7577SFlorian Hahn // Inner preheader
1228831a7577SFlorian Hahn // Inner header
1229831a7577SFlorian Hahn // Outer preheader
1230831a7577SFlorian Hahn // Outer header
1231831a7577SFlorian Hahn // Inner body
1232831a7577SFlorian Hahn // outer bbs
1233831a7577SFlorian Hahn // Outer latch
1234831a7577SFlorian Hahn // Inner latch
restructureLoops(Loop * NewInner,Loop * NewOuter,BasicBlock * OrigInnerPreHeader,BasicBlock * OrigOuterPreHeader)1235831a7577SFlorian Hahn void LoopInterchangeTransform::restructureLoops(
1236831a7577SFlorian Hahn Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1237831a7577SFlorian Hahn BasicBlock *OrigOuterPreHeader) {
123888db86ddSKarthik Bhat Loop *OuterLoopParent = OuterLoop->getParentLoop();
1239831a7577SFlorian Hahn // The original inner loop preheader moves from the new inner loop to
1240831a7577SFlorian Hahn // the parent loop, if there is one.
1241831a7577SFlorian Hahn NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1242831a7577SFlorian Hahn LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1243831a7577SFlorian Hahn
1244831a7577SFlorian Hahn // Switch the loop levels.
124588db86ddSKarthik Bhat if (OuterLoopParent) {
124688db86ddSKarthik Bhat // Remove the loop from its parent loop.
1247831a7577SFlorian Hahn removeChildLoop(OuterLoopParent, NewInner);
1248831a7577SFlorian Hahn removeChildLoop(NewInner, NewOuter);
1249831a7577SFlorian Hahn OuterLoopParent->addChildLoop(NewOuter);
125088db86ddSKarthik Bhat } else {
1251831a7577SFlorian Hahn removeChildLoop(NewInner, NewOuter);
1252831a7577SFlorian Hahn LI->changeTopLevelLoop(NewInner, NewOuter);
1253831a7577SFlorian Hahn }
125489c1e35fSStefanos Baziotis while (!NewOuter->isInnermost())
1255831a7577SFlorian Hahn NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1256831a7577SFlorian Hahn NewOuter->addChildLoop(NewInner);
1257831a7577SFlorian Hahn
1258831a7577SFlorian Hahn // BBs from the original inner loop.
1259831a7577SFlorian Hahn SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1260831a7577SFlorian Hahn
1261831a7577SFlorian Hahn // Add BBs from the original outer loop to the original inner loop (excluding
1262831a7577SFlorian Hahn // BBs already in inner loop)
1263831a7577SFlorian Hahn for (BasicBlock *BB : NewInner->blocks())
1264831a7577SFlorian Hahn if (LI->getLoopFor(BB) == NewInner)
1265831a7577SFlorian Hahn NewOuter->addBlockEntry(BB);
1266831a7577SFlorian Hahn
1267831a7577SFlorian Hahn // Now remove inner loop header and latch from the new inner loop and move
1268831a7577SFlorian Hahn // other BBs (the loop body) to the new inner loop.
1269831a7577SFlorian Hahn BasicBlock *OuterHeader = NewOuter->getHeader();
1270831a7577SFlorian Hahn BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1271831a7577SFlorian Hahn for (BasicBlock *BB : OrigInnerBBs) {
127274418185SFlorian Hahn // Nothing will change for BBs in child loops.
127374418185SFlorian Hahn if (LI->getLoopFor(BB) != NewOuter)
127474418185SFlorian Hahn continue;
1275831a7577SFlorian Hahn // Remove the new outer loop header and latch from the new inner loop.
1276831a7577SFlorian Hahn if (BB == OuterHeader || BB == OuterLatch)
1277831a7577SFlorian Hahn NewInner->removeBlockFromLoop(BB);
1278831a7577SFlorian Hahn else
1279831a7577SFlorian Hahn LI->changeLoopFor(BB, NewInner);
128088db86ddSKarthik Bhat }
128188db86ddSKarthik Bhat
1282831a7577SFlorian Hahn // The preheader of the original outer loop becomes part of the new
1283831a7577SFlorian Hahn // outer loop.
1284831a7577SFlorian Hahn NewOuter->addBlockEntry(OrigOuterPreHeader);
1285831a7577SFlorian Hahn LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
12863afb974aSFlorian Hahn
12873afb974aSFlorian Hahn // Tell SE that we move the loops around.
12883afb974aSFlorian Hahn SE->forgetLoop(NewOuter);
12893afb974aSFlorian Hahn SE->forgetLoop(NewInner);
129088db86ddSKarthik Bhat }
129188db86ddSKarthik Bhat
transform()129288db86ddSKarthik Bhat bool LoopInterchangeTransform::transform() {
129388db86ddSKarthik Bhat bool Transformed = false;
129488db86ddSKarthik Bhat
1295dd40f5e7SEugene Zelenko if (InnerLoop->getSubLoops().empty()) {
129688db86ddSKarthik Bhat BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1297e4961218SFlorian Hahn LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
1298fa6a2876SCongzhe Cao auto &InductionPHIs = LIL.getInnerLoopInductions();
1299fa6a2876SCongzhe Cao if (InductionPHIs.empty()) {
1300d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
130188db86ddSKarthik Bhat return false;
130288db86ddSKarthik Bhat }
130388db86ddSKarthik Bhat
1304fa6a2876SCongzhe Cao SmallVector<Instruction *, 8> InnerIndexVarList;
1305fa6a2876SCongzhe Cao for (PHINode *CurInductionPHI : InductionPHIs) {
1306fa6a2876SCongzhe Cao if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1307fa6a2876SCongzhe Cao InnerIndexVarList.push_back(
1308fa6a2876SCongzhe Cao dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
130988db86ddSKarthik Bhat else
1310fa6a2876SCongzhe Cao InnerIndexVarList.push_back(
1311fa6a2876SCongzhe Cao dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1312fa6a2876SCongzhe Cao }
1313907b60fbSDavid Green
1314e4961218SFlorian Hahn // Create a new latch block for the inner loop. We split at the
1315e4961218SFlorian Hahn // current latch's terminator and then move the condition and all
1316e4961218SFlorian Hahn // operands that are not either loop-invariant or the induction PHI into the
1317e4961218SFlorian Hahn // new latch block.
1318e4961218SFlorian Hahn BasicBlock *NewLatch =
1319e4961218SFlorian Hahn SplitBlock(InnerLoop->getLoopLatch(),
1320e4961218SFlorian Hahn InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1321e4961218SFlorian Hahn
1322e4961218SFlorian Hahn SmallSetVector<Instruction *, 4> WorkList;
1323e4961218SFlorian Hahn unsigned i = 0;
1324fa6a2876SCongzhe Cao auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1325e4961218SFlorian Hahn for (; i < WorkList.size(); i++) {
1326e4961218SFlorian Hahn // Duplicate instruction and move it the new latch. Update uses that
1327e4961218SFlorian Hahn // have been moved.
1328e4961218SFlorian Hahn Instruction *NewI = WorkList[i]->clone();
1329e4961218SFlorian Hahn NewI->insertBefore(NewLatch->getFirstNonPHI());
1330e4961218SFlorian Hahn assert(!NewI->mayHaveSideEffects() &&
1331e4961218SFlorian Hahn "Moving instructions with side-effects may change behavior of "
1332e4961218SFlorian Hahn "the loop nest!");
13335fc9e309SKazu Hirata for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1334e4961218SFlorian Hahn Instruction *UserI = cast<Instruction>(U.getUser());
1335e4961218SFlorian Hahn if (!InnerLoop->contains(UserI->getParent()) ||
1336fa6a2876SCongzhe Cao UserI->getParent() == NewLatch ||
1337fa6a2876SCongzhe Cao llvm::is_contained(InductionPHIs, UserI))
1338e4961218SFlorian Hahn U.set(NewI);
1339e4961218SFlorian Hahn }
1340e4961218SFlorian Hahn // Add operands of moved instruction to the worklist, except if they are
1341e4961218SFlorian Hahn // outside the inner loop or are the induction PHI.
1342e4961218SFlorian Hahn for (Value *Op : WorkList[i]->operands()) {
1343e4961218SFlorian Hahn Instruction *OpI = dyn_cast<Instruction>(Op);
1344e4961218SFlorian Hahn if (!OpI ||
1345e4961218SFlorian Hahn this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1346fa6a2876SCongzhe Cao llvm::is_contained(InductionPHIs, OpI))
1347e4961218SFlorian Hahn continue;
1348e4961218SFlorian Hahn WorkList.insert(OpI);
1349e4961218SFlorian Hahn }
1350e4961218SFlorian Hahn }
1351e4961218SFlorian Hahn };
1352e4961218SFlorian Hahn
1353e4961218SFlorian Hahn // FIXME: Should we interchange when we have a constant condition?
1354e4961218SFlorian Hahn Instruction *CondI = dyn_cast<Instruction>(
1355e4961218SFlorian Hahn cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1356e4961218SFlorian Hahn ->getCondition());
1357e4961218SFlorian Hahn if (CondI)
1358e4961218SFlorian Hahn WorkList.insert(CondI);
1359e4961218SFlorian Hahn MoveInstructions();
1360fa6a2876SCongzhe Cao for (Instruction *InnerIndexVar : InnerIndexVarList)
1361e4961218SFlorian Hahn WorkList.insert(cast<Instruction>(InnerIndexVar));
1362e4961218SFlorian Hahn MoveInstructions();
136388db86ddSKarthik Bhat
1364df005cbeSBenjamin Kramer // Splits the inner loops phi nodes out into a separate basic block.
1365d8fcf0deSFlorian Hahn BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1366d8fcf0deSFlorian Hahn SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1367d8fcf0deSFlorian Hahn LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
136888db86ddSKarthik Bhat }
136988db86ddSKarthik Bhat
13708393b9fdSFlorian Hahn // Instructions in the original inner loop preheader may depend on values
13718393b9fdSFlorian Hahn // defined in the outer loop header. Move them there, because the original
13728393b9fdSFlorian Hahn // inner loop preheader will become the entry into the interchanged loop nest.
13738393b9fdSFlorian Hahn // Currently we move all instructions and rely on LICM to move invariant
13748393b9fdSFlorian Hahn // instructions outside the loop nest.
13758393b9fdSFlorian Hahn BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
13768393b9fdSFlorian Hahn BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
13778393b9fdSFlorian Hahn if (InnerLoopPreHeader != OuterLoopHeader) {
13788393b9fdSFlorian Hahn SmallPtrSet<Instruction *, 4> NeedsMoving;
13798393b9fdSFlorian Hahn for (Instruction &I :
13808393b9fdSFlorian Hahn make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
13818393b9fdSFlorian Hahn std::prev(InnerLoopPreHeader->end()))))
13828393b9fdSFlorian Hahn I.moveBefore(OuterLoopHeader->getTerminator());
13838393b9fdSFlorian Hahn }
13848393b9fdSFlorian Hahn
138588db86ddSKarthik Bhat Transformed |= adjustLoopLinks();
138688db86ddSKarthik Bhat if (!Transformed) {
1387d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
138888db86ddSKarthik Bhat return false;
138988db86ddSKarthik Bhat }
139088db86ddSKarthik Bhat
139188db86ddSKarthik Bhat return true;
139288db86ddSKarthik Bhat }
139388db86ddSKarthik Bhat
1394d8fcf0deSFlorian Hahn /// \brief Move all instructions except the terminator from FromBB right before
139579442920SBenjamin Kramer /// InsertBefore
moveBBContents(BasicBlock * FromBB,Instruction * InsertBefore)139679442920SBenjamin Kramer static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
139779442920SBenjamin Kramer auto &ToList = InsertBefore->getParent()->getInstList();
139879442920SBenjamin Kramer auto &FromList = FromBB->getInstList();
139979442920SBenjamin Kramer
1400be4d8cbaSDuncan P. N. Exon Smith ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1401be4d8cbaSDuncan P. N. Exon Smith FromBB->getTerminator()->getIterator());
140288db86ddSKarthik Bhat }
140388db86ddSKarthik Bhat
1404f71abec6SAlexey Zhikhartsev /// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
swapBBContents(BasicBlock * BB1,BasicBlock * BB2)1405f71abec6SAlexey Zhikhartsev static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1406f71abec6SAlexey Zhikhartsev // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1407f71abec6SAlexey Zhikhartsev // from BB1 afterwards.
1408f71abec6SAlexey Zhikhartsev auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1409f71abec6SAlexey Zhikhartsev SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1410f71abec6SAlexey Zhikhartsev for (Instruction *I : TempInstrs)
1411f71abec6SAlexey Zhikhartsev I->removeFromParent();
1412f71abec6SAlexey Zhikhartsev
1413f71abec6SAlexey Zhikhartsev // Move instructions from BB2 to BB1.
1414f71abec6SAlexey Zhikhartsev moveBBContents(BB2, BB1->getTerminator());
1415f71abec6SAlexey Zhikhartsev
1416f71abec6SAlexey Zhikhartsev // Move instructions from TempInstrs to BB2.
1417f71abec6SAlexey Zhikhartsev for (Instruction *I : TempInstrs)
1418f71abec6SAlexey Zhikhartsev I->insertBefore(BB2->getTerminator());
1419f71abec6SAlexey Zhikhartsev }
1420f71abec6SAlexey Zhikhartsev
14219a432161SFlorian Hahn // Update BI to jump to NewBB instead of OldBB. Records updates to the
14229a432161SFlorian Hahn // dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
14239a432161SFlorian Hahn // \p OldBB is exactly once in BI's successor list.
updateSuccessor(BranchInst * BI,BasicBlock * OldBB,BasicBlock * NewBB,std::vector<DominatorTree::UpdateType> & DTUpdates,bool MustUpdateOnce=true)1424c6296feaSFlorian Hahn static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1425c6296feaSFlorian Hahn BasicBlock *NewBB,
14269a432161SFlorian Hahn std::vector<DominatorTree::UpdateType> &DTUpdates,
14279a432161SFlorian Hahn bool MustUpdateOnce = true) {
14289a432161SFlorian Hahn assert((!MustUpdateOnce ||
14299a432161SFlorian Hahn llvm::count_if(successors(BI),
14309a432161SFlorian Hahn [OldBB](BasicBlock *BB) {
14319a432161SFlorian Hahn return BB == OldBB;
14329a432161SFlorian Hahn }) == 1) && "BI must jump to OldBB exactly once.");
14339a432161SFlorian Hahn bool Changed = false;
14349a432161SFlorian Hahn for (Use &Op : BI->operands())
14359a432161SFlorian Hahn if (Op == OldBB) {
14369a432161SFlorian Hahn Op.set(NewBB);
14379a432161SFlorian Hahn Changed = true;
14389a432161SFlorian Hahn }
1439c6296feaSFlorian Hahn
14409a432161SFlorian Hahn if (Changed) {
1441c6296feaSFlorian Hahn DTUpdates.push_back(
1442c6296feaSFlorian Hahn {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1443c6296feaSFlorian Hahn DTUpdates.push_back(
1444c6296feaSFlorian Hahn {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1445c6296feaSFlorian Hahn }
14469a432161SFlorian Hahn assert(Changed && "Expected a successor to be updated");
1447c6296feaSFlorian Hahn }
1448c6296feaSFlorian Hahn
14496feb6371SFlorian Hahn // Move Lcssa PHIs to the right place.
moveLCSSAPhis(BasicBlock * InnerExit,BasicBlock * InnerHeader,BasicBlock * InnerLatch,BasicBlock * OuterHeader,BasicBlock * OuterLatch,BasicBlock * OuterExit,Loop * InnerLoop,LoopInfo * LI)145011b2f4feSFlorian Hahn static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
145111b2f4feSFlorian Hahn BasicBlock *InnerLatch, BasicBlock *OuterHeader,
14521ee93240SFlorian Hahn BasicBlock *OuterLatch, BasicBlock *OuterExit,
14531ee93240SFlorian Hahn Loop *InnerLoop, LoopInfo *LI) {
145411b2f4feSFlorian Hahn
145511b2f4feSFlorian Hahn // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
145611b2f4feSFlorian Hahn // defined either in the header or latch. Those blocks will become header and
145711b2f4feSFlorian Hahn // latch of the new outer loop, and the only possible users can PHI nodes
145811b2f4feSFlorian Hahn // in the exit block of the loop nest or the outer loop header (reduction
145911b2f4feSFlorian Hahn // PHIs, in that case, the incoming value must be defined in the inner loop
146011b2f4feSFlorian Hahn // header). We can just substitute the user with the incoming value and remove
146111b2f4feSFlorian Hahn // the PHI.
146211b2f4feSFlorian Hahn for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
146311b2f4feSFlorian Hahn assert(P.getNumIncomingValues() == 1 &&
146411b2f4feSFlorian Hahn "Only loops with a single exit are supported!");
146511b2f4feSFlorian Hahn
146611b2f4feSFlorian Hahn // Incoming values are guaranteed be instructions currently.
146711b2f4feSFlorian Hahn auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1468eac34875SCongzhe Cao // In case of multi-level nested loops, follow LCSSA to find the incoming
1469eac34875SCongzhe Cao // value defined from the innermost loop.
1470eac34875SCongzhe Cao auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
147111b2f4feSFlorian Hahn // Skip phis with incoming values from the inner loop body, excluding the
147211b2f4feSFlorian Hahn // header and latch.
1473eac34875SCongzhe Cao if (IncIInnerMost->getParent() != InnerLatch &&
1474eac34875SCongzhe Cao IncIInnerMost->getParent() != InnerHeader)
147511b2f4feSFlorian Hahn continue;
147611b2f4feSFlorian Hahn
147711b2f4feSFlorian Hahn assert(all_of(P.users(),
147811b2f4feSFlorian Hahn [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
147911b2f4feSFlorian Hahn return (cast<PHINode>(U)->getParent() == OuterHeader &&
148011b2f4feSFlorian Hahn IncI->getParent() == InnerHeader) ||
148111b2f4feSFlorian Hahn cast<PHINode>(U)->getParent() == OuterExit;
148211b2f4feSFlorian Hahn }) &&
148311b2f4feSFlorian Hahn "Can only replace phis iff the uses are in the loop nest exit or "
148411b2f4feSFlorian Hahn "the incoming value is defined in the inner header (it will "
148511b2f4feSFlorian Hahn "dominate all loop blocks after interchanging)");
148611b2f4feSFlorian Hahn P.replaceAllUsesWith(IncI);
148711b2f4feSFlorian Hahn P.eraseFromParent();
148811b2f4feSFlorian Hahn }
148911b2f4feSFlorian Hahn
14906feb6371SFlorian Hahn SmallVector<PHINode *, 8> LcssaInnerExit;
14916feb6371SFlorian Hahn for (PHINode &P : InnerExit->phis())
14926feb6371SFlorian Hahn LcssaInnerExit.push_back(&P);
14936feb6371SFlorian Hahn
14946feb6371SFlorian Hahn SmallVector<PHINode *, 8> LcssaInnerLatch;
14956feb6371SFlorian Hahn for (PHINode &P : InnerLatch->phis())
14966feb6371SFlorian Hahn LcssaInnerLatch.push_back(&P);
14976feb6371SFlorian Hahn
14986feb6371SFlorian Hahn // Lcssa PHIs for values used outside the inner loop are in InnerExit.
14996feb6371SFlorian Hahn // If a PHI node has users outside of InnerExit, it has a use outside the
15006feb6371SFlorian Hahn // interchanged loop and we have to preserve it. We move these to
15016feb6371SFlorian Hahn // InnerLatch, which will become the new exit block for the innermost
150211b2f4feSFlorian Hahn // loop after interchanging.
150311b2f4feSFlorian Hahn for (PHINode *P : LcssaInnerExit)
15046feb6371SFlorian Hahn P->moveBefore(InnerLatch->getFirstNonPHI());
15056feb6371SFlorian Hahn
15066feb6371SFlorian Hahn // If the inner loop latch contains LCSSA PHIs, those come from a child loop
15076feb6371SFlorian Hahn // and we have to move them to the new inner latch.
15086feb6371SFlorian Hahn for (PHINode *P : LcssaInnerLatch)
15096feb6371SFlorian Hahn P->moveBefore(InnerExit->getFirstNonPHI());
15106feb6371SFlorian Hahn
151111b2f4feSFlorian Hahn // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
15121ee93240SFlorian Hahn // incoming values defined in the outer loop, we have to add a new PHI
151311b2f4feSFlorian Hahn // in the inner loop latch, which became the exit block of the outer loop,
151411b2f4feSFlorian Hahn // after interchanging.
151511b2f4feSFlorian Hahn if (OuterExit) {
151611b2f4feSFlorian Hahn for (PHINode &P : OuterExit->phis()) {
151711b2f4feSFlorian Hahn if (P.getNumIncomingValues() != 1)
151811b2f4feSFlorian Hahn continue;
15191ee93240SFlorian Hahn // Skip Phis with incoming values defined in the inner loop. Those should
152011b2f4feSFlorian Hahn // already have been updated.
152111b2f4feSFlorian Hahn auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
15221ee93240SFlorian Hahn if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
152311b2f4feSFlorian Hahn continue;
152411b2f4feSFlorian Hahn
152511b2f4feSFlorian Hahn PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
152611b2f4feSFlorian Hahn NewPhi->setIncomingValue(0, P.getIncomingValue(0));
152711b2f4feSFlorian Hahn NewPhi->setIncomingBlock(0, OuterLatch);
15283f8be15fSCongzhe Cao // We might have incoming edges from other BBs, i.e., the original outer
15293f8be15fSCongzhe Cao // header.
15303f8be15fSCongzhe Cao for (auto *Pred : predecessors(InnerLatch)) {
15313f8be15fSCongzhe Cao if (Pred == OuterLatch)
15323f8be15fSCongzhe Cao continue;
15333f8be15fSCongzhe Cao NewPhi->addIncoming(P.getIncomingValue(0), Pred);
15343f8be15fSCongzhe Cao }
153511b2f4feSFlorian Hahn NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
153611b2f4feSFlorian Hahn P.setIncomingValue(0, NewPhi);
153711b2f4feSFlorian Hahn }
153811b2f4feSFlorian Hahn }
153911b2f4feSFlorian Hahn
15406feb6371SFlorian Hahn // Now adjust the incoming blocks for the LCSSA PHIs.
15416feb6371SFlorian Hahn // For PHIs moved from Inner's exit block, we need to replace Inner's latch
15426feb6371SFlorian Hahn // with the new latch.
15431a1b9221SRoman Lebedev InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
15446feb6371SFlorian Hahn }
15456feb6371SFlorian Hahn
adjustLoopBranches()154688db86ddSKarthik Bhat bool LoopInterchangeTransform::adjustLoopBranches() {
1547d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1548c6296feaSFlorian Hahn std::vector<DominatorTree::UpdateType> DTUpdates;
1549c6296feaSFlorian Hahn
1550236f6febSFlorian Hahn BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1551236f6febSFlorian Hahn BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1552236f6febSFlorian Hahn
1553236f6febSFlorian Hahn assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1554236f6febSFlorian Hahn InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1555236f6febSFlorian Hahn InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1556236f6febSFlorian Hahn // Ensure that both preheaders do not contain PHI nodes and have single
1557236f6febSFlorian Hahn // predecessors. This allows us to move them easily. We use
1558236f6febSFlorian Hahn // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1559236f6febSFlorian Hahn // preheaders do not satisfy those conditions.
1560236f6febSFlorian Hahn if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1561236f6febSFlorian Hahn !OuterLoopPreHeader->getUniquePredecessor())
1562f31eba64SAlina Sbirlea OuterLoopPreHeader =
1563f31eba64SAlina Sbirlea InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1564236f6febSFlorian Hahn if (InnerLoopPreHeader == OuterLoop->getHeader())
1565f31eba64SAlina Sbirlea InnerLoopPreHeader =
1566f31eba64SAlina Sbirlea InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1567236f6febSFlorian Hahn
156888db86ddSKarthik Bhat // Adjust the loop preheader
156988db86ddSKarthik Bhat BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
157088db86ddSKarthik Bhat BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
157188db86ddSKarthik Bhat BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
157288db86ddSKarthik Bhat BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
157388db86ddSKarthik Bhat BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
157488db86ddSKarthik Bhat BasicBlock *InnerLoopLatchPredecessor =
157588db86ddSKarthik Bhat InnerLoopLatch->getUniquePredecessor();
157688db86ddSKarthik Bhat BasicBlock *InnerLoopLatchSuccessor;
157788db86ddSKarthik Bhat BasicBlock *OuterLoopLatchSuccessor;
157888db86ddSKarthik Bhat
157988db86ddSKarthik Bhat BranchInst *OuterLoopLatchBI =
158088db86ddSKarthik Bhat dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
158188db86ddSKarthik Bhat BranchInst *InnerLoopLatchBI =
158288db86ddSKarthik Bhat dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
158388db86ddSKarthik Bhat BranchInst *OuterLoopHeaderBI =
158488db86ddSKarthik Bhat dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
158588db86ddSKarthik Bhat BranchInst *InnerLoopHeaderBI =
158688db86ddSKarthik Bhat dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
158788db86ddSKarthik Bhat
158888db86ddSKarthik Bhat if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
158988db86ddSKarthik Bhat !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
159088db86ddSKarthik Bhat !InnerLoopHeaderBI)
159188db86ddSKarthik Bhat return false;
159288db86ddSKarthik Bhat
159388db86ddSKarthik Bhat BranchInst *InnerLoopLatchPredecessorBI =
159488db86ddSKarthik Bhat dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
159588db86ddSKarthik Bhat BranchInst *OuterLoopPredecessorBI =
159688db86ddSKarthik Bhat dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
159788db86ddSKarthik Bhat
159888db86ddSKarthik Bhat if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
159988db86ddSKarthik Bhat return false;
1600df005cbeSBenjamin Kramer BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1601df005cbeSBenjamin Kramer if (!InnerLoopHeaderSuccessor)
160288db86ddSKarthik Bhat return false;
160388db86ddSKarthik Bhat
16049a432161SFlorian Hahn // Adjust Loop Preheader and headers.
16059a432161SFlorian Hahn // The branches in the outer loop predecessor and the outer loop header can
16069a432161SFlorian Hahn // be unconditional branches or conditional branches with duplicates. Consider
16079a432161SFlorian Hahn // this when updating the successors.
1608c6296feaSFlorian Hahn updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
16099a432161SFlorian Hahn InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
16109a432161SFlorian Hahn // The outer loop header might or might not branch to the outer latch.
16119a432161SFlorian Hahn // We are guaranteed to branch to the inner loop preheader.
1612ce2db900SCongzhe Cao if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1613ce2db900SCongzhe Cao // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1614ce2db900SCongzhe Cao updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1615ce2db900SCongzhe Cao DTUpdates,
16169a432161SFlorian Hahn /*MustUpdateOnce=*/false);
1617ce2db900SCongzhe Cao }
1618c6296feaSFlorian Hahn updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
16199a432161SFlorian Hahn InnerLoopHeaderSuccessor, DTUpdates,
16209a432161SFlorian Hahn /*MustUpdateOnce=*/false);
162188db86ddSKarthik Bhat
16228210fdf2SKarthik Bhat // Adjust reduction PHI's now that the incoming block has changed.
16231a1b9221SRoman Lebedev InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
16248210fdf2SKarthik Bhat OuterLoopHeader);
16258210fdf2SKarthik Bhat
1626c6296feaSFlorian Hahn updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1627c6296feaSFlorian Hahn OuterLoopPreHeader, DTUpdates);
162888db86ddSKarthik Bhat
162988db86ddSKarthik Bhat // -------------Adjust loop latches-----------
163088db86ddSKarthik Bhat if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
163188db86ddSKarthik Bhat InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
163288db86ddSKarthik Bhat else
163388db86ddSKarthik Bhat InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
163488db86ddSKarthik Bhat
1635c6296feaSFlorian Hahn updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1636c6296feaSFlorian Hahn InnerLoopLatchSuccessor, DTUpdates);
163788db86ddSKarthik Bhat
163888db86ddSKarthik Bhat if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
163988db86ddSKarthik Bhat OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
164088db86ddSKarthik Bhat else
164188db86ddSKarthik Bhat OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
164288db86ddSKarthik Bhat
1643c6296feaSFlorian Hahn updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1644c6296feaSFlorian Hahn OuterLoopLatchSuccessor, DTUpdates);
1645c6296feaSFlorian Hahn updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1646c6296feaSFlorian Hahn DTUpdates);
164788db86ddSKarthik Bhat
1648c6296feaSFlorian Hahn DT->applyUpdates(DTUpdates);
1649831a7577SFlorian Hahn restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1650831a7577SFlorian Hahn OuterLoopPreHeader);
1651831a7577SFlorian Hahn
165211b2f4feSFlorian Hahn moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
16531ee93240SFlorian Hahn OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
16541ee93240SFlorian Hahn InnerLoop, LI);
16556feb6371SFlorian Hahn // For PHIs in the exit block of the outer loop, outer's latch has been
16566feb6371SFlorian Hahn // replaced by Inners'.
16571a1b9221SRoman Lebedev OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
16586feb6371SFlorian Hahn
1659bfefde22SCongzhe Cao auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1660a684a994SFlorian Hahn // Now update the reduction PHIs in the inner and outer loop headers.
1661a684a994SFlorian Hahn SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1662c714da2cSKazu Hirata for (PHINode &PHI : InnerLoopHeader->phis())
1663c714da2cSKazu Hirata if (OuterInnerReductions.contains(&PHI))
1664fa6a2876SCongzhe Cao InnerLoopPHIs.push_back(&PHI);
1665fa6a2876SCongzhe Cao
1666c714da2cSKazu Hirata for (PHINode &PHI : OuterLoopHeader->phis())
1667c714da2cSKazu Hirata if (OuterInnerReductions.contains(&PHI))
166837e34b74SCongzhe Cao OuterLoopPHIs.push_back(&PHI);
1669a684a994SFlorian Hahn
1670a684a994SFlorian Hahn // Now move the remaining reduction PHIs from outer to inner loop header and
1671a684a994SFlorian Hahn // vice versa. The PHI nodes must be part of a reduction across the inner and
1672a684a994SFlorian Hahn // outer loop and all the remains to do is and updating the incoming blocks.
1673a684a994SFlorian Hahn for (PHINode *PHI : OuterLoopPHIs) {
167437e34b74SCongzhe Cao LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump(););
1675a684a994SFlorian Hahn PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
16763badd17bSBenjamin Kramer assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1677a684a994SFlorian Hahn }
1678a684a994SFlorian Hahn for (PHINode *PHI : InnerLoopPHIs) {
1679fa6a2876SCongzhe Cao LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump(););
1680a684a994SFlorian Hahn PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
16813badd17bSBenjamin Kramer assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1682a684a994SFlorian Hahn }
1683d8fcf0deSFlorian Hahn
1684d8fcf0deSFlorian Hahn // Update the incoming blocks for moved PHI nodes.
16851a1b9221SRoman Lebedev OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
16861a1b9221SRoman Lebedev OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
16871a1b9221SRoman Lebedev InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
16881a1b9221SRoman Lebedev InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1689d8fcf0deSFlorian Hahn
169054cb552bSFlorian Hahn // Values defined in the outer loop header could be used in the inner loop
169154cb552bSFlorian Hahn // latch. In that case, we need to create LCSSA phis for them, because after
169254cb552bSFlorian Hahn // interchanging they will be defined in the new inner loop and used in the
169354cb552bSFlorian Hahn // new outer loop.
169454cb552bSFlorian Hahn IRBuilder<> Builder(OuterLoopHeader->getContext());
169554cb552bSFlorian Hahn SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
169654cb552bSFlorian Hahn for (Instruction &I :
169754cb552bSFlorian Hahn make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
169854cb552bSFlorian Hahn MayNeedLCSSAPhis.push_back(&I);
169954cb552bSFlorian Hahn formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
170054cb552bSFlorian Hahn
170188db86ddSKarthik Bhat return true;
170288db86ddSKarthik Bhat }
170388db86ddSKarthik Bhat
adjustLoopLinks()170488db86ddSKarthik Bhat bool LoopInterchangeTransform::adjustLoopLinks() {
170588db86ddSKarthik Bhat // Adjust all branches in the inner and outer loop.
170688db86ddSKarthik Bhat bool Changed = adjustLoopBranches();
1707f71abec6SAlexey Zhikhartsev if (Changed) {
1708f71abec6SAlexey Zhikhartsev // We have interchanged the preheaders so we need to interchange the data in
1709f71abec6SAlexey Zhikhartsev // the preheaders as well. This is because the content of the inner
1710f71abec6SAlexey Zhikhartsev // preheader was previously executed inside the outer loop.
1711f71abec6SAlexey Zhikhartsev BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1712f71abec6SAlexey Zhikhartsev BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1713f71abec6SAlexey Zhikhartsev swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1714f71abec6SAlexey Zhikhartsev }
171588db86ddSKarthik Bhat return Changed;
171688db86ddSKarthik Bhat }
171788db86ddSKarthik Bhat
17189b8b1645SBenjamin Kramer namespace {
17199c21c6c9SArthur Eubanks /// Main LoopInterchange Pass.
17209c21c6c9SArthur Eubanks struct LoopInterchangeLegacyPass : public LoopPass {
17219c21c6c9SArthur Eubanks static char ID;
1722dd40f5e7SEugene Zelenko
LoopInterchangeLegacyPass__anon401b65a51611::LoopInterchangeLegacyPass17239c21c6c9SArthur Eubanks LoopInterchangeLegacyPass() : LoopPass(ID) {
17249c21c6c9SArthur Eubanks initializeLoopInterchangeLegacyPassPass(*PassRegistry::getPassRegistry());
17259c21c6c9SArthur Eubanks }
17269c21c6c9SArthur Eubanks
getAnalysisUsage__anon401b65a51611::LoopInterchangeLegacyPass17279c21c6c9SArthur Eubanks void getAnalysisUsage(AnalysisUsage &AU) const override {
17289c21c6c9SArthur Eubanks AU.addRequired<DependenceAnalysisWrapperPass>();
17299c21c6c9SArthur Eubanks AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
17309c21c6c9SArthur Eubanks
17319c21c6c9SArthur Eubanks getLoopAnalysisUsage(AU);
17329c21c6c9SArthur Eubanks }
17339c21c6c9SArthur Eubanks
runOnLoop__anon401b65a51611::LoopInterchangeLegacyPass17349c21c6c9SArthur Eubanks bool runOnLoop(Loop *L, LPPassManager &LPM) override {
17359c21c6c9SArthur Eubanks if (skipLoop(L))
17369c21c6c9SArthur Eubanks return false;
17379c21c6c9SArthur Eubanks
17389c21c6c9SArthur Eubanks auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
17399c21c6c9SArthur Eubanks auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
17409c21c6c9SArthur Eubanks auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
17419c21c6c9SArthur Eubanks auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
17429c21c6c9SArthur Eubanks auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1743*b941857bSCongzhe Cao std::unique_ptr<CacheCost> CC = nullptr;
1744*b941857bSCongzhe Cao return LoopInterchange(SE, LI, DI, DT, CC, ORE).run(L);
17459c21c6c9SArthur Eubanks }
17469c21c6c9SArthur Eubanks };
17479b8b1645SBenjamin Kramer } // namespace
17489c21c6c9SArthur Eubanks
17499c21c6c9SArthur Eubanks char LoopInterchangeLegacyPass::ID = 0;
17509c21c6c9SArthur Eubanks
17519c21c6c9SArthur Eubanks INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",
175288db86ddSKarthik Bhat "Interchanges loops for cache reuse", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)17538600fee5SFlorian Hahn INITIALIZE_PASS_DEPENDENCY(LoopPass)
175449c22190SChandler Carruth INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
1755ad993521SFlorian Hahn INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
175688db86ddSKarthik Bhat
17579c21c6c9SArthur Eubanks INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",
175888db86ddSKarthik Bhat "Interchanges loops for cache reuse", false, false)
175988db86ddSKarthik Bhat
17609c21c6c9SArthur Eubanks Pass *llvm::createLoopInterchangePass() {
17619c21c6c9SArthur Eubanks return new LoopInterchangeLegacyPass();
17629c21c6c9SArthur Eubanks }
17639c21c6c9SArthur Eubanks
run(LoopNest & LN,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater & U)17640eeaec2aSTa-Wei Tu PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
17650eeaec2aSTa-Wei Tu LoopAnalysisManager &AM,
17669c21c6c9SArthur Eubanks LoopStandardAnalysisResults &AR,
17679c21c6c9SArthur Eubanks LPMUpdater &U) {
17680eeaec2aSTa-Wei Tu Function &F = *LN.getParent();
17699c21c6c9SArthur Eubanks
17709c21c6c9SArthur Eubanks DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1771*b941857bSCongzhe Cao std::unique_ptr<CacheCost> CC =
1772*b941857bSCongzhe Cao CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
17739c21c6c9SArthur Eubanks OptimizationRemarkEmitter ORE(&F);
1774*b941857bSCongzhe Cao if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
17759c21c6c9SArthur Eubanks return PreservedAnalyses::all();
17769c21c6c9SArthur Eubanks return getLoopPassPreservedAnalyses();
17779c21c6c9SArthur Eubanks }
1778