12cab237bSDimitry Andric //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2ff0cc061SDimitry Andric //
3ff0cc061SDimitry Andric // The LLVM Compiler Infrastructure
4ff0cc061SDimitry Andric //
5ff0cc061SDimitry Andric // This file is distributed under the University of Illinois Open Source
6ff0cc061SDimitry Andric // License. See LICENSE.TXT for details.
7ff0cc061SDimitry Andric //
8ff0cc061SDimitry Andric //===----------------------------------------------------------------------===//
9ff0cc061SDimitry Andric //
10ff0cc061SDimitry Andric // This Pass handles loop interchange transform.
11ff0cc061SDimitry Andric // This pass interchanges loops to provide a more cache-friendly memory access
12ff0cc061SDimitry Andric // patterns.
13ff0cc061SDimitry Andric //
14ff0cc061SDimitry Andric //===----------------------------------------------------------------------===//
15ff0cc061SDimitry Andric
162cab237bSDimitry Andric #include "llvm/ADT/STLExtras.h"
17ff0cc061SDimitry Andric #include "llvm/ADT/SmallVector.h"
184ba319b5SDimitry Andric #include "llvm/ADT/Statistic.h"
192cab237bSDimitry Andric #include "llvm/ADT/StringRef.h"
20ff0cc061SDimitry Andric #include "llvm/Analysis/DependenceAnalysis.h"
21ff0cc061SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
22*b5893f02SDimitry Andric #include "llvm/Analysis/LoopPass.h"
232cab237bSDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
24ff0cc061SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
25ff0cc061SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h"
262cab237bSDimitry Andric #include "llvm/IR/BasicBlock.h"
272cab237bSDimitry Andric #include "llvm/IR/Constants.h"
282cab237bSDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
29ff0cc061SDimitry Andric #include "llvm/IR/Dominators.h"
30ff0cc061SDimitry Andric #include "llvm/IR/Function.h"
312cab237bSDimitry Andric #include "llvm/IR/InstrTypes.h"
322cab237bSDimitry Andric #include "llvm/IR/Instruction.h"
332cab237bSDimitry Andric #include "llvm/IR/Instructions.h"
342cab237bSDimitry Andric #include "llvm/IR/Type.h"
352cab237bSDimitry Andric #include "llvm/IR/User.h"
362cab237bSDimitry Andric #include "llvm/IR/Value.h"
37ff0cc061SDimitry Andric #include "llvm/Pass.h"
382cab237bSDimitry Andric #include "llvm/Support/Casting.h"
392cab237bSDimitry Andric #include "llvm/Support/CommandLine.h"
40ff0cc061SDimitry Andric #include "llvm/Support/Debug.h"
412cab237bSDimitry Andric #include "llvm/Support/ErrorHandling.h"
42ff0cc061SDimitry Andric #include "llvm/Support/raw_ostream.h"
43ff0cc061SDimitry Andric #include "llvm/Transforms/Scalar.h"
444ba319b5SDimitry Andric #include "llvm/Transforms/Utils.h"
45ff0cc061SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
46ff0cc061SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
472cab237bSDimitry Andric #include <cassert>
482cab237bSDimitry Andric #include <utility>
492cab237bSDimitry Andric #include <vector>
507a7e6055SDimitry Andric
51ff0cc061SDimitry Andric using namespace llvm;
52ff0cc061SDimitry Andric
53ff0cc061SDimitry Andric #define DEBUG_TYPE "loop-interchange"
54ff0cc061SDimitry Andric
554ba319b5SDimitry Andric STATISTIC(LoopsInterchanged, "Number of loops interchanged");
564ba319b5SDimitry Andric
57d88c1a5aSDimitry Andric static cl::opt<int> LoopInterchangeCostThreshold(
58d88c1a5aSDimitry Andric "loop-interchange-threshold", cl::init(0), cl::Hidden,
59d88c1a5aSDimitry Andric cl::desc("Interchange if you gain more than this number"));
60d88c1a5aSDimitry Andric
61ff0cc061SDimitry Andric namespace {
62ff0cc061SDimitry Andric
632cab237bSDimitry Andric using LoopVector = SmallVector<Loop *, 8>;
64ff0cc061SDimitry Andric
65ff0cc061SDimitry Andric // TODO: Check if we can use a sparse matrix here.
662cab237bSDimitry Andric using CharMatrix = std::vector<std::vector<char>>;
672cab237bSDimitry Andric
682cab237bSDimitry Andric } // end anonymous namespace
69ff0cc061SDimitry Andric
70ff0cc061SDimitry Andric // Maximum number of dependencies that can be handled in the dependency matrix.
71ff0cc061SDimitry Andric static const unsigned MaxMemInstrCount = 100;
72ff0cc061SDimitry Andric
73ff0cc061SDimitry Andric // Maximum loop depth supported.
74ff0cc061SDimitry Andric static const unsigned MaxLoopNestDepth = 10;
75ff0cc061SDimitry Andric
76ff0cc061SDimitry Andric #ifdef DUMP_DEP_MATRICIES
printDepMatrix(CharMatrix & DepMatrix)772cab237bSDimitry Andric static void printDepMatrix(CharMatrix &DepMatrix) {
782cab237bSDimitry Andric for (auto &Row : DepMatrix) {
792cab237bSDimitry Andric for (auto D : Row)
804ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << D << " ");
814ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "\n");
82ff0cc061SDimitry Andric }
83ff0cc061SDimitry Andric }
84ff0cc061SDimitry Andric #endif
85ff0cc061SDimitry Andric
populateDependencyMatrix(CharMatrix & DepMatrix,unsigned Level,Loop * L,DependenceInfo * DI)86ff0cc061SDimitry Andric static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
873ca95b02SDimitry Andric Loop *L, DependenceInfo *DI) {
882cab237bSDimitry Andric using ValueVector = SmallVector<Value *, 16>;
892cab237bSDimitry Andric
90ff0cc061SDimitry Andric ValueVector MemInstr;
91ff0cc061SDimitry Andric
92ff0cc061SDimitry Andric // For each block.
932cab237bSDimitry Andric for (BasicBlock *BB : L->blocks()) {
94ff0cc061SDimitry Andric // Scan the BB and collect legal loads and stores.
952cab237bSDimitry Andric for (Instruction &I : *BB) {
96d88c1a5aSDimitry Andric if (!isa<Instruction>(I))
97ff0cc061SDimitry Andric return false;
982cab237bSDimitry Andric if (auto *Ld = dyn_cast<LoadInst>(&I)) {
99d88c1a5aSDimitry Andric if (!Ld->isSimple())
100ff0cc061SDimitry Andric return false;
1012cab237bSDimitry Andric MemInstr.push_back(&I);
1022cab237bSDimitry Andric } else if (auto *St = dyn_cast<StoreInst>(&I)) {
103d88c1a5aSDimitry Andric if (!St->isSimple())
104d88c1a5aSDimitry Andric return false;
1052cab237bSDimitry Andric MemInstr.push_back(&I);
106d88c1a5aSDimitry Andric }
107ff0cc061SDimitry Andric }
108ff0cc061SDimitry Andric }
109ff0cc061SDimitry Andric
1104ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
111ff0cc061SDimitry Andric << " Loads and Stores to analyze\n");
112ff0cc061SDimitry Andric
113ff0cc061SDimitry Andric ValueVector::iterator I, IE, J, JE;
114ff0cc061SDimitry Andric
115ff0cc061SDimitry Andric for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
116ff0cc061SDimitry Andric for (J = I, JE = MemInstr.end(); J != JE; ++J) {
117ff0cc061SDimitry Andric std::vector<char> Dep;
118d88c1a5aSDimitry Andric Instruction *Src = cast<Instruction>(*I);
119d88c1a5aSDimitry Andric Instruction *Dst = cast<Instruction>(*J);
120d88c1a5aSDimitry Andric if (Src == Dst)
121ff0cc061SDimitry Andric continue;
122d88c1a5aSDimitry Andric // Ignore Input dependencies.
123d88c1a5aSDimitry Andric if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
124ff0cc061SDimitry Andric continue;
125d88c1a5aSDimitry Andric // Track Output, Flow, and Anti dependencies.
126d88c1a5aSDimitry Andric if (auto D = DI->depends(Src, Dst, true)) {
127d88c1a5aSDimitry Andric assert(D->isOrdered() && "Expected an output, flow or anti dep.");
1284ba319b5SDimitry Andric LLVM_DEBUG(StringRef DepType =
129d88c1a5aSDimitry Andric D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
130d88c1a5aSDimitry Andric dbgs() << "Found " << DepType
131d88c1a5aSDimitry Andric << " dependency between Src and Dst\n"
132d88c1a5aSDimitry Andric << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
133ff0cc061SDimitry Andric unsigned Levels = D->getLevels();
134ff0cc061SDimitry Andric char Direction;
135ff0cc061SDimitry Andric for (unsigned II = 1; II <= Levels; ++II) {
136ff0cc061SDimitry Andric const SCEV *Distance = D->getDistance(II);
137ff0cc061SDimitry Andric const SCEVConstant *SCEVConst =
138ff0cc061SDimitry Andric dyn_cast_or_null<SCEVConstant>(Distance);
139ff0cc061SDimitry Andric if (SCEVConst) {
140ff0cc061SDimitry Andric const ConstantInt *CI = SCEVConst->getValue();
141ff0cc061SDimitry Andric if (CI->isNegative())
142ff0cc061SDimitry Andric Direction = '<';
143ff0cc061SDimitry Andric else if (CI->isZero())
144ff0cc061SDimitry Andric Direction = '=';
145ff0cc061SDimitry Andric else
146ff0cc061SDimitry Andric Direction = '>';
147ff0cc061SDimitry Andric Dep.push_back(Direction);
148ff0cc061SDimitry Andric } else if (D->isScalar(II)) {
149ff0cc061SDimitry Andric Direction = 'S';
150ff0cc061SDimitry Andric Dep.push_back(Direction);
151ff0cc061SDimitry Andric } else {
152ff0cc061SDimitry Andric unsigned Dir = D->getDirection(II);
153ff0cc061SDimitry Andric if (Dir == Dependence::DVEntry::LT ||
154ff0cc061SDimitry Andric Dir == Dependence::DVEntry::LE)
155ff0cc061SDimitry Andric Direction = '<';
156ff0cc061SDimitry Andric else if (Dir == Dependence::DVEntry::GT ||
157ff0cc061SDimitry Andric Dir == Dependence::DVEntry::GE)
158ff0cc061SDimitry Andric Direction = '>';
159ff0cc061SDimitry Andric else if (Dir == Dependence::DVEntry::EQ)
160ff0cc061SDimitry Andric Direction = '=';
161ff0cc061SDimitry Andric else
162ff0cc061SDimitry Andric Direction = '*';
163ff0cc061SDimitry Andric Dep.push_back(Direction);
164ff0cc061SDimitry Andric }
165ff0cc061SDimitry Andric }
166ff0cc061SDimitry Andric while (Dep.size() != Level) {
167ff0cc061SDimitry Andric Dep.push_back('I');
168ff0cc061SDimitry Andric }
169ff0cc061SDimitry Andric
170ff0cc061SDimitry Andric DepMatrix.push_back(Dep);
171ff0cc061SDimitry Andric if (DepMatrix.size() > MaxMemInstrCount) {
1724ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
173ff0cc061SDimitry Andric << " dependencies inside loop\n");
174ff0cc061SDimitry Andric return false;
175ff0cc061SDimitry Andric }
176ff0cc061SDimitry Andric }
177ff0cc061SDimitry Andric }
178ff0cc061SDimitry Andric }
179ff0cc061SDimitry Andric
180ff0cc061SDimitry Andric return true;
181ff0cc061SDimitry Andric }
182ff0cc061SDimitry Andric
183ff0cc061SDimitry Andric // A loop is moved from index 'from' to an index 'to'. Update the Dependence
184ff0cc061SDimitry Andric // matrix by exchanging the two columns.
interChangeDependencies(CharMatrix & DepMatrix,unsigned FromIndx,unsigned ToIndx)185d88c1a5aSDimitry Andric static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
186ff0cc061SDimitry Andric unsigned ToIndx) {
187ff0cc061SDimitry Andric unsigned numRows = DepMatrix.size();
188ff0cc061SDimitry Andric for (unsigned i = 0; i < numRows; ++i) {
189ff0cc061SDimitry Andric char TmpVal = DepMatrix[i][ToIndx];
190ff0cc061SDimitry Andric DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx];
191ff0cc061SDimitry Andric DepMatrix[i][FromIndx] = TmpVal;
192ff0cc061SDimitry Andric }
193ff0cc061SDimitry Andric }
194ff0cc061SDimitry Andric
195ff0cc061SDimitry Andric // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
196ff0cc061SDimitry Andric // '>'
isOuterMostDepPositive(CharMatrix & DepMatrix,unsigned Row,unsigned Column)197ff0cc061SDimitry Andric static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
198ff0cc061SDimitry Andric unsigned Column) {
199ff0cc061SDimitry Andric for (unsigned i = 0; i <= Column; ++i) {
200ff0cc061SDimitry Andric if (DepMatrix[Row][i] == '<')
201ff0cc061SDimitry Andric return false;
202ff0cc061SDimitry Andric if (DepMatrix[Row][i] == '>')
203ff0cc061SDimitry Andric return true;
204ff0cc061SDimitry Andric }
205ff0cc061SDimitry Andric // All dependencies were '=','S' or 'I'
206ff0cc061SDimitry Andric return false;
207ff0cc061SDimitry Andric }
208ff0cc061SDimitry Andric
209ff0cc061SDimitry Andric // Checks if no dependence exist in the dependency matrix in Row before Column.
containsNoDependence(CharMatrix & DepMatrix,unsigned Row,unsigned Column)210ff0cc061SDimitry Andric static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
211ff0cc061SDimitry Andric unsigned Column) {
212ff0cc061SDimitry Andric for (unsigned i = 0; i < Column; ++i) {
213d88c1a5aSDimitry Andric if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
214ff0cc061SDimitry Andric DepMatrix[Row][i] != 'I')
215ff0cc061SDimitry Andric return false;
216ff0cc061SDimitry Andric }
217ff0cc061SDimitry Andric return true;
218ff0cc061SDimitry Andric }
219ff0cc061SDimitry Andric
validDepInterchange(CharMatrix & DepMatrix,unsigned Row,unsigned OuterLoopId,char InnerDep,char OuterDep)220ff0cc061SDimitry Andric static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
221ff0cc061SDimitry Andric unsigned OuterLoopId, char InnerDep,
222ff0cc061SDimitry Andric char OuterDep) {
223ff0cc061SDimitry Andric if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
224ff0cc061SDimitry Andric return false;
225ff0cc061SDimitry Andric
226ff0cc061SDimitry Andric if (InnerDep == OuterDep)
227ff0cc061SDimitry Andric return true;
228ff0cc061SDimitry Andric
229ff0cc061SDimitry Andric // It is legal to interchange if and only if after interchange no row has a
230ff0cc061SDimitry Andric // '>' direction as the leftmost non-'='.
231ff0cc061SDimitry Andric
232ff0cc061SDimitry Andric if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
233ff0cc061SDimitry Andric return true;
234ff0cc061SDimitry Andric
235ff0cc061SDimitry Andric if (InnerDep == '<')
236ff0cc061SDimitry Andric return true;
237ff0cc061SDimitry Andric
238ff0cc061SDimitry Andric if (InnerDep == '>') {
239ff0cc061SDimitry Andric // If OuterLoopId represents outermost loop then interchanging will make the
240ff0cc061SDimitry Andric // 1st dependency as '>'
241ff0cc061SDimitry Andric if (OuterLoopId == 0)
242ff0cc061SDimitry Andric return false;
243ff0cc061SDimitry Andric
244ff0cc061SDimitry Andric // If all dependencies before OuterloopId are '=','S'or 'I'. Then
245ff0cc061SDimitry Andric // interchanging will result in this row having an outermost non '='
246ff0cc061SDimitry Andric // dependency of '>'
247ff0cc061SDimitry Andric if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
248ff0cc061SDimitry Andric return true;
249ff0cc061SDimitry Andric }
250ff0cc061SDimitry Andric
251ff0cc061SDimitry Andric return false;
252ff0cc061SDimitry Andric }
253ff0cc061SDimitry Andric
254ff0cc061SDimitry Andric // Checks if it is legal to interchange 2 loops.
255ff0cc061SDimitry Andric // [Theorem] A permutation of the loops in a perfect nest is legal if and only
256d88c1a5aSDimitry Andric // if the direction matrix, after the same permutation is applied to its
257d88c1a5aSDimitry Andric // columns, has no ">" direction as the leftmost non-"=" direction in any row.
isLegalToInterChangeLoops(CharMatrix & DepMatrix,unsigned InnerLoopId,unsigned OuterLoopId)258ff0cc061SDimitry Andric static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
259ff0cc061SDimitry Andric unsigned InnerLoopId,
260ff0cc061SDimitry Andric unsigned OuterLoopId) {
261ff0cc061SDimitry Andric unsigned NumRows = DepMatrix.size();
262ff0cc061SDimitry Andric // For each row check if it is valid to interchange.
263ff0cc061SDimitry Andric for (unsigned Row = 0; Row < NumRows; ++Row) {
264ff0cc061SDimitry Andric char InnerDep = DepMatrix[Row][InnerLoopId];
265ff0cc061SDimitry Andric char OuterDep = DepMatrix[Row][OuterLoopId];
266ff0cc061SDimitry Andric if (InnerDep == '*' || OuterDep == '*')
267ff0cc061SDimitry Andric return false;
268d88c1a5aSDimitry Andric if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
269ff0cc061SDimitry Andric return false;
270ff0cc061SDimitry Andric }
271ff0cc061SDimitry Andric return true;
272ff0cc061SDimitry Andric }
273ff0cc061SDimitry Andric
populateWorklist(Loop & L)274*b5893f02SDimitry Andric static LoopVector populateWorklist(Loop &L) {
2754ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
276d88c1a5aSDimitry Andric << L.getHeader()->getParent()->getName() << " Loop: %"
277d88c1a5aSDimitry Andric << L.getHeader()->getName() << '\n');
278ff0cc061SDimitry Andric LoopVector LoopList;
279ff0cc061SDimitry Andric Loop *CurrentLoop = &L;
280875ed548SDimitry Andric const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
281875ed548SDimitry Andric while (!Vec->empty()) {
282ff0cc061SDimitry Andric // The current loop has multiple subloops in it hence it is not tightly
283ff0cc061SDimitry Andric // nested.
284ff0cc061SDimitry Andric // Discard all loops above it added into Worklist.
285*b5893f02SDimitry Andric if (Vec->size() != 1)
286*b5893f02SDimitry Andric return {};
287*b5893f02SDimitry Andric
288ff0cc061SDimitry Andric LoopList.push_back(CurrentLoop);
289875ed548SDimitry Andric CurrentLoop = Vec->front();
290875ed548SDimitry Andric Vec = &CurrentLoop->getSubLoops();
291ff0cc061SDimitry Andric }
292ff0cc061SDimitry Andric LoopList.push_back(CurrentLoop);
293*b5893f02SDimitry Andric return LoopList;
294ff0cc061SDimitry Andric }
295ff0cc061SDimitry Andric
getInductionVariable(Loop * L,ScalarEvolution * SE)296ff0cc061SDimitry Andric static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
297ff0cc061SDimitry Andric PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
298ff0cc061SDimitry Andric if (InnerIndexVar)
299ff0cc061SDimitry Andric return InnerIndexVar;
300ff0cc061SDimitry Andric if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
301ff0cc061SDimitry Andric return nullptr;
302ff0cc061SDimitry Andric for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
303ff0cc061SDimitry Andric PHINode *PhiVar = cast<PHINode>(I);
304ff0cc061SDimitry Andric Type *PhiTy = PhiVar->getType();
305ff0cc061SDimitry Andric if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
306ff0cc061SDimitry Andric !PhiTy->isPointerTy())
307ff0cc061SDimitry Andric return nullptr;
308ff0cc061SDimitry Andric const SCEVAddRecExpr *AddRec =
309ff0cc061SDimitry Andric dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
310ff0cc061SDimitry Andric if (!AddRec || !AddRec->isAffine())
311ff0cc061SDimitry Andric continue;
312ff0cc061SDimitry Andric const SCEV *Step = AddRec->getStepRecurrence(*SE);
313d88c1a5aSDimitry Andric if (!isa<SCEVConstant>(Step))
314ff0cc061SDimitry Andric continue;
315ff0cc061SDimitry Andric // Found the induction variable.
316ff0cc061SDimitry Andric // FIXME: Handle loops with more than one induction variable. Note that,
317ff0cc061SDimitry Andric // currently, legality makes sure we have only one induction variable.
318ff0cc061SDimitry Andric return PhiVar;
319ff0cc061SDimitry Andric }
320ff0cc061SDimitry Andric return nullptr;
321ff0cc061SDimitry Andric }
322ff0cc061SDimitry Andric
3232cab237bSDimitry Andric namespace {
3242cab237bSDimitry Andric
325ff0cc061SDimitry Andric /// LoopInterchangeLegality checks if it is legal to interchange the loop.
326ff0cc061SDimitry Andric class LoopInterchangeLegality {
327ff0cc061SDimitry Andric public:
LoopInterchangeLegality(Loop * Outer,Loop * Inner,ScalarEvolution * SE,OptimizationRemarkEmitter * ORE)328ff0cc061SDimitry Andric LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
329b40b48b8SDimitry Andric OptimizationRemarkEmitter *ORE)
330*b5893f02SDimitry Andric : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
331ff0cc061SDimitry Andric
332ff0cc061SDimitry Andric /// Check if the loops can be interchanged.
333ff0cc061SDimitry Andric bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
334ff0cc061SDimitry Andric CharMatrix &DepMatrix);
3352cab237bSDimitry Andric
336ff0cc061SDimitry Andric /// Check if the loop structure is understood. We do not handle triangular
337ff0cc061SDimitry Andric /// loops for now.
338ff0cc061SDimitry Andric bool isLoopStructureUnderstood(PHINode *InnerInductionVar);
339ff0cc061SDimitry Andric
340ff0cc061SDimitry Andric bool currentLimitations();
341ff0cc061SDimitry Andric
getOuterInnerReductions() const342*b5893f02SDimitry Andric const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
343*b5893f02SDimitry Andric return OuterInnerReductions;
344*b5893f02SDimitry Andric }
345ff0cc061SDimitry Andric
346ff0cc061SDimitry Andric private:
347ff0cc061SDimitry Andric bool tightlyNested(Loop *Outer, Loop *Inner);
348*b5893f02SDimitry Andric bool containsUnsafeInstructions(BasicBlock *BB);
349*b5893f02SDimitry Andric
350*b5893f02SDimitry Andric /// Discover induction and reduction PHIs in the header of \p L. Induction
351*b5893f02SDimitry Andric /// PHIs are added to \p Inductions, reductions are added to
352*b5893f02SDimitry Andric /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
353*b5893f02SDimitry Andric /// to be passed as \p InnerLoop.
354ff0cc061SDimitry Andric bool findInductionAndReductions(Loop *L,
355ff0cc061SDimitry Andric SmallVector<PHINode *, 8> &Inductions,
356*b5893f02SDimitry Andric Loop *InnerLoop);
3572cab237bSDimitry Andric
358ff0cc061SDimitry Andric Loop *OuterLoop;
359ff0cc061SDimitry Andric Loop *InnerLoop;
360ff0cc061SDimitry Andric
361ff0cc061SDimitry Andric ScalarEvolution *SE;
3622cab237bSDimitry Andric
363b40b48b8SDimitry Andric /// Interface to emit optimization remarks.
364b40b48b8SDimitry Andric OptimizationRemarkEmitter *ORE;
365ff0cc061SDimitry Andric
366*b5893f02SDimitry Andric /// Set of reduction PHIs taking part of a reduction across the inner and
367*b5893f02SDimitry Andric /// outer loop.
368*b5893f02SDimitry Andric SmallPtrSet<PHINode *, 4> OuterInnerReductions;
369ff0cc061SDimitry Andric };
370ff0cc061SDimitry Andric
371ff0cc061SDimitry Andric /// LoopInterchangeProfitability checks if it is profitable to interchange the
372ff0cc061SDimitry Andric /// loop.
373ff0cc061SDimitry Andric class LoopInterchangeProfitability {
374ff0cc061SDimitry Andric public:
LoopInterchangeProfitability(Loop * Outer,Loop * Inner,ScalarEvolution * SE,OptimizationRemarkEmitter * ORE)375b40b48b8SDimitry Andric LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
376b40b48b8SDimitry Andric OptimizationRemarkEmitter *ORE)
377b40b48b8SDimitry Andric : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
378ff0cc061SDimitry Andric
3797d523365SDimitry Andric /// Check if the loop interchange is profitable.
380ff0cc061SDimitry Andric bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
381ff0cc061SDimitry Andric CharMatrix &DepMatrix);
382ff0cc061SDimitry Andric
383ff0cc061SDimitry Andric private:
384ff0cc061SDimitry Andric int getInstrOrderCost();
385ff0cc061SDimitry Andric
386ff0cc061SDimitry Andric Loop *OuterLoop;
387ff0cc061SDimitry Andric Loop *InnerLoop;
388ff0cc061SDimitry Andric
389ff0cc061SDimitry Andric /// Scev analysis.
390ff0cc061SDimitry Andric ScalarEvolution *SE;
3912cab237bSDimitry Andric
392b40b48b8SDimitry Andric /// Interface to emit optimization remarks.
393b40b48b8SDimitry Andric OptimizationRemarkEmitter *ORE;
394ff0cc061SDimitry Andric };
395ff0cc061SDimitry Andric
3967d523365SDimitry Andric /// LoopInterchangeTransform interchanges the loop.
397ff0cc061SDimitry Andric class LoopInterchangeTransform {
398ff0cc061SDimitry Andric public:
LoopInterchangeTransform(Loop * Outer,Loop * Inner,ScalarEvolution * SE,LoopInfo * LI,DominatorTree * DT,BasicBlock * LoopNestExit,const LoopInterchangeLegality & LIL)399ff0cc061SDimitry Andric LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
400ff0cc061SDimitry Andric LoopInfo *LI, DominatorTree *DT,
4017d523365SDimitry Andric BasicBlock *LoopNestExit,
402*b5893f02SDimitry Andric const LoopInterchangeLegality &LIL)
403ff0cc061SDimitry Andric : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
404*b5893f02SDimitry Andric LoopExit(LoopNestExit), LIL(LIL) {}
405ff0cc061SDimitry Andric
406ff0cc061SDimitry Andric /// Interchange OuterLoop and InnerLoop.
407ff0cc061SDimitry Andric bool transform();
4084ba319b5SDimitry Andric void restructureLoops(Loop *NewInner, Loop *NewOuter,
4094ba319b5SDimitry Andric BasicBlock *OrigInnerPreHeader,
4104ba319b5SDimitry Andric BasicBlock *OrigOuterPreHeader);
411ff0cc061SDimitry Andric void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
412ff0cc061SDimitry Andric
413ff0cc061SDimitry Andric private:
414ff0cc061SDimitry Andric void splitInnerLoopLatch(Instruction *);
415ff0cc061SDimitry Andric void splitInnerLoopHeader();
416ff0cc061SDimitry Andric bool adjustLoopLinks();
417ff0cc061SDimitry Andric void adjustLoopPreheaders();
418ff0cc061SDimitry Andric bool adjustLoopBranches();
419ff0cc061SDimitry Andric
420ff0cc061SDimitry Andric Loop *OuterLoop;
421ff0cc061SDimitry Andric Loop *InnerLoop;
422ff0cc061SDimitry Andric
423ff0cc061SDimitry Andric /// Scev analysis.
424ff0cc061SDimitry Andric ScalarEvolution *SE;
4252cab237bSDimitry Andric
426ff0cc061SDimitry Andric LoopInfo *LI;
427ff0cc061SDimitry Andric DominatorTree *DT;
428ff0cc061SDimitry Andric BasicBlock *LoopExit;
429*b5893f02SDimitry Andric
430*b5893f02SDimitry Andric const LoopInterchangeLegality &LIL;
431ff0cc061SDimitry Andric };
432ff0cc061SDimitry Andric
4337d523365SDimitry Andric // Main LoopInterchange Pass.
434*b5893f02SDimitry Andric struct LoopInterchange : public LoopPass {
435ff0cc061SDimitry Andric static char ID;
4362cab237bSDimitry Andric ScalarEvolution *SE = nullptr;
4372cab237bSDimitry Andric LoopInfo *LI = nullptr;
4382cab237bSDimitry Andric DependenceInfo *DI = nullptr;
4392cab237bSDimitry Andric DominatorTree *DT = nullptr;
4402cab237bSDimitry Andric
441b40b48b8SDimitry Andric /// Interface to emit optimization remarks.
442b40b48b8SDimitry Andric OptimizationRemarkEmitter *ORE;
443b40b48b8SDimitry Andric
LoopInterchange__anonceaba13a0211::LoopInterchange444*b5893f02SDimitry Andric LoopInterchange() : LoopPass(ID) {
445ff0cc061SDimitry Andric initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
446ff0cc061SDimitry Andric }
447ff0cc061SDimitry Andric
getAnalysisUsage__anonceaba13a0211::LoopInterchange448ff0cc061SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
4493ca95b02SDimitry Andric AU.addRequired<DependenceAnalysisWrapperPass>();
450b40b48b8SDimitry Andric AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
4514ba319b5SDimitry Andric
452*b5893f02SDimitry Andric getLoopAnalysisUsage(AU);
453ff0cc061SDimitry Andric }
454ff0cc061SDimitry Andric
runOnLoop__anonceaba13a0211::LoopInterchange455*b5893f02SDimitry Andric bool runOnLoop(Loop *L, LPPassManager &LPM) override {
456*b5893f02SDimitry Andric if (skipLoop(L) || L->getParentLoop())
4573ca95b02SDimitry Andric return false;
4583ca95b02SDimitry Andric
4597d523365SDimitry Andric SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
460ff0cc061SDimitry Andric LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
4613ca95b02SDimitry Andric DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
4624ba319b5SDimitry Andric DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
463b40b48b8SDimitry Andric ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
4647d523365SDimitry Andric
465*b5893f02SDimitry Andric return processLoopList(populateWorklist(*L));
466ff0cc061SDimitry Andric }
467ff0cc061SDimitry Andric
isComputableLoopNest__anonceaba13a0211::LoopInterchange468ff0cc061SDimitry Andric bool isComputableLoopNest(LoopVector LoopList) {
4693ca95b02SDimitry Andric for (Loop *L : LoopList) {
470ff0cc061SDimitry Andric const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
471ff0cc061SDimitry Andric if (ExitCountOuter == SE->getCouldNotCompute()) {
4724ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
473ff0cc061SDimitry Andric return false;
474ff0cc061SDimitry Andric }
475ff0cc061SDimitry Andric if (L->getNumBackEdges() != 1) {
4764ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
477ff0cc061SDimitry Andric return false;
478ff0cc061SDimitry Andric }
479ff0cc061SDimitry Andric if (!L->getExitingBlock()) {
4804ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
481ff0cc061SDimitry Andric return false;
482ff0cc061SDimitry Andric }
483ff0cc061SDimitry Andric }
484ff0cc061SDimitry Andric return true;
485ff0cc061SDimitry Andric }
486ff0cc061SDimitry Andric
selectLoopForInterchange__anonceaba13a0211::LoopInterchange4873ca95b02SDimitry Andric unsigned selectLoopForInterchange(const LoopVector &LoopList) {
488ff0cc061SDimitry Andric // TODO: Add a better heuristic to select the loop to be interchanged based
4897d523365SDimitry Andric // on the dependence matrix. Currently we select the innermost loop.
490ff0cc061SDimitry Andric return LoopList.size() - 1;
491ff0cc061SDimitry Andric }
492ff0cc061SDimitry Andric
processLoopList__anonceaba13a0211::LoopInterchange493*b5893f02SDimitry Andric bool processLoopList(LoopVector LoopList) {
494ff0cc061SDimitry Andric bool Changed = false;
495d88c1a5aSDimitry Andric unsigned LoopNestDepth = LoopList.size();
496d88c1a5aSDimitry Andric if (LoopNestDepth < 2) {
4974ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
498ff0cc061SDimitry Andric return false;
499ff0cc061SDimitry Andric }
500d88c1a5aSDimitry Andric if (LoopNestDepth > MaxLoopNestDepth) {
5014ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
502d88c1a5aSDimitry Andric << MaxLoopNestDepth << "\n");
503ff0cc061SDimitry Andric return false;
504ff0cc061SDimitry Andric }
505d88c1a5aSDimitry Andric if (!isComputableLoopNest(LoopList)) {
5064ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
507d88c1a5aSDimitry Andric return false;
508d88c1a5aSDimitry Andric }
509d88c1a5aSDimitry Andric
5104ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
5114ba319b5SDimitry Andric << "\n");
512d88c1a5aSDimitry Andric
513d88c1a5aSDimitry Andric CharMatrix DependencyMatrix;
514ff0cc061SDimitry Andric Loop *OuterMostLoop = *(LoopList.begin());
515d88c1a5aSDimitry Andric if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
5163ca95b02SDimitry Andric OuterMostLoop, DI)) {
5174ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
518ff0cc061SDimitry Andric return false;
519ff0cc061SDimitry Andric }
520ff0cc061SDimitry Andric #ifdef DUMP_DEP_MATRICIES
5214ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
522ff0cc061SDimitry Andric printDepMatrix(DependencyMatrix);
523ff0cc061SDimitry Andric #endif
524ff0cc061SDimitry Andric
525ff0cc061SDimitry Andric // Get the Outermost loop exit.
5264ba319b5SDimitry Andric BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
5274ba319b5SDimitry Andric if (!LoopNestExit) {
5284ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
529ff0cc061SDimitry Andric return false;
530ff0cc061SDimitry Andric }
531ff0cc061SDimitry Andric
532ff0cc061SDimitry Andric unsigned SelecLoopId = selectLoopForInterchange(LoopList);
5337d523365SDimitry Andric // Move the selected loop outwards to the best possible position.
534ff0cc061SDimitry Andric for (unsigned i = SelecLoopId; i > 0; i--) {
535ff0cc061SDimitry Andric bool Interchanged =
536ff0cc061SDimitry Andric processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix);
537ff0cc061SDimitry Andric if (!Interchanged)
538ff0cc061SDimitry Andric return Changed;
539ff0cc061SDimitry Andric // Loops interchanged reflect the same in LoopList
540ff0cc061SDimitry Andric std::swap(LoopList[i - 1], LoopList[i]);
541ff0cc061SDimitry Andric
542ff0cc061SDimitry Andric // Update the DependencyMatrix
543d88c1a5aSDimitry Andric interChangeDependencies(DependencyMatrix, i, i - 1);
544ff0cc061SDimitry Andric #ifdef DUMP_DEP_MATRICIES
5454ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
546ff0cc061SDimitry Andric printDepMatrix(DependencyMatrix);
547ff0cc061SDimitry Andric #endif
548ff0cc061SDimitry Andric Changed |= Interchanged;
549ff0cc061SDimitry Andric }
550ff0cc061SDimitry Andric return Changed;
551ff0cc061SDimitry Andric }
552ff0cc061SDimitry Andric
processLoop__anonceaba13a0211::LoopInterchange553ff0cc061SDimitry Andric bool processLoop(LoopVector LoopList, unsigned InnerLoopId,
554ff0cc061SDimitry Andric unsigned OuterLoopId, BasicBlock *LoopNestExit,
555ff0cc061SDimitry Andric std::vector<std::vector<char>> &DependencyMatrix) {
5564ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopId
557ff0cc061SDimitry Andric << " and OuterLoopId = " << OuterLoopId << "\n");
558ff0cc061SDimitry Andric Loop *InnerLoop = LoopList[InnerLoopId];
559ff0cc061SDimitry Andric Loop *OuterLoop = LoopList[OuterLoopId];
560ff0cc061SDimitry Andric
561*b5893f02SDimitry Andric LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
562ff0cc061SDimitry Andric if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
5634ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
564ff0cc061SDimitry Andric return false;
565ff0cc061SDimitry Andric }
5664ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
567b40b48b8SDimitry Andric LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
568ff0cc061SDimitry Andric if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
5694ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
570ff0cc061SDimitry Andric return false;
571ff0cc061SDimitry Andric }
572ff0cc061SDimitry Andric
5732cab237bSDimitry Andric ORE->emit([&]() {
5742cab237bSDimitry Andric return OptimizationRemark(DEBUG_TYPE, "Interchanged",
575b40b48b8SDimitry Andric InnerLoop->getStartLoc(),
576b40b48b8SDimitry Andric InnerLoop->getHeader())
5772cab237bSDimitry Andric << "Loop interchanged with enclosing loop.";
5782cab237bSDimitry Andric });
579b40b48b8SDimitry Andric
580*b5893f02SDimitry Andric LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit,
581*b5893f02SDimitry Andric LIL);
582ff0cc061SDimitry Andric LIT.transform();
5834ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
5844ba319b5SDimitry Andric LoopsInterchanged++;
585ff0cc061SDimitry Andric return true;
586ff0cc061SDimitry Andric }
587ff0cc061SDimitry Andric };
588ff0cc061SDimitry Andric
5892cab237bSDimitry Andric } // end anonymous namespace
5902cab237bSDimitry Andric
containsUnsafeInstructions(BasicBlock * BB)591*b5893f02SDimitry Andric bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
592*b5893f02SDimitry Andric return any_of(*BB, [](const Instruction &I) {
593*b5893f02SDimitry Andric return I.mayHaveSideEffects() || I.mayReadFromMemory();
594ff0cc061SDimitry Andric });
595ff0cc061SDimitry Andric }
596ff0cc061SDimitry Andric
tightlyNested(Loop * OuterLoop,Loop * InnerLoop)597ff0cc061SDimitry Andric bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
598ff0cc061SDimitry Andric BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
599ff0cc061SDimitry Andric BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
600ff0cc061SDimitry Andric BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
601ff0cc061SDimitry Andric
6024ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
603ff0cc061SDimitry Andric
604ff0cc061SDimitry Andric // A perfectly nested loop will not have any branch in between the outer and
605ff0cc061SDimitry Andric // inner block i.e. outer header will branch to either inner preheader and
606ff0cc061SDimitry Andric // outerloop latch.
607d88c1a5aSDimitry Andric BranchInst *OuterLoopHeaderBI =
608ff0cc061SDimitry Andric dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
609d88c1a5aSDimitry Andric if (!OuterLoopHeaderBI)
610ff0cc061SDimitry Andric return false;
611d88c1a5aSDimitry Andric
612*b5893f02SDimitry Andric for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
613*b5893f02SDimitry Andric if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
614*b5893f02SDimitry Andric Succ != OuterLoopLatch)
615ff0cc061SDimitry Andric return false;
616ff0cc061SDimitry Andric
6174ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
618ff0cc061SDimitry Andric // We do not have any basic block in between now make sure the outer header
6197d523365SDimitry Andric // and outer loop latch doesn't contain any unsafe instructions.
620*b5893f02SDimitry Andric if (containsUnsafeInstructions(OuterLoopHeader) ||
621*b5893f02SDimitry Andric containsUnsafeInstructions(OuterLoopLatch))
622ff0cc061SDimitry Andric return false;
623ff0cc061SDimitry Andric
6244ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
625ff0cc061SDimitry Andric // We have a perfect loop nest.
626ff0cc061SDimitry Andric return true;
627ff0cc061SDimitry Andric }
628ff0cc061SDimitry Andric
isLoopStructureUnderstood(PHINode * InnerInduction)629ff0cc061SDimitry Andric bool LoopInterchangeLegality::isLoopStructureUnderstood(
630ff0cc061SDimitry Andric PHINode *InnerInduction) {
631ff0cc061SDimitry Andric unsigned Num = InnerInduction->getNumOperands();
632ff0cc061SDimitry Andric BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
633ff0cc061SDimitry Andric for (unsigned i = 0; i < Num; ++i) {
634ff0cc061SDimitry Andric Value *Val = InnerInduction->getOperand(i);
635ff0cc061SDimitry Andric if (isa<Constant>(Val))
636ff0cc061SDimitry Andric continue;
637ff0cc061SDimitry Andric Instruction *I = dyn_cast<Instruction>(Val);
638ff0cc061SDimitry Andric if (!I)
639ff0cc061SDimitry Andric return false;
640ff0cc061SDimitry Andric // TODO: Handle triangular loops.
641ff0cc061SDimitry Andric // e.g. for(int i=0;i<N;i++)
642ff0cc061SDimitry Andric // for(int j=i;j<N;j++)
643ff0cc061SDimitry Andric unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
644ff0cc061SDimitry Andric if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
645ff0cc061SDimitry Andric InnerLoopPreheader &&
646ff0cc061SDimitry Andric !OuterLoop->isLoopInvariant(I)) {
647ff0cc061SDimitry Andric return false;
648ff0cc061SDimitry Andric }
649ff0cc061SDimitry Andric }
650ff0cc061SDimitry Andric return true;
651ff0cc061SDimitry Andric }
652ff0cc061SDimitry Andric
653*b5893f02SDimitry Andric // If SV is a LCSSA PHI node with a single incoming value, return the incoming
654*b5893f02SDimitry Andric // value.
followLCSSA(Value * SV)655*b5893f02SDimitry Andric static Value *followLCSSA(Value *SV) {
656*b5893f02SDimitry Andric PHINode *PHI = dyn_cast<PHINode>(SV);
657*b5893f02SDimitry Andric if (!PHI)
658*b5893f02SDimitry Andric return SV;
659*b5893f02SDimitry Andric
660*b5893f02SDimitry Andric if (PHI->getNumIncomingValues() != 1)
661*b5893f02SDimitry Andric return SV;
662*b5893f02SDimitry Andric return followLCSSA(PHI->getIncomingValue(0));
663*b5893f02SDimitry Andric }
664*b5893f02SDimitry Andric
665*b5893f02SDimitry Andric // Check V's users to see if it is involved in a reduction in L.
findInnerReductionPhi(Loop * L,Value * V)666*b5893f02SDimitry Andric static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
667*b5893f02SDimitry Andric for (Value *User : V->users()) {
668*b5893f02SDimitry Andric if (PHINode *PHI = dyn_cast<PHINode>(User)) {
669*b5893f02SDimitry Andric if (PHI->getNumIncomingValues() == 1)
670*b5893f02SDimitry Andric continue;
671*b5893f02SDimitry Andric RecurrenceDescriptor RD;
672*b5893f02SDimitry Andric if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
673*b5893f02SDimitry Andric return PHI;
674*b5893f02SDimitry Andric return nullptr;
675*b5893f02SDimitry Andric }
676*b5893f02SDimitry Andric }
677*b5893f02SDimitry Andric
678*b5893f02SDimitry Andric return nullptr;
679*b5893f02SDimitry Andric }
680*b5893f02SDimitry Andric
findInductionAndReductions(Loop * L,SmallVector<PHINode *,8> & Inductions,Loop * InnerLoop)681ff0cc061SDimitry Andric bool LoopInterchangeLegality::findInductionAndReductions(
682*b5893f02SDimitry Andric Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
683ff0cc061SDimitry Andric if (!L->getLoopLatch() || !L->getLoopPredecessor())
684ff0cc061SDimitry Andric return false;
6854ba319b5SDimitry Andric for (PHINode &PHI : L->getHeader()->phis()) {
6868f0fd8f6SDimitry Andric RecurrenceDescriptor RD;
6877d523365SDimitry Andric InductionDescriptor ID;
6884ba319b5SDimitry Andric if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
6894ba319b5SDimitry Andric Inductions.push_back(&PHI);
690ff0cc061SDimitry Andric else {
691*b5893f02SDimitry Andric // PHIs in inner loops need to be part of a reduction in the outer loop,
692*b5893f02SDimitry Andric // discovered when checking the PHIs of the outer loop earlier.
693*b5893f02SDimitry Andric if (!InnerLoop) {
694*b5893f02SDimitry Andric if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) {
695*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
696*b5893f02SDimitry Andric "across the outer loop.\n");
697ff0cc061SDimitry Andric return false;
698ff0cc061SDimitry Andric }
699*b5893f02SDimitry Andric } else {
700*b5893f02SDimitry Andric assert(PHI.getNumIncomingValues() == 2 &&
701*b5893f02SDimitry Andric "Phis in loop header should have exactly 2 incoming values");
702*b5893f02SDimitry Andric // Check if we have a PHI node in the outer loop that has a reduction
703*b5893f02SDimitry Andric // result from the inner loop as an incoming value.
704*b5893f02SDimitry Andric Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
705*b5893f02SDimitry Andric PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
706*b5893f02SDimitry Andric if (!InnerRedPhi ||
707*b5893f02SDimitry Andric !llvm::any_of(InnerRedPhi->incoming_values(),
708*b5893f02SDimitry Andric [&PHI](Value *V) { return V == &PHI; })) {
709*b5893f02SDimitry Andric LLVM_DEBUG(
710*b5893f02SDimitry Andric dbgs()
711*b5893f02SDimitry Andric << "Failed to recognize PHI as an induction or reduction.\n");
712*b5893f02SDimitry Andric return false;
713*b5893f02SDimitry Andric }
714*b5893f02SDimitry Andric OuterInnerReductions.insert(&PHI);
715*b5893f02SDimitry Andric OuterInnerReductions.insert(InnerRedPhi);
716*b5893f02SDimitry Andric }
717*b5893f02SDimitry Andric }
718ff0cc061SDimitry Andric }
719ff0cc061SDimitry Andric return true;
720ff0cc061SDimitry Andric }
721ff0cc061SDimitry Andric
containsSafePHI(BasicBlock * Block,bool isOuterLoopExitBlock)722ff0cc061SDimitry Andric static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) {
7234ba319b5SDimitry Andric for (PHINode &PHI : Block->phis()) {
724ff0cc061SDimitry Andric // Reduction lcssa phi will have only 1 incoming block that from loop latch.
7254ba319b5SDimitry Andric if (PHI.getNumIncomingValues() > 1)
726ff0cc061SDimitry Andric return false;
7274ba319b5SDimitry Andric Instruction *Ins = dyn_cast<Instruction>(PHI.getIncomingValue(0));
728ff0cc061SDimitry Andric if (!Ins)
729ff0cc061SDimitry Andric return false;
730ff0cc061SDimitry Andric // Incoming value for lcssa phi's in outer loop exit can only be inner loop
731ff0cc061SDimitry Andric // exits lcssa phi else it would not be tightly nested.
732ff0cc061SDimitry Andric if (!isa<PHINode>(Ins) && isOuterLoopExitBlock)
733ff0cc061SDimitry Andric return false;
734ff0cc061SDimitry Andric }
735ff0cc061SDimitry Andric return true;
736ff0cc061SDimitry Andric }
737ff0cc061SDimitry Andric
738ff0cc061SDimitry Andric // This function indicates the current limitations in the transform as a result
739ff0cc061SDimitry Andric // of which we do not proceed.
currentLimitations()740ff0cc061SDimitry Andric bool LoopInterchangeLegality::currentLimitations() {
741ff0cc061SDimitry Andric BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
742ff0cc061SDimitry Andric BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
7434ba319b5SDimitry Andric
7444ba319b5SDimitry Andric // transform currently expects the loop latches to also be the exiting
7454ba319b5SDimitry Andric // blocks.
7464ba319b5SDimitry Andric if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
7474ba319b5SDimitry Andric OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
7484ba319b5SDimitry Andric !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
7494ba319b5SDimitry Andric !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
7504ba319b5SDimitry Andric LLVM_DEBUG(
7514ba319b5SDimitry Andric dbgs() << "Loops where the latch is not the exiting block are not"
7524ba319b5SDimitry Andric << " supported currently.\n");
7534ba319b5SDimitry Andric ORE->emit([&]() {
7544ba319b5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
7554ba319b5SDimitry Andric OuterLoop->getStartLoc(),
7564ba319b5SDimitry Andric OuterLoop->getHeader())
7574ba319b5SDimitry Andric << "Loops where the latch is not the exiting block cannot be"
7584ba319b5SDimitry Andric " interchange currently.";
7594ba319b5SDimitry Andric });
7604ba319b5SDimitry Andric return true;
7614ba319b5SDimitry Andric }
762ff0cc061SDimitry Andric
763ff0cc061SDimitry Andric PHINode *InnerInductionVar;
764ff0cc061SDimitry Andric SmallVector<PHINode *, 8> Inductions;
765*b5893f02SDimitry Andric if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
766*b5893f02SDimitry Andric LLVM_DEBUG(
767*b5893f02SDimitry Andric dbgs() << "Only outer loops with induction or reduction PHI nodes "
768*b5893f02SDimitry Andric << "are supported currently.\n");
769*b5893f02SDimitry Andric ORE->emit([&]() {
770*b5893f02SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
771*b5893f02SDimitry Andric OuterLoop->getStartLoc(),
772*b5893f02SDimitry Andric OuterLoop->getHeader())
773*b5893f02SDimitry Andric << "Only outer loops with induction or reduction PHI nodes can be"
774*b5893f02SDimitry Andric " interchanged currently.";
775*b5893f02SDimitry Andric });
776*b5893f02SDimitry Andric return true;
777*b5893f02SDimitry Andric }
778*b5893f02SDimitry Andric
779*b5893f02SDimitry Andric // TODO: Currently we handle only loops with 1 induction variable.
780*b5893f02SDimitry Andric if (Inductions.size() != 1) {
781*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
782*b5893f02SDimitry Andric << "supported currently.\n");
783*b5893f02SDimitry Andric ORE->emit([&]() {
784*b5893f02SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter",
785*b5893f02SDimitry Andric OuterLoop->getStartLoc(),
786*b5893f02SDimitry Andric OuterLoop->getHeader())
787*b5893f02SDimitry Andric << "Only outer loops with 1 induction variable can be "
788*b5893f02SDimitry Andric "interchanged currently.";
789*b5893f02SDimitry Andric });
790*b5893f02SDimitry Andric return true;
791*b5893f02SDimitry Andric }
792*b5893f02SDimitry Andric
793*b5893f02SDimitry Andric Inductions.clear();
794*b5893f02SDimitry Andric if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
7954ba319b5SDimitry Andric LLVM_DEBUG(
7964ba319b5SDimitry Andric dbgs() << "Only inner loops with induction or reduction PHI nodes "
797c4394386SDimitry Andric << "are supported currently.\n");
7982cab237bSDimitry Andric ORE->emit([&]() {
7992cab237bSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
800b40b48b8SDimitry Andric InnerLoop->getStartLoc(),
801b40b48b8SDimitry Andric InnerLoop->getHeader())
802b40b48b8SDimitry Andric << "Only inner loops with induction or reduction PHI nodes can be"
8032cab237bSDimitry Andric " interchange currently.";
8042cab237bSDimitry Andric });
805ff0cc061SDimitry Andric return true;
806c4394386SDimitry Andric }
807ff0cc061SDimitry Andric
808ff0cc061SDimitry Andric // TODO: Currently we handle only loops with 1 induction variable.
809ff0cc061SDimitry Andric if (Inductions.size() != 1) {
8104ba319b5SDimitry Andric LLVM_DEBUG(
8114ba319b5SDimitry Andric dbgs() << "We currently only support loops with 1 induction variable."
812ff0cc061SDimitry Andric << "Failed to interchange due to current limitation\n");
8132cab237bSDimitry Andric ORE->emit([&]() {
8142cab237bSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner",
815b40b48b8SDimitry Andric InnerLoop->getStartLoc(),
816b40b48b8SDimitry Andric InnerLoop->getHeader())
817b40b48b8SDimitry Andric << "Only inner loops with 1 induction variable can be "
8182cab237bSDimitry Andric "interchanged currently.";
8192cab237bSDimitry Andric });
820ff0cc061SDimitry Andric return true;
821ff0cc061SDimitry Andric }
822ff0cc061SDimitry Andric InnerInductionVar = Inductions.pop_back_val();
823ff0cc061SDimitry Andric
824ff0cc061SDimitry Andric // TODO: Triangular loops are not handled for now.
825ff0cc061SDimitry Andric if (!isLoopStructureUnderstood(InnerInductionVar)) {
8264ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
8272cab237bSDimitry Andric ORE->emit([&]() {
8282cab237bSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
829b40b48b8SDimitry Andric InnerLoop->getStartLoc(),
830b40b48b8SDimitry Andric InnerLoop->getHeader())
8312cab237bSDimitry Andric << "Inner loop structure not understood currently.";
8322cab237bSDimitry Andric });
833ff0cc061SDimitry Andric return true;
834ff0cc061SDimitry Andric }
835ff0cc061SDimitry Andric
836ff0cc061SDimitry Andric // TODO: We only handle LCSSA PHI's corresponding to reduction for now.
8374ba319b5SDimitry Andric BasicBlock *InnerExit = InnerLoop->getExitBlock();
8384ba319b5SDimitry Andric if (!containsSafePHI(InnerExit, false)) {
8394ba319b5SDimitry Andric LLVM_DEBUG(
8404ba319b5SDimitry Andric dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n");
8412cab237bSDimitry Andric ORE->emit([&]() {
8422cab237bSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "NoLCSSAPHIOuterInner",
843b40b48b8SDimitry Andric InnerLoop->getStartLoc(),
844b40b48b8SDimitry Andric InnerLoop->getHeader())
845b40b48b8SDimitry Andric << "Only inner loops with LCSSA PHIs can be interchange "
8462cab237bSDimitry Andric "currently.";
8472cab237bSDimitry Andric });
848ff0cc061SDimitry Andric return true;
849c4394386SDimitry Andric }
850ff0cc061SDimitry Andric
851ff0cc061SDimitry Andric // TODO: Current limitation: Since we split the inner loop latch at the point
852ff0cc061SDimitry Andric // were induction variable is incremented (induction.next); We cannot have
853ff0cc061SDimitry Andric // more than 1 user of induction.next since it would result in broken code
854ff0cc061SDimitry Andric // after split.
855ff0cc061SDimitry Andric // e.g.
856ff0cc061SDimitry Andric // for(i=0;i<N;i++) {
857ff0cc061SDimitry Andric // for(j = 0;j<M;j++) {
858ff0cc061SDimitry Andric // A[j+1][i+2] = A[j][i]+k;
859ff0cc061SDimitry Andric // }
860ff0cc061SDimitry Andric // }
861ff0cc061SDimitry Andric Instruction *InnerIndexVarInc = nullptr;
862ff0cc061SDimitry Andric if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader)
863ff0cc061SDimitry Andric InnerIndexVarInc =
864ff0cc061SDimitry Andric dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1));
865ff0cc061SDimitry Andric else
866ff0cc061SDimitry Andric InnerIndexVarInc =
867ff0cc061SDimitry Andric dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0));
868ff0cc061SDimitry Andric
869c4394386SDimitry Andric if (!InnerIndexVarInc) {
8704ba319b5SDimitry Andric LLVM_DEBUG(
8714ba319b5SDimitry Andric dbgs() << "Did not find an instruction to increment the induction "
872c4394386SDimitry Andric << "variable.\n");
8732cab237bSDimitry Andric ORE->emit([&]() {
8742cab237bSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "NoIncrementInInner",
875b40b48b8SDimitry Andric InnerLoop->getStartLoc(),
876b40b48b8SDimitry Andric InnerLoop->getHeader())
8772cab237bSDimitry Andric << "The inner loop does not increment the induction variable.";
8782cab237bSDimitry Andric });
879ff0cc061SDimitry Andric return true;
880c4394386SDimitry Andric }
881ff0cc061SDimitry Andric
882ff0cc061SDimitry Andric // Since we split the inner loop latch on this induction variable. Make sure
883ff0cc061SDimitry Andric // we do not have any instruction between the induction variable and branch
884ff0cc061SDimitry Andric // instruction.
885ff0cc061SDimitry Andric
8863ca95b02SDimitry Andric bool FoundInduction = false;
8874ba319b5SDimitry Andric for (const Instruction &I :
8884ba319b5SDimitry Andric llvm::reverse(InnerLoopLatch->instructionsWithoutDebug())) {
8892cab237bSDimitry Andric if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I) ||
8902cab237bSDimitry Andric isa<ZExtInst>(I))
891ff0cc061SDimitry Andric continue;
892c4394386SDimitry Andric
893ff0cc061SDimitry Andric // We found an instruction. If this is not induction variable then it is not
894ff0cc061SDimitry Andric // safe to split this loop latch.
895c4394386SDimitry Andric if (!I.isIdenticalTo(InnerIndexVarInc)) {
8964ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Found unsupported instructions between induction "
897c4394386SDimitry Andric << "variable increment and branch.\n");
8982cab237bSDimitry Andric ORE->emit([&]() {
8992cab237bSDimitry Andric return OptimizationRemarkMissed(
9002cab237bSDimitry Andric DEBUG_TYPE, "UnsupportedInsBetweenInduction",
9012cab237bSDimitry Andric InnerLoop->getStartLoc(), InnerLoop->getHeader())
902b40b48b8SDimitry Andric << "Found unsupported instruction between induction variable "
9032cab237bSDimitry Andric "increment and branch.";
9042cab237bSDimitry Andric });
905ff0cc061SDimitry Andric return true;
906c4394386SDimitry Andric }
9073ca95b02SDimitry Andric
908ff0cc061SDimitry Andric FoundInduction = true;
9093ca95b02SDimitry Andric break;
910ff0cc061SDimitry Andric }
9117d523365SDimitry Andric // The loop latch ended and we didn't find the induction variable return as
912ff0cc061SDimitry Andric // current limitation.
913c4394386SDimitry Andric if (!FoundInduction) {
9144ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Did not find the induction variable.\n");
9152cab237bSDimitry Andric ORE->emit([&]() {
9162cab237bSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "NoIndutionVariable",
917b40b48b8SDimitry Andric InnerLoop->getStartLoc(),
918b40b48b8SDimitry Andric InnerLoop->getHeader())
9192cab237bSDimitry Andric << "Did not find the induction variable.";
9202cab237bSDimitry Andric });
921ff0cc061SDimitry Andric return true;
922c4394386SDimitry Andric }
923ff0cc061SDimitry Andric return false;
924ff0cc061SDimitry Andric }
925ff0cc061SDimitry Andric
9264ba319b5SDimitry Andric // We currently support LCSSA PHI nodes in the outer loop exit, if their
9274ba319b5SDimitry Andric // incoming values do not come from the outer loop latch or if the
9284ba319b5SDimitry Andric // outer loop latch has a single predecessor. In that case, the value will
9294ba319b5SDimitry Andric // be available if both the inner and outer loop conditions are true, which
9304ba319b5SDimitry Andric // will still be true after interchanging. If we have multiple predecessor,
9314ba319b5SDimitry Andric // that may not be the case, e.g. because the outer loop latch may be executed
9324ba319b5SDimitry Andric // if the inner loop is not executed.
areLoopExitPHIsSupported(Loop * OuterLoop,Loop * InnerLoop)9334ba319b5SDimitry Andric static bool areLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
9344ba319b5SDimitry Andric BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
9354ba319b5SDimitry Andric for (PHINode &PHI : LoopNestExit->phis()) {
9364ba319b5SDimitry Andric // FIXME: We currently are not able to detect floating point reductions
9374ba319b5SDimitry Andric // and have to use floating point PHIs as a proxy to prevent
9384ba319b5SDimitry Andric // interchanging in the presence of floating point reductions.
9394ba319b5SDimitry Andric if (PHI.getType()->isFloatingPointTy())
9404ba319b5SDimitry Andric return false;
9414ba319b5SDimitry Andric for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
9424ba319b5SDimitry Andric Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
9434ba319b5SDimitry Andric if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
9444ba319b5SDimitry Andric continue;
9454ba319b5SDimitry Andric
9464ba319b5SDimitry Andric // The incoming value is defined in the outer loop latch. Currently we
9474ba319b5SDimitry Andric // only support that in case the outer loop latch has a single predecessor.
9484ba319b5SDimitry Andric // This guarantees that the outer loop latch is executed if and only if
9494ba319b5SDimitry Andric // the inner loop is executed (because tightlyNested() guarantees that the
9504ba319b5SDimitry Andric // outer loop header only branches to the inner loop or the outer loop
9514ba319b5SDimitry Andric // latch).
9524ba319b5SDimitry Andric // FIXME: We could weaken this logic and allow multiple predecessors,
9534ba319b5SDimitry Andric // if the values are produced outside the loop latch. We would need
9544ba319b5SDimitry Andric // additional logic to update the PHI nodes in the exit block as
9554ba319b5SDimitry Andric // well.
9564ba319b5SDimitry Andric if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
9574ba319b5SDimitry Andric return false;
9584ba319b5SDimitry Andric }
9594ba319b5SDimitry Andric }
9604ba319b5SDimitry Andric return true;
9614ba319b5SDimitry Andric }
9624ba319b5SDimitry Andric
canInterchangeLoops(unsigned InnerLoopId,unsigned OuterLoopId,CharMatrix & DepMatrix)963ff0cc061SDimitry Andric bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
964ff0cc061SDimitry Andric unsigned OuterLoopId,
965ff0cc061SDimitry Andric CharMatrix &DepMatrix) {
966ff0cc061SDimitry Andric if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
9674ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
968ff0cc061SDimitry Andric << " and OuterLoopId = " << OuterLoopId
969ff0cc061SDimitry Andric << " due to dependence\n");
9702cab237bSDimitry Andric ORE->emit([&]() {
9712cab237bSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
972b40b48b8SDimitry Andric InnerLoop->getStartLoc(),
973b40b48b8SDimitry Andric InnerLoop->getHeader())
9742cab237bSDimitry Andric << "Cannot interchange loops due to dependences.";
9752cab237bSDimitry Andric });
9762cab237bSDimitry Andric return false;
9772cab237bSDimitry Andric }
9782cab237bSDimitry Andric // Check if outer and inner loop contain legal instructions only.
9792cab237bSDimitry Andric for (auto *BB : OuterLoop->blocks())
9804ba319b5SDimitry Andric for (Instruction &I : BB->instructionsWithoutDebug())
9812cab237bSDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(&I)) {
9822cab237bSDimitry Andric // readnone functions do not prevent interchanging.
9832cab237bSDimitry Andric if (CI->doesNotReadMemory())
9842cab237bSDimitry Andric continue;
9854ba319b5SDimitry Andric LLVM_DEBUG(
9864ba319b5SDimitry Andric dbgs() << "Loops with call instructions cannot be interchanged "
9872cab237bSDimitry Andric << "safely.");
9884ba319b5SDimitry Andric ORE->emit([&]() {
9894ba319b5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
9904ba319b5SDimitry Andric CI->getDebugLoc(),
9914ba319b5SDimitry Andric CI->getParent())
9924ba319b5SDimitry Andric << "Cannot interchange loops due to call instruction.";
9934ba319b5SDimitry Andric });
9944ba319b5SDimitry Andric
995ff0cc061SDimitry Andric return false;
996ff0cc061SDimitry Andric }
997ff0cc061SDimitry Andric
998ff0cc061SDimitry Andric // TODO: The loops could not be interchanged due to current limitations in the
999ff0cc061SDimitry Andric // transform module.
1000ff0cc061SDimitry Andric if (currentLimitations()) {
10014ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
1002ff0cc061SDimitry Andric return false;
1003ff0cc061SDimitry Andric }
1004ff0cc061SDimitry Andric
1005ff0cc061SDimitry Andric // Check if the loops are tightly nested.
1006ff0cc061SDimitry Andric if (!tightlyNested(OuterLoop, InnerLoop)) {
10074ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
10082cab237bSDimitry Andric ORE->emit([&]() {
10092cab237bSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1010b40b48b8SDimitry Andric InnerLoop->getStartLoc(),
1011b40b48b8SDimitry Andric InnerLoop->getHeader())
1012b40b48b8SDimitry Andric << "Cannot interchange loops because they are not tightly "
10132cab237bSDimitry Andric "nested.";
10142cab237bSDimitry Andric });
1015ff0cc061SDimitry Andric return false;
1016ff0cc061SDimitry Andric }
1017ff0cc061SDimitry Andric
10184ba319b5SDimitry Andric if (!areLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
10194ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
10204ba319b5SDimitry Andric ORE->emit([&]() {
10214ba319b5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
10224ba319b5SDimitry Andric OuterLoop->getStartLoc(),
10234ba319b5SDimitry Andric OuterLoop->getHeader())
10244ba319b5SDimitry Andric << "Found unsupported PHI node in loop exit.";
10254ba319b5SDimitry Andric });
10264ba319b5SDimitry Andric return false;
10274ba319b5SDimitry Andric }
10284ba319b5SDimitry Andric
1029ff0cc061SDimitry Andric return true;
1030ff0cc061SDimitry Andric }
1031ff0cc061SDimitry Andric
getInstrOrderCost()1032ff0cc061SDimitry Andric int LoopInterchangeProfitability::getInstrOrderCost() {
1033ff0cc061SDimitry Andric unsigned GoodOrder, BadOrder;
1034ff0cc061SDimitry Andric BadOrder = GoodOrder = 0;
10352cab237bSDimitry Andric for (BasicBlock *BB : InnerLoop->blocks()) {
10362cab237bSDimitry Andric for (Instruction &Ins : *BB) {
1037ff0cc061SDimitry Andric if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1038ff0cc061SDimitry Andric unsigned NumOp = GEP->getNumOperands();
1039ff0cc061SDimitry Andric bool FoundInnerInduction = false;
1040ff0cc061SDimitry Andric bool FoundOuterInduction = false;
1041ff0cc061SDimitry Andric for (unsigned i = 0; i < NumOp; ++i) {
1042ff0cc061SDimitry Andric const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1043ff0cc061SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1044ff0cc061SDimitry Andric if (!AR)
1045ff0cc061SDimitry Andric continue;
1046ff0cc061SDimitry Andric
1047ff0cc061SDimitry Andric // If we find the inner induction after an outer induction e.g.
1048ff0cc061SDimitry Andric // for(int i=0;i<N;i++)
1049ff0cc061SDimitry Andric // for(int j=0;j<N;j++)
1050ff0cc061SDimitry Andric // A[i][j] = A[i-1][j-1]+k;
1051ff0cc061SDimitry Andric // then it is a good order.
1052ff0cc061SDimitry Andric if (AR->getLoop() == InnerLoop) {
1053ff0cc061SDimitry Andric // We found an InnerLoop induction after OuterLoop induction. It is
1054ff0cc061SDimitry Andric // a good order.
1055ff0cc061SDimitry Andric FoundInnerInduction = true;
1056ff0cc061SDimitry Andric if (FoundOuterInduction) {
1057ff0cc061SDimitry Andric GoodOrder++;
1058ff0cc061SDimitry Andric break;
1059ff0cc061SDimitry Andric }
1060ff0cc061SDimitry Andric }
1061ff0cc061SDimitry Andric // If we find the outer induction after an inner induction e.g.
1062ff0cc061SDimitry Andric // for(int i=0;i<N;i++)
1063ff0cc061SDimitry Andric // for(int j=0;j<N;j++)
1064ff0cc061SDimitry Andric // A[j][i] = A[j-1][i-1]+k;
1065ff0cc061SDimitry Andric // then it is a bad order.
1066ff0cc061SDimitry Andric if (AR->getLoop() == OuterLoop) {
1067ff0cc061SDimitry Andric // We found an OuterLoop induction after InnerLoop induction. It is
1068ff0cc061SDimitry Andric // a bad order.
1069ff0cc061SDimitry Andric FoundOuterInduction = true;
1070ff0cc061SDimitry Andric if (FoundInnerInduction) {
1071ff0cc061SDimitry Andric BadOrder++;
1072ff0cc061SDimitry Andric break;
1073ff0cc061SDimitry Andric }
1074ff0cc061SDimitry Andric }
1075ff0cc061SDimitry Andric }
1076ff0cc061SDimitry Andric }
1077ff0cc061SDimitry Andric }
1078ff0cc061SDimitry Andric }
1079ff0cc061SDimitry Andric return GoodOrder - BadOrder;
1080ff0cc061SDimitry Andric }
1081ff0cc061SDimitry Andric
isProfitableForVectorization(unsigned InnerLoopId,unsigned OuterLoopId,CharMatrix & DepMatrix)1082d88c1a5aSDimitry Andric static bool isProfitableForVectorization(unsigned InnerLoopId,
1083ff0cc061SDimitry Andric unsigned OuterLoopId,
1084ff0cc061SDimitry Andric CharMatrix &DepMatrix) {
1085ff0cc061SDimitry Andric // TODO: Improve this heuristic to catch more cases.
1086ff0cc061SDimitry Andric // If the inner loop is loop independent or doesn't carry any dependency it is
1087ff0cc061SDimitry Andric // profitable to move this to outer position.
10882cab237bSDimitry Andric for (auto &Row : DepMatrix) {
10892cab237bSDimitry Andric if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1090ff0cc061SDimitry Andric return false;
1091ff0cc061SDimitry Andric // TODO: We need to improve this heuristic.
10922cab237bSDimitry Andric if (Row[OuterLoopId] != '=')
1093ff0cc061SDimitry Andric return false;
1094ff0cc061SDimitry Andric }
1095ff0cc061SDimitry Andric // If outer loop has dependence and inner loop is loop independent then it is
1096ff0cc061SDimitry Andric // profitable to interchange to enable parallelism.
10974ba319b5SDimitry Andric // If there are no dependences, interchanging will not improve anything.
10984ba319b5SDimitry Andric return !DepMatrix.empty();
1099ff0cc061SDimitry Andric }
1100ff0cc061SDimitry Andric
isProfitable(unsigned InnerLoopId,unsigned OuterLoopId,CharMatrix & DepMatrix)1101ff0cc061SDimitry Andric bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
1102ff0cc061SDimitry Andric unsigned OuterLoopId,
1103ff0cc061SDimitry Andric CharMatrix &DepMatrix) {
11047d523365SDimitry Andric // TODO: Add better profitability checks.
1105ff0cc061SDimitry Andric // e.g
1106ff0cc061SDimitry Andric // 1) Construct dependency matrix and move the one with no loop carried dep
1107ff0cc061SDimitry Andric // inside to enable vectorization.
1108ff0cc061SDimitry Andric
1109ff0cc061SDimitry Andric // This is rough cost estimation algorithm. It counts the good and bad order
1110ff0cc061SDimitry Andric // of induction variables in the instruction and allows reordering if number
1111ff0cc061SDimitry Andric // of bad orders is more than good.
1112d88c1a5aSDimitry Andric int Cost = getInstrOrderCost();
11134ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1114d88c1a5aSDimitry Andric if (Cost < -LoopInterchangeCostThreshold)
1115ff0cc061SDimitry Andric return true;
1116ff0cc061SDimitry Andric
11177d523365SDimitry Andric // It is not profitable as per current cache profitability model. But check if
1118ff0cc061SDimitry Andric // we can move this loop outside to improve parallelism.
1119b40b48b8SDimitry Andric if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1120b40b48b8SDimitry Andric return true;
1121b40b48b8SDimitry Andric
11222cab237bSDimitry Andric ORE->emit([&]() {
11232cab237bSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1124b40b48b8SDimitry Andric InnerLoop->getStartLoc(),
1125b40b48b8SDimitry Andric InnerLoop->getHeader())
1126b40b48b8SDimitry Andric << "Interchanging loops is too costly (cost="
1127b40b48b8SDimitry Andric << ore::NV("Cost", Cost) << ", threshold="
11282cab237bSDimitry Andric << ore::NV("Threshold", LoopInterchangeCostThreshold)
11292cab237bSDimitry Andric << ") and it does not improve parallelism.";
11302cab237bSDimitry Andric });
1131b40b48b8SDimitry Andric return false;
1132ff0cc061SDimitry Andric }
1133ff0cc061SDimitry Andric
removeChildLoop(Loop * OuterLoop,Loop * InnerLoop)1134ff0cc061SDimitry Andric void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1135ff0cc061SDimitry Andric Loop *InnerLoop) {
11364ba319b5SDimitry Andric for (Loop *L : *OuterLoop)
11374ba319b5SDimitry Andric if (L == InnerLoop) {
11384ba319b5SDimitry Andric OuterLoop->removeChildLoop(L);
1139ff0cc061SDimitry Andric return;
1140ff0cc061SDimitry Andric }
11417d523365SDimitry Andric llvm_unreachable("Couldn't find loop");
1142ff0cc061SDimitry Andric }
1143ff0cc061SDimitry Andric
11444ba319b5SDimitry Andric /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
11454ba319b5SDimitry Andric /// new inner and outer loop after interchanging: NewInner is the original
11464ba319b5SDimitry Andric /// outer loop and NewOuter is the original inner loop.
11474ba319b5SDimitry Andric ///
11484ba319b5SDimitry Andric /// Before interchanging, we have the following structure
11494ba319b5SDimitry Andric /// Outer preheader
11504ba319b5SDimitry Andric // Outer header
11514ba319b5SDimitry Andric // Inner preheader
11524ba319b5SDimitry Andric // Inner header
11534ba319b5SDimitry Andric // Inner body
11544ba319b5SDimitry Andric // Inner latch
11554ba319b5SDimitry Andric // outer bbs
11564ba319b5SDimitry Andric // Outer latch
11574ba319b5SDimitry Andric //
11584ba319b5SDimitry Andric // After interchanging:
11594ba319b5SDimitry Andric // Inner preheader
11604ba319b5SDimitry Andric // Inner header
11614ba319b5SDimitry Andric // Outer preheader
11624ba319b5SDimitry Andric // Outer header
11634ba319b5SDimitry Andric // Inner body
11644ba319b5SDimitry Andric // outer bbs
11654ba319b5SDimitry Andric // Outer latch
11664ba319b5SDimitry Andric // Inner latch
restructureLoops(Loop * NewInner,Loop * NewOuter,BasicBlock * OrigInnerPreHeader,BasicBlock * OrigOuterPreHeader)11674ba319b5SDimitry Andric void LoopInterchangeTransform::restructureLoops(
11684ba319b5SDimitry Andric Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
11694ba319b5SDimitry Andric BasicBlock *OrigOuterPreHeader) {
1170ff0cc061SDimitry Andric Loop *OuterLoopParent = OuterLoop->getParentLoop();
11714ba319b5SDimitry Andric // The original inner loop preheader moves from the new inner loop to
11724ba319b5SDimitry Andric // the parent loop, if there is one.
11734ba319b5SDimitry Andric NewInner->removeBlockFromLoop(OrigInnerPreHeader);
11744ba319b5SDimitry Andric LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
11754ba319b5SDimitry Andric
11764ba319b5SDimitry Andric // Switch the loop levels.
1177ff0cc061SDimitry Andric if (OuterLoopParent) {
1178ff0cc061SDimitry Andric // Remove the loop from its parent loop.
11794ba319b5SDimitry Andric removeChildLoop(OuterLoopParent, NewInner);
11804ba319b5SDimitry Andric removeChildLoop(NewInner, NewOuter);
11814ba319b5SDimitry Andric OuterLoopParent->addChildLoop(NewOuter);
1182ff0cc061SDimitry Andric } else {
11834ba319b5SDimitry Andric removeChildLoop(NewInner, NewOuter);
11844ba319b5SDimitry Andric LI->changeTopLevelLoop(NewInner, NewOuter);
11854ba319b5SDimitry Andric }
11864ba319b5SDimitry Andric while (!NewOuter->empty())
11874ba319b5SDimitry Andric NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
11884ba319b5SDimitry Andric NewOuter->addChildLoop(NewInner);
11894ba319b5SDimitry Andric
11904ba319b5SDimitry Andric // BBs from the original inner loop.
11914ba319b5SDimitry Andric SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
11924ba319b5SDimitry Andric
11934ba319b5SDimitry Andric // Add BBs from the original outer loop to the original inner loop (excluding
11944ba319b5SDimitry Andric // BBs already in inner loop)
11954ba319b5SDimitry Andric for (BasicBlock *BB : NewInner->blocks())
11964ba319b5SDimitry Andric if (LI->getLoopFor(BB) == NewInner)
11974ba319b5SDimitry Andric NewOuter->addBlockEntry(BB);
11984ba319b5SDimitry Andric
11994ba319b5SDimitry Andric // Now remove inner loop header and latch from the new inner loop and move
12004ba319b5SDimitry Andric // other BBs (the loop body) to the new inner loop.
12014ba319b5SDimitry Andric BasicBlock *OuterHeader = NewOuter->getHeader();
12024ba319b5SDimitry Andric BasicBlock *OuterLatch = NewOuter->getLoopLatch();
12034ba319b5SDimitry Andric for (BasicBlock *BB : OrigInnerBBs) {
12044ba319b5SDimitry Andric // Nothing will change for BBs in child loops.
12054ba319b5SDimitry Andric if (LI->getLoopFor(BB) != NewOuter)
12064ba319b5SDimitry Andric continue;
12074ba319b5SDimitry Andric // Remove the new outer loop header and latch from the new inner loop.
12084ba319b5SDimitry Andric if (BB == OuterHeader || BB == OuterLatch)
12094ba319b5SDimitry Andric NewInner->removeBlockFromLoop(BB);
12104ba319b5SDimitry Andric else
12114ba319b5SDimitry Andric LI->changeLoopFor(BB, NewInner);
1212ff0cc061SDimitry Andric }
1213ff0cc061SDimitry Andric
12144ba319b5SDimitry Andric // The preheader of the original outer loop becomes part of the new
12154ba319b5SDimitry Andric // outer loop.
12164ba319b5SDimitry Andric NewOuter->addBlockEntry(OrigOuterPreHeader);
12174ba319b5SDimitry Andric LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1218*b5893f02SDimitry Andric
1219*b5893f02SDimitry Andric // Tell SE that we move the loops around.
1220*b5893f02SDimitry Andric SE->forgetLoop(NewOuter);
1221*b5893f02SDimitry Andric SE->forgetLoop(NewInner);
1222ff0cc061SDimitry Andric }
1223ff0cc061SDimitry Andric
transform()1224ff0cc061SDimitry Andric bool LoopInterchangeTransform::transform() {
1225ff0cc061SDimitry Andric bool Transformed = false;
1226ff0cc061SDimitry Andric Instruction *InnerIndexVar;
1227ff0cc061SDimitry Andric
12282cab237bSDimitry Andric if (InnerLoop->getSubLoops().empty()) {
1229ff0cc061SDimitry Andric BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
12304ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Calling Split Inner Loop\n");
1231ff0cc061SDimitry Andric PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
1232ff0cc061SDimitry Andric if (!InductionPHI) {
12334ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1234ff0cc061SDimitry Andric return false;
1235ff0cc061SDimitry Andric }
1236ff0cc061SDimitry Andric
1237ff0cc061SDimitry Andric if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1238ff0cc061SDimitry Andric InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1));
1239ff0cc061SDimitry Andric else
1240ff0cc061SDimitry Andric InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0));
1241ff0cc061SDimitry Andric
12424ba319b5SDimitry Andric // Ensure that InductionPHI is the first Phi node.
12432cab237bSDimitry Andric if (&InductionPHI->getParent()->front() != InductionPHI)
12442cab237bSDimitry Andric InductionPHI->moveBefore(&InductionPHI->getParent()->front());
12452cab237bSDimitry Andric
1246ff0cc061SDimitry Andric // Split at the place were the induction variable is
1247ff0cc061SDimitry Andric // incremented/decremented.
1248ff0cc061SDimitry Andric // TODO: This splitting logic may not work always. Fix this.
1249ff0cc061SDimitry Andric splitInnerLoopLatch(InnerIndexVar);
12504ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "splitInnerLoopLatch done\n");
1251ff0cc061SDimitry Andric
12527d523365SDimitry Andric // Splits the inner loops phi nodes out into a separate basic block.
12534ba319b5SDimitry Andric BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
12544ba319b5SDimitry Andric SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
12554ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1256ff0cc061SDimitry Andric }
1257ff0cc061SDimitry Andric
1258ff0cc061SDimitry Andric Transformed |= adjustLoopLinks();
1259ff0cc061SDimitry Andric if (!Transformed) {
12604ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
1261ff0cc061SDimitry Andric return false;
1262ff0cc061SDimitry Andric }
1263ff0cc061SDimitry Andric
1264ff0cc061SDimitry Andric return true;
1265ff0cc061SDimitry Andric }
1266ff0cc061SDimitry Andric
splitInnerLoopLatch(Instruction * Inc)1267ff0cc061SDimitry Andric void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
1268ff0cc061SDimitry Andric BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1269ff0cc061SDimitry Andric BasicBlock *InnerLoopLatchPred = InnerLoopLatch;
1270ff0cc061SDimitry Andric InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI);
1271ff0cc061SDimitry Andric }
1272ff0cc061SDimitry Andric
1273ff0cc061SDimitry Andric /// \brief Move all instructions except the terminator from FromBB right before
1274ff0cc061SDimitry Andric /// InsertBefore
moveBBContents(BasicBlock * FromBB,Instruction * InsertBefore)1275ff0cc061SDimitry Andric static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1276ff0cc061SDimitry Andric auto &ToList = InsertBefore->getParent()->getInstList();
1277ff0cc061SDimitry Andric auto &FromList = FromBB->getInstList();
1278ff0cc061SDimitry Andric
12797d523365SDimitry Andric ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
12807d523365SDimitry Andric FromBB->getTerminator()->getIterator());
1281ff0cc061SDimitry Andric }
1282ff0cc061SDimitry Andric
updateIncomingBlock(BasicBlock * CurrBlock,BasicBlock * OldPred,BasicBlock * NewPred)1283*b5893f02SDimitry Andric static void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred,
1284ff0cc061SDimitry Andric BasicBlock *NewPred) {
12854ba319b5SDimitry Andric for (PHINode &PHI : CurrBlock->phis()) {
12864ba319b5SDimitry Andric unsigned Num = PHI.getNumIncomingValues();
1287ff0cc061SDimitry Andric for (unsigned i = 0; i < Num; ++i) {
12884ba319b5SDimitry Andric if (PHI.getIncomingBlock(i) == OldPred)
12894ba319b5SDimitry Andric PHI.setIncomingBlock(i, NewPred);
12904ba319b5SDimitry Andric }
12914ba319b5SDimitry Andric }
12924ba319b5SDimitry Andric }
12934ba319b5SDimitry Andric
12944ba319b5SDimitry Andric /// Update BI to jump to NewBB instead of OldBB. Records updates to
12954ba319b5SDimitry Andric /// the dominator tree in DTUpdates, if DT should be preserved.
updateSuccessor(BranchInst * BI,BasicBlock * OldBB,BasicBlock * NewBB,std::vector<DominatorTree::UpdateType> & DTUpdates)12964ba319b5SDimitry Andric static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
12974ba319b5SDimitry Andric BasicBlock *NewBB,
12984ba319b5SDimitry Andric std::vector<DominatorTree::UpdateType> &DTUpdates) {
1299*b5893f02SDimitry Andric assert(llvm::count_if(successors(BI),
13004ba319b5SDimitry Andric [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 &&
13014ba319b5SDimitry Andric "BI must jump to OldBB at most once.");
13024ba319b5SDimitry Andric for (unsigned i = 0, e = BI->getNumSuccessors(); i < e; ++i) {
13034ba319b5SDimitry Andric if (BI->getSuccessor(i) == OldBB) {
13044ba319b5SDimitry Andric BI->setSuccessor(i, NewBB);
13054ba319b5SDimitry Andric
13064ba319b5SDimitry Andric DTUpdates.push_back(
13074ba319b5SDimitry Andric {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
13084ba319b5SDimitry Andric DTUpdates.push_back(
13094ba319b5SDimitry Andric {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
13104ba319b5SDimitry Andric break;
1311ff0cc061SDimitry Andric }
1312ff0cc061SDimitry Andric }
1313ff0cc061SDimitry Andric }
1314ff0cc061SDimitry Andric
1315*b5893f02SDimitry Andric // Move Lcssa PHIs to the right place.
moveLCSSAPhis(BasicBlock * InnerExit,BasicBlock * InnerLatch,BasicBlock * OuterLatch)1316*b5893f02SDimitry Andric static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch,
1317*b5893f02SDimitry Andric BasicBlock *OuterLatch) {
1318*b5893f02SDimitry Andric SmallVector<PHINode *, 8> LcssaInnerExit;
1319*b5893f02SDimitry Andric for (PHINode &P : InnerExit->phis())
1320*b5893f02SDimitry Andric LcssaInnerExit.push_back(&P);
1321*b5893f02SDimitry Andric
1322*b5893f02SDimitry Andric SmallVector<PHINode *, 8> LcssaInnerLatch;
1323*b5893f02SDimitry Andric for (PHINode &P : InnerLatch->phis())
1324*b5893f02SDimitry Andric LcssaInnerLatch.push_back(&P);
1325*b5893f02SDimitry Andric
1326*b5893f02SDimitry Andric // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1327*b5893f02SDimitry Andric // If a PHI node has users outside of InnerExit, it has a use outside the
1328*b5893f02SDimitry Andric // interchanged loop and we have to preserve it. We move these to
1329*b5893f02SDimitry Andric // InnerLatch, which will become the new exit block for the innermost
1330*b5893f02SDimitry Andric // loop after interchanging. For PHIs only used in InnerExit, we can just
1331*b5893f02SDimitry Andric // replace them with the incoming value.
1332*b5893f02SDimitry Andric for (PHINode *P : LcssaInnerExit) {
1333*b5893f02SDimitry Andric bool hasUsersOutside = false;
1334*b5893f02SDimitry Andric for (auto UI = P->use_begin(), E = P->use_end(); UI != E;) {
1335*b5893f02SDimitry Andric Use &U = *UI;
1336*b5893f02SDimitry Andric ++UI;
1337*b5893f02SDimitry Andric auto *Usr = cast<Instruction>(U.getUser());
1338*b5893f02SDimitry Andric if (Usr->getParent() != InnerExit) {
1339*b5893f02SDimitry Andric hasUsersOutside = true;
1340*b5893f02SDimitry Andric continue;
1341*b5893f02SDimitry Andric }
1342*b5893f02SDimitry Andric U.set(P->getIncomingValueForBlock(InnerLatch));
1343*b5893f02SDimitry Andric }
1344*b5893f02SDimitry Andric if (hasUsersOutside)
1345*b5893f02SDimitry Andric P->moveBefore(InnerLatch->getFirstNonPHI());
1346*b5893f02SDimitry Andric else
1347*b5893f02SDimitry Andric P->eraseFromParent();
1348*b5893f02SDimitry Andric }
1349*b5893f02SDimitry Andric
1350*b5893f02SDimitry Andric // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1351*b5893f02SDimitry Andric // and we have to move them to the new inner latch.
1352*b5893f02SDimitry Andric for (PHINode *P : LcssaInnerLatch)
1353*b5893f02SDimitry Andric P->moveBefore(InnerExit->getFirstNonPHI());
1354*b5893f02SDimitry Andric
1355*b5893f02SDimitry Andric // Now adjust the incoming blocks for the LCSSA PHIs.
1356*b5893f02SDimitry Andric // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1357*b5893f02SDimitry Andric // with the new latch.
1358*b5893f02SDimitry Andric updateIncomingBlock(InnerLatch, InnerLatch, OuterLatch);
1359*b5893f02SDimitry Andric }
1360*b5893f02SDimitry Andric
adjustLoopBranches()1361ff0cc061SDimitry Andric bool LoopInterchangeTransform::adjustLoopBranches() {
13624ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
13634ba319b5SDimitry Andric std::vector<DominatorTree::UpdateType> DTUpdates;
13644ba319b5SDimitry Andric
1365*b5893f02SDimitry Andric BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1366*b5893f02SDimitry Andric BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1367*b5893f02SDimitry Andric
1368*b5893f02SDimitry Andric assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1369*b5893f02SDimitry Andric InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1370*b5893f02SDimitry Andric InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1371*b5893f02SDimitry Andric // Ensure that both preheaders do not contain PHI nodes and have single
1372*b5893f02SDimitry Andric // predecessors. This allows us to move them easily. We use
1373*b5893f02SDimitry Andric // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1374*b5893f02SDimitry Andric // preheaders do not satisfy those conditions.
1375*b5893f02SDimitry Andric if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1376*b5893f02SDimitry Andric !OuterLoopPreHeader->getUniquePredecessor())
1377*b5893f02SDimitry Andric OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, DT, LI, true);
1378*b5893f02SDimitry Andric if (InnerLoopPreHeader == OuterLoop->getHeader())
1379*b5893f02SDimitry Andric InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, DT, LI, true);
1380*b5893f02SDimitry Andric
1381ff0cc061SDimitry Andric // Adjust the loop preheader
1382ff0cc061SDimitry Andric BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1383ff0cc061SDimitry Andric BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1384ff0cc061SDimitry Andric BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1385ff0cc061SDimitry Andric BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1386ff0cc061SDimitry Andric BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1387ff0cc061SDimitry Andric BasicBlock *InnerLoopLatchPredecessor =
1388ff0cc061SDimitry Andric InnerLoopLatch->getUniquePredecessor();
1389ff0cc061SDimitry Andric BasicBlock *InnerLoopLatchSuccessor;
1390ff0cc061SDimitry Andric BasicBlock *OuterLoopLatchSuccessor;
1391ff0cc061SDimitry Andric
1392ff0cc061SDimitry Andric BranchInst *OuterLoopLatchBI =
1393ff0cc061SDimitry Andric dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1394ff0cc061SDimitry Andric BranchInst *InnerLoopLatchBI =
1395ff0cc061SDimitry Andric dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1396ff0cc061SDimitry Andric BranchInst *OuterLoopHeaderBI =
1397ff0cc061SDimitry Andric dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1398ff0cc061SDimitry Andric BranchInst *InnerLoopHeaderBI =
1399ff0cc061SDimitry Andric dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1400ff0cc061SDimitry Andric
1401ff0cc061SDimitry Andric if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1402ff0cc061SDimitry Andric !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1403ff0cc061SDimitry Andric !InnerLoopHeaderBI)
1404ff0cc061SDimitry Andric return false;
1405ff0cc061SDimitry Andric
1406ff0cc061SDimitry Andric BranchInst *InnerLoopLatchPredecessorBI =
1407ff0cc061SDimitry Andric dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1408ff0cc061SDimitry Andric BranchInst *OuterLoopPredecessorBI =
1409ff0cc061SDimitry Andric dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1410ff0cc061SDimitry Andric
1411ff0cc061SDimitry Andric if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1412ff0cc061SDimitry Andric return false;
14137d523365SDimitry Andric BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
14147d523365SDimitry Andric if (!InnerLoopHeaderSuccessor)
1415ff0cc061SDimitry Andric return false;
1416ff0cc061SDimitry Andric
1417ff0cc061SDimitry Andric // Adjust Loop Preheader and headers
14184ba319b5SDimitry Andric updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
14194ba319b5SDimitry Andric InnerLoopPreHeader, DTUpdates);
14204ba319b5SDimitry Andric updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates);
14214ba319b5SDimitry Andric updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
14224ba319b5SDimitry Andric InnerLoopHeaderSuccessor, DTUpdates);
1423ff0cc061SDimitry Andric
1424ff0cc061SDimitry Andric // Adjust reduction PHI's now that the incoming block has changed.
14257d523365SDimitry Andric updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader,
1426ff0cc061SDimitry Andric OuterLoopHeader);
1427ff0cc061SDimitry Andric
14284ba319b5SDimitry Andric updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
14294ba319b5SDimitry Andric OuterLoopPreHeader, DTUpdates);
1430ff0cc061SDimitry Andric
1431ff0cc061SDimitry Andric // -------------Adjust loop latches-----------
1432ff0cc061SDimitry Andric if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1433ff0cc061SDimitry Andric InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1434ff0cc061SDimitry Andric else
1435ff0cc061SDimitry Andric InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1436ff0cc061SDimitry Andric
14374ba319b5SDimitry Andric updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
14384ba319b5SDimitry Andric InnerLoopLatchSuccessor, DTUpdates);
1439ff0cc061SDimitry Andric
1440ff0cc061SDimitry Andric
1441ff0cc061SDimitry Andric if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1442ff0cc061SDimitry Andric OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1443ff0cc061SDimitry Andric else
1444ff0cc061SDimitry Andric OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1445ff0cc061SDimitry Andric
14464ba319b5SDimitry Andric updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
14474ba319b5SDimitry Andric OuterLoopLatchSuccessor, DTUpdates);
14484ba319b5SDimitry Andric updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
14494ba319b5SDimitry Andric DTUpdates);
1450ff0cc061SDimitry Andric
14514ba319b5SDimitry Andric DT->applyUpdates(DTUpdates);
14524ba319b5SDimitry Andric restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
14534ba319b5SDimitry Andric OuterLoopPreHeader);
14544ba319b5SDimitry Andric
1455*b5893f02SDimitry Andric moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopLatch, OuterLoopLatch);
1456*b5893f02SDimitry Andric // For PHIs in the exit block of the outer loop, outer's latch has been
1457*b5893f02SDimitry Andric // replaced by Inners'.
1458*b5893f02SDimitry Andric updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
1459*b5893f02SDimitry Andric
14604ba319b5SDimitry Andric // Now update the reduction PHIs in the inner and outer loop headers.
14614ba319b5SDimitry Andric SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
14624ba319b5SDimitry Andric for (PHINode &PHI : drop_begin(InnerLoopHeader->phis(), 1))
14634ba319b5SDimitry Andric InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
14644ba319b5SDimitry Andric for (PHINode &PHI : drop_begin(OuterLoopHeader->phis(), 1))
14654ba319b5SDimitry Andric OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
14664ba319b5SDimitry Andric
1467*b5893f02SDimitry Andric auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1468*b5893f02SDimitry Andric (void)OuterInnerReductions;
14694ba319b5SDimitry Andric
1470*b5893f02SDimitry Andric // Now move the remaining reduction PHIs from outer to inner loop header and
1471*b5893f02SDimitry Andric // vice versa. The PHI nodes must be part of a reduction across the inner and
1472*b5893f02SDimitry Andric // outer loop and all the remains to do is and updating the incoming blocks.
1473*b5893f02SDimitry Andric for (PHINode *PHI : OuterLoopPHIs) {
1474*b5893f02SDimitry Andric PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1475*b5893f02SDimitry Andric assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() &&
1476*b5893f02SDimitry Andric "Expected a reduction PHI node");
1477*b5893f02SDimitry Andric }
14784ba319b5SDimitry Andric for (PHINode *PHI : InnerLoopPHIs) {
14794ba319b5SDimitry Andric PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1480*b5893f02SDimitry Andric assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() &&
1481*b5893f02SDimitry Andric "Expected a reduction PHI node");
14824ba319b5SDimitry Andric }
14834ba319b5SDimitry Andric
14844ba319b5SDimitry Andric // Update the incoming blocks for moved PHI nodes.
14854ba319b5SDimitry Andric updateIncomingBlock(OuterLoopHeader, InnerLoopPreHeader, OuterLoopPreHeader);
14864ba319b5SDimitry Andric updateIncomingBlock(OuterLoopHeader, InnerLoopLatch, OuterLoopLatch);
14874ba319b5SDimitry Andric updateIncomingBlock(InnerLoopHeader, OuterLoopPreHeader, InnerLoopPreHeader);
14884ba319b5SDimitry Andric updateIncomingBlock(InnerLoopHeader, OuterLoopLatch, InnerLoopLatch);
1489ff0cc061SDimitry Andric
1490ff0cc061SDimitry Andric return true;
1491ff0cc061SDimitry Andric }
1492ff0cc061SDimitry Andric
adjustLoopPreheaders()14932cab237bSDimitry Andric void LoopInterchangeTransform::adjustLoopPreheaders() {
1494ff0cc061SDimitry Andric // We have interchanged the preheaders so we need to interchange the data in
1495ff0cc061SDimitry Andric // the preheader as well.
1496ff0cc061SDimitry Andric // This is because the content of inner preheader was previously executed
1497ff0cc061SDimitry Andric // inside the outer loop.
1498ff0cc061SDimitry Andric BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1499ff0cc061SDimitry Andric BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1500ff0cc061SDimitry Andric BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1501ff0cc061SDimitry Andric BranchInst *InnerTermBI =
1502ff0cc061SDimitry Andric cast<BranchInst>(InnerLoopPreHeader->getTerminator());
1503ff0cc061SDimitry Andric
1504ff0cc061SDimitry Andric // These instructions should now be executed inside the loop.
1505ff0cc061SDimitry Andric // Move instruction into a new block after outer header.
1506ff0cc061SDimitry Andric moveBBContents(InnerLoopPreHeader, OuterLoopHeader->getTerminator());
1507ff0cc061SDimitry Andric // These instructions were not executed previously in the loop so move them to
1508ff0cc061SDimitry Andric // the older inner loop preheader.
1509ff0cc061SDimitry Andric moveBBContents(OuterLoopPreHeader, InnerTermBI);
1510ff0cc061SDimitry Andric }
1511ff0cc061SDimitry Andric
adjustLoopLinks()1512ff0cc061SDimitry Andric bool LoopInterchangeTransform::adjustLoopLinks() {
1513ff0cc061SDimitry Andric // Adjust all branches in the inner and outer loop.
1514ff0cc061SDimitry Andric bool Changed = adjustLoopBranches();
1515ff0cc061SDimitry Andric if (Changed)
1516ff0cc061SDimitry Andric adjustLoopPreheaders();
1517ff0cc061SDimitry Andric return Changed;
1518ff0cc061SDimitry Andric }
1519ff0cc061SDimitry Andric
1520ff0cc061SDimitry Andric char LoopInterchange::ID = 0;
15212cab237bSDimitry Andric
1522ff0cc061SDimitry Andric INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
1523ff0cc061SDimitry Andric "Interchanges loops for cache reuse", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)1524*b5893f02SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopPass)
15253ca95b02SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
1526b40b48b8SDimitry Andric INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
1527ff0cc061SDimitry Andric
1528ff0cc061SDimitry Andric INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
1529ff0cc061SDimitry Andric "Interchanges loops for cache reuse", false, false)
1530ff0cc061SDimitry Andric
1531ff0cc061SDimitry Andric Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); }
1532