1 //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This Pass handles loop interchange transform.
10 // This pass interchanges loops to provide a more cache-friendly memory access
11 // patterns.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Analysis/DependenceAnalysis.h"
20 #include "llvm/Analysis/LoopInfo.h"
21 #include "llvm/Analysis/LoopPass.h"
22 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
23 #include "llvm/Analysis/ScalarEvolution.h"
24 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
25 #include "llvm/IR/BasicBlock.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/InstrTypes.h"
32 #include "llvm/IR/Instruction.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/IR/User.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/InitializePasses.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/Casting.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Transforms/Scalar.h"
45 #include "llvm/Transforms/Utils.h"
46 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
47 #include "llvm/Transforms/Utils/LoopUtils.h"
48 #include <cassert>
49 #include <utility>
50 #include <vector>
51 
52 using namespace llvm;
53 
54 #define DEBUG_TYPE "loop-interchange"
55 
56 STATISTIC(LoopsInterchanged, "Number of loops interchanged");
57 
58 static cl::opt<int> LoopInterchangeCostThreshold(
59     "loop-interchange-threshold", cl::init(0), cl::Hidden,
60     cl::desc("Interchange if you gain more than this number"));
61 
62 namespace {
63 
64 using LoopVector = SmallVector<Loop *, 8>;
65 
66 // TODO: Check if we can use a sparse matrix here.
67 using CharMatrix = std::vector<std::vector<char>>;
68 
69 } // end anonymous namespace
70 
71 // Maximum number of dependencies that can be handled in the dependency matrix.
72 static const unsigned MaxMemInstrCount = 100;
73 
74 // Maximum loop depth supported.
75 static const unsigned MaxLoopNestDepth = 10;
76 
77 #ifdef DUMP_DEP_MATRICIES
78 static void printDepMatrix(CharMatrix &DepMatrix) {
79   for (auto &Row : DepMatrix) {
80     for (auto D : Row)
81       LLVM_DEBUG(dbgs() << D << " ");
82     LLVM_DEBUG(dbgs() << "\n");
83   }
84 }
85 #endif
86 
87 static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
88                                      Loop *L, DependenceInfo *DI) {
89   using ValueVector = SmallVector<Value *, 16>;
90 
91   ValueVector MemInstr;
92 
93   // For each block.
94   for (BasicBlock *BB : L->blocks()) {
95     // Scan the BB and collect legal loads and stores.
96     for (Instruction &I : *BB) {
97       if (!isa<Instruction>(I))
98         return false;
99       if (auto *Ld = dyn_cast<LoadInst>(&I)) {
100         if (!Ld->isSimple())
101           return false;
102         MemInstr.push_back(&I);
103       } else if (auto *St = dyn_cast<StoreInst>(&I)) {
104         if (!St->isSimple())
105           return false;
106         MemInstr.push_back(&I);
107       }
108     }
109   }
110 
111   LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
112                     << " Loads and Stores to analyze\n");
113 
114   ValueVector::iterator I, IE, J, JE;
115 
116   for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
117     for (J = I, JE = MemInstr.end(); J != JE; ++J) {
118       std::vector<char> Dep;
119       Instruction *Src = cast<Instruction>(*I);
120       Instruction *Dst = cast<Instruction>(*J);
121       if (Src == Dst)
122         continue;
123       // Ignore Input dependencies.
124       if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
125         continue;
126       // Track Output, Flow, and Anti dependencies.
127       if (auto D = DI->depends(Src, Dst, true)) {
128         assert(D->isOrdered() && "Expected an output, flow or anti dep.");
129         LLVM_DEBUG(StringRef DepType =
130                        D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
131                    dbgs() << "Found " << DepType
132                           << " dependency between Src and Dst\n"
133                           << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
134         unsigned Levels = D->getLevels();
135         char Direction;
136         for (unsigned II = 1; II <= Levels; ++II) {
137           const SCEV *Distance = D->getDistance(II);
138           const SCEVConstant *SCEVConst =
139               dyn_cast_or_null<SCEVConstant>(Distance);
140           if (SCEVConst) {
141             const ConstantInt *CI = SCEVConst->getValue();
142             if (CI->isNegative())
143               Direction = '<';
144             else if (CI->isZero())
145               Direction = '=';
146             else
147               Direction = '>';
148             Dep.push_back(Direction);
149           } else if (D->isScalar(II)) {
150             Direction = 'S';
151             Dep.push_back(Direction);
152           } else {
153             unsigned Dir = D->getDirection(II);
154             if (Dir == Dependence::DVEntry::LT ||
155                 Dir == Dependence::DVEntry::LE)
156               Direction = '<';
157             else if (Dir == Dependence::DVEntry::GT ||
158                      Dir == Dependence::DVEntry::GE)
159               Direction = '>';
160             else if (Dir == Dependence::DVEntry::EQ)
161               Direction = '=';
162             else
163               Direction = '*';
164             Dep.push_back(Direction);
165           }
166         }
167         while (Dep.size() != Level) {
168           Dep.push_back('I');
169         }
170 
171         DepMatrix.push_back(Dep);
172         if (DepMatrix.size() > MaxMemInstrCount) {
173           LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
174                             << " dependencies inside loop\n");
175           return false;
176         }
177       }
178     }
179   }
180 
181   return true;
182 }
183 
184 // A loop is moved from index 'from' to an index 'to'. Update the Dependence
185 // matrix by exchanging the two columns.
186 static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
187                                     unsigned ToIndx) {
188   unsigned numRows = DepMatrix.size();
189   for (unsigned i = 0; i < numRows; ++i) {
190     char TmpVal = DepMatrix[i][ToIndx];
191     DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx];
192     DepMatrix[i][FromIndx] = TmpVal;
193   }
194 }
195 
196 // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
197 // '>'
198 static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
199                                    unsigned Column) {
200   for (unsigned i = 0; i <= Column; ++i) {
201     if (DepMatrix[Row][i] == '<')
202       return false;
203     if (DepMatrix[Row][i] == '>')
204       return true;
205   }
206   // All dependencies were '=','S' or 'I'
207   return false;
208 }
209 
210 // Checks if no dependence exist in the dependency matrix in Row before Column.
211 static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
212                                  unsigned Column) {
213   for (unsigned i = 0; i < Column; ++i) {
214     if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
215         DepMatrix[Row][i] != 'I')
216       return false;
217   }
218   return true;
219 }
220 
221 static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
222                                 unsigned OuterLoopId, char InnerDep,
223                                 char OuterDep) {
224   if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
225     return false;
226 
227   if (InnerDep == OuterDep)
228     return true;
229 
230   // It is legal to interchange if and only if after interchange no row has a
231   // '>' direction as the leftmost non-'='.
232 
233   if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
234     return true;
235 
236   if (InnerDep == '<')
237     return true;
238 
239   if (InnerDep == '>') {
240     // If OuterLoopId represents outermost loop then interchanging will make the
241     // 1st dependency as '>'
242     if (OuterLoopId == 0)
243       return false;
244 
245     // If all dependencies before OuterloopId are '=','S'or 'I'. Then
246     // interchanging will result in this row having an outermost non '='
247     // dependency of '>'
248     if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
249       return true;
250   }
251 
252   return false;
253 }
254 
255 // Checks if it is legal to interchange 2 loops.
256 // [Theorem] A permutation of the loops in a perfect nest is legal if and only
257 // if the direction matrix, after the same permutation is applied to its
258 // columns, has no ">" direction as the leftmost non-"=" direction in any row.
259 static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
260                                       unsigned InnerLoopId,
261                                       unsigned OuterLoopId) {
262   unsigned NumRows = DepMatrix.size();
263   // For each row check if it is valid to interchange.
264   for (unsigned Row = 0; Row < NumRows; ++Row) {
265     char InnerDep = DepMatrix[Row][InnerLoopId];
266     char OuterDep = DepMatrix[Row][OuterLoopId];
267     if (InnerDep == '*' || OuterDep == '*')
268       return false;
269     if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
270       return false;
271   }
272   return true;
273 }
274 
275 static LoopVector populateWorklist(Loop &L) {
276   LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
277                     << L.getHeader()->getParent()->getName() << " Loop: %"
278                     << L.getHeader()->getName() << '\n');
279   LoopVector LoopList;
280   Loop *CurrentLoop = &L;
281   const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
282   while (!Vec->empty()) {
283     // The current loop has multiple subloops in it hence it is not tightly
284     // nested.
285     // Discard all loops above it added into Worklist.
286     if (Vec->size() != 1)
287       return {};
288 
289     LoopList.push_back(CurrentLoop);
290     CurrentLoop = Vec->front();
291     Vec = &CurrentLoop->getSubLoops();
292   }
293   LoopList.push_back(CurrentLoop);
294   return LoopList;
295 }
296 
297 static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
298   PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
299   if (InnerIndexVar)
300     return InnerIndexVar;
301   if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
302     return nullptr;
303   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
304     PHINode *PhiVar = cast<PHINode>(I);
305     Type *PhiTy = PhiVar->getType();
306     if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
307         !PhiTy->isPointerTy())
308       return nullptr;
309     const SCEVAddRecExpr *AddRec =
310         dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
311     if (!AddRec || !AddRec->isAffine())
312       continue;
313     const SCEV *Step = AddRec->getStepRecurrence(*SE);
314     if (!isa<SCEVConstant>(Step))
315       continue;
316     // Found the induction variable.
317     // FIXME: Handle loops with more than one induction variable. Note that,
318     // currently, legality makes sure we have only one induction variable.
319     return PhiVar;
320   }
321   return nullptr;
322 }
323 
324 namespace {
325 
326 /// LoopInterchangeLegality checks if it is legal to interchange the loop.
327 class LoopInterchangeLegality {
328 public:
329   LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
330                           OptimizationRemarkEmitter *ORE)
331       : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
332 
333   /// Check if the loops can be interchanged.
334   bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
335                            CharMatrix &DepMatrix);
336 
337   /// Check if the loop structure is understood. We do not handle triangular
338   /// loops for now.
339   bool isLoopStructureUnderstood(PHINode *InnerInductionVar);
340 
341   bool currentLimitations();
342 
343   const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
344     return OuterInnerReductions;
345   }
346 
347 private:
348   bool tightlyNested(Loop *Outer, Loop *Inner);
349   bool containsUnsafeInstructions(BasicBlock *BB);
350 
351   /// Discover induction and reduction PHIs in the header of \p L. Induction
352   /// PHIs are added to \p Inductions, reductions are added to
353   /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
354   /// to be passed as \p InnerLoop.
355   bool findInductionAndReductions(Loop *L,
356                                   SmallVector<PHINode *, 8> &Inductions,
357                                   Loop *InnerLoop);
358 
359   Loop *OuterLoop;
360   Loop *InnerLoop;
361 
362   ScalarEvolution *SE;
363 
364   /// Interface to emit optimization remarks.
365   OptimizationRemarkEmitter *ORE;
366 
367   /// Set of reduction PHIs taking part of a reduction across the inner and
368   /// outer loop.
369   SmallPtrSet<PHINode *, 4> OuterInnerReductions;
370 };
371 
372 /// LoopInterchangeProfitability checks if it is profitable to interchange the
373 /// loop.
374 class LoopInterchangeProfitability {
375 public:
376   LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
377                                OptimizationRemarkEmitter *ORE)
378       : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
379 
380   /// Check if the loop interchange is profitable.
381   bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
382                     CharMatrix &DepMatrix);
383 
384 private:
385   int getInstrOrderCost();
386 
387   Loop *OuterLoop;
388   Loop *InnerLoop;
389 
390   /// Scev analysis.
391   ScalarEvolution *SE;
392 
393   /// Interface to emit optimization remarks.
394   OptimizationRemarkEmitter *ORE;
395 };
396 
397 /// LoopInterchangeTransform interchanges the loop.
398 class LoopInterchangeTransform {
399 public:
400   LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
401                            LoopInfo *LI, DominatorTree *DT,
402                            BasicBlock *LoopNestExit,
403                            const LoopInterchangeLegality &LIL)
404       : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
405         LoopExit(LoopNestExit), LIL(LIL) {}
406 
407   /// Interchange OuterLoop and InnerLoop.
408   bool transform();
409   void restructureLoops(Loop *NewInner, Loop *NewOuter,
410                         BasicBlock *OrigInnerPreHeader,
411                         BasicBlock *OrigOuterPreHeader);
412   void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
413 
414 private:
415   bool adjustLoopLinks();
416   bool adjustLoopBranches();
417 
418   Loop *OuterLoop;
419   Loop *InnerLoop;
420 
421   /// Scev analysis.
422   ScalarEvolution *SE;
423 
424   LoopInfo *LI;
425   DominatorTree *DT;
426   BasicBlock *LoopExit;
427 
428   const LoopInterchangeLegality &LIL;
429 };
430 
431 // Main LoopInterchange Pass.
432 struct LoopInterchange : public LoopPass {
433   static char ID;
434   ScalarEvolution *SE = nullptr;
435   LoopInfo *LI = nullptr;
436   DependenceInfo *DI = nullptr;
437   DominatorTree *DT = nullptr;
438 
439   /// Interface to emit optimization remarks.
440   OptimizationRemarkEmitter *ORE;
441 
442   LoopInterchange() : LoopPass(ID) {
443     initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
444   }
445 
446   void getAnalysisUsage(AnalysisUsage &AU) const override {
447     AU.addRequired<DependenceAnalysisWrapperPass>();
448     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
449 
450     getLoopAnalysisUsage(AU);
451   }
452 
453   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
454     if (skipLoop(L) || L->getParentLoop())
455       return false;
456 
457     SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
458     LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
459     DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
460     DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
461     ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
462 
463     return processLoopList(populateWorklist(*L));
464   }
465 
466   bool isComputableLoopNest(LoopVector LoopList) {
467     for (Loop *L : LoopList) {
468       const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
469       if (ExitCountOuter == SE->getCouldNotCompute()) {
470         LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
471         return false;
472       }
473       if (L->getNumBackEdges() != 1) {
474         LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
475         return false;
476       }
477       if (!L->getExitingBlock()) {
478         LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
479         return false;
480       }
481     }
482     return true;
483   }
484 
485   unsigned selectLoopForInterchange(const LoopVector &LoopList) {
486     // TODO: Add a better heuristic to select the loop to be interchanged based
487     // on the dependence matrix. Currently we select the innermost loop.
488     return LoopList.size() - 1;
489   }
490 
491   bool processLoopList(LoopVector LoopList) {
492     bool Changed = false;
493     unsigned LoopNestDepth = LoopList.size();
494     if (LoopNestDepth < 2) {
495       LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
496       return false;
497     }
498     if (LoopNestDepth > MaxLoopNestDepth) {
499       LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
500                         << MaxLoopNestDepth << "\n");
501       return false;
502     }
503     if (!isComputableLoopNest(LoopList)) {
504       LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
505       return false;
506     }
507 
508     LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
509                       << "\n");
510 
511     CharMatrix DependencyMatrix;
512     Loop *OuterMostLoop = *(LoopList.begin());
513     if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
514                                   OuterMostLoop, DI)) {
515       LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
516       return false;
517     }
518 #ifdef DUMP_DEP_MATRICIES
519     LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
520     printDepMatrix(DependencyMatrix);
521 #endif
522 
523     // Get the Outermost loop exit.
524     BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
525     if (!LoopNestExit) {
526       LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
527       return false;
528     }
529 
530     unsigned SelecLoopId = selectLoopForInterchange(LoopList);
531     // Move the selected loop outwards to the best possible position.
532     for (unsigned i = SelecLoopId; i > 0; i--) {
533       bool Interchanged =
534           processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix);
535       if (!Interchanged)
536         return Changed;
537       // Loops interchanged reflect the same in LoopList
538       std::swap(LoopList[i - 1], LoopList[i]);
539 
540       // Update the DependencyMatrix
541       interChangeDependencies(DependencyMatrix, i, i - 1);
542 #ifdef DUMP_DEP_MATRICIES
543       LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
544       printDepMatrix(DependencyMatrix);
545 #endif
546       Changed |= Interchanged;
547     }
548     return Changed;
549   }
550 
551   bool processLoop(LoopVector LoopList, unsigned InnerLoopId,
552                    unsigned OuterLoopId, BasicBlock *LoopNestExit,
553                    std::vector<std::vector<char>> &DependencyMatrix) {
554     LLVM_DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopId
555                       << " and OuterLoopId = " << OuterLoopId << "\n");
556     Loop *InnerLoop = LoopList[InnerLoopId];
557     Loop *OuterLoop = LoopList[OuterLoopId];
558 
559     LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
560     if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
561       LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
562       return false;
563     }
564     LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
565     LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
566     if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
567       LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
568       return false;
569     }
570 
571     ORE->emit([&]() {
572       return OptimizationRemark(DEBUG_TYPE, "Interchanged",
573                                 InnerLoop->getStartLoc(),
574                                 InnerLoop->getHeader())
575              << "Loop interchanged with enclosing loop.";
576     });
577 
578     LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit,
579                                  LIL);
580     LIT.transform();
581     LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
582     LoopsInterchanged++;
583 
584     assert(InnerLoop->isLCSSAForm(*DT) &&
585            "Inner loop not left in LCSSA form after loop interchange!");
586     assert(OuterLoop->isLCSSAForm(*DT) &&
587            "Outer loop not left in LCSSA form after loop interchange!");
588 
589     return true;
590   }
591 };
592 
593 } // end anonymous namespace
594 
595 bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
596   return any_of(*BB, [](const Instruction &I) {
597     return I.mayHaveSideEffects() || I.mayReadFromMemory();
598   });
599 }
600 
601 bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
602   BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
603   BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
604   BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
605 
606   LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
607 
608   // A perfectly nested loop will not have any branch in between the outer and
609   // inner block i.e. outer header will branch to either inner preheader and
610   // outerloop latch.
611   BranchInst *OuterLoopHeaderBI =
612       dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
613   if (!OuterLoopHeaderBI)
614     return false;
615 
616   for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
617     if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
618         Succ != OuterLoopLatch)
619       return false;
620 
621   LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
622   // We do not have any basic block in between now make sure the outer header
623   // and outer loop latch doesn't contain any unsafe instructions.
624   if (containsUnsafeInstructions(OuterLoopHeader) ||
625       containsUnsafeInstructions(OuterLoopLatch))
626     return false;
627 
628   // Also make sure the inner loop preheader does not contain any unsafe
629   // instructions. Note that all instructions in the preheader will be moved to
630   // the outer loop header when interchanging.
631   if (InnerLoopPreHeader != OuterLoopHeader &&
632       containsUnsafeInstructions(InnerLoopPreHeader))
633     return false;
634 
635   LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
636   // We have a perfect loop nest.
637   return true;
638 }
639 
640 bool LoopInterchangeLegality::isLoopStructureUnderstood(
641     PHINode *InnerInduction) {
642   unsigned Num = InnerInduction->getNumOperands();
643   BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
644   for (unsigned i = 0; i < Num; ++i) {
645     Value *Val = InnerInduction->getOperand(i);
646     if (isa<Constant>(Val))
647       continue;
648     Instruction *I = dyn_cast<Instruction>(Val);
649     if (!I)
650       return false;
651     // TODO: Handle triangular loops.
652     // e.g. for(int i=0;i<N;i++)
653     //        for(int j=i;j<N;j++)
654     unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
655     if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
656             InnerLoopPreheader &&
657         !OuterLoop->isLoopInvariant(I)) {
658       return false;
659     }
660   }
661   return true;
662 }
663 
664 // If SV is a LCSSA PHI node with a single incoming value, return the incoming
665 // value.
666 static Value *followLCSSA(Value *SV) {
667   PHINode *PHI = dyn_cast<PHINode>(SV);
668   if (!PHI)
669     return SV;
670 
671   if (PHI->getNumIncomingValues() != 1)
672     return SV;
673   return followLCSSA(PHI->getIncomingValue(0));
674 }
675 
676 // Check V's users to see if it is involved in a reduction in L.
677 static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
678   for (Value *User : V->users()) {
679     if (PHINode *PHI = dyn_cast<PHINode>(User)) {
680       if (PHI->getNumIncomingValues() == 1)
681         continue;
682       RecurrenceDescriptor RD;
683       if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
684         return PHI;
685       return nullptr;
686     }
687   }
688 
689   return nullptr;
690 }
691 
692 bool LoopInterchangeLegality::findInductionAndReductions(
693     Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
694   if (!L->getLoopLatch() || !L->getLoopPredecessor())
695     return false;
696   for (PHINode &PHI : L->getHeader()->phis()) {
697     RecurrenceDescriptor RD;
698     InductionDescriptor ID;
699     if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
700       Inductions.push_back(&PHI);
701     else {
702       // PHIs in inner loops need to be part of a reduction in the outer loop,
703       // discovered when checking the PHIs of the outer loop earlier.
704       if (!InnerLoop) {
705         if (!OuterInnerReductions.count(&PHI)) {
706           LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
707                                "across the outer loop.\n");
708           return false;
709         }
710       } else {
711         assert(PHI.getNumIncomingValues() == 2 &&
712                "Phis in loop header should have exactly 2 incoming values");
713         // Check if we have a PHI node in the outer loop that has a reduction
714         // result from the inner loop as an incoming value.
715         Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
716         PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
717         if (!InnerRedPhi ||
718             !llvm::any_of(InnerRedPhi->incoming_values(),
719                           [&PHI](Value *V) { return V == &PHI; })) {
720           LLVM_DEBUG(
721               dbgs()
722               << "Failed to recognize PHI as an induction or reduction.\n");
723           return false;
724         }
725         OuterInnerReductions.insert(&PHI);
726         OuterInnerReductions.insert(InnerRedPhi);
727       }
728     }
729   }
730   return true;
731 }
732 
733 // This function indicates the current limitations in the transform as a result
734 // of which we do not proceed.
735 bool LoopInterchangeLegality::currentLimitations() {
736   BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
737   BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
738 
739   // transform currently expects the loop latches to also be the exiting
740   // blocks.
741   if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
742       OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
743       !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
744       !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
745     LLVM_DEBUG(
746         dbgs() << "Loops where the latch is not the exiting block are not"
747                << " supported currently.\n");
748     ORE->emit([&]() {
749       return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
750                                       OuterLoop->getStartLoc(),
751                                       OuterLoop->getHeader())
752              << "Loops where the latch is not the exiting block cannot be"
753                 " interchange currently.";
754     });
755     return true;
756   }
757 
758   PHINode *InnerInductionVar;
759   SmallVector<PHINode *, 8> Inductions;
760   if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
761     LLVM_DEBUG(
762         dbgs() << "Only outer loops with induction or reduction PHI nodes "
763                << "are supported currently.\n");
764     ORE->emit([&]() {
765       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
766                                       OuterLoop->getStartLoc(),
767                                       OuterLoop->getHeader())
768              << "Only outer loops with induction or reduction PHI nodes can be"
769                 " interchanged currently.";
770     });
771     return true;
772   }
773 
774   // TODO: Currently we handle only loops with 1 induction variable.
775   if (Inductions.size() != 1) {
776     LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
777                       << "supported currently.\n");
778     ORE->emit([&]() {
779       return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter",
780                                       OuterLoop->getStartLoc(),
781                                       OuterLoop->getHeader())
782              << "Only outer loops with 1 induction variable can be "
783                 "interchanged currently.";
784     });
785     return true;
786   }
787 
788   Inductions.clear();
789   if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
790     LLVM_DEBUG(
791         dbgs() << "Only inner loops with induction or reduction PHI nodes "
792                << "are supported currently.\n");
793     ORE->emit([&]() {
794       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
795                                       InnerLoop->getStartLoc(),
796                                       InnerLoop->getHeader())
797              << "Only inner loops with induction or reduction PHI nodes can be"
798                 " interchange currently.";
799     });
800     return true;
801   }
802 
803   // TODO: Currently we handle only loops with 1 induction variable.
804   if (Inductions.size() != 1) {
805     LLVM_DEBUG(
806         dbgs() << "We currently only support loops with 1 induction variable."
807                << "Failed to interchange due to current limitation\n");
808     ORE->emit([&]() {
809       return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner",
810                                       InnerLoop->getStartLoc(),
811                                       InnerLoop->getHeader())
812              << "Only inner loops with 1 induction variable can be "
813                 "interchanged currently.";
814     });
815     return true;
816   }
817   InnerInductionVar = Inductions.pop_back_val();
818 
819   // TODO: Triangular loops are not handled for now.
820   if (!isLoopStructureUnderstood(InnerInductionVar)) {
821     LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
822     ORE->emit([&]() {
823       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
824                                       InnerLoop->getStartLoc(),
825                                       InnerLoop->getHeader())
826              << "Inner loop structure not understood currently.";
827     });
828     return true;
829   }
830 
831   // TODO: Current limitation: Since we split the inner loop latch at the point
832   // were induction variable is incremented (induction.next); We cannot have
833   // more than 1 user of induction.next since it would result in broken code
834   // after split.
835   // e.g.
836   // for(i=0;i<N;i++) {
837   //    for(j = 0;j<M;j++) {
838   //      A[j+1][i+2] = A[j][i]+k;
839   //  }
840   // }
841   Instruction *InnerIndexVarInc = nullptr;
842   if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader)
843     InnerIndexVarInc =
844         dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1));
845   else
846     InnerIndexVarInc =
847         dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0));
848 
849   if (!InnerIndexVarInc) {
850     LLVM_DEBUG(
851         dbgs() << "Did not find an instruction to increment the induction "
852                << "variable.\n");
853     ORE->emit([&]() {
854       return OptimizationRemarkMissed(DEBUG_TYPE, "NoIncrementInInner",
855                                       InnerLoop->getStartLoc(),
856                                       InnerLoop->getHeader())
857              << "The inner loop does not increment the induction variable.";
858     });
859     return true;
860   }
861 
862   // Since we split the inner loop latch on this induction variable. Make sure
863   // we do not have any instruction between the induction variable and branch
864   // instruction.
865 
866   bool FoundInduction = false;
867   for (const Instruction &I :
868        llvm::reverse(InnerLoopLatch->instructionsWithoutDebug())) {
869     if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I) ||
870         isa<ZExtInst>(I))
871       continue;
872 
873     // We found an instruction. If this is not induction variable then it is not
874     // safe to split this loop latch.
875     if (!I.isIdenticalTo(InnerIndexVarInc)) {
876       LLVM_DEBUG(dbgs() << "Found unsupported instructions between induction "
877                         << "variable increment and branch.\n");
878       ORE->emit([&]() {
879         return OptimizationRemarkMissed(
880                    DEBUG_TYPE, "UnsupportedInsBetweenInduction",
881                    InnerLoop->getStartLoc(), InnerLoop->getHeader())
882                << "Found unsupported instruction between induction variable "
883                   "increment and branch.";
884       });
885       return true;
886     }
887 
888     FoundInduction = true;
889     break;
890   }
891   // The loop latch ended and we didn't find the induction variable return as
892   // current limitation.
893   if (!FoundInduction) {
894     LLVM_DEBUG(dbgs() << "Did not find the induction variable.\n");
895     ORE->emit([&]() {
896       return OptimizationRemarkMissed(DEBUG_TYPE, "NoIndutionVariable",
897                                       InnerLoop->getStartLoc(),
898                                       InnerLoop->getHeader())
899              << "Did not find the induction variable.";
900     });
901     return true;
902   }
903   return false;
904 }
905 
906 // We currently only support LCSSA PHI nodes in the inner loop exit, if their
907 // users are either reduction PHIs or PHIs outside the outer loop (which means
908 // the we are only interested in the final value after the loop).
909 static bool
910 areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
911                               SmallPtrSetImpl<PHINode *> &Reductions) {
912   BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
913   for (PHINode &PHI : InnerExit->phis()) {
914     // Reduction lcssa phi will have only 1 incoming block that from loop latch.
915     if (PHI.getNumIncomingValues() > 1)
916       return false;
917     if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
918           PHINode *PN = dyn_cast<PHINode>(U);
919           return !PN ||
920                  (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
921         })) {
922       return false;
923     }
924   }
925   return true;
926 }
927 
928 // We currently support LCSSA PHI nodes in the outer loop exit, if their
929 // incoming values do not come from the outer loop latch or if the
930 // outer loop latch has a single predecessor. In that case, the value will
931 // be available if both the inner and outer loop conditions are true, which
932 // will still be true after interchanging. If we have multiple predecessor,
933 // that may not be the case, e.g. because the outer loop latch may be executed
934 // if the inner loop is not executed.
935 static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
936   BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
937   for (PHINode &PHI : LoopNestExit->phis()) {
938     //  FIXME: We currently are not able to detect floating point reductions
939     //         and have to use floating point PHIs as a proxy to prevent
940     //         interchanging in the presence of floating point reductions.
941     if (PHI.getType()->isFloatingPointTy())
942       return false;
943     for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
944      Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
945      if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
946        continue;
947 
948      // The incoming value is defined in the outer loop latch. Currently we
949      // only support that in case the outer loop latch has a single predecessor.
950      // This guarantees that the outer loop latch is executed if and only if
951      // the inner loop is executed (because tightlyNested() guarantees that the
952      // outer loop header only branches to the inner loop or the outer loop
953      // latch).
954      // FIXME: We could weaken this logic and allow multiple predecessors,
955      //        if the values are produced outside the loop latch. We would need
956      //        additional logic to update the PHI nodes in the exit block as
957      //        well.
958      if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
959        return false;
960     }
961   }
962   return true;
963 }
964 
965 bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
966                                                   unsigned OuterLoopId,
967                                                   CharMatrix &DepMatrix) {
968   if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
969     LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
970                       << " and OuterLoopId = " << OuterLoopId
971                       << " due to dependence\n");
972     ORE->emit([&]() {
973       return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
974                                       InnerLoop->getStartLoc(),
975                                       InnerLoop->getHeader())
976              << "Cannot interchange loops due to dependences.";
977     });
978     return false;
979   }
980   // Check if outer and inner loop contain legal instructions only.
981   for (auto *BB : OuterLoop->blocks())
982     for (Instruction &I : BB->instructionsWithoutDebug())
983       if (CallInst *CI = dyn_cast<CallInst>(&I)) {
984         // readnone functions do not prevent interchanging.
985         if (CI->doesNotReadMemory())
986           continue;
987         LLVM_DEBUG(
988             dbgs() << "Loops with call instructions cannot be interchanged "
989                    << "safely.");
990         ORE->emit([&]() {
991           return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
992                                           CI->getDebugLoc(),
993                                           CI->getParent())
994                  << "Cannot interchange loops due to call instruction.";
995         });
996 
997         return false;
998       }
999 
1000   // TODO: The loops could not be interchanged due to current limitations in the
1001   // transform module.
1002   if (currentLimitations()) {
1003     LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
1004     return false;
1005   }
1006 
1007   // Check if the loops are tightly nested.
1008   if (!tightlyNested(OuterLoop, InnerLoop)) {
1009     LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
1010     ORE->emit([&]() {
1011       return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1012                                       InnerLoop->getStartLoc(),
1013                                       InnerLoop->getHeader())
1014              << "Cannot interchange loops because they are not tightly "
1015                 "nested.";
1016     });
1017     return false;
1018   }
1019 
1020   if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1021                                      OuterInnerReductions)) {
1022     LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1023     ORE->emit([&]() {
1024       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1025                                       InnerLoop->getStartLoc(),
1026                                       InnerLoop->getHeader())
1027              << "Found unsupported PHI node in loop exit.";
1028     });
1029     return false;
1030   }
1031 
1032   if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1033     LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1034     ORE->emit([&]() {
1035       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1036                                       OuterLoop->getStartLoc(),
1037                                       OuterLoop->getHeader())
1038              << "Found unsupported PHI node in loop exit.";
1039     });
1040     return false;
1041   }
1042 
1043   return true;
1044 }
1045 
1046 int LoopInterchangeProfitability::getInstrOrderCost() {
1047   unsigned GoodOrder, BadOrder;
1048   BadOrder = GoodOrder = 0;
1049   for (BasicBlock *BB : InnerLoop->blocks()) {
1050     for (Instruction &Ins : *BB) {
1051       if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1052         unsigned NumOp = GEP->getNumOperands();
1053         bool FoundInnerInduction = false;
1054         bool FoundOuterInduction = false;
1055         for (unsigned i = 0; i < NumOp; ++i) {
1056           const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1057           const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1058           if (!AR)
1059             continue;
1060 
1061           // If we find the inner induction after an outer induction e.g.
1062           // for(int i=0;i<N;i++)
1063           //   for(int j=0;j<N;j++)
1064           //     A[i][j] = A[i-1][j-1]+k;
1065           // then it is a good order.
1066           if (AR->getLoop() == InnerLoop) {
1067             // We found an InnerLoop induction after OuterLoop induction. It is
1068             // a good order.
1069             FoundInnerInduction = true;
1070             if (FoundOuterInduction) {
1071               GoodOrder++;
1072               break;
1073             }
1074           }
1075           // If we find the outer induction after an inner induction e.g.
1076           // for(int i=0;i<N;i++)
1077           //   for(int j=0;j<N;j++)
1078           //     A[j][i] = A[j-1][i-1]+k;
1079           // then it is a bad order.
1080           if (AR->getLoop() == OuterLoop) {
1081             // We found an OuterLoop induction after InnerLoop induction. It is
1082             // a bad order.
1083             FoundOuterInduction = true;
1084             if (FoundInnerInduction) {
1085               BadOrder++;
1086               break;
1087             }
1088           }
1089         }
1090       }
1091     }
1092   }
1093   return GoodOrder - BadOrder;
1094 }
1095 
1096 static bool isProfitableForVectorization(unsigned InnerLoopId,
1097                                          unsigned OuterLoopId,
1098                                          CharMatrix &DepMatrix) {
1099   // TODO: Improve this heuristic to catch more cases.
1100   // If the inner loop is loop independent or doesn't carry any dependency it is
1101   // profitable to move this to outer position.
1102   for (auto &Row : DepMatrix) {
1103     if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1104       return false;
1105     // TODO: We need to improve this heuristic.
1106     if (Row[OuterLoopId] != '=')
1107       return false;
1108   }
1109   // If outer loop has dependence and inner loop is loop independent then it is
1110   // profitable to interchange to enable parallelism.
1111   // If there are no dependences, interchanging will not improve anything.
1112   return !DepMatrix.empty();
1113 }
1114 
1115 bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
1116                                                 unsigned OuterLoopId,
1117                                                 CharMatrix &DepMatrix) {
1118   // TODO: Add better profitability checks.
1119   // e.g
1120   // 1) Construct dependency matrix and move the one with no loop carried dep
1121   //    inside to enable vectorization.
1122 
1123   // This is rough cost estimation algorithm. It counts the good and bad order
1124   // of induction variables in the instruction and allows reordering if number
1125   // of bad orders is more than good.
1126   int Cost = getInstrOrderCost();
1127   LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1128   if (Cost < -LoopInterchangeCostThreshold)
1129     return true;
1130 
1131   // It is not profitable as per current cache profitability model. But check if
1132   // we can move this loop outside to improve parallelism.
1133   if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1134     return true;
1135 
1136   ORE->emit([&]() {
1137     return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1138                                     InnerLoop->getStartLoc(),
1139                                     InnerLoop->getHeader())
1140            << "Interchanging loops is too costly (cost="
1141            << ore::NV("Cost", Cost) << ", threshold="
1142            << ore::NV("Threshold", LoopInterchangeCostThreshold)
1143            << ") and it does not improve parallelism.";
1144   });
1145   return false;
1146 }
1147 
1148 void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1149                                                Loop *InnerLoop) {
1150   for (Loop *L : *OuterLoop)
1151     if (L == InnerLoop) {
1152       OuterLoop->removeChildLoop(L);
1153       return;
1154     }
1155   llvm_unreachable("Couldn't find loop");
1156 }
1157 
1158 /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1159 /// new inner and outer loop after interchanging: NewInner is the original
1160 /// outer loop and NewOuter is the original inner loop.
1161 ///
1162 /// Before interchanging, we have the following structure
1163 /// Outer preheader
1164 //  Outer header
1165 //    Inner preheader
1166 //    Inner header
1167 //      Inner body
1168 //      Inner latch
1169 //   outer bbs
1170 //   Outer latch
1171 //
1172 // After interchanging:
1173 // Inner preheader
1174 // Inner header
1175 //   Outer preheader
1176 //   Outer header
1177 //     Inner body
1178 //     outer bbs
1179 //     Outer latch
1180 //   Inner latch
1181 void LoopInterchangeTransform::restructureLoops(
1182     Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1183     BasicBlock *OrigOuterPreHeader) {
1184   Loop *OuterLoopParent = OuterLoop->getParentLoop();
1185   // The original inner loop preheader moves from the new inner loop to
1186   // the parent loop, if there is one.
1187   NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1188   LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1189 
1190   // Switch the loop levels.
1191   if (OuterLoopParent) {
1192     // Remove the loop from its parent loop.
1193     removeChildLoop(OuterLoopParent, NewInner);
1194     removeChildLoop(NewInner, NewOuter);
1195     OuterLoopParent->addChildLoop(NewOuter);
1196   } else {
1197     removeChildLoop(NewInner, NewOuter);
1198     LI->changeTopLevelLoop(NewInner, NewOuter);
1199   }
1200   while (!NewOuter->empty())
1201     NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1202   NewOuter->addChildLoop(NewInner);
1203 
1204   // BBs from the original inner loop.
1205   SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1206 
1207   // Add BBs from the original outer loop to the original inner loop (excluding
1208   // BBs already in inner loop)
1209   for (BasicBlock *BB : NewInner->blocks())
1210     if (LI->getLoopFor(BB) == NewInner)
1211       NewOuter->addBlockEntry(BB);
1212 
1213   // Now remove inner loop header and latch from the new inner loop and move
1214   // other BBs (the loop body) to the new inner loop.
1215   BasicBlock *OuterHeader = NewOuter->getHeader();
1216   BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1217   for (BasicBlock *BB : OrigInnerBBs) {
1218     // Nothing will change for BBs in child loops.
1219     if (LI->getLoopFor(BB) != NewOuter)
1220       continue;
1221     // Remove the new outer loop header and latch from the new inner loop.
1222     if (BB == OuterHeader || BB == OuterLatch)
1223       NewInner->removeBlockFromLoop(BB);
1224     else
1225       LI->changeLoopFor(BB, NewInner);
1226   }
1227 
1228   // The preheader of the original outer loop becomes part of the new
1229   // outer loop.
1230   NewOuter->addBlockEntry(OrigOuterPreHeader);
1231   LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1232 
1233   // Tell SE that we move the loops around.
1234   SE->forgetLoop(NewOuter);
1235   SE->forgetLoop(NewInner);
1236 }
1237 
1238 bool LoopInterchangeTransform::transform() {
1239   bool Transformed = false;
1240   Instruction *InnerIndexVar;
1241 
1242   if (InnerLoop->getSubLoops().empty()) {
1243     BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1244     LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
1245     PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
1246     if (!InductionPHI) {
1247       LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1248       return false;
1249     }
1250 
1251     if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1252       InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1));
1253     else
1254       InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0));
1255 
1256     // Ensure that InductionPHI is the first Phi node.
1257     if (&InductionPHI->getParent()->front() != InductionPHI)
1258       InductionPHI->moveBefore(&InductionPHI->getParent()->front());
1259 
1260     // Create a new latch block for the inner loop. We split at the
1261     // current latch's terminator and then move the condition and all
1262     // operands that are not either loop-invariant or the induction PHI into the
1263     // new latch block.
1264     BasicBlock *NewLatch =
1265         SplitBlock(InnerLoop->getLoopLatch(),
1266                    InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1267 
1268     SmallSetVector<Instruction *, 4> WorkList;
1269     unsigned i = 0;
1270     auto MoveInstructions = [&i, &WorkList, this, InductionPHI, NewLatch]() {
1271       for (; i < WorkList.size(); i++) {
1272         // Duplicate instruction and move it the new latch. Update uses that
1273         // have been moved.
1274         Instruction *NewI = WorkList[i]->clone();
1275         NewI->insertBefore(NewLatch->getFirstNonPHI());
1276         assert(!NewI->mayHaveSideEffects() &&
1277                "Moving instructions with side-effects may change behavior of "
1278                "the loop nest!");
1279         for (auto UI = WorkList[i]->use_begin(), UE = WorkList[i]->use_end();
1280              UI != UE;) {
1281           Use &U = *UI++;
1282           Instruction *UserI = cast<Instruction>(U.getUser());
1283           if (!InnerLoop->contains(UserI->getParent()) ||
1284               UserI->getParent() == NewLatch || UserI == InductionPHI)
1285             U.set(NewI);
1286         }
1287         // Add operands of moved instruction to the worklist, except if they are
1288         // outside the inner loop or are the induction PHI.
1289         for (Value *Op : WorkList[i]->operands()) {
1290           Instruction *OpI = dyn_cast<Instruction>(Op);
1291           if (!OpI ||
1292               this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1293               OpI == InductionPHI)
1294             continue;
1295           WorkList.insert(OpI);
1296         }
1297       }
1298     };
1299 
1300     // FIXME: Should we interchange when we have a constant condition?
1301     Instruction *CondI = dyn_cast<Instruction>(
1302         cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1303             ->getCondition());
1304     if (CondI)
1305       WorkList.insert(CondI);
1306     MoveInstructions();
1307     WorkList.insert(cast<Instruction>(InnerIndexVar));
1308     MoveInstructions();
1309 
1310     // Splits the inner loops phi nodes out into a separate basic block.
1311     BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1312     SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1313     LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1314   }
1315 
1316   // Instructions in the original inner loop preheader may depend on values
1317   // defined in the outer loop header. Move them there, because the original
1318   // inner loop preheader will become the entry into the interchanged loop nest.
1319   // Currently we move all instructions and rely on LICM to move invariant
1320   // instructions outside the loop nest.
1321   BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1322   BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1323   if (InnerLoopPreHeader != OuterLoopHeader) {
1324     SmallPtrSet<Instruction *, 4> NeedsMoving;
1325     for (Instruction &I :
1326          make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1327                                          std::prev(InnerLoopPreHeader->end()))))
1328       I.moveBefore(OuterLoopHeader->getTerminator());
1329   }
1330 
1331   Transformed |= adjustLoopLinks();
1332   if (!Transformed) {
1333     LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
1334     return false;
1335   }
1336 
1337   return true;
1338 }
1339 
1340 /// \brief Move all instructions except the terminator from FromBB right before
1341 /// InsertBefore
1342 static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1343   auto &ToList = InsertBefore->getParent()->getInstList();
1344   auto &FromList = FromBB->getInstList();
1345 
1346   ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1347                 FromBB->getTerminator()->getIterator());
1348 }
1349 
1350 /// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1351 static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1352   // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1353   // from BB1 afterwards.
1354   auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1355   SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1356   for (Instruction *I : TempInstrs)
1357     I->removeFromParent();
1358 
1359   // Move instructions from BB2 to BB1.
1360   moveBBContents(BB2, BB1->getTerminator());
1361 
1362   // Move instructions from TempInstrs to BB2.
1363   for (Instruction *I : TempInstrs)
1364     I->insertBefore(BB2->getTerminator());
1365 }
1366 
1367 // Update BI to jump to NewBB instead of OldBB. Records updates to the
1368 // dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1369 // \p OldBB  is exactly once in BI's successor list.
1370 static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1371                             BasicBlock *NewBB,
1372                             std::vector<DominatorTree::UpdateType> &DTUpdates,
1373                             bool MustUpdateOnce = true) {
1374   assert((!MustUpdateOnce ||
1375           llvm::count_if(successors(BI),
1376                          [OldBB](BasicBlock *BB) {
1377                            return BB == OldBB;
1378                          }) == 1) && "BI must jump to OldBB exactly once.");
1379   bool Changed = false;
1380   for (Use &Op : BI->operands())
1381     if (Op == OldBB) {
1382       Op.set(NewBB);
1383       Changed = true;
1384     }
1385 
1386   if (Changed) {
1387     DTUpdates.push_back(
1388         {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1389     DTUpdates.push_back(
1390         {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1391   }
1392   assert(Changed && "Expected a successor to be updated");
1393 }
1394 
1395 // Move Lcssa PHIs to the right place.
1396 static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1397                           BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1398                           BasicBlock *OuterLatch, BasicBlock *OuterExit,
1399                           Loop *InnerLoop, LoopInfo *LI) {
1400 
1401   // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1402   // defined either in the header or latch. Those blocks will become header and
1403   // latch of the new outer loop, and the only possible users can PHI nodes
1404   // in the exit block of the loop nest or the outer loop header (reduction
1405   // PHIs, in that case, the incoming value must be defined in the inner loop
1406   // header). We can just substitute the user with the incoming value and remove
1407   // the PHI.
1408   for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1409     assert(P.getNumIncomingValues() == 1 &&
1410            "Only loops with a single exit are supported!");
1411 
1412     // Incoming values are guaranteed be instructions currently.
1413     auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1414     // Skip phis with incoming values from the inner loop body, excluding the
1415     // header and latch.
1416     if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader)
1417       continue;
1418 
1419     assert(all_of(P.users(),
1420                   [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
1421                     return (cast<PHINode>(U)->getParent() == OuterHeader &&
1422                             IncI->getParent() == InnerHeader) ||
1423                            cast<PHINode>(U)->getParent() == OuterExit;
1424                   }) &&
1425            "Can only replace phis iff the uses are in the loop nest exit or "
1426            "the incoming value is defined in the inner header (it will "
1427            "dominate all loop blocks after interchanging)");
1428     P.replaceAllUsesWith(IncI);
1429     P.eraseFromParent();
1430   }
1431 
1432   SmallVector<PHINode *, 8> LcssaInnerExit;
1433   for (PHINode &P : InnerExit->phis())
1434     LcssaInnerExit.push_back(&P);
1435 
1436   SmallVector<PHINode *, 8> LcssaInnerLatch;
1437   for (PHINode &P : InnerLatch->phis())
1438     LcssaInnerLatch.push_back(&P);
1439 
1440   // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1441   // If a PHI node has users outside of InnerExit, it has a use outside the
1442   // interchanged loop and we have to preserve it. We move these to
1443   // InnerLatch, which will become the new exit block for the innermost
1444   // loop after interchanging.
1445   for (PHINode *P : LcssaInnerExit)
1446     P->moveBefore(InnerLatch->getFirstNonPHI());
1447 
1448   // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1449   // and we have to move them to the new inner latch.
1450   for (PHINode *P : LcssaInnerLatch)
1451     P->moveBefore(InnerExit->getFirstNonPHI());
1452 
1453   // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1454   // incoming values defined in the outer loop, we have to add a new PHI
1455   // in the inner loop latch, which became the exit block of the outer loop,
1456   // after interchanging.
1457   if (OuterExit) {
1458     for (PHINode &P : OuterExit->phis()) {
1459       if (P.getNumIncomingValues() != 1)
1460         continue;
1461       // Skip Phis with incoming values defined in the inner loop. Those should
1462       // already have been updated.
1463       auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1464       if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
1465         continue;
1466 
1467       PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1468       NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1469       NewPhi->setIncomingBlock(0, OuterLatch);
1470       NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1471       P.setIncomingValue(0, NewPhi);
1472     }
1473   }
1474 
1475   // Now adjust the incoming blocks for the LCSSA PHIs.
1476   // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1477   // with the new latch.
1478   InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1479 }
1480 
1481 bool LoopInterchangeTransform::adjustLoopBranches() {
1482   LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1483   std::vector<DominatorTree::UpdateType> DTUpdates;
1484 
1485   BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1486   BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1487 
1488   assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1489          InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1490          InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1491   // Ensure that both preheaders do not contain PHI nodes and have single
1492   // predecessors. This allows us to move them easily. We use
1493   // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1494   // preheaders do not satisfy those conditions.
1495   if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1496       !OuterLoopPreHeader->getUniquePredecessor())
1497     OuterLoopPreHeader =
1498         InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1499   if (InnerLoopPreHeader == OuterLoop->getHeader())
1500     InnerLoopPreHeader =
1501         InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1502 
1503   // Adjust the loop preheader
1504   BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1505   BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1506   BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1507   BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1508   BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1509   BasicBlock *InnerLoopLatchPredecessor =
1510       InnerLoopLatch->getUniquePredecessor();
1511   BasicBlock *InnerLoopLatchSuccessor;
1512   BasicBlock *OuterLoopLatchSuccessor;
1513 
1514   BranchInst *OuterLoopLatchBI =
1515       dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1516   BranchInst *InnerLoopLatchBI =
1517       dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1518   BranchInst *OuterLoopHeaderBI =
1519       dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1520   BranchInst *InnerLoopHeaderBI =
1521       dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1522 
1523   if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1524       !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1525       !InnerLoopHeaderBI)
1526     return false;
1527 
1528   BranchInst *InnerLoopLatchPredecessorBI =
1529       dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1530   BranchInst *OuterLoopPredecessorBI =
1531       dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1532 
1533   if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1534     return false;
1535   BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1536   if (!InnerLoopHeaderSuccessor)
1537     return false;
1538 
1539   // Adjust Loop Preheader and headers.
1540   // The branches in the outer loop predecessor and the outer loop header can
1541   // be unconditional branches or conditional branches with duplicates. Consider
1542   // this when updating the successors.
1543   updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1544                   InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1545   // The outer loop header might or might not branch to the outer latch.
1546   // We are guaranteed to branch to the inner loop preheader.
1547   if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch))
1548     updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates,
1549                     /*MustUpdateOnce=*/false);
1550   updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1551                   InnerLoopHeaderSuccessor, DTUpdates,
1552                   /*MustUpdateOnce=*/false);
1553 
1554   // Adjust reduction PHI's now that the incoming block has changed.
1555   InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1556                                                OuterLoopHeader);
1557 
1558   updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1559                   OuterLoopPreHeader, DTUpdates);
1560 
1561   // -------------Adjust loop latches-----------
1562   if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1563     InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1564   else
1565     InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1566 
1567   updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1568                   InnerLoopLatchSuccessor, DTUpdates);
1569 
1570 
1571   if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1572     OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1573   else
1574     OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1575 
1576   updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1577                   OuterLoopLatchSuccessor, DTUpdates);
1578   updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1579                   DTUpdates);
1580 
1581   DT->applyUpdates(DTUpdates);
1582   restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1583                    OuterLoopPreHeader);
1584 
1585   moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1586                 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1587                 InnerLoop, LI);
1588   // For PHIs in the exit block of the outer loop, outer's latch has been
1589   // replaced by Inners'.
1590   OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1591 
1592   // Now update the reduction PHIs in the inner and outer loop headers.
1593   SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1594   for (PHINode &PHI : drop_begin(InnerLoopHeader->phis(), 1))
1595     InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
1596   for (PHINode &PHI : drop_begin(OuterLoopHeader->phis(), 1))
1597     OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
1598 
1599   auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1600   (void)OuterInnerReductions;
1601 
1602   // Now move the remaining reduction PHIs from outer to inner loop header and
1603   // vice versa. The PHI nodes must be part of a reduction across the inner and
1604   // outer loop and all the remains to do is and updating the incoming blocks.
1605   for (PHINode *PHI : OuterLoopPHIs) {
1606     PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1607     assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1608   }
1609   for (PHINode *PHI : InnerLoopPHIs) {
1610     PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1611     assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1612   }
1613 
1614   // Update the incoming blocks for moved PHI nodes.
1615   OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1616   OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1617   InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1618   InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1619 
1620   // Values defined in the outer loop header could be used in the inner loop
1621   // latch. In that case, we need to create LCSSA phis for them, because after
1622   // interchanging they will be defined in the new inner loop and used in the
1623   // new outer loop.
1624   IRBuilder<> Builder(OuterLoopHeader->getContext());
1625   SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1626   for (Instruction &I :
1627        make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1628     MayNeedLCSSAPhis.push_back(&I);
1629   formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
1630 
1631   return true;
1632 }
1633 
1634 bool LoopInterchangeTransform::adjustLoopLinks() {
1635   // Adjust all branches in the inner and outer loop.
1636   bool Changed = adjustLoopBranches();
1637   if (Changed) {
1638     // We have interchanged the preheaders so we need to interchange the data in
1639     // the preheaders as well. This is because the content of the inner
1640     // preheader was previously executed inside the outer loop.
1641     BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1642     BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1643     swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1644   }
1645   return Changed;
1646 }
1647 
1648 char LoopInterchange::ID = 0;
1649 
1650 INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
1651                       "Interchanges loops for cache reuse", false, false)
1652 INITIALIZE_PASS_DEPENDENCY(LoopPass)
1653 INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
1654 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
1655 
1656 INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
1657                     "Interchanges loops for cache reuse", false, false)
1658 
1659 Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); }
1660