1 //===- LoopInterchange.cpp - Loop interchange pass------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This Pass handles loop interchange transform. 11 // This pass interchanges loops to provide a more cache-friendly memory access 12 // patterns. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/Analysis/AliasAnalysis.h" 18 #include "llvm/Analysis/AliasSetTracker.h" 19 #include "llvm/Analysis/AssumptionCache.h" 20 #include "llvm/Analysis/BlockFrequencyInfo.h" 21 #include "llvm/Analysis/CodeMetrics.h" 22 #include "llvm/Analysis/DependenceAnalysis.h" 23 #include "llvm/Analysis/LoopInfo.h" 24 #include "llvm/Analysis/LoopIterator.h" 25 #include "llvm/Analysis/LoopPass.h" 26 #include "llvm/Analysis/ScalarEvolution.h" 27 #include "llvm/Analysis/ScalarEvolutionExpander.h" 28 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 29 #include "llvm/Analysis/TargetTransformInfo.h" 30 #include "llvm/Analysis/ValueTracking.h" 31 #include "llvm/IR/Dominators.h" 32 #include "llvm/IR/Function.h" 33 #include "llvm/IR/IRBuilder.h" 34 #include "llvm/IR/InstIterator.h" 35 #include "llvm/IR/IntrinsicInst.h" 36 #include "llvm/IR/Module.h" 37 #include "llvm/Pass.h" 38 #include "llvm/Support/Debug.h" 39 #include "llvm/Support/raw_ostream.h" 40 #include "llvm/Transforms/Scalar.h" 41 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 42 #include "llvm/Transforms/Utils/LoopUtils.h" 43 #include "llvm/Transforms/Utils/SSAUpdater.h" 44 using namespace llvm; 45 46 #define DEBUG_TYPE "loop-interchange" 47 48 namespace { 49 50 typedef SmallVector<Loop *, 8> LoopVector; 51 52 // TODO: Check if we can use a sparse matrix here. 53 typedef std::vector<std::vector<char>> CharMatrix; 54 55 // Maximum number of dependencies that can be handled in the dependency matrix. 56 static const unsigned MaxMemInstrCount = 100; 57 58 // Maximum loop depth supported. 59 static const unsigned MaxLoopNestDepth = 10; 60 61 struct LoopInterchange; 62 63 #ifdef DUMP_DEP_MATRICIES 64 void printDepMatrix(CharMatrix &DepMatrix) { 65 for (auto I = DepMatrix.begin(), E = DepMatrix.end(); I != E; ++I) { 66 std::vector<char> Vec = *I; 67 for (auto II = Vec.begin(), EE = Vec.end(); II != EE; ++II) 68 DEBUG(dbgs() << *II << " "); 69 DEBUG(dbgs() << "\n"); 70 } 71 } 72 #endif 73 74 static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, 75 Loop *L, DependenceAnalysis *DA) { 76 typedef SmallVector<Value *, 16> ValueVector; 77 ValueVector MemInstr; 78 79 if (Level > MaxLoopNestDepth) { 80 DEBUG(dbgs() << "Cannot handle loops of depth greater than " 81 << MaxLoopNestDepth << "\n"); 82 return false; 83 } 84 85 // For each block. 86 for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end(); 87 BB != BE; ++BB) { 88 // Scan the BB and collect legal loads and stores. 89 for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; 90 ++I) { 91 Instruction *Ins = dyn_cast<Instruction>(I); 92 if (!Ins) 93 return false; 94 LoadInst *Ld = dyn_cast<LoadInst>(I); 95 StoreInst *St = dyn_cast<StoreInst>(I); 96 if (!St && !Ld) 97 continue; 98 if (Ld && !Ld->isSimple()) 99 return false; 100 if (St && !St->isSimple()) 101 return false; 102 MemInstr.push_back(&*I); 103 } 104 } 105 106 DEBUG(dbgs() << "Found " << MemInstr.size() 107 << " Loads and Stores to analyze\n"); 108 109 ValueVector::iterator I, IE, J, JE; 110 111 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) { 112 for (J = I, JE = MemInstr.end(); J != JE; ++J) { 113 std::vector<char> Dep; 114 Instruction *Src = dyn_cast<Instruction>(*I); 115 Instruction *Des = dyn_cast<Instruction>(*J); 116 if (Src == Des) 117 continue; 118 if (isa<LoadInst>(Src) && isa<LoadInst>(Des)) 119 continue; 120 if (auto D = DA->depends(Src, Des, true)) { 121 DEBUG(dbgs() << "Found Dependency between Src=" << Src << " Des=" << Des 122 << "\n"); 123 if (D->isFlow()) { 124 // TODO: Handle Flow dependence.Check if it is sufficient to populate 125 // the Dependence Matrix with the direction reversed. 126 DEBUG(dbgs() << "Flow dependence not handled"); 127 return false; 128 } 129 if (D->isAnti()) { 130 DEBUG(dbgs() << "Found Anti dependence \n"); 131 unsigned Levels = D->getLevels(); 132 char Direction; 133 for (unsigned II = 1; II <= Levels; ++II) { 134 const SCEV *Distance = D->getDistance(II); 135 const SCEVConstant *SCEVConst = 136 dyn_cast_or_null<SCEVConstant>(Distance); 137 if (SCEVConst) { 138 const ConstantInt *CI = SCEVConst->getValue(); 139 if (CI->isNegative()) 140 Direction = '<'; 141 else if (CI->isZero()) 142 Direction = '='; 143 else 144 Direction = '>'; 145 Dep.push_back(Direction); 146 } else if (D->isScalar(II)) { 147 Direction = 'S'; 148 Dep.push_back(Direction); 149 } else { 150 unsigned Dir = D->getDirection(II); 151 if (Dir == Dependence::DVEntry::LT || 152 Dir == Dependence::DVEntry::LE) 153 Direction = '<'; 154 else if (Dir == Dependence::DVEntry::GT || 155 Dir == Dependence::DVEntry::GE) 156 Direction = '>'; 157 else if (Dir == Dependence::DVEntry::EQ) 158 Direction = '='; 159 else 160 Direction = '*'; 161 Dep.push_back(Direction); 162 } 163 } 164 while (Dep.size() != Level) { 165 Dep.push_back('I'); 166 } 167 168 DepMatrix.push_back(Dep); 169 if (DepMatrix.size() > MaxMemInstrCount) { 170 DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount 171 << " dependencies inside loop\n"); 172 return false; 173 } 174 } 175 } 176 } 177 } 178 179 // We don't have a DepMatrix to check legality return false. 180 if (DepMatrix.size() == 0) 181 return false; 182 return true; 183 } 184 185 // A loop is moved from index 'from' to an index 'to'. Update the Dependence 186 // matrix by exchanging the two columns. 187 static void interChangeDepedencies(CharMatrix &DepMatrix, unsigned FromIndx, 188 unsigned ToIndx) { 189 unsigned numRows = DepMatrix.size(); 190 for (unsigned i = 0; i < numRows; ++i) { 191 char TmpVal = DepMatrix[i][ToIndx]; 192 DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx]; 193 DepMatrix[i][FromIndx] = TmpVal; 194 } 195 } 196 197 // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is 198 // '>' 199 static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row, 200 unsigned Column) { 201 for (unsigned i = 0; i <= Column; ++i) { 202 if (DepMatrix[Row][i] == '<') 203 return false; 204 if (DepMatrix[Row][i] == '>') 205 return true; 206 } 207 // All dependencies were '=','S' or 'I' 208 return false; 209 } 210 211 // Checks if no dependence exist in the dependency matrix in Row before Column. 212 static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, 213 unsigned Column) { 214 for (unsigned i = 0; i < Column; ++i) { 215 if (DepMatrix[Row][i] != '=' || DepMatrix[Row][i] != 'S' || 216 DepMatrix[Row][i] != 'I') 217 return false; 218 } 219 return true; 220 } 221 222 static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, 223 unsigned OuterLoopId, char InnerDep, 224 char OuterDep) { 225 226 if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId)) 227 return false; 228 229 if (InnerDep == OuterDep) 230 return true; 231 232 // It is legal to interchange if and only if after interchange no row has a 233 // '>' direction as the leftmost non-'='. 234 235 if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I') 236 return true; 237 238 if (InnerDep == '<') 239 return true; 240 241 if (InnerDep == '>') { 242 // If OuterLoopId represents outermost loop then interchanging will make the 243 // 1st dependency as '>' 244 if (OuterLoopId == 0) 245 return false; 246 247 // If all dependencies before OuterloopId are '=','S'or 'I'. Then 248 // interchanging will result in this row having an outermost non '=' 249 // dependency of '>' 250 if (!containsNoDependence(DepMatrix, Row, OuterLoopId)) 251 return true; 252 } 253 254 return false; 255 } 256 257 // Checks if it is legal to interchange 2 loops. 258 // [Theorem] A permutation of the loops in a perfect nest is legal if and only 259 // if 260 // the direction matrix, after the same permutation is applied to its columns, 261 // has no ">" direction as the leftmost non-"=" direction in any row. 262 static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, 263 unsigned InnerLoopId, 264 unsigned OuterLoopId) { 265 266 unsigned NumRows = DepMatrix.size(); 267 // For each row check if it is valid to interchange. 268 for (unsigned Row = 0; Row < NumRows; ++Row) { 269 char InnerDep = DepMatrix[Row][InnerLoopId]; 270 char OuterDep = DepMatrix[Row][OuterLoopId]; 271 if (InnerDep == '*' || OuterDep == '*') 272 return false; 273 else if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, 274 OuterDep)) 275 return false; 276 } 277 return true; 278 } 279 280 static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) { 281 282 DEBUG(dbgs() << "Calling populateWorklist called\n"); 283 LoopVector LoopList; 284 Loop *CurrentLoop = &L; 285 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops(); 286 while (!Vec->empty()) { 287 // The current loop has multiple subloops in it hence it is not tightly 288 // nested. 289 // Discard all loops above it added into Worklist. 290 if (Vec->size() != 1) { 291 LoopList.clear(); 292 return; 293 } 294 LoopList.push_back(CurrentLoop); 295 CurrentLoop = Vec->front(); 296 Vec = &CurrentLoop->getSubLoops(); 297 } 298 LoopList.push_back(CurrentLoop); 299 V.push_back(std::move(LoopList)); 300 } 301 302 static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { 303 PHINode *InnerIndexVar = L->getCanonicalInductionVariable(); 304 if (InnerIndexVar) 305 return InnerIndexVar; 306 if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr) 307 return nullptr; 308 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 309 PHINode *PhiVar = cast<PHINode>(I); 310 Type *PhiTy = PhiVar->getType(); 311 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && 312 !PhiTy->isPointerTy()) 313 return nullptr; 314 const SCEVAddRecExpr *AddRec = 315 dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar)); 316 if (!AddRec || !AddRec->isAffine()) 317 continue; 318 const SCEV *Step = AddRec->getStepRecurrence(*SE); 319 const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); 320 if (!C) 321 continue; 322 // Found the induction variable. 323 // FIXME: Handle loops with more than one induction variable. Note that, 324 // currently, legality makes sure we have only one induction variable. 325 return PhiVar; 326 } 327 return nullptr; 328 } 329 330 /// LoopInterchangeLegality checks if it is legal to interchange the loop. 331 class LoopInterchangeLegality { 332 public: 333 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, 334 LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA) 335 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), 336 PreserveLCSSA(PreserveLCSSA), InnerLoopHasReduction(false) {} 337 338 /// Check if the loops can be interchanged. 339 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, 340 CharMatrix &DepMatrix); 341 /// Check if the loop structure is understood. We do not handle triangular 342 /// loops for now. 343 bool isLoopStructureUnderstood(PHINode *InnerInductionVar); 344 345 bool currentLimitations(); 346 347 bool hasInnerLoopReduction() { return InnerLoopHasReduction; } 348 349 private: 350 bool tightlyNested(Loop *Outer, Loop *Inner); 351 bool containsUnsafeInstructionsInHeader(BasicBlock *BB); 352 bool areAllUsesReductions(Instruction *Ins, Loop *L); 353 bool containsUnsafeInstructionsInLatch(BasicBlock *BB); 354 bool findInductionAndReductions(Loop *L, 355 SmallVector<PHINode *, 8> &Inductions, 356 SmallVector<PHINode *, 8> &Reductions); 357 Loop *OuterLoop; 358 Loop *InnerLoop; 359 360 ScalarEvolution *SE; 361 LoopInfo *LI; 362 DominatorTree *DT; 363 bool PreserveLCSSA; 364 365 bool InnerLoopHasReduction; 366 }; 367 368 /// LoopInterchangeProfitability checks if it is profitable to interchange the 369 /// loop. 370 class LoopInterchangeProfitability { 371 public: 372 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE) 373 : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {} 374 375 /// Check if the loop interchange is profitable. 376 bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, 377 CharMatrix &DepMatrix); 378 379 private: 380 int getInstrOrderCost(); 381 382 Loop *OuterLoop; 383 Loop *InnerLoop; 384 385 /// Scev analysis. 386 ScalarEvolution *SE; 387 }; 388 389 /// LoopInterchangeTransform interchanges the loop. 390 class LoopInterchangeTransform { 391 public: 392 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, 393 LoopInfo *LI, DominatorTree *DT, 394 BasicBlock *LoopNestExit, 395 bool InnerLoopContainsReductions) 396 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), 397 LoopExit(LoopNestExit), 398 InnerLoopHasReduction(InnerLoopContainsReductions) {} 399 400 /// Interchange OuterLoop and InnerLoop. 401 bool transform(); 402 void restructureLoops(Loop *InnerLoop, Loop *OuterLoop); 403 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop); 404 405 private: 406 void splitInnerLoopLatch(Instruction *); 407 void splitOuterLoopLatch(); 408 void splitInnerLoopHeader(); 409 bool adjustLoopLinks(); 410 void adjustLoopPreheaders(); 411 void adjustOuterLoopPreheader(); 412 void adjustInnerLoopPreheader(); 413 bool adjustLoopBranches(); 414 void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred, 415 BasicBlock *NewPred); 416 417 Loop *OuterLoop; 418 Loop *InnerLoop; 419 420 /// Scev analysis. 421 ScalarEvolution *SE; 422 LoopInfo *LI; 423 DominatorTree *DT; 424 BasicBlock *LoopExit; 425 bool InnerLoopHasReduction; 426 }; 427 428 // Main LoopInterchange Pass. 429 struct LoopInterchange : public FunctionPass { 430 static char ID; 431 ScalarEvolution *SE; 432 LoopInfo *LI; 433 DependenceAnalysis *DA; 434 DominatorTree *DT; 435 bool PreserveLCSSA; 436 LoopInterchange() 437 : FunctionPass(ID), SE(nullptr), LI(nullptr), DA(nullptr), DT(nullptr) { 438 initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); 439 } 440 441 void getAnalysisUsage(AnalysisUsage &AU) const override { 442 AU.addRequired<ScalarEvolutionWrapperPass>(); 443 AU.addRequired<AAResultsWrapperPass>(); 444 AU.addRequired<DominatorTreeWrapperPass>(); 445 AU.addRequired<LoopInfoWrapperPass>(); 446 AU.addRequired<DependenceAnalysis>(); 447 AU.addRequiredID(LoopSimplifyID); 448 AU.addRequiredID(LCSSAID); 449 } 450 451 bool runOnFunction(Function &F) override { 452 if (skipFunction(F)) 453 return false; 454 455 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 456 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 457 DA = &getAnalysis<DependenceAnalysis>(); 458 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); 459 DT = DTWP ? &DTWP->getDomTree() : nullptr; 460 PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); 461 462 // Build up a worklist of loop pairs to analyze. 463 SmallVector<LoopVector, 8> Worklist; 464 465 for (Loop *L : *LI) 466 populateWorklist(*L, Worklist); 467 468 DEBUG(dbgs() << "Worklist size = " << Worklist.size() << "\n"); 469 bool Changed = true; 470 while (!Worklist.empty()) { 471 LoopVector LoopList = Worklist.pop_back_val(); 472 Changed = processLoopList(LoopList, F); 473 } 474 return Changed; 475 } 476 477 bool isComputableLoopNest(LoopVector LoopList) { 478 for (auto I = LoopList.begin(), E = LoopList.end(); I != E; ++I) { 479 Loop *L = *I; 480 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); 481 if (ExitCountOuter == SE->getCouldNotCompute()) { 482 DEBUG(dbgs() << "Couldn't compute Backedge count\n"); 483 return false; 484 } 485 if (L->getNumBackEdges() != 1) { 486 DEBUG(dbgs() << "NumBackEdges is not equal to 1\n"); 487 return false; 488 } 489 if (!L->getExitingBlock()) { 490 DEBUG(dbgs() << "Loop Doesn't have unique exit block\n"); 491 return false; 492 } 493 } 494 return true; 495 } 496 497 unsigned selectLoopForInterchange(LoopVector LoopList) { 498 // TODO: Add a better heuristic to select the loop to be interchanged based 499 // on the dependence matrix. Currently we select the innermost loop. 500 return LoopList.size() - 1; 501 } 502 503 bool processLoopList(LoopVector LoopList, Function &F) { 504 505 bool Changed = false; 506 CharMatrix DependencyMatrix; 507 if (LoopList.size() < 2) { 508 DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n"); 509 return false; 510 } 511 if (!isComputableLoopNest(LoopList)) { 512 DEBUG(dbgs() << "Not vaild loop candidate for interchange\n"); 513 return false; 514 } 515 Loop *OuterMostLoop = *(LoopList.begin()); 516 517 DEBUG(dbgs() << "Processing LoopList of size = " << LoopList.size() 518 << "\n"); 519 520 if (!populateDependencyMatrix(DependencyMatrix, LoopList.size(), 521 OuterMostLoop, DA)) { 522 DEBUG(dbgs() << "Populating Dependency matrix failed\n"); 523 return false; 524 } 525 #ifdef DUMP_DEP_MATRICIES 526 DEBUG(dbgs() << "Dependence before inter change \n"); 527 printDepMatrix(DependencyMatrix); 528 #endif 529 530 BasicBlock *OuterMostLoopLatch = OuterMostLoop->getLoopLatch(); 531 BranchInst *OuterMostLoopLatchBI = 532 dyn_cast<BranchInst>(OuterMostLoopLatch->getTerminator()); 533 if (!OuterMostLoopLatchBI) 534 return false; 535 536 // Since we currently do not handle LCSSA PHI's any failure in loop 537 // condition will now branch to LoopNestExit. 538 // TODO: This should be removed once we handle LCSSA PHI nodes. 539 540 // Get the Outermost loop exit. 541 BasicBlock *LoopNestExit; 542 if (OuterMostLoopLatchBI->getSuccessor(0) == OuterMostLoop->getHeader()) 543 LoopNestExit = OuterMostLoopLatchBI->getSuccessor(1); 544 else 545 LoopNestExit = OuterMostLoopLatchBI->getSuccessor(0); 546 547 if (isa<PHINode>(LoopNestExit->begin())) { 548 DEBUG(dbgs() << "PHI Nodes in loop nest exit is not handled for now " 549 "since on failure all loops branch to loop nest exit.\n"); 550 return false; 551 } 552 553 unsigned SelecLoopId = selectLoopForInterchange(LoopList); 554 // Move the selected loop outwards to the best possible position. 555 for (unsigned i = SelecLoopId; i > 0; i--) { 556 bool Interchanged = 557 processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix); 558 if (!Interchanged) 559 return Changed; 560 // Loops interchanged reflect the same in LoopList 561 std::swap(LoopList[i - 1], LoopList[i]); 562 563 // Update the DependencyMatrix 564 interChangeDepedencies(DependencyMatrix, i, i - 1); 565 DT->recalculate(F); 566 #ifdef DUMP_DEP_MATRICIES 567 DEBUG(dbgs() << "Dependence after inter change \n"); 568 printDepMatrix(DependencyMatrix); 569 #endif 570 Changed |= Interchanged; 571 } 572 return Changed; 573 } 574 575 bool processLoop(LoopVector LoopList, unsigned InnerLoopId, 576 unsigned OuterLoopId, BasicBlock *LoopNestExit, 577 std::vector<std::vector<char>> &DependencyMatrix) { 578 579 DEBUG(dbgs() << "Processing Innder Loop Id = " << InnerLoopId 580 << " and OuterLoopId = " << OuterLoopId << "\n"); 581 Loop *InnerLoop = LoopList[InnerLoopId]; 582 Loop *OuterLoop = LoopList[OuterLoopId]; 583 584 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT, 585 PreserveLCSSA); 586 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { 587 DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n"); 588 return false; 589 } 590 DEBUG(dbgs() << "Loops are legal to interchange\n"); 591 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE); 592 if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) { 593 DEBUG(dbgs() << "Interchanging Loops not profitable\n"); 594 return false; 595 } 596 597 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, 598 LoopNestExit, LIL.hasInnerLoopReduction()); 599 LIT.transform(); 600 DEBUG(dbgs() << "Loops interchanged\n"); 601 return true; 602 } 603 }; 604 605 } // end of namespace 606 bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) { 607 return !std::any_of(Ins->user_begin(), Ins->user_end(), [=](User *U) -> bool { 608 PHINode *UserIns = dyn_cast<PHINode>(U); 609 RecurrenceDescriptor RD; 610 return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD); 611 }); 612 } 613 614 bool LoopInterchangeLegality::containsUnsafeInstructionsInHeader( 615 BasicBlock *BB) { 616 for (auto I = BB->begin(), E = BB->end(); I != E; ++I) { 617 // Load corresponding to reduction PHI's are safe while concluding if 618 // tightly nested. 619 if (LoadInst *L = dyn_cast<LoadInst>(I)) { 620 if (!areAllUsesReductions(L, InnerLoop)) 621 return true; 622 } else if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 623 return true; 624 } 625 return false; 626 } 627 628 bool LoopInterchangeLegality::containsUnsafeInstructionsInLatch( 629 BasicBlock *BB) { 630 for (auto I = BB->begin(), E = BB->end(); I != E; ++I) { 631 // Stores corresponding to reductions are safe while concluding if tightly 632 // nested. 633 if (StoreInst *L = dyn_cast<StoreInst>(I)) { 634 PHINode *PHI = dyn_cast<PHINode>(L->getOperand(0)); 635 if (!PHI) 636 return true; 637 } else if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 638 return true; 639 } 640 return false; 641 } 642 643 bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { 644 BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); 645 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 646 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); 647 648 DEBUG(dbgs() << "Checking if Loops are Tightly Nested\n"); 649 650 // A perfectly nested loop will not have any branch in between the outer and 651 // inner block i.e. outer header will branch to either inner preheader and 652 // outerloop latch. 653 BranchInst *outerLoopHeaderBI = 654 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); 655 if (!outerLoopHeaderBI) 656 return false; 657 unsigned num = outerLoopHeaderBI->getNumSuccessors(); 658 for (unsigned i = 0; i < num; i++) { 659 if (outerLoopHeaderBI->getSuccessor(i) != InnerLoopPreHeader && 660 outerLoopHeaderBI->getSuccessor(i) != OuterLoopLatch) 661 return false; 662 } 663 664 DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch \n"); 665 // We do not have any basic block in between now make sure the outer header 666 // and outer loop latch doesn't contain any unsafe instructions. 667 if (containsUnsafeInstructionsInHeader(OuterLoopHeader) || 668 containsUnsafeInstructionsInLatch(OuterLoopLatch)) 669 return false; 670 671 DEBUG(dbgs() << "Loops are perfectly nested \n"); 672 // We have a perfect loop nest. 673 return true; 674 } 675 676 677 bool LoopInterchangeLegality::isLoopStructureUnderstood( 678 PHINode *InnerInduction) { 679 680 unsigned Num = InnerInduction->getNumOperands(); 681 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader(); 682 for (unsigned i = 0; i < Num; ++i) { 683 Value *Val = InnerInduction->getOperand(i); 684 if (isa<Constant>(Val)) 685 continue; 686 Instruction *I = dyn_cast<Instruction>(Val); 687 if (!I) 688 return false; 689 // TODO: Handle triangular loops. 690 // e.g. for(int i=0;i<N;i++) 691 // for(int j=i;j<N;j++) 692 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i); 693 if (InnerInduction->getIncomingBlock(IncomBlockIndx) == 694 InnerLoopPreheader && 695 !OuterLoop->isLoopInvariant(I)) { 696 return false; 697 } 698 } 699 return true; 700 } 701 702 bool LoopInterchangeLegality::findInductionAndReductions( 703 Loop *L, SmallVector<PHINode *, 8> &Inductions, 704 SmallVector<PHINode *, 8> &Reductions) { 705 if (!L->getLoopLatch() || !L->getLoopPredecessor()) 706 return false; 707 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 708 RecurrenceDescriptor RD; 709 InductionDescriptor ID; 710 PHINode *PHI = cast<PHINode>(I); 711 if (InductionDescriptor::isInductionPHI(PHI, SE, ID)) 712 Inductions.push_back(PHI); 713 else if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) 714 Reductions.push_back(PHI); 715 else { 716 DEBUG( 717 dbgs() << "Failed to recognize PHI as an induction or reduction.\n"); 718 return false; 719 } 720 } 721 return true; 722 } 723 724 static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) { 725 for (auto I = Block->begin(); isa<PHINode>(I); ++I) { 726 PHINode *PHI = cast<PHINode>(I); 727 // Reduction lcssa phi will have only 1 incoming block that from loop latch. 728 if (PHI->getNumIncomingValues() > 1) 729 return false; 730 Instruction *Ins = dyn_cast<Instruction>(PHI->getIncomingValue(0)); 731 if (!Ins) 732 return false; 733 // Incoming value for lcssa phi's in outer loop exit can only be inner loop 734 // exits lcssa phi else it would not be tightly nested. 735 if (!isa<PHINode>(Ins) && isOuterLoopExitBlock) 736 return false; 737 } 738 return true; 739 } 740 741 static BasicBlock *getLoopLatchExitBlock(BasicBlock *LatchBlock, 742 BasicBlock *LoopHeader) { 743 if (BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator())) { 744 unsigned Num = BI->getNumSuccessors(); 745 assert(Num == 2); 746 for (unsigned i = 0; i < Num; ++i) { 747 if (BI->getSuccessor(i) == LoopHeader) 748 continue; 749 return BI->getSuccessor(i); 750 } 751 } 752 return nullptr; 753 } 754 755 // This function indicates the current limitations in the transform as a result 756 // of which we do not proceed. 757 bool LoopInterchangeLegality::currentLimitations() { 758 759 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 760 BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); 761 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); 762 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); 763 BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); 764 765 PHINode *InnerInductionVar; 766 SmallVector<PHINode *, 8> Inductions; 767 SmallVector<PHINode *, 8> Reductions; 768 if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) 769 return true; 770 771 // TODO: Currently we handle only loops with 1 induction variable. 772 if (Inductions.size() != 1) { 773 DEBUG(dbgs() << "We currently only support loops with 1 induction variable." 774 << "Failed to interchange due to current limitation\n"); 775 return true; 776 } 777 if (Reductions.size() > 0) 778 InnerLoopHasReduction = true; 779 780 InnerInductionVar = Inductions.pop_back_val(); 781 Reductions.clear(); 782 if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) 783 return true; 784 785 // Outer loop cannot have reduction because then loops will not be tightly 786 // nested. 787 if (!Reductions.empty()) 788 return true; 789 // TODO: Currently we handle only loops with 1 induction variable. 790 if (Inductions.size() != 1) 791 return true; 792 793 // TODO: Triangular loops are not handled for now. 794 if (!isLoopStructureUnderstood(InnerInductionVar)) { 795 DEBUG(dbgs() << "Loop structure not understood by pass\n"); 796 return true; 797 } 798 799 // TODO: We only handle LCSSA PHI's corresponding to reduction for now. 800 BasicBlock *LoopExitBlock = 801 getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader); 802 if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) 803 return true; 804 805 LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader); 806 if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) 807 return true; 808 809 // TODO: Current limitation: Since we split the inner loop latch at the point 810 // were induction variable is incremented (induction.next); We cannot have 811 // more than 1 user of induction.next since it would result in broken code 812 // after split. 813 // e.g. 814 // for(i=0;i<N;i++) { 815 // for(j = 0;j<M;j++) { 816 // A[j+1][i+2] = A[j][i]+k; 817 // } 818 // } 819 bool FoundInduction = false; 820 Instruction *InnerIndexVarInc = nullptr; 821 if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader) 822 InnerIndexVarInc = 823 dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1)); 824 else 825 InnerIndexVarInc = 826 dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0)); 827 828 if (!InnerIndexVarInc) 829 return true; 830 831 // Since we split the inner loop latch on this induction variable. Make sure 832 // we do not have any instruction between the induction variable and branch 833 // instruction. 834 835 for (auto I = InnerLoopLatch->rbegin(), E = InnerLoopLatch->rend(); 836 I != E && !FoundInduction; ++I) { 837 if (isa<BranchInst>(*I) || isa<CmpInst>(*I) || isa<TruncInst>(*I)) 838 continue; 839 const Instruction &Ins = *I; 840 // We found an instruction. If this is not induction variable then it is not 841 // safe to split this loop latch. 842 if (!Ins.isIdenticalTo(InnerIndexVarInc)) 843 return true; 844 else 845 FoundInduction = true; 846 } 847 // The loop latch ended and we didn't find the induction variable return as 848 // current limitation. 849 if (!FoundInduction) 850 return true; 851 852 return false; 853 } 854 855 bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, 856 unsigned OuterLoopId, 857 CharMatrix &DepMatrix) { 858 859 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) { 860 DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId 861 << "and OuterLoopId = " << OuterLoopId 862 << "due to dependence\n"); 863 return false; 864 } 865 866 // Create unique Preheaders if we already do not have one. 867 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); 868 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 869 870 // Create a unique outer preheader - 871 // 1) If OuterLoop preheader is not present. 872 // 2) If OuterLoop Preheader is same as OuterLoop Header 873 // 3) If OuterLoop Preheader is same as Header of the previous loop. 874 // 4) If OuterLoop Preheader is Entry node. 875 if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() || 876 isa<PHINode>(OuterLoopPreHeader->begin()) || 877 !OuterLoopPreHeader->getUniquePredecessor()) { 878 OuterLoopPreHeader = 879 InsertPreheaderForLoop(OuterLoop, DT, LI, PreserveLCSSA); 880 } 881 882 if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() || 883 InnerLoopPreHeader == OuterLoop->getHeader()) { 884 InnerLoopPreHeader = 885 InsertPreheaderForLoop(InnerLoop, DT, LI, PreserveLCSSA); 886 } 887 888 // TODO: The loops could not be interchanged due to current limitations in the 889 // transform module. 890 if (currentLimitations()) { 891 DEBUG(dbgs() << "Not legal because of current transform limitation\n"); 892 return false; 893 } 894 895 // Check if the loops are tightly nested. 896 if (!tightlyNested(OuterLoop, InnerLoop)) { 897 DEBUG(dbgs() << "Loops not tightly nested\n"); 898 return false; 899 } 900 901 return true; 902 } 903 904 int LoopInterchangeProfitability::getInstrOrderCost() { 905 unsigned GoodOrder, BadOrder; 906 BadOrder = GoodOrder = 0; 907 for (auto BI = InnerLoop->block_begin(), BE = InnerLoop->block_end(); 908 BI != BE; ++BI) { 909 for (auto I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) { 910 const Instruction &Ins = *I; 911 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) { 912 unsigned NumOp = GEP->getNumOperands(); 913 bool FoundInnerInduction = false; 914 bool FoundOuterInduction = false; 915 for (unsigned i = 0; i < NumOp; ++i) { 916 const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i)); 917 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal); 918 if (!AR) 919 continue; 920 921 // If we find the inner induction after an outer induction e.g. 922 // for(int i=0;i<N;i++) 923 // for(int j=0;j<N;j++) 924 // A[i][j] = A[i-1][j-1]+k; 925 // then it is a good order. 926 if (AR->getLoop() == InnerLoop) { 927 // We found an InnerLoop induction after OuterLoop induction. It is 928 // a good order. 929 FoundInnerInduction = true; 930 if (FoundOuterInduction) { 931 GoodOrder++; 932 break; 933 } 934 } 935 // If we find the outer induction after an inner induction e.g. 936 // for(int i=0;i<N;i++) 937 // for(int j=0;j<N;j++) 938 // A[j][i] = A[j-1][i-1]+k; 939 // then it is a bad order. 940 if (AR->getLoop() == OuterLoop) { 941 // We found an OuterLoop induction after InnerLoop induction. It is 942 // a bad order. 943 FoundOuterInduction = true; 944 if (FoundInnerInduction) { 945 BadOrder++; 946 break; 947 } 948 } 949 } 950 } 951 } 952 } 953 return GoodOrder - BadOrder; 954 } 955 956 static bool isProfitabileForVectorization(unsigned InnerLoopId, 957 unsigned OuterLoopId, 958 CharMatrix &DepMatrix) { 959 // TODO: Improve this heuristic to catch more cases. 960 // If the inner loop is loop independent or doesn't carry any dependency it is 961 // profitable to move this to outer position. 962 unsigned Row = DepMatrix.size(); 963 for (unsigned i = 0; i < Row; ++i) { 964 if (DepMatrix[i][InnerLoopId] != 'S' && DepMatrix[i][InnerLoopId] != 'I') 965 return false; 966 // TODO: We need to improve this heuristic. 967 if (DepMatrix[i][OuterLoopId] != '=') 968 return false; 969 } 970 // If outer loop has dependence and inner loop is loop independent then it is 971 // profitable to interchange to enable parallelism. 972 return true; 973 } 974 975 bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, 976 unsigned OuterLoopId, 977 CharMatrix &DepMatrix) { 978 979 // TODO: Add better profitability checks. 980 // e.g 981 // 1) Construct dependency matrix and move the one with no loop carried dep 982 // inside to enable vectorization. 983 984 // This is rough cost estimation algorithm. It counts the good and bad order 985 // of induction variables in the instruction and allows reordering if number 986 // of bad orders is more than good. 987 int Cost = 0; 988 Cost += getInstrOrderCost(); 989 DEBUG(dbgs() << "Cost = " << Cost << "\n"); 990 if (Cost < 0) 991 return true; 992 993 // It is not profitable as per current cache profitability model. But check if 994 // we can move this loop outside to improve parallelism. 995 bool ImprovesPar = 996 isProfitabileForVectorization(InnerLoopId, OuterLoopId, DepMatrix); 997 return ImprovesPar; 998 } 999 1000 void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop, 1001 Loop *InnerLoop) { 1002 for (Loop::iterator I = OuterLoop->begin(), E = OuterLoop->end(); I != E; 1003 ++I) { 1004 if (*I == InnerLoop) { 1005 OuterLoop->removeChildLoop(I); 1006 return; 1007 } 1008 } 1009 llvm_unreachable("Couldn't find loop"); 1010 } 1011 1012 void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop, 1013 Loop *OuterLoop) { 1014 Loop *OuterLoopParent = OuterLoop->getParentLoop(); 1015 if (OuterLoopParent) { 1016 // Remove the loop from its parent loop. 1017 removeChildLoop(OuterLoopParent, OuterLoop); 1018 removeChildLoop(OuterLoop, InnerLoop); 1019 OuterLoopParent->addChildLoop(InnerLoop); 1020 } else { 1021 removeChildLoop(OuterLoop, InnerLoop); 1022 LI->changeTopLevelLoop(OuterLoop, InnerLoop); 1023 } 1024 1025 while (!InnerLoop->empty()) 1026 OuterLoop->addChildLoop(InnerLoop->removeChildLoop(InnerLoop->begin())); 1027 1028 InnerLoop->addChildLoop(OuterLoop); 1029 } 1030 1031 bool LoopInterchangeTransform::transform() { 1032 1033 DEBUG(dbgs() << "transform\n"); 1034 bool Transformed = false; 1035 Instruction *InnerIndexVar; 1036 1037 if (InnerLoop->getSubLoops().size() == 0) { 1038 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 1039 DEBUG(dbgs() << "Calling Split Inner Loop\n"); 1040 PHINode *InductionPHI = getInductionVariable(InnerLoop, SE); 1041 if (!InductionPHI) { 1042 DEBUG(dbgs() << "Failed to find the point to split loop latch \n"); 1043 return false; 1044 } 1045 1046 if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader) 1047 InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1)); 1048 else 1049 InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0)); 1050 1051 // 1052 // Split at the place were the induction variable is 1053 // incremented/decremented. 1054 // TODO: This splitting logic may not work always. Fix this. 1055 splitInnerLoopLatch(InnerIndexVar); 1056 DEBUG(dbgs() << "splitInnerLoopLatch Done\n"); 1057 1058 // Splits the inner loops phi nodes out into a separate basic block. 1059 splitInnerLoopHeader(); 1060 DEBUG(dbgs() << "splitInnerLoopHeader Done\n"); 1061 } 1062 1063 Transformed |= adjustLoopLinks(); 1064 if (!Transformed) { 1065 DEBUG(dbgs() << "adjustLoopLinks Failed\n"); 1066 return false; 1067 } 1068 1069 restructureLoops(InnerLoop, OuterLoop); 1070 return true; 1071 } 1072 1073 void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) { 1074 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); 1075 BasicBlock *InnerLoopLatchPred = InnerLoopLatch; 1076 InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI); 1077 } 1078 1079 void LoopInterchangeTransform::splitOuterLoopLatch() { 1080 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); 1081 BasicBlock *OuterLatchLcssaPhiBlock = OuterLoopLatch; 1082 OuterLoopLatch = SplitBlock(OuterLatchLcssaPhiBlock, 1083 OuterLoopLatch->getFirstNonPHI(), DT, LI); 1084 } 1085 1086 void LoopInterchangeTransform::splitInnerLoopHeader() { 1087 1088 // Split the inner loop header out. Here make sure that the reduction PHI's 1089 // stay in the innerloop body. 1090 BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); 1091 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 1092 if (InnerLoopHasReduction) { 1093 // FIXME: Check if the induction PHI will always be the first PHI. 1094 BasicBlock *New = InnerLoopHeader->splitBasicBlock( 1095 ++(InnerLoopHeader->begin()), InnerLoopHeader->getName() + ".split"); 1096 if (LI) 1097 if (Loop *L = LI->getLoopFor(InnerLoopHeader)) 1098 L->addBasicBlockToLoop(New, *LI); 1099 1100 // Adjust Reduction PHI's in the block. 1101 SmallVector<PHINode *, 8> PHIVec; 1102 for (auto I = New->begin(); isa<PHINode>(I); ++I) { 1103 PHINode *PHI = dyn_cast<PHINode>(I); 1104 Value *V = PHI->getIncomingValueForBlock(InnerLoopPreHeader); 1105 PHI->replaceAllUsesWith(V); 1106 PHIVec.push_back((PHI)); 1107 } 1108 for (auto I = PHIVec.begin(), E = PHIVec.end(); I != E; ++I) { 1109 PHINode *P = *I; 1110 P->eraseFromParent(); 1111 } 1112 } else { 1113 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI); 1114 } 1115 1116 DEBUG(dbgs() << "Output of splitInnerLoopHeader InnerLoopHeaderSucc & " 1117 "InnerLoopHeader \n"); 1118 } 1119 1120 /// \brief Move all instructions except the terminator from FromBB right before 1121 /// InsertBefore 1122 static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { 1123 auto &ToList = InsertBefore->getParent()->getInstList(); 1124 auto &FromList = FromBB->getInstList(); 1125 1126 ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(), 1127 FromBB->getTerminator()->getIterator()); 1128 } 1129 1130 void LoopInterchangeTransform::adjustOuterLoopPreheader() { 1131 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); 1132 BasicBlock *InnerPreHeader = InnerLoop->getLoopPreheader(); 1133 1134 moveBBContents(OuterLoopPreHeader, InnerPreHeader->getTerminator()); 1135 } 1136 1137 void LoopInterchangeTransform::adjustInnerLoopPreheader() { 1138 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 1139 BasicBlock *OuterHeader = OuterLoop->getHeader(); 1140 1141 moveBBContents(InnerLoopPreHeader, OuterHeader->getTerminator()); 1142 } 1143 1144 void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock, 1145 BasicBlock *OldPred, 1146 BasicBlock *NewPred) { 1147 for (auto I = CurrBlock->begin(); isa<PHINode>(I); ++I) { 1148 PHINode *PHI = cast<PHINode>(I); 1149 unsigned Num = PHI->getNumIncomingValues(); 1150 for (unsigned i = 0; i < Num; ++i) { 1151 if (PHI->getIncomingBlock(i) == OldPred) 1152 PHI->setIncomingBlock(i, NewPred); 1153 } 1154 } 1155 } 1156 1157 bool LoopInterchangeTransform::adjustLoopBranches() { 1158 1159 DEBUG(dbgs() << "adjustLoopBranches called\n"); 1160 // Adjust the loop preheader 1161 BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); 1162 BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); 1163 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); 1164 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); 1165 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); 1166 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 1167 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor(); 1168 BasicBlock *InnerLoopLatchPredecessor = 1169 InnerLoopLatch->getUniquePredecessor(); 1170 BasicBlock *InnerLoopLatchSuccessor; 1171 BasicBlock *OuterLoopLatchSuccessor; 1172 1173 BranchInst *OuterLoopLatchBI = 1174 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator()); 1175 BranchInst *InnerLoopLatchBI = 1176 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator()); 1177 BranchInst *OuterLoopHeaderBI = 1178 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); 1179 BranchInst *InnerLoopHeaderBI = 1180 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator()); 1181 1182 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor || 1183 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI || 1184 !InnerLoopHeaderBI) 1185 return false; 1186 1187 BranchInst *InnerLoopLatchPredecessorBI = 1188 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator()); 1189 BranchInst *OuterLoopPredecessorBI = 1190 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator()); 1191 1192 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI) 1193 return false; 1194 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor(); 1195 if (!InnerLoopHeaderSuccessor) 1196 return false; 1197 1198 // Adjust Loop Preheader and headers 1199 1200 unsigned NumSucc = OuterLoopPredecessorBI->getNumSuccessors(); 1201 for (unsigned i = 0; i < NumSucc; ++i) { 1202 if (OuterLoopPredecessorBI->getSuccessor(i) == OuterLoopPreHeader) 1203 OuterLoopPredecessorBI->setSuccessor(i, InnerLoopPreHeader); 1204 } 1205 1206 NumSucc = OuterLoopHeaderBI->getNumSuccessors(); 1207 for (unsigned i = 0; i < NumSucc; ++i) { 1208 if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch) 1209 OuterLoopHeaderBI->setSuccessor(i, LoopExit); 1210 else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader) 1211 OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor); 1212 } 1213 1214 // Adjust reduction PHI's now that the incoming block has changed. 1215 updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader, 1216 OuterLoopHeader); 1217 1218 BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI); 1219 InnerLoopHeaderBI->eraseFromParent(); 1220 1221 // -------------Adjust loop latches----------- 1222 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader) 1223 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1); 1224 else 1225 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0); 1226 1227 NumSucc = InnerLoopLatchPredecessorBI->getNumSuccessors(); 1228 for (unsigned i = 0; i < NumSucc; ++i) { 1229 if (InnerLoopLatchPredecessorBI->getSuccessor(i) == InnerLoopLatch) 1230 InnerLoopLatchPredecessorBI->setSuccessor(i, InnerLoopLatchSuccessor); 1231 } 1232 1233 // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with 1234 // the value and remove this PHI node from inner loop. 1235 SmallVector<PHINode *, 8> LcssaVec; 1236 for (auto I = InnerLoopLatchSuccessor->begin(); isa<PHINode>(I); ++I) { 1237 PHINode *LcssaPhi = cast<PHINode>(I); 1238 LcssaVec.push_back(LcssaPhi); 1239 } 1240 for (auto I = LcssaVec.begin(), E = LcssaVec.end(); I != E; ++I) { 1241 PHINode *P = *I; 1242 Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch); 1243 P->replaceAllUsesWith(Incoming); 1244 P->eraseFromParent(); 1245 } 1246 1247 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader) 1248 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); 1249 else 1250 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0); 1251 1252 if (InnerLoopLatchBI->getSuccessor(1) == InnerLoopLatchSuccessor) 1253 InnerLoopLatchBI->setSuccessor(1, OuterLoopLatchSuccessor); 1254 else 1255 InnerLoopLatchBI->setSuccessor(0, OuterLoopLatchSuccessor); 1256 1257 updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch); 1258 1259 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopLatchSuccessor) { 1260 OuterLoopLatchBI->setSuccessor(0, InnerLoopLatch); 1261 } else { 1262 OuterLoopLatchBI->setSuccessor(1, InnerLoopLatch); 1263 } 1264 1265 return true; 1266 } 1267 void LoopInterchangeTransform::adjustLoopPreheaders() { 1268 1269 // We have interchanged the preheaders so we need to interchange the data in 1270 // the preheader as well. 1271 // This is because the content of inner preheader was previously executed 1272 // inside the outer loop. 1273 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); 1274 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 1275 BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); 1276 BranchInst *InnerTermBI = 1277 cast<BranchInst>(InnerLoopPreHeader->getTerminator()); 1278 1279 // These instructions should now be executed inside the loop. 1280 // Move instruction into a new block after outer header. 1281 moveBBContents(InnerLoopPreHeader, OuterLoopHeader->getTerminator()); 1282 // These instructions were not executed previously in the loop so move them to 1283 // the older inner loop preheader. 1284 moveBBContents(OuterLoopPreHeader, InnerTermBI); 1285 } 1286 1287 bool LoopInterchangeTransform::adjustLoopLinks() { 1288 1289 // Adjust all branches in the inner and outer loop. 1290 bool Changed = adjustLoopBranches(); 1291 if (Changed) 1292 adjustLoopPreheaders(); 1293 return Changed; 1294 } 1295 1296 char LoopInterchange::ID = 0; 1297 INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange", 1298 "Interchanges loops for cache reuse", false, false) 1299 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 1300 INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis) 1301 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 1302 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 1303 INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 1304 INITIALIZE_PASS_DEPENDENCY(LCSSA) 1305 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1306 1307 INITIALIZE_PASS_END(LoopInterchange, "loop-interchange", 1308 "Interchanges loops for cache reuse", false, false) 1309 1310 Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); } 1311