1 //===- LoopInterchange.cpp - Loop interchange pass------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This Pass handles loop interchange transform. 11 // This pass interchanges loops to provide a more cache-friendly memory access 12 // patterns. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/Analysis/AliasAnalysis.h" 18 #include "llvm/Analysis/AssumptionCache.h" 19 #include "llvm/Analysis/BlockFrequencyInfo.h" 20 #include "llvm/Analysis/CodeMetrics.h" 21 #include "llvm/Analysis/DependenceAnalysis.h" 22 #include "llvm/Analysis/LoopInfo.h" 23 #include "llvm/Analysis/LoopIterator.h" 24 #include "llvm/Analysis/LoopPass.h" 25 #include "llvm/Analysis/ScalarEvolution.h" 26 #include "llvm/Analysis/ScalarEvolutionExpander.h" 27 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 28 #include "llvm/Analysis/TargetTransformInfo.h" 29 #include "llvm/Analysis/ValueTracking.h" 30 #include "llvm/IR/Dominators.h" 31 #include "llvm/IR/Function.h" 32 #include "llvm/IR/IRBuilder.h" 33 #include "llvm/IR/InstIterator.h" 34 #include "llvm/IR/IntrinsicInst.h" 35 #include "llvm/IR/Module.h" 36 #include "llvm/Pass.h" 37 #include "llvm/Support/Debug.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include "llvm/Transforms/Scalar.h" 40 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 41 #include "llvm/Transforms/Utils/LoopUtils.h" 42 #include "llvm/Transforms/Utils/SSAUpdater.h" 43 using namespace llvm; 44 45 #define DEBUG_TYPE "loop-interchange" 46 47 namespace { 48 49 typedef SmallVector<Loop *, 8> LoopVector; 50 51 // TODO: Check if we can use a sparse matrix here. 52 typedef std::vector<std::vector<char>> CharMatrix; 53 54 // Maximum number of dependencies that can be handled in the dependency matrix. 55 static const unsigned MaxMemInstrCount = 100; 56 57 // Maximum loop depth supported. 58 static const unsigned MaxLoopNestDepth = 10; 59 60 struct LoopInterchange; 61 62 #ifdef DUMP_DEP_MATRICIES 63 void printDepMatrix(CharMatrix &DepMatrix) { 64 for (auto I = DepMatrix.begin(), E = DepMatrix.end(); I != E; ++I) { 65 std::vector<char> Vec = *I; 66 for (auto II = Vec.begin(), EE = Vec.end(); II != EE; ++II) 67 DEBUG(dbgs() << *II << " "); 68 DEBUG(dbgs() << "\n"); 69 } 70 } 71 #endif 72 73 static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, 74 Loop *L, DependenceInfo *DI) { 75 typedef SmallVector<Value *, 16> ValueVector; 76 ValueVector MemInstr; 77 78 if (Level > MaxLoopNestDepth) { 79 DEBUG(dbgs() << "Cannot handle loops of depth greater than " 80 << MaxLoopNestDepth << "\n"); 81 return false; 82 } 83 84 // For each block. 85 for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end(); 86 BB != BE; ++BB) { 87 // Scan the BB and collect legal loads and stores. 88 for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; 89 ++I) { 90 Instruction *Ins = dyn_cast<Instruction>(I); 91 if (!Ins) 92 return false; 93 LoadInst *Ld = dyn_cast<LoadInst>(I); 94 StoreInst *St = dyn_cast<StoreInst>(I); 95 if (!St && !Ld) 96 continue; 97 if (Ld && !Ld->isSimple()) 98 return false; 99 if (St && !St->isSimple()) 100 return false; 101 MemInstr.push_back(&*I); 102 } 103 } 104 105 DEBUG(dbgs() << "Found " << MemInstr.size() 106 << " Loads and Stores to analyze\n"); 107 108 ValueVector::iterator I, IE, J, JE; 109 110 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) { 111 for (J = I, JE = MemInstr.end(); J != JE; ++J) { 112 std::vector<char> Dep; 113 Instruction *Src = dyn_cast<Instruction>(*I); 114 Instruction *Des = dyn_cast<Instruction>(*J); 115 if (Src == Des) 116 continue; 117 if (isa<LoadInst>(Src) && isa<LoadInst>(Des)) 118 continue; 119 if (auto D = DI->depends(Src, Des, true)) { 120 DEBUG(dbgs() << "Found Dependency between Src=" << Src << " Des=" << Des 121 << "\n"); 122 if (D->isFlow()) { 123 // TODO: Handle Flow dependence.Check if it is sufficient to populate 124 // the Dependence Matrix with the direction reversed. 125 DEBUG(dbgs() << "Flow dependence not handled"); 126 return false; 127 } 128 if (D->isAnti()) { 129 DEBUG(dbgs() << "Found Anti dependence \n"); 130 unsigned Levels = D->getLevels(); 131 char Direction; 132 for (unsigned II = 1; II <= Levels; ++II) { 133 const SCEV *Distance = D->getDistance(II); 134 const SCEVConstant *SCEVConst = 135 dyn_cast_or_null<SCEVConstant>(Distance); 136 if (SCEVConst) { 137 const ConstantInt *CI = SCEVConst->getValue(); 138 if (CI->isNegative()) 139 Direction = '<'; 140 else if (CI->isZero()) 141 Direction = '='; 142 else 143 Direction = '>'; 144 Dep.push_back(Direction); 145 } else if (D->isScalar(II)) { 146 Direction = 'S'; 147 Dep.push_back(Direction); 148 } else { 149 unsigned Dir = D->getDirection(II); 150 if (Dir == Dependence::DVEntry::LT || 151 Dir == Dependence::DVEntry::LE) 152 Direction = '<'; 153 else if (Dir == Dependence::DVEntry::GT || 154 Dir == Dependence::DVEntry::GE) 155 Direction = '>'; 156 else if (Dir == Dependence::DVEntry::EQ) 157 Direction = '='; 158 else 159 Direction = '*'; 160 Dep.push_back(Direction); 161 } 162 } 163 while (Dep.size() != Level) { 164 Dep.push_back('I'); 165 } 166 167 DepMatrix.push_back(Dep); 168 if (DepMatrix.size() > MaxMemInstrCount) { 169 DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount 170 << " dependencies inside loop\n"); 171 return false; 172 } 173 } 174 } 175 } 176 } 177 178 // We don't have a DepMatrix to check legality return false. 179 if (DepMatrix.size() == 0) 180 return false; 181 return true; 182 } 183 184 // A loop is moved from index 'from' to an index 'to'. Update the Dependence 185 // matrix by exchanging the two columns. 186 static void interChangeDepedencies(CharMatrix &DepMatrix, unsigned FromIndx, 187 unsigned ToIndx) { 188 unsigned numRows = DepMatrix.size(); 189 for (unsigned i = 0; i < numRows; ++i) { 190 char TmpVal = DepMatrix[i][ToIndx]; 191 DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx]; 192 DepMatrix[i][FromIndx] = TmpVal; 193 } 194 } 195 196 // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is 197 // '>' 198 static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row, 199 unsigned Column) { 200 for (unsigned i = 0; i <= Column; ++i) { 201 if (DepMatrix[Row][i] == '<') 202 return false; 203 if (DepMatrix[Row][i] == '>') 204 return true; 205 } 206 // All dependencies were '=','S' or 'I' 207 return false; 208 } 209 210 // Checks if no dependence exist in the dependency matrix in Row before Column. 211 static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, 212 unsigned Column) { 213 for (unsigned i = 0; i < Column; ++i) { 214 if (DepMatrix[Row][i] != '=' || DepMatrix[Row][i] != 'S' || 215 DepMatrix[Row][i] != 'I') 216 return false; 217 } 218 return true; 219 } 220 221 static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, 222 unsigned OuterLoopId, char InnerDep, 223 char OuterDep) { 224 225 if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId)) 226 return false; 227 228 if (InnerDep == OuterDep) 229 return true; 230 231 // It is legal to interchange if and only if after interchange no row has a 232 // '>' direction as the leftmost non-'='. 233 234 if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I') 235 return true; 236 237 if (InnerDep == '<') 238 return true; 239 240 if (InnerDep == '>') { 241 // If OuterLoopId represents outermost loop then interchanging will make the 242 // 1st dependency as '>' 243 if (OuterLoopId == 0) 244 return false; 245 246 // If all dependencies before OuterloopId are '=','S'or 'I'. Then 247 // interchanging will result in this row having an outermost non '=' 248 // dependency of '>' 249 if (!containsNoDependence(DepMatrix, Row, OuterLoopId)) 250 return true; 251 } 252 253 return false; 254 } 255 256 // Checks if it is legal to interchange 2 loops. 257 // [Theorem] A permutation of the loops in a perfect nest is legal if and only 258 // if 259 // the direction matrix, after the same permutation is applied to its columns, 260 // has no ">" direction as the leftmost non-"=" direction in any row. 261 static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, 262 unsigned InnerLoopId, 263 unsigned OuterLoopId) { 264 265 unsigned NumRows = DepMatrix.size(); 266 // For each row check if it is valid to interchange. 267 for (unsigned Row = 0; Row < NumRows; ++Row) { 268 char InnerDep = DepMatrix[Row][InnerLoopId]; 269 char OuterDep = DepMatrix[Row][OuterLoopId]; 270 if (InnerDep == '*' || OuterDep == '*') 271 return false; 272 else if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, 273 OuterDep)) 274 return false; 275 } 276 return true; 277 } 278 279 static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) { 280 281 DEBUG(dbgs() << "Calling populateWorklist called\n"); 282 LoopVector LoopList; 283 Loop *CurrentLoop = &L; 284 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops(); 285 while (!Vec->empty()) { 286 // The current loop has multiple subloops in it hence it is not tightly 287 // nested. 288 // Discard all loops above it added into Worklist. 289 if (Vec->size() != 1) { 290 LoopList.clear(); 291 return; 292 } 293 LoopList.push_back(CurrentLoop); 294 CurrentLoop = Vec->front(); 295 Vec = &CurrentLoop->getSubLoops(); 296 } 297 LoopList.push_back(CurrentLoop); 298 V.push_back(std::move(LoopList)); 299 } 300 301 static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { 302 PHINode *InnerIndexVar = L->getCanonicalInductionVariable(); 303 if (InnerIndexVar) 304 return InnerIndexVar; 305 if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr) 306 return nullptr; 307 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 308 PHINode *PhiVar = cast<PHINode>(I); 309 Type *PhiTy = PhiVar->getType(); 310 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && 311 !PhiTy->isPointerTy()) 312 return nullptr; 313 const SCEVAddRecExpr *AddRec = 314 dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar)); 315 if (!AddRec || !AddRec->isAffine()) 316 continue; 317 const SCEV *Step = AddRec->getStepRecurrence(*SE); 318 const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); 319 if (!C) 320 continue; 321 // Found the induction variable. 322 // FIXME: Handle loops with more than one induction variable. Note that, 323 // currently, legality makes sure we have only one induction variable. 324 return PhiVar; 325 } 326 return nullptr; 327 } 328 329 /// LoopInterchangeLegality checks if it is legal to interchange the loop. 330 class LoopInterchangeLegality { 331 public: 332 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, 333 LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA) 334 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), 335 PreserveLCSSA(PreserveLCSSA), InnerLoopHasReduction(false) {} 336 337 /// Check if the loops can be interchanged. 338 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, 339 CharMatrix &DepMatrix); 340 /// Check if the loop structure is understood. We do not handle triangular 341 /// loops for now. 342 bool isLoopStructureUnderstood(PHINode *InnerInductionVar); 343 344 bool currentLimitations(); 345 346 bool hasInnerLoopReduction() { return InnerLoopHasReduction; } 347 348 private: 349 bool tightlyNested(Loop *Outer, Loop *Inner); 350 bool containsUnsafeInstructionsInHeader(BasicBlock *BB); 351 bool areAllUsesReductions(Instruction *Ins, Loop *L); 352 bool containsUnsafeInstructionsInLatch(BasicBlock *BB); 353 bool findInductionAndReductions(Loop *L, 354 SmallVector<PHINode *, 8> &Inductions, 355 SmallVector<PHINode *, 8> &Reductions); 356 Loop *OuterLoop; 357 Loop *InnerLoop; 358 359 ScalarEvolution *SE; 360 LoopInfo *LI; 361 DominatorTree *DT; 362 bool PreserveLCSSA; 363 364 bool InnerLoopHasReduction; 365 }; 366 367 /// LoopInterchangeProfitability checks if it is profitable to interchange the 368 /// loop. 369 class LoopInterchangeProfitability { 370 public: 371 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE) 372 : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {} 373 374 /// Check if the loop interchange is profitable. 375 bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, 376 CharMatrix &DepMatrix); 377 378 private: 379 int getInstrOrderCost(); 380 381 Loop *OuterLoop; 382 Loop *InnerLoop; 383 384 /// Scev analysis. 385 ScalarEvolution *SE; 386 }; 387 388 /// LoopInterchangeTransform interchanges the loop. 389 class LoopInterchangeTransform { 390 public: 391 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, 392 LoopInfo *LI, DominatorTree *DT, 393 BasicBlock *LoopNestExit, 394 bool InnerLoopContainsReductions) 395 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), 396 LoopExit(LoopNestExit), 397 InnerLoopHasReduction(InnerLoopContainsReductions) {} 398 399 /// Interchange OuterLoop and InnerLoop. 400 bool transform(); 401 void restructureLoops(Loop *InnerLoop, Loop *OuterLoop); 402 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop); 403 404 private: 405 void splitInnerLoopLatch(Instruction *); 406 void splitOuterLoopLatch(); 407 void splitInnerLoopHeader(); 408 bool adjustLoopLinks(); 409 void adjustLoopPreheaders(); 410 void adjustOuterLoopPreheader(); 411 void adjustInnerLoopPreheader(); 412 bool adjustLoopBranches(); 413 void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred, 414 BasicBlock *NewPred); 415 416 Loop *OuterLoop; 417 Loop *InnerLoop; 418 419 /// Scev analysis. 420 ScalarEvolution *SE; 421 LoopInfo *LI; 422 DominatorTree *DT; 423 BasicBlock *LoopExit; 424 bool InnerLoopHasReduction; 425 }; 426 427 // Main LoopInterchange Pass. 428 struct LoopInterchange : public FunctionPass { 429 static char ID; 430 ScalarEvolution *SE; 431 LoopInfo *LI; 432 DependenceInfo *DI; 433 DominatorTree *DT; 434 bool PreserveLCSSA; 435 LoopInterchange() 436 : FunctionPass(ID), SE(nullptr), LI(nullptr), DI(nullptr), DT(nullptr) { 437 initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); 438 } 439 440 void getAnalysisUsage(AnalysisUsage &AU) const override { 441 AU.addRequired<ScalarEvolutionWrapperPass>(); 442 AU.addRequired<AAResultsWrapperPass>(); 443 AU.addRequired<DominatorTreeWrapperPass>(); 444 AU.addRequired<LoopInfoWrapperPass>(); 445 AU.addRequired<DependenceAnalysisWrapperPass>(); 446 AU.addRequiredID(LoopSimplifyID); 447 AU.addRequiredID(LCSSAID); 448 } 449 450 bool runOnFunction(Function &F) override { 451 if (skipFunction(F)) 452 return false; 453 454 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 455 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 456 DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI(); 457 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); 458 DT = DTWP ? &DTWP->getDomTree() : nullptr; 459 PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); 460 461 // Build up a worklist of loop pairs to analyze. 462 SmallVector<LoopVector, 8> Worklist; 463 464 for (Loop *L : *LI) 465 populateWorklist(*L, Worklist); 466 467 DEBUG(dbgs() << "Worklist size = " << Worklist.size() << "\n"); 468 bool Changed = true; 469 while (!Worklist.empty()) { 470 LoopVector LoopList = Worklist.pop_back_val(); 471 Changed = processLoopList(LoopList, F); 472 } 473 return Changed; 474 } 475 476 bool isComputableLoopNest(LoopVector LoopList) { 477 for (auto I = LoopList.begin(), E = LoopList.end(); I != E; ++I) { 478 Loop *L = *I; 479 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); 480 if (ExitCountOuter == SE->getCouldNotCompute()) { 481 DEBUG(dbgs() << "Couldn't compute Backedge count\n"); 482 return false; 483 } 484 if (L->getNumBackEdges() != 1) { 485 DEBUG(dbgs() << "NumBackEdges is not equal to 1\n"); 486 return false; 487 } 488 if (!L->getExitingBlock()) { 489 DEBUG(dbgs() << "Loop Doesn't have unique exit block\n"); 490 return false; 491 } 492 } 493 return true; 494 } 495 496 unsigned selectLoopForInterchange(LoopVector LoopList) { 497 // TODO: Add a better heuristic to select the loop to be interchanged based 498 // on the dependence matrix. Currently we select the innermost loop. 499 return LoopList.size() - 1; 500 } 501 502 bool processLoopList(LoopVector LoopList, Function &F) { 503 504 bool Changed = false; 505 CharMatrix DependencyMatrix; 506 if (LoopList.size() < 2) { 507 DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n"); 508 return false; 509 } 510 if (!isComputableLoopNest(LoopList)) { 511 DEBUG(dbgs() << "Not vaild loop candidate for interchange\n"); 512 return false; 513 } 514 Loop *OuterMostLoop = *(LoopList.begin()); 515 516 DEBUG(dbgs() << "Processing LoopList of size = " << LoopList.size() 517 << "\n"); 518 519 if (!populateDependencyMatrix(DependencyMatrix, LoopList.size(), 520 OuterMostLoop, DI)) { 521 DEBUG(dbgs() << "Populating Dependency matrix failed\n"); 522 return false; 523 } 524 #ifdef DUMP_DEP_MATRICIES 525 DEBUG(dbgs() << "Dependence before inter change \n"); 526 printDepMatrix(DependencyMatrix); 527 #endif 528 529 BasicBlock *OuterMostLoopLatch = OuterMostLoop->getLoopLatch(); 530 BranchInst *OuterMostLoopLatchBI = 531 dyn_cast<BranchInst>(OuterMostLoopLatch->getTerminator()); 532 if (!OuterMostLoopLatchBI) 533 return false; 534 535 // Since we currently do not handle LCSSA PHI's any failure in loop 536 // condition will now branch to LoopNestExit. 537 // TODO: This should be removed once we handle LCSSA PHI nodes. 538 539 // Get the Outermost loop exit. 540 BasicBlock *LoopNestExit; 541 if (OuterMostLoopLatchBI->getSuccessor(0) == OuterMostLoop->getHeader()) 542 LoopNestExit = OuterMostLoopLatchBI->getSuccessor(1); 543 else 544 LoopNestExit = OuterMostLoopLatchBI->getSuccessor(0); 545 546 if (isa<PHINode>(LoopNestExit->begin())) { 547 DEBUG(dbgs() << "PHI Nodes in loop nest exit is not handled for now " 548 "since on failure all loops branch to loop nest exit.\n"); 549 return false; 550 } 551 552 unsigned SelecLoopId = selectLoopForInterchange(LoopList); 553 // Move the selected loop outwards to the best possible position. 554 for (unsigned i = SelecLoopId; i > 0; i--) { 555 bool Interchanged = 556 processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix); 557 if (!Interchanged) 558 return Changed; 559 // Loops interchanged reflect the same in LoopList 560 std::swap(LoopList[i - 1], LoopList[i]); 561 562 // Update the DependencyMatrix 563 interChangeDepedencies(DependencyMatrix, i, i - 1); 564 DT->recalculate(F); 565 #ifdef DUMP_DEP_MATRICIES 566 DEBUG(dbgs() << "Dependence after inter change \n"); 567 printDepMatrix(DependencyMatrix); 568 #endif 569 Changed |= Interchanged; 570 } 571 return Changed; 572 } 573 574 bool processLoop(LoopVector LoopList, unsigned InnerLoopId, 575 unsigned OuterLoopId, BasicBlock *LoopNestExit, 576 std::vector<std::vector<char>> &DependencyMatrix) { 577 578 DEBUG(dbgs() << "Processing Innder Loop Id = " << InnerLoopId 579 << " and OuterLoopId = " << OuterLoopId << "\n"); 580 Loop *InnerLoop = LoopList[InnerLoopId]; 581 Loop *OuterLoop = LoopList[OuterLoopId]; 582 583 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT, 584 PreserveLCSSA); 585 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { 586 DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n"); 587 return false; 588 } 589 DEBUG(dbgs() << "Loops are legal to interchange\n"); 590 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE); 591 if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) { 592 DEBUG(dbgs() << "Interchanging Loops not profitable\n"); 593 return false; 594 } 595 596 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, 597 LoopNestExit, LIL.hasInnerLoopReduction()); 598 LIT.transform(); 599 DEBUG(dbgs() << "Loops interchanged\n"); 600 return true; 601 } 602 }; 603 604 } // end of namespace 605 bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) { 606 return !std::any_of(Ins->user_begin(), Ins->user_end(), [=](User *U) -> bool { 607 PHINode *UserIns = dyn_cast<PHINode>(U); 608 RecurrenceDescriptor RD; 609 return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD); 610 }); 611 } 612 613 bool LoopInterchangeLegality::containsUnsafeInstructionsInHeader( 614 BasicBlock *BB) { 615 for (auto I = BB->begin(), E = BB->end(); I != E; ++I) { 616 // Load corresponding to reduction PHI's are safe while concluding if 617 // tightly nested. 618 if (LoadInst *L = dyn_cast<LoadInst>(I)) { 619 if (!areAllUsesReductions(L, InnerLoop)) 620 return true; 621 } else if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 622 return true; 623 } 624 return false; 625 } 626 627 bool LoopInterchangeLegality::containsUnsafeInstructionsInLatch( 628 BasicBlock *BB) { 629 for (auto I = BB->begin(), E = BB->end(); I != E; ++I) { 630 // Stores corresponding to reductions are safe while concluding if tightly 631 // nested. 632 if (StoreInst *L = dyn_cast<StoreInst>(I)) { 633 PHINode *PHI = dyn_cast<PHINode>(L->getOperand(0)); 634 if (!PHI) 635 return true; 636 } else if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 637 return true; 638 } 639 return false; 640 } 641 642 bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { 643 BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); 644 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 645 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); 646 647 DEBUG(dbgs() << "Checking if Loops are Tightly Nested\n"); 648 649 // A perfectly nested loop will not have any branch in between the outer and 650 // inner block i.e. outer header will branch to either inner preheader and 651 // outerloop latch. 652 BranchInst *outerLoopHeaderBI = 653 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); 654 if (!outerLoopHeaderBI) 655 return false; 656 unsigned num = outerLoopHeaderBI->getNumSuccessors(); 657 for (unsigned i = 0; i < num; i++) { 658 if (outerLoopHeaderBI->getSuccessor(i) != InnerLoopPreHeader && 659 outerLoopHeaderBI->getSuccessor(i) != OuterLoopLatch) 660 return false; 661 } 662 663 DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch \n"); 664 // We do not have any basic block in between now make sure the outer header 665 // and outer loop latch doesn't contain any unsafe instructions. 666 if (containsUnsafeInstructionsInHeader(OuterLoopHeader) || 667 containsUnsafeInstructionsInLatch(OuterLoopLatch)) 668 return false; 669 670 DEBUG(dbgs() << "Loops are perfectly nested \n"); 671 // We have a perfect loop nest. 672 return true; 673 } 674 675 676 bool LoopInterchangeLegality::isLoopStructureUnderstood( 677 PHINode *InnerInduction) { 678 679 unsigned Num = InnerInduction->getNumOperands(); 680 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader(); 681 for (unsigned i = 0; i < Num; ++i) { 682 Value *Val = InnerInduction->getOperand(i); 683 if (isa<Constant>(Val)) 684 continue; 685 Instruction *I = dyn_cast<Instruction>(Val); 686 if (!I) 687 return false; 688 // TODO: Handle triangular loops. 689 // e.g. for(int i=0;i<N;i++) 690 // for(int j=i;j<N;j++) 691 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i); 692 if (InnerInduction->getIncomingBlock(IncomBlockIndx) == 693 InnerLoopPreheader && 694 !OuterLoop->isLoopInvariant(I)) { 695 return false; 696 } 697 } 698 return true; 699 } 700 701 bool LoopInterchangeLegality::findInductionAndReductions( 702 Loop *L, SmallVector<PHINode *, 8> &Inductions, 703 SmallVector<PHINode *, 8> &Reductions) { 704 if (!L->getLoopLatch() || !L->getLoopPredecessor()) 705 return false; 706 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 707 RecurrenceDescriptor RD; 708 InductionDescriptor ID; 709 PHINode *PHI = cast<PHINode>(I); 710 if (InductionDescriptor::isInductionPHI(PHI, SE, ID)) 711 Inductions.push_back(PHI); 712 else if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) 713 Reductions.push_back(PHI); 714 else { 715 DEBUG( 716 dbgs() << "Failed to recognize PHI as an induction or reduction.\n"); 717 return false; 718 } 719 } 720 return true; 721 } 722 723 static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) { 724 for (auto I = Block->begin(); isa<PHINode>(I); ++I) { 725 PHINode *PHI = cast<PHINode>(I); 726 // Reduction lcssa phi will have only 1 incoming block that from loop latch. 727 if (PHI->getNumIncomingValues() > 1) 728 return false; 729 Instruction *Ins = dyn_cast<Instruction>(PHI->getIncomingValue(0)); 730 if (!Ins) 731 return false; 732 // Incoming value for lcssa phi's in outer loop exit can only be inner loop 733 // exits lcssa phi else it would not be tightly nested. 734 if (!isa<PHINode>(Ins) && isOuterLoopExitBlock) 735 return false; 736 } 737 return true; 738 } 739 740 static BasicBlock *getLoopLatchExitBlock(BasicBlock *LatchBlock, 741 BasicBlock *LoopHeader) { 742 if (BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator())) { 743 unsigned Num = BI->getNumSuccessors(); 744 assert(Num == 2); 745 for (unsigned i = 0; i < Num; ++i) { 746 if (BI->getSuccessor(i) == LoopHeader) 747 continue; 748 return BI->getSuccessor(i); 749 } 750 } 751 return nullptr; 752 } 753 754 // This function indicates the current limitations in the transform as a result 755 // of which we do not proceed. 756 bool LoopInterchangeLegality::currentLimitations() { 757 758 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 759 BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); 760 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); 761 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); 762 BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); 763 764 PHINode *InnerInductionVar; 765 SmallVector<PHINode *, 8> Inductions; 766 SmallVector<PHINode *, 8> Reductions; 767 if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) 768 return true; 769 770 // TODO: Currently we handle only loops with 1 induction variable. 771 if (Inductions.size() != 1) { 772 DEBUG(dbgs() << "We currently only support loops with 1 induction variable." 773 << "Failed to interchange due to current limitation\n"); 774 return true; 775 } 776 if (Reductions.size() > 0) 777 InnerLoopHasReduction = true; 778 779 InnerInductionVar = Inductions.pop_back_val(); 780 Reductions.clear(); 781 if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) 782 return true; 783 784 // Outer loop cannot have reduction because then loops will not be tightly 785 // nested. 786 if (!Reductions.empty()) 787 return true; 788 // TODO: Currently we handle only loops with 1 induction variable. 789 if (Inductions.size() != 1) 790 return true; 791 792 // TODO: Triangular loops are not handled for now. 793 if (!isLoopStructureUnderstood(InnerInductionVar)) { 794 DEBUG(dbgs() << "Loop structure not understood by pass\n"); 795 return true; 796 } 797 798 // TODO: We only handle LCSSA PHI's corresponding to reduction for now. 799 BasicBlock *LoopExitBlock = 800 getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader); 801 if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) 802 return true; 803 804 LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader); 805 if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) 806 return true; 807 808 // TODO: Current limitation: Since we split the inner loop latch at the point 809 // were induction variable is incremented (induction.next); We cannot have 810 // more than 1 user of induction.next since it would result in broken code 811 // after split. 812 // e.g. 813 // for(i=0;i<N;i++) { 814 // for(j = 0;j<M;j++) { 815 // A[j+1][i+2] = A[j][i]+k; 816 // } 817 // } 818 bool FoundInduction = false; 819 Instruction *InnerIndexVarInc = nullptr; 820 if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader) 821 InnerIndexVarInc = 822 dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1)); 823 else 824 InnerIndexVarInc = 825 dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0)); 826 827 if (!InnerIndexVarInc) 828 return true; 829 830 // Since we split the inner loop latch on this induction variable. Make sure 831 // we do not have any instruction between the induction variable and branch 832 // instruction. 833 834 for (auto I = InnerLoopLatch->rbegin(), E = InnerLoopLatch->rend(); 835 I != E && !FoundInduction; ++I) { 836 if (isa<BranchInst>(*I) || isa<CmpInst>(*I) || isa<TruncInst>(*I)) 837 continue; 838 const Instruction &Ins = *I; 839 // We found an instruction. If this is not induction variable then it is not 840 // safe to split this loop latch. 841 if (!Ins.isIdenticalTo(InnerIndexVarInc)) 842 return true; 843 else 844 FoundInduction = true; 845 } 846 // The loop latch ended and we didn't find the induction variable return as 847 // current limitation. 848 if (!FoundInduction) 849 return true; 850 851 return false; 852 } 853 854 bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, 855 unsigned OuterLoopId, 856 CharMatrix &DepMatrix) { 857 858 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) { 859 DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId 860 << "and OuterLoopId = " << OuterLoopId 861 << "due to dependence\n"); 862 return false; 863 } 864 865 // Create unique Preheaders if we already do not have one. 866 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); 867 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 868 869 // Create a unique outer preheader - 870 // 1) If OuterLoop preheader is not present. 871 // 2) If OuterLoop Preheader is same as OuterLoop Header 872 // 3) If OuterLoop Preheader is same as Header of the previous loop. 873 // 4) If OuterLoop Preheader is Entry node. 874 if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() || 875 isa<PHINode>(OuterLoopPreHeader->begin()) || 876 !OuterLoopPreHeader->getUniquePredecessor()) { 877 OuterLoopPreHeader = 878 InsertPreheaderForLoop(OuterLoop, DT, LI, PreserveLCSSA); 879 } 880 881 if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() || 882 InnerLoopPreHeader == OuterLoop->getHeader()) { 883 InnerLoopPreHeader = 884 InsertPreheaderForLoop(InnerLoop, DT, LI, PreserveLCSSA); 885 } 886 887 // TODO: The loops could not be interchanged due to current limitations in the 888 // transform module. 889 if (currentLimitations()) { 890 DEBUG(dbgs() << "Not legal because of current transform limitation\n"); 891 return false; 892 } 893 894 // Check if the loops are tightly nested. 895 if (!tightlyNested(OuterLoop, InnerLoop)) { 896 DEBUG(dbgs() << "Loops not tightly nested\n"); 897 return false; 898 } 899 900 return true; 901 } 902 903 int LoopInterchangeProfitability::getInstrOrderCost() { 904 unsigned GoodOrder, BadOrder; 905 BadOrder = GoodOrder = 0; 906 for (auto BI = InnerLoop->block_begin(), BE = InnerLoop->block_end(); 907 BI != BE; ++BI) { 908 for (auto I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) { 909 const Instruction &Ins = *I; 910 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) { 911 unsigned NumOp = GEP->getNumOperands(); 912 bool FoundInnerInduction = false; 913 bool FoundOuterInduction = false; 914 for (unsigned i = 0; i < NumOp; ++i) { 915 const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i)); 916 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal); 917 if (!AR) 918 continue; 919 920 // If we find the inner induction after an outer induction e.g. 921 // for(int i=0;i<N;i++) 922 // for(int j=0;j<N;j++) 923 // A[i][j] = A[i-1][j-1]+k; 924 // then it is a good order. 925 if (AR->getLoop() == InnerLoop) { 926 // We found an InnerLoop induction after OuterLoop induction. It is 927 // a good order. 928 FoundInnerInduction = true; 929 if (FoundOuterInduction) { 930 GoodOrder++; 931 break; 932 } 933 } 934 // If we find the outer induction after an inner induction e.g. 935 // for(int i=0;i<N;i++) 936 // for(int j=0;j<N;j++) 937 // A[j][i] = A[j-1][i-1]+k; 938 // then it is a bad order. 939 if (AR->getLoop() == OuterLoop) { 940 // We found an OuterLoop induction after InnerLoop induction. It is 941 // a bad order. 942 FoundOuterInduction = true; 943 if (FoundInnerInduction) { 944 BadOrder++; 945 break; 946 } 947 } 948 } 949 } 950 } 951 } 952 return GoodOrder - BadOrder; 953 } 954 955 static bool isProfitabileForVectorization(unsigned InnerLoopId, 956 unsigned OuterLoopId, 957 CharMatrix &DepMatrix) { 958 // TODO: Improve this heuristic to catch more cases. 959 // If the inner loop is loop independent or doesn't carry any dependency it is 960 // profitable to move this to outer position. 961 unsigned Row = DepMatrix.size(); 962 for (unsigned i = 0; i < Row; ++i) { 963 if (DepMatrix[i][InnerLoopId] != 'S' && DepMatrix[i][InnerLoopId] != 'I') 964 return false; 965 // TODO: We need to improve this heuristic. 966 if (DepMatrix[i][OuterLoopId] != '=') 967 return false; 968 } 969 // If outer loop has dependence and inner loop is loop independent then it is 970 // profitable to interchange to enable parallelism. 971 return true; 972 } 973 974 bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, 975 unsigned OuterLoopId, 976 CharMatrix &DepMatrix) { 977 978 // TODO: Add better profitability checks. 979 // e.g 980 // 1) Construct dependency matrix and move the one with no loop carried dep 981 // inside to enable vectorization. 982 983 // This is rough cost estimation algorithm. It counts the good and bad order 984 // of induction variables in the instruction and allows reordering if number 985 // of bad orders is more than good. 986 int Cost = 0; 987 Cost += getInstrOrderCost(); 988 DEBUG(dbgs() << "Cost = " << Cost << "\n"); 989 if (Cost < 0) 990 return true; 991 992 // It is not profitable as per current cache profitability model. But check if 993 // we can move this loop outside to improve parallelism. 994 bool ImprovesPar = 995 isProfitabileForVectorization(InnerLoopId, OuterLoopId, DepMatrix); 996 return ImprovesPar; 997 } 998 999 void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop, 1000 Loop *InnerLoop) { 1001 for (Loop::iterator I = OuterLoop->begin(), E = OuterLoop->end(); I != E; 1002 ++I) { 1003 if (*I == InnerLoop) { 1004 OuterLoop->removeChildLoop(I); 1005 return; 1006 } 1007 } 1008 llvm_unreachable("Couldn't find loop"); 1009 } 1010 1011 void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop, 1012 Loop *OuterLoop) { 1013 Loop *OuterLoopParent = OuterLoop->getParentLoop(); 1014 if (OuterLoopParent) { 1015 // Remove the loop from its parent loop. 1016 removeChildLoop(OuterLoopParent, OuterLoop); 1017 removeChildLoop(OuterLoop, InnerLoop); 1018 OuterLoopParent->addChildLoop(InnerLoop); 1019 } else { 1020 removeChildLoop(OuterLoop, InnerLoop); 1021 LI->changeTopLevelLoop(OuterLoop, InnerLoop); 1022 } 1023 1024 while (!InnerLoop->empty()) 1025 OuterLoop->addChildLoop(InnerLoop->removeChildLoop(InnerLoop->begin())); 1026 1027 InnerLoop->addChildLoop(OuterLoop); 1028 } 1029 1030 bool LoopInterchangeTransform::transform() { 1031 1032 DEBUG(dbgs() << "transform\n"); 1033 bool Transformed = false; 1034 Instruction *InnerIndexVar; 1035 1036 if (InnerLoop->getSubLoops().size() == 0) { 1037 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 1038 DEBUG(dbgs() << "Calling Split Inner Loop\n"); 1039 PHINode *InductionPHI = getInductionVariable(InnerLoop, SE); 1040 if (!InductionPHI) { 1041 DEBUG(dbgs() << "Failed to find the point to split loop latch \n"); 1042 return false; 1043 } 1044 1045 if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader) 1046 InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1)); 1047 else 1048 InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0)); 1049 1050 // 1051 // Split at the place were the induction variable is 1052 // incremented/decremented. 1053 // TODO: This splitting logic may not work always. Fix this. 1054 splitInnerLoopLatch(InnerIndexVar); 1055 DEBUG(dbgs() << "splitInnerLoopLatch Done\n"); 1056 1057 // Splits the inner loops phi nodes out into a separate basic block. 1058 splitInnerLoopHeader(); 1059 DEBUG(dbgs() << "splitInnerLoopHeader Done\n"); 1060 } 1061 1062 Transformed |= adjustLoopLinks(); 1063 if (!Transformed) { 1064 DEBUG(dbgs() << "adjustLoopLinks Failed\n"); 1065 return false; 1066 } 1067 1068 restructureLoops(InnerLoop, OuterLoop); 1069 return true; 1070 } 1071 1072 void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) { 1073 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); 1074 BasicBlock *InnerLoopLatchPred = InnerLoopLatch; 1075 InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI); 1076 } 1077 1078 void LoopInterchangeTransform::splitOuterLoopLatch() { 1079 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); 1080 BasicBlock *OuterLatchLcssaPhiBlock = OuterLoopLatch; 1081 OuterLoopLatch = SplitBlock(OuterLatchLcssaPhiBlock, 1082 OuterLoopLatch->getFirstNonPHI(), DT, LI); 1083 } 1084 1085 void LoopInterchangeTransform::splitInnerLoopHeader() { 1086 1087 // Split the inner loop header out. Here make sure that the reduction PHI's 1088 // stay in the innerloop body. 1089 BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); 1090 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 1091 if (InnerLoopHasReduction) { 1092 // FIXME: Check if the induction PHI will always be the first PHI. 1093 BasicBlock *New = InnerLoopHeader->splitBasicBlock( 1094 ++(InnerLoopHeader->begin()), InnerLoopHeader->getName() + ".split"); 1095 if (LI) 1096 if (Loop *L = LI->getLoopFor(InnerLoopHeader)) 1097 L->addBasicBlockToLoop(New, *LI); 1098 1099 // Adjust Reduction PHI's in the block. 1100 SmallVector<PHINode *, 8> PHIVec; 1101 for (auto I = New->begin(); isa<PHINode>(I); ++I) { 1102 PHINode *PHI = dyn_cast<PHINode>(I); 1103 Value *V = PHI->getIncomingValueForBlock(InnerLoopPreHeader); 1104 PHI->replaceAllUsesWith(V); 1105 PHIVec.push_back((PHI)); 1106 } 1107 for (auto I = PHIVec.begin(), E = PHIVec.end(); I != E; ++I) { 1108 PHINode *P = *I; 1109 P->eraseFromParent(); 1110 } 1111 } else { 1112 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI); 1113 } 1114 1115 DEBUG(dbgs() << "Output of splitInnerLoopHeader InnerLoopHeaderSucc & " 1116 "InnerLoopHeader \n"); 1117 } 1118 1119 /// \brief Move all instructions except the terminator from FromBB right before 1120 /// InsertBefore 1121 static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { 1122 auto &ToList = InsertBefore->getParent()->getInstList(); 1123 auto &FromList = FromBB->getInstList(); 1124 1125 ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(), 1126 FromBB->getTerminator()->getIterator()); 1127 } 1128 1129 void LoopInterchangeTransform::adjustOuterLoopPreheader() { 1130 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); 1131 BasicBlock *InnerPreHeader = InnerLoop->getLoopPreheader(); 1132 1133 moveBBContents(OuterLoopPreHeader, InnerPreHeader->getTerminator()); 1134 } 1135 1136 void LoopInterchangeTransform::adjustInnerLoopPreheader() { 1137 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 1138 BasicBlock *OuterHeader = OuterLoop->getHeader(); 1139 1140 moveBBContents(InnerLoopPreHeader, OuterHeader->getTerminator()); 1141 } 1142 1143 void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock, 1144 BasicBlock *OldPred, 1145 BasicBlock *NewPred) { 1146 for (auto I = CurrBlock->begin(); isa<PHINode>(I); ++I) { 1147 PHINode *PHI = cast<PHINode>(I); 1148 unsigned Num = PHI->getNumIncomingValues(); 1149 for (unsigned i = 0; i < Num; ++i) { 1150 if (PHI->getIncomingBlock(i) == OldPred) 1151 PHI->setIncomingBlock(i, NewPred); 1152 } 1153 } 1154 } 1155 1156 bool LoopInterchangeTransform::adjustLoopBranches() { 1157 1158 DEBUG(dbgs() << "adjustLoopBranches called\n"); 1159 // Adjust the loop preheader 1160 BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); 1161 BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); 1162 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); 1163 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); 1164 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); 1165 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 1166 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor(); 1167 BasicBlock *InnerLoopLatchPredecessor = 1168 InnerLoopLatch->getUniquePredecessor(); 1169 BasicBlock *InnerLoopLatchSuccessor; 1170 BasicBlock *OuterLoopLatchSuccessor; 1171 1172 BranchInst *OuterLoopLatchBI = 1173 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator()); 1174 BranchInst *InnerLoopLatchBI = 1175 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator()); 1176 BranchInst *OuterLoopHeaderBI = 1177 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); 1178 BranchInst *InnerLoopHeaderBI = 1179 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator()); 1180 1181 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor || 1182 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI || 1183 !InnerLoopHeaderBI) 1184 return false; 1185 1186 BranchInst *InnerLoopLatchPredecessorBI = 1187 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator()); 1188 BranchInst *OuterLoopPredecessorBI = 1189 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator()); 1190 1191 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI) 1192 return false; 1193 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor(); 1194 if (!InnerLoopHeaderSuccessor) 1195 return false; 1196 1197 // Adjust Loop Preheader and headers 1198 1199 unsigned NumSucc = OuterLoopPredecessorBI->getNumSuccessors(); 1200 for (unsigned i = 0; i < NumSucc; ++i) { 1201 if (OuterLoopPredecessorBI->getSuccessor(i) == OuterLoopPreHeader) 1202 OuterLoopPredecessorBI->setSuccessor(i, InnerLoopPreHeader); 1203 } 1204 1205 NumSucc = OuterLoopHeaderBI->getNumSuccessors(); 1206 for (unsigned i = 0; i < NumSucc; ++i) { 1207 if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch) 1208 OuterLoopHeaderBI->setSuccessor(i, LoopExit); 1209 else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader) 1210 OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor); 1211 } 1212 1213 // Adjust reduction PHI's now that the incoming block has changed. 1214 updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader, 1215 OuterLoopHeader); 1216 1217 BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI); 1218 InnerLoopHeaderBI->eraseFromParent(); 1219 1220 // -------------Adjust loop latches----------- 1221 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader) 1222 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1); 1223 else 1224 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0); 1225 1226 NumSucc = InnerLoopLatchPredecessorBI->getNumSuccessors(); 1227 for (unsigned i = 0; i < NumSucc; ++i) { 1228 if (InnerLoopLatchPredecessorBI->getSuccessor(i) == InnerLoopLatch) 1229 InnerLoopLatchPredecessorBI->setSuccessor(i, InnerLoopLatchSuccessor); 1230 } 1231 1232 // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with 1233 // the value and remove this PHI node from inner loop. 1234 SmallVector<PHINode *, 8> LcssaVec; 1235 for (auto I = InnerLoopLatchSuccessor->begin(); isa<PHINode>(I); ++I) { 1236 PHINode *LcssaPhi = cast<PHINode>(I); 1237 LcssaVec.push_back(LcssaPhi); 1238 } 1239 for (auto I = LcssaVec.begin(), E = LcssaVec.end(); I != E; ++I) { 1240 PHINode *P = *I; 1241 Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch); 1242 P->replaceAllUsesWith(Incoming); 1243 P->eraseFromParent(); 1244 } 1245 1246 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader) 1247 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); 1248 else 1249 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0); 1250 1251 if (InnerLoopLatchBI->getSuccessor(1) == InnerLoopLatchSuccessor) 1252 InnerLoopLatchBI->setSuccessor(1, OuterLoopLatchSuccessor); 1253 else 1254 InnerLoopLatchBI->setSuccessor(0, OuterLoopLatchSuccessor); 1255 1256 updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch); 1257 1258 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopLatchSuccessor) { 1259 OuterLoopLatchBI->setSuccessor(0, InnerLoopLatch); 1260 } else { 1261 OuterLoopLatchBI->setSuccessor(1, InnerLoopLatch); 1262 } 1263 1264 return true; 1265 } 1266 void LoopInterchangeTransform::adjustLoopPreheaders() { 1267 1268 // We have interchanged the preheaders so we need to interchange the data in 1269 // the preheader as well. 1270 // This is because the content of inner preheader was previously executed 1271 // inside the outer loop. 1272 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); 1273 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 1274 BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); 1275 BranchInst *InnerTermBI = 1276 cast<BranchInst>(InnerLoopPreHeader->getTerminator()); 1277 1278 // These instructions should now be executed inside the loop. 1279 // Move instruction into a new block after outer header. 1280 moveBBContents(InnerLoopPreHeader, OuterLoopHeader->getTerminator()); 1281 // These instructions were not executed previously in the loop so move them to 1282 // the older inner loop preheader. 1283 moveBBContents(OuterLoopPreHeader, InnerTermBI); 1284 } 1285 1286 bool LoopInterchangeTransform::adjustLoopLinks() { 1287 1288 // Adjust all branches in the inner and outer loop. 1289 bool Changed = adjustLoopBranches(); 1290 if (Changed) 1291 adjustLoopPreheaders(); 1292 return Changed; 1293 } 1294 1295 char LoopInterchange::ID = 0; 1296 INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange", 1297 "Interchanges loops for cache reuse", false, false) 1298 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 1299 INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass) 1300 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 1301 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 1302 INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 1303 INITIALIZE_PASS_DEPENDENCY(LCSSA) 1304 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1305 1306 INITIALIZE_PASS_END(LoopInterchange, "loop-interchange", 1307 "Interchanges loops for cache reuse", false, false) 1308 1309 Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); } 1310