1 //===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a pass to prepare loops for pre-increment addressing 11 // modes. Additional PHIs are created for loop induction variables used by 12 // load/store instructions so that the pre-increment forms can be used. 13 // Generically, this means transforming loops like this: 14 // for (int i = 0; i < n; ++i) 15 // array[i] = c; 16 // to look like this: 17 // T *p = array[-1]; 18 // for (int i = 0; i < n; ++i) 19 // *++p = c; 20 //===----------------------------------------------------------------------===// 21 22 #define DEBUG_TYPE "ppc-loop-preinc-prep" 23 24 #include "PPC.h" 25 #include "PPCSubtarget.h" 26 #include "PPCTargetMachine.h" 27 #include "llvm/ADT/DepthFirstIterator.h" 28 #include "llvm/ADT/SmallPtrSet.h" 29 #include "llvm/ADT/SmallSet.h" 30 #include "llvm/ADT/SmallVector.h" 31 #include "llvm/ADT/Statistic.h" 32 #include "llvm/Analysis/LoopInfo.h" 33 #include "llvm/Analysis/ScalarEvolution.h" 34 #include "llvm/Analysis/ScalarEvolutionExpander.h" 35 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 36 #include "llvm/IR/BasicBlock.h" 37 #include "llvm/IR/CFG.h" 38 #include "llvm/IR/Dominators.h" 39 #include "llvm/IR/Instruction.h" 40 #include "llvm/IR/Instructions.h" 41 #include "llvm/IR/IntrinsicInst.h" 42 #include "llvm/IR/Module.h" 43 #include "llvm/IR/Type.h" 44 #include "llvm/IR/Value.h" 45 #include "llvm/Pass.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/CommandLine.h" 48 #include "llvm/Support/Debug.h" 49 #include "llvm/Transforms/Scalar.h" 50 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 51 #include "llvm/Transforms/Utils/Local.h" 52 #include "llvm/Transforms/Utils/LoopUtils.h" 53 #include <cassert> 54 #include <iterator> 55 #include <utility> 56 57 using namespace llvm; 58 59 // By default, we limit this to creating 16 PHIs (which is a little over half 60 // of the allocatable register set). 61 static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars", 62 cl::Hidden, cl::init(16), 63 cl::desc("Potential PHI threshold for PPC preinc loop prep")); 64 65 STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form"); 66 67 namespace llvm { 68 69 void initializePPCLoopPreIncPrepPass(PassRegistry&); 70 71 } // end namespace llvm 72 73 namespace { 74 75 class PPCLoopPreIncPrep : public FunctionPass { 76 public: 77 static char ID; // Pass ID, replacement for typeid 78 79 PPCLoopPreIncPrep() : FunctionPass(ID) { 80 initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry()); 81 } 82 83 PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { 84 initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry()); 85 } 86 87 void getAnalysisUsage(AnalysisUsage &AU) const override { 88 AU.addPreserved<DominatorTreeWrapperPass>(); 89 AU.addRequired<LoopInfoWrapperPass>(); 90 AU.addPreserved<LoopInfoWrapperPass>(); 91 AU.addRequired<ScalarEvolutionWrapperPass>(); 92 } 93 94 bool alreadyPrepared(Loop *L, Instruction* MemI, 95 const SCEV *BasePtrStartSCEV, 96 const SCEVConstant *BasePtrIncSCEV); 97 bool runOnFunction(Function &F) override; 98 99 bool runOnLoop(Loop *L); 100 void simplifyLoopLatch(Loop *L); 101 bool rotateLoop(Loop *L); 102 103 private: 104 PPCTargetMachine *TM = nullptr; 105 DominatorTree *DT; 106 LoopInfo *LI; 107 ScalarEvolution *SE; 108 bool PreserveLCSSA; 109 }; 110 111 } // end anonymous namespace 112 113 char PPCLoopPreIncPrep::ID = 0; 114 static const char *name = "Prepare loop for pre-inc. addressing modes"; 115 INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) 116 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 117 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 118 INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) 119 120 FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) { 121 return new PPCLoopPreIncPrep(TM); 122 } 123 124 namespace { 125 126 struct BucketElement { 127 BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {} 128 BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {} 129 130 const SCEVConstant *Offset; 131 Instruction *Instr; 132 }; 133 134 struct Bucket { 135 Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B), 136 Elements(1, BucketElement(I)) {} 137 138 const SCEV *BaseSCEV; 139 SmallVector<BucketElement, 16> Elements; 140 }; 141 142 } // end anonymous namespace 143 144 static bool IsPtrInBounds(Value *BasePtr) { 145 Value *StrippedBasePtr = BasePtr; 146 while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr)) 147 StrippedBasePtr = BC->getOperand(0); 148 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(StrippedBasePtr)) 149 return GEP->isInBounds(); 150 151 return false; 152 } 153 154 static Value *GetPointerOperand(Value *MemI) { 155 if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) { 156 return LMemI->getPointerOperand(); 157 } else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) { 158 return SMemI->getPointerOperand(); 159 } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) { 160 if (IMemI->getIntrinsicID() == Intrinsic::prefetch) 161 return IMemI->getArgOperand(0); 162 } 163 164 return nullptr; 165 } 166 167 bool PPCLoopPreIncPrep::runOnFunction(Function &F) { 168 if (skipFunction(F)) 169 return false; 170 171 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 172 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 173 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); 174 DT = DTWP ? &DTWP->getDomTree() : nullptr; 175 PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); 176 177 bool MadeChange = false; 178 179 for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) 180 for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) 181 MadeChange |= runOnLoop(*L); 182 183 return MadeChange; 184 } 185 186 // In order to prepare for the pre-increment a PHI is added. 187 // This function will check to see if that PHI already exists and will return 188 // true if it found an existing PHI with the same start and increment as the 189 // one we wanted to create. 190 bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI, 191 const SCEV *BasePtrStartSCEV, 192 const SCEVConstant *BasePtrIncSCEV) { 193 BasicBlock *BB = MemI->getParent(); 194 if (!BB) 195 return false; 196 197 BasicBlock *PredBB = L->getLoopPredecessor(); 198 BasicBlock *LatchBB = L->getLoopLatch(); 199 200 if (!PredBB || !LatchBB) 201 return false; 202 203 // Run through the PHIs and see if we have some that looks like a preparation 204 iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis(); 205 for (auto & CurrentPHI : PHIIter) { 206 PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI); 207 if (!CurrentPHINode) 208 continue; 209 210 if (!SE->isSCEVable(CurrentPHINode->getType())) 211 continue; 212 213 const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L); 214 215 const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV); 216 if (!PHIBasePtrSCEV) 217 continue; 218 219 const SCEVConstant *PHIBasePtrIncSCEV = 220 dyn_cast<SCEVConstant>(PHIBasePtrSCEV->getStepRecurrence(*SE)); 221 if (!PHIBasePtrIncSCEV) 222 continue; 223 224 if (CurrentPHINode->getNumIncomingValues() == 2) { 225 if ( (CurrentPHINode->getIncomingBlock(0) == LatchBB && 226 CurrentPHINode->getIncomingBlock(1) == PredBB) || 227 (CurrentPHINode->getIncomingBlock(1) == LatchBB && 228 CurrentPHINode->getIncomingBlock(0) == PredBB) ) { 229 if (PHIBasePtrSCEV->getStart() == BasePtrStartSCEV && 230 PHIBasePtrIncSCEV == BasePtrIncSCEV) { 231 // The existing PHI (CurrentPHINode) has the same start and increment 232 // as the PHI that we wanted to create. 233 ++PHINodeAlreadyExists; 234 return true; 235 } 236 } 237 } 238 } 239 return false; 240 } 241 242 bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { 243 bool MadeChange = false; 244 245 // Only prep. the inner-most loop 246 if (!L->empty()) 247 return MadeChange; 248 249 DEBUG(dbgs() << "PIP: Examining: " << *L << "\n"); 250 251 BasicBlock *Header = L->getHeader(); 252 253 const PPCSubtarget *ST = 254 TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr; 255 256 unsigned HeaderLoopPredCount = 257 std::distance(pred_begin(Header), pred_end(Header)); 258 259 // Collect buckets of comparable addresses used by loads and stores. 260 SmallVector<Bucket, 16> Buckets; 261 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); 262 I != IE; ++I) { 263 for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); 264 J != JE; ++J) { 265 Value *PtrValue; 266 Instruction *MemI; 267 268 if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) { 269 MemI = LMemI; 270 PtrValue = LMemI->getPointerOperand(); 271 } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) { 272 MemI = SMemI; 273 PtrValue = SMemI->getPointerOperand(); 274 } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(J)) { 275 if (IMemI->getIntrinsicID() == Intrinsic::prefetch) { 276 MemI = IMemI; 277 PtrValue = IMemI->getArgOperand(0); 278 } else continue; 279 } else continue; 280 281 unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); 282 if (PtrAddrSpace) 283 continue; 284 285 // There are no update forms for Altivec vector load/stores. 286 if (ST && ST->hasAltivec() && 287 PtrValue->getType()->getPointerElementType()->isVectorTy()) 288 continue; 289 290 if (L->isLoopInvariant(PtrValue)) 291 continue; 292 293 const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); 294 if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) { 295 if (LARSCEV->getLoop() != L) 296 continue; 297 } else { 298 continue; 299 } 300 301 bool FoundBucket = false; 302 for (auto &B : Buckets) { 303 const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV); 304 if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) { 305 B.Elements.push_back(BucketElement(CDiff, MemI)); 306 FoundBucket = true; 307 break; 308 } 309 } 310 311 if (!FoundBucket) { 312 if (Buckets.size() == MaxVars) 313 return MadeChange; 314 Buckets.push_back(Bucket(LSCEV, MemI)); 315 } 316 } 317 } 318 319 if (Buckets.empty()) 320 return MadeChange; 321 322 BasicBlock *LoopPredecessor = L->getLoopPredecessor(); 323 // If there is no loop predecessor, or the loop predecessor's terminator 324 // returns a value (which might contribute to determining the loop's 325 // iteration space), insert a new preheader for the loop. 326 if (!LoopPredecessor || 327 !LoopPredecessor->getTerminator()->getType()->isVoidTy()) { 328 LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); 329 if (LoopPredecessor) 330 MadeChange = true; 331 } 332 if (!LoopPredecessor) 333 return MadeChange; 334 335 DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n"); 336 337 SmallSet<BasicBlock *, 16> BBChanged; 338 for (unsigned i = 0, e = Buckets.size(); i != e; ++i) { 339 // The base address of each bucket is transformed into a phi and the others 340 // are rewritten as offsets of that variable. 341 342 // We have a choice now of which instruction's memory operand we use as the 343 // base for the generated PHI. Always picking the first instruction in each 344 // bucket does not work well, specifically because that instruction might 345 // be a prefetch (and there are no pre-increment dcbt variants). Otherwise, 346 // the choice is somewhat arbitrary, because the backend will happily 347 // generate direct offsets from both the pre-incremented and 348 // post-incremented pointer values. Thus, we'll pick the first non-prefetch 349 // instruction in each bucket, and adjust the recurrence and other offsets 350 // accordingly. 351 for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) { 352 if (auto *II = dyn_cast<IntrinsicInst>(Buckets[i].Elements[j].Instr)) 353 if (II->getIntrinsicID() == Intrinsic::prefetch) 354 continue; 355 356 // If we'd otherwise pick the first element anyway, there's nothing to do. 357 if (j == 0) 358 break; 359 360 // If our chosen element has no offset from the base pointer, there's 361 // nothing to do. 362 if (!Buckets[i].Elements[j].Offset || 363 Buckets[i].Elements[j].Offset->isZero()) 364 break; 365 366 const SCEV *Offset = Buckets[i].Elements[j].Offset; 367 Buckets[i].BaseSCEV = SE->getAddExpr(Buckets[i].BaseSCEV, Offset); 368 for (auto &E : Buckets[i].Elements) { 369 if (E.Offset) 370 E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset)); 371 else 372 E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset)); 373 } 374 375 std::swap(Buckets[i].Elements[j], Buckets[i].Elements[0]); 376 break; 377 } 378 379 const SCEVAddRecExpr *BasePtrSCEV = 380 cast<SCEVAddRecExpr>(Buckets[i].BaseSCEV); 381 if (!BasePtrSCEV->isAffine()) 382 continue; 383 384 DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); 385 assert(BasePtrSCEV->getLoop() == L && 386 "AddRec for the wrong loop?"); 387 388 // The instruction corresponding to the Bucket's BaseSCEV must be the first 389 // in the vector of elements. 390 Instruction *MemI = Buckets[i].Elements.begin()->Instr; 391 Value *BasePtr = GetPointerOperand(MemI); 392 assert(BasePtr && "No pointer operand"); 393 394 Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext()); 395 Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(), 396 BasePtr->getType()->getPointerAddressSpace()); 397 398 const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart(); 399 if (!SE->isLoopInvariant(BasePtrStartSCEV, L)) 400 continue; 401 402 const SCEVConstant *BasePtrIncSCEV = 403 dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE)); 404 if (!BasePtrIncSCEV) 405 continue; 406 BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV); 407 if (!isSafeToExpand(BasePtrStartSCEV, *SE)) 408 continue; 409 410 DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); 411 412 if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV)) 413 continue; 414 415 PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount, 416 MemI->hasName() ? MemI->getName() + ".phi" : "", 417 Header->getFirstNonPHI()); 418 419 SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart"); 420 Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy, 421 LoopPredecessor->getTerminator()); 422 423 // Note that LoopPredecessor might occur in the predecessor list multiple 424 // times, and we need to add it the right number of times. 425 for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); 426 PI != PE; ++PI) { 427 if (*PI != LoopPredecessor) 428 continue; 429 430 NewPHI->addIncoming(BasePtrStart, LoopPredecessor); 431 } 432 433 Instruction *InsPoint = &*Header->getFirstInsertionPt(); 434 GetElementPtrInst *PtrInc = GetElementPtrInst::Create( 435 I8Ty, NewPHI, BasePtrIncSCEV->getValue(), 436 MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint); 437 PtrInc->setIsInBounds(IsPtrInBounds(BasePtr)); 438 for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); 439 PI != PE; ++PI) { 440 if (*PI == LoopPredecessor) 441 continue; 442 443 NewPHI->addIncoming(PtrInc, *PI); 444 } 445 446 Instruction *NewBasePtr; 447 if (PtrInc->getType() != BasePtr->getType()) 448 NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(), 449 PtrInc->hasName() ? PtrInc->getName() + ".cast" : "", InsPoint); 450 else 451 NewBasePtr = PtrInc; 452 453 if (Instruction *IDel = dyn_cast<Instruction>(BasePtr)) 454 BBChanged.insert(IDel->getParent()); 455 BasePtr->replaceAllUsesWith(NewBasePtr); 456 RecursivelyDeleteTriviallyDeadInstructions(BasePtr); 457 458 // Keep track of the replacement pointer values we've inserted so that we 459 // don't generate more pointer values than necessary. 460 SmallPtrSet<Value *, 16> NewPtrs; 461 NewPtrs.insert( NewBasePtr); 462 463 for (auto I = std::next(Buckets[i].Elements.begin()), 464 IE = Buckets[i].Elements.end(); I != IE; ++I) { 465 Value *Ptr = GetPointerOperand(I->Instr); 466 assert(Ptr && "No pointer operand"); 467 if (NewPtrs.count(Ptr)) 468 continue; 469 470 Instruction *RealNewPtr; 471 if (!I->Offset || I->Offset->getValue()->isZero()) { 472 RealNewPtr = NewBasePtr; 473 } else { 474 Instruction *PtrIP = dyn_cast<Instruction>(Ptr); 475 if (PtrIP && isa<Instruction>(NewBasePtr) && 476 cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent()) 477 PtrIP = nullptr; 478 else if (isa<PHINode>(PtrIP)) 479 PtrIP = &*PtrIP->getParent()->getFirstInsertionPt(); 480 else if (!PtrIP) 481 PtrIP = I->Instr; 482 483 GetElementPtrInst *NewPtr = GetElementPtrInst::Create( 484 I8Ty, PtrInc, I->Offset->getValue(), 485 I->Instr->hasName() ? I->Instr->getName() + ".off" : "", PtrIP); 486 if (!PtrIP) 487 NewPtr->insertAfter(cast<Instruction>(PtrInc)); 488 NewPtr->setIsInBounds(IsPtrInBounds(Ptr)); 489 RealNewPtr = NewPtr; 490 } 491 492 if (Instruction *IDel = dyn_cast<Instruction>(Ptr)) 493 BBChanged.insert(IDel->getParent()); 494 495 Instruction *ReplNewPtr; 496 if (Ptr->getType() != RealNewPtr->getType()) { 497 ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(), 498 Ptr->hasName() ? Ptr->getName() + ".cast" : ""); 499 ReplNewPtr->insertAfter(RealNewPtr); 500 } else 501 ReplNewPtr = RealNewPtr; 502 503 Ptr->replaceAllUsesWith(ReplNewPtr); 504 RecursivelyDeleteTriviallyDeadInstructions(Ptr); 505 506 NewPtrs.insert(RealNewPtr); 507 } 508 509 MadeChange = true; 510 } 511 512 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); 513 I != IE; ++I) { 514 if (BBChanged.count(*I)) 515 DeleteDeadPHIs(*I); 516 } 517 518 return MadeChange; 519 } 520