1 //===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a pass to prepare loops for pre-increment addressing 11 // modes. Additional PHIs are created for loop induction variables used by 12 // load/store instructions so that the pre-increment forms can be used. 13 // Generically, this means transforming loops like this: 14 // for (int i = 0; i < n; ++i) 15 // array[i] = c; 16 // to look like this: 17 // T *p = array[-1]; 18 // for (int i = 0; i < n; ++i) 19 // *++p = c; 20 //===----------------------------------------------------------------------===// 21 22 #define DEBUG_TYPE "ppc-loop-preinc-prep" 23 24 #include "PPC.h" 25 #include "PPCSubtarget.h" 26 #include "PPCTargetMachine.h" 27 #include "llvm/ADT/DepthFirstIterator.h" 28 #include "llvm/ADT/SmallPtrSet.h" 29 #include "llvm/ADT/SmallSet.h" 30 #include "llvm/ADT/SmallVector.h" 31 #include "llvm/ADT/Statistic.h" 32 #include "llvm/Analysis/LoopInfo.h" 33 #include "llvm/Analysis/ScalarEvolution.h" 34 #include "llvm/Analysis/ScalarEvolutionExpander.h" 35 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 36 #include "llvm/Transforms/Utils/Local.h" 37 #include "llvm/IR/BasicBlock.h" 38 #include "llvm/IR/CFG.h" 39 #include "llvm/IR/Dominators.h" 40 #include "llvm/IR/Instruction.h" 41 #include "llvm/IR/Instructions.h" 42 #include "llvm/IR/IntrinsicInst.h" 43 #include "llvm/IR/Module.h" 44 #include "llvm/IR/Type.h" 45 #include "llvm/IR/Value.h" 46 #include "llvm/Pass.h" 47 #include "llvm/Support/Casting.h" 48 #include "llvm/Support/CommandLine.h" 49 #include "llvm/Support/Debug.h" 50 #include "llvm/Transforms/Scalar.h" 51 #include "llvm/Transforms/Utils.h" 52 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 53 #include "llvm/Transforms/Utils/LoopUtils.h" 54 #include <cassert> 55 #include <iterator> 56 #include <utility> 57 58 using namespace llvm; 59 60 // By default, we limit this to creating 16 PHIs (which is a little over half 61 // of the allocatable register set). 62 static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars", 63 cl::Hidden, cl::init(16), 64 cl::desc("Potential PHI threshold for PPC preinc loop prep")); 65 66 STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form"); 67 68 namespace llvm { 69 70 void initializePPCLoopPreIncPrepPass(PassRegistry&); 71 72 } // end namespace llvm 73 74 namespace { 75 76 class PPCLoopPreIncPrep : public FunctionPass { 77 public: 78 static char ID; // Pass ID, replacement for typeid 79 80 PPCLoopPreIncPrep() : FunctionPass(ID) { 81 initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry()); 82 } 83 84 PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { 85 initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry()); 86 } 87 88 void getAnalysisUsage(AnalysisUsage &AU) const override { 89 AU.addPreserved<DominatorTreeWrapperPass>(); 90 AU.addRequired<LoopInfoWrapperPass>(); 91 AU.addPreserved<LoopInfoWrapperPass>(); 92 AU.addRequired<ScalarEvolutionWrapperPass>(); 93 } 94 95 bool alreadyPrepared(Loop *L, Instruction* MemI, 96 const SCEV *BasePtrStartSCEV, 97 const SCEVConstant *BasePtrIncSCEV); 98 bool runOnFunction(Function &F) override; 99 100 bool runOnLoop(Loop *L); 101 void simplifyLoopLatch(Loop *L); 102 bool rotateLoop(Loop *L); 103 104 private: 105 PPCTargetMachine *TM = nullptr; 106 DominatorTree *DT; 107 LoopInfo *LI; 108 ScalarEvolution *SE; 109 bool PreserveLCSSA; 110 }; 111 112 } // end anonymous namespace 113 114 char PPCLoopPreIncPrep::ID = 0; 115 static const char *name = "Prepare loop for pre-inc. addressing modes"; 116 INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) 117 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 118 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 119 INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) 120 121 FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) { 122 return new PPCLoopPreIncPrep(TM); 123 } 124 125 namespace { 126 127 struct BucketElement { 128 BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {} 129 BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {} 130 131 const SCEVConstant *Offset; 132 Instruction *Instr; 133 }; 134 135 struct Bucket { 136 Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B), 137 Elements(1, BucketElement(I)) {} 138 139 const SCEV *BaseSCEV; 140 SmallVector<BucketElement, 16> Elements; 141 }; 142 143 } // end anonymous namespace 144 145 static bool IsPtrInBounds(Value *BasePtr) { 146 Value *StrippedBasePtr = BasePtr; 147 while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr)) 148 StrippedBasePtr = BC->getOperand(0); 149 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(StrippedBasePtr)) 150 return GEP->isInBounds(); 151 152 return false; 153 } 154 155 static Value *GetPointerOperand(Value *MemI) { 156 if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) { 157 return LMemI->getPointerOperand(); 158 } else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) { 159 return SMemI->getPointerOperand(); 160 } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) { 161 if (IMemI->getIntrinsicID() == Intrinsic::prefetch) 162 return IMemI->getArgOperand(0); 163 } 164 165 return nullptr; 166 } 167 168 bool PPCLoopPreIncPrep::runOnFunction(Function &F) { 169 if (skipFunction(F)) 170 return false; 171 172 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 173 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 174 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); 175 DT = DTWP ? &DTWP->getDomTree() : nullptr; 176 PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); 177 178 bool MadeChange = false; 179 180 for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) 181 for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) 182 MadeChange |= runOnLoop(*L); 183 184 return MadeChange; 185 } 186 187 // In order to prepare for the pre-increment a PHI is added. 188 // This function will check to see if that PHI already exists and will return 189 // true if it found an existing PHI with the same start and increment as the 190 // one we wanted to create. 191 bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI, 192 const SCEV *BasePtrStartSCEV, 193 const SCEVConstant *BasePtrIncSCEV) { 194 BasicBlock *BB = MemI->getParent(); 195 if (!BB) 196 return false; 197 198 BasicBlock *PredBB = L->getLoopPredecessor(); 199 BasicBlock *LatchBB = L->getLoopLatch(); 200 201 if (!PredBB || !LatchBB) 202 return false; 203 204 // Run through the PHIs and see if we have some that looks like a preparation 205 iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis(); 206 for (auto & CurrentPHI : PHIIter) { 207 PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI); 208 if (!CurrentPHINode) 209 continue; 210 211 if (!SE->isSCEVable(CurrentPHINode->getType())) 212 continue; 213 214 const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L); 215 216 const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV); 217 if (!PHIBasePtrSCEV) 218 continue; 219 220 const SCEVConstant *PHIBasePtrIncSCEV = 221 dyn_cast<SCEVConstant>(PHIBasePtrSCEV->getStepRecurrence(*SE)); 222 if (!PHIBasePtrIncSCEV) 223 continue; 224 225 if (CurrentPHINode->getNumIncomingValues() == 2) { 226 if ( (CurrentPHINode->getIncomingBlock(0) == LatchBB && 227 CurrentPHINode->getIncomingBlock(1) == PredBB) || 228 (CurrentPHINode->getIncomingBlock(1) == LatchBB && 229 CurrentPHINode->getIncomingBlock(0) == PredBB) ) { 230 if (PHIBasePtrSCEV->getStart() == BasePtrStartSCEV && 231 PHIBasePtrIncSCEV == BasePtrIncSCEV) { 232 // The existing PHI (CurrentPHINode) has the same start and increment 233 // as the PHI that we wanted to create. 234 ++PHINodeAlreadyExists; 235 return true; 236 } 237 } 238 } 239 } 240 return false; 241 } 242 243 bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { 244 bool MadeChange = false; 245 246 // Only prep. the inner-most loop 247 if (!L->empty()) 248 return MadeChange; 249 250 LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n"); 251 252 BasicBlock *Header = L->getHeader(); 253 254 const PPCSubtarget *ST = 255 TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr; 256 257 unsigned HeaderLoopPredCount = pred_size(Header); 258 259 // Collect buckets of comparable addresses used by loads and stores. 260 SmallVector<Bucket, 16> Buckets; 261 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); 262 I != IE; ++I) { 263 for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); 264 J != JE; ++J) { 265 Value *PtrValue; 266 Instruction *MemI; 267 268 if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) { 269 MemI = LMemI; 270 PtrValue = LMemI->getPointerOperand(); 271 } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) { 272 MemI = SMemI; 273 PtrValue = SMemI->getPointerOperand(); 274 } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(J)) { 275 if (IMemI->getIntrinsicID() == Intrinsic::prefetch) { 276 MemI = IMemI; 277 PtrValue = IMemI->getArgOperand(0); 278 } else continue; 279 } else continue; 280 281 unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); 282 if (PtrAddrSpace) 283 continue; 284 285 // There are no update forms for Altivec vector load/stores. 286 if (ST && ST->hasAltivec() && 287 PtrValue->getType()->getPointerElementType()->isVectorTy()) 288 continue; 289 290 if (L->isLoopInvariant(PtrValue)) 291 continue; 292 293 const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); 294 if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) { 295 if (LARSCEV->getLoop() != L) 296 continue; 297 // See getPreIndexedAddressParts, the displacement for LDU/STDU has to 298 // be 4's multiple (DS-form). For i64 loads/stores when the displacement 299 // fits in a 16-bit signed field but isn't a multiple of 4, it will be 300 // useless and possible to break some original well-form addressing mode 301 // to make this pre-inc prep for it. 302 if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) { 303 if (const SCEVConstant *StepConst = 304 dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) { 305 const APInt &ConstInt = StepConst->getValue()->getValue(); 306 if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0) 307 continue; 308 } 309 } 310 } else { 311 continue; 312 } 313 314 bool FoundBucket = false; 315 for (auto &B : Buckets) { 316 const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV); 317 if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) { 318 B.Elements.push_back(BucketElement(CDiff, MemI)); 319 FoundBucket = true; 320 break; 321 } 322 } 323 324 if (!FoundBucket) { 325 if (Buckets.size() == MaxVars) 326 return MadeChange; 327 Buckets.push_back(Bucket(LSCEV, MemI)); 328 } 329 } 330 } 331 332 if (Buckets.empty()) 333 return MadeChange; 334 335 BasicBlock *LoopPredecessor = L->getLoopPredecessor(); 336 // If there is no loop predecessor, or the loop predecessor's terminator 337 // returns a value (which might contribute to determining the loop's 338 // iteration space), insert a new preheader for the loop. 339 if (!LoopPredecessor || 340 !LoopPredecessor->getTerminator()->getType()->isVoidTy()) { 341 LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); 342 if (LoopPredecessor) 343 MadeChange = true; 344 } 345 if (!LoopPredecessor) 346 return MadeChange; 347 348 LLVM_DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n"); 349 350 SmallSet<BasicBlock *, 16> BBChanged; 351 for (unsigned i = 0, e = Buckets.size(); i != e; ++i) { 352 // The base address of each bucket is transformed into a phi and the others 353 // are rewritten as offsets of that variable. 354 355 // We have a choice now of which instruction's memory operand we use as the 356 // base for the generated PHI. Always picking the first instruction in each 357 // bucket does not work well, specifically because that instruction might 358 // be a prefetch (and there are no pre-increment dcbt variants). Otherwise, 359 // the choice is somewhat arbitrary, because the backend will happily 360 // generate direct offsets from both the pre-incremented and 361 // post-incremented pointer values. Thus, we'll pick the first non-prefetch 362 // instruction in each bucket, and adjust the recurrence and other offsets 363 // accordingly. 364 for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) { 365 if (auto *II = dyn_cast<IntrinsicInst>(Buckets[i].Elements[j].Instr)) 366 if (II->getIntrinsicID() == Intrinsic::prefetch) 367 continue; 368 369 // If we'd otherwise pick the first element anyway, there's nothing to do. 370 if (j == 0) 371 break; 372 373 // If our chosen element has no offset from the base pointer, there's 374 // nothing to do. 375 if (!Buckets[i].Elements[j].Offset || 376 Buckets[i].Elements[j].Offset->isZero()) 377 break; 378 379 const SCEV *Offset = Buckets[i].Elements[j].Offset; 380 Buckets[i].BaseSCEV = SE->getAddExpr(Buckets[i].BaseSCEV, Offset); 381 for (auto &E : Buckets[i].Elements) { 382 if (E.Offset) 383 E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset)); 384 else 385 E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset)); 386 } 387 388 std::swap(Buckets[i].Elements[j], Buckets[i].Elements[0]); 389 break; 390 } 391 392 const SCEVAddRecExpr *BasePtrSCEV = 393 cast<SCEVAddRecExpr>(Buckets[i].BaseSCEV); 394 if (!BasePtrSCEV->isAffine()) 395 continue; 396 397 LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); 398 assert(BasePtrSCEV->getLoop() == L && 399 "AddRec for the wrong loop?"); 400 401 // The instruction corresponding to the Bucket's BaseSCEV must be the first 402 // in the vector of elements. 403 Instruction *MemI = Buckets[i].Elements.begin()->Instr; 404 Value *BasePtr = GetPointerOperand(MemI); 405 assert(BasePtr && "No pointer operand"); 406 407 Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext()); 408 Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(), 409 BasePtr->getType()->getPointerAddressSpace()); 410 411 const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart(); 412 if (!SE->isLoopInvariant(BasePtrStartSCEV, L)) 413 continue; 414 415 const SCEVConstant *BasePtrIncSCEV = 416 dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE)); 417 if (!BasePtrIncSCEV) 418 continue; 419 BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV); 420 if (!isSafeToExpand(BasePtrStartSCEV, *SE)) 421 continue; 422 423 LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); 424 425 if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV)) 426 continue; 427 428 PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount, 429 MemI->hasName() ? MemI->getName() + ".phi" : "", 430 Header->getFirstNonPHI()); 431 432 SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart"); 433 Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy, 434 LoopPredecessor->getTerminator()); 435 436 // Note that LoopPredecessor might occur in the predecessor list multiple 437 // times, and we need to add it the right number of times. 438 for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); 439 PI != PE; ++PI) { 440 if (*PI != LoopPredecessor) 441 continue; 442 443 NewPHI->addIncoming(BasePtrStart, LoopPredecessor); 444 } 445 446 Instruction *InsPoint = &*Header->getFirstInsertionPt(); 447 GetElementPtrInst *PtrInc = GetElementPtrInst::Create( 448 I8Ty, NewPHI, BasePtrIncSCEV->getValue(), 449 MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint); 450 PtrInc->setIsInBounds(IsPtrInBounds(BasePtr)); 451 for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); 452 PI != PE; ++PI) { 453 if (*PI == LoopPredecessor) 454 continue; 455 456 NewPHI->addIncoming(PtrInc, *PI); 457 } 458 459 Instruction *NewBasePtr; 460 if (PtrInc->getType() != BasePtr->getType()) 461 NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(), 462 PtrInc->hasName() ? PtrInc->getName() + ".cast" : "", InsPoint); 463 else 464 NewBasePtr = PtrInc; 465 466 if (Instruction *IDel = dyn_cast<Instruction>(BasePtr)) 467 BBChanged.insert(IDel->getParent()); 468 BasePtr->replaceAllUsesWith(NewBasePtr); 469 RecursivelyDeleteTriviallyDeadInstructions(BasePtr); 470 471 // Keep track of the replacement pointer values we've inserted so that we 472 // don't generate more pointer values than necessary. 473 SmallPtrSet<Value *, 16> NewPtrs; 474 NewPtrs.insert( NewBasePtr); 475 476 for (auto I = std::next(Buckets[i].Elements.begin()), 477 IE = Buckets[i].Elements.end(); I != IE; ++I) { 478 Value *Ptr = GetPointerOperand(I->Instr); 479 assert(Ptr && "No pointer operand"); 480 if (NewPtrs.count(Ptr)) 481 continue; 482 483 Instruction *RealNewPtr; 484 if (!I->Offset || I->Offset->getValue()->isZero()) { 485 RealNewPtr = NewBasePtr; 486 } else { 487 Instruction *PtrIP = dyn_cast<Instruction>(Ptr); 488 if (PtrIP && isa<Instruction>(NewBasePtr) && 489 cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent()) 490 PtrIP = nullptr; 491 else if (isa<PHINode>(PtrIP)) 492 PtrIP = &*PtrIP->getParent()->getFirstInsertionPt(); 493 else if (!PtrIP) 494 PtrIP = I->Instr; 495 496 GetElementPtrInst *NewPtr = GetElementPtrInst::Create( 497 I8Ty, PtrInc, I->Offset->getValue(), 498 I->Instr->hasName() ? I->Instr->getName() + ".off" : "", PtrIP); 499 if (!PtrIP) 500 NewPtr->insertAfter(cast<Instruction>(PtrInc)); 501 NewPtr->setIsInBounds(IsPtrInBounds(Ptr)); 502 RealNewPtr = NewPtr; 503 } 504 505 if (Instruction *IDel = dyn_cast<Instruction>(Ptr)) 506 BBChanged.insert(IDel->getParent()); 507 508 Instruction *ReplNewPtr; 509 if (Ptr->getType() != RealNewPtr->getType()) { 510 ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(), 511 Ptr->hasName() ? Ptr->getName() + ".cast" : ""); 512 ReplNewPtr->insertAfter(RealNewPtr); 513 } else 514 ReplNewPtr = RealNewPtr; 515 516 Ptr->replaceAllUsesWith(ReplNewPtr); 517 RecursivelyDeleteTriviallyDeadInstructions(Ptr); 518 519 NewPtrs.insert(RealNewPtr); 520 } 521 522 MadeChange = true; 523 } 524 525 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); 526 I != IE; ++I) { 527 if (BBChanged.count(*I)) 528 DeleteDeadPHIs(*I); 529 } 530 531 return MadeChange; 532 } 533