1 //===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a pass to prepare loops for pre-increment addressing 11 // modes. Additional PHIs are created for loop induction variables used by 12 // load/store instructions so that the pre-increment forms can be used. 13 // Generically, this means transforming loops like this: 14 // for (int i = 0; i < n; ++i) 15 // array[i] = c; 16 // to look like this: 17 // T *p = array[-1]; 18 // for (int i = 0; i < n; ++i) 19 // *++p = c; 20 //===----------------------------------------------------------------------===// 21 22 #define DEBUG_TYPE "ppc-loop-preinc-prep" 23 #include "PPC.h" 24 #include "PPCTargetMachine.h" 25 #include "llvm/ADT/DepthFirstIterator.h" 26 #include "llvm/ADT/STLExtras.h" 27 #include "llvm/ADT/SmallSet.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/Analysis/CodeMetrics.h" 30 #include "llvm/Analysis/InstructionSimplify.h" 31 #include "llvm/Analysis/LoopInfo.h" 32 #include "llvm/Analysis/ScalarEvolution.h" 33 #include "llvm/Analysis/ScalarEvolutionExpander.h" 34 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 35 #include "llvm/Analysis/ValueTracking.h" 36 #include "llvm/IR/CFG.h" 37 #include "llvm/IR/Dominators.h" 38 #include "llvm/IR/Function.h" 39 #include "llvm/IR/IntrinsicInst.h" 40 #include "llvm/IR/Module.h" 41 #include "llvm/Support/CommandLine.h" 42 #include "llvm/Support/Debug.h" 43 #include "llvm/Transforms/Scalar.h" 44 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 45 #include "llvm/Transforms/Utils/Local.h" 46 #include "llvm/Transforms/Utils/LoopUtils.h" 47 #include "llvm/Transforms/Utils/ValueMapper.h" 48 using namespace llvm; 49 50 // By default, we limit this to creating 16 PHIs (which is a little over half 51 // of the allocatable register set). 52 static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars", 53 cl::Hidden, cl::init(16), 54 cl::desc("Potential PHI threshold for PPC preinc loop prep")); 55 56 namespace llvm { 57 void initializePPCLoopPreIncPrepPass(PassRegistry&); 58 } 59 60 namespace { 61 62 class PPCLoopPreIncPrep : public FunctionPass { 63 public: 64 static char ID; // Pass ID, replacement for typeid 65 PPCLoopPreIncPrep() : FunctionPass(ID), TM(nullptr) { 66 initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry()); 67 } 68 PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { 69 initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry()); 70 } 71 72 void getAnalysisUsage(AnalysisUsage &AU) const override { 73 AU.addPreserved<DominatorTreeWrapperPass>(); 74 AU.addRequired<LoopInfoWrapperPass>(); 75 AU.addPreserved<LoopInfoWrapperPass>(); 76 AU.addRequired<ScalarEvolution>(); 77 } 78 79 bool runOnFunction(Function &F) override; 80 81 bool runOnLoop(Loop *L); 82 void simplifyLoopLatch(Loop *L); 83 bool rotateLoop(Loop *L); 84 85 private: 86 PPCTargetMachine *TM; 87 LoopInfo *LI; 88 ScalarEvolution *SE; 89 }; 90 } 91 92 char PPCLoopPreIncPrep::ID = 0; 93 static const char *name = "Prepare loop for pre-inc. addressing modes"; 94 INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) 95 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 96 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 97 INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) 98 99 FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) { 100 return new PPCLoopPreIncPrep(TM); 101 } 102 103 namespace { 104 struct SCEVLess : std::binary_function<const SCEV *, const SCEV *, bool> 105 { 106 SCEVLess(ScalarEvolution *SE) : SE(SE) {} 107 108 bool operator() (const SCEV *X, const SCEV *Y) const { 109 const SCEV *Diff = SE->getMinusSCEV(X, Y); 110 return cast<SCEVConstant>(Diff)->getValue()->getSExtValue() < 0; 111 } 112 113 protected: 114 ScalarEvolution *SE; 115 }; 116 } 117 118 static bool IsPtrInBounds(Value *BasePtr) { 119 Value *StrippedBasePtr = BasePtr; 120 while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr)) 121 StrippedBasePtr = BC->getOperand(0); 122 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(StrippedBasePtr)) 123 return GEP->isInBounds(); 124 125 return false; 126 } 127 128 static Value *GetPointerOperand(Value *MemI) { 129 if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) { 130 return LMemI->getPointerOperand(); 131 } else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) { 132 return SMemI->getPointerOperand(); 133 } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) { 134 if (IMemI->getIntrinsicID() == Intrinsic::prefetch) 135 return IMemI->getArgOperand(0); 136 } 137 138 return 0; 139 } 140 141 bool PPCLoopPreIncPrep::runOnFunction(Function &F) { 142 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 143 SE = &getAnalysis<ScalarEvolution>(); 144 145 bool MadeChange = false; 146 147 for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) 148 for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) 149 MadeChange |= runOnLoop(*L); 150 151 return MadeChange; 152 } 153 154 bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { 155 bool MadeChange = false; 156 157 // Only prep. the inner-most loop 158 if (!L->empty()) 159 return MadeChange; 160 161 DEBUG(dbgs() << "PIP: Examining: " << *L << "\n"); 162 163 BasicBlock *Header = L->getHeader(); 164 165 const PPCSubtarget *ST = 166 TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr; 167 168 unsigned HeaderLoopPredCount = 169 std::distance(pred_begin(Header), pred_end(Header)); 170 171 // Collect buckets of comparable addresses used by loads and stores. 172 typedef std::multimap<const SCEV *, Instruction *, SCEVLess> Bucket; 173 SmallVector<Bucket, 16> Buckets; 174 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); 175 I != IE; ++I) { 176 for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); 177 J != JE; ++J) { 178 Value *PtrValue; 179 Instruction *MemI; 180 181 if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) { 182 MemI = LMemI; 183 PtrValue = LMemI->getPointerOperand(); 184 } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) { 185 MemI = SMemI; 186 PtrValue = SMemI->getPointerOperand(); 187 } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(J)) { 188 if (IMemI->getIntrinsicID() == Intrinsic::prefetch) { 189 MemI = IMemI; 190 PtrValue = IMemI->getArgOperand(0); 191 } else continue; 192 } else continue; 193 194 unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); 195 if (PtrAddrSpace) 196 continue; 197 198 // There are no update forms for Altivec vector load/stores. 199 if (ST && ST->hasAltivec() && 200 PtrValue->getType()->getPointerElementType()->isVectorTy()) 201 continue; 202 203 if (L->isLoopInvariant(PtrValue)) 204 continue; 205 206 const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); 207 if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) { 208 if (LARSCEV->getLoop() != L) 209 continue; 210 } else { 211 continue; 212 } 213 214 bool FoundBucket = false; 215 for (unsigned i = 0, e = Buckets.size(); i != e; ++i) 216 for (Bucket::iterator K = Buckets[i].begin(), KE = Buckets[i].end(); 217 K != KE; ++K) { 218 const SCEV *Diff = SE->getMinusSCEV(K->first, LSCEV); 219 if (isa<SCEVConstant>(Diff)) { 220 Buckets[i].insert(std::make_pair(LSCEV, MemI)); 221 FoundBucket = true; 222 break; 223 } 224 } 225 226 if (!FoundBucket) { 227 Buckets.push_back(Bucket(SCEVLess(SE))); 228 Buckets[Buckets.size()-1].insert(std::make_pair(LSCEV, MemI)); 229 } 230 } 231 } 232 233 if (Buckets.empty() || Buckets.size() > MaxVars) 234 return MadeChange; 235 236 BasicBlock *LoopPredecessor = L->getLoopPredecessor(); 237 // If there is no loop predecessor, or the loop predecessor's terminator 238 // returns a value (which might contribute to determining the loop's 239 // iteration space), insert a new preheader for the loop. 240 if (!LoopPredecessor || 241 !LoopPredecessor->getTerminator()->getType()->isVoidTy()) { 242 LoopPredecessor = InsertPreheaderForLoop(L, this); 243 if (LoopPredecessor) 244 MadeChange = true; 245 } 246 if (!LoopPredecessor) 247 return MadeChange; 248 249 DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n"); 250 251 SmallSet<BasicBlock *, 16> BBChanged; 252 for (unsigned i = 0, e = Buckets.size(); i != e; ++i) { 253 // The base address of each bucket is transformed into a phi and the others 254 // are rewritten as offsets of that variable. 255 256 const SCEVAddRecExpr *BasePtrSCEV = 257 cast<SCEVAddRecExpr>(Buckets[i].begin()->first); 258 if (!BasePtrSCEV->isAffine()) 259 continue; 260 261 DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); 262 assert(BasePtrSCEV->getLoop() == L && 263 "AddRec for the wrong loop?"); 264 265 Instruction *MemI = Buckets[i].begin()->second; 266 Value *BasePtr = GetPointerOperand(MemI); 267 assert(BasePtr && "No pointer operand"); 268 269 Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext()); 270 Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(), 271 BasePtr->getType()->getPointerAddressSpace()); 272 273 const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart(); 274 if (!SE->isLoopInvariant(BasePtrStartSCEV, L)) 275 continue; 276 277 const SCEVConstant *BasePtrIncSCEV = 278 dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE)); 279 if (!BasePtrIncSCEV) 280 continue; 281 BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV); 282 if (!isSafeToExpand(BasePtrStartSCEV, *SE)) 283 continue; 284 285 DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); 286 287 PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount, 288 MemI->hasName() ? MemI->getName() + ".phi" : "", 289 Header->getFirstNonPHI()); 290 291 SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart"); 292 Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy, 293 LoopPredecessor->getTerminator()); 294 295 // Note that LoopPredecessor might occur in the predecessor list multiple 296 // times, and we need to add it the right number of times. 297 for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); 298 PI != PE; ++PI) { 299 if (*PI != LoopPredecessor) 300 continue; 301 302 NewPHI->addIncoming(BasePtrStart, LoopPredecessor); 303 } 304 305 Instruction *InsPoint = Header->getFirstInsertionPt(); 306 GetElementPtrInst *PtrInc = GetElementPtrInst::Create( 307 I8Ty, NewPHI, BasePtrIncSCEV->getValue(), 308 MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint); 309 PtrInc->setIsInBounds(IsPtrInBounds(BasePtr)); 310 for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); 311 PI != PE; ++PI) { 312 if (*PI == LoopPredecessor) 313 continue; 314 315 NewPHI->addIncoming(PtrInc, *PI); 316 } 317 318 Instruction *NewBasePtr; 319 if (PtrInc->getType() != BasePtr->getType()) 320 NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(), 321 PtrInc->hasName() ? PtrInc->getName() + ".cast" : "", InsPoint); 322 else 323 NewBasePtr = PtrInc; 324 325 if (Instruction *IDel = dyn_cast<Instruction>(BasePtr)) 326 BBChanged.insert(IDel->getParent()); 327 BasePtr->replaceAllUsesWith(NewBasePtr); 328 RecursivelyDeleteTriviallyDeadInstructions(BasePtr); 329 330 Value *LastNewPtr = NewBasePtr; 331 for (Bucket::iterator I = std::next(Buckets[i].begin()), 332 IE = Buckets[i].end(); I != IE; ++I) { 333 Value *Ptr = GetPointerOperand(I->second); 334 assert(Ptr && "No pointer operand"); 335 if (Ptr == LastNewPtr) 336 continue; 337 338 Instruction *RealNewPtr; 339 const SCEVConstant *Diff = 340 cast<SCEVConstant>(SE->getMinusSCEV(I->first, BasePtrSCEV)); 341 if (Diff->isZero()) { 342 RealNewPtr = NewBasePtr; 343 } else { 344 Instruction *PtrIP = dyn_cast<Instruction>(Ptr); 345 if (PtrIP && isa<Instruction>(NewBasePtr) && 346 cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent()) 347 PtrIP = 0; 348 else if (isa<PHINode>(PtrIP)) 349 PtrIP = PtrIP->getParent()->getFirstInsertionPt(); 350 else if (!PtrIP) 351 PtrIP = I->second; 352 353 GetElementPtrInst *NewPtr = GetElementPtrInst::Create( 354 I8Ty, PtrInc, Diff->getValue(), 355 I->second->hasName() ? I->second->getName() + ".off" : "", PtrIP); 356 if (!PtrIP) 357 NewPtr->insertAfter(cast<Instruction>(PtrInc)); 358 NewPtr->setIsInBounds(IsPtrInBounds(Ptr)); 359 RealNewPtr = NewPtr; 360 } 361 362 if (Instruction *IDel = dyn_cast<Instruction>(Ptr)) 363 BBChanged.insert(IDel->getParent()); 364 365 Instruction *ReplNewPtr; 366 if (Ptr->getType() != RealNewPtr->getType()) { 367 ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(), 368 Ptr->hasName() ? Ptr->getName() + ".cast" : ""); 369 ReplNewPtr->insertAfter(RealNewPtr); 370 } else 371 ReplNewPtr = RealNewPtr; 372 373 Ptr->replaceAllUsesWith(ReplNewPtr); 374 RecursivelyDeleteTriviallyDeadInstructions(Ptr); 375 376 LastNewPtr = RealNewPtr; 377 } 378 379 MadeChange = true; 380 } 381 382 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); 383 I != IE; ++I) { 384 if (BBChanged.count(*I)) 385 DeleteDeadPHIs(*I); 386 } 387 388 return MadeChange; 389 } 390 391