//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass implements an idiom recognizer that transforms simple loops into a
// non-loop form.  In cases that this kicks in, it can be a significant
// performance win.
//
//===----------------------------------------------------------------------===//
//
// TODO List:
//
// Future loop memory idioms to recognize:
//   memcmp, memmove, strlen, etc.
// Future floating point idioms to recognize in -ffast-math mode:
//   fpowi
// Future integer operation idioms to recognize:
//   ctpop, ctlz, cttz
//
// Beware that isel's default lowering for ctpop is highly inefficient for
// i64 and larger types when i64 is legal and the value has few bits set.  It
// would be good to enhance isel to emit a loop for ctpop in this case.
//
// We should enhance the memset/memcpy recognition to handle multiple stores in
// the loop.  This would handle things like:
//   void foo(_Complex float *P)
//     for (i) { __real__(*P) = 0;  __imag__(*P) = 0; }
//
// We should enhance this to handle negative strides through memory.
// Alternatively (and perhaps better) we could rely on an earlier pass to force
// forward iteration through memory, which is generally better for cache
// behavior.  Negative strides *do* happen for memset/memcpy loops.
//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "loop-idiom"
#include "llvm/Transforms/Scalar.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");

namespace {
  class LoopIdiomRecognize : public LoopPass {
    Loop *CurLoop;
    const TargetData *TD;
    DominatorTree *DT;
    ScalarEvolution *SE;
    TargetLibraryInfo *TLI;
  public:
    static char ID;
    explicit LoopIdiomRecognize() : LoopPass(ID) {
      initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
    }

    bool runOnLoop(Loop *L, LPPassManager &LPM);
    bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
                        SmallVectorImpl<BasicBlock*> &ExitBlocks);

    bool processLoopStore(StoreInst *SI, const SCEV *BECount);
    bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);

    bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                                 unsigned StoreAlignment,
                                 Value *SplatValue, Instruction *TheStore,
                                 const SCEVAddRecExpr *Ev,
                                 const SCEV *BECount);
    bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
                                    const SCEVAddRecExpr *StoreEv,
                                    const SCEVAddRecExpr *LoadEv,
                                    const SCEV *BECount);

    /// This transformation requires natural loop information & requires that
    /// loop preheaders be inserted into the CFG.
    ///
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<LoopInfo>();
      AU.addPreserved<LoopInfo>();
      AU.addRequiredID(LoopSimplifyID);
      AU.addPreservedID(LoopSimplifyID);
      AU.addRequiredID(LCSSAID);
      AU.addPreservedID(LCSSAID);
      AU.addRequired<AliasAnalysis>();
      AU.addPreserved<AliasAnalysis>();
      AU.addRequired<ScalarEvolution>();
      AU.addPreserved<ScalarEvolution>();
      AU.addPreserved<DominatorTree>();
      AU.addRequired<DominatorTree>();
      AU.addRequired<TargetLibraryInfo>();
    }
  };
}

char LoopIdiomRecognize::ID = 0;
INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
                    false, false)

Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }

/// DeleteDeadInstruction - Delete this instruction.  Before we do, go through
/// and zero out all the operands of this instruction.  If any of them become
/// dead, delete them and the computation tree that feeds them.
///
static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
  SmallVector<Instruction*, 32> NowDeadInsts;

  NowDeadInsts.push_back(I);

  // Before we touch this instruction, remove it from SE!
  do {
    Instruction *DeadInst = NowDeadInsts.pop_back_val();

    // This instruction is dead, zap it, in stages.  Start by removing it from
    // SCEV.
    SE.forgetValue(DeadInst);

    for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
      Value *Op = DeadInst->getOperand(op);
      DeadInst->setOperand(op, 0);

      // If this operand just became dead, add it to the NowDeadInsts list.
      if (!Op->use_empty()) continue;

      if (Instruction *OpI = dyn_cast<Instruction>(Op))
        if (isInstructionTriviallyDead(OpI))
          NowDeadInsts.push_back(OpI);
    }

    DeadInst->eraseFromParent();

  } while (!NowDeadInsts.empty());
}

bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
  CurLoop = L;

  // The trip count of the loop must be analyzable.
  SE = &getAnalysis<ScalarEvolution>();
  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
    return false;
  const SCEV *BECount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BECount)) return false;

  // If this loop executes exactly one time, then it should be peeled, not
  // optimized by this pass.
  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
    if (BECst->getValue()->getValue() == 0)
      return false;

  // We require target data for now.
  TD = getAnalysisIfAvailable<TargetData>();
  if (TD == 0) return false;

  DT = &getAnalysis<DominatorTree>();
  LoopInfo &LI = getAnalysis<LoopInfo>();
  TLI = &getAnalysis<TargetLibraryInfo>();

  SmallVector<BasicBlock*, 8> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);

  DEBUG(dbgs() << "loop-idiom Scanning: F["
               << L->getHeader()->getParent()->getName()
               << "] Loop %" << L->getHeader()->getName() << "\n");

  bool MadeChange = false;
  // Scan all the blocks in the loop that are not in subloops.
  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
       ++BI) {
    // Ignore blocks in subloops.
    if (LI.getLoopFor(*BI) != CurLoop)
      continue;

    MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
  }
  return MadeChange;
}

/// runOnLoopBlock - Process the specified block, which lives in a counted loop
/// with the specified backedge count.  This block is known to be in the current
/// loop and not in any subloops.
bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
                                 SmallVectorImpl<BasicBlock*> &ExitBlocks) {
  // We can only promote stores in this block if they are unconditionally
  // executed in the loop.  For a block to be unconditionally executed, it has
  // to dominate all the exit blocks of the loop.  Verify this now.
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
    if (!DT->dominates(BB, ExitBlocks[i]))
      return false;

  bool MadeChange = false;
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
    Instruction *Inst = I++;
    // Look for store instructions, which may be optimized to memset/memcpy.
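    // For example, a loop doing "A[i] = 0" on every iteration writes each
    // byte of the range exactly once and can become a single memset.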
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      WeakVH InstPtr(I);
      if (!processLoopStore(SI, BECount)) continue;
      MadeChange = true;

      // If processing the store invalidated our iterator, start over from the
      // top of the block.
      if (InstPtr == 0)
        I = BB->begin();
      continue;
    }

    // Look for memset instructions, which may be optimized to a larger memset.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
      WeakVH InstPtr(I);
      if (!processLoopMemSet(MSI, BECount)) continue;
      MadeChange = true;

      // If processing the memset invalidated our iterator, start over from the
      // top of the block.
      if (InstPtr == 0)
        I = BB->begin();
      continue;
    }
  }

  return MadeChange;
}


/// processLoopStore - See if this store can be promoted to a memset or memcpy.
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
  if (SI->isVolatile()) return false;

  Value *StoredVal = SI->getValueOperand();
  Value *StorePtr = SI->getPointerOperand();

  // Reject stores that aren't a whole number of bytes in size or that are so
  // large they overflow an unsigned.
  uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
  if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
    return false;

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided store.  If we have something else, it's a
  // random store we can't handle.
  const SCEVAddRecExpr *StoreEv =
    dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
  if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
    return false;

  // Check to see if the stride matches the size of the store.  If so, then we
  // know that every byte is touched in the loop.
  unsigned StoreSize = (unsigned)SizeInBits >> 3;
  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));

  if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
    // TODO: Could also handle negative stride here someday, that will require
    // the validity check in mayLoopAccessLocation to be updated though.
    // Enable this to print exact negative strides.
    if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
      dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
      dbgs() << "BB: " << *SI->getParent();
    }

    return false;
  }

  // See if we can optimize just this store in isolation.
  if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
                              StoredVal, SI, StoreEv, BECount))
    return true;

  // If the stored value is a strided load in the same loop with the same
  // stride, this may be transformable into a memcpy.  This kicks in for stuff
  // like
  //   for (i) A[i] = B[i];
  if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
    const SCEVAddRecExpr *LoadEv =
      dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
    if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
        StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile())
      if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
        return true;
  }
  //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";

  return false;
}

/// processLoopMemSet - See if this memset can be promoted to a large memset.
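/// For example, a loop that calls memset(&A[i], 0, 4) on each iteration while
/// advancing the pointer by 4 bytes covers a contiguous region, so it can be
/// widened into one memset over the whole range.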
bool LoopIdiomRecognize::
processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
  // We can only handle non-volatile memsets with a constant size.
  if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;

  // If we're not allowed to hack on memset, we fail.
  if (!TLI->has(LibFunc::memset))
    return false;

  Value *Pointer = MSI->getDest();

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided store.  If we have something else, it's a
  // random store we can't handle.
  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
  if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
    return false;

  // Reject memsets that are so large that they overflow an unsigned.
  uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
  if ((SizeInBytes >> 32) != 0)
    return false;

  // Check to see if the stride matches the size of the memset.  If so, then we
  // know that every byte is touched in the loop.
  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));

  // TODO: Could also handle negative stride here someday, that will require
  // the validity check in mayLoopAccessLocation to be updated though.
  if (Stride == 0 || MSI->getLength() != Stride->getValue())
    return false;

  return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
                                 MSI->getAlignment(), MSI->getValue(),
                                 MSI, Ev, BECount);
}


/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access.  The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
static bool mayLoopAccessLocation(Value *Ptr, AliasAnalysis::ModRefResult Access,
                                  Loop *L, const SCEV *BECount,
                                  unsigned StoreSize, AliasAnalysis &AA,
                                  Instruction *IgnoredStore) {
  // Get the location that may be stored across the loop.  Since the access is
  // strided positively through memory, we say that the modified location starts
  // at the pointer and has infinite size.
  uint64_t AccessSize = AliasAnalysis::UnknownSize;

  // If the loop iterates a fixed number of times, we can refine the access size
  // to be exactly the size of the memset, which is (BECount+1)*StoreSize.
  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
    AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;

  // TODO: For this to be really effective, we have to dive into the pointer
  // operand in the store.  A store to &A[i] in a loop of 100 iterations will
  // always return MayAlias with a store to &A[100]; we need StoreLoc to be
  // "A" with size 100, which will then no-alias a store to &A[100].
  AliasAnalysis::Location StoreLoc(Ptr, AccessSize);

  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
       ++BI)
    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
      if (&*I != IgnoredStore &&
          (AA.getModRefInfo(I, StoreLoc) & Access))
        return true;

  return false;
}

/// getMemSetPatternValue - If a strided store of the specified value is safe to
/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
/// be passed in.  Otherwise, return null.
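/// For example, an i32 store of 0x01020304 is not a splattable byte value,
/// but it can be replicated into a [4 x i32] constant to form a 16-byte
/// pattern for memset_pattern16.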
///
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
  // If the value isn't a constant, we can't promote it to being in a constant
  // array.  We could theoretically do a store to an alloca or something, but
  // that doesn't seem worthwhile.
  Constant *C = dyn_cast<Constant>(V);
  if (C == 0) return 0;

  // Only handle simple values that are a power of two bytes in size.
  uint64_t Size = TD.getTypeSizeInBits(V->getType());
  if (Size == 0 || (Size & 7) || (Size & (Size-1)))
    return 0;

  // Don't care enough about darwin/ppc to implement this.
  if (TD.isBigEndian())
    return 0;

  // Convert to size in bytes.
  Size /= 8;

  // TODO: If CI is larger than 16 bytes, we can try slicing it in half to see
  // if the top and bottom are the same (e.g. for vectors and large integers).
  if (Size > 16) return 0;

  // If the constant is exactly 16 bytes, just use it.
  if (Size == 16) return C;

  // Otherwise, we'll use an array of the constants.
  unsigned ArraySize = 16/Size;
  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
  return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
}


/// processLoopStridedStore - We see a strided store of some value.  If we can
/// transform this into a memset or memset_pattern in the loop preheader, do so.
bool LoopIdiomRecognize::
processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                        unsigned StoreAlignment, Value *StoredVal,
                        Instruction *TheStore, const SCEVAddRecExpr *Ev,
                        const SCEV *BECount) {

  // If the stored value is a byte-wise value (like i32 -1), then it may be
  // turned into a memset of i8 -1, assuming that all the consecutive bytes
  // are stored.  A store of i32 0x01020304 can never be turned into a memset,
  // but it can be turned into memset_pattern if the target supports it.
  Value *SplatValue = isBytewiseValue(StoredVal);
  Constant *PatternValue = 0;

  // If we're allowed to form a memset, and the stored value would be acceptable
  // for memset, use it.
  if (SplatValue && TLI->has(LibFunc::memset) &&
      // Verify that the stored value is loop invariant.  If not, we can't
      // promote the memset.
      CurLoop->isLoopInvariant(SplatValue)) {
    // Keep and use SplatValue.
    PatternValue = 0;
  } else if (TLI->has(LibFunc::memset_pattern16) &&
             (PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
    // It looks like we can use PatternValue!
    SplatValue = 0;
  } else {
    // Otherwise, this isn't an idiom we can transform.  For example, we can't
    // do anything with a 3-byte store.
    return false;
  }


  // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
  // this into a memset in the loop preheader now if we want.  However, this
  // would be unsafe to do if there is anything else in the loop that may read
  // or write to the aliased location.  Check for an alias.
  if (mayLoopAccessLocation(DestPtr, AliasAnalysis::ModRef,
                            CurLoop, BECount,
                            StoreSize, getAnalysis<AliasAnalysis>(), TheStore))
    return false;

  // Okay, everything looks good, insert the memset.
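  // Conceptually, the code emitted in the loop preheader is:
  //   memset(BasePtr, SplatValue, (BECount + 1) * StoreSize)
  // or the memset_pattern16 equivalent when PatternValue is used instead.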
  BasicBlock *Preheader = CurLoop->getLoopPreheader();

  IRBuilder<> Builder(Preheader->getTerminator());

  // The trip count of the loop and the base pointer of the addrec SCEV are
  // guaranteed to be loop invariant, which means that they should dominate the
  // header.  Just insert code for them in the preheader.
  SCEVExpander Expander(*SE);

  unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
  Value *BasePtr =
    Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
                           Preheader->getTerminator());

  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
  // pointer size if it isn't already.
  const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);

  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
                                         SCEV::FlagNUW);
  if (StoreSize != 1)
    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
                               SCEV::FlagNUW);

  Value *NumBytes =
    Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());

  CallInst *NewCall;
  if (SplatValue)
    NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment);
  else {
    Module *M = TheStore->getParent()->getParent()->getParent();
    Value *MSP = M->getOrInsertFunction("memset_pattern16",
                                        Builder.getVoidTy(),
                                        Builder.getInt8PtrTy(),
                                        Builder.getInt8PtrTy(), IntPtr,
                                        (void*)0);

    // Otherwise we should form a memset_pattern16.  PatternValue is known to be
    // a constant array of 16 bytes.  Plop the value into a mergable global.
    GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
                                            GlobalValue::InternalLinkage,
                                            PatternValue, ".memset_pattern");
    GV->setUnnamedAddr(true); // Ok to merge these.
    GV->setAlignment(16);
    Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
    NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
  }

  DEBUG(dbgs() << "  Formed memset: " << *NewCall << "\n"
               << "    from store to: " << *Ev << "  at: " << *TheStore << "\n");
  NewCall->setDebugLoc(TheStore->getDebugLoc());

  // Okay, the memset has been formed.  Zap the original store and anything that
  // feeds into it.
  DeleteDeadInstruction(TheStore, *SE);
  ++NumMemSet;
  return true;
}

/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
/// same-strided load.
bool LoopIdiomRecognize::
processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
                           const SCEVAddRecExpr *StoreEv,
                           const SCEVAddRecExpr *LoadEv,
                           const SCEV *BECount) {
  // If we're not allowed to form memcpy, we fail.
  if (!TLI->has(LibFunc::memcpy))
    return false;

  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());

  // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
  // this into a memcpy in the loop preheader now if we want.  However, this
  // would be unsafe to do if there is anything else in the loop that may read
  // or write to the stored location (including the load feeding the stores).
  // Check for an alias.
  if (mayLoopAccessLocation(SI->getPointerOperand(), AliasAnalysis::ModRef,
                            CurLoop, BECount, StoreSize,
                            getAnalysis<AliasAnalysis>(), SI))
    return false;

  // For a memcpy, we have to make sure that the input array is not being
  // mutated by the loop.
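  // Only writers (AliasAnalysis::Mod) need to be rejected for the load's
  // location; other readers of the source are harmless, since the memcpy
  // does not modify the memory it reads.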
  if (mayLoopAccessLocation(LI->getPointerOperand(), AliasAnalysis::Mod,
                            CurLoop, BECount, StoreSize,
                            getAnalysis<AliasAnalysis>(), SI))
    return false;

  // Okay, everything looks good, insert the memcpy.
  BasicBlock *Preheader = CurLoop->getLoopPreheader();

  IRBuilder<> Builder(Preheader->getTerminator());

  // The trip count of the loop and the base pointer of the addrec SCEV are
  // guaranteed to be loop invariant, which means that they should dominate the
  // header.  Just insert code for them in the preheader.
  SCEVExpander Expander(*SE);

  Value *LoadBasePtr =
    Expander.expandCodeFor(LoadEv->getStart(),
                           Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
                           Preheader->getTerminator());
  Value *StoreBasePtr =
    Expander.expandCodeFor(StoreEv->getStart(),
                           Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
                           Preheader->getTerminator());

  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
  // pointer size if it isn't already.
  const Type *IntPtr = TD->getIntPtrType(SI->getContext());
  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);

  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
                                         SCEV::FlagNUW);
  if (StoreSize != 1)
    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
                               SCEV::FlagNUW);

  Value *NumBytes =
    Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());

  Value *NewCall =
    Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
                         std::min(SI->getAlignment(), LI->getAlignment()));

  DEBUG(dbgs() << "  Formed memcpy: " << *NewCall << "\n"
               << "    from load ptr=" << *LoadEv << " at: " << *LI << "\n"
               << "    from store ptr=" << *StoreEv << " at: " << *SI << "\n");
  (void)NewCall;

  // Okay, the memcpy has been formed.  Zap the original store and anything that
  // feeds into it.
  DeleteDeadInstruction(SI, *SE);
  ++NumMemCpy;
  return true;
}