//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The implementation of the loop memory dependence analysis that was
// originally developed for the loop vectorizer.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/VectorUtils.h"
using namespace llvm;

#define DEBUG_TYPE "loop-accesses"

static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
                    cl::desc("Sets the SIMD width. Zero is autoselect."),
                    cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;

static cl::opt<unsigned, true>
VectorizationInterleave("force-vector-interleave", cl::Hidden,
                        cl::desc("Sets the vectorization interleave count. "
                                 "Zero is autoselect."),
                        cl::location(
                            VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// \brief We collect interesting dependences up to this threshold.
static cl::opt<unsigned> MaxInterestingDependence(
    "max-interesting-dependences", cl::Hidden,
    cl::desc("Maximum number of interesting dependences collected by "
             "loop-access analysis (default = 100)"),
    cl::init(100));

bool VectorizerParams::isInterleaveForced() {
  return ::VectorizationInterleave.getNumOccurrences() > 0;
}

void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message,
                                    const Function *TheFunction,
                                    const Loop *TheLoop,
                                    const char *PassName) {
  DebugLoc DL = TheLoop->getStartLoc();
  if (const Instruction *I = Message.getInstr())
    DL = I->getDebugLoc();
  emitOptimizationRemarkAnalysis(TheFunction->getContext(), PassName,
                                 *TheFunction, DL, Message.str());
}

Value *llvm::stripIntegerCast(Value *V) {
  if (CastInst *CI = dyn_cast<CastInst>(V))
    if (CI->getOperand(0)->getType()->isIntegerTy())
      return CI->getOperand(0);
  return V;
}

const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
                                            const ValueToValueMap &PtrToStride,
                                            Value *Ptr, Value *OrigPtr) {

  const SCEV *OrigSCEV = SE->getSCEV(Ptr);

  // If there is an entry in the map return the SCEV of the pointer with the
  // symbolic stride replaced by one.
  ValueToValueMap::const_iterator SI =
      PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
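  // Illustrative example (not from a specific test case): if Ptr has the
  // SCEV {%A,+,(4 * %S)}<%loop> and %S is a symbolic stride present in the
  // map, the rewrite below yields {%A,+,4}<%loop>, i.e. a unit-strided
  // access that the rest of the analysis can reason about.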
  if (SI != PtrToStride.end()) {
    Value *StrideVal = SI->second;

    // Strip casts.
    StrideVal = stripIntegerCast(StrideVal);

    // Replace symbolic stride by one.
    Value *One = ConstantInt::get(StrideVal->getType(), 1);
    ValueToValueMap RewriteMap;
    RewriteMap[StrideVal] = One;

    const SCEV *ByOne =
        SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
    DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
                 << "\n");
    return ByOne;
  }

  // Otherwise, just return the SCEV of the original pointer.
  return SE->getSCEV(Ptr);
}

void LoopAccessInfo::RuntimePointerCheck::insert(
    ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
    unsigned ASId, const ValueToValueMap &Strides) {
  // Get the stride replaced scev.
  const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
  assert(AR && "Invalid addrec expression");
  const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
  const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
  Pointers.push_back(Ptr);
  Starts.push_back(AR->getStart());
  Ends.push_back(ScEnd);
  IsWritePtr.push_back(WritePtr);
  DependencySetId.push_back(DepSetId);
  AliasSetId.push_back(ASId);
}

bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
    unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const {
  // No need to check if two readonly pointers intersect.
  if (!IsWritePtr[I] && !IsWritePtr[J])
    return false;

  // Only need to check pointers between two different dependency sets.
  if (DependencySetId[I] == DependencySetId[J])
    return false;

  // Only need to check pointers in the same alias set.
  if (AliasSetId[I] != AliasSetId[J])
    return false;

  // If PtrPartition is set omit checks between pointers of the same partition.
  // Partition number -1 means that the pointer is used in multiple partitions.
  // In this case we can't omit the check.
  if (PtrPartition && (*PtrPartition)[I] != -1 &&
      (*PtrPartition)[I] == (*PtrPartition)[J])
    return false;

  return true;
}

void LoopAccessInfo::RuntimePointerCheck::print(
    raw_ostream &OS, unsigned Depth,
    const SmallVectorImpl<int> *PtrPartition) const {
  unsigned NumPointers = Pointers.size();
  if (NumPointers == 0)
    return;

  OS.indent(Depth) << "Run-time memory checks:\n";
  unsigned N = 0;
  for (unsigned I = 0; I < NumPointers; ++I)
    for (unsigned J = I + 1; J < NumPointers; ++J)
      if (needsChecking(I, J, PtrPartition)) {
        OS.indent(Depth) << N++ << ":\n";
        OS.indent(Depth + 2) << *Pointers[I];
        if (PtrPartition)
          OS << " (Partition: " << (*PtrPartition)[I] << ")";
        OS << "\n";
        OS.indent(Depth + 2) << *Pointers[J];
        if (PtrPartition)
          OS << " (Partition: " << (*PtrPartition)[J] << ")";
        OS << "\n";
      }
}

bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
    const SmallVectorImpl<int> *PtrPartition) const {
  unsigned NumPointers = Pointers.size();

  for (unsigned I = 0; I < NumPointers; ++I)
    for (unsigned J = I + 1; J < NumPointers; ++J)
      if (needsChecking(I, J, PtrPartition))
        return true;
  return false;
}

namespace {
/// \brief Analyses memory accesses in a loop.
///
/// Checks whether run-time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
  /// \brief Read or write access location.
  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;

  AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
                 MemoryDepChecker::DepCandidates &DA)
      : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckNeeded(false) {}

  /// \brief Register a load and whether it is only read from.
  void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, false));
    if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }

  /// \brief Register a store.
  void addStore(AliasAnalysis::Location &Loc) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, true));
  }

  /// \brief Check whether we can check the pointers at runtime for
  /// non-intersection.
  bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
                       unsigned &NumComparisons, ScalarEvolution *SE,
                       Loop *TheLoop, const ValueToValueMap &Strides,
                       bool ShouldCheckStride = false);

  /// \brief Goes over all memory accesses, checks whether a RT check is needed
  /// and builds sets of dependent accesses.
  void buildDependenceSets() {
    processMemAccesses();
  }

  bool isRTCheckNeeded() { return IsRTCheckNeeded; }

  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
  void resetDepChecks() { CheckDeps.clear(); }

  MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }

private:
  typedef SetVector<MemAccessInfo> PtrAccessSet;

  /// \brief Go over all memory accesses and check whether runtime pointer
  /// checks are needed and build sets of dependency check candidates.
  void processMemAccesses();

  /// Set of all accesses.
  PtrAccessSet Accesses;

  const DataLayout &DL;

  /// Set of accesses that need a further dependence check.
  MemAccessInfoSet CheckDeps;

  /// Set of pointers that are read only.
  SmallPtrSet<Value*, 16> ReadOnlyPtr;

  /// An alias set tracker to partition the access set by underlying object
  /// and intrinsic property (such as TBAA metadata).
  AliasSetTracker AST;

  LoopInfo *LI;

  /// Sets of potentially dependent accesses - members of one set share an
  /// underlying pointer. The set "CheckDeps" identifies which sets really need
  /// a dependence check.
  MemoryDepChecker::DepCandidates &DepCands;

  bool IsRTCheckNeeded;
};

} // end anonymous namespace

/// \brief Check whether a pointer can participate in a runtime bounds check.
static bool hasComputableBounds(ScalarEvolution *SE,
                                const ValueToValueMap &Strides, Value *Ptr) {
  const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (!AR)
    return false;

  return AR->isAffine();
}
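
// For instance (illustrative values, not from a particular test case), a
// pointer whose SCEV is the affine AddRec {%base,+,4}<%loop> has computable
// start and end addresses over the loop's trip count, whereas a quadratic
// recurrence such as {%base,+,4,+,4}<%loop> fails the isAffine() test above.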

/// \brief Check the stride of the pointer and ensure that it does not wrap in
/// the address space.
static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
                        const ValueToValueMap &StridesMap);

bool AccessAnalysis::canCheckPtrAtRT(
    LoopAccessInfo::RuntimePointerCheck &RtCheck, unsigned &NumComparisons,
    ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap,
    bool ShouldCheckStride) {
  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRT = true;

  bool IsDepCheckNeeded = isDependencyCheckNeeded();
  NumComparisons = 0;

  // We assign a consecutive id to accesses from different alias sets.
  // Accesses between different groups don't need to be checked.
  unsigned ASId = 1;
  for (auto &AS : AST) {
    unsigned NumReadPtrChecks = 0;
    unsigned NumWritePtrChecks = 0;

    // We assign consecutive ids to accesses from different dependence sets.
    // Accesses within the same set don't need a runtime check.
    unsigned RunningDepId = 1;
    DenseMap<Value *, unsigned> DepSetId;

    for (auto A : AS) {
      Value *Ptr = A.getValue();
      bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
      MemAccessInfo Access(Ptr, IsWrite);

      if (IsWrite)
        ++NumWritePtrChecks;
      else
        ++NumReadPtrChecks;

      if (hasComputableBounds(SE, StridesMap, Ptr) &&
          // When we run after a failing dependency check we have to make sure
          // we don't have wrapping pointers.
          (!ShouldCheckStride ||
           isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) {
        // The id of the dependence set.
        unsigned DepId;

        if (IsDepCheckNeeded) {
          Value *Leader = DepCands.getLeaderValue(Access).getPointer();
          unsigned &LeaderId = DepSetId[Leader];
          if (!LeaderId)
            LeaderId = RunningDepId++;
          DepId = LeaderId;
        } else
          // Each access has its own dependence set.
          DepId = RunningDepId++;

        RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);

        DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
      } else {
        CanDoRT = false;
      }
    }

    if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
      NumComparisons += 0; // Only one dependence set.
    else {
      NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks +
                                              NumWritePtrChecks - 1));
    }

    ++ASId;
  }

  // If the pointers that we would use for the bounds comparison have different
  // address spaces, assume the values aren't directly comparable, so we can't
  // use them for the runtime check. We also have to assume they could
  // overlap. In the future there should be metadata for whether address spaces
  // are disjoint.
  unsigned NumPointers = RtCheck.Pointers.size();
  for (unsigned i = 0; i < NumPointers; ++i) {
    for (unsigned j = i + 1; j < NumPointers; ++j) {
      // Only need to check pointers between two different dependency sets.
      if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
        continue;
      // Only need to check pointers in the same alias set.
      if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j])
        continue;

      Value *PtrI = RtCheck.Pointers[i];
      Value *PtrJ = RtCheck.Pointers[j];

      unsigned ASi = PtrI->getType()->getPointerAddressSpace();
      unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
      if (ASi != ASj) {
        DEBUG(dbgs() << "LAA: Runtime check would require comparison between"
                        " different address spaces\n");
        return false;
      }
    }
  }

  return CanDoRT;
}

void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
  DEBUG(dbgs() << "  AST: "; AST.dump());
  DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
  DEBUG({
    for (auto A : Accesses)
      dbgs() << "\t" << *A.getPointer() << " (" <<
                (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
                                         "read-only" : "read")) << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result, we
  // only need to check for potential pointer dependencies within each alias
  // set.
  for (auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves use
    // linked lists internally and so the iteration order here is deterministic
    // (matching the original instruction order within each set).

    bool SetHasWrite = false;

    // Map of pointers to last access encountered.
    typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of accesses to check after all writes have been processed.
    PtrAccessSet DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write pointers,
    // and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;

      for (auto AV : AS) {
        Value *Ptr = AV.getValue();

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for CheckDeps.
        for (auto AC : S) {
          if (AC.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.count(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          MemAccessInfo Access(Ptr, IsWrite);
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointers that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            DeferredAccesses.insert(Access);
            continue;
          }

          // If this is a write - check other reads and writes for conflicts.
          // If this is a read only check other writes for conflicts (but only
          // if there is no other write to the ptr - this is an optimization to
          // catch "a[i] = a[i] + " without having to do a dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.insert(Access);
            IsRTCheckNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;

          // Create sets of pointers connected by a shared alias set and
          // underlying object.
          typedef SmallVector<Value *, 16> ValueVector;
          ValueVector TempObjects;

          GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
          DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
          for (Value *UnderlyingObj : TempObjects) {
            UnderlyingObjToAccessMap::iterator Prev =
                ObjToLastAccess.find(UnderlyingObj);
            if (Prev != ObjToLastAccess.end())
              DepCands.unionSets(Access, Prev->second);

            ObjToLastAccess[UnderlyingObj] = Access;
            DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
          }
        }
      }
    }
  }
}

static bool isInBoundsGep(Value *Ptr) {
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
    return GEP->isInBounds();
  return false;
}

/// \brief Check whether the access through \p Ptr has a constant stride.
static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
                        const ValueToValueMap &StridesMap) {
  const Type *Ty = Ptr->getType();
  assert(Ty->isPointerTy() && "Unexpected non-ptr");

  // Make sure that the pointer does not point to aggregate types.
  const PointerType *PtrTy = cast<PointerType>(Ty);
  if (PtrTy->getElementType()->isAggregateType()) {
    DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
                 << *Ptr << "\n");
    return 0;
  }

  const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (!AR) {
    DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer "
                 << *Ptr << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // The access function must stride over the innermost loop.
  if (Lp != AR->getLoop()) {
    DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
          *Ptr << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // The address calculation must not wrap. Otherwise, a dependence could be
  // inverted.
  // An inbounds getelementptr that is an AddRec with a unit stride
  // cannot wrap per definition. The unit stride requirement is checked later.
  // A getelementptr without an inbounds attribute and unit stride would have
  // to access the pointer value "0" which is undefined behavior in address
  // space 0, therefore we can also vectorize this case.
  bool IsInBoundsGEP = isInBoundsGep(Ptr);
  bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
  bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
  if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
    DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
                 << *Ptr << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // Check the step is constant.
  const SCEV *Step = AR->getStepRecurrence(*SE);
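
  // Illustrative examples for a 4-byte element type: a constant step of 4
  // yields Stride = 1 (consecutive forward), -4 yields Stride = -1
  // (consecutive reverse), and 8 yields Stride = 2 (every other element).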
  // Calculate the pointer stride and check if it is consecutive.
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
  if (!C) {
    DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
          " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
  const APInt &APStepVal = C->getValue()->getValue();

  // Huge step value - give up.
  if (APStepVal.getBitWidth() > 64)
    return 0;

  int64_t StepVal = APStepVal.getSExtValue();

  // Strided access.
  int64_t Stride = StepVal / Size;
  int64_t Rem = StepVal % Size;
  if (Rem)
    return 0;

  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
  // know we can't "wrap around the address space". In case of address space
  // zero we know that this won't happen without triggering undefined behavior.
  if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
      Stride != 1 && Stride != -1)
    return 0;

  return Stride;
}

bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
  case BackwardVectorizable:
    return true;

  case Unknown:
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
    return false;

  case BackwardVectorizable:
  case Unknown:
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
  switch (Type) {
  case NoDep:
  case Forward:
  case ForwardButPreventsForwarding:
    return false;

  case Unknown:
  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
                                                    unsigned TypeByteSize) {
  // If loads occur at a distance that is not a multiple of a feasible vector
  // factor store-load forwarding does not take place.
  // Positive dependences might cause trouble because vectorizing them might
  // prevent store-load forwarding making vectorized code run a lot slower.
  //   a[i] = a[i-3] ^ a[i-8];
  //   The stores to a[i:i+1] don't align with the loads from a[i-3:i-2] and
  //   hence on your typical architecture store-load forwarding does not take
  //   place. Vectorizing in such cases does not make sense.
  // Store-load forwarding distance.
  const unsigned NumCyclesForStoreLoadThroughMemory = 8 * TypeByteSize;
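  // Worked example (illustrative): with TypeByteSize = 4, a Distance of 12
  // bytes trips the check below at vf = 8 (12 % 8 != 0 and 12 / 8 = 1 < 32
  // cycles), capping the forwarding-safe width below 2 * TypeByteSize, so
  // a store-load forwarding conflict is reported.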
  // Maximum vector factor.
  unsigned MaxVFWithoutSLForwardIssues =
      VectorizerParams::MaxVectorWidth * TypeByteSize;
  if (MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
    MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;

  for (unsigned vf = 2 * TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
       vf *= 2) {
    if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
      MaxVFWithoutSLForwardIssues = (vf >>= 1);
      break;
    }
  }

  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
    DEBUG(dbgs() << "LAA: Distance " << Distance <<
          " that could cause a store-load forwarding conflict\n");
    return true;
  }

  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
      MaxVFWithoutSLForwardIssues !=
          VectorizerParams::MaxVectorWidth * TypeByteSize)
    MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
  return false;
}

MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
                              const MemAccessInfo &B, unsigned BIdx,
                              const ValueToValueMap &Strides) {
  assert(AIdx < BIdx && "Must pass arguments in program order");

  Value *APtr = A.getPointer();
  Value *BPtr = B.getPointer();
  bool AIsWrite = A.getInt();
  bool BIsWrite = B.getInt();

  // Two reads are independent.
  if (!AIsWrite && !BIsWrite)
    return Dependence::NoDep;

  // We cannot check pointers in different address spaces.
  if (APtr->getType()->getPointerAddressSpace() !=
      BPtr->getType()->getPointerAddressSpace())
    return Dependence::Unknown;

  const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
  const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);

  int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides);
  int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides);

  const SCEV *Src = AScev;
  const SCEV *Sink = BScev;

  // If the induction step is negative we have to invert source and sink of the
  // dependence.
  if (StrideAPtr < 0) {
    std::swap(APtr, BPtr);
    std::swap(Src, Sink);
    std::swap(AIsWrite, BIsWrite);
    std::swap(AIdx, BIdx);
    std::swap(StrideAPtr, StrideBPtr);
  }

  const SCEV *Dist = SE->getMinusSCEV(Sink, Src);

  DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
               << "(Induction step: " << StrideAPtr << ")\n");
  DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
               << *InstMap[BIdx] << ": " << *Dist << "\n");

  // Need consecutive accesses. We don't want to vectorize
  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
  // the address space.
  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
    DEBUG(dbgs() << "Non-consecutive pointer access\n");
    return Dependence::Unknown;
  }

  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
  if (!C) {
    DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
    ShouldRetryWithRuntimeCheck = true;
    return Dependence::Unknown;
  }

  Type *ATy = APtr->getType()->getPointerElementType();
  Type *BTy = BPtr->getType()->getPointerElementType();
  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
  unsigned TypeByteSize = DL.getTypeAllocSize(ATy);

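  // Example (illustrative): for "A[i] = A[i + 4]" the sink (the store to
  // A[i]) is 4 elements before the source (the load of A[i + 4]), giving a
  // negative distance; such read-ahead patterns are forward dependences and
  // are safe unless they inhibit store-load forwarding.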
  // Negative distances are not plausible dependencies.
  const APInt &Val = C->getValue()->getValue();
  if (Val.isNegative()) {
    bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
    if (IsTrueDataDependence &&
        (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
         ATy != BTy))
      return Dependence::ForwardButPreventsForwarding;

    DEBUG(dbgs() << "LAA: Dependence is negative: NoDep\n");
    return Dependence::Forward;
  }

  // Write to the same location with the same size.
  // Could be improved to assert type sizes are the same (i32 == float, etc).
  if (Val == 0) {
    if (ATy == BTy)
      return Dependence::NoDep;
    DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
    return Dependence::Unknown;
  }

  assert(Val.isStrictlyPositive() && "Expect a positive value");

  if (ATy != BTy) {
    DEBUG(dbgs() <<
          "LAA: ReadWrite-Write positive dependency with different types\n");
    return Dependence::Unknown;
  }

  unsigned Distance = (unsigned) Val.getZExtValue();

  // Bail out early if passed-in parameters make vectorization not feasible.
  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
                           VectorizerParams::VectorizationFactor : 1);
  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
                           VectorizerParams::VectorizationInterleave : 1);

  // The distance must be bigger than the size needed for a vectorized version
  // of the operation and the size of the vectorized operation must not be
  // bigger than the current maximum size.
  if (Distance < 2 * TypeByteSize ||
      2 * TypeByteSize > MaxSafeDepDistBytes ||
      Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
    DEBUG(dbgs() << "LAA: Failure because of Positive distance "
                 << Val.getSExtValue() << '\n');
    return Dependence::Backward;
  }

  // Positive distance bigger than max vectorization factor.
  MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ?
      Distance : MaxSafeDepDistBytes;

  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
  if (IsTrueDataDependence &&
      couldPreventStoreLoadForward(Distance, TypeByteSize))
    return Dependence::BackwardVectorizableButPreventsForwarding;

  DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() <<
        " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');

  return Dependence::BackwardVectorizable;
}

bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
                                   MemAccessInfoSet &CheckDeps,
                                   const ValueToValueMap &Strides) {

  MaxSafeDepDistBytes = -1U;
  while (!CheckDeps.empty()) {
    MemAccessInfo CurAccess = *CheckDeps.begin();

    // Get the relevant memory access set.
    EquivalenceClasses<MemAccessInfo>::iterator I =
        AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));

    // Check accesses within this set.
    EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE;
    AI = AccessSets.member_begin(I), AE = AccessSets.member_end();

    // Check every access pair.
    while (AI != AE) {
      CheckDeps.erase(*AI);
      EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
      while (OI != AE) {
        // Check every accessing instruction pair in program order.
        for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
             I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
          for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
               I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
            auto A = std::make_pair(&*AI, *I1);
            auto B = std::make_pair(&*OI, *I2);

            assert(*I1 != *I2);
            if (*I1 > *I2)
              std::swap(A, B);

            Dependence::DepType Type =
                isDependent(*A.first, A.second, *B.first, B.second, Strides);
            SafeForVectorization &= Dependence::isSafeForVectorization(Type);

            // Gather dependences unless we accumulated MaxInterestingDependence
            // dependences. In that case return as soon as we find the first
            // unsafe dependence. This puts a limit on this quadratic
            // algorithm.
            if (RecordInterestingDependences) {
              if (Dependence::isInterestingDependence(Type))
                InterestingDependences.push_back(
                    Dependence(A.second, B.second, Type));

              if (InterestingDependences.size() >= MaxInterestingDependence) {
                RecordInterestingDependences = false;
                InterestingDependences.clear();
                DEBUG(dbgs() << "Too many dependences, stopped recording\n");
              }
            }
            if (!RecordInterestingDependences && !SafeForVectorization)
              return false;
          }
        ++OI;
      }
      ++AI;
    }
  }

  DEBUG(dbgs() << "Total Interesting Dependences: "
               << InterestingDependences.size() << "\n");
  return SafeForVectorization;
}

SmallVector<Instruction *, 4>
MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
  MemAccessInfo Access(Ptr, isWrite);
  auto &IndexVector = Accesses.find(Access)->second;

  SmallVector<Instruction *, 4> Insts;
  std::transform(IndexVector.begin(), IndexVector.end(),
                 std::back_inserter(Insts),
                 [&](unsigned Idx) { return this->InstMap[Idx]; });
  return Insts;
}

const char *MemoryDepChecker::Dependence::DepName[] = {
    "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
    "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};

void MemoryDepChecker::Dependence::print(
    raw_ostream &OS, unsigned Depth,
    const SmallVectorImpl<Instruction *> &Instrs) const {
  OS.indent(Depth) << DepName[Type] << ":\n";
  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
}

bool LoopAccessInfo::canAnalyzeLoop() {
  // We need to have a loop header.
  DEBUG(dbgs() << "LAA: Found a loop: " <<
        TheLoop->getHeader()->getName() << '\n');

  // We can only analyze innermost loops.
  if (!TheLoop->empty()) {
    DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
    emitAnalysis(LoopAccessReport() << "loop is not the innermost loop");
    return false;
  }

  // We must have a single backedge.
  if (TheLoop->getNumBackEdges() != 1) {
    DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
    emitAnalysis(
        LoopAccessReport() <<
        "loop control flow is not understood by analyzer");
    return false;
  }

  // We must have a single exiting block.
  if (!TheLoop->getExitingBlock()) {
    DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
    emitAnalysis(
        LoopAccessReport() <<
        "loop control flow is not understood by analyzer");
    return false;
  }
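
  // A bottom-tested loop is one whose latch block performs the exit test,
  // e.g. "do { ... } while (i < n);", or a for-loop after loop rotation.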
  // We only handle bottom-tested loops, i.e. loops in which the condition is
  // checked at the end of each iteration. With that we can assume that all
  // instructions in the loop are executed the same number of times.
  if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
    DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
    emitAnalysis(
        LoopAccessReport() <<
        "loop control flow is not understood by analyzer");
    return false;
  }

  // ScalarEvolution needs to be able to find the exit count.
  const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
  if (ExitCount == SE->getCouldNotCompute()) {
    emitAnalysis(LoopAccessReport() <<
                 "could not determine number of loop iterations");
    DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
    return false;
  }

  return true;
}

void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {

  typedef SmallVector<Value*, 16> ValueVector;
  typedef SmallPtrSet<Value*, 16> ValueSet;

  // Holds the Load and Store *instructions*.
  ValueVector Loads;
  ValueVector Stores;

  // Holds all the different accesses in the loop.
  unsigned NumReads = 0;
  unsigned NumReadWrites = 0;

  PtrRtCheck.Pointers.clear();
  PtrRtCheck.Need = false;

  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();

  // For each block.
  for (Loop::block_iterator bb = TheLoop->block_begin(),
       be = TheLoop->block_end(); bb != be; ++bb) {

    // Scan the BB and collect legal loads and stores.
    for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
         ++it) {

      // If this is a load, save it. If this instruction can read from memory
      // but is not a load, then we quit. Notice that we don't handle function
      // calls that read or write.
      if (it->mayReadFromMemory()) {
        // Many math library functions read the rounding mode. We will only
        // vectorize a loop if it contains known function calls that don't set
        // the flag. Therefore, it is safe to ignore this read from memory.
        CallInst *Call = dyn_cast<CallInst>(it);
        if (Call && getIntrinsicIDForCall(Call, TLI))
          continue;

        // If the function has an explicit vectorized counterpart, we can
        // safely assume that it can be vectorized.
        if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
            TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
          continue;

        LoadInst *Ld = dyn_cast<LoadInst>(it);
        if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
          emitAnalysis(LoopAccessReport(Ld)
                       << "read with atomic ordering or volatile read");
          DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
          CanVecMem = false;
          return;
        }
        NumLoads++;
        Loads.push_back(Ld);
        DepChecker.addAccess(Ld);
        continue;
      }

      // Save 'store' instructions. Abort if other instructions write to
      // memory.
      if (it->mayWriteToMemory()) {
        StoreInst *St = dyn_cast<StoreInst>(it);
        if (!St) {
          emitAnalysis(LoopAccessReport(it) <<
                       "instruction cannot be vectorized");
          CanVecMem = false;
          return;
        }
        if (!St->isSimple() && !IsAnnotatedParallel) {
          emitAnalysis(LoopAccessReport(St)
                       << "write with atomic ordering or volatile write");
          DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
          CanVecMem = false;
          return;
        }
        NumStores++;
        Stores.push_back(St);
        DepChecker.addAccess(St);
      }
    } // Next instr.
  } // Next block.

  // Now we have two lists that hold the loads and the stores.
  // Next, we find the pointers that they use.

  // Check if we see any stores. If there are no stores, then we don't
  // care if the pointers are *restrict*.
  if (!Stores.size()) {
    DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
    CanVecMem = true;
    return;
  }

  MemoryDepChecker::DepCandidates DependentAccesses;
  AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
                          AA, LI, DependentAccesses);

  // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
  // multiple times on the same object. If the ptr is accessed twice, once
  // for read and once for write, it will only appear once (on the write
  // list). This is okay, since we are going to check for conflicts between
  // writes and between reads and writes, but not between reads and reads.
  ValueSet Seen;

  ValueVector::iterator I, IE;
  for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
    StoreInst *ST = cast<StoreInst>(*I);
    Value *Ptr = ST->getPointerOperand();
    // Check for store to loop invariant address.
    StoreToLoopInvariantAddress |= isUniform(Ptr);
    // If we did *not* see this pointer before, insert it to the read-write
    // list. At this phase it is only a 'write' list.
    if (Seen.insert(Ptr).second) {
      ++NumReadWrites;

      AliasAnalysis::Location Loc = AA->getLocation(ST);
      // The TBAA metadata could have a control dependency on the predication
      // condition, so we cannot rely on it when determining whether or not we
      // need runtime pointer checks.
      if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
        Loc.AATags.TBAA = nullptr;

      Accesses.addStore(Loc);
    }
  }

  if (IsAnnotatedParallel) {
    DEBUG(dbgs()
          << "LAA: A loop annotated parallel, ignore memory dependency "
          << "checks.\n");
    CanVecMem = true;
    return;
  }

  for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
    LoadInst *LD = cast<LoadInst>(*I);
    Value *Ptr = LD->getPointerOperand();
    // If we did *not* see this pointer before, insert it to the
    // read list. If we *did* see it before, then it is already in
    // the read-write list. This allows us to vectorize expressions
    // such as A[i] += x; Because the address of A[i] is a read-write
    // pointer. This only works if the index of A[i] is consecutive.
    // If the address of i is unknown (for example A[B[i]]) then we may
    // read a few words, modify, and write a few words, and some of the
    // words may be written to the same address.
    bool IsReadOnlyPtr = false;
    if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) {
      ++NumReads;
      IsReadOnlyPtr = true;
    }

    AliasAnalysis::Location Loc = AA->getLocation(LD);
    // The TBAA metadata could have a control dependency on the predication
    // condition, so we cannot rely on it when determining whether or not we
    // need runtime pointer checks.
    if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
      Loc.AATags.TBAA = nullptr;

    Accesses.addLoad(Loc, IsReadOnlyPtr);
  }

  // If we write (or read-write) to a single destination and there are no
  // other reads in this loop then it is safe to vectorize.
  if (NumReadWrites == 1 && NumReads == 0) {
    DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
    CanVecMem = true;
    return;
  }

  // Build dependence sets and check whether we need a runtime pointer bounds
  // check.
  Accesses.buildDependenceSets();
  bool NeedRTCheck = Accesses.isRTCheckNeeded();

  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRT = false;
  if (NeedRTCheck)
    CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
                                       Strides);

  DEBUG(dbgs() << "LAA: We need to do " << NumComparisons <<
        " pointer comparisons.\n");

  // If there is only one set of dependences to check among the pointers, we
  // don't need a runtime check.
  if (NumComparisons == 0 && NeedRTCheck)
    NeedRTCheck = false;

  // Check that we found the bounds for the pointer.
  if (CanDoRT)
    DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
  else if (NeedRTCheck) {
    emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
    DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " <<
          "the array bounds.\n");
    PtrRtCheck.reset();
    CanVecMem = false;
    return;
  }

  PtrRtCheck.Need = NeedRTCheck;

  CanVecMem = true;
  if (Accesses.isDependencyCheckNeeded()) {
    DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
    CanVecMem = DepChecker.areDepsSafe(
        DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
    MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();

    if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
      DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
      NeedRTCheck = true;

      // Clear the dependency checks. We assume they are not needed.
      Accesses.resetDepChecks();

      PtrRtCheck.reset();
      PtrRtCheck.Need = true;

      CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
                                         TheLoop, Strides, true);
      // Check that we found the bounds for the pointer.
      if (!CanDoRT && NumComparisons > 0) {
        emitAnalysis(LoopAccessReport()
                     << "cannot check memory dependencies at runtime");
        DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
        PtrRtCheck.reset();
        CanVecMem = false;
        return;
      }

      CanVecMem = true;
    }
  }

  if (CanVecMem)
    DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
                 << (NeedRTCheck ? "" : " don't")
                 << " need a runtime memory check.\n");
  else {
    emitAnalysis(LoopAccessReport() <<
                 "unsafe dependent memory operations in loop");
    DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
  }
}

bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
                                           DominatorTree *DT) {
  assert(TheLoop->contains(BB) && "Unknown block used");
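
  // For example (illustrative), in "for (...) { if (c) A[i] = 0; }" the
  // block containing the conditional store does not dominate the latch, so
  // the store must be predicated.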
  // Blocks that do not dominate the latch need predication.
  BasicBlock *Latch = TheLoop->getLoopLatch();
  return !DT->dominates(BB, Latch);
}

void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) {
  assert(!Report && "Multiple reports generated");
  Report = Message;
}

bool LoopAccessInfo::isUniform(Value *V) const {
  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
}

// FIXME: this function is currently a duplicate of the one in
// LoopVectorize.cpp.
static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
                                 Instruction *Loc) {
  if (FirstInst)
    return FirstInst;
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() == Loc->getParent() ? I : nullptr;
  return nullptr;
}

std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
    Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
  if (!PtrRtCheck.Need)
    return std::make_pair(nullptr, nullptr);

  unsigned NumPointers = PtrRtCheck.Pointers.size();
  SmallVector<TrackingVH<Value>, 2> Starts;
  SmallVector<TrackingVH<Value>, 2> Ends;

  LLVMContext &Ctx = Loc->getContext();
  SCEVExpander Exp(*SE, DL, "induction");
  Instruction *FirstInst = nullptr;

  for (unsigned i = 0; i < NumPointers; ++i) {
    Value *Ptr = PtrRtCheck.Pointers[i];
    const SCEV *Sc = SE->getSCEV(Ptr);

    if (SE->isLoopInvariant(Sc, TheLoop)) {
      DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" <<
            *Ptr << "\n");
      Starts.push_back(Ptr);
      Ends.push_back(Ptr);
    } else {
      DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n');
      unsigned AS = Ptr->getType()->getPointerAddressSpace();

      // Use this type for pointer arithmetic.
      Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);

      Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc);
      Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc);
      Starts.push_back(Start);
      Ends.push_back(End);
    }
  }

  IRBuilder<> ChkBuilder(Loc);
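  // The emitted check below tests each pair of ranges for overlap: ranges
  // [Start0, End0] and [Start1, End1] conflict iff Start0 <= End1 &&
  // Start1 <= End0. For example (illustrative addresses), [0x100, 0x140]
  // and [0x120, 0x160] satisfy both comparisons and set found.conflict.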
  // Our instructions might fold to a constant.
  Value *MemoryRuntimeCheck = nullptr;
  for (unsigned i = 0; i < NumPointers; ++i) {
    for (unsigned j = i + 1; j < NumPointers; ++j) {
      if (!PtrRtCheck.needsChecking(i, j, PtrPartition))
        continue;

      unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
      unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();

      assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
             (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
             "Trying to bounds check pointers with different address spaces");

      Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
      Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);

      Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
      Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
      Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
      Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");

      Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
      FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
      Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
      FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
      Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
      FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
      if (MemoryRuntimeCheck) {
        IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
                                         "conflict.rdx");
        FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
      }
      MemoryRuntimeCheck = IsConflict;
    }
  }

  if (!MemoryRuntimeCheck)
    return std::make_pair(nullptr, nullptr);

  // We have to do this trickery because the IRBuilder might fold the check to
  // a constant expression in which case there is no Instruction anchored in
  // the block.
  Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
                                                 ConstantInt::getTrue(Ctx));
  ChkBuilder.Insert(Check, "memcheck.conflict");
  FirstInst = getFirstInst(FirstInst, Check, Loc);
  return std::make_pair(FirstInst, Check);
}

LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                               const DataLayout &DL,
                               const TargetLibraryInfo *TLI, AliasAnalysis *AA,
                               DominatorTree *DT, LoopInfo *LI,
                               const ValueToValueMap &Strides)
    : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL),
      TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
      MaxSafeDepDistBytes(-1U), CanVecMem(false),
      StoreToLoopInvariantAddress(false) {
  if (canAnalyzeLoop())
    analyzeLoop(Strides);
}

void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
  if (CanVecMem) {
    if (PtrRtCheck.Need)
      OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
    else
      OS.indent(Depth) << "Memory dependences are safe\n";
  }

  OS.indent(Depth) << "Store to invariant address was "
                   << (StoreToLoopInvariantAddress ? "" : "not ")
                   << "found in loop.\n";
"" : "not ") 1334 << "found in loop.\n"; 1335 1336 if (Report) 1337 OS.indent(Depth) << "Report: " << Report->str() << "\n"; 1338 1339 if (auto *InterestingDependences = DepChecker.getInterestingDependences()) { 1340 OS.indent(Depth) << "Interesting Dependences:\n"; 1341 for (auto &Dep : *InterestingDependences) { 1342 Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions()); 1343 OS << "\n"; 1344 } 1345 } else 1346 OS.indent(Depth) << "Too many interesting dependences, not recorded\n"; 1347 1348 // List the pair of accesses need run-time checks to prove independence. 1349 PtrRtCheck.print(OS, Depth); 1350 OS << "\n"; 1351 } 1352 1353 const LoopAccessInfo & 1354 LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) { 1355 auto &LAI = LoopAccessInfoMap[L]; 1356 1357 #ifndef NDEBUG 1358 assert((!LAI || LAI->NumSymbolicStrides == Strides.size()) && 1359 "Symbolic strides changed for loop"); 1360 #endif 1361 1362 if (!LAI) { 1363 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 1364 LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI, 1365 Strides); 1366 #ifndef NDEBUG 1367 LAI->NumSymbolicStrides = Strides.size(); 1368 #endif 1369 } 1370 return *LAI.get(); 1371 } 1372 1373 void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const { 1374 LoopAccessAnalysis &LAA = *const_cast<LoopAccessAnalysis *>(this); 1375 1376 ValueToValueMap NoSymbolicStrides; 1377 1378 for (Loop *TopLevelLoop : *LI) 1379 for (Loop *L : depth_first(TopLevelLoop)) { 1380 OS.indent(2) << L->getHeader()->getName() << ":\n"; 1381 auto &LAI = LAA.getInfo(L, NoSymbolicStrides); 1382 LAI.print(OS, 4); 1383 } 1384 } 1385 1386 bool LoopAccessAnalysis::runOnFunction(Function &F) { 1387 SE = &getAnalysis<ScalarEvolution>(); 1388 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 1389 TLI = TLIP ? &TLIP->getTLI() : nullptr; 1390 AA = &getAnalysis<AliasAnalysis>(); 1391 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1392 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1393 1394 return false; 1395 } 1396 1397 void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 1398 AU.addRequired<ScalarEvolution>(); 1399 AU.addRequired<AliasAnalysis>(); 1400 AU.addRequired<DominatorTreeWrapperPass>(); 1401 AU.addRequired<LoopInfoWrapperPass>(); 1402 1403 AU.setPreservesAll(); 1404 } 1405 1406 char LoopAccessAnalysis::ID = 0; 1407 static const char laa_name[] = "Loop Access Analysis"; 1408 #define LAA_NAME "loop-accesses" 1409 1410 INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) 1411 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 1412 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 1413 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 1414 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1415 INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) 1416 1417 namespace llvm { 1418 Pass *createLAAPass() { 1419 return new LoopAccessAnalysis(); 1420 } 1421 } 1422