//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The implementation of the loop memory dependence analysis that was
// originally developed for the loop vectorizer.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "loop-accesses"

static cl::opt<unsigned, true>
    VectorizationFactor("force-vector-width", cl::Hidden,
                        cl::desc("Sets the SIMD width. Zero is autoselect."),
                        cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;

static cl::opt<unsigned, true>
    VectorizationInterleave("force-vector-interleave", cl::Hidden,
                            cl::desc("Sets the vectorization interleave count. "
                                     "Zero is autoselect."),
                            cl::location(
                                VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// The maximum number of comparisons allowed while trying to merge runtime
/// memory checks.
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// We collect dependences up to this threshold.
static cl::opt<unsigned>
    MaxDependences("max-dependences", cl::Hidden,
                   cl::desc("Maximum number of dependences collected by "
                            "loop-access analysis (default = 100)"),
                   cl::init(100));

/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
///   for (i = 0; i < N; ++i)
///     A[i * Stride1] += B[i * Stride2] ...
///
/// Will be roughly translated to
///    if (Stride1 == 1 && Stride2 == 1) {
///      for (i = 0; i < N; i+=4)
///        A[i:i+3] += ...
///    } else
///      ...
static cl::opt<bool> EnableMemAccessVersioning(
    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
    cl::desc("Enable symbolic stride memory access versioning"));

/// Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
    "store-to-load-forwarding-conflict-detection", cl::Hidden,
    cl::desc("Enable conflict detection in loop-access analysis"),
    cl::init(true));

static cl::opt<unsigned> MaxForkedSCEVDepth(
    "max-forked-scev-depth", cl::Hidden,
    cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
    cl::init(5));

bool VectorizerParams::isInterleaveForced() {
  return ::VectorizationInterleave.getNumOccurrences() > 0;
}

Value *llvm::stripIntegerCast(Value *V) {
  if (auto *CI = dyn_cast<CastInst>(V))
    if (CI->getOperand(0)->getType()->isIntegerTy())
      return CI->getOperand(0);
  return V;
}

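/// Replace a symbolic stride in the SCEV for \p Ptr with one, recording the
/// corresponding equality predicate in \p PSE. As an illustrative example
/// (names hypothetical): for a pointer %p with SCEV {%base,+,%stride}<%loop>
/// where %stride has an entry in \p PtrToStride, this records the predicate
/// "%stride == 1" and returns the expression rewritten under that assumption,
/// roughly {%base,+,1}<%loop>.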
const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                            const ValueToValueMap &PtrToStride,
                                            Value *Ptr) {
  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);

  // If there is an entry in the map return the SCEV of the pointer with the
  // symbolic stride replaced by one.
  ValueToValueMap::const_iterator SI = PtrToStride.find(Ptr);
  if (SI == PtrToStride.end())
    // For a non-symbolic stride, just return the original expression.
    return OrigSCEV;

  Value *StrideVal = stripIntegerCast(SI->second);

  ScalarEvolution *SE = PSE.getSE();
  const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
  const auto *CT =
      static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));

  PSE.addPredicate(*SE->getEqualPredicate(U, CT));
  auto *Expr = PSE.getSCEV(Ptr);

  LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
                    << " by: " << *Expr << "\n");
  return Expr;
}

RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
    unsigned Index, RuntimePointerChecking &RtCheck)
    : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
      AddressSpace(RtCheck.Pointers[Index]
                       .PointerValue->getType()
                       ->getPointerAddressSpace()),
      NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) {
  Members.push_back(Index);
}

/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on N-th loop
/// iteration. Then B is calculated as:
///   B = A + Step*N .
/// Step value may be positive or negative.
/// N is a calculated back-edge taken count:
///   N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
/// Start and End points are calculated in the following way:
///   Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
/// where SizeOfElt is the size of single memory access in bytes.
///
/// There is no conflict when the intervals are disjoint:
///   NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
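///
/// For example (illustrative, assuming no unsigned wrap): with A = %base, a
/// constant Step of -4 bytes and N = 100, we get B = %base - 400, so the
/// access covers the interval [%base - 400, %base + SizeOfElt).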
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
                                    Type *AccessTy, bool WritePtr,
                                    unsigned DepSetId, unsigned ASId,
                                    PredicatedScalarEvolution &PSE,
                                    bool NeedsFreeze) {
  ScalarEvolution *SE = PSE.getSE();

  const SCEV *ScStart;
  const SCEV *ScEnd;

  if (SE->isLoopInvariant(PtrExpr, Lp)) {
    ScStart = ScEnd = PtrExpr;
  } else {
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr);
    assert(AR && "Invalid addrec expression");
    const SCEV *Ex = PSE.getBackedgeTakenCount();

    ScStart = AR->getStart();
    ScEnd = AR->evaluateAtIteration(Ex, *SE);
    const SCEV *Step = AR->getStepRecurrence(*SE);

    // For expressions with negative step, the upper bound is ScStart and the
    // lower bound is ScEnd.
    if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
      if (CStep->getValue()->isNegative())
        std::swap(ScStart, ScEnd);
    } else {
      // Fallback case: the step is not constant, but we can still
      // get the upper and lower bounds of the interval by using min/max
      // expressions.
      ScStart = SE->getUMinExpr(ScStart, ScEnd);
      ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
    }
  }
  // Add the size of the pointed element to ScEnd.
  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  Type *IdxTy = DL.getIndexType(Ptr->getType());
  const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
  ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);

  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
                        NeedsFreeze);
}

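/// Try to record a cheaper "difference" check for the pair of checking groups
/// \p CGI and \p CGJ instead of a full interval-overlap check. Conceptually,
/// such a check only needs to verify that the distance between the two start
/// addresses is large enough that the vectorized accesses cannot overlap.
/// This is only attempted for simple single-pointer, single-access groups
/// whose AddRecs share a constant step matching the allocation size; in all
/// other cases CanUseDiffCheck is cleared and the interval checks are used.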
void RuntimePointerChecking::tryToCreateDiffCheck(
    const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {
  if (!CanUseDiffCheck)
    return;

  // If either group contains multiple different pointers, bail out.
  // TODO: Support multiple pointers by using the minimum or maximum pointer,
  // depending on src & sink.
  if (CGI.Members.size() != 1 || CGJ.Members.size() != 1) {
    CanUseDiffCheck = false;
    return;
  }

  PointerInfo *Src = &Pointers[CGI.Members[0]];
  PointerInfo *Sink = &Pointers[CGJ.Members[0]];

  // If either pointer is read and written, multiple checks may be needed. Bail
  // out.
  if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() ||
      !DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty()) {
    CanUseDiffCheck = false;
    return;
  }

  ArrayRef<unsigned> AccSrc =
      DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr);
  ArrayRef<unsigned> AccSink =
      DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr);
  // If either pointer is accessed multiple times, there may not be a clear
  // src/sink relation. Bail out for now.
  if (AccSrc.size() != 1 || AccSink.size() != 1) {
    CanUseDiffCheck = false;
    return;
  }
  // If the sink is accessed before src, swap src/sink.
  if (AccSink[0] < AccSrc[0])
    std::swap(Src, Sink);

  auto *SrcAR = dyn_cast<SCEVAddRecExpr>(Src->Expr);
  auto *SinkAR = dyn_cast<SCEVAddRecExpr>(Sink->Expr);
  if (!SrcAR || !SinkAR || SrcAR->getLoop() != DC.getInnermostLoop() ||
      SinkAR->getLoop() != DC.getInnermostLoop()) {
    CanUseDiffCheck = false;
    return;
  }

  const DataLayout &DL =
      SinkAR->getLoop()->getHeader()->getModule()->getDataLayout();
  SmallVector<Instruction *, 4> SrcInsts =
      DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr);
  SmallVector<Instruction *, 4> SinkInsts =
      DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr);
  Type *SrcTy = getLoadStoreType(SrcInsts[0]);
  Type *DstTy = getLoadStoreType(SinkInsts[0]);
  if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy)) {
    CanUseDiffCheck = false;
    return;
  }
  unsigned AllocSize =
      std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));
  IntegerType *IntTy =
      IntegerType::get(Src->PointerValue->getContext(),
                       DL.getPointerSizeInBits(CGI.AddressSpace));

  // Only constant steps that match each other and the AllocSize are supported
  // at the moment. This simplifies the difference computation. Can be extended
  // in the future.
  auto *Step = dyn_cast<SCEVConstant>(SinkAR->getStepRecurrence(*SE));
  if (!Step || Step != SrcAR->getStepRecurrence(*SE) ||
      Step->getAPInt().abs() != AllocSize) {
    CanUseDiffCheck = false;
    return;
  }

  // When counting down, the dependence distance needs to be swapped.
  if (Step->getValue()->isNegative())
    std::swap(SinkAR, SrcAR);

  const SCEV *SinkStartInt = SE->getPtrToIntExpr(SinkAR->getStart(), IntTy);
  const SCEV *SrcStartInt = SE->getPtrToIntExpr(SrcAR->getStart(), IntTy);
  if (isa<SCEVCouldNotCompute>(SinkStartInt) ||
      isa<SCEVCouldNotCompute>(SrcStartInt)) {
    CanUseDiffCheck = false;
    return;
  }
  DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
                          Src->NeedsFreeze || Sink->NeedsFreeze);
}

SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {
  SmallVector<RuntimePointerCheck, 4> Checks;

  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
      const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
      const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];

      if (needsChecking(CGI, CGJ)) {
        tryToCreateDiffCheck(CGI, CGJ);
        Checks.push_back(std::make_pair(&CGI, &CGJ));
      }
    }
  }
  return Checks;
}

void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  assert(Checks.empty() && "Checks is not empty");
  groupChecks(DepCands, UseDependencies);
  Checks = generateChecks();
}

bool RuntimePointerChecking::needsChecking(
    const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
    for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
      if (needsChecking(M.Members[I], N.Members[J]))
        return true;
  return false;
}

/// Compare \p I and \p J and return the minimum.
/// Return nullptr in case we couldn't find an answer.
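///
/// For example, if J - I folds to the constant 4, \p I is the minimum; if it
/// folds to -4, \p J is; and if the difference is not a constant, nullptr is
/// returned.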
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {
  const SCEV *Diff = SE->getMinusSCEV(J, I);
  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);

  if (!C)
    return nullptr;
  if (C->getValue()->isNegative())
    return J;
  return I;
}

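/// Add the pointer at \p Index to this checking group, widening [Low, High)
/// as needed. This only succeeds when the new Start/End compare against the
/// current bounds by a constant offset; e.g. (illustratively) adding a pointer
/// whose Start is Low - 4 simply lowers the group's Low bound by 4 bytes.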
bool RuntimeCheckingPtrGroup::addPointer(unsigned Index,
                                         RuntimePointerChecking &RtCheck) {
  return addPointer(
      Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
      RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
      RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);
}

bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
                                         const SCEV *End, unsigned AS,
                                         bool NeedsFreeze,
                                         ScalarEvolution &SE) {
  assert(AddressSpace == AS &&
         "all pointers in a checking group must be in the same address space");

  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memchecks.
  const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);
  if (!Min0)
    return false;

  const SCEV *Min1 = getMinFromExprs(End, High, &SE);
  if (!Min1)
    return false;

  // Update the low bound expression if we've found a new min value.
  if (Min0 == Start)
    Low = Start;

  // Update the high bound expression if we've found a new max value.
  if (Min1 != End)
    High = End;

  Members.push_back(Index);
  this->NeedsFreeze |= NeedsFreeze;
  return true;
}

void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // We build the groups from the dependency candidates' equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memcheck. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.

  // We use the following (greedy) algorithm to construct the groups
  // For every pointer in the equivalence class:
  //   For each existing group:
  //   - if the difference between this pointer and the min/max bounds
  //     of the group is a constant, then make the pointer part of the
  //     group and update the min/max bounds of that group as required.
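  //
  // For example (illustrative): for pointers %a, %a + 4 and %b in one
  // equivalence class, %a and %a + 4 end up in a single group with widened
  // bounds, while %b, whose distance to those bounds is not a constant,
  // gets a group of its own.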

  CheckingGroups.clear();

  // If we need to check two pointers to the same underlying object
  // with a non-constant difference, we shouldn't perform any pointer
  // grouping with those pointers. This is because we can easily get
  // into cases where the resulting check would return false, even when
  // the accesses are safe.
  //
  // The following example shows this:
  // for (i = 0; i < 1000; ++i)
  //   a[5000 + i * m] = a[i] + a[i + 9000]
  //
  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
  // (0, 10000) which is always false. However, if m is 1, there is no
  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
  // us to perform an accurate check in this case.
  //
  // The above case requires that we have an UnknownDependence between
  // accesses to the same underlying object. This cannot happen unless
  // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
  // is also false. In this case we will use the fallback path and create
  // separate checking groups for all pointers.

  // If we don't have the dependency partitions, construct a new
  // checking pointer group for each pointer. This is also required
  // for correctness, because in this case we can have checking between
  // pointers to the same underlying object.
  if (!UseDependencies) {
    for (unsigned I = 0; I < Pointers.size(); ++I)
      CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
    return;
  }

  unsigned TotalComparisons = 0;

  DenseMap<Value *, SmallVector<unsigned>> PositionMap;
  for (unsigned Index = 0; Index < Pointers.size(); ++Index) {
    auto Iter = PositionMap.insert({Pointers[Index].PointerValue, {}});
    Iter.first->second.push_back(Index);
  }

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and therefore already processed
    // its equivalence class.
    if (Seen.count(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
                                           Pointers[I].IsWritePtr);

    SmallVector<RuntimeCheckingPtrGroup, 2> Groups;
    auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
         MI != ME; ++MI) {
      auto PointerI = PositionMap.find(MI->getPointer());
      assert(PointerI != PositionMap.end() &&
             "pointer in equivalence class not found in PositionMap");
      for (unsigned Pointer : PointerI->second) {
        bool Merged = false;
        // Mark this pointer as seen.
        Seen.insert(Pointer);

        // Go through all the existing sets and see if we can find one
        // which can include this pointer.
        for (RuntimeCheckingPtrGroup &Group : Groups) {
          // Don't perform more than a certain amount of comparisons.
          // This should limit the cost of grouping the pointers to something
          // reasonable. If we do end up hitting this threshold, the algorithm
          // will create separate groups for all remaining pointers.
          if (TotalComparisons > MemoryCheckMergeThreshold)
            break;

          TotalComparisons++;

          if (Group.addPointer(Pointer, *this)) {
            Merged = true;
            break;
          }
        }

        if (!Merged)
          // We couldn't add this pointer to any existing set or the threshold
          // for the number of comparisons has been reached. Create a new group
          // to hold the current pointer.
          Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
      }
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    llvm::copy(Groups, std::back_inserter(CheckingGroups));
  }
}

bool RuntimePointerChecking::arePointersInSamePartition(
    const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
    unsigned PtrIdx2) {
  return (PtrToPartition[PtrIdx1] != -1 &&
          PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
}

bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
  const PointerInfo &PointerI = Pointers[I];
  const PointerInfo &PointerJ = Pointers[J];

  // No need to check if two readonly pointers intersect.
  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
    return false;

  // Only need to check pointers between two different dependency sets.
  if (PointerI.DependencySetId == PointerJ.DependencySetId)
    return false;

  // Only need to check pointers in the same alias set.
  if (PointerI.AliasSetId != PointerJ.AliasSetId)
    return false;

  return true;
}

void RuntimePointerChecking::printChecks(
    raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks,
    unsigned Depth) const {
  unsigned N = 0;
  for (const auto &Check : Checks) {
    const auto &First = Check.first->Members, &Second = Check.second->Members;

    OS.indent(Depth) << "Check " << N++ << ":\n";

    OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
    for (unsigned K = 0; K < First.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";

    OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
    for (unsigned K = 0; K < Second.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
  }
}

void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
  OS.indent(Depth) << "Run-time memory checks:\n";
  printChecks(OS, Checks, Depth);

  OS.indent(Depth) << "Grouped accesses:\n";
  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    const auto &CG = CheckingGroups[I];

    OS.indent(Depth + 2) << "Group " << &CG << ":\n";
    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
                         << ")\n";
    for (unsigned J = 0; J < CG.Members.size(); ++J) {
      OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
                           << "\n";
    }
  }
}

namespace {

/// Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
  /// Read or write access location.
  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;

  AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
                 MemoryDepChecker::DepCandidates &DA,
                 PredicatedScalarEvolution &PSE)
      : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {}

  /// Register a load and whether it is only read from.
  void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
    Value *Ptr = const_cast<Value *>(Loc.Ptr);
    AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
    Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
    if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }

  /// Register a store.
  void addStore(MemoryLocation &Loc, Type *AccessTy) {
    Value *Ptr = const_cast<Value *>(Loc.Ptr);
    AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
    Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
  }

  /// Check if we can emit a run-time no-alias check for \p Access.
  ///
  /// Returns true if we can emit a run-time no alias check for \p Access.
  /// If we can check this access, this also adds it to a dependence set and
  /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
  /// we will attempt to use additional run-time checks in order to get
  /// the bounds of the pointer.
  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
                            MemAccessInfo Access, Type *AccessTy,
                            const ValueToValueMap &Strides,
                            DenseMap<Value *, unsigned> &DepSetId,
                            Loop *TheLoop, unsigned &RunningDepId,
                            unsigned ASId, bool ShouldCheckStride, bool Assume);

  /// Check whether we can check the pointers at runtime for
  /// non-intersection.
  ///
  /// Returns true if we need no check or if we do and we can generate them
  /// (i.e. the pointers have computable bounds).
  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
                       Loop *TheLoop, const ValueToValueMap &Strides,
                       Value *&UncomputablePtr, bool ShouldCheckWrap = false);

  /// Goes over all memory accesses, checks whether an RT check is needed
  /// and builds sets of dependent accesses.
  void buildDependenceSets() {
    processMemAccesses();
  }

  /// Initial processing of memory accesses determined that we need to
  /// perform dependency checking.
  ///
  /// Note that this can later be cleared if we retry memcheck analysis without
  /// dependency checking (i.e. FoundNonConstantDistanceDependence).
  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }

  /// We decided that no dependence analysis would be used. Reset the state.
  void resetDepChecks(MemoryDepChecker &DepChecker) {
    CheckDeps.clear();
    DepChecker.clearDependences();
  }

  MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }

private:
  typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;

  /// Go over all memory accesses and check whether runtime pointer checks
  /// are needed and build sets of dependency check candidates.
  void processMemAccesses();

  /// Map of all accesses. Values are the types used to access memory pointed
  /// to by the pointer.
  PtrAccessMap Accesses;

  /// The loop being checked.
  const Loop *TheLoop;

  /// List of accesses that need a further dependence check.
  MemAccessInfoList CheckDeps;

  /// Set of pointers that are read only.
  SmallPtrSet<Value *, 16> ReadOnlyPtr;

  /// An alias set tracker to partition the access set by underlying object and
  /// intrinsic property (such as TBAA metadata).
  AliasSetTracker AST;

  LoopInfo *LI;

  /// Sets of potentially dependent accesses - members of one set share an
  /// underlying pointer. The set "CheckDeps" identifies which sets really need
  /// a dependence check.
  MemoryDepChecker::DepCandidates &DepCands;

  /// Initial processing of memory accesses determined that we may need
  /// to add memchecks. Perform the analysis to determine the necessary checks.
  ///
  /// Note that this is different from isDependencyCheckNeeded. When we retry
  /// memcheck analysis without dependency checking
  /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
  /// cleared while this remains set if we have potentially dependent accesses.
  bool IsRTCheckAnalysisNeeded = false;

  /// The SCEV predicate containing all the SCEV-related assumptions.
  PredicatedScalarEvolution &PSE;
};

} // end anonymous namespace

/// Check whether a pointer can participate in a runtime bounds check.
/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
/// by adding run-time checks (overflow checks) if necessary.
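///
/// A loop-invariant pointer trivially has computable bounds, as does an
/// affine AddRec such as {%base,+,4}<%loop> (an illustrative example); any
/// other pointer expression does not.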
static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
                                const SCEV *PtrScev, Loop *L, bool Assume) {
  // The bounds for a loop-invariant pointer are trivial.
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);

  if (!AR && Assume)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR)
    return false;

  return AR->isAffine();
}

/// Check whether a pointer address cannot wrap.
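///
/// A unit-stride pointer is treated as non-wrapping, e.g. (illustratively)
/// one whose SCEV is {%base,+,1}<%loop> in units of the access type;
/// otherwise we rely on a no-overflow predicate (IncrementNUSW) recorded
/// in \p PSE.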
static bool isNoWrap(PredicatedScalarEvolution &PSE,
                     const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy,
                     Loop *L) {
  const SCEV *PtrScev = PSE.getSCEV(Ptr);
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
  if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
    return true;

  return false;
}

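/// Walk from \p StartPtr and invoke \p AddPointer on each pointer leaf.
/// Non-header PHIs inside \p InnermostLoop are looked through, since SCEV
/// cannot analyze them as a whole; e.g. (an illustrative example) for
///
///   %ptr = phi double* [ %a, %if.then ], [ %b, %if.else ]
///
/// inside the loop, AddPointer is invoked separately on %a and on %b.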
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
                          function_ref<void(Value *)> AddPointer) {
  SmallPtrSet<Value *, 8> Visited;
  SmallVector<Value *> WorkList;
  WorkList.push_back(StartPtr);

  while (!WorkList.empty()) {
    Value *Ptr = WorkList.pop_back_val();
    if (!Visited.insert(Ptr).second)
      continue;
    auto *PN = dyn_cast<PHINode>(Ptr);
    // SCEV does not look through non-header PHIs inside the loop. Such phis
    // can be analyzed by adding separate accesses for each incoming pointer
    // value.
    if (PN && InnermostLoop.contains(PN->getParent()) &&
        PN->getParent() != InnermostLoop.getHeader()) {
      for (const Use &Inc : PN->incoming_values())
        WorkList.push_back(Inc);
    } else
      AddPointer(Ptr);
  }
}

// Walk back through the IR for a pointer, looking for a select like the
// following:
//
//  %offset = select i1 %cmp, i64 %a, i64 %b
//  %addr = getelementptr double, double* %base, i64 %offset
//  %ld = load double, double* %addr, align 8
//
// We won't be able to form a single SCEVAddRecExpr from this since the
// address for each loop iteration depends on %cmp. We could potentially
// produce multiple valid SCEVAddRecExprs, though, and check all of them for
// memory safety/aliasing if needed.
//
// If we encounter some IR we don't yet handle, or something obviously fine
// like a constant, then we just add the SCEV for that term to the list passed
// in by the caller. If we have a node that may potentially yield a valid
// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
// ourselves before adding to the list.
static void
findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr,
                SmallVectorImpl<std::pair<const SCEV *, bool>> &ScevList,
                unsigned Depth) {
  // If our Value is a SCEVAddRecExpr, loop invariant, not an instruction, or
  // we've exceeded our limit on recursion, just return whatever we have
  // regardless of whether it can be used for a forked pointer or not, along
  // with an indication of whether it might be a poison or undef value.
  const SCEV *Scev = SE->getSCEV(Ptr);
  if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) ||
      !isa<Instruction>(Ptr) || Depth == 0) {
    ScevList.push_back(
        std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
    return;
  }

  Depth--;

  auto UndefPoisonCheck = [](std::pair<const SCEV *, bool> S) -> bool {
    return S.second;
  };

  Instruction *I = cast<Instruction>(Ptr);
  unsigned Opcode = I->getOpcode();
  switch (Opcode) {
  case Instruction::GetElementPtr: {
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
    Type *SourceTy = GEP->getSourceElementType();
    // We only handle base + single offset GEPs here for now.
    // Not dealing with preexisting gathers yet, so no vectors.
    if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
      ScevList.push_back(
          std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP)));
      break;
    }
    SmallVector<std::pair<const SCEV *, bool>, 2> BaseScevs;
    SmallVector<std::pair<const SCEV *, bool>, 2> OffsetScevs;
    findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth);
    findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth);

    // See if we need to freeze our fork...
    bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) ||
                       any_of(OffsetScevs, UndefPoisonCheck);

    // Check that we only have a single fork, on either the base or the offset.
    // Copy the SCEV across for the one without a fork in order to generate
    // the full SCEV for both sides of the GEP.
    if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)
      BaseScevs.push_back(BaseScevs[0]);
    else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)
      OffsetScevs.push_back(OffsetScevs[0]);
    else {
      ScevList.push_back(std::make_pair(Scev, NeedsFreeze));
      break;
    }

    // Find the pointer type we need to extend to.
    Type *IntPtrTy = SE->getEffectiveSCEVType(
        SE->getSCEV(GEP->getPointerOperand())->getType());

    // Find the size of the type being pointed to. We only have a single
    // index term (guarded above) so we don't need to index into arrays or
    // structures, just get the size of the scalar value.
    const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy);

    // Scale up the offsets by the size of the type, then add to the bases.
    const SCEV *Scaled1 = SE->getMulExpr(
        Size, SE->getTruncateOrSignExtend(OffsetScevs[0].first, IntPtrTy));
    const SCEV *Scaled2 = SE->getMulExpr(
        Size, SE->getTruncateOrSignExtend(OffsetScevs[1].first, IntPtrTy));
    ScevList.push_back(std::make_pair(
        SE->getAddExpr(BaseScevs[0].first, Scaled1), NeedsFreeze));
    ScevList.push_back(std::make_pair(
        SE->getAddExpr(BaseScevs[1].first, Scaled2), NeedsFreeze));
    break;
  }
  case Instruction::Select: {
    SmallVector<std::pair<const SCEV *, bool>, 2> ChildScevs;
    // A select means we've found a forked pointer, but we currently only
    // support a single select per pointer so if there's another behind this
    // then we just bail out and return the generic SCEV.
    findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
    findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth);
    if (ChildScevs.size() == 2) {
      ScevList.push_back(ChildScevs[0]);
      ScevList.push_back(ChildScevs[1]);
    } else
      ScevList.push_back(
          std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
    break;
  }
  default:
    // Just return the current SCEV if we haven't handled the instruction yet.
    LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
    ScevList.push_back(
        std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
    break;
  }
}

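/// Find up to two candidate SCEVs for \p Ptr by decomposing a single select
/// in its computation, as described above. For example (illustrative), for
///
///   %offset = select i1 %c, i64 %a, i64 %b
///   %addr = getelementptr double, double* %base, i64 %offset
///
/// this returns roughly the two SCEVs (%base + 8 * %a) and (%base + 8 * %b),
/// each paired with whether it may need a freeze. If no single fork is found,
/// the one symbolic-stride-replaced SCEV for \p Ptr is returned instead.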
906db8fcb2cSGraham Hunter static SmallVector<std::pair<const SCEV *, bool>>
findForkedPointer(PredicatedScalarEvolution & PSE,const ValueToValueMap & StridesMap,Value * Ptr,const Loop * L)907db8fcb2cSGraham Hunter findForkedPointer(PredicatedScalarEvolution &PSE,
908db8fcb2cSGraham Hunter const ValueToValueMap &StridesMap, Value *Ptr,
909db8fcb2cSGraham Hunter const Loop *L) {
910db8fcb2cSGraham Hunter ScalarEvolution *SE = PSE.getSE();
911db8fcb2cSGraham Hunter assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
9124bd072c5SBenjamin Kramer SmallVector<std::pair<const SCEV *, bool>> Scevs;
913db8fcb2cSGraham Hunter findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth);
914db8fcb2cSGraham Hunter
915db8fcb2cSGraham Hunter // For now, we will only accept a forked pointer with two possible SCEVs.
916db8fcb2cSGraham Hunter if (Scevs.size() == 2)
917db8fcb2cSGraham Hunter return Scevs;
918db8fcb2cSGraham Hunter
919db8fcb2cSGraham Hunter return {
920db8fcb2cSGraham Hunter std::make_pair(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false)};
921db8fcb2cSGraham Hunter }
922db8fcb2cSGraham Hunter
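// For illustration (a sketch using the hypothetical forked pointer above):
// createCheckForAccess inserts one RtCheck entry per translated SCEV, and
// both entries carry the same dependence-set id and alias-set id, so each
// fork is bounds-checked against the other pointers in the alias set while
// the two forks are never checked against each other.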
923ac920f77SSilviu Baranga bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
924ff31020eSArthur Eubanks MemAccessInfo Access, Type *AccessTy,
925ac920f77SSilviu Baranga const ValueToValueMap &StridesMap,
926ac920f77SSilviu Baranga DenseMap<Value *, unsigned> &DepSetId,
927ac920f77SSilviu Baranga Loop *TheLoop, unsigned &RunningDepId,
928ac920f77SSilviu Baranga unsigned ASId, bool ShouldCheckWrap,
929ac920f77SSilviu Baranga bool Assume) {
930ac920f77SSilviu Baranga Value *Ptr = Access.getPointer();
931ac920f77SSilviu Baranga
932db8fcb2cSGraham Hunter SmallVector<std::pair<const SCEV *, bool>> TranslatedPtrs =
933db8fcb2cSGraham Hunter findForkedPointer(PSE, StridesMap, Ptr, TheLoop);
934e9cced27SFlorian Hahn
935e9cced27SFlorian Hahn for (auto &P : TranslatedPtrs) {
936e9cced27SFlorian Hahn const SCEV *PtrExpr = P.first;
937e9cced27SFlorian Hahn if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume))
938ac920f77SSilviu Baranga return false;
939ac920f77SSilviu Baranga
940ac920f77SSilviu Baranga // When we run after a failing dependency check, we have to make sure
941ac920f77SSilviu Baranga // we don't have wrapping pointers.
942e9cced27SFlorian Hahn if (ShouldCheckWrap) {
943e9cced27SFlorian Hahn // Skip wrap checking when translating pointers.
944e9cced27SFlorian Hahn if (TranslatedPtrs.size() > 1)
945e9cced27SFlorian Hahn return false;
946e9cced27SFlorian Hahn
947e9cced27SFlorian Hahn if (!isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
948ac920f77SSilviu Baranga auto *Expr = PSE.getSCEV(Ptr);
949ac920f77SSilviu Baranga if (!Assume || !isa<SCEVAddRecExpr>(Expr))
950ac920f77SSilviu Baranga return false;
951ac920f77SSilviu Baranga PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
952ac920f77SSilviu Baranga }
953e9cced27SFlorian Hahn }
954e9cced27SFlorian Hahn // If there's only one option for Ptr, look it up after bounds and wrap
955e9cced27SFlorian Hahn // checking, because assumptions might have been added to PSE.
956e9cced27SFlorian Hahn if (TranslatedPtrs.size() == 1)
957e9cced27SFlorian Hahn TranslatedPtrs[0] = std::make_pair(
958e9cced27SFlorian Hahn replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false);
959e9cced27SFlorian Hahn }
960e9cced27SFlorian Hahn
961e9cced27SFlorian Hahn for (auto &P : TranslatedPtrs) {
962e9cced27SFlorian Hahn const SCEV *PtrExpr = P.first;
963ac920f77SSilviu Baranga
964ac920f77SSilviu Baranga // The id of the dependence set.
965ac920f77SSilviu Baranga unsigned DepId;
966ac920f77SSilviu Baranga
967ac920f77SSilviu Baranga if (isDependencyCheckNeeded()) {
968ac920f77SSilviu Baranga Value *Leader = DepCands.getLeaderValue(Access).getPointer();
969ac920f77SSilviu Baranga unsigned &LeaderId = DepSetId[Leader];
970ac920f77SSilviu Baranga if (!LeaderId)
971ac920f77SSilviu Baranga LeaderId = RunningDepId++;
972ac920f77SSilviu Baranga DepId = LeaderId;
973ac920f77SSilviu Baranga } else
974ac920f77SSilviu Baranga // Each access has its own dependence set.
975ac920f77SSilviu Baranga DepId = RunningDepId++;
976ac920f77SSilviu Baranga
977ac920f77SSilviu Baranga bool IsWrite = Access.getInt();
978e9cced27SFlorian Hahn RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
979e9cced27SFlorian Hahn P.second);
980d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
981e9cced27SFlorian Hahn }
982ac920f77SSilviu Baranga
983ac920f77SSilviu Baranga return true;
9849f02c586SAndrey Turetskiy }
9859f02c586SAndrey Turetskiy
9867cdebac0SAdam Nemet bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
9877cdebac0SAdam Nemet ScalarEvolution *SE, Loop *TheLoop,
9887cdebac0SAdam Nemet const ValueToValueMap &StridesMap,
9899f1c6fbfSMalhar Jajoo Value *&UncomputablePtr, bool ShouldCheckWrap) {
9900456327cSAdam Nemet // Find pointers with computable bounds. We are going to use this information
9910456327cSAdam Nemet // to place a runtime bound check.
9920456327cSAdam Nemet bool CanDoRT = true;
9930456327cSAdam Nemet
9949b507b21SFlorian Hahn bool MayNeedRTCheck = false;
9952d038982SFlorian Hahn if (!IsRTCheckAnalysisNeeded) return true;
99698a13719SSilviu Baranga
9970456327cSAdam Nemet bool IsDepCheckNeeded = isDependencyCheckNeeded();
9980456327cSAdam Nemet
9990456327cSAdam Nemet // We assign a consecutive id to accesses from different alias sets.
10000456327cSAdam Nemet // Accesses between different groups don't need to be checked.
10016176f044SFlorian Hahn unsigned ASId = 0;
10020456327cSAdam Nemet for (auto &AS : AST) {
1003424edc6cSAdam Nemet int NumReadPtrChecks = 0;
1004424edc6cSAdam Nemet int NumWritePtrChecks = 0;
1005ac920f77SSilviu Baranga bool CanDoAliasSetRT = true;
10066176f044SFlorian Hahn ++ASId;
1007424edc6cSAdam Nemet
10080456327cSAdam Nemet // We assign a consecutive id to accesses from different dependence sets.
10090456327cSAdam Nemet // Accesses within the same set don't need a runtime check.
10100456327cSAdam Nemet unsigned RunningDepId = 1;
10110456327cSAdam Nemet DenseMap<Value *, unsigned> DepSetId;
10120456327cSAdam Nemet
101304d398dbSArthur Eubanks SmallVector<std::pair<MemAccessInfo, Type *>, 4> Retries;
1014ac920f77SSilviu Baranga
10152062b370SFlorian Hahn // First, count how many write and read accesses are in the alias set. Also
10162062b370SFlorian Hahn // collect MemAccessInfos for later.
10172062b370SFlorian Hahn SmallVector<MemAccessInfo, 4> AccessInfos;
101871b89b14SSimon Pilgrim for (const auto &A : AS) {
10190456327cSAdam Nemet Value *Ptr = A.getValue();
10200456327cSAdam Nemet bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
10210456327cSAdam Nemet
1022424edc6cSAdam Nemet if (IsWrite)
1023424edc6cSAdam Nemet ++NumWritePtrChecks;
1024424edc6cSAdam Nemet else
1025424edc6cSAdam Nemet ++NumReadPtrChecks;
10262062b370SFlorian Hahn AccessInfos.emplace_back(Ptr, IsWrite);
10270456327cSAdam Nemet }
10280456327cSAdam Nemet
10292062b370SFlorian Hahn // We do not need runtime checks for this alias set if there are no writes,
10302062b370SFlorian Hahn // or there is a single write and no reads.
10312062b370SFlorian Hahn if (NumWritePtrChecks == 0 ||
10322062b370SFlorian Hahn (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
10336176f044SFlorian Hahn assert((AS.size() <= 1 ||
10346176f044SFlorian Hahn all_of(AS,
10356176f044SFlorian Hahn [this](auto AC) {
10366176f044SFlorian Hahn MemAccessInfo AccessWrite(AC.getValue(), true);
10372062b370SFlorian Hahn return DepCands.findValue(AccessWrite) == DepCands.end();
10386176f044SFlorian Hahn })) &&
10396176f044SFlorian Hahn "Can only skip updating CanDoRT below, if all entries in AS "
10406176f044SFlorian Hahn "are reads or there is at most 1 entry");
10416176f044SFlorian Hahn continue;
10426176f044SFlorian Hahn }
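// For illustration (hypothetical source): an alias set coming from
//   for (i = 0; i < n; i++)
//     sum += A[i] + B[i];
// contains no writes, so the 'continue' above skips it and no runtime
// checks are generated for its pointers.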
10432062b370SFlorian Hahn
10442062b370SFlorian Hahn for (auto &Access : AccessInfos) {
1045601b3a13SKazu Hirata for (const auto &AccessTy : Accesses[Access]) {
1046ff31020eSArthur Eubanks if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1047ff31020eSArthur Eubanks DepSetId, TheLoop, RunningDepId, ASId,
1048ff31020eSArthur Eubanks ShouldCheckWrap, false)) {
10492062b370SFlorian Hahn LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
10502062b370SFlorian Hahn << *Access.getPointer() << '\n');
105104d398dbSArthur Eubanks Retries.push_back({Access, AccessTy});
10522062b370SFlorian Hahn CanDoAliasSetRT = false;
10536176f044SFlorian Hahn }
10542062b370SFlorian Hahn }
1055ff31020eSArthur Eubanks }
10562062b370SFlorian Hahn
10572062b370SFlorian Hahn // Note that this function computes CanDoRT and MayNeedRTCheck
10582062b370SFlorian Hahn // independently. For example CanDoRT=false, MayNeedRTCheck=false means that
10592062b370SFlorian Hahn // we have a pointer for which we couldn't find the bounds, but we don't
10602062b370SFlorian Hahn // actually need to emit any checks, so it does not matter.
10612062b370SFlorian Hahn //
10622062b370SFlorian Hahn // We need runtime checks for this alias set, if there are at least 2
10632062b370SFlorian Hahn // dependence sets (in which case RunningDepId > 2) or if we need to re-try
10642062b370SFlorian Hahn // any bound checks (because in that case the number of dependence sets is
10652062b370SFlorian Hahn // incomplete).
10662062b370SFlorian Hahn bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();
1067424edc6cSAdam Nemet
1068ac920f77SSilviu Baranga // We need to perform run-time alias checks, but some pointers had bounds
1069ac920f77SSilviu Baranga // that couldn't be checked.
1070ac920f77SSilviu Baranga if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
1071ac920f77SSilviu Baranga // Reset the CanDoAliasSetRT flag and retry all accesses that have failed.
1072ac920f77SSilviu Baranga // We know that we need these checks, so we can now be more aggressive
1073ac920f77SSilviu Baranga // and add further checks if required (overflow checks).
1074ac920f77SSilviu Baranga CanDoAliasSetRT = true;
107504d398dbSArthur Eubanks for (auto Retry : Retries) {
107604d398dbSArthur Eubanks MemAccessInfo Access = Retry.first;
107704d398dbSArthur Eubanks Type *AccessTy = Retry.second;
1078ff31020eSArthur Eubanks if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1079ff31020eSArthur Eubanks DepSetId, TheLoop, RunningDepId, ASId,
1080ac920f77SSilviu Baranga ShouldCheckWrap, /*Assume=*/true)) {
1081ac920f77SSilviu Baranga CanDoAliasSetRT = false;
10829f1c6fbfSMalhar Jajoo UncomputablePtr = Access.getPointer();
1083ac920f77SSilviu Baranga break;
1084ac920f77SSilviu Baranga }
1085ac920f77SSilviu Baranga }
1086ff31020eSArthur Eubanks }
1087ac920f77SSilviu Baranga
1088ac920f77SSilviu Baranga CanDoRT &= CanDoAliasSetRT;
10899b507b21SFlorian Hahn MayNeedRTCheck |= NeedsAliasSetRTCheck;
10900456327cSAdam Nemet ++ASId;
10910456327cSAdam Nemet }
10920456327cSAdam Nemet
10930456327cSAdam Nemet // If the pointers that we would use for the bounds comparison have different
10940456327cSAdam Nemet // address spaces, assume the values aren't directly comparable, so we can't
10950456327cSAdam Nemet // use them for the runtime check. We also have to assume they could
10960456327cSAdam Nemet // overlap. In the future there should be metadata for whether address spaces
10970456327cSAdam Nemet // are disjoint.
10980456327cSAdam Nemet unsigned NumPointers = RtCheck.Pointers.size();
10990456327cSAdam Nemet for (unsigned i = 0; i < NumPointers; ++i) {
11000456327cSAdam Nemet for (unsigned j = i + 1; j < NumPointers; ++j) {
11010456327cSAdam Nemet // Only need to check pointers between two different dependency sets.
11029f7dedc3SAdam Nemet if (RtCheck.Pointers[i].DependencySetId ==
11039f7dedc3SAdam Nemet RtCheck.Pointers[j].DependencySetId)
11040456327cSAdam Nemet continue;
11050456327cSAdam Nemet // Only need to check pointers in the same alias set.
11069f7dedc3SAdam Nemet if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
11070456327cSAdam Nemet continue;
11080456327cSAdam Nemet
11099f7dedc3SAdam Nemet Value *PtrI = RtCheck.Pointers[i].PointerValue;
11109f7dedc3SAdam Nemet Value *PtrJ = RtCheck.Pointers[j].PointerValue;
11110456327cSAdam Nemet
11120456327cSAdam Nemet unsigned ASi = PtrI->getType()->getPointerAddressSpace();
11130456327cSAdam Nemet unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
11140456327cSAdam Nemet if (ASi != ASj) {
1115d34e60caSNicola Zaghen LLVM_DEBUG(
1116d34e60caSNicola Zaghen dbgs() << "LAA: Runtime check would require comparison between"
11170456327cSAdam Nemet " different address spaces\n");
11180456327cSAdam Nemet return false;
11190456327cSAdam Nemet }
11200456327cSAdam Nemet }
11210456327cSAdam Nemet }
11220456327cSAdam Nemet
11239b507b21SFlorian Hahn if (MayNeedRTCheck && CanDoRT)
112415840393SAdam Nemet RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
11251b6b50a9SSilviu Baranga
1126d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
1127ee61474aSAdam Nemet << " pointer comparisons.\n");
1128ee61474aSAdam Nemet
11299b507b21SFlorian Hahn // If we can do run-time checks, but there are no checks, no runtime checks
11309b507b21SFlorian Hahn // are needed. This can happen when all pointers point to the same underlying
11319b507b21SFlorian Hahn // object for example.
11329b507b21SFlorian Hahn RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck;
1133ee61474aSAdam Nemet
11349b507b21SFlorian Hahn bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
1135ee61474aSAdam Nemet if (!CanDoRTIfNeeded)
1136ee61474aSAdam Nemet RtCheck.reset();
1137ee61474aSAdam Nemet return CanDoRTIfNeeded;
11380456327cSAdam Nemet }
11390456327cSAdam Nemet
11400456327cSAdam Nemet void AccessAnalysis::processMemAccesses() {
11410456327cSAdam Nemet // We process the set twice: first we process read-write pointers, then we
11420456327cSAdam Nemet // process read-only pointers. This allows us to skip dependence tests for
11430456327cSAdam Nemet // read-only pointers.
11440456327cSAdam Nemet
1145d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
1146d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << " AST: "; AST.dump());
1147d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
1148d34e60caSNicola Zaghen LLVM_DEBUG({
11490456327cSAdam Nemet for (auto A : Accesses)
1150ff31020eSArthur Eubanks dbgs() << "\t" << *A.first.getPointer() << " ("
1151ff31020eSArthur Eubanks << (A.first.getInt()
1152ff31020eSArthur Eubanks ? "write"
1153ff31020eSArthur Eubanks : (ReadOnlyPtr.count(A.first.getPointer()) ? "read-only"
1154ff31020eSArthur Eubanks : "read"))
1155ff31020eSArthur Eubanks << ")\n";
11560456327cSAdam Nemet });
11570456327cSAdam Nemet
11580456327cSAdam Nemet // The AliasSetTracker has nicely partitioned our pointers by metadata
11590456327cSAdam Nemet // compatibility and potential for underlying-object overlap. As a result, we
11600456327cSAdam Nemet // only need to check for potential pointer dependencies within each alias
11610456327cSAdam Nemet // set.
116271b89b14SSimon Pilgrim for (const auto &AS : AST) {
11630456327cSAdam Nemet // Note that both the alias-set tracker and the alias sets themselves use
11640456327cSAdam Nemet // linked lists internally, so the iteration order here is deterministic
11650456327cSAdam Nemet // (matching the original instruction order within each set).
11660456327cSAdam Nemet
11670456327cSAdam Nemet bool SetHasWrite = false;
11680456327cSAdam Nemet
11690456327cSAdam Nemet // Map of pointers to last access encountered.
117071e8c6f2SBjorn Pettersson typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
11710456327cSAdam Nemet UnderlyingObjToAccessMap ObjToLastAccess;
11720456327cSAdam Nemet
11730456327cSAdam Nemet // Set of accesses to check after all writes have been processed.
1174ff31020eSArthur Eubanks PtrAccessMap DeferredAccesses;
11750456327cSAdam Nemet
11760456327cSAdam Nemet // Iterate over each alias set twice, once to process read/write pointers,
11770456327cSAdam Nemet // and then to process read-only pointers.
11780456327cSAdam Nemet for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
11790456327cSAdam Nemet bool UseDeferred = SetIteration > 0;
1180ff31020eSArthur Eubanks PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
11810456327cSAdam Nemet
118271b89b14SSimon Pilgrim for (const auto &AV : AS) {
11830456327cSAdam Nemet Value *Ptr = AV.getValue();
11840456327cSAdam Nemet
11850456327cSAdam Nemet // For a single memory access in AliasSetTracker, Accesses may contain
11860456327cSAdam Nemet // both read and write, and they both need to be handled for CheckDeps.
118771b89b14SSimon Pilgrim for (const auto &AC : S) {
1188ff31020eSArthur Eubanks if (AC.first.getPointer() != Ptr)
11890456327cSAdam Nemet continue;
11900456327cSAdam Nemet
1191ff31020eSArthur Eubanks bool IsWrite = AC.first.getInt();
11920456327cSAdam Nemet
11930456327cSAdam Nemet // If we're using the deferred access set, then it contains only
11940456327cSAdam Nemet // reads.
11950456327cSAdam Nemet bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
11960456327cSAdam Nemet if (UseDeferred && !IsReadOnlyPtr)
11970456327cSAdam Nemet continue;
11980456327cSAdam Nemet // Otherwise, the pointer must be in the PtrAccessSet, either as a
11990456327cSAdam Nemet // read or a write.
12000456327cSAdam Nemet assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
12010456327cSAdam Nemet S.count(MemAccessInfo(Ptr, false))) &&
12020456327cSAdam Nemet "Alias-set pointer not in the access set?");
12030456327cSAdam Nemet
12040456327cSAdam Nemet MemAccessInfo Access(Ptr, IsWrite);
12050456327cSAdam Nemet DepCands.insert(Access);
12060456327cSAdam Nemet
12070456327cSAdam Nemet // Memorize read-only pointers for later processing and skip them in
12080456327cSAdam Nemet // the first round (they need to be checked after we have seen all
12090456327cSAdam Nemet // write pointers). Note: we also mark pointers that are not
12100456327cSAdam Nemet // consecutive as "read-only" pointers (so that we check
12110456327cSAdam Nemet // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
12120456327cSAdam Nemet if (!UseDeferred && IsReadOnlyPtr) {
1213ff31020eSArthur Eubanks // We only use the pointer keys; the types vector values don't
1214ff31020eSArthur Eubanks // matter.
1215ff31020eSArthur Eubanks DeferredAccesses.insert({Access, {}});
12160456327cSAdam Nemet continue;
12170456327cSAdam Nemet }
12180456327cSAdam Nemet
12190456327cSAdam Nemet // If this is a write, check other reads and writes for conflicts. If
12200456327cSAdam Nemet // this is a read, only check other writes for conflicts (but only if
12210456327cSAdam Nemet // there is no other write to the ptr - this is an optimization to
12220456327cSAdam Nemet // catch "a[i] = a[i] + " without having to do a dependence check).
12230456327cSAdam Nemet if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
12245448e989SAmjad Aboud CheckDeps.push_back(Access);
12255dc3b2cfSAdam Nemet IsRTCheckAnalysisNeeded = true;
12260456327cSAdam Nemet }
12270456327cSAdam Nemet
12280456327cSAdam Nemet if (IsWrite)
12290456327cSAdam Nemet SetHasWrite = true;
12300456327cSAdam Nemet
12310456327cSAdam Nemet // Create sets of pointers connected by a shared alias set and
12320456327cSAdam Nemet // underlying object.
123371e8c6f2SBjorn Pettersson typedef SmallVector<const Value *, 16> ValueVector;
12340456327cSAdam Nemet ValueVector TempObjects;
1235e2b885c4SAdam Nemet
1236b0eb40caSVitaly Buka getUnderlyingObjects(Ptr, TempObjects, LI);
1237d34e60caSNicola Zaghen LLVM_DEBUG(dbgs()
1238d34e60caSNicola Zaghen << "Underlying objects for pointer " << *Ptr << "\n");
123971e8c6f2SBjorn Pettersson for (const Value *UnderlyingObj : TempObjects) {
1240afd13519SMehdi Amini // nullptr never aliases; don't join sets for pointers that have "null"
1241afd13519SMehdi Amini // in their UnderlyingObjects list.
124277eeac3dSManoj Gupta if (isa<ConstantPointerNull>(UnderlyingObj) &&
124377eeac3dSManoj Gupta !NullPointerIsDefined(
124477eeac3dSManoj Gupta TheLoop->getHeader()->getParent(),
124577eeac3dSManoj Gupta UnderlyingObj->getType()->getPointerAddressSpace()))
1246afd13519SMehdi Amini continue;
1247afd13519SMehdi Amini
12480456327cSAdam Nemet UnderlyingObjToAccessMap::iterator Prev =
12490456327cSAdam Nemet ObjToLastAccess.find(UnderlyingObj);
12500456327cSAdam Nemet if (Prev != ObjToLastAccess.end())
12510456327cSAdam Nemet DepCands.unionSets(Access, Prev->second);
12520456327cSAdam Nemet
12530456327cSAdam Nemet ObjToLastAccess[UnderlyingObj] = Access;
1254d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
12550456327cSAdam Nemet }
12560456327cSAdam Nemet }
12570456327cSAdam Nemet }
12580456327cSAdam Nemet }
12590456327cSAdam Nemet }
12600456327cSAdam Nemet }
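// For illustration (hypothetical names): given
//   int *p = &Buf[0], *q = &Buf[100];
// both pointers strip back to the same underlying object Buf, so the loop
// above unions their accesses into one dependence set via DepCands;
// accesses whose underlying objects are provably distinct stay in separate
// sets and are handled by runtime checks rather than by dependence analysis.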
12610456327cSAdam Nemet
12620456327cSAdam Nemet static bool isInBoundsGep(Value *Ptr) {
12630456327cSAdam Nemet if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
12640456327cSAdam Nemet return GEP->isInBounds();
12650456327cSAdam Nemet return false;
12660456327cSAdam Nemet }
12670456327cSAdam Nemet
12685f8f34e4SAdrian Prantl /// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
1269c4866d29SAdam Nemet /// i.e. monotonically increasing/decreasing.
1270c4866d29SAdam Nemet static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
1271ea63a7f5SSilviu Baranga PredicatedScalarEvolution &PSE, const Loop *L) {
1272c4866d29SAdam Nemet // FIXME: This should probably only return true for NUW.
1273c4866d29SAdam Nemet if (AR->getNoWrapFlags(SCEV::NoWrapMask))
1274c4866d29SAdam Nemet return true;
1275c4866d29SAdam Nemet
1276c4866d29SAdam Nemet // Scalar evolution does not propagate the non-wrapping flags to values that
1277c4866d29SAdam Nemet // are derived from a non-wrapping induction variable because non-wrapping
1278c4866d29SAdam Nemet // could be flow-sensitive.
1279c4866d29SAdam Nemet //
1280c4866d29SAdam Nemet // Look through the potentially overflowing instruction to try to prove
1281c4866d29SAdam Nemet // non-wrapping for the *specific* value of Ptr.
1282c4866d29SAdam Nemet
1283c4866d29SAdam Nemet // The arithmetic implied by an inbounds GEP can't overflow.
1284c4866d29SAdam Nemet auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
1285c4866d29SAdam Nemet if (!GEP || !GEP->isInBounds())
1286c4866d29SAdam Nemet return false;
1287c4866d29SAdam Nemet
1288c4866d29SAdam Nemet // Make sure there is only one non-const index and analyze that.
1289c4866d29SAdam Nemet Value *NonConstIndex = nullptr;
12906a6e3821SKazu Hirata for (Value *Index : GEP->indices())
12918b401013SDavid Majnemer if (!isa<ConstantInt>(Index)) {
1292c4866d29SAdam Nemet if (NonConstIndex)
1293c4866d29SAdam Nemet return false;
12948b401013SDavid Majnemer NonConstIndex = Index;
1295c4866d29SAdam Nemet }
1296c4866d29SAdam Nemet if (!NonConstIndex)
1297c4866d29SAdam Nemet // The recurrence is on the pointer, ignore for now.
1298c4866d29SAdam Nemet return false;
1299c4866d29SAdam Nemet
1300c4866d29SAdam Nemet // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
1301c4866d29SAdam Nemet // AddRec using a NSW operation.
1302c4866d29SAdam Nemet if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
1303c4866d29SAdam Nemet if (OBO->hasNoSignedWrap() &&
1304c4866d29SAdam Nemet // Assume the other operand is constant so that the AddRec can be
1305c4866d29SAdam Nemet // easily found.
1306c4866d29SAdam Nemet isa<ConstantInt>(OBO->getOperand(1))) {
1307ea63a7f5SSilviu Baranga auto *OpScev = PSE.getSCEV(OBO->getOperand(0));
1308c4866d29SAdam Nemet
1309c4866d29SAdam Nemet if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
1310c4866d29SAdam Nemet return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
1311c4866d29SAdam Nemet }
1312c4866d29SAdam Nemet
1313c4866d29SAdam Nemet return false;
1314c4866d29SAdam Nemet }
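// For illustration (a hypothetical IR snippet): isNoWrapAddRec can prove
// non-wrapping for
//   %add = add nsw i32 %iv, 5
//   %gep = getelementptr inbounds i32, i32* %a, i32 %add
// when %iv is an NSW AddRec of loop L: the GEP is inbounds, its single
// non-constant index is derived from an NSW AddRec via an NSW add with a
// constant operand, so the pointer arithmetic cannot wrap.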
1315c4866d29SAdam Nemet
13165f8f34e4SAdrian Prantl /// Check whether the access through \p Ptr has a constant stride.
131745c46734SNikita Popov int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
131845c46734SNikita Popov Value *Ptr, const Loop *Lp,
131945c46734SNikita Popov const ValueToValueMap &StridesMap, bool Assume,
132045c46734SNikita Popov bool ShouldCheckWrap) {
1321e3dcce97SCraig Topper Type *Ty = Ptr->getType();
13220456327cSAdam Nemet assert(Ty->isPointerTy() && "Unexpected non-ptr");
13230456327cSAdam Nemet
1324787b66ebSPeter Waller if (isa<ScalableVectorType>(AccessTy)) {
1325787b66ebSPeter Waller LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
1326787b66ebSPeter Waller << "\n");
1327787b66ebSPeter Waller return 0;
1328787b66ebSPeter Waller }
1329787b66ebSPeter Waller
13309cd9a7e3SSilviu Baranga const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
13310456327cSAdam Nemet
13320456327cSAdam Nemet const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
1333ea63a7f5SSilviu Baranga if (Assume && !AR)
1334d68ed854SSilviu Baranga AR = PSE.getAsAddRec(Ptr);
1335ea63a7f5SSilviu Baranga
13360456327cSAdam Nemet if (!AR) {
1337d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1338ea63a7f5SSilviu Baranga << " SCEV: " << *PtrScev << "\n");
13390456327cSAdam Nemet return 0;
13400456327cSAdam Nemet }
13410456327cSAdam Nemet
1342c437f310SHiroshi Inoue // The access function must stride over the innermost loop.
13430456327cSAdam Nemet if (Lp != AR->getLoop()) {
1344d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
1345d34e60caSNicola Zaghen << *Ptr << " SCEV: " << *AR << "\n");
1346a02ce98bSKyle Butt return 0;
13470456327cSAdam Nemet }
13480456327cSAdam Nemet
13490456327cSAdam Nemet // The address calculation must not wrap. Otherwise, a dependence could be
13500456327cSAdam Nemet // inverted.
13510456327cSAdam Nemet // An inbounds getelementptr that is an AddRec with a unit stride
13520456327cSAdam Nemet // cannot wrap by definition. The unit stride requirement is checked later.
13530456327cSAdam Nemet // A getelementptr without an inbounds attribute and unit stride would have
13540456327cSAdam Nemet // to access the pointer value "0", which is undefined behavior in address
13550456327cSAdam Nemet // space 0; therefore we can also vectorize this case.
13560a00d64eSFlorian Hahn unsigned AddrSpace = Ty->getPointerAddressSpace();
13570456327cSAdam Nemet bool IsInBoundsGEP = isInBoundsGep(Ptr);
13585f8cc0c3SElena Demikhovsky bool IsNoWrapAddRec = !ShouldCheckWrap ||
1359ea63a7f5SSilviu Baranga PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
1360ea63a7f5SSilviu Baranga isNoWrapAddRec(Ptr, AR, PSE, Lp);
136177eeac3dSManoj Gupta if (!IsNoWrapAddRec && !IsInBoundsGEP &&
136245c46734SNikita Popov NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace)) {
1363ea63a7f5SSilviu Baranga if (Assume) {
1364ea63a7f5SSilviu Baranga PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1365ea63a7f5SSilviu Baranga IsNoWrapAddRec = true;
1366d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
1367ea63a7f5SSilviu Baranga << "LAA: Pointer: " << *Ptr << "\n"
1368ea63a7f5SSilviu Baranga << "LAA: SCEV: " << *AR << "\n"
1369ea63a7f5SSilviu Baranga << "LAA: Added an overflow assumption\n");
1370ea63a7f5SSilviu Baranga } else {
1371d34e60caSNicola Zaghen LLVM_DEBUG(
1372d34e60caSNicola Zaghen dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
1373ea63a7f5SSilviu Baranga << *Ptr << " SCEV: " << *AR << "\n");
13740456327cSAdam Nemet return 0;
13750456327cSAdam Nemet }
1376ea63a7f5SSilviu Baranga }
13770456327cSAdam Nemet
13780456327cSAdam Nemet // Check the step is constant.
13799cd9a7e3SSilviu Baranga const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
13800456327cSAdam Nemet
1381943befedSAdam Nemet // Calculate the pointer stride and check if it is constant.
13820456327cSAdam Nemet const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
13830456327cSAdam Nemet if (!C) {
1384d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
1385d34e60caSNicola Zaghen << " SCEV: " << *AR << "\n");
13860456327cSAdam Nemet return 0;
13870456327cSAdam Nemet }
13880456327cSAdam Nemet
1389a28d91d8SMehdi Amini auto &DL = Lp->getHeader()->getModule()->getDataLayout();
1390787b66ebSPeter Waller TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
1391787b66ebSPeter Waller int64_t Size = AllocSize.getFixedSize();
13920de2feceSSanjoy Das const APInt &APStepVal = C->getAPInt();
13930456327cSAdam Nemet
13940456327cSAdam Nemet // Huge step value - give up.
13950456327cSAdam Nemet if (APStepVal.getBitWidth() > 64)
13960456327cSAdam Nemet return 0;
13970456327cSAdam Nemet
13980456327cSAdam Nemet int64_t StepVal = APStepVal.getSExtValue();
13990456327cSAdam Nemet
14000456327cSAdam Nemet // Strided access.
14010456327cSAdam Nemet int64_t Stride = StepVal / Size;
14020456327cSAdam Nemet int64_t Rem = StepVal % Size;
14030456327cSAdam Nemet if (Rem)
14040456327cSAdam Nemet return 0;
14050456327cSAdam Nemet
14060456327cSAdam Nemet // If the SCEV could wrap but we have an inbounds gep with a unit stride, we
14070456327cSAdam Nemet // know we can't "wrap around the address space". In case of address space
14080456327cSAdam Nemet // zero we know that this won't happen without triggering undefined behavior.
140977eeac3dSManoj Gupta if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
141077eeac3dSManoj Gupta (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
141145c46734SNikita Popov AddrSpace))) {
1412ea63a7f5SSilviu Baranga if (Assume) {
1413ea63a7f5SSilviu Baranga // We can avoid this case by adding a run-time check.
1414d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
1415c437f310SHiroshi Inoue << "inbounds or in address space 0 may wrap:\n"
1416ea63a7f5SSilviu Baranga << "LAA: Pointer: " << *Ptr << "\n"
1417ea63a7f5SSilviu Baranga << "LAA: SCEV: " << *AR << "\n"
1418ea63a7f5SSilviu Baranga << "LAA: Added an overflow assumption\n");
1419ea63a7f5SSilviu Baranga PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1420ea63a7f5SSilviu Baranga } else
14210456327cSAdam Nemet return 0;
1422ea63a7f5SSilviu Baranga }
14230456327cSAdam Nemet
14240456327cSAdam Nemet return Stride;
14250456327cSAdam Nemet }
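// Worked example (assuming the wrap checks above succeed): for a 4-byte
// access type and a pointer SCEV of {%a,+,8}<loop>, StepVal is 8 and Size
// is 4, so the returned stride is 2 (every other element). A step of 6
// bytes leaves a non-zero remainder and the function returns 0 instead.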
14260456327cSAdam Nemet
142700d3f7ccSNikita Popov Optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
142800d3f7ccSNikita Popov Value *PtrB, const DataLayout &DL,
142900d3f7ccSNikita Popov ScalarEvolution &SE, bool StrictCheck,
143000d3f7ccSNikita Popov bool CheckType) {
143199203f20SAlexey Bataev assert(PtrA && PtrB && "Expected non-nullptr pointers.");
143200d3f7ccSNikita Popov assert(cast<PointerType>(PtrA->getType())
143300d3f7ccSNikita Popov ->isOpaqueOrPointeeTypeMatches(ElemTyA) && "Wrong PtrA type");
143400d3f7ccSNikita Popov assert(cast<PointerType>(PtrB->getType())
143500d3f7ccSNikita Popov ->isOpaqueOrPointeeTypeMatches(ElemTyB) && "Wrong PtrB type");
143600d3f7ccSNikita Popov
143799203f20SAlexey Bataev // Make sure that A and B are different pointers.
143899203f20SAlexey Bataev if (PtrA == PtrB)
143999203f20SAlexey Bataev return 0;
144099203f20SAlexey Bataev
144100d3f7ccSNikita Popov // Make sure that the element types are the same if required.
144200d3f7ccSNikita Popov if (CheckType && ElemTyA != ElemTyB)
144399203f20SAlexey Bataev return None;
144499203f20SAlexey Bataev
144599203f20SAlexey Bataev unsigned ASA = PtrA->getType()->getPointerAddressSpace();
144699203f20SAlexey Bataev unsigned ASB = PtrB->getType()->getPointerAddressSpace();
144799203f20SAlexey Bataev
144899203f20SAlexey Bataev // Check that the address spaces match.
144999203f20SAlexey Bataev if (ASA != ASB)
145099203f20SAlexey Bataev return None;
145199203f20SAlexey Bataev unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
145299203f20SAlexey Bataev
145399203f20SAlexey Bataev APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
145499203f20SAlexey Bataev Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
145599203f20SAlexey Bataev Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
145699203f20SAlexey Bataev
145799203f20SAlexey Bataev int Val;
145899203f20SAlexey Bataev if (PtrA1 == PtrB1) {
145999203f20SAlexey Bataev // Retrieve the address space again as pointer stripping now tracks through
146099203f20SAlexey Bataev // `addrspacecast`.
146199203f20SAlexey Bataev ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
146299203f20SAlexey Bataev ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
146399203f20SAlexey Bataev // Check that the address spaces match and that the pointers are valid.
146499203f20SAlexey Bataev if (ASA != ASB)
146599203f20SAlexey Bataev return None;
146699203f20SAlexey Bataev
146799203f20SAlexey Bataev IdxWidth = DL.getIndexSizeInBits(ASA);
146899203f20SAlexey Bataev OffsetA = OffsetA.sextOrTrunc(IdxWidth);
146999203f20SAlexey Bataev OffsetB = OffsetB.sextOrTrunc(IdxWidth);
147099203f20SAlexey Bataev
147199203f20SAlexey Bataev OffsetB -= OffsetA;
147299203f20SAlexey Bataev Val = OffsetB.getSExtValue();
147399203f20SAlexey Bataev } else {
147499203f20SAlexey Bataev // Otherwise compute the distance with SCEV between the base pointers.
147599203f20SAlexey Bataev const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
147699203f20SAlexey Bataev const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
147799203f20SAlexey Bataev const auto *Diff =
147899203f20SAlexey Bataev dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA));
147999203f20SAlexey Bataev if (!Diff)
148099203f20SAlexey Bataev return None;
148199203f20SAlexey Bataev Val = Diff->getAPInt().getSExtValue();
148299203f20SAlexey Bataev }
148300d3f7ccSNikita Popov int Size = DL.getTypeStoreSize(ElemTyA);
148499203f20SAlexey Bataev int Dist = Val / Size;
148599203f20SAlexey Bataev
148699203f20SAlexey Bataev // Ensure that the calculated distance matches the type-based one after
148799203f20SAlexey Bataev // removing all the bitcasts from the provided pointers.
148899203f20SAlexey Bataev if (!StrictCheck || Dist * Size == Val)
148999203f20SAlexey Bataev return Dist;
149099203f20SAlexey Bataev return None;
149199203f20SAlexey Bataev }
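// Worked example (hypothetical pointers): for int *A, the distance from
// &A[0] to &A[3] strips to a common base with Val = 12 bytes and returns
// 12 / 4 = 3 elements. With StrictCheck, a byte distance that is not a
// multiple of the element size (e.g. 6) yields None instead.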
149299203f20SAlexey Bataev
149300d3f7ccSNikita Popov bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
149400d3f7ccSNikita Popov const DataLayout &DL, ScalarEvolution &SE,
1495428e9d9dSAlexey Bataev SmallVectorImpl<unsigned> &SortedIndices) {
1496428e9d9dSAlexey Bataev assert(llvm::all_of(
1497428e9d9dSAlexey Bataev VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
1498428e9d9dSAlexey Bataev "Expected list of pointer operands.");
1499428e9d9dSAlexey Bataev // Walk over the pointers, and map each of them to an offset relative to
1500428e9d9dSAlexey Bataev // the first pointer in the array.
1501428e9d9dSAlexey Bataev Value *Ptr0 = VL[0];
1502428e9d9dSAlexey Bataev
150399203f20SAlexey Bataev using DistOrdPair = std::pair<int64_t, int>;
1504acf648b5SKazu Hirata auto Compare = llvm::less_first();
150599203f20SAlexey Bataev std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
150699203f20SAlexey Bataev Offsets.emplace(0, 0);
150799203f20SAlexey Bataev int Cnt = 1;
150899203f20SAlexey Bataev bool IsConsecutive = true;
150999203f20SAlexey Bataev for (auto *Ptr : VL.drop_front()) {
151000d3f7ccSNikita Popov Optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
151100d3f7ccSNikita Popov /*StrictCheck=*/true);
1512428e9d9dSAlexey Bataev if (!Diff)
1513428e9d9dSAlexey Bataev return false;
1514428e9d9dSAlexey Bataev
1515428e9d9dSAlexey Bataev // Check if the pointer with the same offset is found.
151699203f20SAlexey Bataev int64_t Offset = *Diff;
151799203f20SAlexey Bataev auto Res = Offsets.emplace(Offset, Cnt);
151899203f20SAlexey Bataev if (!Res.second)
1519428e9d9dSAlexey Bataev return false;
152099203f20SAlexey Bataev // Consecutive order if the inserted element is the last one.
152199203f20SAlexey Bataev IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
152299203f20SAlexey Bataev ++Cnt;
1523428e9d9dSAlexey Bataev }
1524428e9d9dSAlexey Bataev SortedIndices.clear();
152599203f20SAlexey Bataev if (!IsConsecutive) {
152699203f20SAlexey Bataev // Fill SortedIndices array only if it is non-consecutive.
1527428e9d9dSAlexey Bataev SortedIndices.resize(VL.size());
152899203f20SAlexey Bataev Cnt = 0;
152999203f20SAlexey Bataev for (const std::pair<int64_t, int> &Pair : Offsets) {
153099203f20SAlexey Bataev SortedIndices[Cnt] = Pair.second;
153199203f20SAlexey Bataev ++Cnt;
1532f1c00a22SHaicheng Wu }
153399203f20SAlexey Bataev }
153499203f20SAlexey Bataev return true;
1535f1b47ad2SAlexey Bataev }
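// Worked example (hypothetical list): for VL = {&A[4], &A[0], &A[8]} with
// 4-byte ints, the element offsets relative to VL[0] are {0, -4, 4};
// sorting by offset gives the order A[0], A[4], A[8], so
// SortedIndices = {1, 0, 2}. For an already consecutive list such as
// {&A[0], &A[1], &A[2]}, SortedIndices is left empty.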
1536f1b47ad2SAlexey Bataev
1537f1c00a22SHaicheng Wu /// Returns true if the memory operations \p A and \p B are consecutive.
1538f1c00a22SHaicheng Wu bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
1539f1c00a22SHaicheng Wu ScalarEvolution &SE, bool CheckType) {
1540038ede2aSRenato Golin Value *PtrA = getLoadStorePointerOperand(A);
1541038ede2aSRenato Golin Value *PtrB = getLoadStorePointerOperand(B);
154299203f20SAlexey Bataev if (!PtrA || !PtrB)
1543f1c00a22SHaicheng Wu return false;
154400d3f7ccSNikita Popov Type *ElemTyA = getLoadStoreType(A);
154500d3f7ccSNikita Popov Type *ElemTyB = getLoadStoreType(B);
154600d3f7ccSNikita Popov Optional<int> Diff = getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
154700d3f7ccSNikita Popov /*StrictCheck=*/true, CheckType);
154899203f20SAlexey Bataev return Diff && *Diff == 1;
1549f1c00a22SHaicheng Wu }
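// For example, loads of A[i] and A[i+1] differ by exactly one element and
// are consecutive; A[i] and A[i+2] give a difference of 2 and are not.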
1550f1c00a22SHaicheng Wu
1551e248d690SFlorian Hahn void MemoryDepChecker::addAccess(StoreInst *SI) {
1552e248d690SFlorian Hahn visitPointers(SI->getPointerOperand(), *InnermostLoop,
1553e248d690SFlorian Hahn [this, SI](Value *Ptr) {
1554e248d690SFlorian Hahn Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
1555e248d690SFlorian Hahn InstMap.push_back(SI);
1556e248d690SFlorian Hahn ++AccessIdx;
1557e248d690SFlorian Hahn });
1558e248d690SFlorian Hahn }
1559e248d690SFlorian Hahn
1560e248d690SFlorian Hahn void MemoryDepChecker::addAccess(LoadInst *LI) {
1561e248d690SFlorian Hahn visitPointers(LI->getPointerOperand(), *InnermostLoop,
1562e248d690SFlorian Hahn [this, LI](Value *Ptr) {
1563e248d690SFlorian Hahn Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
1564e248d690SFlorian Hahn InstMap.push_back(LI);
1565e248d690SFlorian Hahn ++AccessIdx;
1566e248d690SFlorian Hahn });
1567e248d690SFlorian Hahn }
1568e248d690SFlorian Hahn
1569485f2826SFlorian Hahn MemoryDepChecker::VectorizationSafetyStatus
1570485f2826SFlorian Hahn MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
15719c926579SAdam Nemet switch (Type) {
15729c926579SAdam Nemet case NoDep:
15739c926579SAdam Nemet case Forward:
15749c926579SAdam Nemet case BackwardVectorizable:
1575485f2826SFlorian Hahn return VectorizationSafetyStatus::Safe;
15769c926579SAdam Nemet
15779c926579SAdam Nemet case Unknown:
1578ef307b8cSFlorian Hahn return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
15799c926579SAdam Nemet case ForwardButPreventsForwarding:
15809c926579SAdam Nemet case Backward:
15819c926579SAdam Nemet case BackwardVectorizableButPreventsForwarding:
1582485f2826SFlorian Hahn return VectorizationSafetyStatus::Unsafe;
15839c926579SAdam Nemet }
1584d388e930SDavid Majnemer llvm_unreachable("unexpected DepType!");
15859c926579SAdam Nemet }
15869c926579SAdam Nemet
1587397f5829SAdam Nemet bool MemoryDepChecker::Dependence::isBackward() const {
15889c926579SAdam Nemet switch (Type) {
15899c926579SAdam Nemet case NoDep:
15909c926579SAdam Nemet case Forward:
15919c926579SAdam Nemet case ForwardButPreventsForwarding:
1592397f5829SAdam Nemet case Unknown:
15939c926579SAdam Nemet return false;
15949c926579SAdam Nemet
15959c926579SAdam Nemet case BackwardVectorizable:
15969c926579SAdam Nemet case Backward:
15979c926579SAdam Nemet case BackwardVectorizableButPreventsForwarding:
15989c926579SAdam Nemet return true;
15999c926579SAdam Nemet }
1600d388e930SDavid Majnemer llvm_unreachable("unexpected DepType!");
16019c926579SAdam Nemet }
16029c926579SAdam Nemet
1603397f5829SAdam Nemet bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
1604397f5829SAdam Nemet return isBackward() || Type == Unknown;
1605397f5829SAdam Nemet }
1606397f5829SAdam Nemet
1607397f5829SAdam Nemet bool MemoryDepChecker::Dependence::isForward() const {
1608397f5829SAdam Nemet switch (Type) {
1609397f5829SAdam Nemet case Forward:
1610397f5829SAdam Nemet case ForwardButPreventsForwarding:
1611397f5829SAdam Nemet return true;
1612397f5829SAdam Nemet
1613397f5829SAdam Nemet case NoDep:
1614397f5829SAdam Nemet case Unknown:
1615397f5829SAdam Nemet case BackwardVectorizable:
1616397f5829SAdam Nemet case Backward:
1617397f5829SAdam Nemet case BackwardVectorizableButPreventsForwarding:
1618397f5829SAdam Nemet return false;
1619397f5829SAdam Nemet }
1620397f5829SAdam Nemet llvm_unreachable("unexpected DepType!");
1621397f5829SAdam Nemet }
1622397f5829SAdam Nemet
16237afb46d3SDavid Majnemer bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
16247afb46d3SDavid Majnemer uint64_t TypeByteSize) {
16250456327cSAdam Nemet // If loads occur at a distance that is not a multiple of a feasible vector
16260456327cSAdam Nemet // factor, store-load forwarding does not take place.
16270456327cSAdam Nemet // Positive dependences might cause trouble because vectorizing them might
16280456327cSAdam Nemet // prevent store-load forwarding, making vectorized code run a lot slower.
16290456327cSAdam Nemet // a[i] = a[i-3] ^ a[i-8];
16300456327cSAdam Nemet // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
16310456327cSAdam Nemet // hence on your typical architecture store-load forwarding does not take
16320456327cSAdam Nemet // place. Vectorizing in such cases does not make sense.
16330456327cSAdam Nemet // Store-load forwarding distance.
1634884d313bSAdam Nemet
1635884d313bSAdam Nemet // After this many iterations store-to-load forwarding conflicts should not
1636884d313bSAdam Nemet // cause any slowdowns.
16377afb46d3SDavid Majnemer const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
16380456327cSAdam Nemet // Maximum vector factor.
16397afb46d3SDavid Majnemer uint64_t MaxVFWithoutSLForwardIssues = std::min(
16402c34ab51SAdam Nemet VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
16410456327cSAdam Nemet
1642884d313bSAdam Nemet // Compute the smallest VF at which the store and load would be misaligned.
16437afb46d3SDavid Majnemer for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
16449b5852aeSAdam Nemet VF *= 2) {
1645884d313bSAdam Nemet // If the number of vector iterations between the store and the load is
1646884d313bSAdam Nemet // small, we could incur conflicts.
1647884d313bSAdam Nemet if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1648fa6d8977SSimon Pilgrim MaxVFWithoutSLForwardIssues = (VF >> 1);
16490456327cSAdam Nemet break;
16500456327cSAdam Nemet }
16510456327cSAdam Nemet }
16520456327cSAdam Nemet
16530456327cSAdam Nemet if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
1654d34e60caSNicola Zaghen LLVM_DEBUG(
1655d34e60caSNicola Zaghen dbgs() << "LAA: Distance " << Distance
16569b5852aeSAdam Nemet << " that could cause a store-load forwarding conflict\n");
16570456327cSAdam Nemet return true;
16580456327cSAdam Nemet }
16590456327cSAdam Nemet
16600456327cSAdam Nemet if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
1661f219c647SAdam Nemet MaxVFWithoutSLForwardIssues !=
1662f219c647SAdam Nemet VectorizerParams::MaxVectorWidth * TypeByteSize)
16630456327cSAdam Nemet MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
16640456327cSAdam Nemet return false;
16650456327cSAdam Nemet }
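// Worked example (4-byte elements, so TypeByteSize = 4): for
//   a[i] = a[i-3] ^ ...;
// Distance is 12 bytes and NumItersForStoreLoadThroughMemory is 32. The
// first candidate width VF = 8 bytes is misaligned (12 % 8 != 0) with
// 12 / 8 = 1 < 32, so MaxVFWithoutSLForwardIssues drops to 4 bytes, which
// is below 2 * TypeByteSize, and the function reports a conflict.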
16660456327cSAdam Nemet
1667485f2826SFlorian Hahn void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
1668485f2826SFlorian Hahn if (Status < S)
1669485f2826SFlorian Hahn Status = S;
1670485f2826SFlorian Hahn }
1671485f2826SFlorian Hahn
1672eac89d73SDorit Nuzman /// Given a non-constant (unknown) dependence-distance \p Dist between two
1673eac89d73SDorit Nuzman /// memory accesses, that have the same stride whose absolute value is given
1674eac89d73SDorit Nuzman /// in \p Stride, and that have the same type size \p TypeByteSize,
1675eac89d73SDorit Nuzman /// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
1676eac89d73SDorit Nuzman /// possible to prove statically that the dependence distance is larger
1677eac89d73SDorit Nuzman /// than the range that the accesses will travel through the execution of
1678eac89d73SDorit Nuzman /// the loop. If so, return true; false otherwise. This is useful for
1679eac89d73SDorit Nuzman /// example in loops such as the following (PR31098):
1680eac89d73SDorit Nuzman /// for (i = 0; i < D; ++i) {
1681eac89d73SDorit Nuzman /// = out[i];
1682eac89d73SDorit Nuzman /// out[i+D] =
1683eac89d73SDorit Nuzman /// }
1684eac89d73SDorit Nuzman static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1685eac89d73SDorit Nuzman const SCEV &BackedgeTakenCount,
1686eac89d73SDorit Nuzman const SCEV &Dist, uint64_t Stride,
1687eac89d73SDorit Nuzman uint64_t TypeByteSize) {
1688eac89d73SDorit Nuzman
1689eac89d73SDorit Nuzman // If we can prove that
1690eac89d73SDorit Nuzman // (**) |Dist| > BackedgeTakenCount * Step
1691eac89d73SDorit Nuzman // where Step is the absolute stride of the memory accesses in bytes,
1692eac89d73SDorit Nuzman // then there is no dependence.
1693eac89d73SDorit Nuzman //
1694c437f310SHiroshi Inoue // Rationale:
1695eac89d73SDorit Nuzman // We basically want to check if the absolute distance (|Dist/Step|)
1696eac89d73SDorit Nuzman // is >= the loop iteration count (or > BackedgeTakenCount).
1697eac89d73SDorit Nuzman // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1698eac89d73SDorit Nuzman // Section 4.2.1); Note, that for vectorization it is sufficient to prove
1699eac89d73SDorit Nuzman // that the dependence distance is >= VF; This is checked elsewhere.
1700eac89d73SDorit Nuzman // But in some cases we can prune unknown dependence distances early, and
1701eac89d73SDorit Nuzman // even before selecting the VF, and without a runtime test, by comparing
1702eac89d73SDorit Nuzman // the distance against the loop iteration count. Since the vectorized code
1703eac89d73SDorit Nuzman // will be executed only if LoopCount >= VF, proving distance >= LoopCount
1704eac89d73SDorit Nuzman // also guarantees that distance >= VF.
1705eac89d73SDorit Nuzman //
1706eac89d73SDorit Nuzman const uint64_t ByteStride = Stride * TypeByteSize;
1707eac89d73SDorit Nuzman const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
1708eac89d73SDorit Nuzman const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
1709eac89d73SDorit Nuzman
1710eac89d73SDorit Nuzman const SCEV *CastedDist = &Dist;
1711eac89d73SDorit Nuzman const SCEV *CastedProduct = Product;
17127ee30a0eSChang-Sun Lin Jr uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
17137ee30a0eSChang-Sun Lin Jr uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
1714eac89d73SDorit Nuzman
1715eac89d73SDorit Nuzman // The dependence distance can be positive/negative, so we sign extend Dist;
1716eac89d73SDorit Nuzman // The multiplication of the absolute stride in bytes and the
1717c437f310SHiroshi Inoue // backedgeTakenCount is non-negative, so we zero extend Product.
17187ee30a0eSChang-Sun Lin Jr if (DistTypeSizeBits > ProductTypeSizeBits)
1719eac89d73SDorit Nuzman CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
1720eac89d73SDorit Nuzman else
1721eac89d73SDorit Nuzman CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
1722eac89d73SDorit Nuzman
1723eac89d73SDorit Nuzman // Is Dist - (BackedgeTakenCount * Step) > 0 ?
1724eac89d73SDorit Nuzman // (If so, then we have proven (**) because |Dist| >= Dist)
1725eac89d73SDorit Nuzman const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
1726eac89d73SDorit Nuzman if (SE.isKnownPositive(Minus))
1727eac89d73SDorit Nuzman return true;
1728eac89d73SDorit Nuzman
1729eac89d73SDorit Nuzman // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
1730eac89d73SDorit Nuzman // (If so, then we have proven (**) because |Dist| >= -1*Dist)
1731eac89d73SDorit Nuzman const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
1732eac89d73SDorit Nuzman Minus = SE.getMinusSCEV(NegDist, CastedProduct);
1733eac89d73SDorit Nuzman if (SE.isKnownPositive(Minus))
1734eac89d73SDorit Nuzman return true;
1735eac89d73SDorit Nuzman
1736eac89d73SDorit Nuzman return false;
1737eac89d73SDorit Nuzman }
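// Worked example for the loop above (assuming 4-byte elements and unit
// stride): Dist is 4*D bytes while BackedgeTakenCount * Step is 4*(D-1)
// bytes, so CastedDist - CastedProduct simplifies to 4, which is known
// positive, and the unknown dependence distance can be discarded without
// a runtime check.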
1738eac89d73SDorit Nuzman
17395f8f34e4SAdrian Prantl /// Check the dependence for two accesses with the same stride \p Stride.
1740751004a6SHao Liu /// \p Distance is the positive distance and \p TypeByteSize is type size in
1741751004a6SHao Liu /// bytes.
1742751004a6SHao Liu ///
1743751004a6SHao Liu /// \returns true if they are independent.
17447afb46d3SDavid Majnemer static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
17457afb46d3SDavid Majnemer uint64_t TypeByteSize) {
1746751004a6SHao Liu assert(Stride > 1 && "The stride must be greater than 1");
1747751004a6SHao Liu assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
1748751004a6SHao Liu assert(Distance > 0 && "The distance must be non-zero");
1749751004a6SHao Liu
1750751004a6SHao Liu // Skip if the distance is not a multiple of the type byte size.
1751751004a6SHao Liu if (Distance % TypeByteSize)
1752751004a6SHao Liu return false;
1753751004a6SHao Liu
17547afb46d3SDavid Majnemer uint64_t ScaledDist = Distance / TypeByteSize;
1755751004a6SHao Liu
1756751004a6SHao Liu // No dependence if the scaled distance is not a multiple of the stride.
1757751004a6SHao Liu // E.g.
1758751004a6SHao Liu // for (i = 0; i < 1024 ; i += 4)
1759751004a6SHao Liu // A[i+2] = A[i] + 1;
1760751004a6SHao Liu //
1761751004a6SHao Liu // Two accesses in memory (scaled distance is 2, stride is 4):
1762751004a6SHao Liu // | A[0] | | | | A[4] | | | |
1763751004a6SHao Liu // | | | A[2] | | | | A[6] | |
1764751004a6SHao Liu //
1765751004a6SHao Liu // E.g.
1766751004a6SHao Liu // for (i = 0; i < 1024 ; i += 3)
1767751004a6SHao Liu // A[i+4] = A[i] + 1;
1768751004a6SHao Liu //
1769751004a6SHao Liu // Two accesses in memory (scaled distance is 4, stride is 3):
1770751004a6SHao Liu // | A[0] | | | A[3] | | | A[6] | | |
1771751004a6SHao Liu // | | | | | A[4] | | | A[7] | |
1772751004a6SHao Liu return ScaledDist % Stride;
1773751004a6SHao Liu }
1774751004a6SHao Liu
17759c926579SAdam Nemet MemoryDepChecker::Dependence::DepType
17769c926579SAdam Nemet MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
17770456327cSAdam Nemet const MemAccessInfo &B, unsigned BIdx,
17788bc61df9SAdam Nemet const ValueToValueMap &Strides) {
17790456327cSAdam Nemet assert(AIdx < BIdx && "Must pass arguments in program order");
17800456327cSAdam Nemet
17810456327cSAdam Nemet Value *APtr = A.getPointer();
17820456327cSAdam Nemet Value *BPtr = B.getPointer();
17830456327cSAdam Nemet bool AIsWrite = A.getInt();
17840456327cSAdam Nemet bool BIsWrite = B.getInt();
1785ff31020eSArthur Eubanks Type *ATy = getLoadStoreType(InstMap[AIdx]);
1786ff31020eSArthur Eubanks Type *BTy = getLoadStoreType(InstMap[BIdx]);
17870456327cSAdam Nemet
17880456327cSAdam Nemet // Two reads are independent.
17890456327cSAdam Nemet if (!AIsWrite && !BIsWrite)
17909c926579SAdam Nemet return Dependence::NoDep;
17910456327cSAdam Nemet
17920456327cSAdam Nemet // We cannot check pointers in different address spaces.
17930456327cSAdam Nemet if (APtr->getType()->getPointerAddressSpace() !=
17940456327cSAdam Nemet BPtr->getType()->getPointerAddressSpace())
17959c926579SAdam Nemet return Dependence::Unknown;
17960456327cSAdam Nemet
179745c46734SNikita Popov int64_t StrideAPtr =
179845c46734SNikita Popov getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true);
179945c46734SNikita Popov int64_t StrideBPtr =
180045c46734SNikita Popov getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true);
18010456327cSAdam Nemet
1802adf4b739SSilviu Baranga const SCEV *Src = PSE.getSCEV(APtr);
1803adf4b739SSilviu Baranga const SCEV *Sink = PSE.getSCEV(BPtr);
18040456327cSAdam Nemet
18050456327cSAdam Nemet // If the induction step is negative we have to invert source and sink of the
18060456327cSAdam Nemet // dependence.
18070456327cSAdam Nemet if (StrideAPtr < 0) {
18080456327cSAdam Nemet std::swap(APtr, BPtr);
180945c46734SNikita Popov std::swap(ATy, BTy);
18100456327cSAdam Nemet std::swap(Src, Sink);
18110456327cSAdam Nemet std::swap(AIsWrite, BIsWrite);
18120456327cSAdam Nemet std::swap(AIdx, BIdx);
18130456327cSAdam Nemet std::swap(StrideAPtr, StrideBPtr);
18140456327cSAdam Nemet }
18150456327cSAdam Nemet
18169cd9a7e3SSilviu Baranga const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
18170456327cSAdam Nemet
1818d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
18190456327cSAdam Nemet << "(Induction step: " << StrideAPtr << ")\n");
1820d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
18210456327cSAdam Nemet << *InstMap[BIdx] << ": " << *Dist << "\n");
18220456327cSAdam Nemet
1823943befedSAdam Nemet // Need accesses with constant stride. We don't want to vectorize
18240456327cSAdam Nemet // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
18250456327cSAdam Nemet // the address space.
18260456327cSAdam Nemet if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
1827d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
18289c926579SAdam Nemet return Dependence::Unknown;
18290456327cSAdam Nemet }
18300456327cSAdam Nemet
1831eac89d73SDorit Nuzman auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
1832eac89d73SDorit Nuzman uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
183377b2bb55SJolanta Jensen bool HasSameSize =
183477b2bb55SJolanta Jensen DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
1835eac89d73SDorit Nuzman uint64_t Stride = std::abs(StrideAPtr);
18360456327cSAdam Nemet const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
18370456327cSAdam Nemet if (!C) {
183877b2bb55SJolanta Jensen if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
1839eac89d73SDorit Nuzman isSafeDependenceDistance(DL, *(PSE.getSE()),
1840eac89d73SDorit Nuzman *(PSE.getBackedgeTakenCount()), *Dist, Stride,
1841eac89d73SDorit Nuzman TypeByteSize))
1842eac89d73SDorit Nuzman return Dependence::NoDep;
1843eac89d73SDorit Nuzman
1844d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
1845ef307b8cSFlorian Hahn FoundNonConstantDistanceDependence = true;
18469c926579SAdam Nemet return Dependence::Unknown;
18470456327cSAdam Nemet }
18480456327cSAdam Nemet
18490de2feceSSanjoy Das const APInt &Val = C->getAPInt();
18506feebe98SMatthew Simpson int64_t Distance = Val.getSExtValue();
18516feebe98SMatthew Simpson
18526feebe98SMatthew Simpson // Attempt to prove strided accesses independent.
185377b2bb55SJolanta Jensen if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize &&
18546feebe98SMatthew Simpson areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
1855d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
18566feebe98SMatthew Simpson return Dependence::NoDep;
18576feebe98SMatthew Simpson }
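// Illustrative example, assuming 4-byte ints:
//   for (i = 0; i < 1024; i += 4)
//     A[i+2] = A[i] + 1;
// Distance is 8 and Stride is 4: the reads touch elements 0, 4, 8, ... and
// the writes touch 2, 6, 10, ..., so the two access streams never overlap
// and the dependence can be dismissed as NoDep.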
18586feebe98SMatthew Simpson
18596feebe98SMatthew Simpson // Negative distances are not plausible dependencies.
18600456327cSAdam Nemet if (Val.isNegative()) {
18610456327cSAdam Nemet bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
186237ec5f91SMatthew Simpson if (IsTrueDataDependence && EnableForwardingConflictDetection &&
18630456327cSAdam Nemet (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
186477b2bb55SJolanta Jensen !HasSameSize)) {
1865d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
18669c926579SAdam Nemet return Dependence::ForwardButPreventsForwarding;
1867b8486e5aSAdam Nemet }
18680456327cSAdam Nemet
1869d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
18709c926579SAdam Nemet return Dependence::Forward;
18710456327cSAdam Nemet }
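// Illustrative example, assuming 4-byte ints:
//   for (i = 0; i < n; i++) {
//     A[i+1] = B[i];   // source (write)
//     C[i]   = A[i];   // sink (read)
//   }
// The distance is -4: each load reads the value stored one iteration
// earlier. This is a forward dependence that is safe to vectorize, but a
// distance this small is what the store-to-load forwarding check above
// screens for.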
18720456327cSAdam Nemet
18730456327cSAdam Nemet // Write to the same location with the same size.
18740456327cSAdam Nemet if (Val == 0) {
187577b2bb55SJolanta Jensen if (HasSameSize)
1876d7037c56SAdam Nemet return Dependence::Forward;
1877d34e60caSNicola Zaghen LLVM_DEBUG(
187877b2bb55SJolanta Jensen dbgs() << "LAA: Zero dependence difference but different type sizes\n");
18799c926579SAdam Nemet return Dependence::Unknown;
18800456327cSAdam Nemet }
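// Illustrative example: a 4-byte int store to A[i] followed by a 2-byte
// short load of the same address has distance zero with mismatched sizes,
// so it must be classified as Unknown.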
18810456327cSAdam Nemet
18820456327cSAdam Nemet assert(Val.isStrictlyPositive() && "Expect a positive value");
18830456327cSAdam Nemet
188477b2bb55SJolanta Jensen if (!HasSameSize) {
188577b2bb55SJolanta Jensen LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
188677b2bb55SJolanta Jensen "different type sizes\n");
18879c926579SAdam Nemet return Dependence::Unknown;
18880456327cSAdam Nemet }
18890456327cSAdam Nemet
18900456327cSAdam Nemet // Bail out early if passed-in parameters make vectorization not feasible.
1891f219c647SAdam Nemet unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1892f219c647SAdam Nemet VectorizerParams::VectorizationFactor : 1);
1893f219c647SAdam Nemet unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1894f219c647SAdam Nemet VectorizerParams::VectorizationInterleave : 1);
1895751004a6SHao Liu // The minimum number of iterations for a vectorized/unrolled version.
1896751004a6SHao Liu unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
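// Worked example for illustration: forcing a vectorization factor of 4 and
// an interleave count of 2 gives MinNumIter = max(4 * 2, 2) = 8; with no
// forced values it stays at the minimum of 2.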
18970456327cSAdam Nemet
1898751004a6SHao Liu // It's not vectorizable if the distance is smaller than the minimum distance
1899751004a6SHao Liu // needed for a vectorized/unrolled version. Vectorizing one iteration in
1900751004a6SHao Liu // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
1901751004a6SHao Liu // TypeByteSize (no need to add the last gap distance).
1902751004a6SHao Liu //
1903751004a6SHao Liu // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1904751004a6SHao Liu // foo(int *A) {
1905751004a6SHao Liu // int *B = (int *)((char *)A + 14);
1906751004a6SHao Liu // for (i = 0 ; i < 1024 ; i += 2)
1907751004a6SHao Liu // B[i] = A[i] + 1;
1908751004a6SHao Liu // }
1909751004a6SHao Liu //
1910751004a6SHao Liu // Two accesses in memory (stride is 2):
1911751004a6SHao Liu // | A[0] | | A[2] | | A[4] | | A[6] | |
1912751004a6SHao Liu // | B[0] | | B[2] | | B[4] |
1913751004a6SHao Liu //
1914751004a6SHao Liu // Distance needed to vectorize all iterations except the last one:
1915751004a6SHao Liu // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
1916751004a6SHao Liu // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
1917751004a6SHao Liu //
1918751004a6SHao Liu // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
1919751004a6SHao Liu // 12, which is less than the distance of 14.
1920751004a6SHao Liu //
1921751004a6SHao Liu // If MinNumIter is 4 (say, if a user forces the vectorization factor to be
1922751004a6SHao Liu // 4), the minimum distance needed is 28, which is greater than the distance
1923751004a6SHao Liu // of 14, so it is not safe to vectorize.
19247afb46d3SDavid Majnemer uint64_t MinDistanceNeeded =
1925751004a6SHao Liu TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
19267afb46d3SDavid Majnemer if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1927d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
1928d34e60caSNicola Zaghen << Distance << '\n');
1929751004a6SHao Liu return Dependence::Backward;
1930751004a6SHao Liu }
1931751004a6SHao Liu
1932751004a6SHao Liu // Unsafe if the minimum distance needed is greater than max safe distance.
1933751004a6SHao Liu if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1934d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
1935751004a6SHao Liu << MinDistanceNeeded << " size in bytes");
19369c926579SAdam Nemet return Dependence::Backward;
19370456327cSAdam Nemet }
19380456327cSAdam Nemet
19399cc0c399SAdam Nemet // Positive distance bigger than max vectorization factor.
1940751004a6SHao Liu // FIXME: Should use the max vectorization factor instead of the max distance
1941751004a6SHao Liu // in bytes, which cannot handle accesses of different type sizes.
1942751004a6SHao Liu // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1943751004a6SHao Liu // void foo (int *A, char *B) {
1944751004a6SHao Liu // for (unsigned i = 0; i < 1024; i++) {
1945751004a6SHao Liu // A[i+2] = A[i] + 1;
1946751004a6SHao Liu // B[i+2] = B[i] + 1;
1947751004a6SHao Liu // }
1948751004a6SHao Liu // }
1949751004a6SHao Liu //
1950751004a6SHao Liu // This case is currently unsafe according to the max safe distance. If we
1951751004a6SHao Liu // analyze the two accesses on array B, the max safe dependence distance
1952751004a6SHao Liu // is 2. Then we analyze the accesses on array A: the minimum distance needed
1953751004a6SHao Liu // is 8, which is greater than 2, so vectorization is rejected. But actually
1954751004a6SHao Liu // both A and B could safely be vectorized by 2 iterations.
1955751004a6SHao Liu MaxSafeDepDistBytes =
19567afb46d3SDavid Majnemer std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
19570456327cSAdam Nemet
19580456327cSAdam Nemet bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
195937ec5f91SMatthew Simpson if (IsTrueDataDependence && EnableForwardingConflictDetection &&
19600456327cSAdam Nemet couldPreventStoreLoadForward(Distance, TypeByteSize))
19619c926579SAdam Nemet return Dependence::BackwardVectorizableButPreventsForwarding;
19620456327cSAdam Nemet
1963682cfc1dSAlon Kom uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
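// Worked example for illustration: with MaxSafeDepDistBytes = 16, 4-byte
// elements and Stride 1, MaxVF is 16 / (4 * 1) = 4 and MaxVFInBits is 128,
// i.e. at most a 128-bit wide vector stays within the safe distance.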
1964d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
1965682cfc1dSAlon Kom << " with max VF = " << MaxVF << '\n');
1966682cfc1dSAlon Kom uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
19671ba4b82fSCullen Rhodes MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
19689c926579SAdam Nemet return Dependence::BackwardVectorizable;
19690456327cSAdam Nemet }
19700456327cSAdam Nemet
1971dee666bcSAdam Nemet bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
19725448e989SAmjad Aboud MemAccessInfoList &CheckDeps,
19738bc61df9SAdam Nemet const ValueToValueMap &Strides) {
19740456327cSAdam Nemet
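// Pointers that may alias were placed into the same equivalence class of
// AccessSets by the access analysis, so only pairs within one class need to
// be tested here; the Visited set ensures each class is walked once even if
// CheckDeps names several of its members.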
19757afb46d3SDavid Majnemer MaxSafeDepDistBytes = -1;
19765448e989SAmjad Aboud SmallPtrSet<MemAccessInfo, 8> Visited;
19775448e989SAmjad Aboud for (MemAccessInfo CurAccess : CheckDeps) {
19785448e989SAmjad Aboud if (Visited.count(CurAccess))
19795448e989SAmjad Aboud continue;
19800456327cSAdam Nemet
19810456327cSAdam Nemet // Get the relevant memory access set.
19820456327cSAdam Nemet EquivalenceClasses<MemAccessInfo>::iterator I =
19830456327cSAdam Nemet AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
19840456327cSAdam Nemet
19850456327cSAdam Nemet // Check accesses within this set.
19867a083814SRichard Trieu EquivalenceClasses<MemAccessInfo>::member_iterator AI =
19877a083814SRichard Trieu AccessSets.member_begin(I);
19887a083814SRichard Trieu EquivalenceClasses<MemAccessInfo>::member_iterator AE =
19897a083814SRichard Trieu AccessSets.member_end();
19900456327cSAdam Nemet
19910456327cSAdam Nemet // Check every access pair.
19920456327cSAdam Nemet while (AI != AE) {
19935448e989SAmjad Aboud Visited.insert(*AI);
199409fac245SHideki Saito bool AIIsWrite = AI->getInt();
199509fac245SHideki Saito // Check loads only against the following members of the equivalence class,
199609fac245SHideki Saito // but check stores also against other stores to the same address in the same class.
199709fac245SHideki Saito EquivalenceClasses<MemAccessInfo>::member_iterator OI =
199809fac245SHideki Saito (AIIsWrite ? AI : std::next(AI));
19990456327cSAdam Nemet while (OI != AE) {
20000456327cSAdam Nemet // Check every accessing instruction pair in program order.
20010456327cSAdam Nemet for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
20020456327cSAdam Nemet I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
200309fac245SHideki Saito // Scan all accesses of another member of the equivalence class, but only
200409fac245SHideki Saito // the subsequent accesses of the same member.
200509fac245SHideki Saito for (std::vector<unsigned>::iterator
200609fac245SHideki Saito I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
200709fac245SHideki Saito I2E = (OI == AI ? I1E : Accesses[*OI].end());
200809fac245SHideki Saito I2 != I2E; ++I2) {
20099c926579SAdam Nemet auto A = std::make_pair(&*AI, *I1);
20109c926579SAdam Nemet auto B = std::make_pair(&*OI, *I2);
20119c926579SAdam Nemet
20129c926579SAdam Nemet assert(*I1 != *I2);
20139c926579SAdam Nemet if (*I1 > *I2)
20149c926579SAdam Nemet std::swap(A, B);
20159c926579SAdam Nemet
20169c926579SAdam Nemet Dependence::DepType Type =
20179c926579SAdam Nemet isDependent(*A.first, A.second, *B.first, B.second, Strides);
2018485f2826SFlorian Hahn mergeInStatus(Dependence::isSafeForVectorization(Type));
20199c926579SAdam Nemet
2020a2df750fSAdam Nemet // Gather dependences unless we have accumulated MaxDependences
20219c926579SAdam Nemet // dependences; in that case, return as soon as we find the first
20229c926579SAdam Nemet // unsafe dependence. This puts a limit on this quadratic
20239c926579SAdam Nemet // algorithm.
2024a2df750fSAdam Nemet if (RecordDependences) {
2025a2df750fSAdam Nemet if (Type != Dependence::NoDep)
2026a2df750fSAdam Nemet Dependences.push_back(Dependence(A.second, B.second, Type));
20279c926579SAdam Nemet
2028a2df750fSAdam Nemet if (Dependences.size() >= MaxDependences) {
2029a2df750fSAdam Nemet RecordDependences = false;
2030a2df750fSAdam Nemet Dependences.clear();
2031d34e60caSNicola Zaghen LLVM_DEBUG(dbgs()
2032d34e60caSNicola Zaghen << "Too many dependences, stopped recording\n");
20339c926579SAdam Nemet }
20349c926579SAdam Nemet }
2035485f2826SFlorian Hahn if (!RecordDependences && !isSafeForVectorization())
20360456327cSAdam Nemet return false;
20370456327cSAdam Nemet }
20380456327cSAdam Nemet ++OI;
20390456327cSAdam Nemet }
20400456327cSAdam Nemet AI++;
20410456327cSAdam Nemet }
20420456327cSAdam Nemet }
20439c926579SAdam Nemet
2044d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
2045485f2826SFlorian Hahn return isSafeForVectorization();
20460456327cSAdam Nemet }
20470456327cSAdam Nemet
2048ec1e2bb6SAdam Nemet SmallVector<Instruction *, 4>
2049ec1e2bb6SAdam Nemet MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
2050ec1e2bb6SAdam Nemet MemAccessInfo Access(Ptr, isWrite);
2051ec1e2bb6SAdam Nemet auto &IndexVector = Accesses.find(Access)->second;
2052ec1e2bb6SAdam Nemet
2053ec1e2bb6SAdam Nemet SmallVector<Instruction *, 4> Insts;
20542d006e76SDavid Majnemer transform(IndexVector,
2055ec1e2bb6SAdam Nemet std::back_inserter(Insts),
2056ec1e2bb6SAdam Nemet [&](unsigned Idx) { return this->InstMap[Idx]; });
2057ec1e2bb6SAdam Nemet return Insts;
2058ec1e2bb6SAdam Nemet }
2059ec1e2bb6SAdam Nemet
206058913d65SAdam Nemet const char *MemoryDepChecker::Dependence::DepName[] = {
206158913d65SAdam Nemet "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
206258913d65SAdam Nemet "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
206358913d65SAdam Nemet
206458913d65SAdam Nemet void MemoryDepChecker::Dependence::print(
206558913d65SAdam Nemet raw_ostream &OS, unsigned Depth,
206658913d65SAdam Nemet const SmallVectorImpl<Instruction *> &Instrs) const {
206758913d65SAdam Nemet OS.indent(Depth) << DepName[Type] << ":\n";
206858913d65SAdam Nemet OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
206958913d65SAdam Nemet OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
207058913d65SAdam Nemet }
207158913d65SAdam Nemet
2072929c38e8SAdam Nemet bool LoopAccessInfo::canAnalyzeLoop() {
20738dcb3b6aSAdam Nemet // We need to have a loop header.
2074d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
2075d8968f09SAdam Nemet << TheLoop->getHeader()->getParent()->getName() << ": "
2076d8968f09SAdam Nemet << TheLoop->getHeader()->getName() << '\n');
20778dcb3b6aSAdam Nemet
2078929c38e8SAdam Nemet // We can only analyze innermost loops.
207989c1e35fSStefanos Baziotis if (!TheLoop->isInnermost()) {
2080d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
2081877ccee8SAdam Nemet recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
2082929c38e8SAdam Nemet return false;
2083929c38e8SAdam Nemet }
2084929c38e8SAdam Nemet
2085929c38e8SAdam Nemet // We must have a single backedge.
2086929c38e8SAdam Nemet if (TheLoop->getNumBackEdges() != 1) {
2087d34e60caSNicola Zaghen LLVM_DEBUG(
2088d34e60caSNicola Zaghen dbgs() << "LAA: loop control flow is not understood by analyzer\n");
2089877ccee8SAdam Nemet recordAnalysis("CFGNotUnderstood")
2090877ccee8SAdam Nemet << "loop control flow is not understood by analyzer";
2091929c38e8SAdam Nemet return false;
2092929c38e8SAdam Nemet }
2093929c38e8SAdam Nemet
2094929c38e8SAdam Nemet // ScalarEvolution needs to be able to find the exit count.
209594734eefSXinliang David Li const SCEV *ExitCount = PSE->getBackedgeTakenCount();
209610ddb927SPhilip Reames if (isa<SCEVCouldNotCompute>(ExitCount)) {
2097877ccee8SAdam Nemet recordAnalysis("CantComputeNumberOfIterations")
2098877ccee8SAdam Nemet << "could not determine number of loop iterations";
2099d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
2100929c38e8SAdam Nemet return false;
2101929c38e8SAdam Nemet }
2102929c38e8SAdam Nemet
2103929c38e8SAdam Nemet return true;
2104929c38e8SAdam Nemet }
2105929c38e8SAdam Nemet
2106db69b174SSimon Pilgrim void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
21077da74abfSAdam Nemet const TargetLibraryInfo *TLI,
21087da74abfSAdam Nemet DominatorTree *DT) {
2109e3e3b994SMatthew Simpson // Holds the Load and Store instructions.
2110e3e3b994SMatthew Simpson SmallVector<LoadInst *, 16> Loads;
2111e3e3b994SMatthew Simpson SmallVector<StoreInst *, 16> Stores;
21120456327cSAdam Nemet
21130456327cSAdam Nemet // Holds all the different accesses in the loop.
21140456327cSAdam Nemet unsigned NumReads = 0;
21150456327cSAdam Nemet unsigned NumReadWrites = 0;
21160456327cSAdam Nemet
21172466ba97SMatt Arsenault bool HasComplexMemInst = false;
21182466ba97SMatt Arsenault
21192466ba97SMatt Arsenault // A runtime check is only legal to insert if there are no convergent calls.
21202466ba97SMatt Arsenault HasConvergentOp = false;
21212466ba97SMatt Arsenault
2122ce030acbSXinliang David Li PtrRtChecking->Pointers.clear();
2123ce030acbSXinliang David Li PtrRtChecking->Need = false;
21240456327cSAdam Nemet
21250456327cSAdam Nemet const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
21260456327cSAdam Nemet
21277bf299c8SAyal Zaks const bool EnableMemAccessVersioningOfLoop =
21287bf299c8SAyal Zaks EnableMemAccessVersioning &&
21297bf299c8SAyal Zaks !TheLoop->getHeader()->getParent()->hasOptSize();
21307bf299c8SAyal Zaks
21310456327cSAdam Nemet // For each block.
21328b401013SDavid Majnemer for (BasicBlock *BB : TheLoop->blocks()) {
21332466ba97SMatt Arsenault // Scan the BB and collect legal loads and stores. Also detect any
21342466ba97SMatt Arsenault // convergent instructions.
21358b401013SDavid Majnemer for (Instruction &I : *BB) {
21362466ba97SMatt Arsenault if (auto *Call = dyn_cast<CallBase>(&I)) {
21372466ba97SMatt Arsenault if (Call->isConvergent())
21382466ba97SMatt Arsenault HasConvergentOp = true;
21392466ba97SMatt Arsenault }
21402466ba97SMatt Arsenault
21412466ba97SMatt Arsenault // If both a non-vectorizable memory instruction and a convergent operation
21422466ba97SMatt Arsenault // have been found in this loop, there is no reason to continue the search.
21432466ba97SMatt Arsenault if (HasComplexMemInst && HasConvergentOp) {
21442466ba97SMatt Arsenault CanVecMem = false;
21452466ba97SMatt Arsenault return;
21462466ba97SMatt Arsenault }
21472466ba97SMatt Arsenault
21482466ba97SMatt Arsenault // Avoid hitting recordAnalysis multiple times.
21492466ba97SMatt Arsenault if (HasComplexMemInst)
21502466ba97SMatt Arsenault continue;
21512466ba97SMatt Arsenault
21520456327cSAdam Nemet // If this is a load, save it. If this instruction can read from memory
21530456327cSAdam Nemet // but is not a load, then we quit. Notice that we don't handle function
21540456327cSAdam Nemet // calls that read or write.
21558b401013SDavid Majnemer if (I.mayReadFromMemory()) {
21560456327cSAdam Nemet // Many math library functions read the rounding mode. We will only
21570456327cSAdam Nemet // vectorize a loop if it contains known function calls that don't set
21580456327cSAdam Nemet // the flag. Therefore, it is safe to ignore this read from memory.
21598b401013SDavid Majnemer auto *Call = dyn_cast<CallInst>(&I);
2160b4b27230SDavid Majnemer if (Call && getVectorIntrinsicIDForCall(Call, TLI))
21610456327cSAdam Nemet continue;
21620456327cSAdam Nemet
21639b3cf604SMichael Zolotukhin // If the function has an explicit vectorized counterpart, we can safely
21649b3cf604SMichael Zolotukhin // assume that it can be vectorized.
21659b3cf604SMichael Zolotukhin if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
216666c120f0SFrancesco Petrogalli !VFDatabase::getMappings(*Call).empty())
21679b3cf604SMichael Zolotukhin continue;
21689b3cf604SMichael Zolotukhin
21698b401013SDavid Majnemer auto *Ld = dyn_cast<LoadInst>(&I);
21702466ba97SMatt Arsenault if (!Ld) {
21712466ba97SMatt Arsenault recordAnalysis("CantVectorizeInstruction", Ld)
21722466ba97SMatt Arsenault << "instruction cannot be vectorized";
21732466ba97SMatt Arsenault HasComplexMemInst = true;
21742466ba97SMatt Arsenault continue;
21752466ba97SMatt Arsenault }
21762466ba97SMatt Arsenault if (!Ld->isSimple() && !IsAnnotatedParallel) {
2177877ccee8SAdam Nemet recordAnalysis("NonSimpleLoad", Ld)
2178877ccee8SAdam Nemet << "read with atomic ordering or volatile read";
2179d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
21802466ba97SMatt Arsenault HasComplexMemInst = true;
21812466ba97SMatt Arsenault continue;
21820456327cSAdam Nemet }
21830456327cSAdam Nemet NumLoads++;
21840456327cSAdam Nemet Loads.push_back(Ld);
2185ce030acbSXinliang David Li DepChecker->addAccess(Ld);
21867bf299c8SAyal Zaks if (EnableMemAccessVersioningOfLoop)
2187c953bb99SAdam Nemet collectStridedAccess(Ld);
21880456327cSAdam Nemet continue;
21890456327cSAdam Nemet }
21900456327cSAdam Nemet
21910456327cSAdam Nemet // Save 'store' instructions. Abort if other instructions write to memory.
21928b401013SDavid Majnemer if (I.mayWriteToMemory()) {
21938b401013SDavid Majnemer auto *St = dyn_cast<StoreInst>(&I);
21940456327cSAdam Nemet if (!St) {
2195877ccee8SAdam Nemet recordAnalysis("CantVectorizeInstruction", St)
2196877ccee8SAdam Nemet << "instruction cannot be vectorized";
21972466ba97SMatt Arsenault HasComplexMemInst = true;
21982466ba97SMatt Arsenault continue;
21990456327cSAdam Nemet }
22000456327cSAdam Nemet if (!St->isSimple() && !IsAnnotatedParallel) {
2201877ccee8SAdam Nemet recordAnalysis("NonSimpleStore", St)
2202877ccee8SAdam Nemet << "write with atomic ordering or volatile write";
2203d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
22042466ba97SMatt Arsenault HasComplexMemInst = true;
22052466ba97SMatt Arsenault continue;
22060456327cSAdam Nemet }
22070456327cSAdam Nemet NumStores++;
22080456327cSAdam Nemet Stores.push_back(St);
2209ce030acbSXinliang David Li DepChecker->addAccess(St);
22107bf299c8SAyal Zaks if (EnableMemAccessVersioningOfLoop)
2211c953bb99SAdam Nemet collectStridedAccess(St);
22120456327cSAdam Nemet }
22130456327cSAdam Nemet } // Next instr.
22140456327cSAdam Nemet } // Next block.
22150456327cSAdam Nemet
22162466ba97SMatt Arsenault if (HasComplexMemInst) {
22172466ba97SMatt Arsenault CanVecMem = false;
22182466ba97SMatt Arsenault return;
22192466ba97SMatt Arsenault }
22202466ba97SMatt Arsenault
22210456327cSAdam Nemet // Now we have two lists that hold the loads and the stores.
22220456327cSAdam Nemet // Next, we find the pointers that they use.
22230456327cSAdam Nemet
22240456327cSAdam Nemet // Check if we see any stores. If there are no stores, then we don't
22250456327cSAdam Nemet // care if the pointers are *restrict*.
22260456327cSAdam Nemet if (!Stores.size()) {
2227d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
2228436018c3SAdam Nemet CanVecMem = true;
2229436018c3SAdam Nemet return;
22300456327cSAdam Nemet }
22310456327cSAdam Nemet
2232dee666bcSAdam Nemet MemoryDepChecker::DepCandidates DependentAccesses;
2233b0eb40caSVitaly Buka AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);
22340456327cSAdam Nemet
223589051ebaSVitaly Buka // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
22360456327cSAdam Nemet // multiple times on the same object. If the ptr is accessed twice, once
22370456327cSAdam Nemet // for read and once for write, it will only appear once (on the write
22380456327cSAdam Nemet // list). This is okay, since we are going to check for conflicts between
22390456327cSAdam Nemet // writes and between reads and writes, but not between reads and reads.
2240ff31020eSArthur Eubanks SmallSet<std::pair<Value *, Type *>, 16> Seen;
22410456327cSAdam Nemet
2242b1e3d453SAnna Thomas // Record uniform store addresses to identify if we have multiple stores
2243b1e3d453SAnna Thomas // to the same address.
2244ff31020eSArthur Eubanks SmallPtrSet<Value *, 16> UniformStores;
2245b1e3d453SAnna Thomas
2246e3e3b994SMatthew Simpson for (StoreInst *ST : Stores) {
22470456327cSAdam Nemet Value *Ptr = ST->getPointerOperand();
2248b1e3d453SAnna Thomas
22494e5e042dSIgor Kirillov if (isUniform(Ptr)) {
22504e5e042dSIgor Kirillov // Record store instructions to loop invariant addresses
22514e5e042dSIgor Kirillov StoresToInvariantAddresses.push_back(ST);
22525e9215f0SAnna Thomas HasDependenceInvolvingLoopInvariantAddress |=
22536f732bfbSAnna Thomas !UniformStores.insert(Ptr).second;
22544e5e042dSIgor Kirillov }
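// Illustrative example: in "for (i = 0; i < n; i++) A[c] = A[c] + B[i];"
// the store address &A[c] is loop-invariant; a second store to the same
// invariant address (or the matching load handled below) sets
// HasDependenceInvolvingLoopInvariantAddress.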
2255b1e3d453SAnna Thomas
22560456327cSAdam Nemet // If we did *not* see this pointer before, insert it into the read-write
22570456327cSAdam Nemet // list. At this phase it is only a 'write' list.
2258ff31020eSArthur Eubanks Type *AccessTy = getLoadStoreType(ST);
2259ff31020eSArthur Eubanks if (Seen.insert({Ptr, AccessTy}).second) {
22600456327cSAdam Nemet ++NumReadWrites;
22610456327cSAdam Nemet
2262ac80dc75SChandler Carruth MemoryLocation Loc = MemoryLocation::get(ST);
22630456327cSAdam Nemet // The TBAA metadata could have a control dependency on the predication
22640456327cSAdam Nemet // condition, so we cannot rely on it when determining whether or not we
22650456327cSAdam Nemet // need runtime pointer checks.
226601abb2c3SAdam Nemet if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
22670456327cSAdam Nemet Loc.AATags.TBAA = nullptr;
22680456327cSAdam Nemet
2269e248d690SFlorian Hahn visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2270ff31020eSArthur Eubanks [&Accesses, AccessTy, Loc](Value *Ptr) {
2271e248d690SFlorian Hahn MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2272ff31020eSArthur Eubanks Accesses.addStore(NewLoc, AccessTy);
2273e248d690SFlorian Hahn });
22740456327cSAdam Nemet }
22750456327cSAdam Nemet }
22760456327cSAdam Nemet
22770456327cSAdam Nemet if (IsAnnotatedParallel) {
2278d34e60caSNicola Zaghen LLVM_DEBUG(
2279d34e60caSNicola Zaghen dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
22800456327cSAdam Nemet << "checks.\n");
2281436018c3SAdam Nemet CanVecMem = true;
2282436018c3SAdam Nemet return;
22830456327cSAdam Nemet }
22840456327cSAdam Nemet
2285e3e3b994SMatthew Simpson for (LoadInst *LD : Loads) {
22860456327cSAdam Nemet Value *Ptr = LD->getPointerOperand();
22870456327cSAdam Nemet // If we did *not* see this pointer before, insert it into the
22880456327cSAdam Nemet // read list. If we *did* see it before, then it is already in
22890456327cSAdam Nemet // the read-write list. This allows us to vectorize expressions
22900456327cSAdam Nemet // such as A[i] += x, because the address of A[i] is a read-write
22910456327cSAdam Nemet // pointer. This only works if the index of A[i] is consecutive.
22920456327cSAdam Nemet // If the index i is unknown (for example A[B[i]]), then we may
22930456327cSAdam Nemet // read a few words, modify, and write a few words, and some of the
22940456327cSAdam Nemet // words may be written to the same address.
22950456327cSAdam Nemet bool IsReadOnlyPtr = false;
2296ff31020eSArthur Eubanks Type *AccessTy = getLoadStoreType(LD);
2297ff31020eSArthur Eubanks if (Seen.insert({Ptr, AccessTy}).second ||
229845c46734SNikita Popov !getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
22990456327cSAdam Nemet ++NumReads;
23000456327cSAdam Nemet IsReadOnlyPtr = true;
23010456327cSAdam Nemet }
23020456327cSAdam Nemet
23035e9215f0SAnna Thomas // See if there is an unsafe dependency between a load from a uniform address
23045e9215f0SAnna Thomas // and a store to the same uniform address.
23055e9215f0SAnna Thomas if (UniformStores.count(Ptr)) {
23065e9215f0SAnna Thomas LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
23075e9215f0SAnna Thomas "load and uniform store to the same address!\n");
23085e9215f0SAnna Thomas HasDependenceInvolvingLoopInvariantAddress = true;
23095e9215f0SAnna Thomas }
23105e9215f0SAnna Thomas
2311ac80dc75SChandler Carruth MemoryLocation Loc = MemoryLocation::get(LD);
23120456327cSAdam Nemet // The TBAA metadata could have a control dependency on the predication
23130456327cSAdam Nemet // condition, so we cannot rely on it when determining whether or not we
23140456327cSAdam Nemet // need runtime pointer checks.
231501abb2c3SAdam Nemet if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
23160456327cSAdam Nemet Loc.AATags.TBAA = nullptr;
23170456327cSAdam Nemet
2318e248d690SFlorian Hahn visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2319ff31020eSArthur Eubanks [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
2320e248d690SFlorian Hahn MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2321ff31020eSArthur Eubanks Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
2322e248d690SFlorian Hahn });
23230456327cSAdam Nemet }
23240456327cSAdam Nemet
23250456327cSAdam Nemet // If we write (or read-write) to a single destination and there are no
23260456327cSAdam Nemet // other reads in this loop, then it is safe to vectorize.
23270456327cSAdam Nemet if (NumReadWrites == 1 && NumReads == 0) {
2328d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
2329436018c3SAdam Nemet CanVecMem = true;
2330436018c3SAdam Nemet return;
23310456327cSAdam Nemet }
23320456327cSAdam Nemet
23330456327cSAdam Nemet // Build dependence sets and check whether we need a runtime pointer bounds
23340456327cSAdam Nemet // check.
23350456327cSAdam Nemet Accesses.buildDependenceSets();
23360456327cSAdam Nemet
23370456327cSAdam Nemet // Find pointers with computable bounds. We are going to use this information
23380456327cSAdam Nemet // to place a runtime bound check.
23399f1c6fbfSMalhar Jajoo Value *UncomputablePtr = nullptr;
23409f1c6fbfSMalhar Jajoo bool CanDoRTIfNeeded =
23419f1c6fbfSMalhar Jajoo Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop,
23429f1c6fbfSMalhar Jajoo SymbolicStrides, UncomputablePtr, false);
2343ee61474aSAdam Nemet if (!CanDoRTIfNeeded) {
23449f1c6fbfSMalhar Jajoo auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
23459f1c6fbfSMalhar Jajoo recordAnalysis("CantIdentifyArrayBounds", I)
23469f1c6fbfSMalhar Jajoo << "cannot identify array bounds";
2347d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
2348ee61474aSAdam Nemet << "the array bounds.\n");
2349436018c3SAdam Nemet CanVecMem = false;
2350436018c3SAdam Nemet return;
23510456327cSAdam Nemet }
23520456327cSAdam Nemet
2353d34e60caSNicola Zaghen LLVM_DEBUG(
23542466ba97SMatt Arsenault dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
23550456327cSAdam Nemet
2356436018c3SAdam Nemet CanVecMem = true;
23570456327cSAdam Nemet if (Accesses.isDependencyCheckNeeded()) {
2358d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2359ce030acbSXinliang David Li CanVecMem = DepChecker->areDepsSafe(
2360139ffba3SAdam Nemet DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
2361ce030acbSXinliang David Li MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
23620456327cSAdam Nemet
2363ce030acbSXinliang David Li if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
2364d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
23650456327cSAdam Nemet
23660456327cSAdam Nemet // Clear the dependency checks. We assume they are not needed.
2367ce030acbSXinliang David Li Accesses.resetDepChecks(*DepChecker);
23680456327cSAdam Nemet
2369ce030acbSXinliang David Li PtrRtChecking->reset();
2370ce030acbSXinliang David Li PtrRtChecking->Need = true;
23710456327cSAdam Nemet
237294734eefSXinliang David Li auto *SE = PSE->getSE();
23739f1c6fbfSMalhar Jajoo UncomputablePtr = nullptr;
23749f1c6fbfSMalhar Jajoo CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
23759f1c6fbfSMalhar Jajoo *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true);
237698a13719SSilviu Baranga
2377949e91a6SAdam Nemet // Check that we found the bounds for the pointer.
2378ee61474aSAdam Nemet if (!CanDoRTIfNeeded) {
23799f1c6fbfSMalhar Jajoo auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
23809f1c6fbfSMalhar Jajoo recordAnalysis("CantCheckMemDepsAtRunTime", I)
2381877ccee8SAdam Nemet << "cannot check memory dependencies at runtime";
2382d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2383b6dc76ffSAdam Nemet CanVecMem = false;
2384b6dc76ffSAdam Nemet return;
2385b6dc76ffSAdam Nemet }
2386b6dc76ffSAdam Nemet
23870456327cSAdam Nemet CanVecMem = true;
23880456327cSAdam Nemet }
23890456327cSAdam Nemet }
23900456327cSAdam Nemet
23912466ba97SMatt Arsenault if (HasConvergentOp) {
23922466ba97SMatt Arsenault recordAnalysis("CantInsertRuntimeCheckWithConvergent")
23932466ba97SMatt Arsenault << "cannot add control dependency to convergent operation";
23942466ba97SMatt Arsenault LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
23952466ba97SMatt Arsenault "would be needed with a convergent operation\n");
23962466ba97SMatt Arsenault CanVecMem = false;
23972466ba97SMatt Arsenault return;
23982466ba97SMatt Arsenault }
23992466ba97SMatt Arsenault
24004bb90a71SAdam Nemet if (CanVecMem)
2401d34e60caSNicola Zaghen LLVM_DEBUG(
2402d34e60caSNicola Zaghen dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
2403ce030acbSXinliang David Li << (PtrRtChecking->Need ? "" : " don't")
24040f67c6c1SAdam Nemet << " need runtime memory checks.\n");
2405778b455dSMalhar Jajoo else
2406778b455dSMalhar Jajoo emitUnsafeDependenceRemark();
2407778b455dSMalhar Jajoo }
2408778b455dSMalhar Jajoo
2409778b455dSMalhar Jajoo void LoopAccessInfo::emitUnsafeDependenceRemark() {
2410778b455dSMalhar Jajoo auto Deps = getDepChecker().getDependences();
2411778b455dSMalhar Jajoo if (!Deps)
2412778b455dSMalhar Jajoo return;
2413778b455dSMalhar Jajoo auto Found = std::find_if(
2414778b455dSMalhar Jajoo Deps->begin(), Deps->end(), [](const MemoryDepChecker::Dependence &D) {
2415778b455dSMalhar Jajoo return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) !=
2416778b455dSMalhar Jajoo MemoryDepChecker::VectorizationSafetyStatus::Safe;
2417778b455dSMalhar Jajoo });
2418778b455dSMalhar Jajoo if (Found == Deps->end())
2419778b455dSMalhar Jajoo return;
2420778b455dSMalhar Jajoo MemoryDepChecker::Dependence Dep = *Found;
2421778b455dSMalhar Jajoo
2422778b455dSMalhar Jajoo LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2423778b455dSMalhar Jajoo
2424778b455dSMalhar Jajoo // Emit remark for first unsafe dependence
2425778b455dSMalhar Jajoo OptimizationRemarkAnalysis &R =
2426778b455dSMalhar Jajoo recordAnalysis("UnsafeDep", Dep.getDestination(*this))
24270a77dfadSAdam Nemet << "unsafe dependent memory operations in loop. Use "
24280a77dfadSAdam Nemet "#pragma loop distribute(enable) to allow loop distribution "
24290a77dfadSAdam Nemet "to attempt to isolate the offending operations into a separate "
2430877ccee8SAdam Nemet "loop";
2431778b455dSMalhar Jajoo
2432778b455dSMalhar Jajoo switch (Dep.Type) {
2433778b455dSMalhar Jajoo case MemoryDepChecker::Dependence::NoDep:
2434778b455dSMalhar Jajoo case MemoryDepChecker::Dependence::Forward:
2435778b455dSMalhar Jajoo case MemoryDepChecker::Dependence::BackwardVectorizable:
2436778b455dSMalhar Jajoo llvm_unreachable("Unexpected dependence");
2437778b455dSMalhar Jajoo case MemoryDepChecker::Dependence::Backward:
2438778b455dSMalhar Jajoo R << "\nBackward loop carried data dependence.";
2439778b455dSMalhar Jajoo break;
2440778b455dSMalhar Jajoo case MemoryDepChecker::Dependence::ForwardButPreventsForwarding:
2441778b455dSMalhar Jajoo R << "\nForward loop carried data dependence that prevents "
2442778b455dSMalhar Jajoo "store-to-load forwarding.";
2443778b455dSMalhar Jajoo break;
2444778b455dSMalhar Jajoo case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding:
2445778b455dSMalhar Jajoo R << "\nBackward loop carried data dependence that prevents "
2446778b455dSMalhar Jajoo "store-to-load forwarding.";
2447778b455dSMalhar Jajoo break;
2448778b455dSMalhar Jajoo case MemoryDepChecker::Dependence::Unknown:
2449778b455dSMalhar Jajoo R << "\nUnknown data dependence.";
2450778b455dSMalhar Jajoo break;
2451778b455dSMalhar Jajoo }
2452778b455dSMalhar Jajoo
2453778b455dSMalhar Jajoo if (Instruction *I = Dep.getSource(*this)) {
2454778b455dSMalhar Jajoo DebugLoc SourceLoc = I->getDebugLoc();
2455778b455dSMalhar Jajoo if (auto *DD = dyn_cast_or_null<Instruction>(getPointerOperand(I)))
2456778b455dSMalhar Jajoo SourceLoc = DD->getDebugLoc();
2457778b455dSMalhar Jajoo if (SourceLoc)
2458778b455dSMalhar Jajoo R << " Memory location is the same as accessed at "
2459778b455dSMalhar Jajoo << ore::NV("Location", SourceLoc);
24604bb90a71SAdam Nemet }
24610456327cSAdam Nemet }
24620456327cSAdam Nemet
246301abb2c3SAdam Nemet bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
246401abb2c3SAdam Nemet DominatorTree *DT) {
24650456327cSAdam Nemet assert(TheLoop->contains(BB) && "Unknown block used");
24660456327cSAdam Nemet
24670456327cSAdam Nemet // Blocks that do not dominate the latch need predication.
24680456327cSAdam Nemet BasicBlock* Latch = TheLoop->getLoopLatch();
24690456327cSAdam Nemet return !DT->dominates(BB, Latch);
24700456327cSAdam Nemet }
24710456327cSAdam Nemet
2472877ccee8SAdam Nemet OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
2473877ccee8SAdam Nemet Instruction *I) {
2474c922853bSAdam Nemet assert(!Report && "Multiple reports generated");
2475877ccee8SAdam Nemet
2476877ccee8SAdam Nemet Value *CodeRegion = TheLoop->getHeader();
2477877ccee8SAdam Nemet DebugLoc DL = TheLoop->getStartLoc();
2478877ccee8SAdam Nemet
2479877ccee8SAdam Nemet if (I) {
2480877ccee8SAdam Nemet CodeRegion = I->getParent();
2481877ccee8SAdam Nemet // If there is no debug location attached to the instruction, fall back to
2482877ccee8SAdam Nemet // using the loop's.
2483877ccee8SAdam Nemet if (I->getDebugLoc())
2484877ccee8SAdam Nemet DL = I->getDebugLoc();
2485877ccee8SAdam Nemet }
2486877ccee8SAdam Nemet
24870eaee545SJonas Devlieghere Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2488877ccee8SAdam Nemet CodeRegion);
2489877ccee8SAdam Nemet return *Report;
24900456327cSAdam Nemet }
24910456327cSAdam Nemet
249257ac766eSAdam Nemet bool LoopAccessInfo::isUniform(Value *V) const {
24933ceac2bbSMichael Kuperstein auto *SE = PSE->getSE();
24943ceac2bbSMichael Kuperstein // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
24953ceac2bbSMichael Kuperstein // never considered uniform.
24963ceac2bbSMichael Kuperstein // TODO: Is this really what we want? Even without FP SCEV, we may want some
24973ceac2bbSMichael Kuperstein // trivially loop-invariant FP values to be considered uniform.
24983ceac2bbSMichael Kuperstein if (!SE->isSCEVable(V->getType()))
24993ceac2bbSMichael Kuperstein return false;
25003ceac2bbSMichael Kuperstein return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
25010456327cSAdam Nemet }
25027206d7a5SAdam Nemet
2503c953bb99SAdam Nemet void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2504b3a8a153SPhilip Reames Value *Ptr = getLoadStorePointerOperand(MemAccess);
2505b3a8a153SPhilip Reames if (!Ptr)
2506c953bb99SAdam Nemet return;
2507c953bb99SAdam Nemet
250894734eefSXinliang David Li Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2509c953bb99SAdam Nemet if (!Stride)
2510c953bb99SAdam Nemet return;
2511c953bb99SAdam Nemet
2512d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2513eb13dd3eSDorit Nuzman "versioning:");
2514d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2515eb13dd3eSDorit Nuzman
2516eb13dd3eSDorit Nuzman // Avoid adding the "Stride == 1" predicate when we know that
2517eb13dd3eSDorit Nuzman // Stride >= Trip-Count. Such a predicate would only benefit a loop executing
2518eb13dd3eSDorit Nuzman // at most one iteration, as Trip-Count <= Stride == 1.
2519eb13dd3eSDorit Nuzman //
2520eb13dd3eSDorit Nuzman // TODO: We are currently not making a very informed decision on when it is
2521eb13dd3eSDorit Nuzman // beneficial to apply stride versioning. It might make more sense that the
2522eb13dd3eSDorit Nuzman // users of this analysis (such as the vectorizer) will trigger it, based on
2523eb13dd3eSDorit Nuzman // their specific cost considerations; For example, in cases where stride
2524eb13dd3eSDorit Nuzman // versioning does not help resolving memory accesses/dependences, the
2525eb13dd3eSDorit Nuzman // vectorizer should evaluate the cost of the runtime test, and the benefit
2526eb13dd3eSDorit Nuzman // of various possible stride specializations, considering the alternatives
2527eb13dd3eSDorit Nuzman // of using gather/scatters (if available).
2528eb13dd3eSDorit Nuzman
2529eb13dd3eSDorit Nuzman const SCEV *StrideExpr = PSE->getSCEV(Stride);
2530eb13dd3eSDorit Nuzman const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2531eb13dd3eSDorit Nuzman
2532eb13dd3eSDorit Nuzman // Match the types so we can compare the stride and the BETakenCount.
2533eb13dd3eSDorit Nuzman // The Stride can be positive/negative, so we sign extend Stride;
253402a2bb2fSHiroshi Inoue // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
2535eb13dd3eSDorit Nuzman const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
25367ee30a0eSChang-Sun Lin Jr uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
25377ee30a0eSChang-Sun Lin Jr uint64_t BETypeSizeBits = DL.getTypeSizeInBits(BETakenCount->getType());
2538eb13dd3eSDorit Nuzman const SCEV *CastedStride = StrideExpr;
2539eb13dd3eSDorit Nuzman const SCEV *CastedBECount = BETakenCount;
2540eb13dd3eSDorit Nuzman ScalarEvolution *SE = PSE->getSE();
25417ee30a0eSChang-Sun Lin Jr if (BETypeSizeBits >= StrideTypeSizeBits)
2542eb13dd3eSDorit Nuzman CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
2543eb13dd3eSDorit Nuzman else
2544eb13dd3eSDorit Nuzman CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
2545eb13dd3eSDorit Nuzman const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2546eb13dd3eSDorit Nuzman // Since TripCount == BackEdgeTakenCount + 1, checking:
2547eb13dd3eSDorit Nuzman // "Stride >= TripCount" is equivalent to checking:
2548eb13dd3eSDorit Nuzman // Stride - BETakenCount > 0
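// Worked example for illustration: with a backedge-taken count of 99 (trip
// count 100) and a stride known to be >= 100, StrideMinusBETaken is
// positive, and a loop specialized for Stride == 1 could execute at most
// one iteration, so versioning is skipped.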
2549eb13dd3eSDorit Nuzman if (SE->isKnownPositive(StrideMinusBETaken)) {
2550d34e60caSNicola Zaghen LLVM_DEBUG(
2551d34e60caSNicola Zaghen dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2552eb13dd3eSDorit Nuzman "Stride==1 predicate will imply that the loop executes "
2553eb13dd3eSDorit Nuzman "at most once.\n");
2554eb13dd3eSDorit Nuzman return;
2555eb13dd3eSDorit Nuzman }
255640f90819SThomas Preud'homme LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
2557eb13dd3eSDorit Nuzman
2558c953bb99SAdam Nemet SymbolicStrides[Ptr] = Stride;
2559c953bb99SAdam Nemet StrideSet.insert(Stride);
2560c953bb99SAdam Nemet }
2561c953bb99SAdam Nemet
25623bfd93d7SAdam Nemet LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2563db69b174SSimon Pilgrim const TargetLibraryInfo *TLI, AAResults *AA,
2564a9f09c62SAdam Nemet DominatorTree *DT, LoopInfo *LI)
25650eaee545SJonas Devlieghere : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2566b7315ffcSFlorian Hahn PtrRtChecking(nullptr),
2567b752eb88SKazu Hirata DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L) {
2568b7315ffcSFlorian Hahn PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
2569b7315ffcSFlorian Hahn if (canAnalyzeLoop()) {
25707da74abfSAdam Nemet analyzeLoop(AA, LI, TLI, DT);
25713bfd93d7SAdam Nemet }
2572b7315ffcSFlorian Hahn }
25733bfd93d7SAdam Nemet
2574e91cc6efSAdam Nemet void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2575e91cc6efSAdam Nemet if (CanVecMem) {
25764ad38b63SAdam Nemet OS.indent(Depth) << "Memory dependences are safe";
25777afb46d3SDavid Majnemer if (MaxSafeDepDistBytes != -1ULL)
2578c62e554eSAdam Nemet OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2579c62e554eSAdam Nemet << " bytes";
2580ce030acbSXinliang David Li if (PtrRtChecking->Need)
25814ad38b63SAdam Nemet OS << " with run-time checks";
25824ad38b63SAdam Nemet OS << "\n";
2583e91cc6efSAdam Nemet }
2584e91cc6efSAdam Nemet
25852466ba97SMatt Arsenault if (HasConvergentOp)
25862466ba97SMatt Arsenault OS.indent(Depth) << "Has convergent operation in loop\n";
25872466ba97SMatt Arsenault
2588e91cc6efSAdam Nemet if (Report)
2589877ccee8SAdam Nemet OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2590e91cc6efSAdam Nemet
2591ce030acbSXinliang David Li if (auto *Dependences = DepChecker->getDependences()) {
2592a2df750fSAdam Nemet OS.indent(Depth) << "Dependences:\n";
2593601b3a13SKazu Hirata for (const auto &Dep : *Dependences) {
2594ce030acbSXinliang David Li Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
259558913d65SAdam Nemet OS << "\n";
259658913d65SAdam Nemet }
259758913d65SAdam Nemet } else
2598a2df750fSAdam Nemet OS.indent(Depth) << "Too many dependences, not recorded\n";
2599e91cc6efSAdam Nemet
2600e91cc6efSAdam Nemet // List the pair of accesses need run-time checks to prove independence.
2601ce030acbSXinliang David Li PtrRtChecking->print(OS, Depth);
2602e91cc6efSAdam Nemet OS << "\n";
2603c3384320SAdam Nemet
26045e9215f0SAnna Thomas OS.indent(Depth) << "Non vectorizable stores to invariant address were "
26055e9215f0SAnna Thomas << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
2606c3384320SAdam Nemet << "found in loop.\n";
2607e3c0534bSSilviu Baranga
2608e3c0534bSSilviu Baranga OS.indent(Depth) << "SCEV assumptions:\n";
26095ba11503SPhilip Reames PSE->getPredicate().print(OS, Depth);
2610b77365b5SSilviu Baranga
2611b77365b5SSilviu Baranga OS << "\n";
2612b77365b5SSilviu Baranga
2613b77365b5SSilviu Baranga OS.indent(Depth) << "Expressions re-written:\n";
261494734eefSXinliang David Li PSE->print(OS, Depth);
2615e91cc6efSAdam Nemet }
2616e91cc6efSAdam Nemet
261705da2fe5SReid Kleckner LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) {
261805da2fe5SReid Kleckner initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
261905da2fe5SReid Kleckner }
262005da2fe5SReid Kleckner
26217853c1ddSXinliang David Li const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
26223bfd93d7SAdam Nemet auto &LAI = LoopAccessInfoMap[L];
26233bfd93d7SAdam Nemet
26241824e411SAdam Nemet if (!LAI)
26250eaee545SJonas Devlieghere LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
26261824e411SAdam Nemet
26279aa52ba5SKazu Hirata return *LAI;
26283bfd93d7SAdam Nemet }
26293bfd93d7SAdam Nemet
26307853c1ddSXinliang David Li void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
26317853c1ddSXinliang David Li LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2632ecde1c7fSXinliang David Li
2633e91cc6efSAdam Nemet for (Loop *TopLevelLoop : *LI)
2634e91cc6efSAdam Nemet for (Loop *L : depth_first(TopLevelLoop)) {
2635e91cc6efSAdam Nemet OS.indent(2) << L->getHeader()->getName() << ":\n";
2636bdbc5227SAdam Nemet auto &LAI = LAA.getInfo(L);
2637e91cc6efSAdam Nemet LAI.print(OS, 4);
2638e91cc6efSAdam Nemet }
2639e91cc6efSAdam Nemet }
2640e91cc6efSAdam Nemet
26417853c1ddSXinliang David Li bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2642ecde1c7fSXinliang David Li SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
26433bfd93d7SAdam Nemet auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
26449c27b59cSTeresa Johnson TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
2645ecde1c7fSXinliang David Li AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2646ecde1c7fSXinliang David Li DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2647ecde1c7fSXinliang David Li LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
26483bfd93d7SAdam Nemet
26493bfd93d7SAdam Nemet return false;
26503bfd93d7SAdam Nemet }
26513bfd93d7SAdam Nemet
26527853c1ddSXinliang David Li void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
26533ee82659SBjorn Pettersson AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
26543ee82659SBjorn Pettersson AU.addRequiredTransitive<AAResultsWrapperPass>();
26553ee82659SBjorn Pettersson AU.addRequiredTransitive<DominatorTreeWrapperPass>();
26563ee82659SBjorn Pettersson AU.addRequiredTransitive<LoopInfoWrapperPass>();
26573bfd93d7SAdam Nemet
26583bfd93d7SAdam Nemet AU.setPreservesAll();
26593bfd93d7SAdam Nemet }
26603bfd93d7SAdam Nemet
26617853c1ddSXinliang David Li char LoopAccessLegacyAnalysis::ID = 0;
26623bfd93d7SAdam Nemet static const char laa_name[] = "Loop Access Analysis";
26633bfd93d7SAdam Nemet #define LAA_NAME "loop-accesses"
26643bfd93d7SAdam Nemet
26657853c1ddSXinliang David Li INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
26667b560d40SChandler Carruth INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
26672f1fd165SChandler Carruth INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
26683bfd93d7SAdam Nemet INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2669e91cc6efSAdam Nemet INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
26707853c1ddSXinliang David Li INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
26713bfd93d7SAdam Nemet
2672dab4eae2SChandler Carruth AnalysisKey LoopAccessAnalysis::Key;
26738a021317SXinliang David Li
2674410eaeb0SChandler Carruth LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
2675410eaeb0SChandler Carruth LoopStandardAnalysisResults &AR) {
2676410eaeb0SChandler Carruth return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
26778a021317SXinliang David Li }
26788a021317SXinliang David Li
26793bfd93d7SAdam Nemet namespace llvm {
2680a3fe70d2SEugene Zelenko
26813bfd93d7SAdam Nemet Pass *createLAAPass() {
26827853c1ddSXinliang David Li return new LoopAccessLegacyAnalysis();
26833bfd93d7SAdam Nemet }
2684a3fe70d2SEugene Zelenko
2685a3fe70d2SEugene Zelenko } // end namespace llvm
2686