1*f2ec16ccSHideki Saito //===- LoopVectorizationLegality.cpp --------------------------------------===//
2*f2ec16ccSHideki Saito //
3*f2ec16ccSHideki Saito //                     The LLVM Compiler Infrastructure
4*f2ec16ccSHideki Saito //
5*f2ec16ccSHideki Saito // This file is distributed under the University of Illinois Open Source
6*f2ec16ccSHideki Saito // License. See LICENSE.TXT for details.
7*f2ec16ccSHideki Saito //
8*f2ec16ccSHideki Saito //===----------------------------------------------------------------------===//
9*f2ec16ccSHideki Saito //
10*f2ec16ccSHideki Saito // This file provides loop vectorization legality analysis. Original code
11*f2ec16ccSHideki Saito // resided in LoopVectorize.cpp for a long time.
12*f2ec16ccSHideki Saito //
13*f2ec16ccSHideki Saito // At this point, it is implemented as a utility class, not as an analysis
14*f2ec16ccSHideki Saito // pass. It should be easy to create an analysis pass around it if there
15*f2ec16ccSHideki Saito // is a need (but D45420 needs to happen first).
16*f2ec16ccSHideki Saito //
17*f2ec16ccSHideki Saito #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
18*f2ec16ccSHideki Saito #include "llvm/Analysis/VectorUtils.h"
19*f2ec16ccSHideki Saito #include "llvm/IR/IntrinsicInst.h"
20*f2ec16ccSHideki Saito 
21*f2ec16ccSHideki Saito using namespace llvm;
22*f2ec16ccSHideki Saito 
23*f2ec16ccSHideki Saito #define LV_NAME "loop-vectorize"
24*f2ec16ccSHideki Saito #define DEBUG_TYPE LV_NAME
25*f2ec16ccSHideki Saito 
26*f2ec16ccSHideki Saito static cl::opt<bool>
27*f2ec16ccSHideki Saito     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
28*f2ec16ccSHideki Saito                        cl::desc("Enable if-conversion during vectorization."));
29*f2ec16ccSHideki Saito 
30*f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
31*f2ec16ccSHideki Saito     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
32*f2ec16ccSHideki Saito     cl::desc("The maximum allowed number of runtime memory checks with a "
33*f2ec16ccSHideki Saito              "vectorize(enable) pragma."));
34*f2ec16ccSHideki Saito 
35*f2ec16ccSHideki Saito static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
36*f2ec16ccSHideki Saito     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
37*f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed."));
38*f2ec16ccSHideki Saito 
39*f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
40*f2ec16ccSHideki Saito     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
41*f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed with a "
42*f2ec16ccSHideki Saito              "vectorize(enable) pragma"));
43*f2ec16ccSHideki Saito 
44*f2ec16ccSHideki Saito /// Maximum vectorization interleave count.
45*f2ec16ccSHideki Saito static const unsigned MaxInterleaveFactor = 16;
46*f2ec16ccSHideki Saito 
47*f2ec16ccSHideki Saito namespace llvm {
48*f2ec16ccSHideki Saito 
49*f2ec16ccSHideki Saito OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
50*f2ec16ccSHideki Saito                                                   StringRef RemarkName,
51*f2ec16ccSHideki Saito                                                   Loop *TheLoop,
52*f2ec16ccSHideki Saito                                                   Instruction *I) {
53*f2ec16ccSHideki Saito   Value *CodeRegion = TheLoop->getHeader();
54*f2ec16ccSHideki Saito   DebugLoc DL = TheLoop->getStartLoc();
55*f2ec16ccSHideki Saito 
56*f2ec16ccSHideki Saito   if (I) {
57*f2ec16ccSHideki Saito     CodeRegion = I->getParent();
58*f2ec16ccSHideki Saito     // If there is no debug location attached to the instruction, revert back to
59*f2ec16ccSHideki Saito     // using the loop's.
60*f2ec16ccSHideki Saito     if (I->getDebugLoc())
61*f2ec16ccSHideki Saito       DL = I->getDebugLoc();
62*f2ec16ccSHideki Saito   }
63*f2ec16ccSHideki Saito 
64*f2ec16ccSHideki Saito   OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);
65*f2ec16ccSHideki Saito   R << "loop not vectorized: ";
66*f2ec16ccSHideki Saito   return R;
67*f2ec16ccSHideki Saito }
68*f2ec16ccSHideki Saito 
69*f2ec16ccSHideki Saito bool LoopVectorizeHints::Hint::validate(unsigned Val) {
70*f2ec16ccSHideki Saito   switch (Kind) {
71*f2ec16ccSHideki Saito   case HK_WIDTH:
72*f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
73*f2ec16ccSHideki Saito   case HK_UNROLL:
74*f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
75*f2ec16ccSHideki Saito   case HK_FORCE:
76*f2ec16ccSHideki Saito     return (Val <= 1);
77*f2ec16ccSHideki Saito   case HK_ISVECTORIZED:
78*f2ec16ccSHideki Saito     return (Val == 0 || Val == 1);
79*f2ec16ccSHideki Saito   }
80*f2ec16ccSHideki Saito   return false;
81*f2ec16ccSHideki Saito }
82*f2ec16ccSHideki Saito 
83*f2ec16ccSHideki Saito LoopVectorizeHints::LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
84*f2ec16ccSHideki Saito                                        OptimizationRemarkEmitter &ORE)
85*f2ec16ccSHideki Saito     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
86*f2ec16ccSHideki Saito       Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
87*f2ec16ccSHideki Saito       Force("vectorize.enable", FK_Undefined, HK_FORCE),
88*f2ec16ccSHideki Saito       IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) {
89*f2ec16ccSHideki Saito   // Populate values with existing loop metadata.
90*f2ec16ccSHideki Saito   getHintsFromMetadata();
91*f2ec16ccSHideki Saito 
92*f2ec16ccSHideki Saito   // force-vector-interleave overrides DisableInterleaving.
93*f2ec16ccSHideki Saito   if (VectorizerParams::isInterleaveForced())
94*f2ec16ccSHideki Saito     Interleave.Value = VectorizerParams::VectorizationInterleave;
95*f2ec16ccSHideki Saito 
96*f2ec16ccSHideki Saito   if (IsVectorized.Value != 1)
97*f2ec16ccSHideki Saito     // If the vectorization width and interleaving count are both 1 then
98*f2ec16ccSHideki Saito     // consider the loop to have been already vectorized because there's
99*f2ec16ccSHideki Saito     // nothing more that we can do.
100*f2ec16ccSHideki Saito     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
101*f2ec16ccSHideki Saito   DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
102*f2ec16ccSHideki Saito         << "LV: Interleaving disabled by the pass manager\n");
103*f2ec16ccSHideki Saito }
104*f2ec16ccSHideki Saito 
105*f2ec16ccSHideki Saito bool LoopVectorizeHints::allowVectorization(Function *F, Loop *L,
106*f2ec16ccSHideki Saito                                             bool AlwaysVectorize) const {
107*f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled) {
108*f2ec16ccSHideki Saito     DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
109*f2ec16ccSHideki Saito     emitRemarkWithHints();
110*f2ec16ccSHideki Saito     return false;
111*f2ec16ccSHideki Saito   }
112*f2ec16ccSHideki Saito 
113*f2ec16ccSHideki Saito   if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) {
114*f2ec16ccSHideki Saito     DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
115*f2ec16ccSHideki Saito     emitRemarkWithHints();
116*f2ec16ccSHideki Saito     return false;
117*f2ec16ccSHideki Saito   }
118*f2ec16ccSHideki Saito 
119*f2ec16ccSHideki Saito   if (getIsVectorized() == 1) {
120*f2ec16ccSHideki Saito     DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
121*f2ec16ccSHideki Saito     // FIXME: Add interleave.disable metadata. This will allow
122*f2ec16ccSHideki Saito     // vectorize.disable to be used without disabling the pass and errors
123*f2ec16ccSHideki Saito     // to differentiate between disabled vectorization and a width of 1.
124*f2ec16ccSHideki Saito     ORE.emit([&]() {
125*f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
126*f2ec16ccSHideki Saito                                         "AllDisabled", L->getStartLoc(),
127*f2ec16ccSHideki Saito                                         L->getHeader())
128*f2ec16ccSHideki Saito              << "loop not vectorized: vectorization and interleaving are "
129*f2ec16ccSHideki Saito                 "explicitly disabled, or the loop has already been "
130*f2ec16ccSHideki Saito                 "vectorized";
131*f2ec16ccSHideki Saito     });
132*f2ec16ccSHideki Saito     return false;
133*f2ec16ccSHideki Saito   }
134*f2ec16ccSHideki Saito 
135*f2ec16ccSHideki Saito   return true;
136*f2ec16ccSHideki Saito }
137*f2ec16ccSHideki Saito 
138*f2ec16ccSHideki Saito void LoopVectorizeHints::emitRemarkWithHints() const {
139*f2ec16ccSHideki Saito   using namespace ore;
140*f2ec16ccSHideki Saito 
141*f2ec16ccSHideki Saito   ORE.emit([&]() {
142*f2ec16ccSHideki Saito     if (Force.Value == LoopVectorizeHints::FK_Disabled)
143*f2ec16ccSHideki Saito       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
144*f2ec16ccSHideki Saito                                       TheLoop->getStartLoc(),
145*f2ec16ccSHideki Saito                                       TheLoop->getHeader())
146*f2ec16ccSHideki Saito              << "loop not vectorized: vectorization is explicitly disabled";
147*f2ec16ccSHideki Saito     else {
148*f2ec16ccSHideki Saito       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
149*f2ec16ccSHideki Saito                                  TheLoop->getStartLoc(), TheLoop->getHeader());
150*f2ec16ccSHideki Saito       R << "loop not vectorized";
151*f2ec16ccSHideki Saito       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
152*f2ec16ccSHideki Saito         R << " (Force=" << NV("Force", true);
153*f2ec16ccSHideki Saito         if (Width.Value != 0)
154*f2ec16ccSHideki Saito           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
155*f2ec16ccSHideki Saito         if (Interleave.Value != 0)
156*f2ec16ccSHideki Saito           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
157*f2ec16ccSHideki Saito         R << ")";
158*f2ec16ccSHideki Saito       }
159*f2ec16ccSHideki Saito       return R;
160*f2ec16ccSHideki Saito     }
161*f2ec16ccSHideki Saito   });
162*f2ec16ccSHideki Saito }
163*f2ec16ccSHideki Saito 
164*f2ec16ccSHideki Saito const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
165*f2ec16ccSHideki Saito   if (getWidth() == 1)
166*f2ec16ccSHideki Saito     return LV_NAME;
167*f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled)
168*f2ec16ccSHideki Saito     return LV_NAME;
169*f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
170*f2ec16ccSHideki Saito     return LV_NAME;
171*f2ec16ccSHideki Saito   return OptimizationRemarkAnalysis::AlwaysPrint;
172*f2ec16ccSHideki Saito }
173*f2ec16ccSHideki Saito 
174*f2ec16ccSHideki Saito void LoopVectorizeHints::getHintsFromMetadata() {
175*f2ec16ccSHideki Saito   MDNode *LoopID = TheLoop->getLoopID();
176*f2ec16ccSHideki Saito   if (!LoopID)
177*f2ec16ccSHideki Saito     return;
178*f2ec16ccSHideki Saito 
179*f2ec16ccSHideki Saito   // First operand should refer to the loop id itself.
180*f2ec16ccSHideki Saito   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
181*f2ec16ccSHideki Saito   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
182*f2ec16ccSHideki Saito 
183*f2ec16ccSHideki Saito   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
184*f2ec16ccSHideki Saito     const MDString *S = nullptr;
185*f2ec16ccSHideki Saito     SmallVector<Metadata *, 4> Args;
186*f2ec16ccSHideki Saito 
187*f2ec16ccSHideki Saito     // The expected hint is either a MDString or a MDNode with the first
188*f2ec16ccSHideki Saito     // operand a MDString.
189*f2ec16ccSHideki Saito     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
190*f2ec16ccSHideki Saito       if (!MD || MD->getNumOperands() == 0)
191*f2ec16ccSHideki Saito         continue;
192*f2ec16ccSHideki Saito       S = dyn_cast<MDString>(MD->getOperand(0));
193*f2ec16ccSHideki Saito       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
194*f2ec16ccSHideki Saito         Args.push_back(MD->getOperand(i));
195*f2ec16ccSHideki Saito     } else {
196*f2ec16ccSHideki Saito       S = dyn_cast<MDString>(LoopID->getOperand(i));
197*f2ec16ccSHideki Saito       assert(Args.size() == 0 && "too many arguments for MDString");
198*f2ec16ccSHideki Saito     }
199*f2ec16ccSHideki Saito 
200*f2ec16ccSHideki Saito     if (!S)
201*f2ec16ccSHideki Saito       continue;
202*f2ec16ccSHideki Saito 
203*f2ec16ccSHideki Saito     // Check if the hint starts with the loop metadata prefix.
204*f2ec16ccSHideki Saito     StringRef Name = S->getString();
205*f2ec16ccSHideki Saito     if (Args.size() == 1)
206*f2ec16ccSHideki Saito       setHint(Name, Args[0]);
207*f2ec16ccSHideki Saito   }
208*f2ec16ccSHideki Saito }
209*f2ec16ccSHideki Saito 
210*f2ec16ccSHideki Saito void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
211*f2ec16ccSHideki Saito   if (!Name.startswith(Prefix()))
212*f2ec16ccSHideki Saito     return;
213*f2ec16ccSHideki Saito   Name = Name.substr(Prefix().size(), StringRef::npos);
214*f2ec16ccSHideki Saito 
215*f2ec16ccSHideki Saito   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
216*f2ec16ccSHideki Saito   if (!C)
217*f2ec16ccSHideki Saito     return;
218*f2ec16ccSHideki Saito   unsigned Val = C->getZExtValue();
219*f2ec16ccSHideki Saito 
220*f2ec16ccSHideki Saito   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized};
221*f2ec16ccSHideki Saito   for (auto H : Hints) {
222*f2ec16ccSHideki Saito     if (Name == H->Name) {
223*f2ec16ccSHideki Saito       if (H->validate(Val))
224*f2ec16ccSHideki Saito         H->Value = Val;
225*f2ec16ccSHideki Saito       else
226*f2ec16ccSHideki Saito         DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
227*f2ec16ccSHideki Saito       break;
228*f2ec16ccSHideki Saito     }
229*f2ec16ccSHideki Saito   }
230*f2ec16ccSHideki Saito }
231*f2ec16ccSHideki Saito 
232*f2ec16ccSHideki Saito MDNode *LoopVectorizeHints::createHintMetadata(StringRef Name,
233*f2ec16ccSHideki Saito                                                unsigned V) const {
234*f2ec16ccSHideki Saito   LLVMContext &Context = TheLoop->getHeader()->getContext();
235*f2ec16ccSHideki Saito   Metadata *MDs[] = {
236*f2ec16ccSHideki Saito       MDString::get(Context, Name),
237*f2ec16ccSHideki Saito       ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
238*f2ec16ccSHideki Saito   return MDNode::get(Context, MDs);
239*f2ec16ccSHideki Saito }
240*f2ec16ccSHideki Saito 
241*f2ec16ccSHideki Saito bool LoopVectorizeHints::matchesHintMetadataName(MDNode *Node,
242*f2ec16ccSHideki Saito                                                  ArrayRef<Hint> HintTypes) {
243*f2ec16ccSHideki Saito   MDString *Name = dyn_cast<MDString>(Node->getOperand(0));
244*f2ec16ccSHideki Saito   if (!Name)
245*f2ec16ccSHideki Saito     return false;
246*f2ec16ccSHideki Saito 
247*f2ec16ccSHideki Saito   for (auto H : HintTypes)
248*f2ec16ccSHideki Saito     if (Name->getString().endswith(H.Name))
249*f2ec16ccSHideki Saito       return true;
250*f2ec16ccSHideki Saito   return false;
251*f2ec16ccSHideki Saito }
252*f2ec16ccSHideki Saito 
253*f2ec16ccSHideki Saito void LoopVectorizeHints::writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
254*f2ec16ccSHideki Saito   if (HintTypes.empty())
255*f2ec16ccSHideki Saito     return;
256*f2ec16ccSHideki Saito 
257*f2ec16ccSHideki Saito   // Reserve the first element to LoopID (see below).
258*f2ec16ccSHideki Saito   SmallVector<Metadata *, 4> MDs(1);
259*f2ec16ccSHideki Saito   // If the loop already has metadata, then ignore the existing operands.
260*f2ec16ccSHideki Saito   MDNode *LoopID = TheLoop->getLoopID();
261*f2ec16ccSHideki Saito   if (LoopID) {
262*f2ec16ccSHideki Saito     for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
263*f2ec16ccSHideki Saito       MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
264*f2ec16ccSHideki Saito       // If node in update list, ignore old value.
265*f2ec16ccSHideki Saito       if (!matchesHintMetadataName(Node, HintTypes))
266*f2ec16ccSHideki Saito         MDs.push_back(Node);
267*f2ec16ccSHideki Saito     }
268*f2ec16ccSHideki Saito   }
269*f2ec16ccSHideki Saito 
270*f2ec16ccSHideki Saito   // Now, add the missing hints.
271*f2ec16ccSHideki Saito   for (auto H : HintTypes)
272*f2ec16ccSHideki Saito     MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
273*f2ec16ccSHideki Saito 
274*f2ec16ccSHideki Saito   // Replace current metadata node with new one.
275*f2ec16ccSHideki Saito   LLVMContext &Context = TheLoop->getHeader()->getContext();
276*f2ec16ccSHideki Saito   MDNode *NewLoopID = MDNode::get(Context, MDs);
277*f2ec16ccSHideki Saito   // Set operand 0 to refer to the loop id itself.
278*f2ec16ccSHideki Saito   NewLoopID->replaceOperandWith(0, NewLoopID);
279*f2ec16ccSHideki Saito 
280*f2ec16ccSHideki Saito   TheLoop->setLoopID(NewLoopID);
281*f2ec16ccSHideki Saito }
282*f2ec16ccSHideki Saito 
283*f2ec16ccSHideki Saito bool LoopVectorizationRequirements::doesNotMeet(
284*f2ec16ccSHideki Saito     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
285*f2ec16ccSHideki Saito   const char *PassName = Hints.vectorizeAnalysisPassName();
286*f2ec16ccSHideki Saito   bool Failed = false;
287*f2ec16ccSHideki Saito   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
288*f2ec16ccSHideki Saito     ORE.emit([&]() {
289*f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisFPCommute(
290*f2ec16ccSHideki Saito                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
291*f2ec16ccSHideki Saito                  UnsafeAlgebraInst->getParent())
292*f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
293*f2ec16ccSHideki Saito                 "floating-point operations";
294*f2ec16ccSHideki Saito     });
295*f2ec16ccSHideki Saito     Failed = true;
296*f2ec16ccSHideki Saito   }
297*f2ec16ccSHideki Saito 
298*f2ec16ccSHideki Saito   // Test if runtime memcheck thresholds are exceeded.
299*f2ec16ccSHideki Saito   bool PragmaThresholdReached =
300*f2ec16ccSHideki Saito       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
301*f2ec16ccSHideki Saito   bool ThresholdReached =
302*f2ec16ccSHideki Saito       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
303*f2ec16ccSHideki Saito   if ((ThresholdReached && !Hints.allowReordering()) ||
304*f2ec16ccSHideki Saito       PragmaThresholdReached) {
305*f2ec16ccSHideki Saito     ORE.emit([&]() {
306*f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
307*f2ec16ccSHideki Saito                                                 L->getStartLoc(),
308*f2ec16ccSHideki Saito                                                 L->getHeader())
309*f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
310*f2ec16ccSHideki Saito                 "memory operations";
311*f2ec16ccSHideki Saito     });
312*f2ec16ccSHideki Saito     DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
313*f2ec16ccSHideki Saito     Failed = true;
314*f2ec16ccSHideki Saito   }
315*f2ec16ccSHideki Saito 
316*f2ec16ccSHideki Saito   return Failed;
317*f2ec16ccSHideki Saito }
318*f2ec16ccSHideki Saito 
319*f2ec16ccSHideki Saito // Return true if the inner loop \p Lp is uniform with regard to the outer loop
320*f2ec16ccSHideki Saito // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
321*f2ec16ccSHideki Saito // executing the inner loop will execute the same iterations). This check is
322*f2ec16ccSHideki Saito // very constrained for now but it will be relaxed in the future. \p Lp is
323*f2ec16ccSHideki Saito // considered uniform if it meets all the following conditions:
324*f2ec16ccSHideki Saito //   1) it has a canonical IV (starting from 0 and with stride 1),
325*f2ec16ccSHideki Saito //   2) its latch terminator is a conditional branch and,
326*f2ec16ccSHideki Saito //   3) its latch condition is a compare instruction whose operands are the
327*f2ec16ccSHideki Saito //      canonical IV and an OuterLp invariant.
328*f2ec16ccSHideki Saito // This check doesn't take into account the uniformity of other conditions not
329*f2ec16ccSHideki Saito // related to the loop latch because they don't affect the loop uniformity.
330*f2ec16ccSHideki Saito //
331*f2ec16ccSHideki Saito // NOTE: We decided to keep all these checks and its associated documentation
332*f2ec16ccSHideki Saito // together so that we can easily have a picture of the current supported loop
333*f2ec16ccSHideki Saito // nests. However, some of the current checks don't depend on \p OuterLp and
334*f2ec16ccSHideki Saito // would be redundantly executed for each \p Lp if we invoked this function for
335*f2ec16ccSHideki Saito // different candidate outer loops. This is not the case for now because we
336*f2ec16ccSHideki Saito // don't currently have the infrastructure to evaluate multiple candidate outer
337*f2ec16ccSHideki Saito // loops and \p OuterLp will be a fixed parameter while we only support explicit
338*f2ec16ccSHideki Saito // outer loop vectorization. It's also very likely that these checks go away
339*f2ec16ccSHideki Saito // before introducing the aforementioned infrastructure. However, if this is not
340*f2ec16ccSHideki Saito // the case, we should move the \p OuterLp independent checks to a separate
341*f2ec16ccSHideki Saito // function that is only executed once for each \p Lp.
342*f2ec16ccSHideki Saito static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
343*f2ec16ccSHideki Saito   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
344*f2ec16ccSHideki Saito 
345*f2ec16ccSHideki Saito   // If Lp is the outer loop, it's uniform by definition.
346*f2ec16ccSHideki Saito   if (Lp == OuterLp)
347*f2ec16ccSHideki Saito     return true;
348*f2ec16ccSHideki Saito   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
349*f2ec16ccSHideki Saito 
350*f2ec16ccSHideki Saito   // 1.
351*f2ec16ccSHideki Saito   PHINode *IV = Lp->getCanonicalInductionVariable();
352*f2ec16ccSHideki Saito   if (!IV) {
353*f2ec16ccSHideki Saito     DEBUG(dbgs() << "LV: Canonical IV not found.\n");
354*f2ec16ccSHideki Saito     return false;
355*f2ec16ccSHideki Saito   }
356*f2ec16ccSHideki Saito 
357*f2ec16ccSHideki Saito   // 2.
358*f2ec16ccSHideki Saito   BasicBlock *Latch = Lp->getLoopLatch();
359*f2ec16ccSHideki Saito   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
360*f2ec16ccSHideki Saito   if (!LatchBr || LatchBr->isUnconditional()) {
361*f2ec16ccSHideki Saito     DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
362*f2ec16ccSHideki Saito     return false;
363*f2ec16ccSHideki Saito   }
364*f2ec16ccSHideki Saito 
365*f2ec16ccSHideki Saito   // 3.
366*f2ec16ccSHideki Saito   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
367*f2ec16ccSHideki Saito   if (!LatchCmp) {
368*f2ec16ccSHideki Saito     DEBUG(dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
369*f2ec16ccSHideki Saito     return false;
370*f2ec16ccSHideki Saito   }
371*f2ec16ccSHideki Saito 
372*f2ec16ccSHideki Saito   Value *CondOp0 = LatchCmp->getOperand(0);
373*f2ec16ccSHideki Saito   Value *CondOp1 = LatchCmp->getOperand(1);
374*f2ec16ccSHideki Saito   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
375*f2ec16ccSHideki Saito   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
376*f2ec16ccSHideki Saito       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
377*f2ec16ccSHideki Saito     DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
378*f2ec16ccSHideki Saito     return false;
379*f2ec16ccSHideki Saito   }
380*f2ec16ccSHideki Saito 
381*f2ec16ccSHideki Saito   return true;
382*f2ec16ccSHideki Saito }
383*f2ec16ccSHideki Saito 
384*f2ec16ccSHideki Saito // Return true if \p Lp and all its nested loops are uniform with regard to \p
385*f2ec16ccSHideki Saito // OuterLp.
386*f2ec16ccSHideki Saito static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
387*f2ec16ccSHideki Saito   if (!isUniformLoop(Lp, OuterLp))
388*f2ec16ccSHideki Saito     return false;
389*f2ec16ccSHideki Saito 
390*f2ec16ccSHideki Saito   // Check if nested loops are uniform.
391*f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
392*f2ec16ccSHideki Saito     if (!isUniformLoopNest(SubLp, OuterLp))
393*f2ec16ccSHideki Saito       return false;
394*f2ec16ccSHideki Saito 
395*f2ec16ccSHideki Saito   return true;
396*f2ec16ccSHideki Saito }
397*f2ec16ccSHideki Saito 
398*f2ec16ccSHideki Saito /// \brief Check whether it is safe to if-convert this phi node.
399*f2ec16ccSHideki Saito ///
400*f2ec16ccSHideki Saito /// Phi nodes with constant expressions that can trap are not safe to if
401*f2ec16ccSHideki Saito /// convert.
402*f2ec16ccSHideki Saito static bool canIfConvertPHINodes(BasicBlock *BB) {
403*f2ec16ccSHideki Saito   for (PHINode &Phi : BB->phis()) {
404*f2ec16ccSHideki Saito     for (Value *V : Phi.incoming_values())
405*f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(V))
406*f2ec16ccSHideki Saito         if (C->canTrap())
407*f2ec16ccSHideki Saito           return false;
408*f2ec16ccSHideki Saito   }
409*f2ec16ccSHideki Saito   return true;
410*f2ec16ccSHideki Saito }
411*f2ec16ccSHideki Saito 
412*f2ec16ccSHideki Saito static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
413*f2ec16ccSHideki Saito   if (Ty->isPointerTy())
414*f2ec16ccSHideki Saito     return DL.getIntPtrType(Ty);
415*f2ec16ccSHideki Saito 
416*f2ec16ccSHideki Saito   // It is possible that char's or short's overflow when we ask for the loop's
417*f2ec16ccSHideki Saito   // trip count, work around this by changing the type size.
418*f2ec16ccSHideki Saito   if (Ty->getScalarSizeInBits() < 32)
419*f2ec16ccSHideki Saito     return Type::getInt32Ty(Ty->getContext());
420*f2ec16ccSHideki Saito 
421*f2ec16ccSHideki Saito   return Ty;
422*f2ec16ccSHideki Saito }
423*f2ec16ccSHideki Saito 
424*f2ec16ccSHideki Saito static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
425*f2ec16ccSHideki Saito   Ty0 = convertPointerToIntegerType(DL, Ty0);
426*f2ec16ccSHideki Saito   Ty1 = convertPointerToIntegerType(DL, Ty1);
427*f2ec16ccSHideki Saito   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
428*f2ec16ccSHideki Saito     return Ty0;
429*f2ec16ccSHideki Saito   return Ty1;
430*f2ec16ccSHideki Saito }
431*f2ec16ccSHideki Saito 
432*f2ec16ccSHideki Saito /// \brief Check that the instruction has outside loop users and is not an
433*f2ec16ccSHideki Saito /// identified reduction variable.
434*f2ec16ccSHideki Saito static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
435*f2ec16ccSHideki Saito                                SmallPtrSetImpl<Value *> &AllowedExit) {
436*f2ec16ccSHideki Saito   // Reduction and Induction instructions are allowed to have exit users. All
437*f2ec16ccSHideki Saito   // other instructions must not have external users.
438*f2ec16ccSHideki Saito   if (!AllowedExit.count(Inst))
439*f2ec16ccSHideki Saito     // Check that all of the users of the loop are inside the BB.
440*f2ec16ccSHideki Saito     for (User *U : Inst->users()) {
441*f2ec16ccSHideki Saito       Instruction *UI = cast<Instruction>(U);
442*f2ec16ccSHideki Saito       // This user may be a reduction exit value.
443*f2ec16ccSHideki Saito       if (!TheLoop->contains(UI)) {
444*f2ec16ccSHideki Saito         DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
445*f2ec16ccSHideki Saito         return true;
446*f2ec16ccSHideki Saito       }
447*f2ec16ccSHideki Saito     }
448*f2ec16ccSHideki Saito   return false;
449*f2ec16ccSHideki Saito }
450*f2ec16ccSHideki Saito 
451*f2ec16ccSHideki Saito int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
452*f2ec16ccSHideki Saito   const ValueToValueMap &Strides =
453*f2ec16ccSHideki Saito       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
454*f2ec16ccSHideki Saito 
455*f2ec16ccSHideki Saito   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
456*f2ec16ccSHideki Saito   if (Stride == 1 || Stride == -1)
457*f2ec16ccSHideki Saito     return Stride;
458*f2ec16ccSHideki Saito   return 0;
459*f2ec16ccSHideki Saito }
460*f2ec16ccSHideki Saito 
461*f2ec16ccSHideki Saito bool LoopVectorizationLegality::isUniform(Value *V) {
462*f2ec16ccSHideki Saito   return LAI->isUniform(V);
463*f2ec16ccSHideki Saito }
464*f2ec16ccSHideki Saito 
465*f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeOuterLoop() {
466*f2ec16ccSHideki Saito   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
467*f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
468*f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
469*f2ec16ccSHideki Saito   bool Result = true;
470*f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
471*f2ec16ccSHideki Saito 
472*f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
473*f2ec16ccSHideki Saito     // Check whether the BB terminator is a BranchInst. Any other terminator is
474*f2ec16ccSHideki Saito     // not supported yet.
475*f2ec16ccSHideki Saito     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
476*f2ec16ccSHideki Saito     if (!Br) {
477*f2ec16ccSHideki Saito       DEBUG(dbgs() << "LV: Unsupported basic block terminator.\n");
478*f2ec16ccSHideki Saito       ORE->emit(createMissedAnalysis("CFGNotUnderstood")
479*f2ec16ccSHideki Saito                 << "loop control flow is not understood by vectorizer");
480*f2ec16ccSHideki Saito       if (DoExtraAnalysis)
481*f2ec16ccSHideki Saito         Result = false;
482*f2ec16ccSHideki Saito       else
483*f2ec16ccSHideki Saito         return false;
484*f2ec16ccSHideki Saito     }
485*f2ec16ccSHideki Saito 
486*f2ec16ccSHideki Saito     // Check whether the BranchInst is a supported one. Only unconditional
487*f2ec16ccSHideki Saito     // branches, conditional branches with an outer loop invariant condition or
488*f2ec16ccSHideki Saito     // backedges are supported.
489*f2ec16ccSHideki Saito     if (Br && Br->isConditional() &&
490*f2ec16ccSHideki Saito         !TheLoop->isLoopInvariant(Br->getCondition()) &&
491*f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(0)) &&
492*f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(1))) {
493*f2ec16ccSHideki Saito       DEBUG(dbgs() << "LV: Unsupported conditional branch.\n");
494*f2ec16ccSHideki Saito       ORE->emit(createMissedAnalysis("CFGNotUnderstood")
495*f2ec16ccSHideki Saito                 << "loop control flow is not understood by vectorizer");
496*f2ec16ccSHideki Saito       if (DoExtraAnalysis)
497*f2ec16ccSHideki Saito         Result = false;
498*f2ec16ccSHideki Saito       else
499*f2ec16ccSHideki Saito         return false;
500*f2ec16ccSHideki Saito     }
501*f2ec16ccSHideki Saito   }
502*f2ec16ccSHideki Saito 
503*f2ec16ccSHideki Saito   // Check whether inner loops are uniform. At this point, we only support
504*f2ec16ccSHideki Saito   // simple outer loops scenarios with uniform nested loops.
505*f2ec16ccSHideki Saito   if (!isUniformLoopNest(TheLoop /*loop nest*/,
506*f2ec16ccSHideki Saito                          TheLoop /*context outer loop*/)) {
507*f2ec16ccSHideki Saito     DEBUG(dbgs()
508*f2ec16ccSHideki Saito           << "LV: Not vectorizing: Outer loop contains divergent loops.\n");
509*f2ec16ccSHideki Saito     ORE->emit(createMissedAnalysis("CFGNotUnderstood")
510*f2ec16ccSHideki Saito               << "loop control flow is not understood by vectorizer");
511*f2ec16ccSHideki Saito     if (DoExtraAnalysis)
512*f2ec16ccSHideki Saito       Result = false;
513*f2ec16ccSHideki Saito     else
514*f2ec16ccSHideki Saito       return false;
515*f2ec16ccSHideki Saito   }
516*f2ec16ccSHideki Saito 
517*f2ec16ccSHideki Saito   return Result;
518*f2ec16ccSHideki Saito }
519*f2ec16ccSHideki Saito 
520*f2ec16ccSHideki Saito void LoopVectorizationLegality::addInductionPhi(
521*f2ec16ccSHideki Saito     PHINode *Phi, const InductionDescriptor &ID,
522*f2ec16ccSHideki Saito     SmallPtrSetImpl<Value *> &AllowedExit) {
523*f2ec16ccSHideki Saito   Inductions[Phi] = ID;
524*f2ec16ccSHideki Saito 
525*f2ec16ccSHideki Saito   // In case this induction also comes with casts that we know we can ignore
526*f2ec16ccSHideki Saito   // in the vectorized loop body, record them here. All casts could be recorded
527*f2ec16ccSHideki Saito   // here for ignoring, but suffices to record only the first (as it is the
528*f2ec16ccSHideki Saito   // only one that may bw used outside the cast sequence).
529*f2ec16ccSHideki Saito   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
530*f2ec16ccSHideki Saito   if (!Casts.empty())
531*f2ec16ccSHideki Saito     InductionCastsToIgnore.insert(*Casts.begin());
532*f2ec16ccSHideki Saito 
533*f2ec16ccSHideki Saito   Type *PhiTy = Phi->getType();
534*f2ec16ccSHideki Saito   const DataLayout &DL = Phi->getModule()->getDataLayout();
535*f2ec16ccSHideki Saito 
536*f2ec16ccSHideki Saito   // Get the widest type.
537*f2ec16ccSHideki Saito   if (!PhiTy->isFloatingPointTy()) {
538*f2ec16ccSHideki Saito     if (!WidestIndTy)
539*f2ec16ccSHideki Saito       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
540*f2ec16ccSHideki Saito     else
541*f2ec16ccSHideki Saito       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
542*f2ec16ccSHideki Saito   }
543*f2ec16ccSHideki Saito 
544*f2ec16ccSHideki Saito   // Int inductions are special because we only allow one IV.
545*f2ec16ccSHideki Saito   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
546*f2ec16ccSHideki Saito       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
547*f2ec16ccSHideki Saito       isa<Constant>(ID.getStartValue()) &&
548*f2ec16ccSHideki Saito       cast<Constant>(ID.getStartValue())->isNullValue()) {
549*f2ec16ccSHideki Saito 
550*f2ec16ccSHideki Saito     // Use the phi node with the widest type as induction. Use the last
551*f2ec16ccSHideki Saito     // one if there are multiple (no good reason for doing this other
552*f2ec16ccSHideki Saito     // than it is expedient). We've checked that it begins at zero and
553*f2ec16ccSHideki Saito     // steps by one, so this is a canonical induction variable.
554*f2ec16ccSHideki Saito     if (!PrimaryInduction || PhiTy == WidestIndTy)
555*f2ec16ccSHideki Saito       PrimaryInduction = Phi;
556*f2ec16ccSHideki Saito   }
557*f2ec16ccSHideki Saito 
558*f2ec16ccSHideki Saito   // Both the PHI node itself, and the "post-increment" value feeding
559*f2ec16ccSHideki Saito   // back into the PHI node may have external users.
560*f2ec16ccSHideki Saito   // We can allow those uses, except if the SCEVs we have for them rely
561*f2ec16ccSHideki Saito   // on predicates that only hold within the loop, since allowing the exit
562*f2ec16ccSHideki Saito   // currently means re-using this SCEV outside the loop.
563*f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().isAlwaysTrue()) {
564*f2ec16ccSHideki Saito     AllowedExit.insert(Phi);
565*f2ec16ccSHideki Saito     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
566*f2ec16ccSHideki Saito   }
567*f2ec16ccSHideki Saito 
568*f2ec16ccSHideki Saito   DEBUG(dbgs() << "LV: Found an induction variable.\n");
569*f2ec16ccSHideki Saito }
570*f2ec16ccSHideki Saito 
571*f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeInstrs() {
572*f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
573*f2ec16ccSHideki Saito 
574*f2ec16ccSHideki Saito   // Look for the attribute signaling the absence of NaNs.
575*f2ec16ccSHideki Saito   Function &F = *Header->getParent();
576*f2ec16ccSHideki Saito   HasFunNoNaNAttr =
577*f2ec16ccSHideki Saito       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
578*f2ec16ccSHideki Saito 
579*f2ec16ccSHideki Saito   // For each block in the loop.
580*f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
581*f2ec16ccSHideki Saito     // Scan the instructions in the block and look for hazards.
582*f2ec16ccSHideki Saito     for (Instruction &I : *BB) {
583*f2ec16ccSHideki Saito       if (auto *Phi = dyn_cast<PHINode>(&I)) {
584*f2ec16ccSHideki Saito         Type *PhiTy = Phi->getType();
585*f2ec16ccSHideki Saito         // Check that this PHI type is allowed.
586*f2ec16ccSHideki Saito         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
587*f2ec16ccSHideki Saito             !PhiTy->isPointerTy()) {
588*f2ec16ccSHideki Saito           ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
589*f2ec16ccSHideki Saito                     << "loop control flow is not understood by vectorizer");
590*f2ec16ccSHideki Saito           DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
591*f2ec16ccSHideki Saito           return false;
592*f2ec16ccSHideki Saito         }
593*f2ec16ccSHideki Saito 
594*f2ec16ccSHideki Saito         // If this PHINode is not in the header block, then we know that we
595*f2ec16ccSHideki Saito         // can convert it to select during if-conversion. No need to check if
596*f2ec16ccSHideki Saito         // the PHIs in this block are induction or reduction variables.
597*f2ec16ccSHideki Saito         if (BB != Header) {
598*f2ec16ccSHideki Saito           // Check that this instruction has no outside users or is an
599*f2ec16ccSHideki Saito           // identified reduction value with an outside user.
600*f2ec16ccSHideki Saito           if (!hasOutsideLoopUser(TheLoop, Phi, AllowedExit))
601*f2ec16ccSHideki Saito             continue;
602*f2ec16ccSHideki Saito           ORE->emit(createMissedAnalysis("NeitherInductionNorReduction", Phi)
603*f2ec16ccSHideki Saito                     << "value could not be identified as "
604*f2ec16ccSHideki Saito                        "an induction or reduction variable");
605*f2ec16ccSHideki Saito           return false;
606*f2ec16ccSHideki Saito         }
607*f2ec16ccSHideki Saito 
608*f2ec16ccSHideki Saito         // We only allow if-converted PHIs with exactly two incoming values.
609*f2ec16ccSHideki Saito         if (Phi->getNumIncomingValues() != 2) {
610*f2ec16ccSHideki Saito           ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
611*f2ec16ccSHideki Saito                     << "control flow not understood by vectorizer");
612*f2ec16ccSHideki Saito           DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
613*f2ec16ccSHideki Saito           return false;
614*f2ec16ccSHideki Saito         }
615*f2ec16ccSHideki Saito 
616*f2ec16ccSHideki Saito         RecurrenceDescriptor RedDes;
617*f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
618*f2ec16ccSHideki Saito                                                  DT)) {
619*f2ec16ccSHideki Saito           if (RedDes.hasUnsafeAlgebra())
620*f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
621*f2ec16ccSHideki Saito           AllowedExit.insert(RedDes.getLoopExitInstr());
622*f2ec16ccSHideki Saito           Reductions[Phi] = RedDes;
623*f2ec16ccSHideki Saito           continue;
624*f2ec16ccSHideki Saito         }
625*f2ec16ccSHideki Saito 
626*f2ec16ccSHideki Saito         InductionDescriptor ID;
627*f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
628*f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
629*f2ec16ccSHideki Saito           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
630*f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
631*f2ec16ccSHideki Saito           continue;
632*f2ec16ccSHideki Saito         }
633*f2ec16ccSHideki Saito 
634*f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
635*f2ec16ccSHideki Saito                                                          SinkAfter, DT)) {
636*f2ec16ccSHideki Saito           FirstOrderRecurrences.insert(Phi);
637*f2ec16ccSHideki Saito           continue;
638*f2ec16ccSHideki Saito         }
639*f2ec16ccSHideki Saito 
640*f2ec16ccSHideki Saito         // As a last resort, coerce the PHI to a AddRec expression
641*f2ec16ccSHideki Saito         // and re-try classifying it a an induction PHI.
642*f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
643*f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
644*f2ec16ccSHideki Saito           continue;
645*f2ec16ccSHideki Saito         }
646*f2ec16ccSHideki Saito 
647*f2ec16ccSHideki Saito         ORE->emit(createMissedAnalysis("NonReductionValueUsedOutsideLoop", Phi)
648*f2ec16ccSHideki Saito                   << "value that could not be identified as "
649*f2ec16ccSHideki Saito                      "reduction is used outside the loop");
650*f2ec16ccSHideki Saito         DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n");
651*f2ec16ccSHideki Saito         return false;
652*f2ec16ccSHideki Saito       } // end of PHI handling
653*f2ec16ccSHideki Saito 
654*f2ec16ccSHideki Saito       // We handle calls that:
655*f2ec16ccSHideki Saito       //   * Are debug info intrinsics.
656*f2ec16ccSHideki Saito       //   * Have a mapping to an IR intrinsic.
657*f2ec16ccSHideki Saito       //   * Have a vector version available.
658*f2ec16ccSHideki Saito       auto *CI = dyn_cast<CallInst>(&I);
659*f2ec16ccSHideki Saito       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
660*f2ec16ccSHideki Saito           !isa<DbgInfoIntrinsic>(CI) &&
661*f2ec16ccSHideki Saito           !(CI->getCalledFunction() && TLI &&
662*f2ec16ccSHideki Saito             TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
663*f2ec16ccSHideki Saito         ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
664*f2ec16ccSHideki Saito                   << "call instruction cannot be vectorized");
665*f2ec16ccSHideki Saito         DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n");
666*f2ec16ccSHideki Saito         return false;
667*f2ec16ccSHideki Saito       }
668*f2ec16ccSHideki Saito 
669*f2ec16ccSHideki Saito       // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the
670*f2ec16ccSHideki Saito       // second argument is the same (i.e. loop invariant)
671*f2ec16ccSHideki Saito       if (CI && hasVectorInstrinsicScalarOpd(
672*f2ec16ccSHideki Saito                     getVectorIntrinsicIDForCall(CI, TLI), 1)) {
673*f2ec16ccSHideki Saito         auto *SE = PSE.getSE();
674*f2ec16ccSHideki Saito         if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) {
675*f2ec16ccSHideki Saito           ORE->emit(createMissedAnalysis("CantVectorizeIntrinsic", CI)
676*f2ec16ccSHideki Saito                     << "intrinsic instruction cannot be vectorized");
677*f2ec16ccSHideki Saito           DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
678*f2ec16ccSHideki Saito           return false;
679*f2ec16ccSHideki Saito         }
680*f2ec16ccSHideki Saito       }
681*f2ec16ccSHideki Saito 
682*f2ec16ccSHideki Saito       // Check that the instruction return type is vectorizable.
683*f2ec16ccSHideki Saito       // Also, we can't vectorize extractelement instructions.
684*f2ec16ccSHideki Saito       if ((!VectorType::isValidElementType(I.getType()) &&
685*f2ec16ccSHideki Saito            !I.getType()->isVoidTy()) ||
686*f2ec16ccSHideki Saito           isa<ExtractElementInst>(I)) {
687*f2ec16ccSHideki Saito         ORE->emit(createMissedAnalysis("CantVectorizeInstructionReturnType", &I)
688*f2ec16ccSHideki Saito                   << "instruction return type cannot be vectorized");
689*f2ec16ccSHideki Saito         DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
690*f2ec16ccSHideki Saito         return false;
691*f2ec16ccSHideki Saito       }
692*f2ec16ccSHideki Saito 
693*f2ec16ccSHideki Saito       // Check that the stored type is vectorizable.
694*f2ec16ccSHideki Saito       if (auto *ST = dyn_cast<StoreInst>(&I)) {
695*f2ec16ccSHideki Saito         Type *T = ST->getValueOperand()->getType();
696*f2ec16ccSHideki Saito         if (!VectorType::isValidElementType(T)) {
697*f2ec16ccSHideki Saito           ORE->emit(createMissedAnalysis("CantVectorizeStore", ST)
698*f2ec16ccSHideki Saito                     << "store instruction cannot be vectorized");
699*f2ec16ccSHideki Saito           return false;
700*f2ec16ccSHideki Saito         }
701*f2ec16ccSHideki Saito 
702*f2ec16ccSHideki Saito         // FP instructions can allow unsafe algebra, thus vectorizable by
703*f2ec16ccSHideki Saito         // non-IEEE-754 compliant SIMD units.
704*f2ec16ccSHideki Saito         // This applies to floating-point math operations and calls, not memory
705*f2ec16ccSHideki Saito         // operations, shuffles, or casts, as they don't change precision or
706*f2ec16ccSHideki Saito         // semantics.
707*f2ec16ccSHideki Saito       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
708*f2ec16ccSHideki Saito                  !I.isFast()) {
709*f2ec16ccSHideki Saito         DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
710*f2ec16ccSHideki Saito         Hints->setPotentiallyUnsafe();
711*f2ec16ccSHideki Saito       }
712*f2ec16ccSHideki Saito 
713*f2ec16ccSHideki Saito       // Reduction instructions are allowed to have exit users.
714*f2ec16ccSHideki Saito       // All other instructions must not have external users.
715*f2ec16ccSHideki Saito       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
716*f2ec16ccSHideki Saito         ORE->emit(createMissedAnalysis("ValueUsedOutsideLoop", &I)
717*f2ec16ccSHideki Saito                   << "value cannot be used outside the loop");
718*f2ec16ccSHideki Saito         return false;
719*f2ec16ccSHideki Saito       }
720*f2ec16ccSHideki Saito     } // next instr.
721*f2ec16ccSHideki Saito   }
722*f2ec16ccSHideki Saito 
723*f2ec16ccSHideki Saito   if (!PrimaryInduction) {
724*f2ec16ccSHideki Saito     DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
725*f2ec16ccSHideki Saito     if (Inductions.empty()) {
726*f2ec16ccSHideki Saito       ORE->emit(createMissedAnalysis("NoInductionVariable")
727*f2ec16ccSHideki Saito                 << "loop induction variable could not be identified");
728*f2ec16ccSHideki Saito       return false;
729*f2ec16ccSHideki Saito     }
730*f2ec16ccSHideki Saito   }
731*f2ec16ccSHideki Saito 
732*f2ec16ccSHideki Saito   // Now we know the widest induction type, check if our found induction
733*f2ec16ccSHideki Saito   // is the same size. If it's not, unset it here and InnerLoopVectorizer
734*f2ec16ccSHideki Saito   // will create another.
735*f2ec16ccSHideki Saito   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
736*f2ec16ccSHideki Saito     PrimaryInduction = nullptr;
737*f2ec16ccSHideki Saito 
738*f2ec16ccSHideki Saito   return true;
739*f2ec16ccSHideki Saito }
740*f2ec16ccSHideki Saito 
741*f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeMemory() {
742*f2ec16ccSHideki Saito   LAI = &(*GetLAA)(*TheLoop);
743*f2ec16ccSHideki Saito   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
744*f2ec16ccSHideki Saito   if (LAR) {
745*f2ec16ccSHideki Saito     ORE->emit([&]() {
746*f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
747*f2ec16ccSHideki Saito                                         "loop not vectorized: ", *LAR);
748*f2ec16ccSHideki Saito     });
749*f2ec16ccSHideki Saito   }
750*f2ec16ccSHideki Saito   if (!LAI->canVectorizeMemory())
751*f2ec16ccSHideki Saito     return false;
752*f2ec16ccSHideki Saito 
753*f2ec16ccSHideki Saito   if (LAI->hasStoreToLoopInvariantAddress()) {
754*f2ec16ccSHideki Saito     ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress")
755*f2ec16ccSHideki Saito               << "write to a loop invariant address could not be vectorized");
756*f2ec16ccSHideki Saito     DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
757*f2ec16ccSHideki Saito     return false;
758*f2ec16ccSHideki Saito   }
759*f2ec16ccSHideki Saito 
760*f2ec16ccSHideki Saito   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
761*f2ec16ccSHideki Saito   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
762*f2ec16ccSHideki Saito 
763*f2ec16ccSHideki Saito   return true;
764*f2ec16ccSHideki Saito }
765*f2ec16ccSHideki Saito 
766*f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
767*f2ec16ccSHideki Saito   Value *In0 = const_cast<Value *>(V);
768*f2ec16ccSHideki Saito   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
769*f2ec16ccSHideki Saito   if (!PN)
770*f2ec16ccSHideki Saito     return false;
771*f2ec16ccSHideki Saito 
772*f2ec16ccSHideki Saito   return Inductions.count(PN);
773*f2ec16ccSHideki Saito }
774*f2ec16ccSHideki Saito 
775*f2ec16ccSHideki Saito bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
776*f2ec16ccSHideki Saito   auto *Inst = dyn_cast<Instruction>(V);
777*f2ec16ccSHideki Saito   return (Inst && InductionCastsToIgnore.count(Inst));
778*f2ec16ccSHideki Saito }
779*f2ec16ccSHideki Saito 
780*f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
781*f2ec16ccSHideki Saito   return isInductionPhi(V) || isCastedInductionVariable(V);
782*f2ec16ccSHideki Saito }
783*f2ec16ccSHideki Saito 
784*f2ec16ccSHideki Saito bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
785*f2ec16ccSHideki Saito   return FirstOrderRecurrences.count(Phi);
786*f2ec16ccSHideki Saito }
787*f2ec16ccSHideki Saito 
788*f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
789*f2ec16ccSHideki Saito   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
790*f2ec16ccSHideki Saito }
791*f2ec16ccSHideki Saito 
792*f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockCanBePredicated(
793*f2ec16ccSHideki Saito     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs) {
794*f2ec16ccSHideki Saito   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
795*f2ec16ccSHideki Saito 
796*f2ec16ccSHideki Saito   for (Instruction &I : *BB) {
797*f2ec16ccSHideki Saito     // Check that we don't have a constant expression that can trap as operand.
798*f2ec16ccSHideki Saito     for (Value *Operand : I.operands()) {
799*f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(Operand))
800*f2ec16ccSHideki Saito         if (C->canTrap())
801*f2ec16ccSHideki Saito           return false;
802*f2ec16ccSHideki Saito     }
803*f2ec16ccSHideki Saito     // We might be able to hoist the load.
804*f2ec16ccSHideki Saito     if (I.mayReadFromMemory()) {
805*f2ec16ccSHideki Saito       auto *LI = dyn_cast<LoadInst>(&I);
806*f2ec16ccSHideki Saito       if (!LI)
807*f2ec16ccSHideki Saito         return false;
808*f2ec16ccSHideki Saito       if (!SafePtrs.count(LI->getPointerOperand())) {
809*f2ec16ccSHideki Saito         // !llvm.mem.parallel_loop_access implies if-conversion safety.
810*f2ec16ccSHideki Saito         // Otherwise, record that the load needs (real or emulated) masking
811*f2ec16ccSHideki Saito         // and let the cost model decide.
812*f2ec16ccSHideki Saito         if (!IsAnnotatedParallel)
813*f2ec16ccSHideki Saito           MaskedOp.insert(LI);
814*f2ec16ccSHideki Saito         continue;
815*f2ec16ccSHideki Saito       }
816*f2ec16ccSHideki Saito     }
817*f2ec16ccSHideki Saito 
818*f2ec16ccSHideki Saito     if (I.mayWriteToMemory()) {
819*f2ec16ccSHideki Saito       auto *SI = dyn_cast<StoreInst>(&I);
820*f2ec16ccSHideki Saito       if (!SI)
821*f2ec16ccSHideki Saito         return false;
822*f2ec16ccSHideki Saito       // Predicated store requires some form of masking:
823*f2ec16ccSHideki Saito       // 1) masked store HW instruction,
824*f2ec16ccSHideki Saito       // 2) emulation via load-blend-store (only if safe and legal to do so,
825*f2ec16ccSHideki Saito       //    be aware on the race conditions), or
826*f2ec16ccSHideki Saito       // 3) element-by-element predicate check and scalar store.
827*f2ec16ccSHideki Saito       MaskedOp.insert(SI);
828*f2ec16ccSHideki Saito       continue;
829*f2ec16ccSHideki Saito     }
830*f2ec16ccSHideki Saito     if (I.mayThrow())
831*f2ec16ccSHideki Saito       return false;
832*f2ec16ccSHideki Saito   }
833*f2ec16ccSHideki Saito 
834*f2ec16ccSHideki Saito   return true;
835*f2ec16ccSHideki Saito }
836*f2ec16ccSHideki Saito 
837*f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
838*f2ec16ccSHideki Saito   if (!EnableIfConversion) {
839*f2ec16ccSHideki Saito     ORE->emit(createMissedAnalysis("IfConversionDisabled")
840*f2ec16ccSHideki Saito               << "if-conversion is disabled");
841*f2ec16ccSHideki Saito     return false;
842*f2ec16ccSHideki Saito   }
843*f2ec16ccSHideki Saito 
844*f2ec16ccSHideki Saito   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
845*f2ec16ccSHideki Saito 
846*f2ec16ccSHideki Saito   // A list of pointers that we can safely read and write to.
847*f2ec16ccSHideki Saito   SmallPtrSet<Value *, 8> SafePointes;
848*f2ec16ccSHideki Saito 
849*f2ec16ccSHideki Saito   // Collect safe addresses.
850*f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
851*f2ec16ccSHideki Saito     if (blockNeedsPredication(BB))
852*f2ec16ccSHideki Saito       continue;
853*f2ec16ccSHideki Saito 
854*f2ec16ccSHideki Saito     for (Instruction &I : *BB)
855*f2ec16ccSHideki Saito       if (auto *Ptr = getLoadStorePointerOperand(&I))
856*f2ec16ccSHideki Saito         SafePointes.insert(Ptr);
857*f2ec16ccSHideki Saito   }
858*f2ec16ccSHideki Saito 
859*f2ec16ccSHideki Saito   // Collect the blocks that need predication.
860*f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
861*f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
862*f2ec16ccSHideki Saito     // We don't support switch statements inside loops.
863*f2ec16ccSHideki Saito     if (!isa<BranchInst>(BB->getTerminator())) {
864*f2ec16ccSHideki Saito       ORE->emit(createMissedAnalysis("LoopContainsSwitch", BB->getTerminator())
865*f2ec16ccSHideki Saito                 << "loop contains a switch statement");
866*f2ec16ccSHideki Saito       return false;
867*f2ec16ccSHideki Saito     }
868*f2ec16ccSHideki Saito 
869*f2ec16ccSHideki Saito     // We must be able to predicate all blocks that need to be predicated.
870*f2ec16ccSHideki Saito     if (blockNeedsPredication(BB)) {
871*f2ec16ccSHideki Saito       if (!blockCanBePredicated(BB, SafePointes)) {
872*f2ec16ccSHideki Saito         ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
873*f2ec16ccSHideki Saito                   << "control flow cannot be substituted for a select");
874*f2ec16ccSHideki Saito         return false;
875*f2ec16ccSHideki Saito       }
876*f2ec16ccSHideki Saito     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
877*f2ec16ccSHideki Saito       ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
878*f2ec16ccSHideki Saito                 << "control flow cannot be substituted for a select");
879*f2ec16ccSHideki Saito       return false;
880*f2ec16ccSHideki Saito     }
881*f2ec16ccSHideki Saito   }
882*f2ec16ccSHideki Saito 
883*f2ec16ccSHideki Saito   // We can if-convert this loop.
884*f2ec16ccSHideki Saito   return true;
885*f2ec16ccSHideki Saito }
886*f2ec16ccSHideki Saito 
887*f2ec16ccSHideki Saito // Helper function to canVectorizeLoopNestCFG.
888*f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
889*f2ec16ccSHideki Saito                                                     bool UseVPlanNativePath) {
890*f2ec16ccSHideki Saito   assert((UseVPlanNativePath || Lp->empty()) &&
891*f2ec16ccSHideki Saito          "VPlan-native path is not enabled.");
892*f2ec16ccSHideki Saito 
893*f2ec16ccSHideki Saito   // TODO: ORE should be improved to show more accurate information when an
894*f2ec16ccSHideki Saito   // outer loop can't be vectorized because a nested loop is not understood or
895*f2ec16ccSHideki Saito   // legal. Something like: "outer_loop_location: loop not vectorized:
896*f2ec16ccSHideki Saito   // (inner_loop_location) loop control flow is not understood by vectorizer".
897*f2ec16ccSHideki Saito 
898*f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
899*f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
900*f2ec16ccSHideki Saito   bool Result = true;
901*f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
902*f2ec16ccSHideki Saito 
903*f2ec16ccSHideki Saito   // We must have a loop in canonical form. Loops with indirectbr in them cannot
904*f2ec16ccSHideki Saito   // be canonicalized.
905*f2ec16ccSHideki Saito   if (!Lp->getLoopPreheader()) {
906*f2ec16ccSHideki Saito     DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n");
907*f2ec16ccSHideki Saito     ORE->emit(createMissedAnalysis("CFGNotUnderstood")
908*f2ec16ccSHideki Saito               << "loop control flow is not understood by vectorizer");
909*f2ec16ccSHideki Saito     if (DoExtraAnalysis)
910*f2ec16ccSHideki Saito       Result = false;
911*f2ec16ccSHideki Saito     else
912*f2ec16ccSHideki Saito       return false;
913*f2ec16ccSHideki Saito   }
914*f2ec16ccSHideki Saito 
915*f2ec16ccSHideki Saito   // We must have a single backedge.
916*f2ec16ccSHideki Saito   if (Lp->getNumBackEdges() != 1) {
917*f2ec16ccSHideki Saito     ORE->emit(createMissedAnalysis("CFGNotUnderstood")
918*f2ec16ccSHideki Saito               << "loop control flow is not understood by vectorizer");
919*f2ec16ccSHideki Saito     if (DoExtraAnalysis)
920*f2ec16ccSHideki Saito       Result = false;
921*f2ec16ccSHideki Saito     else
922*f2ec16ccSHideki Saito       return false;
923*f2ec16ccSHideki Saito   }
924*f2ec16ccSHideki Saito 
925*f2ec16ccSHideki Saito   // We must have a single exiting block.
926*f2ec16ccSHideki Saito   if (!Lp->getExitingBlock()) {
927*f2ec16ccSHideki Saito     ORE->emit(createMissedAnalysis("CFGNotUnderstood")
928*f2ec16ccSHideki Saito               << "loop control flow is not understood by vectorizer");
929*f2ec16ccSHideki Saito     if (DoExtraAnalysis)
930*f2ec16ccSHideki Saito       Result = false;
931*f2ec16ccSHideki Saito     else
932*f2ec16ccSHideki Saito       return false;
933*f2ec16ccSHideki Saito   }
934*f2ec16ccSHideki Saito 
935*f2ec16ccSHideki Saito   // We only handle bottom-tested loops, i.e. loop in which the condition is
936*f2ec16ccSHideki Saito   // checked at the end of each iteration. With that we can assume that all
937*f2ec16ccSHideki Saito   // instructions in the loop are executed the same number of times.
938*f2ec16ccSHideki Saito   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
939*f2ec16ccSHideki Saito     ORE->emit(createMissedAnalysis("CFGNotUnderstood")
940*f2ec16ccSHideki Saito               << "loop control flow is not understood by vectorizer");
941*f2ec16ccSHideki Saito     if (DoExtraAnalysis)
942*f2ec16ccSHideki Saito       Result = false;
943*f2ec16ccSHideki Saito     else
944*f2ec16ccSHideki Saito       return false;
945*f2ec16ccSHideki Saito   }
946*f2ec16ccSHideki Saito 
947*f2ec16ccSHideki Saito   return Result;
948*f2ec16ccSHideki Saito }
949*f2ec16ccSHideki Saito 
950*f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
951*f2ec16ccSHideki Saito     Loop *Lp, bool UseVPlanNativePath) {
952*f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
953*f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
954*f2ec16ccSHideki Saito   bool Result = true;
955*f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
956*f2ec16ccSHideki Saito   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
957*f2ec16ccSHideki Saito     if (DoExtraAnalysis)
958*f2ec16ccSHideki Saito       Result = false;
959*f2ec16ccSHideki Saito     else
960*f2ec16ccSHideki Saito       return false;
961*f2ec16ccSHideki Saito   }
962*f2ec16ccSHideki Saito 
963*f2ec16ccSHideki Saito   // Recursively check whether the loop control flow of nested loops is
964*f2ec16ccSHideki Saito   // understood.
965*f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
966*f2ec16ccSHideki Saito     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
967*f2ec16ccSHideki Saito       if (DoExtraAnalysis)
968*f2ec16ccSHideki Saito         Result = false;
969*f2ec16ccSHideki Saito       else
970*f2ec16ccSHideki Saito         return false;
971*f2ec16ccSHideki Saito     }
972*f2ec16ccSHideki Saito 
973*f2ec16ccSHideki Saito   return Result;
974*f2ec16ccSHideki Saito }
975*f2ec16ccSHideki Saito 
976*f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
977*f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
978*f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
979*f2ec16ccSHideki Saito   bool Result = true;
980*f2ec16ccSHideki Saito 
981*f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
982*f2ec16ccSHideki Saito   // Check whether the loop-related control flow in the loop nest is expected by
983*f2ec16ccSHideki Saito   // vectorizer.
984*f2ec16ccSHideki Saito   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
985*f2ec16ccSHideki Saito     if (DoExtraAnalysis)
986*f2ec16ccSHideki Saito       Result = false;
987*f2ec16ccSHideki Saito     else
988*f2ec16ccSHideki Saito       return false;
989*f2ec16ccSHideki Saito   }
990*f2ec16ccSHideki Saito 
991*f2ec16ccSHideki Saito   // We need to have a loop header.
992*f2ec16ccSHideki Saito   DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
993*f2ec16ccSHideki Saito                << '\n');
994*f2ec16ccSHideki Saito 
995*f2ec16ccSHideki Saito   // Specific checks for outer loops. We skip the remaining legal checks at this
996*f2ec16ccSHideki Saito   // point because they don't support outer loops.
997*f2ec16ccSHideki Saito   if (!TheLoop->empty()) {
998*f2ec16ccSHideki Saito     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
999*f2ec16ccSHideki Saito 
1000*f2ec16ccSHideki Saito     if (!canVectorizeOuterLoop()) {
1001*f2ec16ccSHideki Saito       DEBUG(dbgs() << "LV: Not vectorizing: Unsupported outer loop.\n");
1002*f2ec16ccSHideki Saito       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1003*f2ec16ccSHideki Saito       // outer loops.
1004*f2ec16ccSHideki Saito       return false;
1005*f2ec16ccSHideki Saito     }
1006*f2ec16ccSHideki Saito 
1007*f2ec16ccSHideki Saito     DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1008*f2ec16ccSHideki Saito     return Result;
1009*f2ec16ccSHideki Saito   }
1010*f2ec16ccSHideki Saito 
1011*f2ec16ccSHideki Saito   assert(TheLoop->empty() && "Inner loop expected.");
1012*f2ec16ccSHideki Saito   // Check if we can if-convert non-single-bb loops.
1013*f2ec16ccSHideki Saito   unsigned NumBlocks = TheLoop->getNumBlocks();
1014*f2ec16ccSHideki Saito   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1015*f2ec16ccSHideki Saito     DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1016*f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1017*f2ec16ccSHideki Saito       Result = false;
1018*f2ec16ccSHideki Saito     else
1019*f2ec16ccSHideki Saito       return false;
1020*f2ec16ccSHideki Saito   }
1021*f2ec16ccSHideki Saito 
1022*f2ec16ccSHideki Saito   // Check if we can vectorize the instructions and CFG in this loop.
1023*f2ec16ccSHideki Saito   if (!canVectorizeInstrs()) {
1024*f2ec16ccSHideki Saito     DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1025*f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1026*f2ec16ccSHideki Saito       Result = false;
1027*f2ec16ccSHideki Saito     else
1028*f2ec16ccSHideki Saito       return false;
1029*f2ec16ccSHideki Saito   }
1030*f2ec16ccSHideki Saito 
1031*f2ec16ccSHideki Saito   // Go over each instruction and look at memory deps.
1032*f2ec16ccSHideki Saito   if (!canVectorizeMemory()) {
1033*f2ec16ccSHideki Saito     DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1034*f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1035*f2ec16ccSHideki Saito       Result = false;
1036*f2ec16ccSHideki Saito     else
1037*f2ec16ccSHideki Saito       return false;
1038*f2ec16ccSHideki Saito   }
1039*f2ec16ccSHideki Saito 
1040*f2ec16ccSHideki Saito   DEBUG(dbgs() << "LV: We can vectorize this loop"
1041*f2ec16ccSHideki Saito                << (LAI->getRuntimePointerChecking()->Need
1042*f2ec16ccSHideki Saito                        ? " (with a runtime bound check)"
1043*f2ec16ccSHideki Saito                        : "")
1044*f2ec16ccSHideki Saito                << "!\n");
1045*f2ec16ccSHideki Saito 
1046*f2ec16ccSHideki Saito   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1047*f2ec16ccSHideki Saito   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1048*f2ec16ccSHideki Saito     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1049*f2ec16ccSHideki Saito 
1050*f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1051*f2ec16ccSHideki Saito     ORE->emit(createMissedAnalysis("TooManySCEVRunTimeChecks")
1052*f2ec16ccSHideki Saito               << "Too many SCEV assumptions need to be made and checked "
1053*f2ec16ccSHideki Saito               << "at runtime");
1054*f2ec16ccSHideki Saito     DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n");
1055*f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1056*f2ec16ccSHideki Saito       Result = false;
1057*f2ec16ccSHideki Saito     else
1058*f2ec16ccSHideki Saito       return false;
1059*f2ec16ccSHideki Saito   }
1060*f2ec16ccSHideki Saito 
1061*f2ec16ccSHideki Saito   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1062*f2ec16ccSHideki Saito   // we can vectorize, and at this point we don't have any other mem analysis
1063*f2ec16ccSHideki Saito   // which may limit our maximum vectorization factor, so just return true with
1064*f2ec16ccSHideki Saito   // no restrictions.
1065*f2ec16ccSHideki Saito   return Result;
1066*f2ec16ccSHideki Saito }
1067*f2ec16ccSHideki Saito 
1068*f2ec16ccSHideki Saito } // namespace llvm
1069