1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides loop vectorization legality analysis. Original code
10 // resided in LoopVectorize.cpp for a long time.
11 //
12 // At this point, it is implemented as a utility class, not as an analysis
13 // pass. It should be easy to create an analysis pass around it if there
14 // is a need (but D45420 needs to happen first).
15 //
16 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
17 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
18 #include "llvm/Analysis/Loads.h"
19 #include "llvm/Analysis/ValueTracking.h"
20 #include "llvm/Analysis/VectorUtils.h"
21 #include "llvm/IR/IntrinsicInst.h"
22 
23 using namespace llvm;
24 
25 #define LV_NAME "loop-vectorize"
26 #define DEBUG_TYPE LV_NAME
27 
28 extern cl::opt<bool> EnableVPlanPredication;
29 
30 static cl::opt<bool>
31     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
32                        cl::desc("Enable if-conversion during vectorization."));
33 
34 static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
35     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
36     cl::desc("The maximum allowed number of runtime memory checks with a "
37              "vectorize(enable) pragma."));
38 
39 static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
40     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
41     cl::desc("The maximum number of SCEV checks allowed."));
42 
43 static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
44     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
45     cl::desc("The maximum number of SCEV checks allowed with a "
46              "vectorize(enable) pragma"));
47 
48 /// Maximum vectorization interleave count.
49 static const unsigned MaxInterleaveFactor = 16;
50 
51 namespace llvm {
52 
53 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
54   switch (Kind) {
55   case HK_WIDTH:
56     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
57   case HK_UNROLL:
58     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
59   case HK_FORCE:
60     return (Val <= 1);
61   case HK_ISVECTORIZED:
62   case HK_PREDICATE:
63     return (Val == 0 || Val == 1);
64   }
65   return false;
66 }
67 
68 LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
69                                        bool InterleaveOnlyWhenForced,
70                                        OptimizationRemarkEmitter &ORE)
71     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
72       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
73       Force("vectorize.enable", FK_Undefined, HK_FORCE),
74       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
75       Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),
76       ORE(ORE) {
77   // Populate values with existing loop metadata.
78   getHintsFromMetadata();
79 
80   // force-vector-interleave overrides DisableInterleaving.
81   if (VectorizerParams::isInterleaveForced())
82     Interleave.Value = VectorizerParams::VectorizationInterleave;
83 
84   if (IsVectorized.Value != 1)
85     // If the vectorization width and interleaving count are both 1 then
86     // consider the loop to have been already vectorized because there's
87     // nothing more that we can do.
88     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
89   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
90              << "LV: Interleaving disabled by the pass manager\n");
91 }
92 
93 void LoopVectorizeHints::setAlreadyVectorized() {
94   LLVMContext &Context = TheLoop->getHeader()->getContext();
95 
96   MDNode *IsVectorizedMD = MDNode::get(
97       Context,
98       {MDString::get(Context, "llvm.loop.isvectorized"),
99        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
100   MDNode *LoopID = TheLoop->getLoopID();
101   MDNode *NewLoopID =
102       makePostTransformationMetadata(Context, LoopID,
103                                      {Twine(Prefix(), "vectorize.").str(),
104                                       Twine(Prefix(), "interleave.").str()},
105                                      {IsVectorizedMD});
106   TheLoop->setLoopID(NewLoopID);
107 
108   // Update internal cache.
109   IsVectorized.Value = 1;
110 }
111 
112 bool LoopVectorizeHints::allowVectorization(
113     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
114   if (getForce() == LoopVectorizeHints::FK_Disabled) {
115     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
116     emitRemarkWithHints();
117     return false;
118   }
119 
120   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
121     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
122     emitRemarkWithHints();
123     return false;
124   }
125 
126   if (getIsVectorized() == 1) {
127     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
128     // FIXME: Add interleave.disable metadata. This will allow
129     // vectorize.disable to be used without disabling the pass and errors
130     // to differentiate between disabled vectorization and a width of 1.
131     ORE.emit([&]() {
132       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
133                                         "AllDisabled", L->getStartLoc(),
134                                         L->getHeader())
135              << "loop not vectorized: vectorization and interleaving are "
136                 "explicitly disabled, or the loop has already been "
137                 "vectorized";
138     });
139     return false;
140   }
141 
142   return true;
143 }
144 
145 void LoopVectorizeHints::emitRemarkWithHints() const {
146   using namespace ore;
147 
148   ORE.emit([&]() {
149     if (Force.Value == LoopVectorizeHints::FK_Disabled)
150       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
151                                       TheLoop->getStartLoc(),
152                                       TheLoop->getHeader())
153              << "loop not vectorized: vectorization is explicitly disabled";
154     else {
155       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
156                                  TheLoop->getStartLoc(), TheLoop->getHeader());
157       R << "loop not vectorized";
158       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
159         R << " (Force=" << NV("Force", true);
160         if (Width.Value != 0)
161           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
162         if (Interleave.Value != 0)
163           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
164         R << ")";
165       }
166       return R;
167     }
168   });
169 }
170 
171 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
172   if (getWidth() == 1)
173     return LV_NAME;
174   if (getForce() == LoopVectorizeHints::FK_Disabled)
175     return LV_NAME;
176   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
177     return LV_NAME;
178   return OptimizationRemarkAnalysis::AlwaysPrint;
179 }
180 
181 void LoopVectorizeHints::getHintsFromMetadata() {
182   MDNode *LoopID = TheLoop->getLoopID();
183   if (!LoopID)
184     return;
185 
186   // First operand should refer to the loop id itself.
187   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
188   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
189 
190   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
191     const MDString *S = nullptr;
192     SmallVector<Metadata *, 4> Args;
193 
194     // The expected hint is either a MDString or a MDNode with the first
195     // operand a MDString.
196     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
197       if (!MD || MD->getNumOperands() == 0)
198         continue;
199       S = dyn_cast<MDString>(MD->getOperand(0));
200       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
201         Args.push_back(MD->getOperand(i));
202     } else {
203       S = dyn_cast<MDString>(LoopID->getOperand(i));
204       assert(Args.size() == 0 && "too many arguments for MDString");
205     }
206 
207     if (!S)
208       continue;
209 
210     // Check if the hint starts with the loop metadata prefix.
211     StringRef Name = S->getString();
212     if (Args.size() == 1)
213       setHint(Name, Args[0]);
214   }
215 }
216 
217 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
218   if (!Name.startswith(Prefix()))
219     return;
220   Name = Name.substr(Prefix().size(), StringRef::npos);
221 
222   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
223   if (!C)
224     return;
225   unsigned Val = C->getZExtValue();
226 
227   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
228   for (auto H : Hints) {
229     if (Name == H->Name) {
230       if (H->validate(Val))
231         H->Value = Val;
232       else
233         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
234       break;
235     }
236   }
237 }
238 
239 bool LoopVectorizationRequirements::doesNotMeet(
240     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
241   const char *PassName = Hints.vectorizeAnalysisPassName();
242   bool Failed = false;
243   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
244     ORE.emit([&]() {
245       return OptimizationRemarkAnalysisFPCommute(
246                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
247                  UnsafeAlgebraInst->getParent())
248              << "loop not vectorized: cannot prove it is safe to reorder "
249                 "floating-point operations";
250     });
251     Failed = true;
252   }
253 
254   // Test if runtime memcheck thresholds are exceeded.
255   bool PragmaThresholdReached =
256       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
257   bool ThresholdReached =
258       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
259   if ((ThresholdReached && !Hints.allowReordering()) ||
260       PragmaThresholdReached) {
261     ORE.emit([&]() {
262       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
263                                                 L->getStartLoc(),
264                                                 L->getHeader())
265              << "loop not vectorized: cannot prove it is safe to reorder "
266                 "memory operations";
267     });
268     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
269     Failed = true;
270   }
271 
272   return Failed;
273 }
274 
275 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
276 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
277 // executing the inner loop will execute the same iterations). This check is
278 // very constrained for now but it will be relaxed in the future. \p Lp is
279 // considered uniform if it meets all the following conditions:
280 //   1) it has a canonical IV (starting from 0 and with stride 1),
281 //   2) its latch terminator is a conditional branch and,
282 //   3) its latch condition is a compare instruction whose operands are the
283 //      canonical IV and an OuterLp invariant.
284 // This check doesn't take into account the uniformity of other conditions not
285 // related to the loop latch because they don't affect the loop uniformity.
286 //
287 // NOTE: We decided to keep all these checks and its associated documentation
288 // together so that we can easily have a picture of the current supported loop
289 // nests. However, some of the current checks don't depend on \p OuterLp and
290 // would be redundantly executed for each \p Lp if we invoked this function for
291 // different candidate outer loops. This is not the case for now because we
292 // don't currently have the infrastructure to evaluate multiple candidate outer
293 // loops and \p OuterLp will be a fixed parameter while we only support explicit
294 // outer loop vectorization. It's also very likely that these checks go away
295 // before introducing the aforementioned infrastructure. However, if this is not
296 // the case, we should move the \p OuterLp independent checks to a separate
297 // function that is only executed once for each \p Lp.
298 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
299   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
300 
301   // If Lp is the outer loop, it's uniform by definition.
302   if (Lp == OuterLp)
303     return true;
304   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
305 
306   // 1.
307   PHINode *IV = Lp->getCanonicalInductionVariable();
308   if (!IV) {
309     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
310     return false;
311   }
312 
313   // 2.
314   BasicBlock *Latch = Lp->getLoopLatch();
315   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
316   if (!LatchBr || LatchBr->isUnconditional()) {
317     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
318     return false;
319   }
320 
321   // 3.
322   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
323   if (!LatchCmp) {
324     LLVM_DEBUG(
325         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
326     return false;
327   }
328 
329   Value *CondOp0 = LatchCmp->getOperand(0);
330   Value *CondOp1 = LatchCmp->getOperand(1);
331   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
332   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
333       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
334     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
335     return false;
336   }
337 
338   return true;
339 }
340 
341 // Return true if \p Lp and all its nested loops are uniform with regard to \p
342 // OuterLp.
343 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
344   if (!isUniformLoop(Lp, OuterLp))
345     return false;
346 
347   // Check if nested loops are uniform.
348   for (Loop *SubLp : *Lp)
349     if (!isUniformLoopNest(SubLp, OuterLp))
350       return false;
351 
352   return true;
353 }
354 
355 /// Check whether it is safe to if-convert this phi node.
356 ///
357 /// Phi nodes with constant expressions that can trap are not safe to if
358 /// convert.
359 static bool canIfConvertPHINodes(BasicBlock *BB) {
360   for (PHINode &Phi : BB->phis()) {
361     for (Value *V : Phi.incoming_values())
362       if (auto *C = dyn_cast<Constant>(V))
363         if (C->canTrap())
364           return false;
365   }
366   return true;
367 }
368 
369 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
370   if (Ty->isPointerTy())
371     return DL.getIntPtrType(Ty);
372 
373   // It is possible that char's or short's overflow when we ask for the loop's
374   // trip count, work around this by changing the type size.
375   if (Ty->getScalarSizeInBits() < 32)
376     return Type::getInt32Ty(Ty->getContext());
377 
378   return Ty;
379 }
380 
381 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
382   Ty0 = convertPointerToIntegerType(DL, Ty0);
383   Ty1 = convertPointerToIntegerType(DL, Ty1);
384   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
385     return Ty0;
386   return Ty1;
387 }
388 
389 /// Check that the instruction has outside loop users and is not an
390 /// identified reduction variable.
391 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
392                                SmallPtrSetImpl<Value *> &AllowedExit) {
393   // Reductions, Inductions and non-header phis are allowed to have exit users. All
394   // other instructions must not have external users.
395   if (!AllowedExit.count(Inst))
396     // Check that all of the users of the loop are inside the BB.
397     for (User *U : Inst->users()) {
398       Instruction *UI = cast<Instruction>(U);
399       // This user may be a reduction exit value.
400       if (!TheLoop->contains(UI)) {
401         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
402         return true;
403       }
404     }
405   return false;
406 }
407 
408 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
409   const ValueToValueMap &Strides =
410       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
411 
412   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
413   if (Stride == 1 || Stride == -1)
414     return Stride;
415   return 0;
416 }
417 
418 bool LoopVectorizationLegality::isUniform(Value *V) {
419   return LAI->isUniform(V);
420 }
421 
422 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
423   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
424   // Store the result and return it at the end instead of exiting early, in case
425   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
426   bool Result = true;
427   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
428 
429   for (BasicBlock *BB : TheLoop->blocks()) {
430     // Check whether the BB terminator is a BranchInst. Any other terminator is
431     // not supported yet.
432     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
433     if (!Br) {
434       reportVectorizationFailure("Unsupported basic block terminator",
435           "loop control flow is not understood by vectorizer",
436           "CFGNotUnderstood", ORE, TheLoop);
437       if (DoExtraAnalysis)
438         Result = false;
439       else
440         return false;
441     }
442 
443     // Check whether the BranchInst is a supported one. Only unconditional
444     // branches, conditional branches with an outer loop invariant condition or
445     // backedges are supported.
446     // FIXME: We skip these checks when VPlan predication is enabled as we
447     // want to allow divergent branches. This whole check will be removed
448     // once VPlan predication is on by default.
449     if (!EnableVPlanPredication && Br && Br->isConditional() &&
450         !TheLoop->isLoopInvariant(Br->getCondition()) &&
451         !LI->isLoopHeader(Br->getSuccessor(0)) &&
452         !LI->isLoopHeader(Br->getSuccessor(1))) {
453       reportVectorizationFailure("Unsupported conditional branch",
454           "loop control flow is not understood by vectorizer",
455           "CFGNotUnderstood", ORE, TheLoop);
456       if (DoExtraAnalysis)
457         Result = false;
458       else
459         return false;
460     }
461   }
462 
463   // Check whether inner loops are uniform. At this point, we only support
464   // simple outer loops scenarios with uniform nested loops.
465   if (!isUniformLoopNest(TheLoop /*loop nest*/,
466                          TheLoop /*context outer loop*/)) {
467     reportVectorizationFailure("Outer loop contains divergent loops",
468         "loop control flow is not understood by vectorizer",
469         "CFGNotUnderstood", ORE, TheLoop);
470     if (DoExtraAnalysis)
471       Result = false;
472     else
473       return false;
474   }
475 
476   // Check whether we are able to set up outer loop induction.
477   if (!setupOuterLoopInductions()) {
478     reportVectorizationFailure("Unsupported outer loop Phi(s)",
479                                "Unsupported outer loop Phi(s)",
480                                "UnsupportedPhi", ORE, TheLoop);
481     if (DoExtraAnalysis)
482       Result = false;
483     else
484       return false;
485   }
486 
487   return Result;
488 }
489 
490 void LoopVectorizationLegality::addInductionPhi(
491     PHINode *Phi, const InductionDescriptor &ID,
492     SmallPtrSetImpl<Value *> &AllowedExit) {
493   Inductions[Phi] = ID;
494 
495   // In case this induction also comes with casts that we know we can ignore
496   // in the vectorized loop body, record them here. All casts could be recorded
497   // here for ignoring, but suffices to record only the first (as it is the
498   // only one that may bw used outside the cast sequence).
499   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
500   if (!Casts.empty())
501     InductionCastsToIgnore.insert(*Casts.begin());
502 
503   Type *PhiTy = Phi->getType();
504   const DataLayout &DL = Phi->getModule()->getDataLayout();
505 
506   // Get the widest type.
507   if (!PhiTy->isFloatingPointTy()) {
508     if (!WidestIndTy)
509       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
510     else
511       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
512   }
513 
514   // Int inductions are special because we only allow one IV.
515   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
516       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
517       isa<Constant>(ID.getStartValue()) &&
518       cast<Constant>(ID.getStartValue())->isNullValue()) {
519 
520     // Use the phi node with the widest type as induction. Use the last
521     // one if there are multiple (no good reason for doing this other
522     // than it is expedient). We've checked that it begins at zero and
523     // steps by one, so this is a canonical induction variable.
524     if (!PrimaryInduction || PhiTy == WidestIndTy)
525       PrimaryInduction = Phi;
526   }
527 
528   // Both the PHI node itself, and the "post-increment" value feeding
529   // back into the PHI node may have external users.
530   // We can allow those uses, except if the SCEVs we have for them rely
531   // on predicates that only hold within the loop, since allowing the exit
532   // currently means re-using this SCEV outside the loop (see PR33706 for more
533   // details).
534   if (PSE.getUnionPredicate().isAlwaysTrue()) {
535     AllowedExit.insert(Phi);
536     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
537   }
538 
539   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
540 }
541 
542 bool LoopVectorizationLegality::setupOuterLoopInductions() {
543   BasicBlock *Header = TheLoop->getHeader();
544 
545   // Returns true if a given Phi is a supported induction.
546   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
547     InductionDescriptor ID;
548     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
549         ID.getKind() == InductionDescriptor::IK_IntInduction) {
550       addInductionPhi(&Phi, ID, AllowedExit);
551       return true;
552     } else {
553       // Bail out for any Phi in the outer loop header that is not a supported
554       // induction.
555       LLVM_DEBUG(
556           dbgs()
557           << "LV: Found unsupported PHI for outer loop vectorization.\n");
558       return false;
559     }
560   };
561 
562   if (llvm::all_of(Header->phis(), isSupportedPhi))
563     return true;
564   else
565     return false;
566 }
567 
568 bool LoopVectorizationLegality::canVectorizeInstrs() {
569   BasicBlock *Header = TheLoop->getHeader();
570 
571   // Look for the attribute signaling the absence of NaNs.
572   Function &F = *Header->getParent();
573   HasFunNoNaNAttr =
574       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
575 
576   // For each block in the loop.
577   for (BasicBlock *BB : TheLoop->blocks()) {
578     // Scan the instructions in the block and look for hazards.
579     for (Instruction &I : *BB) {
580       if (auto *Phi = dyn_cast<PHINode>(&I)) {
581         Type *PhiTy = Phi->getType();
582         // Check that this PHI type is allowed.
583         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
584             !PhiTy->isPointerTy()) {
585           reportVectorizationFailure("Found a non-int non-pointer PHI",
586                                      "loop control flow is not understood by vectorizer",
587                                      "CFGNotUnderstood", ORE, TheLoop);
588           return false;
589         }
590 
591         // If this PHINode is not in the header block, then we know that we
592         // can convert it to select during if-conversion. No need to check if
593         // the PHIs in this block are induction or reduction variables.
594         if (BB != Header) {
595           // Non-header phi nodes that have outside uses can be vectorized. Add
596           // them to the list of allowed exits.
597           // Unsafe cyclic dependencies with header phis are identified during
598           // legalization for reduction, induction and first order
599           // recurrences.
600           AllowedExit.insert(&I);
601           continue;
602         }
603 
604         // We only allow if-converted PHIs with exactly two incoming values.
605         if (Phi->getNumIncomingValues() != 2) {
606           reportVectorizationFailure("Found an invalid PHI",
607               "loop control flow is not understood by vectorizer",
608               "CFGNotUnderstood", ORE, TheLoop, Phi);
609           return false;
610         }
611 
612         RecurrenceDescriptor RedDes;
613         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
614                                                  DT)) {
615           if (RedDes.hasUnsafeAlgebra())
616             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
617           AllowedExit.insert(RedDes.getLoopExitInstr());
618           Reductions[Phi] = RedDes;
619           continue;
620         }
621 
622         // TODO: Instead of recording the AllowedExit, it would be good to record the
623         // complementary set: NotAllowedExit. These include (but may not be
624         // limited to):
625         // 1. Reduction phis as they represent the one-before-last value, which
626         // is not available when vectorized
627         // 2. Induction phis and increment when SCEV predicates cannot be used
628         // outside the loop - see addInductionPhi
629         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
630         // outside the loop - see call to hasOutsideLoopUser in the non-phi
631         // handling below
632         // 4. FirstOrderRecurrence phis that can possibly be handled by
633         // extraction.
634         // By recording these, we can then reason about ways to vectorize each
635         // of these NotAllowedExit.
636         InductionDescriptor ID;
637         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
638           addInductionPhi(Phi, ID, AllowedExit);
639           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
640             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
641           continue;
642         }
643 
644         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
645                                                          SinkAfter, DT)) {
646           FirstOrderRecurrences.insert(Phi);
647           continue;
648         }
649 
650         // As a last resort, coerce the PHI to a AddRec expression
651         // and re-try classifying it a an induction PHI.
652         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
653           addInductionPhi(Phi, ID, AllowedExit);
654           continue;
655         }
656 
657         reportVectorizationFailure("Found an unidentified PHI",
658             "value that could not be identified as "
659             "reduction is used outside the loop",
660             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
661         return false;
662       } // end of PHI handling
663 
664       // We handle calls that:
665       //   * Are debug info intrinsics.
666       //   * Have a mapping to an IR intrinsic.
667       //   * Have a vector version available.
668       auto *CI = dyn_cast<CallInst>(&I);
669       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
670           !isa<DbgInfoIntrinsic>(CI) &&
671           !(CI->getCalledFunction() && TLI &&
672             TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
673         // If the call is a recognized math libary call, it is likely that
674         // we can vectorize it given loosened floating-point constraints.
675         LibFunc Func;
676         bool IsMathLibCall =
677             TLI && CI->getCalledFunction() &&
678             CI->getType()->isFloatingPointTy() &&
679             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
680             TLI->hasOptimizedCodeGen(Func);
681 
682         if (IsMathLibCall) {
683           // TODO: Ideally, we should not use clang-specific language here,
684           // but it's hard to provide meaningful yet generic advice.
685           // Also, should this be guarded by allowExtraAnalysis() and/or be part
686           // of the returned info from isFunctionVectorizable()?
687           reportVectorizationFailure("Found a non-intrinsic callsite",
688               "library call cannot be vectorized. "
689               "Try compiling with -fno-math-errno, -ffast-math, "
690               "or similar flags",
691               "CantVectorizeLibcall", ORE, TheLoop, CI);
692         } else {
693           reportVectorizationFailure("Found a non-intrinsic callsite",
694                                      "call instruction cannot be vectorized",
695                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
696         }
697         return false;
698       }
699 
700       // Some intrinsics have scalar arguments and should be same in order for
701       // them to be vectorized (i.e. loop invariant).
702       if (CI) {
703         auto *SE = PSE.getSE();
704         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
705         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
706           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
707             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
708               reportVectorizationFailure("Found unvectorizable intrinsic",
709                   "intrinsic instruction cannot be vectorized",
710                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
711               return false;
712             }
713           }
714       }
715 
716       // Check that the instruction return type is vectorizable.
717       // Also, we can't vectorize extractelement instructions.
718       if ((!VectorType::isValidElementType(I.getType()) &&
719            !I.getType()->isVoidTy()) ||
720           isa<ExtractElementInst>(I)) {
721         reportVectorizationFailure("Found unvectorizable type",
722             "instruction return type cannot be vectorized",
723             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
724         return false;
725       }
726 
727       // Check that the stored type is vectorizable.
728       if (auto *ST = dyn_cast<StoreInst>(&I)) {
729         Type *T = ST->getValueOperand()->getType();
730         if (!VectorType::isValidElementType(T)) {
731           reportVectorizationFailure("Store instruction cannot be vectorized",
732                                      "store instruction cannot be vectorized",
733                                      "CantVectorizeStore", ORE, TheLoop, ST);
734           return false;
735         }
736 
737         // For nontemporal stores, check that a nontemporal vector version is
738         // supported on the target.
739         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
740           // Arbitrarily try a vector of 2 elements.
741           Type *VecTy = VectorType::get(T, /*NumElements=*/2);
742           assert(VecTy && "did not find vectorized version of stored type");
743           unsigned Alignment = getLoadStoreAlignment(ST);
744           assert(Alignment && "Alignment should be set");
745           if (!TTI->isLegalNTStore(VecTy, llvm::Align(Alignment))) {
746             reportVectorizationFailure(
747                 "nontemporal store instruction cannot be vectorized",
748                 "nontemporal store instruction cannot be vectorized",
749                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
750             return false;
751           }
752         }
753 
754       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
755         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
756           // For nontemporal loads, check that a nontemporal vector version is
757           // supported on the target (arbitrarily try a vector of 2 elements).
758           Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
759           assert(VecTy && "did not find vectorized version of load type");
760           unsigned Alignment = getLoadStoreAlignment(LD);
761           assert(Alignment && "Alignment should be set");
762           if (!TTI->isLegalNTLoad(VecTy, llvm::Align(Alignment))) {
763             reportVectorizationFailure(
764                 "nontemporal load instruction cannot be vectorized",
765                 "nontemporal load instruction cannot be vectorized",
766                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
767             return false;
768           }
769         }
770 
771         // FP instructions can allow unsafe algebra, thus vectorizable by
772         // non-IEEE-754 compliant SIMD units.
773         // This applies to floating-point math operations and calls, not memory
774         // operations, shuffles, or casts, as they don't change precision or
775         // semantics.
776       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
777                  !I.isFast()) {
778         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
779         Hints->setPotentiallyUnsafe();
780       }
781 
782       // Reduction instructions are allowed to have exit users.
783       // All other instructions must not have external users.
784       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
785         // We can safely vectorize loops where instructions within the loop are
786         // used outside the loop only if the SCEV predicates within the loop is
787         // same as outside the loop. Allowing the exit means reusing the SCEV
788         // outside the loop.
789         if (PSE.getUnionPredicate().isAlwaysTrue()) {
790           AllowedExit.insert(&I);
791           continue;
792         }
793         reportVectorizationFailure("Value cannot be used outside the loop",
794                                    "value cannot be used outside the loop",
795                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
796         return false;
797       }
798     } // next instr.
799   }
800 
801   if (!PrimaryInduction) {
802     if (Inductions.empty()) {
803       reportVectorizationFailure("Did not find one integer induction var",
804           "loop induction variable could not be identified",
805           "NoInductionVariable", ORE, TheLoop);
806       return false;
807     } else if (!WidestIndTy) {
808       reportVectorizationFailure("Did not find one integer induction var",
809           "integer loop induction variable could not be identified",
810           "NoIntegerInductionVariable", ORE, TheLoop);
811       return false;
812     } else {
813       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
814     }
815   }
816 
817   // Now we know the widest induction type, check if our found induction
818   // is the same size. If it's not, unset it here and InnerLoopVectorizer
819   // will create another.
820   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
821     PrimaryInduction = nullptr;
822 
823   return true;
824 }
825 
826 bool LoopVectorizationLegality::canVectorizeMemory() {
827   LAI = &(*GetLAA)(*TheLoop);
828   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
829   if (LAR) {
830     ORE->emit([&]() {
831       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
832                                         "loop not vectorized: ", *LAR);
833     });
834   }
835   if (!LAI->canVectorizeMemory())
836     return false;
837 
838   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
839     reportVectorizationFailure("Stores to a uniform address",
840         "write to a loop invariant address could not be vectorized",
841         "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
842     return false;
843   }
844   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
845   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
846 
847   return true;
848 }
849 
850 bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
851   Value *In0 = const_cast<Value *>(V);
852   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
853   if (!PN)
854     return false;
855 
856   return Inductions.count(PN);
857 }
858 
859 bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
860   auto *Inst = dyn_cast<Instruction>(V);
861   return (Inst && InductionCastsToIgnore.count(Inst));
862 }
863 
864 bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
865   return isInductionPhi(V) || isCastedInductionVariable(V);
866 }
867 
868 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
869   return FirstOrderRecurrences.count(Phi);
870 }
871 
872 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
873   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
874 }
875 
876 bool LoopVectorizationLegality::blockCanBePredicated(
877     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
878   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
879 
880   for (Instruction &I : *BB) {
881     // Check that we don't have a constant expression that can trap as operand.
882     for (Value *Operand : I.operands()) {
883       if (auto *C = dyn_cast<Constant>(Operand))
884         if (C->canTrap())
885           return false;
886     }
887     // We might be able to hoist the load.
888     if (I.mayReadFromMemory()) {
889       auto *LI = dyn_cast<LoadInst>(&I);
890       if (!LI)
891         return false;
892       if (!SafePtrs.count(LI->getPointerOperand())) {
893         // !llvm.mem.parallel_loop_access implies if-conversion safety.
894         // Otherwise, record that the load needs (real or emulated) masking
895         // and let the cost model decide.
896         if (!IsAnnotatedParallel || PreserveGuards)
897           MaskedOp.insert(LI);
898         continue;
899       }
900     }
901 
902     if (I.mayWriteToMemory()) {
903       auto *SI = dyn_cast<StoreInst>(&I);
904       if (!SI)
905         return false;
906       // Predicated store requires some form of masking:
907       // 1) masked store HW instruction,
908       // 2) emulation via load-blend-store (only if safe and legal to do so,
909       //    be aware on the race conditions), or
910       // 3) element-by-element predicate check and scalar store.
911       MaskedOp.insert(SI);
912       continue;
913     }
914     if (I.mayThrow())
915       return false;
916   }
917 
918   return true;
919 }
920 
921 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
922   if (!EnableIfConversion) {
923     reportVectorizationFailure("If-conversion is disabled",
924                                "if-conversion is disabled",
925                                "IfConversionDisabled",
926                                ORE, TheLoop);
927     return false;
928   }
929 
930   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
931 
932   // A list of pointers which are known to be dereferenceable within scope of
933   // the loop body for each iteration of the loop which executes.  That is,
934   // the memory pointed to can be dereferenced (with the access size implied by
935   // the value's type) unconditionally within the loop header without
936   // introducing a new fault.
937   SmallPtrSet<Value *, 8> SafePointes;
938 
939   // Collect safe addresses.
940   for (BasicBlock *BB : TheLoop->blocks()) {
941     if (!blockNeedsPredication(BB)) {
942       for (Instruction &I : *BB)
943         if (auto *Ptr = getLoadStorePointerOperand(&I))
944           SafePointes.insert(Ptr);
945       continue;
946     }
947 
948     // For a block which requires predication, a address may be safe to access
949     // in the loop w/o predication if we can prove dereferenceability facts
950     // sufficient to ensure it'll never fault within the loop. For the moment,
951     // we restrict this to loads; stores are more complicated due to
952     // concurrency restrictions.
953     ScalarEvolution &SE = *PSE.getSE();
954     for (Instruction &I : *BB) {
955       LoadInst *LI = dyn_cast<LoadInst>(&I);
956       if (LI && !mustSuppressSpeculation(*LI) &&
957           isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
958         SafePointes.insert(LI->getPointerOperand());
959     }
960   }
961 
962   // Collect the blocks that need predication.
963   BasicBlock *Header = TheLoop->getHeader();
964   for (BasicBlock *BB : TheLoop->blocks()) {
965     // We don't support switch statements inside loops.
966     if (!isa<BranchInst>(BB->getTerminator())) {
967       reportVectorizationFailure("Loop contains a switch statement",
968                                  "loop contains a switch statement",
969                                  "LoopContainsSwitch", ORE, TheLoop,
970                                  BB->getTerminator());
971       return false;
972     }
973 
974     // We must be able to predicate all blocks that need to be predicated.
975     if (blockNeedsPredication(BB)) {
976       if (!blockCanBePredicated(BB, SafePointes)) {
977         reportVectorizationFailure(
978             "Control flow cannot be substituted for a select",
979             "control flow cannot be substituted for a select",
980             "NoCFGForSelect", ORE, TheLoop,
981             BB->getTerminator());
982         return false;
983       }
984     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
985       reportVectorizationFailure(
986           "Control flow cannot be substituted for a select",
987           "control flow cannot be substituted for a select",
988           "NoCFGForSelect", ORE, TheLoop,
989           BB->getTerminator());
990       return false;
991     }
992   }
993 
994   // We can if-convert this loop.
995   return true;
996 }
997 
998 // Helper function to canVectorizeLoopNestCFG.
999 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1000                                                     bool UseVPlanNativePath) {
1001   assert((UseVPlanNativePath || Lp->empty()) &&
1002          "VPlan-native path is not enabled.");
1003 
1004   // TODO: ORE should be improved to show more accurate information when an
1005   // outer loop can't be vectorized because a nested loop is not understood or
1006   // legal. Something like: "outer_loop_location: loop not vectorized:
1007   // (inner_loop_location) loop control flow is not understood by vectorizer".
1008 
1009   // Store the result and return it at the end instead of exiting early, in case
1010   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1011   bool Result = true;
1012   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1013 
1014   // We must have a loop in canonical form. Loops with indirectbr in them cannot
1015   // be canonicalized.
1016   if (!Lp->getLoopPreheader()) {
1017     reportVectorizationFailure("Loop doesn't have a legal pre-header",
1018         "loop control flow is not understood by vectorizer",
1019         "CFGNotUnderstood", ORE, TheLoop);
1020     if (DoExtraAnalysis)
1021       Result = false;
1022     else
1023       return false;
1024   }
1025 
1026   // We must have a single backedge.
1027   if (Lp->getNumBackEdges() != 1) {
1028     reportVectorizationFailure("The loop must have a single backedge",
1029         "loop control flow is not understood by vectorizer",
1030         "CFGNotUnderstood", ORE, TheLoop);
1031     if (DoExtraAnalysis)
1032       Result = false;
1033     else
1034       return false;
1035   }
1036 
1037   // We must have a single exiting block.
1038   if (!Lp->getExitingBlock()) {
1039     reportVectorizationFailure("The loop must have an exiting block",
1040         "loop control flow is not understood by vectorizer",
1041         "CFGNotUnderstood", ORE, TheLoop);
1042     if (DoExtraAnalysis)
1043       Result = false;
1044     else
1045       return false;
1046   }
1047 
1048   // We only handle bottom-tested loops, i.e. loop in which the condition is
1049   // checked at the end of each iteration. With that we can assume that all
1050   // instructions in the loop are executed the same number of times.
1051   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1052     reportVectorizationFailure("The exiting block is not the loop latch",
1053         "loop control flow is not understood by vectorizer",
1054         "CFGNotUnderstood", ORE, TheLoop);
1055     if (DoExtraAnalysis)
1056       Result = false;
1057     else
1058       return false;
1059   }
1060 
1061   return Result;
1062 }
1063 
1064 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1065     Loop *Lp, bool UseVPlanNativePath) {
1066   // Store the result and return it at the end instead of exiting early, in case
1067   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1068   bool Result = true;
1069   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1070   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1071     if (DoExtraAnalysis)
1072       Result = false;
1073     else
1074       return false;
1075   }
1076 
1077   // Recursively check whether the loop control flow of nested loops is
1078   // understood.
1079   for (Loop *SubLp : *Lp)
1080     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1081       if (DoExtraAnalysis)
1082         Result = false;
1083       else
1084         return false;
1085     }
1086 
1087   return Result;
1088 }
1089 
1090 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1091   // Store the result and return it at the end instead of exiting early, in case
1092   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1093   bool Result = true;
1094 
1095   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1096   // Check whether the loop-related control flow in the loop nest is expected by
1097   // vectorizer.
1098   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1099     if (DoExtraAnalysis)
1100       Result = false;
1101     else
1102       return false;
1103   }
1104 
1105   // We need to have a loop header.
1106   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1107                     << '\n');
1108 
1109   // Specific checks for outer loops. We skip the remaining legal checks at this
1110   // point because they don't support outer loops.
1111   if (!TheLoop->empty()) {
1112     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1113 
1114     if (!canVectorizeOuterLoop()) {
1115       reportVectorizationFailure("Unsupported outer loop",
1116                                  "unsupported outer loop",
1117                                  "UnsupportedOuterLoop",
1118                                  ORE, TheLoop);
1119       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1120       // outer loops.
1121       return false;
1122     }
1123 
1124     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1125     return Result;
1126   }
1127 
1128   assert(TheLoop->empty() && "Inner loop expected.");
1129   // Check if we can if-convert non-single-bb loops.
1130   unsigned NumBlocks = TheLoop->getNumBlocks();
1131   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1132     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1133     if (DoExtraAnalysis)
1134       Result = false;
1135     else
1136       return false;
1137   }
1138 
1139   // Check if we can vectorize the instructions and CFG in this loop.
1140   if (!canVectorizeInstrs()) {
1141     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1142     if (DoExtraAnalysis)
1143       Result = false;
1144     else
1145       return false;
1146   }
1147 
1148   // Go over each instruction and look at memory deps.
1149   if (!canVectorizeMemory()) {
1150     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1151     if (DoExtraAnalysis)
1152       Result = false;
1153     else
1154       return false;
1155   }
1156 
1157   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1158                     << (LAI->getRuntimePointerChecking()->Need
1159                             ? " (with a runtime bound check)"
1160                             : "")
1161                     << "!\n");
1162 
1163   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1164   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1165     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1166 
1167   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1168     reportVectorizationFailure("Too many SCEV checks needed",
1169         "Too many SCEV assumptions need to be made and checked at runtime",
1170         "TooManySCEVRunTimeChecks", ORE, TheLoop);
1171     if (DoExtraAnalysis)
1172       Result = false;
1173     else
1174       return false;
1175   }
1176 
1177   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1178   // we can vectorize, and at this point we don't have any other mem analysis
1179   // which may limit our maximum vectorization factor, so just return true with
1180   // no restrictions.
1181   return Result;
1182 }
1183 
1184 bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1185 
1186   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1187 
1188   if (!PrimaryInduction) {
1189     reportVectorizationFailure(
1190         "No primary induction, cannot fold tail by masking",
1191         "Missing a primary induction variable in the loop, which is "
1192         "needed in order to fold tail by masking as required.",
1193         "NoPrimaryInduction", ORE, TheLoop);
1194     return false;
1195   }
1196 
1197   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1198 
1199   for (auto &Reduction : *getReductionVars())
1200     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1201 
1202   // TODO: handle non-reduction outside users when tail is folded by masking.
1203   for (auto *AE : AllowedExit) {
1204     // Check that all users of allowed exit values are inside the loop or
1205     // are the live-out of a reduction.
1206     if (ReductionLiveOuts.count(AE))
1207       continue;
1208     for (User *U : AE->users()) {
1209       Instruction *UI = cast<Instruction>(U);
1210       if (TheLoop->contains(UI))
1211         continue;
1212       reportVectorizationFailure(
1213           "Cannot fold tail by masking, loop has an outside user for",
1214           "Cannot fold tail by masking in the presence of live outs.",
1215           "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1216       return false;
1217     }
1218   }
1219 
1220   // The list of pointers that we can safely read and write to remains empty.
1221   SmallPtrSet<Value *, 8> SafePointers;
1222 
1223   // Check and mark all blocks for predication, including those that ordinarily
1224   // do not need predication such as the header block.
1225   for (BasicBlock *BB : TheLoop->blocks()) {
1226     if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
1227       reportVectorizationFailure(
1228           "Cannot fold tail by masking as required",
1229           "control flow cannot be substituted for a select",
1230           "NoCFGForSelect", ORE, TheLoop,
1231           BB->getTerminator());
1232       return false;
1233     }
1234   }
1235 
1236   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1237   return true;
1238 }
1239 
1240 } // namespace llvm
1241