1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides loop vectorization legality analysis. Original code
10 // resided in LoopVectorize.cpp for a long time.
11 //
12 // At this point, it is implemented as a utility class, not as an analysis
13 // pass. It should be easy to create an analysis pass around it if there
14 // is a need (but D45420 needs to happen first).
15 //
16 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
17 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
18 #include "llvm/Analysis/VectorUtils.h"
19 #include "llvm/IR/IntrinsicInst.h"
20 
21 using namespace llvm;
22 
23 #define LV_NAME "loop-vectorize"
24 #define DEBUG_TYPE LV_NAME
25 
26 extern cl::opt<bool> EnableVPlanPredication;
27 
28 static cl::opt<bool>
29     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
30                        cl::desc("Enable if-conversion during vectorization."));
31 
32 static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
33     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
34     cl::desc("The maximum allowed number of runtime memory checks with a "
35              "vectorize(enable) pragma."));
36 
37 static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
38     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
39     cl::desc("The maximum number of SCEV checks allowed."));
40 
41 static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
42     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
43     cl::desc("The maximum number of SCEV checks allowed with a "
44              "vectorize(enable) pragma"));
45 
46 /// Maximum vectorization interleave count.
47 static const unsigned MaxInterleaveFactor = 16;
48 
49 namespace llvm {
50 
51 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
52   switch (Kind) {
53   case HK_WIDTH:
54     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
55   case HK_UNROLL:
56     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
57   case HK_FORCE:
58     return (Val <= 1);
59   case HK_ISVECTORIZED:
60   case HK_PREDICATE:
61     return (Val == 0 || Val == 1);
62   }
63   return false;
64 }
65 
66 LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
67                                        bool InterleaveOnlyWhenForced,
68                                        OptimizationRemarkEmitter &ORE)
69     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
70       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
71       Force("vectorize.enable", FK_Undefined, HK_FORCE),
72       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
73       Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),
74       ORE(ORE) {
75   // Populate values with existing loop metadata.
76   getHintsFromMetadata();
77 
78   // force-vector-interleave overrides DisableInterleaving.
79   if (VectorizerParams::isInterleaveForced())
80     Interleave.Value = VectorizerParams::VectorizationInterleave;
81 
82   if (IsVectorized.Value != 1)
83     // If the vectorization width and interleaving count are both 1 then
84     // consider the loop to have been already vectorized because there's
85     // nothing more that we can do.
86     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
87   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
88              << "LV: Interleaving disabled by the pass manager\n");
89 }
90 
91 void LoopVectorizeHints::setAlreadyVectorized() {
92   LLVMContext &Context = TheLoop->getHeader()->getContext();
93 
94   MDNode *IsVectorizedMD = MDNode::get(
95       Context,
96       {MDString::get(Context, "llvm.loop.isvectorized"),
97        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
98   MDNode *LoopID = TheLoop->getLoopID();
99   MDNode *NewLoopID =
100       makePostTransformationMetadata(Context, LoopID,
101                                      {Twine(Prefix(), "vectorize.").str(),
102                                       Twine(Prefix(), "interleave.").str()},
103                                      {IsVectorizedMD});
104   TheLoop->setLoopID(NewLoopID);
105 
106   // Update internal cache.
107   IsVectorized.Value = 1;
108 }
109 
110 bool LoopVectorizeHints::allowVectorization(
111     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
112   if (getForce() == LoopVectorizeHints::FK_Disabled) {
113     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
114     emitRemarkWithHints();
115     return false;
116   }
117 
118   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
119     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
120     emitRemarkWithHints();
121     return false;
122   }
123 
124   if (getIsVectorized() == 1) {
125     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
126     // FIXME: Add interleave.disable metadata. This will allow
127     // vectorize.disable to be used without disabling the pass and errors
128     // to differentiate between disabled vectorization and a width of 1.
129     ORE.emit([&]() {
130       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
131                                         "AllDisabled", L->getStartLoc(),
132                                         L->getHeader())
133              << "loop not vectorized: vectorization and interleaving are "
134                 "explicitly disabled, or the loop has already been "
135                 "vectorized";
136     });
137     return false;
138   }
139 
140   return true;
141 }
142 
143 void LoopVectorizeHints::emitRemarkWithHints() const {
144   using namespace ore;
145 
146   ORE.emit([&]() {
147     if (Force.Value == LoopVectorizeHints::FK_Disabled)
148       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
149                                       TheLoop->getStartLoc(),
150                                       TheLoop->getHeader())
151              << "loop not vectorized: vectorization is explicitly disabled";
152     else {
153       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
154                                  TheLoop->getStartLoc(), TheLoop->getHeader());
155       R << "loop not vectorized";
156       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
157         R << " (Force=" << NV("Force", true);
158         if (Width.Value != 0)
159           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
160         if (Interleave.Value != 0)
161           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
162         R << ")";
163       }
164       return R;
165     }
166   });
167 }
168 
169 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
170   if (getWidth() == 1)
171     return LV_NAME;
172   if (getForce() == LoopVectorizeHints::FK_Disabled)
173     return LV_NAME;
174   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
175     return LV_NAME;
176   return OptimizationRemarkAnalysis::AlwaysPrint;
177 }
178 
179 void LoopVectorizeHints::getHintsFromMetadata() {
180   MDNode *LoopID = TheLoop->getLoopID();
181   if (!LoopID)
182     return;
183 
184   // First operand should refer to the loop id itself.
185   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
186   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
187 
188   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
189     const MDString *S = nullptr;
190     SmallVector<Metadata *, 4> Args;
191 
192     // The expected hint is either a MDString or a MDNode with the first
193     // operand a MDString.
194     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
195       if (!MD || MD->getNumOperands() == 0)
196         continue;
197       S = dyn_cast<MDString>(MD->getOperand(0));
198       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
199         Args.push_back(MD->getOperand(i));
200     } else {
201       S = dyn_cast<MDString>(LoopID->getOperand(i));
202       assert(Args.size() == 0 && "too many arguments for MDString");
203     }
204 
205     if (!S)
206       continue;
207 
208     // Check if the hint starts with the loop metadata prefix.
209     StringRef Name = S->getString();
210     if (Args.size() == 1)
211       setHint(Name, Args[0]);
212   }
213 }
214 
215 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
216   if (!Name.startswith(Prefix()))
217     return;
218   Name = Name.substr(Prefix().size(), StringRef::npos);
219 
220   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
221   if (!C)
222     return;
223   unsigned Val = C->getZExtValue();
224 
225   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
226   for (auto H : Hints) {
227     if (Name == H->Name) {
228       if (H->validate(Val))
229         H->Value = Val;
230       else
231         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
232       break;
233     }
234   }
235 }
236 
237 bool LoopVectorizationRequirements::doesNotMeet(
238     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
239   const char *PassName = Hints.vectorizeAnalysisPassName();
240   bool Failed = false;
241   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
242     ORE.emit([&]() {
243       return OptimizationRemarkAnalysisFPCommute(
244                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
245                  UnsafeAlgebraInst->getParent())
246              << "loop not vectorized: cannot prove it is safe to reorder "
247                 "floating-point operations";
248     });
249     Failed = true;
250   }
251 
252   // Test if runtime memcheck thresholds are exceeded.
253   bool PragmaThresholdReached =
254       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
255   bool ThresholdReached =
256       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
257   if ((ThresholdReached && !Hints.allowReordering()) ||
258       PragmaThresholdReached) {
259     ORE.emit([&]() {
260       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
261                                                 L->getStartLoc(),
262                                                 L->getHeader())
263              << "loop not vectorized: cannot prove it is safe to reorder "
264                 "memory operations";
265     });
266     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
267     Failed = true;
268   }
269 
270   return Failed;
271 }
272 
273 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
274 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
275 // executing the inner loop will execute the same iterations). This check is
276 // very constrained for now but it will be relaxed in the future. \p Lp is
277 // considered uniform if it meets all the following conditions:
278 //   1) it has a canonical IV (starting from 0 and with stride 1),
279 //   2) its latch terminator is a conditional branch and,
280 //   3) its latch condition is a compare instruction whose operands are the
281 //      canonical IV and an OuterLp invariant.
282 // This check doesn't take into account the uniformity of other conditions not
283 // related to the loop latch because they don't affect the loop uniformity.
284 //
285 // NOTE: We decided to keep all these checks and its associated documentation
286 // together so that we can easily have a picture of the current supported loop
287 // nests. However, some of the current checks don't depend on \p OuterLp and
288 // would be redundantly executed for each \p Lp if we invoked this function for
289 // different candidate outer loops. This is not the case for now because we
290 // don't currently have the infrastructure to evaluate multiple candidate outer
291 // loops and \p OuterLp will be a fixed parameter while we only support explicit
292 // outer loop vectorization. It's also very likely that these checks go away
293 // before introducing the aforementioned infrastructure. However, if this is not
294 // the case, we should move the \p OuterLp independent checks to a separate
295 // function that is only executed once for each \p Lp.
296 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
297   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
298 
299   // If Lp is the outer loop, it's uniform by definition.
300   if (Lp == OuterLp)
301     return true;
302   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
303 
304   // 1.
305   PHINode *IV = Lp->getCanonicalInductionVariable();
306   if (!IV) {
307     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
308     return false;
309   }
310 
311   // 2.
312   BasicBlock *Latch = Lp->getLoopLatch();
313   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
314   if (!LatchBr || LatchBr->isUnconditional()) {
315     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
316     return false;
317   }
318 
319   // 3.
320   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
321   if (!LatchCmp) {
322     LLVM_DEBUG(
323         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
324     return false;
325   }
326 
327   Value *CondOp0 = LatchCmp->getOperand(0);
328   Value *CondOp1 = LatchCmp->getOperand(1);
329   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
330   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
331       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
332     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
333     return false;
334   }
335 
336   return true;
337 }
338 
339 // Return true if \p Lp and all its nested loops are uniform with regard to \p
340 // OuterLp.
341 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
342   if (!isUniformLoop(Lp, OuterLp))
343     return false;
344 
345   // Check if nested loops are uniform.
346   for (Loop *SubLp : *Lp)
347     if (!isUniformLoopNest(SubLp, OuterLp))
348       return false;
349 
350   return true;
351 }
352 
353 /// Check whether it is safe to if-convert this phi node.
354 ///
355 /// Phi nodes with constant expressions that can trap are not safe to if
356 /// convert.
357 static bool canIfConvertPHINodes(BasicBlock *BB) {
358   for (PHINode &Phi : BB->phis()) {
359     for (Value *V : Phi.incoming_values())
360       if (auto *C = dyn_cast<Constant>(V))
361         if (C->canTrap())
362           return false;
363   }
364   return true;
365 }
366 
367 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
368   if (Ty->isPointerTy())
369     return DL.getIntPtrType(Ty);
370 
371   // It is possible that char's or short's overflow when we ask for the loop's
372   // trip count, work around this by changing the type size.
373   if (Ty->getScalarSizeInBits() < 32)
374     return Type::getInt32Ty(Ty->getContext());
375 
376   return Ty;
377 }
378 
379 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
380   Ty0 = convertPointerToIntegerType(DL, Ty0);
381   Ty1 = convertPointerToIntegerType(DL, Ty1);
382   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
383     return Ty0;
384   return Ty1;
385 }
386 
387 /// Check that the instruction has outside loop users and is not an
388 /// identified reduction variable.
389 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
390                                SmallPtrSetImpl<Value *> &AllowedExit) {
391   // Reductions, Inductions and non-header phis are allowed to have exit users. All
392   // other instructions must not have external users.
393   if (!AllowedExit.count(Inst))
394     // Check that all of the users of the loop are inside the BB.
395     for (User *U : Inst->users()) {
396       Instruction *UI = cast<Instruction>(U);
397       // This user may be a reduction exit value.
398       if (!TheLoop->contains(UI)) {
399         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
400         return true;
401       }
402     }
403   return false;
404 }
405 
406 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
407   const ValueToValueMap &Strides =
408       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
409 
410   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
411   if (Stride == 1 || Stride == -1)
412     return Stride;
413   return 0;
414 }
415 
416 bool LoopVectorizationLegality::isUniform(Value *V) {
417   return LAI->isUniform(V);
418 }
419 
420 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
421   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
422   // Store the result and return it at the end instead of exiting early, in case
423   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
424   bool Result = true;
425   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
426 
427   for (BasicBlock *BB : TheLoop->blocks()) {
428     // Check whether the BB terminator is a BranchInst. Any other terminator is
429     // not supported yet.
430     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
431     if (!Br) {
432       reportVectorizationFailure("Unsupported basic block terminator",
433           "loop control flow is not understood by vectorizer",
434           "CFGNotUnderstood", ORE, TheLoop);
435       if (DoExtraAnalysis)
436         Result = false;
437       else
438         return false;
439     }
440 
441     // Check whether the BranchInst is a supported one. Only unconditional
442     // branches, conditional branches with an outer loop invariant condition or
443     // backedges are supported.
444     // FIXME: We skip these checks when VPlan predication is enabled as we
445     // want to allow divergent branches. This whole check will be removed
446     // once VPlan predication is on by default.
447     if (!EnableVPlanPredication && Br && Br->isConditional() &&
448         !TheLoop->isLoopInvariant(Br->getCondition()) &&
449         !LI->isLoopHeader(Br->getSuccessor(0)) &&
450         !LI->isLoopHeader(Br->getSuccessor(1))) {
451       reportVectorizationFailure("Unsupported conditional branch",
452           "loop control flow is not understood by vectorizer",
453           "CFGNotUnderstood", ORE, TheLoop);
454       if (DoExtraAnalysis)
455         Result = false;
456       else
457         return false;
458     }
459   }
460 
461   // Check whether inner loops are uniform. At this point, we only support
462   // simple outer loops scenarios with uniform nested loops.
463   if (!isUniformLoopNest(TheLoop /*loop nest*/,
464                          TheLoop /*context outer loop*/)) {
465     reportVectorizationFailure("Outer loop contains divergent loops",
466         "loop control flow is not understood by vectorizer",
467         "CFGNotUnderstood", ORE, TheLoop);
468     if (DoExtraAnalysis)
469       Result = false;
470     else
471       return false;
472   }
473 
474   // Check whether we are able to set up outer loop induction.
475   if (!setupOuterLoopInductions()) {
476     reportVectorizationFailure("Unsupported outer loop Phi(s)",
477                                "Unsupported outer loop Phi(s)",
478                                "UnsupportedPhi", ORE, TheLoop);
479     if (DoExtraAnalysis)
480       Result = false;
481     else
482       return false;
483   }
484 
485   return Result;
486 }
487 
488 void LoopVectorizationLegality::addInductionPhi(
489     PHINode *Phi, const InductionDescriptor &ID,
490     SmallPtrSetImpl<Value *> &AllowedExit) {
491   Inductions[Phi] = ID;
492 
493   // In case this induction also comes with casts that we know we can ignore
494   // in the vectorized loop body, record them here. All casts could be recorded
495   // here for ignoring, but suffices to record only the first (as it is the
496   // only one that may bw used outside the cast sequence).
497   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
498   if (!Casts.empty())
499     InductionCastsToIgnore.insert(*Casts.begin());
500 
501   Type *PhiTy = Phi->getType();
502   const DataLayout &DL = Phi->getModule()->getDataLayout();
503 
504   // Get the widest type.
505   if (!PhiTy->isFloatingPointTy()) {
506     if (!WidestIndTy)
507       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
508     else
509       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
510   }
511 
512   // Int inductions are special because we only allow one IV.
513   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
514       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
515       isa<Constant>(ID.getStartValue()) &&
516       cast<Constant>(ID.getStartValue())->isNullValue()) {
517 
518     // Use the phi node with the widest type as induction. Use the last
519     // one if there are multiple (no good reason for doing this other
520     // than it is expedient). We've checked that it begins at zero and
521     // steps by one, so this is a canonical induction variable.
522     if (!PrimaryInduction || PhiTy == WidestIndTy)
523       PrimaryInduction = Phi;
524   }
525 
526   // Both the PHI node itself, and the "post-increment" value feeding
527   // back into the PHI node may have external users.
528   // We can allow those uses, except if the SCEVs we have for them rely
529   // on predicates that only hold within the loop, since allowing the exit
530   // currently means re-using this SCEV outside the loop (see PR33706 for more
531   // details).
532   if (PSE.getUnionPredicate().isAlwaysTrue()) {
533     AllowedExit.insert(Phi);
534     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
535   }
536 
537   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
538 }
539 
540 bool LoopVectorizationLegality::setupOuterLoopInductions() {
541   BasicBlock *Header = TheLoop->getHeader();
542 
543   // Returns true if a given Phi is a supported induction.
544   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
545     InductionDescriptor ID;
546     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
547         ID.getKind() == InductionDescriptor::IK_IntInduction) {
548       addInductionPhi(&Phi, ID, AllowedExit);
549       return true;
550     } else {
551       // Bail out for any Phi in the outer loop header that is not a supported
552       // induction.
553       LLVM_DEBUG(
554           dbgs()
555           << "LV: Found unsupported PHI for outer loop vectorization.\n");
556       return false;
557     }
558   };
559 
560   if (llvm::all_of(Header->phis(), isSupportedPhi))
561     return true;
562   else
563     return false;
564 }
565 
566 bool LoopVectorizationLegality::canVectorizeInstrs() {
567   BasicBlock *Header = TheLoop->getHeader();
568 
569   // Look for the attribute signaling the absence of NaNs.
570   Function &F = *Header->getParent();
571   HasFunNoNaNAttr =
572       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
573 
574   // For each block in the loop.
575   for (BasicBlock *BB : TheLoop->blocks()) {
576     // Scan the instructions in the block and look for hazards.
577     for (Instruction &I : *BB) {
578       if (auto *Phi = dyn_cast<PHINode>(&I)) {
579         Type *PhiTy = Phi->getType();
580         // Check that this PHI type is allowed.
581         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
582             !PhiTy->isPointerTy()) {
583           reportVectorizationFailure("Found a non-int non-pointer PHI",
584                                      "loop control flow is not understood by vectorizer",
585                                      "CFGNotUnderstood", ORE, TheLoop);
586           return false;
587         }
588 
589         // If this PHINode is not in the header block, then we know that we
590         // can convert it to select during if-conversion. No need to check if
591         // the PHIs in this block are induction or reduction variables.
592         if (BB != Header) {
593           // Non-header phi nodes that have outside uses can be vectorized. Add
594           // them to the list of allowed exits.
595           // Unsafe cyclic dependencies with header phis are identified during
596           // legalization for reduction, induction and first order
597           // recurrences.
598           AllowedExit.insert(&I);
599           continue;
600         }
601 
602         // We only allow if-converted PHIs with exactly two incoming values.
603         if (Phi->getNumIncomingValues() != 2) {
604           reportVectorizationFailure("Found an invalid PHI",
605               "loop control flow is not understood by vectorizer",
606               "CFGNotUnderstood", ORE, TheLoop, Phi);
607           return false;
608         }
609 
610         RecurrenceDescriptor RedDes;
611         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
612                                                  DT)) {
613           if (RedDes.hasUnsafeAlgebra())
614             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
615           AllowedExit.insert(RedDes.getLoopExitInstr());
616           Reductions[Phi] = RedDes;
617           continue;
618         }
619 
620         // TODO: Instead of recording the AllowedExit, it would be good to record the
621         // complementary set: NotAllowedExit. These include (but may not be
622         // limited to):
623         // 1. Reduction phis as they represent the one-before-last value, which
624         // is not available when vectorized
625         // 2. Induction phis and increment when SCEV predicates cannot be used
626         // outside the loop - see addInductionPhi
627         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
628         // outside the loop - see call to hasOutsideLoopUser in the non-phi
629         // handling below
630         // 4. FirstOrderRecurrence phis that can possibly be handled by
631         // extraction.
632         // By recording these, we can then reason about ways to vectorize each
633         // of these NotAllowedExit.
634         InductionDescriptor ID;
635         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
636           addInductionPhi(Phi, ID, AllowedExit);
637           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
638             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
639           continue;
640         }
641 
642         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
643                                                          SinkAfter, DT)) {
644           FirstOrderRecurrences.insert(Phi);
645           continue;
646         }
647 
648         // As a last resort, coerce the PHI to a AddRec expression
649         // and re-try classifying it a an induction PHI.
650         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
651           addInductionPhi(Phi, ID, AllowedExit);
652           continue;
653         }
654 
655         reportVectorizationFailure("Found an unidentified PHI",
656             "value that could not be identified as "
657             "reduction is used outside the loop",
658             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
659         return false;
660       } // end of PHI handling
661 
662       // We handle calls that:
663       //   * Are debug info intrinsics.
664       //   * Have a mapping to an IR intrinsic.
665       //   * Have a vector version available.
666       auto *CI = dyn_cast<CallInst>(&I);
667       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
668           !isa<DbgInfoIntrinsic>(CI) &&
669           !(CI->getCalledFunction() && TLI &&
670             TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
671         // If the call is a recognized math libary call, it is likely that
672         // we can vectorize it given loosened floating-point constraints.
673         LibFunc Func;
674         bool IsMathLibCall =
675             TLI && CI->getCalledFunction() &&
676             CI->getType()->isFloatingPointTy() &&
677             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
678             TLI->hasOptimizedCodeGen(Func);
679 
680         if (IsMathLibCall) {
681           // TODO: Ideally, we should not use clang-specific language here,
682           // but it's hard to provide meaningful yet generic advice.
683           // Also, should this be guarded by allowExtraAnalysis() and/or be part
684           // of the returned info from isFunctionVectorizable()?
685           reportVectorizationFailure("Found a non-intrinsic callsite",
686               "library call cannot be vectorized. "
687               "Try compiling with -fno-math-errno, -ffast-math, "
688               "or similar flags",
689               "CantVectorizeLibcall", ORE, TheLoop, CI);
690         } else {
691           reportVectorizationFailure("Found a non-intrinsic callsite",
692                                      "call instruction cannot be vectorized",
693                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
694         }
695         return false;
696       }
697 
698       // Some intrinsics have scalar arguments and should be same in order for
699       // them to be vectorized (i.e. loop invariant).
700       if (CI) {
701         auto *SE = PSE.getSE();
702         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
703         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
704           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
705             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
706               reportVectorizationFailure("Found unvectorizable intrinsic",
707                   "intrinsic instruction cannot be vectorized",
708                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
709               return false;
710             }
711           }
712       }
713 
714       // Check that the instruction return type is vectorizable.
715       // Also, we can't vectorize extractelement instructions.
716       if ((!VectorType::isValidElementType(I.getType()) &&
717            !I.getType()->isVoidTy()) ||
718           isa<ExtractElementInst>(I)) {
719         reportVectorizationFailure("Found unvectorizable type",
720             "instruction return type cannot be vectorized",
721             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
722         return false;
723       }
724 
725       // Check that the stored type is vectorizable.
726       if (auto *ST = dyn_cast<StoreInst>(&I)) {
727         Type *T = ST->getValueOperand()->getType();
728         if (!VectorType::isValidElementType(T)) {
729           reportVectorizationFailure("Store instruction cannot be vectorized",
730                                      "store instruction cannot be vectorized",
731                                      "CantVectorizeStore", ORE, TheLoop, ST);
732           return false;
733         }
734 
735         // For nontemporal stores, check that a nontemporal vector version is
736         // supported on the target.
737         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
738           // Arbitrarily try a vector of 2 elements.
739           Type *VecTy = VectorType::get(T, /*NumElements=*/2);
740           assert(VecTy && "did not find vectorized version of stored type");
741           unsigned Alignment = getLoadStoreAlignment(ST);
742           assert(Alignment && "Alignment should be set");
743           if (!TTI->isLegalNTStore(VecTy, llvm::Align(Alignment))) {
744             reportVectorizationFailure(
745                 "nontemporal store instruction cannot be vectorized",
746                 "nontemporal store instruction cannot be vectorized",
747                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
748             return false;
749           }
750         }
751 
752       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
753         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
754           // For nontemporal loads, check that a nontemporal vector version is
755           // supported on the target (arbitrarily try a vector of 2 elements).
756           Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
757           assert(VecTy && "did not find vectorized version of load type");
758           unsigned Alignment = getLoadStoreAlignment(LD);
759           assert(Alignment && "Alignment should be set");
760           if (!TTI->isLegalNTLoad(VecTy, llvm::Align(Alignment))) {
761             reportVectorizationFailure(
762                 "nontemporal load instruction cannot be vectorized",
763                 "nontemporal load instruction cannot be vectorized",
764                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
765             return false;
766           }
767         }
768 
769         // FP instructions can allow unsafe algebra, thus vectorizable by
770         // non-IEEE-754 compliant SIMD units.
771         // This applies to floating-point math operations and calls, not memory
772         // operations, shuffles, or casts, as they don't change precision or
773         // semantics.
774       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
775                  !I.isFast()) {
776         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
777         Hints->setPotentiallyUnsafe();
778       }
779 
780       // Reduction instructions are allowed to have exit users.
781       // All other instructions must not have external users.
782       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
783         // We can safely vectorize loops where instructions within the loop are
784         // used outside the loop only if the SCEV predicates within the loop is
785         // same as outside the loop. Allowing the exit means reusing the SCEV
786         // outside the loop.
787         if (PSE.getUnionPredicate().isAlwaysTrue()) {
788           AllowedExit.insert(&I);
789           continue;
790         }
791         reportVectorizationFailure("Value cannot be used outside the loop",
792                                    "value cannot be used outside the loop",
793                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
794         return false;
795       }
796     } // next instr.
797   }
798 
799   if (!PrimaryInduction) {
800     if (Inductions.empty()) {
801       reportVectorizationFailure("Did not find one integer induction var",
802           "loop induction variable could not be identified",
803           "NoInductionVariable", ORE, TheLoop);
804       return false;
805     } else if (!WidestIndTy) {
806       reportVectorizationFailure("Did not find one integer induction var",
807           "integer loop induction variable could not be identified",
808           "NoIntegerInductionVariable", ORE, TheLoop);
809       return false;
810     } else {
811       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
812     }
813   }
814 
815   // Now we know the widest induction type, check if our found induction
816   // is the same size. If it's not, unset it here and InnerLoopVectorizer
817   // will create another.
818   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
819     PrimaryInduction = nullptr;
820 
821   return true;
822 }
823 
824 bool LoopVectorizationLegality::canVectorizeMemory() {
825   LAI = &(*GetLAA)(*TheLoop);
826   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
827   if (LAR) {
828     ORE->emit([&]() {
829       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
830                                         "loop not vectorized: ", *LAR);
831     });
832   }
833   if (!LAI->canVectorizeMemory())
834     return false;
835 
836   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
837     reportVectorizationFailure("Stores to a uniform address",
838         "write to a loop invariant address could not be vectorized",
839         "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
840     return false;
841   }
842   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
843   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
844 
845   return true;
846 }
847 
848 bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
849   Value *In0 = const_cast<Value *>(V);
850   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
851   if (!PN)
852     return false;
853 
854   return Inductions.count(PN);
855 }
856 
857 bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
858   auto *Inst = dyn_cast<Instruction>(V);
859   return (Inst && InductionCastsToIgnore.count(Inst));
860 }
861 
862 bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
863   return isInductionPhi(V) || isCastedInductionVariable(V);
864 }
865 
866 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
867   return FirstOrderRecurrences.count(Phi);
868 }
869 
870 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
871   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
872 }
873 
874 bool LoopVectorizationLegality::blockCanBePredicated(
875     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
876   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
877 
878   for (Instruction &I : *BB) {
879     // Check that we don't have a constant expression that can trap as operand.
880     for (Value *Operand : I.operands()) {
881       if (auto *C = dyn_cast<Constant>(Operand))
882         if (C->canTrap())
883           return false;
884     }
885     // We might be able to hoist the load.
886     if (I.mayReadFromMemory()) {
887       auto *LI = dyn_cast<LoadInst>(&I);
888       if (!LI)
889         return false;
890       if (!SafePtrs.count(LI->getPointerOperand())) {
891         // !llvm.mem.parallel_loop_access implies if-conversion safety.
892         // Otherwise, record that the load needs (real or emulated) masking
893         // and let the cost model decide.
894         if (!IsAnnotatedParallel || PreserveGuards)
895           MaskedOp.insert(LI);
896         continue;
897       }
898     }
899 
900     if (I.mayWriteToMemory()) {
901       auto *SI = dyn_cast<StoreInst>(&I);
902       if (!SI)
903         return false;
904       // Predicated store requires some form of masking:
905       // 1) masked store HW instruction,
906       // 2) emulation via load-blend-store (only if safe and legal to do so,
907       //    be aware on the race conditions), or
908       // 3) element-by-element predicate check and scalar store.
909       MaskedOp.insert(SI);
910       continue;
911     }
912     if (I.mayThrow())
913       return false;
914   }
915 
916   return true;
917 }
918 
919 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
920   if (!EnableIfConversion) {
921     reportVectorizationFailure("If-conversion is disabled",
922                                "if-conversion is disabled",
923                                "IfConversionDisabled",
924                                ORE, TheLoop);
925     return false;
926   }
927 
928   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
929 
930   // A list of pointers which are known to be dereferenceable within scope of
931   // the loop body for each iteration of the loop which executes.  That is,
932   // the memory pointed to can be dereferenced (with the access size implied by
933   // the value's type) unconditionally within the loop header without
934   // introducing a new fault.
935   SmallPtrSet<Value *, 8> SafePointes;
936 
937   // Collect safe addresses.
938   for (BasicBlock *BB : TheLoop->blocks()) {
939     if (blockNeedsPredication(BB))
940       continue;
941 
942     for (Instruction &I : *BB)
943       if (auto *Ptr = getLoadStorePointerOperand(&I))
944         SafePointes.insert(Ptr);
945   }
946 
947   // Collect the blocks that need predication.
948   BasicBlock *Header = TheLoop->getHeader();
949   for (BasicBlock *BB : TheLoop->blocks()) {
950     // We don't support switch statements inside loops.
951     if (!isa<BranchInst>(BB->getTerminator())) {
952       reportVectorizationFailure("Loop contains a switch statement",
953                                  "loop contains a switch statement",
954                                  "LoopContainsSwitch", ORE, TheLoop,
955                                  BB->getTerminator());
956       return false;
957     }
958 
959     // We must be able to predicate all blocks that need to be predicated.
960     if (blockNeedsPredication(BB)) {
961       if (!blockCanBePredicated(BB, SafePointes)) {
962         reportVectorizationFailure(
963             "Control flow cannot be substituted for a select",
964             "control flow cannot be substituted for a select",
965             "NoCFGForSelect", ORE, TheLoop,
966             BB->getTerminator());
967         return false;
968       }
969     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
970       reportVectorizationFailure(
971           "Control flow cannot be substituted for a select",
972           "control flow cannot be substituted for a select",
973           "NoCFGForSelect", ORE, TheLoop,
974           BB->getTerminator());
975       return false;
976     }
977   }
978 
979   // We can if-convert this loop.
980   return true;
981 }
982 
983 // Helper function to canVectorizeLoopNestCFG.
984 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
985                                                     bool UseVPlanNativePath) {
986   assert((UseVPlanNativePath || Lp->empty()) &&
987          "VPlan-native path is not enabled.");
988 
989   // TODO: ORE should be improved to show more accurate information when an
990   // outer loop can't be vectorized because a nested loop is not understood or
991   // legal. Something like: "outer_loop_location: loop not vectorized:
992   // (inner_loop_location) loop control flow is not understood by vectorizer".
993 
994   // Store the result and return it at the end instead of exiting early, in case
995   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
996   bool Result = true;
997   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
998 
999   // We must have a loop in canonical form. Loops with indirectbr in them cannot
1000   // be canonicalized.
1001   if (!Lp->getLoopPreheader()) {
1002     reportVectorizationFailure("Loop doesn't have a legal pre-header",
1003         "loop control flow is not understood by vectorizer",
1004         "CFGNotUnderstood", ORE, TheLoop);
1005     if (DoExtraAnalysis)
1006       Result = false;
1007     else
1008       return false;
1009   }
1010 
1011   // We must have a single backedge.
1012   if (Lp->getNumBackEdges() != 1) {
1013     reportVectorizationFailure("The loop must have a single backedge",
1014         "loop control flow is not understood by vectorizer",
1015         "CFGNotUnderstood", ORE, TheLoop);
1016     if (DoExtraAnalysis)
1017       Result = false;
1018     else
1019       return false;
1020   }
1021 
1022   // We must have a single exiting block.
1023   if (!Lp->getExitingBlock()) {
1024     reportVectorizationFailure("The loop must have an exiting block",
1025         "loop control flow is not understood by vectorizer",
1026         "CFGNotUnderstood", ORE, TheLoop);
1027     if (DoExtraAnalysis)
1028       Result = false;
1029     else
1030       return false;
1031   }
1032 
1033   // We only handle bottom-tested loops, i.e. loop in which the condition is
1034   // checked at the end of each iteration. With that we can assume that all
1035   // instructions in the loop are executed the same number of times.
1036   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1037     reportVectorizationFailure("The exiting block is not the loop latch",
1038         "loop control flow is not understood by vectorizer",
1039         "CFGNotUnderstood", ORE, TheLoop);
1040     if (DoExtraAnalysis)
1041       Result = false;
1042     else
1043       return false;
1044   }
1045 
1046   return Result;
1047 }
1048 
1049 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1050     Loop *Lp, bool UseVPlanNativePath) {
1051   // Store the result and return it at the end instead of exiting early, in case
1052   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1053   bool Result = true;
1054   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1055   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1056     if (DoExtraAnalysis)
1057       Result = false;
1058     else
1059       return false;
1060   }
1061 
1062   // Recursively check whether the loop control flow of nested loops is
1063   // understood.
1064   for (Loop *SubLp : *Lp)
1065     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1066       if (DoExtraAnalysis)
1067         Result = false;
1068       else
1069         return false;
1070     }
1071 
1072   return Result;
1073 }
1074 
1075 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1076   // Store the result and return it at the end instead of exiting early, in case
1077   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1078   bool Result = true;
1079 
1080   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1081   // Check whether the loop-related control flow in the loop nest is expected by
1082   // vectorizer.
1083   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1084     if (DoExtraAnalysis)
1085       Result = false;
1086     else
1087       return false;
1088   }
1089 
1090   // We need to have a loop header.
1091   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1092                     << '\n');
1093 
1094   // Specific checks for outer loops. We skip the remaining legal checks at this
1095   // point because they don't support outer loops.
1096   if (!TheLoop->empty()) {
1097     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1098 
1099     if (!canVectorizeOuterLoop()) {
1100       reportVectorizationFailure("Unsupported outer loop",
1101                                  "unsupported outer loop",
1102                                  "UnsupportedOuterLoop",
1103                                  ORE, TheLoop);
1104       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1105       // outer loops.
1106       return false;
1107     }
1108 
1109     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1110     return Result;
1111   }
1112 
1113   assert(TheLoop->empty() && "Inner loop expected.");
1114   // Check if we can if-convert non-single-bb loops.
1115   unsigned NumBlocks = TheLoop->getNumBlocks();
1116   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1117     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1118     if (DoExtraAnalysis)
1119       Result = false;
1120     else
1121       return false;
1122   }
1123 
1124   // Check if we can vectorize the instructions and CFG in this loop.
1125   if (!canVectorizeInstrs()) {
1126     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1127     if (DoExtraAnalysis)
1128       Result = false;
1129     else
1130       return false;
1131   }
1132 
1133   // Go over each instruction and look at memory deps.
1134   if (!canVectorizeMemory()) {
1135     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1136     if (DoExtraAnalysis)
1137       Result = false;
1138     else
1139       return false;
1140   }
1141 
1142   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1143                     << (LAI->getRuntimePointerChecking()->Need
1144                             ? " (with a runtime bound check)"
1145                             : "")
1146                     << "!\n");
1147 
1148   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1149   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1150     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1151 
1152   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1153     reportVectorizationFailure("Too many SCEV checks needed",
1154         "Too many SCEV assumptions need to be made and checked at runtime",
1155         "TooManySCEVRunTimeChecks", ORE, TheLoop);
1156     if (DoExtraAnalysis)
1157       Result = false;
1158     else
1159       return false;
1160   }
1161 
1162   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1163   // we can vectorize, and at this point we don't have any other mem analysis
1164   // which may limit our maximum vectorization factor, so just return true with
1165   // no restrictions.
1166   return Result;
1167 }
1168 
1169 bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1170 
1171   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1172 
1173   if (!PrimaryInduction) {
1174     reportVectorizationFailure(
1175         "No primary induction, cannot fold tail by masking",
1176         "Missing a primary induction variable in the loop, which is "
1177         "needed in order to fold tail by masking as required.",
1178         "NoPrimaryInduction", ORE, TheLoop);
1179     return false;
1180   }
1181 
1182   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1183 
1184   for (auto &Reduction : *getReductionVars())
1185     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1186 
1187   // TODO: handle non-reduction outside users when tail is folded by masking.
1188   for (auto *AE : AllowedExit) {
1189     // Check that all users of allowed exit values are inside the loop or
1190     // are the live-out of a reduction.
1191     if (ReductionLiveOuts.count(AE))
1192       continue;
1193     for (User *U : AE->users()) {
1194       Instruction *UI = cast<Instruction>(U);
1195       if (TheLoop->contains(UI))
1196         continue;
1197       reportVectorizationFailure(
1198           "Cannot fold tail by masking, loop has an outside user for",
1199           "Cannot fold tail by masking in the presence of live outs.",
1200           "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1201       return false;
1202     }
1203   }
1204 
1205   // The list of pointers that we can safely read and write to remains empty.
1206   SmallPtrSet<Value *, 8> SafePointers;
1207 
1208   // Check and mark all blocks for predication, including those that ordinarily
1209   // do not need predication such as the header block.
1210   for (BasicBlock *BB : TheLoop->blocks()) {
1211     if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
1212       reportVectorizationFailure(
1213           "Cannot fold tail by masking as required",
1214           "control flow cannot be substituted for a select",
1215           "NoCFGForSelect", ORE, TheLoop,
1216           BB->getTerminator());
1217       return false;
1218     }
1219   }
1220 
1221   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1222   return true;
1223 }
1224 
1225 } // namespace llvm
1226